Approximate Feature Maps for RBF Kernels with SVM Classifiers

In machine learning, the support vector machine (SVM) is a widely used classification algorithm. However, a kernelized SVM can become computationally expensive as the training set grows. To address this, kernel approximation techniques such as RBFSampler and Nystroem can be used to approximate the feature map of the RBF kernel, so that a fast linear SVM can be trained on the transformed features. This article shows how to apply these techniques to the handwritten digits dataset and compares the performance of the different approaches.

Dataset and Model Preparation

First, import the necessary Python libraries and load the handwritten digits dataset. The dataset contains 1,797 handwritten digit images of 8x8 pixels, each flattened into a 64-dimensional feature vector. This dataset will be used to train and test the classifiers.

from sklearn import datasets, svm
from sklearn.kernel_approximation import RBFSampler, Nystroem
import numpy as np
import time

# Load the handwritten digits dataset and scale pixel values to [0, 1]
digits = datasets.load_digits()
data = digits.data / 16.0
data -= data.mean(axis=0)  # center each feature

# Split into training and test halves
n_samples = len(digits.data)
data_train, targets_train = data[:n_samples // 2], digits.target[:n_samples // 2]
data_test, targets_test = data[n_samples // 2:], digits.target[n_samples // 2:]

Approximate Feature Maps

RBFSampler and Nystroem are two common kernel approximation methods. RBFSampler approximates the RBF kernel with random Fourier features, while Nystroem approximates the kernel matrix from a subset of the training set. Both methods reduce the computational cost of the SVM, though possibly at the expense of some classification accuracy.

from sklearn.pipeline import Pipeline

# Create the RBFSampler and Nystroem approximators
feature_map_fourier = RBFSampler(gamma=0.2, random_state=1)
feature_map_nystroem = Nystroem(gamma=0.2, random_state=1)

# A linear SVM on the raw features, as a baseline
linear_svm = svm.LinearSVC(random_state=42)

# Pipelines that first apply an approximate feature map,
# then fit a linear SVM on the transformed features
fourier_approx_svm = Pipeline([
    ("feature_map", feature_map_fourier),
    ("svm", svm.LinearSVC(random_state=42)),
])
nystroem_approx_svm = Pipeline([
    ("feature_map", feature_map_nystroem),
    ("svm", svm.LinearSVC(random_state=42)),
])
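Before wiring these approximators into the pipelines above, it can be reassuring to check how closely the inner products of the transformed features reproduce the exact kernel values. The following is a minimal sanity check, not part of the original experiment; the subset size of 200 and n_components=300 are arbitrary illustrative choices:

from sklearn.metrics.pairwise import rbf_kernel

# Exact RBF kernel values on a small subset of the training data
subset = data_train[:200]
K_exact = rbf_kernel(subset, gamma=0.2)

# With random Fourier features, inner products of the transformed
# features approximate the exact kernel values
sampler = RBFSampler(gamma=0.2, n_components=300, random_state=1)
Z = sampler.fit_transform(subset)
K_approx = Z @ Z.T

# Mean absolute error between exact and approximate kernel entries
print("mean abs. error:", np.abs(K_exact - K_approx).mean())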

Performance Comparison

We compare the performance of a linear SVM in the original feature space, linear SVMs on the approximate feature maps, and a kernelized SVM. The results show that as the dimensionality of the approximation increases, classification accuracy approaches that of the exact kernel, but training time grows as well. This illustrates the trade-off between runtime and classification accuracy.

import matplotlib.pyplot as plt

# Train and evaluate the exact-kernel SVM
kernel_svm = svm.SVC(gamma=0.2)
kernel_svm_time = time.time()
kernel_svm.fit(data_train, targets_train)
kernel_svm_score = kernel_svm.score(data_test, targets_test)
kernel_svm_time = time.time() - kernel_svm_time

# Train and evaluate the linear SVM on the raw features
linear_svm_time = time.time()
linear_svm.fit(data_train, targets_train)
linear_svm_score = linear_svm.score(data_test, targets_test)
linear_svm_time = time.time() - linear_svm_time

# Sweep the number of approximation components, recording scores and times
sample_sizes = 30 * np.arange(1, 10)
fourier_scores = []
nystroem_scores = []
fourier_times = []
nystroem_times = []
for D in sample_sizes:
    fourier_approx_svm.set_params(feature_map__n_components=D)
    nystroem_approx_svm.set_params(feature_map__n_components=D)

    start = time.time()
    nystroem_approx_svm.fit(data_train, targets_train)
    nystroem_times.append(time.time() - start)

    start = time.time()
    fourier_approx_svm.fit(data_train, targets_train)
    fourier_times.append(time.time() - start)

    fourier_scores.append(fourier_approx_svm.score(data_test, targets_test))
    nystroem_scores.append(nystroem_approx_svm.score(data_test, targets_test))

# Plot accuracy and training time against the approximation dimension
plt.figure(figsize=(16, 4))
accuracy = plt.subplot(121)
timescale = plt.subplot(122)

accuracy.plot(sample_sizes, nystroem_scores, label="Nystroem approx. kernel")
timescale.plot(sample_sizes, nystroem_times, "--", label="Nystroem approx. kernel")
accuracy.plot(sample_sizes, fourier_scores, label="Fourier approx. kernel")
timescale.plot(sample_sizes, fourier_times, "--", label="Fourier approx. kernel")

# Horizontal reference lines for the exact linear and rbf SVMs
accuracy.plot([sample_sizes[0], sample_sizes[-1]],
              [linear_svm_score, linear_svm_score], label="linear svm")
timescale.plot([sample_sizes[0], sample_sizes[-1]],
               [linear_svm_time, linear_svm_time], "--", label="linear svm")
accuracy.plot([sample_sizes[0], sample_sizes[-1]],
              [kernel_svm_score, kernel_svm_score], label="rbf svm")
timescale.plot([sample_sizes[0], sample_sizes[-1]],
               [kernel_svm_time, kernel_svm_time], "--", label="rbf svm")

accuracy.set_title("Classification accuracy")
timescale.set_title("Training times")
accuracy.set_xlim(sample_sizes[0], sample_sizes[-1])
accuracy.set_xticks(())
accuracy.set_ylim(np.min(fourier_scores), 1)
timescale.set_xlabel("Sampling steps = transformed feature dimension")
accuracy.set_ylabel("Classification accuracy")
timescale.set_ylabel("Training time in seconds")
accuracy.legend(loc="best")
timescale.legend(loc="best")
plt.tight_layout()
plt.show()
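The timing curves above measure training only. At prediction time the approximate pipelines can also be faster, since a fitted SVC must evaluate the kernel against all of its support vectors, whereas the pipelines need only one feature transform followed by a linear product. Below is a rough sketch for timing predictions with the models as fitted at the last loop iteration; absolute numbers will depend on your hardware:

start = time.time()
kernel_svm.predict(data_test)
print("rbf svm predict: %.4f s" % (time.time() - start))

start = time.time()
fourier_approx_svm.predict(data_test)
print("Fourier approx. predict: %.4f s" % (time.time() - start))

start = time.time()
nystroem_approx_svm.predict(data_test)
print("Nystroem approx. predict: %.4f s" % (time.time() - start))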

Decision Boundary Visualization

To get a more intuitive picture of the classifiers, we can project their decision boundaries onto the first two principal components of the data. Keep in mind that this visualization is only an interesting slice: the decision surfaces actually live in the original 64-dimensional space. In particular, a data point (drawn as a dot) is not necessarily classified into the region it appears in, because it does not lie exactly in the plane spanned by the first two principal components.

from sklearn.decomposition import PCA

# Refit the approximate pipelines at n_components=100 so that the plot
# titles below match the models being visualized (the last loop iteration
# above left them fitted at n_components=270)
fourier_approx_svm.set_params(feature_map__n_components=100)
nystroem_approx_svm.set_params(feature_map__n_components=100)
fourier_approx_svm.fit(data_train, targets_train)
nystroem_approx_svm.fit(data_train, targets_train)

# Project the data onto its principal components
pca = PCA(n_components=8, random_state=42).fit(data_train)
X = pca.transform(data_train)

# Build a 2-D grid in the plane spanned by the first two components
multiples = np.arange(-2, 2, 0.1)
first = multiples[:, np.newaxis] * pca.components_[0, :]
second = multiples[:, np.newaxis] * pca.components_[1, :]
grid = first[np.newaxis, :, :] + second[:, np.newaxis, :]
flat_grid = grid.reshape(-1, data.shape[1])

# Plot the decision surfaces (titles ordered to match the classifiers below)
titles = [
    "SVC with rbf kernel",
    "SVC (linear kernel) with Nystroem rbf feature map n_components=100",
    "SVC (linear kernel) with Fourier rbf feature map n_components=100",
]
plt.figure(figsize=(18, 7.5))
plt.rcParams.update({"font.size": 14})
for i, clf in enumerate((kernel_svm, nystroem_approx_svm, fourier_approx_svm)):
    plt.subplot(1, 3, i + 1)
    Z = clf.predict(flat_grid)
    Z = Z.reshape(grid.shape[:-1])
    levels = np.arange(10)
    lv_eps = 0.01  # shift contour levels so each digit class gets its own band
    plt.contourf(multiples, multiples, Z,
                 levels=levels - lv_eps, cmap=plt.cm.tab10,
                 vmin=0, vmax=10, alpha=0.7)
    plt.axis("off")
    # Overlay the training points, colored by their true class
    plt.scatter(X[:, 0], X[:, 1], c=targets_train, cmap=plt.cm.tab10,
                edgecolors=(0, 0, 0), vmin=0, vmax=10)
    plt.title(titles[i])
plt.tight_layout()
plt.show()
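To quantify how partial this two-dimensional slice really is, one can check how much of the data's variance the first two components capture. A quick check using the pca object fitted above:

# Fraction of total variance explained by the first two principal components
print(pca.explained_variance_ratio_[:2].sum())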