import importlib_resources
import numpy as np
import scipy.linalg as linalg
from matplotlib.pyplot import figure, plot, show, xlabel, ylabel
from scipy.io import loadmat
from sklearn.neighbors import KNeighborsClassifier
filename = importlib_resources.files("dtuimldmtools").joinpath("data/zipdata.mat")
# Number of principal components to use for classification,
# i.e. the reduced dimensionality (example values; adjust as needed)
K = [8, 10, 15, 20, 30, 40, 50, 60, 100, 150]
# Load Matlab data file and extract training and test sets
mat_data = loadmat(filename)
X = mat_data["traindata"][:, 1:]
y = mat_data["traindata"][:, 0]
Xtest = mat_data["testdata"][:, 1:]
ytest = mat_data["testdata"][:, 0]
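# (Added note: zipdata.mat contains handwritten digit images; in both the
# training and test matrices, column 0 holds the class label and the
# remaining columns hold the pixel intensities.)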
N, M = X.shape
Ntest = Xtest.shape[0]  # number of test observations
# Subtract the training-set mean from both sets (the test data must be
# centered with the training mean, not its own)
Y = X - np.ones((N, 1)) * X.mean(0)
Ytest = Xtest - np.ones((Ntest, 1)) * X.mean(0)
# Obtain the PCA solution by calculating the SVD of the centered data
U, S, V = linalg.svd(Y, full_matrices=False)
V = V.T  # columns of V are now the principal directions
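# Optional diagnostic (an addition, not part of the original script): the
# squared singular values are proportional to the variance explained by
# each principal component, which can guide the choice of K.
rho = (S * S) / (S * S).sum()
print("Variance explained by the first 10 PCs: {0:.1f}%".format(100 * rho[:10].sum()))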
# Repeat classification for different values of K
error_rates = []
for k in K:
    # Project training and test data onto the first k principal components
    Z = Y @ V[:, :k]
    Ztest = Ytest @ V[:, :k]

    # Classify data with a 1-nearest-neighbour classifier
    knn_classifier = KNeighborsClassifier(n_neighbors=1)
    knn_classifier.fit(Z, y.ravel())
    y_estimated = knn_classifier.predict(Ztest)

    # Compute classification error rate in percent
    er = 100 * np.sum(ytest != y_estimated) / float(len(ytest))
    error_rates.append(er)
    print("K={0}: error rate {1:.1f}%".format(k, er))
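
# Convenience summary (an added sketch, not part of the original script):
# report the number of components that gave the lowest test error.
best = int(np.argmin(error_rates))
print("Lowest error rate: {0:.1f}% at K={1}".format(error_rates[best], K[best]))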
# Visualize error rates vs. number of principal components considered
figure()
plot(K, error_rates, "o-")
xlabel("Number of principal components K")
ylabel("Error rate [%]")
show()