# ex2_3_1.py
    # exercise 2.3.1
    
    
    Stas Syrota's avatar
    Stas Syrota committed
    import importlib_resources
    import numpy as np
    import scipy.linalg as linalg
    from matplotlib.pyplot import figure, plot, show, xlabel, ylabel
    
    bjje's avatar
    bjje committed
    from scipy.io import loadmat
    from sklearn.neighbors import KNeighborsClassifier
    
    
    Stas Syrota's avatar
    Stas Syrota committed
    filename = importlib_resources.files("dtuimldmtools").joinpath("data/zipdata.mat")
    
    bjje's avatar
    bjje committed
    # Number of principal components to use for classification,
    # i.e. the reduced dimensionality
    
    Stas Syrota's avatar
    Stas Syrota committed
    K = [8, 10, 15, 20, 30, 40, 50, 60, 100, 150]
    
    bjje's avatar
    bjje committed
    
    # Load Matlab data file and extract training set and test set
    
    Stas Syrota's avatar
    Stas Syrota committed
    mat_data = loadmat(filename)
    X = mat_data["traindata"][:, 1:]
    y = mat_data["traindata"][:, 0]
    Xtest = mat_data["testdata"][:, 1:]
    ytest = mat_data["testdata"][:, 0]
    N, M = X.shape
    Ntest = Xtest.shape[0]  # or Xtest[:,0].shape
    
    bjje's avatar
    bjje committed
    
    # Subtract the mean from the data
    
    Stas Syrota's avatar
    Stas Syrota committed
    Y = X - np.ones((N, 1)) * X.mean(0)
    Ytest = Xtest - np.ones((Ntest, 1)) * X.mean(0)
    
    bjje's avatar
    bjje committed
    
    # Obtain the PCA solution  by calculate the SVD of Y
    
    Stas Syrota's avatar
    Stas Syrota committed
    U, S, V = linalg.svd(Y, full_matrices=False)
    
    bjje's avatar
    bjje committed
    V = V.T
    
    
    # Repeat classification for different values of K
    error_rates = []
    for k in K:
        # Project data onto principal component space,
    
    Stas Syrota's avatar
    Stas Syrota committed
        Z = Y @ V[:, :k]
        Ztest = Ytest @ V[:, :k]
    
    bjje's avatar
    bjje committed
    
        # Classify data with knn classifier
        knn_classifier = KNeighborsClassifier(n_neighbors=1)
    
    Stas Syrota's avatar
    Stas Syrota committed
        knn_classifier.fit(Z, y.ravel())
    
    bjje's avatar
    bjje committed
        y_estimated = knn_classifier.predict(Ztest)
    
        # Compute classification error rates
        y_estimated = y_estimated.T
    
    Stas Syrota's avatar
    Stas Syrota committed
        er = (sum(ytest != y_estimated) / float(len(ytest))) * 100
    
    bjje's avatar
    bjje committed
        error_rates.append(er)
    
    Stas Syrota's avatar
    Stas Syrota committed
        print("K={0}: Error rate: {1:.1f}%".format(k, er))
    
    bjje's avatar
    bjje committed
    
    # Visualize error rates vs. number of principal components considered
    figure()
    
    Stas Syrota's avatar
    Stas Syrota committed
    plot(K, error_rates, "o-")
    xlabel("Number of principal components K")
    ylabel("Error rate [%]")
    
    bjje's avatar
    bjje committed
    show()
    
    
    Stas Syrota's avatar
    Stas Syrota committed
    print("Ran Exercise 2.3.1")