Skip to content
Snippets Groups Projects
ex2_2_2.py 2.58 KiB
Newer Older
  • Learn to ignore specific revisions
  • bjje's avatar
    bjje committed
    # exercise 2.2.2
    
    Stas Syrota's avatar
    Stas Syrota committed
    import importlib_resources
    
    bjje's avatar
    bjje committed
    import numpy as np
    
    Stas Syrota's avatar
    Stas Syrota committed
    import scipy.linalg as linalg
    from matplotlib.pyplot import (
        cm,
        figure,
        imshow,
        legend,
        plot,
        show,
        subplot,
        title,
        xlabel,
        ylabel,
        yticks,
    )
    from scipy.io import loadmat
    
    filename = importlib_resources.files("dtuimldmtools").joinpath("data/zipdata.mat")
    
    bjje's avatar
    bjje committed
    
    # Digits to include in analysis (to include all, n = range(10) )
    
    Stas Syrota's avatar
    Stas Syrota committed
    n = [0, 1]
    
    bjje's avatar
    bjje committed
    # Number of principal components for reconstruction
    K = 16
    # Digits to visualize
    nD = range(6)
    
    
    # Load Matlab data file to python dict structure
    # and extract variables of interest
    
    Stas Syrota's avatar
    Stas Syrota committed
    traindata = loadmat(filename)["traindata"]
    X = traindata[:, 1:]
    y = traindata[:, 0]
    
    bjje's avatar
    bjje committed
    
    
    Stas Syrota's avatar
    Stas Syrota committed
    N, M = X.shape
    
    bjje's avatar
    bjje committed
    C = len(n)
    
    classValues = n
    classNames = [str(num) for num in n]
    
    Stas Syrota's avatar
    Stas Syrota committed
    classDict = dict(zip(classNames, classValues))
    
    bjje's avatar
    bjje committed
    
    
    # Select subset of digits classes to be inspected
    class_mask = np.zeros(N).astype(bool)
    for v in n:
    
    Stas Syrota's avatar
    Stas Syrota committed
        cmsk = y == v
    
    bjje's avatar
    bjje committed
        class_mask = class_mask | cmsk
    
    Stas Syrota's avatar
    Stas Syrota committed
    X = X[class_mask, :]
    
    bjje's avatar
    bjje committed
    y = y[class_mask]
    
    Stas Syrota's avatar
    Stas Syrota committed
    N = X.shape[0]
    
    bjje's avatar
    bjje committed
    
    # Center the data (subtract mean column values)
    
    Stas Syrota's avatar
    Stas Syrota committed
    Xc = X - np.ones((N, 1)) * X.mean(0)
    
    bjje's avatar
    bjje committed
    
    # PCA by computing SVD of Y
    
    Stas Syrota's avatar
    Stas Syrota committed
    U, S, V = linalg.svd(Xc, full_matrices=False)
    # U = mat(U)
    
    bjje's avatar
    bjje committed
    V = V.T
    
    # Compute variance explained by principal components
    
    Stas Syrota's avatar
    Stas Syrota committed
    rho = (S * S) / (S * S).sum()
    
    bjje's avatar
    bjje committed
    
    # Project data onto principal component space
    Z = Xc @ V
    
    # Plot variance explained
    figure()
    
    Stas Syrota's avatar
    Stas Syrota committed
    plot(rho, "o-")
    title("Variance explained by principal components")
    xlabel("Principal component")
    ylabel("Variance explained value")
    
    bjje's avatar
    bjje committed
    
    
    # Plot PCA of the data
    f = figure()
    
    Stas Syrota's avatar
    Stas Syrota committed
    title("pixel vectors of handwr. digits projected on PCs")
    
    bjje's avatar
    bjje committed
    for c in n:
        # select indices belonging to class c:
    
    Stas Syrota's avatar
    Stas Syrota committed
        class_mask = y == c
        plot(Z[class_mask, 0], Z[class_mask, 1], "o")
    
    bjje's avatar
    bjje committed
    legend(classNames)
    
    Stas Syrota's avatar
    Stas Syrota committed
    xlabel("PC1")
    ylabel("PC2")
    
    bjje's avatar
    bjje committed
    
    
    # Visualize the reconstructed data from the first K principal components
    # Select randomly D digits.
    
    Stas Syrota's avatar
    Stas Syrota committed
    figure(figsize=(10, 3))
    W = Z[:, range(K)] @ V[:, range(K)].T
    
    bjje's avatar
    bjje committed
    D = len(nD)
    for d in range(D):
    
    Stas Syrota's avatar
    Stas Syrota committed
        digit_ix = np.random.randint(0, N)
        subplot(2, D, int(d + 1))
        I = np.reshape(X[digit_ix, :], (16, 16))
    
    bjje's avatar
    bjje committed
        imshow(I, cmap=cm.gray_r)
    
    Stas Syrota's avatar
    Stas Syrota committed
        title("Original")
        subplot(2, D, D + d + 1)
        I = np.reshape(W[digit_ix, :] + X.mean(0), (16, 16))
    
    bjje's avatar
    bjje committed
        imshow(I, cmap=cm.gray_r)
    
    Stas Syrota's avatar
    Stas Syrota committed
        title("Reconstr.")
    
    
    bjje's avatar
    bjje committed
    
    # Visualize the pricipal components
    
    Stas Syrota's avatar
    Stas Syrota committed
    figure(figsize=(8, 6))
    
    bjje's avatar
    bjje committed
    for k in range(K):
    
    Stas Syrota's avatar
    Stas Syrota committed
        N1 = int(np.ceil(np.sqrt(K)))
        N2 = int(np.ceil(K / N1))
        subplot(N2, N1, int(k + 1))
        I = np.reshape(V[:, k], (16, 16))
    
    bjje's avatar
    bjje committed
        imshow(I, cmap=cm.hot)
    
    Stas Syrota's avatar
    Stas Syrota committed
        title("PC{0}".format(k + 1))
    
    bjje's avatar
    bjje committed
    
    # output to screen
    show()
    
    
    Stas Syrota's avatar
    Stas Syrota committed
    print("Ran Exercise 2.2.2")