diff --git a/exercises/02450Toolbox_Python/Scripts/ex3_1_2.py b/exercises/02450Toolbox_Python/Scripts/ex3_1_2.py
index 98d81d30af0dc3e90ed0b06be631b75d063ee994..2a818546aef751bad53b318dc9c980c80851adfd 100644
--- a/exercises/02450Toolbox_Python/Scripts/ex3_1_2.py
+++ b/exercises/02450Toolbox_Python/Scripts/ex3_1_2.py
@@ -2,7 +2,7 @@
 # (requires data structures from ex. 3.1.1)
 # Imports the numpy and xlrd package, then runs the ex3_1_1 code
 from ex3_1_1 import *
-from matplotlib.pyplot import figure, legend, plot, show, title, xlabel, ylabel
+import matplotlib.pyplot as plt
 
 # Data attributes to be plotted
 i = 0
@@ -10,25 +10,23 @@ j = 1
 
 ##
 # Make a simple plot of the i'th attribute against the j'th attribute
-# Notice that X is of matrix type (but it will also work with a numpy array)
-# X = np.array(X) #Try to uncomment this line
-plot(X[:, i], X[:, j], "o")
+plt.plot(X[:, i], X[:, j], "o")
 
 ##
 # Make another more fancy plot that includes legend, class labels,
 # attribute names, and a title.
-f = figure()
-title("NanoNose data")
+f = plt.figure()
+plt.title("NanoNose data")
 
 for c in range(C):
     # select indices belonging to class c:
-    class_mask = y == c
-    plot(X[class_mask, i], X[class_mask, j], "o", alpha=0.3)
+    class_mask = y == c
+    plt.plot(X[class_mask, i], X[class_mask, j], "o", alpha=0.3)
 
-legend(classNames)
-xlabel(attributeNames[i])
-ylabel(attributeNames[j])
+plt.legend(classNames)
+plt.xlabel(attributeNames[i])
+plt.ylabel(attributeNames[j])
 
 # Output result to screen
-show()
+plt.show()
 
 print("Ran Exercise 3.1.2")
diff --git a/exercises/02450Toolbox_Python/Scripts/ex3_1_3.py b/exercises/02450Toolbox_Python/Scripts/ex3_1_3.py
index 947bd45c321d6e26fe347bdd7c252ba4f3ba5ac9..d7a229c8bc50c3cb05fbd876151f10b8d9aad900 100644
--- a/exercises/02450Toolbox_Python/Scripts/ex3_1_3.py
+++ b/exercises/02450Toolbox_Python/Scripts/ex3_1_3.py
@@ -2,15 +2,22 @@
 # (requires data structures from ex. 3.1.1)
 import matplotlib.pyplot as plt
 from ex3_1_1 import *
-from scipy.linalg import svd
+from scipy.linalg import svd
 
 # Subtract mean value from data
+# Note: Here we use Y; in the book we often use X with a hat-symbol on top.
 Y = X - np.ones((N, 1)) * X.mean(axis=0)
 
 # PCA by computing SVD of Y
-U, S, V = svd(Y, full_matrices=False)
+# Note: Here we call the Sigma matrix in the SVD S for notational convenience
+U, S, Vh = svd(Y, full_matrices=False)
 
-# Compute variance explained by principal components
+# scipy.linalg.svd returns "Vh", which is the Hermitian (transpose)
+# of the matrix V. So, for us to obtain the correct V, we transpose:
+V = Vh.T
+
+# Compute variance explained by principal components
+# Note: This is an important equation, see Eq. 3.18 on page 40 in the book.
 rho = (S * S) / (S * S).sum()
 
 threshold = 0.9
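The note added to ex3_1_3.py points at the variance-explained formula, rho = (S * S) / (S * S).sum(). A minimal, self-contained sketch on synthetic data (not the NanoNose data from ex3_1_1) confirming that these fractions equal the per-component variance fractions of the projected data:

import numpy as np
from scipy.linalg import svd

# Toy data matrix, standing in for the X loaded in ex3_1_1
rng = np.random.default_rng(0)
X = rng.standard_normal((100, 4)) @ rng.standard_normal((4, 4))

# Center the data, exactly as in the exercise script
Y = X - np.ones((X.shape[0], 1)) * X.mean(axis=0)

U, S, Vh = svd(Y, full_matrices=False)
rho = (S * S) / (S * S).sum()  # variance explained (Eq. 3.18)

# Cross-check: project the data and compare variance fractions
Z = Y @ Vh.T
var = Z.var(axis=0)
assert np.allclose(rho, var / var.sum())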
diff --git a/exercises/02450Toolbox_Python/Scripts/ex3_1_4.py b/exercises/02450Toolbox_Python/Scripts/ex3_1_4.py
index 6c6e8c9547048f8c304e2361ccbadb7da535e268..75c7d53ac8690bdc91c9cf2e14d511fb5b65bca6 100644
--- a/exercises/02450Toolbox_Python/Scripts/ex3_1_4.py
+++ b/exercises/02450Toolbox_Python/Scripts/ex3_1_4.py
@@ -1,7 +1,7 @@
 # exercise 3.1.4
 # (requires data structures from ex. 3.1.1)
 from ex3_1_1 import *
-from matplotlib.pyplot import figure, legend, plot, show, title, xlabel, ylabel
+import matplotlib.pyplot as plt
 from scipy.linalg import svd
 
 # Subtract mean value from data
@@ -14,6 +14,8 @@ U, S, Vh = svd(Y, full_matrices=False)
 V = Vh.T
 
 # Project the centered data onto principal component space
+# Note: Make absolutely sure you understand what the @ symbol
+# does by inspecting the numpy documentation!
 Z = Y @ V
 
 # Indices of the principal components to be plotted
@@ -21,18 +23,18 @@ i = 0
 j = 1
 
 # Plot PCA of the data
-f = figure()
-title("NanoNose data: PCA")
+f = plt.figure()
+plt.title("NanoNose data: PCA")
 # Z = array(Z)
 for c in range(C):
     # select indices belonging to class c:
     class_mask = y == c
-    plot(Z[class_mask, i], Z[class_mask, j], "o", alpha=0.5)
-legend(classNames)
-xlabel("PC{0}".format(i + 1))
-ylabel("PC{0}".format(j + 1))
+    plt.plot(Z[class_mask, i], Z[class_mask, j], "o", alpha=0.5)
+plt.legend(classNames)
+plt.xlabel("PC{0}".format(i + 1))
+plt.ylabel("PC{0}".format(j + 1))
 
 # Output result to screen
-show()
+plt.show()
 
 print("Ran Exercise 3.1.4")
diff --git a/exercises/02450Toolbox_Python/Scripts/ex3_1_5.py b/exercises/02450Toolbox_Python/Scripts/ex3_1_5.py
index 19a444591042a9c057de09fd9834b6a1464cbb4f..b11e1afa463cc5143375ae6d7f4c5abea244ee37 100644
--- a/exercises/02450Toolbox_Python/Scripts/ex3_1_5.py
+++ b/exercises/02450Toolbox_Python/Scripts/ex3_1_5.py
@@ -14,11 +14,12 @@ N, M = X.shape
 # percent of the variance. Let's look at their coefficients:
 pcs = [0, 1, 2]
 legendStrs = ["PC" + str(e + 1) for e in pcs]
-c = ["r", "g", "b"]
 bw = 0.2
 r = np.arange(1, M + 1)
+
 for i in pcs:
     plt.bar(r + i * bw, V[:, i], width=bw)
+
 plt.xticks(r + bw, attributeNames)
 plt.xlabel("Attributes")
 plt.ylabel("Component coefficients")
@@ -48,6 +49,5 @@ print(all_water_data[0, :])
 # coefficient and the attribute!
 
 # You can determine the projection by (remove comments):
-print("...and its projection onto PC2")
-print(all_water_data[0, :] @ V[:, 1])
-# Try to explain why?
+# print("...and its projection onto PC2")
+# print(all_water_data[0, :] @ V[:, 1])
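On the new note in ex3_1_4.py about the @ symbol: for 2-D numpy arrays, @ performs matrix multiplication (np.matmul). A small sketch with toy shapes, mirroring the projection step Z = Y @ V (the shapes here are made up for illustration):

import numpy as np

rng = np.random.default_rng(1)
Y = rng.standard_normal((5, 3))  # toy N x M centered data
V = rng.standard_normal((3, 3))  # toy M x M matrix of principal directions

Z = Y @ V                        # N x M projection, one row per observation
assert Z.shape == (5, 3)
assert np.allclose(Z, np.matmul(Y, V))  # @ is matmul for 2-D arrays
assert np.allclose(Z[0], Y[0] @ V)      # one observation projected alone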
+# print("...and its projection onto PC2") +# print(all_water_data[0, :] @ V[:, 1]) diff --git a/exercises/02450Toolbox_Python/Scripts/ex3_1_6.py b/exercises/02450Toolbox_Python/Scripts/ex3_1_6.py index 3045c7912e4b7912bf888484d56fe0fba6ce3080..493c29fbcd5015a1aabad606d8ee45f46ef40f61 100644 --- a/exercises/02450Toolbox_Python/Scripts/ex3_1_6.py +++ b/exercises/02450Toolbox_Python/Scripts/ex3_1_6.py @@ -65,7 +65,7 @@ for k in range(2): for c in range(C): plt.plot(Z[y == c, i], Z[y == c, j], ".", alpha=0.5) plt.xlabel("PC" + str(i + 1)) - plt.xlabel("PC" + str(j + 1)) + plt.ylabel("PC" + str(j + 1)) plt.title(titles[k] + "\n" + "Projection") plt.legend(classNames) plt.axis("equal") diff --git a/exercises/02450Toolbox_Python/Scripts/ex3_2_1.py b/exercises/02450Toolbox_Python/Scripts/ex3_2_1.py index 88cd3ccb7a1c413fccfe4e9444494d953167118e..69537b2142db1aae35b63578c4e02c57aca9cc2e 100644 --- a/exercises/02450Toolbox_Python/Scripts/ex3_2_1.py +++ b/exercises/02450Toolbox_Python/Scripts/ex3_2_1.py @@ -1,7 +1,7 @@ ## exercise 3.2.1 import importlib_resources import numpy as np -from matplotlib.pyplot import cm, figure, imshow, show, subplot, title, xlabel, yticks +import matplotlib.pyplot as plt from scipy.io import loadmat filename = importlib_resources.files("dtuimldmtools").joinpath("data/zipdata.mat") @@ -19,19 +19,19 @@ y = traindata[:, 0] # Visualize the i'th digit as a vector -f = figure() -subplot(4, 1, 4) -imshow(np.expand_dims(X[i, :], axis=0), extent=(0, 256, 0, 10), cmap=cm.gray_r) -xlabel("Pixel number") -title("Digit in vector format") -yticks([]) +f = plt.figure() +plt.subplot(4, 1, 4) +plt.imshow(np.expand_dims(X[i, :], axis=0), extent=(0, 256, 0, 10), cmap=plt.cm.gray_r) +plt.xlabel("Pixel number") +plt.title("Digit in vector format") +plt.yticks([]) # Visualize the i'th digit as an image -subplot(2, 1, 1) +plt.subplot(2, 1, 1) I = np.reshape(X[i, :], (16, 16)) -imshow(I, extent=(0, 16, 0, 16), cmap=cm.gray_r) -title("Digit as an image") +plt.imshow(I, extent=(0, 16, 0, 16), cmap=plt.cm.gray_r) +plt.title("Digit as an image") -show() +plt.show() diff --git a/exercises/02450Toolbox_Python/Scripts/ex3_2_2.py b/exercises/02450Toolbox_Python/Scripts/ex3_2_2.py index e0b1f0857b4a66ec97aaea0653f689c4225bf740..44a2c874d78c2935dadb20ee2e5578eb919aa0e5 100644 --- a/exercises/02450Toolbox_Python/Scripts/ex3_2_2.py +++ b/exercises/02450Toolbox_Python/Scripts/ex3_2_2.py @@ -1,20 +1,8 @@ # exercise 3.2.2 import importlib_resources import numpy as np -import scipy.linalg as linalg -from matplotlib.pyplot import ( - cm, - figure, - imshow, - legend, - plot, - show, - subplot, - title, - xlabel, - ylabel, - yticks, -) +from scipy.linalg import svd +import matplotlib.pyplot as plt from scipy.io import loadmat filename = importlib_resources.files("dtuimldmtools").joinpath("data/zipdata.mat") @@ -26,7 +14,6 @@ K = 16 # Digits to visualize nD = range(6) - # Load Matlab data file to python dict structure # and extract variables of interest traindata = loadmat(filename)["traindata"] @@ -45,73 +32,76 @@ classDict = dict(zip(classNames, classValues)) class_mask = np.zeros(N).astype(bool) for v in n: cmsk = y == v + # Use the logical OR operator ("|") to select rows + # already in class_mask OR cmsk class_mask = class_mask | cmsk X = X[class_mask, :] y = y[class_mask] N = X.shape[0] # Center the data (subtract mean column values) -Xc = X - np.ones((N, 1)) * X.mean(0) +Y = X - np.ones((N, 1)) * X.mean(0) # PCA by computing SVD of Y -U, S, V = linalg.svd(Xc, full_matrices=False) +U, S, Vh = svd(Y, 
diff --git a/exercises/02450Toolbox_Python/Scripts/ex3_2_2.py b/exercises/02450Toolbox_Python/Scripts/ex3_2_2.py
index e0b1f0857b4a66ec97aaea0653f689c4225bf740..44a2c874d78c2935dadb20ee2e5578eb919aa0e5 100644
--- a/exercises/02450Toolbox_Python/Scripts/ex3_2_2.py
+++ b/exercises/02450Toolbox_Python/Scripts/ex3_2_2.py
@@ -1,20 +1,8 @@
 # exercise 3.2.2
 import importlib_resources
 import numpy as np
-import scipy.linalg as linalg
-from matplotlib.pyplot import (
-    cm,
-    figure,
-    imshow,
-    legend,
-    plot,
-    show,
-    subplot,
-    title,
-    xlabel,
-    ylabel,
-    yticks,
-)
+from scipy.linalg import svd
+import matplotlib.pyplot as plt
 from scipy.io import loadmat
 
 filename = importlib_resources.files("dtuimldmtools").joinpath("data/zipdata.mat")
@@ -26,7 +14,6 @@ K = 16
 # Digits to visualize
 nD = range(6)
 
-
 # Load Matlab data file to python dict structure
 # and extract variables of interest
 traindata = loadmat(filename)["traindata"]
@@ -45,73 +32,76 @@ classDict = dict(zip(classNames, classValues))
 class_mask = np.zeros(N).astype(bool)
 for v in n:
     cmsk = y == v
+    # Use the logical OR operator ("|") to select rows
+    # already in class_mask OR cmsk
     class_mask = class_mask | cmsk
 X = X[class_mask, :]
 y = y[class_mask]
 N = X.shape[0]
 
 # Center the data (subtract mean column values)
-Xc = X - np.ones((N, 1)) * X.mean(0)
+Y = X - np.ones((N, 1)) * X.mean(0)
 
 # PCA by computing SVD of Y
-U, S, V = linalg.svd(Xc, full_matrices=False)
+U, S, Vh = svd(Y, full_matrices=False)  # NOTE: changed to Vh
 # U = mat(U)
-V = V.T
+V = Vh.T
 
 # Compute variance explained by principal components
 rho = (S * S) / (S * S).sum()
 
 # Project data onto principal component space
-Z = Xc @ V
+Z = Y @ V
 
 # Plot variance explained
-figure()
-plot(rho, "o-")
-title("Variance explained by principal components")
-xlabel("Principal component")
-ylabel("Variance explained value")
+plt.figure()
+plt.plot(rho, "o-")
+plt.title("Variance explained by principal components")
+plt.xlabel("Principal component")
+plt.ylabel("Variance explained value")
 
 # Plot PCA of the data
-f = figure()
-title("pixel vectors of handwr. digits projected on PCs")
+f = plt.figure()
+plt.title("pixel vectors of handwr. digits projected on PCs")
 for c in n:
     # select indices belonging to class c:
     class_mask = y == c
-    plot(Z[class_mask, 0], Z[class_mask, 1], "o")
-legend(classNames)
-xlabel("PC1")
-ylabel("PC2")
+    plt.plot(Z[class_mask, 0], Z[class_mask, 1], "o")
+plt.legend(classNames)
+plt.xlabel("PC1")
+plt.ylabel("PC2")
 
 # Visualize the reconstructed data from the first K principal components
 # Select randomly D digits.
-figure(figsize=(10, 3))
+plt.figure(figsize=(10, 3))
 W = Z[:, range(K)] @ V[:, range(K)].T
 D = len(nD)
 for d in range(D):
+    # Select random digit index
     digit_ix = np.random.randint(0, N)
-    subplot(2, D, int(d + 1))
+    plt.subplot(2, D, int(d + 1))
+    # Reshape the digit from vector to 16x16 array
    I = np.reshape(X[digit_ix, :], (16, 16))
-    imshow(I, cmap=cm.gray_r)
-    title("Original")
-    subplot(2, D, D + d + 1)
+    plt.imshow(I, cmap=plt.cm.gray_r)
+    plt.title("Original")
+    plt.subplot(2, D, D + d + 1)
+    # Reshape the digit from vector to 16x16 array
     I = np.reshape(W[digit_ix, :] + X.mean(0), (16, 16))
-    imshow(I, cmap=cm.gray_r)
-    title("Reconstr.")
+    plt.imshow(I, cmap=plt.cm.gray_r)
+    plt.title("Reconstr.")
 
 # Visualize the principal components
-figure(figsize=(8, 6))
+plt.figure(figsize=(8, 6))
 for k in range(K):
     N1 = int(np.ceil(np.sqrt(K)))
     N2 = int(np.ceil(K / N1))
-    subplot(N2, N1, int(k + 1))
+    plt.subplot(N2, N1, int(k + 1))
     I = np.reshape(V[:, k], (16, 16))
-    imshow(I, cmap=cm.hot)
-    title("PC{0}".format(k + 1))
+    plt.imshow(I, cmap=plt.cm.hot)
+    plt.title("PC{0}".format(k + 1))
 
 # output to screen
-show()
-
-
+plt.show()
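The reconstruction step in ex3_2_2.py, W = Z[:, range(K)] @ V[:, range(K)].T, is a rank-K approximation of the centered data (the mean is added back before display). A synthetic sketch (not the digit data) checking that the Frobenius reconstruction error shrinks as K grows and vanishes at full rank:

import numpy as np
from scipy.linalg import svd

rng = np.random.default_rng(2)
X = rng.standard_normal((50, 8))
Y = X - np.ones((50, 1)) * X.mean(0)  # center, as in the script

U, S, Vh = svd(Y, full_matrices=False)
V = Vh.T
Z = Y @ V

prev = np.inf
for K in range(1, 9):
    W = Z[:, :K] @ V[:, :K].T    # rank-K approximation of Y
    err = np.linalg.norm(Y - W)  # Frobenius error, sqrt(sum of S[K:]**2)
    assert err <= prev + 1e-12   # error is non-increasing in K
    prev = err
assert np.isclose(prev, 0)       # K = M reproduces Y exactly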
diff --git a/exercises/02450Toolbox_Python/Scripts/ex3_3_1.py b/exercises/02450Toolbox_Python/Scripts/ex3_3_1.py
index a3104d8dff87096bb3acbd6998ea6d97df7ac26e..081970c7e159bd7b60471ad584021cb8e7a868a0 100644
--- a/exercises/02450Toolbox_Python/Scripts/ex3_3_1.py
+++ b/exercises/02450Toolbox_Python/Scripts/ex3_3_1.py
@@ -2,8 +2,8 @@
 
 import importlib_resources
 import numpy as np
-import scipy.linalg as linalg
-from matplotlib.pyplot import figure, plot, show, xlabel, ylabel
+from scipy.linalg import svd
+import matplotlib.pyplot as plt
 from scipy.io import loadmat
 from sklearn.neighbors import KNeighborsClassifier
 
@@ -21,13 +21,13 @@ ytest = mat_data["testdata"][:, 0]
 N, M = X.shape
 Ntest = Xtest.shape[0]  # or Xtest[:,0].shape
 
-# Subtract the mean from the data
+# Subtract the mean of X from the data
 Y = X - np.ones((N, 1)) * X.mean(0)
 Ytest = Xtest - np.ones((Ntest, 1)) * X.mean(0)
 
 # Obtain the PCA solution by calculating the SVD of Y
-U, S, V = linalg.svd(Y, full_matrices=False)
-V = V.T
+U, S, Vh = svd(Y, full_matrices=False)
+V = Vh.T
 
 
 # Repeat classification for different values of K
@@ -49,8 +49,8 @@ for k in K:
     print("K={0}: Error rate: {1:.1f}%".format(k, er))
 
 # Visualize error rates vs. number of principal components considered
-figure()
-plot(K, error_rates, "o-")
-xlabel("Number of principal components K")
-ylabel("Error rate [%]")
-show()
+plt.figure()
+plt.plot(K, error_rates, "o-")
+plt.xlabel("Number of principal components K")
+plt.ylabel("Error rate [%]")
+plt.show()
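The clarified comment in ex3_3_1.py is worth underlining: the test set is centered with the mean of the training X before being projected onto the training V. A minimal synthetic sketch of that pipeline; the toy data, labels, and the 1-nearest-neighbour setting are assumptions for illustration, not taken from the script:

import numpy as np
from scipy.linalg import svd
from sklearn.neighbors import KNeighborsClassifier

rng = np.random.default_rng(3)
X = rng.standard_normal((100, 10))   # toy training data
y = (X[:, 0] > 0).astype(int)        # toy labels
Xtest = rng.standard_normal((40, 10))
ytest = (Xtest[:, 0] > 0).astype(int)

mu = X.mean(0)                       # training mean only
Y = X - mu
Ytest = Xtest - mu                   # same centering for the test set

U, S, Vh = svd(Y, full_matrices=False)
V = Vh.T

k = 3                                # number of principal components kept
knn = KNeighborsClassifier(n_neighbors=1)  # assumed classifier setting
knn.fit(Y @ V[:, :k], y)
y_est = knn.predict(Ytest @ V[:, :k])
print("Error rate: {0:.1f}%".format(100 * np.mean(y_est != ytest)))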