Commit 245c9d47 authored by bjje

Fix minor issues in ex3

parent 829eccef
@@ -2,7 +2,7 @@
# (requires data structures from ex. 3.1.1)
# Imports the numpy and xlrd package, then runs the ex3_1_1 code
from ex3_1_1 import *
-from matplotlib.pyplot import figure, legend, plot, show, title, xlabel, ylabel
+import matplotlib.pyplot as plt
# Data attributes to be plotted
i = 0
@@ -10,25 +10,23 @@ j = 1
##
# Make a simple plot of the i'th attribute against the j'th attribute
# Notice that X is of matrix type (but it will also work with a numpy array)
# X = np.array(X)  # Try uncommenting this line
-plot(X[:, i], X[:, j], "o")
+plt.plot(X[:, i], X[:, j], "o")
##
# Make another, fancier plot that includes a legend, class labels,
# attribute names, and a title.
-f = figure()
-title("NanoNose data")
+f = plt.figure()
+plt.title("NanoNose data")
for c in range(C):
    # select indices belonging to class c:
    class_mask = y == c
-    plot(X[class_mask, i], X[class_mask, j], "o", alpha=0.3)
+    plt.plot(X[class_mask, i], X[class_mask, j], "o", alpha=0.3)
-legend(classNames)
-xlabel(attributeNames[i])
-ylabel(attributeNames[j])
+plt.legend(classNames)
+plt.xlabel(attributeNames[i])
+plt.ylabel(attributeNames[j])
# Output result to screen
-show()
+plt.show()
print("Ran Exercise 3.1.2")
@@ -5,12 +5,19 @@ from ex3_1_1 import *
from scipy.linalg import svd
# Subtract mean value from data
+# Note: Here we use Y, but in the book we often use X with a hat symbol on top.
Y = X - np.ones((N, 1)) * X.mean(axis=0)
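# Editor's aside, a minimal sketch (assuming X and Y as above): numpy
# broadcasting yields the same centering without the explicit ones-matrix:
assert np.allclose(Y, X - X.mean(axis=0))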
# PCA by computing SVD of Y
-U, S, V = svd(Y, full_matrices=False)
+# Note: Here we call the Sigma matrix in the SVD "S" for notational convenience
+U, S, Vh = svd(Y, full_matrices=False)
+# scipy.linalg.svd returns "Vh", which is the Hermitian (conjugate)
+# transpose of the matrix V. So, to obtain the correct V, we transpose:
+V = Vh.T
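# Editor's aside, a quick sanity check (assuming Y, U, S, Vh as above): the
# SVD factors should reconstruct the centered data, up to floating-point error:
assert np.allclose(Y, U @ np.diag(S) @ Vh)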
# Compute variance explained by principal components
+# Note: This is an important equation, see Eq. 3.18 on page 40 in the book.
rho = (S * S) / (S * S).sum()
threshold = 0.9
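# Editor's aside, a minimal sketch (assuming rho and threshold as above): the
# number of PCs needed to explain at least 90% of the variance follows from
# the cumulative sum of the explained-variance fractions:
n_pcs_needed = np.argmax(np.cumsum(rho) >= threshold) + 1
print(f"{n_pcs_needed} PCs explain at least {threshold:.0%} of the variance")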
# exercise 3.1.4
# (requires data structures from ex. 3.1.1)
from ex3_1_1 import *
-from matplotlib.pyplot import figure, legend, plot, show, title, xlabel, ylabel
+import matplotlib.pyplot as plt
from scipy.linalg import svd
# Subtract mean value from data
@@ -14,6 +14,8 @@ U, S, Vh = svd(Y, full_matrices=False)
V = Vh.T
# Project the centered data onto principal component space
+# Note: Make absolutely sure you understand what the @ symbol
+# does by inspecting the numpy documentation!
Z = Y @ V
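# Editor's aside: for 2-D arrays the @ operator is matrix multiplication,
# i.e. equivalent to np.matmul (a minimal sketch using the arrays above):
assert np.allclose(Z, np.matmul(Y, V))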
# Indices of the principal components to be plotted
@@ -21,18 +23,18 @@ i = 0
j = 1
# Plot PCA of the data
-f = figure()
-title("NanoNose data: PCA")
+f = plt.figure()
+plt.title("NanoNose data: PCA")
# Z = np.array(Z)
for c in range(C):
    # select indices belonging to class c:
    class_mask = y == c
-    plot(Z[class_mask, i], Z[class_mask, j], "o", alpha=0.5)
-legend(classNames)
-xlabel("PC{0}".format(i + 1))
-ylabel("PC{0}".format(j + 1))
+    plt.plot(Z[class_mask, i], Z[class_mask, j], "o", alpha=0.5)
+plt.legend(classNames)
+plt.xlabel("PC{0}".format(i + 1))
+plt.ylabel("PC{0}".format(j + 1))
# Output result to screen
-show()
+plt.show()
print("Ran Exercise 3.1.4")
@@ -14,11 +14,12 @@ N, M = X.shape
# percent of the variance. Let's look at their coefficients:
pcs = [0, 1, 2]
legendStrs = ["PC" + str(e + 1) for e in pcs]
c = ["r", "g", "b"]
bw = 0.2
r = np.arange(1, M + 1)
for i in pcs:
    plt.bar(r + i * bw, V[:, i], width=bw)
plt.xticks(r + bw, attributeNames)
plt.xlabel("Attributes")
plt.ylabel("Component coefficients")
@@ -48,6 +49,5 @@ print(all_water_data[0, :])
# coefficient and the attribute!
# You can determine the projection by (remove comments):
print("...and its projection onto PC2")
print(all_water_data[0, :] @ V[:, 1])
# Try to explain why?
# print("...and its projection onto PC2")
# print(all_water_data[0, :] @ V[:, 1])
@@ -65,7 +65,7 @@ for k in range(2):
    for c in range(C):
        plt.plot(Z[y == c, i], Z[y == c, j], ".", alpha=0.5)
    plt.xlabel("PC" + str(i + 1))
-    plt.xlabel("PC" + str(j + 1))
+    plt.ylabel("PC" + str(j + 1))
    plt.title(titles[k] + "\n" + "Projection")
    plt.legend(classNames)
    plt.axis("equal")
## exercise 3.2.1
import importlib_resources
import numpy as np
-from matplotlib.pyplot import cm, figure, imshow, show, subplot, title, xlabel, yticks
+import matplotlib.pyplot as plt
from scipy.io import loadmat
filename = importlib_resources.files("dtuimldmtools").joinpath("data/zipdata.mat")
@@ -19,19 +19,19 @@ y = traindata[:, 0]
# Visualize the i'th digit as a vector
-f = figure()
-subplot(4, 1, 4)
-imshow(np.expand_dims(X[i, :], axis=0), extent=(0, 256, 0, 10), cmap=cm.gray_r)
-xlabel("Pixel number")
-title("Digit in vector format")
-yticks([])
+f = plt.figure()
+plt.subplot(4, 1, 4)
+plt.imshow(np.expand_dims(X[i, :], axis=0), extent=(0, 256, 0, 10), cmap=plt.cm.gray_r)
+plt.xlabel("Pixel number")
+plt.title("Digit in vector format")
+plt.yticks([])
# Visualize the i'th digit as an image
-subplot(2, 1, 1)
+plt.subplot(2, 1, 1)
I = np.reshape(X[i, :], (16, 16))
-imshow(I, extent=(0, 16, 0, 16), cmap=cm.gray_r)
-title("Digit as an image")
+plt.imshow(I, extent=(0, 16, 0, 16), cmap=plt.cm.gray_r)
+plt.title("Digit as an image")
-show()
+plt.show()
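# Editor's aside, a minimal sketch: np.expand_dims turns the length-256 pixel
# vector into a 1 x 256 array, which imshow then renders as a one-row image:
v = np.arange(4)  # stand-in pixel vector
assert np.expand_dims(v, axis=0).shape == (1, 4)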
# exercise 3.2.2
import importlib_resources
import numpy as np
-import scipy.linalg as linalg
-from matplotlib.pyplot import (
-    cm,
-    figure,
-    imshow,
-    legend,
-    plot,
-    show,
-    subplot,
-    title,
-    xlabel,
-    ylabel,
-    yticks,
-)
+from scipy.linalg import svd
+import matplotlib.pyplot as plt
from scipy.io import loadmat
filename = importlib_resources.files("dtuimldmtools").joinpath("data/zipdata.mat")
@@ -26,7 +14,6 @@ K = 16
# Digits to visualize
nD = range(6)
# Load Matlab data file to python dict structure
# and extract variables of interest
traindata = loadmat(filename)["traindata"]
@@ -45,73 +32,76 @@ classDict = dict(zip(classNames, classValues))
class_mask = np.zeros(N).astype(bool)
for v in n:
    cmsk = y == v
+    # Use the logical OR operator ("|") to select rows
+    # already in class_mask OR cmsk
    class_mask = class_mask | cmsk
X = X[class_mask, :]
y = y[class_mask]
N = X.shape[0]
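# Editor's aside, a minimal sketch of the boolean-mask logic used above:
a = np.array([1, 2, 3, 4])
mask = (a == 2) | (a == 4)  # elementwise OR of two boolean masks
assert a[mask].tolist() == [2, 4]  # mask indexing keeps only the True rows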
# Center the data (subtract mean column values)
-Xc = X - np.ones((N, 1)) * X.mean(0)
+Y = X - np.ones((N, 1)) * X.mean(0)
# PCA by computing SVD of Y
-U, S, V = linalg.svd(Xc, full_matrices=False)
-# U = mat(U)
-V = V.T
+U, S, Vh = svd(Y, full_matrices=False)  # Note: svd returns Vh, the transpose of V
+V = Vh.T
# Compute variance explained by principal components
rho = (S * S) / (S * S).sum()
# Project data onto principal component space
-Z = Xc @ V
+Z = Y @ V
# Plot variance explained
-figure()
-plot(rho, "o-")
-title("Variance explained by principal components")
-xlabel("Principal component")
-ylabel("Variance explained value")
+plt.figure()
+plt.plot(rho, "o-")
+plt.title("Variance explained by principal components")
+plt.xlabel("Principal component")
+plt.ylabel("Variance explained value")
# Plot PCA of the data
-f = figure()
-title("pixel vectors of handwr. digits projected on PCs")
+f = plt.figure()
+plt.title("Pixel vectors of handwritten digits projected on PCs")
for c in n:
    # select indices belonging to class c:
    class_mask = y == c
-    plot(Z[class_mask, 0], Z[class_mask, 1], "o")
-legend(classNames)
-xlabel("PC1")
-ylabel("PC2")
+    plt.plot(Z[class_mask, 0], Z[class_mask, 1], "o")
+plt.legend(classNames)
+plt.xlabel("PC1")
+plt.ylabel("PC2")
# Visualize the reconstructed data from the first K principal components
# Randomly select D digits.
-figure(figsize=(10, 3))
+plt.figure(figsize=(10, 3))
W = Z[:, range(K)] @ V[:, range(K)].T
D = len(nD)
for d in range(D):
    # Select random digit index
    digit_ix = np.random.randint(0, N)
-    subplot(2, D, int(d + 1))
+    plt.subplot(2, D, int(d + 1))
    # Reshape the digit from vector to 16x16 array
    I = np.reshape(X[digit_ix, :], (16, 16))
-    imshow(I, cmap=cm.gray_r)
-    title("Original")
-    subplot(2, D, D + d + 1)
+    plt.imshow(I, cmap=plt.cm.gray_r)
+    plt.title("Original")
+    plt.subplot(2, D, D + d + 1)
    # Reshape the digit from vector to 16x16 array
    I = np.reshape(W[digit_ix, :] + X.mean(0), (16, 16))
-    imshow(I, cmap=cm.gray_r)
-    title("Reconstr.")
+    plt.imshow(I, cmap=plt.cm.gray_r)
+    plt.title("Reconstr.")
# Visualize the principal components
-figure(figsize=(8, 6))
+plt.figure(figsize=(8, 6))
for k in range(K):
    N1 = int(np.ceil(np.sqrt(K)))
    N2 = int(np.ceil(K / N1))
-    subplot(N2, N1, int(k + 1))
+    plt.subplot(N2, N1, int(k + 1))
    I = np.reshape(V[:, k], (16, 16))
-    imshow(I, cmap=cm.hot)
-    title("PC{0}".format(k + 1))
+    plt.imshow(I, cmap=plt.cm.hot)
+    plt.title("PC{0}".format(k + 1))
# output to screen
-show()
+plt.show()
@@ -2,8 +2,8 @@
import importlib_resources
import numpy as np
-import scipy.linalg as linalg
-from matplotlib.pyplot import figure, plot, show, xlabel, ylabel
+from scipy.linalg import svd
+import matplotlib.pyplot as plt
from scipy.io import loadmat
from sklearn.neighbors import KNeighborsClassifier
@@ -21,13 +21,13 @@ ytest = mat_data["testdata"][:, 0]
N, M = X.shape
Ntest = Xtest.shape[0]  # or Xtest[:, 0].shape[0]
-# Subtract the mean from the data
+# Subtract the mean of X from the data
Y = X - np.ones((N, 1)) * X.mean(0)
Ytest = Xtest - np.ones((Ntest, 1)) * X.mean(0)
# Obtain the PCA solution by calculating the SVD of Y
-U, S, V = linalg.svd(Y, full_matrices=False)
-V = V.T
+U, S, Vh = svd(Y, full_matrices=False)
+V = Vh.T
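# Editor's aside, a minimal sketch (the names Z and Ztest are illustrative):
# both sets are projected onto the PCs found from the training data; the test
# set was centered with the TRAINING mean so both share the same coordinates.
Z = Y @ V
Ztest = Ytest @ V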
# Repeat classification for different values of K
@@ -49,8 +49,8 @@ for k in K:
print("K={0}: Error rate: {1:.1f}%".format(k, er))
# Visualize error rates vs. number of principal components considered
-figure()
-plot(K, error_rates, "o-")
-xlabel("Number of principal components K")
-ylabel("Error rate [%]")
-show()
+plt.figure()
+plt.plot(K, error_rates, "o-")
+plt.xlabel("Number of principal components K")
+plt.ylabel("Error rate [%]")
+plt.show()