Commit 245c9d47 authored by bjje

Fix minor issues in ex3

parent 829eccef
@@ -2,7 +2,7 @@
 # (requires data structures from ex. 3.1.1)
 # Imports the numpy and xlrd package, then runs the ex3_1_1 code
 from ex3_1_1 import *
-from matplotlib.pyplot import figure, legend, plot, show, title, xlabel, ylabel
+import matplotlib.pyplot as plt

 # Data attributes to be plotted
 i = 0
@@ -10,25 +10,23 @@ j = 1

 ##
 # Make a simple plot of the i'th attribute against the j'th attribute
-# Notice that X is of matrix type (but it will also work with a numpy array)
-# X = np.array(X)  # Try to uncomment this line
-plot(X[:, i], X[:, j], "o")
+plt.plot(X[:, i], X[:, j], "o")

 ##
 # Make another more fancy plot that includes legend, class labels,
 # attribute names, and a title.
-f = figure()
-title("NanoNose data")
+f = plt.figure()
+plt.title("NanoNose data")

 for c in range(C):
     # select indices belonging to class c:
     class_mask = y == c
-    plot(X[class_mask, i], X[class_mask, j], "o", alpha=0.3)
+    plt.plot(X[class_mask, i], X[class_mask, j], "o", alpha=0.3)

-legend(classNames)
-xlabel(attributeNames[i])
-ylabel(attributeNames[j])
+plt.legend(classNames)
+plt.xlabel(attributeNames[i])
+plt.ylabel(attributeNames[j])

 # Output result to screen
-show()
+plt.show()

 print("Ran Exercise 3.1.2")
@@ -5,12 +5,19 @@ from ex3_1_1 import *
 from scipy.linalg import svd

 # Subtract mean value from data
+# Note: Here we use Y; in the book we often use X with a hat symbol on top.
 Y = X - np.ones((N, 1)) * X.mean(axis=0)

 # PCA by computing SVD of Y
-U, S, V = svd(Y, full_matrices=False)
+# Note: Here we call the Sigma matrix in the SVD S for notational convenience
+U, S, Vh = svd(Y, full_matrices=False)
+
+# scipy.linalg.svd returns "Vh", which is the Hermitian (transpose)
+# of the matrix V. So, for us to obtain the correct V, we transpose:
+V = Vh.T

 # Compute variance explained by principal components
+# Note: This is an important equation, see Eq. 3.18 on page 40 in the book.
 rho = (S * S) / (S * S).sum()

 threshold = 0.9
...
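The `rho` line is the variance-explained equation the added note points to (Eq. 3.18): each component's share is its squared singular value divided by the sum of all squared singular values. A minimal sketch, with hypothetical singular values, of how one might find how many components are needed to pass the 0.9 threshold:

```python
import numpy as np

S = np.array([5.0, 3.0, 1.0, 0.5])   # hypothetical singular values
rho = (S * S) / (S * S).sum()        # variance explained per component
rho_cum = np.cumsum(rho)             # cumulative variance explained

threshold = 0.9
# First index where the cumulative sum reaches the threshold (+1 to get a count)
n_components = int(np.argmax(rho_cum >= threshold)) + 1
print(rho_cum, n_components)         # here: 2 components suffice
```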
 # exercise 3.1.4
 # (requires data structures from ex. 3.1.1)
 from ex3_1_1 import *
-from matplotlib.pyplot import figure, legend, plot, show, title, xlabel, ylabel
+import matplotlib.pyplot as plt
 from scipy.linalg import svd

 # Subtract mean value from data
@@ -14,6 +14,8 @@ U, S, Vh = svd(Y, full_matrices=False)
 V = Vh.T

 # Project the centered data onto principal component space
+# Note: Make absolutely sure you understand what the @ symbol
+# does by inspecting the numpy documentation!
 Z = Y @ V

 # Indices of the principal components to be plotted
@@ -21,18 +23,18 @@ i = 0
 j = 1

 # Plot PCA of the data
-f = figure()
-title("NanoNose data: PCA")
+f = plt.figure()
+plt.title("NanoNose data: PCA")
 # Z = array(Z)
 for c in range(C):
     # select indices belonging to class c:
     class_mask = y == c
-    plot(Z[class_mask, i], Z[class_mask, j], "o", alpha=0.5)
+    plt.plot(Z[class_mask, i], Z[class_mask, j], "o", alpha=0.5)

-legend(classNames)
-xlabel("PC{0}".format(i + 1))
-ylabel("PC{0}".format(j + 1))
+plt.legend(classNames)
+plt.xlabel("PC{0}".format(i + 1))
+plt.ylabel("PC{0}".format(j + 1))

 # Output result to screen
-show()
+plt.show()

 print("Ran Exercise 3.1.4")
@@ -14,11 +14,12 @@ N, M = X.shape
 # percent of the variance. Let's look at their coefficients:
 pcs = [0, 1, 2]
 legendStrs = ["PC" + str(e + 1) for e in pcs]
+c = ["r", "g", "b"]
 bw = 0.2
 r = np.arange(1, M + 1)
 for i in pcs:
     plt.bar(r + i * bw, V[:, i], width=bw)
 plt.xticks(r + bw, attributeNames)
 plt.xlabel("Attributes")
 plt.ylabel("Component coefficients")
@@ -48,6 +49,5 @@ print(all_water_data[0, :])
 # coefficient and the attribute!

 # You can determine the projection by (remove comments):
-print("...and its projection onto PC2")
-print(all_water_data[0, :] @ V[:, 1])
-# Try to explain why?
+# print("...and its projection onto PC2")
+# print(all_water_data[0, :] @ V[:, 1])
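For intuition on the commented-out projection: projecting one observation onto one component is just the dot product between that (centered) row and the component's coefficient vector, so an attribute with a large-magnitude coefficient dominates the score. A minimal sketch with made-up numbers:

```python
import numpy as np

x_centered = np.array([1.0, -2.0, 0.5])   # hypothetical centered observation
v2 = np.array([0.0, 0.9, -0.1])           # hypothetical PC2 coefficient vector

# Projection onto PC2 = dot product of observation and coefficients
z2 = x_centered @ v2
print(z2)   # 1.0*0.0 + (-2.0)*0.9 + 0.5*(-0.1) = -1.85
```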
@@ -65,7 +65,7 @@ for k in range(2):
     for c in range(C):
         plt.plot(Z[y == c, i], Z[y == c, j], ".", alpha=0.5)
     plt.xlabel("PC" + str(i + 1))
-    plt.xlabel("PC" + str(j + 1))
+    plt.ylabel("PC" + str(j + 1))
     plt.title(titles[k] + "\n" + "Projection")
     plt.legend(classNames)
     plt.axis("equal")
...
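The `for k in range(2)` loop above compares projections with and without standardization. A sketch of the usual preprocessing difference, using the same centering convention as the exercises (the data here is illustrative):

```python
import numpy as np
from scipy.linalg import svd

X = np.random.default_rng(0).normal(size=(100, 4))   # hypothetical data
N = X.shape[0]

Y1 = X - np.ones((N, 1)) * X.mean(axis=0)   # centered only
Y2 = Y1 * (1 / np.std(Y1, axis=0))          # centered and scaled to unit std

# One SVD per preprocessing choice; high-variance attributes
# dominate Y1's leading components but not Y2's.
for Y in (Y1, Y2):
    U, S, Vh = svd(Y, full_matrices=False)
    print((S * S) / (S * S).sum())
```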
 ## exercise 3.2.1
 import importlib_resources
 import numpy as np
-from matplotlib.pyplot import cm, figure, imshow, show, subplot, title, xlabel, yticks
+import matplotlib.pyplot as plt
 from scipy.io import loadmat

 filename = importlib_resources.files("dtuimldmtools").joinpath("data/zipdata.mat")
@@ -19,19 +19,19 @@ y = traindata[:, 0]

 # Visualize the i'th digit as a vector
-f = figure()
-subplot(4, 1, 4)
-imshow(np.expand_dims(X[i, :], axis=0), extent=(0, 256, 0, 10), cmap=cm.gray_r)
-xlabel("Pixel number")
-title("Digit in vector format")
-yticks([])
+f = plt.figure()
+plt.subplot(4, 1, 4)
+plt.imshow(np.expand_dims(X[i, :], axis=0), extent=(0, 256, 0, 10), cmap=plt.cm.gray_r)
+plt.xlabel("Pixel number")
+plt.title("Digit in vector format")
+plt.yticks([])

 # Visualize the i'th digit as an image
-subplot(2, 1, 1)
+plt.subplot(2, 1, 1)
 I = np.reshape(X[i, :], (16, 16))
-imshow(I, extent=(0, 16, 0, 16), cmap=cm.gray_r)
-title("Digit as an image")
+plt.imshow(I, extent=(0, 16, 0, 16), cmap=plt.cm.gray_r)
+plt.title("Digit as an image")

-show()
+plt.show()
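The vector-to-image step above hinges on `np.reshape` reading the 256 pixel values in row-major (C) order, 16 at a time. A tiny sketch of that behaviour:

```python
import numpy as np

v = np.arange(12)            # stand-in for a flattened image
I = np.reshape(v, (3, 4))    # row-major by default: first row is v[0:4]
print(I)
# [[ 0  1  2  3]
#  [ 4  5  6  7]
#  [ 8  9 10 11]]
```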
 # exercise 3.2.2
 import importlib_resources
 import numpy as np
-import scipy.linalg as linalg
-from matplotlib.pyplot import (
-    cm,
-    figure,
-    imshow,
-    legend,
-    plot,
-    show,
-    subplot,
-    title,
-    xlabel,
-    ylabel,
-    yticks,
-)
+from scipy.linalg import svd
+import matplotlib.pyplot as plt
 from scipy.io import loadmat

 filename = importlib_resources.files("dtuimldmtools").joinpath("data/zipdata.mat")
@@ -26,7 +14,6 @@ K = 16
 # Digits to visualize
 nD = range(6)

-# Load Matlab data file to python dict structure
 # and extract variables of interest
 traindata = loadmat(filename)["traindata"]
@@ -45,73 +32,76 @@ classDict = dict(zip(classNames, classValues))
 class_mask = np.zeros(N).astype(bool)
 for v in n:
     cmsk = y == v
+    # Use the logical OR operator ("|") to select rows
+    # already in class_mask OR cmsk
     class_mask = class_mask | cmsk
 X = X[class_mask, :]
 y = y[class_mask]
 N = X.shape[0]

 # Center the data (subtract mean column values)
-Xc = X - np.ones((N, 1)) * X.mean(0)
+Y = X - np.ones((N, 1)) * X.mean(0)

 # PCA by computing SVD of Y
-U, S, V = linalg.svd(Xc, full_matrices=False)
+U, S, Vh = svd(Y, full_matrices=False)
 # U = mat(U)
-V = V.T
+V = Vh.T

 # Compute variance explained by principal components
 rho = (S * S) / (S * S).sum()

 # Project data onto principal component space
-Z = Xc @ V
+Z = Y @ V

 # Plot variance explained
-figure()
-plot(rho, "o-")
-title("Variance explained by principal components")
-xlabel("Principal component")
-ylabel("Variance explained value")
+plt.figure()
+plt.plot(rho, "o-")
+plt.title("Variance explained by principal components")
+plt.xlabel("Principal component")
+plt.ylabel("Variance explained value")

 # Plot PCA of the data
-f = figure()
-title("pixel vectors of handwr. digits projected on PCs")
+f = plt.figure()
+plt.title("pixel vectors of handwr. digits projected on PCs")
 for c in n:
     # select indices belonging to class c:
     class_mask = y == c
-    plot(Z[class_mask, 0], Z[class_mask, 1], "o")
-legend(classNames)
-xlabel("PC1")
-ylabel("PC2")
+    plt.plot(Z[class_mask, 0], Z[class_mask, 1], "o")
+plt.legend(classNames)
+plt.xlabel("PC1")
+plt.ylabel("PC2")

 # Visualize the reconstructed data from the first K principal components
 # Select randomly D digits.
-figure(figsize=(10, 3))
+plt.figure(figsize=(10, 3))
 W = Z[:, range(K)] @ V[:, range(K)].T
 D = len(nD)
 for d in range(D):
+    # Select random digit index
     digit_ix = np.random.randint(0, N)
-    subplot(2, D, int(d + 1))
+    plt.subplot(2, D, int(d + 1))
+    # Reshape the digit from vector to 16x16 array
     I = np.reshape(X[digit_ix, :], (16, 16))
-    imshow(I, cmap=cm.gray_r)
-    title("Original")
-    subplot(2, D, D + d + 1)
+    plt.imshow(I, cmap=plt.cm.gray_r)
+    plt.title("Original")
+    plt.subplot(2, D, D + d + 1)
+    # Reshape the digit from vector to 16x16 array
    I = np.reshape(W[digit_ix, :] + X.mean(0), (16, 16))
-    imshow(I, cmap=cm.gray_r)
-    title("Reconstr.")
+    plt.imshow(I, cmap=plt.cm.gray_r)
+    plt.title("Reconstr.")

 # Visualize the principal components
-figure(figsize=(8, 6))
+plt.figure(figsize=(8, 6))
 for k in range(K):
     N1 = int(np.ceil(np.sqrt(K)))
     N2 = int(np.ceil(K / N1))
-    subplot(N2, N1, int(k + 1))
+    plt.subplot(N2, N1, int(k + 1))
     I = np.reshape(V[:, k], (16, 16))
-    imshow(I, cmap=cm.hot)
-    title("PC{0}".format(k + 1))
+    plt.imshow(I, cmap=plt.cm.hot)
+    plt.title("PC{0}".format(k + 1))

 # output to screen
-show()
+plt.show()
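The line `W = Z[:, range(K)] @ V[:, range(K)].T` is the rank-K PCA approximation: keep the first K scores, map them back through the first K components, then add the mean back (as the plotting code does with `+ X.mean(0)`). A minimal self-contained sketch of the round trip, on hypothetical data:

```python
import numpy as np
from scipy.linalg import svd

X = np.random.default_rng(1).normal(size=(50, 16))   # hypothetical data
mu = X.mean(axis=0)
Y = X - mu                                           # center

U, S, Vh = svd(Y, full_matrices=False)
V = Vh.T
K = 4                                                # components to keep

Z = Y @ V                                            # scores
X_hat = Z[:, :K] @ V[:, :K].T + mu                   # rank-K reconstruction

# Reconstruction error shrinks as K grows; at K = 16 it is ~0 here.
print(np.linalg.norm(X - X_hat))
```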
@@ -2,8 +2,8 @@
 import importlib_resources
 import numpy as np
-import scipy.linalg as linalg
-from matplotlib.pyplot import figure, plot, show, xlabel, ylabel
+from scipy.linalg import svd
+import matplotlib.pyplot as plt
 from scipy.io import loadmat
 from sklearn.neighbors import KNeighborsClassifier
@@ -21,13 +21,13 @@ ytest = mat_data["testdata"][:, 0]
 N, M = X.shape
 Ntest = Xtest.shape[0]  # or Xtest[:,0].shape

-# Subtract the mean from the data
+# Subtract the mean of X from the data
 Y = X - np.ones((N, 1)) * X.mean(0)
 Ytest = Xtest - np.ones((Ntest, 1)) * X.mean(0)

 # Obtain the PCA solution by calculating the SVD of Y
-U, S, V = linalg.svd(Y, full_matrices=False)
-V = V.T
+U, S, Vh = svd(Y, full_matrices=False)
+V = Vh.T

 # Repeat classification for different values of K
@@ -49,8 +49,8 @@ for k in K:
     print("K={0}: Error rate: {1:.1f}%".format(k, er))

 # Visualize error rates vs. number of principal components considered
-figure()
-plot(K, error_rates, "o-")
-xlabel("Number of principal components K")
-ylabel("Error rate [%]")
-show()
+plt.figure()
+plt.plot(K, error_rates, "o-")
+plt.xlabel("Number of principal components K")
+plt.ylabel("Error rate [%]")
+plt.show()
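The classification loop itself is collapsed in this diff, but the idea is to project both training and test data onto the first k components (using the same V, fitted on training data only) and classify in that space. A hedged sketch of that inner step, assuming the variable names used above (`Y`, `Ytest`, `V`, `y`, `ytest`); the helper name and the 1-nearest-neighbour setting are illustrative:

```python
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

def error_rate_for_k(Y, Ytest, V, y, ytest, k):
    """Classify in the space of the first k principal components."""
    Ztrain = Y @ V[:, :k]        # project training data
    Ztest = Ytest @ V[:, :k]     # project test data with the SAME V
    knn = KNeighborsClassifier(n_neighbors=1)
    knn.fit(Ztrain, y)
    y_est = knn.predict(Ztest)
    return 100 * np.mean(y_est != ytest)   # error rate in percent
```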