Commit 245c9d47 authored by bjje

Fix minor issues in ex3

parent 829eccef
@@ -2,7 +2,7 @@
# (requires data structures from ex. 3.1.1)
# Imports the numpy and xlrd package, then runs the ex3_1_1 code
from ex3_1_1 import *
-from matplotlib.pyplot import figure, legend, plot, show, title, xlabel, ylabel
+import matplotlib.pyplot as plt
# Data attributes to be plotted
i = 0
@@ -10,25 +10,23 @@ j = 1
##
# Make a simple plot of the i'th attribute against the j'th attribute
# Notice that X is of matrix type (but it will also work with a numpy array)
# X = np.array(X)  # Try uncommenting this line
-plot(X[:, i], X[:, j], "o")
+plt.plot(X[:, i], X[:, j], "o")
##
# Make another, fancier plot that includes a legend, class labels,
# attribute names, and a title.
-f = figure()
-title("NanoNose data")
+f = plt.figure()
+plt.title("NanoNose data")
for c in range(C):
    # select indices belonging to class c:
    class_mask = y == c
-    plot(X[class_mask, i], X[class_mask, j], "o", alpha=0.3)
+    plt.plot(X[class_mask, i], X[class_mask, j], "o", alpha=0.3)
-legend(classNames)
-xlabel(attributeNames[i])
-ylabel(attributeNames[j])
+plt.legend(classNames)
+plt.xlabel(attributeNames[i])
+plt.ylabel(attributeNames[j])
# Output result to screen
-show()
+plt.show()
print("Ran Exercise 3.1.2")
@@ -5,12 +5,19 @@ from ex3_1_1 import *
from scipy.linalg import svd
# Subtract mean value from data
+# Note: Here we use Y, but in the book we often use X with a hat symbol on top.
Y = X - np.ones((N, 1)) * X.mean(axis=0)
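# Editor's aside, a minimal sketch (assuming X and Y as above): numpy
# broadcasting yields the same centering without the explicit ones-matrix:
assert np.allclose(Y, X - X.mean(axis=0))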
# PCA by computing SVD of Y
-U, S, V = svd(Y, full_matrices=False)
+# Note: Here we call the Sigma matrix in the SVD "S" for notational convenience
+U, S, Vh = svd(Y, full_matrices=False)
+# scipy.linalg.svd returns "Vh", which is the Hermitian (conjugate)
+# transpose of the matrix V. So, to obtain the correct V, we transpose:
+V = Vh.T
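# Editor's aside, a quick sanity check (assuming Y, U, S, Vh as above): the
# SVD factors should reconstruct the centered data, up to floating-point error:
assert np.allclose(Y, U @ np.diag(S) @ Vh)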
# Compute variance explained by principal components
+# Note: This is an important equation, see Eq. 3.18 on page 40 in the book.
rho = (S * S) / (S * S).sum()
threshold = 0.9
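# Editor's aside, a minimal sketch (assuming rho and threshold as above): the
# number of PCs needed to explain at least 90% of the variance follows from
# the cumulative sum of the explained-variance fractions:
n_pcs_needed = np.argmax(np.cumsum(rho) >= threshold) + 1
print(f"{n_pcs_needed} PCs explain at least {threshold:.0%} of the variance")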
# exercise 3.1.4
# (requires data structures from ex. 3.1.1)
from ex3_1_1 import *
-from matplotlib.pyplot import figure, legend, plot, show, title, xlabel, ylabel
+import matplotlib.pyplot as plt
from scipy.linalg import svd
# Subtract mean value from data
@@ -14,6 +14,8 @@ U, S, Vh = svd(Y, full_matrices=False)
V = Vh.T
# Project the centered data onto principal component space
+# Note: Make absolutely sure you understand what the @ symbol
+# does by inspecting the numpy documentation!
Z = Y @ V
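# Editor's aside: for 2-D arrays the @ operator is matrix multiplication,
# i.e. equivalent to np.matmul (a minimal sketch using the arrays above):
assert np.allclose(Z, np.matmul(Y, V))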
# Indices of the principal components to be plotted
@@ -21,18 +23,18 @@ i = 0
j = 1
# Plot PCA of the data
-f = figure()
-title("NanoNose data: PCA")
+f = plt.figure()
+plt.title("NanoNose data: PCA")
# Z = np.array(Z)
for c in range(C):
    # select indices belonging to class c:
    class_mask = y == c
-    plot(Z[class_mask, i], Z[class_mask, j], "o", alpha=0.5)
-legend(classNames)
-xlabel("PC{0}".format(i + 1))
-ylabel("PC{0}".format(j + 1))
+    plt.plot(Z[class_mask, i], Z[class_mask, j], "o", alpha=0.5)
+plt.legend(classNames)
+plt.xlabel("PC{0}".format(i + 1))
+plt.ylabel("PC{0}".format(j + 1))
# Output result to screen
-show()
+plt.show()
print("Ran Exercise 3.1.4")
@@ -14,11 +14,12 @@ N, M = X.shape
# percent of the variance. Let's look at their coefficients:
pcs = [0, 1, 2]
legendStrs = ["PC" + str(e + 1) for e in pcs]
c = ["r", "g", "b"]
bw = 0.2
r = np.arange(1, M + 1)
for i in pcs:
    plt.bar(r + i * bw, V[:, i], width=bw)
plt.xticks(r + bw, attributeNames)
plt.xlabel("Attributes")
plt.ylabel("Component coefficients")
@@ -48,6 +49,5 @@ print(all_water_data[0, :])
# coefficient and the attribute!
# You can determine the projection by (remove comments):
print("...and its projection onto PC2")
print(all_water_data[0, :] @ V[:, 1])
# Try to explain why?
# print("...and its projection onto PC2")
# print(all_water_data[0, :] @ V[:, 1])
@@ -65,7 +65,7 @@ for k in range(2):
    for c in range(C):
        plt.plot(Z[y == c, i], Z[y == c, j], ".", alpha=0.5)
    plt.xlabel("PC" + str(i + 1))
-    plt.xlabel("PC" + str(j + 1))
+    plt.ylabel("PC" + str(j + 1))
    plt.title(titles[k] + "\n" + "Projection")
    plt.legend(classNames)
    plt.axis("equal")
## exercise 3.2.1
import importlib_resources
import numpy as np
-from matplotlib.pyplot import cm, figure, imshow, show, subplot, title, xlabel, yticks
+import matplotlib.pyplot as plt
from scipy.io import loadmat
filename = importlib_resources.files("dtuimldmtools").joinpath("data/zipdata.mat")
@@ -19,19 +19,19 @@ y = traindata[:, 0]
# Visualize the i'th digit as a vector
-f = figure()
-subplot(4, 1, 4)
-imshow(np.expand_dims(X[i, :], axis=0), extent=(0, 256, 0, 10), cmap=cm.gray_r)
-xlabel("Pixel number")
-title("Digit in vector format")
-yticks([])
+f = plt.figure()
+plt.subplot(4, 1, 4)
+plt.imshow(np.expand_dims(X[i, :], axis=0), extent=(0, 256, 0, 10), cmap=plt.cm.gray_r)
+plt.xlabel("Pixel number")
+plt.title("Digit in vector format")
+plt.yticks([])
# Visualize the i'th digit as an image
-subplot(2, 1, 1)
+plt.subplot(2, 1, 1)
I = np.reshape(X[i, :], (16, 16))
-imshow(I, extent=(0, 16, 0, 16), cmap=cm.gray_r)
-title("Digit as an image")
+plt.imshow(I, extent=(0, 16, 0, 16), cmap=plt.cm.gray_r)
+plt.title("Digit as an image")
-show()
+plt.show()
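# Editor's aside, a minimal sketch: np.expand_dims turns the length-256 pixel
# vector into a 1 x 256 array, which imshow then renders as a one-row image:
v = np.arange(4)  # stand-in pixel vector
assert np.expand_dims(v, axis=0).shape == (1, 4)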
# exercise 3.2.2
import importlib_resources
import numpy as np
-import scipy.linalg as linalg
-from matplotlib.pyplot import (
-    cm,
-    figure,
-    imshow,
-    legend,
-    plot,
-    show,
-    subplot,
-    title,
-    xlabel,
-    ylabel,
-    yticks,
-)
+from scipy.linalg import svd
+import matplotlib.pyplot as plt
from scipy.io import loadmat
filename = importlib_resources.files("dtuimldmtools").joinpath("data/zipdata.mat")
@@ -26,7 +14,6 @@ K = 16
# Digits to visualize
nD = range(6)
# Load Matlab data file to python dict structure
# and extract variables of interest
traindata = loadmat(filename)["traindata"]
@@ -45,73 +32,76 @@ classDict = dict(zip(classNames, classValues))
class_mask = np.zeros(N).astype(bool)
for v in n:
    cmsk = y == v
+    # Use the logical OR operator ("|") to select rows
+    # already in class_mask OR cmsk
    class_mask = class_mask | cmsk
X = X[class_mask, :]
y = y[class_mask]
N = X.shape[0]
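# Editor's aside, a minimal sketch of the boolean-mask logic used above:
a = np.array([1, 2, 3, 4])
mask = (a == 2) | (a == 4)  # elementwise OR of two boolean masks
assert a[mask].tolist() == [2, 4]  # mask indexing keeps only the True rows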
# Center the data (subtract mean column values)
-Xc = X - np.ones((N, 1)) * X.mean(0)
+Y = X - np.ones((N, 1)) * X.mean(0)
# PCA by computing SVD of Y
-U, S, V = linalg.svd(Xc, full_matrices=False)
-# U = mat(U)
-V = V.T
+U, S, Vh = svd(Y, full_matrices=False)  # Note: svd returns Vh, the transpose of V
+V = Vh.T
# Compute variance explained by principal components
rho = (S * S) / (S * S).sum()
# Project data onto principal component space
-Z = Xc @ V
+Z = Y @ V
# Plot variance explained
-figure()
-plot(rho, "o-")
-title("Variance explained by principal components")
-xlabel("Principal component")
-ylabel("Variance explained value")
+plt.figure()
+plt.plot(rho, "o-")
+plt.title("Variance explained by principal components")
+plt.xlabel("Principal component")
+plt.ylabel("Variance explained value")
# Plot PCA of the data
-f = figure()
-title("pixel vectors of handwr. digits projected on PCs")
+f = plt.figure()
+plt.title("Pixel vectors of handwritten digits projected on PCs")
for c in n:
    # select indices belonging to class c:
    class_mask = y == c
-    plot(Z[class_mask, 0], Z[class_mask, 1], "o")
-legend(classNames)
-xlabel("PC1")
-ylabel("PC2")
+    plt.plot(Z[class_mask, 0], Z[class_mask, 1], "o")
+plt.legend(classNames)
+plt.xlabel("PC1")
+plt.ylabel("PC2")
# Visualize the reconstructed data from the first K principal components
# Randomly select D digits.
-figure(figsize=(10, 3))
+plt.figure(figsize=(10, 3))
W = Z[:, range(K)] @ V[:, range(K)].T
D = len(nD)
for d in range(D):
    # Select random digit index
    digit_ix = np.random.randint(0, N)
-    subplot(2, D, int(d + 1))
+    plt.subplot(2, D, int(d + 1))
    # Reshape the digit from vector to 16x16 array
    I = np.reshape(X[digit_ix, :], (16, 16))
-    imshow(I, cmap=cm.gray_r)
-    title("Original")
-    subplot(2, D, D + d + 1)
+    plt.imshow(I, cmap=plt.cm.gray_r)
+    plt.title("Original")
+    plt.subplot(2, D, D + d + 1)
    # Reshape the digit from vector to 16x16 array
    I = np.reshape(W[digit_ix, :] + X.mean(0), (16, 16))
-    imshow(I, cmap=cm.gray_r)
-    title("Reconstr.")
+    plt.imshow(I, cmap=plt.cm.gray_r)
+    plt.title("Reconstr.")
# Visualize the principal components
-figure(figsize=(8, 6))
+plt.figure(figsize=(8, 6))
for k in range(K):
    N1 = int(np.ceil(np.sqrt(K)))
    N2 = int(np.ceil(K / N1))
-    subplot(N2, N1, int(k + 1))
+    plt.subplot(N2, N1, int(k + 1))
    I = np.reshape(V[:, k], (16, 16))
-    imshow(I, cmap=cm.hot)
-    title("PC{0}".format(k + 1))
+    plt.imshow(I, cmap=plt.cm.hot)
+    plt.title("PC{0}".format(k + 1))
# output to screen
-show()
+plt.show()
@@ -2,8 +2,8 @@
import importlib_resources
import numpy as np
-import scipy.linalg as linalg
-from matplotlib.pyplot import figure, plot, show, xlabel, ylabel
+from scipy.linalg import svd
+import matplotlib.pyplot as plt
from scipy.io import loadmat
from sklearn.neighbors import KNeighborsClassifier
@@ -21,13 +21,13 @@ ytest = mat_data["testdata"][:, 0]
N, M = X.shape
Ntest = Xtest.shape[0]  # or Xtest[:, 0].shape[0]
-# Subtract the mean from the data
+# Subtract the mean of X from the data
Y = X - np.ones((N, 1)) * X.mean(0)
Ytest = Xtest - np.ones((Ntest, 1)) * X.mean(0)
# Obtain the PCA solution by calculating the SVD of Y
-U, S, V = linalg.svd(Y, full_matrices=False)
-V = V.T
+U, S, Vh = svd(Y, full_matrices=False)
+V = Vh.T
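# Editor's aside, a minimal sketch (the names Z and Ztest are illustrative):
# both sets are projected onto the PCs found from the training data; the test
# set was centered with the TRAINING mean so both share the same coordinates.
Z = Y @ V
Ztest = Ytest @ V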
# Repeat classification for different values of K
@@ -49,8 +49,8 @@ for k in K:
print("K={0}: Error rate: {1:.1f}%".format(k, er))
# Visualize error rates vs. number of principal components considered
-figure()
-plot(K, error_rates, "o-")
-xlabel("Number of principal components K")
-ylabel("Error rate [%]")
-show()
+plt.figure()
+plt.plot(K, error_rates, "o-")
+plt.xlabel("Number of principal components K")
+plt.ylabel("Error rate [%]")
+plt.show()