import importlib_resources
import numpy as np
import scipy.linalg as linalg
from matplotlib.pyplot import (
cm,
figure,
imshow,
legend,
plot,
show,
subplot,
title,
xlabel,
ylabel,
yticks,
)
from scipy.io import loadmat
filename = importlib_resources.files("dtuimldmtools").joinpath("data/zipdata.mat")
# Digits to include in analysis (to include all, use n = range(10))
n = [0, 1]  # e.g. compare the digits 0 and 1; any subset of 0-9 works
# Number of principal components for reconstruction
K = 16
# Digits to visualize
nD = range(6)
# Load Matlab data file to python dict structure
# and extract variables of interest
traindata = loadmat(filename)["traindata"]
X = traindata[:, 1:]
y = traindata[:, 0]
N, M = X.shape
C = len(n)
classValues = n
classNames = [str(num) for num in n]
# Select subset of digits classes to be inspected
class_mask = np.zeros(N).astype(bool)
for v in n:
    class_mask = class_mask | (y == v)
X = X[class_mask, :]
y = y[class_mask]
N = X.shape[0]

# Center the data by subtracting the mean of each pixel (column)
Xc = X - np.ones((N, 1)) * X.mean(0)

# PCA by computing the SVD of the centered data
U, S, V = linalg.svd(Xc, full_matrices=False)
V = V.T  # scipy returns V transposed; use its columns as principal directions
# Project data onto principal component space
Z = Xc @ V
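# Each row of Z holds the PCA coordinates (scores) of the corresponding digit image.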
# Compute the fraction of variance explained by each principal component
rho = (S * S) / (S * S).sum()

# Plot variance explained
figure()
plot(rho, "o-")
title("Variance explained by principal components")
xlabel("Principal component")
ylabel("Variance explained value")
# Plot the data projected onto the first two principal components, one color per digit class
figure()
title("Digit data projected onto the first two principal components")
for c in n:
    class_mask = y == c
    plot(Z[class_mask, 0], Z[class_mask, 1], "o")
legend(classNames)
xlabel("PC1")
ylabel("PC2")
# Visualize data reconstructed from the first K principal components:
# D randomly selected digits are shown next to their reconstructions.
figure(figsize=(10, 3))
W = Z[:, range(K)] @ V[:, range(K)].T
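# W approximates the centered data Xc using only the first K principal directions.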
D = len(nD)
for d in range(D):
    digit_ix = np.random.randint(0, N)
    # Original digit (top row)
    subplot(2, D, int(d + 1))
    I = np.reshape(X[digit_ix, :], (16, 16))
    imshow(I, cmap=cm.gray_r)
    title("Original")
    # Reconstruction from K components (bottom row); add the mean back
    subplot(2, D, D + d + 1)
    I = np.reshape(W[digit_ix, :] + X.mean(0), (16, 16))
    imshow(I, cmap=cm.gray_r)
    title("Reconstructed")
# Visualize the first K principal components as 16x16 images
N1 = int(np.ceil(np.sqrt(K)))  # subplot grid width
N2 = int(np.ceil(K / N1))  # subplot grid height
figure(figsize=(8, 6))
for k in range(K):
    subplot(N2, N1, int(k + 1))
    I = np.reshape(V[:, k], (16, 16))
    imshow(I, cmap=cm.hot)
    title("PC{0}".format(k + 1))

show()
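# --- Optional sanity check (a small sketch, not part of the exercise above;
# --- it assumes the script has just run). The variance-explained fractions
# --- should sum to one, and the mean squared reconstruction error should
# --- shrink as more principal components are kept.
assert np.isclose(rho.sum(), 1.0)
for k_test in (1, 4, 16, 64):
    W_k = Z[:, :k_test] @ V[:, :k_test].T
    mse = np.mean((Xc - W_k) ** 2)
    print(f"K={k_test:3d}  mean squared reconstruction error: {mse:.4f}")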