diff --git a/exercises/02450Toolbox_Matlab/Scripts/ex4_1_1.m b/exercises/02450Toolbox_Matlab/Scripts/ex4_1_1.m new file mode 100644 index 0000000000000000000000000000000000000000..836189bcade545f540ca4c1ea4ee2c0298a41ef7 --- /dev/null +++ b/exercises/02450Toolbox_Matlab/Scripts/ex4_1_1.m @@ -0,0 +1,23 @@ +% exercise 4.1.1 + +% Number of samples +N = 200; + +% Mean +mu = 17; + +% Standard deviation +s = 2; + +% Number of bins in histogram +NBins = 20; + +%% Generate N samples (an N-by-1 column vector) from the Normal distribution +X = normrnd(mu, s, N, 1); + +%% Plot the samples (left panel) and their histogram (right panel) +mfig('Normal distribution'); +subplot(1,2,1); +plot(X, 'x'); +subplot(1,2,2); +hist(X, NBins); diff --git a/exercises/02450Toolbox_Matlab/Scripts/ex4_1_2.m b/exercises/02450Toolbox_Matlab/Scripts/ex4_1_2.m new file mode 100644 index 0000000000000000000000000000000000000000..8f1263219fc092b68dc35050c7aa6fcaca688007 --- /dev/null +++ b/exercises/02450Toolbox_Matlab/Scripts/ex4_1_2.m @@ -0,0 +1,30 @@ +% exercise 4.1.2 + +% Number of samples +N = 100; + +% Mean +mu = 17; + +% Standard deviation +s = 2; + +% Number of bins in histogram +NBins = 10; + +%% Generate N samples (an N-by-1 column vector) from the Normal distribution +X = normrnd(mu, s, N, 1); + +%% Plot the samples (left panel) and their histogram (right panel) +mfig('Normal distribution'); +subplot(1,2,1); +plot(X, 'x'); +subplot(1,2,2); +hist(X, NBins); + +%% Compute empirical mean and standard deviation of the samples +mu_ = mean(X); +s_ = std(X); + +display(mu_); +display(s_); \ No newline at end of file diff --git a/exercises/02450Toolbox_Matlab/Scripts/ex4_1_3.m b/exercises/02450Toolbox_Matlab/Scripts/ex4_1_3.m new file mode 100644 index 0000000000000000000000000000000000000000..02d28b4fe72f6ed5e078932908d3c4ec3560b13c --- /dev/null +++ b/exercises/02450Toolbox_Matlab/Scripts/ex4_1_3.m @@ -0,0 +1,31 @@ +% exercise 4.1.3 + +% Number of samples +N = 1000; + +% Mean +mu = 17; + +% Standard deviation +s = 2; + +% Number of bins in histogram +NBins = 50; + +%% Generate N samples (an N-by-1 column vector) from the Normal distribution +X = normrnd(mu, s, N, 1); + +% Plot a 
histogram +mfig('Normal distribution'); clf; hold all; +[n, x] = hist(X, NBins); +bar(x, n/N./gradient(x)); % scale counts to a density: divide by N and by the spacing of the bin centres +x = linspace(min(x), max(x), 1000); % dense grid between the outermost bin centres +plot(x, normpdf(x, mu, s), 'r', 'LineWidth', 5); % overlay the theoretical density +xlim([min(x), max(x)]); + +%% Compute empirical mean and standard deviation of the samples +mu_ = mean(X); +s_ = std(X); + +display(mu_); +display(s_); \ No newline at end of file diff --git a/exercises/02450Toolbox_Matlab/Scripts/ex4_1_4.m b/exercises/02450Toolbox_Matlab/Scripts/ex4_1_4.m new file mode 100644 index 0000000000000000000000000000000000000000..eba312a920b6d76ec7465a9d1cf4335b5e5f29a3 --- /dev/null +++ b/exercises/02450Toolbox_Matlab/Scripts/ex4_1_4.m @@ -0,0 +1,13 @@ +% exercise 4.1.4 + +% Number of samples +N = 1000; + +% Mean +mu = [13 17]; + +% Covariance matrix +S = [4 3;3 9]; + +%% Generate N samples from the 2-D Normal distribution +X = mvnrnd(mu, S, N); diff --git a/exercises/02450Toolbox_Matlab/Scripts/ex4_1_5.m b/exercises/02450Toolbox_Matlab/Scripts/ex4_1_5.m new file mode 100644 index 0000000000000000000000000000000000000000..5362705dc6904be5e283b27e978a38895d76df17 --- /dev/null +++ b/exercises/02450Toolbox_Matlab/Scripts/ex4_1_5.m @@ -0,0 +1,44 @@ +% exercise 4.1.5 + +% Number of samples +N = 1000; + +% Mean +mu = [13 17]; + +% Standard deviation of x1 +s1 = 2; + +% Standard deviation of x2 +s2 = 3; + +% Correlation between x1 and x2 (NOTE(review): the name corr may shadow a MATLAB function of the same name - confirm) +corr = 0; + +% Covariance matrix assembled from s1, s2 and the correlation +S = [s1^2 corr*s1*s2;corr*s1*s2 s2^2]; + +% Number of bins in histogram +NBins = 20; + +%% Generate N samples from the 2-D Normal distribution +X = mvnrnd(mu, S, N); + +%% Plot a scatter plot (left panel) and a 2-D histogram (right panel) of the data +mfig('2-D Normal distribution'); clf; + +subplot(1,2,1); +plot(X(:,1), X(:,2), 'x'); +axis equal; +xlabel('x_1'); ylabel('x_2'); +title('Scatter plot of data'); + +subplot(1,2,2); +[n, x] = hist2d(X, NBins); +imagesc(x(1,:), x(2,:), n); +axis equal; +axis xy; +colorbar('South'); +colormap(1-gray); +xlabel('x_1'); ylabel('x_2'); +title('2D histogram'); diff --git a/exercises/02450Toolbox_Matlab/Scripts/ex4_1_6.m 
b/exercises/02450Toolbox_Matlab/Scripts/ex4_1_6.m new file mode 100644 index 0000000000000000000000000000000000000000..6f104e34242d721bb3b828499ee7eca60dafea9b --- /dev/null +++ b/exercises/02450Toolbox_Matlab/Scripts/ex4_1_6.m @@ -0,0 +1,40 @@ +%% exercise 4.1.6 + +% Digits to include in analysis (labels start at 0, cf. classNames(y+1); to include all digits, n = 0:9); +n = [1]; + +%% Load data +cdir = fileparts(mfilename('fullpath')); +load(fullfile(cdir,'../Data/zipdata.mat')); + +% Extract pixel values (columns 2:end) and digit labels (column 1) +X = traindata(:,2:end); +y = traindata(:,1); +classNames = {'0';'1';'2';'3';'4';'5';'6';'7';'8';'9';'10'}; +classLabels = classNames(y+1); + +% Remove digits that are not to be inspected +j = ismember(y, n); +X = X(j,:); +classLabels = classLabels(j); +classNames = classNames(n+1); +y = cellfun(@(str) find(strcmp(str, classNames)), classLabels)-1; % re-index labels as 0-based positions in the reduced classNames + +%% Compute mean, standard deviations, and covariance matrix +mu = mean(X); +s = std(X); +S = cov(X); + +%% Plot mean and standard deviation as 16-by-16 images +mfig('Digits: Mean and std'); clf; +subplot(1,2,1); +I = reshape(mu, [16,16])'; +imagesc(I); +axis image off +title('Mean'); +subplot(1,2,2); +I = reshape(s, [16,16])'; +imagesc(I); +axis image off +title('Standard deviation'); +colormap(1-gray); diff --git a/exercises/02450Toolbox_Matlab/Scripts/ex4_1_7.m b/exercises/02450Toolbox_Matlab/Scripts/ex4_1_7.m new file mode 100644 index 0000000000000000000000000000000000000000..a79498fca3ed862980da5957982100639c337c44 --- /dev/null +++ b/exercises/02450Toolbox_Matlab/Scripts/ex4_1_7.m @@ -0,0 +1,53 @@ +%% exercise 4.1.7 + +% Digits to include in analysis (labels start at 0, cf. classNames(y+1); to include all digits, n = 0:9); +n = [1]; + +%% Load data +cdir = fileparts(mfilename('fullpath')); +load(fullfile(cdir,'../Data/zipdata.mat')); + +% Extract pixel values (columns 2:end) and digit labels (column 1) +X = traindata(:,2:end); +y = traindata(:,1); +classNames = {'0';'1';'2';'3';'4';'5';'6';'7';'8';'9';'10'}; +classLabels = classNames(y+1); + +% Remove digits that are not to be inspected +j = ismember(y, n); +X = X(j,:); +classLabels = classLabels(j); +classNames = classNames(n+1); +y = 
cellfun(@(str) find(strcmp(str, classNames)), classLabels)-1; + +%% Compute mean, standard deviations, and covariance matrix +mu = mean(X); +s = std(X); +S = cov(X); + +%% Generate 10 images with same mean and standard deviation +Xgen = normrnd(repmat(mu,10,1), repmat(s,10,1)); + +%% Plot images generated using the Normal distribution +mfig('Digits: 1-D Normal'); +for k = 1:10 + subplot(2,5,k); + I = reshape(Xgen(k,:), [16,16])'; + imagesc(I); + axis image off +end +colormap(1-gray); + + +%% Generate 10 images with same mean and covariance matrix +Xgen = mvnrnd(mu, S, 10); + +%% Plot images generated using the Normal distribution +mfig('Digits: Multivariate Normal'); +for k = 1:10 + subplot(2,5,k); + I = reshape(Xgen(k,:), [16,16])'; + imagesc(I); + axis image off +end +colormap(1-gray); diff --git a/exercises/02450Toolbox_Python/Scripts/ex4_1_2.py b/exercises/02450Toolbox_Python/Scripts/ex4_1_2.py new file mode 100644 index 0000000000000000000000000000000000000000..9f613d9590de7e7543594708599b41a9fba0e2ea --- /dev/null +++ b/exercises/02450Toolbox_Python/Scripts/ex4_1_2.py @@ -0,0 +1,41 @@ +# exercise 4.1.2 + +import numpy as np +import matplotlib.pyplot as plt + +# Number of samples +N = 200 + +# Mean +mu = 17 + +# Standard deviation +s = 2 + +# Number of bins in histogram +nbins = 20 + +# Generate samples from the Normal distribution +X = np.random.normal(mu, s, N).T +# or equally: +X = np.random.randn(N).T * s + mu + + +# Compute empirical mean and standard deviation +mu_ = X.mean() +s_ = X.std(ddof=1) + +print("Theoretical mean: ", mu) +print("Theoretical std.dev.: ", s) +print("Empirical mean: ", mu_) +print("Empirical std.dev.: ", s_) + +# Plot the samples and histogram +plt.figure() +plt.title("Normal distribution") +plt.subplot(1, 2, 1) +plt.plot(X, "x") +plt.subplot(1, 2, 2) +plt.hist(X, bins=nbins) +plt.show() + diff --git a/exercises/02450Toolbox_Python/Scripts/ex4_1_3.py b/exercises/02450Toolbox_Python/Scripts/ex4_1_3.py new file mode 100644 index 
0000000000000000000000000000000000000000..0a6c637b2aeda368d26b7470be3dbf9f44d47545 --- /dev/null +++ b/exercises/02450Toolbox_Python/Scripts/ex4_1_3.py @@ -0,0 +1,43 @@ +# exercise 4.1.3 +import numpy as np +import matplotlib.pyplot as plt +from scipy import stats + +# Number of samples +N = 500 + +# Mean +mu = 17 + +# Standard deviation +s = 2 + +# Number of bins in histogram +nbins = 20 + +# Generate samples from the Normal distribution +X = np.random.normal(mu, s, N).T +# or equally: +X = np.random.randn(N).T * s + mu + +# Plot the histogram +f = plt.figure() +plt.title("Normal distribution") +plt.hist(X, bins=nbins, density=True) + +# Over the histogram, plot the theoretical probability distribution function: +x = np.linspace(X.min(), X.max(), 1000) +pdf = stats.norm.pdf(x, loc=17, scale=2) +plt.plot(x, pdf, ".", color="red") + +# Compute empirical mean and standard deviation +mu_ = X.mean() +s_ = X.std(ddof=1) + +print("Theoretical mean: ", mu) +print("Theoretical std.dev.: ", s) +print("Empirical mean: ", mu_) +print("Empirical std.dev.: ", s_) + +plt.show() + diff --git a/exercises/02450Toolbox_Python/Scripts/ex4_1_4.py b/exercises/02450Toolbox_Python/Scripts/ex4_1_4.py new file mode 100644 index 0000000000000000000000000000000000000000..2db6dac00758edb1c945a8fefcb1e98e00172c08 --- /dev/null +++ b/exercises/02450Toolbox_Python/Scripts/ex4_1_4.py @@ -0,0 +1,17 @@ +# exercise 4.2.4 + +import numpy as np + +# Number of samples +N = 1000 + +# Mean +mu = np.array([13, 17]) + +# Covariance matrix +S = np.array([[4, 3], [3, 9]]) + +# Generate samples from the Normal distribution +X = np.random.multivariate_normal(mu, S, N) + +print("Ran Exercise 4.2.4") diff --git a/exercises/02450Toolbox_Python/Scripts/ex4_1_5.py b/exercises/02450Toolbox_Python/Scripts/ex4_1_5.py new file mode 100644 index 0000000000000000000000000000000000000000..d364cc8f672a4f9d674f67cf5d05453559ab953d --- /dev/null +++ b/exercises/02450Toolbox_Python/Scripts/ex4_1_5.py @@ -0,0 +1,53 @@ +# 
exercise 4.1.5 + +import numpy as np +import matplotlib.pyplot as plt + +# Number of samples +N = 1000 + +# Standard deviation of x1 +s1 = 2 + +# Standard deviation of x2 +s2 = 3 + +# Correlation between x1 and x2 +corr = 0.5 + +# Covariance matrix +S = np.matrix([[s1 * s1, corr * s1 * s2], [corr * s1 * s2, s2 * s2]]) + +# Mean +mu = np.array([13, 17]) + +# Number of bins in histogram +nbins = 20 + +# Generate samples from multivariate normal distribution +X = np.random.multivariate_normal(mu, S, N) + + +# Plot scatter plot of data +plt.figure(figsize=(12, 8)) +plt.suptitle("2-D Normal distribution") + +plt.subplot(1, 2, 1) +plt.plot(X[:, 0], X[:, 1], "x") +plt.xlabel("x1") +plt.ylabel("x2") +plt.title("Scatter plot of data") + +plt.subplot(1, 2, 2) +x = np.histogram2d(X[:, 0], X[:, 1], nbins) +plt.imshow(x[0], cmap=plt.cm.gray_r, interpolation="None", origin="lower") +plt.colorbar() +plt.xlabel("x1") +plt.ylabel("x2") +plt.xticks([]) +plt.yticks([]) +plt.title("2D histogram") + +plt.show() + + diff --git a/exercises/02450Toolbox_Python/Scripts/ex4_1_6.py b/exercises/02450Toolbox_Python/Scripts/ex4_1_6.py new file mode 100644 index 0000000000000000000000000000000000000000..11ce3af61ed99a4c271869455f2a8ee5c368958f --- /dev/null +++ b/exercises/02450Toolbox_Python/Scripts/ex4_1_6.py @@ -0,0 +1,48 @@ +# exercise 4.1.6 +import importlib_resources +import numpy as np +import scipy.linalg as linalg +import matplotlib.pyplot as plt +from scipy.io import loadmat + +filename = importlib_resources.files("dtuimldmtools").joinpath("data/zipdata.mat") +# Digits to include in analysis (to include all: n = range(10)) +n = [0] + +# Load Matlab data file to python dict structure +# and extract variables of interest +traindata = loadmat(filename)["traindata"] +X = traindata[:, 1:] +y = traindata[:, 0] +N, M = X.shape +C = len(n) + +# Remove digits that are not to be inspected +class_mask = np.zeros(N).astype(bool) +for v in n: + cmsk = y == v + class_mask = class_mask | cmsk +X = 
X[class_mask, :] +y = y[class_mask] +N = np.shape(X)[0] + +mu = X.mean(axis=0) +s = X.std(ddof=1, axis=0) +S = np.cov(X, rowvar=0, ddof=1) + +plt.figure() +plt.subplot(1, 2, 1) +I = np.reshape(mu, (16, 16)) +plt.imshow(I, cmap=plt.cm.gray_r) +plt.title("Mean") +plt.xticks([]) +plt.yticks([]) +plt.subplot(1, 2, 2) +I = np.reshape(s, (16, 16)) +plt.imshow(I, cmap=plt.cm.gray_r) +plt.title("Standard deviation") +plt.xticks([]) +plt.yticks([]) + +plt.show() + diff --git a/exercises/02450Toolbox_Python/Scripts/ex4_1_7.py b/exercises/02450Toolbox_Python/Scripts/ex4_1_7.py new file mode 100644 index 0000000000000000000000000000000000000000..f9d62f0844155350c01867801ecf1e5312429a73 --- /dev/null +++ b/exercises/02450Toolbox_Python/Scripts/ex4_1_7.py @@ -0,0 +1,72 @@ +# exercise 4.1.7 + +import importlib_resources +import numpy as np +import matplotlib.pyplot as plt +from scipy.io import loadmat + +filename = importlib_resources.files("dtuimldmtools").joinpath("data/zipdata.mat") +# Digits to include in analysis (to include all, n = range(10) ) +n = [1] + +# Number of digits to generate from normal distributions +ngen = 10 + +# Load Matlab data file to python dict structure +# and extract variables of interest +traindata = loadmat(filename)["traindata"] +X = traindata[:, 1:] +y = traindata[:, 0] +N, M = np.shape(X) # or X.shape +C = len(n) + +# Remove digits that are not to be inspected +class_mask = np.zeros(N).astype(bool) +for v in n: + cmsk = y == v + class_mask = class_mask | cmsk +X = X[class_mask, :] +y = y[class_mask] +N = np.shape(X)[0] # or X.shape[0] + +mu = X.mean(axis=0) +s = X.std(ddof=1, axis=0) +S = np.cov(X, rowvar=0, ddof=1) + +# Generate 10 samples from 1-D normal distribution +Xgen = np.random.randn(ngen, 256) +for i in range(ngen): + Xgen[i] = np.multiply(Xgen[i], s) + mu + +# Plot images +plt.figure() +for k in range(ngen): + plt.subplot(2, int(np.ceil(ngen / 2.0)), k + 1) + I = np.reshape(Xgen[k, :], (16, 16)) + plt.imshow(I, cmap=plt.cm.gray_r) + 
plt.xticks([]) + plt.yticks([]) + if k == 1: + plt.title("Digits: 1-D Normal") + + +# Generate 10 samples from multivariate normal distribution +Xmvgen = np.random.multivariate_normal(mu, S, ngen) +# Note if you are investigating a single class, then you may get: +# """RuntimeWarning: covariance is not positive-semidefinite.""" +# Which in general is troublesome, but here is due to numerical imprecission + + +# Plot images +plt.figure() +for k in range(ngen): + plt.subplot(2, int(np.ceil(ngen / 2.0)), k + 1) + I = np.reshape(Xmvgen[k, :], (16, 16)) + plt.imshow(I, cmap=plt.cm.gray_r) + plt.xticks([]) + plt.yticks([]) + if k == 1: + plt.title("Digits: Multivariate Normal") + +plt.show() + diff --git a/exercises/02450Toolbox_R/Scripts/ex2_1_2.R b/exercises/02450Toolbox_R/Scripts/ex2_1_2.R new file mode 100644 index 0000000000000000000000000000000000000000..2d7d725bf4a2cf7d7d7944c89107d40fef06c008 --- /dev/null +++ b/exercises/02450Toolbox_R/Scripts/ex2_1_2.R @@ -0,0 +1,17 @@ +#################### +# Exercise 2.1.1 +#################### + +rm(list = ls()) # Clear work space + +x <- c(-0.68, -2.11, 2.39, 0.26, 1.46, 1.33, 1.03, -0.41, -0.33, 0.47) + +mean(x) +sd(x) +median(x) +diff(range(x)) + +# Range returns the minimum and maximum of the vector x. +# To get the range, we must take the maximum minus the minimum. +# We do this using the function diff, which finds differences +# between consecutive elements in a vector. 
diff --git a/exercises/02450Toolbox_R/Scripts/ex4_1_1.R b/exercises/02450Toolbox_R/Scripts/ex4_1_1.R new file mode 100644 index 0000000000000000000000000000000000000000..d4716b7e6498910b1045ba7fe5a4b34a079e516f --- /dev/null +++ b/exercises/02450Toolbox_R/Scripts/ex4_1_1.R @@ -0,0 +1,26 @@ +#################### +# Exercise 4.1.1 +#################### +rm(list = ls()) # Clear work space + +# Number of samples +N <- 100 + +# Mean +mu <- 17 + +# Standard deviation +s <- 2 + +# Number of bins in histogram +NBins <- 20 + +# Generate N samples from the Normal distribution +X <- rnorm(N, mean = mu, sd = s) + +# Plot the samples and their histogram side by side +{ + par(mfrow = c(1, 2)) + plot(X, main = "Data") + hist(X, breaks = NBins, main = "Histogram of Data") +} diff --git a/exercises/02450Toolbox_R/Scripts/ex4_1_2.R b/exercises/02450Toolbox_R/Scripts/ex4_1_2.R new file mode 100644 index 0000000000000000000000000000000000000000..ddb7d43b6c0874667f899a7bb6c581dcbe30cac6 --- /dev/null +++ b/exercises/02450Toolbox_R/Scripts/ex4_1_2.R @@ -0,0 +1,31 @@ +#################### +# Exercise 4.1.2 +#################### +rm(list = ls()) # Clear work space + +# Number of samples +N <- 100 + +# Mean +mu <- 17 + +# Standard deviation +s <- 2 + +# Number of bins in histogram +NBins <- 20 + +# Generate N samples from the Normal distribution +X <- rnorm(N, mean = mu, sd = s) + +# Plot the samples and their histogram side by side +{ + par(mfrow = c(1, 2)) + plot(X, main = "Generated data") + hist(X, breaks = NBins, main = "Histogram of generated data") +} + +# Compute empirical mean and standard deviation (the outer parentheses also print the assigned value) +(mu_ <- mean(X)) +(s_ <- sd(X)) + diff --git a/exercises/02450Toolbox_R/Scripts/ex4_1_3.R b/exercises/02450Toolbox_R/Scripts/ex4_1_3.R new file mode 100644 index 0000000000000000000000000000000000000000..76fb13ebda1e787b01af6133e3ceb64d009f7ce5 --- /dev/null +++ b/exercises/02450Toolbox_R/Scripts/ex4_1_3.R @@ -0,0 +1,33 @@ +#################### +# Exercise 4.1.3 +#################### +rm(list = ls()) # Clear work space + +# Number of samples +N <- 1000 
+ +# Mean +mu <- 17 + +# Standard deviation +s <- 2 + +# Number of bins in histogram +NBins <- 50 + +# Generate N samples from the Normal distribution +X <- rnorm(N, mean = mu, sd = s) + +# Plot a density-scaled histogram and overlay the theoretical Normal density +{ + par(mfrow=c(1,1)) + res <- hist(X, breaks = NBins, freq = FALSE) + x <- res$mids + x <- seq(from = min(x), to = max(x), length.out = 1000) + lines(x, dnorm(x, mean = mu, sd = s)) +} + +# Compute empirical mean and standard deviation (the outer parentheses also print the assigned value) +(mu_ <- mean(X)) +(s_ <- sd(X)) + diff --git a/exercises/02450Toolbox_R/Scripts/ex4_1_4.R b/exercises/02450Toolbox_R/Scripts/ex4_1_4.R new file mode 100644 index 0000000000000000000000000000000000000000..86607cf49ad4750713195466cd5274942c4af4e2 --- /dev/null +++ b/exercises/02450Toolbox_R/Scripts/ex4_1_4.R @@ -0,0 +1,24 @@ +#################### +# Exercise 4.1.4 +#################### +rm(list = ls()) # Clear work space + +# Library for multivariate normal distribution +library(MASS) # install.packages("MASS") +?mvrnorm # Look up the help page for mvrnorm + +# Number of samples +N <- 1000 + +# Mean +mu <- c(13, 17) + +# Covariance matrix (filled row by row) +S <- matrix(c(4, 3, 3, 9), nrow = 2, byrow = TRUE) + +# Generate N samples from the 2-D Normal distribution +X <- mvrnorm(N, mu = mu, Sigma = S) + +# Inspect the dimensions of the matrix containing +# the generated multivariate normal vectors. +dim(X) diff --git a/exercises/02450Toolbox_R/Scripts/ex4_1_5.R b/exercises/02450Toolbox_R/Scripts/ex4_1_5.R new file mode 100644 index 0000000000000000000000000000000000000000..8a34abf7928a9130e1f6a660c7db14677956b703 --- /dev/null +++ b/exercises/02450Toolbox_R/Scripts/ex4_1_5.R @@ -0,0 +1,45 @@ +#################### +# Exercise 4.1.5 +#################### +rm(list = ls()) # Clear work space + +# load the package "gplots", which contains the function hist2d for making 2-dimensional histograms. If the package is not already installed on your computer, an error will result from the function call library(gplots). 
In that case, install the package using install.packages("gplots") and then run library(gplots) again. Same for the package MASS. NOTE: library(gplots) is commented out below; this script draws the 2-D density image with kde2d from MASS instead. +#library(gplots) + +library(MASS) +# Number of samples +N <- 1000 + +# Mean +mu <- c(13, 17) + +# Standard deviation of x1 +s1 <- 2 + +# Standard deviation of x2 +s2 <- 3 + +# Correlation between x1 and x2 +corr <- 0 + +# Covariance matrix assembled from s1, s2 and the correlation (filled row by row) +S <- matrix(c(s1^2, corr * s1 * s2, corr * s1 * s2, s2^2), nrow = 2, byrow = TRUE) + +# Number of bins in histogram (NOTE: not used below; kde2d is called with its default grid size) +NBins <- 20 + +# Generate N samples from the Normal distribution +X <- mvrnorm(N, mu = mu, Sigma = S) + +# Plot scatter plot of data (axes span mean +/- 3 standard deviations; the diagonal of S holds variances, hence sqrt) and a kernel density image +{ + xrange <- mu[1] + sqrt(S[1, 1]) * c(-3, 3) + yrange <- mu[2] + sqrt(S[2, 2]) * c(-3, 3) + par(mfrow = c(1, 2)) + plot(xrange, yrange, type = "n", ylab = "x2", xlab = "x1", + main = "Scatter plot of data") + points(X[, 1], X[, 2]) + k <- kde2d(X[,1], X[,2]) + image(k, col = gray(32:0 / 32), + main = "2-D Normal distribution", xlab = "x1", ylab = "x2") +} diff --git a/exercises/02450Toolbox_R/Scripts/ex4_1_6.R b/exercises/02450Toolbox_R/Scripts/ex4_1_6.R new file mode 100644 index 0000000000000000000000000000000000000000..ab9e60f75eb7fa4c121a0035c2a3f677fb114e7a --- /dev/null +++ b/exercises/02450Toolbox_R/Scripts/ex4_1_6.R @@ -0,0 +1,59 @@ +#################### +# Exercise 4.1.6 +#################### +rm(list = ls()) # Clear work space + +library(MASS) + +# Digits to include in analysis (labels start at 0, cf. classNames[y + 1]; to include all digits, n = 0:9) +n <- c(1) # c(1,5,9) +n <- sort(n) + +# Load the library R.matlab to enable the function readMat, +# which allows R to read the matlab .mat format. 
+library(R.matlab) # install.packages("R.matlab") + +# The row of training data that we will look at (NOTE: i is not used anywhere below in this script) +i <- 1 + +# Read in the data (path is relative to the current working directory) +data <- readMat(file.path("Data", "zipdata.mat")) + +# Check that the structure data contains two matrices, testdata and traindata +names(data) + +ncols <- ncol(data$traindata) + +# Extract pixel values (columns 2:ncols) and digit labels (column 1) +X <- data$traindata[, 2:ncols] +y <- data$traindata[, 1] +classNames <- c("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10") +classLabels <- classNames[y + 1] + +# Keep only the digits listed in n, then relabel them in place as 0..length(n)-1 +j <- match(y, n) +X <- X[!is.na(j), ] +classLabels <- classLabels[!is.na(j)] +classNames <- classNames[n + 1] +y <- y[!is.na(j)] +for (k in 0:(length(n) - 1)) +{ + classlab <- n[k + 1] + y[y == classlab] <- k +} + +# Compute per-pixel mean and standard deviation, and the covariance matrix between pixels +mu <- colMeans(X) +s <- apply(X, 2, sd) +S <- cov(X) + +# Plot mean and standard deviation as 16 x 16 images +{ + par(mfrow = c(1, 2)) + I <- mu + dim(I) <- c(16, 16) + image(I[, 16:1], main = "Digits: Mean", col = gray(32:0 / 32)) + I <- s + dim(I) <- c(16, 16) + image(I[, 16:1], main = "Digits: SD", col = gray(32:0 / 32)) +} diff --git a/exercises/02450Toolbox_R/Scripts/ex4_1_7.R b/exercises/02450Toolbox_R/Scripts/ex4_1_7.R new file mode 100644 index 0000000000000000000000000000000000000000..2f22bd6b92d1dbc90fa3fdc9ea2fcae3cacc7cd7 --- /dev/null +++ b/exercises/02450Toolbox_R/Scripts/ex4_1_7.R @@ -0,0 +1,74 @@ +#################### +# Exercise 4.1.7 +#################### +rm(list = ls()) # Clear work space + +library(MASS) + +# Digits to include in analysis (labels start at 0, cf. classNames[y + 1]; to include all digits, n = 0:9) +n <- c(1) # c(1,5,9) +n <- sort(n) + +# Load the library R.matlab to enable the function readMat, +# which allows R to read the matlab .mat format. 
+library(R.matlab) # install.packages("R.matlab") + +# The row of training data that we will look at (NOTE: i is not used anywhere below in this script) +i <- 1 + +# Read in the data (path is relative to the current working directory) +data <- readMat(file.path("Data", "zipdata.mat")) + +# Check that the structure data contains two matrices, testdata and traindata +names(data) + +ncols <- ncol(data$traindata) + +# Extract pixel values (columns 2:ncols) and digit labels (column 1) +X <- data$traindata[, 2:ncols] +y <- data$traindata[, 1] +classNames <- c("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10") +classLabels <- classNames[y + 1] + +# Keep only the digits listed in n, then relabel them in place as 0..length(n)-1 +j <- match(y, n) +X <- X[!is.na(j), ] +classLabels <- classLabels[!is.na(j)] +classNames <- classNames[n + 1] +y <- y[!is.na(j)] +for (k in 0:(length(n) - 1)) +{ + classlab <- n[k + 1] + y[y == classlab] <- k +} + +# Compute per-pixel mean and standard deviation, and the covariance matrix between pixels +mu <- colMeans(X) +s <- apply(X, 2, sd) +S <- cov(X) + +# Generate 10 images with same mean and standard deviation (independent pixels: Sigma is the diagonal matrix of variances s^2) +Xgen <- mvrnorm(n = 10, mu = mu, Sigma = diag(s^2)) + +# Plot the first 6 images generated using the 1-D Normal distributions +{ +par(mfrow = c(2, 3)) + for (k in 1:6) { + I <- Xgen[k, ] + dim(I) <- c(16, 16) + image(I[, 16:1], main = "Digits: 1-D Normal", col = gray(32:0 / 32)) + } +} + + +# Generate 10 images with same mean and covariance matrix (the first 6 are plotted below) +Xgen <- mvrnorm(n = 10, mu = mu, Sigma = S) + +{ + par(mfrow = c(2, 3)) + for (k in 1:6) { + I <- Xgen[k, ] + dim(I) <- c(16, 16) + image(I[, 16:1], main = "Digits: Multivariate Normal", col = gray(32:0 / 32)) + } +}