Skip to content
Snippets Groups Projects
Commit ab98fbf8 authored by bjje's avatar bjje
Browse files

Updates for week4

parent f4d96319
No related branches found
No related tags found
No related merge requests found
Showing
with 743 additions and 0 deletions
% exercise 4.1.1
% Draw samples from a 1-D Normal distribution and show them both as a
% sample-by-sample plot and as a histogram.

% --- Settings ---
N     = 200;   % number of samples to draw
mu    = 17;    % mean of the distribution
s     = 2;     % standard deviation of the distribution
NBins = 20;    % number of bins used by the histogram

%% Draw an N-by-1 column of samples from the Normal distribution
X = normrnd(mu, s, [N, 1]);

%% Left panel: raw samples, right panel: histogram
mfig('Normal distribution');
subplot(1, 2, 1); plot(X, 'x');
subplot(1, 2, 2); hist(X, NBins);
% exercise 4.1.2
% Draw samples from a 1-D Normal distribution, plot them, and compare the
% empirical mean / standard deviation with the values used for sampling.

% --- Settings ---
N     = 100;   % number of samples to draw
mu    = 17;    % mean of the distribution
s     = 2;     % standard deviation of the distribution
NBins = 10;    % number of bins used by the histogram

%% Draw an N-by-1 column of samples from the Normal distribution
X = normrnd(mu, s, [N, 1]);

%% Left panel: raw samples, right panel: histogram
mfig('Normal distribution');
subplot(1, 2, 1); plot(X, 'x');
subplot(1, 2, 2); hist(X, NBins);

%% Empirical estimates computed from the samples
mu_ = mean(X);   % sample mean
s_  = std(X);    % sample standard deviation

% Print both estimates (name and value) to the command window
display(mu_);
display(s_);
\ No newline at end of file
% exercise 4.1.3
% Compare a normalised histogram of Normal samples with the theoretical
% probability density function.

% --- Settings ---
N     = 1000;  % number of samples to draw
mu    = 17;    % mean of the distribution
s     = 2;     % standard deviation of the distribution
NBins = 50;    % number of bins used by the histogram

%% Draw an N-by-1 column of samples from the Normal distribution
X = normrnd(mu, s, [N, 1]);

%% Histogram scaled to a density, with the true pdf drawn on top
mfig('Normal distribution'); clf; hold all;

% n holds the bin counts and x the bin centres
[n, x] = hist(X, NBins);

% Divide the relative frequencies by the bin spacing so that the bars
% are on the same scale as the pdf
density = (n / N) ./ gradient(x);
bar(x, density);

% Evaluate the pdf on a fine grid spanning the bin centres
x = linspace(min(x), max(x), 1000);
plot(x, normpdf(x, mu, s), 'r', 'LineWidth', 5);
xlim([x(1), x(end)]);

%% Empirical estimates computed from the samples
mu_ = mean(X);   % sample mean
s_  = std(X);    % sample standard deviation

% Print both estimates (name and value) to the command window
display(mu_);
display(s_);
\ No newline at end of file
% exercise 4.1.4
% Draw samples from a 2-D (multivariate) Normal distribution.

N  = 1000;       % number of samples to draw
mu = [13, 17];   % mean vector (one entry per dimension)
S  = [4, 3; ...
      3, 9];     % 2-by-2 covariance matrix

%% Draw the samples; X gets one row per sample
X = mvnrnd(mu, S, N);
% exercise 4.1.5
% Draw samples from a 2-D Normal distribution whose covariance matrix is
% built from two standard deviations and a correlation, then show the
% samples as a scatter plot and as a 2-D histogram.

% --- Settings ---
N     = 1000;      % number of samples to draw
mu    = [13, 17];  % mean vector
s1    = 2;         % standard deviation of x1
s2    = 3;         % standard deviation of x2
% Correlation between x1 and x2.
% NOTE(review): the variable name corr presumably shadows the corr
% function of the Statistics toolbox while it is in the workspace.
corr  = 0;
NBins = 20;        % number of bins in the 2-D histogram

%% Covariance matrix: variances on the diagonal, covariance off it
c12 = corr*s1*s2;
S = [s1^2, c12; ...
     c12,  s2^2];

%% Draw the samples; X gets one row per sample
X = mvnrnd(mu, S, N);

%% Left panel: scatter plot of the samples
mfig('2-D Normal distribution'); clf;
subplot(1, 2, 1);
plot(X(:,1), X(:,2), 'x');
axis equal;
xlabel('x_1'); ylabel('x_2');
title('Scatter plot of data');

%% Right panel: 2-D histogram shown as an image
subplot(1, 2, 2);
% hist2d is a toolbox helper; its outputs are used below as the counts
% (n) and the axis coordinates (rows of x)
[n, x] = hist2d(X, NBins);
imagesc(x(1,:), x(2,:), n);
axis equal;
axis xy;
colorbar('South');
colormap(1-gray);
xlabel('x_1'); ylabel('x_2');
title('2D histogram');
%% exercise 4.1.6
% Compute and display the per-pixel mean and standard deviation of the
% selected handwritten digits.

% Digits to include in the analysis.
% NOTE(review): labels are used below as classNames(y+1), so they
% presumably start at 0; to include all digits, n = 0:9 is likely what is
% meant rather than 1:10 -- confirm against the data.
n = 1;

%% Load the data file located relative to this script
cdir = fileparts(mfilename('fullpath'));
load(fullfile(cdir,'../Data/zipdata.mat'));

%% Split traindata into labels (first column) and features (the rest)
y = traindata(:,1);
X = traindata(:,2:end);
classNames = {'0';'1';'2';'3';'4';'5';'6';'7';'8';'9';'10'};
classLabels = classNames(y+1);

%% Keep only the rows whose label is listed in n
j = ismember(y, n);
X = X(j,:);
classLabels = classLabels(j);
classNames = classNames(n+1);
% Re-number the remaining classes as 0, 1, ... in the order of classNames
y = cellfun(@(str) find(strcmp(str, classNames)), classLabels) - 1;

%% Column-wise statistics of the selected rows
mu = mean(X);   % mean of every column
s  = std(X);    % standard deviation of every column
S  = cov(X);    % covariance matrix

%% Show mean and standard deviation as 16-by-16 images
mfig('Digits: Mean and std'); clf;

subplot(1, 2, 1);
I = reshape(mu, 16, 16)';   % reshape fills column-wise, hence the transpose
imagesc(I);
axis image off
title('Mean');

subplot(1, 2, 2);
I = reshape(s, 16, 16)';
imagesc(I);
axis image off
title('Standard deviation');

colormap(1-gray);
%% exercise 4.1.7
% Generate artificial digit images from (a) independent 1-D Normal
% distributions per pixel and (b) a multivariate Normal distribution
% fitted to the selected digits.

% Digits to include in the analysis.
% NOTE(review): labels are used below as classNames(y+1), so they
% presumably start at 0; to include all digits, n = 0:9 is likely what is
% meant rather than 1:10 -- confirm against the data.
n = 1;

% Number of images generated from each model
nGen = 10;

%% Load the data file located relative to this script
cdir = fileparts(mfilename('fullpath'));
load(fullfile(cdir,'../Data/zipdata.mat'));

%% Split traindata into labels (first column) and features (the rest)
y = traindata(:,1);
X = traindata(:,2:end);
classNames = {'0';'1';'2';'3';'4';'5';'6';'7';'8';'9';'10'};
classLabels = classNames(y+1);

%% Keep only the rows whose label is listed in n
j = ismember(y, n);
X = X(j,:);
classLabels = classLabels(j);
classNames = classNames(n+1);
% Re-number the remaining classes as 0, 1, ... in the order of classNames
y = cellfun(@(str) find(strcmp(str, classNames)), classLabels) - 1;

%% Column-wise statistics of the selected rows
mu = mean(X);   % mean of every column
s  = std(X);    % standard deviation of every column
S  = cov(X);    % covariance matrix

%% Model (a): every column drawn independently with its own mean and std
Xgen = normrnd(repmat(mu, nGen, 1), repmat(s, nGen, 1));

mfig('Digits: 1-D Normal');
for k = 1:nGen
    subplot(2, 5, k);
    I = reshape(Xgen(k,:), 16, 16)';   % one generated row as a 16x16 image
    imagesc(I);
    axis image off
end
colormap(1-gray);

%% Model (b): rows drawn jointly using the full covariance matrix
Xgen = mvnrnd(mu, S, nGen);

mfig('Digits: Multivariate Normal');
for k = 1:nGen
    subplot(2, 5, k);
    I = reshape(Xgen(k,:), 16, 16)';
    imagesc(I);
    axis image off
end
colormap(1-gray);
# exercise 4.1.2
# Draw samples from a 1-D Normal distribution, report the empirical mean and
# standard deviation, and plot the samples next to their histogram.
import numpy as np
import matplotlib.pyplot as plt

# Number of samples
N = 200

# Mean
mu = 17

# Standard deviation
s = 2

# Number of bins in histogram
nbins = 20

# Generate samples from the Normal distribution
X = np.random.normal(mu, s, N).T
# or equally:
X = np.random.randn(N).T * s + mu

# Compute empirical mean and standard deviation
# (ddof=1 gives the unbiased sample standard deviation)
mu_ = X.mean()
s_ = X.std(ddof=1)

print("Theoretical mean: ", mu)
print("Theoretical std.dev.: ", s)
print("Empirical mean: ", mu_)
print("Empirical std.dev.: ", s_)

# Plot the samples and histogram.
# The heading is set with suptitle so it belongs to the figure as a whole;
# calling plt.title() before any subplot exists would create an extra
# full-size axes underneath the two panels.
plt.figure()
plt.suptitle("Normal distribution")
plt.subplot(1, 2, 1)
plt.plot(X, "x")
plt.subplot(1, 2, 2)
plt.hist(X, bins=nbins)
plt.show()
# exercise 4.1.3
# Compare a density-normalised histogram of Normal samples with the
# theoretical probability density function.
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

# Number of samples
N = 500

# Mean
mu = 17

# Standard deviation
s = 2

# Number of bins in histogram
nbins = 20

# Generate samples from the Normal distribution
X = np.random.normal(mu, s, N).T
# or equally:
X = np.random.randn(N).T * s + mu

# Plot the histogram (density=True scales the bars to integrate to one,
# which puts them on the same scale as the pdf)
f = plt.figure()
plt.title("Normal distribution")
plt.hist(X, bins=nbins, density=True)

# Over the histogram, plot the theoretical probability distribution function.
# The pdf is evaluated with the same mu and s used for sampling, so the curve
# stays in sync when the parameters above are changed.
x = np.linspace(X.min(), X.max(), 1000)
pdf = stats.norm.pdf(x, loc=mu, scale=s)
plt.plot(x, pdf, ".", color="red")

# Compute empirical mean and standard deviation
# (ddof=1 gives the unbiased sample standard deviation)
mu_ = X.mean()
s_ = X.std(ddof=1)

print("Theoretical mean: ", mu)
print("Theoretical std.dev.: ", s)
print("Empirical mean: ", mu_)
print("Empirical std.dev.: ", s_)

plt.show()
# exercise 4.2.4
# Draw samples from a 2-D (multivariate) Normal distribution.
import numpy as np

N = 1000  # number of samples to draw
mu = np.array([13, 17])  # mean vector, one entry per dimension
S = np.array(
    [
        [4, 3],
        [3, 9],
    ]
)  # 2x2 covariance matrix

# X has one row per sample and one column per dimension
X = np.random.multivariate_normal(mean=mu, cov=S, size=N)

print("Ran Exercise 4.2.4")
# exercise 4.1.5
# Draw samples from a 2-D Normal distribution whose covariance matrix is built
# from two standard deviations and a correlation, then show the samples as a
# scatter plot and as a 2-D histogram.
import numpy as np
import matplotlib.pyplot as plt

# Number of samples
N = 1000

# Standard deviation of x1
s1 = 2

# Standard deviation of x2
s2 = 3

# Correlation between x1 and x2
corr = 0.5

# Covariance matrix (a plain ndarray; np.matrix is discouraged by NumPy)
S = np.array([[s1 * s1, corr * s1 * s2], [corr * s1 * s2, s2 * s2]])

# Mean
mu = np.array([13, 17])

# Number of bins in histogram
nbins = 20

# Generate samples from multivariate normal distribution
X = np.random.multivariate_normal(mu, S, N)

# Plot scatter plot of data
plt.figure(figsize=(12, 8))
plt.suptitle("2-D Normal distribution")

plt.subplot(1, 2, 1)
plt.plot(X[:, 0], X[:, 1], "x")
plt.xlabel("x1")
plt.ylabel("x2")
plt.title("Scatter plot of data")

plt.subplot(1, 2, 2)
hist, xedges, yedges = np.histogram2d(X[:, 0], X[:, 1], nbins)
# histogram2d bins its first argument (x1) along the first array axis, while
# imshow draws the first array axis vertically. Transposing puts x1 on the
# horizontal axis so the image matches the axis labels and the scatter plot.
plt.imshow(hist.T, cmap=plt.cm.gray_r, interpolation="None", origin="lower")
plt.colorbar()
plt.xlabel("x1")
plt.ylabel("x2")
plt.xticks([])
plt.yticks([])
plt.title("2D histogram")

plt.show()
# exercise 4.1.6
# Compute and display the per-pixel mean and standard deviation of the
# selected digits.
import importlib_resources
import numpy as np
import scipy.linalg as linalg
import matplotlib.pyplot as plt
from scipy.io import loadmat

filename = importlib_resources.files("dtuimldmtools").joinpath("data/zipdata.mat")

# Digits to include in analysis (to include all: n = range(10))
n = [0]

# Read the Matlab file into a dict and pull out the training matrix;
# its first column is used as the label and the remaining columns as features
traindata = loadmat(filename)["traindata"]
y = traindata[:, 0]
X = traindata[:, 1:]
N, M = X.shape
C = len(n)

# Build a boolean row mask that is True wherever the label is one of n
class_mask = np.zeros(N).astype(bool)
for digit in n:
    class_mask |= y == digit

# Keep only the selected rows
X = X[class_mask]
y = y[class_mask]
N = X.shape[0]

# Column-wise statistics of the selected rows
mu = X.mean(axis=0)
s = X.std(ddof=1, axis=0)
S = np.cov(X, rowvar=False, ddof=1)

# Show mean and standard deviation side by side as 16x16 images
plt.figure()
for panel, (values, label) in enumerate(
    [(mu, "Mean"), (s, "Standard deviation")], start=1
):
    plt.subplot(1, 2, panel)
    I = values.reshape(16, 16)
    plt.imshow(I, cmap=plt.cm.gray_r)
    plt.title(label)
    plt.xticks([])
    plt.yticks([])

plt.show()
# exercise 4.1.7
# Generate artificial digit images from (a) independent 1-D Normal
# distributions per pixel and (b) a multivariate Normal distribution, both
# fitted to the selected digits.
import importlib_resources
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import loadmat

filename = importlib_resources.files("dtuimldmtools").joinpath("data/zipdata.mat")

# Digits to include in analysis (to include all, n = range(10) )
n = [1]

# Number of digits to generate from normal distributions
ngen = 10

# Load Matlab data file to python dict structure
# and extract variables of interest
traindata = loadmat(filename)["traindata"]
X = traindata[:, 1:]
y = traindata[:, 0]
N, M = np.shape(X)  # or X.shape
C = len(n)

# Remove digits that are not to be inspected
class_mask = np.zeros(N).astype(bool)
for v in n:
    cmsk = y == v
    class_mask = class_mask | cmsk
X = X[class_mask, :]
y = y[class_mask]
N = np.shape(X)[0]  # or X.shape[0]

# Column-wise mean, standard deviation and covariance of the selected rows
mu = X.mean(axis=0)
s = X.std(ddof=1, axis=0)
S = np.cov(X, rowvar=0, ddof=1)

# Generate samples from 1-D normal distributions: one independent draw per
# feature, scaled and shifted by that feature's std and mean. The number of
# features M comes from the data rather than a hard-coded 256, and
# broadcasting applies s and mu to every generated row at once.
Xgen = np.random.randn(ngen, M) * s + mu

# Number of subplot columns when the images are laid out in two rows
ncols = int(np.ceil(ngen / 2.0))

# Plot images
plt.figure()
plt.suptitle("Digits: 1-D Normal")  # figure-level title, not tied to one panel
for k in range(ngen):
    plt.subplot(2, ncols, k + 1)
    I = np.reshape(Xgen[k, :], (16, 16))
    plt.imshow(I, cmap=plt.cm.gray_r)
    plt.xticks([])
    plt.yticks([])

# Generate samples from multivariate normal distribution
Xmvgen = np.random.multivariate_normal(mu, S, ngen)
# Note if you are investigating a single class, then you may get:
# """RuntimeWarning: covariance is not positive-semidefinite."""
# Which in general is troublesome, but here is due to numerical imprecision

# Plot images
plt.figure()
plt.suptitle("Digits: Multivariate Normal")
for k in range(ngen):
    plt.subplot(2, ncols, k + 1)
    I = np.reshape(Xmvgen[k, :], (16, 16))
    plt.imshow(I, cmap=plt.cm.gray_r)
    plt.xticks([])
    plt.yticks([])

plt.show()
####################
# Exercise 2.1.1
####################
rm(list = ls()) # Clear work space

# The ten observations to summarise
x <- c(
  -0.68, -2.11, 2.39, 0.26, 1.46,
  1.33, 1.03, -0.41, -0.33, 0.47
)

# Location and spread: mean, standard deviation and median
mean(x)
sd(x)
median(x)

# Range of the data, i.e. the distance between the largest and the
# smallest observation.
max(x) - min(x)
####################
# Exercise 4.1.1
####################
rm(list = ls()) # Clear work space

# Settings
N <- 100     # number of samples to draw
mu <- 17     # mean of the distribution
s <- 2       # standard deviation of the distribution
NBins <- 20  # number of histogram cells asked for via 'breaks'

# Draw N samples from the Normal distribution
X <- rnorm(n = N, mean = mu, sd = s)

# Two panels side by side: the raw samples and their histogram
{
  par(mfrow = c(1, 2))
  plot(X, main = "Data")
  hist(X, main = "Histogram of Data", breaks = NBins)
}
####################
# Exercise 4.1.2
####################
rm(list = ls()) # Clear work space

# Settings
N <- 100     # number of samples to draw
mu <- 17     # mean of the distribution
s <- 2       # standard deviation of the distribution
NBins <- 20  # number of histogram cells asked for via 'breaks'

# Draw N samples from the Normal distribution
X <- rnorm(n = N, mean = mu, sd = s)

# Two panels side by side: the raw samples and their histogram
{
  par(mfrow = c(1, 2))
  plot(X, main = "Generated data")
  hist(X, main = "Histogram of generated data", breaks = NBins)
}

# Empirical mean of the samples (stored, then shown)
mu_ <- mean(X)
mu_

# Empirical standard deviation of the samples (stored, then shown)
s_ <- sd(X)
s_
####################
# Exercise 4.1.3
####################
rm(list = ls()) # Clear work space

# Settings
N <- 1000    # number of samples to draw
mu <- 17     # mean of the distribution
s <- 2       # standard deviation of the distribution
NBins <- 50  # number of histogram cells asked for via 'breaks'

# Draw N samples from the Normal distribution
X <- rnorm(n = N, mean = mu, sd = s)

# Density-scaled histogram with the theoretical density drawn on top
{
  par(mfrow = c(1, 1))
  # freq = FALSE plots densities instead of counts
  res <- hist(X, breaks = NBins, freq = FALSE)
  # Fine grid spanning the histogram cell midpoints
  x <- seq(from = min(res$mids), to = max(res$mids), length.out = 1000)
  lines(x, dnorm(x, mean = mu, sd = s))
}

# Empirical mean of the samples (stored, then shown)
mu_ <- mean(X)
mu_

# Empirical standard deviation of the samples (stored, then shown)
s_ <- sd(X)
s_
####################
# Exercise 4.1.4
####################
rm(list = ls()) # Clear work space

# MASS provides mvrnorm for sampling from a multivariate normal distribution
library(MASS) # install.packages("MASS")
?mvrnorm

N <- 1000         # number of samples to draw
mu <- c(13, 17)   # mean vector, one entry per dimension

# Covariance matrix, written row by row
S <- rbind(
  c(4, 3),
  c(3, 9)
)

# Draw the samples
X <- mvrnorm(n = N, mu = mu, Sigma = S)

# Show the dimensions of the matrix holding the generated vectors
dim(X)
####################
# Exercise 4.1.5
####################
rm(list = ls()) # Clear work space

# MASS provides mvrnorm (multivariate normal sampling) and kde2d (2-D kernel
# density estimate), both used below. If library(MASS) fails because the
# package is not installed, run install.packages("MASS") and try again.
# The package gplots (function hist2d) is an alternative for a 2-D histogram
# but is not needed by this script.
#library(gplots)
library(MASS)

# Number of samples
N <- 1000

# Mean
mu <- c(13, 17)

# Standard deviation of x1
s1 <- 2

# Standard deviation of x2
s2 <- 3

# Correlation between x1 and x2
corr <- 0

# Covariance matrix: variances on the diagonal, covariance off the diagonal
S <- matrix(c(s1^2, corr * s1 * s2, corr * s1 * s2, s2^2), nrow = 2, byrow = TRUE)

# Number of grid cells per axis in the 2-D density image
NBins <- 20

# Generate samples from the Normal distribution
X <- mvrnorm(N, mu = mu, Sigma = S)

# Plot scatter plot of data
{
  # Axis limits: mean +/- 3 standard deviations. The diagonal of S holds
  # variances, so the square root is taken to get standard deviations.
  xrange <- mu[1] + sqrt(S[1, 1]) * c(-3, 3)
  yrange <- mu[2] + sqrt(S[2, 2]) * c(-3, 3)

  par(mfrow = c(1, 2))

  # Left panel: empty frame with the chosen limits, then the samples
  plot(xrange, yrange, type = "n", ylab = "x2", xlab = "x1",
       main = "Scatter plot of data")
  points(X[, 1], X[, 2])

  # Right panel: 2-D kernel density estimate on an NBins x NBins grid
  k <- kde2d(X[, 1], X[, 2], n = NBins)
  image(k, col = gray(32:0 / 32),
        main = "2-D Normal distribution", xlab = "x1", ylab = "x2")
}
####################
# Exercise 4.1.6
####################
rm(list = ls()) # Clear work space
library(MASS)

# Digits to include in analysis.
# NOTE(review): labels are used below as classNames[y + 1], so they
# presumably start at 0; to include all digits, n <- 0:9 is likely what is
# meant rather than 1:10 -- confirm against the data.
n <- c(1) # c(1,5,9)
# Sorted and de-duplicated so that position in n defines the new class index
n <- sort(unique(n))

# Load the library R.matlab to enable the function readMat,
# which allows R to read the matlab .mat format.
library(R.matlab) # install.packages("R.matlab")

# The row of training data that we will look at
# (not used further in this script)
i <- 1

# Read in the data (path is relative to the current working directory)
data <- readMat(file.path("Data", "zipdata.mat"))

# Check that the structure data contains two matrices, testdata and traindata
names(data)

ncols <- ncol(data$traindata)

# Extract digits: first column is used as the label, the rest as features
X <- data$traindata[, 2:ncols]
y <- data$traindata[, 1]
classNames <- c("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10")
classLabels <- classNames[y + 1]

# Remove digits that are not to be inspected
keep <- y %in% n
if (!any(keep)) {
  stop("None of the requested digits occur in the training data.",
       call. = FALSE)
}
# drop = FALSE keeps X a matrix even when only a single row is selected
X <- X[keep, , drop = FALSE]
classLabels <- classLabels[keep]
classNames <- classNames[n + 1]
# Re-number the remaining classes as 0, 1, ... following the order of n
y <- match(y[keep], n) - 1

# Compute mean, standard deviations, and covariance matrix
mu <- colMeans(X)
s <- apply(X, 2, sd)
S <- cov(X)

# Plot result
{
  par(mfrow = c(1, 2))

  # Arrange the column statistics as 16 x 16 matrices; the columns are
  # reversed for image() so the picture is not drawn upside down
  I <- mu
  dim(I) <- c(16, 16)
  image(I[, 16:1], main = "Digits: Mean", col = gray(32:0 / 32))

  I <- s
  dim(I) <- c(16, 16)
  image(I[, 16:1], main = "Digits: SD", col = gray(32:0 / 32))
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment