Commit 18c851b3 authored by Stas Syrota

Adjusted scripts

parent 00de7bf7
Showing 249 additions and 227 deletions
File deleted
File deleted
The Google matrix P is a model of the internet
P_ij is nonzero if there is a link from webpage i to j
The Google matrix is used to rank all Web pages
The ranking is done by solving a matrix eigenvalue problem
England dropped out of the top 10 in the FIFA ranking
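These sentences belong to one of the small text documents removed by this commit. Since they describe web-page ranking as a matrix eigenvalue problem, the following is a minimal, illustrative sketch of that idea; the toy link matrix and the damping factor 0.85 are assumptions for illustration, not course material:

```
import numpy as np

# Toy link structure (assumed for illustration): L[i, j] = 1 if page i links to page j
L = np.array([[0, 1, 1, 0],
              [1, 0, 0, 1],
              [1, 0, 0, 1],
              [0, 0, 1, 0]], dtype=float)
P = L / L.sum(axis=1, keepdims=True)  # row-stochastic "Google matrix"

d = 0.85                              # damping factor
n = P.shape[0]
r = np.ones(n) / n                    # start from a uniform ranking

for _ in range(100):                  # power iteration towards the leading left eigenvector
    r = d * (r @ P) + (1 - d) / n

print(np.argsort(-r))                 # page indices ranked from most to least important
```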
File deleted
File deleted
File deleted
File deleted
File deleted
File deleted
# 02450 Toolbox - Python
## Installation
The exercise scripts inside `/Scripts` depend on a course-specific package, [dtuimldmtools](https://pypi.org/project/dtuimldmtools/), which needs to be installed.

We recommend using a Python virtual environment via [Anaconda](https://www.anaconda.com/download/) or [Miniconda](https://docs.conda.io/projects/miniconda/en/latest/miniconda-install.html) and installing the package inside it. To set up such an environment, follow the guide provided by [DTU Python support](https://pythonsupport.dtu.dk/python/install-conda.html).

Once the environment is set up, the package can be installed by running the following command:
```
pip install dtuimldmtools
```
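A quick way to verify the installation is to import the package from the environment you just created (a minimal check; it only confirms that the package is importable):

```
python -c "import dtuimldmtools"
```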
## Dataset
@@ -21,4 +21,4 @@ c = np.arange(100, 95, -1)
d = np.arange(1.2, 1.9, 0.1)
e = np.pi * np.arange(0, 2.5, 0.5)
## exercise 0.4.4
import numpy as np
# Extracting the elements from vectors is easy. Consider the
# following definition of x and the echoed results
x = np.concatenate([np.zeros(2), np.arange(0, 3.6, 0.6), np.ones(3)])
@@ -17,7 +18,7 @@ x[1::2] # return every other element of x starting from the 2nd
# Inserting numbers into vectors is also easy. Using the same
# definition of x, observe the results when typing
y = x
y[1::2] = np.pi
# Notice that we're inserting the same scalar value "pi" into all elements
# that we index y with
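# NOTE (added sketch, not part of the original script): "y = x" above only binds
# a second name to the same array, which is why the assignment to y also changed x.
# Use .copy() when an independent array is needed; the name z is new, introduced here.
z = x.copy()     # independent copy of x
z[1::2] = 0      # modifying z leaves x and y untouched
print(x[1::2])   # still pi at the odd indices
print(z[1::2])   # zeros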
@@ -51,14 +51,14 @@ a4 = np.eye(3) # diagonal array
a5 = np.random.rand(2, 3) # random array
a6 = a1.copy() # copy
a7 = a1 # alias
m1 = np.matrix("1 2 3; 4 5 6; 7 8 9") # define matrix by string
m2 = np.asmatrix(a1.copy()) # copy array into matrix
m3 = np.mat(np.array([1, 2, 3])) # map array onto matrix
a8 = np.asarray(m1) # map matrix onto array
# It is easy to extract and/or modify selected items from arrays/matrices.
# Here is how you can index matrix elements:
m = np.matrix("1 2 3; 4 5 6; 7 8 9")
m[0, 0] # first element
m[-1, -1] # last element
m[0, :] # first row
@@ -67,7 +67,7 @@ m[1:3,-1] # view on selected rows&columns
# Similarly, you can selectively assign values to matrix elements or columns:
m[-1, -1] = 10000
m[0:2, -1] = np.matrix("100; 1000")
m[:, 0] = 0
# Logical indexing can be used to change or take only elements that
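# NOTE (added sketch, not part of the original script): a minimal example of the
# logical indexing mentioned in the truncated comment above -- a boolean condition
# selects exactly the elements that satisfy it.
mask = m > 5     # boolean matrix, True where the condition holds
print(m[mask])   # take only the elements larger than 5
m[mask] = 0      # or overwrite exactly those elements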
## exercise 0.5.1
import matplotlib.pyplot as plt
import numpy as np
x = np.arange(0, 1, 0.1)
f = np.exp(x)
plt.figure(1)
plt.plot(x, f)
plt.xlabel("x")
plt.ylabel("f(x)=exp(x)")
plt.title("The exponential function")
plt.show()
## exercise 0.5.2
import matplotlib.pyplot as plt
import numpy as np
# We simulate measurements every 100 ms for a period of 10 seconds
t = np.arange(0, 10, 0.1)
@@ -14,40 +13,40 @@ sensor2 = 3*np.cos(t)+0.5*np.random.normal(size=len(t))
# Change the font size to make axis and title readable
font_size = 15
plt.rcParams.update({"font.size": font_size})
# Define the name of the curves
legend_strings = ["Sensor 1", "Sensor 2"]
# Start plotting the simulated measurements
plt.figure(1)
# Plot the sensor 1 output as a function of time, and
# make the curve red and fully drawn
plt.plot(t, sensor1, "r-")
# Plot the sensor 2 output as a function of time, and
# make the curve blue and dashed
plt.plot(t, sensor2, "b--")
# Ensure that the limits on the axis fit the data
plt.axis("tight")
# Add a grid in the background
plt.grid()
# Add a legend describing each curve, place it at the "best" location
# so as to minimize the amount of curve it covers
plt.legend(legend_strings, loc="best")
# Add labels to the axes
plt.xlabel("Time [s]")
plt.ylabel("Voltage [mV]")
# Add a title to the plot
plt.title("Sensor outputs")
# Export the figure
plt.savefig("ex1_5_2.png")
# Show the figure in the console
plt.show()
# exercise 10.1.1
import importlib_resources
from matplotlib.pyplot import figure, show
from scipy.io import loadmat
from sklearn.cluster import k_means
from dtuimldmtools import clusterplot
filename = importlib_resources.files("dtuimldmtools").joinpath("data/synth1.mat")
# Load Matlab data file and extract variables of interest
mat_data = loadmat(filename)
X = mat_data["X"]
y = mat_data["y"].squeeze()
attributeNames = [name[0] for name in mat_data["attributeNames"].squeeze()]
classNames = [name[0][0] for name in mat_data["classNames"]]
N, M = X.shape
C = len(classNames)
@@ -24,4 +28,4 @@ figure(figsize=(14,9))
clusterplot(X, cls, centroids, y)
show()
print("Ran Exercise 10.1.1")
# exercise 10.1.3
import importlib_resources
import numpy as np
from matplotlib.pyplot import figure, legend, plot, show, title, ylim
from scipy.io import loadmat
from sklearn.cluster import k_means
from dtuimldmtools import clusterval
filename = importlib_resources.files("dtuimldmtools").joinpath("data/synth1.mat")
# Load Matlab data file and extract variables of interest
mat_data = loadmat(filename)
X = mat_data["X"]
y = mat_data["y"].squeeze()
attributeNames = [name[0] for name in mat_data["attributeNames"].squeeze()]
classNames = [name[0][0] for name in mat_data["classNames"]]
N, M = X.shape
C = len(classNames)
@@ -34,11 +38,11 @@ for k in range(K-1):
# Plot results:
figure(1)
title("Cluster validity")
plot(np.arange(K - 1) + 2, Rand)
plot(np.arange(K - 1) + 2, Jaccard)
plot(np.arange(K - 1) + 2, NMI)
legend(["Rand", "Jaccard", "NMI"], loc=4)
show()
print("Ran Exercise 10.1.3")
# exercise 10_1_5
import importlib_resources
import numpy as np
from matplotlib import pyplot as plt
from scipy.io import loadmat
from sklearn.cluster import k_means
filename = importlib_resources.files("dtuimldmtools").joinpath("data/wildfaces.mat")
# Load Matlab data file and extract variables of interest
mat_data = loadmat(filename)
# mat_data = loadmat('../Data/digits.mat') #<-- uncomment this for using the digits dataset
X = mat_data["X"]
N, M = X.shape
# Image resolution and number of colors
x = 40 # <-- change this for using the digits dataset
@@ -45,9 +48,11 @@ for k in range(K):
# Squeeze out singleton dimension
# and flip the image (cancel out the previous transpose)
img = np.squeeze(img).T
plt.imshow(img, interpolation="None", cmap=cmap)
plt.xticks([])
plt.yticks([])
if k == np.floor((n2 - 1) / 2):
plt.title("Centroids")
# Plot few randomly selected faces and their nearest centroids
L = 5 # number of images to plot
@@ -58,16 +63,19 @@ for l in range(L):
img = np.resize(X[j[l], :], (c, x, y)).T
if c == 1:
img = np.squeeze(img).T
plt.imshow(img, interpolation="None", cmap=cmap)
plt.xticks([])
plt.yticks([])
if l == np.floor((L - 1) / 2):
plt.title("Randomly selected faces and their centroids")
plt.subplot(2, L, L + l + 1)
img = np.resize(centroids[cls[j[l]], :], (c, x, y)).T
if c == 1:
img = np.squeeze(img).T
plt.imshow(img, interpolation="None", cmap=cmap)
plt.xticks([])
plt.yticks([])
plt.show()
print("Ran Exercise 10.1.5")
# exercise 10.2.1
import importlib_resources
from matplotlib.pyplot import figure, show
from scipy.cluster.hierarchy import dendrogram, fcluster, linkage
from scipy.io import loadmat
from dtuimldmtools import clusterplot
filename = importlib_resources.files("dtuimldmtools").joinpath("data/synth1.mat")
# Load Matlab data file and extract variables of interest
mat_data = loadmat(filename)
X = mat_data["X"]
y = mat_data["y"].squeeze()
attributeNames = [name[0] for name in mat_data["attributeNames"].squeeze()]
classNames = [name[0][0] for name in mat_data["classNames"]]
N, M = X.shape
C = len(classNames)
# Perform hierarchical/agglomerative clustering on data matrix
Method = "single"
Metric = "euclidean"
Z = linkage(X, method=Method, metric=Metric)
# Compute and display clusters by thresholding the dendrogram
Maxclust = 4
cls = fcluster(Z, criterion="maxclust", t=Maxclust)
figure(1)
clusterplot(X, cls.reshape(cls.shape[0], 1), y=y)
# Display dendrogram
max_display_levels = 6
figure(2, figsize=(10, 4))
dendrogram(
Z, truncate_mode="level", p=max_display_levels, color_threshold=Z[-Maxclust + 1, 2]
)
show()
print("Ran Exercise 10.2.1")
# exercise 11.1.1
import importlib_resources
import numpy as np
from matplotlib.pyplot import figure, show
from scipy.io import loadmat
from sklearn.mixture import GaussianMixture
from dtuimldmtools import clusterplot
filename = importlib_resources.files("dtuimldmtools").joinpath("data/synth2.mat")
# Load Matlab data file and extract variables of interest
mat_data = loadmat(filename)
X = mat_data["X"]
y = mat_data["y"].squeeze()
attributeNames = [name[0] for name in mat_data["attributeNames"].squeeze()]
classNames = [name[0][0] for name in mat_data["classNames"]]
# X_old = X
# X = np.hstack([X,X])
N, M = X.shape
C = len(classNames)
# Number of clusters
K = 4
cov_type = "full" # e.g. 'full' or 'diag'
# define the initialization procedure (initial value of means)
initialization_method = "random" # 'random' or 'kmeans'
# random signifies random initialization, kmeans means we run a K-means and use the
# result as the starting point. K-means might converge faster/better than
# random, but might also cause the algorithm to get stuck in a poor local minimum
@@ -28,15 +33,21 @@ initialization_method = 'random'# 'random' or 'kmeans'
reps = 1
# number of fits with different initializations; the best result will be kept
# Fit Gaussian mixture model
gmm = GaussianMixture(
n_components=K,
covariance_type=cov_type,
n_init=reps,
tol=1e-6,
reg_covar=1e-6,
init_params=initialization_method,
).fit(X)
cls = gmm.predict(X)
# extract cluster labels
cds = gmm.means_
# extract cluster centroids (means of gaussians)
covs = gmm.covariances_
# extract cluster shapes (covariances of gaussians)
if cov_type.lower() == "diag":
new_covs = np.zeros([K, M, M])
count = 0
@@ -58,4 +69,4 @@ show()
# clusterplot(X[:,idx], clusterid=cls, centroids=cds[:,idx], y=y, covars=covs[:,idx,:][:,:,idx])
# show()
print("Ran Exercise 11.1.1")