Skip to content
Snippets Groups Projects
Commit 13c26834 authored by bjje's avatar bjje
Browse files

Consistent use of plt in ex2

parent 245c9d47
No related branches found
No related tags found
No related merge requests found
Showing
with 104 additions and 142 deletions
...@@ -5,7 +5,7 @@ x = np.array([-0.68, -2.11, 2.39, 0.26, 1.46, 1.33, 1.03, -0.41, -0.33, 0.47]) ...@@ -5,7 +5,7 @@ x = np.array([-0.68, -2.11, 2.39, 0.26, 1.46, 1.33, 1.03, -0.41, -0.33, 0.47])
# Compute values # Compute values
mean_x = x.mean() mean_x = x.mean()
std_x = x.std(ddof=1) std_x = x.std(ddof=1) # ddof: Delta Degrees of freedom
median_x = np.median(x) median_x = np.median(x)
range_x = x.max() - x.min() range_x = x.max() - x.min()
...@@ -16,4 +16,4 @@ print("Standard Deviation:", std_x) ...@@ -16,4 +16,4 @@ print("Standard Deviation:", std_x)
print("Median:", median_x) print("Median:", median_x)
print("Range:", range_x) print("Range:", range_x)
print("Ran Exercise 2.1.1") print("Ran Exercise 2.1.1")
\ No newline at end of file
...@@ -12,7 +12,7 @@ filename = importlib_resources.files("dtuimldmtools").joinpath("data/digits.mat" ...@@ -12,7 +12,7 @@ filename = importlib_resources.files("dtuimldmtools").joinpath("data/digits.mat"
i = 1 i = 1
# Similarity: 'SMC', 'Jaccard', 'ExtendedJaccard', 'Cosine', 'Correlation' # Similarity: 'SMC', 'Jaccard', 'ExtendedJaccard', 'Cosine', 'Correlation'
similarity_measure = "SMC" similarity_measure = "Jaccard"
# Load the digits # Load the digits
# Load Matlab data file to python dict structure # Load Matlab data file to python dict structure
...@@ -32,7 +32,6 @@ sim = sim.tolist()[0] ...@@ -32,7 +32,6 @@ sim = sim.tolist()[0]
# Tuples of sorted similarities and their indices # Tuples of sorted similarities and their indices
sim_to_index = sorted(zip(sim, noti)) sim_to_index = sorted(zip(sim, noti))
# Visualize query image and 5 most/least similar images # Visualize query image and 5 most/least similar images
plt.figure(figsize=(12, 8)) plt.figure(figsize=(12, 8))
plt.subplot(3, 1, 1) plt.subplot(3, 1, 1)
...@@ -82,4 +81,4 @@ for ms in range(5): ...@@ -82,4 +81,4 @@ for ms in range(5):
plt.show() plt.show()
print("Ran Exercise 2.2.1") print("Ran Exercise 2.2.1")
\ No newline at end of file
# exercise 2.2.2 # exercise 2.2.2
import numpy as np import numpy as np
from dtuimldmtools import similarity from dtuimldmtools import similarity
...@@ -39,4 +38,4 @@ print( ...@@ -39,4 +38,4 @@ print(
% (similarity(x, y, "cor") - similarity(b + x, y, "cor"))[0, 0] % (similarity(x, y, "cor") - similarity(b + x, y, "cor"))[0, 0]
) )
print("Ran Exercise 2.2.2") print("Ran Exercise 2.2.2")
\ No newline at end of file
# exercise 2.3.1 # exercise 2.3.1
import importlib_resources import importlib_resources
import numpy as np import numpy as np
import xlrd import xlrd
...@@ -30,4 +29,4 @@ N = len(y) ...@@ -30,4 +29,4 @@ N = len(y)
M = len(attributeNames) M = len(attributeNames)
C = len(classNames) C = len(classNames)
print("Ran Exercise 2.3.1") print("Ran Exercise 2.3.1 - loading the Iris data")
\ No newline at end of file
...@@ -2,17 +2,17 @@ ...@@ -2,17 +2,17 @@
import numpy as np import numpy as np
# (requires data from exercise 2.3.1 so will run that script first) # (requires data from exercise 2.3.1 so will run that script first)
from ex2_3_1 import * from ex2_3_1 import *
from matplotlib.pyplot import figure, hist, show, subplot, xlabel, ylim import matplotlib.pyplot as plt
figure(figsize=(8, 7)) plt.figure(figsize=(8, 7))
u = np.floor(np.sqrt(M)) u = np.floor(np.sqrt(M))
v = np.ceil(float(M) / u) v = np.ceil(float(M) / u)
for i in range(M): for i in range(M):
subplot(int(u), int(v), i + 1) plt.subplot(int(u), int(v), i + 1)
hist(X[:, i], color=(0.2, 0.8 - i * 0.2, 0.4)) plt.hist(X[:, i], color=(0.2, 0.8 - i * 0.2, 0.4))
xlabel(attributeNames[i]) plt.xlabel(attributeNames[i])
ylim(0, N / 2) plt.ylim(0, N / 2)
show() plt.show()
print("Ran Exercise 2.3.2") print("Ran Exercise 2.3.2")
# Exercise 2.3.3 # Exercise 2.3.3
# (requires data from exercise 2.3.1) # (requires data from exercise 2.3.1)
from ex2_3_1 import * from ex2_3_1 import *
from matplotlib.pyplot import boxplot, show, title, xticks, ylabel import matplotlib.pyplot as plt
boxplot(X) plt.figure()
xticks(range(1, 5), attributeNames) plt.boxplot(X)
ylabel("cm") plt.xticks(range(1, 5), attributeNames)
title("Fisher's Iris data set - boxplot") plt.ylabel("cm")
show() plt.title("Fisher's Iris data set - boxplot")
plt.show()
print("Ran Exercise 2.3.3") print("Ran Exercise 2.3.3")
# Exercise 2.3.4 # Exercise 2.3.4
# requires data from exercise 2.3.1 # requires data from exercise 2.3.1
from ex2_3_1 import * from ex2_3_1 import *
from matplotlib.pyplot import boxplot, figure, show, subplot, title, xticks, ylim import matplotlib.pyplot as plt
figure(figsize=(14, 7)) plt.figure(figsize=(14, 7))
for c in range(C): for c in range(C):
subplot(1, C, c + 1) plt.subplot(1, C, c + 1)
class_mask = y == c # binary mask to extract elements of class c class_mask = y == c # binary mask to extract elements of class c
# or: class_mask = nonzero(y==c)[0].tolist()[0] # indices of class c # or: class_mask = nonzero(y==c)[0].tolist()[0] # indices of class c
boxplot(X[class_mask, :]) plt.boxplot(X[class_mask, :])
# title('Class: {0}'.format(classNames[c])) # title('Class: {0}'.format(classNames[c]))
title("Class: " + classNames[c]) plt.title("Class: " + classNames[c])
xticks( plt.xticks(
range(1, len(attributeNames) + 1), [a[:7] for a in attributeNames], rotation=45 range(1, len(attributeNames) + 1), [a[:7] for a in attributeNames], rotation=45
) )
y_up = X.max() + (X.max() - X.min()) * 0.1 y_up = X.max() + (X.max() - X.min()) * 0.1
y_down = X.min() - (X.max() - X.min()) * 0.1 y_down = X.min() - (X.max() - X.min()) * 0.1
ylim(y_down, y_up) plt.ylim(y_down, y_up)
show() plt.show()
print("Ran Exercise 2.3.4") print("Ran Exercise 2.3.4")
# Exercise 2.3.5 # Exercise 2.3.5
# (requires data from exercise 2.3.1) # (requires data from exercise 2.3.1)
from ex2_3_1 import * from ex2_3_1 import *
from matplotlib.pyplot import ( import matplotlib.pyplot as plt
figure,
legend,
plot,
show,
subplot,
xlabel,
xticks,
ylabel,
yticks,
)
figure(figsize=(12, 10)) plt.figure(figsize=(12, 10))
for m1 in range(M): for m1 in range(M):
for m2 in range(M): for m2 in range(M):
subplot(M, M, m1 * M + m2 + 1) plt.subplot(M, M, m1 * M + m2 + 1)
for c in range(C): for c in range(C):
class_mask = y == c class_mask = y == c
plot(np.array(X[class_mask, m2]), np.array(X[class_mask, m1]), ".") plt.plot(np.array(X[class_mask, m2]), np.array(X[class_mask, m1]), ".")
if m1 == M - 1: if m1 == M - 1:
xlabel(attributeNames[m2]) plt.xlabel(attributeNames[m2])
else: else:
xticks([]) plt.xticks([])
if m2 == 0: if m2 == 0:
ylabel(attributeNames[m1]) plt.ylabel(attributeNames[m1])
else: else:
yticks([]) plt.yticks([])
legend(classNames) plt.legend(classNames)
show() plt.show()
print("Ran Exercise 2.3.5") print("Ran Exercise 2.3.5")
# Exercise 2.3.6 # Exercise 2.3.6
# (requires data from exercise 2.3.1)
# requires data from exercise 2.3.1
from ex2_3_1 import * from ex2_3_1 import *
from matplotlib.pyplot import figure, show import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D from mpl_toolkits.mplot3d import Axes3D
# Indices of the variables to plot # Indices of the variables to plot
ind = [0, 1, 2] ind = [0, 1, 2]
colors = ["blue", "green", "red"] colors = ["blue", "green", "red"]
f = figure() f = plt.figure()
ax = f.add_subplot(111, projection="3d") # Here the mpl_toolkits is used ax = f.add_subplot(111, projection="3d") # Here the mpl_toolkits is used
for c in range(C): for c in range(C):
class_mask = y == c class_mask = y == c
...@@ -22,6 +21,6 @@ ax.set_xlabel(attributeNames[ind[0]]) ...@@ -22,6 +21,6 @@ ax.set_xlabel(attributeNames[ind[0]])
ax.set_ylabel(attributeNames[ind[1]]) ax.set_ylabel(attributeNames[ind[1]])
ax.set_zlabel(attributeNames[ind[2]]) ax.set_zlabel(attributeNames[ind[2]])
show() plt.show()
print("Ran Exercise 2.3.6") print("Ran Exercise 2.3.6")
# Exercise 2.3.7 # Exercise 2.3.7
# (requires data from exercise 2.3.1)
# requires data from exercise 2.3.1
from ex2_3_1 import * from ex2_3_1 import *
from matplotlib.pyplot import ( import matplotlib.pyplot as plt
cm,
colorbar,
figure,
imshow,
show,
title,
xlabel,
xticks,
ylabel,
)
from scipy.stats import zscore from scipy.stats import zscore
X_standarized = zscore(X, ddof=1) X_standarized = zscore(X, ddof=1)
figure(figsize=(12, 6)) plt.figure(figsize=(12, 6))
imshow(X_standarized, interpolation="none", aspect=(4.0 / N), cmap=cm.gray) plt.imshow(X_standarized, interpolation="none", aspect=(4.0 / N), cmap=plt.cm.gray)
xticks(range(4), attributeNames) plt.xticks(range(4), attributeNames)
xlabel("Attributes") plt.xlabel("Attributes")
ylabel("Data objects") plt.ylabel("Data objects")
title("Fisher's Iris data matrix") plt.title("Fisher's Iris data matrix")
colorbar() plt.colorbar()
show() plt.show()
print("Ran Exercise 2.3.7") print("Ran Exercise 2.3.7")
\ No newline at end of file
# exercise 2.4.1 # exercise 2.4.1
"""
Note: This is a long script. You may want to use breakpoints.
"""
import importlib_resources import importlib_resources
import numpy as np import numpy as np
from matplotlib.pyplot import ( import matplotlib.pyplot as plt
boxplot,
figure,
hist,
show,
subplot,
title,
xlabel,
xticks,
ylim,
yticks,
)
from scipy.io import loadmat from scipy.io import loadmat
from scipy.stats import zscore from scipy.stats import zscore
...@@ -29,56 +20,61 @@ N = mat_data["N"][0, 0] ...@@ -29,56 +20,61 @@ N = mat_data["N"][0, 0]
attributeNames = [name[0][0] for name in mat_data["attributeNames"]] attributeNames = [name[0][0] for name in mat_data["attributeNames"]]
classNames = [cls[0][0] for cls in mat_data["classNames"]] classNames = [cls[0][0] for cls in mat_data["classNames"]]
print("Data loaded")
# We start with a box plot of each attribute # We start with a box plot of each attribute
figure() plt.figure()
title("Wine: Boxplot") plt.title("Wine: Boxplot")
boxplot(X) plt.boxplot(X)
xticks(range(1, M + 1), attributeNames, rotation=45) plt.xticks(range(1, M + 1), attributeNames, rotation=45)
# From this it is clear that there are some outliers in the Alcohol # From this it is clear that there are some outliers in the Alcohol
# attribute (10x10^14 is clearly not a proper value for alcohol content) # attribute (10x10^14 is clearly not a proper value for alcohol content)
# However, it is impossible to see the distribution of the data, because # However, it is impossible to see the distribution of the data, because
# the axis is dominated by these extreme outliers. To avoid this, we plot a # the axis is dominated by these extreme outliers. To avoid this, we plot a
# box plot of standardized data (using the zscore function). # box plot of standardized data (using the zscore function).
figure(figsize=(12, 6)) plt.figure(figsize=(12, 6))
title("Wine: Boxplot (standarized)") plt.title("Wine: Boxplot (standarized)")
boxplot(zscore(X, ddof=1), attributeNames) plt.boxplot(zscore(X, ddof=1), attributeNames)
xticks(range(1, M + 1), attributeNames, rotation=45) plt.xticks(range(1, M + 1), attributeNames, rotation=45)
# This plot reveals that there are clearly some outliers in the Volatile # This plot reveals that there are clearly some outliers in the Volatile
# acidity, Density, and Alcohol attributes, i.e. attribute numbers 2, 8, # acidity, Density, and Alcohol attributes, i.e. attribute numbers 2, 8,
# and 11. # and 11.
plt.show()
# Next, we plot histograms of all attributes. # Next, we plot histograms of all attributes.
figure(figsize=(14, 9)) plt.figure(figsize=(14, 9))
u = np.floor(np.sqrt(M)) u = np.floor(np.sqrt(M))
v = np.ceil(float(M) / u) v = np.ceil(float(M) / u)
for i in range(M): for i in range(M):
subplot(int(u), int(v), i + 1) plt.subplot(int(u), int(v), i + 1)
hist(X[:, i]) plt.hist(X[:, i])
xlabel(attributeNames[i]) plt.xlabel(attributeNames[i])
ylim(0, N) # Make the y-axes equal for improved readability plt.ylim(0, N) # Make the y-axes equal for improved readability
if i % v != 0: if i % v != 0:
yticks([]) plt.yticks([])
if i == 0: if i == 0:
title("Wine: Histogram") plt.title("Wine: Histogram")
plt.show()
# This confirms our belief about outliers in attributes 2, 8, and 11. # This confirms our belief about outliers in attributes 2, 8, and 11.
# To take a closer look at this, we next plot histograms of the # To take a closer look at this, we next plot histograms of the
# attributes we suspect contain outliers # attributes we suspect contain outliers
figure(figsize=(14, 9)) plt.figure(figsize=(14, 9))
m = [1, 7, 10] m = [1, 7, 10]
for i in range(len(m)): for i in range(len(m)):
subplot(1, len(m), i + 1) plt.subplot(1, len(m), i + 1)
hist(X[:, m[i]], 50) plt.hist(X[:, m[i]], 50)
xlabel(attributeNames[m[i]]) plt.xlabel(attributeNames[m[i]])
ylim(0, N) # Make the y-axes equal for improved readability plt.ylim(0, N) # Make the y-axes equal for improved readability
if i > 0: if i > 0:
yticks([]) plt.yticks([])
if i == 0: if i == 0:
title("Wine: Histogram (selected attributes)") plt.title("Wine: Histogram (selected attributes)")
plt.show()
# The histograms show that there are a few very extreme values in these # The histograms show that there are a few very extreme values in these
# three attributes. To identify these values as outliers, we must use our # three attributes. To identify these values as outliers, we must use our
...@@ -95,25 +91,24 @@ X = X[valid_mask, :] ...@@ -95,25 +91,24 @@ X = X[valid_mask, :]
y = y[valid_mask] y = y[valid_mask]
N = len(y) N = len(y)
# Now, we can repeat the process to see if there are any more outliers # Now, we can repeat the process to see if there are any more outliers
# present in the data. We take a look at a histogram of all attributes: # present in the data. We take a look at a histogram of all attributes:
figure(figsize=(14, 9)) plt.figure(figsize=(14, 9))
u = np.floor(np.sqrt(M)) u = np.floor(np.sqrt(M))
v = np.ceil(float(M) / u) v = np.ceil(float(M) / u)
for i in range(M): for i in range(M):
subplot(int(u), int(v), i + 1) plt.subplot(int(u), int(v), i + 1)
hist(X[:, i]) plt.hist(X[:, i])
xlabel(attributeNames[i]) plt.xlabel(attributeNames[i])
ylim(0, N) # Make the y-axes equal for improved readability plt.ylim(0, N) # Make the y-axes equal for improved readability
if i % v != 0: if i % v != 0:
yticks([]) plt.yticks([])
if i == 0: if i == 0:
title("Wine: Histogram (after outlier detection)") plt.title("Wine: Histogram (after outlier detection)")
# This reveals no further outliers, and we conclude that all outliers have # This reveals no further outliers, and we conclude that all outliers have
# been detected and removed. # been detected and removed.
show() plt.show()
print("Ran Exercise 2.4.1") print("Ran Exercise 2.4.1")
...@@ -2,19 +2,10 @@ ...@@ -2,19 +2,10 @@
import importlib_resources import importlib_resources
import numpy as np import numpy as np
from matplotlib.pyplot import ( import matplotlib.pyplot as plt
figure,
legend,
plot,
show,
subplot,
xlabel,
xticks,
ylabel,
yticks,
)
from scipy.io import loadmat from scipy.io import loadmat
from scipy.stats import zscore from scipy.stats import zscore
from dtuimldmtools import similarity
filename = importlib_resources.files("dtuimldmtools").joinpath("data/wine.mat") filename = importlib_resources.files("dtuimldmtools").joinpath("data/wine.mat")
...@@ -49,24 +40,24 @@ Xnorm = zscore(X, ddof=1) ...@@ -49,24 +40,24 @@ Xnorm = zscore(X, ddof=1)
Attributes = [1, 4, 5, 6] Attributes = [1, 4, 5, 6]
NumAtr = len(Attributes) NumAtr = len(Attributes)
figure(figsize=(12, 12)) plt.figure(figsize=(12, 12))
for m1 in range(NumAtr): for m1 in range(NumAtr):
for m2 in range(NumAtr): for m2 in range(NumAtr):
subplot(NumAtr, NumAtr, m1 * NumAtr + m2 + 1) plt.subplot(NumAtr, NumAtr, m1 * NumAtr + m2 + 1)
for c in range(C): for c in range(C):
class_mask = y == c class_mask = y == c
plot(X[class_mask, Attributes[m2]], X[class_mask, Attributes[m1]], ".") plt.plot(X[class_mask, Attributes[m2]], X[class_mask, Attributes[m1]], ".")
if m1 == NumAtr - 1: if m1 == NumAtr - 1:
xlabel(attributeNames[Attributes[m2]]) plt.xlabel(attributeNames[Attributes[m2]])
else: else:
xticks([]) plt.xticks([])
if m2 == 0: if m2 == 0:
ylabel(attributeNames[Attributes[m1]]) plt.ylabel(attributeNames[Attributes[m1]])
else: else:
yticks([]) plt.yticks([])
# ylim(0,X.max()*1.1) # ylim(0,X.max()*1.1)
# xlim(0,X.max()*1.1) # xlim(0,X.max()*1.1)
legend(classNames) plt.legend(classNames)
show() plt.show()
print("Ran Exercise 2.4.2") print("Ran Exercise 2.4.2")
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment