Skip to content
Snippets Groups Projects
Commit 13c26834 authored by bjje's avatar bjje
Browse files

Consistent use of plt in ex2

parent 245c9d47
Branches
No related tags found
No related merge requests found
Showing
with 104 additions and 142 deletions
......@@ -5,7 +5,7 @@ x = np.array([-0.68, -2.11, 2.39, 0.26, 1.46, 1.33, 1.03, -0.41, -0.33, 0.47])
# Compute values
mean_x = x.mean()
std_x = x.std(ddof=1)
std_x = x.std(ddof=1) # ddof: Delta Degrees of freedom
median_x = np.median(x)
range_x = x.max() - x.min()
......
......@@ -12,7 +12,7 @@ filename = importlib_resources.files("dtuimldmtools").joinpath("data/digits.mat"
i = 1
# Similarity: 'SMC', 'Jaccard', 'ExtendedJaccard', 'Cosine', 'Correlation'
similarity_measure = "SMC"
similarity_measure = "Jaccard"
# Load the digits
# Load Matlab data file to python dict structure
......@@ -32,7 +32,6 @@ sim = sim.tolist()[0]
# Tuples of sorted similarities and their indices
sim_to_index = sorted(zip(sim, noti))
# Visualize query image and 5 most/least similar images
plt.figure(figsize=(12, 8))
plt.subplot(3, 1, 1)
......
# exercise 2.2.2
import numpy as np
from dtuimldmtools import similarity
......
# exercise 2.3.1
import importlib_resources
import numpy as np
import xlrd
......@@ -30,4 +29,4 @@ N = len(y)
M = len(attributeNames)
C = len(classNames)
print("Ran Exercise 2.3.1")
print("Ran Exercise 2.3.1 - loading the Iris data")
\ No newline at end of file
......@@ -2,17 +2,17 @@
import numpy as np
# (requires data from exercise 2.3.1 so will run that script first)
from ex2_3_1 import *
from matplotlib.pyplot import figure, hist, show, subplot, xlabel, ylim
import matplotlib.pyplot as plt
figure(figsize=(8, 7))
plt.figure(figsize=(8, 7))
u = np.floor(np.sqrt(M))
v = np.ceil(float(M) / u)
for i in range(M):
subplot(int(u), int(v), i + 1)
hist(X[:, i], color=(0.2, 0.8 - i * 0.2, 0.4))
xlabel(attributeNames[i])
ylim(0, N / 2)
plt.subplot(int(u), int(v), i + 1)
plt.hist(X[:, i], color=(0.2, 0.8 - i * 0.2, 0.4))
plt.xlabel(attributeNames[i])
plt.ylim(0, N / 2)
show()
plt.show()
print("Ran Exercise 2.3.2")
# Exercise 2.3.3
# (requires data from exercise 2.3.1)
from ex2_3_1 import *
from matplotlib.pyplot import boxplot, show, title, xticks, ylabel
import matplotlib.pyplot as plt
boxplot(X)
xticks(range(1, 5), attributeNames)
ylabel("cm")
title("Fisher's Iris data set - boxplot")
show()
plt.figure()
plt.boxplot(X)
plt.xticks(range(1, 5), attributeNames)
plt.ylabel("cm")
plt.title("Fisher's Iris data set - boxplot")
plt.show()
print("Ran Exercise 2.3.3")
# Exercise 2.3.4
# requires data from exercise 2.3.1
from ex2_3_1 import *
from matplotlib.pyplot import boxplot, figure, show, subplot, title, xticks, ylim
import matplotlib.pyplot as plt
figure(figsize=(14, 7))
plt.figure(figsize=(14, 7))
for c in range(C):
subplot(1, C, c + 1)
plt.subplot(1, C, c + 1)
class_mask = y == c # binary mask to extract elements of class c
# or: class_mask = nonzero(y==c)[0].tolist()[0] # indices of class c
boxplot(X[class_mask, :])
plt.boxplot(X[class_mask, :])
# title('Class: {0}'.format(classNames[c]))
title("Class: " + classNames[c])
xticks(
plt.title("Class: " + classNames[c])
plt.xticks(
range(1, len(attributeNames) + 1), [a[:7] for a in attributeNames], rotation=45
)
y_up = X.max() + (X.max() - X.min()) * 0.1
y_down = X.min() - (X.max() - X.min()) * 0.1
ylim(y_down, y_up)
plt.ylim(y_down, y_up)
show()
plt.show()
print("Ran Exercise 2.3.4")
# Exercise 2.3.5
# (requires data from exercise 2.3.1)
from ex2_3_1 import *
from matplotlib.pyplot import (
figure,
legend,
plot,
show,
subplot,
xlabel,
xticks,
ylabel,
yticks,
)
import matplotlib.pyplot as plt
figure(figsize=(12, 10))
plt.figure(figsize=(12, 10))
for m1 in range(M):
for m2 in range(M):
subplot(M, M, m1 * M + m2 + 1)
plt.subplot(M, M, m1 * M + m2 + 1)
for c in range(C):
class_mask = y == c
plot(np.array(X[class_mask, m2]), np.array(X[class_mask, m1]), ".")
plt.plot(np.array(X[class_mask, m2]), np.array(X[class_mask, m1]), ".")
if m1 == M - 1:
xlabel(attributeNames[m2])
plt.xlabel(attributeNames[m2])
else:
xticks([])
plt.xticks([])
if m2 == 0:
ylabel(attributeNames[m1])
plt.ylabel(attributeNames[m1])
else:
yticks([])
plt.yticks([])
legend(classNames)
plt.legend(classNames)
show()
plt.show()
print("Ran Exercise 2.3.5")
# Exercise 2.3.6
# requires data from exercise 2.3.1
# (requires data from exercise 2.3.1)
from ex2_3_1 import *
from matplotlib.pyplot import figure, show
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# Indices of the variables to plot
ind = [0, 1, 2]
colors = ["blue", "green", "red"]
f = figure()
f = plt.figure()
ax = f.add_subplot(111, projection="3d") # Here the mpl_toolkits is used
for c in range(C):
class_mask = y == c
......@@ -22,6 +21,6 @@ ax.set_xlabel(attributeNames[ind[0]])
ax.set_ylabel(attributeNames[ind[1]])
ax.set_zlabel(attributeNames[ind[2]])
show()
plt.show()
print("Ran Exercise 2.3.6")
# Exercise 2.3.7
# requires data from exercise 2.3.7
# (requires data from exercise 2.3.1)
from ex2_3_1 import *
from matplotlib.pyplot import (
cm,
colorbar,
figure,
imshow,
show,
title,
xlabel,
xticks,
ylabel,
)
import matplotlib.pyplot as plt
from scipy.stats import zscore
X_standarized = zscore(X, ddof=1)
figure(figsize=(12, 6))
imshow(X_standarized, interpolation="none", aspect=(4.0 / N), cmap=cm.gray)
xticks(range(4), attributeNames)
xlabel("Attributes")
ylabel("Data objects")
title("Fisher's Iris data matrix")
colorbar()
plt.figure(figsize=(12, 6))
plt.imshow(X_standarized, interpolation="none", aspect=(4.0 / N), cmap=plt.cm.gray)
plt.xticks(range(4), attributeNames)
plt.xlabel("Attributes")
plt.ylabel("Data objects")
plt.title("Fisher's Iris data matrix")
plt.colorbar()
show()
plt.show()
print("Ran Exercise 2.3.7")
\ No newline at end of file
# exercise 2.4.1
"""
Note: This is a long script. You may want to use breakpoints.
"""
import importlib_resources
import numpy as np
from matplotlib.pyplot import (
boxplot,
figure,
hist,
show,
subplot,
title,
xlabel,
xticks,
ylim,
yticks,
)
import matplotlib.pyplot as plt
from scipy.io import loadmat
from scipy.stats import zscore
......@@ -29,56 +20,61 @@ N = mat_data["N"][0, 0]
attributeNames = [name[0][0] for name in mat_data["attributeNames"]]
classNames = [cls[0][0] for cls in mat_data["classNames"]]
print("Data loaded")
# We start with a box plot of each attribute
figure()
title("Wine: Boxplot")
boxplot(X)
xticks(range(1, M + 1), attributeNames, rotation=45)
plt.figure()
plt.title("Wine: Boxplot")
plt.boxplot(X)
plt.xticks(range(1, M + 1), attributeNames, rotation=45)
# From this it is clear that there are some outliers in the Alcohol
# attribute (10x10^14 is clearly not a proper value for alcohol content)
# However, it is impossible to see the distribution of the data, because
# the axis is dominated by these extreme outliers. To avoid this, we plot a
# box plot of standardized data (using the zscore function).
figure(figsize=(12, 6))
title("Wine: Boxplot (standarized)")
boxplot(zscore(X, ddof=1), attributeNames)
xticks(range(1, M + 1), attributeNames, rotation=45)
plt.figure(figsize=(12, 6))
plt.title("Wine: Boxplot (standarized)")
plt.boxplot(zscore(X, ddof=1), attributeNames)
plt.xticks(range(1, M + 1), attributeNames, rotation=45)
# This plot reveals that there are clearly some outliers in the Volatile
# acidity, Density, and Alcohol attributes, i.e. attribute number 2, 8,
# and 11.
plt.show()
# Next, we plot histograms of all attributes.
figure(figsize=(14, 9))
plt.figure(figsize=(14, 9))
u = np.floor(np.sqrt(M))
v = np.ceil(float(M) / u)
for i in range(M):
subplot(int(u), int(v), i + 1)
hist(X[:, i])
xlabel(attributeNames[i])
ylim(0, N) # Make the y-axes equal for improved readability
plt.subplot(int(u), int(v), i + 1)
plt.hist(X[:, i])
plt.xlabel(attributeNames[i])
plt.ylim(0, N) # Make the y-axes equal for improved readability
if i % v != 0:
yticks([])
plt.yticks([])
if i == 0:
title("Wine: Histogram")
plt.title("Wine: Histogram")
plt.show()
# This confirms our belief about outliers in attributes 2, 8, and 11.
# To take a closer look at this, we next plot histograms of the
# attributes we suspect contain outliers.
figure(figsize=(14, 9))
plt.figure(figsize=(14, 9))
m = [1, 7, 10]
for i in range(len(m)):
subplot(1, len(m), i + 1)
hist(X[:, m[i]], 50)
xlabel(attributeNames[m[i]])
ylim(0, N) # Make the y-axes equal for improved readability
plt.subplot(1, len(m), i + 1)
plt.hist(X[:, m[i]], 50)
plt.xlabel(attributeNames[m[i]])
plt.ylim(0, N) # Make the y-axes equal for improved readability
if i > 0:
yticks([])
plt.yticks([])
if i == 0:
title("Wine: Histogram (selected attributes)")
plt.title("Wine: Histogram (selected attributes)")
plt.show()
# The histograms show that there are a few very extreme values in these
# three attributes. To identify these values as outliers, we must use our
......@@ -95,25 +91,24 @@ X = X[valid_mask, :]
y = y[valid_mask]
N = len(y)
# Now, we can repeat the process to see if there are any more outliers
# present in the data. We take a look at a histogram of all attributes:
figure(figsize=(14, 9))
plt.figure(figsize=(14, 9))
u = np.floor(np.sqrt(M))
v = np.ceil(float(M) / u)
for i in range(M):
subplot(int(u), int(v), i + 1)
hist(X[:, i])
xlabel(attributeNames[i])
ylim(0, N) # Make the y-axes equal for improved readability
plt.subplot(int(u), int(v), i + 1)
plt.hist(X[:, i])
plt.xlabel(attributeNames[i])
plt.ylim(0, N) # Make the y-axes equal for improved readability
if i % v != 0:
yticks([])
plt.yticks([])
if i == 0:
title("Wine: Histogram (after outlier detection)")
plt.title("Wine: Histogram (after outlier detection)")
# This reveals no further outliers, and we conclude that all outliers have
# been detected and removed.
show()
plt.show()
print("Ran Exercise 2.4.1")
......@@ -2,19 +2,10 @@
import importlib_resources
import numpy as np
from matplotlib.pyplot import (
figure,
legend,
plot,
show,
subplot,
xlabel,
xticks,
ylabel,
yticks,
)
import matplotlib.pyplot as plt
from scipy.io import loadmat
from scipy.stats import zscore
from dtuimldmtools import similarity
filename = importlib_resources.files("dtuimldmtools").joinpath("data/wine.mat")
......@@ -49,24 +40,24 @@ Xnorm = zscore(X, ddof=1)
Attributes = [1, 4, 5, 6]
NumAtr = len(Attributes)
figure(figsize=(12, 12))
plt.figure(figsize=(12, 12))
for m1 in range(NumAtr):
for m2 in range(NumAtr):
subplot(NumAtr, NumAtr, m1 * NumAtr + m2 + 1)
plt.subplot(NumAtr, NumAtr, m1 * NumAtr + m2 + 1)
for c in range(C):
class_mask = y == c
plot(X[class_mask, Attributes[m2]], X[class_mask, Attributes[m1]], ".")
plt.plot(X[class_mask, Attributes[m2]], X[class_mask, Attributes[m1]], ".")
if m1 == NumAtr - 1:
xlabel(attributeNames[Attributes[m2]])
plt.xlabel(attributeNames[Attributes[m2]])
else:
xticks([])
plt.xticks([])
if m2 == 0:
ylabel(attributeNames[Attributes[m1]])
plt.ylabel(attributeNames[Attributes[m1]])
else:
yticks([])
plt.yticks([])
# ylim(0,X.max()*1.1)
# xlim(0,X.max()*1.1)
legend(classNames)
show()
plt.legend(classNames)
plt.show()
print("Ran Exercise 2.4.2")
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment