diff --git a/exercises/02450Toolbox_Python/Scripts/ex2_1_1.py b/exercises/02450Toolbox_Python/Scripts/ex2_1_1.py index 2a35baa91427f3c98540d03a7b7fe8026800ed36..dea5b3f9d071dbfe5fc62042a0708269e31523c1 100644 --- a/exercises/02450Toolbox_Python/Scripts/ex2_1_1.py +++ b/exercises/02450Toolbox_Python/Scripts/ex2_1_1.py @@ -5,7 +5,7 @@ x = np.array([-0.68, -2.11, 2.39, 0.26, 1.46, 1.33, 1.03, -0.41, -0.33, 0.47]) # Compute values mean_x = x.mean() -std_x = x.std(ddof=1) +std_x = x.std(ddof=1) # ddof: Delta Degrees of freedom median_x = np.median(x) range_x = x.max() - x.min() @@ -16,4 +16,4 @@ print("Standard Deviation:", std_x) print("Median:", median_x) print("Range:", range_x) -print("Ran Exercise 2.1.1") +print("Ran Exercise 2.1.1") \ No newline at end of file diff --git a/exercises/02450Toolbox_Python/Scripts/ex2_2_1.py b/exercises/02450Toolbox_Python/Scripts/ex2_2_1.py index f95d6278ae22613d56fa7c9ff8bc350f6ab369cd..1def1efde521b1ba27c3465cc9192c65c9a9d7d1 100644 --- a/exercises/02450Toolbox_Python/Scripts/ex2_2_1.py +++ b/exercises/02450Toolbox_Python/Scripts/ex2_2_1.py @@ -12,7 +12,7 @@ filename = importlib_resources.files("dtuimldmtools").joinpath("data/digits.mat" i = 1 # Similarity: 'SMC', 'Jaccard', 'ExtendedJaccard', 'Cosine', 'Correlation' -similarity_measure = "SMC" +similarity_measure = "Jaccard" # Load the digits # Load Matlab data file to python dict structure @@ -32,7 +32,6 @@ sim = sim.tolist()[0] # Tuples of sorted similarities and their indices sim_to_index = sorted(zip(sim, noti)) - # Visualize query image and 5 most/least similar images plt.figure(figsize=(12, 8)) plt.subplot(3, 1, 1) @@ -82,4 +81,4 @@ for ms in range(5): plt.show() -print("Ran Exercise 2.2.1") +print("Ran Exercise 2.2.1") \ No newline at end of file diff --git a/exercises/02450Toolbox_Python/Scripts/ex2_2_2.py b/exercises/02450Toolbox_Python/Scripts/ex2_2_2.py index 3f249cab4bdeb1658576664c37e14eeaca4320bc..7d0e19687667dcfc340c2aec220966040466f141 100644 --- a/exercises/02450Toolbox_Python/Scripts/ex2_2_2.py +++ b/exercises/02450Toolbox_Python/Scripts/ex2_2_2.py @@ -1,5 +1,4 @@ # exercise 2.2.2 - import numpy as np from dtuimldmtools import similarity @@ -39,4 +38,4 @@ print( % (similarity(x, y, "cor") - similarity(b + x, y, "cor"))[0, 0] ) -print("Ran Exercise 2.2.2") +print("Ran Exercise 2.2.2") \ No newline at end of file diff --git a/exercises/02450Toolbox_Python/Scripts/ex2_3_1.py b/exercises/02450Toolbox_Python/Scripts/ex2_3_1.py index 7a3afe602a2df9cd90c857e3e88f805360daeca4..ea2e8d92ddd892b2a2bd5e98f605c9f3b5a5d742 100644 --- a/exercises/02450Toolbox_Python/Scripts/ex2_3_1.py +++ b/exercises/02450Toolbox_Python/Scripts/ex2_3_1.py @@ -1,5 +1,4 @@ # exercise 2.3.1 - import importlib_resources import numpy as np import xlrd @@ -30,4 +29,4 @@ N = len(y) M = len(attributeNames) C = len(classNames) -print("Ran Exercise 2.3.1") +print("Ran Exercise 2.3.1 - loading the Iris data") \ No newline at end of file diff --git a/exercises/02450Toolbox_Python/Scripts/ex2_3_2.py b/exercises/02450Toolbox_Python/Scripts/ex2_3_2.py index 0136f799fbb17c0ae785db023c8372b618ba674a..b2b786cf46a93cd911e7d6f237cdd0a30f180ffb 100644 --- a/exercises/02450Toolbox_Python/Scripts/ex2_3_2.py +++ b/exercises/02450Toolbox_Python/Scripts/ex2_3_2.py @@ -2,17 +2,17 @@ import numpy as np # (requires data from exercise 2.3.1 so will run that script first) from ex2_3_1 import * -from matplotlib.pyplot import figure, hist, show, subplot, xlabel, ylim +import matplotlib.pyplot as plt -figure(figsize=(8, 7)) +plt.figure(figsize=(8, 7)) u = np.floor(np.sqrt(M)) v = np.ceil(float(M) / u) for i in range(M): - subplot(int(u), int(v), i + 1) - hist(X[:, i], color=(0.2, 0.8 - i * 0.2, 0.4)) - xlabel(attributeNames[i]) - ylim(0, N / 2) + plt.subplot(int(u), int(v), i + 1) + plt.hist(X[:, i], color=(0.2, 0.8 - i * 0.2, 0.4)) + plt.xlabel(attributeNames[i]) + plt.ylim(0, N / 2) -show() +plt.show() print("Ran Exercise 2.3.2") diff --git a/exercises/02450Toolbox_Python/Scripts/ex2_3_3.py b/exercises/02450Toolbox_Python/Scripts/ex2_3_3.py index 437b1bf4ce6d7348db5a8485cb440c2dbc245046..ae2f56bdf0bff50eec77a313045784962c2c8fe3 100644 --- a/exercises/02450Toolbox_Python/Scripts/ex2_3_3.py +++ b/exercises/02450Toolbox_Python/Scripts/ex2_3_3.py @@ -1,12 +1,13 @@ # Exercise 2.3.3 # (requires data from exercise 2.3.1) from ex2_3_1 import * -from matplotlib.pyplot import boxplot, show, title, xticks, ylabel +import matplotlib.pyplot as plt -boxplot(X) -xticks(range(1, 5), attributeNames) -ylabel("cm") -title("Fisher's Iris data set - boxplot") -show() +plt.figure() +plt.boxplot(X) +plt.xticks(range(1, 5), attributeNames) +plt.ylabel("cm") +plt.title("Fisher's Iris data set - boxplot") +plt.show() print("Ran Exercise 2.3.3") diff --git a/exercises/02450Toolbox_Python/Scripts/ex2_3_4.py b/exercises/02450Toolbox_Python/Scripts/ex2_3_4.py index 5a33d4d310d32f1a8774985f1e72b70cafd469a0..ce1e39cc4562be51e35fe2a9ed44cfbab8bdd511 100644 --- a/exercises/02450Toolbox_Python/Scripts/ex2_3_4.py +++ b/exercises/02450Toolbox_Python/Scripts/ex2_3_4.py @@ -1,24 +1,24 @@ # Exercise 2.3.4 # requires data from exercise 4.1.1 from ex2_3_1 import * -from matplotlib.pyplot import boxplot, figure, show, subplot, title, xticks, ylim +import matplotlib.pyplot as plt -figure(figsize=(14, 7)) +plt.figure(figsize=(14, 7)) for c in range(C): - subplot(1, C, c + 1) + plt.subplot(1, C, c + 1) class_mask = y == c # binary mask to extract elements of class c # or: class_mask = nonzero(y==c)[0].tolist()[0] # indices of class c - boxplot(X[class_mask, :]) + plt.boxplot(X[class_mask, :]) # title('Class: {0}'.format(classNames[c])) - title("Class: " + classNames[c]) - xticks( + plt.title("Class: " + classNames[c]) + plt.xticks( range(1, len(attributeNames) + 1), [a[:7] for a in attributeNames], rotation=45 ) y_up = X.max() + (X.max() - X.min()) * 0.1 y_down = X.min() - (X.max() - X.min()) * 0.1 - ylim(y_down, y_up) + plt.ylim(y_down, y_up) -show() +plt.show() print("Ran Exercise 2.3.4") diff --git a/exercises/02450Toolbox_Python/Scripts/ex2_3_5.py b/exercises/02450Toolbox_Python/Scripts/ex2_3_5.py index e2b9836da15cc13d02a1fce4b3998c319f309905..93b51ddbeacabf244e8fd127f882128d59041365 100644 --- a/exercises/02450Toolbox_Python/Scripts/ex2_3_5.py +++ b/exercises/02450Toolbox_Python/Scripts/ex2_3_5.py @@ -1,36 +1,26 @@ # Exercise 2.3.5 # (requires data from exercise 2.3.1) from ex2_3_1 import * -from matplotlib.pyplot import ( - figure, - legend, - plot, - show, - subplot, - xlabel, - xticks, - ylabel, - yticks, -) +import matplotlib.pyplot as plt -figure(figsize=(12, 10)) +plt.figure(figsize=(12, 10)) for m1 in range(M): for m2 in range(M): - subplot(M, M, m1 * M + m2 + 1) + plt.subplot(M, M, m1 * M + m2 + 1) for c in range(C): class_mask = y == c - plot(np.array(X[class_mask, m2]), np.array(X[class_mask, m1]), ".") + plt.plot(np.array(X[class_mask, m2]), np.array(X[class_mask, m1]), ".") if m1 == M - 1: - xlabel(attributeNames[m2]) + plt.xlabel(attributeNames[m2]) else: - xticks([]) + plt.xticks([]) if m2 == 0: - ylabel(attributeNames[m1]) + plt.ylabel(attributeNames[m1]) else: - yticks([]) + plt.yticks([]) -legend(classNames) +plt.legend(classNames) -show() +plt.show() print("Ran Exercise 2.3.5") diff --git a/exercises/02450Toolbox_Python/Scripts/ex2_3_6.py b/exercises/02450Toolbox_Python/Scripts/ex2_3_6.py index 5684aa7d2b531df4da4ee32afc331764fc08993d..b8cec6e9303f999423bcec64332035a84066b3ee 100644 --- a/exercises/02450Toolbox_Python/Scripts/ex2_3_6.py +++ b/exercises/02450Toolbox_Python/Scripts/ex2_3_6.py @@ -1,15 +1,14 @@ # Exercise 2.3.6 - -# requires data from exercise 2.3.1 +# (requires data from exercise 2.3.1) from ex2_3_1 import * -from matplotlib.pyplot import figure, show +import matplotlib.pyplot as plt from mpl_toolkits.mplot3d import Axes3D # Indices of the variables to plot ind = [0, 1, 2] colors = ["blue", "green", "red"] -f = figure() +f = plt.figure() ax = f.add_subplot(111, projection="3d") # Here the mpl_toolkits is used for c in range(C): class_mask = y == c @@ -22,6 +21,6 @@ ax.set_xlabel(attributeNames[ind[0]]) ax.set_ylabel(attributeNames[ind[1]]) ax.set_zlabel(attributeNames[ind[2]]) -show() +plt.show() print("Ran Exercise 2.3.6") diff --git a/exercises/02450Toolbox_Python/Scripts/ex2_3_7.py b/exercises/02450Toolbox_Python/Scripts/ex2_3_7.py index 587367dc2b5e6c9ea0b49ded847561fb485fea49..891252345c9919dc0d3cbbbd5f83210f1ff9aa39 100644 --- a/exercises/02450Toolbox_Python/Scripts/ex2_3_7.py +++ b/exercises/02450Toolbox_Python/Scripts/ex2_3_7.py @@ -1,30 +1,19 @@ # Exercise 2.3.7 - -# requires data from exercise 2.3.7 +# (requires data from exercise 2.3.1) from ex2_3_1 import * -from matplotlib.pyplot import ( - cm, - colorbar, - figure, - imshow, - show, - title, - xlabel, - xticks, - ylabel, -) +import matplotlib.pyplot as plt from scipy.stats import zscore X_standarized = zscore(X, ddof=1) -figure(figsize=(12, 6)) -imshow(X_standarized, interpolation="none", aspect=(4.0 / N), cmap=cm.gray) -xticks(range(4), attributeNames) -xlabel("Attributes") -ylabel("Data objects") -title("Fisher's Iris data matrix") -colorbar() +plt.figure(figsize=(12, 6)) +plt.imshow(X_standarized, interpolation="none", aspect=(4.0 / N), cmap=plt.cm.gray) +plt.xticks(range(4), attributeNames) +plt.xlabel("Attributes") +plt.ylabel("Data objects") +plt.title("Fisher's Iris data matrix") +plt.colorbar() -show() +plt.show() -print("Ran Exercise 2.3.7") +print("Ran Exercise 2.3.7") \ No newline at end of file diff --git a/exercises/02450Toolbox_Python/Scripts/ex2_4_1.py b/exercises/02450Toolbox_Python/Scripts/ex2_4_1.py index 3bbc0bd0e807b369a257bcc37364ade22759066b..3b88f6b2ea4b724f696b52a31f37b729ed923b2d 100644 --- a/exercises/02450Toolbox_Python/Scripts/ex2_4_1.py +++ b/exercises/02450Toolbox_Python/Scripts/ex2_4_1.py @@ -1,19 +1,10 @@ # exercise 2.4.1 - +""" +Note: This is a long script. You may want to use breakpoint +""" import importlib_resources import numpy as np -from matplotlib.pyplot import ( - boxplot, - figure, - hist, - show, - subplot, - title, - xlabel, - xticks, - ylim, - yticks, -) +import matplotlib.pyplot as plt from scipy.io import loadmat from scipy.stats import zscore @@ -29,56 +20,61 @@ N = mat_data["N"][0, 0] attributeNames = [name[0][0] for name in mat_data["attributeNames"]] classNames = [cls[0][0] for cls in mat_data["classNames"]] +print("Data loaded") + # We start with a box plot of each attribute -figure() -title("Wine: Boxplot") -boxplot(X) -xticks(range(1, M + 1), attributeNames, rotation=45) +plt.figure() +plt.title("Wine: Boxplot") +plt.boxplot(X) +plt.xticks(range(1, M + 1), attributeNames, rotation=45) # From this it is clear that there are some outliers in the Alcohol # attribute (10x10^14 is clearly not a proper value for alcohol content) # However, it is impossible to see the distribution of the data, because # the axis is dominated by these extreme outliers. To avoid this, we plot a # box plot of standardized data (using the zscore function). -figure(figsize=(12, 6)) -title("Wine: Boxplot (standarized)") -boxplot(zscore(X, ddof=1), attributeNames) -xticks(range(1, M + 1), attributeNames, rotation=45) +plt.figure(figsize=(12, 6)) +plt.title("Wine: Boxplot (standarized)") +plt.boxplot(zscore(X, ddof=1), attributeNames) +plt.xticks(range(1, M + 1), attributeNames, rotation=45) # This plot reveals that there are clearly some outliers in the Volatile # acidity, Density, and Alcohol attributes, i.e. attribute number 2, 8, # and 11. +plt.show() # Next, we plot histograms of all attributes. -figure(figsize=(14, 9)) +plt.figure(figsize=(14, 9)) u = np.floor(np.sqrt(M)) v = np.ceil(float(M) / u) for i in range(M): - subplot(int(u), int(v), i + 1) - hist(X[:, i]) - xlabel(attributeNames[i]) - ylim(0, N) # Make the y-axes equal for improved readability + plt.subplot(int(u), int(v), i + 1) + plt.hist(X[:, i]) + plt.xlabel(attributeNames[i]) + plt.ylim(0, N) # Make the y-axes equal for improved readability if i % v != 0: - yticks([]) + plt.yticks([]) if i == 0: - title("Wine: Histogram") + plt.title("Wine: Histogram") +plt.show() # This confirms our belief about outliers in attributes 2, 8, and 11. # To take a closer look at this, we next plot histograms of the # attributes we suspect contains outliers -figure(figsize=(14, 9)) +plt.figure(figsize=(14, 9)) m = [1, 7, 10] for i in range(len(m)): - subplot(1, len(m), i + 1) - hist(X[:, m[i]], 50) - xlabel(attributeNames[m[i]]) - ylim(0, N) # Make the y-axes equal for improved readability + plt.subplot(1, len(m), i + 1) + plt.hist(X[:, m[i]], 50) + plt.xlabel(attributeNames[m[i]]) + plt.ylim(0, N) # Make the y-axes equal for improved readability if i > 0: - yticks([]) + plt.yticks([]) if i == 0: - title("Wine: Histogram (selected attributes)") + plt.title("Wine: Histogram (selected attributes)") +plt.show() # The histograms show that there are a few very extreme values in these # three attributes. To identify these values as outliers, we must use our @@ -95,25 +91,24 @@ X = X[valid_mask, :] y = y[valid_mask] N = len(y) - # Now, we can repeat the process to see if there are any more outliers # present in the data. We take a look at a histogram of all attributes: -figure(figsize=(14, 9)) +plt.figure(figsize=(14, 9)) u = np.floor(np.sqrt(M)) v = np.ceil(float(M) / u) for i in range(M): - subplot(int(u), int(v), i + 1) - hist(X[:, i]) - xlabel(attributeNames[i]) - ylim(0, N) # Make the y-axes equal for improved readability + plt.subplot(int(u), int(v), i + 1) + plt.hist(X[:, i]) + plt.xlabel(attributeNames[i]) + plt.ylim(0, N) # Make the y-axes equal for improved readability if i % v != 0: - yticks([]) + plt.yticks([]) if i == 0: - title("Wine: Histogram (after outlier detection)") + plt.title("Wine: Histogram (after outlier detection)") # This reveals no further outliers, and we conclude that all outliers have # been detected and removed. -show() +plt.show() print("Ran Exercise 2.4.1") diff --git a/exercises/02450Toolbox_Python/Scripts/ex2_4_2.py b/exercises/02450Toolbox_Python/Scripts/ex2_4_2.py index 3c362f0b8b82eecfe3951f2c6c99722d8e10223b..9a5e241262b0b0cdb768cedea88e66f5902cf0d3 100644 --- a/exercises/02450Toolbox_Python/Scripts/ex2_4_2.py +++ b/exercises/02450Toolbox_Python/Scripts/ex2_4_2.py @@ -2,19 +2,10 @@ import importlib_resources import numpy as np -from matplotlib.pyplot import ( - figure, - legend, - plot, - show, - subplot, - xlabel, - xticks, - ylabel, - yticks, -) +import matplotlib.pyplot as plt from scipy.io import loadmat from scipy.stats import zscore +from dtuimldmtools import similarity filename = importlib_resources.files("dtuimldmtools").joinpath("data/wine.mat") @@ -49,24 +40,24 @@ Xnorm = zscore(X, ddof=1) Attributes = [1, 4, 5, 6] NumAtr = len(Attributes) -figure(figsize=(12, 12)) +plt.figure(figsize=(12, 12)) for m1 in range(NumAtr): for m2 in range(NumAtr): - subplot(NumAtr, NumAtr, m1 * NumAtr + m2 + 1) + plt.subplot(NumAtr, NumAtr, m1 * NumAtr + m2 + 1) for c in range(C): class_mask = y == c - plot(X[class_mask, Attributes[m2]], X[class_mask, Attributes[m1]], ".") + plt.plot(X[class_mask, Attributes[m2]], X[class_mask, Attributes[m1]], ".") if m1 == NumAtr - 1: - xlabel(attributeNames[Attributes[m2]]) + plt.xlabel(attributeNames[Attributes[m2]]) else: - xticks([]) + plt.xticks([]) if m2 == 0: - ylabel(attributeNames[Attributes[m1]]) + plt.ylabel(attributeNames[Attributes[m1]]) else: - yticks([]) + plt.yticks([]) # ylim(0,X.max()*1.1) # xlim(0,X.max()*1.1) -legend(classNames) -show() +plt.legend(classNames) +plt.show() -print("Ran Exercise 2.4.2") +print("Ran Exercise 2.4.2") \ No newline at end of file