From 822fecb6ba9f9b8eea611d00fd5a5712a3269098 Mon Sep 17 00:00:00 2001
From: Bjørn Sand Jensen <bjje@dtu.dk>
Date: Wed, 24 Jan 2024 16:47:54 +0100
Subject: [PATCH] Minor updates for public repo

---
 .../02450Toolbox_Matlab/Scripts/default.txt   | 101 ---------
 .../Scripts/check_installation.py             |   2 +-
 .../02450Toolbox_Python/Scripts/ex10_1_3.py   |  38 ++--
 .../02450Toolbox_Python/Scripts/ex10_1_5.py   |  64 +++---
 .../02450Toolbox_Python/Scripts/ex3_3_1.py    |  71 +++----
 .../02450Toolbox_Python/Scripts/ex6_2_1.py    | 196 +++++++-----------
 .../02450Toolbox_Python/Scripts/ex6_3_1.py    |   4 +-
 .../02450Toolbox_Python/Scripts/ex7_3_1.py    |  29 ++-
 .../02450Toolbox_Python/Scripts/ex9_1_1.py    |  43 ++--
 .../02450Toolbox_Python/Scripts/ex9_1_2.py    | 110 +++++-----
 .../02450Toolbox_Python/Scripts/ex9_1_3.py    |  27 +--
 .../02450Toolbox_Python/Scripts/ex9_2_1.py    |  39 ++--
 .../02450Toolbox_Python/Scripts/ex9_2_2.py    |  42 ++--
 .../02450Toolbox_R_Development.Rproj          |  13 ++
 14 files changed, 304 insertions(+), 475 deletions(-)
 delete mode 100644 exercises/02450Toolbox_Matlab/Scripts/default.txt
 create mode 100644 exercises/02450Toolbox_R/02450Toolbox_R_Development.Rproj

diff --git a/exercises/02450Toolbox_Matlab/Scripts/default.txt b/exercises/02450Toolbox_Matlab/Scripts/default.txt
deleted file mode 100644
index 065eaa1..0000000
--- a/exercises/02450Toolbox_Matlab/Scripts/default.txt
+++ /dev/null
@@ -1,101 +0,0 @@
-Rule   (Support, Confidence) 
-11 -> 13  (39.3242%, 74.9395%)
-13 -> 11  (39.3242%, 78.1033%)
-15 -> 22  (38.1187%, 75.2348%)
-22 -> 15  (38.1187%, 76.4556%)
-16 -> 21  (37.5952%, 76.2058%)
-21 -> 16  (37.5952%, 74.9763%)
-9 -> 15  (37.2621%, 72.366%)
-15 -> 9  (37.2621%, 73.5441%)
-12 -> 14  (36.5006%, 76.8024%)
-14 -> 12  (36.5006%, 73.5144%)
-7 -> 13  (35.2951%, 70.0346%)
-13 -> 7  (35.2951%, 70.1008%)
-10 -> 16  (35.1047%, 72.3676%)
-16 -> 10  (35.1047%, 71.1576%)
-7 -> 11  (34.7874%, 69.0274%)
-11 -> 7  (34.7874%, 66.2938%)
-8 -> 14  (34.5495%, 69.6514%)
-14 -> 8  (34.5495%, 69.5847%)
-1 -> 15  (34.1529%, 63.0454%)
-15 -> 1  (34.1529%, 67.4076%)
-1 -> 9  (33.915%, 62.6061%)
-9 -> 1  (33.915%, 65.8657%)
-9 -> 22  (33.677%, 65.4036%)
-22 -> 9  (33.677%, 67.5469%)
-3 -> 9  (33.566%, 66.0012%)
-9 -> 3  (33.566%, 65.1879%)
-4 -> 11  (32.9156%, 66.9787%)
-11 -> 4  (32.9156%, 62.7267%)
-5 -> 1  (32.5032%, 61.8286%)
-10 -> 21  (32.3287%, 66.6449%)
-21 -> 10  (32.3287%, 64.4733%)
-8 -> 12  (31.9162%, 64.3428%)
-12 -> 8  (31.9162%, 67.1562%)
-9 -> 19  (31.71%, 61.5835%)
-19 -> 9  (31.71%, 63.1197%)
-7 -> 15  (31.5038%, 62.5118%)
-15 -> 7  (31.5038%, 62.1791%)
-3 -> 12  (31.2976%, 61.5409%)
-12 -> 3  (31.2976%, 65.8545%)
-17 -> 19  (31.2341%, 60.7716%)
-19 -> 17  (31.2341%, 62.1724%)
-4 -> 10  (31.2183%, 63.5249%)
-10 -> 4  (31.2183%, 64.3558%)
-3 -> 19  (31.0596%, 61.073%)
-19 -> 3  (31.0596%, 61.8251%)
-8 -> 17  (31.0596%, 62.6159%)
-17 -> 8  (31.0596%, 60.4321%)
-7 -> 20  (30.8852%, 61.2842%)
-20 -> 7  (30.8852%, 62.0657%)
-3 -> 1  (30.8693%, 60.6987%)
-3 -> 15  (30.8376%, 60.6363%)
-15 -> 3  (30.8376%, 60.8641%)
-20 -> 11  (30.7582%, 61.8106%)
-8 -> 19  (30.7265%, 61.9444%)
-19 -> 8  (30.7265%, 61.162%)
-8 -> 1  (30.5838%, 61.6565%)
-19 -> 1  (30.5679%, 60.8462%)
-13 -> 22  (30.4727%, 60.523%)
-22 -> 13  (30.4727%, 61.1199%)
-14 -> 3  (30.4569%, 61.3419%)
-8 -> 16  (30.441%, 61.3687%)
-16 -> 8  (30.441%, 61.7042%)
-14 -> 1  (30.3775%, 61.1821%)
-14 -> 21  (30.2665%, 60.9585%)
-21 -> 14  (30.2665%, 60.3606%)
-10 -> 11  (30.2189%, 62.2956%)
-19 -> 15  (30.1713%, 60.0568%)
-14 -> 17  (30.1396%, 60.7029%)
-18 -> 7  (30.0603%, 61.8473%)
-4 -> 5  (30.0127%, 61.0717%)
-20 -> 13  (30.0127%, 60.3124%)
-10 -> 20  (29.981%, 61.8051%)
-20 -> 10  (29.981%, 60.2486%)
-18 -> 1  (29.9651%, 61.6514%)
-4 -> 20  (29.9651%, 60.9748%)
-20 -> 4  (29.9651%, 60.2168%)
-4 -> 13  (29.9492%, 60.9425%)
-14 -> 19  (29.9016%, 60.2236%)
-12 -> 1  (29.8541%, 62.8171%)
-18 -> 20  (29.6003%, 60.9008%)
-2 -> 16  (29.3147%, 63.9668%)
-18 -> 5  (29.2513%, 60.1828%)
-12 -> 9  (29.2354%, 61.5154%)
-12 -> 19  (28.5216%, 60.0134%)
-2 -> 10  (28.2519%, 61.6476%)
-2 -> 11  (28.1567%, 61.4399%)
-1,3 -> 9  (21.9543%, 71.1202%)
-1,9 -> 3  (21.9543%, 64.7334%)
-3,9 -> 1  (21.9543%, 65.4064%)
-1,3 -> 12  (20.3363%, 65.8787%)
-1,12 -> 3  (20.3363%, 68.119%)
-3,12 -> 1  (20.3363%, 64.9772%)
-1,5 -> 15  (20.2887%, 62.4207%)
-5,15 -> 1  (20.2887%, 76.2217%)
-1,7 -> 15  (20.2411%, 85.8104%)
-7,15 -> 1  (20.2411%, 64.2497%)
-1,3 -> 15  (20.0666%, 65.0051%)
-3,15 -> 1  (20.0666%, 65.072%)
-1,8 -> 12  (20.0666%, 65.612%)
-1,12 -> 8  (20.0666%, 67.2157%)
diff --git a/exercises/02450Toolbox_Python/Scripts/check_installation.py b/exercises/02450Toolbox_Python/Scripts/check_installation.py
index bb1b412..cc381d5 100644
--- a/exercises/02450Toolbox_Python/Scripts/check_installation.py
+++ b/exercises/02450Toolbox_Python/Scripts/check_installation.py
@@ -1,5 +1,5 @@
 """ 
-This is a helper function which can help you and the TAs debug your Python setup.
+This is a helper function that can help you debug your Python installation.
 """
 import os
 import sklearn
diff --git a/exercises/02450Toolbox_Python/Scripts/ex10_1_3.py b/exercises/02450Toolbox_Python/Scripts/ex10_1_3.py
index c66a9b5..a427cac 100644
--- a/exercises/02450Toolbox_Python/Scripts/ex10_1_3.py
+++ b/exercises/02450Toolbox_Python/Scripts/ex10_1_3.py
@@ -9,40 +9,38 @@ from dtuimldmtools import clusterval
 
 filename = importlib_resources.files("dtuimldmtools").joinpath("data/synth1.mat")
 
-
 # Load Matlab data file and extract variables of interest
 mat_data = loadmat(filename)
-X = mat_data["X"]
-y = mat_data["y"].squeeze()
-attributeNames = [name[0] for name in mat_data["attributeNames"].squeeze()]
-classNames = [name[0][0] for name in mat_data["classNames"]]
+X = mat_data['X']
+y = mat_data['y'].squeeze()
+attributeNames = [name[0] for name in mat_data['attributeNames'].squeeze()]
+classNames = [name[0][0] for name in mat_data['classNames']]
 N, M = X.shape
 C = len(classNames)
 
-
 # Maximum number of clusters:
 K = 10
 
 # Allocate variables:
-Rand = np.zeros((K - 1,))
-Jaccard = np.zeros((K - 1,))
-NMI = np.zeros((K - 1,))
+Rand = np.zeros((K-1,))
+Jaccard = np.zeros((K-1,))
+NMI = np.zeros((K-1,))
 
-for k in range(K - 1):
+for k in range(K-1):
     # run K-means clustering:
-    # cls = Pycluster.kcluster(X,k+1)[0]
-    centroids, cls, inertia = k_means(X, k + 2)
+    #cls = Pycluster.kcluster(X,k+1)[0]
+    centroids, cls, inertia = k_means(X,k+2)
     # compute cluster validities:
-    Rand[k], Jaccard[k], NMI[k] = clusterval(y, cls)
-
+    Rand[k], Jaccard[k], NMI[k] = clusterval(y,cls)    
+        
 # Plot results:
 
 figure(1)
-title("Cluster validity")
-plot(np.arange(K - 1) + 2, Rand)
-plot(np.arange(K - 1) + 2, Jaccard)
-plot(np.arange(K - 1) + 2, NMI)
-legend(["Rand", "Jaccard", "NMI"], loc=4)
+title('Cluster validity')
+plot(np.arange(K-1)+2, Rand)
+plot(np.arange(K-1)+2, Jaccard)
+plot(np.arange(K-1)+2, NMI)
+legend(['Rand', 'Jaccard', 'NMI'], loc=4)
 show()
 
-print("Ran Exercise 10.1.3")
+print('Ran Exercise 10.1.3')
diff --git a/exercises/02450Toolbox_Python/Scripts/ex10_1_5.py b/exercises/02450Toolbox_Python/Scripts/ex10_1_5.py
index 3b60ca2..08e6653 100644
--- a/exercises/02450Toolbox_Python/Scripts/ex10_1_5.py
+++ b/exercises/02450Toolbox_Python/Scripts/ex10_1_5.py
@@ -7,17 +7,18 @@ from sklearn.cluster import k_means
 
 filename = importlib_resources.files("dtuimldmtools").joinpath("data/wildfaces.mat")
 
-
 # Load Matlab data file and extract variables of interest
 mat_data = loadmat(filename)
-# mat_data = loadmat('../Data/digits.mat') #<-- uncomment this for using the digits dataset
 
-X = mat_data["X"]
+#filename = importlib_resources.files("dtuimldmtools").joinpath("data/digits.mat") #<-- uncomment this for using the digits dataset 
+#mat_data = loadmat('../Data/digits.mat') #<-- uncomment this for using the digits dataset 
+
+X = mat_data['X']
 N, M = X.shape
 # Image resolution and number of colors
-x = 40  # <-- change this for using the digits dataset
-y = 40  # <-- change this for using the digits dataset
-c = 3  # <-- change this for using the digits dataset
+x = 40 #<-- change this for using the digits dataset
+y = 40 #<-- change this for using the digits dataset
+c = 3 #<-- change this for using the digits dataset
 
 
 # Number of clusters:
@@ -34,48 +35,43 @@ centroids, cls, inertia = k_means(X, K, verbose=True, max_iter=100, n_init=S)
 
 # Plot centroids
 plt.figure(1)
-n1 = int(np.ceil(np.sqrt(K / 2)))
-n2 = int(np.ceil(float(K) / n1))
+n1 = int(np.ceil(np.sqrt(K/2)))
+n2 = int(np.ceil(float(K)/n1))
 
-# For black and white, cmap=plt.cm.binary, else default
-cmap = plt.cm.binary if c == 1 else None
+#For black and white, cmap=plt.cm.binary, else default
+cmap = plt.cm.binary if c==1 else None 
 
 for k in range(K):
-    plt.subplot(n1, n2, k + 1)
+    plt.subplot(n1,n2,k+1)
     # Reshape centroids to fit resolution and colors
-    img = np.reshape(centroids[k, :], (c, x, y)).T
-    if c == 1:  # if color is single-color/gray scale
+    img = np.reshape(centroids[k,:],(c,x,y)).T
+    if c == 1: # if color is single-color/gray scale
         # Squeeze out singleton dimension
         # and flip the image (cancel out previous transpose)
         img = np.squeeze(img).T
-    plt.imshow(img, interpolation="None", cmap=cmap)
-    plt.xticks([])
-    plt.yticks([])
-    if k == np.floor((n2 - 1) / 2):
-        plt.title("Centroids")
-
-# Plot few randomly selected faces and their nearest centroids
-L = 5  # number of images to plot
+    plt.imshow(img,interpolation='None', cmap=cmap)
+    plt.xticks([]); plt.yticks([])
+    if k==np.floor((n2-1)/2): plt.title('Centroids')
+
+# Plot a few randomly selected faces and their nearest centroids
+L = 5       # number of images to plot
 j = np.random.randint(0, N, L)
 plt.figure(2)
 for l in range(L):
-    plt.subplot(2, L, l + 1)
-    img = np.resize(X[j[l], :], (c, x, y)).T
+    plt.subplot(2,L,l+1)
+    img = np.resize(X[j[l],:],(c,x,y)).T
     if c == 1:
         img = np.squeeze(img).T
-    plt.imshow(img, interpolation="None", cmap=cmap)
-    plt.xticks([])
-    plt.yticks([])
-    if l == np.floor((L - 1) / 2):
-        plt.title("Randomly selected faces and their centroids")
-    plt.subplot(2, L, L + l + 1)
-    img = np.resize(centroids[cls[j[l]], :], (c, x, y)).T
+    plt.imshow(img,interpolation='None', cmap=cmap)
+    plt.xticks([]); plt.yticks([])
+    if l==np.floor((L-1)/2): plt.title('Randomly selected faces and their centroids')
+    plt.subplot(2,L,L+l+1)
+    img = np.resize(centroids[cls[j[l]],:],(c,x,y)).T
     if c == 1:
         img = np.squeeze(img).T
-    plt.imshow(img, interpolation="None", cmap=cmap)
-    plt.xticks([])
-    plt.yticks([])
+    plt.imshow(img,interpolation='None', cmap=cmap)
+    plt.xticks([]); plt.yticks([])
 
 plt.show()
 
-print("Ran Exercise 10.1.5")
+print('Ran Exercise 10.1.5')
diff --git a/exercises/02450Toolbox_Python/Scripts/ex3_3_1.py b/exercises/02450Toolbox_Python/Scripts/ex3_3_1.py
index f0fca96..f5cfc46 100644
--- a/exercises/02450Toolbox_Python/Scripts/ex3_3_1.py
+++ b/exercises/02450Toolbox_Python/Scripts/ex3_3_1.py
@@ -18,67 +18,60 @@ similarity_measure = "SMC"
 # Load Matlab data file to python dict structure
 X = loadmat(filename)["X"]
 # You can also try the CBCL faces dataset (remember to change 'transpose')
-# X = loadmat('../Data/wildfaces_grayscale.mat')['X']
+#X = loadmat('../Data/wildfaces_grayscale.mat')['X']
 N, M = X.shape
-transpose = False  # should the plotted images be transposed?
+transpose = False # should the plotted images be transposed? 
 
 
-# Search the face database for similar faces
+# Search the face database for similar faces 
 # Index of all other images than i
-noti = list(range(0, i)) + list(range(i + 1, N))
+noti = list(range(0,i)) + list(range(i+1,N)) 
 # Compute similarity between image i and all others
-sim = similarity(X[i, :], X[noti, :], similarity_measure)
+sim = similarity(X[i,:], X[noti,:], similarity_measure)
 sim = sim.tolist()[0]
 # Tuples of sorted similarities and their indices
-sim_to_index = sorted(zip(sim, noti))
+sim_to_index = sorted(zip(sim,noti))
 
 
 # Visualize query image and 5 most/least similar images
-plt.figure(figsize=(12, 8))
-plt.subplot(3, 1, 1)
+plt.figure(figsize=(12,8))
+plt.subplot(3,1,1)
 
 img_hw = int(np.sqrt(len(X[0])))
-img = np.reshape(X[i], (img_hw, img_hw))
-if transpose:
-    img = img.T
+img = np.reshape(X[i], (img_hw,img_hw))
+if transpose: img = img.T
 plt.imshow(img, cmap=plt.cm.gray)
-plt.xticks([])
-plt.yticks([])
-plt.title("Query image")
-plt.ylabel("image #{0}".format(i))
+plt.xticks([]); plt.yticks([])
+plt.title('Query image')
+plt.ylabel('image #{0}'.format(i))
 
 
 for ms in range(5):
+
     # 5 most similar images found
-    plt.subplot(3, 5, 6 + ms)
-    im_id = sim_to_index[-ms - 1][1]
-    im_sim = sim_to_index[-ms - 1][0]
-    img = np.reshape(X[im_id], (img_hw, img_hw))
-    if transpose:
-        img = img.T
+    plt.subplot(3,5,6+ms)
+    im_id = sim_to_index[-ms-1][1]
+    im_sim = sim_to_index[-ms-1][0]
+    img = np.reshape(X[im_id],(img_hw,img_hw))
+    if transpose: img = img.T
     plt.imshow(img, cmap=plt.cm.gray)
-    plt.xlabel("sim={0:.3f}".format(im_sim))
-    plt.ylabel("image #{0}".format(im_id))
-    plt.xticks([])
-    plt.yticks([])
-    if ms == 2:
-        plt.title("Most similar images")
+    plt.xlabel('sim={0:.3f}'.format(im_sim))
+    plt.ylabel('image #{0}'.format(im_id))
+    plt.xticks([]); plt.yticks([])
+    if ms==2: plt.title('Most similar images')
 
     # 5 least similar images found
-    plt.subplot(3, 5, 11 + ms)
+    plt.subplot(3,5,11+ms)
     im_id = sim_to_index[ms][1]
     im_sim = sim_to_index[ms][0]
-    img = np.reshape(X[im_id], (img_hw, img_hw))
-    if transpose:
-        img = img.T
+    img = np.reshape(X[im_id],(img_hw,img_hw))
+    if transpose: img = img.T
     plt.imshow(img, cmap=plt.cm.gray)
-    plt.xlabel("sim={0:.3f}".format(im_sim))
-    plt.ylabel("image #{0}".format(im_id))
-    plt.xticks([])
-    plt.yticks([])
-    if ms == 2:
-        plt.title("Least similar images")
-
+    plt.xlabel('sim={0:.3f}'.format(im_sim))
+    plt.ylabel('image #{0}'.format(im_id))
+    plt.xticks([]); plt.yticks([])
+    if ms==2: plt.title('Least similar images')
+    
 plt.show()
 
-print("Ran Exercise 3.3.1")
+print('Ran Exercise 3.3.1')
diff --git a/exercises/02450Toolbox_Python/Scripts/ex6_2_1.py b/exercises/02450Toolbox_Python/Scripts/ex6_2_1.py
index 2be9500..64f7350 100644
--- a/exercises/02450Toolbox_Python/Scripts/ex6_2_1.py
+++ b/exercises/02450Toolbox_Python/Scripts/ex6_2_1.py
@@ -11,161 +11,121 @@ from dtuimldmtools import bmplot, feature_selector_lr
 filename = importlib_resources.files("dtuimldmtools").joinpath("data/body.mat")
 # Load data from matlab file
 mat_data = loadmat(filename)
-X = mat_data["X"]
-y = mat_data["y"].squeeze()
-attributeNames = [name[0] for name in mat_data["attributeNames"][0]]
+X = mat_data['X']
+y = mat_data['y'].squeeze()
+attributeNames = [name[0] for name in mat_data['attributeNames'][0]]
 N, M = X.shape
 
 
 ## Crossvalidation
 # Create crossvalidation partition for evaluation
 K = 5
-CV = model_selection.KFold(n_splits=K, shuffle=True)
+CV = model_selection.KFold(n_splits=K,shuffle=True)
 
 # Initialize variables
-Features = np.zeros((M, K))
-Error_train = np.empty((K, 1))
-Error_test = np.empty((K, 1))
-Error_train_fs = np.empty((K, 1))
-Error_test_fs = np.empty((K, 1))
-Error_train_nofeatures = np.empty((K, 1))
-Error_test_nofeatures = np.empty((K, 1))
-
-k = 0
+Features = np.zeros((M,K))
+Error_train = np.empty((K,1))
+Error_test = np.empty((K,1))
+Error_train_fs = np.empty((K,1))
+Error_test_fs = np.empty((K,1))
+Error_train_nofeatures = np.empty((K,1))
+Error_test_nofeatures = np.empty((K,1))
+
+k=0
 for train_index, test_index in CV.split(X):
+    
     # extract training and test set for current CV fold
-    X_train = X[train_index, :]
+    X_train = X[train_index,:]
     y_train = y[train_index]
-    X_test = X[test_index, :]
+    X_test = X[test_index,:]
     y_test = y[test_index]
     internal_cross_validation = 10
-
+    
     # Compute squared error without using the input data at all
-    Error_train_nofeatures[k] = (
-        np.square(y_train - y_train.mean()).sum() / y_train.shape[0]
-    )
-    Error_test_nofeatures[k] = np.square(y_test - y_test.mean()).sum() / y_test.shape[0]
+    Error_train_nofeatures[k] = np.square(y_train-y_train.mean()).sum()/y_train.shape[0]
+    Error_test_nofeatures[k] = np.square(y_test-y_test.mean()).sum()/y_test.shape[0]
 
     # Compute squared error with all features selected (no feature selection)
     m = lm.LinearRegression(fit_intercept=True).fit(X_train, y_train)
-    Error_train[k] = np.square(y_train - m.predict(X_train)).sum() / y_train.shape[0]
-    Error_test[k] = np.square(y_test - m.predict(X_test)).sum() / y_test.shape[0]
+    Error_train[k] = np.square(y_train-m.predict(X_train)).sum()/y_train.shape[0]
+    Error_test[k] = np.square(y_test-m.predict(X_test)).sum()/y_test.shape[0]
 
     # Compute squared error with feature subset selection
-    textout = ""
-    selected_features, features_record, loss_record = feature_selector_lr(
-        X_train, y_train, internal_cross_validation, display=textout
-    )
-
-    Features[selected_features, k] = 1
+    textout = ''
+    selected_features, features_record, loss_record = feature_selector_lr(X_train, y_train, internal_cross_validation,display=textout)
+    
+    Features[selected_features,k] = 1
     # .. alternatively you could use module sklearn.feature_selection
     if len(selected_features) == 0:
-        print(
-            "No features were selected, i.e. the data (X) in the fold cannot describe the outcomes (y)."
-        )
+        print('No features were selected, i.e. the data (X) in the fold cannot describe the outcomes (y).' )
     else:
-        m = lm.LinearRegression(fit_intercept=True).fit(
-            X_train[:, selected_features], y_train
-        )
-        Error_train_fs[k] = (
-            np.square(y_train - m.predict(X_train[:, selected_features])).sum()
-            / y_train.shape[0]
-        )
-        Error_test_fs[k] = (
-            np.square(y_test - m.predict(X_test[:, selected_features])).sum()
-            / y_test.shape[0]
-        )
-
+        m = lm.LinearRegression(fit_intercept=True).fit(X_train[:,selected_features], y_train)
+        Error_train_fs[k] = np.square(y_train-m.predict(X_train[:,selected_features])).sum()/y_train.shape[0]
+        Error_test_fs[k] = np.square(y_test-m.predict(X_test[:,selected_features])).sum()/y_test.shape[0]
+    
         figure(k)
-        subplot(1, 2, 1)
-        plot(range(1, len(loss_record)), loss_record[1:])
-        xlabel("Iteration")
-        ylabel("Squared error (crossvalidation)")
-
-        subplot(1, 3, 3)
-        bmplot(
-            attributeNames, range(1, features_record.shape[1]), -features_record[:, 1:]
-        )
-        clim(-1.5, 0)
-        xlabel("Iteration")
+        subplot(1,2,1)
+        plot(range(1,len(loss_record)), loss_record[1:])
+        xlabel('Iteration')
+        ylabel('Squared error (crossvalidation)')    
+        
+        subplot(1,3,3)
+        bmplot(attributeNames, range(1,features_record.shape[1]), -features_record[:,1:])
+        clim(-1.5,0)
+        xlabel('Iteration')
 
-    print("Cross validation fold {0}/{1}".format(k + 1, K))
-    print("Train indices: {0}".format(train_index))
-    print("Test indices: {0}".format(test_index))
-    print("Features no: {0}\n".format(selected_features.size))
+    print('Cross validation fold {0}/{1}'.format(k+1,K))
+    print('Train indices: {0}'.format(train_index))
+    print('Test indices: {0}'.format(test_index))
+    print('Features no: {0}\n'.format(selected_features.size))
 
-    k += 1
+    k+=1
 
 
 # Display results
-print("\n")
-print("Linear regression without feature selection:\n")
-print("- Training error: {0}".format(Error_train.mean()))
-print("- Test error:     {0}".format(Error_test.mean()))
-print(
-    "- R^2 train:     {0}".format(
-        (Error_train_nofeatures.sum() - Error_train.sum())
-        / Error_train_nofeatures.sum()
-    )
-)
-print(
-    "- R^2 test:     {0}".format(
-        (Error_test_nofeatures.sum() - Error_test.sum()) / Error_test_nofeatures.sum()
-    )
-)
-print("Linear regression with feature selection:\n")
-print("- Training error: {0}".format(Error_train_fs.mean()))
-print("- Test error:     {0}".format(Error_test_fs.mean()))
-print(
-    "- R^2 train:     {0}".format(
-        (Error_train_nofeatures.sum() - Error_train_fs.sum())
-        / Error_train_nofeatures.sum()
-    )
-)
-print(
-    "- R^2 test:     {0}".format(
-        (Error_test_nofeatures.sum() - Error_test_fs.sum())
-        / Error_test_nofeatures.sum()
-    )
-)
+print('\n')
+print('Linear regression without feature selection:\n')
+print('- Training error: {0}'.format(Error_train.mean()))
+print('- Test error:     {0}'.format(Error_test.mean()))
+print('- R^2 train:     {0}'.format((Error_train_nofeatures.sum()-Error_train.sum())/Error_train_nofeatures.sum()))
+print('- R^2 test:     {0}'.format((Error_test_nofeatures.sum()-Error_test.sum())/Error_test_nofeatures.sum()))
+print('Linear regression with feature selection:\n')
+print('- Training error: {0}'.format(Error_train_fs.mean()))
+print('- Test error:     {0}'.format(Error_test_fs.mean()))
+print('- R^2 train:     {0}'.format((Error_train_nofeatures.sum()-Error_train_fs.sum())/Error_train_nofeatures.sum()))
+print('- R^2 test:     {0}'.format((Error_test_nofeatures.sum()-Error_test_fs.sum())/Error_test_nofeatures.sum()))
 
 figure(k)
-subplot(1, 3, 2)
-bmplot(attributeNames, range(1, Features.shape[1] + 1), -Features)
-clim(-1.5, 0)
-xlabel("Crossvalidation fold")
-ylabel("Attribute")
+subplot(1,3,2)
+bmplot(attributeNames, range(1,Features.shape[1]+1), -Features)
+clim(-1.5,0)
+xlabel('Crossvalidation fold')
+ylabel('Attribute')
 
 
 # Inspect selected feature coefficients effect on the entire dataset and
 # plot the fitted model residual error as function of each attribute to
 # inspect for systematic structure in the residual
 
-f = 2  # cross-validation fold to inspect
-ff = Features[:, f - 1].nonzero()[0]
+f=2 # cross-validation fold to inspect
+ff=Features[:,f-1].nonzero()[0]
 if len(ff) == 0:
-    print(
-        "\nNo features were selected, i.e. the data (X) in the fold cannot describe the outcomes (y)."
-    )
+    print('\nNo features were selected, i.e. the data (X) in the fold cannot describe the outcomes (y).' )
 else:
-    m = lm.LinearRegression(fit_intercept=True).fit(X[:, ff], y)
-
-    y_est = m.predict(X[:, ff])
-    residual = y - y_est
-
-    figure(k + 1, figsize=(12, 6))
-    title(
-        "Residual error vs. Attributes for features selected in cross-validation fold {0}".format(
-            f
-        )
-    )
-    for i in range(0, len(ff)):
-        subplot(2, int(np.ceil(len(ff) / 2)), i + 1)
-        plot(X[:, ff[i]], residual, ".")
-        xlabel(attributeNames[ff[i]])
-        ylabel("residual error")
-
-
+    m = lm.LinearRegression(fit_intercept=True).fit(X[:,ff], y)
+    
+    y_est= m.predict(X[:,ff])
+    residual=y-y_est
+    
+    figure(k+1, figsize=(12,6))
+    title('Residual error vs. Attributes for features selected in cross-validation fold {0}'.format(f))
+    for i in range(0,len(ff)):
+       subplot(2, int( np.ceil(len(ff)/2)), i+1)
+       plot(X[:,ff[i]],residual,'.')
+       xlabel(attributeNames[ff[i]])
+       ylabel('residual error')
+    
+    
 show()
 
-print("Ran Exercise 6.2.1")
+print('Ran Exercise 6.2.1')
diff --git a/exercises/02450Toolbox_Python/Scripts/ex6_3_1.py b/exercises/02450Toolbox_Python/Scripts/ex6_3_1.py
index ced5d60..f04ee01 100644
--- a/exercises/02450Toolbox_Python/Scripts/ex6_3_1.py
+++ b/exercises/02450Toolbox_Python/Scripts/ex6_3_1.py
@@ -17,9 +17,7 @@ from scipy.io import loadmat
 from sklearn.metrics import confusion_matrix
 from sklearn.neighbors import KNeighborsClassifier
 
-filename = importlib_resources.files("dtuimldmtools").joinpath(
-    "synth1.mat"
-)  # <-- change the number to change dataset
+filename = importlib_resources.files("dtuimldmtools").joinpath("data/synth3.mat")  # <-- change the number to change dataset
 
 # Load Matlab data file and extract variables of interest
 mat_data = loadmat(filename)
diff --git a/exercises/02450Toolbox_Python/Scripts/ex7_3_1.py b/exercises/02450Toolbox_Python/Scripts/ex7_3_1.py
index 60a9781..4f54813 100644
--- a/exercises/02450Toolbox_Python/Scripts/ex7_3_1.py
+++ b/exercises/02450Toolbox_Python/Scripts/ex7_3_1.py
@@ -10,12 +10,11 @@ from dtuimldmtools import *
 from dtuimldmtools.statistics.statistics import correlated_ttest
 
 loss = 2
-X, y = X[:, :10], X[:, 10:]
+X,y = X[:,:10], X[:,10:]
 # This script creates predictions from three KNN classifiers using cross-validation
 
-K = 10
+K = 10 # We presently set J=K
 m = 1
-J = 0
 r = []
 kf = model_selection.KFold(n_splits=K)
 
@@ -24,7 +23,7 @@ for dm in range(m):
     yhat = []
 
     for train_index, test_index in kf.split(X):
-        X_train, y_train = X[train_index, :], y[train_index]
+        X_train, y_train = X[train_index,:], y[train_index]
         X_test, y_test = X[test_index, :], y[test_index]
 
         mA = sklearn.linear_model.LinearRegression().fit(X_train, y_train)
@@ -33,30 +32,26 @@ for dm in range(m):
         yhatA = mA.predict(X_test)
         yhatB = mB.predict(X_test)[:, np.newaxis]  # justsklearnthings
         y_true.append(y_test)
-        yhat.append(np.concatenate([yhatA, yhatB], axis=1))
+        yhat.append( np.concatenate([yhatA, yhatB], axis=1) )
 
-        r.append(
-            np.mean(np.abs(yhatA - y_test) ** loss - np.abs(yhatB - y_test) ** loss)
-        )
+        r.append( np.mean( np.abs( yhatA-y_test ) ** loss - np.abs( yhatB-y_test) ** loss ) )
 
 # Initialize parameters and run test appropriate for setup II
 alpha = 0.05
-rho = 1 / K
+rho = 1/K
 p_setupII, CI_setupII = correlated_ttest(r, rho, alpha=alpha)
 
 if m == 1:
-    y_true = np.concatenate(y_true)[:, 0]
+    y_true = np.concatenate(y_true)[:,0]
     yhat = np.concatenate(yhat)
 
     # note our usual setup I ttest only makes sense if m=1.
-    zA = np.abs(y_true - yhat[:, 0]) ** loss
-    zB = np.abs(y_true - yhat[:, 1]) ** loss
+    zA = np.abs(y_true - yhat[:,0] ) ** loss
+    zB = np.abs(y_true - yhat[:,1] ) ** loss
     z = zA - zB
 
-    CI_setupI = st.t.interval(
-        1 - alpha, len(z) - 1, loc=np.mean(z), scale=st.sem(z)
-    )  # Confidence interval
+    CI_setupI = st.t.interval(1 - alpha, len(z) - 1, loc=np.mean(z), scale=st.sem(z))  # Confidence interval
     p_setupI = st.t.cdf(-np.abs(np.mean(z)) / st.sem(z), df=len(z) - 1)  # p-value
 
-    print([p_setupII, p_setupI])
-    print(CI_setupII, CI_setupI)
+    print( [p_setupII, p_setupI] )
+    print(CI_setupII, CI_setupI )
\ No newline at end of file
diff --git a/exercises/02450Toolbox_Python/Scripts/ex9_1_1.py b/exercises/02450Toolbox_Python/Scripts/ex9_1_1.py
index cd0cdc5..fcad651 100644
--- a/exercises/02450Toolbox_Python/Scripts/ex9_1_1.py
+++ b/exercises/02450Toolbox_Python/Scripts/ex9_1_1.py
@@ -1,21 +1,19 @@
 # exercise 9.1.1
-
-
 import importlib_resources
-import numpy as np
 from matplotlib.pyplot import figure, show
+import numpy as np
 from scipy.io import loadmat
-from sklearn.linear_model import LogisticRegression
-
 from dtuimldmtools import BinClassifierEnsemble, bootstrap, dbplot, dbprobplot
+from sklearn.linear_model import LogisticRegression
 
 filename = importlib_resources.files("dtuimldmtools").joinpath("data/synth5.mat")
+
 # Load Matlab data file and extract variables of interest
 mat_data = loadmat(filename)
-X = mat_data["X"]
-y = mat_data["y"].squeeze()
-attributeNames = [name[0] for name in mat_data["attributeNames"].squeeze()]
-classNames = [name[0][0] for name in mat_data["classNames"]]
+X = mat_data['X']
+y = mat_data['y'].squeeze()
+attributeNames = [name[0] for name in mat_data['attributeNames'].squeeze()]
+classNames = [name[0][0] for name in mat_data['classNames']]
 N, M = X.shape
 C = len(classNames)
 
@@ -25,17 +23,18 @@ C = len(classNames)
 L = 100
 
 # Weights for selecting samples in each bootstrap
-weights = np.ones((N, 1), dtype=float) / N
+weights = np.ones((N,1),dtype=float)/N
 
 # Storage of trained log.reg. classifiers fitted in each bootstrap
-logits = [0] * L
+logits = [0]*L
 votes = np.zeros((N,))
 
 # For each round of bagging
 for l in range(L):
+
     # Extract training set by random sampling with replacement from X and y
     X_train, y_train = bootstrap(X, y, N, weights)
-
+    
     # Fit logistic regression model to training data and save result
     logit_classifier = LogisticRegression()
     logit_classifier.fit(X_train, y_train)
@@ -43,22 +42,20 @@ for l in range(L):
     y_est = logit_classifier.predict(X).T
     votes = votes + y_est
 
-    ErrorRate = (y != y_est).sum(dtype=float) / N
-    print("Error rate: {:2.2f}%".format(ErrorRate * 100))
-
+    ErrorRate = (y!=y_est).sum(dtype=float)/N
+    print('Error rate: {:2.2f}%'.format(ErrorRate*100))    
+    
 # Estimated value of class labels (using 0.5 as threshold) by majority voting
-y_est_ensemble = votes > (L / 2)
+y_est_ensemble = votes>(L/2)
 
 # Compute error rate
-ErrorRate = (y != y_est_ensemble).sum(dtype=float) / N
-print("Error rate: {:3.2f}%".format(ErrorRate * 100))
+ErrorRate = (y!=y_est_ensemble).sum(dtype=float)/N
+print('Error rate: {:3.2f}%'.format(ErrorRate*100))
 
 ce = BinClassifierEnsemble(logits)
-figure(1)
-dbprobplot(ce, X, y, "auto", resolution=200)
-figure(2)
-dbplot(ce, X, y, "auto", resolution=200)
+figure(1); dbprobplot(ce, X, y, 'auto', resolution=200)
+figure(2); dbplot(ce, X, y, 'auto', resolution=200)
 
 show()
 
-print("Ran Exercise 9.1.1")
+print('Ran Exercise 9.1.1')
\ No newline at end of file
diff --git a/exercises/02450Toolbox_Python/Scripts/ex9_1_2.py b/exercises/02450Toolbox_Python/Scripts/ex9_1_2.py
index a3ebddb..252a3c9 100644
--- a/exercises/02450Toolbox_Python/Scripts/ex9_1_2.py
+++ b/exercises/02450Toolbox_Python/Scripts/ex9_1_2.py
@@ -1,23 +1,19 @@
 # exercise 9.1.2
-
-
 import importlib_resources
 import matplotlib.pyplot as plt
 import numpy as np
 from scipy.io import loadmat
-from sklearn.linear_model import LogisticRegression
-
 from dtuimldmtools import BinClassifierEnsemble, bootstrap, dbplot, dbprobplot
+from sklearn.linear_model import LogisticRegression
 
 filename = importlib_resources.files("dtuimldmtools").joinpath("data/synth5.mat")
 
-
 # Load Matlab data file and extract variables of interest
 mat_data = loadmat(filename)
-X = mat_data["X"]
-y = mat_data["y"].squeeze()
-attributeNames = [name[0] for name in mat_data["attributeNames"].squeeze()]
-classNames = [name[0][0] for name in mat_data["classNames"]]
+X = mat_data['X']
+y = mat_data['y'].squeeze()
+attributeNames = [name[0] for name in mat_data['attributeNames'].squeeze()]
+classNames = [name[0][0] for name in mat_data['classNames']]
 N, M = X.shape
 C = len(classNames)
 
@@ -27,80 +23,78 @@ C = len(classNames)
 L = 100
 
 # Weights for selecting samples in each bootstrap
-weights = np.ones((N,), dtype=float) / N
+weights = np.ones((N,),dtype=float)/N
 
 # Storage of trained log.reg. classifiers fitted in each bootstrap
-logits = [0] * L
-alpha = np.ones((L,))
-votes = np.zeros((N, 1))
+logits = [0]*L
+alpha = np.ones( (L,) )
+votes = np.zeros((N,1))
 epsi = 0
-y_all = np.zeros((N, L))
+y_all = np.zeros((N,L))
 y = y > 0.5
 # For each round of boosting
 for l in range(L):
+    
     # Extract training set by random sampling with replacement from X and y
-    while True:
-        # not a thing of beauty, however log.reg. fails if presented with less than two classes.
-        X_train, y_train = bootstrap(X, y, N, weights)
-        if not (all(y_train == 0) or all(y_train == 1)):
-            break
-
+    while True : 
+        # not a thing of beauty; however, log.reg. fails if presented with fewer than two classes.
+        X_train, y_train = bootstrap(X, y, N, weights) 
+        if not (all(y_train==0) or all(y_train == 1)) : break      
+    
     # Fit logistic regression model to training data and save result
-    # turn off regularization with C.
+    # turn off regularization with C. 
     logit_classifier = LogisticRegression(C=1000)
 
-    logit_classifier.fit(X_train, y_train)
+    logit_classifier.fit(X_train, y_train )
     logits[l] = logit_classifier
     y_est = logit_classifier.predict(X).T > 0.5
-
-    y_all[:, l] = 1.0 * y_est
-    v = (y_est != y).T
-    ErrorRate = np.multiply(weights, v).sum()
+    
+    y_all[:,l] = 1.0 * y_est
+    v  = (y_est != y).T
+    ErrorRate = np.multiply(weights,v).sum()
     epsi = ErrorRate
-
-    alphai = 0.5 * np.log((1 - epsi) / epsi)
-
-    weights[y_est == y] = weights[y_est == y] * np.exp(-alphai)
-    weights[y_est != y] = weights[y_est != y] * np.exp(alphai)
-
+    
+    alphai = 0.5 * np.log( (1-epsi)/epsi)
+    
+    weights[y_est == y] = weights[y_est == y] * np.exp( -alphai )
+    weights[y_est != y] = weights[y_est != y] * np.exp(  alphai )
+    
     weights = weights / sum(weights)
-
+            
     votes = votes + y_est
     alpha[l] = alphai
-    print("Error rate: {:2.2f}%".format(ErrorRate * 100))
-
-
+    print('Error rate: {:2.2f}%'.format(ErrorRate*100))
+    
+    
 # Estimated value of class labels (using 0.5 as threshold) by majority voting
-alpha = alpha / sum(alpha)
+alpha = alpha/sum(alpha)
 y_est_ensemble = y_all @ alpha > 0.5
 
-# y_est_ensemble = votes > (L/2)
-# y_est_ensemble = mat(y_all) * mat(alpha) - (1-mat(y_all)) * mat(alpha) > 0
-ErrorRateEnsemble = sum(y_est_ensemble != y) / N
+#y_est_ensemble = votes > (L/2)
+#y_est_ensemble = mat(y_all) * mat(alpha) - (1-mat(y_all)) * mat(alpha) > 0
+ErrorRateEnsemble = sum(y_est_ensemble != y)/N
 
 # Compute error rate
-# ErrorRate = (y!=y_est_ensemble).sum(dtype=float)/N
-print("Error rate for ensemble classifier: {:.1f}%".format(ErrorRateEnsemble * 100))
-
-ce = BinClassifierEnsemble(logits, alpha)
-# ce = BinClassifierEnsemble(logits) # What happens if alpha is not included?
-plt.figure(1)
-dbprobplot(ce, X, y, "auto", resolution=200)
-plt.figure(2)
-dbplot(ce, X, y, "auto", resolution=200)
-# plt.figure(3); plt.plot(alpha);
-
-# %%
-plt.figure(4, figsize=(8, 8))
+#ErrorRate = (y!=y_est_ensemble).sum(dtype=float)/N
+print('Error rate for ensemble classifier: {:.1f}%'.format(ErrorRateEnsemble*100))
+ 
+ce = BinClassifierEnsemble(logits,alpha)
+#ce = BinClassifierEnsemble(logits) # What happens if alpha is not included?
+plt.figure(1); dbprobplot(ce, X, y, 'auto', resolution=200)
+plt.figure(2); dbplot(ce, X, y, 'auto', resolution=200)
+#plt.figure(3); plt.plot(alpha);
+
+#%%
+plt.figure(4,figsize=(8,8))
 for i in range(2):
-    plt.plot(X[(y_est_ensemble == i), 0], X[(y_est_ensemble == i), 1], "br"[i] + "o")
+    plt.plot(X[ (y_est_ensemble==i),0],X[ (y_est_ensemble==i),1],'br'[i] + 'o')
 
 ## Uncomment the lines below to investigate misclassifications
-# for i in range(2):
+#for i in range(2):
 #    plt.plot(X[ (y==i),0],X[ (y==i),1],'br'[i] + '.')
 
-plt.xlabel("Feature 1")
-plt.ylabel("Feature 2")
+plt.xlabel('Feature 1')
+plt.ylabel('Feature 2')    
 plt.show()
 
-print("Ran Exercise 9.1.2")
+print('Ran Exercise 9.1.2')
\ No newline at end of file
diff --git a/exercises/02450Toolbox_Python/Scripts/ex9_1_3.py b/exercises/02450Toolbox_Python/Scripts/ex9_1_3.py
index bba69f4..296caf5 100644
--- a/exercises/02450Toolbox_Python/Scripts/ex9_1_3.py
+++ b/exercises/02450Toolbox_Python/Scripts/ex9_1_3.py
@@ -1,21 +1,18 @@
 # exercise 9.1.3
-
 import importlib_resources
 from matplotlib.pyplot import figure, show
 from scipy.io import loadmat
-from sklearn.ensemble import RandomForestClassifier
-
 from dtuimldmtools import dbplot, dbprobplot
+from sklearn.ensemble import RandomForestClassifier
 
 filename = importlib_resources.files("dtuimldmtools").joinpath("data/synth7.mat")
 
-
 # Load Matlab data file and extract variables of interest
 mat_data = loadmat(filename)
-X = mat_data["X"]
-y = mat_data["y"].squeeze()
-attributeNames = [name[0] for name in mat_data["attributeNames"].squeeze()]
-classNames = [name[0][0] for name in mat_data["classNames"]]
+X = mat_data['X']
+y = mat_data['y'].squeeze()
+attributeNames = [name[0] for name in mat_data['attributeNames'].squeeze()]
+classNames = [name[0][0] for name in mat_data['classNames']]
 N, M = X.shape
 C = len(classNames)
 
@@ -30,15 +27,13 @@ y_est = rf_classifier.predict(X).T
 y_est_prob = rf_classifier.predict_proba(X).T
 
 # Compute classification error
-ErrorRate = (y != y_est).sum(dtype=float) / N
-print("Error rate: {:.2f}%".format(ErrorRate * 100))
+ErrorRate = (y!=y_est).sum(dtype=float)/N
+print('Error rate: {:.2f}%'.format(ErrorRate*100))    
 
-# Plot decision boundaries
-figure(1)
-dbprobplot(rf_classifier, X, y, "auto", resolution=400)
-figure(2)
-dbplot(rf_classifier, X, y, "auto", resolution=400)
+# Plot decision boundaries    
+figure(1); dbprobplot(rf_classifier, X, y, 'auto', resolution=400)
+figure(2); dbplot(rf_classifier, X, y, 'auto', resolution=400)
 
 show()
 
-print("Ran Exercise 9.1.3")
+print('Ran Exercise 9.1.3')
\ No newline at end of file
diff --git a/exercises/02450Toolbox_Python/Scripts/ex9_2_1.py b/exercises/02450Toolbox_Python/Scripts/ex9_2_1.py
index 8c6fe67..a582f7f 100644
--- a/exercises/02450Toolbox_Python/Scripts/ex9_2_1.py
+++ b/exercises/02450Toolbox_Python/Scripts/ex9_2_1.py
@@ -1,24 +1,19 @@
 # exercise 9.2.1
-
 import importlib_resources
 from matplotlib.pyplot import figure, show
-
-# import numpy as np
 from scipy.io import loadmat
-from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import StratifiedKFold
-
-from dtuimldmtools import confmatplot, rocplot
+from sklearn.linear_model import LogisticRegression
+from dtuimldmtools import rocplot, confmatplot
 
 filename = importlib_resources.files("dtuimldmtools").joinpath("data/wine2.mat")
 
-
 # Load Matlab data file and extract variables of interest
 mat_data = loadmat(filename)
-X = mat_data["X"]
-y = mat_data["y"].squeeze()
-attributeNames = [name[0] for name in mat_data["attributeNames"][0]]
-classNames = [name[0][0] for name in mat_data["classNames"]]
+X = mat_data['X']
+y = mat_data['y'].squeeze()
+attributeNames = [name[0] for name in mat_data['attributeNames'][0]]
+classNames = [name[0][0] for name in mat_data['classNames']]
 N, M = X.shape
 C = len(classNames)
 
@@ -26,27 +21,27 @@ C = len(classNames)
 K = 2
 CV = StratifiedKFold(K, shuffle=True)
 
-k = 0
-for train_index, test_index in CV.split(X, y):
+k=0
+for train_index, test_index in CV.split(X,y):
     print(train_index)
     # extract training and test set for current CV fold
-    X_train, y_train = X[train_index, :], y[train_index]
-    X_test, y_test = X[test_index, :], y[test_index]
+    X_train, y_train = X[train_index,:], y[train_index]
+    X_test, y_test = X[test_index,:], y[test_index]
 
     logit_classifier = LogisticRegression()
     logit_classifier.fit(X_train, y_train)
 
     y_test_est = logit_classifier.predict(X_test).T
-    p = logit_classifier.predict_proba(X_test)[:, 1].T
+    p = logit_classifier.predict_proba(X_test)[:,1].T
 
     figure(k)
     rocplot(p, y_test)
 
-    figure(k + 1)
-    confmatplot(y_test, y_test_est)
-
-    k += 2
+    figure(k+1)
+    confmatplot(y_test,y_test_est)
 
-show()
+    k+=2
+    
+show()    
 
-print("Ran Exercise 9.2.1")
+print('Ran Exercise 9.2.1')
\ No newline at end of file
diff --git a/exercises/02450Toolbox_Python/Scripts/ex9_2_2.py b/exercises/02450Toolbox_Python/Scripts/ex9_2_2.py
index a6e2a33..138ce3d 100644
--- a/exercises/02450Toolbox_Python/Scripts/ex9_2_2.py
+++ b/exercises/02450Toolbox_Python/Scripts/ex9_2_2.py
@@ -1,26 +1,22 @@
 # exercise 9.2.2
-
 import importlib_resources
 from matplotlib.pyplot import figure, show
-
-# import numpy as np
 from scipy.io import loadmat
-from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import StratifiedKFold
-
-from dtuimldmtools import confmatplot, rocplot
+from sklearn.linear_model import LogisticRegression
+from dtuimldmtools import rocplot, confmatplot
 
 filename = importlib_resources.files("dtuimldmtools").joinpath("data/wine2.mat")
 
 # Load Matlab data file and extract variables of interest
 mat_data = loadmat(filename)
-X = mat_data["X"]
-y = mat_data["y"].squeeze()
-attributeNames = [name[0] for name in mat_data["attributeNames"][0]]
-classNames = [name[0][0] for name in mat_data["classNames"]]
+X = mat_data['X']
+y = mat_data['y'].squeeze()
+attributeNames = [name[0] for name in mat_data['attributeNames'][0]]
+classNames = [name[0][0] for name in mat_data['classNames']]
 
-attribute_included = 10  # alcohol contents
-X = X[:, attribute_included].reshape(-1, 1)
+attribute_included = 10   # alcohol contents
+X = X[:,attribute_included].reshape(-1,1)
 attributeNames = attributeNames[attribute_included]
 N, M = X.shape
 C = len(classNames)
@@ -29,25 +25,25 @@ C = len(classNames)
 K = 2
 CV = StratifiedKFold(K, shuffle=True)
 
-k = 0
-for train_index, test_index in CV.split(X, y):
+k=0
+for train_index, test_index in CV.split(X,y):
     print(train_index)
     # extract training and test set for current CV fold
-    X_train, y_train = X[train_index, :], y[train_index]
-    X_test, y_test = X[test_index, :], y[test_index]
+    X_train, y_train = X[train_index,:], y[train_index]
+    X_test, y_test = X[test_index,:], y[test_index]
 
     logit_classifier = LogisticRegression()
     logit_classifier.fit(X_train, y_train)
 
     y_test_est = logit_classifier.predict(X_test).T
-    p = logit_classifier.predict_proba(X_test)[:, 1].T
+    p = logit_classifier.predict_proba(X_test)[:,1].T
 
     figure(k)
-    rocplot(p, y_test)
-
-    figure(k + 1)
-    confmatplot(y_test, y_test_est)
+    rocplot(p,y_test)
 
-    k += 2
+    figure(k+1)
+    confmatplot(y_test,y_test_est)
 
-show()
+    k+=2
+    
+show()    
\ No newline at end of file
diff --git a/exercises/02450Toolbox_R/02450Toolbox_R_Development.Rproj b/exercises/02450Toolbox_R/02450Toolbox_R_Development.Rproj
new file mode 100644
index 0000000..8e3c2eb
--- /dev/null
+++ b/exercises/02450Toolbox_R/02450Toolbox_R_Development.Rproj
@@ -0,0 +1,13 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
-- 
GitLab