Commit 822fecb6 authored by bjje

Minor updates for public repo

parent 69260c41
Showing changes with 304 additions and 475 deletions
Rule (Support, Confidence)
11 -> 13 (39.3242%, 74.9395%)
13 -> 11 (39.3242%, 78.1033%)
15 -> 22 (38.1187%, 75.2348%)
22 -> 15 (38.1187%, 76.4556%)
16 -> 21 (37.5952%, 76.2058%)
21 -> 16 (37.5952%, 74.9763%)
9 -> 15 (37.2621%, 72.366%)
15 -> 9 (37.2621%, 73.5441%)
12 -> 14 (36.5006%, 76.8024%)
14 -> 12 (36.5006%, 73.5144%)
7 -> 13 (35.2951%, 70.0346%)
13 -> 7 (35.2951%, 70.1008%)
10 -> 16 (35.1047%, 72.3676%)
16 -> 10 (35.1047%, 71.1576%)
7 -> 11 (34.7874%, 69.0274%)
11 -> 7 (34.7874%, 66.2938%)
8 -> 14 (34.5495%, 69.6514%)
14 -> 8 (34.5495%, 69.5847%)
1 -> 15 (34.1529%, 63.0454%)
15 -> 1 (34.1529%, 67.4076%)
1 -> 9 (33.915%, 62.6061%)
9 -> 1 (33.915%, 65.8657%)
9 -> 22 (33.677%, 65.4036%)
22 -> 9 (33.677%, 67.5469%)
3 -> 9 (33.566%, 66.0012%)
9 -> 3 (33.566%, 65.1879%)
4 -> 11 (32.9156%, 66.9787%)
11 -> 4 (32.9156%, 62.7267%)
5 -> 1 (32.5032%, 61.8286%)
10 -> 21 (32.3287%, 66.6449%)
21 -> 10 (32.3287%, 64.4733%)
8 -> 12 (31.9162%, 64.3428%)
12 -> 8 (31.9162%, 67.1562%)
9 -> 19 (31.71%, 61.5835%)
19 -> 9 (31.71%, 63.1197%)
7 -> 15 (31.5038%, 62.5118%)
15 -> 7 (31.5038%, 62.1791%)
3 -> 12 (31.2976%, 61.5409%)
12 -> 3 (31.2976%, 65.8545%)
17 -> 19 (31.2341%, 60.7716%)
19 -> 17 (31.2341%, 62.1724%)
4 -> 10 (31.2183%, 63.5249%)
10 -> 4 (31.2183%, 64.3558%)
3 -> 19 (31.0596%, 61.073%)
19 -> 3 (31.0596%, 61.8251%)
8 -> 17 (31.0596%, 62.6159%)
17 -> 8 (31.0596%, 60.4321%)
7 -> 20 (30.8852%, 61.2842%)
20 -> 7 (30.8852%, 62.0657%)
3 -> 1 (30.8693%, 60.6987%)
3 -> 15 (30.8376%, 60.6363%)
15 -> 3 (30.8376%, 60.8641%)
20 -> 11 (30.7582%, 61.8106%)
8 -> 19 (30.7265%, 61.9444%)
19 -> 8 (30.7265%, 61.162%)
8 -> 1 (30.5838%, 61.6565%)
19 -> 1 (30.5679%, 60.8462%)
13 -> 22 (30.4727%, 60.523%)
22 -> 13 (30.4727%, 61.1199%)
14 -> 3 (30.4569%, 61.3419%)
8 -> 16 (30.441%, 61.3687%)
16 -> 8 (30.441%, 61.7042%)
14 -> 1 (30.3775%, 61.1821%)
14 -> 21 (30.2665%, 60.9585%)
21 -> 14 (30.2665%, 60.3606%)
10 -> 11 (30.2189%, 62.2956%)
19 -> 15 (30.1713%, 60.0568%)
14 -> 17 (30.1396%, 60.7029%)
18 -> 7 (30.0603%, 61.8473%)
4 -> 5 (30.0127%, 61.0717%)
20 -> 13 (30.0127%, 60.3124%)
10 -> 20 (29.981%, 61.8051%)
20 -> 10 (29.981%, 60.2486%)
18 -> 1 (29.9651%, 61.6514%)
4 -> 20 (29.9651%, 60.9748%)
20 -> 4 (29.9651%, 60.2168%)
4 -> 13 (29.9492%, 60.9425%)
14 -> 19 (29.9016%, 60.2236%)
12 -> 1 (29.8541%, 62.8171%)
18 -> 20 (29.6003%, 60.9008%)
2 -> 16 (29.3147%, 63.9668%)
18 -> 5 (29.2513%, 60.1828%)
12 -> 9 (29.2354%, 61.5154%)
12 -> 19 (28.5216%, 60.0134%)
2 -> 10 (28.2519%, 61.6476%)
2 -> 11 (28.1567%, 61.4399%)
1,3 -> 9 (21.9543%, 71.1202%)
1,9 -> 3 (21.9543%, 64.7334%)
3,9 -> 1 (21.9543%, 65.4064%)
1,3 -> 12 (20.3363%, 65.8787%)
1,12 -> 3 (20.3363%, 68.119%)
3,12 -> 1 (20.3363%, 64.9772%)
1,5 -> 15 (20.2887%, 62.4207%)
5,15 -> 1 (20.2887%, 76.2217%)
1,7 -> 15 (20.2411%, 85.8104%)
7,15 -> 1 (20.2411%, 64.2497%)
1,3 -> 15 (20.0666%, 65.0051%)
3,15 -> 1 (20.0666%, 65.072%)
1,8 -> 12 (20.0666%, 65.612%)
1,12 -> 8 (20.0666%, 67.2157%)
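For reference, each rule above is read as antecedent -> consequent, with support (the fraction of transactions containing every item in the rule) and confidence (that support divided by the support of the antecedent). A minimal, self-contained sketch of the two measures on a made-up transaction list (illustrative only; it does not reproduce the numbers above and is not the code that generated them):

# Hypothetical illustration of support and confidence (toy data, not the course dataset)
transactions = [
    {1, 9, 15},
    {1, 3, 9},
    {3, 9, 12},
    {1, 3, 15},
    {9, 15, 22},
]
N = len(transactions)

def support(itemset):
    # Fraction of transactions that contain every item in `itemset`
    return sum(itemset.issubset(t) for t in transactions) / N

def confidence(antecedent, consequent):
    # Support of the combined itemset divided by support of the antecedent
    return support(antecedent | consequent) / support(antecedent)

print("1 -> 9 ({:.1%}, {:.1%})".format(support({1, 9}), confidence({1}, {9})))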
"""
This is a helper function that can help you and the TAs debug your Python setup.
"""
import os
import sklearn
@@ -9,17 +9,15 @@ from dtuimldmtools import clusterval
filename = importlib_resources.files("dtuimldmtools").joinpath("data/synth1.mat")
# Load Matlab data file and extract variables of interest
mat_data = loadmat(filename)
X = mat_data["X"]
y = mat_data["y"].squeeze()
attributeNames = [name[0] for name in mat_data["attributeNames"].squeeze()]
classNames = [name[0][0] for name in mat_data["classNames"]]
N, M = X.shape
C = len(classNames)
# Maximum number of clusters:
K = 10
@@ -38,11 +36,11 @@ for k in range(K - 1):
# Plot results:
figure(1)
title("Cluster validity")
plot(np.arange(K - 1) + 2, Rand)
plot(np.arange(K - 1) + 2, Jaccard)
plot(np.arange(K - 1) + 2, NMI)
legend(["Rand", "Jaccard", "NMI"], loc=4)
show()
print("Ran Exercise 10.1.3")
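For orientation, the loop elided by the @@ -38,11 +36,11 @@ hunk above evaluates one clustering per number of clusters and fills the Rand, Jaccard and NMI arrays that are plotted. A minimal sketch of what such a loop could look like; the use of sklearn.cluster.k_means and the assumption that clusterval(y, cls) returns (Rand, Jaccard, NMI) in that order are inferred from the plot legend, not quoted from the repository:

# Hypothetical sketch of the elided evaluation loop (assumed API, see note above)
import numpy as np
from sklearn.cluster import k_means
from dtuimldmtools import clusterval

# X, y and K are defined earlier in the script
Rand = np.zeros(K - 1)
Jaccard = np.zeros(K - 1)
NMI = np.zeros(K - 1)
for k in range(K - 1):
    # Cluster into k+2 clusters, i.e. 2, 3, ..., K
    centroids, cls, inertia = k_means(X, n_clusters=k + 2)
    # Compare the clustering to the known labels y
    Rand[k], Jaccard[k], NMI[k] = clusterval(y, cls)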
@@ -7,12 +7,13 @@ from sklearn.cluster import k_means
filename = importlib_resources.files("dtuimldmtools").joinpath("data/wildfaces.mat")
# Load Matlab data file and extract variables of interest
mat_data = loadmat(filename)
# filename = importlib_resources.files("dtuimldmtools").joinpath("data/digits.mat")  # <-- uncomment this for using the digits dataset
X = mat_data["X"]
N, M = X.shape
# Image resolution and number of colors
x = 40 #<-- change this for using the digits dataset
@@ -48,11 +49,9 @@ for k in range(K):
# Squeeze out singleton dimension
# and flip the image (cancel out previous transpose)
img = np.squeeze(img).T
plt.imshow(img, interpolation="None", cmap=cmap)
plt.xticks([])
plt.yticks([])
if k == np.floor((n2 - 1) / 2):
    plt.title("Centroids")
# Plot few randomly selected faces and their nearest centroids
L = 5 # number of images to plot
@@ -63,19 +62,16 @@ for l in range(L):
img = np.resize(X[j[l], :], (c, x, y)).T
if c == 1:
    img = np.squeeze(img).T
plt.imshow(img, interpolation="None", cmap=cmap)
plt.xticks([])
plt.yticks([])
if l == np.floor((L - 1) / 2):
    plt.title("Randomly selected faces and their centroids")
plt.subplot(2, L, L + l + 1)
img = np.resize(centroids[cls[j[l]], :], (c, x, y)).T
if c == 1:
    img = np.squeeze(img).T
plt.imshow(img, interpolation="None", cmap=cmap)
plt.xticks([])
plt.yticks([])
plt.show()
print("Ran Exercise 10.1.5")
@@ -39,46 +39,39 @@ plt.subplot(3, 1, 1)
img_hw = int(np.sqrt(len(X[0])))
img = np.reshape(X[i], (img_hw,img_hw))
if transpose:
    img = img.T
plt.imshow(img, cmap=plt.cm.gray)
plt.xticks([])
plt.yticks([])
plt.title("Query image")
plt.ylabel("image #{0}".format(i))
for ms in range(5):
# 5 most similar images found
plt.subplot(3,5,6+ms)
im_id = sim_to_index[-ms-1][1]
im_sim = sim_to_index[-ms-1][0]
img = np.reshape(X[im_id],(img_hw,img_hw))
if transpose:
    img = img.T
plt.imshow(img, cmap=plt.cm.gray)
plt.xlabel("sim={0:.3f}".format(im_sim))
plt.ylabel("image #{0}".format(im_id))
plt.xticks([])
plt.yticks([])
if ms == 2:
    plt.title("Most similar images")
# 5 least similar images found
plt.subplot(3,5,11+ms)
im_id = sim_to_index[ms][1]
im_sim = sim_to_index[ms][0]
img = np.reshape(X[im_id],(img_hw,img_hw))
if transpose:
    img = img.T
plt.imshow(img, cmap=plt.cm.gray)
plt.xlabel("sim={0:.3f}".format(im_sim))
plt.ylabel("image #{0}".format(im_id))
plt.xticks([])
plt.yticks([])
if ms == 2:
    plt.title("Least similar images")
plt.show()
print("Ran Exercise 3.3.1")
@@ -11,9 +11,9 @@ from dtuimldmtools import bmplot, feature_selector_lr
filename = importlib_resources.files("dtuimldmtools").joinpath("data/body.mat")
# Load data from matlab file
mat_data = loadmat(filename)
X = mat_data["X"]
y = mat_data["y"].squeeze()
attributeNames = [name[0] for name in mat_data["attributeNames"][0]]
N, M = X.shape
@@ -33,6 +33,7 @@ Error_test_nofeatures = np.empty((K, 1))
k=0
for train_index, test_index in CV.split(X):
# extract training and test set for current CV fold
X_train = X[train_index,:]
y_train = y[train_index]
@@ -41,9 +42,7 @@ for train_index, test_index in CV.split(X):
internal_cross_validation = 10
# Compute squared error without using the input data at all
Error_train_nofeatures[k] = (
    np.square(y_train - y_train.mean()).sum() / y_train.shape[0]
)
Error_test_nofeatures[k] = (
    np.square(y_test - y_test.mean()).sum() / y_test.shape[0]
)
# Compute squared error with all features selected (no feature selection)
@@ -52,89 +51,56 @@ for train_index, test_index in CV.split(X):
Error_test[k] = np.square(y_test-m.predict(X_test)).sum()/y_test.shape[0]
# Compute squared error with feature subset selection
textout = ""
selected_features, features_record, loss_record = feature_selector_lr(
    X_train, y_train, internal_cross_validation, display=textout
)
Features[selected_features,k] = 1
# .. alternatively you could use module sklearn.feature_selection
if len(selected_features) == 0:
    print(
        "No features were selected, i.e. the data (X) in the fold cannot describe the outcomes (y)."
    )
else:
m = lm.LinearRegression(fit_intercept=True).fit(
    X_train[:, selected_features], y_train
)
Error_train_fs[k] = (
    np.square(y_train - m.predict(X_train[:, selected_features])).sum()
    / y_train.shape[0]
)
Error_test_fs[k] = (
    np.square(y_test - m.predict(X_test[:, selected_features])).sum()
    / y_test.shape[0]
)
figure(k)
subplot(1, 2, 1)
plot(range(1, len(loss_record)), loss_record[1:])
xlabel("Iteration")
ylabel("Squared error (crossvalidation)")
subplot(1, 3, 3)
bmplot(
    attributeNames, range(1, features_record.shape[1]), -features_record[:, 1:]
)
clim(-1.5, 0)
xlabel("Iteration")
print("Cross validation fold {0}/{1}".format(k + 1, K))
print("Train indices: {0}".format(train_index))
print("Test indices: {0}".format(test_index))
print("Features no: {0}\n".format(selected_features.size))
k += 1
# Display results
print("\n")
print("Linear regression without feature selection:\n")
print("- Training error: {0}".format(Error_train.mean()))
print("- Test error: {0}".format(Error_test.mean()))
print(
    "- R^2 train: {0}".format(
        (Error_train_nofeatures.sum() - Error_train.sum())
        / Error_train_nofeatures.sum()
    )
)
print(
    "- R^2 test: {0}".format(
        (Error_test_nofeatures.sum() - Error_test.sum()) / Error_test_nofeatures.sum()
    )
)
print("Linear regression with feature selection:\n")
print("- Training error: {0}".format(Error_train_fs.mean()))
print("- Test error: {0}".format(Error_test_fs.mean()))
print(
    "- R^2 train: {0}".format(
        (Error_train_nofeatures.sum() - Error_train_fs.sum())
        / Error_train_nofeatures.sum()
    )
)
print(
    "- R^2 test: {0}".format(
        (Error_test_nofeatures.sum() - Error_test_fs.sum())
        / Error_test_nofeatures.sum()
    )
)
figure(k)
subplot(1, 3, 2)
bmplot(attributeNames, range(1, Features.shape[1] + 1), -Features)
clim(-1.5, 0)
xlabel("Crossvalidation fold")
ylabel("Attribute")
# Inspect selected feature coefficients effect on the entire dataset and
@@ -144,9 +110,7 @@ ylabel("Attribute")
f = 2  # cross-validation fold to inspect
ff = Features[:, f - 1].nonzero()[0]
if len(ff) == 0:
    print(
        "\nNo features were selected, i.e. the data (X) in the fold cannot describe the outcomes (y)."
    )
else:
m = lm.LinearRegression(fit_intercept=True).fit(X[:,ff], y)
@@ -154,18 +118,14 @@ else:
residual=y-y_est
figure(k+1, figsize=(12,6))
title(
    "Residual error vs. Attributes for features selected in cross-validation fold {0}".format(
        f
    )
)
for i in range(0, len(ff)):
    subplot(2, int(np.ceil(len(ff) / 2)), i + 1)
    plot(X[:, ff[i]], residual, ".")
    xlabel(attributeNames[ff[i]])
    ylabel("residual error")
show()
print("Ran Exercise 6.2.1")
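As the comment inside the fold loop above notes, sklearn.feature_selection offers an alternative to the toolbox's feature_selector_lr. A minimal sketch using SequentialFeatureSelector for forward selection with internal cross-validation; the parameter choices are illustrative (and n_features_to_select="auto" with tol needs a reasonably recent scikit-learn), not taken from the repository:

# Hypothetical alternative to feature_selector_lr (sketch, not the repo's code)
import numpy as np
import sklearn.linear_model as lm
from sklearn.feature_selection import SequentialFeatureSelector

# X_train, y_train and internal_cross_validation come from the fold loop above
sfs = SequentialFeatureSelector(
    lm.LinearRegression(fit_intercept=True),
    n_features_to_select="auto",  # stop when the CV score stops improving by `tol`
    tol=1e-4,
    direction="forward",
    scoring="neg_mean_squared_error",
    cv=internal_cross_validation,
)
sfs.fit(X_train, y_train)
selected_features = np.where(sfs.get_support())[0]  # column indices of the kept features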
@@ -17,9 +17,7 @@ from scipy.io import loadmat
from sklearn.metrics import confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
filename = importlib_resources.files("dtuimldmtools").joinpath("data/synth3.mat")  # <-- change the number to change dataset
# Load Matlab data file and extract variables of interest
mat_data = loadmat(filename)
@@ -13,9 +13,8 @@ loss = 2
X, y = X[:, :10], X[:, 10:]
# This script creates predictions from three KNN classifiers using cross-validation
K = 10  # We presently set J=K
m = 1
J = 0
r = []
kf = model_selection.KFold(n_splits=K)
@@ -35,9 +34,7 @@ for dm in range(m):
y_true.append(y_test)
yhat.append(np.concatenate([yhatA, yhatB], axis=1))
r.append(
    np.mean(np.abs(yhatA - y_test) ** loss - np.abs(yhatB - y_test) ** loss)
)
# Initialize parameters and run test appropriate for setup II
alpha = 0.05
@@ -53,9 +50,7 @@ if m == 1:
zB = np.abs(y_true - yhat[:, 1]) ** loss
z = zA - zB
CI_setupI = st.t.interval(
    1 - alpha, len(z) - 1, loc=np.mean(z), scale=st.sem(z)
)  # Confidence interval
p_setupI = st.t.cdf(-np.abs(np.mean(z)) / st.sem(z), df=len(z) - 1) # p-value
print( [p_setupII, p_setupI] )
# exercise 9.1.1
import importlib_resources
import numpy as np
from matplotlib.pyplot import figure, show
from scipy.io import loadmat
from sklearn.linear_model import LogisticRegression

from dtuimldmtools import BinClassifierEnsemble, bootstrap, dbplot, dbprobplot
filename = importlib_resources.files("dtuimldmtools").joinpath("data/synth5.mat")
# Load Matlab data file and extract variables of interest
mat_data = loadmat(filename)
X = mat_data["X"]
y = mat_data["y"].squeeze()
attributeNames = [name[0] for name in mat_data["attributeNames"].squeeze()]
classNames = [name[0][0] for name in mat_data["classNames"]]
N, M = X.shape
C = len(classNames)
@@ -33,6 +31,7 @@ votes = np.zeros((N,))
# For each round of bagging
for l in range(L):
# Extract training set by random sampling with replacement from X and y
X_train, y_train = bootstrap(X, y, N, weights)
@@ -44,21 +43,19 @@ for l in range(L):
votes = votes + y_est
ErrorRate = (y != y_est).sum(dtype=float) / N
print("Error rate: {:2.2f}%".format(ErrorRate * 100))
# Estimated value of class labels (using 0.5 as threshold) by majority voting
y_est_ensemble = votes>(L/2)
# Compute error rate
ErrorRate = (y != y_est_ensemble).sum(dtype=float) / N
print("Error rate: {:3.2f}%".format(ErrorRate * 100))
ce = BinClassifierEnsemble(logits)
figure(1)
dbprobplot(ce, X, y, "auto", resolution=200)
figure(2)
dbplot(ce, X, y, "auto", resolution=200)
show()
print("Ran Exercise 9.1.1")
# exercise 9.1.2
import importlib_resources
import matplotlib.pyplot as plt
import numpy as np
from scipy.io import loadmat
from sklearn.linear_model import LogisticRegression

from dtuimldmtools import BinClassifierEnsemble, bootstrap, dbplot, dbprobplot
filename = importlib_resources.files("dtuimldmtools").joinpath("data/synth5.mat")
# Load Matlab data file and extract variables of interest
mat_data = loadmat(filename)
X = mat_data["X"]
y = mat_data["y"].squeeze()
attributeNames = [name[0] for name in mat_data["attributeNames"].squeeze()]
classNames = [name[0][0] for name in mat_data["classNames"]]
N, M = X.shape
C = len(classNames)
@@ -38,12 +34,12 @@ y_all = np.zeros((N, L))
y = y > 0.5
# For each round of boosting
for l in range(L):
# Extract training set by random sampling with replacement from X and y
while True:
    # Not pretty, but logistic regression fails if presented with fewer than two classes
    X_train, y_train = bootstrap(X, y, N, weights)
    if not (all(y_train == 0) or all(y_train == 1)):
        break
# Fit logistic regression model to training data and save result
# turn off regularization with C.
@@ -67,7 +63,7 @@ for l in range(L):
votes = votes + y_est
alpha[l] = alphai
print("Error rate: {:2.2f}%".format(ErrorRate * 100))
# Estimated value of class labels (using 0.5 as threshold) by majority voting
@@ -80,27 +76,25 @@ ErrorRateEnsemble = sum(y_est_ensemble != y) / N
# Compute error rate
#ErrorRate = (y!=y_est_ensemble).sum(dtype=float)/N
print("Error rate for ensemble classifier: {:.1f}%".format(ErrorRateEnsemble * 100))
ce = BinClassifierEnsemble(logits, alpha)
# ce = BinClassifierEnsemble(logits)  # What happens if alpha is not included?
plt.figure(1)
dbprobplot(ce, X, y, "auto", resolution=200)
plt.figure(2)
dbplot(ce, X, y, "auto", resolution=200)
#plt.figure(3); plt.plot(alpha);
#%%
plt.figure(4, figsize=(8, 8))
for i in range(2):
    plt.plot(X[(y_est_ensemble == i), 0], X[(y_est_ensemble == i), 1], "br"[i] + "o")
# Uncomment the lines below to investigate misclassifications
# for i in range(2):
#     plt.plot(X[(y == i), 0], X[(y == i), 1], "br"[i] + ".")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.show()
print("Ran Exercise 9.1.2")
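The @@ -67,7 +63,7 @@ hunk above elides how alphai and the observation weights are updated in each boosting round. In a standard AdaBoost-style scheme, which this exercise appears to follow, the update is driven by the weighted error; a minimal sketch under that assumption (the function below is hypothetical, not the repository's exact code):

import numpy as np

def adaboost_update(weights, y, y_est):
    # One AdaBoost-style re-weighting round (sketch, assumes binary labels in {0, 1})
    miss = y_est != y
    eps = np.sum(weights * miss)              # weighted error of this round's classifier
    alphai = 0.5 * np.log((1 - eps) / eps)    # importance assigned to the classifier
    # Misclassified points get exponentially more weight, correct ones less
    weights = weights * np.exp(alphai * np.where(miss, 1.0, -1.0))
    return weights / weights.sum(), alphai    # renormalize so the weights sum to one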
# exercise 9.1.3
import importlib_resources
from matplotlib.pyplot import figure, show
from scipy.io import loadmat
from sklearn.ensemble import RandomForestClassifier

from dtuimldmtools import dbplot, dbprobplot
filename = importlib_resources.files("dtuimldmtools").joinpath("data/synth7.mat")
# Load Matlab data file and extract variables of interest
mat_data = loadmat(filename)
X = mat_data["X"]
y = mat_data["y"].squeeze()
attributeNames = [name[0] for name in mat_data["attributeNames"].squeeze()]
classNames = [name[0][0] for name in mat_data["classNames"]]
N, M = X.shape
C = len(classNames)
@@ -31,14 +28,12 @@ y_est_prob = rf_classifier.predict_proba(X).T
# Compute classification error
ErrorRate = (y != y_est).sum(dtype=float) / N
print("Error rate: {:.2f}%".format(ErrorRate * 100))
# Plot decision boundaries
figure(1)
dbprobplot(rf_classifier, X, y, "auto", resolution=400)
figure(2)
dbplot(rf_classifier, X, y, "auto", resolution=400)
show()
print("Ran Exercise 9.1.3")
# exercise 9.2.1
import importlib_resources
from matplotlib.pyplot import figure, show
# import numpy as np
from scipy.io import loadmat
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold

from dtuimldmtools import confmatplot, rocplot
filename = importlib_resources.files("dtuimldmtools").joinpath("data/wine2.mat")
# Load Matlab data file and extract variables of interest
mat_data = loadmat(filename)
X = mat_data["X"]
y = mat_data["y"].squeeze()
attributeNames = [name[0] for name in mat_data["attributeNames"][0]]
classNames = [name[0][0] for name in mat_data["classNames"]]
N, M = X.shape
C = len(classNames)
@@ -49,4 +44,4 @@ for train_index, test_index in CV.split(X, y):
show()
print("Ran Exercise 9.2.1")
# exercise 9.2.2
import importlib_resources
from matplotlib.pyplot import figure, show
# import numpy as np
from scipy.io import loadmat
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold

from dtuimldmtools import confmatplot, rocplot
filename = importlib_resources.files("dtuimldmtools").joinpath("data/wine2.mat")
# Load Matlab data file and extract variables of interest
mat_data = loadmat(filename)
X = mat_data["X"]
y = mat_data["y"].squeeze()
attributeNames = [name[0] for name in mat_data["attributeNames"][0]]
classNames = [name[0][0] for name in mat_data["classNames"]]
attribute_included = 10 # alcohol contents
X = X[:,attribute_included].reshape(-1,1)
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX