Commit 822fecb6 authored by bjje

Minor updates for public repo

parent 69260c41
Showing changes with 304 additions and 475 deletions
Rule (Support, Confidence)
11 -> 13 (39.3242%, 74.9395%)
13 -> 11 (39.3242%, 78.1033%)
15 -> 22 (38.1187%, 75.2348%)
22 -> 15 (38.1187%, 76.4556%)
16 -> 21 (37.5952%, 76.2058%)
21 -> 16 (37.5952%, 74.9763%)
9 -> 15 (37.2621%, 72.366%)
15 -> 9 (37.2621%, 73.5441%)
12 -> 14 (36.5006%, 76.8024%)
14 -> 12 (36.5006%, 73.5144%)
7 -> 13 (35.2951%, 70.0346%)
13 -> 7 (35.2951%, 70.1008%)
10 -> 16 (35.1047%, 72.3676%)
16 -> 10 (35.1047%, 71.1576%)
7 -> 11 (34.7874%, 69.0274%)
11 -> 7 (34.7874%, 66.2938%)
8 -> 14 (34.5495%, 69.6514%)
14 -> 8 (34.5495%, 69.5847%)
1 -> 15 (34.1529%, 63.0454%)
15 -> 1 (34.1529%, 67.4076%)
1 -> 9 (33.915%, 62.6061%)
9 -> 1 (33.915%, 65.8657%)
9 -> 22 (33.677%, 65.4036%)
22 -> 9 (33.677%, 67.5469%)
3 -> 9 (33.566%, 66.0012%)
9 -> 3 (33.566%, 65.1879%)
4 -> 11 (32.9156%, 66.9787%)
11 -> 4 (32.9156%, 62.7267%)
5 -> 1 (32.5032%, 61.8286%)
10 -> 21 (32.3287%, 66.6449%)
21 -> 10 (32.3287%, 64.4733%)
8 -> 12 (31.9162%, 64.3428%)
12 -> 8 (31.9162%, 67.1562%)
9 -> 19 (31.71%, 61.5835%)
19 -> 9 (31.71%, 63.1197%)
7 -> 15 (31.5038%, 62.5118%)
15 -> 7 (31.5038%, 62.1791%)
3 -> 12 (31.2976%, 61.5409%)
12 -> 3 (31.2976%, 65.8545%)
17 -> 19 (31.2341%, 60.7716%)
19 -> 17 (31.2341%, 62.1724%)
4 -> 10 (31.2183%, 63.5249%)
10 -> 4 (31.2183%, 64.3558%)
3 -> 19 (31.0596%, 61.073%)
19 -> 3 (31.0596%, 61.8251%)
8 -> 17 (31.0596%, 62.6159%)
17 -> 8 (31.0596%, 60.4321%)
7 -> 20 (30.8852%, 61.2842%)
20 -> 7 (30.8852%, 62.0657%)
3 -> 1 (30.8693%, 60.6987%)
3 -> 15 (30.8376%, 60.6363%)
15 -> 3 (30.8376%, 60.8641%)
20 -> 11 (30.7582%, 61.8106%)
8 -> 19 (30.7265%, 61.9444%)
19 -> 8 (30.7265%, 61.162%)
8 -> 1 (30.5838%, 61.6565%)
19 -> 1 (30.5679%, 60.8462%)
13 -> 22 (30.4727%, 60.523%)
22 -> 13 (30.4727%, 61.1199%)
14 -> 3 (30.4569%, 61.3419%)
8 -> 16 (30.441%, 61.3687%)
16 -> 8 (30.441%, 61.7042%)
14 -> 1 (30.3775%, 61.1821%)
14 -> 21 (30.2665%, 60.9585%)
21 -> 14 (30.2665%, 60.3606%)
10 -> 11 (30.2189%, 62.2956%)
19 -> 15 (30.1713%, 60.0568%)
14 -> 17 (30.1396%, 60.7029%)
18 -> 7 (30.0603%, 61.8473%)
4 -> 5 (30.0127%, 61.0717%)
20 -> 13 (30.0127%, 60.3124%)
10 -> 20 (29.981%, 61.8051%)
20 -> 10 (29.981%, 60.2486%)
18 -> 1 (29.9651%, 61.6514%)
4 -> 20 (29.9651%, 60.9748%)
20 -> 4 (29.9651%, 60.2168%)
4 -> 13 (29.9492%, 60.9425%)
14 -> 19 (29.9016%, 60.2236%)
12 -> 1 (29.8541%, 62.8171%)
18 -> 20 (29.6003%, 60.9008%)
2 -> 16 (29.3147%, 63.9668%)
18 -> 5 (29.2513%, 60.1828%)
12 -> 9 (29.2354%, 61.5154%)
12 -> 19 (28.5216%, 60.0134%)
2 -> 10 (28.2519%, 61.6476%)
2 -> 11 (28.1567%, 61.4399%)
1,3 -> 9 (21.9543%, 71.1202%)
1,9 -> 3 (21.9543%, 64.7334%)
3,9 -> 1 (21.9543%, 65.4064%)
1,3 -> 12 (20.3363%, 65.8787%)
1,12 -> 3 (20.3363%, 68.119%)
3,12 -> 1 (20.3363%, 64.9772%)
1,5 -> 15 (20.2887%, 62.4207%)
5,15 -> 1 (20.2887%, 76.2217%)
1,7 -> 15 (20.2411%, 85.8104%)
7,15 -> 1 (20.2411%, 64.2497%)
1,3 -> 15 (20.0666%, 65.0051%)
3,15 -> 1 (20.0666%, 65.072%)
1,8 -> 12 (20.0666%, 65.612%)
1,12 -> 8 (20.0666%, 67.2157%)
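For reference, each rule above is read as antecedent -> consequent, with support (the fraction of transactions containing every item in the rule) and confidence (that support divided by the support of the antecedent). A minimal, self-contained sketch of the two measures on a made-up transaction list (illustrative only; it does not reproduce the numbers above and is not the code that generated them):

# Hypothetical illustration of support and confidence (toy data, not the course dataset)
transactions = [
    {1, 9, 15},
    {1, 3, 9},
    {3, 9, 12},
    {1, 3, 15},
    {9, 15, 22},
]
N = len(transactions)

def support(itemset):
    # Fraction of transactions that contain every item in `itemset`
    return sum(itemset.issubset(t) for t in transactions) / N

def confidence(antecedent, consequent):
    # Support of the combined itemset divided by support of the antecedent
    return support(antecedent | consequent) / support(antecedent)

print("1 -> 9 ({:.1%}, {:.1%})".format(support({1, 9}), confidence({1}, {9})))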
"""
This is a helper function that can help you and the TAs debug your Python setup.
"""
import os
import sklearn
@@ -9,17 +9,15 @@ from dtuimldmtools import clusterval
filename = importlib_resources.files("dtuimldmtools").joinpath("data/synth1.mat")
# Load Matlab data file and extract variables of interest
mat_data = loadmat(filename)
X = mat_data["X"]
y = mat_data["y"].squeeze()
attributeNames = [name[0] for name in mat_data["attributeNames"].squeeze()]
classNames = [name[0][0] for name in mat_data["classNames"]]
N, M = X.shape
C = len(classNames)
# Maximum number of clusters:
K = 10
@@ -38,11 +36,11 @@ for k in range(K - 1):
# Plot results:
figure(1)
title("Cluster validity")
plot(np.arange(K - 1) + 2, Rand)
plot(np.arange(K - 1) + 2, Jaccard)
plot(np.arange(K - 1) + 2, NMI)
legend(["Rand", "Jaccard", "NMI"], loc=4)
show()
print("Ran Exercise 10.1.3")
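For orientation, the loop elided by the @@ -38,11 +36,11 @@ hunk above evaluates one clustering per number of clusters and fills the Rand, Jaccard and NMI arrays that are plotted. A minimal sketch of what such a loop could look like; the use of sklearn.cluster.k_means and the assumption that clusterval(y, cls) returns (Rand, Jaccard, NMI) in that order are inferred from the plot legend, not quoted from the repository:

# Hypothetical sketch of the elided evaluation loop (assumed API, see note above)
import numpy as np
from sklearn.cluster import k_means
from dtuimldmtools import clusterval

# X, y and K are defined earlier in the script
Rand = np.zeros(K - 1)
Jaccard = np.zeros(K - 1)
NMI = np.zeros(K - 1)
for k in range(K - 1):
    # Cluster into k+2 clusters, i.e. 2, 3, ..., K
    centroids, cls, inertia = k_means(X, n_clusters=k + 2)
    # Compare the clustering to the known labels y
    Rand[k], Jaccard[k], NMI[k] = clusterval(y, cls)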
@@ -7,12 +7,13 @@ from sklearn.cluster import k_means
filename = importlib_resources.files("dtuimldmtools").joinpath("data/wildfaces.mat")
# Load Matlab data file and extract variables of interest
mat_data = loadmat(filename)
# filename = importlib_resources.files("dtuimldmtools").joinpath("data/digits.mat")  # <-- uncomment this for using the digits dataset
X = mat_data["X"]
N, M = X.shape
# Image resolution and number of colors
x = 40 #<-- change this for using the digits dataset
@@ -48,11 +49,9 @@ for k in range(K):
# Squeeze out singleton dimension
# and flip the image (cancel out previous transpose)
img = np.squeeze(img).T
plt.imshow(img, interpolation="None", cmap=cmap)
plt.xticks([])
plt.yticks([])
if k == np.floor((n2 - 1) / 2):
    plt.title("Centroids")
# Plot few randomly selected faces and their nearest centroids
L = 5 # number of images to plot
@@ -63,19 +62,16 @@ for l in range(L):
img = np.resize(X[j[l], :], (c, x, y)).T
if c == 1:
    img = np.squeeze(img).T
plt.imshow(img, interpolation="None", cmap=cmap)
plt.xticks([])
plt.yticks([])
if l == np.floor((L - 1) / 2):
    plt.title("Randomly selected faces and their centroids")
plt.subplot(2, L, L + l + 1)
img = np.resize(centroids[cls[j[l]], :], (c, x, y)).T
if c == 1:
    img = np.squeeze(img).T
plt.imshow(img, interpolation="None", cmap=cmap)
plt.xticks([])
plt.yticks([])
plt.show()
print("Ran Exercise 10.1.5")
@@ -39,46 +39,39 @@ plt.subplot(3, 1, 1)
img_hw = int(np.sqrt(len(X[0])))
img = np.reshape(X[i], (img_hw,img_hw))
if transpose:
    img = img.T
plt.imshow(img, cmap=plt.cm.gray)
plt.xticks([])
plt.yticks([])
plt.title("Query image")
plt.ylabel("image #{0}".format(i))
for ms in range(5):
# 5 most similar images found
plt.subplot(3,5,6+ms)
im_id = sim_to_index[-ms-1][1]
im_sim = sim_to_index[-ms-1][0]
img = np.reshape(X[im_id],(img_hw,img_hw))
if transpose:
    img = img.T
plt.imshow(img, cmap=plt.cm.gray)
plt.xlabel("sim={0:.3f}".format(im_sim))
plt.ylabel("image #{0}".format(im_id))
plt.xticks([])
plt.yticks([])
if ms == 2:
    plt.title("Most similar images")
# 5 least similar images found
plt.subplot(3,5,11+ms)
im_id = sim_to_index[ms][1]
im_sim = sim_to_index[ms][0]
img = np.reshape(X[im_id],(img_hw,img_hw))
if transpose:
    img = img.T
plt.imshow(img, cmap=plt.cm.gray)
plt.xlabel("sim={0:.3f}".format(im_sim))
plt.ylabel("image #{0}".format(im_id))
plt.xticks([])
plt.yticks([])
if ms == 2:
    plt.title("Least similar images")
plt.show()
print("Ran Exercise 3.3.1")
@@ -11,9 +11,9 @@ from dtuimldmtools import bmplot, feature_selector_lr
filename = importlib_resources.files("dtuimldmtools").joinpath("data/body.mat")
# Load data from matlab file
mat_data = loadmat(filename)
X = mat_data["X"]
y = mat_data["y"].squeeze()
attributeNames = [name[0] for name in mat_data["attributeNames"][0]]
N, M = X.shape
@@ -33,6 +33,7 @@ Error_test_nofeatures = np.empty((K, 1))
k=0
for train_index, test_index in CV.split(X):
# extract training and test set for current CV fold
X_train = X[train_index,:]
y_train = y[train_index]
@@ -41,9 +42,7 @@ for train_index, test_index in CV.split(X):
internal_cross_validation = 10
# Compute squared error without using the input data at all
Error_train_nofeatures[k] = (
    np.square(y_train - y_train.mean()).sum() / y_train.shape[0]
)
Error_test_nofeatures[k] = (
    np.square(y_test - y_test.mean()).sum() / y_test.shape[0]
)
# Compute squared error with all features selected (no feature selection)
@@ -52,89 +51,56 @@ for train_index, test_index in CV.split(X):
Error_test[k] = np.square(y_test-m.predict(X_test)).sum()/y_test.shape[0]
# Compute squared error with feature subset selection
textout = ""
selected_features, features_record, loss_record = feature_selector_lr(
    X_train, y_train, internal_cross_validation, display=textout
)
Features[selected_features,k] = 1
# .. alternatively you could use module sklearn.feature_selection
if len(selected_features) == 0:
    print(
        "No features were selected, i.e. the data (X) in the fold cannot describe the outcomes (y)."
    )
else:
m = lm.LinearRegression(fit_intercept=True).fit(
    X_train[:, selected_features], y_train
)
Error_train_fs[k] = (
    np.square(y_train - m.predict(X_train[:, selected_features])).sum()
    / y_train.shape[0]
)
Error_test_fs[k] = (
    np.square(y_test - m.predict(X_test[:, selected_features])).sum()
    / y_test.shape[0]
)
figure(k)
subplot(1, 2, 1)
plot(range(1, len(loss_record)), loss_record[1:])
xlabel("Iteration")
ylabel("Squared error (crossvalidation)")
subplot(1, 3, 3)
bmplot(
    attributeNames, range(1, features_record.shape[1]), -features_record[:, 1:]
)
clim(-1.5, 0)
xlabel("Iteration")
print("Cross validation fold {0}/{1}".format(k + 1, K))
print("Train indices: {0}".format(train_index))
print("Test indices: {0}".format(test_index))
print("Features no: {0}\n".format(selected_features.size))
k += 1
# Display results
print("\n")
print("Linear regression without feature selection:\n")
print("- Training error: {0}".format(Error_train.mean()))
print("- Test error: {0}".format(Error_test.mean()))
print(
    "- R^2 train: {0}".format(
        (Error_train_nofeatures.sum() - Error_train.sum())
        / Error_train_nofeatures.sum()
    )
)
print(
    "- R^2 test: {0}".format(
        (Error_test_nofeatures.sum() - Error_test.sum()) / Error_test_nofeatures.sum()
    )
)
print("Linear regression with feature selection:\n")
print("- Training error: {0}".format(Error_train_fs.mean()))
print("- Test error: {0}".format(Error_test_fs.mean()))
print(
    "- R^2 train: {0}".format(
        (Error_train_nofeatures.sum() - Error_train_fs.sum())
        / Error_train_nofeatures.sum()
    )
)
print(
    "- R^2 test: {0}".format(
        (Error_test_nofeatures.sum() - Error_test_fs.sum())
        / Error_test_nofeatures.sum()
    )
)
figure(k)
subplot(1, 3, 2)
bmplot(attributeNames, range(1, Features.shape[1] + 1), -Features)
clim(-1.5, 0)
xlabel("Crossvalidation fold")
ylabel("Attribute")
# Inspect selected feature coefficients effect on the entire dataset and
@@ -144,9 +110,7 @@ ylabel("Attribute")
f = 2  # cross-validation fold to inspect
ff = Features[:, f - 1].nonzero()[0]
if len(ff) == 0:
    print(
        "\nNo features were selected, i.e. the data (X) in the fold cannot describe the outcomes (y)."
    )
else:
m = lm.LinearRegression(fit_intercept=True).fit(X[:,ff], y)
@@ -154,18 +118,14 @@ else:
residual=y-y_est
figure(k+1, figsize=(12,6))
title(
    "Residual error vs. Attributes for features selected in cross-validation fold {0}".format(
        f
    )
)
for i in range(0, len(ff)):
    subplot(2, int(np.ceil(len(ff) / 2)), i + 1)
    plot(X[:, ff[i]], residual, ".")
    xlabel(attributeNames[ff[i]])
    ylabel("residual error")
show()
print("Ran Exercise 6.2.1")
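As the comment inside the fold loop above notes, sklearn.feature_selection offers an alternative to the toolbox's feature_selector_lr. A minimal sketch using SequentialFeatureSelector for forward selection with internal cross-validation; the parameter choices are illustrative (and n_features_to_select="auto" with tol needs a reasonably recent scikit-learn), not taken from the repository:

# Hypothetical alternative to feature_selector_lr (sketch, not the repo's code)
import numpy as np
import sklearn.linear_model as lm
from sklearn.feature_selection import SequentialFeatureSelector

# X_train, y_train and internal_cross_validation come from the fold loop above
sfs = SequentialFeatureSelector(
    lm.LinearRegression(fit_intercept=True),
    n_features_to_select="auto",  # stop when the CV score stops improving by `tol`
    tol=1e-4,
    direction="forward",
    scoring="neg_mean_squared_error",
    cv=internal_cross_validation,
)
sfs.fit(X_train, y_train)
selected_features = np.where(sfs.get_support())[0]  # column indices of the kept features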
@@ -17,9 +17,7 @@ from scipy.io import loadmat
from sklearn.metrics import confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
filename = importlib_resources.files("dtuimldmtools").joinpath("data/synth3.mat")  # <-- change the number to change dataset
# Load Matlab data file and extract variables of interest
mat_data = loadmat(filename)
@@ -13,9 +13,8 @@ loss = 2
X, y = X[:, :10], X[:, 10:]
# This script creates predictions from three KNN classifiers using cross-validation
K = 10  # We presently set J=K
m = 1
J = 0
r = []
kf = model_selection.KFold(n_splits=K)
@@ -35,9 +34,7 @@ for dm in range(m):
y_true.append(y_test)
yhat.append(np.concatenate([yhatA, yhatB], axis=1))
r.append(
    np.mean(np.abs(yhatA - y_test) ** loss - np.abs(yhatB - y_test) ** loss)
)
# Initialize parameters and run test appropriate for setup II
alpha = 0.05
@@ -53,9 +50,7 @@ if m == 1:
zB = np.abs(y_true - yhat[:, 1]) ** loss
z = zA - zB
CI_setupI = st.t.interval(
    1 - alpha, len(z) - 1, loc=np.mean(z), scale=st.sem(z)
)  # Confidence interval
p_setupI = st.t.cdf(-np.abs(np.mean(z)) / st.sem(z), df=len(z) - 1) # p-value
print( [p_setupII, p_setupI] )
# exercise 9.1.1
import importlib_resources
import numpy as np
from matplotlib.pyplot import figure, show
from scipy.io import loadmat
from sklearn.linear_model import LogisticRegression

from dtuimldmtools import BinClassifierEnsemble, bootstrap, dbplot, dbprobplot
filename = importlib_resources.files("dtuimldmtools").joinpath("data/synth5.mat")
# Load Matlab data file and extract variables of interest
mat_data = loadmat(filename)
X = mat_data["X"]
y = mat_data["y"].squeeze()
attributeNames = [name[0] for name in mat_data["attributeNames"].squeeze()]
classNames = [name[0][0] for name in mat_data["classNames"]]
N, M = X.shape
C = len(classNames)
@@ -33,6 +31,7 @@ votes = np.zeros((N,))
# For each round of bagging
for l in range(L):
# Extract training set by random sampling with replacement from X and y
X_train, y_train = bootstrap(X, y, N, weights)
@@ -44,21 +43,19 @@ for l in range(L):
votes = votes + y_est
ErrorRate = (y != y_est).sum(dtype=float) / N
print("Error rate: {:2.2f}%".format(ErrorRate * 100))
# Estimated value of class labels (using 0.5 as threshold) by majority voting
y_est_ensemble = votes>(L/2)
# Compute error rate
ErrorRate = (y != y_est_ensemble).sum(dtype=float) / N
print("Error rate: {:3.2f}%".format(ErrorRate * 100))
ce = BinClassifierEnsemble(logits)
figure(1)
dbprobplot(ce, X, y, "auto", resolution=200)
figure(2)
dbplot(ce, X, y, "auto", resolution=200)
show()
print("Ran Exercise 9.1.1")
# exercise 9.1.2
import importlib_resources
import matplotlib.pyplot as plt
import numpy as np
from scipy.io import loadmat
from sklearn.linear_model import LogisticRegression

from dtuimldmtools import BinClassifierEnsemble, bootstrap, dbplot, dbprobplot
filename = importlib_resources.files("dtuimldmtools").joinpath("data/synth5.mat")
# Load Matlab data file and extract variables of interest
mat_data = loadmat(filename)
X = mat_data["X"]
y = mat_data["y"].squeeze()
attributeNames = [name[0] for name in mat_data["attributeNames"].squeeze()]
classNames = [name[0][0] for name in mat_data["classNames"]]
N, M = X.shape
C = len(classNames)
@@ -38,12 +34,12 @@ y_all = np.zeros((N, L))
y = y > 0.5
# For each round of boosting
for l in range(L):
# Extract training set by random sampling with replacement from X and y
while True:
    # Not pretty, but logistic regression fails if presented with fewer than two classes
    X_train, y_train = bootstrap(X, y, N, weights)
    if not (all(y_train == 0) or all(y_train == 1)):
        break
# Fit logistic regression model to training data and save result
# turn off regularization with C.
@@ -67,7 +63,7 @@ for l in range(L):
votes = votes + y_est
alpha[l] = alphai
print("Error rate: {:2.2f}%".format(ErrorRate * 100))
# Estimated value of class labels (using 0.5 as threshold) by majority voting
@@ -80,27 +76,25 @@ ErrorRateEnsemble = sum(y_est_ensemble != y) / N
# Compute error rate
#ErrorRate = (y!=y_est_ensemble).sum(dtype=float)/N
print("Error rate for ensemble classifier: {:.1f}%".format(ErrorRateEnsemble * 100))
ce = BinClassifierEnsemble(logits, alpha)
# ce = BinClassifierEnsemble(logits)  # What happens if alpha is not included?
plt.figure(1)
dbprobplot(ce, X, y, "auto", resolution=200)
plt.figure(2)
dbplot(ce, X, y, "auto", resolution=200)
#plt.figure(3); plt.plot(alpha);
#%%
plt.figure(4, figsize=(8, 8))
for i in range(2):
    plt.plot(X[(y_est_ensemble == i), 0], X[(y_est_ensemble == i), 1], "br"[i] + "o")
# Uncomment the lines below to investigate misclassifications
# for i in range(2):
#     plt.plot(X[(y == i), 0], X[(y == i), 1], "br"[i] + ".")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.show()
print("Ran Exercise 9.1.2")
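The @@ -67,7 +63,7 @@ hunk above elides how alphai and the observation weights are updated in each boosting round. In a standard AdaBoost-style scheme, which this exercise appears to follow, the update is driven by the weighted error; a minimal sketch under that assumption (the function below is hypothetical, not the repository's exact code):

import numpy as np

def adaboost_update(weights, y, y_est):
    # One AdaBoost-style re-weighting round (sketch, assumes binary labels in {0, 1})
    miss = y_est != y
    eps = np.sum(weights * miss)              # weighted error of this round's classifier
    alphai = 0.5 * np.log((1 - eps) / eps)    # importance assigned to the classifier
    # Misclassified points get exponentially more weight, correct ones less
    weights = weights * np.exp(alphai * np.where(miss, 1.0, -1.0))
    return weights / weights.sum(), alphai    # renormalize so the weights sum to one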
# exercise 9.1.3
import importlib_resources
from matplotlib.pyplot import figure, show
from scipy.io import loadmat
from sklearn.ensemble import RandomForestClassifier

from dtuimldmtools import dbplot, dbprobplot
filename = importlib_resources.files("dtuimldmtools").joinpath("data/synth7.mat")
# Load Matlab data file and extract variables of interest
mat_data = loadmat(filename)
X = mat_data["X"]
y = mat_data["y"].squeeze()
attributeNames = [name[0] for name in mat_data["attributeNames"].squeeze()]
classNames = [name[0][0] for name in mat_data["classNames"]]
N, M = X.shape
C = len(classNames)
@@ -31,14 +28,12 @@ y_est_prob = rf_classifier.predict_proba(X).T
# Compute classification error
ErrorRate = (y != y_est).sum(dtype=float) / N
print("Error rate: {:.2f}%".format(ErrorRate * 100))
# Plot decision boundaries
figure(1)
dbprobplot(rf_classifier, X, y, "auto", resolution=400)
figure(2)
dbplot(rf_classifier, X, y, "auto", resolution=400)
show()
print("Ran Exercise 9.1.3")
# exercise 9.2.1
import importlib_resources
from matplotlib.pyplot import figure, show
# import numpy as np
from scipy.io import loadmat
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold

from dtuimldmtools import confmatplot, rocplot
filename = importlib_resources.files("dtuimldmtools").joinpath("data/wine2.mat")
# Load Matlab data file and extract variables of interest
mat_data = loadmat(filename)
X = mat_data["X"]
y = mat_data["y"].squeeze()
attributeNames = [name[0] for name in mat_data["attributeNames"][0]]
classNames = [name[0][0] for name in mat_data["classNames"]]
N, M = X.shape
C = len(classNames)
@@ -49,4 +44,4 @@ for train_index, test_index in CV.split(X, y):
show()
print("Ran Exercise 9.2.1")
# exercise 9.2.2
import importlib_resources
from matplotlib.pyplot import figure, show
# import numpy as np
from scipy.io import loadmat
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold

from dtuimldmtools import confmatplot, rocplot
filename = importlib_resources.files("dtuimldmtools").joinpath("data/wine2.mat")
# Load Matlab data file and extract variables of interest
mat_data = loadmat(filename)
X = mat_data["X"]
y = mat_data["y"].squeeze()
attributeNames = [name[0] for name in mat_data["attributeNames"][0]]
classNames = [name[0][0] for name in mat_data["classNames"]]
attribute_included = 10 # alcohol contents
X = X[:,attribute_included].reshape(-1,1)
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX