Skip to content
Snippets Groups Projects
Commit 5fdbfb16 authored by bjje's avatar bjje
Browse files

minor restructuring of ex4 and new ex2_4_3

parent a0cca827
No related branches found
No related tags found
No related merge requests found
...@@ -2,9 +2,13 @@ ...@@ -2,9 +2,13 @@
#%% #%%
## Intro ## Intro
""" """
Note: This is a long script. We suggest you run it usign the #%% feature This is a small experiment where the exercise has a slightly different format than usual.
The purpose is to explore the best format of Python exercise in the course.
It is a long script. We suggest you run it using the #%% feature
in VScode which allows you to easily run one part at a time in interactive mode in VScode which allows you to easily run one part at a time in interactive mode
(similar to a Jupyter notebook) (similar to a Jupyter notebook yet still having the full VScode/debugger available)
""" """
import importlib_resources import importlib_resources
import numpy as np import numpy as np
...@@ -51,7 +55,6 @@ subsample_mask = np.random.choice(N, N_wines_to_consider, replace=False) ...@@ -51,7 +55,6 @@ subsample_mask = np.random.choice(N, N_wines_to_consider, replace=False)
X = X[subsample_mask, :] X = X[subsample_mask, :]
y = y[subsample_mask] y = y[subsample_mask]
wine_id = wine_id[subsample_mask] # this is simply so we can id the original wine if need be wine_id = wine_id[subsample_mask] # this is simply so we can id the original wine if need be
N = len(y)
sorted_indices = np.argsort(y) # sort rows in X according to whether they are red or white sorted_indices = np.argsort(y) # sort rows in X according to whether they are red or white
X = X[sorted_indices] X = X[sorted_indices]
...@@ -59,6 +62,7 @@ y = y[sorted_indices] ...@@ -59,6 +62,7 @@ y = y[sorted_indices]
wine_id = wine_id[sorted_indices] wine_id = wine_id[sorted_indices]
N = len(y) N = len(y)
# create a list of strings for the plot xticks/labels
idx = np.arange(0,N) idx = np.arange(0,N)
wine_id_type = [f"{a3} (id={a1} type={a2})" for a1,a2,a3 in zip(wine_id, y , idx)] wine_id_type = [f"{a3} (id={a1} type={a2})" for a1,a2,a3 in zip(wine_id, y , idx)]
wine_id_type_vert = [f"(id={a1} type={a2}) {a3}" for a1,a2,a3 in zip(wine_id, y , idx)] wine_id_type_vert = [f"(id={a1} type={a2}) {a3}" for a1,a2,a3 in zip(wine_id, y , idx)]
...@@ -66,7 +70,7 @@ wine_id_type_vert = [f"(id={a1} type={a2}) {a3}" for a1,a2,a3 in zip(wine_id, y ...@@ -66,7 +70,7 @@ wine_id_type_vert = [f"(id={a1} type={a2}) {a3}" for a1,a2,a3 in zip(wine_id, y
#%% #%%
## TASK D: Optionally, standardize the attributes ## TASK D: Optionally, standardize the attributes
# Try, once you have complted the script, to change this and see the effect on # Try, once you have completed the script, to change this and see the effect on
# the associated distance in TASK H and I # the associated distance in TASK H and I
if True: if True:
X = zscore(X, ddof=1) X = zscore(X, ddof=1)
...@@ -87,13 +91,15 @@ plt.xlabel('Attributes/features') ...@@ -87,13 +91,15 @@ plt.xlabel('Attributes/features')
plt.ylabel('Observations') plt.ylabel('Observations')
plt.show() plt.show()
print("Data loaded (both standardized and not standardized versions)") print("Data loaded")
#%% #%%
## TASK F: Extract two wines and compute distances between a white and red whine (warm up exercise) ## TASK F: Extract two wines and compute distances between a white and red wine (warm up exercise)
# #
# Experiment with the various scaling factors and attrbutes being scale to see how the # Experiment with the various scaling factors and attributes being scaled
# scaling affects the Lp distances (default L2) # to see how the scaling affects the Lp distances (default L2)
#
# Note: you should think about ´x_red´ and ´x_white´ as vectors!
# #
x_red = np.copy(X[0,:]) # note we make a copy to avoid messing with X in case we change x_white and x_red x_red = np.copy(X[0,:]) # note we make a copy to avoid messing with X in case we change x_white and x_red
x_white = np.copy(X[-1,:]) x_white = np.copy(X[-1,:])
...@@ -137,16 +143,19 @@ red_L2 = np.linalg.norm(X - x_red, 2, axis=1) # L_2 ...@@ -137,16 +143,19 @@ red_L2 = np.linalg.norm(X - x_red, 2, axis=1) # L_2
red_Linf = np.linalg.norm(X - x_red, np.inf, axis=1) # L_inf red_Linf = np.linalg.norm(X - x_red, np.inf, axis=1) # L_inf
# This is not important # This is not important
def list_in_order(alist, order): def list_in_order(alist, order): # credit JHW
"""Given a list 'alist' and a list of indices 'order' """Given a list 'alist' and a list of indices 'order'
returns the list in the order given by the indices""" returns the list in the order given by the indices. Credit: JHW"""
return [alist[i] for i in order] return [alist[i] for i in order]
def rank_plot(distances): # this is not important def rank_plot(distances): # credit JHW
"""
A helper function. Credit: JHW
"""
order = np.argsort(distances) # find the ordering of the distances order = np.argsort(distances) # find the ordering of the distances
ax.bar(np.arange(len(distances)), distances[order]) # bar plot them ax.bar(np.arange(len(distances)), distances[order]) # bar plot them
ax.set_xlabel("Wines / type", fontsize=12) ax.set_xlabel("Wines / type", fontsize=12)
ax.set_ylabel("Distance to the first red whine", fontsize=12) ax.set_ylabel("Distance to the first red wine", fontsize=12)
ax.set_xticks(np.arange(N)) ax.set_xticks(np.arange(N))
#ax.set_frame_on(False) # remove frame #ax.set_frame_on(False) # remove frame
# make sure the correct order is used for the labels! # make sure the correct order is used for the labels!
...@@ -170,8 +179,9 @@ plt.tight_layout() ...@@ -170,8 +179,9 @@ plt.tight_layout()
#%% #%%
## TASK H: Plot distances among all wines ## TASK H: Plot distances between all wines.
# Compute pairwise distances between rows and save in the following variables: # Compute all the possible pairwise distances between rows and save
# in the following variables:
# #
# ´pairwise_distances_L1´: An NxN matrix with distances between row i and row j using L1 # ´pairwise_distances_L1´: An NxN matrix with distances between row i and row j using L1
# ´pairwise_distances_L2´: An NxN matrix with distances between row i and row j using L2 # ´pairwise_distances_L2´: An NxN matrix with distances between row i and row j using L2
...@@ -233,9 +243,9 @@ plt.show() ...@@ -233,9 +243,9 @@ plt.show()
# relative difference between the inter and intra wine distances (p.s. it does...). # relative difference between the inter and intra wine distances (p.s. it does...).
# #
avg_interdist_white = np.nan # replace np.nan with your avg_interdist_white = np.nan # replace np.nan with your estimate
avg_interdist_red = np.nan # replace np.nan with your avg_interdist_red = np.nan # replace np.nan with your estimate
avg_intradist_red2white = np.nan # replace np.nan with your avg_intradist_red2white = np.nan # replace np.nan with your estimate
# TASK: INSERT YOUR CODE HERE # TASK: INSERT YOUR CODE HERE
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment