Skip to content
Snippets Groups Projects
Commit 5fdbfb16 authored by bjje's avatar bjje
Browse files

minor restructuring of ex4 and new ex2_4_3

parent a0cca827
No related branches found
No related tags found
No related merge requests found
......@@ -2,9 +2,13 @@
#%%
## Intro
"""
Note: This is a long script. We suggest you run it using the #%% feature
This is a small experiment where the exercise has a slightly different format than usual.
The purpose is to explore the best format of Python exercise in the course.
It is a long script. We suggest you run it using the #%% feature
in VScode which allows you to easily run one part at a time in interactive mode
(similar to a Jupyter notebook)
(similar to a Jupyter notebook yet still having the full VScode/debugger available)
"""
import importlib_resources
import numpy as np
......@@ -51,7 +55,6 @@ subsample_mask = np.random.choice(N, N_wines_to_consider, replace=False)
# Keep only the randomly drawn subset of rows; the same mask is applied to
# X, y and wine_id so that the three stay aligned row by row.
X = X[subsample_mask, :]
y = y[subsample_mask]
wine_id = wine_id[subsample_mask] # kept only so we can identify the original wine if need be
N = len(y)
# Sort the rows of X according to y (i.e. whether the wine is red or white);
# argsort returns the row order, which is then applied to X.
sorted_indices = np.argsort(y) # row order that sorts the wines by type
X = X[sorted_indices]
......@@ -59,14 +62,15 @@ y = y[sorted_indices]
# Apply the same ordering to the wine ids so they stay aligned with the sorted rows
wine_id = wine_id[sorted_indices]
N = len(y)
# Create two lists of strings used as tick labels in the plots. Each label
# combines the row position (0..N-1), the wine id and the type taken from y;
# the two lists differ only in where the row position is placed.
idx = np.arange(0,N)
# row position first: "<row> (id=<wine id> type=<y>)"
wine_id_type = [f"{a3} (id={a1} type={a2})" for a1,a2,a3 in zip(wine_id, y , idx)]
# row position last: "(id=<wine id> type=<y>) <row>"
wine_id_type_vert = [f"(id={a1} type={a2}) {a3}" for a1,a2,a3 in zip(wine_id, y , idx)]
#%%
## TASK D: Optionally, standardize the attributes
# Try, once you have complted the script, to change this and see the effect on
## TASK D: Optionally, standardize the attributes
# Try, once you have completed the script, to change this and see the effect on
# the associated distance in TASK H and I
if True:
X = zscore(X, ddof=1)
......@@ -87,13 +91,15 @@ plt.xlabel('Attributes/features')
plt.ylabel('Observations')
plt.show()
print("Data loaded (both standardized and not standardized versions)")
print("Data loaded")
#%%
## TASK F: Extract two wines and compute distances between a white and red whine (warm up exercise)
## TASK F: Extract two wines and compute distances between a white and red wine (warm up exercise)
#
# Experiment with the various scaling factors and the attributes being scaled
# to see how the scaling affects the Lp distances (default L2)
#
# Experiment with the various scaling factors and attrbutes being scale to see how the
# scaling affects the Lp distances (default L2)
# Note: you should think about ´x_red´ and ´x_white´ as vectors!
#
x_red = np.copy(X[0,:]) # note we make a copy to avoid messing with X in case we change x_white and x_red
x_white = np.copy(X[-1,:])
......@@ -137,21 +143,24 @@ red_L2 = np.linalg.norm(X - x_red, 2, axis=1) # L_2
red_Linf = np.linalg.norm(X - x_red, np.inf, axis=1) # L_inf
# This is not important
def list_in_order(alist, order):  # credit JHW
    """Return the elements of 'alist' rearranged according to 'order'.

    Parameters
    ----------
    alist : indexable sequence (e.g. a list of label strings)
        The items to reorder.
    order : iterable of indices into 'alist'
        The i-th element of the result is ``alist[order[i]]``; an index
        array such as the output of ``np.argsort`` works as well.

    Returns
    -------
    list
        A new list; 'alist' itself is not modified.

    Credit: JHW
    """
    return [alist[i] for i in order]
def rank_plot(distances):  # credit JHW
    """Bar-plot 'distances' sorted in ascending order, with matching labels.

    A plotting helper. Credit: JHW

    Parameters
    ----------
    distances : numpy array
        One distance per wine; it is indexed with the argsort result, so it
        must support integer-array indexing.

    Notes
    -----
    The function draws on the module-level axes object ``ax`` and reads the
    module-level names ``N`` and ``wine_id_type``; none of them are passed in.
    NOTE(review): ``ax`` is presumably assigned by the plotting code right
    before each call - that code is not visible here, confirm.
    NOTE(review): the ticks use ``N`` while the bars use ``len(distances)``,
    so the labels only line up when the two are equal - confirm at call sites.
    """
    order = np.argsort(distances)  # find the ordering of the distances
    ax.bar(np.arange(len(distances)), distances[order])  # bar plot them
    ax.set_xlabel("Wines / type", fontsize=12)
    ax.set_ylabel("Distance to the first red wine", fontsize=12)
    ax.set_xticks(np.arange(N))
    # ax.set_frame_on(False)  # remove frame
    # make sure the correct order is used for the labels!
    ax.set_xticklabels(
        list_in_order(wine_id_type, order), rotation="vertical", fontsize=7
    )
# Make the plots (not important how this happens)
......@@ -170,8 +179,9 @@ plt.tight_layout()
#%%
## TASK H: Plot distances among all wines
# Compute pairwise distances between rows and save in the following variables:
## TASK H: Plot distances between all wines.
# Compute all the possible pairwise distances between rows and save
# in the following variables:
#
# ´pairwise_distances_L1´: An NxN matrix with distances between row i and row j using L1
# ´pairwise_distances_L2´: An NxN matrix with distances between row i and row j using L2
......@@ -233,9 +243,9 @@ plt.show()
# relative difference between the inter and intra wine distances (p.s. it does...).
#
avg_interdist_white = np.nan # replace np.nan with your
avg_interdist_red = np.nan # replace np.nan with your
avg_intradist_red2white = np.nan # replace np.nan with your
avg_interdist_white = np.nan # replace np.nan with your estimate
avg_interdist_red = np.nan # replace np.nan with your estimate
avg_intradist_red2white = np.nan # replace np.nan with your estimate
# TASK: INSERT YOUR CODE HERE
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment