diff --git a/exercises/02450Toolbox_Python/Scripts/ex2_4_3.py b/exercises/02450Toolbox_Python/Scripts/ex2_4_3.py index f1a15f3caf70807d29520c277b3f98633cdb872a..07f073eb6aad61924d6907dbb4a40e70bd69525d 100644 --- a/exercises/02450Toolbox_Python/Scripts/ex2_4_3.py +++ b/exercises/02450Toolbox_Python/Scripts/ex2_4_3.py @@ -2,9 +2,13 @@ #%% ## Intro """ -Note: This is a long script. We suggest you run it usign the #%% feature +This is a small experiment where the exercise has a slightly different format than usual. +The purpose is to explore the best format for Python exercises in the course. + +It is a long script. We suggest you run it using the #%% feature in VScode which allows you to easily run parts at the time in interactive mode -(similar to a Jupyter notebook) +(similar to a Jupyter notebook yet still having the full VScode/debugger available) + """ import importlib_resources import numpy as np @@ -51,7 +55,6 @@ subsample_mask = np.random.choice(N, N_wines_to_consider, replace=False) X = X[subsample_mask, :] y = y[subsample_mask] wine_id = wine_id[subsample_mask] # this is simply so we can id the orginal winev if need be -N = len(y) sorted_indices = np.argsort(y) # sort rows in X acording to whether they are red of white X = X[sorted_indices] @@ -59,14 +62,15 @@ y = y[sorted_indices] wine_id = wine_id[sorted_indices] N = len(y) +# create a list of strings for the plots xticks/labels idx = np.arange(0,N) wine_id_type = [f"{a3} (id={a1} type={a2})" for a1,a2,a3 in zip(wine_id, y , idx)] wine_id_type_vert = [f"(id={a1} type={a2}) {a3}" for a1,a2,a3 in zip(wine_id, y , idx)] #%% -## TASK D: Optionally, standardize the attributes -# Try, once you have complted the script, to change this and see the effect on +## TASK D: Optionally, standardize the attributes +# Try, once you have completed the script, to change this and see the effect on # the associated distance in TASK H and I if True: X = zscore(X, ddof=1) @@ -87,13 +91,15 @@ plt.xlabel('Attributes/features') 
plt.ylabel('Observations') plt.show() -print("Data loaded (both standardized and not standardized versions)") +print("Data loaded") #%% -## TASK F: Extract two wines and compute distances between a white and red whine (warm up exercise) +## TASK F: Extract two wines and compute distances between a white and red wine (warm up exercise) +# +# Experiment with the various scaling factors and attributes being scaled +# to see how the scaling affects the Lp distances (default L2) # -# Experiment with the various scaling factors and attrbutes being scale to see how the -# scaling affects the Lp distances (default L2) +# Note: you should think about ´x_red´ and ´x_white´ as vectors! # x_red = np.copy(X[0,:]) # note we make a copy to avoid messing with X in case we change x_white and x_red x_white = np.copy(X[-1,:]) @@ -137,21 +143,24 @@ red_L2 = np.linalg.norm(X - x_red, 2, axis=1) # L_2 red_Linf = np.linalg.norm(X - x_red, np.inf, axis=1) # L_inf # This is not important -def list_in_order(alist, order): +def list_in_order(alist, order): # credit JHW """Given a list 'alist' and a list of indices 'order' - returns the list in the order given by the indices""" + returns the list in the order given by the indices. Credit: JHW""" return [alist[i] for i in order] -def rank_plot(distances): # this is not important +def rank_plot(distances): # credit JHW + """ + A helper function that bar-plots the given distances in sorted order. Credit: JHW + """ order = np.argsort(distances) # find the ordering of the distances ax.bar(np.arange(len(distances)), distances[order]) # bar plot them ax.set_xlabel("Wines / type", fontsize=12) - ax.set_ylabel("Distance to the first red whine", fontsize=12) + ax.set_ylabel("Distance to the first red wine", fontsize=12) ax.set_xticks(np.arange(N)) #ax.set_frame_on(False) # remove frame # make sure the correct order is used for the labels! 
ax.set_xticklabels( - list_in_order(wine_id_type, order), rotation="vertical", fontsize=7 + list_in_order(wine_id_type, order), rotation="vertical", fontsize=7 ) # Make the plots (not important how this happens) @@ -170,8 +179,9 @@ plt.tight_layout() #%% -## TASK H: Plot distances among all wines -# Compute pairwise distances between rows and save in the following variables: +## TASK H: Plot distances between all wines. +# Compute all the possible pairwise distances between rows and save +# in the following variables: # # ´pairwise_distances_L1´: An NxN matrix with distances between row i and row j using L1 # ´pairwise_distances_L2´: An NxN matrix with distances between row i and row j using L2 @@ -233,9 +243,9 @@ plt.show() # relative difference between the inter and intra wine distances (p.s. it does...). # -avg_interdist_white = np.nan # replace np.nan with your -avg_interdist_red = np.nan # replace np.nan with your -avg_intradist_red2white = np.nan # replace np.nan with your +avg_interdist_white = np.nan # replace np.nan with your estimate +avg_interdist_red = np.nan # replace np.nan with your estimate +avg_intradist_red2white = np.nan # replace np.nan with your estimate # TASK: INSERT YOUR CODE HERE