Skip to content
Snippets Groups Projects
ex2_4_2.py 2.05 KiB
Newer Older
  • Learn to ignore specific revisions
  • bjje's avatar
    bjje committed
    
    
    Stas Syrota's avatar
    Stas Syrota committed
    import importlib_resources
    
    bjje's avatar
    bjje committed
    import numpy as np
    
    bjje's avatar
    bjje committed
    import matplotlib.pyplot as plt
    
    bjje's avatar
    bjje committed
    from scipy.io import loadmat
    from scipy.stats import zscore
    
    bjje's avatar
    bjje committed
    from dtuimldmtools import similarity
    
    bjje's avatar
    bjje committed
    
    
    Stas Syrota's avatar
    Stas Syrota committed
    # Locate the wine data file shipped inside the dtuimldmtools package
    package_root = importlib_resources.files("dtuimldmtools")
    filename = package_root.joinpath("data/wine.mat")

    # Read the Matlab file; loadmat returns a dict mapping variable names to arrays
    mat_data = loadmat(filename)

    # Data matrix and class labels (labels flattened to drop singleton dimensions)
    X = mat_data["X"]
    y = mat_data["y"].squeeze()

    # C, M and N are indexed as 2-D arrays; pick out their single [0, 0] element
    C, M, N = (mat_data[key][0, 0] for key in ("C", "M", "N"))

    # Names arrive as nested arrays; unwrap each entry into a plain list item
    attributeNames = [entry[0][0] for entry in mat_data["attributeNames"]]
    classNames = [entry[0] for entry in mat_data["classNames"][0]]
    
    
    bjje's avatar
    bjje committed
    # The histograms show that there are a few very extreme values in these
    # three attributes. To identify these values as outliers, we must use our
    # knowledge about the data set and the attributes. Say we expect volatile
    # acidity to be around 0-2 g/dm^3, density to be close to 1 g/cm^3, and
    # alcohol percentage to be somewhere between 5-20 % vol. Then we can safely
    # identify the following outliers, which are a factor of 10 greater than
    # the largest value we expect.
    
    Stas Syrota's avatar
    Stas Syrota committed
    # A row is an outlier when any of columns 1, 7 or 10 exceeds its threshold
    # (presumably volatile acidity, density and alcohol -- confirm against
    # attributeNames).
    outlier_mask = np.logical_or.reduce(
        [X[:, 1] > 20, X[:, 7] > 10, X[:, 10] > 200]
    )
    valid_mask = ~outlier_mask

    # Keep only the valid rows in both the data matrix and the label vector,
    # then refresh the observation count to match.
    X, y = X[valid_mask], y[valid_mask]
    N = len(y)

    # Column-wise standardisation using the sample standard deviation (ddof=1)
    Xnorm = zscore(X, ddof=1)
    
    ## Next we plot a number of attributes
    
    Stas Syrota's avatar
    Stas Syrota committed
    # Column indices of X shown in the scatter-plot matrix
    Attributes = [1, 4, 5, 6]
    NumAtr = len(Attributes)

    # Draw a NumAtr x NumAtr grid of scatter plots: row m1 puts Attributes[m1]
    # on the y-axis and column m2 puts Attributes[m2] on the x-axis.
    plt.figure(figsize=(12, 12))
    for m1, row_attr in enumerate(Attributes):
        for m2, col_attr in enumerate(Attributes):
            plt.subplot(NumAtr, NumAtr, m1 * NumAtr + m2 + 1)

            # One plot call per class so that each class is drawn as its own
            # series (and picked up as a separate legend entry).
            for c in range(C):
                class_mask = y == c
                plt.plot(X[class_mask, col_attr], X[class_mask, row_attr], ".")

            # Axis decoration does not depend on the class, so it is done once
            # per subplot rather than once per class: only the bottom row gets
            # x-labels and only the left column gets y-labels; ticks are hidden
            # on all other subplots.
            if m1 == NumAtr - 1:
                plt.xlabel(attributeNames[col_attr])
            else:
                plt.xticks([])
            if m2 == 0:
                plt.ylabel(attributeNames[row_attr])
            else:
                plt.yticks([])

    # The legend is attached to the current axes, i.e. the last subplot drawn
    plt.legend(classNames)
    plt.show()
    
    bjje's avatar
    bjje committed
    
    
    bjje's avatar
    bjje committed
    # Completion marker printed once the figure window has been closed
    # (plt.show() above normally blocks until then).
    print("Ran Exercise 2.4.2")