Skip to content
Snippets Groups Projects
ex2_1_1.py 930 B
Newer Older
  • Learn to ignore specific revisions
  • bjje's avatar
    bjje committed
    # exercise 2.1.1
    
    Stas Syrota's avatar
    Stas Syrota committed
    import importlib_resources
    
    bjje's avatar
    bjje committed
    import numpy as np
    import xlrd
    
    # Load xls sheet with data
    
    Stas Syrota's avatar
    Stas Syrota committed
    # Resolve the path of nanonose.xls shipped inside the dtuimldmtools
    # package, open the workbook and keep a handle on its first sheet.
    filename = importlib_resources.files("dtuimldmtools").joinpath("data/nanonose.xls")
    workbook = xlrd.open_workbook(filename)
    doc = workbook.sheet_by_index(0)
    
    bjje's avatar
    bjje committed
    
    # Extract attribute names from the header row (row index 0). Column
    # indices are 0-based with an exclusive end, so (3, 11) selects the
    # 8 spreadsheet columns 4 to 11.
    attributeNames = doc.row_values(0, 3, 11)
    
    # Extract class labels (first column, 0-based rows 2 to 91) to a Python
    # list, then map every distinct label to an integer code. The mapping is
    # built from the labels actually present instead of a fixed count, so
    # each label is guaranteed to have an entry in the dict.
    classLabels = doc.col_values(0, 2, 92)
    classNames = sorted(set(classLabels))
    classDict = {name: index for index, name in enumerate(classNames)}
    
    # Extract vector y (integer class code per observation) as a NumPy array
    y = np.asarray([classDict[value] for value in classLabels])
    
    # Preallocate memory, then copy the attribute columns into matrix X.
    # The shape is derived from the lists extracted above so that it always
    # agrees with the row/column ranges that were read.
    X = np.empty((len(classLabels), len(attributeNames)))
    for i, col_id in enumerate(range(3, 11)):
        X[:, i] = np.asarray(doc.col_values(col_id, 2, 92))
    
    # Compute values of N (observations), M (attributes) and C (classes).
    N = len(y)
    M = len(attributeNames)
    C = len(classNames)
    
    
    Stas Syrota's avatar
    Stas Syrota committed
    # Signal on stdout that the script reached its end.
    print("Ran Exercise 2.1.1")