Skip to content
Snippets Groups Projects
ex1_5_2.py 1.87 KiB
Newer Older
  • Learn to ignore specific revisions
  • bjje's avatar
    bjje committed
    # exercise 1.5.2
    
    Stas Syrota's avatar
    Stas Syrota committed
    import importlib_resources
    
    bjje's avatar
    bjje committed
    import numpy as np
    
    # You can read data from excel spreadsheets after installing and importing xlrd
    # module. In most cases, you will need only a few functions to accomplish it:
    # open_workbook(), col_values(), row_values()
    import xlrd
    
    Stas Syrota's avatar
    Stas Syrota committed
    
    # If you need more advanced reference, or if you are interested in how to write
    
    bjje's avatar
    bjje committed
    # data to excel files, see the following tutorial:
    # http://www.simplistix.co.uk/presentations/python-excel.pdf
    
    
    # Get path to the datafile: iris.xls is looked up among the resources of the
    # dtuimldmtools package
    filename = (
        importlib_resources.files("dtuimldmtools")
        .joinpath("data/iris.xls")
    )

    # Show where iris.xls lives on this computer. Open the file manually to get
    # familiar with its format and content
    print(f"\nLocation of the iris.xls file: {filename}")
    
    # Load the xls workbook. It holds only a single sheet, so the sheet at
    # index 0 is the one containing the data
    doc = xlrd.open_workbook(filename=filename).sheet_by_index(sheetx=0)
    
    bjje's avatar
    bjje committed
    
    # Extract attribute names from the header row. The arguments are, in order,
    # the row index, the first column index and the (exclusive) end column index
    attributeNames = doc.row_values(0, 0, 4)
    # Try calling help(doc.row_values). You'll see that the above means
    
    Stas Syrota's avatar
    Stas Syrota committed
    # that we extract columns 0 through 3 (the end column, 4, is excluded) from the first row of the document,
    
    bjje's avatar
    bjje committed
    # which contains the header of the xls file (where the attribute names are)
    
    
    Stas Syrota's avatar
    Stas Syrota committed
    # Extract the class label of every observation into a python list. The
    # labels sit in the 5th column (index 4), in spreadsheet rows 2 to 151;
    # check out help(doc.col_values)
    classLabels = doc.col_values(colx=4, start_rowx=1, end_rowx=151)

    # Encode the labels with integers (dict) just as we did previously: each
    # distinct label, taken in sorted order, gets the code 0, 1, 2, ...
    classNames = sorted(set(classLabels))
    classDict = {name: code for code, name in enumerate(classNames)}

    # Extract vector y (the integer code of every observation) as a NumPy array
    y = np.array([classDict[label] for label in classLabels])
    
    # Preallocate the 150-by-4 data matrix, then copy the four attribute columns
    # into it one at a time, skipping the header row
    X = np.empty(shape=(150, 4))
    for i in range(4):
        X[:, i] = np.asarray(doc.col_values(colx=i, start_rowx=1, end_rowx=151))
    
    bjje's avatar
    bjje committed
    
    # Compute N (number of observations), M (number of attributes) and
    # C (number of classes)
    N, M, C = len(y), len(attributeNames), len(classNames)