Newer
Older
import numpy as np
# You can read data from excel spreadsheets after installing and importing xlrd
# module. In most cases, you will need only few functions to accomplish it:
# open_workbook(), col_values(), row_values()
import xlrd
# If you need more advanced reference, or if you are interested how to write
# data to excel files, see the following tutorial:
# http://www.simplistix.co.uk/presentations/python-excel.pdf}
filename = importlib_resources.files("dtuimldmtools").joinpath("data/iris.xls")
# Print the location of the iris.xls file on your computer.
# You should inspect it manually to understand the format and content
print("\nLocation of the iris.xls file: {}".format(filename))
# Load xls sheet with data
# There's only a single sheet in the .xls, so we take out that sheet
# Extract attribute names
attributeNames = doc.row_values(rowx=0, start_colx=0, end_colx=4)
# Try calling help(doc.row_values). You'll see that the above means
# that we extract columns 0 through 4 from the first row of the document,
# which contains the header of the xls files (where the attributen names are)
# Extract class names to python list, then encode with integers (dict) just as
# we did previously. The class labels are in the 5th column, in the rows 2 to
classLabels = doc.col_values(4, 1, 151) # check out help(doc.col_values)
# Extract vector y, convert to NumPy array
y = np.array([classDict[value] for value in classLabels])
# Preallocate memory, then extract data to matrix X