Newer
Older
# Load the first sheet of the Nanonose workbook shipped inside the
# dtuimldmtools package and turn it into NumPy arrays:
#   X              - data matrix, one row per observation, one column per attribute
#   y              - integer class index for every observation
#   attributeNames - column headers of X
#   classNames     - sorted unique class labels; position == integer code in y
#   N, M, C        - number of observations, attributes and classes
filename = importlib_resources.files("dtuimldmtools").joinpath("data/nanonose.xls")
doc = xlrd.open_workbook(filename).sheet_by_index(0)

# Extract attribute names from the 1st row. The end index is exclusive, so
# (3, 11) selects 0-based columns 3..10, i.e. spreadsheet columns 4 to 11.
attributeNames = doc.row_values(0, 3, 11)

# Extract class labels from the first column (0-based rows 2..91), then map
# each distinct label to an integer code. The codes are derived from the
# labels actually present instead of a hard-coded count, so a sheet with a
# different number of classes cannot silently produce a truncated mapping.
classLabels = doc.col_values(0, 2, 92)
classNames = sorted(set(classLabels))
classDict = {name: code for code, name in enumerate(classNames)}

# Extract vector y, convert to NumPy array
y = np.asarray([classDict[value] for value in classLabels])

# Preallocate memory, then copy the spreadsheet columns into matrix X.
# The shape follows the labels/attribute names read above so it always
# agrees with y and attributeNames.
X = np.empty((len(classLabels), len(attributeNames)))
for i, col_id in enumerate(range(3, 3 + len(attributeNames))):
    X[:, i] = np.asarray(doc.col_values(col_id, 2, 92))

# Compute values of N, M and C.
N = len(y)
M = len(attributeNames)
C = len(classNames)