Select Git revision
bjje authored
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
ex1_6_5.py 1.14 KiB
# exercise 1.6.5
import numpy as np
import scipy.linalg as linalg
from ex1_6_4 import *
from dtuimldmtools import similarity
# Query vector: 18 entries, all zero except for a 1 at positions 7, 13 and 14
# (0-based).
q = np.zeros(18, dtype=int)
q[[7, 13, 14]] = 1
# The same kind of vector can also be produced by the vectorizer, e.g.:
# q = vectorizer.transform(['matrix rank solv'])
# q = np.asarray(q.toarray())
# ... or from any other query string:
# q = vectorizer.transform(['Can I Google how to fix my problem?'])
# q = np.asarray(q.toarray())
# Method 1 ('for' loop - slow)
N = np.shape(X)[0]  # number of data objects (rows of X)
sim = np.zeros((N, 1))  # allocate a column vector, one similarity per data object
# The query is the same for every data object, so scale it to unit length
# once instead of recomputing its norm on every iteration.
q_unit = q / linalg.norm(q)
for i in range(N):
    x = X[i, :]  # the i'th data object (here: document)
    # Cosine similarity: dot product of the unit-length query with x, divided
    # by the norm of x. The parentheses make the evaluation order explicit
    # ('/' and '@' share precedence and are evaluated left to right).
    # NOTE(review): an all-zero row of X has norm 0 and yields nan here.
    sim[i] = (q_unit @ x.T) / linalg.norm(x)
# Method 2 (one vectorised expression, no Python-level loop - faster)
# Dot product of the query with every row of X.
dot_products = (q @ X.T).T
# Euclidean norm of each row of X, and of the query itself.
row_norms = np.sqrt(np.power(X, 2).sum(axis=1))
query_norm = np.sqrt(np.power(q, 2).sum())
# Cosine similarity = dot product divided by the product of the two norms.
sim = dot_products / (row_norms * query_norm)
# Method 3 (delegate to the toolbox "similarity" function, "cos" measure)
# This assignment replaces the values computed by the previous methods, so
# the result printed below is the one returned by similarity().
sim = similarity(X, q, "cos")

# Display the result: build the two formatted messages, then print them
# followed by the completion notice.
query_report = "Query vector:\n {0}\n".format(q)
similarity_report = "Similarity results:\n {0}".format(sim)
print(query_report)
print(similarity_report)
print("Ran Exercise 1.6.5")