Skip to content
Snippets Groups Projects
Select Git revision
  • ab5be3106cdc4b5dc30ee68dfa953c397c0be855
  • main default protected
  • s2025dev
  • plot-fixes
  • Fall2024
5 results

ex1_6_5.py

Blame
  • Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    ex1_6_5.py 1.14 KiB
    # exercise 1.6.5
    import numpy as np
    import scipy.linalg as linalg
    from ex1_6_4 import *
    
    from dtuimldmtools import similarity
    
    # Query vector: 18 term counts, all zero except for the three query terms.
    q = np.zeros(18, dtype=int)
    q[[7, 13, 14]] = 1  # positions of the terms that make up the query
    # The same query can also be produced with the vectorizer:
    # q = vectorizer.transform(['matrix rank solv'])
    # q = np.asarray(q.toarray())
    # and any other string works as a query, too:
    # q = vectorizer.transform(['Can I Google how to fix my problem?'])
    # q = np.asarray(q.toarray())
    
    # Method 1 ('for' loop - slow)
    # Scores the rows of X against q one at a time.
    N = np.shape(X)[0]  # number of data objects (rows of X)
    sim = np.zeros((N, 1))  # allocate a vector for the similarity
    # The normalised query does not depend on the row, so compute it only once.
    q_unit = q / linalg.norm(q)
    # NOTE(review): a zero norm (all-zero q or all-zero row of X) is not guarded
    # against; the division is then undefined.
    for i in range(N):
        x = X[i, :]  # Get the i'th data object (here: document)
        # Cosine similarity: (q / |q|) . x / |x|. The parentheses spell out the
        # left-to-right evaluation that `/` and `@` (equal precedence) already use.
        sim[i] = (q_unit @ x.T) / linalg.norm(x)
    
    # Method 2 (vectorized, no explicit iterations - faster)
    # Dot product of q with every row of X, divided by the product of each
    # row's Euclidean norm and the query's Euclidean norm.
    dot_products = (q @ X.T).T
    row_norms = np.sqrt(np.power(X, 2).sum(axis=1))
    query_norm = np.sqrt(np.power(q, 2).sum())
    sim = dot_products / (row_norms * query_norm)
    
    # Method 3 (use the "similarity" function)
    # Delegates to dtuimldmtools.similarity with the "cos" method selector.
    # This rebinds `sim`, so it is this result (not the ones from Methods 1
    # and 2) that is printed afterwards.
    # NOTE(review): presumably returns the cosine similarity between q and each
    # row of X; the shape of the result may differ from the earlier methods -
    # confirm against dtuimldmtools.
    sim = similarity(X, q, "cos")
    
    
    # Report the query and the similarity scores currently bound to `sim`.
    query_report = "Query vector:\n {0}\n".format(q)
    print(query_report)
    similarity_report = "Similarity results:\n {0}".format(sim)
    print(similarity_report)

    # Completion marker for the exercise.
    print("Ran Exercise 1.6.5")