From f19f83634913d17264db8ec870efd7168b3e4a56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sand=20Jensen?= <bjje@dtu.dk> Date: Sat, 1 Feb 2025 17:11:52 +0100 Subject: [PATCH] minor bug fixes updates --- .../02450Toolbox_Matlab/Scripts/ex1_6_3.m | 1 + .../TMG_6.0R7/log_files/filters_results.log | 164 ++++++++++++++++++ .../Tools/TMG_6.0R7/log_files/tmg.log | 156 +++++++++++++++++ .../02450Toolbox_Python/Scripts/ex1_6_2.py | 2 +- .../02450Toolbox_Python/Scripts/ex1_6_3.py | 2 +- .../02450Toolbox_Python/Scripts/ex1_6_5.py | 1 - .../02450Toolbox_Python/Scripts/ex5_1_1.py | 4 +- 7 files changed, 325 insertions(+), 5 deletions(-) diff --git a/exercises/02450Toolbox_Matlab/Scripts/ex1_6_3.m b/exercises/02450Toolbox_Matlab/Scripts/ex1_6_3.m index a9c1368..3036b65 100644 --- a/exercises/02450Toolbox_Matlab/Scripts/ex1_6_3.m +++ b/exercises/02450Toolbox_Matlab/Scripts/ex1_6_3.m @@ -1,5 +1,6 @@ %% exercise 1.6.3 cdir = fileparts(mfilename('fullpath')); +print cdir TMGOpts.stoplist = fullfile(cdir,'../Data/stopWords.txt'); [A, D] = tmg(fullfile(cdir,'../Data/textDocs.txt'), TMGOpts); X = full(A)'; diff --git a/exercises/02450Toolbox_Matlab/Tools/TMG_6.0R7/log_files/filters_results.log b/exercises/02450Toolbox_Matlab/Tools/TMG_6.0R7/log_files/filters_results.log index 824dfdf..c972026 100644 --- a/exercises/02450Toolbox_Matlab/Tools/TMG_6.0R7/log_files/filters_results.log +++ b/exercises/02450Toolbox_Matlab/Tools/TMG_6.0R7/log_files/filters_results.log @@ -2402,3 +2402,167 @@ Filtering of input files ended at 03-May-2023 14:31:17... Estimated time of parsing documents: 0.039420 seconds ================================================================================================================================================================== +================================================================================================================================================================== +Running TMG... +Converting Results on C:\Users\bjje\02450spring2025\02450students\exercises\02450Toolbox_Matlab\Scripts\..\Data\textDocs.txt file in 01-Feb-2025 06:05:54... +================================================================================================================================================================== + +-File textDocs.txt did not need conversion... + +================================================================================================================================================================== +Filtering of input files ended at 01-Feb-2025 06:05:54... + +Estimated time of parsing documents: 0.132857 seconds +================================================================================================================================================================== +================================================================================================================================================================== +Running TMG... +Converting Results on C:\Users\bjje\02450spring2025\02450students\exercises\02450Toolbox_Matlab\Scripts\..\Data\textDocs.txt file in 01-Feb-2025 06:06:00... +================================================================================================================================================================== + +-File textDocs.txt did not need conversion... + +================================================================================================================================================================== +Filtering of input files ended at 01-Feb-2025 06:06:00... + +Estimated time of parsing documents: 0.067723 seconds +================================================================================================================================================================== +================================================================================================================================================================== +Running TMG... +Converting Results on C:\Users\bjje\02450spring2025\02450students\exercises\02450Toolbox_Matlab\Scripts\..\Data\textDocs.txt file in 01-Feb-2025 06:06:26... +================================================================================================================================================================== + +-File textDocs.txt did not need conversion... + +================================================================================================================================================================== +Filtering of input files ended at 01-Feb-2025 06:06:26... + +Estimated time of parsing documents: 0.027121 seconds +================================================================================================================================================================== +================================================================================================================================================================== +Running TMG... +Converting Results on C:\Users\bjje\02450spring2025\02450students\exercises\02450Toolbox_Matlab\Scripts\..\Data\textDocs.txt file in 01-Feb-2025 06:06:30... +================================================================================================================================================================== + +-File textDocs.txt did not need conversion... + +================================================================================================================================================================== +Filtering of input files ended at 01-Feb-2025 06:06:30... + +Estimated time of parsing documents: 0.027886 seconds +================================================================================================================================================================== +================================================================================================================================================================== +Running TMG... +Converting Results on C:\Users\bjje\02450spring2025\02450students\exercises\02450Toolbox_Matlab\Scripts\..\Data\textDocs.txt file in 01-Feb-2025 06:07:09... +================================================================================================================================================================== + +-File textDocs.txt did not need conversion... + +================================================================================================================================================================== +Filtering of input files ended at 01-Feb-2025 06:07:09... + +Estimated time of parsing documents: 0.043797 seconds +================================================================================================================================================================== +================================================================================================================================================================== +Running TMG... +Converting Results on C:\Users\bjje\02450spring2025\02450students\exercises\02450Toolbox_Matlab\Scripts\..\Data\textDocs.txt file in 01-Feb-2025 06:07:18... +================================================================================================================================================================== + +-File textDocs.txt did not need conversion... + +================================================================================================================================================================== +Filtering of input files ended at 01-Feb-2025 06:07:18... + +Estimated time of parsing documents: 0.027370 seconds +================================================================================================================================================================== +================================================================================================================================================================== +Running TMG... +Converting Results on ..\Data\textDocs.txt file in 01-Feb-2025 06:08:05... +================================================================================================================================================================== +================================================================================================================================================================== +Running TMG... +Converting Results on C:\Users\bjje\02450spring2025\02450students\exercises\02450Toolbox_Matlab\Scripts\..\Data\textDocs.txt file in 01-Feb-2025 06:10:01... +================================================================================================================================================================== + +-File textDocs.txt did not need conversion... + +================================================================================================================================================================== +Filtering of input files ended at 01-Feb-2025 06:10:01... + +Estimated time of parsing documents: 0.023193 seconds +================================================================================================================================================================== +================================================================================================================================================================== +Running TMG... +Converting Results on C:\Users\bjje\02450spring2025\02450students\exercises\02450Toolbox_Matlab\Scripts\..\Data\textDocs.txt file in 01-Feb-2025 06:10:20... +================================================================================================================================================================== + +-File textDocs.txt did not need conversion... + +================================================================================================================================================================== +Filtering of input files ended at 01-Feb-2025 06:10:20... + +Estimated time of parsing documents: 0.034251 seconds +================================================================================================================================================================== +================================================================================================================================================================== +Running TMG... +Converting Results on C:\Users\bjje\02450spring2025\02450students\exercises\02450Toolbox_Matlab\Scripts\..\Data\textDocs.txt file in 01-Feb-2025 06:10:46... +================================================================================================================================================================== + +-File textDocs.txt did not need conversion... + +================================================================================================================================================================== +Filtering of input files ended at 01-Feb-2025 06:10:46... + +Estimated time of parsing documents: 0.022991 seconds +================================================================================================================================================================== +================================================================================================================================================================== +Running TMG... +Converting Results on C:\Users\bjje\02450spring2025\02450students\exercises\02450Toolbox_Matlab\Scripts\..\Data\textDocs.txt file in 01-Feb-2025 17:04:10... +================================================================================================================================================================== + +-File textDocs.txt did not need conversion... + +================================================================================================================================================================== +Filtering of input files ended at 01-Feb-2025 17:04:10... + +Estimated time of parsing documents: 0.057242 seconds +================================================================================================================================================================== +================================================================================================================================================================== +Running TMG... +Converting Results on C:\Users\bjje\02450spring2025\02450students\exercises\02450Toolbox_Matlab\Scripts\..\Data\textDocs.txt file in 01-Feb-2025 17:04:14... +================================================================================================================================================================== + +-File textDocs.txt did not need conversion... + +================================================================================================================================================================== +Filtering of input files ended at 01-Feb-2025 17:04:14... + +Estimated time of parsing documents: 0.023574 seconds +================================================================================================================================================================== +================================================================================================================================================================== +Running TMG... +Converting Results on ..\Data\textDocs.txt file in 01-Feb-2025 17:04:33... +================================================================================================================================================================== +================================================================================================================================================================== +Running TMG... +Converting Results on C:\Users\bjje\02450spring2025\02450students\exercises\02450Toolbox_Matlab\Scripts\..\Data\textDocs.txt file in 01-Feb-2025 17:07:56... +================================================================================================================================================================== + +-File textDocs.txt did not need conversion... + +================================================================================================================================================================== +Filtering of input files ended at 01-Feb-2025 17:07:56... + +Estimated time of parsing documents: 0.030590 seconds +================================================================================================================================================================== +================================================================================================================================================================== +Running TMG... +Converting Results on C:\Users\bjje\02450spring2025\02450students\exercises\02450Toolbox_Matlab\Scripts\..\Data\textDocs.txt file in 01-Feb-2025 17:08:01... +================================================================================================================================================================== + +-File textDocs.txt did not need conversion... + +================================================================================================================================================================== +Filtering of input files ended at 01-Feb-2025 17:08:01... + +Estimated time of parsing documents: 0.030105 seconds +================================================================================================================================================================== diff --git a/exercises/02450Toolbox_Matlab/Tools/TMG_6.0R7/log_files/tmg.log b/exercises/02450Toolbox_Matlab/Tools/TMG_6.0R7/log_files/tmg.log index 4b54367..1245f68 100644 --- a/exercises/02450Toolbox_Matlab/Tools/TMG_6.0R7/log_files/tmg.log +++ b/exercises/02450Toolbox_Matlab/Tools/TMG_6.0R7/log_files/tmg.log @@ -5696,3 +5696,159 @@ Removed 0 elements using the local thresholds... Removed 0 empty terms... Removed 0 empty documents... ================================================================================= +================================================================================================================================================================== +Running tmg (tmg_p.m) on 01-Feb-2025 06:05:55... +Using the Perl implementation... +Input File/Directory: C:\Users\bjje\02450spring2025\02450students\exercises\02450Toolbox_Matlab\Scripts\..\Data\textDocs.txt +Delimiter: emptyline +Line Delimiter: Yes +Stoplist Filename: -nostoplist +Update Step: 10000 +Minimum Length: 3 +Maximum Length: 30 +Minimum Local Frequency: 1 +Maximum Local Frequency: Inf +Minimum Global Frequency: 1 +Maximum Global Frequency: Inf +Local Term Weighting Function: term frequency +Global Term Weighting Function: none +Using Normalization: no +Using Stemming: no +Removing Numbers: no +Removing Alphanumerics: no + +================================================================================= +Results: +================================================================================= +Number of documents = 5 +Number of terms = 25 +Average number of terms per document (before the normalization) = 11 +Average number of indexing terms per document = 6.8 +Sparsity = 25.6% + +Estimated time for parsing and converting the files: 0.132857 seconds +Estimated time for constructing tdm and the other workspace parts: 0.769623 seconds + +Removed 12 terms using the term-length thresholds... +Removed 0 terms using the global thresholds... +Removed 0 elements using the local thresholds... +Removed 0 empty terms... +Removed 0 empty documents... +================================================================================= +================================================================================================================================================================== +Running tmg (tmg_p.m) on 01-Feb-2025 06:06:26... +Using the Perl implementation... +Input File/Directory: C:\Users\bjje\02450spring2025\02450students\exercises\02450Toolbox_Matlab\Scripts\..\Data\textDocs.txt +Delimiter: emptyline +Line Delimiter: Yes +Stoplist Filename: -nostoplist +Update Step: 10000 +Minimum Length: 3 +Maximum Length: 30 +Minimum Local Frequency: 1 +Maximum Local Frequency: Inf +Minimum Global Frequency: 1 +Maximum Global Frequency: Inf +Local Term Weighting Function: term frequency +Global Term Weighting Function: none +Using Normalization: no +Using Stemming: no +Removing Numbers: no +Removing Alphanumerics: no + +================================================================================= +Results: +================================================================================= +Number of documents = 5 +Number of terms = 25 +Average number of terms per document (before the normalization) = 11 +Average number of indexing terms per document = 6.8 +Sparsity = 25.6% + +Estimated time for parsing and converting the files: 0.027121 seconds +Estimated time for constructing tdm and the other workspace parts: 0.403465 seconds + +Removed 12 terms using the term-length thresholds... +Removed 0 terms using the global thresholds... +Removed 0 elements using the local thresholds... +Removed 0 empty terms... +Removed 0 empty documents... +================================================================================= +================================================================================================================================================================== +Running tmg (tmg_p.m) on 01-Feb-2025 06:07:10... +Using the Perl implementation... +Input File/Directory: C:\Users\bjje\02450spring2025\02450students\exercises\02450Toolbox_Matlab\Scripts\..\Data\textDocs.txt +Delimiter: emptyline +Line Delimiter: Yes +Stoplist Filename: -nostoplist +Update Step: 10000 +Minimum Length: 3 +Maximum Length: 30 +Minimum Local Frequency: 1 +Maximum Local Frequency: Inf +Minimum Global Frequency: 1 +Maximum Global Frequency: Inf +Local Term Weighting Function: term frequency +Global Term Weighting Function: none +Using Normalization: no +Using Stemming: no +Removing Numbers: no +Removing Alphanumerics: no + +================================================================================= +Results: +================================================================================= +Number of documents = 5 +Number of terms = 25 +Average number of terms per document (before the normalization) = 11 +Average number of indexing terms per document = 6.8 +Sparsity = 25.6% + +Estimated time for parsing and converting the files: 0.043797 seconds +Estimated time for constructing tdm and the other workspace parts: 0.641746 seconds + +Removed 12 terms using the term-length thresholds... +Removed 0 terms using the global thresholds... +Removed 0 elements using the local thresholds... +Removed 0 empty terms... +Removed 0 empty documents... +================================================================================= +================================================================================================================================================================== +Running tmg (tmg_p.m) on 01-Feb-2025 17:04:11... +Using the Perl implementation... +Input File/Directory: C:\Users\bjje\02450spring2025\02450students\exercises\02450Toolbox_Matlab\Scripts\..\Data\textDocs.txt +Delimiter: emptyline +Line Delimiter: Yes +Stoplist Filename: -nostoplist +Update Step: 10000 +Minimum Length: 3 +Maximum Length: 30 +Minimum Local Frequency: 1 +Maximum Local Frequency: Inf +Minimum Global Frequency: 1 +Maximum Global Frequency: Inf +Local Term Weighting Function: term frequency +Global Term Weighting Function: none +Using Normalization: no +Using Stemming: no +Removing Numbers: no +Removing Alphanumerics: no + +================================================================================= +Results: +================================================================================= +Number of documents = 5 +Number of terms = 25 +Average number of terms per document (before the normalization) = 11 +Average number of indexing terms per document = 6.8 +Sparsity = 25.6% + +Estimated time for parsing and converting the files: 0.057242 seconds +Estimated time for constructing tdm and the other workspace parts: 0.585559 seconds + +Removed 12 terms using the term-length thresholds... +Removed 0 terms using the global thresholds... +Removed 0 elements using the local thresholds... +Removed 0 empty terms... +Removed 0 empty documents... +================================================================================= diff --git a/exercises/02450Toolbox_Python/Scripts/ex1_6_2.py b/exercises/02450Toolbox_Python/Scripts/ex1_6_2.py index 53697ff..c2444ce 100644 --- a/exercises/02450Toolbox_Python/Scripts/ex1_6_2.py +++ b/exercises/02450Toolbox_Python/Scripts/ex1_6_2.py @@ -51,4 +51,4 @@ print() print("Document-term matrix:") print(X.toarray()) print() -print("Ran Exercise 3.1.2") + diff --git a/exercises/02450Toolbox_Python/Scripts/ex1_6_3.py b/exercises/02450Toolbox_Python/Scripts/ex1_6_3.py index c61d0a0..0281fd4 100644 --- a/exercises/02450Toolbox_Python/Scripts/ex1_6_3.py +++ b/exercises/02450Toolbox_Python/Scripts/ex1_6_3.py @@ -37,4 +37,4 @@ print() print("Document-term matrix:") print(X.toarray()) print() -print("Ran Exercise 1.6.3") + diff --git a/exercises/02450Toolbox_Python/Scripts/ex1_6_5.py b/exercises/02450Toolbox_Python/Scripts/ex1_6_5.py index 857b49a..c19d3d3 100644 --- a/exercises/02450Toolbox_Python/Scripts/ex1_6_5.py +++ b/exercises/02450Toolbox_Python/Scripts/ex1_6_5.py @@ -35,4 +35,3 @@ sim = similarity(X, q, "cos") print("Query vector:\n {0}\n".format(q)) print("Similarity results:\n {0}".format(sim)) -print("Ran Exercise 1.6.5") diff --git a/exercises/02450Toolbox_Python/Scripts/ex5_1_1.py b/exercises/02450Toolbox_Python/Scripts/ex5_1_1.py index 3b3c5e4..6f26096 100644 --- a/exercises/02450Toolbox_Python/Scripts/ex5_1_1.py +++ b/exercises/02450Toolbox_Python/Scripts/ex5_1_1.py @@ -34,7 +34,7 @@ attributeNames = [ # Attribute values X = np.asarray( - np.mat( + np.asmatrix( """ 1 1 1 0 0 1 0; 0 2 0 0 0 0 1; @@ -55,7 +55,7 @@ X = np.asarray( ) # Class indices -y = np.asarray(np.mat("3 4 2 3 0 4 3 1 3 2 4 1 3 2 0").T).squeeze() +y = np.asarray(np.asmatrix("3 4 2 3 0 4 3 1 3 2 4 1 3 2 0").T).squeeze() # Class names classNames = ["Amphibian", "Bird", "Fish", "Mammal", "Reptile"] -- GitLab