From 42158bbe4756c6bd2619608de1ab1a5fdb0c1095 Mon Sep 17 00:00:00 2001 From: Morten Hannemose <morten@hannemose.dk> Date: Wed, 4 Oct 2023 13:20:07 +0200 Subject: [PATCH] Update week6 --- cp/ex06/word_histogram.py | 22 +++++++----------- cp/project3/unitgrade_data/Week06MultiTap.pkl | Bin 470 -> 470 bytes .../Week06SentimentAnalysis.pkl | Bin 490 -> 490 bytes cp/tests/tests_week06.py | 2 +- .../unitgrade_data/Week06ExtractKeywords.pkl | Bin 132 -> 132 bytes .../Week06GetPeopleByLanguage.pkl | Bin 140 -> 140 bytes .../unitgrade_data/Week06LetterHistogram.pkl | Bin 132 -> 132 bytes cp/tests/unitgrade_data/Week06MultiTap.pkl | Bin 119 -> 119 bytes .../Week06SentimentAnalysis.pkl | Bin 137 -> 137 bytes cp/tests/unitgrade_data/Week06SpellCheck.pkl | Bin 124 -> 124 bytes cp/tests/unitgrade_data/Week06TextToNato.pkl | Bin 122 -> 122 bytes .../Week06TruncateAndNormalize.pkl | Bin 130 -> 130 bytes .../unitgrade_data/Week06WordHistogram.pkl | Bin 128 -> 128 bytes 13 files changed, 10 insertions(+), 14 deletions(-) diff --git a/cp/ex06/word_histogram.py b/cp/ex06/word_histogram.py index cab8ba1..d111db3 100644 --- a/cp/ex06/word_histogram.py +++ b/cp/ex06/word_histogram.py @@ -1,6 +1,6 @@ """Exercise 6.3-6.4.""" -def word_histogram(lines : list) -> list: +def word_histogram(lines : list) -> dict: """Return the word count histogram from the input lines. :param lines: The lines that are analyzed for word count. @@ -20,19 +20,15 @@ def extract_keyword(lines : str, ignore_list : list) -> dict: if __name__ == "__main__": # here you can try out your functions - print(word_histogram('I think therefore I am.')) + lines = ['This is the first sentence of text for you', 'This is the second sentence of text', 'This is for you'] + print("word_histogram") + print(word_histogram(lines)) # Ignore list of common words - ignore_list = [ - 'a', 'an', 'the', 'above', 'across', 'against', 'along', 'among', 'around', - 'at', 'before', 'behind', 'below', 'beneath', 'beside', 'between', 'by', - 'down', 'from', 'in', 'into', 'near', 'of', 'off', 'on', 'to', 'toward', - 'under', 'upon', 'with', 'within','function', 'for', 'and', 'nor', 'but', 'or', 'yet', 'so'] - - # Example usage: - lines = [ "Write the function word_histogram, which takes as argument a list containing lines of a text.", "The function should ... ... ... ... ... make a histogram of words that occur in the text."] - - keywords_result = extract_keyword(lines, ignore_list) + ignore_list = ['the', 'be', 'to', 'of', 'and', 'a', 'in', 'is', 'have', 'I'] # Print the 5 most occurring keywords - print(keywords_result) + print("extract_keywords") + print(extract_keyword(lines, ignore_list)) + + diff --git a/cp/project3/unitgrade_data/Week06MultiTap.pkl b/cp/project3/unitgrade_data/Week06MultiTap.pkl index 6ca33b7aa3d8e940d32f033632ff5138c10440a5..3e142880387cf4ed6b9863925d9f98ca7e368fa4 100644 GIT binary patch delta 19 acmcb{e2sa6C6ltnL~COv%btw|X^a3sn+7KU delta 19 bcmcb{e2sa6C6m&GiPpwU#uGLcq%i^jL`MdE diff --git a/cp/project3/unitgrade_data/Week06SentimentAnalysis.pkl b/cp/project3/unitgrade_data/Week06SentimentAnalysis.pkl index cef5f2274e1687b74e02715c465b65ae610c6c37..6937e5ec0fc25ff3bde01a4927bd87425b508e0c 100644 GIT binary patch delta 19 acmaFG{EB&k6O*jML>C(-C$^1E8H@ly1O?*& delta 19 acmaFG{EB&k6O*FFL>C(-TaS%R8H@l!Xa+n0 diff --git a/cp/tests/tests_week06.py b/cp/tests/tests_week06.py index 8169cf4..0a9023b 100644 --- a/cp/tests/tests_week06.py +++ b/cp/tests/tests_week06.py @@ -22,7 +22,7 @@ class Week06WordHistogram(UTestCase): def test_WordHistogram(self): from cp.ex06.word_histogram import word_histogram self.assertEqual(word_histogram(["Write the function word_histogram."," which takes as argument a list containing lines of a text."]),{'write': 1, 'the': 1, 'function': 1, 'wordhistogram': 1, 'which': 1, 'takes': 1, 'as': 1, 'argument': 1, 'a': 2, 'list': 1, 'containing': 1, 'lines': 1, 'of': 1, 'text': 1}) - self.assertEqual(word_histogram(["The function should make a histogram of words that occur in the text.","Punctuation, spaces, numbers, and capitalization should be ignored.",]),{'the': 2, 'function': 1, 'should': 2, 'make': 1, 'a': 1, 'histogram': 1, 'of': 1, 'words': 1, 'that': 1, 'occur': 1, 'in': 1, 'text': 1, 'punctuation': 1, 'spaces': 1, 'numbers': 1, 'and': 1, 'capitalization': 1, 'be': 1, 'ignored': 1}) + self.assertEqual(word_histogram(["The function should make a histogram of words that occur in the text.","Punctuation, spaces, and capitalization should be ignored.",]),{'the': 2, 'function': 1, 'should': 2, 'make': 1, 'a': 1, 'histogram': 1, 'of': 1, 'words': 1, 'that': 1, 'occur': 1, 'in': 1, 'text': 1, 'punctuation': 1, 'spaces': 1, 'and': 1, 'capitalization': 1, 'be': 1, 'ignored': 1}) self.assertEqual(word_histogram(["The function should return a dictionary, e.g. {'write': 2, 'the': 12, 'function': 7, …}",]),{'the': 2, 'function': 2, 'should': 1, 'return': 1, 'a': 1, 'dictionary': 1, 'eg': 1, 'write': 1}) class Week06ExtractKeywords(UTestCase): diff --git a/cp/tests/unitgrade_data/Week06ExtractKeywords.pkl b/cp/tests/unitgrade_data/Week06ExtractKeywords.pkl index 68a7243852f77012525a4006f8214294de57c4cd..52287154838c3f31077d68fbc237d1009d7bc9d0 100644 GIT binary patch delta 21 acmZo+Y+;<>z$ib_(TY>|0|N+@>Hz>dngq}Q delta 21 acmZo+Y+;<>z$i1((Tdaj3Ihm~>Hz>c>IAz0 diff --git a/cp/tests/unitgrade_data/Week06GetPeopleByLanguage.pkl b/cp/tests/unitgrade_data/Week06GetPeopleByLanguage.pkl index 910f5dcc642db901c1998a4fe9b42ed10949fd69..321f2bd003e37bd566fcca5c6580b9a118ad51a1 100644 GIT binary patch delta 21 acmeBS>|vbX!X$rSqMIGZ0tOH$)dK)R6$LN= delta 21 acmeBS>|vbX!laNf(anxy1p^3_>Hz>f>jdEd diff --git a/cp/tests/unitgrade_data/Week06LetterHistogram.pkl b/cp/tests/unitgrade_data/Week06LetterHistogram.pkl index 2a7019b2f58629e083e892a7b9b6d162316be1a8..d23629ba7f89a0716eeaa4cc627800697b80804d 100644 GIT binary patch delta 22 ZcmZo+Y+;<>z@#KF(aDlig8>Xm^#C^p1MdI; delta 22 acmZo+Y+;<>z$D!;(aDn2&VT^~O7#Fg0|cG` diff --git a/cp/tests/unitgrade_data/Week06MultiTap.pkl b/cp/tests/unitgrade_data/Week06MultiTap.pkl index 9e47cc3bcb9612d0a03f2ab900cffbeeba344858..6a947c4e055c7d96773184533831ce22c026c727 100644 GIT binary patch delta 20 ZcmXRfpJ2%(-!ReIn3HP(0|=Dr0RTKL1g-!8 delta 20 ZcmXRfpJ2(Pdt#!sF{fi30|=Dr0RTYG1o;2} diff --git a/cp/tests/unitgrade_data/Week06SentimentAnalysis.pkl b/cp/tests/unitgrade_data/Week06SentimentAnalysis.pkl index 0ef765ada2bb5a6d7847cd8322cb3b992c568ee2..3c2acc03d5816820736164674ff992281c08badb 100644 GIT binary patch delta 23 ccmeBV>|~ta#H3g-(Z!m}?so$N2$bpp08Y*Y@Bjb+ delta 23 ccmeBV>|~ta#3Zv}qKh?`J);5x2$bpp07iZU(f|Me diff --git a/cp/tests/unitgrade_data/Week06SpellCheck.pkl b/cp/tests/unitgrade_data/Week06SpellCheck.pkl index 726072d5c7f2f78db8eb35792fd57052262dda43..e917f58e045787c187c857fdb5e70a36f2e6d47b 100644 GIT binary patch delta 20 Zcmb=anPAN%e_*1m8K=q#1`sIK0{}wx1wjA+ delta 20 Zcmb=anPAN%y<wuQ8K>q01`sIK0{}wC1xf$_ diff --git a/cp/tests/unitgrade_data/Week06TextToNato.pkl b/cp/tests/unitgrade_data/Week06TextToNato.pkl index a63fe7a60b77202564e2b6ccc495d748805a1246..50f43117289dd523468989eb703550e5523ae5e1 100644 GIT binary patch delta 20 Zcmb=bnqbXjb!Vcj38zT{0|=Dr0RTe&1sVVV delta 20 Zcmb=bnqbYO{$rx838xVU0|=Dr0RTXk1jzsZ diff --git a/cp/tests/unitgrade_data/Week06TruncateAndNormalize.pkl b/cp/tests/unitgrade_data/Week06TruncateAndNormalize.pkl index bf549de170e56e1cc06aa565a927d5f40c2f5061..5cc2aea1c0230c031dfc3c7d10c7fee82ac0e918 100644 GIT binary patch delta 22 acmZo-Y+{_?%A_bT(cOqsxq$%$O7#Fbx&&wd delta 22 bcmZo-Y+{_?$|SX5qPr2NiU0!$l<ENhKDq>g diff --git a/cp/tests/unitgrade_data/Week06WordHistogram.pkl b/cp/tests/unitgrade_data/Week06WordHistogram.pkl index 581377a19dea24072306e1ed742d5211aec25947..169e714935daf5e4317155feb49e31590fd0f12a 100644 GIT binary patch delta 22 bcmZo*Y+#&V$E0v#qJuf7-U|j0DAfZ1M|K5; delta 22 acmZo*Y+#&V$E2h%(ZQV4B!&S5O7#FbIs{n& -- GitLab