diff --git a/cp/ex06/word_histogram.py b/cp/ex06/word_histogram.py index cab8ba1a1c189b2abcdbe0746e8c441389490d51..d111db3bbdd8521d409a84928176b321a13eb7dc 100644 --- a/cp/ex06/word_histogram.py +++ b/cp/ex06/word_histogram.py @@ -1,6 +1,6 @@ """Exercise 6.3-6.4.""" -def word_histogram(lines : list) -> list: +def word_histogram(lines : list) -> dict: """Return the word count histogram from the input lines. :param lines: The lines that are analyzed for word count. @@ -20,19 +20,15 @@ def extract_keyword(lines : str, ignore_list : list) -> dict: if __name__ == "__main__": # here you can try out your functions - print(word_histogram('I think therefore I am.')) + lines = ['This is the first sentence of text for you', 'This is the second sentence of text', 'This is for you'] + print("word_histogram") + print(word_histogram(lines)) # Ignore list of common words - ignore_list = [ - 'a', 'an', 'the', 'above', 'across', 'against', 'along', 'among', 'around', - 'at', 'before', 'behind', 'below', 'beneath', 'beside', 'between', 'by', - 'down', 'from', 'in', 'into', 'near', 'of', 'off', 'on', 'to', 'toward', - 'under', 'upon', 'with', 'within','function', 'for', 'and', 'nor', 'but', 'or', 'yet', 'so'] - - # Example usage: - lines = [ "Write the function word_histogram, which takes as argument a list containing lines of a text.", "The function should ... ... ... ... ... make a histogram of words that occur in the text."] - - keywords_result = extract_keyword(lines, ignore_list) + ignore_list = ['the', 'be', 'to', 'of', 'and', 'a', 'in', 'is', 'have', 'I'] # Print the 5 most occurring keywords - print(keywords_result) + print("extract_keywords") + print(extract_keyword(lines, ignore_list)) + + diff --git a/cp/project3/unitgrade_data/Week06MultiTap.pkl b/cp/project3/unitgrade_data/Week06MultiTap.pkl index 6ca33b7aa3d8e940d32f033632ff5138c10440a5..3e142880387cf4ed6b9863925d9f98ca7e368fa4 100644 Binary files a/cp/project3/unitgrade_data/Week06MultiTap.pkl and b/cp/project3/unitgrade_data/Week06MultiTap.pkl differ diff --git a/cp/project3/unitgrade_data/Week06SentimentAnalysis.pkl b/cp/project3/unitgrade_data/Week06SentimentAnalysis.pkl index cef5f2274e1687b74e02715c465b65ae610c6c37..6937e5ec0fc25ff3bde01a4927bd87425b508e0c 100644 Binary files a/cp/project3/unitgrade_data/Week06SentimentAnalysis.pkl and b/cp/project3/unitgrade_data/Week06SentimentAnalysis.pkl differ diff --git a/cp/tests/tests_week06.py b/cp/tests/tests_week06.py index 8169cf4221e85331dc683ffb300aea4a89f4097b..0a9023b3362ca6b1cd4e541eca58afe32d4420c4 100644 --- a/cp/tests/tests_week06.py +++ b/cp/tests/tests_week06.py @@ -22,7 +22,7 @@ class Week06WordHistogram(UTestCase): def test_WordHistogram(self): from cp.ex06.word_histogram import word_histogram self.assertEqual(word_histogram(["Write the function word_histogram."," which takes as argument a list containing lines of a text."]),{'write': 1, 'the': 1, 'function': 1, 'wordhistogram': 1, 'which': 1, 'takes': 1, 'as': 1, 'argument': 1, 'a': 2, 'list': 1, 'containing': 1, 'lines': 1, 'of': 1, 'text': 1}) - self.assertEqual(word_histogram(["The function should make a histogram of words that occur in the text.","Punctuation, spaces, numbers, and capitalization should be ignored.",]),{'the': 2, 'function': 1, 'should': 2, 'make': 1, 'a': 1, 'histogram': 1, 'of': 1, 'words': 1, 'that': 1, 'occur': 1, 'in': 1, 'text': 1, 'punctuation': 1, 'spaces': 1, 'numbers': 1, 'and': 1, 'capitalization': 1, 'be': 1, 'ignored': 1}) + self.assertEqual(word_histogram(["The function should make a histogram of words that occur in the text.","Punctuation, spaces, and capitalization should be ignored.",]),{'the': 2, 'function': 1, 'should': 2, 'make': 1, 'a': 1, 'histogram': 1, 'of': 1, 'words': 1, 'that': 1, 'occur': 1, 'in': 1, 'text': 1, 'punctuation': 1, 'spaces': 1, 'and': 1, 'capitalization': 1, 'be': 1, 'ignored': 1}) self.assertEqual(word_histogram(["The function should return a dictionary, e.g. {'write': 2, 'the': 12, 'function': 7, …}",]),{'the': 2, 'function': 2, 'should': 1, 'return': 1, 'a': 1, 'dictionary': 1, 'eg': 1, 'write': 1}) class Week06ExtractKeywords(UTestCase): diff --git a/cp/tests/unitgrade_data/Week06ExtractKeywords.pkl b/cp/tests/unitgrade_data/Week06ExtractKeywords.pkl index 68a7243852f77012525a4006f8214294de57c4cd..52287154838c3f31077d68fbc237d1009d7bc9d0 100644 Binary files a/cp/tests/unitgrade_data/Week06ExtractKeywords.pkl and b/cp/tests/unitgrade_data/Week06ExtractKeywords.pkl differ diff --git a/cp/tests/unitgrade_data/Week06GetPeopleByLanguage.pkl b/cp/tests/unitgrade_data/Week06GetPeopleByLanguage.pkl index 910f5dcc642db901c1998a4fe9b42ed10949fd69..321f2bd003e37bd566fcca5c6580b9a118ad51a1 100644 Binary files a/cp/tests/unitgrade_data/Week06GetPeopleByLanguage.pkl and b/cp/tests/unitgrade_data/Week06GetPeopleByLanguage.pkl differ diff --git a/cp/tests/unitgrade_data/Week06LetterHistogram.pkl b/cp/tests/unitgrade_data/Week06LetterHistogram.pkl index 2a7019b2f58629e083e892a7b9b6d162316be1a8..d23629ba7f89a0716eeaa4cc627800697b80804d 100644 Binary files a/cp/tests/unitgrade_data/Week06LetterHistogram.pkl and b/cp/tests/unitgrade_data/Week06LetterHistogram.pkl differ diff --git a/cp/tests/unitgrade_data/Week06MultiTap.pkl b/cp/tests/unitgrade_data/Week06MultiTap.pkl index 9e47cc3bcb9612d0a03f2ab900cffbeeba344858..6a947c4e055c7d96773184533831ce22c026c727 100644 Binary files a/cp/tests/unitgrade_data/Week06MultiTap.pkl and b/cp/tests/unitgrade_data/Week06MultiTap.pkl differ diff --git a/cp/tests/unitgrade_data/Week06SentimentAnalysis.pkl b/cp/tests/unitgrade_data/Week06SentimentAnalysis.pkl index 0ef765ada2bb5a6d7847cd8322cb3b992c568ee2..3c2acc03d5816820736164674ff992281c08badb 100644 Binary files a/cp/tests/unitgrade_data/Week06SentimentAnalysis.pkl and b/cp/tests/unitgrade_data/Week06SentimentAnalysis.pkl differ diff --git a/cp/tests/unitgrade_data/Week06SpellCheck.pkl b/cp/tests/unitgrade_data/Week06SpellCheck.pkl index 726072d5c7f2f78db8eb35792fd57052262dda43..e917f58e045787c187c857fdb5e70a36f2e6d47b 100644 Binary files a/cp/tests/unitgrade_data/Week06SpellCheck.pkl and b/cp/tests/unitgrade_data/Week06SpellCheck.pkl differ diff --git a/cp/tests/unitgrade_data/Week06TextToNato.pkl b/cp/tests/unitgrade_data/Week06TextToNato.pkl index a63fe7a60b77202564e2b6ccc495d748805a1246..50f43117289dd523468989eb703550e5523ae5e1 100644 Binary files a/cp/tests/unitgrade_data/Week06TextToNato.pkl and b/cp/tests/unitgrade_data/Week06TextToNato.pkl differ diff --git a/cp/tests/unitgrade_data/Week06TruncateAndNormalize.pkl b/cp/tests/unitgrade_data/Week06TruncateAndNormalize.pkl index bf549de170e56e1cc06aa565a927d5f40c2f5061..5cc2aea1c0230c031dfc3c7d10c7fee82ac0e918 100644 Binary files a/cp/tests/unitgrade_data/Week06TruncateAndNormalize.pkl and b/cp/tests/unitgrade_data/Week06TruncateAndNormalize.pkl differ diff --git a/cp/tests/unitgrade_data/Week06WordHistogram.pkl b/cp/tests/unitgrade_data/Week06WordHistogram.pkl index 581377a19dea24072306e1ed742d5211aec25947..169e714935daf5e4317155feb49e31590fd0f12a 100644 Binary files a/cp/tests/unitgrade_data/Week06WordHistogram.pkl and b/cp/tests/unitgrade_data/Week06WordHistogram.pkl differ