diff --git a/docs/mkdocs.py b/docs/mkdocs.py index ca35e72a2766b651a00ec03fbdea45d47fc2acaa..aa19556349f38c63a08a2bf7ccdc36c0c4b2aee1 100644 --- a/docs/mkdocs.py +++ b/docs/mkdocs.py @@ -8,7 +8,7 @@ if __name__ == "__main__": bibtex = make_bibliography("../setup.py", "./") out = subprocess.check_output("python --version").decode("utf-8") - fn = unitgrade_private.__path__[0] + "/../../examples/02631/instructor/programs/report1intro.py" + fn = unitgrade_private.__path__[0] + "/../../examples/02631/instructor/week5/report1intro.py" out = subprocess.check_output(f"cd {os.path.dirname(fn)} && python {os.path.basename(fn)} --noprogress", shell=True, encoding='utf8', errors='strict') out = out.replace("", "") diff --git a/docs/unitgrade.bib b/docs/unitgrade.bib index f75d1c2a5d552a5f8492fc4d41cd4f6324404115..b1cb0cd977511c51bec1b36f3fc8bfdc76bc39a5 100644 --- a/docs/unitgrade.bib +++ b/docs/unitgrade.bib @@ -1,7 +1,7 @@ @online{unitgrade, - title={Unitgrade (0.0.3): \texttt{pip install unitgrade}}, + title={Unitgrade (0.1.18): \texttt{pip install unitgrade}}, url={https://lab.compute.dtu.dk/tuhe/unitgrade}, - urldate = {2021-09-07}, + urldate = {2021-09-20}, month={9}, publisher={Technical University of Denmark (DTU)}, author={Tue Herlau}, diff --git a/src/unitgrade.egg-info/PKG-INFO b/src/unitgrade.egg-info/PKG-INFO index b480de9c020a826621a6d79d4f4b9937399d4259..a61ec2e7f4aa0afe208f9e2caeb9e957c364752e 100644 --- a/src/unitgrade.egg-info/PKG-INFO +++ b/src/unitgrade.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: unitgrade -Version: 0.1.17 +Version: 0.1.20 Summary: A student homework/exam evaluation framework build on pythons unittest framework. Home-page: https://lab.compute.dtu.dk/tuhe/unitgrade Author: Tue Herlau diff --git a/src/unitgrade/__init__.py b/src/unitgrade/__init__.py index 193ddd4829042bb7cea567a2b12121a0acc7c2fe..05e52e231ed43514d1ea0b6a9893fa1cf401a337 100644 --- a/src/unitgrade/__init__.py +++ b/src/unitgrade/__init__.py @@ -1,5 +1,5 @@ from unitgrade.version import __version__ -from unitgrade.utils import myround, msum, mfloor, Capturing, ActiveProgress, cache, hide +from unitgrade.utils import myround, msum, mfloor, Capturing, ActiveProgress, cache, hide, Capturing2 # from unitgrade import hide from unitgrade.framework import Report, UTestCase, NotebookTestCase from unitgrade.evaluate import evaluate_report_student diff --git a/src/unitgrade/evaluate.py b/src/unitgrade/evaluate.py index 6032b3837ac3751fff576aa94d985911d528a0fe..1be3c94905d656a21af555acee16ce47c96461eb 100644 --- a/src/unitgrade/evaluate.py +++ b/src/unitgrade/evaluate.py @@ -136,7 +136,11 @@ def evaluate_report(report, question=None, qitem=None, passall=False, verbose=Fa continue suite = loader.loadTestsFromTestCase(q) qtitle = q.question_title() if hasattr(q, 'question_title') else q.__qualname__ - q_title_print = "Question %i: %s"%(n+1, qtitle) + if not report.abbreviate_questions: + q_title_print = "Question %i: %s"%(n+1, qtitle) + else: + q_title_print = "q%i) %s" % (n + 1, qtitle) + print(q_title_print, end="") q.possible = 0 q.obtained = 0 @@ -148,10 +152,20 @@ def evaluate_report(report, question=None, qitem=None, passall=False, verbose=Fa UTextResult.unmute = unmute # Hacky as well. UTextResult.setUpClass_time = q._cache.get(((q.__name__, 'setUpClass'), 'time'), 3) if hasattr(q, '_cache') and q._cache is not None else 3 + res = UTextTestRunner(verbosity=2, resultclass=UTextResult).run(suite) details = {} for s, msg in res.successes + res.failures + res.errors: - key = (q.__name__, s._testMethodName) # cannot use the cache_id method bc. it is not compatible with plain unittest. + # from unittest.suite import _ErrorHolder + # from unittest import _Err + # if isinstance(s, _ErrorHolder) + if hasattr(s, '_testMethodName'): + key = (q.__name__, s._testMethodName) + else: + # In case s is an _ErrorHolder (unittest.suite) + key = (q.__name__, s.id()) + # key = (q.__name__, s._testMethodName) # cannot use the cache_id method bc. it is not compatible with plain unittest. + detail = {} if (s,msg) in res.successes: detail['status'] = "pass" @@ -174,7 +188,7 @@ def evaluate_report(report, question=None, qitem=None, passall=False, verbose=Fa possible = res.testsRun obtained = len(res.successes) - assert len(res.successes) + len(res.errors) + len(res.failures) == res.testsRun + # assert len(res.successes) + len(res.errors) + len(res.failures) == res.testsRun obtained = int(w * obtained * 1.0 / possible ) if possible > 0 else 0 score[n] = {'w': w, 'possible': w, 'obtained': obtained, 'items': details, 'title': qtitle, 'name': q.__name__, diff --git a/src/unitgrade/framework.py b/src/unitgrade/framework.py index c5ac7150b8cb89d4201dc745f44c5fd55cb95d78..a2d982037df204f865981cd5141c3c2c726b053a 100644 --- a/src/unitgrade/framework.py +++ b/src/unitgrade/framework.py @@ -11,6 +11,8 @@ import textwrap from unitgrade.runners import UTextResult from unitgrade.utils import gprint, Capturing2, Capturing colorama.init(autoreset=True) # auto resets your settings after every output +import numpy +numpy.seterr(all='raise') def setup_dir_by_class(C, base_dir): name = C.__class__.__name__ @@ -19,6 +21,8 @@ def setup_dir_by_class(C, base_dir): class Report: title = "report title" + abbreviate_questions = False # Should the test items start with 'Question ...' or just be q1). + version = None questions = [] pack_imports = [] @@ -41,7 +45,15 @@ class Report: def _import_base_relative(self): if hasattr(self.pack_imports[0], '__path__'): - root_dir = self.pack_imports[0].__path__._path[0] + # im = self.pack_imports[0] + # print("im!!!!") + # print(im.__path__) + # if isinstance(im, list): + # print("im is", im) + # __path__ was originally not a list; but now it seems like it behaves like a list. Add check if problem persists and treat it as either list or string. + # root_dir = self.pack_imports[0].__path__[0]._path[0] + root_dir = self.pack_imports[0].__path__[0] + else: root_dir = self.pack_imports[0].__file__ @@ -75,6 +87,8 @@ class Report: for q, _ in self.questions: q._with_coverage = True q._report = self + for q, _ in self.questions: + q._setup_answers_mode = True from unitgrade import evaluate_report_student evaluate_report_student(self, unmute=True) @@ -129,6 +143,10 @@ class UTestCase(unittest.TestCase): _covcache = None # Coverage cache. Written to if _with_coverage is true. _report = None # The report used. This is very, very hacky and should always be None. Don't rely on it! + # If true, the tests will not fail when cache is used. This is necesary since otherwise the cache will not be updated + # during setup, and the deploy script must be run many times. + _setup_answers_mode = False + def capture(self): if hasattr(self, '_stdout') and self._stdout is not None: @@ -174,6 +192,7 @@ class UTestCase(unittest.TestCase): try: # print("Stoppping coverage...") self.cov.stop() + # print("Coverage was stopped") # self.cov.html_report() # print("Success!") except Exception as e: @@ -216,12 +235,18 @@ class UTestCase(unittest.TestCase): assert len(lines) == len(lines2) # print("In file ", file, "context by lineno", data.contexts_by_lineno(file)) for ll in data.contexts_by_lineno(file): + # For empty files (e.g. __init__) there is a potential bug where coverage will return the file but lines2 will be = []. + # print("loop B: ll is", ll) l = ll-1 # print(l) - l1 = (lines[l] + " "*1000)[:80] - l2 = (lines2[l] + " "*1000)[:80] + # l1 = (lines[l] + " "*1000)[:80] + # l2 = (lines2[l] + " "*1000)[:80] # print("l is", l, l1, " " + l2, "file", file) - if lines2[l].strip() == garb: + # print("Checking if statement: ") + # print(l, lines2) + # print(">> ", lines2[l]) + # print(">", lines2[l].strip(), garb) + if l < len(lines2) and lines2[l].strip() == garb: # print("Got a hit at l", l) rel = os.path.relpath(child, root) cc = self._covcache @@ -237,8 +262,9 @@ class UTestCase(unittest.TestCase): cc[rel][fun] = (l, "\n".join(comments)) # print("found", rel, fun) self._cache_put((self.cache_id(), 'coverage'), self._covcache) - # import sys - # sys.exit() + # print("ending loop B") + # print("At end of outer loop A") + # print("-------------------------------------------- Tear down called") def shortDescriptionStandard(self): sd = super().shortDescription() @@ -305,6 +331,13 @@ class UTestCase(unittest.TestCase): self._ensure_cache_exists() return key in self.__class__._cache + def get_expected_test_value(self): + key = (self.cache_id(), 'assert') + id = self._assert_cache_index + cache = self._cache_get(key) + _expected = cache.get(id, f"Key {id} not found in cache; framework files missing. Please run deploy()") + return _expected + def wrap_assert(self, assert_fun, first, *args, **kwargs): key = (self.cache_id(), 'assert') if not self._cache_contains(key): @@ -313,15 +346,27 @@ class UTestCase(unittest.TestCase): key] = {} # A new dict. We manually insert it because we have to use that the dict is mutable. cache = self._cache_get(key) id = self._assert_cache_index - if not id in cache: - print("Warning, framework missing cache index", key, "id =", id) _expected = cache.get(id, f"Key {id} not found in cache; framework files missing. Please run deploy()") + if not id in cache: + print("Warning, framework missing cache index", key, "id =", id, " - The test will be skipped for now.") + if self._setup_answers_mode: + _expected = first # Bypass by setting equal to first. This is in case multiple self.assertEqualC's are run in a row and have to be set. # The order of these calls is important. If the method assert fails, we should still store the correct result in cache. cache[id] = first self._cache_put(key, cache) self._assert_cache_index += 1 - assert_fun(first, _expected, *args, **kwargs) + if not self._setup_answers_mode: + assert_fun(first, _expected, *args, **kwargs) + else: + try: + assert_fun(first, _expected, *args, **kwargs) + except Exception as e: + print("Mumble grumble. Cache function failed during class setup. Most likely due to old cache. Re-run deploy to check it pass.", id) + print("> first", first) + print("> expected", _expected) + print(e) + def assertEqualC(self, first, msg=None): self.wrap_assert(self.assertEqual, first, msg) @@ -334,7 +379,13 @@ class UTestCase(unittest.TestCase): if len(msg) > 0: msg += "\n" self.assertEqual(a1.shape, a2.shape, msg=msg + "Dimensions of input data does not agree.") + assert(np.all(np.isinf(a1) == np.isinf(a2))) # Check infinite part. + a1[np.isinf(a1)] = 0 + a2[np.isinf(a2)] = 0 + diff = np.abs(a1 - a2) + + # print(a1, a2, diff) return diff @@ -344,18 +395,26 @@ class UTestCase(unittest.TestCase): else: diff = self._shape_equal(first, second) if max(diff.flat) >= tol: - self.assertEqual(first, second, msg=msg + f"Not equal within tolerance {tol}") + from unittest.util import safe_repr + # msg = f'{safe_repr(first)} != {safe_repr(second)} : Not equal within tolerance {tol}' + # print(msg) + self.assertEqual(first, second, msg=f'Not equal within tolerance {tol}') - def assertL2(self, first, second=None, tol=1e-5, msg=None): + def assertL2(self, first, second=None, tol=1e-5, msg=None, relative=False): if second is None: - return self.wrap_assert(self.assertL2, first, tol=tol, msg=msg) + return self.wrap_assert(self.assertL2, first, tol=tol, msg=msg, relative=relative) else: # We first test using numpys build-in testing method to see if one coordinate deviates a great deal. # This gives us better output, and we know that the coordinate wise difference is lower than the norm difference. - np.testing.assert_allclose(first, second, atol=tol) + if not relative: + np.testing.assert_allclose(first, second, atol=tol) diff = self._shape_equal(first, second) diff = ( ( np.asarray( diff.flatten() )**2).sum() )**.5 - if max(diff.flat) >= tol: + + scale = (2/(np.linalg.norm(np.asarray(first).flat) + np.linalg.norm(np.asarray(second).flat)) ) if relative else 1 + if max(diff.flat)*scale >= tol: + msg = "" if msg is None else msg + print(f"|first - second|_2 = {diff} > {tol} ") self.assertEqual(first, second, msg=msg + f"Not equal within tolerance {tol}") def _cache_file(self): diff --git a/src/unitgrade/runners.py b/src/unitgrade/runners.py index cfb047754eacde604a95c73514e3490fc5b11818..040dc1fe3e08aec727a9a64debb692f22b785048 100644 --- a/src/unitgrade/runners.py +++ b/src/unitgrade/runners.py @@ -27,12 +27,22 @@ class UTextResult(unittest.TextTestResult): def addError(self, test, err): super(unittest.TextTestResult, self).addError(test, err) err = self.errors[-1][1] - stdout = sys.stdout.log.readlines() # Only works because we set sys.stdout to a unitgrade.Logger + if hasattr(sys.stdout, 'log'): + stdout = sys.stdout.log.readlines() # Only works because we set sys.stdout to a unitgrade.Logger + else: + stdout = "" self.errors[-1] = (self.errors[-1][0], {'return': None, 'stderr': err, 'stdout': stdout }) + if not hasattr(self, 'item_title_print'): + # In case setUpClass() fails with an error the short description may not be set. This will fix that problem. + self.item_title_print = test.shortDescription() + if self.item_title_print is None: # In case the short description is not set either... + self.item_title_print = test.id() + + self.cc_terminate(success=False) def addFailure(self, test, err): diff --git a/src/unitgrade/utils.py b/src/unitgrade/utils.py index f5d19704f172036c2c0b42b8b49f59a512dadffd..00ea948b56c9a3cac92239d0f7f5f3401cc4a2a5 100644 --- a/src/unitgrade/utils.py +++ b/src/unitgrade/utils.py @@ -70,7 +70,7 @@ class Capturing2(Capturing): def __exit__(self, *args): lines = self._stringio.getvalue().splitlines() txt = "\n".join(lines) - numbers = extract_numbers(txt) + numbers = extract_numbers(rm_progress_bar(txt)) self.extend(lines) del self._stringio # free up some memory sys.stdout = self._stdout @@ -121,7 +121,9 @@ class ActiveProgress(): def terminate(self): if not self._running: - raise Exception("Stopping a stopped progress bar. ") + print("Stopping a progress bar which is not running (class unitgrade.utils.ActiveProgress") + pass + # raise Exception("Stopping a stopped progress bar. ") self._running = False if self.show_progress_bar: self.thread.join() @@ -205,11 +207,16 @@ def cache(foo, typed=False): def wrapper(self, *args, **kwargs): key = (self.cache_id(), ("@cache", foo.__name__, _make_key(args, kwargs, typed))) # print(self._cache.keys()) + # for k in self._cache: + # print(k) if not self._cache_contains(key): value = foo(self, *args, **kwargs) self._cache_put(key, value) else: value = self._cache_get(key) + # This appears to be required since there are two caches. Otherwise, when deploy method is run twice, + # the cache will not be set correctly. + self._cache_put(key, value) return value return wrapper diff --git a/src/unitgrade/version.py b/src/unitgrade/version.py index e2c0985d7febd897787a8aa07abd689e841af565..6cd5b392035c9ed94fc8a0948803e8623a5067ce 100644 --- a/src/unitgrade/version.py +++ b/src/unitgrade/version.py @@ -1 +1 @@ -__version__ = "0.1.17" \ No newline at end of file +__version__ = "0.1.20" \ No newline at end of file