Commit 0b0740fe authored by tuhe

updates

parent 8ccb4655
@@ -141,6 +141,7 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
accept_problems=False, # No!
copydetect_check=False,
slim_rs=False, # Slim the rs data structure that is returned.
skip_stage_3=False, # If True, skip the dockerized stage 3 evaluation.
):
"""
This is the main verification script. It is the main entry point for verifying projects as downloaded from DTU Learn.
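
A minimal usage sketch (the base directory is hypothetical; only keyword arguments visible in this diff are shown):

    docker_stagewise_evaluation("/path/to/learn_download",
                                accept_problems=False,
                                copydetect_check=False,
                                slim_rs=False,
                                skip_stage_3=False)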
@@ -503,6 +504,38 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
_stage2()
print("> Stage 2 completed")
def load_produced_token_rs(id):
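# Gather the .token result files produced in stage 4 for this student id (from the
# {id}-python and {id}-token run directories), load them, and combine them into a single result when both exist.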
p1 = glob.glob(f"{stage4_dir}/{id}-python/*.token")
p2 = glob.glob(f"{stage4_dir}/{id}-token/*.token")
produced_python_rs = load_token(p1[0])[0] if len(p1) > 0 else None
produced_token_rs = load_token(p2[0])[0] if len(p2) > 0 else None
student_token_file = glob.glob(f"{stage2_dir}/{id}-token/**/*.token", recursive=True)
products = glob.glob(f"{stage4_dir}/{id}-*/*.token")
assert len(student_token_file) <= 1
if type == 'token': assert len(student_token_file) == 1
assert sum([produced_token_rs is not None, produced_python_rs is not None]) == len(products), f"very odd {id}"
if len(products) == 2:
rc = combine_token_results(produced_python_rs, produced_token_rs)
# Flag cases where the student has a test item that passes which the token file does not.
elif len(products) > 2:
raise Exception(f"Handins not recognized {products}")
elif len(products) == 1:
rc = produced_python_rs if produced_python_rs is not None else produced_token_rs
else:
rc = None
trs = {}
if produced_token_rs:
trs['token'] = produced_token_rs
if produced_python_rs:
trs['python'] = produced_python_rs
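# Shape of the returned value (illustrative only):
#   {'combined_rs': <combined token result or None>,
#    'individual_rs': {'token': <rs>, 'python': <rs>},   # keys present only if the corresponding .token exists
#    'products': [<paths to the stage-4 .token files>]}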
return {'combined_rs': rc, 'individual_rs': trs, 'products': products }
def _stage3(Dockerfile, fix_user=True, xvfb=True, unmute=False, verbose=False):
if Dockerfile is None:
images = download_docker_images()
@@ -532,34 +565,38 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
# combine the token and student python versions. Check if we are happy with the current result, i.e., do we get as many points as the student expected or not?
RERUN_TOKEN = True #Re-evaluate this staged execution and re-create the token.
if os.path.isdir(s4dir):
RERUN_TOKEN = False
# Try to get the old token file
# id, type = os.path.basename(fid).split("-")
# now combine the student and instructor versions of this file for an evaluation.
products = glob.glob(f"{stage4_dir}/{id}-*/*.token")
p1 = glob.glob(f"{stage4_dir}/{id}-python/*.token")
p2 = glob.glob(f"{stage4_dir}/{id}-token/*.token")
produced_python_rs, _ = load_token(p1[0]) if len(p1) > 0 else None
produced_token_rs = load_token(p2[0]) if len(p2) > 0 else None
assert len(student_token_file) <= 1
if type == 'token': assert len(student_token_file) == 1
if len(products) == 2:
rc = combine_token_results(produced_python_rs, produced_token_rs)
# flag when student has a test item that pass which the token file does not.
elif len(products) > 2:
raise Exception(f"Handins not recognized {products}")
elif len(products) == 1:
rc = produced_token_rs if produced_python_rs is not None else produced_token_rs
# rc = load_token(products[0])[0]
if len(products) == 0: # No .token file has actually been generated. So obviously we have to re-generate it.
# products = glob.glob(f"{stage4_dir}/{id}-*/*.token")
# p1 = glob.glob(f"{stage4_dir}/{id}-python/*.token")
# p2 = glob.glob(f"{stage4_dir}/{id}-token/*.token")
# produced_python_rs = load_token(p1[0])[0] if len(p1) > 0 else None
# produced_token_rs = load_token(p2[0])[0] if len(p2) > 0 else None
# assert len(student_token_file) <= 1
# if type == 'token': assert len(student_token_file) == 1
# assert sum( [produced_token_rs is not None, produced_python_rs is not None]) == len(products), f"very odd {id}"
# if len(products) == 2:
# rc = combine_token_results(produced_python_rs, produced_token_rs)
# # flag when student has a test item that pass which the token file does not.
# elif len(products) > 2:
# raise Exception(f"Handins not recognized {products}")
# elif len(products) == 1:
# rc = produced_python_rs if produced_python_rs is not None else produced_token_rs
# # rc = load_token(products[0])[0]
token_outcome = load_produced_token_rs(id)
produced_token_rs = token_outcome['individual_rs'].get('token', None)
produced_python_rs = token_outcome['individual_rs'].get('python', None)
rc = token_outcome['combined_rs']
if len(token_outcome['individual_rs']) == 0: # No .token file has actually been generated. So obviously we have to re-generate it.
RERUN_TOKEN = True
elif len(student_token_file) > 0 and id not in configuration.get('stage2', {}).get('skip_students', []):
# We check if the student id is marked as skipped. This is reserved for cases where a student uploads a token file but it is fundamentally broken (as determined by manual inspection).
@@ -576,26 +613,33 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
if "sources" not in rc:
print("no sources")
ptoken = produced_token_rs if produced_python_rs is not None else produced_token_rs # load_token(products[0])[0]
ptoken = produced_python_rs if produced_python_rs is not None else produced_token_rs # load_token(products[0])[0]
rename_map = conf.get('rename_items', {}) # Why give them a single test when I can sit on my ass and give them incompatible tests, WCGW?
for q in stoken['details']:
stoken['details'][q]['items'] = {rename_map.get(k, k): v for k, v in stoken['details'][q]['items'].items()}
s_better_than_i = []
if produced_token_rs is not None:
if ".".join(stoken['sources'][0]['report_module_specification']).lower().replace(" ", "") == ".".join(ptoken['sources'][0]['report_module_specification']).replace("_tests_complete", "").lower(): #
s_better_than_i, _ = determine_token_difference(stoken, produced_token_rs) # Since we are going on a per-question basis, we only look at the token files.
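# s_better_than_i holds the questions/items where the student's own token reports a pass
# that the instructor-produced token does not; these are reported further below.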
acceptable_broken = False
elif id in configuration.get('stage3', {}).get('accept_incompatible_token_names', []):
print("Incompatible token names accepted...")
s_better_than_i = []
acceptable_broken = True
# elif id in configuration.get('stage3', {}).get('accept_incompatible_token_names', []):
# print("Incompatible token names accepted...")
# s_better_than_i = []
# acceptable_broken = True
else:
print(".".join(stoken['sources'][0]['report_module_specification']).lower())
specification = ".".join(stoken['sources'][0]['report_module_specification']).lower()
# print(".".join(stoken['sources'][0]['report_module_specification']).lower())
if rc is not None and rc['sources'] is not None and rc['sources'][0] is not None:
print(".".join(rc['sources'][0]['report_module_specification']).replace("_tests_complete", "").lower())
messages['stage3'].append(f"{id}> Bad student token. Add id incompatible token names ['stage3']['accept_incompatible_token_names']. This likely occured because the student renamed the grade script. " + str(student_token_file))
messages['stage3'].append((id,
id in configuration.get('stage3', {}).get('accept_incompatible_token_names', []),
f"{id}> Bad student token. Add id incompatible token names ['stage3']['accept_incompatible_token_names']. This likely occured because the student renamed the grade script. " + str(student_token_file) + " and " + specification))
RERUN_TOKEN = True # Not that it really helps.
acceptable_broken = True
s_better_than_i = []
if len(s_better_than_i) > 0:
for q in s_better_than_i:
@@ -603,14 +647,13 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
if item == ('Week06SentimentAnalysis', 'test_sentiment_analysis'):
print("Yes we were better but it had to do with idiotic sentiment analysis...")
continue
messages['stage3'].append(f"{id}> ERROR: Student strictly better than instructor. q{q}. item: {item}")
messages['stage3'].append((id, id in conf.get('verified_problematic_items', {}).get(item, []), f"{id}> Student strictly better than instructor. q{q}. item: {item}") )
RERUN_TOKEN = True
rch = token_gather_hidden(rc)
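# token_gather_hidden groups hidden-test results under their public item, so
# rch['details'][q]['items'][item] below is a list of (public + hidden) results.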
# instructor_rs_token, _ = load_token([t for t in products if '-token' in t].pop())
instructor_rs_token = produced_token_rs
if produced_token_rs is not None:
for q in stoken['details']:
if acceptable_broken:
@@ -622,9 +665,8 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
if item == ("Week06SpellCheck", "test_SpellCheck"):
item = ("Week06SpellCheck", "test_spell_check")
if item not in rch['details'][q]['items']:
print( rch['details'][q]['items'].keys() )
# if item not in rch['details'][q]['items']:
# print( rch['details'][q]['items'].keys() )
# print(rch['details'][q]['items'].keys())
@@ -632,9 +674,9 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
# Since we combine the tokens, we can only trust (our) token product and not the combined one.
iitems = [instructor_rs_token['details'][q]['items'][item]]
iitems2 = [produced_token_rs['details'][q]['items'][item]]
if sitem['status'] == 'pass' and not all([i['status'] == 'pass' for i in iitems]) and id not in conf.get('verified_problematic_items', {}).get(item, []) and not conf.get("accept_public_ok_hidden_failed", False):
if sitem['status'] == 'pass' and not all([i['status'] == 'pass' for i in iitems2]) and id not in conf.get('verified_problematic_items', {}).get(item, []) and not conf.get("accept_public_ok_hidden_failed", False):
# print('disagreement found.')
iitems = rch['details'][q]['items'][item]
fails = [i['nice_title'] for i in iitems if i['status'] != 'pass']
@@ -644,12 +686,11 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
from unitgrade_private.token_loader import get_coverage_files
cfiles = get_coverage_files(student_token_file[0], instructor_grade_script_dir=os.path.dirname(grade_script_destination))
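# cfiles maps each question to its coverage information; cfiles[q][(item, 'coverage')]
# lists the student source files exercised by that test item (used for fls below).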
# with open(f"{os.path.dirname(grade_script_destination)}/unitgrade_data/{stoken['details'][q]['name']}.pkl", 'rb') as f:
# pk = pickle.load(f)
# fls = list( pk[(item, 'coverage')].keys() )[0]
fls = cfiles[q][(item, 'coverage')][0]
if fid.endswith("token"):
failures = [i for i in iitems if i['status'] != 'pass']
failures = [i for i in iitems2 if i['status'] != 'pass']
print("*"*100)
print(item)
print(id)
@@ -707,6 +748,9 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
fcom = f"{cdcom} && {dcom}"
print(f"{k}> Running docker command in", fid)
print(fcom)
with open(base_directory + "/dockercom.txt", 'w') as fdock:
fdock.write( f"{k}> Running docker command in {fid}\n{fcom}" )
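# The docker command is also written to dockercom.txt above, presumably so a failing run can be repeated by hand.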
# if os.path.basename(fid) == 'Group33-token':
# a = 234
from unitgrade.utils import Capturing2, Capturing, Logger
@@ -761,6 +805,7 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
print("tokens", tokens)
raise e
if not skip_stage_3:
_stage3(Dockerfile, unmute=unmute_docker)
print("> Stage 3 completed")
@@ -799,12 +844,18 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
if len(found_students) != len(glob.glob(stage1_dir + "/*")):
a = list(found_students.keys())
b = [os.path.basename(d) for d in glob.glob(stage1_dir + "/*")]
print("Found students idffer from all downloads. Very bad.", [s for s in b if s not in a])
raise Exception("Found students differ from all downloads. Very bad.", [s for s in b if s not in a])
assert len(found_students) == len(glob.glob(stage1_dir + "/*")) # Ensure all students have been found.
for id in found_students:
product = load_produced_token_rs(id)
assert ("python" in found_students[id]) == ('python' in product['individual_rs'])
assert ("token" in found_students[id]) == ('token' in product['individual_rs'])
if 'python' in found_students[id] and 'token' in found_students[id]:
t_best, p_best = determine_token_difference(load_token(found_students[id]['token'])[0], load_token(found_students[id]['python'])[0])
t_best, p_best = determine_token_difference( product['individual_rs']['token'], product['individual_rs']['python'])
if len(p_best) > 0:
for q in p_best.values():
for item in q['items']:
@@ -824,7 +875,7 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
messages['report'].append(s)
if configuration.get("stage_report", {}).get("accept_only_py_no_token", False):
stoken = token_gather_hidden(load_token(found_students[id]['python'])[0])
stoken = token_gather_hidden(product['individual_rs']['python'])
print("=" * 50)
print(s)
dd = defaultdict(list)
@@ -837,14 +888,14 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
else:
raise Exception(id + "> No code handin for this student")
tkns = [found_students[id][key] for key in ['token','python'] if key in found_students[id]]
if len(tkns) == 2:
t = combine_token_results(load_token(tkns[0])[0], load_token(tkns[1])[0])
else:
t, _ = load_token(tkns[0])
# tkns = [found_students[id][key] for key in ['token','python'] if key in found_students[id]]
# if len(tkns) == 2:
# t = combine_token_results(load_token(tkns[0])[0], load_token(tkns[1])[0])
# else:
# t, _ = load_token(tkns[0])
t = product['combined_rs']
# strange id is s234546
# rs['s223845']['details']
if configuration['stage3'].get("fudge_accept_student_evaluation", False):
# In this case, we limit the number of items that are available to these since we rely on the student token files.
# this means the token file can have different evaluation items, which would be shit.
@@ -921,7 +972,7 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
from unitgrade_private.plagiarism.mossit import moss_it2023
print("> running moss setup")
moss_it2023(submissions_base_dir=stage4_dir, submissions_pattern="*-token", instructor_grade_script=instructor_grade_script,
student_files_dir=student_handout_folder, submit_to_server=not copydetect_check)
student_files_dir=student_handout_folder, submit_to_server=not copydetect_check, as_single_file=True)
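# Note: as_single_file is new in this commit; by its name it presumably concatenates each
# submission's sources into a single file before the MOSS upload (its use is not shown in this diff).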
if plagiarism_check and copydetect_check: # This check is based on detector and is deprecated. I don't like detector.
print("> running copydetect check")
@@ -57,7 +57,7 @@ def get_id(moss_pl):
return pl.split("=")[1][:-1]
def moss_it2023(submissions_base_dir=None, submissions_pattern="*-token", whitelisted_tokens="", instructor_grade_script=None, moss_id=None,
student_files_dir=None, submit_to_server=True):
student_files_dir=None, submit_to_server=True, as_single_file=False):
# submissions_base_dir = stage4_dir
submissions_pattern = "*-token"
@@ -144,6 +144,9 @@ def moss_it2023(submissions_base_dir=None, submissions_pattern="*-token", whitel
print("You need to specify a moss id. You can do that by putting the moss.pl script at:", os.path.expanduser('~') + "/Documents/moss.pl")
return
if submit_to_server:
m = mosspy.Moss(moss_id, "python")
for f in glob.glob(working_dir + "/handouts/**/*.py", recursive=True):
@@ -188,7 +191,10 @@ def moss_it(whitelist_dir="", submissions_dir="", moss_id=None, blacklist=None):
m.addFilesByWildcard(tmp_submission_dir + "/*/*.py")
print("> Calling moss")
url = m.send(lambda file_path, display_name: print('*', end='', flush=True))
print(f"> Using: {tmp_base=}")
print(f"> Using: {tmp_submission_dir=}")
url = m.send(lambda file_path, display_name: print(f'{file_path}: {display_name}', end='\n', flush=True))
print()
print("Report Url: " + url)
report_dir = os.path.dirname(whitelist_dir) + "/report"
@@ -198,4 +204,4 @@ def moss_it(whitelist_dir="", submissions_dir="", moss_id=None, blacklist=None):
r = report_dir + "/report.html"
m.saveWebPage(url, r)
print("Saved report to:", r)
mosspy.download_report(url, report_dir, connections=8, log_level=10, on_read=lambda u: print('*', end='', flush=True))
mosspy.download_report(url, report_dir, connections=8, log_level=10, on_read=lambda u: print(f'* {u}', end='', flush=True))
@@ -145,6 +145,9 @@ def combine_token_results(token_a_rs, token_b_rs, combine_at_question_level=True
rsd = {}
n_tot = 0
n_obt = 0
import copy
token_a_rs = copy.deepcopy(token_a_rs)
token_b_rs = copy.deepcopy(token_b_rs)
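# Work on deep copies, presumably so that merging the two results below does not mutate the caller's token structures.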
for q in set(token_a_rs['details'].keys()) | set(token_b_rs['details'].keys()):
itemsa = list(token_a_rs['details'][q]['items'])