From 8ccb465549d4f4891f0ed5dc71b92b39411470da Mon Sep 17 00:00:00 2001
From: Tue Herlau <tuhe@dtu.dk>
Date: Thu, 7 Dec 2023 21:00:31 +0100
Subject: [PATCH] Updates to pipeline

---
 src/unitgrade_devel.egg-info/PKG-INFO         |   2 +-
 src/unitgrade_private/hidden_gather_upload.py |   8 +
 src/unitgrade_private/pipelines/dtulearn.py   | 408 +++++++++++++-----
 src/unitgrade_private/plagiarism/mossit.py    |  62 +--
 src/unitgrade_private/token_loader.py         |  21 +-
 src/unitgrade_private/version.py              |   2 +-
 6 files changed, 362 insertions(+), 141 deletions(-)

diff --git a/src/unitgrade_devel.egg-info/PKG-INFO b/src/unitgrade_devel.egg-info/PKG-INFO
index cc0a865..f367b93 100644
--- a/src/unitgrade_devel.egg-info/PKG-INFO
+++ b/src/unitgrade_devel.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: unitgrade-devel
-Version: 0.1.62
+Version: 0.1.64
 Summary: A set of tools to develop unitgrade tests and reports and later evaluate them
 Home-page: https://lab.compute.dtu.dk/tuhe/unitgrade_private
 Author: Tue Herlau
diff --git a/src/unitgrade_private/hidden_gather_upload.py b/src/unitgrade_private/hidden_gather_upload.py
index 1ab502f..a57d918 100644
--- a/src/unitgrade_private/hidden_gather_upload.py
+++ b/src/unitgrade_private/hidden_gather_upload.py
@@ -218,11 +218,19 @@ Error: I could not find information about previously generated tokens. The likel
     token = os.path.normpath(os.path.join(output_dir, token))
 
     b_hash = save_token(results, "\n".join(s_include), token)
+    mfdir = "unitgrade_data"
     try:
+        mfdir = os.path.dirname(report._manifest_file())
+
         with open(report._manifest_file(), 'a') as _file:
             _file.write("\n"+token + " " + b_hash)
     except Exception as e:
+        print("A problem occured while writing a file to the directory: ", mfdir)
+        print("The likely reason is that you removed the directory by accident, in which case you can re-create the directory to avoid this warning")
+        print("The exact error that occured was:")
         print(e)
+        print("The script will now complete as usual")
+
 
     # ug_dir = os.path.dirname(report._artifact_file())
     # ug_name = os.path.basename(report._artifact_file())
diff --git a/src/unitgrade_private/pipelines/dtulearn.py b/src/unitgrade_private/pipelines/dtulearn.py
index 4521931..e1b7c60 100644
--- a/src/unitgrade_private/pipelines/dtulearn.py
+++ b/src/unitgrade_private/pipelines/dtulearn.py
@@ -200,7 +200,7 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
     info = class_information()
 
     def _stage0():
-
+        NUMBER = 0
         # stage0_excluded_files = ["*.pdf"]
         stage0_excluded_files = configuration['stage0']['excluded_files']
         found = []
@@ -216,12 +216,17 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
 
             unpack_zip_file_recursively(z[:-4] + ".zip", z[:-4] + "/raw", remove_zipfiles=True)
 
-            for f in glob.glob(z[:-4] + "/raw/*"):
+            if os.path.isdir(z[:-4] + "/raw/archive") and len( glob.glob(z[:-4] + "/raw/*") ) == 1:
+                rawdir = z[:-4] + "/raw/archive"
+            else:
+                rawdir = z[:-4] + "/raw"
+
+            for f in glob.glob(rawdir + "/*"):
                 if os.path.basename(f) == "index.html":
                     continue
                 elif os.path.isdir(f):
                     id = fname2id(os.path.basename(f), info)
-
+                    # fname2id(os.path.basename(f), info)
                     # now get the directory.
 
                     if id not in relevant_directories:
@@ -240,12 +245,18 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
                     raise Exception(
                         "The .zip files can only contain directories with names such as: '67914-43587 - s214598, Andreas Rahbek-Palm - 09 February, 2023 441 PM', got " + student_handout_folder)
 
+
             for id, f in relevant_directories.items():
                 found.append(id)
+                NUMBER += 1
+                if (lm := configuration['stage0'].get("limit", None)) is not None:
+                    if NUMBER > lm:
+                        break
+
                 dest = stage1_dir +"/" + id
 
                 if not os.path.isdir(dest):
-                    shutil.copytree(f, dest )
+                    shutil.copytree(f, dest)
                 else:
                     # merge the files...
                     for new_file in glob.glob(f +"/**/*", recursive=True):
@@ -255,20 +266,76 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
                 # Now remove blacklisted files to simplify it.
                 for g in glob.glob(dest +"/**/*", recursive=True):
                     import fnmatch
-                    if g.endswith(".py"):
-                        print(g)
-                    if os.path.basename(g) in configuration['stage0']['rename']:
-                        dst_name = configuration['stage0']['rename'][os.path.basename(g)]
+                    # if g.endswith(".py"):
+                    #     print(g)
+
+                    if len([ex for ex in stage0_excluded_files if fnmatch.fnmatch(g, ex)]) > 0:
+                        # move to graveyard of broken stuff.
+                        if not os.path.isdir(ff := os.path.dirname(stage0_dir) + "/removed_files/" + id):
+                            os.makedirs(ff)
+                        shutil.move(g, ff + f"/{id} -- " + os.path.basename(g))
+                        continue
+
+                    dst_name = None
+                    for pat in configuration['stage0']['rename']:
+                        if fnmatch.fnmatch(g, pat):
+                            dst_name = configuration['stage0']['rename'][pat]
+                            break
+
+                    if dst_name is not None and dst_name != os.path.basename(g): #os.path.basename(g) in configuration['stage0']['rename']:
+                        # dst_name = configuration['stage0']['rename'][os.path.basename(g)]
                         dst_name = os.path.dirname(g) + "/" + dst_name
                         if not os.path.isfile(dst_name):
                             shutil.move(g, dst_name)
 
-                    if len([ex for ex in stage0_excluded_files if fnmatch.fnmatch(g, ex)]) > 0:
-                        os.remove(g)
 
 
-    _stage0()
+                # Unpack zip files.
+                for new_file in glob.glob(dest + "/**/*.zip", recursive=True):
+                    from zipfile import ZipFile
 
+                    # loading the temp.zip and creating a zip object
+                    with ZipFile(new_file, 'r') as zObject:
+                        if os.path.isdir(ff_ := new_file + "-unpacked"):
+                            shutil.rmtree(ff_)
+
+                        # Extracting all the members of the zip
+                        # into a specific location.
+                        zObject.extractall(new_file + "-unpacked")
+                        os.remove(new_file)
+
+                tokens = glob.glob(dest + "/**/*.token", recursive=True)
+                if len(tokens) > 1: # If the user has too many token files, we may be able to safely delete one of them if they are the same. Otherwise we nag.
+                    dd = [open(t, 'rb').read() for t in tokens]
+                    if len(list(set(dd))) == 1:
+                        print("The two token files are the same. So just delete one of them.")
+                        for t in tokens[1:]:
+                            os.remove(t)
+                    else:
+
+                        scored = [(int(t.replace("-", "_").split("_")[-3]), t) for t in tokens]
+                        best = []
+                        bad = []
+                        for s, t in scored:
+                            if s == max([s for s, _ in scored]):
+                                best.append(t)
+                            else:
+                                bad.append(t)
+                        if len(best) == 1:
+                            for t in bad:
+                                shutil.move(t, os.path.dirname(stage0_dir) + "/removed_files/" + id + "/" + os.path.basename(t))
+                                messages['stage0'].append((id, True, "Student had more than one token file; using the one with the most points."))
+
+                        else:
+                            raise Exception(f"{id} has too many tokens: The tokens found are {tokens}")
+
+
+                if len(glob.glob(dest + "/*")) == 0:
+                    # If the destination ends up being empty, remove it. There are no handins.
+                    shutil.rmtree(dest)
+    print("> Starting stage 0")
+    _stage0()
+    print("> Stage 0 completed")
     def _stage1():
         # In this we move on to stage1.
         # In this stage, we move the files over to a staging area. The staging area consist of actual (complete) handins (tokens or .py files).
@@ -313,8 +380,10 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
                     if not os.path.isdir(os.path.dirname(dst)):
                         os.makedirs(os.path.dirname(dst))
                     shutil.copy(f, dst)
-                    # print(dst)
+
+
     _stage1()
+    print("> Stage 1 completed")
     # Now move through the files and extract. I guess we do that by recursively unpacking them?
 
     def get_grade_script_location(instructor_grade_script):
@@ -324,7 +393,7 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
 
     def _stage2(fix_user=True, xvfb=True):
         # configuration
-        """ Unpack token or prep python files. """
+        """ Unpack token or prep python files. for execution. """
         for fid in glob.glob(stage2_dir + "/*"):
             if "s234792" in fid:
                 print(fid)
@@ -337,7 +406,13 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
             grade_script_relative = get_grade_script_location(instructor_grade_script)
             if type == "token":
                 tokens = glob.glob(fid + "/**/*.token", recursive=True)
-                assert len(tokens) == 1, f"{id} has too many tokens: The tokens found are {tokens}"
+                if len(tokens) != 1:
+                    dd = [open(t, 'rb').read() for t in tokens]
+                    if len(list(set(dd))) == 1:
+                        print("The two token files are the same. So just delete one of them.")
+                    raise Exception(f"{id} has too many tokens: The tokens found are {tokens}")
+
+
                 try:
                     unpack_sources_from_token(tokens[0], s3dir)
                 except Exception as e:
@@ -349,15 +424,20 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
                     else:
                         raise e
 
+
                 # This will copy in resource files etc. that may not be contained in the .token file.
                 for g in glob.glob(student_handout_folder + "/**/*.*", recursive=True):
                     rg = os.path.relpath(g, student_handout_folder)
                     if not os.path.isfile(s3dir + "/"+rg) and not rg.endswith(".py"):
                         if not os.path.isdir(os.path.dirname(s3dir + "/"+rg)): os.makedirs(os.path.dirname(s3dir + "/"+rg))
                         if os.path.isfile(g):
+                            # print(s3dir + "/" + rg)
+                            # if "/home/tuhe" in rg:
+                            #     print("Wrong?")
                             shutil.copy(g, s3dir + "/"+rg)
                         else:
-                            shutil.copytree(g, s3dir + "/" + g)
+                            print(s3dir + "/" + os.path.relpath(g, student_handout_folder))
+                            shutil.copytree(g, s3dir + "/" + os.path.relpath(g, student_handout_folder))
             else:
                 shutil.copytree(student_handout_folder, s3dir)
                 for g in glob.glob(fid+"/**/*.*", recursive=True):
@@ -365,15 +445,30 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
                     fn = glob.glob(student_handout_folder + "/**/" + os.path.basename(g), recursive=True)
                     if len(fn) == 0:
                         print("I was unable to locate", g)
-                        print("Bad?")
+
+                        messages['stage2'].append( (id, False, "Files did not have a match " + g))
+
+                        files_without_a_match = os.path.dirname(os.path.dirname(s3dir)) + "/files_without_a_match"
+                        if not os.path.isdir(files_without_a_match):
+                            os.makedirs(files_without_a_match)
+                            shutil.copy(g, files_without_a_match + f"/{id} -- " + os.path.basename(g))
+
                         # os.path.relpath(fn[0], student_handout_folder)
-                        dst = os.path.relpath(g, fid) # Take it relative to the currnet directory.
+                        dst = s3dir + "/"+ os.path.relpath(g, fid) # Take it relative to the currnet directory.
                     else:
                         # dst = s3dir + "/"+os.path.dirname(grade_script_relative) + "/"+ os.path.basename(g)
                         dst = s3dir + "/" + os.path.relpath(fn[0], student_handout_folder)
 
                     if os.path.isfile(dst):
-                        shutil.copy(g, dst)
+                        if not os.path.isdir(dn_ := os.path.dirname(dst)):
+                            os.makedirs(dn_)
+                            # import time
+                            # time.sleep(0.1)
+                        try:
+                            if os.path.isfile(g):
+                                shutil.copy(g, dst)
+                        except Exception as e:
+                            raise e
                     else:
                         shutil.move(g, dst)
                         print("> Stage two: Created", dst)
@@ -395,6 +490,7 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
                     with open(f, 'r') as ff:
                         ff.read()
                 except UnicodeDecodeError as e:
+                    print(f)
                     print("""Student file not readable. add to stage2 kill list as in { configurations['projects']['project1']['stage3']['exclude_if_bad_encoding'] += ['*/~BROMIUM/*.py'] }""", f)
                     for p in configuration['stage2'].get('exclude_if_bad_encoding', []):
                         if fnmatch.fnmatch(f, p):
@@ -404,11 +500,8 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
                     if os.path.isfile(f):
                         raise e
 
-
-
-
-
     _stage2()
+    print("> Stage 2 completed")
 
     def _stage3(Dockerfile, fix_user=True, xvfb=True, unmute=False, verbose=False):
         if Dockerfile is None:
@@ -422,9 +515,17 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
         # did_nag_about = {}
         conf = configuration.get('stage3', {})
 
-        for fid in glob.glob(stage3_dir + "/*"):
+        for k, fid in enumerate(al_ := glob.glob(stage3_dir + "/*")):
             # if "s234792" in fid:
             #     print(fid)
+            if (k+1) % 100 == 0:
+                print(f"stage3> at student {k+1} of {len(al_)}")
+
+            if "-" not in os.path.basename(fid):
+                print("Bad file! ", fid)
+            id, type = os.path.basename(fid).split("-")
+            student_token_file = glob.glob(f"{stage2_dir}/{id}-token/**/*.token", recursive=True)
+
             s4dir = f"{stage4_dir}/{os.path.basename(fid)}"
             grade_script_relative = get_grade_script_location(instructor_grade_script)
             grade_script_destination = os.path.dirname(fid + "/" + grade_script_relative) + "/" + os.path.basename(instructor_grade_script)
@@ -434,20 +535,28 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
             if os.path.isdir(s4dir):
                 RERUN_TOKEN = False
                 # Try to get the old token file
-                id, type = os.path.basename(fid).split("-")
+                # id, type = os.path.basename(fid).split("-")
                 # now combine the student and instructor versions of this file for an evaluations.
+
                 products = glob.glob(f"{stage4_dir}/{id}-*/*.token")
-                student_token_file = glob.glob(f"{stage2_dir}/{id}-token/**/*.token", recursive=True)
+                p1 = glob.glob(f"{stage4_dir}/{id}-python/*.token")
+                p2 = glob.glob(f"{stage4_dir}/{id}-token/*.token")
+
+                produced_python_rs, _ = load_token(p1[0]) if len(p1) > 0 else None
+                produced_token_rs = load_token(p2[0]) if len(p2) > 0 else None
+
                 assert len(student_token_file) <= 1
                 if type == 'token': assert len(student_token_file) == 1
 
+
                 if len(products) == 2:
-                    rc = combine_token_results(load_token(products[0])[0], load_token(products[1])[0])
+                    rc = combine_token_results(produced_python_rs, produced_token_rs)
                     # flag when student has a test item that pass which the token file does not.
                 elif len(products) > 2:
                     raise Exception(f"Handins not recognized {products}")
                 elif len(products) == 1:
-                    rc = load_token(products[0])[0]
+                    rc = produced_token_rs if produced_python_rs is not None else produced_token_rs
+                    # rc = load_token(products[0])[0]
 
 
                 if len(products) == 0: # No .token file has actually been generated. So obviously we have to re-generate it.
@@ -467,14 +576,14 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
                     if "sources" not in rc:
                         print("no sources")
 
-                    ptoken = load_token(products[0])[0]
+                    ptoken = produced_token_rs if produced_python_rs is not None else produced_token_rs # load_token(products[0])[0]
 
                     rename_map = conf.get('rename_items', {})  # Why give them a single test when I can sit on my ass and give them incompatible tests, WCGW?
                     for q in stoken['details']:
                         stoken['details'][q]['items'] = {rename_map.get(k, k): v for k, v in stoken['details'][q]['items'].items()}
 
                     if ".".join(stoken['sources'][0]['report_module_specification']).lower().replace(" ", "") == ".".join(ptoken['sources'][0]['report_module_specification']).replace("_tests_complete", "").lower(): #
-                        s_better_than_i, _ = determine_token_difference(stoken, rc)
+                        s_better_than_i, _ = determine_token_difference(stoken, produced_token_rs) # Since we are going on a per-question basis, we only look at the token files.
                         acceptable_broken = False
                     elif id in configuration.get('stage3', {}).get('accept_incompatible_token_names', []):
                         print("Incompatible token names accepted...")
@@ -482,7 +591,8 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
                         acceptable_broken = True
                     else:
                         print(".".join(stoken['sources'][0]['report_module_specification']).lower())
-                        print(".".join(rc['sources'][0]['report_module_specification']).replace("_tests_complete", "").lower())
+                        if rc is not None and rc['sources'] is not None and rc['sources'][0] is not None:
+                            print(".".join(rc['sources'][0]['report_module_specification']).replace("_tests_complete", "").lower())
                         messages['stage3'].append(f"{id}> Bad student token. Add id incompatible token names ['stage3']['accept_incompatible_token_names']. This likely occured because the student renamed the grade script. " + str(student_token_file))
                         RERUN_TOKEN = True # Not hat it really helps.
                         acceptable_broken = True
@@ -491,19 +601,16 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
                         for q in s_better_than_i:
                             for item in s_better_than_i[q]['items']:
                                 if item == ('Week06SentimentAnalysis', 'test_sentiment_analysis'):
-                                    print("Yes we were better but it had to do with idiotic sentiment analysis thanks a fuck...")
+                                    print("Yes we were better but it had to do with idiotic sentiment analysis...")
                                     continue
                                 messages['stage3'].append(f"{id}> ERROR: Student strictly better than instructor. q{q}. item: {item}")
                                 RERUN_TOKEN = True
 
-                                # for q in stoken['details']:
-                                #     print(stoken['details'][q]['name'], ptoken['details'][q]['name'] )
-                                #
-                                #     print(stoken['details'][5] )
-                                #     print( ptoken['details'][5] )
+                    rch = token_gather_hidden(rc)
 
+                    # instructor_rs_token, _ = load_token([t for t in products if '-token' in t].pop())
+                    instructor_rs_token = produced_token_rs
 
-                    rch = token_gather_hidden(rc)
 
                     for q in stoken['details']:
                         if acceptable_broken:
@@ -521,13 +628,19 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
 
                             # print(rch['details'][q]['items'].keys())
 
-                            iitems = rch['details'][q]['items'][item]
+                            # token_products =
+                            # Since we combine the token we can only trust (our) token product and not the combined one.
+
+
+                            iitems = [instructor_rs_token['details'][q]['items'][item]]
 
                             if sitem['status'] == 'pass' and not all([i['status'] == 'pass' for i in iitems]) and id not in conf.get('verified_problematic_items', {}).get(item, []) and not conf.get("accept_public_ok_hidden_failed", False):
                                 # print('disagreement found.')
                                 iitems = rch['details'][q]['items'][item]
                                 fails = [i['nice_title'] for i in iitems if i['status'] != 'pass']
+
+                                  messages['stage3'].append(f"{id} {nn+1}> Hidden test disagreement. Public ok but hidden got failures in: {fails}, {item}")
+
                                 from unitgrade_private.token_loader import get_coverage_files
                                 cfiles = get_coverage_files(student_token_file[0], instructor_grade_script_dir=os.path.dirname(grade_script_destination))
 
@@ -577,7 +690,7 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
                 dockname = tag
 
             pycom = ".".join(grade_script_relative[:-3].split("/")) + " --noprogress"
-            pycom = "python3 -m " + pycom
+            pycom = "python3.11 -m " + pycom
             if fix_user:
                 user_cmd = ' --user "$(id -u):$(id -g)" '
             else:
@@ -592,10 +705,10 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
             dcom = f"docker run {user_cmd} -v {tmp_path}:/home {dockname} {pycom}"
             cdcom = f"cd {os.path.dirname(Dockerfile)}"
             fcom = f"{cdcom}  && {dcom}"
-            print("> Running docker command in", fid)
+            print(f"{k}> Running docker command in", fid)
             print(fcom)
-            if os.path.basename(fid) == 'Group33-token':
-                a = 234
+            # if os.path.basename(fid) == 'Group33-token':
+            #     a = 234
             from unitgrade.utils import Capturing2, Capturing, Logger
 
             # from spb.defaults import * # spb / defaults.py
@@ -628,7 +741,19 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
             for f in glob.glob(s4dir + "/*.token"):
                 os.remove(f)
             try:
-                shutil.move(tokens[0], s4dir + "/" + os.path.basename(tokens[0]))
+                real_dest = s4dir + "/" + os.path.basename(tokens[0])
+
+                if conf.get('fudge_accept_student_evaluation', False):
+                    try:
+                        dest = real_dest.split("handin_")[0] + "handin_" + student_token_file[0].split('handin_')[1]
+                    except Exception as e:
+                        dest = real_dest
+                    shutil.copy(student_token_file[0], dest)
+                    # raise e
+                else:
+                    shutil.move(tokens[0], real_dest)
+
+
             except Exception as e:
                 print("-"*50)
                 print("Got a problem wit hthis student")
@@ -637,11 +762,11 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
                 raise e
 
     _stage3(Dockerfile, unmute=unmute_docker)
+    print("> Stage 3 completed")
 
     def _stage_report():
         found_students = defaultdict(dict)
         rs = {}
-
         for fid in glob.glob(stage1_dir + "/*"):
             id = os.path.basename(fid)
             rs[id] = {'token_downloaded': None, 'token_produced': []}
@@ -657,7 +782,6 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
 
                 rs[id]['token_downloaded_hash'] = blake_hash
 
-
             for cid in glob.glob(f"{stage4_dir}/{id}-*"):
                 type = os.path.basename(cid).split("-")[1]
                 tokens = glob.glob(f"{cid}/*.token")
@@ -672,6 +796,11 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
 
                 rs[id]['token_produced'].append(tokens[0])
 
+        if len(found_students) != len(glob.glob(stage1_dir + "/*")):
+            a = list(found_students.keys())
+            b = [os.path.basename(d) for d in glob.glob(stage1_dir + "/*")]
+            print("Found students idffer from all downloads. Very bad.",  [s for s in b if s not in a])
+
         assert len(found_students) == len(glob.glob(stage1_dir + "/*")) # Ensure all students have been found.
         for id in found_students:
             if 'python' in found_students[id] and 'token' in found_students[id]:
@@ -679,28 +808,32 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
                 if len(p_best) > 0:
                     for q in p_best.values():
                         for item in q['items']:
-                            if not configuration.get("stage_report", {}).get("accept_student_code_better_than_token", False):
-                                messages['report'].append(f"{id}> Evaluation of student code (i.e. .py handins) was better than the token file evaluation. " + str(item) ) # + " student stderr: \n" + str(q['items'][item]['a']['stderr']) + "\n instructor stderr: \n" + str(q['items'][item]['b']['stderr']))
+                            # if not configuration.get("stage_report", {}).get("accept_student_code_better_than_token", False):
+                            messages['report'].append((id,
+                                                       configuration.get("stage_report", {}).get("accept_student_code_better_than_token", False),
+                                                       f"{id}> Evaluation of student code (i.e. .py handins) was better than the token file evaluation. " + str(item))) # + " student stderr: \n" + str(q['items'][item]['a']['stderr']) + "\n instructor stderr: \n" + str(q['items'][item]['b']['stderr']))
+
 
             elif 'token' in found_students[id] and 'python' not in found_students[id]:
                 pass
             elif 'token' not in found_students[id] and 'python' in found_students[id]:
                 if id not in configuration.get('stage_report', {}).get("python_handin_checked", []):
                     if not configuration.get("stage_report", {}).get("accept_only_py_no_token", False):
-                        print("=" * 50)
-                        s = f"{id}> only handed in the .py files and not the .token files. " +str(found_students[id]['python'] + " to skip this mesage, alter the stage_report['python_handin_checked'] field. ")
+                        s = (id, configuration.get("stage_report", {}).get("accept_only_py_no_token", False),
+                            f"{id}> only handed in the .py files and not the .token files. " +str(found_students[id]['python'] + " to skip this mesage, alter the stage_report['python_handin_checked'] field. "))
                         messages['report'].append(s)
-                        stoken =token_gather_hidden(load_token(found_students[id]['python'])[0])
-                        print(s)
-                        dd = defaultdict(list)
-                        for q in stoken['details']:
-                            for item in stoken['details'][q]['items']:
-                                # print(item, stoken['details'][q]['items'][item][0]['status'])
-                                dd['test'].append(item)
-                                dd['status'].append(stoken['details'][q]['items'][item][0]['status'])
-                        print(tabulate.tabulate(dd, headers='keys'))
-
 
+                        if configuration.get("stage_report", {}).get("accept_only_py_no_token", False):
+                            stoken = token_gather_hidden(load_token(found_students[id]['python'])[0])
+                            print("=" * 50)
+                            print(s)
+                            dd = defaultdict(list)
+                            for q in stoken['details']:
+                                for item in stoken['details'][q]['items']:
+                                    # print(item, stoken['details'][q]['items'][item][0]['status'])
+                                    dd['test'].append(item)
+                                    dd['status'].append(stoken['details'][q]['items'][item][0]['status'])
+                            print(tabulate.tabulate(dd, headers='keys'))
 
             else:
                 raise Exception(id + "> No code handin for this student")
@@ -709,38 +842,89 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
                 t = combine_token_results(load_token(tkns[0])[0], load_token(tkns[1])[0])
             else:
                 t, _ = load_token(tkns[0])
+
+            # strange id is s234546
+            # rs['s223845']['details']
+            if configuration['stage3'].get("fudge_accept_student_evaluation", False):
+                # In this case, we limit the number of items that are available to these since we rely on the student token files.
+                # this means the token files can have different evaluation items, which would be problematic.
+
+                # limit_items = configuration['stage3']['fudge_accept_student_evaluation_items']
+                # f"{stage3_dir}/{os.path.basename(os.path.dirname(rs[id]['token_produced'][0]))}/"
+                grade_script_relative = get_grade_script_location(instructor_grade_script)
+                # Get the instructor token
+                itoken = glob.glob( os.path.dirname(f"{stage3_dir}/{os.path.basename(os.path.dirname(rs[id]['token_produced'][0]))}/{grade_script_relative}") + "/*.token" )
+                assert len(itoken) >= 1, "no produced token found for " + rs[id]['token_produced'][0]
+                irs, _ = load_token(itoken[0])
+                for q in list(t['details'].keys()):
+                    if q not in irs['details']:
+                        print(id, "> Deleting bad questions", q)
+                        del t['details'][q]
+                for q in t['details']:
+                    for item in list(t['details'][q]['items']):
+                        if item not in irs['details'][q]['items']:
+                            print(id, "> Deleting bad item", item)
+                            del t['details'][q]['items'][item]
+
             rs[id] = {**rs[id], **t}
             if slim_rs and 'sources' in rs[id]:
                 rs[id]['sources'] = "Sources have been removed from this token because slim_rs=True (see dtulearn.py)."
         return rs
 
     rs = _stage_report()
+    print("> Stage reporting completed")
+
+    # message_log = ""
+    # messages_bad = []
+    # messages_ok = []
 
     all_msgs = []
+    all_observations = []
+
     if len(messages) > 0:
-        print("=" * 50)
-        print("Oy veh, there are messages")
+        # print("=" * 50)
+        # print(f"Oy veh, there are {sum([len(s) for s in messages.values()])} messages")
         for stage in messages:
-            print("Messages from", stage)
+            # print("Messages from", stage)
             for s in messages[stage]:
-                print(m_ := ">> "+ s)
-                all_msgs.append(m_)
-        print("-" * 50)
+                if isinstance(s, str):
+                    s = ("saf", False, s)
+
+                id, acceptable, msg = s
+                msg = f"{id} | {stage}> {msg}"
+                if acceptable:
+                    all_observations.append(msg)
+                else:
+                    all_msgs.append(msg)
+
 
-        if not accept_problems:
-            assert False, "No messages allowed!"
+        if len(all_msgs) > 0:
+            print("=" * 50)
+            print(f"Oy veh, there are {len(all_msgs)} critical problems")
 
+            for s in all_msgs:
+                print(s)
+                # print(m_ := ">> "+ s)
+                # all_msgs.append(m_)
+            print("-" * 50)
 
-    with open(base_directory +"/log.txt", "w") as f:
+            if not accept_problems:
+                assert False, "No messages allowed!"
+
+    with open(base_directory +"/errors.txt", "w") as f:
         f.write("\n".join(all_msgs))
 
+    with open(base_directory + "/acceptable.txt", "w") as f:
+        f.write("\n".join(all_observations))
+
     if plagiarism_check or copydetect_check:
         from unitgrade_private.plagiarism.mossit import moss_it2023
+        print("> running moss setup")
         moss_it2023(submissions_base_dir=stage4_dir, submissions_pattern="*-token", instructor_grade_script=instructor_grade_script,
                     student_files_dir=student_handout_folder, submit_to_server=not copydetect_check)
-        # Write the moss files.
 
     if plagiarism_check and copydetect_check: # This check is based on detector and is deprecated. I don't like detector.
+        print("> running copydetect check")
         from coursebox.core.info_paths import get_paths
         paths = get_paths()
         from copydetect import CopyDetector
@@ -773,48 +957,49 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
 
         test_dir_list = list(glob.glob(copydetect_submissions_dir + "/*"))
 
-        detector = CopyDetector(extensions=["py"], display_t=0.7, boilerplate_dirs=[copydetect_handout_dir], test_dirs=test_dir_list, same_name_only=True)
+        detector = CopyDetector(extensions=["py"], display_t=0.7, boilerplate_dirs=[copydetect_handout_dir], test_dirs=test_dir_list, same_name_only=True, autoopen=False)
         detector.out_file = working_dir + "/copydetect_report.html"
         detector.run()
         detector.generate_html_report()
-        # """
-        # file:///home/tuhe/Documents/02002instructors/project_evaluations_2023fall/project1/moss/handouts
-        # file:///home/tuhe/Documents/02002instructors/project_evaluations_2023fall/project1/moss/moss_submissions
-        #
-        # """
-        #
-        #
-        # detector = CopyDetector(same_name_only=True, extensions=["py"], display_t=0.7)
-        #
-        # relatives = []
-        # for id in rs:
-        #     v = [int(s.split("_")[-3]) for s in rs[id]['token_produced']]
-        #     token = rs[id]['token_produced'][v.index(max(v))]
-        #     tk, _ = load_token(token)
-        #     rl = tk['sources'][0]['report_relative_location']
-        #     bd = f"{stage3_dir}/{os.path.basename(os.path.dirname(token))}/{os.path.dirname(rl)}"
-        #     chk_files = []
-        #     for q in tk['details']:
-        #         # print(q)
-        #         with open(f"{bd}/unitgrade_data/{tk['details'][q]['name']}.pkl", 'rb') as f:
-        #             pk = pickle.load(f)
-        #
-        #         for item in tk['details'][q]['items']:
-        #             key = (item, 'coverage')
-        #             if key in pk:
-        #                 for f in pk[key]:
-        #                     relatives.append(f)
-        #                     chk_files.append( f"{stage3_dir}/{os.path.basename(os.path.dirname(token))}/{f}")
-        #
-        #     chk_files = list(set(chk_files))
-        #     for f in chk_files:
-        #         detector.add_file(f)
-        #     for f in set(relatives):
-        #         ff = paths['02450private']+"/Exam/exam2023spring/handout/" + f
-        #         detector.add_file(ff, type="boilerplate")
-        #
-        #     detector.run()
-        #     detector.generate_html_report()
+
+        cheaters = defaultdict(float)
+        for element in detector.get_copied_code_list():
+            if element[-1] < 800:
+
+                continue
+
+
+            pct1 = element[0]
+            pct2 = element[1]
+            id1 = element[2].split("/")[-2].split("-")[0]
+            id2 = element[3].split("/")[-2].split("-")[0]
+
+            if min(pct1, pct2) < 0.95:
+                continue
+
+
+            cheaters[id1] = max(cheaters[id1], pct1)
+            cheaters[id2] = max(cheaters[id2], pct2)
+
+
+        cheaters = {id: pct for id, pct in cheaters.items() if pct > 0.95}
+
+
+        with open( paths['semester']+ "/cheating_" + os.path.basename(base_directory) + ".txt", 'w') as f:
+            f.write( "\n".join([f"{id} {pct}" for id, pct in cheaters.items()]) )
+
+        with open( paths['semester']+ "/cheating_" + os.path.basename(base_directory) + "_email.txt", 'w') as f:
+            f.write("; ".join([f"{id}@student.dtu.dk" for id, pct in cheaters.items()]) )
+
+
+
+
+
+
+
+
+
+
 
     return rs
 
@@ -1045,8 +1230,11 @@ def moss_check(dzip, out, moss_id=None):
 
 def fname2id(fname, info=None):
     # fname = os.path.basename(f)
-    id_cand = fname.split("-")[2].strip().split(",")[0]
-    # print(id_cand, token)
+    if "-" not in fname and fname[0] == "s" and len(fname) == 7:
+        id_cand = fname
+    else:
+        id_cand = fname.split("-")[2].strip().split(",")[0]
+
     if id_cand.startswith("Group"):
         id = id_cand.replace(" ", "")
     else:
diff --git a/src/unitgrade_private/plagiarism/mossit.py b/src/unitgrade_private/plagiarism/mossit.py
index fb69919..3492b5b 100644
--- a/src/unitgrade_private/plagiarism/mossit.py
+++ b/src/unitgrade_private/plagiarism/mossit.py
@@ -59,8 +59,6 @@ def get_id(moss_pl):
 def moss_it2023(submissions_base_dir=None, submissions_pattern="*-token", whitelisted_tokens="", instructor_grade_script=None, moss_id=None,
                 student_files_dir=None, submit_to_server=True):
 
-    a = 234
-
     # submissions_base_dir = stage4_dir
     submissions_pattern = "*-token"
     print("-"*50)
@@ -79,28 +77,44 @@ def moss_it2023(submissions_base_dir=None, submissions_pattern="*-token", whitel
     student_files_dir = paths['02450students']
 
     cov_files = None
-    for f in glob.glob(submissions_base_dir + "/" + submissions_pattern):
-        if os.path.isdir(f):
-            id = os.path.basename(f)
-            # This gives us all the tokens. From here, we want to extract the relevant files.
-            # To do that, we must first get the relevant files.
-            tokens = glob.glob(f + "/**/*.token", recursive=True)
-            if len(tokens) > 0:
-                token = tokens[0]
-                if cov_files is None:
-                    cov_files = get_coverage_files(token, os.path.dirname(instructor_grade_script))
-                # Now create all the submissions by extracting the covered files.
-                import tempfile
-                with tempfile.TemporaryDirectory() as tmpdirname:
-                    unpack_sources_from_token(token, destination=tmpdirname)
-                    sdir = working_dir + "/moss_submissions/" + id
-                    if not os.path.isdir(sdir):
-                        os.makedirs(sdir)
-                    for q in cov_files:
-                        for i in cov_files[q].values():
-                            for g in i:
-                                if os.path.isfile(student_file := f"{tmpdirname}/{g}"):
-                                    shutil.copy(student_file, f"{sdir}/{os.path.basename(g)}")
+    # Get the submissions to check.
+    from collections import defaultdict
+    # ids = defaultdict(list)
+    ids = {}
+    for f in glob.glob(submissions_base_dir + "/" + '*-*'):
+        token = glob.glob(f + "/*.token")[0]
+        points = int( os.path.basename(token).split("_")[-3] )
+        id =os.path.basename(f).split("-")[0]
+        if id not in ids:
+            ids[id] = (points, token)
+        else:
+            if points > ids[id][0]:
+                ids[id] = (points, token)
+
+
+    for id, (points, token) in ids.items(): # glob.glob(submissions_base_dir + "/" + submissions_pattern):
+        # if os.path.isdir(f):
+        # id = os.path.basename(f)
+        # This gives us all the tokens. From here, we want to extract the relevant files.
+        # To do that, we must first get the relevant files.
+        # tokens = glob.glob(f + "/**/*.token", recursive=True)
+
+        if True: # len(tokens) > 0:
+            # token = tokens[0]
+            if cov_files is None:
+                cov_files = get_coverage_files(token, os.path.dirname(instructor_grade_script))
+            # Now create all the submissions by extracting the covered files.
+            import tempfile
+            with tempfile.TemporaryDirectory() as tmpdirname:
+                unpack_sources_from_token(token, destination=tmpdirname)
+                sdir = working_dir + "/moss_submissions/" + id
+                if not os.path.isdir(sdir):
+                    os.makedirs(sdir)
+                for q in cov_files:
+                    for i in cov_files[q].values():
+                        for g in i:
+                            if os.path.isfile(student_file := f"{tmpdirname}/{g}"):
+                                shutil.copy(student_file, f"{sdir}/{os.path.basename(g)}")
     if cov_files is None:
         return
     if student_files_dir is not None:
diff --git a/src/unitgrade_private/token_loader.py b/src/unitgrade_private/token_loader.py
index 7321ea2..4d464bf 100644
--- a/src/unitgrade_private/token_loader.py
+++ b/src/unitgrade_private/token_loader.py
@@ -74,7 +74,6 @@ def token_gather_hidden(token_rs, public_test_items_weight=1.):
         # Now fix the score.
         item_scores = 0
 
-
         for item in rb['details'][q]['items']:
             # w0 = 1 if len(rb['details'][q]['items'][item]) == 1 else public_test_items_weight
 
@@ -113,8 +112,11 @@ def determine_token_difference(student_token_rs, instructor_token_rs):
         kk = list(a.keys())
         kk +=  [k for k in b.keys() if b not in kk]
         for q in kk:
+
             for item in a[q]['items']:
                 # print(q)
+                if q not in b:
+                    print("Bad question!")
                 if a[q]['items'][item]['status'] == 'pass' and (item not in b[q]['items'] or b[q]['items'][item]['status'] != 'pass'):
                     if q not in a_better_than_b:
                         a_better_than_b[q] = {'items': {}}
@@ -129,14 +131,13 @@ def determine_token_difference(student_token_rs, instructor_token_rs):
     return a_better_than_b, b_better_than_a
 
 
-def combine_token_results(token_a_rs, token_b_rs):
+def combine_token_results(token_a_rs, token_b_rs, combine_at_question_level=True):
     """
     token_a_rs = load_token(...)
     token_b_rs = load_token(...)
 
     Combine by or'in the inputs. It will also recompute the token scores.
 
-
     :param token_a_rs:
     :param token_b_rs:
     :return:
@@ -166,12 +167,10 @@ def combine_token_results(token_a_rs, token_b_rs):
                     eql = False
             rsd[q]['items'][i] = item
 
-
         for k in token_a_rs['details'][q].keys():
             if k not in ['obtained', 'items']:
                 rsd[q][k] = token_a_rs['details'][q][k]
                 assert token_a_rs['details'][q][k]  == token_b_rs['details'][q][k], k
-            # rsd[q] = k
 
         w = token_a_rs['details'][q]['w']
         nc = int( np.floor( np.mean( [i['status'] == 'pass' for i in token_a_rs['details'][q]['items'].values()] ) * w ) )
@@ -179,6 +178,18 @@ def combine_token_results(token_a_rs, token_b_rs):
             abt = token_a_rs['details'][q]['obtained']
             assert nc == token_a_rs['details'][q]['obtained'] and nc == token_b_rs['details'][q]['obtained'], f"points differ. {nc} != {abt}"
         rsd[q]['obtained'] = nc
+
+        if combine_at_question_level:
+            assert token_a_rs['details'][q]['possible'] == token_b_rs['details'][q]['possible']
+
+            if token_a_rs['details'][q]['obtained'] >= token_b_rs['details'][q]['obtained']:
+                rsd[q] = token_a_rs['details'][q]
+            else:
+                rsd[q] = token_b_rs['details'][q]
+
+            w = rsd[q]['w']
+            nc = rsd[q]['obtained']
+
         n_tot += w
         n_obt += nc
 
diff --git a/src/unitgrade_private/version.py b/src/unitgrade_private/version.py
index 5c2098c..5ddcdfd 100644
--- a/src/unitgrade_private/version.py
+++ b/src/unitgrade_private/version.py
@@ -1 +1 @@
-__version__ = "0.1.62"
+__version__ = "0.1.64"
-- 
GitLab