diff --git a/src/unitgrade_private/pipelines/dtulearn.py b/src/unitgrade_private/pipelines/dtulearn.py
index e1b7c60cca91fb3b41334f8e3c2fbd1e835ae3ef..3e4b01788c287360a6f6d5ee5a71f990952397e3 100644
--- a/src/unitgrade_private/pipelines/dtulearn.py
+++ b/src/unitgrade_private/pipelines/dtulearn.py
@@ -141,6 +141,7 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
                                 accept_problems=False, # No!
                                 copydetect_check=False,
                                 slim_rs=False, # Slim the rs data structure that is returned.
+                                skip_stage_3=False, # Skip the docker re-evaluation (stage 3) entirely.
                                 ):
     """
     This is the main verification scripts. It is the main entry point for project verifications as downloaded from DTU Learn.
@@ -503,6 +504,38 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
     _stage2()
     print("> Stage 2 completed")
 
+    def load_produced_token_rs(id):
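+        """Collect the stage-4 .token products produced for student `id`.
+
+        Returns a dict with 'combined_rs' (the python/token results merged via combine_token_results,
+        or the single result, or None when nothing was produced), 'individual_rs' (results keyed by
+        handin type) and 'products' (paths of the .token files found).
+        """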
+        p1 = glob.glob(f"{stage4_dir}/{id}-python/*.token")
+        p2 = glob.glob(f"{stage4_dir}/{id}-token/*.token")
+
+        produced_python_rs = load_token(p1[0])[0] if len(p1) > 0 else None
+        produced_token_rs = load_token(p2[0])[0] if len(p2) > 0 else None
+        student_token_file = glob.glob(f"{stage2_dir}/{id}-token/**/*.token", recursive=True)
+        products = glob.glob(f"{stage4_dir}/{id}-*/*.token")
+
+        assert len(student_token_file) <= 1
+        # The inline check this was extracted from guarded on a `type` local ('token' vs 'python'
+        # handin), which does not exist here; guard on the produced token instead (assumed equivalent).
+        if produced_token_rs is not None:
+            assert len(student_token_file) == 1, f"Token handin for {id} left no student token file in stage 2."
+        assert sum([produced_token_rs is not None, produced_python_rs is not None]) == len(products), f"Unexpected stage-4 token products for {id}: {products}"
+
+        if len(products) == 2:
+            rc = combine_token_results(produced_python_rs, produced_token_rs)
+            # Flag cases where the student has a passing test item that the token file fails.
+        elif len(products) > 2:
+            raise Exception(f"Unrecognized handins: {products}")
+        elif len(products) == 1:
+            rc = produced_python_rs if produced_python_rs is not None else produced_token_rs
+        else:
+            rc = None
+
+        trs = {}
+        if produced_token_rs:
+            trs['token'] = produced_token_rs
+
+        if produced_python_rs:
+            trs['python'] = produced_python_rs
+
+        return {'combined_rs': rc, 'individual_rs': trs, 'products': products }
+
     def _stage3(Dockerfile, fix_user=True, xvfb=True, unmute=False, verbose=False):
         if Dockerfile is None:
             images = download_docker_images()
@@ -532,34 +565,38 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
 
             # combine the token and student python versions. Check if we are happy with the current result, i.e., do we get as many points as the student expected or not?
             RERUN_TOKEN = True #Re-evaluate this staged execution and re-create the token.
+
             if os.path.isdir(s4dir):
                 RERUN_TOKEN = False
                 # Try to get the old token file
                 # id, type = os.path.basename(fid).split("-")
                 # now combine the student and instructor versions of this file for an evaluations.
 
-                products = glob.glob(f"{stage4_dir}/{id}-*/*.token")
-                p1 = glob.glob(f"{stage4_dir}/{id}-python/*.token")
-                p2 = glob.glob(f"{stage4_dir}/{id}-token/*.token")
-
-                produced_python_rs, _ = load_token(p1[0]) if len(p1) > 0 else None
-                produced_token_rs = load_token(p2[0]) if len(p2) > 0 else None
-
-                assert len(student_token_file) <= 1
-                if type == 'token': assert len(student_token_file) == 1
-
-
-                if len(products) == 2:
-                    rc = combine_token_results(produced_python_rs, produced_token_rs)
-                    # flag when student has a test item that pass which the token file does not.
-                elif len(products) > 2:
-                    raise Exception(f"Handins not recognized {products}")
-                elif len(products) == 1:
-                    rc = produced_token_rs if produced_python_rs is not None else produced_token_rs
-                    # rc = load_token(products[0])[0]
-
-
-                if len(products) == 0: # No .token file has actually been generated. So obviously we have to re-generate it.
+                token_outcome = load_produced_token_rs(id)
+                produced_token_rs = token_outcome['individual_rs'].get('token', None)
+                produced_python_rs = token_outcome['individual_rs'].get('python', None)
+                rc = token_outcome['combined_rs']
+
+                if len(token_outcome['individual_rs']) == 0: # No .token file has actually been generated. So obviously we have to re-generate it.
                     RERUN_TOKEN = True
                 elif len(student_token_file) > 0 and id not in configuration.get('stage2', {}).get('skip_students', []):
                     # We check if the student id is marked as skipped. This is reserved for cases where student uploads a token file, but it is fundamentally broken (as determined by manual inspection).
@@ -576,92 +613,96 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
                     if "sources" not in rc:
                         print("no sources")
 
-                    ptoken = produced_token_rs if produced_python_rs is not None else produced_token_rs # load_token(products[0])[0]
+                    ptoken = produced_python_rs if produced_python_rs is not None else produced_token_rs  # Prefer the python result when available.
 
                     rename_map = conf.get('rename_items', {})  # Why give them a single test when I can sit on my ass and give them incompatible tests, WCGW?
                     for q in stoken['details']:
                         stoken['details'][q]['items'] = {rename_map.get(k, k): v for k, v in stoken['details'][q]['items'].items()}
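+                    # Items where the student's token scored strictly better than the instructor rerun.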
+                    s_better_than_i = []
+                    if produced_token_rs is not None:
+                        if ".".join(stoken['sources'][0]['report_module_specification']).lower().replace(" ", "") == ".".join(ptoken['sources'][0]['report_module_specification']).replace("_tests_complete", "").lower():
+                            s_better_than_i, _ = determine_token_difference(stoken, produced_token_rs) # Since we are going on a per-question basis, we only look at the token files.
+                            acceptable_broken = False
+                        else:
+                            specification = ".".join(stoken['sources'][0]['report_module_specification']).lower()
 
-                    if ".".join(stoken['sources'][0]['report_module_specification']).lower().replace(" ", "") == ".".join(ptoken['sources'][0]['report_module_specification']).replace("_tests_complete", "").lower(): #
-                        s_better_than_i, _ = determine_token_difference(stoken, produced_token_rs) # Since we are going on a per-question basis, we only look at the token files.
-                        acceptable_broken = False
-                    elif id in configuration.get('stage3', {}).get('accept_incompatible_token_names', []):
-                        print("Incompatible token names accepted...")
-                        s_better_than_i = []
-                        acceptable_broken = True
-                    else:
-                        print(".".join(stoken['sources'][0]['report_module_specification']).lower())
-                        if rc is not None and rc['sources'] is not None and rc['sources'][0] is not None:
-                            print(".".join(rc['sources'][0]['report_module_specification']).replace("_tests_complete", "").lower())
-                        messages['stage3'].append(f"{id}> Bad student token. Add id incompatible token names ['stage3']['accept_incompatible_token_names']. This likely occured because the student renamed the grade script. " + str(student_token_file))
-                        RERUN_TOKEN = True # Not hat it really helps.
-                        acceptable_broken = True
-
-                    if len(s_better_than_i) > 0:
-                        for q in s_better_than_i:
-                            for item in s_better_than_i[q]['items']:
-                                if item == ('Week06SentimentAnalysis', 'test_sentiment_analysis'):
-                                    print("Yes we were better but it had to do with idiotic sentiment analysis...")
-                                    continue
-                                messages['stage3'].append(f"{id}> ERROR: Student strictly better than instructor. q{q}. item: {item}")
-                                RERUN_TOKEN = True
+                            if rc is not None and rc['sources'] is not None and rc['sources'][0] is not None:
+                                print(".".join(rc['sources'][0]['report_module_specification']).replace("_tests_complete", "").lower())
+                            messages['stage3'].append((id,
+                                                       id in configuration.get('stage3', {}).get('accept_incompatible_token_names', []),
+                                                       f"{id}> Bad student token. Add the id to ['stage3']['accept_incompatible_token_names']. This likely occurred because the student renamed the grade script. " + str(student_token_file) + " and " + specification))
 
-                    rch = token_gather_hidden(rc)
+                            RERUN_TOKEN = True # Not that it really helps.
+                            acceptable_broken = True
+                            s_better_than_i = []
 
-                    # instructor_rs_token, _ = load_token([t for t in products if '-token' in t].pop())
-                    instructor_rs_token = produced_token_rs
+                        if len(s_better_than_i) > 0:
+                            for q in s_better_than_i:
+                                for item in s_better_than_i[q]['items']:
+                                    if item == ('Week06SentimentAnalysis', 'test_sentiment_analysis'):
+                                        print("Yes we were better but it had to do with idiotic sentiment analysis...")
+                                        continue
 
+                                    messages['stage3'].append((id, id in conf.get('verified_problematic_items', {}).get(item, []), f"{id}> Student strictly better than instructor. q{q}. item: {item}") )
+                                    RERUN_TOKEN = True
 
-                    for q in stoken['details']:
-                        if acceptable_broken:
-                            continue
-                        for item in stoken['details'][q]['items']:
-                            if item ==  ('Week06SentimentAnalysis', 'test_sentiment_analysis'):
+                    rch = token_gather_hidden(rc)
+
+                    if produced_token_rs is not None:
+
+                        for q in stoken['details']:
+                            if acceptable_broken:
                                 continue
-                            sitem = stoken['details'][q]['items'][item]
-                            if item == ("Week06SpellCheck", "test_SpellCheck"):
-                                item = ("Week06SpellCheck", "test_spell_check")
+                            for item in stoken['details'][q]['items']:
+                                if item == ('Week06SentimentAnalysis', 'test_sentiment_analysis'):
+                                    continue
+                                sitem = stoken['details'][q]['items'][item]
+                                if item == ("Week06SpellCheck", "test_SpellCheck"):
+                                    item = ("Week06SpellCheck", "test_spell_check")
 
-                            if item not in rch['details'][q]['items']:
 
-                                print( rch['details'][q]['items'].keys() )
 
-                            # print(rch['details'][q]['items'].keys())
+                                # Since rc merges student and instructor results, only the instructor's own token product is trusted here.
 
-                            # token_products =
-                            # Since we combine the token we can only trust (our) token product and not the combined one.
 
+                                iitems2 = [produced_token_rs['details'][q]['items'][item]]
 
-                            iitems = [instructor_rs_token['details'][q]['items'][item]]
+                                if sitem['status'] == 'pass' and not all([i['status'] == 'pass' for i in iitems2]) and id not in conf.get('verified_problematic_items', {}).get(item, []) and not conf.get("accept_public_ok_hidden_failed", False):
+                                    # print('disagreement found.')
+                                    iitems = rch['details'][q]['items'][item]
+                                    fails = [i['nice_title'] for i in iitems if i['status'] != 'pass']
 
-                            if sitem['status'] == 'pass' and not all([i['status'] == 'pass' for i in iitems]) and id not in conf.get('verified_problematic_items', {}).get(item, []) and not conf.get("accept_public_ok_hidden_failed", False):
-                                # print('disagreement found.')
-                                iitems = rch['details'][q]['items'][item]
-                                fails = [i['nice_title'] for i in iitems if i['status'] != 'pass']
+                                    messages['stage3'].append(f"{id} {nn+1}> Hidden test disagreement. Public test passed but hidden runs failed in: {fails}, {item}")
 
-                                messages['stage3'].append(f"{id} {nn+1}> Hidden test disagreement. Public ok but hidden got failues in: {fails}, {item}")
+                                    from unitgrade_private.token_loader import get_coverage_files
+                                    cfiles = get_coverage_files(student_token_file[0], instructor_grade_script_dir=os.path.dirname(grade_script_destination))
 
-                                from unitgrade_private.token_loader import get_coverage_files
-                                cfiles = get_coverage_files(student_token_file[0], instructor_grade_script_dir=os.path.dirname(grade_script_destination))
 
-                                # with open(f"{os.path.dirname(grade_script_destination)}/unitgrade_data/{stoken['details'][q]['name']}.pkl", 'rb') as f:
-                                #     pk = pickle.load(f)
-                                # fls = list( pk[(item, 'coverage')].keys() )[0]
-                                fls = cfiles[q][(item, 'coverage')][0]
-                                if fid.endswith("token"):
-                                    failures = [i for i in iitems if i['status'] != 'pass']
-                                    print("*"*100)
-                                    print(item)
-                                    print(id)
-                                    print("-"*20 + "---We got the error " + "-"*20)
-                                    print(failures.pop()['stderr'])
-                                    print("-"*20 + "Please make sure the following is broken" + "-"*20 )
-                                    with open(f"{fid}/{fls}", 'r') as f:
-                                        print( f.read() )
-                                    print("="*100)
 
-                                RERUN_TOKEN = True
-                                nn += 1
+                                    fls = cfiles[q][(item, 'coverage')][0]
+                                    if fid.endswith("token"):
+                                        failures = [i for i in iitems2 if i['status'] != 'pass']
+                                        print("*"*100)
+                                        print(item)
+                                        print(id)
+                                        print("-"*20 + " We got the error " + "-"*20)
+                                        print(failures.pop()['stderr'])
+                                        print("-"*20 + " Please make sure the following is broken " + "-"*20)
+                                        with open(f"{fid}/{fls}", 'r') as f:
+                                            print( f.read() )
+                                        print("="*100)
+
+                                    RERUN_TOKEN = True
+                                    nn += 1
             else:
                 print("No token rerunning", s4dir)
 
@@ -707,6 +748,9 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
             fcom = f"{cdcom}  && {dcom}"
             print(f"{k}> Running docker command in", fid)
             print(fcom)
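+            # Record the exact docker command for post-mortem debugging (the file is overwritten each run).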
+            with open(base_directory + "/dockercom.txt", 'w') as fdock:
+                fdock.write( f"{k}> Running docker command in {fid}\n{fcom}" )
+
             # if os.path.basename(fid) == 'Group33-token':
             #     a = 234
             from unitgrade.utils import Capturing2, Capturing, Logger
@@ -761,7 +805,8 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
                 print("tokens", tokens)
                 raise e
 
-    _stage3(Dockerfile, unmute=unmute_docker)
+    if not skip_stage_3:
+        _stage3(Dockerfile, unmute=unmute_docker)
     print("> Stage 3 completed")
 
     def _stage_report():
@@ -799,12 +844,18 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
         if len(found_students) != len(glob.glob(stage1_dir + "/*")):
             a = list(found_students.keys())
             b = [os.path.basename(d) for d in glob.glob(stage1_dir + "/*")]
-            print("Found students idffer from all downloads. Very bad.",  [s for s in b if s not in a])
+            raise Exception("Found students differ from all downloads. Very bad.", [s for s in b if s not in a])
 
         assert len(found_students) == len(glob.glob(stage1_dir + "/*")) # Ensure all students have been found.
         for id in found_students:
+            product = load_produced_token_rs(id)
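+            # The stage-4 products must mirror the handin types ('python'/'token') found for the student.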
+            assert ("python" in found_students[id]) == ('python' in product['individual_rs'])
+            assert ("token" in found_students[id]) == ('token' in product['individual_rs'])
+
             if 'python' in found_students[id] and 'token' in found_students[id]:
-                t_best, p_best = determine_token_difference(load_token(found_students[id]['token'])[0], load_token(found_students[id]['python'])[0])
+                t_best, p_best = determine_token_difference( product['individual_rs']['token'], product['individual_rs']['python'])
                 if len(p_best) > 0:
                     for q in p_best.values():
                         for item in q['items']:
@@ -824,7 +875,7 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
                         messages['report'].append(s)
 
                         if configuration.get("stage_report", {}).get("accept_only_py_no_token", False):
-                            stoken = token_gather_hidden(load_token(found_students[id]['python'])[0])
+                            stoken = token_gather_hidden(product['individual_rs']['python'])
                             print("=" * 50)
                             print(s)
                             dd = defaultdict(list)
@@ -837,14 +888,14 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
 
             else:
                 raise Exception(id + "> No code handin for this student")
-            tkns = [found_students[id][key] for key in ['token','python'] if key in found_students[id]]
-            if len(tkns) == 2:
-                t = combine_token_results(load_token(tkns[0])[0], load_token(tkns[1])[0])
-            else:
-                t, _ = load_token(tkns[0])
+            t = product['combined_rs']
 
-            # strange id is s234546
-            # rs['s223845']['details']
             if configuration['stage3'].get("fudge_accept_student_evaluation", False):
                 # In this case, we limit the number of items that are available to these since we rely on the student token files.
                 # this mean the token file can have differnet evaluation items which woudl be shit.
@@ -921,7 +972,7 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
         from unitgrade_private.plagiarism.mossit import moss_it2023
         print("> running moss setup")
         moss_it2023(submissions_base_dir=stage4_dir, submissions_pattern="*-token", instructor_grade_script=instructor_grade_script,
-                    student_files_dir=student_handout_folder, submit_to_server=not copydetect_check)
+                    student_files_dir=student_handout_folder, submit_to_server=not copydetect_check, as_single_file=True)
 
     if plagiarism_check and copydetect_check: # This check is based on detector and is deprecated. I don't like detector.
         print("> running copydetect check")
diff --git a/src/unitgrade_private/plagiarism/mossit.py b/src/unitgrade_private/plagiarism/mossit.py
index 3492b5b7013aa37ab7030996905efbf2affd5c83..d998d453b4c659244753c6fdaa29b41303f270a4 100644
--- a/src/unitgrade_private/plagiarism/mossit.py
+++ b/src/unitgrade_private/plagiarism/mossit.py
@@ -57,7 +57,7 @@ def get_id(moss_pl):
     return pl.split("=")[1][:-1]
 
 def moss_it2023(submissions_base_dir=None, submissions_pattern="*-token", whitelisted_tokens="", instructor_grade_script=None, moss_id=None,
-                student_files_dir=None, submit_to_server=True):
+                student_files_dir=None, submit_to_server=True, as_single_file=False):
 
     # submissions_base_dir = stage4_dir
     submissions_pattern = "*-token"
@@ -188,7 +191,10 @@ def moss_it(whitelist_dir="", submissions_dir="", moss_id=None, blacklist=None):
 
     m.addFilesByWildcard(tmp_submission_dir + "/*/*.py")
     print("> Calling moss")
-    url = m.send(lambda file_path, display_name: print('*', end='', flush=True))
+    print(f"> Using: {tmp_base=}")
+    print(f"> Using: {tmp_submission_dir=}")
+
+    url = m.send(lambda file_path, display_name: print(f'{file_path}: {display_name}', flush=True))
     print()
     print("Report Url: " + url)
     report_dir = os.path.dirname(whitelist_dir) + "/report"
@@ -198,4 +204,4 @@ def moss_it(whitelist_dir="", submissions_dir="", moss_id=None, blacklist=None):
     r = report_dir + "/report.html"
     m.saveWebPage(url, r)
     print("Saved report to:", r)
-    mosspy.download_report(url, report_dir, connections=8, log_level=10, on_read=lambda u: print('*', end='', flush=True))
+    mosspy.download_report(url, report_dir, connections=8, log_level=10, on_read=lambda u: print(f'* {u}', end='', flush=True))
diff --git a/src/unitgrade_private/token_loader.py b/src/unitgrade_private/token_loader.py
index 4d464bf469f7766203233cef1298e9c59567373a..9fed600710fc2ff34cea3cf7b78c6181b3327c81 100644
--- a/src/unitgrade_private/token_loader.py
+++ b/src/unitgrade_private/token_loader.py
@@ -145,6 +145,9 @@ def combine_token_results(token_a_rs, token_b_rs, combine_at_question_level=True
     rsd = {}
     n_tot = 0
     n_obt = 0
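+    # Work on deep copies so the merge below does not mutate the caller's token structures.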
+    import copy
+    token_a_rs = copy.deepcopy(token_a_rs)
+    token_b_rs = copy.deepcopy(token_b_rs)
 
     for q in set(token_a_rs['details'].keys()) | set(token_b_rs['details'].keys()):
         itemsa = list(token_a_rs['details'][q]['items'])