diff --git a/src/unitgrade_private/pipelines/dtulearn.py b/src/unitgrade_private/pipelines/dtulearn.py
index e1b7c60cca91fb3b41334f8e3c2fbd1e835ae3ef..3e4b01788c287360a6f6d5ee5a71f990952397e3 100644
--- a/src/unitgrade_private/pipelines/dtulearn.py
+++ b/src/unitgrade_private/pipelines/dtulearn.py
@@ -141,6 +141,7 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
                                 accept_problems=False, # No!
                                 copydetect_check=False,
                                 slim_rs=False, # Slim the rs data structure that is returned.
+                                skip_stage_3=False,
                                 ):
     """
     This is the main verification script. It is the main entry point for project verifications as downloaded from DTU Learn.
@@ -503,6 +504,38 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
         _stage2()
     print("> Stage 2 completed")
 
+    def load_produced_token_rs(id):
+        p1 = glob.glob(f"{stage4_dir}/{id}-python/*.token")
+        p2 = glob.glob(f"{stage4_dir}/{id}-token/*.token")
+
+        produced_python_rs = load_token(p1[0])[0] if len(p1) > 0 else None
+        produced_token_rs = load_token(p2[0])[0] if len(p2) > 0 else None
+        student_token_file = glob.glob(f"{stage2_dir}/{id}-token/**/*.token", recursive=True)
+        products = glob.glob(f"{stage4_dir}/{id}-*/*.token")
+
+        assert len(student_token_file) <= 1
+        if type == 'token': assert len(student_token_file) == 1
+        assert sum([produced_token_rs is not None, produced_python_rs is not None]) == len(products), f"very odd {id}"
+
+        if len(products) == 2:
+            rc = combine_token_results(produced_python_rs, produced_token_rs)
+            # flag when the student has a test item that passes which the token file does not.
+        elif len(products) > 2:
+            raise Exception(f"Handins not recognized {products}")
+        elif len(products) == 1:
+            rc = produced_python_rs if produced_python_rs is not None else produced_token_rs
+        else:
+            rc = None
+
+        trs = {}
+        if produced_token_rs:
+            trs['token'] = produced_token_rs
+
+        if produced_python_rs:
+            trs['python'] = produced_python_rs
+
+        return {'combined_rs': rc, 'individual_rs': trs, 'products': products }
+
     def _stage3(Dockerfile, fix_user=True, xvfb=True, unmute=False, verbose=False):
         if Dockerfile is None:
             images = download_docker_images()
@@ -532,34 +565,38 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
             # combine the token and student python versions. Check if we are happy with the current result, i.e., do we get as many points as the student expected or not?
             RERUN_TOKEN = True #Re-evaluate this staged execution and re-create the token.
+
             if os.path.isdir(s4dir):
                 RERUN_TOKEN = False # Try to get the old token file
                 # id, type = os.path.basename(fid).split("-")
                 # now combine the student and instructor versions of this file for an evaluation.
-                products = glob.glob(f"{stage4_dir}/{id}-*/*.token")
-                p1 = glob.glob(f"{stage4_dir}/{id}-python/*.token")
-                p2 = glob.glob(f"{stage4_dir}/{id}-token/*.token")
-
-                produced_python_rs, _ = load_token(p1[0]) if len(p1) > 0 else None
-                produced_token_rs = load_token(p2[0]) if len(p2) > 0 else None
-
-                assert len(student_token_file) <= 1
-                if type == 'token': assert len(student_token_file) == 1
-
-
-                if len(products) == 2:
-                    rc = combine_token_results(produced_python_rs, produced_token_rs)
-                    # flag when student has a test item that pass which the token file does not.
-                elif len(products) > 2:
-                    raise Exception(f"Handins not recognized {products}")
-                elif len(products) == 1:
-                    rc = produced_token_rs if produced_python_rs is not None else produced_token_rs
-                    # rc = load_token(products[0])[0]
-
-
-                if len(products) == 0: # No .token file has actually been generated. So obviously we have to re-generate it.
+                # products = glob.glob(f"{stage4_dir}/{id}-*/*.token")
+                # p1 = glob.glob(f"{stage4_dir}/{id}-python/*.token")
+                # p2 = glob.glob(f"{stage4_dir}/{id}-token/*.token")
+
+                # produced_python_rs = load_token(p1[0])[0] if len(p1) > 0 else None
+                # produced_token_rs = load_token(p2[0])[0] if len(p2) > 0 else None
+
+                # assert len(student_token_file) <= 1
+                # if type == 'token': assert len(student_token_file) == 1
+                # assert sum( [produced_token_rs is not None, produced_python_rs is not None]) == len(products), f"very odd {id}"
+
+                # if len(products) == 2:
+                #     rc = combine_token_results(produced_python_rs, produced_token_rs)
+                #     # flag when student has a test item that pass which the token file does not.
+                # elif len(products) > 2:
+                #     raise Exception(f"Handins not recognized {products}")
+                # elif len(products) == 1:
+                #     rc = produced_python_rs if produced_python_rs is not None else produced_token_rs
+                #     # rc = load_token(products[0])[0]
+                token_outcome = load_produced_token_rs(id)
+                produced_token_rs = token_outcome['individual_rs'].get('token', None)
+                produced_python_rs = token_outcome['individual_rs'].get('python', None)
+                rc = token_outcome['combined_rs']
+
+                if len(token_outcome['individual_rs']) == 0: # No .token file has actually been generated. So obviously we have to re-generate it.
                     RERUN_TOKEN = True
                 elif len(student_token_file) > 0 and id not in configuration.get('stage2', {}).get('skip_students', []):
                     # We check if the student id is marked as skipped. This is reserved for cases where a student uploads a token file, but it is fundamentally broken (as determined by manual inspection).
@@ -576,92 +613,96 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
                     if "sources" not in rc:
                         print("no sources")
-                    ptoken = produced_token_rs if produced_python_rs is not None else produced_token_rs # load_token(products[0])[0]
+                    ptoken = produced_python_rs if produced_python_rs is not None else produced_token_rs # load_token(products[0])[0]
                     rename_map = conf.get('rename_items', {}) # Why give them a single test when I can sit on my ass and give them incompatible tests, WCGW?
                     for q in stoken['details']:
                         stoken['details'][q]['items'] = {rename_map.get(k, k): v for k, v in stoken['details'][q]['items'].items()}
+
+                    s_better_than_i = []
+                    if produced_token_rs is not None:
+                        if ".".join(stoken['sources'][0]['report_module_specification']).lower().replace(" ", "") == ".".join(ptoken['sources'][0]['report_module_specification']).replace("_tests_complete", "").lower(): #
+                            s_better_than_i, _ = determine_token_difference(stoken, produced_token_rs) # Since we are going on a per-question basis, we only look at the token files.
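+                            # NB: the shape of s_better_than_i is assumed here from the loop over it below
+                            # (not from determine_token_difference itself): {question: {'items': [(class_name, test_name), ...]}},
+                            # i.e. the items where the student's token passed but the instructor re-run did not.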
+                            acceptable_broken = False
+                        # elif id in configuration.get('stage3', {}).get('accept_incompatible_token_names', []):
+                        #     print("Incompatible token names accepted...")
+                        #     s_better_than_i = []
+                        #     acceptable_broken = True
+                        else:
+                            specification = ".".join(stoken['sources'][0]['report_module_specification']).lower()
+                            # print(".".join(stoken['sources'][0]['report_module_specification']).lower())
-                    if ".".join(stoken['sources'][0]['report_module_specification']).lower().replace(" ", "") == ".".join(ptoken['sources'][0]['report_module_specification']).replace("_tests_complete", "").lower(): #
-                        s_better_than_i, _ = determine_token_difference(stoken, produced_token_rs) # Since we are going on a per-question basis, we only look at the token files.
-                        acceptable_broken = False
-                    elif id in configuration.get('stage3', {}).get('accept_incompatible_token_names', []):
-                        print("Incompatible token names accepted...")
-                        s_better_than_i = []
-                        acceptable_broken = True
-                    else:
-                        print(".".join(stoken['sources'][0]['report_module_specification']).lower())
-                        if rc is not None and rc['sources'] is not None and rc['sources'][0] is not None:
-                            print(".".join(rc['sources'][0]['report_module_specification']).replace("_tests_complete", "").lower())
-                        messages['stage3'].append(f"{id}> Bad student token. Add id incompatible token names ['stage3']['accept_incompatible_token_names']. This likely occured because the student renamed the grade script. " + str(student_token_file))
-                        RERUN_TOKEN = True # Not hat it really helps.
-                        acceptable_broken = True
-
-                    if len(s_better_than_i) > 0:
-                        for q in s_better_than_i:
-                            for item in s_better_than_i[q]['items']:
-                                if item == ('Week06SentimentAnalysis', 'test_sentiment_analysis'):
-                                    print("Yes we were better but it had to do with idiotic sentiment analysis...")
-                                    continue
-                                messages['stage3'].append(f"{id}> ERROR: Student strictly better than instructor. q{q}. item: {item}")
-                                RERUN_TOKEN = True
+                            if rc is not None and rc['sources'] is not None and rc['sources'][0] is not None:
+                                print(".".join(rc['sources'][0]['report_module_specification']).replace("_tests_complete", "").lower())
+                            messages['stage3'].append((id,
+                                                       id in configuration.get('stage3', {}).get('accept_incompatible_token_names', []),
+                                                       f"{id}> Bad student token. Add id to incompatible token names ['stage3']['accept_incompatible_token_names']. This likely occurred because the student renamed the grade script. " + str(student_token_file) + " and " + specification))
-                    rch = token_gather_hidden(rc)
+                            RERUN_TOKEN = True # Not that it really helps.
+                            acceptable_broken = True
+                            s_better_than_i = []
-                    # instructor_rs_token, _ = load_token([t for t in products if '-token' in t].pop())
-                    instructor_rs_token = produced_token_rs
+                    if len(s_better_than_i) > 0:
+                        for q in s_better_than_i:
+                            for item in s_better_than_i[q]['items']:
+                                if item == ('Week06SentimentAnalysis', 'test_sentiment_analysis'):
+                                    print("Yes we were better but it had to do with idiotic sentiment analysis...")
+                                    continue
+                                messages['stage3'].append((id, id in conf.get('verified_problematic_items', {}).get(item, []), f"{id}> Student strictly better than instructor. q{q}. item: {item}") )
+                                RERUN_TOKEN = True
 
-                    for q in stoken['details']:
-                        if acceptable_broken:
-                            continue
-                        for item in stoken['details'][q]['items']:
-                            if item == ('Week06SentimentAnalysis', 'test_sentiment_analysis'):
+                    rch = token_gather_hidden(rc)
+
+                    if produced_token_rs is not None:
+
+                        for q in stoken['details']:
+                            if acceptable_broken:
                                 continue
-                            sitem = stoken['details'][q]['items'][item]
-                            if item == ("Week06SpellCheck", "test_SpellCheck"):
-                                item = ("Week06SpellCheck", "test_spell_check")
+                            for item in stoken['details'][q]['items']:
+                                if item == ('Week06SentimentAnalysis', 'test_sentiment_analysis'):
+                                    continue
+                                sitem = stoken['details'][q]['items'][item]
+                                if item == ("Week06SpellCheck", "test_SpellCheck"):
+                                    item = ("Week06SpellCheck", "test_spell_check")
 
-                            if item not in rch['details'][q]['items']:
+                                # if item not in rch['details'][q]['items']:
+                                #     print( rch['details'][q]['items'].keys() )
-                                print( rch['details'][q]['items'].keys() )
+                                # print(rch['details'][q]['items'].keys())
-                            # print(rch['details'][q]['items'].keys())
+                                # token_products =
+                                # Since we combine the token we can only trust (our) token product and not the combined one.
-                            # token_products =
-                            # Since we combine the token we can only trust (our) token product and not the combined one.
+                                iitems2 = [produced_token_rs['details'][q]['items'][item]]
-                            iitems = [instructor_rs_token['details'][q]['items'][item]]
+                                if sitem['status'] == 'pass' and not all([i['status'] == 'pass' for i in iitems2]) and id not in conf.get('verified_problematic_items', {}).get(item, []) and not conf.get("accept_public_ok_hidden_failed", False):
+                                    # print('disagreement found.')
+                                    iitems = rch['details'][q]['items'][item]
+                                    fails = [i['nice_title'] for i in iitems if i['status'] != 'pass']
-                            if sitem['status'] == 'pass' and not all([i['status'] == 'pass' for i in iitems]) and id not in conf.get('verified_problematic_items', {}).get(item, []) and not conf.get("accept_public_ok_hidden_failed", False):
-                                # print('disagreement found.')
-                                iitems = rch['details'][q]['items'][item]
-                                fails = [i['nice_title'] for i in iitems if i['status'] != 'pass']
+                                    messages['stage3'].append(f"{id} {nn+1}> Hidden test disagreement. Public ok but hidden got failures in: {fails}, {item}")
-                                messages['stage3'].append(f"{id} {nn+1}> Hidden test disagreement. Public ok but hidden got failues in: {fails}, {item}")
+                                    from unitgrade_private.token_loader import get_coverage_files
+                                    cfiles = get_coverage_files(student_token_file[0], instructor_grade_script_dir=os.path.dirname(grade_script_destination))
-                                from unitgrade_private.token_loader import get_coverage_files
-                                cfiles = get_coverage_files(student_token_file[0], instructor_grade_script_dir=os.path.dirname(grade_script_destination))
-                                # with open(f"{os.path.dirname(grade_script_destination)}/unitgrade_data/{stoken['details'][q]['name']}.pkl", 'rb') as f:
-                                #     pk = pickle.load(f)
-                                #     fls = list( pk[(item, 'coverage')].keys() )[0]
-                                fls = cfiles[q][(item, 'coverage')][0]
-                                if fid.endswith("token"):
-                                    failures = [i for i in iitems if i['status'] != 'pass']
-                                    print("*"*100)
-                                    print(item)
-                                    print(id)
-                                    print("-"*20 + "---We got the error " + "-"*20)
-                                    print(failures.pop()['stderr'])
-                                    print("-"*20 + "Please make sure the following is broken" + "-"*20 )
-                                    with open(f"{fid}/{fls}", 'r') as f:
-                                        print( f.read() )
-                                    print("="*100)
-                                RERUN_TOKEN = True
-                                nn += 1
+                                    fls = cfiles[q][(item, 'coverage')][0]
+                                    if fid.endswith("token"):
+                                        failures = [i for i in iitems2 if i['status'] != 'pass']
+                                        print("*"*100)
+                                        print(item)
+                                        print(id)
+                                        print("-"*20 + "---We got the error " + "-"*20)
+                                        print(failures.pop()['stderr'])
+                                        print("-"*20 + "Please make sure the following is broken" + "-"*20 )
+                                        with open(f"{fid}/{fls}", 'r') as f:
+                                            print( f.read() )
+                                        print("="*100)
+
+                                    RERUN_TOKEN = True
+                                    nn += 1
             else:
                 print("No token rerunning", s4dir)
@@ -707,6 +748,9 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
             fcom = f"{cdcom} && {dcom}"
             print(f"{k}> Running docker command in", fid)
             print(fcom)
+            with open(base_directory + "/dockercom.txt", 'w') as fdock:
+                fdock.write( f"{k}> Running docker command in {fid}\n{fcom}" )
+
             # if os.path.basename(fid) == 'Group33-token':
             #     a = 234
             from unitgrade.utils import Capturing2, Capturing, Logger
@@ -761,7 +805,8 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
                 print("tokens", tokens)
                 raise e
 
-    _stage3(Dockerfile, unmute=unmute_docker)
+    if not skip_stage_3:
+        _stage3(Dockerfile, unmute=unmute_docker)
     print("> Stage 3 completed")
 
     def _stage_report():
@@ -799,12 +844,18 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
         if len(found_students) != len(glob.glob(stage1_dir + "/*")):
             a = list(found_students.keys())
             b = [os.path.basename(d) for d in glob.glob(stage1_dir + "/*")]
-            print("Found students idffer from all downloads. Very bad.", [s for s in b if s not in a])
+            raise Exception("Found students differ from all downloads. Very bad.", [s for s in b if s not in a])
+
+        assert len(found_students) == len(glob.glob(stage1_dir + "/*")) # Ensure all students have been found.
 
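+        # Illustrative, hypothetical shape of the per-student product dict used below (keys per
+        # load_produced_token_rs above; the student id and file names are made up):
+        #   {'combined_rs': {...}, 'individual_rs': {'token': {...}, 'python': {...}},
+        #    'products': [f"{stage4_dir}/s123456-token/report.token", f"{stage4_dir}/s123456-python/report.token"]}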
         for id in found_students:
+            product = load_produced_token_rs(id)
+            assert ("python" in found_students[id]) == ('python' in product['individual_rs'])
+            assert ("token" in found_students[id]) == ('token' in product['individual_rs'])
+
             if 'python' in found_students[id] and 'token' in found_students[id]:
-                t_best, p_best = determine_token_difference(load_token(found_students[id]['token'])[0], load_token(found_students[id]['python'])[0])
+                t_best, p_best = determine_token_difference( product['individual_rs']['token'], product['individual_rs']['python'])
                 if len(p_best) > 0:
                     for q in p_best.values():
                         for item in q['items']:
@@ -824,7 +875,7 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
                     messages['report'].append(s)
 
                     if configuration.get("stage_report", {}).get("accept_only_py_no_token", False):
-                        stoken = token_gather_hidden(load_token(found_students[id]['python'])[0])
+                        stoken = token_gather_hidden(product['individual_rs']['python'])
                         print("=" * 50)
                         print(s)
                         dd = defaultdict(list)
@@ -837,14 +888,14 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
             else:
                 raise Exception(id + "> No code handin for this student")
 
-            tkns = [found_students[id][key] for key in ['token','python'] if key in found_students[id]]
-            if len(tkns) == 2:
-                t = combine_token_results(load_token(tkns[0])[0], load_token(tkns[1])[0])
-            else:
-                t, _ = load_token(tkns[0])
+            # tkns = [found_students[id][key] for key in ['token','python'] if key in found_students[id]]
+
+            # if len(tkns) == 2:
+            #     t = combine_token_results(load_token(tkns[0])[0], load_token(tkns[1])[0])
+            # else:
+            #     t, _ = load_token(tkns[0])
+            t = product['combined_rs']
 
-            # strange id is s234546
-            # rs['s223845']['details']
             if configuration['stage3'].get("fudge_accept_student_evaluation", False):
                 # In this case, we limit the number of items that are available to these since we rely on the student token files.
                 # this means the token file can have different evaluation items, which would be shit.
@@ -921,7 +972,7 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
         from unitgrade_private.plagiarism.mossit import moss_it2023
         print("> running moss setup")
         moss_it2023(submissions_base_dir=stage4_dir, submissions_pattern="*-token", instructor_grade_script=instructor_grade_script,
-                    student_files_dir=student_handout_folder, submit_to_server=not copydetect_check)
+                    student_files_dir=student_handout_folder, submit_to_server=not copydetect_check, as_single_file=True)
 
     if plagiarism_check and copydetect_check: # This check is based on detector and is deprecated. I don't like detector.
         print("> running copydetect check")
diff --git a/src/unitgrade_private/plagiarism/mossit.py b/src/unitgrade_private/plagiarism/mossit.py
index 3492b5b7013aa37ab7030996905efbf2affd5c83..d998d453b4c659244753c6fdaa29b41303f270a4 100644
--- a/src/unitgrade_private/plagiarism/mossit.py
+++ b/src/unitgrade_private/plagiarism/mossit.py
@@ -57,7 +57,7 @@ def get_id(moss_pl):
     return pl.split("=")[1][:-1]
 
 def moss_it2023(submissions_base_dir=None, submissions_pattern="*-token", whitelisted_tokens="", instructor_grade_script=None, moss_id=None,
-                student_files_dir=None, submit_to_server=True):
+                student_files_dir=None, submit_to_server=True, as_single_file=False):
     # submissions_base_dir = stage4_dir
     submissions_pattern = "*-token"
 
@@ -144,6 +144,9 @@ def moss_it2023(submissions_base_dir=None, submissions_pattern="*-token", whitel
         print("You need to specify a moss id. You can do that by putting the moss.pl script at:", os.path.expanduser('~') + "/Documents/moss.pl")
         return
 
+
+
     if submit_to_server:
         m = mosspy.Moss(moss_id, "python")
         for f in glob.glob(working_dir + "/handouts/**/*.py", recursive=True):
@@ -188,7 +191,10 @@ def moss_it(whitelist_dir="", submissions_dir="", moss_id=None, blacklist=None):
     m.addFilesByWildcard(tmp_submission_dir + "/*/*.py")
 
     print("> Calling moss")
-    url = m.send(lambda file_path, display_name: print('*', end='', flush=True))
+    print(f"> Using: {tmp_base=}")
+    print(f"> Using: {tmp_submission_dir=}")
+
+    url = m.send(lambda file_path, display_name: print(f'{file_path}: {display_name}', end='\n', flush=True))
     print()
     print("Report Url: " + url)
     report_dir = os.path.dirname(whitelist_dir) + "/report"
@@ -198,4 +204,4 @@ def moss_it(whitelist_dir="", submissions_dir="", moss_id=None, blacklist=None):
     r = report_dir + "/report.html"
     m.saveWebPage(url, r)
     print("Saved report to:", r)
-    mosspy.download_report(url, report_dir, connections=8, log_level=10, on_read=lambda u: print('*', end='', flush=True))
+    mosspy.download_report(url, report_dir, connections=8, log_level=10, on_read=lambda u: print(f'* {u}', end='', flush=True))
diff --git a/src/unitgrade_private/token_loader.py b/src/unitgrade_private/token_loader.py
index 4d464bf469f7766203233cef1298e9c59567373a..9fed600710fc2ff34cea3cf7b78c6181b3327c81 100644
--- a/src/unitgrade_private/token_loader.py
+++ b/src/unitgrade_private/token_loader.py
@@ -145,6 +145,9 @@ def combine_token_results(token_a_rs, token_b_rs, combine_at_question_level=True
     rsd = {}
     n_tot = 0
     n_obt = 0
+    import copy
+    token_a_rs = copy.deepcopy(token_a_rs)
+    token_b_rs = copy.deepcopy(token_b_rs)
     for q in set(token_a_rs['details'].keys()) | set(token_b_rs['details'].keys()):
         itemsa = list(token_a_rs['details'][q]['items'])
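
Note on the combine_token_results hunk above: the copy.deepcopy guard keeps the function from
mutating the caller's result structures while it merges the two token dicts. A minimal sketch of
the aliasing hazard this avoids (hypothetical toy data, not taken from the repo):

    import copy

    def combine(a, b):
        # Without the deepcopy, the update below would also rewrite the caller's 'a'.
        a = copy.deepcopy(a)
        a['details'].update(b['details'])
        return a

    rs_a = {'details': {'q1': {'items': {}}}}
    rs_b = {'details': {'q2': {'items': {}}}}
    combined = combine(rs_a, rs_b)
    assert 'q2' in combined['details'] and 'q2' not in rs_a['details']  # caller's dict untouched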