diff --git a/src/unitgrade_devel.egg-info/PKG-INFO b/src/unitgrade_devel.egg-info/PKG-INFO index cc0a865d0a1bd5058de229b19bd9b1da6b2defd3..f367b939e651bdea47025b5ebf3cef071b16e846 100644 --- a/src/unitgrade_devel.egg-info/PKG-INFO +++ b/src/unitgrade_devel.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: unitgrade-devel -Version: 0.1.62 +Version: 0.1.64 Summary: A set of tools to develop unitgrade tests and reports and later evaluate them Home-page: https://lab.compute.dtu.dk/tuhe/unitgrade_private Author: Tue Herlau diff --git a/src/unitgrade_private/hidden_gather_upload.py b/src/unitgrade_private/hidden_gather_upload.py index 1ab502f94669612bb620d13b2cf88c1d01154cfc..a57d918479dea48438fa4cf183c1307b4ea82e41 100644 --- a/src/unitgrade_private/hidden_gather_upload.py +++ b/src/unitgrade_private/hidden_gather_upload.py @@ -218,11 +218,19 @@ Error: I could not find information about previously generated tokens. The likel token = os.path.normpath(os.path.join(output_dir, token)) b_hash = save_token(results, "\n".join(s_include), token) + mfdir = "unitgrade_data" try: + mfdir = os.path.dirname(report._manifest_file()) + with open(report._manifest_file(), 'a') as _file: _file.write("\n"+token + " " + b_hash) except Exception as e: + print("A problem occured while writing a file to the directory: ", mfdir) + print("The likely reason is that you removed the directory by accident, in which case you can re-create the directory to avoid this warning") + print("The exact error that occured was:") print(e) + print("The script will now complete as usual") + # ug_dir = os.path.dirname(report._artifact_file()) # ug_name = os.path.basename(report._artifact_file()) diff --git a/src/unitgrade_private/pipelines/dtulearn.py b/src/unitgrade_private/pipelines/dtulearn.py index 45219311e4f1e3de219791e9c32fba3df73cbe6d..e1b7c60cca91fb3b41334f8e3c2fbd1e835ae3ef 100644 --- a/src/unitgrade_private/pipelines/dtulearn.py +++ b/src/unitgrade_private/pipelines/dtulearn.py @@ -200,7 +200,7 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad info = class_information() def _stage0(): - + NUMBER = 0 # stage0_excluded_files = ["*.pdf"] stage0_excluded_files = configuration['stage0']['excluded_files'] found = [] @@ -216,12 +216,17 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad unpack_zip_file_recursively(z[:-4] + ".zip", z[:-4] + "/raw", remove_zipfiles=True) - for f in glob.glob(z[:-4] + "/raw/*"): + if os.path.isdir(z[:-4] + "/raw/archive") and len( glob.glob(z[:-4] + "/raw/*") ) == 1: + rawdir = z[:-4] + "/raw/archive" + else: + rawdir = z[:-4] + "/raw" + + for f in glob.glob(rawdir + "/*"): if os.path.basename(f) == "index.html": continue elif os.path.isdir(f): id = fname2id(os.path.basename(f), info) - + # fname2id(os.path.basename(f), info) # now get the directory. if id not in relevant_directories: @@ -240,12 +245,18 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad raise Exception( "The .zip files can only contain directories with names such as: '67914-43587 - s214598, Andreas Rahbek-Palm - 09 February, 2023 441 PM', got " + student_handout_folder) + for id, f in relevant_directories.items(): found.append(id) + NUMBER += 1 + if (lm := configuration['stage0'].get("limit", None)) is not None: + if NUMBER > lm: + break + dest = stage1_dir +"/" + id if not os.path.isdir(dest): - shutil.copytree(f, dest ) + shutil.copytree(f, dest) else: # merge the files... for new_file in glob.glob(f +"/**/*", recursive=True): @@ -255,20 +266,76 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad # Now remove blacklisted files to simplify it. for g in glob.glob(dest +"/**/*", recursive=True): import fnmatch - if g.endswith(".py"): - print(g) - if os.path.basename(g) in configuration['stage0']['rename']: - dst_name = configuration['stage0']['rename'][os.path.basename(g)] + # if g.endswith(".py"): + # print(g) + + if len([ex for ex in stage0_excluded_files if fnmatch.fnmatch(g, ex)]) > 0: + # move to graveyeard of broken stuff. + if not os.path.isdir(ff := os.path.dirname(stage0_dir) + "/removed_files/" + id): + os.makedirs(ff) + shutil.move(g, ff + f"/{id} -- " + os.path.basename(g)) + continue + + dst_name = None + for pat in configuration['stage0']['rename']: + if fnmatch.fnmatch(g, pat): + dst_name = configuration['stage0']['rename'][pat] + break + + if dst_name is not None and dst_name != os.path.basename(g): #os.path.basename(g) in configuration['stage0']['rename']: + # dst_name = configuration['stage0']['rename'][os.path.basename(g)] dst_name = os.path.dirname(g) + "/" + dst_name if not os.path.isfile(dst_name): shutil.move(g, dst_name) - if len([ex for ex in stage0_excluded_files if fnmatch.fnmatch(g, ex)]) > 0: - os.remove(g) - _stage0() + # Unpack zip files. + for new_file in glob.glob(dest + "/**/*.zip", recursive=True): + from zipfile import ZipFile + # loading the temp.zip and creating a zip object + with ZipFile(new_file, 'r') as zObject: + if os.path.isdir(ff_ := new_file + "-unpacked"): + shutil.rmtree(ff_) + + # Extracting all the members of the zip + # into a specific location. + zObject.extractall(new_file + "-unpacked") + os.remove(new_file) + + tokens = glob.glob(dest + "/**/*.token", recursive=True) + if len(tokens) > 1: # If the user has too many token files, we may be able to safely delete one of them if they are the same. Otherwise we nag. + dd = [open(t, 'rb').read() for t in tokens] + if len(list(set(dd))) == 1: + print("The two token files are the same. So just delete one of them.") + for t in tokens[1:]: + os.remove(t) + else: + + scored = [(int(t.replace("-", "_").split("_")[-3]), t) for t in tokens] + best = [] + bad = [] + for s, t in scored: + if s == max([s for s, _ in scored]): + best.append(t) + else: + bad.append(t) + if len(best) == 1: + for t in bad: + shutil.move(t, os.path.dirname(stage0_dir) + "/removed_files/" + id + "/" + os.path.basename(t)) + messages['stage0'].append((id, True, "Student had more than one token file; using the one with the most points.")) + + else: + raise Exception(f"{id} has too many tokens: The tokens found are {tokens}") + + + if len(glob.glob(dest + "/*")) == 0: + # If the destination ends up being empty, remove it. There are no handins. + shutil.rmtree(dest) + print("> Starting stage 0") + _stage0() + print("> Stage 0 completed") def _stage1(): # In this we move on to stage1. # In this stage, we move the files over to a staging area. The staging area consist of actual (complete) handins (tokens or .py files). @@ -313,8 +380,10 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad if not os.path.isdir(os.path.dirname(dst)): os.makedirs(os.path.dirname(dst)) shutil.copy(f, dst) - # print(dst) + + _stage1() + print("> Stage 1 completed") # Now move through the files and extract. I guess we do that by recursively unpacking them? def get_grade_script_location(instructor_grade_script): @@ -324,7 +393,7 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad def _stage2(fix_user=True, xvfb=True): # configuration - """ Unpack token or prep python files. """ + """ Unpack token or prep python files. for execution. """ for fid in glob.glob(stage2_dir + "/*"): if "s234792" in fid: print(fid) @@ -337,7 +406,13 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad grade_script_relative = get_grade_script_location(instructor_grade_script) if type == "token": tokens = glob.glob(fid + "/**/*.token", recursive=True) - assert len(tokens) == 1, f"{id} has too many tokens: The tokens found are {tokens}" + if len(tokens) != 1: + dd = [open(t, 'rb').read() for t in tokens] + if len(list(set(dd))) == 1: + print("The two token files are the same. So just delete one of them.") + raise Exception(f"{id} has too many tokens: The tokens found are {tokens}") + + try: unpack_sources_from_token(tokens[0], s3dir) except Exception as e: @@ -349,15 +424,20 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad else: raise e + # This will copy in resource files etc. that may not be contained in the .token file. for g in glob.glob(student_handout_folder + "/**/*.*", recursive=True): rg = os.path.relpath(g, student_handout_folder) if not os.path.isfile(s3dir + "/"+rg) and not rg.endswith(".py"): if not os.path.isdir(os.path.dirname(s3dir + "/"+rg)): os.makedirs(os.path.dirname(s3dir + "/"+rg)) if os.path.isfile(g): + # print(s3dir + "/" + rg) + # if "/home/tuhe" in rg: + # print("Wrong?") shutil.copy(g, s3dir + "/"+rg) else: - shutil.copytree(g, s3dir + "/" + g) + print(s3dir + "/" + os.path.relpath(g, student_handout_folder)) + shutil.copytree(g, s3dir + "/" + os.path.relpath(g, student_handout_folder)) else: shutil.copytree(student_handout_folder, s3dir) for g in glob.glob(fid+"/**/*.*", recursive=True): @@ -365,15 +445,30 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad fn = glob.glob(student_handout_folder + "/**/" + os.path.basename(g), recursive=True) if len(fn) == 0: print("I was unable to locate", g) - print("Bad?") + + messages['stage2'].append( (id, False, "Files did not have a match " + g)) + + files_without_a_match = os.path.dirname(os.path.dirname(s3dir)) + "/files_without_a_match" + if not os.path.isdir(files_without_a_match): + os.makedirs(files_without_a_match) + shutil.copy(g, files_without_a_match + f"/{id} -- " + os.path.basename(g)) + # os.path.relpath(fn[0], student_handout_folder) - dst = os.path.relpath(g, fid) # Take it relative to the currnet directory. + dst = s3dir + "/"+ os.path.relpath(g, fid) # Take it relative to the currnet directory. else: # dst = s3dir + "/"+os.path.dirname(grade_script_relative) + "/"+ os.path.basename(g) dst = s3dir + "/" + os.path.relpath(fn[0], student_handout_folder) if os.path.isfile(dst): - shutil.copy(g, dst) + if not os.path.isdir(dn_ := os.path.dirname(dst)): + os.makedirs(dn_) + # import time + # time.sleep(0.1) + try: + if os.path.isfile(g): + shutil.copy(g, dst) + except Exception as e: + raise e else: shutil.move(g, dst) print("> Stage two: Created", dst) @@ -395,6 +490,7 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad with open(f, 'r') as ff: ff.read() except UnicodeDecodeError as e: + print(f) print("""Student file not readable. add to stage2 kill list as in { configurations['projects']['project1']['stage3']['exclude_if_bad_encoding'] += ['*/~BROMIUM/*.py'] }""", f) for p in configuration['stage2'].get('exclude_if_bad_encoding', []): if fnmatch.fnmatch(f, p): @@ -404,11 +500,8 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad if os.path.isfile(f): raise e - - - - _stage2() + print("> Stage 2 completed") def _stage3(Dockerfile, fix_user=True, xvfb=True, unmute=False, verbose=False): if Dockerfile is None: @@ -422,9 +515,17 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad # did_nag_about = {} conf = configuration.get('stage3', {}) - for fid in glob.glob(stage3_dir + "/*"): + for k, fid in enumerate(al_ := glob.glob(stage3_dir + "/*")): # if "s234792" in fid: # print(fid) + if (k+1) % 100 == 0: + print(f"stage3> at student {k+1} of {len(al_)}") + + if "-" not in os.path.basename(fid): + print("Bad file! ", fid) + id, type = os.path.basename(fid).split("-") + student_token_file = glob.glob(f"{stage2_dir}/{id}-token/**/*.token", recursive=True) + s4dir = f"{stage4_dir}/{os.path.basename(fid)}" grade_script_relative = get_grade_script_location(instructor_grade_script) grade_script_destination = os.path.dirname(fid + "/" + grade_script_relative) + "/" + os.path.basename(instructor_grade_script) @@ -434,20 +535,28 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad if os.path.isdir(s4dir): RERUN_TOKEN = False # Try to get the old token file - id, type = os.path.basename(fid).split("-") + # id, type = os.path.basename(fid).split("-") # now combine the student and instructor versions of this file for an evaluations. + products = glob.glob(f"{stage4_dir}/{id}-*/*.token") - student_token_file = glob.glob(f"{stage2_dir}/{id}-token/**/*.token", recursive=True) + p1 = glob.glob(f"{stage4_dir}/{id}-python/*.token") + p2 = glob.glob(f"{stage4_dir}/{id}-token/*.token") + + produced_python_rs, _ = load_token(p1[0]) if len(p1) > 0 else None + produced_token_rs = load_token(p2[0]) if len(p2) > 0 else None + assert len(student_token_file) <= 1 if type == 'token': assert len(student_token_file) == 1 + if len(products) == 2: - rc = combine_token_results(load_token(products[0])[0], load_token(products[1])[0]) + rc = combine_token_results(produced_python_rs, produced_token_rs) # flag when student has a test item that pass which the token file does not. elif len(products) > 2: raise Exception(f"Handins not recognized {products}") elif len(products) == 1: - rc = load_token(products[0])[0] + rc = produced_token_rs if produced_python_rs is not None else produced_token_rs + # rc = load_token(products[0])[0] if len(products) == 0: # No .token file has actually been generated. So obviously we have to re-generate it. @@ -467,14 +576,14 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad if "sources" not in rc: print("no sources") - ptoken = load_token(products[0])[0] + ptoken = produced_token_rs if produced_python_rs is not None else produced_token_rs # load_token(products[0])[0] rename_map = conf.get('rename_items', {}) # Why give them a single test when I can sit on my ass and give them incompatible tests, WCGW? for q in stoken['details']: stoken['details'][q]['items'] = {rename_map.get(k, k): v for k, v in stoken['details'][q]['items'].items()} if ".".join(stoken['sources'][0]['report_module_specification']).lower().replace(" ", "") == ".".join(ptoken['sources'][0]['report_module_specification']).replace("_tests_complete", "").lower(): # - s_better_than_i, _ = determine_token_difference(stoken, rc) + s_better_than_i, _ = determine_token_difference(stoken, produced_token_rs) # Since we are going on a per-question basis, we only look at the token files. acceptable_broken = False elif id in configuration.get('stage3', {}).get('accept_incompatible_token_names', []): print("Incompatible token names accepted...") @@ -482,7 +591,8 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad acceptable_broken = True else: print(".".join(stoken['sources'][0]['report_module_specification']).lower()) - print(".".join(rc['sources'][0]['report_module_specification']).replace("_tests_complete", "").lower()) + if rc is not None and rc['sources'] is not None and rc['sources'][0] is not None: + print(".".join(rc['sources'][0]['report_module_specification']).replace("_tests_complete", "").lower()) messages['stage3'].append(f"{id}> Bad student token. Add id incompatible token names ['stage3']['accept_incompatible_token_names']. This likely occured because the student renamed the grade script. " + str(student_token_file)) RERUN_TOKEN = True # Not hat it really helps. acceptable_broken = True @@ -491,19 +601,16 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad for q in s_better_than_i: for item in s_better_than_i[q]['items']: if item == ('Week06SentimentAnalysis', 'test_sentiment_analysis'): - print("Yes we were better but it had to do with idiotic sentiment analysis thanks a fuck...") + print("Yes we were better but it had to do with idiotic sentiment analysis...") continue messages['stage3'].append(f"{id}> ERROR: Student strictly better than instructor. q{q}. item: {item}") RERUN_TOKEN = True - # for q in stoken['details']: - # print(stoken['details'][q]['name'], ptoken['details'][q]['name'] ) - # - # print(stoken['details'][5] ) - # print( ptoken['details'][5] ) + rch = token_gather_hidden(rc) + # instructor_rs_token, _ = load_token([t for t in products if '-token' in t].pop()) + instructor_rs_token = produced_token_rs - rch = token_gather_hidden(rc) for q in stoken['details']: if acceptable_broken: @@ -521,13 +628,19 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad # print(rch['details'][q]['items'].keys()) - iitems = rch['details'][q]['items'][item] + # token_products = + # Since we combine the token we can only trust (our) token product and not the combined one. + + + iitems = [instructor_rs_token['details'][q]['items'][item]] if sitem['status'] == 'pass' and not all([i['status'] == 'pass' for i in iitems]) and id not in conf.get('verified_problematic_items', {}).get(item, []) and not conf.get("accept_public_ok_hidden_failed", False): # print('disagreement found.') iitems = rch['details'][q]['items'][item] fails = [i['nice_title'] for i in iitems if i['status'] != 'pass'] + messages['stage3'].append(f"{id} {nn+1}> Hidden test disagreement. Public ok but hidden got failues in: {fails}, {item}") + from unitgrade_private.token_loader import get_coverage_files cfiles = get_coverage_files(student_token_file[0], instructor_grade_script_dir=os.path.dirname(grade_script_destination)) @@ -577,7 +690,7 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad dockname = tag pycom = ".".join(grade_script_relative[:-3].split("/")) + " --noprogress" - pycom = "python3 -m " + pycom + pycom = "python3.11 -m " + pycom if fix_user: user_cmd = ' --user "$(id -u):$(id -g)" ' else: @@ -592,10 +705,10 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad dcom = f"docker run {user_cmd} -v {tmp_path}:/home {dockname} {pycom}" cdcom = f"cd {os.path.dirname(Dockerfile)}" fcom = f"{cdcom} && {dcom}" - print("> Running docker command in", fid) + print(f"{k}> Running docker command in", fid) print(fcom) - if os.path.basename(fid) == 'Group33-token': - a = 234 + # if os.path.basename(fid) == 'Group33-token': + # a = 234 from unitgrade.utils import Capturing2, Capturing, Logger # from spb.defaults import * # spb / defaults.py @@ -628,7 +741,19 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad for f in glob.glob(s4dir + "/*.token"): os.remove(f) try: - shutil.move(tokens[0], s4dir + "/" + os.path.basename(tokens[0])) + real_dest = s4dir + "/" + os.path.basename(tokens[0]) + + if conf.get('fudge_accept_student_evaluation', False): + try: + dest = real_dest.split("handin_")[0] + "handin_" + student_token_file[0].split('handin_')[1] + except Exception as e: + dest = real_dest + shutil.copy(student_token_file[0], dest) + # raise e + else: + shutil.move(tokens[0], real_dest) + + except Exception as e: print("-"*50) print("Got a problem wit hthis student") @@ -637,11 +762,11 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad raise e _stage3(Dockerfile, unmute=unmute_docker) + print("> Stage 3 completed") def _stage_report(): found_students = defaultdict(dict) rs = {} - for fid in glob.glob(stage1_dir + "/*"): id = os.path.basename(fid) rs[id] = {'token_downloaded': None, 'token_produced': []} @@ -657,7 +782,6 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad rs[id]['token_downloaded_hash'] = blake_hash - for cid in glob.glob(f"{stage4_dir}/{id}-*"): type = os.path.basename(cid).split("-")[1] tokens = glob.glob(f"{cid}/*.token") @@ -672,6 +796,11 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad rs[id]['token_produced'].append(tokens[0]) + if len(found_students) != len(glob.glob(stage1_dir + "/*")): + a = list(found_students.keys()) + b = [os.path.basename(d) for d in glob.glob(stage1_dir + "/*")] + print("Found students idffer from all downloads. Very bad.", [s for s in b if s not in a]) + assert len(found_students) == len(glob.glob(stage1_dir + "/*")) # Ensure all students have been found. for id in found_students: if 'python' in found_students[id] and 'token' in found_students[id]: @@ -679,28 +808,32 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad if len(p_best) > 0: for q in p_best.values(): for item in q['items']: - if not configuration.get("stage_report", {}).get("accept_student_code_better_than_token", False): - messages['report'].append(f"{id}> Evaluation of student code (i.e. .py handins) was better than the token file evaluation. " + str(item) ) # + " student stderr: \n" + str(q['items'][item]['a']['stderr']) + "\n instructor stderr: \n" + str(q['items'][item]['b']['stderr'])) + # if not configuration.get("stage_report", {}).get("accept_student_code_better_than_token", False): + messages['report'].append((id, + configuration.get("stage_report", {}).get("accept_student_code_better_than_token", False), + f"{id}> Evaluation of student code (i.e. .py handins) was better than the token file evaluation. " + str(item))) # + " student stderr: \n" + str(q['items'][item]['a']['stderr']) + "\n instructor stderr: \n" + str(q['items'][item]['b']['stderr'])) + elif 'token' in found_students[id] and 'python' not in found_students[id]: pass elif 'token' not in found_students[id] and 'python' in found_students[id]: if id not in configuration.get('stage_report', {}).get("python_handin_checked", []): if not configuration.get("stage_report", {}).get("accept_only_py_no_token", False): - print("=" * 50) - s = f"{id}> only handed in the .py files and not the .token files. " +str(found_students[id]['python'] + " to skip this mesage, alter the stage_report['python_handin_checked'] field. ") + s = (id, configuration.get("stage_report", {}).get("accept_only_py_no_token", False), + f"{id}> only handed in the .py files and not the .token files. " +str(found_students[id]['python'] + " to skip this mesage, alter the stage_report['python_handin_checked'] field. ")) messages['report'].append(s) - stoken =token_gather_hidden(load_token(found_students[id]['python'])[0]) - print(s) - dd = defaultdict(list) - for q in stoken['details']: - for item in stoken['details'][q]['items']: - # print(item, stoken['details'][q]['items'][item][0]['status']) - dd['test'].append(item) - dd['status'].append(stoken['details'][q]['items'][item][0]['status']) - print(tabulate.tabulate(dd, headers='keys')) - + if configuration.get("stage_report", {}).get("accept_only_py_no_token", False): + stoken = token_gather_hidden(load_token(found_students[id]['python'])[0]) + print("=" * 50) + print(s) + dd = defaultdict(list) + for q in stoken['details']: + for item in stoken['details'][q]['items']: + # print(item, stoken['details'][q]['items'][item][0]['status']) + dd['test'].append(item) + dd['status'].append(stoken['details'][q]['items'][item][0]['status']) + print(tabulate.tabulate(dd, headers='keys')) else: raise Exception(id + "> No code handin for this student") @@ -709,38 +842,89 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad t = combine_token_results(load_token(tkns[0])[0], load_token(tkns[1])[0]) else: t, _ = load_token(tkns[0]) + + # strange id is s234546 + # rs['s223845']['details'] + if configuration['stage3'].get("fudge_accept_student_evaluation", False): + # In this case, we limit the number of items that are available to these since we rely on the student token files. + # this mean the token file can have differnet evaluation items which woudl be shit. + + # limit_items = configuration['stage3']['fudge_accept_student_evaluation_items'] + # f"{stage3_dir}/{os.path.basename(os.path.dirname(rs[id]['token_produced'][0]))}/" + grade_script_relative = get_grade_script_location(instructor_grade_script) + # Get the intstructor token + itoken = glob.glob( os.path.dirname(f"{stage3_dir}/{os.path.basename(os.path.dirname(rs[id]['token_produced'][0]))}/{grade_script_relative}") + "/*.token" ) + assert len(itoken) >= 1, "no produced token found for " + rs[id]['token_produced'][0] + irs, _ = load_token(itoken[0]) + for q in list(t['details'].keys()): + if q not in irs['details']: + print(id, "> Deleting bad questions", q) + del t['details'][q] + for q in t['details']: + for item in list(t['details'][q]['items']): + if item not in irs['details'][q]['items']: + print(id, "> Deleting bad item", item) + del t['details'][q]['items'][item] + rs[id] = {**rs[id], **t} if slim_rs and 'sources' in rs[id]: rs[id]['sources'] = "Sources have been removed from this token because slim_rs=True (see dtulearn.py)." return rs rs = _stage_report() + print("> Stage reporting completed") + + # message_log = "" + # messages_bad = [] + # messages_ok = [] all_msgs = [] + all_observations = [] + if len(messages) > 0: - print("=" * 50) - print("Oy veh, there are messages") + # print("=" * 50) + # print(f"Oy veh, there are {sum([len(s) for s in messages.values()])} messages") for stage in messages: - print("Messages from", stage) + # print("Messages from", stage) for s in messages[stage]: - print(m_ := ">> "+ s) - all_msgs.append(m_) - print("-" * 50) + if isinstance(s, str): + s = ("saf", False, s) + + id, acceptable, msg = s + msg = f"{id} | {stage}> {msg}" + if acceptable: + all_observations.append(msg) + else: + all_msgs.append(msg) + - if not accept_problems: - assert False, "No messages allowed!" + if len(all_msgs) > 0: + print("=" * 50) + print(f"Oy veh, there are {len(all_msgs)} critical problems") + for s in all_msgs: + print(s) + # print(m_ := ">> "+ s) + # all_msgs.append(m_) + print("-" * 50) - with open(base_directory +"/log.txt", "w") as f: + if not accept_problems: + assert False, "No messages allowed!" + + with open(base_directory +"/errors.txt", "w") as f: f.write("\n".join(all_msgs)) + with open(base_directory + "/acceptable.txt", "w") as f: + f.write("\n".join(all_observations)) + if plagiarism_check or copydetect_check: from unitgrade_private.plagiarism.mossit import moss_it2023 + print("> running moss setup") moss_it2023(submissions_base_dir=stage4_dir, submissions_pattern="*-token", instructor_grade_script=instructor_grade_script, student_files_dir=student_handout_folder, submit_to_server=not copydetect_check) - # Write the moss files. if plagiarism_check and copydetect_check: # This check is based on detector and is deprecated. I don't like detector. + print("> running copydetect check") from coursebox.core.info_paths import get_paths paths = get_paths() from copydetect import CopyDetector @@ -773,48 +957,49 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad test_dir_list = list(glob.glob(copydetect_submissions_dir + "/*")) - detector = CopyDetector(extensions=["py"], display_t=0.7, boilerplate_dirs=[copydetect_handout_dir], test_dirs=test_dir_list, same_name_only=True) + detector = CopyDetector(extensions=["py"], display_t=0.7, boilerplate_dirs=[copydetect_handout_dir], test_dirs=test_dir_list, same_name_only=True, autoopen=False) detector.out_file = working_dir + "/copydetect_report.html" detector.run() detector.generate_html_report() - # """ - # file:///home/tuhe/Documents/02002instructors/project_evaluations_2023fall/project1/moss/handouts - # file:///home/tuhe/Documents/02002instructors/project_evaluations_2023fall/project1/moss/moss_submissions - # - # """ - # - # - # detector = CopyDetector(same_name_only=True, extensions=["py"], display_t=0.7) - # - # relatives = [] - # for id in rs: - # v = [int(s.split("_")[-3]) for s in rs[id]['token_produced']] - # token = rs[id]['token_produced'][v.index(max(v))] - # tk, _ = load_token(token) - # rl = tk['sources'][0]['report_relative_location'] - # bd = f"{stage3_dir}/{os.path.basename(os.path.dirname(token))}/{os.path.dirname(rl)}" - # chk_files = [] - # for q in tk['details']: - # # print(q) - # with open(f"{bd}/unitgrade_data/{tk['details'][q]['name']}.pkl", 'rb') as f: - # pk = pickle.load(f) - # - # for item in tk['details'][q]['items']: - # key = (item, 'coverage') - # if key in pk: - # for f in pk[key]: - # relatives.append(f) - # chk_files.append( f"{stage3_dir}/{os.path.basename(os.path.dirname(token))}/{f}") - # - # chk_files = list(set(chk_files)) - # for f in chk_files: - # detector.add_file(f) - # for f in set(relatives): - # ff = paths['02450private']+"/Exam/exam2023spring/handout/" + f - # detector.add_file(ff, type="boilerplate") - # - # detector.run() - # detector.generate_html_report() + + cheaters = defaultdict(float) + for element in detector.get_copied_code_list(): + if element[-1] < 800: + + continue + + + pct1 = element[0] + pct2 = element[1] + id1 = element[2].split("/")[-2].split("-")[0] + id2 = element[3].split("/")[-2].split("-")[0] + + if min(pct1, pct2) < 0.95: + continue + + + cheaters[id1] = max(cheaters[id1], pct1) + cheaters[id2] = max(cheaters[id2], pct2) + + + cheaters = {id: pct for id, pct in cheaters.items() if pct > 0.95} + + + with open( paths['semester']+ "/cheating_" + os.path.basename(base_directory) + ".txt", 'w') as f: + f.write( "\n".join([f"{id} {pct}" for id, pct in cheaters.items()]) ) + + with open( paths['semester']+ "/cheating_" + os.path.basename(base_directory) + "_email.txt", 'w') as f: + f.write("; ".join([f"{id}@student.dtu.dk" for id, pct in cheaters.items()]) ) + + + + + + + + + + return rs @@ -1045,8 +1230,11 @@ def moss_check(dzip, out, moss_id=None): def fname2id(fname, info=None): # fname = os.path.basename(f) - id_cand = fname.split("-")[2].strip().split(",")[0] - # print(id_cand, token) + if "-" not in fname and fname[0] == "s" and len(fname) == 7: + id_cand = fname + else: + id_cand = fname.split("-")[2].strip().split(",")[0] + if id_cand.startswith("Group"): id = id_cand.replace(" ", "") else: diff --git a/src/unitgrade_private/plagiarism/mossit.py b/src/unitgrade_private/plagiarism/mossit.py index fb6991916a003c0f378dc26123ab44cf6b7c1fcf..3492b5b7013aa37ab7030996905efbf2affd5c83 100644 --- a/src/unitgrade_private/plagiarism/mossit.py +++ b/src/unitgrade_private/plagiarism/mossit.py @@ -59,8 +59,6 @@ def get_id(moss_pl): def moss_it2023(submissions_base_dir=None, submissions_pattern="*-token", whitelisted_tokens="", instructor_grade_script=None, moss_id=None, student_files_dir=None, submit_to_server=True): - a = 234 - # submissions_base_dir = stage4_dir submissions_pattern = "*-token" print("-"*50) @@ -79,28 +77,44 @@ def moss_it2023(submissions_base_dir=None, submissions_pattern="*-token", whitel student_files_dir = paths['02450students'] cov_files = None - for f in glob.glob(submissions_base_dir + "/" + submissions_pattern): - if os.path.isdir(f): - id = os.path.basename(f) - # This gives us all the tokens. From here, we want to extract the relevant files. - # To do that, we must first get the relevant files. - tokens = glob.glob(f + "/**/*.token", recursive=True) - if len(tokens) > 0: - token = tokens[0] - if cov_files is None: - cov_files = get_coverage_files(token, os.path.dirname(instructor_grade_script)) - # Now create all the submissions by extracting the covered files. - import tempfile - with tempfile.TemporaryDirectory() as tmpdirname: - unpack_sources_from_token(token, destination=tmpdirname) - sdir = working_dir + "/moss_submissions/" + id - if not os.path.isdir(sdir): - os.makedirs(sdir) - for q in cov_files: - for i in cov_files[q].values(): - for g in i: - if os.path.isfile(student_file := f"{tmpdirname}/{g}"): - shutil.copy(student_file, f"{sdir}/{os.path.basename(g)}") + # Get the submissions to check. + from collections import defaultdict + # ids = defaultdict(list) + ids = {} + for f in glob.glob(submissions_base_dir + "/" + '*-*'): + token = glob.glob(f + "/*.token")[0] + points = int( os.path.basename(token).split("_")[-3] ) + id =os.path.basename(f).split("-")[0] + if id not in ids: + ids[id] = (points, token) + else: + if points > ids[id][0]: + ids[id] = (points, token) + + + for id, (points, token) in ids.items(): # glob.glob(submissions_base_dir + "/" + submissions_pattern): + # if os.path.isdir(f): + # id = os.path.basename(f) + # This gives us all the tokens. From here, we want to extract the relevant files. + # To do that, we must first get the relevant files. + # tokens = glob.glob(f + "/**/*.token", recursive=True) + + if True: # len(tokens) > 0: + # token = tokens[0] + if cov_files is None: + cov_files = get_coverage_files(token, os.path.dirname(instructor_grade_script)) + # Now create all the submissions by extracting the covered files. + import tempfile + with tempfile.TemporaryDirectory() as tmpdirname: + unpack_sources_from_token(token, destination=tmpdirname) + sdir = working_dir + "/moss_submissions/" + id + if not os.path.isdir(sdir): + os.makedirs(sdir) + for q in cov_files: + for i in cov_files[q].values(): + for g in i: + if os.path.isfile(student_file := f"{tmpdirname}/{g}"): + shutil.copy(student_file, f"{sdir}/{os.path.basename(g)}") if cov_files is None: return if student_files_dir is not None: diff --git a/src/unitgrade_private/token_loader.py b/src/unitgrade_private/token_loader.py index 7321ea2ecd023b3ac909f07a8223212e68b53524..4d464bf469f7766203233cef1298e9c59567373a 100644 --- a/src/unitgrade_private/token_loader.py +++ b/src/unitgrade_private/token_loader.py @@ -74,7 +74,6 @@ def token_gather_hidden(token_rs, public_test_items_weight=1.): # Now fix the score. item_scores = 0 - for item in rb['details'][q]['items']: # w0 = 1 if len(rb['details'][q]['items'][item]) == 1 else public_test_items_weight @@ -113,8 +112,11 @@ def determine_token_difference(student_token_rs, instructor_token_rs): kk = list(a.keys()) kk += [k for k in b.keys() if b not in kk] for q in kk: + for item in a[q]['items']: # print(q) + if q not in b: + print("Bad question!") if a[q]['items'][item]['status'] == 'pass' and (item not in b[q]['items'] or b[q]['items'][item]['status'] != 'pass'): if q not in a_better_than_b: a_better_than_b[q] = {'items': {}} @@ -129,14 +131,13 @@ def determine_token_difference(student_token_rs, instructor_token_rs): return a_better_than_b, b_better_than_a -def combine_token_results(token_a_rs, token_b_rs): +def combine_token_results(token_a_rs, token_b_rs, combine_at_question_level=True): """ token_a_rs = load_token(...) token_b_rs = load_token(...) Combine by or'in the inputs. It will also recompute the token scores. - :param token_a_rs: :param token_b_rs: :return: @@ -166,12 +167,10 @@ def combine_token_results(token_a_rs, token_b_rs): eql = False rsd[q]['items'][i] = item - for k in token_a_rs['details'][q].keys(): if k not in ['obtained', 'items']: rsd[q][k] = token_a_rs['details'][q][k] assert token_a_rs['details'][q][k] == token_b_rs['details'][q][k], k - # rsd[q] = k w = token_a_rs['details'][q]['w'] nc = int( np.floor( np.mean( [i['status'] == 'pass' for i in token_a_rs['details'][q]['items'].values()] ) * w ) ) @@ -179,6 +178,18 @@ def combine_token_results(token_a_rs, token_b_rs): abt = token_a_rs['details'][q]['obtained'] assert nc == token_a_rs['details'][q]['obtained'] and nc == token_b_rs['details'][q]['obtained'], f"points differ. {nc} != {abt}" rsd[q]['obtained'] = nc + + if combine_at_question_level: + assert token_a_rs['details'][q]['possible'] == token_b_rs['details'][q]['possible'] + + if token_a_rs['details'][q]['obtained'] >= token_b_rs['details'][q]['obtained']: + rsd[q] = token_a_rs['details'][q] + else: + rsd[q] = token_b_rs['details'][q] + + w = rsd[q]['w'] + nc = rsd[q]['obtained'] + n_tot += w n_obt += nc diff --git a/src/unitgrade_private/version.py b/src/unitgrade_private/version.py index 5c2098c4689bbe95e6e068ea2f6a6ac579a3416e..5ddcdfdff1fb3d5d0f9a2b77ce3e519790965970 100644 --- a/src/unitgrade_private/version.py +++ b/src/unitgrade_private/version.py @@ -1 +1 @@ -__version__ = "0.1.62" +__version__ = "0.1.64"