diff --git a/src/unitgrade_private/pipelines/dtulearn.py b/src/unitgrade_private/pipelines/dtulearn.py
index f49cc94987a763219d6d0c8d57f2b3f297552f96..45219311e4f1e3de219791e9c32fba3df73cbe6d 100644
--- a/src/unitgrade_private/pipelines/dtulearn.py
+++ b/src/unitgrade_private/pipelines/dtulearn.py
@@ -139,6 +139,8 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
                                 unmute_docker = True,
                                 plagiarism_check=False,
                                 accept_problems=False,  # No!
+                                copydetect_check=False,  # Run a local copydetect check instead of submitting to the moss server.
+                                slim_rs=False,  # Slim the returned rs data structure by removing the bulky 'sources' entries.
                                 ):
     """
     This is the main verification script. It is the main entry point for project verifications as downloaded from DTU Learn.
@@ -708,6 +710,8 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
             else:
                 t, _ = load_token(tkns[0])
                 rs[id] = {**rs[id], **t}
+            if slim_rs and 'sources' in rs[id]:
+                rs[id]['sources'] = "Sources have been removed from this token because slim_rs=True (see dtulearn.py)."
         return rs
 
     rs = _stage_report()
@@ -730,65 +734,50 @@ def docker_stagewise_evaluation(base_directory, Dockerfile=None, instructor_grad
     with open(base_directory +"/log.txt", "w") as f:
         f.write("\n".join(all_msgs))
 
-    # rs['messages'] = messages
-
-    # with open()
-
-    if plagiarism_check and True:
+    if plagiarism_check or copydetect_check:
         from unitgrade_private.plagiarism.mossit import moss_it2023
         moss_it2023(submissions_base_dir=stage4_dir, submissions_pattern="*-token", instructor_grade_script=instructor_grade_script,
-                    student_files_dir=student_handout_folder)
+                    student_files_dir=student_handout_folder, submit_to_server=not copydetect_check)
+        # moss_it2023 always writes the moss input files; it only submits them when submit_to_server=True.
 
-    if plagiarism_check and False: # This check is based on detector and is deprecated. I don't like detector.
-        from coursebox.core.info_paths import get_paths
-        paths = get_paths()
+    if copydetect_check:  # Local plagiarism check based on copydetect; nothing is sent to the moss server.
         from copydetect import CopyDetector
-        detector = CopyDetector(extensions=["py"], display_t=0.7)
-        # rs['Group10']['sources'][0]['report_relative_location']
-        relatives = []
-        for id in rs:
-            v = [int(s.split("_")[-3]) for s in rs[id]['token_produced']]
-            token = rs[id]['token_produced'][v.index(max(v))]
-            tk, _ = load_token(token)
-            rl = tk['sources'][0]['report_relative_location']
-            bd = f"{stage3_dir}/{os.path.basename(os.path.dirname(token))}/{os.path.dirname(rl)}"
-            chk_files = []
-            for q in tk['details']:
-                # print(q)
-                with open(f"{bd}/unitgrade_data/{tk['details'][q]['name']}.pkl", 'rb') as f:
-                    pk = pickle.load(f)
-
-                for item in tk['details'][q]['items']:
-                    key = (item, 'coverage')
-                    if key in pk:
-                        for f in pk[key]:
-                            relatives.append(f)
-                            chk_files.append( f"{stage3_dir}/{os.path.basename(os.path.dirname(token))}/{f}")
-            # print(chk_files)
-            chk_files = list(set(chk_files))
-
-        for f in chk_files:
-            detector.add_file(f)
-        # Fuck this shit just add the solutions.
-        for f in set(relatives):
-            ff = paths['02450private']+"/Exam/exam2023spring/handout/" + f
-            detector.add_file(ff, type="boilerplate")
-
-        detector.run()
-        detector.generate_html_report()
-
-
-
-
-
-
-
-
-        # 799171 89568609
-        # _stage_plagiarism()
-
-        pass
-
+        working_dir = os.path.dirname(stage4_dir) + "/moss"
+
+        # Run the copydetect-based checker on the moss input files written above.
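+        # Each student's .py files are concatenated into a single student_code.py (see read_all_chunk below).
+        # Since every submission and the handout end up with the same file name, same_name_only=True makes
+        # copydetect compare all submissions against each other while ignoring the handout as boilerplate.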
+        def read_all_chunk(bdir):
+            # Concatenate all .py files in bdir into a single source string.
+            py_files = []
+            for fname in sorted(glob.glob(bdir + "/*.py")):
+                with open(fname, "r") as f:
+                    py_files.append(f.read())
+            return "\n\n".join(py_files)
+
+        copydetect_submissions_dir = working_dir + "/copydetect_submissions"
+        if os.path.isdir(copydetect_submissions_dir):
+            shutil.rmtree(copydetect_submissions_dir)
+        os.makedirs(copydetect_submissions_dir)
+
+        for bdir in glob.glob(working_dir + "/moss_submissions/*"):
+            os.makedirs(odir := copydetect_submissions_dir + "/" + os.path.basename(bdir))
+            with open(odir + "/student_code.py", "w") as f:
+                f.write(read_all_chunk(bdir))
+
+        copydetect_handout_dir = working_dir + "/copydetect_handout"
+        if os.path.isdir(copydetect_handout_dir):
+            shutil.rmtree(copydetect_handout_dir)
+        os.makedirs(copydetect_handout_dir)
+        with open(copydetect_handout_dir + "/student_code.py", "w") as f:
+            f.write(read_all_chunk(working_dir + "/handouts"))
+
+        test_dir_list = list(glob.glob(copydetect_submissions_dir + "/*"))
+
+        detector = CopyDetector(extensions=["py"], display_t=0.7, boilerplate_dirs=[copydetect_handout_dir],
+                                test_dirs=test_dir_list, same_name_only=True)
+        detector.out_file = working_dir + "/copydetect_report.html"
+        detector.run()
+        detector.generate_html_report()
     return rs
diff --git a/src/unitgrade_private/plagiarism/mossit.py b/src/unitgrade_private/plagiarism/mossit.py
index b6de2fe4dfddc8905d4eb1faffcab4d4e7e2ff57..fb6991916a003c0f378dc26123ab44cf6b7c1fcf 100644
--- a/src/unitgrade_private/plagiarism/mossit.py
+++ b/src/unitgrade_private/plagiarism/mossit.py
@@ -57,7 +57,7 @@ def get_id(moss_pl):
     return pl.split("=")[1][:-1]
 
 def moss_it2023(submissions_base_dir=None, submissions_pattern="*-token", whitelisted_tokens="", instructor_grade_script=None, moss_id=None,
-                student_files_dir=None):
+                student_files_dir=None, submit_to_server=True):
     a = 234
 
 
@@ -101,6 +101,10 @@ def moss_it2023(submissions_base_dir=None, submissions_pattern="*-token", whitel
         for g in i:
            if os.path.isfile(student_file := f"{tmpdirname}/{g}"):
                shutil.copy(student_file, f"{sdir}/{os.path.basename(g)}")
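+    # Guard: everything below iterates over cov_files, so stop here if no coverage
+    # information could be recovered from the tokens.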
+    if cov_files is None:
+        return
 
     if student_files_dir is not None:
         for q in cov_files:
@@ -128,23 +130,32 @@ def moss_it2023(submissions_base_dir=None, submissions_pattern="*-token", whitel
         print("You need to specify a moss id. You can do that by putting the moss.pl script at:", os.path.expanduser('~') + "/Documents/moss.pl")
         return
 
-    m = mosspy.Moss(moss_id, "python")
-    for f in glob.glob(working_dir + "/handouts/**/*.py", recursive=True):
-        print(f)
-        m.addBaseFile(f)
-
-    m.addFilesByWildcard(working_dir + "/moss_submissions/*/*.py")
-    print("> Calling moss with id", moss_id)
-    url = m.send(lambda file_path, display_name: print('*', end='', flush=True))
-    print()
-    print("Report Url: " + url)
-    r = working_dir + "/report/report.html"
-    if not os.path.isdir(os.path.dirname(r)):
-        os.makedirs(os.path.dirname(r))
-    # m.saveWebPage(url, r)
-    # print("Saved report to:", r)
-    mosspy.download_report(url, os.path.dirname(r), connections=8, log_level=10,
-                           on_read=lambda u: print('*', end='', flush=True))
+    if submit_to_server:
+        m = mosspy.Moss(moss_id, "python")
+        for f in glob.glob(working_dir + "/handouts/**/*.py", recursive=True):
+            print("Moss adding base file>", f)
+            m.addBaseFile(f)
+
+        m.addFilesByWildcard(working_dir + "/moss_submissions/*/*.py")
+        print("> Calling moss with id", moss_id)
+        # Progress callback for m.send(): a numbered line per file in verbose mode, otherwise one '*' per file.
+        d = {'count': 0}
+        def gcount():
+            d['count'] += 1
+            return d['count']
+        verbose = False
+        if verbose:
+            status_fun = lambda file_path, display_name: print("moss> " + str(gcount()) + ": " + file_path + " - " + display_name, flush=True)
+        else:
+            status_fun = lambda file_path, display_name: print("*", end='', flush=True)
+        url = m.send(status_fun)
+        print()
+        print("Report Url: " + url)
+        r = working_dir + "/report/report.html"
+        if not os.path.isdir(os.path.dirname(r)):
+            os.makedirs(os.path.dirname(r))
+        mosspy.download_report(url, os.path.dirname(r), connections=8, log_level=10,
+                               on_read=lambda u: print('*', end='', flush=True))
 
 
 def moss_it(whitelist_dir="", submissions_dir="", moss_id=None, blacklist=None):
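For reference, a minimal sketch of how the two new flags are meant to combine when invoking the pipeline; the paths and grade-script name below are placeholders, not values from this repository:

    from unitgrade_private.pipelines.dtulearn import docker_stagewise_evaluation

    # Local plagiarism check: the moss input files are written to disk, nothing is
    # submitted to the moss server, and copydetect renders moss/copydetect_report.html.
    rs = docker_stagewise_evaluation(
        base_directory="/path/to/project_evaluations/project1",  # placeholder
        instructor_grade_script="report1_grade.py",              # placeholder
        copydetect_check=True,  # implies submit_to_server=False inside moss_it2023
        slim_rs=True,           # replaces the bulky 'sources' entries in the returned rs
    )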