diff --git a/setup.py b/setup.py index 5d1e12e2d1c33e1c740836a63512970abad01369..e5eb159599011e2949af935c6ee9f8b7295dc79c 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ with open("README.md", "r", encoding="utf-8") as fh: # beamer-slider setuptools.setup( name="coursebox", - version="0.1.2", + version="0.1.4", author="Tue Herlau", author_email="tuhe@dtu.dk", description="A course management system currently used at DTU", @@ -30,5 +30,5 @@ setuptools.setup( package_dir={"": "src"}, packages=setuptools.find_packages(where="src"), python_requires=">=3.8", - install_requires=['numpy','pycode_similar','tika','openpyxl', 'xlwings','matplotlib','langdetect','jinjafy','beamer-slider','tinydb'], + install_requires=['numpy','pycode_similar','tika','openpyxl', 'xlwings','matplotlib','langdetect','beamer-slider','tinydb'], ) diff --git a/src/coursebox.egg-info/PKG-INFO b/src/coursebox.egg-info/PKG-INFO index f983e78026a177b883847328ceb438f5008de6d7..a813cf86b94091db616cdd25638a527e0cd35823 100644 --- a/src/coursebox.egg-info/PKG-INFO +++ b/src/coursebox.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: coursebox -Version: 0.1.1 +Version: 0.1.4 Summary: A course management system currently used at DTU Home-page: https://lab.compute.dtu.dk/tuhe/coursebox Author: Tue Herlau @@ -16,6 +16,34 @@ Description-Content-Type: text/markdown License-File: LICENSE # Coursebox DTU +DTU course management software. -DTU course management software. 
+## Installation +```terminal +pip install coursebox +``` +## What it can do + - Single semester-dependent configuration file + - Integrates with DTU Inside/DTU Learn + - Distribution/evaluation of project reports in Learn-compatible format + - Quiz-generation in DTU Learn/Beamer friendly format + - Automatic website/syllabus generation + - Automatic generation of lectures handouts/exercises (you don't have to track dynamic content like dates/lecture titles; it is all in the configuration) + - Easy compilation to 2/5 day formats (Continuous education) + +## Usage +Coursebox requires a specific directory structure. It is easier to start with an existing course and adapt to your needs. Please contact me at tuhe@dtu.dk for more information. + +## Citing +```bibtex +@online{coursebox, + title={Coursebox (0.1.1): \texttt{pip install coursebox}}, + url={https://lab.compute.dtu.dk/tuhe/coursebox}, + urldate = {2021-09-07}, + month={9}, + publisher={Technical University of Denmark (DTU)}, + author={Tue Herlau}, + year={2021}, +} +``` diff --git a/src/coursebox.egg-info/requires.txt b/src/coursebox.egg-info/requires.txt index 6391557b63d1ab62ba736a14ee842e51fb2623a9..0a856679d9bb3a3f9529383f867a9815d6a73cfa 100644 --- a/src/coursebox.egg-info/requires.txt +++ b/src/coursebox.egg-info/requires.txt @@ -1,13 +1,9 @@ -openpyxl +numpy +pycode_similar tika +openpyxl xlwings -pybtex -langdetect -wexpect -pexpect matplotlib -numpy -pycode_similar -jinjafy +langdetect beamer-slider tinydb diff --git a/src/coursebox/core/__pycache__/info.cpython-38.pyc b/src/coursebox/core/__pycache__/info.cpython-38.pyc index 1d680dd6b8885df10e21e68905c1c7757f176b18..e0ab976224268ffd731b58532967876fc91bff4b 100644 Binary files a/src/coursebox/core/__pycache__/info.cpython-38.pyc and b/src/coursebox/core/__pycache__/info.cpython-38.pyc differ diff --git a/src/coursebox/core/__pycache__/projects_info.cpython-38.pyc b/src/coursebox/core/__pycache__/projects_info.cpython-38.pyc index 
4924025fb95a5691dc63e256c55f5a0b44df0bbd..3c221ed674c47518426218c8f64be90b275198d7 100644 Binary files a/src/coursebox/core/__pycache__/projects_info.cpython-38.pyc and b/src/coursebox/core/__pycache__/projects_info.cpython-38.pyc differ diff --git a/src/coursebox/core/info.py b/src/coursebox/core/info.py index ba913e17f9bce3fe3178ed75139a11f89eb36eff..a80d792cd773d538312bf4aee6f6d03011f9ff55 100644 --- a/src/coursebox/core/info.py +++ b/src/coursebox/core/info.py @@ -12,10 +12,14 @@ from coursebox.core.info_paths import core_conf # import pybtex.database.input.bibtex # import pybtex.plugin # import io +from line_profiler_pycharm import profile +import time - +@profile def xlsx_to_dicts(xlsx_file,sheet=None, as_dict_list=False): - wb = openpyxl.load_workbook(xlsx_file, data_only=True) + # print("Loading...", xlsx_file, sheet, as_dict_list) + t0 = time.time() + wb = openpyxl.load_workbook(xlsx_file, data_only=True, read_only=True) if not sheet: ws = wb.worksheets[0] else: @@ -24,26 +28,65 @@ def xlsx_to_dicts(xlsx_file,sheet=None, as_dict_list=False): return None else: ws = ws.pop() - dd = [] - key_cols = [j for j in range(ws.max_column) if ws.cell(row=1, column=j + 1).value is not None] - for i in range(1, ws.max_row): - rdict = {} - if not any( [ws.cell(row=i+1, column=j+1).value is not None for j in key_cols] ): - continue - for j in key_cols: - key = ws.cell(row=1, column=j+1).value - if key is not None: - key = key.strip() if isinstance(key,str) else key - value = ws.cell(row=i + 1, column=j + 1).value - value = value.strip() if isinstance(value,str) else value - if isinstance(value, str): - if value == 'True': - value = True - if value == 'False': - value = False - rdict[key] = value - dd.append(rdict) - + # print(time.time()-t0) + # dd = [] + # key_cols = [j for j in range(ws.max_column) if ws.cell(row=1, column=j + 1).value is not None] + # print(time.time()-t0, ws.max_row) + # np.array([[i.value for i in j[1:5]] for j in ws.rows]) + + import numpy as np + A 
= np.array([[i.value for i in j] for j in ws.rows]) + # print(time.time() - t0, ws.max_row, len(key_cols)) + + + # for j in range(A.shape[1]): + + + + + a = 234 + + # for i in range(1, ws.max_row): + # rdict = {} + # if not any( [ws.cell(row=i+1, column=j+1).value is not None for j in key_cols] ): + # continue + # for j in key_cols: + # key = ws.cell(row=1, column=j+1).value + # if key is not None: + # key = key.strip() if isinstance(key,str) else key + # value = ws.cell(row=i + 1, column=j + 1).value + # value = value.strip() if isinstance(value,str) else value + # if isinstance(value, str): + # if value == 'True': + # value = True + # if value == 'False': + # value = False + # rdict[key] = value + # dd.append(rdict) + + # print(time.time()-t0) + + A = A[:, A[0] != None] + A = A[(A != None).sum(axis=1) > 0, :] + + dd2 = [] + for i in range(1, A.shape[0]): + A[A == 'True'] = True + A[A == 'False'] = False + + d = dict(zip(A[0, :].tolist(), [a.strip() if isinstance(a,str) else a for a in A[i, :].tolist() ])) + dd2.append(d) + + # print(time.time() - t0) + dd = dd2 + # if dd != dd2: + # for k in range(len(dd)): + # if dd[k] != dd2[k]: + # print(k) + # print(dd) + # print(dd2) + # assert False + # print("BAd!") if as_dict_list: dl = list_dict2dict_list(dd) for k in dl.keys(): @@ -51,6 +94,8 @@ def xlsx_to_dicts(xlsx_file,sheet=None, as_dict_list=False): if len(x) == 1: x = x.pop() dl[k] = x dd = dl + wb.close() + # print("xlsx2dicts", time.time()-t0) return dd def get_enrolled_students(): @@ -200,6 +245,7 @@ def get_forum(paths): d2.append({k: v[i] for k, v in dd.items()}) return d2 +@profile def class_information(): course_number = core_conf['course_number'] piazza = 'https://piazza.com/dtu.dk/%s%s/%s' % (semester().lower(), year(), course_number) @@ -214,8 +260,8 @@ def class_information(): 'piazza': piazza, # deprecated. 
'course_number': course_number, 'semester': semester(), - 'reports_handout': [1,6], - 'reports_handin': [6,11], + # 'reports_handout': [1,6], # Set in excel conf. + # 'reports_handin': [6, 11], # set in excel conf. 'semester_id': semester_id(), 'today': today(), 'instructors': get_instructors(), diff --git a/src/coursebox/core/projects.py b/src/coursebox/core/projects.py index 3c017f22be6afd4b56218c2957d9767486fd7015..ef3cb0796c173da1532822ef64901be511a80edc 100644 --- a/src/coursebox/core/projects.py +++ b/src/coursebox/core/projects.py @@ -1,3 +1,6 @@ +import re +import tempfile +import tika import os import shutil import openpyxl @@ -5,7 +8,6 @@ import numpy as np import itertools import math import glob -# import zipfile from tika import parser from openpyxl.worksheet.datavalidation import DataValidation from openpyxl.utils import get_column_letter @@ -22,6 +24,8 @@ from jinjafy.plot.plot_helpers import get_colors import time from collections import defaultdict import zipfile +import hashlib +import pandas as pd def get_dirs(zf): @@ -32,13 +36,11 @@ def get_dirs(zf): def fix_handins_fuckup(project_id=2): """ Handle the problem with multiple hand-ins in DTU learn. 
""" - import zipfile paths = get_paths() from coursebox.core.info import class_information info = class_information() zf = paths['instructor_project_evaluations'] + f"/zip{project_id}.zip" - tas = [i['shortname'] for i in info['instructors'] ] ta_links = {i['shortname']: i for i in info['instructors']} @@ -51,7 +53,6 @@ def fix_handins_fuckup(project_id=2): ta_reports[r] = ta fls = get_dirs(zf) - # fls = [f for f in zip.namelist() if not f.endswith("tml") and f.endswith("/")] d = defaultdict(lambda: []) for l in fls: @@ -123,7 +124,6 @@ def handle_projects(verbose=False, gather_main_xlsx_file=True, plagiarism_check= zip1 = instructor_path + "/zip1.zip" zip2 = instructor_path + "/zip2.zip" zip3 = instructor_path + "/zip3.zip" - zips = [None, zip1, zip2, zip3] for j,zf in enumerate(zips): @@ -138,12 +138,12 @@ def handle_projects(verbose=False, gather_main_xlsx_file=True, plagiarism_check= continue else: # instructor files do not exist if j == 0: - copy_populate_from_template(info, sheet_number=j, zip_file=None) + copy_populate_from_template(paths, info, sheet_number=j, zip_file=None) elif os.path.exists(zf): # make a copy of report template and populate it with groups obtained from previous report evaluation. # all_groups = get_all_reports_from_collected_xlsx_file() - copy_populate_from_template(info, sheet_number=j, zip_file=zf) + copy_populate_from_template(paths, info, sheet_number=j, zip_file=zf) # distribute_zip_content(info, sheet=j, zf_base=zf) else: print("When available, please move downloaded copy of all reports from campusnet to destination:") @@ -228,13 +228,13 @@ def compute_error_files(info, paths): es = err_label + f"> Report score is {g.get('score', 0)}. The report score has to be between 0 and 4; probably due to a too high value of 'Delta' in instructor sheet." 
ERRORS[ins].append(es) - if repn >= 1 and not g['comments']: + if repn >= 1 and not g['comments'] and info['course_number'] != '02465': es = err_label + "> Incomplete report evaluation (missing comments field)" es += "Please fill out comments field in your excel sheet." ERRORS[ins].append(es) - if repn >= 1 and not g['approver_comments']: + if repn >= 1 and not g['approver_comments'] and info['course_number'] != '02465': es = err_label + "> Incomplete report evaluation (you are missing the approver comments field; can simply be set to 'ok')." ERRORS.get(g['approver'], []).append(es) @@ -300,10 +300,70 @@ def get_instructor_xlsx_files(info, sheet): return xlsx -import hashlib +def get_groups_from_learn_xslx_file(paths, sheet_number): + fname = f"{paths['instructor_project_evaluations']}/groups{sheet_number}.xlsx" + all_groups = [] + if os.path.exists(fname): + # Reading from the groups{number}.xlsx group-id file exported from DTU learn. Note this file contains fuckups. + dg = defaultdict(list) + df = pd.read_excel(fname) + for uname, group_id in zip(df['Username'], df['Project groups']): + id = int(group_id.split(" ")[1]) + if len(uname) == 7 and uname[0] == 's': + dg[id].append(uname) + else: + dg[id].append("DTU-LEARN-FUCKED-THIS-ID-UP-CHECK-ON-REPORT") + + all_groups = [{'group_id': id, 'student_ids': students} for id, students in dg.items()] + return all_groups + +def search_projects(paths, sheet_number, patterns): + zip_files = [paths['instructor_project_evaluations'] + "/zip%d.zip" % sheet_number] + # print(zip_files) + + all_groups = [] + gps = defaultdict(list) + for zip_file in zip_files: + if os.path.exists(zip_file): + tmpdir = tempfile.TemporaryDirectory() + zipfile.ZipFile(zip_file).extractall(path=tmpdir.name) + pdfs = glob.glob(tmpdir.name + "/**/*.pdf", recursive=True) + for pdf in pdfs: + pdf_parsed = tika.parser.from_file(pdf) + id =int(os.path.dirname(pdf).split(" - ")[1].split(" ")[1]) + students = re.findall('s\d\d\d\d\d\d', 
pdf_parsed['content'], flags=re.IGNORECASE) + gps[id] += students + + for id, students in gps.items(): + all_groups.append({'group_id': id, 'student_ids': list(set(students))}) + return all_groups + + +def unpack_zip_file_recursively(zip_file, destination_dir): + """ + Unpack the zip_file (extension: .zip) to the given directory. + + If the folders in the zip file contains other zip/files, these are unpacked recursively. + """ + # Unpack zip file recursively and flatten it. + zipfile.ZipFile(zip_file).extractall(path=destination_dir) + ls = glob.glob(destination_dir + "/*") + for f in ls: + if os.path.isdir(f): + zipfiles = glob.glob(f + "/*.zip") + for zp in zipfiles: + print("Unpacking student zip file>", zp) + zipfile.ZipFile(zp).extractall(path=os.path.dirname(zp) + "/") + + +def copy_populate_from_template(paths, info, sheet_number,zip_file): + # Try to load group ids from the project pdf's + all_groups = search_projects(paths, sheet_number, r"s\d{6}") + # all_groups = get_groups_from_learn_xslx_file(paths, sheet_number) + if len(all_groups) == 0: + all_groups = projects_info.get_groups_from_report(repn=sheet_number-1) if sheet_number > 0 else [] + # Hopefully this did the trick and we have the groups all grouped up. 
-def copy_populate_from_template(info, sheet_number,zip_file): - all_groups = projects_info.get_groups_from_report(repn=sheet_number-1) if sheet_number > 0 else [] # set up which TA approve which TA if any( [i['language'] not in ["en", "any"] for i in info['instructors'] ]): print(info['instructors']) @@ -337,10 +397,13 @@ def copy_populate_from_template(info, sheet_number,zip_file): n_groups_per_instructor = 24 + (sheet_number == 0) * 26 if sheet_number > 0: - zfd = zip_file[:-4] - if not os.path.exists(zfd): - os.mkdir(zfd) - zipfile.ZipFile(zip_file).extractall(path=zfd) + # zfd = zip_file[:-4] + # if not os.path.exists(zfd): + # os.mkdir(zfd) + zfd = tempfile.TemporaryDirectory().name + # zipfile.ZipFile(zip_file).extractall(path=tmpdir.name) + + unpack_zip_file_recursively(zip_file, destination_dir=zfd) # get all report handins (i.e. directories) ls = [l for l in glob.glob(zfd + "/*") if l[-3:] not in ["txt", "tml"]] @@ -431,8 +494,8 @@ def copy_populate_from_template(info, sheet_number,zip_file): corrector = all_tas[shortname]['approver'] if sheet_number > 0: # Copy reports to directory (distribute amongst TAs) - b_dir = os.path.dirname(zip_file) - ins_dir = "%s/project_%i_%s/"%(b_dir, sheet_number, shortname) + # b_dir = os.path.dirname(zip_file) + ins_dir = "%s/project_%i_%s/"%(zfd, sheet_number, shortname) if not os.path.exists(ins_dir): os.mkdir(ins_dir) @@ -440,7 +503,7 @@ def copy_populate_from_template(info, sheet_number,zip_file): for handin in all_tas[shortname]['handins']: shutil.move(handin['path'], ins_dir) - shutil.make_archive(ins_dir[:-1], 'zip', ins_dir) + shutil.make_archive(os.path.dirname(zip_file) +"/"+ os.path.basename(ins_dir[:-1]), 'zip', ins_dir) time.sleep(2) print("Removing tree of reports to clear up space...") shutil.rmtree(ins_dir) @@ -471,10 +534,10 @@ def copy_populate_from_template(info, sheet_number,zip_file): sheet.cell(STUDENT_ID_ROW+j, ccol+i).value = s wb.save(ifile) wb.close() - # clean up zip file directories - if 
sheet_number > 0: - zfd = zip_file[:-4] - shutil.rmtree(zfd) + # clean up zip file directories; since it is a tmp file, we don't have to. + # if sheet_number > 0: + # zfd = zip_file[:-4] + # shutil.rmtree(zfd) def write_dropdown_sumprod_sheet(sheet): ccol = 2 diff --git a/src/coursebox/core/projects_info.py b/src/coursebox/core/projects_info.py index 0b2433184cc76668023684c0ae55218ff333eb7a..62e1457828fc376919ee34d339bd10b0dfb33361 100644 --- a/src/coursebox/core/projects_info.py +++ b/src/coursebox/core/projects_info.py @@ -3,6 +3,7 @@ import os import re import openpyxl import numpy as np +from line_profiler_pycharm import profile INSTRUCTOR_ROW = 6 INSTRUCTOR_CHECKER_ROW = 31 @@ -16,19 +17,6 @@ RANGE_MIN_COL = 5 DELTA_ALLOWED_ROW = 111 # The range of possible delta-values. Should be in an empty (new) row at bottom. -def get_all_reports_from_collected_xlsx_file_DEFUNCT(): # when is this used? - out = get_output_file() - wb = openpyxl.load_workbook(out) - all_reports = {} - for repn in range(3, -1, -1): - cls = [] - for i in range(2, wb.worksheets[repn].max_column + 1): - cp = parse_column(wb.worksheets[repn], report_number=repn, column=i) - if not cp['student_ids']: - continue - cls.append(cp) - all_reports[repn] = cls - return all_reports def parse_column_student_ids(v): sn = [] @@ -42,7 +30,82 @@ def parse_column_student_ids(v): sn.append(g) return sn + +def parse_column_numpy(col, report_number, column): + """ Parse a column assuming it is defined as a numpy array. + This is the recommended method as it is much, much faster. 
+ """ + # ws = worksheet # wb.worksheets[sheet] + sn = [] + group_id = col[STUDENT_ID_ROW - 1-1] #).value + + # col = ['' if col[0] is np.NAN else x for x in col] + + for i in range(0, 3): + v = col[i + STUDENT_ID_ROW-1]#, column=column).value + sn += parse_column_student_ids(v) + + + instructor = col[INSTRUCTOR_ROW-1]#, column=column).value + approver = col[INSTRUCTOR_ROW+1-1]# , column=column).value + + if instructor: + instructor = instructor.lower() + if approver: + approver = str(approver).lower() + + content = None + comments = None + appr_comments = None + if report_number > 0 and sn: + N = 38 + rarr = np.ndarray(shape=(N,1),dtype=np.object) + for j in range(N): + + v = col[3 + STUDENT_ID_ROW+j-1]#, column=column).value + rarr[j,0] = v + content = rarr + comments = col[EVALUATION_ROW_END+5-1]# , column=column).value + appr_comments = col[EVALUATION_ROW_END+6-1]# , column=column).value + + cgroup = {'column_j': column, 'student_ids': sn, 'instructor': instructor, "approver": approver, 'content': content, + "comments": comments, "approver_comments": appr_comments, 'missing_fields': [], + 'group_id': group_id} + + # Now, find errors... This involves first finding non-zero columns + if report_number > 0 and sn: + score = cgroup['content'][-3, 0] + cgroup['score'] = score + cgroup['pct'] = score2pct(score) + + # if report_number == 3: # this obviously needs fixing for next semester. + # raise Exception("No report number 3 anymore. 
") + # I = [] + # for i in range(42): # max number of evaluation fields (irrelevant) + # v1 = col[WEIGHT_ROW_START+i-1, RANGE_MIN_COL-1]# ).value + # v2 = col[WEIGHT_ROW_START+i-1, RANGE_MIN_COL+1-1]#).value + # if (v1 == -1 and v2 == 1) or (v1 == 0 and v2 == 4): + # I.append(i) + # if v1 == -1 and v2 == 1: + # # print("delta col") + # break + # + # for i in I: + # w1 = worksheet.cell(row=WEIGHT_ROW_START + i, column=1).value + # w3_ = worksheet.cell(row=INSTRUCTOR_ROW + i+2, column=1).value # should agree with label in w1 + # w2 = worksheet.cell(row=INSTRUCTOR_ROW + i+2, column=column).value + # if w2 == None: + # cgroup['missing_fields'].append( (i, w1) ) + # if report_number < 3: + # print("old report nr.") + + return cgroup + + + def parse_column(worksheet, report_number, column): + """ This is the old method. It is very slow. Use the numpy-version above. + """ ws = worksheet # wb.worksheets[sheet] sn = [] group_id = ws.cell(row=STUDENT_ID_ROW - 1, column=column).value @@ -54,7 +117,8 @@ def parse_column(worksheet, report_number, column): instructor = ws.cell(row=INSTRUCTOR_ROW, column=column).value approver = ws.cell(row=INSTRUCTOR_ROW+1, column=column).value - if instructor: instructor = instructor.lower() + if instructor: + instructor = instructor.lower() if approver: approver = str(approver).lower() @@ -135,32 +199,47 @@ def get_groups_from_report(repn): cls.append(cp) return cls + +# @profile def populate_student_report_results(students): # take students (list-of-dicts in the info format) and assign them the results from the reports. out = get_output_file() + import time + t0 = time.time() print("> Loading student report scores from: %s"%out) if not os.path.exists(out): return students, [] for k in students: students[k]['reports'] = {i: None for i in range(4)} + import pandas as pd - wb = openpyxl.load_workbook(out,data_only=True) + wb = openpyxl.load_workbook(out, data_only=True, read_only=True) # Perhaps find non-empty cols (i.e. 
those with content) - + print("> time elapsed", time.time() - t0) maximal_groups = [] maximal_groups_students = [] for repn in range(3, -1, -1): cls = [] - for i in range(2, wb.worksheets[repn].max_column + 1): - cp = parse_column(wb.worksheets[repn], report_number=repn, column=i) + sheet = pd.read_excel(out, sheet_name=repn, index_col=None, header=None) + sheet = sheet.fillna('') + sheet = sheet.to_numpy() + # to_numpy() + for i in range(1,sheet.shape[1]): + + # for i in range(2, wb.worksheets[repn].max_column + 1): + # print(i, wb.worksheets[repn].max_column) + # s = pd.read_excel(out, sheet_name=1) + cp = parse_column_numpy(sheet[:,i], report_number=repn, column=i) + + + # cp = parse_column(wb.worksheets[repn], report_number=repn, column=i) if not cp['student_ids']: - continue + break cls.append(cp) for g in cls: - for sid in g['student_ids']: student = students.get(sid, None) if student is None: @@ -172,5 +251,5 @@ def populate_student_report_results(students): if sid not in maximal_groups_students: maximal_groups.append(g) maximal_groups_students += g['student_ids'] - + print("> time elapsed", time.time() -t0) return students, maximal_groups \ No newline at end of file diff --git a/src/coursebox/material/__pycache__/homepage_lectures_exercises.cpython-38.pyc b/src/coursebox/material/__pycache__/homepage_lectures_exercises.cpython-38.pyc index fcdc53b8ce3e88021bd3cba3dda0c41923e0c4a1..6ad3c21eb6a79dbef53f5e24270dca2a8f885fc2 100644 Binary files a/src/coursebox/material/__pycache__/homepage_lectures_exercises.cpython-38.pyc and b/src/coursebox/material/__pycache__/homepage_lectures_exercises.cpython-38.pyc differ diff --git a/src/coursebox/material/homepage_lectures_exercises.py b/src/coursebox/material/homepage_lectures_exercises.py index f3f71f2e430da73652be54d6819c190ed148c349..0a6b39571752473887020d86b39a649fc7dd1b47 100644 --- a/src/coursebox/material/homepage_lectures_exercises.py +++ b/src/coursebox/material/homepage_lectures_exercises.py @@ -3,6 +3,9 @@ 
import shutil, os, glob from datetime import datetime, timedelta import calendar import pickle +import time +from line_profiler_pycharm import profile +from coursebox.thtools_base import partition_list import slider from jinjafy import jinjafy_comment @@ -16,6 +19,7 @@ from coursebox.core.info import class_information from coursebox.material.lecture_questions import lecture_question_compiler from slider import latexmk import coursebox +# from line_profiler_pycharm import profile def get_feedback_groups(): paths = get_paths() @@ -47,12 +51,13 @@ def get_feedback_groups(): reduced_groups = [rg for rg in reduced_groups if len(rg)>0] # groups are now partitioned. if len(remaining_lectures) > 0: - fbgs = coursebox.thtools_base.partition_list(reduced_groups, len(remaining_lectures)) + fbgs = partition_list(reduced_groups, len(remaining_lectures)) for gg in fbgs: for g in gg: already_used = already_used + g - lst = thtools.thtools_base.partition_list([s for s in all_students if s not in already_used], len(remaining_lectures)) + + lst = partition_list([s for s in all_students if s not in already_used], len(remaining_lectures)) for i in range(len(remaining_lectures)): dg = [] for g in fbgs[i]: dg += g # flatten the list @@ -217,7 +222,49 @@ def compile_simple_files(paths, info, template_file_list, verbose=False): jinjafy_template(data=d2, file_in=fname, file_out=tex_out, filters=get_filters(), template_searchpath=paths['instructor']) latexmk(tex_out, pdf_out= paths['pdf_out'] + "/" + os.path.basename(tex_out)[:-4]+".pdf") -def fix_shared(paths, output_dir, pdf2png=False,dosvg=True,verbose=False, compile_templates=True): +# rec_fix_shared(shared_base=paths['shared'], output_dir=output_dir) +import time +# import dirsync +# dirsync.sync(paths['shared'], output_dir, 'diff') + + +# Do smarter fixin' +from pathlib import Path + +from jinjafy.cache.simplecache import hash_file_ + +@profile +def get_hash_from_base(base): + if not os.path.exists(base + "/sharedcache.pkl"): + source 
= {} + else: + with open(base + "/sharedcache.pkl", 'rb') as f: + source = pickle.load(f) + + actual_files = {} + for f in glob.glob(base + "/**", recursive=True): + if os.path.isdir(f): + continue + if f.endswith("sharedcache.pkl"): + continue + rel = os.path.relpath(f, base) + + # d = dict(mtime=os.path.getmtime(f)) + actual_files[rel] = dict(mtime=os.path.getmtime(f), hash=-1, modified=False) + + if rel not in source or (actual_files[rel]['mtime'] != source[rel].get('mtime', -1)): # It has been modified, update hash + # print(rel, time.ctime(actual_files[rel]['mtime']), time.ctime(source[rel].get('mtime', -1))) + new_hash = hash_file_(f) + # actual_files[rel] = {} + actual_files[rel]['modified'] = new_hash != source.get(rel, {}).get('hash', -1) + actual_files[rel]['hash'] = new_hash + else: + actual_files[rel]['hash'] = source[rel]['hash'] + return actual_files + + +@profile +def fix_shared(paths, output_dir, pdf2png=False,dosvg=True,verbose=False, compile_templates=True,shallow=True): ''' Copy shared files into lecture directories ''' @@ -225,46 +272,171 @@ def fix_shared(paths, output_dir, pdf2png=False,dosvg=True,verbose=False, compil from jinjafy.cache import cache_contains_file, cache_update_file from slider.convert import svg2pdf, pdfcrop from slider import convert + import filecmp - def rec_fix_shared(shared_base, output_dir): - if dosvg: - for svg in glob.glob(shared_base+"/*.svg"): - if not cache_contains_file(cache_base, svg): - if verbose: - print("converting to pdf", svg) - svg2pdf(svg,crop=True, text_to_path=True) - cache_update_file(cache_base, svg) - files = glob.glob(shared_base+"/*") - for f in files: - if f.endswith("cache.pkl"): - continue - # check if template - if "templates" in f and f.endswith("_partial.tex"): - continue + t0 = time.time() + shared_base = paths['shared'] + output_dir = output_dir - if os.path.isdir(f): - od2 = output_dir + "/" + os.path.basename(f) - if not os.path.exists(od2): - os.mkdir(od2) - rec_fix_shared(f, od2) - 
else: - of = output_dir + "/" + os.path.basename(f) - if not cache_contains_file(cache_base, f) or not os.path.exists(of): - print(f"> {f} -> {of}") - shutil.copy(f, of) - if f.endswith(".pdf") and pdf2png: + import glob + # def get_cache_from_dir(shared_base): + # print("Beginning file cache..") + + + source = get_hash_from_base(shared_base) + target = get_hash_from_base(output_dir) - if verbose: - print(" converting to png", f) - convert.pdf2png(of) - cache_update_file(cache_base, f) + # update_source_cache = False + source_extra = {} + for rel in source: + if rel.endswith(".svg") and source[rel]['modified']: + pdf_file = svg2pdf(shared_base + "/"+rel, crop=True, text_to_path=True, verbose=True) + rel = os.path.relpath(pdf_file, shared_base) + source_extra[rel] = dict(mtime=os.path.getmtime(pdf_file), hash=hash_file_(pdf_file), modified=True) - if verbose: - print(" done!") + for k, v in source_extra.items(): + source[k] = v + # update_source_cache = True + # Perform sync here. + for rel in source: + if rel.endswith("_partial.tex"): + continue + + if rel not in target or target[rel]['hash'] != source[rel]['hash']: + print(" -> ", output_dir + "/" + rel) + shutil.copy(shared_base +"/" + rel, output_dir + "/" + rel) + target[rel] = source[rel].copy() + target[rel]['modified'] = True + target[rel]['mtime'] = os.path.getmtime(output_dir + "/" + rel) + + if pdf2png: + for rel in target: + if rel.endswith(".pdf") and target[rel]['modified']: + # print("pdf2png: ") + png = convert.pdf2png(output_dir + "/" + rel, verbose=True) + target[rel]['modified'] = False + target[rel]['hash'] = hash_file_(output_dir + "/" + rel) + target[rel]['mtime'] = os.path.getmtime(output_dir + "/" + rel) + + # Save the cache. 
+ + with open(shared_base + "/sharedcache.pkl", 'wb') as f: + pickle.dump(source, f) + + with open(output_dir + "/sharedcache.pkl", 'wb') as f: + pickle.dump(target, f) + + print("fix_shared()", time.time() - t0) + + # + # if pdf2png: + # if f.endswith(".pdf") and pdf2png: + # if verbose: + # print("converting to png", f) + # convert.pdf2png(of) + # + # for f in source: + # if f not in target: + # print(f) + # else: + # if source[f]['hash'] != target[f]['hash']: + # print(f, f) + # + # + # + # a = 234 + # # if rel not in source: + # + # # source[rel] = dict(mtime=os.path.getmtime(f), hash=hash_file_(f)) + # # + # + # + # # Everything has a hash/mtime that is up to date. Now look at target dir + # + # get_cache_from_dir(output_dir) + # + # # Get the corresponding output at destination: + # + # + # + # + # + # + # for path in Path(shared_base).rglob('*'): + # print(path) + # a = 234 + # def rec_fix_shared(shared_base, output_dir): + # if dosvg: + # for svg in glob.glob(shared_base+"/*.svg"): + # # if not os.path.exists(shared_base + ) + # if not cache_contains_file(cache_base, svg): + # # if verbose: + # print("converting to pdf", svg) + # svg2pdf(svg,crop=True, text_to_path=True) + # cache_update_file(cache_base, svg) + # assert False + # + # files = glob.glob(shared_base+"/*") + # for f in files: + # if f.endswith("cache.pkl"): + # continue + # + # if "templates" in f and f.endswith("_partial.tex"): + # continue + # + # if os.path.isdir(f): + # od2 = output_dir + "/" + os.path.basename(f) + # if not os.path.exists(od2): + # os.mkdir(od2) + # rec_fix_shared(f, od2) + # else: + # of = output_dir + "/" + os.path.basename(f) + # if not os.path.exists(of) or not filecmp.cmp(f, of,shallow=shallow): + # print(f"> fix_shared() -> {of}") + # shutil.copy(f, of) + # if f.endswith(".pdf") and pdf2png: + # if verbose: + # print("converting to png", f) + # convert.pdf2png(of) + # # cache_update_file(cache_base, f) + # + # if verbose: + # print(" done!") + + # if pdf2png: + # 
assert False + + + + # get diff. + + # directory_cmp = filecmp.dircmp(a=paths['shared'], b=output_dir) + # from filecmp import dircmp + # from filecmp import dircmp + # def print_diff_files(dcmp): + # for name in dcmp.diff_files: + # print("diff_file %s found in %s and %s" % (name, dcmp.left, dcmp.right)) + # print("") + # for sub_dcmp in dcmp.subdirs.values(): + # print_diff_files(sub_dcmp) + # + + # t0 = time.time() + # dcmp = dircmp(paths['shared'], output_dir) + # print_diff_files(dcmp) + # print("dircmp", time.time() - t0) + # directory_cmp.report() + # import time + # t0 = time.time() + # rec_fix_shared(shared_base=paths['shared'], output_dir=output_dir) + # import time + # # import dirsync + # # dirsync.sync(paths['shared'], output_dir, 'diff') + # print("mine", time.time() - t0) + a = 234 - rec_fix_shared(shared_base=paths['shared'], output_dir=output_dir) def jinjafy_shared_templates_dir(paths, info): tpd = paths['shared'] + "/templates" @@ -379,6 +551,7 @@ def mvfiles(source_dir, dest_dir): if (os.path.isfile(full_file_name)): shutil.copy(full_file_name, os.path.dirname(dest_dir)) +@profile def make_webpage(dosvg=True): cinfo = class_information() paths = get_paths()