Commit e19bb48a authored by tuhe
Various updates for 02465 during the semester

parent a6f84c4c
setup.py
@@ -11,7 +11,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
 # beamer-slider
 setuptools.setup(
     name="coursebox",
-    version="0.1.2",
+    version="0.1.4",
     author="Tue Herlau",
     author_email="tuhe@dtu.dk",
     description="A course management system currently used at DTU",
@@ -30,5 +30,5 @@ setuptools.setup(
     package_dir={"": "src"},
     packages=setuptools.find_packages(where="src"),
     python_requires=">=3.8",
-    install_requires=['numpy','pycode_similar','tika','openpyxl', 'xlwings','matplotlib','langdetect','jinjafy','beamer-slider','tinydb'],
+    install_requires=['numpy','pycode_similar','tika','openpyxl', 'xlwings','matplotlib','langdetect','beamer-slider','tinydb'],
 )
PKG-INFO
 Metadata-Version: 2.1
 Name: coursebox
-Version: 0.1.1
+Version: 0.1.4
 Summary: A course management system currently used at DTU
 Home-page: https://lab.compute.dtu.dk/tuhe/coursebox
 Author: Tue Herlau
@@ -16,6 +16,34 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 
 # Coursebox DTU
 DTU course management software.
+
+## Installation
+```terminal
+pip install coursebox
+```
+
+## What it can do
+- Single semester-dependent configuration file
+- Integrates with DTU Inside/DTU Learn
+- Distribution/evaluation of project reports in a DTU Learn-compatible format
+- Quiz generation in a DTU Learn/Beamer-friendly format
+- Automatic website/syllabus generation
+- Automatic generation of lecture handouts/exercises (you don't have to track dynamic content like dates and lecture titles; it is all in the configuration)
+- Easy compilation to 2/5-day formats (continuing education)
+
+## Usage
+Coursebox requires a specific directory structure (sketched below). It is easiest to start from an existing course and adapt it to your needs. Please contact me at tuhe@dtu.dk for more information.
+
+## Citing
+```bibtex
+@online{coursebox,
+    title={Coursebox (0.1.1): \texttt{pip install coursebox}},
+    url={https://lab.compute.dtu.dk/tuhe/coursebox},
+    urldate = {2021-09-07},
+    month={9},
+    publisher={Technical University of Denmark (DTU)},
+    author={Tue Herlau},
+    year={2021},
+}
+```
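The README does not spell that structure out. As a rough, purely hypothetical illustration of the kind of layout the configuration-driven workflow implies (only `shared/` and `instructor_project_evaluations/` are visible in the code changed by this commit; the other names are placeholders):

```
course_02465/                        # hypothetical course root
├── Configuration.xlsx               # the single semester-dependent configuration file
├── shared/                          # figures/templates synced into lecture directories
└── instructor_project_evaluations/
    ├── zip1.zip                     # report hand-ins downloaded from DTU Learn
    └── zip2.zip
```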
requires.txt
-openpyxl
+numpy
+pycode_similar
 tika
+openpyxl
 xlwings
-pybtex
-langdetect
-wexpect
-pexpect
 matplotlib
-numpy
-pycode_similar
-jinjafy
+langdetect
 beamer-slider
 tinydb
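requires.txt is the egg-info file that setuptools derives from install_requires, so after this commit the two lists agree. As a quick sanity check against an installed copy, the standard-library metadata API reports the same declared dependencies (a minimal sketch):

```python
# Minimal sketch: list the dependencies an installed coursebox declares.
# Assumes Python >= 3.8 and that coursebox is installed in the environment.
from importlib.metadata import requires

for req in requires("coursebox") or []:
    print(req)
```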
No preview for this file type
No preview for this file type
coursebox/core/info.py
@@ -12,10 +12,14 @@ from coursebox.core.info_paths import core_conf
 # import pybtex.database.input.bibtex
 # import pybtex.plugin
 # import io
+from line_profiler_pycharm import profile
+import time
 
+@profile
 def xlsx_to_dicts(xlsx_file, sheet=None, as_dict_list=False):
-    wb = openpyxl.load_workbook(xlsx_file, data_only=True)
+    t0 = time.time()
+    wb = openpyxl.load_workbook(xlsx_file, data_only=True, read_only=True)
     if not sheet:
         ws = wb.worksheets[0]
     else:
@@ -24,26 +28,65 @@ def xlsx_to_dicts(xlsx_file, sheet=None, as_dict_list=False):
             return None
         else:
             ws = ws.pop()
-    dd = []
-    key_cols = [j for j in range(ws.max_column) if ws.cell(row=1, column=j + 1).value is not None]
-    for i in range(1, ws.max_row):
-        rdict = {}
-        if not any([ws.cell(row=i + 1, column=j + 1).value is not None for j in key_cols]):
-            continue
-        for j in key_cols:
-            key = ws.cell(row=1, column=j + 1).value
-            if key is not None:
-                key = key.strip() if isinstance(key, str) else key
-                value = ws.cell(row=i + 1, column=j + 1).value
-                value = value.strip() if isinstance(value, str) else value
-                if isinstance(value, str):
-                    if value == 'True':
-                        value = True
-                    if value == 'False':
-                        value = False
-                rdict[key] = value
-        dd.append(rdict)
+    # Read the whole sheet into a numpy object array in one pass; this is much
+    # faster than addressing cells one at a time through openpyxl.
+    import numpy as np
+    A = np.array([[i.value for i in j] for j in ws.rows])
+    A = A[:, A[0] != None]                  # keep only columns with a header in row 1
+    A = A[(A != None).sum(axis=1) > 0, :]   # drop rows without any content
+    A[A == 'True'] = True                   # normalise the string booleans
+    A[A == 'False'] = False
+    dd = []
+    for i in range(1, A.shape[0]):
+        dd.append(dict(zip(A[0, :].tolist(), [a.strip() if isinstance(a, str) else a for a in A[i, :].tolist()])))
     if as_dict_list:
         dl = list_dict2dict_list(dd)
         for k in dl.keys():
@@ -51,6 +94,8 @@ def xlsx_to_dicts(xlsx_file, sheet=None, as_dict_list=False):
             if len(x) == 1: x = x.pop()
             dl[k] = x
         dd = dl
+    wb.close()
     return dd
 
 def get_enrolled_students():
@@ -200,6 +245,7 @@ def get_forum(paths):
         d2.append({k: v[i] for k, v in dd.items()})
     return d2
 
+@profile
 def class_information():
     course_number = core_conf['course_number']
     piazza = 'https://piazza.com/dtu.dk/%s%s/%s' % (semester().lower(), year(), course_number)
@@ -214,8 +260,8 @@ def class_information():
         'piazza': piazza, # deprecated.
         'course_number': course_number,
         'semester': semester(),
-        'reports_handout': [1,6],
-        'reports_handin': [6,11],
+        # 'reports_handout': [1,6], # Set in excel conf.
+        # 'reports_handin': [6, 11], # set in excel conf.
         'semester_id': semester_id(),
         'today': today(),
         'instructors': get_instructors(),
...
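The core of that rewrite, isolated: one bulk read of the worksheet into a numpy object array, then vectorised header/empty-row filtering. A standalone sketch of the technique, not the exact coursebox function (it assumes row 1 holds the headers and that the sheet is rectangular; read-only worksheets can in principle yield ragged rows):

```python
# Standalone sketch of the bulk-read technique used in xlsx_to_dicts above.
import numpy as np
import openpyxl

def sheet_to_dicts(xlsx_file):
    # read_only avoids building the full in-memory workbook model
    wb = openpyxl.load_workbook(xlsx_file, data_only=True, read_only=True)
    ws = wb.worksheets[0]
    A = np.array([[cell.value for cell in row] for row in ws.rows], dtype=object)
    wb.close()
    A = A[:, A[0] != None]                 # keep columns that have a header
    A = A[(A != None).sum(axis=1) > 0, :]  # drop rows with no content at all
    header = A[0].tolist()
    return [dict(zip(header, row.tolist())) for row in A[1:]]
```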
+import re
+import tempfile
+import tika
 import os
 import shutil
 import openpyxl
@@ -5,7 +8,6 @@ import numpy as np
 import itertools
 import math
 import glob
-# import zipfile
 from tika import parser
 from openpyxl.worksheet.datavalidation import DataValidation
 from openpyxl.utils import get_column_letter
@@ -22,6 +24,8 @@ from jinjafy.plot.plot_helpers import get_colors
 import time
 from collections import defaultdict
 import zipfile
+import hashlib
+import pandas as pd
 
 def get_dirs(zf):
@@ -32,13 +36,11 @@ def get_dirs(zf):
 def fix_handins_fuckup(project_id=2):
     """ Handle the problem with multiple hand-ins in DTU Learn. """
-    import zipfile
     paths = get_paths()
     from coursebox.core.info import class_information
     info = class_information()
     zf = paths['instructor_project_evaluations'] + f"/zip{project_id}.zip"
     tas = [i['shortname'] for i in info['instructors']]
     ta_links = {i['shortname']: i for i in info['instructors']}
@@ -51,7 +53,6 @@ def fix_handins_fuckup(project_id=2):
         ta_reports[r] = ta
     fls = get_dirs(zf)
-    # fls = [f for f in zip.namelist() if not f.endswith("tml") and f.endswith("/")]
     d = defaultdict(lambda: [])
     for l in fls:
@@ -123,7 +124,6 @@ def handle_projects(verbose=False, gather_main_xlsx_file=True, plagiarism_check=
     zip1 = instructor_path + "/zip1.zip"
     zip2 = instructor_path + "/zip2.zip"
     zip3 = instructor_path + "/zip3.zip"
     zips = [None, zip1, zip2, zip3]
     for j, zf in enumerate(zips):
@@ -138,12 +138,12 @@ def handle_projects(verbose=False, gather_main_xlsx_file=True, plagiarism_check=
             continue
         else:  # instructor files do not exist
             if j == 0:
-                copy_populate_from_template(info, sheet_number=j, zip_file=None)
+                copy_populate_from_template(paths, info, sheet_number=j, zip_file=None)
             elif os.path.exists(zf):
                 # make a copy of the report template and populate it with groups obtained from the previous report evaluation.
                 # all_groups = get_all_reports_from_collected_xlsx_file()
-                copy_populate_from_template(info, sheet_number=j, zip_file=zf)
+                copy_populate_from_template(paths, info, sheet_number=j, zip_file=zf)
                 # distribute_zip_content(info, sheet=j, zf_base=zf)
             else:
                 print("When available, please move the downloaded copy of all reports from campusnet to the destination:")
@@ -228,13 +228,13 @@ def compute_error_files(info, paths):
             es = err_label + f"> Report score is {g.get('score', 0)}. The report score has to be between 0 and 4; probably due to a too high value of 'Delta' in the instructor sheet."
             ERRORS[ins].append(es)
-        if repn >= 1 and not g['comments']:
+        if repn >= 1 and not g['comments'] and info['course_number'] != '02465':
             es = err_label + "> Incomplete report evaluation (missing comments field). "
             es += "Please fill out the comments field in your excel sheet."
             ERRORS[ins].append(es)
-        if repn >= 1 and not g['approver_comments']:
+        if repn >= 1 and not g['approver_comments'] and info['course_number'] != '02465':
             es = err_label + "> Incomplete report evaluation (you are missing the approver comments field; it can simply be set to 'ok')."
             ERRORS.get(g['approver'], []).append(es)
@@ -300,10 +300,70 @@ def get_instructor_xlsx_files(info, sheet):
     return xlsx
 
-import hashlib
+def get_groups_from_learn_xslx_file(paths, sheet_number):
+    fname = f"{paths['instructor_project_evaluations']}/groups{sheet_number}.xlsx"
+    all_groups = []
+    if os.path.exists(fname):
+        # Read the groups{number}.xlsx group-id file exported from DTU Learn.
+        # Note: this export is unreliable and may contain mangled student ids.
+        dg = defaultdict(list)
+        df = pd.read_excel(fname)
+        for uname, group_id in zip(df['Username'], df['Project groups']):
+            id = int(group_id.split(" ")[1])
+            if len(uname) == 7 and uname[0] == 's':
+                dg[id].append(uname)
+            else:
+                dg[id].append("DTU-LEARN-FUCKED-THIS-ID-UP-CHECK-ON-REPORT")
+        all_groups = [{'group_id': id, 'student_ids': students} for id, students in dg.items()]
+    return all_groups
+
+def search_projects(paths, sheet_number, patterns):
+    zip_files = [paths['instructor_project_evaluations'] + "/zip%d.zip" % sheet_number]
+    all_groups = []
+    gps = defaultdict(list)
+    for zip_file in zip_files:
+        if os.path.exists(zip_file):
+            tmpdir = tempfile.TemporaryDirectory()
+            zipfile.ZipFile(zip_file).extractall(path=tmpdir.name)
+            pdfs = glob.glob(tmpdir.name + "/**/*.pdf", recursive=True)
+            for pdf in pdfs:
+                pdf_parsed = tika.parser.from_file(pdf)
+                id = int(os.path.dirname(pdf).split(" - ")[1].split(" ")[1])
+                students = re.findall(patterns, pdf_parsed['content'], flags=re.IGNORECASE)
+                gps[id] += students
+    for id, students in gps.items():
+        all_groups.append({'group_id': id, 'student_ids': list(set(students))})
+    return all_groups
+
+def unpack_zip_file_recursively(zip_file, destination_dir):
+    """
+    Unpack the zip_file (extension: .zip) to the given directory.
+    If the folders in the zip file contain other zip files, these are unpacked as well
+    (one level of nesting, which is what a DTU Learn hand-in download produces).
+    """
+    zipfile.ZipFile(zip_file).extractall(path=destination_dir)
+    for f in glob.glob(destination_dir + "/*"):
+        if os.path.isdir(f):
+            for zp in glob.glob(f + "/*.zip"):
+                print("Unpacking student zip file >", zp)
+                zipfile.ZipFile(zp).extractall(path=os.path.dirname(zp) + "/")
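A usage sketch for the new helper (the zip file name is a placeholder; copy_populate_from_template below points it at a fresh temporary directory):

```python
import tempfile

dest = tempfile.mkdtemp()  # plain temp dir; persists until removed explicitly
unpack_zip_file_recursively("zip2.zip", destination_dir=dest)
# dest now holds one folder per hand-in, with nested student zips unpacked in place.
```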
-def copy_populate_from_template(info, sheet_number, zip_file):
-    all_groups = projects_info.get_groups_from_report(repn=sheet_number-1) if sheet_number > 0 else []
+def copy_populate_from_template(paths, info, sheet_number, zip_file):
+    # Try to load the group ids from the project pdf's.
+    all_groups = search_projects(paths, sheet_number, r"s\d{6}")
+    # all_groups = get_groups_from_learn_xslx_file(paths, sheet_number)
+    if len(all_groups) == 0:
+        all_groups = projects_info.get_groups_from_report(repn=sheet_number-1) if sheet_number > 0 else []
     # set up which TA approves which TA
     if any([i['language'] not in ["en", "any"] for i in info['instructors']]):
         print(info['instructors'])
@@ -337,10 +397,13 @@ def copy_populate_from_template(paths, info, sheet_number, zip_file):
     n_groups_per_instructor = 24 + (sheet_number == 0) * 26
     if sheet_number > 0:
-        zfd = zip_file[:-4]
-        if not os.path.exists(zfd):
-            os.mkdir(zfd)
-        zipfile.ZipFile(zip_file).extractall(path=zfd)
+        # As committed this read tempfile.TemporaryDirectory().name, whose directory is
+        # deleted as soon as the object is collected; mkdtemp gives a persistent temp dir.
+        zfd = tempfile.mkdtemp()
+        unpack_zip_file_recursively(zip_file, destination_dir=zfd)
 
         # get all report hand-ins (i.e. directories)
         ls = [l for l in glob.glob(zfd + "/*") if l[-3:] not in ["txt", "tml"]]
@@ -431,8 +494,8 @@ def copy_populate_from_template(paths, info, sheet_number, zip_file):
         corrector = all_tas[shortname]['approver']
         if sheet_number > 0:
             # Copy reports to directory (distribute amongst TAs)
-            b_dir = os.path.dirname(zip_file)
-            ins_dir = "%s/project_%i_%s/" % (b_dir, sheet_number, shortname)
+            ins_dir = "%s/project_%i_%s/" % (zfd, sheet_number, shortname)
             if not os.path.exists(ins_dir):
                 os.mkdir(ins_dir)
@@ -440,7 +503,7 @@ def copy_populate_from_template(paths, info, sheet_number, zip_file):
             for handin in all_tas[shortname]['handins']:
                 shutil.move(handin['path'], ins_dir)
-            shutil.make_archive(ins_dir[:-1], 'zip', ins_dir)
+            shutil.make_archive(os.path.dirname(zip_file) + "/" + os.path.basename(ins_dir[:-1]), 'zip', ins_dir)
             time.sleep(2)
             print("Removing tree of reports to clear up space...")
             shutil.rmtree(ins_dir)
@@ -471,10 +534,10 @@ def copy_populate_from_template(paths, info, sheet_number, zip_file):
                 sheet.cell(STUDENT_ID_ROW+j, ccol+i).value = s
     wb.save(ifile)
     wb.close()
-    # clean up zip file directories
-    if sheet_number > 0:
-        zfd = zip_file[:-4]
-        shutil.rmtree(zfd)
+    # no cleanup needed: reports were unpacked into a temporary directory
 
 def write_dropdown_sumprod_sheet(sheet):
     ccol = 2
...
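One caveat with the temporary-directory change above: `tempfile.TemporaryDirectory().name` hands back a path whose directory is removed as soon as the wrapper object is garbage collected, so extracting into it risks writing to a vanished folder. A sketch of the difference (the `mkdtemp` variant is what the reconstruction above uses):

```python
import os, tempfile

p = tempfile.TemporaryDirectory().name  # the object is unreferenced, so cleanup runs at once
print(os.path.exists(p))                # typically False already (CPython refcounting)

keep = tempfile.mkdtemp()               # a plain directory; the caller removes it explicitly
print(os.path.exists(keep))             # True
```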
@@ -3,6 +3,7 @@ import os
 import re
 import openpyxl
 import numpy as np
+from line_profiler_pycharm import profile
 
 INSTRUCTOR_ROW = 6
 INSTRUCTOR_CHECKER_ROW = 31
@@ -16,19 +17,6 @@ RANGE_MIN_COL = 5
 DELTA_ALLOWED_ROW = 111  # The range of possible delta-values. Should be in an empty (new) row at the bottom.
 
-def get_all_reports_from_collected_xlsx_file_DEFUNCT():  # when is this used?
-    out = get_output_file()
-    wb = openpyxl.load_workbook(out)
-    all_reports = {}
-    for repn in range(3, -1, -1):
-        cls = []
-        for i in range(2, wb.worksheets[repn].max_column + 1):
-            cp = parse_column(wb.worksheets[repn], report_number=repn, column=i)
-            if not cp['student_ids']:
-                continue
-            cls.append(cp)
-        all_reports[repn] = cls
-    return all_reports
 
 def parse_column_student_ids(v):
     sn = []
@@ -42,7 +30,82 @@ def parse_column_student_ids(v):
             sn.append(g)
     return sn
 
+def parse_column_numpy(col, report_number, column):
+    """ Parse a column given as a numpy array (one worksheet column).
+    This is the recommended method as it is much, much faster than cell access;
+    spreadsheet row r corresponds to index col[r-1].
+    """
+    sn = []
+    group_id = col[STUDENT_ID_ROW - 1 - 1]
+    for i in range(0, 3):
+        v = col[i + STUDENT_ID_ROW - 1]
+        sn += parse_column_student_ids(v)
+    instructor = col[INSTRUCTOR_ROW - 1]
+    approver = col[INSTRUCTOR_ROW + 1 - 1]
+    if instructor:
+        instructor = instructor.lower()
+    if approver:
+        approver = str(approver).lower()
+    content = None
+    comments = None
+    appr_comments = None
+    if report_number > 0 and sn:
+        N = 38
+        rarr = np.ndarray(shape=(N, 1), dtype=object)  # np.object is deprecated; plain object is equivalent
+        for j in range(N):
+            rarr[j, 0] = col[3 + STUDENT_ID_ROW + j - 1]
+        content = rarr
+        comments = col[EVALUATION_ROW_END + 5 - 1]
+        appr_comments = col[EVALUATION_ROW_END + 6 - 1]
+    cgroup = {'column_j': column, 'student_ids': sn, 'instructor': instructor, "approver": approver,
+              'content': content, "comments": comments, "approver_comments": appr_comments,
+              'missing_fields': [], 'group_id': group_id}
+    if report_number > 0 and sn:
+        score = cgroup['content'][-3, 0]
+        cgroup['score'] = score
+        cgroup['pct'] = score2pct(score)
+    return cgroup
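The off-by-one bookkeeping above follows from worksheets being 1-indexed while the column array is 0-indexed, i.e. spreadsheet row r lives at `col[r - 1]`. A tiny worked illustration (STUDENT_ID_ROW's real value is defined in this module; 9 is just an assumed stand-in):

```python
INSTRUCTOR_ROW = 6   # as defined above
STUDENT_ID_ROW = 9   # assumed value, for illustration only

col = [None] * 120                        # one worksheet column, 0-indexed
col[INSTRUCTOR_ROW - 1] = "taA"           # spreadsheet row 6: instructor shortname
col[INSTRUCTOR_ROW + 1 - 1] = "taB"       # row 7: approver
col[STUDENT_ID_ROW - 1 - 1] = "Group 12"  # row 8: group id, one row above the ids
col[STUDENT_ID_ROW - 1] = "s123456"       # row 9: first of three student-id slots
```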
 def parse_column(worksheet, report_number, column):
+    """ This is the old method. It is very slow; use the numpy version above. """
     ws = worksheet
     sn = []
     group_id = ws.cell(row=STUDENT_ID_ROW - 1, column=column).value
@@ -54,7 +117,8 @@ def parse_column(worksheet, report_number, column):
     instructor = ws.cell(row=INSTRUCTOR_ROW, column=column).value
     approver = ws.cell(row=INSTRUCTOR_ROW+1, column=column).value
-    if instructor: instructor = instructor.lower()
+    if instructor:
+        instructor = instructor.lower()
     if approver:
         approver = str(approver).lower()
@@ -135,32 +199,47 @@ def get_groups_from_report(repn):
         cls.append(cp)
     return cls
 
+# @profile
 def populate_student_report_results(students):
     # take students (list-of-dicts in the info format) and assign them the results from the reports.
     out = get_output_file()
+    import time
+    t0 = time.time()
     print("> Loading student report scores from: %s" % out)
     if not os.path.exists(out):
         return students, []
     for k in students:
         students[k]['reports'] = {i: None for i in range(4)}
-    wb = openpyxl.load_workbook(out, data_only=True)
+    import pandas as pd
+    wb = openpyxl.load_workbook(out, data_only=True, read_only=True)
     maximal_groups = []
     maximal_groups_students = []
     for repn in range(3, -1, -1):
         cls = []
-        for i in range(2, wb.worksheets[repn].max_column + 1):
-            cp = parse_column(wb.worksheets[repn], report_number=repn, column=i)
+        # Read the whole sheet once with pandas and parse it column by column.
+        sheet = pd.read_excel(out, sheet_name=repn, index_col=None, header=None)
+        sheet = sheet.fillna('').to_numpy()
+        for i in range(1, sheet.shape[1]):
+            cp = parse_column_numpy(sheet[:, i], report_number=repn, column=i)
             if not cp['student_ids']:
-                continue
+                break
            cls.append(cp)
         for g in cls:
             for sid in g['student_ids']:
                 student = students.get(sid, None)
                 if student is None:
@@ -172,5 +251,5 @@ def populate_student_report_results(students):
                 if sid not in maximal_groups_students:
                     maximal_groups.append(g)
                     maximal_groups_students += g['student_ids']
+    print("> time elapsed", time.time() - t0)
     return students, maximal_groups
\ No newline at end of file
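The pandas path in populate_student_report_results reads each sheet once and then walks the columns of a plain numpy array. A minimal sketch of that pattern (the file name and sheet index are placeholders):

```python
# Sketch of the pandas column-walk used in populate_student_report_results above.
import pandas as pd

sheet = pd.read_excel("output.xlsx", sheet_name=0, index_col=None, header=None)
A = sheet.fillna('').to_numpy()

for i in range(1, A.shape[1]):  # column 0 holds the row labels; data starts at 1
    col = A[:, i]               # one group's evaluation column
    # hand `col` to a column parser such as parse_column_numpy(col, ...)
```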
No preview for this file type
@@ -3,6 +3,9 @@ import shutil, os, glob
 from datetime import datetime, timedelta
 import calendar
 import pickle
+import time
+from line_profiler_pycharm import profile
+from coursebox.thtools_base import partition_list
 import slider
 from jinjafy import jinjafy_comment
@@ -16,6 +19,7 @@ from coursebox.core.info import class_information
 from coursebox.material.lecture_questions import lecture_question_compiler
 from slider import latexmk
 import coursebox
 
 def get_feedback_groups():
     paths = get_paths()
@@ -47,12 +51,13 @@ def get_feedback_groups():
     reduced_groups = [rg for rg in reduced_groups if len(rg) > 0]
     # groups are now partitioned.
     if len(remaining_lectures) > 0:
-        fbgs = coursebox.thtools_base.partition_list(reduced_groups, len(remaining_lectures))
+        fbgs = partition_list(reduced_groups, len(remaining_lectures))
         for gg in fbgs:
             for g in gg:
                 already_used = already_used + g
-        lst = thtools.thtools_base.partition_list([s for s in all_students if s not in already_used], len(remaining_lectures))
+        lst = partition_list([s for s in all_students if s not in already_used], len(remaining_lectures))
         for i in range(len(remaining_lectures)):
             dg = []
             for g in fbgs[i]: dg += g  # flatten the list
@@ -217,7 +222,49 @@ def compile_simple_files(paths, info, template_file_list, verbose=False):
     jinjafy_template(data=d2, file_in=fname, file_out=tex_out, filters=get_filters(), template_searchpath=paths['instructor'])
     latexmk(tex_out, pdf_out=paths['pdf_out'] + "/" + os.path.basename(tex_out)[:-4] + ".pdf")
+from pathlib import Path
+from jinjafy.cache.simplecache import hash_file_
+
+@profile
+def get_hash_from_base(base):
+    """ Build {relative path: {mtime, hash, modified}} for all files under base,
+    re-hashing only files whose mtime differs from the cached sharedcache.pkl entry. """
+    if not os.path.exists(base + "/sharedcache.pkl"):
+        source = {}
+    else:
+        with open(base + "/sharedcache.pkl", 'rb') as f:
+            source = pickle.load(f)
+    actual_files = {}
+    for f in glob.glob(base + "/**", recursive=True):
+        if os.path.isdir(f):
+            continue
+        if f.endswith("sharedcache.pkl"):
+            continue
+        rel = os.path.relpath(f, base)
+        actual_files[rel] = dict(mtime=os.path.getmtime(f), hash=-1, modified=False)
+        if rel not in source or actual_files[rel]['mtime'] != source[rel].get('mtime', -1):
+            # The mtime changed (or the file is new): recompute the hash.
+            new_hash = hash_file_(f)
+            actual_files[rel]['modified'] = new_hash != source.get(rel, {}).get('hash', -1)
+            actual_files[rel]['hash'] = new_hash
+        else:
+            actual_files[rel]['hash'] = source[rel]['hash']
+    return actual_files
+
+@profile
-def fix_shared(paths, output_dir, pdf2png=False, dosvg=True, verbose=False, compile_templates=True):
+def fix_shared(paths, output_dir, pdf2png=False, dosvg=True, verbose=False, compile_templates=True, shallow=True):
     '''
     Copy shared files into lecture directories
     '''
@@ -225,46 +272,171 @@ def fix_shared(paths, output_dir, pdf2png=False, dosvg=True, verbose=False, compile_templates=True, shallow=True):
     from jinjafy.cache import cache_contains_file, cache_update_file
     from slider.convert import svg2pdf, pdfcrop
     from slider import convert
-    def rec_fix_shared(shared_base, output_dir):
-        if dosvg:
-            for svg in glob.glob(shared_base + "/*.svg"):
-                if not cache_contains_file(cache_base, svg):
-                    if verbose:
-                        print("converting to pdf", svg)
-                    svg2pdf(svg, crop=True, text_to_path=True)
-                    cache_update_file(cache_base, svg)
-        files = glob.glob(shared_base + "/*")
-        for f in files:
-            if f.endswith("cache.pkl"):
-                continue
-            # check if template
-            if "templates" in f and f.endswith("_partial.tex"):
-                continue
-            if os.path.isdir(f):
-                od2 = output_dir + "/" + os.path.basename(f)
-                if not os.path.exists(od2):
-                    os.mkdir(od2)
-                rec_fix_shared(f, od2)
-            else:
-                of = output_dir + "/" + os.path.basename(f)
-                if not cache_contains_file(cache_base, f) or not os.path.exists(of):
-                    print(f"> {f} -> {of}")
-                    shutil.copy(f, of)
-                    if f.endswith(".pdf") and pdf2png:
-                        if verbose:
-                            print(" converting to png", f)
-                        convert.pdf2png(of)
-                    cache_update_file(cache_base, f)
-        if verbose:
-            print(" done!")
-    rec_fix_shared(shared_base=paths['shared'], output_dir=output_dir)
+    t0 = time.time()
+    shared_base = paths['shared']
+
+    source = get_hash_from_base(shared_base)
+    target = get_hash_from_base(output_dir)
+
+    # Convert modified svg files to (cropped) pdf and register the new pdfs in the source index.
+    source_extra = {}
+    for rel in source:
+        if rel.endswith(".svg") and source[rel]['modified']:
+            pdf_file = svg2pdf(shared_base + "/" + rel, crop=True, text_to_path=True, verbose=True)
+            rel = os.path.relpath(pdf_file, shared_base)
+            source_extra[rel] = dict(mtime=os.path.getmtime(pdf_file), hash=hash_file_(pdf_file), modified=True)
+    for k, v in source_extra.items():
+        source[k] = v
+
+    # Perform the sync: copy everything that is new or whose hash differs.
+    for rel in source:
+        if rel.endswith("_partial.tex"):
+            continue
+        if rel not in target or target[rel]['hash'] != source[rel]['hash']:
+            print(" -> ", output_dir + "/" + rel)
+            shutil.copy(shared_base + "/" + rel, output_dir + "/" + rel)
+            target[rel] = source[rel].copy()
+            target[rel]['modified'] = True
+            target[rel]['mtime'] = os.path.getmtime(output_dir + "/" + rel)
+
+    if pdf2png:
+        for rel in target:
+            if rel.endswith(".pdf") and target[rel]['modified']:
+                png = convert.pdf2png(output_dir + "/" + rel, verbose=True)
+                target[rel]['modified'] = False
+                target[rel]['hash'] = hash_file_(output_dir + "/" + rel)
+                target[rel]['mtime'] = os.path.getmtime(output_dir + "/" + rel)
+
+    # Save the caches.
+    with open(shared_base + "/sharedcache.pkl", 'wb') as f:
+        pickle.dump(source, f)
+    with open(output_dir + "/sharedcache.pkl", 'wb') as f:
+        pickle.dump(target, f)
+    print("fix_shared()", time.time() - t0)
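The new fix_shared is an mtime-gated content sync: get_hash_from_base re-hashes a file only when its mtime changed, and a file is copied only when its hash differs between source and target. The decision rule, reduced to a sketch (`entry` and `compute_hash` are illustrative names, not part of the module):

```python
# Sketch of the cache rule in get_hash_from_base/fix_shared above.
# `entry` is the cached record for one file, or None if the file is new.
def refresh(entry, mtime_now, compute_hash):
    if entry is None or mtime_now != entry["mtime"]:
        h = compute_hash()  # mtime changed or file is new: re-hash it
        return {"mtime": mtime_now, "hash": h,
                "modified": entry is None or h != entry["hash"]}
    return {**entry, "modified": False}  # mtime unchanged: trust the cached hash
```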
 def jinjafy_shared_templates_dir(paths, info):
     tpd = paths['shared'] + "/templates"
@@ -379,6 +551,7 @@ def mvfiles(source_dir, dest_dir):
         if (os.path.isfile(full_file_name)):
             shutil.copy(full_file_name, os.path.dirname(dest_dir))
 
+@profile
 def make_webpage(dosvg=True):
     cinfo = class_information()
     paths = get_paths()
...