Commit e19bb48a authored by tuhe

Various updates for 02465 during semester

parent a6f84c4c
@@ -11,7 +11,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
# beamer-slider
setuptools.setup(
name="coursebox",
version="0.1.2",
version="0.1.4",
author="Tue Herlau",
author_email="tuhe@dtu.dk",
description="A course management system currently used at DTU",
@@ -30,5 +30,5 @@ setuptools.setup(
package_dir={"": "src"},
packages=setuptools.find_packages(where="src"),
python_requires=">=3.8",
install_requires=['numpy','pycode_similar','tika','openpyxl', 'xlwings','matplotlib','langdetect','jinjafy','beamer-slider','tinydb'],
install_requires=['numpy','pycode_similar','tika','openpyxl', 'xlwings','matplotlib','langdetect','beamer-slider','tinydb'],
)
Metadata-Version: 2.1
Name: coursebox
Version: 0.1.1
Version: 0.1.4
Summary: A course management system currently used at DTU
Home-page: https://lab.compute.dtu.dk/tuhe/coursebox
Author: Tue Herlau
@@ -16,6 +16,34 @@ Description-Content-Type: text/markdown
License-File: LICENSE
# Coursebox DTU
DTU course management software.
## Installation
```terminal
pip install coursebox
```
## What it can do
- Single semester-dependent configuration file
- Integrates with DTU Inside/DTU Learn
- Distribution/evaluation of project reports in a Learn-compatible format
- Quiz generation in a DTU Learn/Beamer-friendly format
- Automatic website/syllabus generation
- Automatic generation of lecture handouts/exercises (you don't have to track dynamic content like dates or lecture titles; it is all in the configuration)
- Easy compilation to 2/5-day formats (continuing education)
## Usage
Coursebox requires a specific directory structure. It is easiest to start from an existing course and adapt it to your needs. Please contact me at tuhe@dtu.dk for more information.
## Citing
```bibtex
@online{coursebox,
title={Coursebox (0.1.1): \texttt{pip install coursebox}},
url={https://lab.compute.dtu.dk/tuhe/coursebox},
urldate = {2021-09-07},
month={9},
publisher={Technical University of Denmark (DTU)},
author={Tue Herlau},
year={2021},
}
```
openpyxl
numpy
pycode_similar
tika
openpyxl
xlwings
pybtex
langdetect
wexpect
pexpect
matplotlib
numpy
pycode_similar
jinjafy
langdetect
beamer-slider
tinydb
@@ -12,10 +12,14 @@ from coursebox.core.info_paths import core_conf
# import pybtex.database.input.bibtex
# import pybtex.plugin
# import io
from line_profiler_pycharm import profile
import time
@profile
def xlsx_to_dicts(xlsx_file,sheet=None, as_dict_list=False):
wb = openpyxl.load_workbook(xlsx_file, data_only=True)
# print("Loading...", xlsx_file, sheet, as_dict_list)
t0 = time.time()
wb = openpyxl.load_workbook(xlsx_file, data_only=True, read_only=True)
if not sheet:
ws = wb.worksheets[0]
else:
@@ -24,26 +28,65 @@ def xlsx_to_dicts(xlsx_file,sheet=None, as_dict_list=False):
return None
else:
ws = ws.pop()
dd = []
key_cols = [j for j in range(ws.max_column) if ws.cell(row=1, column=j + 1).value is not None]
for i in range(1, ws.max_row):
rdict = {}
if not any( [ws.cell(row=i+1, column=j+1).value is not None for j in key_cols] ):
continue
for j in key_cols:
key = ws.cell(row=1, column=j+1).value
if key is not None:
key = key.strip() if isinstance(key,str) else key
value = ws.cell(row=i + 1, column=j + 1).value
value = value.strip() if isinstance(value,str) else value
if isinstance(value, str):
if value == 'True':
value = True
if value == 'False':
value = False
rdict[key] = value
dd.append(rdict)
# print(time.time()-t0)
# dd = []
# key_cols = [j for j in range(ws.max_column) if ws.cell(row=1, column=j + 1).value is not None]
# print(time.time()-t0, ws.max_row)
# np.array([[i.value for i in j[1:5]] for j in ws.rows])
import numpy as np
A = np.array([[i.value for i in j] for j in ws.rows])
# print(time.time() - t0, ws.max_row, len(key_cols))
# for j in range(A.shape[1]):
a = 234
# for i in range(1, ws.max_row):
# rdict = {}
# if not any( [ws.cell(row=i+1, column=j+1).value is not None for j in key_cols] ):
# continue
# for j in key_cols:
# key = ws.cell(row=1, column=j+1).value
# if key is not None:
# key = key.strip() if isinstance(key,str) else key
# value = ws.cell(row=i + 1, column=j + 1).value
# value = value.strip() if isinstance(value,str) else value
# if isinstance(value, str):
# if value == 'True':
# value = True
# if value == 'False':
# value = False
# rdict[key] = value
# dd.append(rdict)
# print(time.time()-t0)
A = A[:, A[0] != None]
A = A[(A != None).sum(axis=1) > 0, :]
dd2 = []
for i in range(1, A.shape[0]):
A[A == 'True'] = True
A[A == 'False'] = False
d = dict(zip(A[0, :].tolist(), [a.strip() if isinstance(a,str) else a for a in A[i, :].tolist() ]))
dd2.append(d)
# print(time.time() - t0)
dd = dd2
# if dd != dd2:
# for k in range(len(dd)):
# if dd[k] != dd2[k]:
# print(k)
# print(dd)
# print(dd2)
# assert False
# print("BAd!")
if as_dict_list:
dl = list_dict2dict_list(dd)
for k in dl.keys():
@@ -51,6 +94,8 @@ def xlsx_to_dicts(xlsx_file,sheet=None, as_dict_list=False):
if len(x) == 1: x = x.pop()
dl[k] = x
dd = dl
wb.close()
# print("xlsx2dicts", time.time()-t0)
return dd
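For reference, the new fast path boils down to one streaming pass over `ws.rows` followed by numpy masks for header-less columns and blank rows. A condensed, self-contained sketch of that pattern (a sketch only: it assumes a rectangular sheet, and `sheet_to_dicts` is an illustrative name, not part of coursebox):
```python
import numpy as np
import openpyxl

def sheet_to_dicts(xlsx_file):
    # read_only=True streams cells instead of building the full in-memory DOM
    wb = openpyxl.load_workbook(xlsx_file, data_only=True, read_only=True)
    ws = wb.worksheets[0]
    rows = [[c.value for c in row] for row in ws.rows]
    wb.close()
    A = np.array(rows, dtype=object)
    A = A[:, A[0] != None]                  # keep only columns with a header
    A = A[(A != None).sum(axis=1) > 0, :]   # drop rows that are entirely blank
    keys = [k.strip() if isinstance(k, str) else k for k in A[0]]
    return [dict(zip(keys, r.tolist())) for r in A[1:]]
```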
def get_enrolled_students():
@@ -200,6 +245,7 @@ def get_forum(paths):
d2.append({k: v[i] for k, v in dd.items()})
return d2
@profile
def class_information():
course_number = core_conf['course_number']
piazza = 'https://piazza.com/dtu.dk/%s%s/%s' % (semester().lower(), year(), course_number)
@@ -214,8 +260,8 @@ def class_information():
'piazza': piazza, # deprecated.
'course_number': course_number,
'semester': semester(),
'reports_handout': [1,6],
'reports_handin': [6,11],
# 'reports_handout': [1,6], # Set in excel conf.
# 'reports_handin': [6, 11], # set in excel conf.
'semester_id': semester_id(),
'today': today(),
'instructors': get_instructors(),
import re
import tempfile
import tika
import os
import shutil
import openpyxl
@@ -5,7 +8,6 @@ import numpy as np
import itertools
import math
import glob
# import zipfile
from tika import parser
from openpyxl.worksheet.datavalidation import DataValidation
from openpyxl.utils import get_column_letter
@@ -22,6 +24,8 @@ from jinjafy.plot.plot_helpers import get_colors
import time
from collections import defaultdict
import zipfile
import hashlib
import pandas as pd
def get_dirs(zf):
@@ -32,13 +36,11 @@ def get_dirs(zf):
def fix_handins_fuckup(project_id=2):
""" Handle the problem with multiple hand-ins in DTU learn. """
import zipfile
paths = get_paths()
from coursebox.core.info import class_information
info = class_information()
zf = paths['instructor_project_evaluations'] + f"/zip{project_id}.zip"
tas = [i['shortname'] for i in info['instructors'] ]
ta_links = {i['shortname']: i for i in info['instructors']}
@@ -51,7 +53,6 @@ def fix_handins_fuckup(project_id=2):
ta_reports[r] = ta
fls = get_dirs(zf)
# fls = [f for f in zip.namelist() if not f.endswith("tml") and f.endswith("/")]
d = defaultdict(lambda: [])
for l in fls:
@@ -123,7 +124,6 @@ def handle_projects(verbose=False, gather_main_xlsx_file=True, plagiarism_check=
zip1 = instructor_path + "/zip1.zip"
zip2 = instructor_path + "/zip2.zip"
zip3 = instructor_path + "/zip3.zip"
zips = [None, zip1, zip2, zip3]
for j,zf in enumerate(zips):
@@ -138,12 +138,12 @@ def handle_projects(verbose=False, gather_main_xlsx_file=True, plagiarism_check=
continue
else: # instructor files do not exist
if j == 0:
copy_populate_from_template(info, sheet_number=j, zip_file=None)
copy_populate_from_template(paths, info, sheet_number=j, zip_file=None)
elif os.path.exists(zf):
# make a copy of report template and populate it with groups obtained from previous report evaluation.
# all_groups = get_all_reports_from_collected_xlsx_file()
copy_populate_from_template(info, sheet_number=j, zip_file=zf)
copy_populate_from_template(paths, info, sheet_number=j, zip_file=zf)
# distribute_zip_content(info, sheet=j, zf_base=zf)
else:
print("When available, please move downloaded copy of all reports from campusnet to destination:")
@@ -228,13 +228,13 @@ def compute_error_files(info, paths):
es = err_label + f"> Report score is {g.get('score', 0)}. The report score has to be between 0 and 4; probably due to a too high value of 'Delta' in instructor sheet."
ERRORS[ins].append(es)
if repn >= 1 and not g['comments']:
if repn >= 1 and not g['comments'] and info['course_number'] != '02465':
es = err_label + "> Incomplete report evaluation (missing comments field)"
es += "Please fill out comments field in your excel sheet."
ERRORS[ins].append(es)
if repn >= 1 and not g['approver_comments']:
if repn >= 1 and not g['approver_comments'] and info['course_number'] != '02465':
es = err_label + "> Incomplete report evaluation (you are missing the approver comments field; can simply be set to 'ok')."
ERRORS.get(g['approver'], []).append(es)
@@ -300,10 +300,70 @@ def get_instructor_xlsx_files(info, sheet):
return xlsx
import hashlib
def get_groups_from_learn_xslx_file(paths, sheet_number):
fname = f"{paths['instructor_project_evaluations']}/groups{sheet_number}.xlsx"
all_groups = []
if os.path.exists(fname):
# Reading from the groups{number}.xlsx group-id file exported from DTU learn. Note this file contains fuckups.
dg = defaultdict(list)
df = pd.read_excel(fname)
for uname, group_id in zip(df['Username'], df['Project groups']):
id = int(group_id.split(" ")[1])
if len(uname) == 7 and uname[0] == 's':
dg[id].append(uname)
else:
dg[id].append("DTU-LEARN-FUCKED-THIS-ID-UP-CHECK-ON-REPORT")
def copy_populate_from_template(info, sheet_number,zip_file):
all_groups = [{'group_id': id, 'student_ids': students} for id, students in dg.items()]
return all_groups
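The grouping step above assumes the exported "Project groups" column holds strings like "Group 12" and that usernames are seven-character s-numbers; a condensed illustration with invented sample rows:
```python
from collections import defaultdict

# invented sample of (Username, Project groups) pairs from a Learn export
rows = [("s123456", "Group 1"), ("s234567", "Group 1"), ("s345678", "Group 2")]
dg = defaultdict(list)
for uname, group in rows:
    dg[int(group.split(" ")[1])].append(uname)
groups = [{"group_id": gid, "student_ids": studs} for gid, studs in dg.items()]
# -> [{'group_id': 1, 'student_ids': ['s123456', 's234567']}, ...]
```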
def search_projects(paths, sheet_number, patterns):
zip_files = [paths['instructor_project_evaluations'] + "/zip%d.zip" % sheet_number]
# print(zip_files)
all_groups = []
gps = defaultdict(list)
for zip_file in zip_files:
if os.path.exists(zip_file):
tmpdir = tempfile.TemporaryDirectory()
zipfile.ZipFile(zip_file).extractall(path=tmpdir.name)
pdfs = glob.glob(tmpdir.name + "/**/*.pdf", recursive=True)
for pdf in pdfs:
pdf_parsed = tika.parser.from_file(pdf)
id =int(os.path.dirname(pdf).split(" - ")[1].split(" ")[1])
students = re.findall('s\d\d\d\d\d\d', pdf_parsed['content'], flags=re.IGNORECASE)
gps[id] += students
for id, students in gps.items():
all_groups.append({'group_id': id, 'student_ids': list(set(students))})
return all_groups
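The id harvesting above reduces to a case-insensitive regex over the text tika extracts from each PDF; a tiny illustration (the sample string is invented):
```python
import re

text = "Group 7 hand-in by s123456, S234567"  # stand-in for pdf_parsed['content']
ids = set(re.findall(r"s\d{6}", text, flags=re.IGNORECASE))
# -> {'s123456', 'S234567'}
```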
def unpack_zip_file_recursively(zip_file, destination_dir):
"""
Unpack the zip_file (extension: .zip) to the given directory.
If the folders in the zip file contain other zip files, these are unpacked recursively.
"""
# Unpack zip file recursively and flatten it.
zipfile.ZipFile(zip_file).extractall(path=destination_dir)
ls = glob.glob(destination_dir + "/*")
for f in ls:
if os.path.isdir(f):
zipfiles = glob.glob(f + "/*.zip")
for zp in zipfiles:
print("Unpacking student zip file>", zp)
zipfile.ZipFile(zp).extractall(path=os.path.dirname(zp) + "/")
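A hypothetical invocation, extracting into a temporary directory (the zip name is illustrative):
```python
import tempfile

with tempfile.TemporaryDirectory() as tmp:
    unpack_zip_file_recursively("zip2.zip", destination_dir=tmp)
    # tmp now holds the hand-in folders, with nested student zips expanded
```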
def copy_populate_from_template(paths, info, sheet_number,zip_file):
# Try to load group ids from the project pdf's
all_groups = search_projects(paths, sheet_number, r"s\d{6}")
# all_groups = get_groups_from_learn_xslx_file(paths, sheet_number)
if len(all_groups) == 0:
all_groups = projects_info.get_groups_from_report(repn=sheet_number-1) if sheet_number > 0 else []
# Hopefully this did the trick and we have the groups all grouped up.
# set up which TA approve which TA
if any( [i['language'] not in ["en", "any"] for i in info['instructors'] ]):
print(info['instructors'])
@@ -337,10 +397,13 @@ def copy_populate_from_template(info, sheet_number,zip_file):
n_groups_per_instructor = 24 + (sheet_number == 0) * 26
if sheet_number > 0:
zfd = zip_file[:-4]
if not os.path.exists(zfd):
os.mkdir(zfd)
zipfile.ZipFile(zip_file).extractall(path=zfd)
# zfd = zip_file[:-4]
# if not os.path.exists(zfd):
# os.mkdir(zfd)
zfd = tempfile.TemporaryDirectory().name
# zipfile.ZipFile(zip_file).extractall(path=tmpdir.name)
unpack_zip_file_recursively(zip_file, destination_dir=zfd)
# get all report handins (i.e. directories)
ls = [l for l in glob.glob(zfd + "/*") if l[-3:] not in ["txt", "tml"]]
@@ -431,8 +494,8 @@ def copy_populate_from_template(info, sheet_number,zip_file):
corrector = all_tas[shortname]['approver']
if sheet_number > 0:
# Copy reports to directory (distribute amongst TAs)
b_dir = os.path.dirname(zip_file)
ins_dir = "%s/project_%i_%s/"%(b_dir, sheet_number, shortname)
# b_dir = os.path.dirname(zip_file)
ins_dir = "%s/project_%i_%s/"%(zfd, sheet_number, shortname)
if not os.path.exists(ins_dir):
os.mkdir(ins_dir)
@@ -440,7 +503,7 @@ def copy_populate_from_template(info, sheet_number,zip_file):
for handin in all_tas[shortname]['handins']:
shutil.move(handin['path'], ins_dir)
shutil.make_archive(ins_dir[:-1], 'zip', ins_dir)
shutil.make_archive(os.path.dirname(zip_file) +"/"+ os.path.basename(ins_dir[:-1]), 'zip', ins_dir)
time.sleep(2)
print("Removing tree of reports to clear up space...")
shutil.rmtree(ins_dir)
@@ -471,10 +534,10 @@ def copy_populate_from_template(info, sheet_number,zip_file):
sheet.cell(STUDENT_ID_ROW+j, ccol+i).value = s
wb.save(ifile)
wb.close()
# clean up zip file directories
if sheet_number > 0:
zfd = zip_file[:-4]
shutil.rmtree(zfd)
# clean up zip file directories; since it is a tmp file, we don't have to.
# if sheet_number > 0:
# zfd = zip_file[:-4]
# shutil.rmtree(zfd)
def write_dropdown_sumprod_sheet(sheet):
ccol = 2
@@ -3,6 +3,7 @@ import os
import re
import openpyxl
import numpy as np
from line_profiler_pycharm import profile
INSTRUCTOR_ROW = 6
INSTRUCTOR_CHECKER_ROW = 31
@@ -16,19 +17,6 @@ RANGE_MIN_COL = 5
DELTA_ALLOWED_ROW = 111 # The range of possible delta-values. Should be in an empty (new) row at bottom.
def get_all_reports_from_collected_xlsx_file_DEFUNCT(): # when is this used?
out = get_output_file()
wb = openpyxl.load_workbook(out)
all_reports = {}
for repn in range(3, -1, -1):
cls = []
for i in range(2, wb.worksheets[repn].max_column + 1):
cp = parse_column(wb.worksheets[repn], report_number=repn, column=i)
if not cp['student_ids']:
continue
cls.append(cp)
all_reports[repn] = cls
return all_reports
def parse_column_student_ids(v):
sn = []
@@ -42,7 +30,82 @@ def parse_column_student_ids(v):
sn.append(g)
return sn
def parse_column_numpy(col, report_number, column):
""" Parse a column assuming it is defined as a numpy array.
This is the recommended method as it is much, much faster.
"""
# ws = worksheet # wb.worksheets[sheet]
sn = []
group_id = col[STUDENT_ID_ROW - 1-1] #).value
# col = ['' if col[0] is np.NAN else x for x in col]
for i in range(0, 3):
v = col[i + STUDENT_ID_ROW-1]#, column=column).value
sn += parse_column_student_ids(v)
instructor = col[INSTRUCTOR_ROW-1]#, column=column).value
approver = col[INSTRUCTOR_ROW+1-1]# , column=column).value
if instructor:
instructor = instructor.lower()
if approver:
approver = str(approver).lower()
content = None
comments = None
appr_comments = None
if report_number > 0 and sn:
N = 38
rarr = np.ndarray(shape=(N,1),dtype=np.object)
for j in range(N):
v = col[3 + STUDENT_ID_ROW+j-1]#, column=column).value
rarr[j,0] = v
content = rarr
comments = col[EVALUATION_ROW_END+5-1]# , column=column).value
appr_comments = col[EVALUATION_ROW_END+6-1]# , column=column).value
cgroup = {'column_j': column, 'student_ids': sn, 'instructor': instructor, "approver": approver, 'content': content,
"comments": comments, "approver_comments": appr_comments, 'missing_fields': [],
'group_id': group_id}
# Now, find errors... This involves first finding non-zero columns
if report_number > 0 and sn:
score = cgroup['content'][-3, 0]
cgroup['score'] = score
cgroup['pct'] = score2pct(score)
# if report_number == 3: # this obviously needs fixing for next semester.
# raise Exception("No report number 3 anymore. ")
# I = []
# for i in range(42): # max number of evaluation fields (irrelevant)
# v1 = col[WEIGHT_ROW_START+i-1, RANGE_MIN_COL-1]# ).value
# v2 = col[WEIGHT_ROW_START+i-1, RANGE_MIN_COL+1-1]#).value
# if (v1 == -1 and v2 == 1) or (v1 == 0 and v2 == 4):
# I.append(i)
# if v1 == -1 and v2 == 1:
# # print("delta col")
# break
#
# for i in I:
# w1 = worksheet.cell(row=WEIGHT_ROW_START + i, column=1).value
# w3_ = worksheet.cell(row=INSTRUCTOR_ROW + i+2, column=1).value # should agree with label in w1
# w2 = worksheet.cell(row=INSTRUCTOR_ROW + i+2, column=column).value
# if w2 == None:
# cgroup['missing_fields'].append( (i, w1) )
# if report_number < 3:
# print("old report nr.")
return cgroup
def parse_column(worksheet, report_number, column):
""" This is the old method. It is very slow. Use the numpy-version above.
"""
ws = worksheet # wb.worksheets[sheet]
sn = []
group_id = ws.cell(row=STUDENT_ID_ROW - 1, column=column).value
@@ -54,7 +117,8 @@ def parse_column(worksheet, report_number, column):
instructor = ws.cell(row=INSTRUCTOR_ROW, column=column).value
approver = ws.cell(row=INSTRUCTOR_ROW+1, column=column).value
if instructor: instructor = instructor.lower()
if instructor:
instructor = instructor.lower()
if approver:
approver = str(approver).lower()
@@ -135,32 +199,47 @@ def get_groups_from_report(repn):
cls.append(cp)
return cls
# @profile
def populate_student_report_results(students):
# take students (list-of-dicts in the info format) and assign them the results from the reports.
out = get_output_file()
import time
t0 = time.time()
print("> Loading student report scores from: %s"%out)
if not os.path.exists(out):
return students, []
for k in students:
students[k]['reports'] = {i: None for i in range(4)}
import pandas as pd
wb = openpyxl.load_workbook(out,data_only=True)
wb = openpyxl.load_workbook(out, data_only=True, read_only=True)
# Perhaps find non-empty cols (i.e. those with content)
print("> time elapsed", time.time() - t0)
maximal_groups = []
maximal_groups_students = []
for repn in range(3, -1, -1):
cls = []
for i in range(2, wb.worksheets[repn].max_column + 1):
cp = parse_column(wb.worksheets[repn], report_number=repn, column=i)
sheet = pd.read_excel(out, sheet_name=repn, index_col=None, header=None)
sheet = sheet.fillna('')
sheet = sheet.to_numpy()
# to_numpy()
for i in range(1,sheet.shape[1]):
# for i in range(2, wb.worksheets[repn].max_column + 1):
# print(i, wb.worksheets[repn].max_column)
# s = pd.read_excel(out, sheet_name=1)
cp = parse_column_numpy(sheet[:,i], report_number=repn, column=i)
# cp = parse_column(wb.worksheets[repn], report_number=repn, column=i)
if not cp['student_ids']:
continue
break
cls.append(cp)
for g in cls:
for sid in g['student_ids']:
student = students.get(sid, None)
if student is None:
@@ -172,5 +251,5 @@ def populate_student_report_results(students):
if sid not in maximal_groups_students:
maximal_groups.append(g)
maximal_groups_students += g['student_ids']
print("> time elapsed", time.time() -t0)
return students, maximal_groups
\ No newline at end of file
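The pandas path above reads each sheet once as a raw cell grid and walks columns left to right, stopping at the first column without student ids. A simplified stand-alone sketch (the file name and the emptiness test are placeholders for the real parse_column_numpy check):
```python
import pandas as pd

# hypothetical collected file; header=None keeps the sheet as a raw grid
grid = pd.read_excel("collected.xlsx", sheet_name=0, header=None).fillna("").to_numpy()
for i in range(1, grid.shape[1]):
    col = grid[:, i]
    if not any(str(v).strip() for v in col):  # placeholder for the student-id check
        break                                 # groups occupy contiguous columns
    print("column", i, "->", col[:3])
```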
@@ -3,6 +3,9 @@ import shutil, os, glob
from datetime import datetime, timedelta
import calendar
import pickle
import time
from line_profiler_pycharm import profile
from coursebox.thtools_base import partition_list
import slider
from jinjafy import jinjafy_comment
@@ -16,6 +19,7 @@ from coursebox.core.info import class_information
from coursebox.material.lecture_questions import lecture_question_compiler
from slider import latexmk
import coursebox
# from line_profiler_pycharm import profile
def get_feedback_groups():
paths = get_paths()
@@ -47,12 +51,13 @@ def get_feedback_groups():
reduced_groups = [rg for rg in reduced_groups if len(rg)>0]
# groups are now partitioned.
if len(remaining_lectures) > 0:
fbgs = coursebox.thtools_base.partition_list(reduced_groups, len(remaining_lectures))
fbgs = partition_list(reduced_groups, len(remaining_lectures))
for gg in fbgs:
for g in gg:
already_used = already_used + g
lst = thtools.thtools_base.partition_list([s for s in all_students if s not in already_used], len(remaining_lectures))
lst = partition_list([s for s in all_students if s not in already_used], len(remaining_lectures))
for i in range(len(remaining_lectures)):
dg = []
for g in fbgs[i]: dg += g # flatten the list
@@ -217,7 +222,49 @@ def compile_simple_files(paths, info, template_file_list, verbose=False):
jinjafy_template(data=d2, file_in=fname, file_out=tex_out, filters=get_filters(), template_searchpath=paths['instructor'])
latexmk(tex_out, pdf_out= paths['pdf_out'] + "/" + os.path.basename(tex_out)[:-4]+".pdf")
def fix_shared(paths, output_dir, pdf2png=False,dosvg=True,verbose=False, compile_templates=True):
# rec_fix_shared(shared_base=paths['shared'], output_dir=output_dir)
import time
# import dirsync
# dirsync.sync(paths['shared'], output_dir, 'diff')
# Do smarter fixin'
from pathlib import Path
from jinjafy.cache.simplecache import hash_file_
@profile
def get_hash_from_base(base):
if not os.path.exists(base + "/sharedcache.pkl"):
source = {}
else:
with open(base + "/sharedcache.pkl", 'rb') as f:
source = pickle.load(f)
actual_files = {}
for f in glob.glob(base + "/**", recursive=True):
if os.path.isdir(f):
continue
if f.endswith("sharedcache.pkl"):
continue
rel = os.path.relpath(f, base)
# d = dict(mtime=os.path.getmtime(f))
actual_files[rel] = dict(mtime=os.path.getmtime(f), hash=-1, modified=False)
if rel not in source or (actual_files[rel]['mtime'] != source[rel].get('mtime', -1)): # It has been modified, update hash
# print(rel, time.ctime(actual_files[rel]['mtime']), time.ctime(source[rel].get('mtime', -1)))
new_hash = hash_file_(f)
# actual_files[rel] = {}
actual_files[rel]['modified'] = new_hash != source.get(rel, {}).get('hash', -1)
actual_files[rel]['hash'] = new_hash
else:
actual_files[rel]['hash'] = source[rel]['hash']
return actual_files
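get_hash_from_base re-hashes a file only when its cached mtime is stale; the sync loop below it then copies whenever source and target hashes disagree. A minimal free-standing sketch of that hash-guarded copy (helper names are mine, not the project's):
```python
import hashlib, os, shutil

def file_hash(path):
    # hash the file in chunks so large PDFs are not loaded into memory at once
    h = hashlib.sha256()
    with open(path, "rb") as fh:
        for chunk in iter(lambda: fh.read(1 << 16), b""):
            h.update(chunk)
    return h.hexdigest()

def copy_if_changed(src, dst):
    # copy only when the target is missing or its content differs
    if not os.path.exists(dst) or file_hash(src) != file_hash(dst):
        shutil.copy(src, dst)
```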
@profile
def fix_shared(paths, output_dir, pdf2png=False,dosvg=True,verbose=False, compile_templates=True,shallow=True):
'''
Copy shared files into lecture directories
'''
@@ -225,46 +272,171 @@ def fix_shared(paths, output_dir, pdf2png=False,dosvg=True,verbose=False, compil
from jinjafy.cache import cache_contains_file, cache_update_file
from slider.convert import svg2pdf, pdfcrop
from slider import convert
import filecmp
def rec_fix_shared(shared_base, output_dir):
if dosvg:
for svg in glob.glob(shared_base+"/*.svg"):
if not cache_contains_file(cache_base, svg):
if verbose:
print("converting to pdf", svg)
svg2pdf(svg,crop=True, text_to_path=True)
cache_update_file(cache_base, svg)
files = glob.glob(shared_base+"/*")
for f in files:
if f.endswith("cache.pkl"):
continue
# check if template
if "templates" in f and f.endswith("_partial.tex"):
t0 = time.time()
shared_base = paths['shared']
output_dir = output_dir
import glob
# def get_cache_from_dir(shared_base):
# print("Beginning file cache..")
source = get_hash_from_base(shared_base)
target = get_hash_from_base(output_dir)
# update_source_cache = False
source_extra = {}
for rel in source:
if rel.endswith(".svg") and source[rel]['modified']:
pdf_file = svg2pdf(shared_base + "/"+rel, crop=True, text_to_path=True, verbose=True)
rel = os.path.relpath(pdf_file, shared_base)
source_extra[rel] = dict(mtime=os.path.getmtime(pdf_file), hash=hash_file_(pdf_file), modified=True)
for k, v in source_extra.items():
source[k] = v
# update_source_cache = True
# Perform sync here.
for rel in source:
if rel.endswith("_partial.tex"):
continue
if os.path.isdir(f):
od2 = output_dir + "/" + os.path.basename(f)
if not os.path.exists(od2):
os.mkdir(od2)
rec_fix_shared(f, od2)
else:
of = output_dir + "/" + os.path.basename(f)
if not cache_contains_file(cache_base, f) or not os.path.exists(of):
print(f"> {f} -> {of}")
shutil.copy(f, of)
if f.endswith(".pdf") and pdf2png:
if rel not in target or target[rel]['hash'] != source[rel]['hash']:
print(" -> ", output_dir + "/" + rel)
shutil.copy(shared_base +"/" + rel, output_dir + "/" + rel)
target[rel] = source[rel].copy()
target[rel]['modified'] = True
target[rel]['mtime'] = os.path.getmtime(output_dir + "/" + rel)
if pdf2png:
for rel in target:
if rel.endswith(".pdf") and target[rel]['modified']:
# print("pdf2png: ")
png = convert.pdf2png(output_dir + "/" + rel, verbose=True)
target[rel]['modified'] = False
target[rel]['hash'] = hash_file_(output_dir + "/" + rel)
target[rel]['mtime'] = os.path.getmtime(output_dir + "/" + rel)
# Save the cache.
if verbose:
print(" converting to png", f)
convert.pdf2png(of)
cache_update_file(cache_base, f)
with open(shared_base + "/sharedcache.pkl", 'wb') as f:
pickle.dump(source, f)
if verbose:
print(" done!")
with open(output_dir + "/sharedcache.pkl", 'wb') as f:
pickle.dump(target, f)
print("fix_shared()", time.time() - t0)
#
# if pdf2png:
# if f.endswith(".pdf") and pdf2png:
# if verbose:
# print("converting to png", f)
# convert.pdf2png(of)
#
# for f in source:
# if f not in target:
# print(f)
# else:
# if source[f]['hash'] != target[f]['hash']:
# print(f, f)
#
#
#
# a = 234
# # if rel not in source:
#
# # source[rel] = dict(mtime=os.path.getmtime(f), hash=hash_file_(f))
# #
#
#
# # Everything has a hash/mtime that is up to date. Now look at target dir
#
# get_cache_from_dir(output_dir)
#
# # Get the corresponding output at destination:
#
#
#
#
#
#
# for path in Path(shared_base).rglob('*'):
# print(path)
# a = 234
# def rec_fix_shared(shared_base, output_dir):
# if dosvg:
# for svg in glob.glob(shared_base+"/*.svg"):
# # if not os.path.exists(shared_base + )
# if not cache_contains_file(cache_base, svg):
# # if verbose:
# print("converting to pdf", svg)
# svg2pdf(svg,crop=True, text_to_path=True)
# cache_update_file(cache_base, svg)
# assert False
#
# files = glob.glob(shared_base+"/*")
# for f in files:
# if f.endswith("cache.pkl"):
# continue
#
# if "templates" in f and f.endswith("_partial.tex"):
# continue
#
# if os.path.isdir(f):
# od2 = output_dir + "/" + os.path.basename(f)
# if not os.path.exists(od2):
# os.mkdir(od2)
# rec_fix_shared(f, od2)
# else:
# of = output_dir + "/" + os.path.basename(f)
# if not os.path.exists(of) or not filecmp.cmp(f, of,shallow=shallow):
# print(f"> fix_shared() -> {of}")
# shutil.copy(f, of)
# if f.endswith(".pdf") and pdf2png:
# if verbose:
# print("converting to png", f)
# convert.pdf2png(of)
# # cache_update_file(cache_base, f)
#
# if verbose:
# print(" done!")
# if pdf2png:
# assert False
# get diff.
# directory_cmp = filecmp.dircmp(a=paths['shared'], b=output_dir)
# from filecmp import dircmp
# from filecmp import dircmp
# def print_diff_files(dcmp):
# for name in dcmp.diff_files:
# print("diff_file %s found in %s and %s" % (name, dcmp.left, dcmp.right))
# print("")
# for sub_dcmp in dcmp.subdirs.values():
# print_diff_files(sub_dcmp)
#
# t0 = time.time()
# dcmp = dircmp(paths['shared'], output_dir)
# print_diff_files(dcmp)
# print("dircmp", time.time() - t0)
# directory_cmp.report()
# import time
# t0 = time.time()
# rec_fix_shared(shared_base=paths['shared'], output_dir=output_dir)
# import time
# # import dirsync
# # dirsync.sync(paths['shared'], output_dir, 'diff')
# print("mine", time.time() - t0)
a = 234
rec_fix_shared(shared_base=paths['shared'], output_dir=output_dir)
def jinjafy_shared_templates_dir(paths, info):
tpd = paths['shared'] + "/templates"
......@@ -379,6 +551,7 @@ def mvfiles(source_dir, dest_dir):
if (os.path.isfile(full_file_name)):
shutil.copy(full_file_name, os.path.dirname(dest_dir))
@profile
def make_webpage(dosvg=True):
cinfo = class_information()
paths = get_paths()