def group_id_from_file(file):
    """Return the integer group id encoded in the directory part of ``file``.

    The directory path is split on ``" - "``; the second field is then split
    on single spaces and its second token is parsed as an integer. For
    example ``"<root>/123 - Group 5 - Some Name/report.pdf"`` yields ``5``.

    When the file lives in a subdirectory of a group folder that has no
    further ``" - "`` field, the token still carries the remainder of the
    path (``"7/src"`` for ``"<root>/123 - Group 7/src/main.tex"``). Anything
    from the first path separator onwards is therefore discarded before the
    conversion, so nested files resolve to the id of their group folder.

    :param file: Path of a file located in (or below) a group folder.
    :return: The group id as an ``int``.
    :raises IndexError: If the directory path has no ``" - "`` separator or
        the field after it has no second space-separated token.
    :raises ValueError: If the extracted token is not an integer.
    """
    directory = os.path.dirname(file)
    group_field = directory.split(" - ")[1]
    token = group_field.split(" ")[1]
    # Cut off trailing sub-directories; both separator styles are handled so
    # the result does not depend on the platform that produced the path.
    token = token.replace("\\", "/").split("/", 1)[0]
    return int(token)
zipfile.ZipFile(zip_file).extractall(path=tmpdir.name) + + # Read from PDF files: pdfs = glob.glob(tmpdir.name + "/**/*.pdf", recursive=True) for pdf in pdfs: pdf_parsed = tika.parser.from_file(pdf) - id =int(os.path.dirname(pdf).split(" - ")[1].split(" ")[1]) - students = re.findall('s\d\d\d\d\d\d', pdf_parsed['content'], flags=re.IGNORECASE) + id =group_id_from_file(pdf) # int(os.path.dirname(pdf).split(" - ")[1].split(" ")[1]) + if pdf_parsed['content'] is None: + students = [] + print("> Finding student ID. Warning: The pdf file", pdf, "appers to have no text content.") + else: + students = re.findall('s\d\d\d\d\d\d', pdf_parsed['content'], flags=re.IGNORECASE) + gps[id] += students + + # Collect from .tex files: + + # recursive_tex_collect() + texs = glob.glob(tmpdir.name + "/**/*.tex", recursive=True) + for tex in texs: + id = group_id_from_file(tex) + tex_parsed = recursive_tex_collect(tex) + tex_parsed = "\n".join([(l[:l.find("%")] if "%" in l else l) for l in tex_parsed.splitlines()]) + students = re.findall('s\d\d\d\d\d\d', tex_parsed, flags=re.IGNORECASE) gps[id] += students for id, students in gps.items():