From e529d5b69c40b96450bfb2a33dfdffad9ccb890f Mon Sep 17 00:00:00 2001
From: Tue Herlau <tuhe@dtu.dk>
Date: Thu, 7 Apr 2022 13:23:16 +0200
Subject: [PATCH] Updates for 02465; confirmed to work with 02450 (CE
 material); uploading v. 0.1.6.

---
 setup.py                        |  2 +-
 src/coursebox.egg-info/PKG-INFO |  2 +-
 src/coursebox/core/projects.py  | 29 +++++++++++++++++++++++++----
 3 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/setup.py b/setup.py
index e5eb159..6527f99 100644
--- a/setup.py
+++ b/setup.py
@@ -11,7 +11,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
 # beamer-slider
 setuptools.setup(
     name="coursebox",
-    version="0.1.4",
+    version="0.1.6",
     author="Tue Herlau",
     author_email="tuhe@dtu.dk",
     description="A course management system currently used at DTU",
diff --git a/src/coursebox.egg-info/PKG-INFO b/src/coursebox.egg-info/PKG-INFO
index a813cf8..9b103a8 100644
--- a/src/coursebox.egg-info/PKG-INFO
+++ b/src/coursebox.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: coursebox
-Version: 0.1.4
+Version: 0.1.5
 Summary: A course management system currently used at DTU
 Home-page: https://lab.compute.dtu.dk/tuhe/coursebox
 Author: Tue Herlau
diff --git a/src/coursebox/core/projects.py b/src/coursebox/core/projects.py
index ef3cb07..f771303 100644
--- a/src/coursebox/core/projects.py
+++ b/src/coursebox/core/projects.py
@@ -24,9 +24,8 @@ from jinjafy.plot.plot_helpers import get_colors
 import time
 from collections import defaultdict
 import zipfile
-import hashlib
 import pandas as pd
-
+from slider.slide import recursive_tex_collect
 
 def get_dirs(zf):
     zip = zipfile.ZipFile(zf)
@@ -317,6 +316,11 @@ def get_groups_from_learn_xslx_file(paths, sheet_number):
         all_groups = [{'group_id': id, 'student_ids': students} for id, students in dg.items()]
     return all_groups
 
+
+def group_id_from_file(file):
+    """Return, as an int, the second space-separated word of the text after the first " - " in the directory path of `file`."""
+    return int(os.path.dirname(file).split(" - ")[1].split(" ")[1])
+
 def search_projects(paths, sheet_number, patterns):
     zip_files = [paths['instructor_project_evaluations'] + "/zip%d.zip" % sheet_number]
     # print(zip_files)
@@ -327,11 +331,28 @@ def search_projects(paths, sheet_number, patterns):
         if os.path.exists(zip_file):
             tmpdir = tempfile.TemporaryDirectory()
             zipfile.ZipFile(zip_file).extractall(path=tmpdir.name)
+
+            # Read from PDF files:
             pdfs = glob.glob(tmpdir.name + "/**/*.pdf", recursive=True)
             for pdf in pdfs:
                 pdf_parsed = tika.parser.from_file(pdf)
-                id =int(os.path.dirname(pdf).split(" - ")[1].split(" ")[1])
-                students = re.findall('s\d\d\d\d\d\d', pdf_parsed['content'], flags=re.IGNORECASE)
+                id =group_id_from_file(pdf) # int(os.path.dirname(pdf).split(" - ")[1].split(" ")[1])
+                if pdf_parsed['content'] is None:
+                    students = []
+                    print("> Finding student ID. Warning: The pdf file", pdf, "appers to have no text content.")
+                else:
+                    students = re.findall('s\d\d\d\d\d\d', pdf_parsed['content'], flags=re.IGNORECASE)
+                gps[id] += students
+
+            # Collect from .tex files:
+
+            # recursive_tex_collect()
+            texs = glob.glob(tmpdir.name + "/**/*.tex", recursive=True)
+            for tex in texs:
+                id = group_id_from_file(tex)
+                tex_parsed = recursive_tex_collect(tex)
+                tex_parsed = "\n".join([(l[:l.find("%")] if "%" in l else l) for l in tex_parsed.splitlines()])
+                students = re.findall('s\d\d\d\d\d\d', tex_parsed, flags=re.IGNORECASE)
                 gps[id] += students
 
     for id, students in gps.items():
-- 
GitLab