From c383af5960a2dc2874d785d75f9e2ac523ac5fbe Mon Sep 17 00:00:00 2001
From: Tue Herlau <tuhe@dtu.dk>
Date: Sun, 11 Jun 2023 12:42:33 +0200
Subject: [PATCH] updates to coursebox for profound

---
 src/coursebox.egg-info/PKG-INFO               |  2 +-
 src/coursebox/core/projects.py                | 18 +++--
 src/coursebox/core/projects_info.py           | 67 +++++++++++++++++--
 .../material/homepage_lectures_exercises.py   |  2 +-
 4 files changed, 74 insertions(+), 15 deletions(-)

diff --git a/src/coursebox.egg-info/PKG-INFO b/src/coursebox.egg-info/PKG-INFO
index d60a66a..1918ac3 100644
--- a/src/coursebox.egg-info/PKG-INFO
+++ b/src/coursebox.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: coursebox
-Version: 0.1.17.10
+Version: 0.1.17.11
 Summary: A course management system currently used at DTU
 Home-page: https://lab.compute.dtu.dk/tuhe/coursebox
 Author: Tue Herlau
diff --git a/src/coursebox/core/projects.py b/src/coursebox/core/projects.py
index f771303..e9285a0 100644
--- a/src/coursebox/core/projects.py
+++ b/src/coursebox/core/projects.py
@@ -360,7 +360,7 @@ def search_projects(paths, sheet_number, patterns):
     return all_groups
 
 
-def unpack_zip_file_recursively(zip_file, destination_dir):
+def unpack_zip_file_recursively(zip_file, destination_dir, remove_zipfiles=False):
     """
     Unpack the zip_file (extension: .zip) to the given directory.
 
@@ -373,9 +373,9 @@ def unpack_zip_file_recursively(zip_file, destination_dir):
         if os.path.isdir(f):
             zipfiles = glob.glob(f + "/*.zip")
             for zp in zipfiles:
-                print("Unpacking student zip file>", zp)
                 zipfile.ZipFile(zp).extractall(path=os.path.dirname(zp) + "/")
-
+                if remove_zipfiles:
+                    os.remove(zp)
 
 def copy_populate_from_template(paths, info, sheet_number,zip_file):
     # Try to load group ids from the project pdf's
@@ -657,10 +657,14 @@ def gather_instructor_sheets(info):
     ts.save(out)
     ts.close()
     print("Collected xlsx instructor files. Using xlwings to load main worksheet, evaluate and save it")
-
-    book = xw.Book(out)
-    book.save(out)
-    book.close()
+    import xlwings
+    try:
+        book = xw.Book(out)
+        book.save(out)
+        book.close()
+    except xlwings.XlwingsError as e:
+        print("No xlsxwings installed. Sheets are not correctly evaluated.")
+        print(e)
 
 
 def weave_distribute_groups(info, groups, handins, shortnames):
diff --git a/src/coursebox/core/projects_info.py b/src/coursebox/core/projects_info.py
index cb707b1..30a4f12 100644
--- a/src/coursebox/core/projects_info.py
+++ b/src/coursebox/core/projects_info.py
@@ -4,6 +4,7 @@ import re
 import openpyxl
 import numpy as np
 # from line_profiler_pycharm import profile
+import pandas as pd
 
 INSTRUCTOR_ROW = 6
 INSTRUCTOR_CHECKER_ROW = 31
@@ -144,7 +145,7 @@ def parse_column(worksheet, report_number, column):
         cgroup['score'] = score
         cgroup['pct'] = score2pct(score)
 
-        if report_number == 3: # this obviously needs fixing for next semester.
+        if report_number == 99: # this obviously needs fixing for next semester.
             raise Exception("No report number 3 anymore. ")
             I = []
             for i in range(42): # max number of evaluation fields (irrelevant)
@@ -198,6 +199,59 @@ def get_groups_from_report(repn):
         cls.append(cp)
     return cls
 
+def load_reports(sheet=1):
+    """
+    Tue 2023: This is a new report-loading function which will return the reports in a dictionary format. It is more convenient than the
+    legacy methods, which probably need to be refactored at some point.
+
+    :param sheet: Worksheet to load; passed to pd.read_excel as sheet_name and also used as the report number.
+    :return:
+    """
+    # Load the reports from the excel file.
+    out = get_output_file()
+    raise Exception()
+    # print("> time elapsed", time.time() - t0)
+    # maximal_groups = []
+    # maximal_groups_students = []
+    # for repn in sheets:
+    #
+    #     pass
+    repn = sheet
+    # for repn in sheets:
+    cls = []
+    sheet = pd.read_excel(out, sheet_name=repn, index_col=None, header=None)
+    sheet = sheet.fillna('')
+    sheet = sheet.to_numpy()
+    # to_numpy()
+    for i in range(1, sheet.shape[1]):
+
+        # for i in range(2, wb.worksheets[repn].max_column + 1):
+        # print(i, wb.worksheets[repn].max_column)
+        # s = pd.read_excel(out, sheet_name=1)
+        cp = parse_column_numpy(sheet[:, i], report_number=repn, column=i)
+        # cp = parse_column(wb.worksheets[repn], report_number=repn, column=i)
+        if not cp['student_ids']:
+            break
+        cls.append(cp)
+
+    rs = {}
+    for g in cls:
+        students = ''
+
+        for sid in g['student_ids']:
+            student = students.get(sid, None)
+            if student is None:
+                if repn > 0:  # don't care about project 0 (group registration)
+                    print("Bad error: Student id %s not found. report evaluation malformed?" % sid)
+            else:
+                # student = student.pop()
+                student['reports'][repn] = g
+                if sid not in maximal_groups_students:
+                    maximal_groups.append(g)
+                    maximal_groups_students += g['student_ids']
+    print("> time elapsed", time.time() - t0)
+
+    pass
 
 # @profile
 def populate_student_report_results(students, verbose=False):
@@ -214,9 +268,11 @@ def populate_student_report_results(students, verbose=False):
         students[k]['reports'] = {i: None for i in range(4)}
     import pandas as pd
 
-    wb = openpyxl.load_workbook(out, data_only=True, read_only=True)
+    # wb = openpyxl.load_workbook(out, data_only=True, read_only=True)
     # Perhaps find non-empty cols (i.e. those with content)
-    print("> time elapsed", time.time() - t0)
+    if verbose:
+        print("> time elapsed", time.time() - t0)
+
     maximal_groups = []
     maximal_groups_students = []
 
@@ -232,8 +288,6 @@ def populate_student_report_results(students, verbose=False):
             # print(i, wb.worksheets[repn].max_column)
             # s = pd.read_excel(out, sheet_name=1)
             cp = parse_column_numpy(sheet[:,i], report_number=repn, column=i)
-
-
             # cp = parse_column(wb.worksheets[repn], report_number=repn, column=i)
             if not cp['student_ids']:
                 break
@@ -251,5 +305,6 @@ def populate_student_report_results(students, verbose=False):
                     if sid not in maximal_groups_students:
                         maximal_groups.append(g)
                         maximal_groups_students += g['student_ids']
-    print("> time elapsed", time.time() -t0)
+    if verbose:
+        print("> time elapsed", time.time() -t0)
     return students, maximal_groups
\ No newline at end of file
diff --git a/src/coursebox/material/homepage_lectures_exercises.py b/src/coursebox/material/homepage_lectures_exercises.py
index 89abe21..0b83c37 100644
--- a/src/coursebox/material/homepage_lectures_exercises.py
+++ b/src/coursebox/material/homepage_lectures_exercises.py
@@ -298,7 +298,7 @@ def fix_shared(paths, output_dir, pdf2png=False,dosvg=True,verbose=False, compil
     # update_source_cache = False
     source_extra = {}
     for rel in source:
-        if rel.endswith(".svg") and source[rel]['modified']:
+        if rel.endswith(".svg") and source[rel]['modified'] and dosvg:
             pdf_file = svg2pdf(shared_base + "/"+rel, crop=True, text_to_path=True)
             rel = os.path.relpath(pdf_file, shared_base)
             source_extra[rel] = dict(mtime=os.path.getmtime(pdf_file), hash=hash_file_(pdf_file), modified=True)
-- 
GitLab