From aa3f109bdd4bf5028877526605bc1b22b4faef52 Mon Sep 17 00:00:00 2001
From: Tue Herlau <tuhe@dtu.dk>
Date: Thu, 2 Sep 2021 17:24:47 +0200
Subject: [PATCH] Initial commit

---
 LICENSE                                       |  19 +
 pyproject.toml                                |   6 +
 setup.py                                      |  31 +
 src/coursebox/__init__.py                     |   6 +
 src/coursebox/book/__init__.py                |   0
 src/coursebox/book/exam_includer.py           | 270 +++++++
 src/coursebox/core/__init__.py                |   0
 src/coursebox/core/citations.py               | 119 +++
 src/coursebox/core/info.py                    | 319 ++++++++
 src/coursebox/core/info_paths.py              |  99 +++
 src/coursebox/core/projects.py                | 681 ++++++++++++++++++
 src/coursebox/core/projects_info.py           | 176 +++++
 src/coursebox/core/projects_plagiarism.py     | 140 ++++
 src/coursebox/material/__init__.py            |   0
 .../material/homepage_lectures_exercises.py   | 449 ++++++++++++
 src/coursebox/material/lecture_questions.py   | 181 +++++
 src/coursebox/material/snipper.py             | 461 ++++++++++++
 src/coursebox/setup_coursebox.py              |  20 +
 18 files changed, 2977 insertions(+)
 create mode 100644 LICENSE
 create mode 100644 pyproject.toml
 create mode 100644 setup.py
 create mode 100644 src/coursebox/__init__.py
 create mode 100644 src/coursebox/book/__init__.py
 create mode 100644 src/coursebox/book/exam_includer.py
 create mode 100644 src/coursebox/core/__init__.py
 create mode 100644 src/coursebox/core/citations.py
 create mode 100644 src/coursebox/core/info.py
 create mode 100644 src/coursebox/core/info_paths.py
 create mode 100644 src/coursebox/core/projects.py
 create mode 100644 src/coursebox/core/projects_info.py
 create mode 100644 src/coursebox/core/projects_plagiarism.py
 create mode 100644 src/coursebox/material/__init__.py
 create mode 100644 src/coursebox/material/homepage_lectures_exercises.py
 create mode 100644 src/coursebox/material/lecture_questions.py
 create mode 100644 src/coursebox/material/snipper.py
 create mode 100644 src/coursebox/setup_coursebox.py

diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..335ea9d
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,19 @@
+Copyright (c) 2018 The Python Packaging Authority
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..b5a3c46
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,6 @@
+[build-system]
+requires = [
+    "setuptools>=42",
+    "wheel"
+]
+build-backend = "setuptools.build_meta"
\ No newline at end of file
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..9954fcc
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,31 @@
+# Use this guide:
+# https://packaging.python.org/tutorials/packaging-projects/
+
+import setuptools
+
+with open("README.md", "r", encoding="utf-8") as fh:
+    long_description = fh.read()
+# NOTE(review): a bare `beamer-slider` expression stood on this line and raised NameError when setup.py ran; presumably a dependency meant for install_requires - confirm.
+setuptools.setup(
+    name="coursebox",
+    version="0.0.1",
+    author="Tue Herlau",
+    author_email="tuhe@dtu.dk",
+    description="A course management system currently used at DTU",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    license="MIT",
+    url='https://lab.compute.dtu.dk/tuhe/coursebox',
+    project_urls={
+        "Bug Tracker": "https://lab.compute.dtu.dk/tuhe/coursebox/issues",
+    },
+    classifiers=[
+        "Programming Language :: Python :: 3",
+        "License :: OSI Approved :: MIT License",
+        "Operating System :: OS Independent",
+    ],
+    package_dir={"": "src"},
+    packages=setuptools.find_packages(where="src"),
+    python_requires=">=3.8",
+    install_requires=['jinja2',],
+)
diff --git a/src/coursebox/__init__.py b/src/coursebox/__init__.py
new file mode 100644
index 0000000..e5cdb65
--- /dev/null
+++ b/src/coursebox/__init__.py
@@ -0,0 +1,6 @@
+##
+# Set configuration file.
+# conf = "point to main caller file that defines year, semester, etc."
+
+from coursebox.setup_coursebox import setup_coursebox
+
diff --git a/src/coursebox/book/__init__.py b/src/coursebox/book/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/coursebox/book/exam_includer.py b/src/coursebox/book/exam_includer.py
new file mode 100644
index 0000000..24ff069
--- /dev/null
+++ b/src/coursebox/book/exam_includer.py
@@ -0,0 +1,270 @@
+# -*- coding: utf-8 -*-
+import shutil
+import os
+import re
+import ntpath
+import glob
+
+
+CDIR = os.path.dirname(os.path.realpath(__file__))
+CDIR = CDIR.replace('\\','/')
+os.chdir(CDIR)
+OUTPUT_TEX_BASE = CDIR + "/../Latex/Exams"
+
+nm = []
+def rec_input(fname) :
+    """Return the text of TeX file *fname* with \\input{...} lines replaced by the named files' contents.
+
+    Each line containing an \\input{name} is replaced whole by the recursively expanded file
+    (".tex" is appended unless the name already ends in it), resolved relative to the
+    directory of *fname*. A trailing "%" before the newline (but not "\\%") is removed
+    together with the newline, so that line joins the next one.
+    """
+    with open(fname, "r") as f:  # context manager: the handle is closed even if reading fails
+        cc = f.readlines()
+    dirname = os.path.dirname(fname)
+    for i, line in enumerate(cc):
+        m = re.search(r'\\input{([^}]*)\}', line)
+        if m :
+            fname2 = dirname + '/' + m.group(1)
+            if os.path.splitext(fname2)[1] != ".tex" :
+                fname2 = fname2 + ".tex"
+            cc[i] = rec_input(fname2)
+
+    for (i,sc) in enumerate(cc) :
+        if sc.endswith('%\n') and not sc.endswith("\\%\n"): cc[i] = sc[:-2]
+    return ''.join(cc)
+
+
+def getgroups(s2,gstart,IncludeTags = False) :
+    """Collect every \\begin{gstart}...\\end{gstart} environment found in s2.
+
+    Returns a list of [text, refs, labels]: text is the stripped environment (including its
+    begin/end tags only when IncludeTags is true), refs are the tagfind() hits for 'ref'
+    followed by those for 'cref', and labels the hits for 'label' or [['', '']] if none.
+    """
+    opener = '\\begin{'+gstart+'}'
+    closer = '\\end{'+gstart+'}'
+    # Slice offsets that keep (IncludeTags) or drop the surrounding tags.
+    head_skip, tail_keep = (0, len(closer)) if IncludeTags else (len(opener), 0)
+    groups = []
+    stop = 0
+    while True:
+        start = s2.find(opener, stop)
+        if start < 0:
+            break
+        # The next search resumes from the position of this closing tag.
+        stop = s2.find(closer, start)
+        body = s2[start+head_skip:stop+tail_keep]
+        found_refs = tagfind(body,'ref') + tagfind(body,'cref')
+        found_labels = tagfind(body,'label')
+        if not found_labels:
+            found_labels = [['','']]
+        groups.append([body.strip(), found_refs, found_labels])
+    return groups
+    
+def tagfind(s, tag):
+    """Return [argument, full_match] for every ``\\tag{argument}`` in *s*."""
+    # Raw pattern (no invalid '\S' escape); [^}\s]* stops at the first '}' so that
+    # adjacent tags or a trailing '}' are not swallowed into the argument.
+    cp = re.compile(r'\\' + re.escape(tag) + r'\{([^}\s]*)\}')
+    return [[m.group(1), m.group(0)] for m in cp.finditer(s)]
+   
+def process_figures(fs, PREFIX, INPUT_TEX_BASE) :
+    """Rewrite float options to [H], copy every \\includegraphics file to OUTPUT_TEX_BASE and point the TeX at Exams/PREFIX<name>."""
+    fs = re.sub(r'\\begin{figure(}.*)', r'\\begin{figure}[H]',fs)
+    fs = re.sub(r'\\begin{table(}.*)', r'\\begin{table}[H]',fs)
+    # finditer scans the text as it is at this point; fs itself is rebound inside the loop.
+    for i in re.finditer(r'\\includegraphics(.*){([^}]*)}', fs):
+        tex_source_name = i.group(2)
+        sourcefile = INPUT_TEX_BASE + tex_source_name.strip()
+        tex_dest_name = PREFIX+ntpath.basename(sourcefile)
+        # The TeX source may omit the extension: try the name as written, then .pdf, then .png.
+        fnc = glob.glob(sourcefile) + glob.glob(sourcefile+".pdf")+glob.glob(sourcefile+".png")
+        sourcefile = fnc[0]  # IndexError here means no matching figure file exists
+        xx, file_extension = os.path.splitext(sourcefile)
+
+        destfile = OUTPUT_TEX_BASE + "/"+tex_dest_name
+        if destfile.find(file_extension) < 0 : destfile = destfile+file_extension
+
+        # re.escape: the file name is literal text, so '.', '+', '(' etc. must not act as regex syntax.
+        fs = re.sub(r'\\includegraphics(.*){'+re.escape(tex_source_name)+'}', r'\\includegraphics\1{Exams/'+tex_dest_name+'}',fs)
+        shutil.copy(sourcefile,destfile)
+
+    return fs
+    
+def process_question(PREFIX,INPUT_TEX_BASE,s2,question_number,figures_included) :  # extract question number question_number (1-based) from exam text s2 and write it and its solution to OUTPUT_TEX_BASE
+    s2 = s2 + " "
+    Qs = getgroups(s2,'question')
+    Ts = getgroups(s2,'table',True)
+    Fs = getgroups(s2,'figure',True)
+    Eqs = getgroups(s2,'equation',True)
+    Als = getgroups(s2,'align',True)    
+    ELEMENTS = Ts + Fs + Eqs + Als
+    # pfix: referenced label names that get PREFIX prepended; eleminc: referenced environments appended to the question.
+    pfix = []
+    eleminc = []
+    q = Qs[question_number-1]
+    referenced_labels = [r[0] for r in q[1]] # labels which we reference.         
+    referenced_labels = list(set(referenced_labels))
+    for rname in referenced_labels : 
+        pfix.append(rname)       
+        included_labels = [PREFIX + ll[0] for ll in q[2]]  + figures_included 
+        # Pull in the referenced table/figure/equation/align only when its label is not included already.
+        if (PREFIX+rname) not in included_labels :                                 
+            l = [e[0] for e in ELEMENTS if e[2][0][0] == rname]   
+            eleminc.append(l[0])        
+    # Character 1 of the answer body is the number of the correct option (1 = A); the option text starts at index 3.
+    solution = getgroups(q[0],'solution')[0][0]
+    sanswer = getgroups(q[0],'answer')[0][0]
+    answer = int(sanswer[1:2])
+    sanswer = sanswer[3:]
+    sanswer = "\\begin{enumerate}[label=\\Alph*]" + sanswer + "\\end{enumerate}"
+    # Question text = everything before the answer environment + pulled-in environments + the answer options.
+    question_text = q[0][0:q[0].find('\\begin{answer}')]
+    question_text = question_text.strip() + "\n" + '\n'.join(eleminc) + '\n'+sanswer.strip()
+        
+    question_text = process_figures(question_text,PREFIX,INPUT_TEX_BASE)
+    question_text = question_text.replace("\\thecorrect\\", "ABCD"[answer-1]+" ")
+    for pf in pfix:
+        question_text = question_text.replace('{'+pf+'}', '{'+PREFIX+pf+'}')
+        solution = solution.replace('{'+pf+'}', '{'+PREFIX+pf+'}')
+        
+    solution = solution.replace("\\thecorrect\\", "ABCD"[answer-1]+" ")    
+    figures_included = figures_included + [PREFIX + rl for rl in referenced_labels]
+    # Write <PREFIX[0:6]><number>.tex (question) and <PREFIX[0:6]><number>sol.tex (solution).
+    FOUT = OUTPUT_TEX_BASE + "/"+PREFIX[0:6]+str(question_number)+".tex"
+    f = open(FOUT,'w')
+    f.write(question_text.strip())
+    f.close()
+    
+    FOUT = OUTPUT_TEX_BASE + "/"+PREFIX[0:6]+str(question_number)+"sol.tex"
+    f = open(FOUT,'w')
+    f.write(solution.strip())
+    f.close()
+    return [question_text,solution,answer,figures_included]
+
+
+EXAM_INCLUDES = [[] for i in range(20)]
+if True :
+    # viz, hist, box, distance, sims 
+    EXAM_INCLUDES[2] = ['f2011q1', 'f2013q1','f2014q1',]    
+    EXAM_INCLUDES[3] = ['s2012q4','f2011q2', 'f2013q3','f2014q3','f2014q4','s2014q3',]      # PCA
+    
+    EXAM_INCLUDES[4] = ['f2014q10', 's2013Q18', 'f2013q18']  # Data, norms, variance, correlation, percentile.
+    EXAM_INCLUDES[5] = ['f2014q8','s2013q17', 'f2013q15'] # Bayes
+    EXAM_INCLUDES[6] = ['f2012q2', 'f2014Q2', 's2014q1'] # Visualization
+    
+    EXAM_INCLUDES[7] = ['s2013Q12', 's2014Q5', 'f2013q6', 'f2013q17','f2014q22']  # log. regression     
+    EXAM_INCLUDES[8] = ['f2013q14','f2013q13','f2014q9','f2014q21'] # TREES     
+    
+    EXAM_INCLUDES[9] = ['f2014q27', 's2013q13', 's2013q14', 'f2013q7','f2013q16','f2014q6','f2013q25','f2014q18',]  # overfitting, fw selection
+    
+    EXAM_INCLUDES[10] = ['f2013q9','f2014q23','f2014q12'] # KNN methods
+    EXAM_INCLUDES[11] = ['f2015q16', 'f2015q17', 'f2013q19', 'f2014q15'] # bayesian methods (classifier) + NB.    
+
+    EXAM_INCLUDES[12] = ['s2013q26']  # bias-variance, regularization. # PROBLEMS!
+    EXAM_INCLUDES[13] = ['f2013q23','f2014q5', 'f2013q12','f2014q19', ] # neural networks
+    
+    EXAM_INCLUDES[14] = ['f2014q24','f2014q25',]  # AUC, statistics (CIs)
+    EXAM_INCLUDES[15] = ['f2014q26', 'f2013q24',] # ensemble methods boosting
+    
+    ## PART 3
+    # Lecture 10   
+    EXAM_INCLUDES[16] = ['f2013q8','f2014q20','f2014q11','f2012q12'] # kmeans, hierarchical agglom
+    
+    #Lecture 11
+    EXAM_INCLUDES[17] = ['f2013q22','f2013q26', 'f2014q7',]     # EM/GMM
+    EXAM_INCLUDES[18] = ['f2013q11', 's2013q9','f2011q27', 'f2013q20','f2013q10','f2014q13','f2014q14'] # Density estimation
+
+    # Lecture 12    
+    EXAM_INCLUDES[19] = ['s2014q11', 's2014q12'] # Association rule learning
+
+AE = [b for e in EXAM_INCLUDES for b in e]
+for e in AE : 
+    if len([j for j in AE if j == e]) > 1 : 
+        print(e)
+    
+HOMEWORK_PROBLEMS = [[] for i in range(len(EXAM_INCLUDES))]
+# BLOCK 1
+HOMEWORK_PROBLEMS[2]= [(3,1), (2,1), (3,2)] # basic data and PCA
+HOMEWORK_PROBLEMS[3]= [(4,1), (4,2), (4,3)] # Measures of similarity and summary statistics.
+HOMEWORK_PROBLEMS[4]= [(5,1), (5,2), (6,1)] # Visualization, probabilities
+
+# BLOCK 2
+HOMEWORK_PROBLEMS[5]= [(8,1), (7,1), (7,2)] # Lecture 5, reg+trees
+HOMEWORK_PROBLEMS[6]= [(9,1), (9,2), (9,3)] # Lecture 6, crossval 
+HOMEWORK_PROBLEMS[7]= [(11,1), (11,2), (10,1)] # Lecture 7, KNN+Bayes+Naive-bayes.
+HOMEWORK_PROBLEMS[8]= [(13,1), (13,2), (13,3)] # Lecture 8, Bias/Var + Artificial NN.
+HOMEWORK_PROBLEMS[9]= [(14,1), (14,2), (15,1)] # Lecture 9, classimb/AUC + Ensemble
+
+# BLOCK 3
+HOMEWORK_PROBLEMS[10]= [(16,1), (16,2), (16,3)] # Lecture 9, classimb/AUC + Ensemble
+
+HOMEWORK_PROBLEMS[11]= [(18,1), (17,1), (17,2)] # Lecture 9, classimb/AUC + Ensemble
+HOMEWORK_PROBLEMS[12]= [(19,1), (16,2), (16,3)] # Lecture 9, classimb/AUC + Ensemble
+
+EXAM_BASE_DIR = CDIR+ "/../../Exam/"
+EXAM_TEX_CONTENT = {} 
+# Wrapper template for a chapter's problems file; %s receives the joined problem entries. Backslashes are doubled so no invalid escapes (\P) remain.
+rr = '''
+\\newpage \\newgeometry{left=\\PLLEFT,right=\\PLRIGHT,top=\\PLTOP,bottom=\\PLBOTTOM} \\begin{multicols}{2}
+\\section*{Problems} \\addcontentsline{toc}{section}{Problems}
+%s \\end{multicols}
+\\restoregeometry  \\newpage '''
+
+if __name__ == "__main__":
+    for chap,ae in enumerate(EXAM_INCLUDES):
+        allq = []
+        allsol = []
+        adex = 0
+        figs_included = []
+        nstrings = []
+        if not ae : continue
+        sas = []
+        for qsin in ae :
+            sem = qsin[0]
+            year = qsin[1:5]
+            q = int(qsin[6:])
+            # qsin reads e.g. 'f2011q1': semester letter, four-digit year, 'q', question number.
+            SEML = "Spring" if sem=="s" else "Fall"
+            nstrings.append("%s %s question %i"%(SEML,year,q) )
+            # Locate the exam's latex directory (two directory layouts are tried) ...
+            INPUT_TEX_BASE = EXAM_BASE_DIR + ("Exam %s %s/latex/"%(SEML,year))
+            if not os.path.isdir(INPUT_TEX_BASE) :
+                INPUT_TEX_BASE = EXAM_BASE_DIR + ("Exam %s %s/02450Exam%s%s/latex/"%(SEML,year,SEML,year))
+            # ... and prefer the *_book.tex variant of the exam source when it exists.
+            exam_tex_file = INPUT_TEX_BASE + ("02450ex_%s%s_book.tex"%(SEML,year))
+
+            if not os.path.isfile(exam_tex_file) :
+                exam_tex_file = INPUT_TEX_BASE + ("02450ex_%s%s.tex"%(SEML,year))
+
+            FIG_PREFIX = qsin[0:6] + "c"+str(chap)
+            # Cache the expanded exam text per prefix; dict.has_key() does not exist in Python 3.
+            if FIG_PREFIX not in EXAM_TEX_CONTENT :
+                EXAM_TEX_CONTENT[FIG_PREFIX] = rec_input(exam_tex_file)
+            s2 = EXAM_TEX_CONTENT[FIG_PREFIX]
+
+            [qtext,qsol,adex,fi] = process_question(FIG_PREFIX,INPUT_TEX_BASE,s2,q,figs_included)
+            figs_included = fi  # fi already extends the list passed in; adding it again only duplicated entries
+            sa = 'ABCD'[adex-1]
+            sas.append(sa)
+            # write joint solution file and joint answer thingy
+
+        cq = ['\\begin{prob}\\label{c%iprob%i}\\textbf{%s:} \n \\input{Exams/%s}\\end{prob}'%(chap,qn+1,nstrings[qn],qs) for (qn,qs) in enumerate(ae)]
+        cq = '\n'.join(cq)
+
+        cs = ['\\begin{sol}{c%iprob%i}\\textbf{The correct answer is %s:} \\input{Exams/%ssol}\\end{sol}'%(chap,qn+1,sas[qn],qs) for (qn,qs) in enumerate(ae)]
+        cs = '\n'.join(cs)
+
+        ss = rr%cq
+        FOUT = OUTPUT_TEX_BASE + "/c%iprob.tex"%chap
+        # Write the chapter's problem file and solution file; `with` closes them even if a write fails.
+        with open(FOUT,'w') as f:
+            f.write(ss)
+
+        FOUT = OUTPUT_TEX_BASE + "/c%isol.tex"%chap
+        with open(FOUT,'w') as f:
+            f.write(cs)
+
+    print("All Done")
\ No newline at end of file
diff --git a/src/coursebox/core/__init__.py b/src/coursebox/core/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/coursebox/core/citations.py b/src/coursebox/core/citations.py
new file mode 100644
index 0000000..57d3a5d
--- /dev/null
+++ b/src/coursebox/core/citations.py
@@ -0,0 +1,119 @@
+import os
+import pybtex
+import io
+import six
+from thtools.coursebox.core.info_paths import get_paths
+
+def get_references(bibfile, gi):
+    """Format the entries of the BibTeX file *bibfile*; return (refs, newref), or (None, None) if it is missing.
+
+    refs maps each entry key to its 'html', 'plain', 'label', 'filename' and 'references';
+    newref maps each gi['tex_command'] to the labels parsed from the matching gi['tex_aux'] file.
+    """
+    bibf = bibfile
+    if not os.path.exists(bibf):
+        return None, None  # two values: callers unpack the result, a bare None raised TypeError
+
+    pybtex_style = pybtex.plugin.find_plugin('pybtex.style.formatting', 'alpha')()
+    pybtex_html_backend = pybtex.plugin.find_plugin('pybtex.backends', 'html')()
+    pybtex_plain_backend = pybtex.plugin.find_plugin('pybtex.backends', 'plaintext')()
+
+    pybtex_parser = pybtex.database.input.bibtex.Parser()
+
+    with open(bibf, 'r', encoding='utf8') as f:
+        data = pybtex_parser.parse_stream(f)
+
+    itv = six.itervalues(data.entries)
+    data_formatted = pybtex_style.format_entries(itv)
+    refs = {}
+
+    if 'auxfile' in gi:
+        all_references = parse_aux(gi['auxfile'], bibtex=gi['bibtex'])
+    else:
+        all_references = {}
+
+    for entry in data_formatted:
+        output = io.StringIO()
+        output_plain = io.StringIO()
+        pybtex_plain_backend.output = output_plain.write
+        pybtex_html_backend.output = output.write
+        pybtex_html_backend.write_entry(entry.key, entry.label, entry.text.render(pybtex_html_backend))
+
+        pybtex_plain_backend.write_entry(entry.key, entry.label, entry.text.render(pybtex_plain_backend))
+
+        html = output.getvalue()
+        plain = output_plain.getvalue()
+        # The part following the one that contains "\url" is taken as the entry's URL.
+        entry.text.parts[-2].__str__()
+        url = ""
+        for i,p in enumerate(entry.text.parts):
+            if "\\url" in p.__str__():
+                url = entry.text.parts[i+1]
+                break
+        url = url.__str__()
+        i1 = html.find("\\textbf")
+        i2 = html.find("</span>", i1)
+        dht = html[i1:i2]
+        dht = dht[dht.find(">")+1:]
+        html = html[:i1] + " <b>"+dht+"</b> " + html[i2+7:]
+
+        plain = plain.replace("\\textbf ", "")
+        iu = plain.find("URL")
+        if iu > 0:
+            plain = plain[:iu]
+
+        refs[entry.key] = {'html': html,
+                           'plain': plain,
+                            'label': entry.label,
+                            'filename': url,
+                            'references': all_references}
+    # Extra reference tables: one parse_aux() result per configured TeX command.
+    newref = {}
+    ls = lambda x: x if isinstance(x, list) else [x]
+    if 'tex_command' in gi:
+        for cmd, aux, display in zip( ls(gi['tex_command']), ls(gi['tex_aux'] ), ls( gi['tex_display'] ) ):
+            ax = parse_aux(aux, bibtex=gi['bibtex'])
+            for k in ax:
+                ax[k]['pyref'] = display%(ax[k]['nicelabel'],)
+            newref[cmd] = ax
+
+    return refs, newref
+
+
+def parse_aux(auxfile, bibtex):
+    """Parse the \\newlabel{...@cref} lines of a LaTeX .aux file into a dict keyed by label.
+
+    auxfile is joined onto get_paths()['02450public']. A missing file prints the path,
+    warns and yields {}. Each value holds 'pyref' (a \\cite[...]{bibtex} string, or None
+    when bibtex is None), 'nicelabel', 'rawlabel' and 'number'.
+    """
+    auxfile = os.path.join(get_paths()['02450public'], auxfile)
+    if not os.path.exists(auxfile):
+        print(auxfile)
+        from warnings import warn
+        warn("Could not find file")
+        return {}
+
+    with open(auxfile, 'r') as f:
+        raw_lines = f.readlines()
+    entries = {}
+    for raw in raw_lines:
+        line = raw.strip()
+        # Only \newlabel records whose text contains "@cref" are used.
+        if not (line.startswith("\\newlabel") and "@cref" in line):
+            continue
+        brace = line.find("{")
+        key = line[brace+1:line.find("@cref}")]
+        # Payload: the text after "{{[" up to the next "}"; label ends at the first "]", number follows the last "]".
+        begin = line.find("{{[", brace)+3
+        val = line[begin:line.find("}", begin)]
+        label = val[:val.find("]")]
+        number = val[val.rfind("]")+1:]
+        if label == "equation":
+            nlabel = f"eq. ({number})"
+        else:
+            nlabel = label.capitalize() + " " + number
+        coderef = "\\cite[%s]{%s}"%(nlabel, bibtex) if bibtex is not None else None
+        entries[key] = {'pyref': coderef, 'nicelabel': nlabel, 'rawlabel': label, 'number': number}
+
+    return entries
diff --git a/src/coursebox/core/info.py b/src/coursebox/core/info.py
new file mode 100644
index 0000000..935c9d6
--- /dev/null
+++ b/src/coursebox/core/info.py
@@ -0,0 +1,319 @@
+from datetime import timedelta
+from datetime import datetime
+import thtools
+import os
+import shutil
+from thtools.thtools_base import list_dict2dict_list
+import thtools.jinjafy
+import openpyxl
+from thtools.coursebox.core.projects_info import populate_student_report_results
+from thtools.coursebox.core.info_paths import get_paths, semester_id, semester, year, today
+from thtools.coursebox.core.info_paths import core_conf
+import six
+import pybtex.database.input.bibtex
+import pybtex.plugin
+import io
+
+
+def xlsx_to_dicts(xlsx_file,sheet=None, as_dict_list=False):  # read one worksheet into a list of row dicts keyed by the row-1 headers
+    wb = openpyxl.load_workbook(xlsx_file, data_only=True)
+    if not sheet:
+        ws = wb.worksheets[0]
+    else:
+        ws = [ws for ws in wb.worksheets if ws.title == sheet]
+        if len(ws) < 1:
+            return None  # no worksheet carries the requested title
+        else:
+            ws = ws.pop()
+    dd = []
+    key_cols = [j for j in range(ws.max_column) if ws.cell(row=1, column=j + 1).value is not None]  # 0-based indices of columns with a header
+    for i in range(1, ws.max_row):  # data rows are worksheet rows 2..max_row
+        rdict = {}
+        if not any( [ws.cell(row=i+1, column=j+1).value is not None for j in key_cols] ):
+            continue  # row is empty in every keyed column
+        for j in key_cols:
+            key = ws.cell(row=1, column=j+1).value
+            if key is not None:
+                key = key.strip() if isinstance(key,str) else key
+                value = ws.cell(row=i + 1, column=j + 1).value
+                value = value.strip() if isinstance(value,str) else value
+                if isinstance(value, str):  # the strings 'True'/'False' become booleans
+                    if value == 'True':
+                        value = True
+                    if value == 'False':
+                        value = False
+                rdict[key] = value
+        dd.append(rdict)
+    # Optionally merge the rows into one dict of per-key lists via list_dict2dict_list.
+    if as_dict_list:
+        dl = list_dict2dict_list(dd)
+        for k in dl.keys():
+            x = [v for v in dl[k].tolist() if v is not None]  # drop None entries
+            if len(x) == 1: x = x.pop()  # a single remaining value is unwrapped from its list
+            dl[k] = x
+        dd = dl
+    return dd
+
+def get_enrolled_students():
+    """Return the 'students' sheet rows as {id: row}. Column names are lower-cased with spaces removed;
+    a non-empty 'Study number' replaces 'Username', which is stored as 'id'. Raises Exception on an empty id."""
+    rows = xlsx_to_dicts(get_paths()['information.xlsx'], sheet='students')
+    by_id = {}
+    for row in rows:
+        if row['Study number']:
+            row['Username'] = row['Study number']
+        record = {}
+        for column, value in row.items():
+            name = column.lower().replace(" ", "")
+            if name == "studynumber": continue
+            if name == "username":
+                name = "id"
+                if not value or len(value) == 0:
+                    print("Bad study id: ")
+                    print(row)
+                    raise Exception("malformed course configuration file, bad student id")
+            record[name] = value
+
+        by_id[record['id']] = record
+    return by_id
+
+
+def get_file_dir_or_template(filepath, templatepath):
+    """Call thtools.ensure_dir_exists on filepath's directory, then copy templatepath to filepath if filepath is missing."""
+    thtools.ensure_dir_exists(os.path.dirname(filepath))
+    if not os.path.exists(filepath): shutil.copyfile(templatepath, filepath)  # was os.path.exists(os.path.dirname): a function object, TypeError
+
+
+def get_instructors():
+    """Return the rows of the 'instructors' sheet of the information workbook."""
+    workbook_path = get_paths()['information.xlsx']
+    return xlsx_to_dicts(workbook_path, sheet='instructors')
+
+def continuing_education():  # returns core_conf['continuing_education_mode'], looked up on the info_paths module at call time
+    return thtools.coursebox.core.info_paths.core_conf['continuing_education_mode']
+
+def first_day_of_class(info):
+    """Return the datetime of the first teaching day.
+
+    Continuing-education mode reads it from info ('first-month', 'first-day' and the first
+    'hour'/'minute' entries). Otherwise it is the first Monday on or after 1 January (spring)
+    or 1 August, plus four weeks, moved forward to weekday info['day_of_week'] (default 1), 13:00.
+    """
+    if continuing_education():
+        return datetime(year=year(), month=info['first-month'], day=info['first-day'], hour=info['hour'][0], minute=info['minute'][0])
+
+    day = datetime(year=year(), month=1 if semester() == 'spring' else 8, day=1, hour=13, minute=0)
+    day += timedelta(days=-day.weekday() % 7)  # scroll to Monday (weekday 0)
+    day += timedelta(weeks=4)  # add 4 weeks to get into the 13 week period
+
+    dow = int(info.get('day_of_week', 1))
+    while day.weekday() != dow:
+        day += timedelta(days=1)
+    return day
+
+
+def find_tex_cite(s, start=0, key="\\cite"):
+    """Find the first key[txt]{reference} command in s at or after start.
+
+    Returns ((i, j), reference, txt) with i/j the indices of the command start and of its
+    closing brace and txt None without a [..] part; no match gives ((-1, None), None, None).
+    """
+    i = s.find(key, start)
+    if i < 0:
+        return (i,None), None, None
+    j = s.find("}", i)
+    cite = s[i:j + 1]
+    txt = cite[cite.find("[") + 1:cite.find("]")] if cite.find("[") > 0 else None
+    return (i, j), cite[cite.find("{") + 1:cite.find("}")], txt
+
+def lectures(info, pensum=None):  # build one schedule dict per row of the 'lectures' sheet; returns (lectures, pensum)
+    ow = timedelta(days=7)
+    d = first_day_of_class(info)
+    # Holiday week index: info['holiday_week'] when given, else 9 in spring and 6 otherwise.
+    holiday = int(info['holiday_week']) if "holiday_week" in info else (9 if semester() == "spring" else 6)
+    paths = get_paths()
+    lectures = []
+    lecture_info = xlsx_to_dicts(paths['information.xlsx'], sheet='lectures')
+    # First pass: split each lecture's resources into url/shorturl/description and, given pensum, render \cite commands in 'reading' as HTML.
+    for lecture in lecture_info:
+        em = lecture["resources"]
+        r = []
+        if em:
+            ems = em.split("\n")
+            for e in ems:
+                e = e.strip()
+                url = e[:e.find(" ")]
+                description = e[e.find(" ") + 1:]
+                shorturl = url[:url.find("/",url.find("."))]
+                r.append( {'url': url, 'shorturl': shorturl, 'description': description})
+        lecture["resources"] = r
+        if pensum is not None:
+            rd, html = lecture['reading'], ""
+            while True:
+                i = rd.find("\\cite")
+                if i < 0: break
+                j = rd.find("}", i)
+                html += rd[:i]
+                cite = rd[i:j + 1]
+                rd = rd[j+1:]
+                if cite.find("[") > 0:
+                    sr = cite[cite.find("[")+1:cite.find("]")]
+                else:
+                    sr = None
+                key = cite[cite.find("{")+1:cite.find("}")]
+                html += "[<b>" + pensum[key]['label'] + "</b>" + (", " + sr if sr is not None else "") + "]"
+                pensum[key]['suggested'] = True
+            html += rd
+            lecture['reading_html'] = html
+    # Continuing education: no holiday week; dd holds the gaps between consecutive 'day' values of the 'ce' sheet.
+    if continuing_education():
+        ice = xlsx_to_dicts(paths['information.xlsx'], sheet='ce', as_dict_list=True)
+        holiday = -1
+        dd = [ice['day'][i] - ice['day'][i-1] for i in range(1, len(ice['day']))]
+        dd.append(0)
+    # Second pass: record the current date d for lecture i, then advance d for the next one and merge in the sheet row.
+    for i in range(0, len(lecture_info)):
+        l = dict()
+        l['year'] = d.year
+        l['month'] = d.strftime('%B')
+        l['day'] = d.day
+        l['date'] = d
+        l['preceded_by_holiday'] = i == holiday
+        if not continuing_education():
+            d = d + ow
+            if i == holiday - 1: d = d + ow
+            if d.month == 5 and d.day == 8: # Constitution Day ("grundlovsdag") - NOTE(review): the test is for 8 May, confirm the intended date
+                d += timedelta(days=4)
+        else:
+            d = d + timedelta(days=dd[i-0] if i > 1 else 0)  # NOTE(review): dd[i-0] is simply dd[i]; confirm the intended offset
+            d = d.replace(hour=ice['hour'][i-1], minute=ice['minute'][i-1])
+
+        info = lecture_info[i]
+
+        ir = info.get('reading', "")
+        info['reading_long'] = ir.replace("C", "Chapter ") if ir else ""
+
+        hwp =  info['homework_problems']
+        info['homework_problems_long'] = hwp.replace("P", "Problem ") if hwp else ""
+        if info["learning_objectives"]:
+            info["learning_objectives"] = [s.strip() for s in info["learning_objectives"].split("\n")]
+        l.update(info)
+        lectures.append(l)
+    return lectures, pensum
+
+def get_forum(paths):
+    """Return the 'forum' sheet as a list of dicts, or None when the sheet is missing.
+
+    Only the first key (column header) of the sheet is used and it is comma-separated: it names
+    the fields (brackets removed, first word, lower-cased) and every cell holds one value per field.
+    """
+    a = xlsx_to_dicts(paths['information.xlsx'], sheet='forum', as_dict_list=True)
+    if a is None:
+        return a
+    from collections import defaultdict
+    columns = defaultdict(list)
+    header = list(a.keys())[0]
+    for pos, raw in enumerate(header.split(",")):
+        field = raw.replace("[", "").replace("]", "").split(" ")[0].lower()
+        for cell in a[header]:
+            # position pos of the cell belongs to field number pos of the header
+            columns[field].append(cell.split(",")[pos])
+
+    # Transpose the per-field columns into one dict per row.
+    n_rows = len(list(columns.values())[0])
+    return [{field: vals[row] for field, vals in columns.items()} for row in range(n_rows)]
+
+def class_information():  # assemble the course-wide information dict from core_conf and the sheets of the information workbook
+    course_number = core_conf['course_number']
+    piazza = 'https://piazza.com/dtu.dk/%s%s/%s' % (semester().lower(), year(), course_number)
+    paths = get_paths()
+    teachers = xlsx_to_dicts(paths['information.xlsx'], sheet='teachers')
+    students, all_groups = populate_student_report_results( get_enrolled_students() )
+    continuing_education_mode = core_conf['continuing_education_mode']
+    faq = xlsx_to_dicts(paths['information.xlsx'], sheet='faq')
+
+    # Base values; general_information, the 'ce' sheet (CE mode) and core_conf are merged in further down.
+    d = {'year': year(),
+         'piazza': piazza, # deprecated.
+         'course_number': course_number,
+         'semester': semester(),
+         'reports_handout': [1,6],
+         'reports_handin': [6,11],
+         'semester_id': semester_id(),
+         'today': today(),
+         'instructors': get_instructors(),
+         'students': students,
+         'teachers': teachers,
+         "CE": continuing_education_mode,
+         "all_groups": all_groups,
+         "faq": faq,
+         'forum': get_forum(paths),
+         }
+    # Written exam: pair solution_q with solution_a when the sheet provides them.
+    written_exam = xlsx_to_dicts(paths['information.xlsx'], sheet='exam', as_dict_list=True)
+    if "solution_q" in written_exam:
+        written_exam['solution'] = {n:a for n,a in zip( written_exam['solution_q'], written_exam['solution_a'] ) }
+
+    d['written_exam'] = written_exam
+    # general_information is a key/value sheet: fold it into a flat dict, mapping 'True'/'False' strings to booleans.
+    gi = xlsx_to_dicts(paths['information.xlsx'], sheet='general_information', as_dict_list=True)
+    for (k, v) in zip(gi['key'], gi['value']):
+        if v == 'True':
+            v = True
+        if v == 'False':
+            v= False
+        gi[k] = v
+    del gi['key']
+    del gi['value']
+
+    from thtools.coursebox.core.citations import get_references
+    if "pensum_bib" in gi:
+        refs, nrefs = get_references(paths['02450public'] + "/" + gi['pensum_bib'], gi)
+        d['references'], d['new_references'] = refs, nrefs
+    else:
+        d['references'], d['new_references'] = None, None
+    d.update(gi)
+    # set first day of class if CE
+    if continuing_education_mode:
+        ice = xlsx_to_dicts(paths['information.xlsx'], sheet='ce', as_dict_list=True)
+        d.update(ice)
+
+    d['lectures'], d['references'] = lectures(info=d, pensum=d['references'])
+
+    d['first_day_of_class'] = first_day_of_class(info=d)
+    d['day_of_week_str'] = d['first_day_of_class'].strftime('%A')
+    if "piazza" in gi:
+        d['piazza'] = gi['piazza']
+    # Normalise the freeze flags in gi. NOTE(review): d was already updated from gi above, so this loop does not change d.
+    for k in ['freeze_report_evaluation', 'freeze_grades']:
+        freeze = gi[k]
+        freeze = freeze == "True" if isinstance(freeze, str) else freeze
+        freeze = freeze[0] if isinstance(freeze, list) else freeze
+        gi[k] = freeze
+
+    for k,v in core_conf.items():
+        d[k] = v
+
+    d['CE2'] = gi.get("days", 5) == 2 if continuing_education_mode else False
+    d['CE5'] = gi.get("days", 5) == 5 if continuing_education_mode else False
+    # NOTE(review): 'True' strings from gi became booleans above and a bool never equals 'True'; unless core_conf supplies the string, both flags end up False - confirm.
+    d['freeze_report_evaluation'] = d['freeze_report_evaluation'] == 'True'
+    d['freeze_grades'] = d['freeze_grades'] == 'True'
+
+    d['rooms'] = xlsx_to_dicts(paths['information.xlsx'], sheet='rooms')
+    fix_instructor_comma(d['rooms'], d['instructors'])
+
+    d['teams'] = xlsx_to_dicts(paths['information.xlsx'], sheet='teams')
+    fix_instructor_comma(d['teams'], d['instructors'])
+    return d
+
+def fix_instructor_comma(dd, instructors):
+    """Replace each row's comma-separated 'instructors' string by the matching instructor dicts (in place)."""
+    for row in dd:
+        resolved = []
+        for shortname in (part.strip().lower() for part in row['instructors'].split(",")):
+            hits = [person for person in instructors if person['shortname'] == shortname]
+            if not hits:
+                print("Did not find shortname: " + shortname + ". This seems bad.")
+            resolved.extend(hits)
+        row['instructors'] = resolved
\ No newline at end of file
diff --git a/src/coursebox/core/info_paths.py b/src/coursebox/core/info_paths.py
new file mode 100644
index 0000000..313e325
--- /dev/null
+++ b/src/coursebox/core/info_paths.py
@@ -0,0 +1,99 @@
+import thtools
+import os
+import shutil
+from datetime import datetime
+from warnings import warn
+
+# don't remove this one. Set by setup_toolbox
+core_conf = {}
+
+def get_paths():
+    """Build the dictionary of directories and files used by the course tools.
+
+    Everything is derived from core_conf: the public repository is taken to be two levels above
+    core_conf['working_dir'] and the private/instructors/students repositories are located next to
+    it. The semester directory is created if needed; an Exception is raised when the main .xlsx
+    configuration file does not exist.
+    """
+    working_dir = core_conf['working_dir']
+    # name of the directory two levels above working_dir, minus its last six characters
+    num = os.path.basename(os.path.dirname(os.path.dirname(working_dir)))[:-6]  # course number
+    course_number = core_conf['course_number']
+
+    root_public = os.path.normpath(working_dir + "/../..")
+    root_private = os.path.normpath(root_public + "/../%sprivate"%num)
+    root_instructors = os.path.normpath(root_private + "/../%sinstructors"%num)
+    root_students = os.path.normpath(root_private + "/../%sstudents" % num)
+    # only the public and private roots are converted to forward slashes
+    root_public, root_private = root_public.replace("\\", "/"), root_private.replace("\\", "/")
+
+    if os.path.isdir(root_private):
+        semester_path = root_private + "/semesters/" + semester_id()
+    else:
+        root_private = root_public+'/pythontools/imlbox'
+        warn('pythontools/imlbox/core/info_paths.py: 02450private repo not found')
+        warn('Using mock info from resources folder at:')
+        warn(root_private)
+        # Tue: always overwrite semester path.
+        semester_path = root_private +"/resources/mock_semesters/" + semester_id()
+    thtools.ensure_dir_exists(semester_path)
+
+    # the configuration file is looked up without and then with the course number as prefix
+    main_conf = semester_path + "/" + semester_id() + ".xlsx"
+    if not os.path.exists(main_conf):
+        main_conf = f"{semester_path}/{course_number}_{semester_id()}.xlsx"
+        if not os.path.exists(main_conf):
+            raise Exception("Main config file not found " + main_conf)
+
+    ce_prefix = "CE" if core_conf['continuing_education_mode'] else ""
+    paths = {
+        '02450private': root_private,
+        '02450public': root_public,
+        '02450instructors': root_instructors,
+        '02450students': root_students,
+        'shared': root_public+"/shared",
+        'exams': root_private+"/Exam",
+        'course_number': course_number,
+        'semester': semester_path,
+        'information.xlsx': main_conf,
+        'homepage_template': "%s/WEB/index_partial.html"%root_public,
+        'homepage_out': "%s/WEB/%sindex.html"%(root_public, ce_prefix),
+        'pdf_out': "%s/%spdf_out"%(root_public, ce_prefix),
+        'instructor': root_public + "/Exercises",
+        'shared_latex_compilation_dir': root_public + "/Exercises/LatexCompilationDir/Latex",
+        'book': root_public + "/MLBOOK/Latex",
+        'lectures': root_public + "/Lectures",
+        'instructor_project_evaluations': "%s/project_evaluations_%s" % (root_instructors, semester_id()),
+        'project_evaluations_template.xlsx': root_private +"/ReportEvaluation/%s_project_template.xlsx"%num,
+        'collected_project_evaluations.xlsx': semester_path + "/"+course_number+"_project_" + semester_id() + ".xlsx",
+        'electronic_exam_handin_dir': semester_path + "/exam/electronic_handin",
+        'exam_results_template.xlsx': root_private +"/Exam/%s_results_TEMPLATE.xlsx"%num,
+        'exam_instructions': root_public + "/ExamInstructions",
+    }
+    # create the evaluation folder only when its parent directory already exists
+    evaluations_dir = paths['instructor_project_evaluations']
+    if os.path.exists(os.path.dirname(evaluations_dir)) and not os.path.isdir(evaluations_dir):
+        os.mkdir(evaluations_dir)
+    return paths
+
+
+def semester():
+    """Return the identifier of the current teaching period in lower case.
+
+    This is core_conf['continuing_education_month'] in continuing education mode and
+    core_conf['semester'] otherwise.
+    """
+    # All three settings are read up front, so every key has to be present in core_conf.
+    ce_mode = core_conf['continuing_education_mode']
+    ce_month, regular_semester = core_conf['continuing_education_month'], core_conf['semester']
+    return (ce_month if ce_mode else regular_semester).lower()
+
+def year():
+    return core_conf['year']  # configured year; semester_id() formats it with %i
+
+def semester_id():
+    """Return '<prefix><year><semester>', where the prefix is "CE" in continuing education mode and empty otherwise."""
+    return "%s%i%s" % ("CE" if core_conf['continuing_education_mode'] else "", year(), semester())
+
+def today():
+    return datetime.today()  # current local date and time as a naive datetime (no tzinfo)
\ No newline at end of file
diff --git a/src/coursebox/core/projects.py b/src/coursebox/core/projects.py
new file mode 100644
index 0000000..307e8cb
--- /dev/null
+++ b/src/coursebox/core/projects.py
@@ -0,0 +1,681 @@
+import os
+import shutil
+import openpyxl
+import numpy as np
+import itertools
+import math
+import glob
+import zipfile
+from tika import parser
+from openpyxl.worksheet.datavalidation import DataValidation
+from openpyxl.utils import get_column_letter
+import matplotlib.pyplot as plt
+import langdetect
+import xlwings as xw
+
+from thtools.coursebox.core.projects_info import get_output_file, INSTRUCTOR_ROW, STUDENT_ID_ROW, parse_column
+from thtools.coursebox.core.projects_info import EVALUATION_ROW_END, EVALUATION_ROW_START, WEIGHT_ROW_START, RANGE_MIN_COL, DELTA_ALLOWED_ROW
+from thtools.coursebox.core.info import get_paths, class_information, semester_id
+from thtools.coursebox.core import projects_info
+from thtools.coursebox.core.projects_plagiarism import plagiarism_checker
+
+from thtools.cache import cache_contains_dir, cache_update_dir
+from thtools.plot.plot_helpers import get_colors
+import time
+
+from collections import defaultdict
+import zipfile
+def get_dirs(zf):
+    """Return the unique, non-empty directory names of the members of the zip archive `zf` (unordered)."""
+    with zipfile.ZipFile(zf) as archive:  # context manager closes the file handle again
+        dirs = {os.path.dirname(member) for member in archive.namelist()}
+    return [d for d in dirs if d]
+
+def fix_handins_fuckup(project_id=2):
+    """ Handle the problem with multiple hand-ins in DTU learn.
+
+    For every group with more than one hand-in directory in zip<project_id>.zip, the most recent
+    hand-in(s) are kept for evaluation; the others are listed per TA in do_not_evaluate.txt (also
+    printed). Returns (ta_do, ta_do_not): dicts from TA shortname to hand-ins to evaluate / to skip. """
+    import zipfile
+    paths = get_paths()
+    from thtools.coursebox.core.info import class_information
+    info = class_information()
+    zf = paths['instructor_project_evaluations'] + f"/zip{project_id}.zip"
+    tas = [i['shortname'] for i in info['instructors'] ]
+    ta_links = {i['shortname']: i for i in info['instructors']}
+
+    ta_reports = {}  # hand-in directory -> shortname of the TA whose zip file contains it
+    for ta in tas:
+        fname = paths['instructor_project_evaluations']  + f"/project_{project_id}_{ta}.zip"
+        for r in get_dirs(fname):
+            if r in ta_reports:
+                raise Exception  # the same hand-in directory occurs in the zip files of two TAs
+            ta_reports[r] = ta
+
+    fls = get_dirs(zf)
+    # NOTE(review): assumes directory names like '..-..-<word> <group id> ..-<d Month, YYYY HMM AM/PM>' -- confirm
+    d = defaultdict(lambda: [])
+    for l in fls:
+        group_id = int(l.split("-")[2].strip().split(" ")[1])
+        date = l.split("-")[-1].strip().split(" ")
+        hm = date[-2]  # time of day without separator, e.g. "959" or "1159"
+        if len(hm) < 4:
+            hm = "0" + hm
+        hm = hm[:2] + ":" + hm[2:]
+        date[-2] = hm
+        date = " ".join(date)
+        import datetime
+        date_time_obj = datetime.datetime.strptime(date, '%d %B, %Y %I:%M %p')
+
+        d[group_id].append( {'file': l, 'date': date_time_obj, 'ta': ta_reports[l], 'group_id': group_id } )
+
+    d = {k: v for k, v in d.items() if len(v)  > 1}  # keep only groups with several hand-ins
+
+    ta_do_not = defaultdict(lambda: [])
+    ta_do = defaultdict(lambda: [])
+    for group_id, reports in d.items():
+        recent = max( r['date'] for r in reports)
+        for r in reports:
+            if r['date'] == recent:
+                ta_do[r['ta']].append(r)
+            else:
+                ta_do_not[r['ta']].append(r)
+
+    fname = paths['instructor_project_evaluations'] + "/do_not_evaluate.txt"
+    with open(fname, 'w') as f:
+        for ta, reports in ta_do_not.items():
+            f.write(ta_links[ta ]['name'] + f" ({ta})" + "\n")
+            for r in reports:
+                f.write("DO NOT EVALUATE: " + r['file'] + "\n")
+            f.write("\n")
+            f.write("\n")
+    with open(fname, 'r') as f:
+        print(f.read())
+    return ta_do, ta_do_not
+
+
+def handle_projects(verbose=False, gather_main_xlsx_file=True, plagiarism_check=False):
+    """Set up and check the instructors' project evaluation workbooks.
+
+    Optionally regathers the collected .xlsx file from the instructor workbooks, creates missing
+    instructor workbooks from the template (reports 1-3 only once their zip file is present), plots
+    the scores per TA and prints a summary of the PARSE_ERRORS_*.txt files.
+    Returns early when info['CE'] is set or after a plagiarism check. `verbose` is not used.
+    """
+    paths = get_paths()
+    info = class_information()
+    if info['freeze_report_evaluation']:
+        print("> Report evaluations are FROZEN, meaning TA changes are no longer taken into account")
+        print("> grades, etc. now relies on report resource file:")
+        print(paths['collected_project_evaluations.xlsx'])
+    if info['CE']:
+        return
+    if plagiarism_check:
+        plagiarism_checker(paths, info)
+        return
+
+    instructor_path = paths['instructor_project_evaluations']
+    cache_changed_xlsx_files = False
+    if gather_main_xlsx_file:
+        cache_base = paths['instructor_project_evaluations']
+        if cache_contains_dir(cache_base,cache_base, pattern="*.xlsx") and os.path.exists(paths['collected_project_evaluations.xlsx']):
+            pass
+        else:
+            cache_changed_xlsx_files = True
+            gather_instructor_sheets(info)
+            print("> Done gathering main .xlsx file from instructor .xlsx files")
+            cache_update_dir(cache_base, cache_base, pattern="*.xlsx")
+            info = class_information()
+
+    zip1 = instructor_path + "/zip1.zip"
+    zip2 = instructor_path + "/zip2.zip"
+    zip3 = instructor_path + "/zip3.zip"
+    zips = [None, zip1, zip2, zip3]
+    for j,zf in enumerate(zips):
+        ifiles = get_instructor_xlsx_files(info, j)
+        ex =  [os.path.exists(f) for (f,_) in ifiles]
+        if any(ex) and not all(ex):
+            raise Exception("Partial list of instructor files") # ensure there are either no files or all files exist
+        instructor_files_exist = ex.pop()
+        if instructor_files_exist:
+            # if instructor files are there, we should do nothing
+            continue
+        else: # instructor files do not exist
+            if j == 0:
+                copy_populate_from_template(info, sheet_number=j, zip_file=None)
+            elif os.path.exists(zf):
+                # make a copy of report template and populate it with groups obtained from previous report evaluation.
+                copy_populate_from_template(info, sheet_number=j, zip_file=zf)
+            else:
+                print("When available, please move downloaded copy of all reports from campusnet to destination:")
+                print(zf)
+
+    mkboxplots(info['students'],paths)
+
+    if cache_changed_xlsx_files:
+        # Recompute nag files if instructor xlsx files have been changed
+        compute_error_files(info, paths)
+    nags = []
+    fs = glob.glob(paths['instructor_project_evaluations'] + "/PARSE_ERRORS_*.txt")
+    for f in fs:
+        name = f.split("_").pop()[:-4]  # text after the last underscore, without ".txt"
+        if name != "transfer":
+            v = [i for i in info['instructors'] if i['shortname'] == name]
+            with open(f, "r") as file:
+                s = file.read()
+                nlines = len(s.split("\n"))
+            if len(v) == 0:
+                raise Exception("Empty instructor list. what the ...")
+            ins_email = v[0]['email']
+            nags.append((name, ins_email, nlines))
+    Nerrors = sum([n[-1] for n in nags])
+    if Nerrors > 0:
+        print("\n> %i errors. These instructors have parse errors (.txt)" % sum([n[-1] for n in nags]))
+        print("; ".join([email for (_, email, _) in nags]))
+        print(", ".join([name.capitalize() for (name, _, _) in nags]))
+        print("TA/Errors: " + ", ".join(["%s:%i" % (name.capitalize(), lines) for (name, _, lines) in nags]))
+        print("---")
+    else:
+        print("No parse errors found")
+
+
+def compute_error_files(info, paths):
+    """Validate the instructor evaluation workbooks and rewrite the PARSE_ERRORS_<shortname>.txt files.
+
+    All existing PARSE_ERRORS_*.txt files in paths['instructor_project_evaluations'] are removed and
+    one file is written per instructor that has at least one problem (one problem per line).
+
+    :param info: class information, passed on to get_instructor_xlsx_files.
+    :param paths: path dictionary; 'instructor_project_evaluations' is the only key read here.
+    """
+    print("Recomputing nag files...")
+    ERRORS = dict()
+    for repn in range(3, -1, -1):
+        ifiles = get_instructor_xlsx_files(info, sheet=repn)
+        # Only these names may own an error file; handle_projects raises on any other name.
+        known_names = {name for _, name in ifiles}
+        all_groups = []  # (instructor, student id, group) for every student seen in this report
+        for out, ins in ifiles:
+            if not os.path.exists(out):
+                continue
+            ERRORS.setdefault(ins, [])
+            wb = openpyxl.load_workbook(out, data_only=True)
+            if len(wb.worksheets) > 1:
+                es = os.path.basename(out) + "> Entire workbook is malformed. Workbook must only contain a single sheet. Fix ASAP; aborting further operations."
+                ERRORS[ins].append(es)
+                continue
+
+            cls = []
+            for i in range(2, wb.worksheets[0].max_column + 1):
+                cp = projects_info.parse_column(wb.worksheets[0], report_number=repn, column=i)
+                if not cp['student_ids']:
+                    continue
+                cls.append(cp)
+
+            for g in cls:
+                gins = g.get('instructor')
+                err_label = "File: '%s', column with student ids: '%s'"%(os.path.basename(out), ', '.join(g['student_ids']))
+                if gins != ins:
+                    gins = "Null" if not gins else gins
+                    es = err_label +"> Corrupted sheet. Instructor name: '"  + gins + "' not found. Should perhaps be: '" + ins+"'?"
+                    print(es)
+                    ERRORS[ins].append(es)
+                    continue
+
+                if repn >= 1 and g.get("score", None) is None and len(g['student_ids'])>0:
+                    es = err_label + "> Report not scored."
+                    ERRORS[ins].append(es)
+
+                if repn >= 1 and len(g.get('student_ids', [])) > 0:
+                    if g.get('score', 0) is None:
+                        es = err_label + f"> Report does not have a score. The sheet may have been used incorrectly or fields are missing"
+                        ERRORS[ins].append(es)
+                    elif g.get("score", 0) < 0 or g.get("score", 0) > 4:
+                        es = err_label + f"> Report score is {g.get('score', 0)}. The report score has to be between 0 and 4; probably due to a too high value of 'Delta' in instructor sheet."
+                        ERRORS[ins].append(es)
+
+                if repn >= 1 and not g['comments']:
+                    es = err_label + "> Incomplete report evaluation (missing comments field)"
+                    es += "Please fill out comments field in your excel sheet."
+                    ERRORS[ins].append(es)
+
+                if repn >= 1 and not g['approver_comments']:
+                    es = err_label + "> Incomplete report evaluation (you are missing the approver comments field; can simply be set to 'ok')."
+                    # Address the approver when it is a known instructor, else the sheet owner. The
+                    # error used to be dropped silently whenever the approver had no entry in ERRORS yet.
+                    approver = g['approver'] if g['approver'] in known_names else ins
+                    ERRORS.setdefault(approver, []).append(es)
+
+                if g['missing_fields']:
+                    mf = g['missing_fields']
+                    es = err_label + "> Incomplete report evaluation (missing required evaluation-scoring field(s): '%s')."%(', '.join( [s for _,s in mf] ) ,)
+                    es += " Please fill out missing field in your excel sheet."
+                    ERRORS[ins].append(es)
+
+                for sid in g['student_ids']:
+                    # NOTE: the check that sid is a registered student was disabled in the original
+                    # code (`... and False`) and is therefore left out here.
+                    # Compare against the variable sid (the original compared with the literal "sid",
+                    # so duplicates were never detected).
+                    bad = [(i, g2) for (i, s, g2) in all_groups if s == sid]
+                    if len(bad) > 0:
+                        i, g2 = bad[0]
+                        es = err_label + "> Duplicate student ids for student: %s. " % (sid, )
+                        es += "Student is also found in sheet by instructor: " + i
+                        es += " in group with students: " + (", ".join( g2['student_ids'])) + ". "
+                        es += " Please ensure report only assigned to one instructor. "
+                        ERRORS[ins].append(es)
+                        continue
+
+                    all_groups.append( (ins, sid, g) )
+
+    ipath = paths['instructor_project_evaluations']
+    for f in glob.glob(ipath +"/PARSE_ERRORS_*.txt" ):
+        os.remove(f)
+
+    for ins in ERRORS:
+        if ERRORS[ins]:
+            ss = '\n'.join(ERRORS[ins])
+            with open(ipath +"/PARSE_ERRORS_" + ins + ".txt",'w') as f:
+                f.write(ss)
+
+
+def get_template():
+    """Return the path of the project evaluation template workbook."""
+    return get_paths()['project_evaluations_template.xlsx']
+
+
+def get_instructor_xlsx_files(info, sheet):
+    """Return (xlsx path, shortname) pairs for every instructor plus the pseudo-instructor "transfer".
+
+    Sheet 0 maps to the "groups" workbooks, any other sheet number n to "report_n". The "transfer"
+    entry always comes last. The paths are only constructed; the files need not exist.
+    """
+    label = "groups" if sheet == 0 else "report_%i"%sheet
+    shortnames = [ins['shortname'] for ins in info['instructors']] + ["transfer"]
+
+    folder = get_paths()['instructor_project_evaluations']
+    # <folder>/02450_<semester id>_<label>_<shortname>.xlsx
+    stem = folder + "/02450_"+semester_id()+"_" + label + "_"
+    return [(stem + name + ".xlsx", name) for name in shortnames]
+
+
+import hashlib
+
+def copy_populate_from_template(info, sheet_number,zip_file):
+    """Create the evaluation workbook of every instructor (and of "transfer") for one sheet.
+
+    sheet_number 0 is the group registration sheet and needs no zip_file. For sheet_number > 0 the
+    hand-ins in zip_file are unpacked, assigned to TAs by detected report language and remaining
+    capacity ('maxreports'), re-zipped per TA and pre-filled into the TA's workbook.
+    Raises if an instructor's language is not "en"/"any" or if a workbook already exists. """
+    all_groups = projects_info.get_groups_from_report(repn=sheet_number-1) if sheet_number > 0 else []
+    # Set up which TA approves which TA: within each language the active TAs approve each other in a ring.
+    if any( [i['language'] not in ["en", "any"] for i in info['instructors'] ]):
+        print(info['instructors'])
+        raise Exception("An instructor does not have a language set. Please fix in main configuration file")
+    langs = ["en", "any"]
+    ifiles = get_instructor_xlsx_files(info, sheet_number)
+    all_tas = {}
+    for la in langs:
+        tas = [i for i in info['instructors'] if i['language'] == la]
+        active_tas = [t for t in tas if t['maxreports'] > 0]
+        dead_tas = [t for t in tas if t['maxreports'] <= 0]
+        for j, ta in enumerate(active_tas):
+            sn = ta['shortname']
+            all_tas[sn] = ta
+            all_tas[sn]['approver'] = active_tas[(j + 1) % len(active_tas)]['shortname']
+            all_tas[sn]['handins'] = []
+        for j, ta in enumerate(dead_tas):
+            sn = ta['shortname']
+            all_tas[sn] = ta
+            all_tas[sn]['approver'] = sn # dead dude approves himself
+            all_tas[sn]['handins'] = []
+
+    all_tas['transfer'] = {'maxreports': 0, 'language': 'any', 'approver': 'transfer', 'handins': []}
+
+    for (fn, name) in ifiles:
+        all_tas[name]['ifile'] = fn
+
+    # how many groups per instructor should be preset. Larger for report 0 (group registration).
+    n_groups_per_instructor = 24 + (sheet_number == 0) * 26
+
+    if sheet_number > 0:
+        zfd = zip_file[:-4]  # extraction directory: the zip path without ".zip"
+        if not os.path.exists(zfd):
+            os.mkdir(zfd)
+        zipfile.ZipFile(zip_file).extractall(path=zfd)
+        # get all report handins (i.e. directories)
+        ls = [l for l in glob.glob(zfd + "/*") if l[-3:] not in ["txt", "tml"]]
+
+        handins = []
+        handins_duplicated = [] # for duplicated handins. i.e. handins with existing student id's.
+        protected_ids = []
+        handins_MD5 = {}
+
+        group_id_counter = 1000  # fallback ids for directories whose name has no parsable group id
+        for l in ls:
+            pdf_md5 = None
+            lpdfs = glob.glob(l + "/*.pdf")
+            try:
+                group_id = int(os.path.basename(l).split("-")[2].strip().split(" ")[1])
+            except Exception as e:
+                group_id = group_id_counter
+                group_id_counter = group_id_counter + 1
+
+            if len(lpdfs) > 0:
+                pdf = lpdfs.pop()
+                try:
+                    raw = parser.from_file(pdf)
+                    if not 'content' in raw:
+                        print("parse error; unable to parse pdf content. File is probably a bit fucky")
+                        lang = "da"
+                    else:
+                        lang = langdetect.detect(raw['content'])
+                    print(lang + ": " + pdf)
+                    if lang != "en":
+                        lang = "da"
+                except Exception as e:
+                    print("Bad encoding")
+                    lang = "da"
+                # NOTE: no hash is computed here; pdf_md5 stays None.
+            else:
+                lang = "da"
+
+            hi = {'path': l, 'group_id': group_id, 'lang': lang, 'pdf_hash': pdf_md5}
+            handins.append(hi)
+
+        # fix error file about already handed in reports:
+        used_students = []
+        for h in handins:
+            gid = h['group_id']
+            gws = []
+            for g in all_groups:
+                if g['group_id'] is not None and g['group_id'] == gid:
+                    gws += [s for s in g["student_ids"] if s not in protected_ids + used_students]
+                    used_students += gws
+            h['group'] = gws
+
+        # pass 1: "da" reports go to TAs with language "any"; pass 2: remaining reports go to "en"/"any" TAs
+        ta_report_langs = [(["any"], ["da"]), (["en", "any"], ["da", "en"])]
+
+        assigned_handins = []
+        for ta_l, ra_l in ta_report_langs:
+            for num, h in enumerate(handins):
+                if h['lang'] not in ra_l or num in assigned_handins:
+                    continue
+                assigned_handins.append(num)
+                TA_names = [k for k in all_tas if all_tas[k]['language'] in ta_l]
+                # compute number of groups per TA (+100 once a TA has reached 'maxreports')
+                nn = [len(all_tas[n]['handins']) + 100 * (all_tas[n]['maxreports'] <= len(all_tas[n]['handins'])) for n in TA_names]
+                i = np.argmin(nn)
+                all_tas[TA_names[i]]['handins'].append(h)
+
+        assert( sum([ len(all_tas[n]['handins']) for n in all_tas] ) == len(handins) )
+
+        # Saving report assignment summary to .txt file for later reference
+        summary_txt = "%s/report_%i_summary.txt"%(os.path.dirname(zip_file), sheet_number)
+        with open(summary_txt, 'w') as f:
+            ss = ["TA,    Approver,   number-of-reports,   group_ids,    Students"]
+            for ta in all_tas:
+                handins_students = [', '.join(ha['group']) for ha in all_tas[ta]['handins'] ]
+                handins_groups = ", ".join( [str(ha['group_id']) for ha in all_tas[ta]['handins'] ] )
+                nha = len(handins_students)
+                approver = all_tas[ta]['approver']
+                ss.append(f'{ta}, {approver}, {nha},     ({handins_groups}),   ({", ".join(handins_students)})')
+            f.write('\n'.join(ss))
+
+    # write actual .xlsx files:
+    template = get_template()
+    for shortname in all_tas:
+        ifile = all_tas[shortname]['ifile']
+        corrector = all_tas[shortname]['approver']
+        if sheet_number > 0:
+            # Copy reports to directory (distribute amongst TAs)
+            b_dir = os.path.dirname(zip_file)
+            ins_dir = "%s/project_%i_%s/"%(b_dir, sheet_number, shortname)
+
+            if not os.path.exists(ins_dir):
+                os.mkdir(ins_dir)
+
+            for handin in all_tas[shortname]['handins']:
+                shutil.move(handin['path'], ins_dir)
+
+            shutil.make_archive(ins_dir[:-1], 'zip', ins_dir)
+            time.sleep(2)
+            print("Removing tree of reports to clear up space...")
+            shutil.rmtree(ins_dir)
+
+        if os.path.exists(ifile):
+            raise Exception("File already exists")
+        shutil.copyfile(template, ifile)
+        wb = openpyxl.load_workbook(ifile)
+        for wdex, ws in enumerate(wb.worksheets):
+            if wdex != sheet_number:  # keep only the sheet that belongs to this report
+                wb.remove(ws)
+        ccol = 2
+        sheet = wb.worksheets[0]
+        if sheet_number > 0:
+            sheet = write_dropdown_sumprod_sheet(sheet)
+
+        handins_assigned_to_this_ta = all_tas[shortname]['handins']
+        for i in range(len(handins_assigned_to_this_ta) + n_groups_per_instructor):
+            sheet.cell(INSTRUCTOR_ROW, ccol + i).value = shortname
+            if sheet_number > 0:
+                if i < len(handins_assigned_to_this_ta):
+                    sheet.cell(STUDENT_ID_ROW -1, ccol + i).value = handins_assigned_to_this_ta[i]['group_id']
+                sheet.cell(INSTRUCTOR_ROW+1, ccol + i).value = corrector
+                if i < len(handins_assigned_to_this_ta ):
+                    gg = handins_assigned_to_this_ta[i]['group']
+
+                    for j,s in enumerate(gg):
+                        sheet.cell(STUDENT_ID_ROW+j, ccol+i).value = s
+        wb.save(ifile)
+        wb.close()
+    # clean up zip file directories
+    if sheet_number > 0:
+        zfd = zip_file[:-4]
+        shutil.rmtree(zfd)
+
+def write_dropdown_sumprod_sheet(sheet):
+    """Add score drop-downs and the weighted score formula to 300 group columns of `sheet`.
+
+    Cells of evaluation rows with a truthy maximum get a list validation; the row below them gets
+    4*SUMPRODUCT(scores, weights). Good for up to about 800 students. Returns the same sheet.
+    """
+    ccol = 2  # first group column
+    n_rows = EVALUATION_ROW_END - EVALUATION_ROW_START + 1
+    # The weight range and the allowed values are the same for every column: compute them once.
+    weight_col = get_column_letter(RANGE_MIN_COL - 1)
+    weights = '$%s$%i:$%s$%i' % (weight_col, WEIGHT_ROW_START, weight_col, WEIGHT_ROW_START + n_rows - 1)
+    choices = {}  # evaluation row offset -> list formula of the drop-down
+    for j in range(n_rows):
+        min_value = sheet.cell(j + WEIGHT_ROW_START, RANGE_MIN_COL).value
+        max_value = sheet.cell(j + WEIGHT_ROW_START, RANGE_MIN_COL + 1).value
+        if max_value and min_value < 0:
+            choices[j] = f"B{DELTA_ALLOWED_ROW}:N{DELTA_ALLOWED_ROW}"  # negative range: values listed in the sheet
+        elif max_value:
+            choices[j] = '"' + ",".join(str(x) for x in range(min_value, max_value + 1)) + ',"'
+    for i in range(300):
+        column = get_column_letter(i + ccol)
+        for j, fml in choices.items():
+            data_val = DataValidation(type="list", formula1=fml, allow_blank=True)
+            sheet.add_data_validation(data_val)
+            data_val.add(sheet.cell(j + EVALUATION_ROW_START, i + ccol))
+        scores = '%s%i:%s%i' % (column, EVALUATION_ROW_START, column, EVALUATION_ROW_END)
+        sheet[column + str(EVALUATION_ROW_END + 1)] = '=4*SUMPRODUCT(%s, %s)' % (scores, weights)
+    return sheet
+
+def distribute_zip_content(info, sheet, zf_base):
+    """Extract each TA's reports from the archive zf_base into project_<sheet>_<TA>/ and zip that folder.
+
+    A member is extracted for a TA when its name starts with '<student id>/' for a student id found
+    in the TA's workbook. TAs without a workbook and the "transfer" entry are skipped.
+    """
+    import zipfile
+    for xlsx_path, ta_name in get_instructor_xlsx_files(info, sheet):
+        if ta_name == "transfer" or not os.path.exists(xlsx_path):
+            continue
+        workbook = openpyxl.load_workbook(xlsx_path)
+        ws_x = ([workbook.worksheets[0]] + [ws for ws in workbook.worksheets if ws.title == "Ark1"])[-1]
+        student_ids = []
+        for column in range(2, ws_x.max_column + 1):
+            student_ids.extend(parse_column(ws_x, report_number=sheet, column=column).get('student_ids', []))
+        target = "%s/project_%i_%s/"%(os.path.dirname(zf_base), sheet, ta_name)
+        if not os.path.exists(target):
+            os.mkdir(target)
+        with zipfile.ZipFile(zf_base) as archive:
+            for member in archive.namelist():
+                for sid in student_ids:
+                    if member.startswith(sid +'/'):
+                        archive.extract(member, target)
+        shutil.make_archive(target[:-1], 'zip', target)
+
+
+# Gather instructor sheets and save to main file
+def gather_instructor_sheets(info):
+    """Collect all instructor workbooks into the main evaluation workbook (get_output_file()).
+
+    The output starts as a copy of the template. For each of the four sheets, every column of an
+    instructor workbook that contains student ids is copied (first 51 rows only) to the next free
+    column of the output. The result is finally opened and saved once more through xlwings.
+    """
+    out = get_output_file()
+    print("Gathering instructor sheets and saving them to file: ")
+    print(" > %s"%out)
+    shutil.copyfile(get_template(), out)
+    collected = openpyxl.load_workbook(out)
+
+    for sheet in range(4):
+        next_col = 1
+        for path, ta_name in get_instructor_xlsx_files(info, sheet):
+            if not os.path.exists(path):
+                continue
+            wb = openpyxl.load_workbook(path)
+            # use the last sheet titled "Ark1" if there is one, else the first sheet
+            ws_x = ([wb.worksheets[0]] + [ws for ws in wb.worksheets if ws.title == "Ark1"])[-1]
+            n_groups = 0
+            for column in range(2, ws_x.max_column + 1):
+                group = parse_column(ws_x, report_number=sheet, column=column)
+                if len(group['student_ids']) == 0:
+                    continue
+                # don't write the part about evaluating the sheet; the TAs tend to get that part wrong
+                for row in range(1, min(ws_x.max_row, 51) + 1):
+                    collected.worksheets[sheet].cell(row, next_col + 1, ws_x.cell(row, column).value)
+                next_col += 1
+                n_groups += 1
+
+            if n_groups == 0 and ta_name != "transfer":
+                print("TA: " + ta_name + " sheet %i; groups found: %i" % (sheet, n_groups))
+            wb.close()
+        if sheet >= 1:
+            write_dropdown_sumprod_sheet(collected.worksheets[sheet])
+
+    collected.save(out)
+    collected.close()
+    print("Collected xlsx instructor files. Using xlwings to load main worksheet, evaluate and save it")
+
+    book = xw.Book(out)
+    book.save(out)
+    book.close()
+
+
+def weave_distribute_groups(info, groups, handins, shortnames):
+    """Match every hand-in (a student id) to an existing group and distribute the result with fair_assign.
+
+    A hand-in takes the last group in `groups` containing the student that shares no (truthy) student
+    id with the groups selected so far; otherwise it becomes a new single-student group.
+    Returns whatever fair_assign returns for the selected groups.
+    """
+    groups2 = []
+    used_students = set()  # student ids of groups2, kept up to date instead of being rebuilt per hand-in
+    BG = 0
+    for sid in handins:
+        fg = [g for g in groups if sid in g['student_ids'] and not any(set(g['student_ids']) & used_students)]
+        if not fg:
+            BG += 1
+        groups2.append(fg[-1] if fg else {'student_ids': [sid]})
+        used_students.update(groups2[-1]['student_ids'])
+    print("Fair assigning groups. Group changes since last assignment: %i (if large -> bad TA sheet)"%BG)
+    return fair_assign(info, groups2, shortnames=shortnames)
+
+
+def _ta_maxrep_by_name(info, shortname):
+    """Return 'maxreports' of the last instructor in info['instructors'] whose shortname matches."""
+    return [ta for ta in info['instructors'] if ta['shortname'] == shortname][-1]['maxreports']
+
+def fair_assign(info, groups, shortnames):
+    """Distribute `groups` over the instructors; returns {shortname: [groups]}.
+
+    The last entry of `shortnames` (the "transfer" pseudo-instructor) never receives groups. A group
+    stays with its current instructor while that instructor is below both the even share and the
+    own 'maxreports'; all other groups go to the instructor with the fewest groups (full ones last).
+    """
+    shortnames_no_transfer = shortnames[:-1]
+    groups_by_instructor = {i: [] for i in shortnames}
+    max_per_instructor = math.ceil(len(groups) / len(shortnames_no_transfer))
+    rem_groups = []
+    for g in groups:
+        # `instructor` may be missing, None or a name without capacity information (e.g. "transfer"),
+        # so test membership before looking up 'maxreports'.
+        i = (g.get("instructor") or "").lower()
+        if i in shortnames_no_transfer and len(groups_by_instructor[i]) < min(max_per_instructor, _ta_maxrep_by_name(info, i)):
+            groups_by_instructor[i].append(g)
+        else:
+            rem_groups.append(g)
+    # remaining groups go to the least loaded instructor; instructors at capacity count as 1000
+    for g in rem_groups:
+        ls = [len(groups_by_instructor[i]) if len(groups_by_instructor[i]) < _ta_maxrep_by_name(info, shortname=i) else 1000 for i in shortnames_no_transfer]
+        groups_by_instructor[shortnames_no_transfer[np.argmin(ls)]].append(g)
+    for i in shortnames:
+        print(i + " %i"%len(groups_by_instructor[i]))
+    if sum(len(groups_by_instructor[i]) for i in shortnames) != len(groups):
+        raise Exception("Group lost during fair group assignment!")
+    return groups_by_instructor
+
+def mkboxplots(students,paths):
+    """Plot, per instructor, box plots of the scores given on reports 1-3 and save the figure.
+
+    The figure is written as TA_scores.pdf to the instructor evaluation folder and to the folder of
+    the collected evaluations file, and as boxcompare.png to the current directory, then shown.
+    Falsy scores (None or 0) are not included.
+    """
+    iscores = dict()  # instructor -> [scores report 1, scores report 2, scores report 3]
+    for repn in range(1, 4):
+        for s in students.values():
+            g = s['reports'][repn]
+            if not g:
+                continue
+            gs, ins = g['score'], g['instructor']
+            if gs:
+                iscores.setdefault(ins, [[], [], []])[repn-1].append(gs)
+
+    NI = len(iscores)
+    cols = get_colors(max_colors=NI)
+    def set_box_color(bp, color):
+        for part in ('boxes', 'whiskers', 'caps', 'medians'):
+            plt.setp(bp[part], color=color)
+
+    ticks = ['Report 1', 'Report 2', 'Report 3']
+    plt.figure()
+    dw = 0.8
+    group_width = dw * (NI + 2)  # distance between the box groups of consecutive reports
+    for dex, ins in enumerate(iscores):
+        data_a = iscores[ins]
+        pst = np.array(range(len(data_a))) * group_width + dw*dex
+        db = plt.boxplot(data_a, positions=pst, sym='', widths=dw * 0.6/0.8)
+        set_box_color(db, cols[dex])
+        plt.plot([], c=cols[dex], label=ins)  # empty line; only provides the legend entry
+    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
+    plt.xlim([-1, group_width * 3 + 1])
+    # Centre each tick under its group of boxes (the original used a spacing of dw*NI, which
+    # drifts away from the groups placed every dw*(NI+2)).
+    plt.xticks([i * group_width + dw * (NI - 1) / 2 for i in range(len(ticks))], ticks)
+    plt.ylabel("Report score")
+    plt.tight_layout()
+    plt.savefig(paths['instructor_project_evaluations'] + '/TA_scores.pdf')
+    plt.savefig(os.path.dirname(paths['collected_project_evaluations.xlsx']) + '/TA_scores.pdf')
+    # Save before show(): with an interactive backend the figure is gone once show() returns,
+    # so saving afterwards would write an empty image.
+    plt.savefig('boxcompare.png')
+    plt.show()
diff --git a/src/coursebox/core/projects_info.py b/src/coursebox/core/projects_info.py
new file mode 100644
index 0000000..3da60f3
--- /dev/null
+++ b/src/coursebox/core/projects_info.py
@@ -0,0 +1,176 @@
+from thtools.coursebox.core.info_paths import get_paths
+import os
+import re
+import openpyxl
+import numpy as np
+
+INSTRUCTOR_ROW = 6  # row the instructor is read from; the approver is read from the row below
+INSTRUCTOR_CHECKER_ROW = 31  # not referenced anywhere else in this module
+STUDENT_ID_ROW = 3  # first of the three rows scanned for student ids; the row above holds the group id
+STARS_ROW = 26  # not referenced anywhere else in this module
+
+EVALUATION_ROW_START = 8  # not referenced anywhere else in this module
+EVALUATION_ROW_END = 40  # comments are read 5 rows below this row, approver comments 6 rows below
+WEIGHT_ROW_START = 63  # first row scanned by the (currently unreachable) report-3 check in parse_column
+RANGE_MIN_COL = 5  # column of the first value compared in that check; the second value is read from the next column
+
+DELTA_ALLOWED_ROW = 111 # The range of possible delta-values. Should be in an empty (new) row at bottom.
+
+def get_all_reports_from_collected_xlsx_file_DEFUNCT():  # Parse every column of worksheets 3..0. NOTE(review): marked DEFUNCT and not called in this module - when is this used?
+    out = get_output_file()
+    wb = openpyxl.load_workbook(out)
+    all_reports = {}  # worksheet index -> list of parsed column dicts
+    for repn in range(3, -1, -1):  # worksheet indices 3, 2, 1, 0
+        cls = []
+        for i in range(2, wb.worksheets[repn].max_column + 1):  # scanning starts at column 2
+            cp = parse_column(wb.worksheets[repn], report_number=repn, column=i)
+            if not cp['student_ids']:  # skip columns in which no student id was recognised
+                continue
+            cls.append(cp)
+        all_reports[repn] = cls
+    return all_reports
+
+def parse_column_student_ids(v):  # Return all student ids ("s" followed by six digits) found in one cell value.
+    sn = []
+    if v is not None:
+        if isinstance(v, int):
+            v = str(v)
+            v = "s" + ("0"*(6-len(v))) + v  # a bare number is left-padded with zeros to six digits and prefixed with "s"
+        v = v.lower()  # NOTE(review): assumes v is a str at this point; other cell types (e.g. float) have no .lower()
+        o = re.findall(r'(s\d{6})', v)  # a cell may list several ids; every match is returned
+        for g in o:
+            sn.append(g)
+    return sn
+
+def parse_column(worksheet, report_number, column):
+    """Parse one group column of an evaluation worksheet into a dict (ids, instructor, approver, content, comments)."""
+    ws = worksheet  # wb.worksheets[sheet]
+    sn = []
+    group_id = ws.cell(row=STUDENT_ID_ROW - 1, column=column).value
+
+    for i in range(0, 3):  # up to three rows of student ids per group
+        v = ws.cell(row=i + STUDENT_ID_ROW, column=column).value
+        sn += parse_column_student_ids(v)
+
+    instructor = ws.cell(row=INSTRUCTOR_ROW, column=column).value
+    approver = ws.cell(row=INSTRUCTOR_ROW+1, column=column).value
+
+    if instructor: instructor = str(instructor).lower()  # str(): the cell may hold a non-string value
+    if approver: approver = str(approver).lower()
+
+    content = None
+    comments = None
+    appr_comments = None
+    if report_number > 0 and sn:
+        N = 38  # number of rows copied verbatim, starting 3 rows below STUDENT_ID_ROW
+        rarr = np.ndarray(shape=(N,1),dtype=object)  # builtin object: the np.object alias was removed in NumPy 1.24
+        for j in range(N):
+            v = ws.cell(row=3 + STUDENT_ID_ROW+j, column=column).value
+            rarr[j,0] = v
+        content = rarr
+        comments = ws.cell(row=EVALUATION_ROW_END+5, column=column).value
+        appr_comments = ws.cell(row=EVALUATION_ROW_END+6, column=column).value
+
+    cgroup = {'column_j': column, 'student_ids': sn, 'instructor': instructor, "approver": approver, 'content': content,
+              "comments": comments, "approver_comments": appr_comments, 'missing_fields': [],
+              'group_id': group_id}
+
+    # Now, find errors... This involves first finding non-zero columns
+    if report_number > 0 and sn:
+        score = cgroup['content'][-3, 0]  # the score is taken from the third-last copied row
+        cgroup['score'] = score
+        cgroup['pct'] = score2pct(score)
+
+        if report_number == 3: # this obviously needs fixing for next semester.
+            raise Exception("No report number 3 anymore. ")  # NOTE: the rest of this branch is unreachable; kept for reference
+            I = []
+            for i in range(42): # max number of evaluation fields (irrelevant)
+                v1 = worksheet.cell(row=WEIGHT_ROW_START+i, column=RANGE_MIN_COL).value
+                v2 = worksheet.cell(row=WEIGHT_ROW_START+i, column=RANGE_MIN_COL+1).value
+                if (v1 == -1 and v2 == 1) or (v1 == 0 and v2 == 4):
+                    I.append(i)
+                if v1 == -1 and v2 == 1:
+                    # print("delta col")
+                    break
+
+            for i in I:
+                w1 = worksheet.cell(row=WEIGHT_ROW_START + i, column=1).value
+                w3_ = worksheet.cell(row=INSTRUCTOR_ROW + i+2, column=1).value # should agree with label in w1
+                w2 = worksheet.cell(row=INSTRUCTOR_ROW + i+2, column=column).value
+                if w2 is None:
+                    cgroup['missing_fields'].append( (i, w1) )
+                    if report_number < 3:
+                        print("old report nr.")
+
+    return cgroup
+
+def score2pct(score):  # Convert a raw score to a fraction by dividing by 4; None and strings are passed through.
+    if score is None:
+        return None
+    if isinstance(score, str):  # textual cell content is returned unchanged
+        return score
+    else:
+        pct = score / 4  # presumably 4 is the maximum attainable score - TODO confirm
+        return pct
+
+def get_output_file():
+    """Return the path of the collected project-evaluations workbook, creating its directory if needed."""
+    out = get_paths()['collected_project_evaluations.xlsx']
+    os.makedirs(os.path.dirname(out), exist_ok=True)  # also creates missing parents; no exists()/mkdir race
+    return out
+
+def get_groups_from_report(repn):
+    """Return the parsed columns of worksheet ``repn`` that have both student ids and a group id.
+
+    Each entry is the dict produced by parse_column(); columns are scanned from column 2 onwards.
+    """
+    cls = []
+    out = get_output_file()
+    print("> Loading student report scores from: %s" % out)
+    wb = openpyxl.load_workbook(out, data_only=True)
+    for i in range(2, wb.worksheets[repn].max_column + 1):
+        cp = parse_column(wb.worksheets[repn], report_number=repn, column=i)
+        if not cp['student_ids'] or cp['group_id'] is None:
+            continue
+        cls.append(cp)
+    return cls
+
+def populate_student_report_results(students):
+    # take students (dict of student dicts in the info format, keyed by student id) and assign them the results
+    # from the reports. Returns (students, maximal_groups).
+    for k in students:
+        # Always create the 'reports' slots, so callers can index them even when no workbook exists yet.
+        students[k]['reports'] = {i: None for i in range(4)}
+
+    out = get_output_file()
+    print("> Loading student report scores from: %s"%out)
+    if not os.path.exists(out):
+        return students, []
+
+    wb = openpyxl.load_workbook(out,data_only=True)
+    maximal_groups = []
+    maximal_groups_students = set()  # ids already covered by a group in maximal_groups
+
+    for repn in range(3, -1, -1):  # highest worksheet index first, so maximal_groups keeps a student's highest-numbered group
+        cls = []
+        for i in range(2, wb.worksheets[repn].max_column + 1):
+            cp = parse_column(wb.worksheets[repn], report_number=repn, column=i)
+            if not cp['student_ids']:
+                continue
+            cls.append(cp)
+
+        for g in cls:
+
+            for sid in g['student_ids']:
+                student = students.get(sid, None)
+                if student is None:
+                    if repn > 0:  # don't care about project 0 (group registration)
+                        print("Bad error: Student id %s not found. report evaluation malformed?"%sid)
+                else:
+                    # student = student.pop()
+                    student['reports'][repn] = g
+                    if sid not in maximal_groups_students:
+                        maximal_groups.append(g)
+                        maximal_groups_students.update(g['student_ids'])
+
+    return students, maximal_groups
\ No newline at end of file
diff --git a/src/coursebox/core/projects_plagiarism.py b/src/coursebox/core/projects_plagiarism.py
new file mode 100644
index 0000000..7dd7daf
--- /dev/null
+++ b/src/coursebox/core/projects_plagiarism.py
@@ -0,0 +1,140 @@
+from tinydb import TinyDB, Query
+import os
+import zipfile
+from tinydb import TinyDB, Query, where
+import glob
+import pycode_similar
+import numpy as np
+import matplotlib.pyplot as plt
+
+# Update for later: Add code to copy current .zip file content to special (archived) files. Then load these files into DB later.
+# Structure should be extended to include semester structure.
+
+def plagiarism_checker(paths,info):
+    """Compare the code of all hand-ins of each report pairwise, plot the similarity matrix and write
+    the most similar pairs (at most ten) to <evaluations dir>/plagiarism/report_<n>/ for manual inspection."""
+    db = insert_projects(paths,info)
+    for repn in range(1, 4):
+        d = [v for v in db if v['code'] and v['report-number'] == repn]
+        if len(d) == 0:
+            continue
+
+        M = np.zeros( shape = (len(d), len(d)) )
+        for i,d1 in enumerate(d):
+            for j in range(i+1, len(d)):  # compare() is symmetric, so only the strict upper triangle is filled
+                M[i,j] = compare(d1['code'], d[j]['code'])
+            print(i)
+
+        plt.imshow(M)
+        plt.colorbar()
+        plt.title("Report number %i"%repn)
+        plt.show()
+
+        aa = np.flip( np.argsort( M.ravel()),axis=0)[:10]  # flat indices of the ten largest entries
+        ii,jj = np.unravel_index(aa, M.shape)
+        for dx in range(len(aa)):
+            i = ii[dx]
+            j = jj[dx]
+            if i >= j:
+                # Cells outside the strict upper triangle were never filled; they are plain zeros
+                # (e.g. a hand-in paired with itself), not real matches.
+                continue
+            s1 = d[i]['code'].split("\n")
+            s2 = d[j]['code'].split("\n")
+
+            # One output folder per pair, named after the similarity score and the two indices.
+            bp = os.path.dirname(paths['collected_project_evaluations.xlsx']) + "/plagiarism"
+            bp = bp + "/report_%i"%repn
+            dout = bp + "/%.2f_%i_%i"%(M[i,j], i,j)
+            os.makedirs(dout, exist_ok=True)  # creates the intermediate folders as well
+
+            with open(dout + "/s1.txt", 'w') as f:
+                mark(f, s1, s2)
+
+            with open(dout + "/s2.txt", 'w') as f:
+                mark(f, s2, s1)
+
+            with open(dout + "/sim.txt", 'w') as f:
+                ss = [s for s in s2 if s in s1]  # the lines the two hand-ins have in common
+                f.write("\n".join(ss))
+
+def get_toolbox_lines(paths):  # Collect the normalised code lines of the course toolboxes (used to filter them out of hand-ins).
+    names = [("Matlab", "m"), ("Python", "py"), ("R", "R")]  # (toolbox suffix, file extension)
+    all_code = []
+    dirs = ["/Tools/", "/Tools/02450Tools/",'/Scripts/']  # toolbox sub-directories that are scanned
+    for (n,ex) in names:
+        for d in dirs:
+            tb = paths['instructor'] +"/02450Toolbox_" + n +d
+            if not os.path.exists(tb):  # a toolbox or sub-directory may be absent
+                continue
+            ls = glob.glob(tb +"/*." + ex)
+            for l in ls:
+                with open(l, 'r') as f:
+                    s = f.read()
+                    code = code2lines(s)  # same normalisation as applied to the hand-ins in insert_projects
+                    all_code += code
+    return all_code
+
+
+def code2lines(s):  # Split source text into stripped lines, dropping very short lines and some boilerplate.
+    ls = s.split("\n")
+    ls = [l.strip() for l in ls]
+    ls = [l for l in ls if len(l) > 3  # lines of three characters or fewer are dropped
+          and not l.startswith("# In[")  # presumably cell markers of exported notebooks - TODO confirm
+          and not l.startswith("hold")]  # presumably Matlab "hold on/off" plotting commands - TODO confirm
+    return ls
+
+
+def compare(s1, s2):  # Similarity of two texts: 2*|A & B| / (|A| + |B|) over their sets of distinct lines.
+    s1 = set(s1.split("\n"))
+    s2 = set( s2.split("\n") )
+    eps = 1e-6  # small constant added to the denominator
+    x = 2*len(s1 & s2) / (len(s1) + len(s2) + eps)
+    return x
+
+
+
+def mark(f, s1, s2):  # Write the lines of s1 to file object f, prefixing those that also occur in s2 with "[!] ".
+    ss = [("[!] " + s if s in s2 else s) for s in s1]
+    f.write("\n".join(ss))
+
+
+def get_db_DEFUNCT(paths):  # Open the TinyDB file plagiarism.json. NOTE(review): marked DEFUNCT and not called in this module.
+    bp = os.path.dirname(paths['collected_project_evaluations.xlsx'])
+    bp = os.path.dirname(bp)  # the database lives one level above the workbook's directory
+    db = TinyDB(bp + '/plagiarism.json')
+    return db
+
+
+def insert_projects(paths,info):
+    """Collect the students' own code from zip1.zip..zip3.zip in the project evaluations directory.
+
+    Returns a list with one dict per top-level folder of each zip; toolbox lines are filtered out of 'code'.
+    """
+    toolbox_code = set(get_toolbox_lines(paths))  # a set gives O(1) membership tests in the filter below
+    db = []
+    for i in range(3):
+        zip_file = paths['instructor_project_evaluations'] + "/zip%i.zip"%(i+1)
+        if not os.path.exists(zip_file):
+            continue
+        proj = {}
+
+        with zipfile.ZipFile(zip_file) as zf:  # the archive is closed again even if reading a member fails
+            for l in zf.namelist():
+                j = l.find('/')
+                if j <= 0: continue
+                proj.setdefault(l[:j], []).append(l)  # group the member paths by their top-level folder
+
+            for k in proj:
+                s = ""
+                for f in proj[k]:
+                    if f.lower().endswith((".py", ".m", ".r")):
+                        file = zf.read(f).decode('utf-8', errors='ignore')
+                        s = s + "\n" + file
+                group_code_ = code2lines(s)
+                group_code = [l for l in group_code_ if l not in toolbox_code]
+                print([len(s.split("\n")), len(group_code_), len(group_code)])
+                s = "\n".join(group_code)
+                v = {'semester': info['semester_id'], 'report-number': i+1, 'student_id': k, 'code': s}
+                db.append(v)
+    return db
\ No newline at end of file
diff --git a/src/coursebox/material/__init__.py b/src/coursebox/material/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/coursebox/material/homepage_lectures_exercises.py b/src/coursebox/material/homepage_lectures_exercises.py
new file mode 100644
index 0000000..d172128
--- /dev/null
+++ b/src/coursebox/material/homepage_lectures_exercises.py
@@ -0,0 +1,449 @@
+# -*- coding: utf-8 -*-
+import shutil, os, glob
+from datetime import datetime, timedelta
+import calendar
+import pickle
+from jinjafy import jinjafy_comment
+from jinjafy import jinjafy_template
+from coursebox.core.info_paths import get_paths
+from slider.legacy_importer import slide_to_image
+from slider.legacy_importer import li_import
+from slider.slider import set_svg_background_images
+from coursebox.book.exam_includer import HOMEWORK_PROBLEMS
+from coursebox.core.info import class_information
+from coursebox.material.lecture_questions import lecture_question_compiler
+from thtools import latexmk
+
+def get_feedback_groups():
+    """Return {lecture number: [student full names]}; assignments are kept (as student ids) in feedback_groups.pkl."""
+    paths = get_paths()
+    feedback_file = paths['semester'] +"/feedback_groups.pkl"
+    if os.path.exists(feedback_file):
+        with open(feedback_file,'rb') as f:
+            fbg = pickle.load(f)  # NOTE: pickle is only safe because this file is written by this function itself
+    else:
+        fbg = dict()
+
+    info = class_information()
+    all_students = [sid for sid in info['students']]
+
+    now = datetime.today()
+    already_used = []
+    remaining_lectures = []
+    for lecture in info['lectures']:
+        lnum = lecture['number']
+        if lnum == 1: continue
+        if lecture['date'] < now and lnum in fbg:  # lecture already held: its stored group is kept as is
+            already_used += fbg[lnum]
+        else:
+            remaining_lectures.append(lnum)
+
+    project_groups = [g['student_ids'] for g in info['all_groups']]
+    # remove already_used from these groups
+    reduced_groups = [[sid for sid in pg if sid not in already_used] for pg in project_groups]
+    reduced_groups = [rg for rg in reduced_groups if len(rg)>0]
+    # groups are now partitioned.
+    if len(remaining_lectures) > 0:
+        import thtools.thtools_base  # local import: the bare name `thtools` is not bound at module level
+        fbgs = thtools.thtools_base.partition_list(reduced_groups, len(remaining_lectures))
+        for gg in fbgs:
+            for g in gg:
+                already_used = already_used + g
+
+        lst = thtools.thtools_base.partition_list([s for s in all_students if s not in already_used], len(remaining_lectures))
+        for i in range(len(remaining_lectures)):
+            dg = []
+            for g in fbgs[i]: dg += g  # flatten the list
+            fbg[remaining_lectures[i]] = dg + lst[i]
+
+        # Persist the assignment so that later runs keep the groups of lectures that have been held.
+        with open(feedback_file, 'wb') as f:
+            pickle.dump(fbg,f)
+
+    for k in fbg:
+        g = fbg[k]
+        g2 = []
+        for s in g:
+
+            if s in info['students']:
+                dl = info['students'][s]['firstname'] + " " + info['students'][s]['lastname']
+                # ids that are not (or no longer) in info['students'] are silently left out
+                if not dl:
+                    print("EMPTY LIST when making feedback groups. Probably an error in project correction sheets.")
+                    continue
+                g2.append(dl)
+        fbg[k] = g2
+    return fbg
+
+PRESENTATION = 0  # make_lectures mode: notes and handout flags both off; output goes to "/presentation"
+NOTES = 1  # make_lectures mode: notes and handout flags both on; output goes to "/notes"
+HANDOUT = 2  # make_lectures mode: handout on, notes and solutions off; output goes to "/handout"
+def make_lectures(week=None, mode=0, gather_pdf_out=True, gather_sixup=True, make_quizzes=True, dosvg=False, Linux=False):
+    """
+    Compile the lecture slides (all lectures, or only the lecture number(s) in week). mode selects the pdf content:
+
+    mode = PRESENTATION = 0: What I use to present from
+    mode = NOTES = 1: Version containing notes (used for self-study)
+    mode = HANDOUT = 2: version handed out to students.
+    """
+    paths = get_paths()
+    if os.path.exists(paths['book']):
+        book_frontpage_png = paths['shared']+"/figures/book.png" #paths['lectures']+"/static/book.png"
+        slide_to_image(paths['book'] + "/02450_Book.pdf", book_frontpage_png, page_to_take=1)
+        shutil.copy(paths['book'] + "/book_preamble.tex", paths['shared'])
+    info = class_information()
+    jinjafy_shared_templates_dir(paths, info) # Compile templates in shared/templates
+    # The mode only depends on the arguments: resolve it once, so odex is bound even if no lecture matches week.
+    if mode == PRESENTATION:
+        info['slides_shownotes'] = False
+        info['slides_handout'] = False
+        odex = "/presentation"
+    elif mode == HANDOUT:
+        info['slides_shownotes'] = False
+        info['slides_handout'] = True
+        info['slides_showsolutions'] = False
+        odex = "/handout"
+    elif mode == NOTES:
+        info['slides_shownotes'] = True
+        info['slides_handout'] = True
+        odex = "/notes"
+    else:
+        raise Exception("Mode not recognized")
+
+    if isinstance(week, int):
+        week = [week]
+    all_pdfs = []
+    for lecture in info['lectures']:
+        w = lecture['number']
+        if week is not None and w not in week:
+            continue
+
+        ag = get_feedback_groups()
+
+        lecture['feedback_groups'] = ag.get(w, [])
+        info.update({'week': w})
+        info['lecture'] = lecture
+        info['lecture']['teacher'] = [t for t in info['teachers'] if t['initials'] == lecture['teacher_initials']].pop()
+
+        lecture_texdir = paths['lectures'] + '/Lecture_%s/Latex' % w
+        lecture_texfile =lecture_texdir + "/Lecture_%i.tex" % w
+
+        fix_shared(paths, output_dir=lecture_texdir, dosvg=dosvg)
+
+        if os.path.exists(lecture_texdir):
+            print("Latex directory found for lecture %i: %s"%(w,lecture_texdir))
+            lecture_texdir_generated = lecture_texdir +"/templates"
+            if not os.path.exists(lecture_texdir_generated):
+                # shutil.rmtree(lecture_texdir_generated)
+                # time.sleep(0.2)
+                os.mkdir(lecture_texdir_generated)
+
+        for f in glob.glob(paths['lectures'] + "/templates/*.tex"):
+            ex = "_partial.tex"
+            if f.endswith(ex):
+                jinjafy_template(info, file_in=f, file_out=lecture_texdir + "/templates/"+os.path.basename(f)[:-len(ex)] + ".tex")
+
+        # Fix questions.
+        qtarg = lecture_texdir + "/questions"
+        if not os.path.exists(qtarg):
+            os.mkdir(qtarg)
+        for f in glob.glob(paths['lectures'] + "/static/questions/*"):
+            shutil.copy(f, qtarg)
+
+        # Fix questions for this lecture
+        if make_quizzes:
+            lecture_question_compiler(paths, info, lecture_texfile)
+
+        pdf_out = latexmk(lecture_texfile, Linux=Linux)  # latexmk is imported by name; `thtools` itself is never bound here
+        all_pdfs.append( (w,pdf_out))
+
+    handle_pdf_collection(paths, all_pdfs, gather_pdf_out=gather_pdf_out, gather_sixup=gather_sixup, odir=odex)
+
+
+def handle_pdf_collection(paths, all_pdfs, gather_pdf_out, gather_sixup, odir):
+    """Copy the compiled lecture pdfs (and, optionally, freshly compiled 6-up versions) to paths['pdf_out'] + odir.
+
+    all_pdfs is a list of (week, pdf) tuples; only the week number is used, to locate Lecture_<week>.pdf.
+    """
+    tmp_dir = paths['lectures'] + '/Collected/tmp'
+    if not os.path.exists(tmp_dir):
+        os.mkdir(tmp_dir)
+
+    collect_template = paths['lectures'] + "/Collected/lecture_collector_partial.tex"
+
+    for sixup in [False, True]:
+        if not gather_pdf_out: continue
+        if sixup and not gather_sixup: continue
+
+        pdf_compiled_all_6up = []
+        sixup_str = "-6up" if sixup else ""
+        for (week, _) in all_pdfs:
+            tv = {'week': week,
+                  'pdffiles': [paths['lectures'] + '/Lecture_%s/Latex/Lecture_%s.pdf' % (week, week)]}
+            if sixup:
+                tv['sixup'] = sixup
+                tex_out_sixup = tmp_dir + "/Lecture_%i%s.tex" % (week, sixup_str)
+                jinjafy_comment(data=tv, file_in=collect_template, file_out=tex_out_sixup, jinja_tag=None)
+                # latexmk is the name imported at module level; the bare name `thtools` is never bound in this file.
+                dpdf = tmp_dir + "/" + latexmk(tex_out_sixup, shell=False, cleanup=True)
+            else:
+                dpdf = tv['pdffiles'][0]
+
+            pdf_compiled_all_6up.append(dpdf)
+
+        for dpdf in pdf_compiled_all_6up:
+            output_dir = paths['pdf_out'] + odir
+            if not os.path.exists(output_dir):
+                os.mkdir(output_dir)
+            shutil.copy(dpdf, output_dir + "/" + os.path.basename(dpdf))
+
+    # Whatever is left in the temporary directory is removed once the pdfs have been copied.
+    for f in glob.glob(tmp_dir + "/*"):
+        os.remove(f)
+
+def compile_simple_files(paths, info, template_file_list, verbose=False):  # Render each (template, extra variables) pair and compile it to paths['pdf_out'].
+    jinjafy_shared_templates_dir(paths, info)
+    fix_shared(paths, output_dir=paths['shared_latex_compilation_dir'], verbose=verbose)
+    for fname,dd in template_file_list:
+        d2 = info.copy()  # per-file variables are overlaid on a copy, so info itself is not modified
+        d2.update(dd)
+        file = os.path.basename(fname)
+        if file.endswith("_template.tex"):
+            file = file.replace("_template.tex", ".tex")
+        tex_out = paths['shared_latex_compilation_dir'] + "/" + file
+        jinjafy_template(data=d2, file_in=fname, file_out=tex_out, filters=get_filters(), template_searchpath=paths['instructor'])
+        latexmk(tex_out, pdf_out= paths['pdf_out'] + "/" + os.path.basename(tex_out)[:-4]+".pdf")  # imported name; `thtools` itself is never bound in this file
+
+def fix_shared(paths, output_dir, pdf2png=False,dosvg=True,verbose=False, compile_templates=True):
+    '''
+    Recursively copy the files under paths['shared'] into output_dir; compile_templates is currently not used.
+    '''
+    cache_base = output_dir  # the cache helpers are always called with the top-level output directory
+    from thtools.cache import cache_contains_file, cache_update_file
+    from thtools.slider.convert import svg2pdf, pdfcrop
+    from thtools.slider import convert
+
+    def rec_fix_shared(shared_base, output_dir):  # handles one directory level, recursing into sub-directories
+        if dosvg:
+            for svg in glob.glob(shared_base+"/*.svg"):
+                if not cache_contains_file(cache_base, svg):  # presumably true when the svg is new or changed - TODO confirm cache semantics
+                    if verbose:
+                        print("converting to pdf", svg)
+                    svg2pdf(svg,crop=True, text_to_path=True)
+                    cache_update_file(cache_base, svg)
+        files = glob.glob(shared_base+"/*")
+        for f in files:
+            if f.endswith("cache.pkl"):  # never copy cache.pkl files
+                continue
+            # check if template
+            if "templates" in f and f.endswith("_partial.tex"):  # template sources are not copied
+                continue
+
+            if os.path.isdir(f):
+                od2 = output_dir + "/" + os.path.basename(f)
+                if not os.path.exists(od2):
+                    os.mkdir(od2)
+                rec_fix_shared(f, od2)
+            else:
+                of = output_dir + "/" + os.path.basename(f)
+                if not cache_contains_file(cache_base, f) or not os.path.exists(of):  # copy when not cached or when the target is missing
+                    print(f"> {f} -> {of}")
+                    shutil.copy(f, of)
+                    if f.endswith(".pdf") and pdf2png:
+
+                        if verbose:
+                            print(" converting to png", f)
+                        convert.pdf2png(of)
+                    cache_update_file(cache_base, f)
+
+            if verbose:
+                print(" done!")
+
+
+
+    rec_fix_shared(shared_base=paths['shared'], output_dir=output_dir)
+
+def jinjafy_shared_templates_dir(paths, info):  # Render every *_partial.tex in <shared>/templates to <name>.tex in the same directory.
+    tpd = paths['shared'] + "/templates"
+    for f in glob.glob(tpd + "/*.tex"):
+        print(f)  # every .tex file is printed, including those that are not templates
+        ex = "_partial.tex"
+        if f.endswith(ex):
+            jinjafy_template(info, file_in=f, file_out=f"{tpd}/{os.path.basename(f)[:-len(ex)]}.tex")  # strips the "_partial" suffix
+
+
+def get_filters():  # Filter-name -> function mapping passed as filters= to jinjafy_template.
+    return {'safetex': safetex, 'tt': tt, 'bf': bf, 'verb': verb}
+
+def make_exercises_projects_tutors(week=None, only_exercises=False, make_exercises=True, make_projects=True, dosvg=False):
+    """Render and compile the project descriptions and the per-language exercise sheets (all, or only exercise `week`)."""
+    paths = get_paths()
+    filters = get_filters()
+    info = class_information()
+    course_number = info['course_number']
+    jinjafy_shared_templates_dir(paths, info) # Compile files in the shared/templates  directory.
+
+    if not only_exercises:  # Projects are skipped entirely when only the exercises are requested
+        for proj in range(len(info['reports_handin']) if not info['CE'] else 1):
+            info['project'] = proj+1
+            info['is_project'] = True
+            handout_week = info['reports_handout'][proj]
+            handin_week =info['reports_handin'][proj]
+
+            info['lecture'] = info['lectures'][ handout_week-1]
+            info['lecture_handin'] = info['lectures'][handin_week-1]
+
+            if info['CE']:
+                proj_base = paths['instructor'] + "/ExercisesShared/%sprojectCE_Base.tex"%(course_number, )
+                proj_tex_out = paths['instructor'] + "/Project/latex%i/%sprojectCE.tex" % (1,course_number,)
+            else:
+                proj_base = paths['instructor'] + "/ExercisesShared/%sproject%i_Base.tex" % (course_number,proj+1,)
+                proj_tex_out = paths['instructor'] + "/Project/latex%i/%sproject%i.tex" % (proj+1, course_number, proj+1)
+            info['week'] = -1
+
+            if not os.path.exists(proj_base):
+                continue
+
+            jinjafy_template(info, proj_base, file_out=proj_tex_out, filters=filters, template_searchpath=paths['instructor'])
+            fix_shared(paths, output_dir=os.path.dirname(proj_tex_out), dosvg=dosvg)
+            latexmk(proj_tex_out, pdf_out=paths['pdf_out'] + "/" + os.path.basename(proj_tex_out)[:-4] + ".pdf")
+
+    langs = ["Matlab", "Python", "R"]
+    info['is_project'] = False
+    for lang in langs:
+        if not make_exercises:
+            break
+        # Handle exercise 0 separately:
+
+        ex0_date = info['lectures'][0]['date'] - timedelta(days=0 if info['CE'] else 7)
+        ex0 = { 'number': 0,
+                'date': ex0_date,
+                'year': ex0_date.year,
+                'month': calendar.month_name[ex0_date.month],
+                'day': ex0_date.day}
+        all_lectures = [ex0] + info['lectures'][:-1]
+
+        exercises_to_compile = all_lectures[week:week+1] if week is not None else all_lectures
+
+        for lecture in exercises_to_compile: # not number 13.
+            w = lecture['number']
+            info['lecture'] = lecture
+            info['week'] = w
+
+            nicelang = lang.upper()
+            tb = '''<base-dir>/02450Toolbox_%s/''' % lang
+            if lang == "Matlab":
+                ext = "m"
+            elif lang == "Python":
+                ext = "py"
+            else:
+                ext = "R"
+
+            tv = {
+                  "lang": lang,
+                  "nicelang": nicelang,
+                  "tbscripts": tb + "Scripts/",
+                  "tbdata": tb + "Data/",
+                  "tbtools": tb + "Tools/",
+                  "tbname": '''02450Toolbox\\_%s''' % lang,  # "\\_": same text as before, without the invalid "\_" escape
+                  "tb": tb,
+                  "ext": ext,
+                  "tbsetup": tb + "setup.%s" % ext,
+                  "Python": lang == "Python",
+                  "Matlab": lang == "Matlab",
+                  "R": lang == "R",
+                  'HOMEWORK_PROBLEMS': HOMEWORK_PROBLEMS,
+                  }
+
+            # get lang dir
+            if not os.path.exists("%s/Exercises%s"%(paths['instructor'], lang)):
+                continue
+
+            info.update(tv)
+            ex_base =  "%s/ExercisesShared/%sex%i_Base.tex"%(paths['instructor'], course_number, w)
+            ex_tex_out = "%s/Exercises%s/Exercise%i/latex/%sex%i_%s.tex" % ( paths['instructor'], lang, w, course_number, w, lang)
+
+            if os.path.exists(ex_base):
+                jinjafy_template(info, ex_base, file_out=ex_tex_out, filters=filters, template_searchpath=paths['instructor'])
+                fix_shared(paths, output_dir=os.path.dirname(ex_tex_out), dosvg=dosvg)
+                # mvfiles(shared_tex, os.path.dirname(ex_tex_out)+"/")
+                latexmk(ex_tex_out, pdf_out=paths['pdf_out']+"/" + os.path.basename(ex_tex_out)[:-4] + ".pdf")
+
+def mvfiles(source_dir, dest_dir):  # Copy (not move) every regular file directly inside source_dir; sub-directories are skipped.
+    src_files = os.listdir(source_dir)
+    for file_name in src_files:
+        full_file_name = os.path.join(source_dir, file_name)
+        if (os.path.isfile(full_file_name)):
+            shutil.copy(full_file_name, os.path.dirname(dest_dir))  # NOTE: the target is dirname(dest_dir), i.e. dest_dir itself only if it ends with a "/"
+
+def make_webpage(dosvg=True):  # Render the homepage (and the optional widget blurb) and print the instructors' e-mail addresses.
+    cinfo = class_information()
+    paths = get_paths()
+    fix_shared(paths, output_dir=os.path.dirname(paths['homepage_out']), pdf2png=True, dosvg=dosvg)
+    wdir = paths['homepage_template']
+    jinjafy_template(cinfo, file_in=wdir, file_out=paths['homepage_out'])
+    print("Instructors for course: ")
+    s = ""
+    for dex,i in enumerate(cinfo['instructors']):  # builds "a@x; b@y; ..." from the instructors' e-mail addresses
+        if dex > 0:
+            s += "; "
+        s += i['email']
+
+    blrb = os.path.dirname(paths['homepage_template']) +"/homepage_widget_blurb_template.html"
+    if os.path.exists( blrb):  # the blurb template is optional
+        jinjafy_template(cinfo, file_in=blrb, file_out=blrb.replace("_template.html", ".html"))
+    print(s)
+
+
+# This should probably  be moved into jinjafy
+def tt(value):  # Wrap value in \texttt{...}.
+    return "\\texttt{" + value + "}"
+
+def bf(value):  # Wrap value in \textbf{...}.
+    return "\\textbf{" + value + "}"
+
+def verb(value):  # Wrap value in \verb"..." (value is not checked for double quotes).
+    return '\\verb"' + value + '"'
+
+def safetex(value):  # Escape underscores for LaTeX: every "_" becomes a backslash followed by "_".
+    return value.replace("_", "\\_")  # "\\_" is the same two characters as the former "\_", without the invalid escape sequence
+
+def slide_converter(week=None, verbose=True, clean_temporary_files=False, copy_template_resource_files=True, fix_broken_osvg_files=False, **kwargs):
+    """ Legacy function: import Lecture_<n>.pdf into a new Latex directory (if none exists), then run set_svg_background_images. """
+    print("Checking if slides should be converted from odf -> pdf format..")
+    paths = get_paths()
+    info = class_information()
+    if week is not None and not isinstance(week, (list, tuple, set)):
+        week = [week]  # a single lecture number; a collection of numbers is used as given instead of being wrapped again
+
+    for lecture in info['lectures']:
+        # for n in range(1,14):
+        n = lecture['number']
+        if week is not None and n not in week:
+            continue
+        ldir = "%s/Lecture_%i"%(paths['lectures'], n)
+        texdir = ldir +"/Latex"
+        print("Testing conversion between directories:\n   > %s -> %s"%(ldir, texdir))
+
+        if not os.path.exists(texdir):  # only lectures without a Latex directory are imported
+            os.mkdir(texdir)
+            pdf_in = "%s/Lecture_%i.pdf"%(ldir, n)
+            pdf_out = texdir +"/Lecture_%i.pdf"%n
+            shutil.copyfile(pdf_in, pdf_out)
+
+            print("operating...")
+            lecture_tex_out = li_import(pdf_out, output_dir=texdir)
+            print("Wrote new main file: " + lecture_tex_out)
+        else:
+            print("%s exists; no conversion possible. "% (texdir,))
+
+        print("Handling .svg conversion in slides..")
+        slide_tex_path = texdir +"/Lecture_%i.tex"%n
+        print("   > "+slide_tex_path)
+        set_svg_background_images(slide_tex_path,
+                                  verbose=verbose,
+                                  clean_temporary_files=clean_temporary_files,
+                                  copy_template_resource_files=copy_template_resource_files,
+                                  fix_broken_osvg_files=fix_broken_osvg_files, **kwargs)
+
+    print("Slides converted!")
diff --git a/src/coursebox/material/lecture_questions.py b/src/coursebox/material/lecture_questions.py
new file mode 100644
index 0000000..cac5f9f
--- /dev/null
+++ b/src/coursebox/material/lecture_questions.py
@@ -0,0 +1,181 @@
+import os
+import shutil
+import glob
+
+from thtools.jinjafy import jinjafy_template
+from thtools import latexmk
+from thtools import execute_command
+from thtools.slider.slider import slide_no_by_text, recursive_tex_apply
+from thtools.slider.legacy_importer import slide_to_image
+
+
+def lecture_question_compiler(paths, info, lecture_texfile):
+    """Compile the quiz questions of one lecture and export them as a quiz-import CSV file.
+
+    Each ``questions/*_base_*.tex`` file next to ``lecture_texfile`` is split into the bodies of
+    its ``question`` and ``solution`` environments. Both are rendered through
+    ``questions/question_partial.tex``, compiled with ``latexmk`` and cropped with ``pdfcrop``.
+    A PNG of the question is copied to ``paths['pdf_out']/quiz``: the page of the lecture PDF
+    found via the osvg label of the frame that mentions the question file or, failing that,
+    a ``pdftocairo`` rendering of the question PDF itself.
+
+    The correct option of a question is the number in the last ``[..]`` of its answer line
+    (the line holding the ``answer`` environment's begin command); a question without such a
+    line is reported and left out of the CSV file.
+
+    One multiple-choice record per question is collected; the records are written, separated
+    by empty lines, to ``paths['pdf_out']/quiz/dtulearn_csv/<lecture>.csv``. For question 1 of
+    ``Lecture_5.tex`` with correct option 1 a record reads::
+
+        NewQuestion,MC,
+        ID,Lecture_5_question_1
+        Title,Lecture 05: Quiz 1
+        QuestionText,Select correct option or Don't know,
+        Points,1,
+        Difficulty,1,
+        Image,images/quiz/Lecture_5_question_1.png
+        Option,100,A is correct,,
+        Option,0,B is correct,,
+        Option,0,C is correct,,
+        Option,0,D is correct,,
+        Option,0,E: Don't know,,
+
+    (The ``ID`` and ``Image`` names above assume the input file is ``question_base_1.tex``.)
+
+    :param paths: path dictionary; only ``paths['pdf_out']`` is read.
+    :param info: not used by this function.
+    :param lecture_texfile: main ``.tex`` file of the lecture; the ``.pdf`` with the same stem is
+        used for the slide lookup.
+    :raises Exception: if the ``pdftocairo`` fallback does not leave exactly one PNG.
+    """
+    lecture_latex_path = os.path.dirname(lecture_texfile)
+    lecture_pdffile = lecture_texfile[:-3] + "pdf"
+    qpath = lecture_latex_path +"/questions"
+    if not os.path.exists(qpath):
+        os.mkdir(qpath)
+    all_questions_csv = []
+    png_out = None
+    for fn in glob.glob(qpath + "/*_base_*.tex"):
+        print(fn)
+        with open(fn, 'r') as f:
+            s = f.read()
+
+        # Slice out the bodies of the question and solution environments.
+        qstart = s.find('\\begin{question}') + len("\\begin{question}")
+        qend = s.find('\\begin{solution}')
+        sstart = qend + len('\\begin{solution}')
+        send = s.find('\\end{solution}')
+
+        qes = s[qstart:qend]
+        sol = s[sstart:send]
+
+        # Inputs are named <a>_base_<no>.tex; the outputs become <a>_<no>.tex and <a>_<no>_sol.tex.
+        a,b,c = os.path.basename(fn).split("_")
+        question_no = c.split(".")[0]
+        fout_q = qpath +"/"+a + "_" + c
+        fout_sol = qpath +"/"+ a + "_" + question_no+ "_sol.tex"
+
+        data = {'text': qes}
+        jinjafy_template(data=data, file_in=lecture_latex_path +"/questions/question_partial.tex", file_out=fout_q)
+        fout_q_pdf = qpath + "/" + latexmk(fout_q, cleanup=True)
+        execute_command(["pdfcrop", fout_q_pdf, fout_q_pdf])
+
+        # get "nice" .png file (or fallback)
+        tex = recursive_tex_apply(lecture_texfile)
+        tex = "\n".join([tex[k] for k in tex])
+        qtex = os.path.basename(fout_q)
+        dex = tex.find(qtex[:-4])
+
+        if dex >= 0:
+            # The question file is mentioned in the lecture: take the osvg label of the frame
+            # it sits in and look up the page of the lecture PDF that carries this label.
+            j = tex[:dex].rfind("\\begin{frame}")
+            ol = tex[j:dex]
+            j1 = ol.find("\\osvg{")
+            j2 = ol.find("}",j1)
+
+            ol = ol[j1 + 6:j2]
+            print(lecture_pdffile, ol)
+            n = slide_no_by_text(lecture_pdffile, ol)
+            if n < 0:
+                print("Question compiler: Question missing osvg label?")
+                dex = -1
+            else:
+                png_out = fout_q_pdf[:-4] + ".png"
+                print("png_out", png_out)
+                slide_to_image(lecture_pdffile, png_out, page_to_take=n)
+        if dex < 0:
+            # Fallback: rasterize the cropped question PDF itself.
+            execute_command(["pdftocairo", fout_q_pdf, fout_q_pdf[:-4], "-png"])
+            ls = glob.glob( fout_q_pdf[:-4] +"-*.png")
+            if len(ls) > 1:
+                print("Hacky, two files exist (bad/old png conversaion code", ls)
+                l2 = glob.glob(fout_q_pdf[:-4] + "-000*.png")[0]
+                os.remove(l2)
+                ls = glob.glob(fout_q_pdf[:-4] + "-*.png")
+
+            if len(ls) != 1:
+                raise Exception("Multiple question png files found", ls)
+            png_out = ls[0] if ls else None
+            print("png_out b", png_out)
+
+        # Publish the image as <lecture stem>_<a>_<no>.png in the quiz output directory.
+        qdir = paths['pdf_out'] +"/quiz"
+        if not os.path.exists(qdir):
+            os.mkdir(qdir)
+        print("png_out c", png_out)
+        if png_out:
+            png_out2 = os.path.basename(lecture_texfile)[:-4] + "_" + a + "_" + c[:-4] + ".png"
+            print("Copying quiz png> " + png_out2)
+            print("png_out d", png_out)
+            shutil.copyfile(png_out, qdir+"/" + png_out2)
+
+        data = {'text': sol}
+        jinjafy_template(data=data, file_in=lecture_latex_path +"/questions/question_partial.tex", file_out=fout_sol)
+        fout_sol_pdf = qpath + "/" + latexmk(fout_sol)
+        execute_command(["pdfcrop", fout_sol_pdf, fout_sol_pdf])
+
+        # Now make the csv record. The number of the correct option is read from the last
+        # [..] of the last uncommented line of the question that holds \begin{answer}.
+        try:
+            ans = [l for l in qes.splitlines() if not l.strip().startswith("%") and r"\begin{answer}" in l].pop()
+        except IndexError as e:
+            print("Bad list pop", fn)
+            print(qes)
+            print(e)
+            # No answer line, so no record can be built for this question. Carrying on used to
+            # raise NameError (first question) or silently reuse the previous question's answer.
+            continue
+        correct = int( ans[ans.rfind("[")+1:ans.rfind("]")] )
+        answers = []
+
+        # 100 points for the correct option, 0 for every other one.
+        for j in range(5):
+            lbl = ([v + " is correct" for v in "ABCD"] + ["E: Don't know"] )[j]
+            points = "100" if j+1 == correct else "0"
+            answers.append( f"Option,{points},{lbl},," )
+
+        if png_out is not None:
+            l, n = os.path.basename(lecture_texfile[:-4]).split("_")
+            n = "0"+n if len(n) < 2 else n
+            # Each element must be one row of its own: the comma after the first element was
+            # missing, which glued "NewQuestion,MC," and the ID row together on one line.
+            lines = [ "NewQuestion,MC,",
+                      f"ID,{png_out2[:-4]}",
+                      f"Title,{l} {n}: Quiz {question_no}",
+                      "QuestionText,Select correct option or Don't know,",
+                      "Points,1,",
+                      "Difficulty,1,",
+                      f"Image,images/quiz/{png_out2}"] + answers
+            all_questions_csv.append("\n".join(lines))
+        print("Compiled question: %s"%(fout_q_pdf,))
+
+    # Write the collected records, if any. (Testing the list rather than the last question's
+    # png_out keeps this independent of how the final loop iteration ended.)
+    if all_questions_csv:
+        s = "\n\n".join(all_questions_csv)
+        csv_base = qdir + "/dtulearn_csv"
+        if not os.path.isdir(csv_base):
+            os.mkdir(csv_base)
+        with open(csv_base + "/" + os.path.basename(lecture_pdffile)[:-3] + "csv", 'w') as f:
+            f.write(s)
diff --git a/src/coursebox/material/snipper.py b/src/coursebox/material/snipper.py
new file mode 100644
index 0000000..77f330f
--- /dev/null
+++ b/src/coursebox/material/snipper.py
@@ -0,0 +1,461 @@
+from thtools.coursebox.core.info import find_tex_cite
+import os
+import functools
+from thtools import execute_command
+import textwrap
+import re
+
+COMMENT = '"""'
+def indent(l):
+    """Return the leading whitespace of ``l`` (the whole string if it is all whitespace)."""
+    return l[:len(l) - len(l.lstrip())]
+
+def fix_r(lines):
+    """Remove ``#!r`` markers in place: a tagged line becomes its indentation plus the text after the tag. Returns ``lines``."""
+    tag = "#!r"
+    lines[:] = [indent(l) + l[l.find(tag) + len(tag):].lstrip() if tag in l else l for l in lines]
+    return lines
+
+def gcoms(s):
+    """Cut every triple-quoted block out of ``s``; return ``(blocks, remaining_text)``. Raises ``Exception`` if a block is not closed."""
+    coms = []
+    i = s.find(COMMENT)
+    while i >= 0:
+        j = s.find(COMMENT, i + len(COMMENT))
+        if j < 0:  # test the raw find() result; adding the tag length first made this check unreachable
+            raise Exception("comment tag not closed")
+        j += len(COMMENT)
+        coms.append(s[i:j])
+        s = s[:i] + s[j:]
+        if len(coms) > 10:
+            print("long comments in file", i)
+        i = s.find(COMMENT)
+    return coms, s
+
+def strip_tag(lines, tag):
+    """Cut each line at the first ``tag`` found past column 0 and drop lines that the cut leaves blank; returns a new list."""
+    kept = []
+    for line in lines:
+        pos = line.find(tag)
+        if pos > 0:
+            line = line[:pos]
+            if not line.strip():
+                continue
+        kept.append(line)
+    return kept
+
+def block_process(lines, tag, block_fun):
+    """Replace each region of ``lines`` delimited by ``tag`` with the lines returned by ``block_fun``.
+    A region is a single line holding the tag twice, or everything from a tagged line through the next tagged line.
+    Returns ``(lines2, didfind, block_out, cutout)``: new lines, whether a region was found, block_fun's 2nd results, region bodies."""
+    i = 0
+    didfind = False
+    lines2 = []
+    block_out = []
+    cutout = []
+    while i < len(lines):
+        l = lines[i]
+        dx = l.find(tag)
+        if dx >= 0:
+            if l.find(tag, dx+1) > 0:  # tag occurs twice: the region opens and closes on this line
+                j = i
+            else:
+                for j in range(i + 1, 10000):  # scan forward for the line holding the closing tag
+                    if j >= len(lines):
+                        print("\n".join(lines))
+                        print("very bad end-line j while fixing tag", tag)
+                        raise Exception("Bad line while fixing", tag)
+                    if lines[j].find(tag) >= 0:
+                        break
+            pbody = lines[i:j+1]
+            if i == j:
+                start_extra = lines[j][dx:lines[j].rfind(tag)].strip()
+                end_extra = lines[j][lines[j].rfind(tag) + len(tag):].strip()
+            else:
+                start_extra = lines[i][dx:].strip()
+                end_extra = lines[j][lines[j].rfind(tag) + len(tag):].strip()
+            # cutout keeps a reference to pbody, so it also sees the tag-trimming applied to pbody further down.
+            cutout.append(pbody)
+            tmp_ = start_extra.split("=")
+            arg = None if len(tmp_) <= 1 else tmp_[1].split(" ")[0]  # text between the first '=' and the next space, if any
+            start_extra = ' '.join(start_extra.split(" ")[1:] )  # drop the first word, i.e. the tag (with its '=arg')
+            # Cut the tag and everything after it off the first and the last line of the region.
+            pbody[0] = pbody[0][:dx]
+            if j > i:
+                pbody[-1] = pbody[-1][:pbody[-1].find(tag)]
+            nlines, extra = block_fun(lines=pbody, start_extra=start_extra, end_extra=end_extra, art=arg, head=lines[:i], tail=lines[j+1:])  # always called with keywords
+            lines2 += nlines
+            block_out.append(extra)
+            i = j+1
+            didfind = True
+            if "!b" in end_extra:  # NOTE(review): hard stop when the text after the closing tag contains "!b"; presumably catches unbalanced tags - confirm
+                assert(False)
+        else:
+            lines2.append(l)
+            i += 1
+    return lines2, didfind, block_out, cutout
+
+
+def rem_nonprintable_ctrl_chars(txt):
+    """Return ``txt`` stripped of everything except printable ASCII (0x20-0x7E), tab and newline."""
+    # The former ``repr(txt).decode('unicode_escape')`` step only worked on Python 2; on Python 3 it raised
+    # AttributeError on every call, which was caught and printed. The regex already leaves nothing but
+    # ASCII behind, so that step is dropped and the spurious message is gone. ('|' is part of 0x20-0x7E.)
+    try:
+        txt = re.sub(r'[^\x20-\x7E\x09-\x0A]', '', txt)
+    except TypeError as exception:  # non-string input: report it and hand the value back unchanged, as before
+        print(exception)
+    return txt
+
+
+def run_i(lines, file, output):
+    """Run every ``#!i`` block of ``lines`` in an interactive ``python`` session and save the transcripts.
+
+    The session is spawned on first use (wexpect if ``thtools.is_win()``, else pexpect) and shared by all blocks.
+    Transcripts go to ``<output>[_<art>].shell``. ``file`` is only used in the error message. Returns ``lines`` as passed in.
+    """
+    extra = dict(python=None, output=output, evaluated_lines=0)
+    def block_fun(lines, start_extra, end_extra, art, head="", tail="", output=None, extra=None):
+        outf = output + ("_" + art if art is not None and len(art) > 0 else "") + ".shell"
+        lines = full_strip(lines)
+        s = "\n".join(lines)
+        s.replace("...", "..") # passive-aggressively truncate ... because of #issues. NOTE(review): the result is discarded, so nothing is truncated.
+        lines = textwrap.dedent(s).strip().splitlines()
+
+        if extra['python'] is None:
+            import thtools
+            if thtools.is_win():
+                import wexpect as we
+            else:
+                import pexpect as we
+            an = we.spawn("python", encoding="utf-8", timeout=20)
+            an.expect([">>>"])
+            extra['python'] = an
+
+        analyzer = extra['python']
+        def rsession(analyzer, lines):
+            # Add an extra line after each indented run that is followed by unindented code other than else:/elif.
+            l2 = []
+            for i, l in enumerate(lines):
+                l2.append(l)
+                if l.startswith(" ") and i < len(lines)-1 and not lines[i+1].startswith(" "):
+                    if not lines[i+1].strip().startswith("else:") and not lines[i+1].strip().startswith("elif") :
+                        l2.append("\n")
+
+            lines = l2
+            alines = []
+
+            in_dot_mode = False
+            if len(lines[-1]) > 0 and (lines[-1].startswith(" ") or lines[-1].startswith("\t")):
+                lines += [""]  # make sure a trailing indented line is followed by an empty one
+
+            for i, word in enumerate(lines):
+                analyzer.sendline(word)
+                before = ""  # NOTE: accumulated below but never read
+                while True:
+                    analyzer.expect_exact([">>>", "..."])
+                    before += analyzer.before
+                    if analyzer.before.endswith("\n"):  # only a prompt match preceded by a newline ends the wait
+                        break
+                    else:
+                        before += analyzer.after
+
+                dotmode = analyzer.after == "..."
+                if 'dir(s)' in word:
+                    pass
+                if 'help(s.find)' in word:
+                    pass
+                if dotmode:
+                    alines.append(">>>" + analyzer.before.rstrip() if not in_dot_mode else "..." + analyzer.before.rstrip())
+                    in_dot_mode = True
+                else:
+                    alines.append( ("..." if in_dot_mode else ">>>") + analyzer.before.rstrip())
+                    in_dot_mode = False
+            return alines
+
+        # Send the lines of ``head`` from index ``evaluated_lines`` on to the session before running the block itself.
+        for l in (head[extra['evaluated_lines']:] + ["\n"]):
+            analyzer.sendline(l)
+            analyzer.expect_exact([">>>", "..."])
+
+
+        alines = rsession(analyzer, lines)
+        extra['evaluated_lines'] += len(head) + len(lines)  # NOTE(review): head is the whole prefix of the file, so this looks like it over-counts - confirm
+        lines = alines
+        return lines, [outf, lines]
+    try:
+        a,b,c,_ = block_process(lines, tag="#!i", block_fun=functools.partial(block_fun, output=output, extra=extra))
+        if extra['python'] is not None:
+            extra['python'].close()
+
+        if len(c)>0:
+            kvs= { v[0] for v in c}
+            for outf in kvs:
+                out = "\n".join( ["\n".join(v[1]) for v in c if v[0] == outf] )  # all transcripts aimed at the same file
+                out = out.replace("\r", "")
+
+                with open(outf, 'w') as f:
+                    f.write(out)
+
+    except Exception as e:
+        print("lines are")
+        print("\n".join(lines))
+        print("Bad thing in #!i command in file", file)
+        raise e
+    return lines
+
+def save_s(lines, file, output, include_path_base=None): # save file snips to disk
+    """Write the ``#!s`` snippets of ``lines`` to ``<output>[_<art>].py`` files; returns ``lines`` as passed in.
+
+    Snippets aimed at the same file are joined with newlines below a ``# <include_path_base>`` header line.
+    """
+    def collect(lines, start_extra, end_extra, art, output, **kwargs):
+        suffix = "_" + art if art is not None and len(art) > 0 else ""
+        stripped = full_strip(lines)
+        return stripped, [output + suffix + ".py", stripped]
+
+    try:
+        _, _, snippets, _ = block_process(lines, tag="#!s", block_fun=functools.partial(collect, output=output))
+        # One output file per distinct target name.
+        for target in {entry[0] for entry in snippets}:
+            parts = [f"# {include_path_base}"] + ["\n".join(entry[1]) for entry in snippets if entry[0] == target]
+            with open(target, 'w') as f:
+                f.write("\n".join(parts))
+    except Exception as e:
+        print("lines are")
+        print("\n".join(lines))
+        print("Bad thing in #!s command in file", file)
+        raise e
+    return lines
+
+def run_o(lines, file, output):
+    """Run a copy of ``file`` in which each ``#!o`` block redirects ``sys.stdout`` to ``<output>[_<art>].txt``.
+
+    The copy is written as ``<name>_RUN_OUTPUT_CAPTURE<ext>``, run with ``python`` and removed; nothing runs if there is no ``#!o`` block or the copy already exists.
+    """
+    def redirect(lines, start_extra, end_extra, art, output, **kwargs):
+        lead = indent(lines[0])
+        target = output + ("_" + art if art is not None else "") + ".txt"
+        opening = [lead + "import sys", lead + f"sys.stdout = open('{target}', 'w')"]
+        closing = [indent(lines[-1]) + "sys.stdout = sys.__stdout__"]
+        return opening + lines + closing, None
+
+    try:
+        wrapped, found, _, _ = block_process(lines, tag="#!o", block_fun=functools.partial(redirect, output=output) )
+    except Exception as e:
+        print("Bad file: ", file)
+        print("I was cutting the #!o tag")
+        print("\n".join( lines) )
+        raise(e)
+    if not found:
+        return
+    stem, ext = os.path.splitext(file)
+    file_run = stem + "_RUN_OUTPUT_CAPTURE" + ext
+    if os.path.exists(file_run):
+        print("file found mumble...")
+        return
+    with open(file_run, 'w', encoding="utf-8") as f:
+        f.write("\n".join(wrapped) )
+    s,ok = execute_command(("python " + file_run).split(), shell=True)
+    print(s)
+    os.remove(file_run)
+
+def fix_f(lines, debug):
+    """Wrap the indented block that follows each ``#!f`` line in ``#!b`` tags (after its first triple-quoted block) and return the new lines.
+    Text after ``#!f`` becomes the message on the closing ``#!b`` tag (default "Implement function body"). ``debug`` is not used."""
+    lines2 = []
+    i = 0
+    while i < len(lines):
+        l = lines[i]
+        dx = l.find("#!f")
+        if dx >= 0:
+            l_head = l[dx+3:].strip()
+            l = l[:dx]
+            lines2.append(l)
+            id = indent(lines[i+1])  # indentation of the first body line; NOTE(review): IndexError if the tag is on the last line
+            for j in range(i+1, 10000):  # find the first non-blank line indented less than the body, or the last line
+                jid = len( indent(lines[j]) )
+                if  j+1 == len(lines) or ( jid < len(id) and len(lines[j].strip() ) > 0):
+                    break
+            if len(lines[j-1].strip()) == 0:  # leave one blank line directly before the end outside the body
+                j = j - 1
+            funbody = "\n".join( lines[i+1:j] )  # line j itself is excluded; NOTE(review): also when j is the file's last line - confirm intended
+            if i == j:
+                raise Exception("Empty function body")
+            i = j
+            comments, funrem = gcoms(funbody)
+            comments = [id + c for c in comments]
+            if len(comments) > 0:
+                lines2 += comments[0].split("\n")  # only the first triple-quoted block is re-emitted; any others are dropped
+            lines2 += [id+"#!b"]
+            lines2 += (id+funrem.strip()).split("\n")
+            errm = l_head if len(l_head) > 0 else "Implement function body"
+            lines2 += [f'{id}#!b {errm}']
+        else:
+            lines2.append(l)
+            i += 1
+    return lines2
+
+def fix_b2(lines):
+    """Replace every ``#!b`` block by a TODO comment and ``raise NotImplementedError(<text after the closing tag>)``.
+    Returns ``(new_lines, n_removed, cutout)``: the censored lines, the number of removed lines and the cut-out blocks."""
+    counter = {'n': 0}
+    def censor(lines, start_extra, end_extra, art, stats=None, **kwargs):
+        pad = indent(lines[0])
+        lines = lines if lines[0].strip() else lines[1:]
+        lines = lines if lines[-1].strip() else lines[:-1]
+        removed = len(lines)
+        message = end_extra.strip()
+        if len(message) >= 2 and message[0] == '"':
+            message = message[1:-1]
+        keep = start_extra.strip()
+        stub = [pad + keep] if keep else []
+        stub += [pad + f"# TODO: {removed} lines missing.", pad + f'raise NotImplementedError("{message}")']
+        stats['n'] += removed
+        return stub, removed
+    new_lines, _, _, cutout = block_process(lines, tag="#!b", block_fun=functools.partial(censor, stats=counter))
+    return new_lines, counter['n'], cutout
+
+
+def fix_references(lines, info, strict=True):  # apply fix_single_reference once per command in info['new_references']
+    for cmd, aux in info['new_references'].items():
+        lines = fix_single_reference(lines, cmd, aux, strict=strict)
+    return lines
+
+def fix_single_reference(lines, cmd, aux, strict=True):
+    """Replace every ``cmd`` reference in ``lines`` by ``aux[label]['pyref']``; returns the new list of lines.
+    With ``strict`` an unknown label raises ``IndexError``; otherwise it is reported and left in place."""
+    s = "\n".join(lines)
+    i = 0
+    while True:
+        (i, j), reference, txt = find_tex_cite(s, start=i, key=cmd)
+        if i < 0:
+            break
+        if reference not in aux:
+            er = "cref label not found for label: " + reference
+            if strict:
+                raise IndexError(er)
+            print(er)
+            # Resume behind the unresolved reference (j is the index of its last character, cf. the
+            # s[j + 1:] splice below). Restarting the search at i found the same reference forever.
+            i = j + 1
+            continue
+        rtxt = aux[reference]['pyref']
+        s = s[:i] + rtxt + s[j + 1:]
+        i = i + len(rtxt)
+        print(cmd, rtxt)
+    return s.splitlines(keepends=False)
+
+
+def fix_cite(lines, info, strict=True):
+    """Resolve references and ``\\cite`` commands in ``lines`` and return the result as ONE string.
+
+    Each citation becomes ``(<label>[, <txt>])``. The ``'plain'`` entries of the cited works are listed
+    under ``References:`` just before the end of the leading triple-quoted block, which is created
+    (holding the ``{{copyright}}`` placeholder) if the text does not start with one. Finally the
+    placeholder is replaced by ``info['code_copyright']``.
+    """
+    text = "\n".join(fix_references(lines, info, strict=strict))
+    pos = 0
+    cited = []
+    while True:
+        (pos, end), key, note = find_tex_cite(text, start=pos, key="\\cite")
+        if pos < 0:
+            break
+        if key not in info['references']:
+            raise IndexError("no such reference: " + key)
+        entry = info['references'][key]
+        inline = f"({entry['label']}" + ("" if note is None else ", " + note) + ")"
+        if entry['plain'] not in cited:
+            cited.append(entry['plain'])
+        text = text[:pos] + inline + text[end+1:]
+        pos += len(inline)
+
+    placeholder = "{{copyright}}"
+    if not text.startswith(COMMENT):
+        text = COMMENT + "\n" + placeholder + "\n" + COMMENT + "\n" + text
+    if cited:
+        at = text.find(COMMENT, text.find(COMMENT)+1)
+        text = text[:at] + "\nReferences:\n" + "\n".join("  " + r for r in cited) + "\n" + text[at:]
+    return text.replace(placeholder, info['code_copyright'])
+
+def full_strip(lines, tags=None):
+    """Run ``strip_tag`` over ``lines`` once per tag in ``tags`` and return the result.
+    ``tags`` defaults to ``#!s``, ``#!o``, ``#!f`` and ``#!b``."""
+    for tag in (["#!s", "#!o", "#!f", "#!b"] if tags is None else tags):
+        lines = strip_tag(lines, tag)
+    return lines
+
+def censor_file(file, info, paths, run_files=True, run_out_dirs=None, cut_files=True, solution_list=None,
+                censor_files=True,
+                include_path_base=None,
+                strict=True):
+    """Process the source file ``file`` in place; return the line count reported by ``fix_b2`` (0 if ``censor_files`` is false).
+
+    Steps: turn ``# !`` into ``#!``; resolve citations (``fix_cite``); optionally run the ``#!o``/``#!i`` blocks and save the ``#!s``
+    snippets (output names are built from the first entry of ``run_out_dirs``); strip those tags; optionally censor ``#!f``/``#!b``
+    blocks; resolve ``#!r``; optionally write cut-out blocks selected by ``solution_list`` to a solutions folder; write ``file``."""
+    dbug = False
+    with open(file, 'r', encoding='utf8') as f:
+        s = f.read()
+        s = s.lstrip()
+        lines = s.split("\n")
+        for k, l in enumerate(lines):
+            if l.find(" # !") > 0:
+                print(f"{file}:{k}> bad snipper tag, fixing")
+            lines[k] = l.replace("# !", "#!")  # applied to every line, not only to the ones reported above
+        try:
+            s = fix_cite(lines, info, strict=strict)
+            lines = s.split("\n")
+        except IndexError as e:
+            print(e)
+            print("Fuckup in file, cite/reference tag not found!>", file)
+            raise e
+
+        if run_files or cut_files:
+            ofiles = []
+            for rod in run_out_dirs:  # NOTE(review): the default run_out_dirs=None fails here (and an empty list fails on ofiles[0])
+                if not os.path.isdir(rod):
+                    os.mkdir(rod)
+                ofiles.append(os.path.join(rod, os.path.basename(file).split(".")[0]) )
+            ofiles[0] = ofiles[0].replace("\\", "/")  # only the first output location is used below
+            if run_files:
+                run_o(lines, file=file, output=ofiles[0])
+                run_i(lines, file=file, output=ofiles[0])
+            if cut_files:
+                save_s(lines, file=file, output=ofiles[0], include_path_base=include_path_base)  # save file snips to disk
+        lines = full_strip(lines, ["#!s", "#!o", '#!i'])
+
+        if censor_files:
+            lines = fix_f(lines, dbug)
+            lines, nB, cut = fix_b2(lines)
+        else:
+            nB = 0
+        lines = fix_r(lines)
+
+        if censor_files and len(cut) > 0 and solution_list is not None:  # ``cut`` only exists when censor_files is true, hence the order
+            fname = file.__str__()
+            i = fname.find("irlc")
+            wk = fname[i+5:fname.find("\\", i+6)]  # NOTE(review): assumes a backslash-separated path of the form ...irlc\<dir>\... - confirm
+            sp = paths['02450students'] +"/solutions/"
+            if not os.path.exists(sp):
+                os.mkdir(sp)
+            sp = sp + wk
+            if not os.path.exists(sp):
+                os.mkdir(sp)
+            stext = ["\n".join(lines) for lines in cut]
+            for i,sol in enumerate(stext):
+                sout = sp + f"/{os.path.basename(fname)[:-3]}_TODO_{i+1}.py"
+                wsol = any([True for s in solution_list if os.path.basename(sout).startswith(s)])  # written only if an entry prefixes the name
+                print(sout, "(published)" if wsol else "")
+                if wsol:
+                    with open(sout, "w") as f:
+                        f.write(sol)
+        if len(lines[-1])>0:  # make sure the written text ends with a newline
+            lines.append("")
+        s2 = "\n".join(lines)
+
+    with open(file, 'w', encoding='utf-8') as f:
+        f.write(s2)
+    return nB
+# lines: 294, 399, 420, 270
\ No newline at end of file
diff --git a/src/coursebox/setup_coursebox.py b/src/coursebox/setup_coursebox.py
new file mode 100644
index 0000000..aca8307
--- /dev/null
+++ b/src/coursebox/setup_coursebox.py
@@ -0,0 +1,20 @@
+from coursebox.core import info_paths
+
+def setup_coursebox(working_dir, course_number="02450", semester='spring', year=2019,
+    slides_showsolutions=True,
+    slides_includelabels=False,
+    continuing_education_mode = False,
+    slides_shownotes=False,
+    continuing_education_month = "March", **kwargs):
+    """Record the course configuration in ``info_paths.core_conf``.
+    Every named argument is stored under its own name; additional keyword arguments are stored as given."""
+    settings = dict(
+        working_dir=working_dir, course_number=course_number, semester=semester, year=year,
+        slides_showsolutions=slides_showsolutions, slides_includelabels=slides_includelabels,
+        continuing_education_mode=continuing_education_mode,
+        continuing_education_month=continuing_education_month,
+        slides_shownotes=slides_shownotes,
+    )
+    settings.update(kwargs)
+    for key, value in settings.items():
+        info_paths.core_conf[key] = value
-- 
GitLab