Skip to content
Snippets Groups Projects
Commit 5e943cbe authored by tuhe's avatar tuhe
Browse files

Updates to lecture examples and some documentation fixes

parent 4d78e8d8
Branches
No related tags found
No related merge requests found
Showing
with 15 additions and 148 deletions
...@@ -3,7 +3,7 @@ exam_tabular_examples ...@@ -3,7 +3,7 @@ exam_tabular_examples
#solutions/ex01 #solutions/ex01
#solutions/ex02 #solutions/ex02
solutions/ex03 #solutions/ex03
solutions/ex04 solutions/ex04
solutions/ex05 solutions/ex05
solutions/ex06 solutions/ex06
...@@ -29,8 +29,8 @@ solutions/ex13 ...@@ -29,8 +29,8 @@ solutions/ex13
# irlc/tests/tests_week01.py # irlc/tests/tests_week01.py
#irlc/tests/tests_week02.py #irlc/tests/tests_week02.py
irlc/tests/tests_week03.py #irlc/tests/tests_week03.py
irlc/tests/tests_week04.py #irlc/tests/tests_week04.py
irlc/tests/tests_week05.py irlc/tests/tests_week05.py
irlc/tests/tests_week06.py irlc/tests/tests_week06.py
irlc/tests/tests_week07.py irlc/tests/tests_week07.py
...@@ -66,8 +66,8 @@ irlc/exam/exam20*/solution ...@@ -66,8 +66,8 @@ irlc/exam/exam20*/solution
# irlc/lectures/lec01 # irlc/lectures/lec01
# irlc/lectures/lec02 # irlc/lectures/lec02
irlc/lectures/lec03 #irlc/lectures/lec03
irlc/lectures/lec04 #irlc/lectures/lec04
irlc/lectures/lec05 irlc/lectures/lec05
irlc/lectures/lec06 irlc/lectures/lec06
irlc/lectures/lec07 irlc/lectures/lec07
......
...@@ -26,7 +26,7 @@ def DP_stochastic(model: DPModel) -> tuple[list[dict], list[dict]]: ...@@ -26,7 +26,7 @@ def DP_stochastic(model: DPModel) -> tuple[list[dict], list[dict]]:
- ``pi`` - A list of dictionaries so that ``pi[k][x]`` represents :math:`\mu_k(x)` - ``pi`` - A list of dictionaries so that ``pi[k][x]`` represents :math:`\mu_k(x)`
""" """
""" r"""
In case you run into problems, I recommend following the hints in (Her25, Subsection 6.2.1) and focus on the In case you run into problems, I recommend following the hints in (Her25, Subsection 6.2.1) and focus on the
case without a noise term; once it works, you can add the w-terms. When you don't loop over noise terms, just specify case without a noise term; once it works, you can add the w-terms. When you don't loop over noise terms, just specify
them as w = None in env.f and env.g. them as w = None in env.f and env.g.
...@@ -37,7 +37,7 @@ def DP_stochastic(model: DPModel) -> tuple[list[dict], list[dict]]: ...@@ -37,7 +37,7 @@ def DP_stochastic(model: DPModel) -> tuple[list[dict], list[dict]]:
J[N] = {x: model.gN(x) for x in model.S(model.N)} J[N] = {x: model.gN(x) for x in model.S(model.N)}
for k in range(N-1, -1, -1): for k in range(N-1, -1, -1):
for x in model.S(k): for x in model.S(k):
""" r"""
Update pi[k][x] and Jstar[k][x] using the general DP algorithm given in (Her25, Algorithm 1). Update pi[k][x] and Jstar[k][x] using the general DP algorithm given in (Her25, Algorithm 1).
If you implement it using the pseudo-code, I recommend you define Q (from the algorithm) as a dictionary like the J-function such that If you implement it using the pseudo-code, I recommend you define Q (from the algorithm) as a dictionary like the J-function such that
...@@ -61,9 +61,9 @@ def DP_stochastic(model: DPModel) -> tuple[list[dict], list[dict]]: ...@@ -61,9 +61,9 @@ def DP_stochastic(model: DPModel) -> tuple[list[dict], list[dict]]:
return J, pi return J, pi
if __name__ == "__main__": # Test dp on small graph given in (Her25, Subsection 6.2.1) if __name__ == "__main__": # Test dp on the deterministic version of the inventory control environment.
print("Testing the deterministic DP algorithm on the small graph environment") print("Testing the deterministic DP algorithm on the small graph environment")
model = DeterministicInventoryDPModel() # Instantiate the small graph with target node 5 model = DeterministicInventoryDPModel() # Instantiate the inventory control environment
J, pi = DP_stochastic(model) J, pi = DP_stochastic(model)
# Print all optimal cost functions J_k(x_k) # Print all optimal cost functions J_k(x_k)
for k in range(len(J)): for k in range(len(J)):
......
...@@ -9,7 +9,10 @@ During the exam: ...@@ -9,7 +9,10 @@ During the exam:
- For the midterm, you can find the file on DTU Learn - For the midterm, you can find the file on DTU Learn
- The `zip` file will contain the toolbox code including solutions. It will also contain a directory: - The `zip` file will contain the toolbox code including solutions. It will also contain a directory:
```bash ```bash
irlc/exam/exam2024spring irlc/exam/exam2025spring
``` ```
- This directory contains the code you need to work on for the exam. Replace the directory on your local computer with this directory and you should be all set up - This directory contains the code you need to work on for the exam. Replace the directory on your local computer with this directory and you should be all set up
- The `.zip` file will also contain solutions to nearly all exercises. Use these if benefits you. - The `.zip` file will also contain solutions to nearly all exercises. Use these if benefits you.
## Midterms
The two 'midterms' were used for practice during the first year with written exams. There are no midterms in the current version of the course.
...@@ -10,8 +10,7 @@ def ppac(layout_str, name="pac"): ...@@ -10,8 +10,7 @@ def ppac(layout_str, name="pac"):
env = PacmanEnvironment(layout=None, layout_str=layout_str, animate_movement=True) env = PacmanEnvironment(layout=None, layout_str=layout_str, animate_movement=True)
agent = Agent(env) agent = Agent(env)
env, agent = interactive(env, agent) env, agent = interactive(env, agent)
# agent = PlayWrapper(agent, env)
# env = VideoMonitor(env)
stats, _ = train(env, agent, num_episodes=5, max_steps=8) stats, _ = train(env, agent, num_episodes=5, max_steps=8)
print("Accumulated reward for all episodes:", [s['Accumulated Reward'] for s in stats]) print("Accumulated reward for all episodes:", [s['Accumulated Reward'] for s in stats])
env.close() env.close()
......
...@@ -4,20 +4,14 @@ from irlc.ex02.dp_agent import DynamicalProgrammingAgent ...@@ -4,20 +4,14 @@ from irlc.ex02.dp_agent import DynamicalProgrammingAgent
from gymnasium.wrappers import TimeLimit from gymnasium.wrappers import TimeLimit
from irlc.pacman.pacman_environment import PacmanWinWrapper from irlc.pacman.pacman_environment import PacmanWinWrapper
from irlc.ex01.agent import train from irlc.ex01.agent import train
# from irlc import VideoMonitor
# from irlc.ex02.old.dp_pacman import DPPacmanModel
from irlc.lectures.chapter3dp.dp_pacman import DPPacmanModel from irlc.lectures.chapter3dp.dp_pacman import DPPacmanModel
# from irlc import PlayWrapper
from irlc import interactive from irlc import interactive
def simulate_1_game(layout_str): def simulate_1_game(layout_str):
N = 30 N = 30
env = PacmanEnvironment(layout=None, layout_str=layout_str, render_mode='human') env = PacmanEnvironment(layout=None, layout_str=layout_str, render_mode='human')
# env = VideoMonitor(env, fps=3)
model = DPPacmanModel(env, N=N, verbose=True) model = DPPacmanModel(env, N=N, verbose=True)
agent = DynamicalProgrammingAgent(env, model=model) agent = DynamicalProgrammingAgent(env, model=model)
# agent = PlayWrapper(agent, env)
env, agent = interactive(env, agent) env, agent = interactive(env, agent)
env = TimeLimit(env, max_episode_steps=N) env = TimeLimit(env, max_episode_steps=N)
env = PacmanWinWrapper(env) env = PacmanWinWrapper(env)
......
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. # This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc import Agent, interactive from irlc import train, interactive
from irlc.lectures.lec01.viz_inventory_environment import VizInventoryEnvironment from irlc.lectures.lec01.viz_inventory_environment import VizInventoryEnvironment
if __name__ == "__main__": if __name__ == "__main__":
env = VizInventoryEnvironment(render_mode='human') env = VizInventoryEnvironment(render_mode='human')
from irlc.ex02.inventory import InventoryDPModel from irlc.ex02.inventory import InventoryDPModel
from irlc.ex02.dp_agent import DynamicalProgrammingAgent from irlc.ex02.dp_agent import DynamicalProgrammingAgent
from irlc import train, interactive
agent = DynamicalProgrammingAgent(env, model=InventoryDPModel()) agent = DynamicalProgrammingAgent(env, model=InventoryDPModel())
env, agent = interactive(env, agent) env, agent = interactive(env, agent)
......
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc import Agent, train, savepdf
from irlc.pacman.pacman_environment import PacmanEnvironment
from irlc.ex03.dp_forward import dp_forward
from irlc.ex03.search_problem import SearchProblem
from irlc.ex03.search_problem import EnsureTerminalSelfTransitionsWrapper
from irlc.ex03.pacman_search import layout2, layout1

if __name__ == "__main__":
    # Render each exercise layout once and save a PDF snapshot for the notes.
    env = PacmanEnvironment(layout_str=layout1, render_mode='human')
    env.reset()
    savepdf("ex03_layout1", env=env)
    env.close()

    # Bug fix: this snapshot previously rendered layout1 again, so the file
    # named "ex03_layout2" showed the wrong maze. Use layout2, which was
    # imported but never used.
    env = PacmanEnvironment(layout_str=layout2, render_mode='human')
    env.reset()
    savepdf("ex03_layout2", env=env)
    env.close()
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.ex03multisearch.multisearch_alphabeta import GymAlphaBetaAgent
from irlc.lectures.lec03.lecture_03_minimax import gminmax

if __name__ == "__main__":
    # Interactive Pacman demo: minimax with alpha-beta pruning, search depth 3.
    gminmax(Agent=GymAlphaBetaAgent, depth=3)
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.lectures.lec03.lecture_03_dotsearch_dp import singledot
from irlc.lectures.chapter4search.yield_version.pacman_yield import AStarAgentYield
from irlc.ex03multisearch.pacman_problem_positionsearch_astar import manhattanHeuristic

if __name__ == "__main__":
    # A* search on the single-dot maze, guided by the Manhattan-distance heuristic.
    singledot(SAgent=AStarAgentYield, agent_args=dict(heuristic=manhattanHeuristic))
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.lectures.lec03.lecture_03_dotsearch_dp import singledot
from irlc.lectures.chapter4search.yield_version.pacman_yield import BFSAgentYield

if __name__ == "__main__":
    # Breadth-first search demo on the single-dot maze; BFS needs no heuristic.
    singledot(SAgent=BFSAgentYield)
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.lectures.lec03.lecture_03_dotsearch_dp import singledot
from irlc.lectures.chapter4search.yield_version.pacman_yield import DFSAgentYield

if __name__ == "__main__":
    # Depth-first search demo on the single-dot maze; DFS needs no heuristic.
    singledot(SAgent=DFSAgentYield)
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.lectures.chapter4search.yield_version.pacman_yield import stest, ForwardDPSearchAgent, dargs
# from irlc.ex03.pacsearch_agents import GymPositionSearchProblem, manhattanHeuristic, GymCornersProblem, cornersHeuristic, foodHeuristic, GymFoodSearchProblem, GymAnyFoodSearchProblem
from irlc.ex03multisearch.pacman_problem_positionsearch import GymPositionSearchProblem#, manhattanHeuristic


def singledot(layout='smallMaze', SAgent=None, agent_args=None, layout_str=None):
    """Run a search-agent demo on the single-dot position-search Pacman problem.

    :param layout: Name of the built-in maze layout to load.
    :param SAgent: Search-agent class to instantiate (BFS/DFS/A*/forward-DP).
    :param agent_args: Optional dict of keyword arguments forwarded to the agent.
    :param layout_str: Optional explicit layout string overriding ``layout``.
    """
    stest(layout=layout, layout_str=layout_str, SAgent=SAgent,
          prob=GymPositionSearchProblem(), agent_args=agent_args,
          zoom=2, **dargs, fps=30)


if __name__ == "__main__":
    # Demo: forward dynamic-programming search agent with horizon N=30.
    singledot(SAgent=ForwardDPSearchAgent, agent_args=dict(N=30))
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.ex03multisearch.multisearch_agents import GymExpectimaxAgent
from irlc.lectures.lec03.lecture_03_minimax import gminmax

if __name__ == "__main__":
    # Interactive Pacman demo: expectimax adversarial search at depth 3.
    gminmax(Agent=GymExpectimaxAgent, depth=3)
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.ex01.agent import train
from irlc.pacman.pacman_environment import GymPacmanEnvironment
from irlc.utils.video_monitor import VideoMonitor
from irlc.ex03multisearch.multisearch_agents import GymMinimaxAgent
# Hand-drawn maze: % = wall, P = Pacman start, G = ghost, . = food dot.
# NOTE(review): interior row spacing may have been mangled by text extraction —
# verify the layout columns against the repository before relying on it.
layout_str = """
%%%%%%%%%
% %
% %%%% %
% %
% P %
%%%% %
%%%% .G %
%%%% %
%%%%%%%%%
""".strip()
def gminmax(layout='smallClassic', layout_str=layout_str, Agent=None, depth=3, **kwargs):
    """Run an interactive adversarial-search Pacman demo.

    :param layout: Built-in layout name passed to the environment.
    :param layout_str: Explicit layout string (defaults to the maze above).
    :param Agent: Agent class; instantiated as ``Agent(env, depth=depth)``.
    :param depth: Search depth passed to the adversarial agent.
    :param kwargs: Extra keyword arguments forwarded to the environment.
    """
    zoom = 2  # render scale for the Pacman window
    env = GymPacmanEnvironment(layout=layout, layout_str=layout_str, zoom=zoom, **kwargs)
    agent = Agent(env, depth=depth)
    # NOTE(review): PlayWrapper/VideoMonitor is the older interaction API; other
    # lecture scripts in this commit use irlc.interactive instead — confirm
    # before migrating this one.
    from irlc import PlayWrapper
    agent = PlayWrapper(agent, env)
    env = VideoMonitor(env, agent=agent, agent_monitor_keys=tuple(), fps=10)
    train(env, agent, num_episodes=30)  # run up to 30 interactive episodes
    env.close()
if __name__ == "__main__":
    d = 3
    gminmax(layout='minimaxClassic', layout_str=layout_str, Agent=GymMinimaxAgent,depth=d)
    # gminmax(layout='minimaxClassic', layout_str=layout_str, Agent=GymAlphaBetaAgent, depth=d)
    # gminmax(layout='minimaxClassic', layout_str=layout_str, Agent=GymExpectimaxAgent,depth=d)
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.lectures.chapter4search.yield_version.pacman_yield import BFSAgentYield
from irlc.lectures.chapter4search.search_tables import s_large
from irlc.lectures.lec03.lecture_03_tricksearch_bfs import tricksearchdot

if __name__ == "__main__":
    # Breadth-first search demo on the large "tricky search" food layout.
    tricksearchdot(SAgent=BFSAgentYield, agent_args=None, layout_str=s_large)
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.lectures.chapter4search.yield_version.pacman_yield import AStarAgentYield
from irlc.lectures.lec03.lecture_03_tricksearch_bfs import tricksearchdot
from irlc.ex03multisearch.pacman_problem_foodsearch_astar import foodHeuristic

if __name__ == "__main__":
    # A* demo on the tricky-search layout using the food heuristic.
    tricksearchdot(SAgent=AStarAgentYield, agent_args=dict(heuristic=foodHeuristic))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment