From 5e943cbe72dc1207d9877cae26c3d9e935b901c3 Mon Sep 17 00:00:00 2001 From: Tue Herlau <tuhe@dtu.dk> Date: Tue, 18 Feb 2025 11:19:19 +0100 Subject: [PATCH] Updates to lecture examples and some documentation fixes --- .gitignore | 10 ++--- irlc/ex02/dp.py | 8 ++-- irlc/exam/readme.md | 5 ++- .../lec02/lecture_02_keyboard_pacman_g1.py | 3 +- .../lec02/lecture_02_optimal_dp_g0.py | 6 --- .../lec02/lecture_02_optimal_inventory.py | 4 +- irlc/lectures/lec03/ex_03_search.py | 18 --------- irlc/lectures/lec03/lecture_03_alphab.py | 7 ---- .../lecture_03_dotsearch_astar_manhattan.py | 8 ---- .../lec03/lecture_03_dotsearch_bfs.py | 9 ----- .../lec03/lecture_03_dotsearch_dfs.py | 9 ----- .../lectures/lec03/lecture_03_dotsearch_dp.py | 12 ------ .../lecture_03_dp_gridworld_short.py} | 0 irlc/lectures/lec03/lecture_03_expectimax.py | 7 ---- .../lecture_03_frozen_lake.py} | 0 .../lecture_03_frozen_long_slippery.py} | 0 .../lecture_03_harmonic.py} | 0 irlc/lectures/lec03/lecture_03_minimax.py | 35 ------------------ .../lec03/lecture_03_squaresearch_bfs.py | 12 ------ .../lec03/lecture_03_tricksearch_astar.py | 10 ----- .../lec03/lecture_03_tricksearch_bfs.py | 21 ----------- .../lec03/lecture_03_tricksearch_dfs.py | 10 ----- ...enaigym.video.0.8068.video000000.meta.json | 1 - .../openaigym.video.0.8068.video000000.mp4 | Bin 48 -> 0 bytes solutions/ex03/inventory_evaluation_TODO_1.py | 2 + solutions/ex03/inventory_evaluation_TODO_2.py | 12 ++++++ solutions/ex03/kuramoto_TODO_1.py | 1 + solutions/ex03/kuramoto_TODO_2.py | 1 + solutions/ex03/kuramoto_TODO_3.py | 7 ++++ solutions/ex03/toy_2d_control_TODO_1.py | 2 + solutions/ex03/toy_2d_control_TODO_2.py | 4 ++ 31 files changed, 44 insertions(+), 180 deletions(-) delete mode 100644 irlc/lectures/lec03/ex_03_search.py delete mode 100644 irlc/lectures/lec03/lecture_03_alphab.py delete mode 100644 irlc/lectures/lec03/lecture_03_dotsearch_astar_manhattan.py delete mode 100644 irlc/lectures/lec03/lecture_03_dotsearch_bfs.py delete mode 100644 irlc/lectures/lec03/lecture_03_dotsearch_dfs.py delete mode 100644 irlc/lectures/lec03/lecture_03_dotsearch_dp.py rename irlc/lectures/{lec02/lecture_02_dp_gridworld_short.py => lec03/lecture_03_dp_gridworld_short.py} (100%) delete mode 100644 irlc/lectures/lec03/lecture_03_expectimax.py rename irlc/lectures/{lec02/lecture_02_frozen_lake.py => lec03/lecture_03_frozen_lake.py} (100%) rename irlc/lectures/{lec02/lecture_02_frozen_long_slippery.py => lec03/lecture_03_frozen_long_slippery.py} (100%) rename irlc/lectures/{lec04/lecture_04_harmonic.py => lec03/lecture_03_harmonic.py} (100%) delete mode 100644 irlc/lectures/lec03/lecture_03_minimax.py delete mode 100644 irlc/lectures/lec03/lecture_03_squaresearch_bfs.py delete mode 100644 irlc/lectures/lec03/lecture_03_tricksearch_astar.py delete mode 100644 irlc/lectures/lec03/lecture_03_tricksearch_bfs.py delete mode 100644 irlc/lectures/lec03/lecture_03_tricksearch_dfs.py delete mode 100644 irlc/lectures/lec03/snapshot_base/openaigym.video.0.8068.video000000.meta.json delete mode 100644 irlc/lectures/lec03/snapshot_base/openaigym.video.0.8068.video000000.mp4 create mode 100644 solutions/ex03/inventory_evaluation_TODO_1.py create mode 100644 solutions/ex03/inventory_evaluation_TODO_2.py create mode 100644 solutions/ex03/kuramoto_TODO_1.py create mode 100644 solutions/ex03/kuramoto_TODO_2.py create mode 100644 solutions/ex03/kuramoto_TODO_3.py create mode 100644 solutions/ex03/toy_2d_control_TODO_1.py create mode 100644 solutions/ex03/toy_2d_control_TODO_2.py diff --git a/.gitignore b/.gitignore index f8ab102..1e25f17 100644 --- a/.gitignore +++ b/.gitignore @@ -3,7 +3,7 @@ exam_tabular_examples #solutions/ex01 #solutions/ex02 -solutions/ex03 +#solutions/ex03 solutions/ex04 solutions/ex05 solutions/ex06 @@ -29,8 +29,8 @@ solutions/ex13 # irlc/tests/tests_week01.py #irlc/tests/tests_week02.py -irlc/tests/tests_week03.py -irlc/tests/tests_week04.py +#irlc/tests/tests_week03.py +#irlc/tests/tests_week04.py irlc/tests/tests_week05.py irlc/tests/tests_week06.py irlc/tests/tests_week07.py @@ -66,8 +66,8 @@ irlc/exam/exam20*/solution # irlc/lectures/lec01 # irlc/lectures/lec02 -irlc/lectures/lec03 -irlc/lectures/lec04 +#irlc/lectures/lec03 +#irlc/lectures/lec04 irlc/lectures/lec05 irlc/lectures/lec06 irlc/lectures/lec07 diff --git a/irlc/ex02/dp.py b/irlc/ex02/dp.py index 24e0614..66bb1e3 100644 --- a/irlc/ex02/dp.py +++ b/irlc/ex02/dp.py @@ -26,7 +26,7 @@ def DP_stochastic(model: DPModel) -> tuple[list[dict], list[dict]]: - ``pi`` - A list of dictionaries so that ``pi[k][x]`` represents :math:`\mu_k(x)` """ - """ + r""" In case you run into problems, I recommend following the hints in (Her25, Subsection 6.2.1) and focus on the case without a noise term; once it works, you can add the w-terms. When you don't loop over noise terms, just specify them as w = None in env.f and env.g. @@ -37,7 +37,7 @@ def DP_stochastic(model: DPModel) -> tuple[list[dict], list[dict]]: J[N] = {x: model.gN(x) for x in model.S(model.N)} for k in range(N-1, -1, -1): for x in model.S(k): - """ + r""" Update pi[k][x] and Jstar[k][x] using the general DP algorithm given in (Her25, Algorithm 1). If you implement it using the pseudo-code, I recommend you define Q (from the algorithm) as a dictionary like the J-function such that @@ -61,9 +61,9 @@ def DP_stochastic(model: DPModel) -> tuple[list[dict], list[dict]]: return J, pi -if __name__ == "__main__": # Test dp on small graph given in (Her25, Subsection 6.2.1) +if __name__ == "__main__": # Test dp on the deterministic version of the inventory control environment. print("Testing the deterministic DP algorithm on the small graph environment") - model = DeterministicInventoryDPModel() # Instantiate the small graph with target node 5 + model = DeterministicInventoryDPModel() # Instantiate the inventory control environment J, pi = DP_stochastic(model) # Print all optimal cost functions J_k(x_k) for k in range(len(J)): diff --git a/irlc/exam/readme.md b/irlc/exam/readme.md index c189b31..f7b5903 100644 --- a/irlc/exam/readme.md +++ b/irlc/exam/readme.md @@ -9,7 +9,10 @@ During the exam: - For the midterm, you can find the file on DTU Learn - The `zip` file will contain the toolbox code including solutions. It will also contain a directory: ```bash - irlc/exam/exam2024spring + irlc/exam/exam2025spring ``` - This directory contains the code you need to work on for the exam. Replace the directory on your local computer with this directory and you should be all set up - The `.zip` file will also contain solutions to nearly all exercises. Use these if benefits you. + +## Midterms +The two 'midterms' were used for practice during the first year with written exams. There are no midterms in the current version of the course. diff --git a/irlc/lectures/lec02/lecture_02_keyboard_pacman_g1.py b/irlc/lectures/lec02/lecture_02_keyboard_pacman_g1.py index 717f430..da4b7ac 100644 --- a/irlc/lectures/lec02/lecture_02_keyboard_pacman_g1.py +++ b/irlc/lectures/lec02/lecture_02_keyboard_pacman_g1.py @@ -10,8 +10,7 @@ def ppac(layout_str, name="pac"): env = PacmanEnvironment(layout=None, layout_str=layout_str, animate_movement=True) agent = Agent(env) env, agent = interactive(env, agent) - # agent = PlayWrapper(agent, env) - # env = VideoMonitor(env) + stats, _ = train(env, agent, num_episodes=5, max_steps=8) print("Accumulated reward for all episodes:", [s['Accumulated Reward'] for s in stats]) env.close() diff --git a/irlc/lectures/lec02/lecture_02_optimal_dp_g0.py b/irlc/lectures/lec02/lecture_02_optimal_dp_g0.py index 8c91497..6753253 100644 --- a/irlc/lectures/lec02/lecture_02_optimal_dp_g0.py +++ b/irlc/lectures/lec02/lecture_02_optimal_dp_g0.py @@ -4,20 +4,14 @@ from irlc.ex02.dp_agent import DynamicalProgrammingAgent from gymnasium.wrappers import TimeLimit from irlc.pacman.pacman_environment import PacmanWinWrapper from irlc.ex01.agent import train -# from irlc import VideoMonitor -# from irlc.ex02.old.dp_pacman import DPPacmanModel from irlc.lectures.chapter3dp.dp_pacman import DPPacmanModel -# from irlc import PlayWrapper from irlc import interactive def simulate_1_game(layout_str): N = 30 env = PacmanEnvironment(layout=None, layout_str=layout_str, render_mode='human') - - # env = VideoMonitor(env, fps=3) model = DPPacmanModel(env, N=N, verbose=True) agent = DynamicalProgrammingAgent(env, model=model) - # agent = PlayWrapper(agent, env) env, agent = interactive(env, agent) env = TimeLimit(env, max_episode_steps=N) env = PacmanWinWrapper(env) diff --git a/irlc/lectures/lec02/lecture_02_optimal_inventory.py b/irlc/lectures/lec02/lecture_02_optimal_inventory.py index 48eb5e1..71b7bc6 100644 --- a/irlc/lectures/lec02/lecture_02_optimal_inventory.py +++ b/irlc/lectures/lec02/lecture_02_optimal_inventory.py @@ -1,13 +1,11 @@ # This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. -from irlc import Agent, interactive +from irlc import train, interactive from irlc.lectures.lec01.viz_inventory_environment import VizInventoryEnvironment if __name__ == "__main__": env = VizInventoryEnvironment(render_mode='human') from irlc.ex02.inventory import InventoryDPModel from irlc.ex02.dp_agent import DynamicalProgrammingAgent - - from irlc import train, interactive agent = DynamicalProgrammingAgent(env, model=InventoryDPModel()) env, agent = interactive(env, agent) diff --git a/irlc/lectures/lec03/ex_03_search.py b/irlc/lectures/lec03/ex_03_search.py deleted file mode 100644 index 7d5ce2c..0000000 --- a/irlc/lectures/lec03/ex_03_search.py +++ /dev/null @@ -1,18 +0,0 @@ -# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. -from irlc import Agent, train, savepdf -from irlc.pacman.pacman_environment import PacmanEnvironment -from irlc.ex03.dp_forward import dp_forward -from irlc.ex03.search_problem import SearchProblem -from irlc.ex03.search_problem import EnsureTerminalSelfTransitionsWrapper -from irlc.ex03.pacman_search import layout2, layout1 - -if __name__ == "__main__": - env = PacmanEnvironment(layout_str=layout1, render_mode='human') - env.reset() - savepdf("ex03_layout1", env=env) - env.close() - - env = PacmanEnvironment(layout_str=layout1, render_mode='human') - env.reset() - savepdf("ex03_layout2", env=env) - env.close() diff --git a/irlc/lectures/lec03/lecture_03_alphab.py b/irlc/lectures/lec03/lecture_03_alphab.py deleted file mode 100644 index fa81c07..0000000 --- a/irlc/lectures/lec03/lecture_03_alphab.py +++ /dev/null @@ -1,7 +0,0 @@ -# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. -from irlc.ex03multisearch.multisearch_alphabeta import GymAlphaBetaAgent -from irlc.lectures.lec03.lecture_03_minimax import gminmax - -if __name__ == "__main__": - d = 3 - gminmax(Agent=GymAlphaBetaAgent,depth=d) diff --git a/irlc/lectures/lec03/lecture_03_dotsearch_astar_manhattan.py b/irlc/lectures/lec03/lecture_03_dotsearch_astar_manhattan.py deleted file mode 100644 index ebea74a..0000000 --- a/irlc/lectures/lec03/lecture_03_dotsearch_astar_manhattan.py +++ /dev/null @@ -1,8 +0,0 @@ -# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. -from irlc.lectures.lec03.lecture_03_dotsearch_dp import singledot -from irlc.lectures.chapter4search.yield_version.pacman_yield import AStarAgentYield -from irlc.ex03multisearch.pacman_problem_positionsearch_astar import manhattanHeuristic - -if __name__ == "__main__": - agent_args = dict(heuristic=manhattanHeuristic) - singledot(SAgent=AStarAgentYield, agent_args=agent_args) diff --git a/irlc/lectures/lec03/lecture_03_dotsearch_bfs.py b/irlc/lectures/lec03/lecture_03_dotsearch_bfs.py deleted file mode 100644 index 2fafd77..0000000 --- a/irlc/lectures/lec03/lecture_03_dotsearch_bfs.py +++ /dev/null @@ -1,9 +0,0 @@ -# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. -from irlc.lectures.lec03.lecture_03_dotsearch_dp import singledot -from irlc.lectures.chapter4search.yield_version.pacman_yield import BFSAgentYield - -if __name__ == "__main__": - # agent_args = dict(heuristic=manhattanHeuristic,N=30) - singledot(SAgent=BFSAgentYield) - - # singledot(SAgent=BFSAgentYield) diff --git a/irlc/lectures/lec03/lecture_03_dotsearch_dfs.py b/irlc/lectures/lec03/lecture_03_dotsearch_dfs.py deleted file mode 100644 index 276aa6b..0000000 --- a/irlc/lectures/lec03/lecture_03_dotsearch_dfs.py +++ /dev/null @@ -1,9 +0,0 @@ -# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. -from irlc.lectures.lec03.lecture_03_dotsearch_dp import singledot -from irlc.lectures.chapter4search.yield_version.pacman_yield import DFSAgentYield - -if __name__ == "__main__": - # agent_args = dict(heuristic=manhattanHeuristic,N=30) - singledot(SAgent=DFSAgentYield) - - # singledot(SAgent=BFSAgentYield) diff --git a/irlc/lectures/lec03/lecture_03_dotsearch_dp.py b/irlc/lectures/lec03/lecture_03_dotsearch_dp.py deleted file mode 100644 index baff1ee..0000000 --- a/irlc/lectures/lec03/lecture_03_dotsearch_dp.py +++ /dev/null @@ -1,12 +0,0 @@ -# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. -from irlc.lectures.chapter4search.yield_version.pacman_yield import stest, ForwardDPSearchAgent, dargs -# from irlc.ex03.pacsearch_agents import GymPositionSearchProblem, manhattanHeuristic, GymCornersProblem, cornersHeuristic, foodHeuristic, GymFoodSearchProblem, GymAnyFoodSearchProblem -from irlc.ex03multisearch.pacman_problem_positionsearch import GymPositionSearchProblem#, manhattanHeuristic - - -def singledot(layout='smallMaze', SAgent=None, agent_args=None, layout_str=None): - stest(layout=layout, layout_str=layout_str, SAgent=SAgent, prob=GymPositionSearchProblem(), agent_args=agent_args, zoom=2, **dargs, fps=30) # part 3 - -if __name__ == "__main__": - agent_args = dict(N=30) - singledot(SAgent=ForwardDPSearchAgent, agent_args=agent_args) diff --git a/irlc/lectures/lec02/lecture_02_dp_gridworld_short.py b/irlc/lectures/lec03/lecture_03_dp_gridworld_short.py similarity index 100% rename from irlc/lectures/lec02/lecture_02_dp_gridworld_short.py rename to irlc/lectures/lec03/lecture_03_dp_gridworld_short.py diff --git a/irlc/lectures/lec03/lecture_03_expectimax.py b/irlc/lectures/lec03/lecture_03_expectimax.py deleted file mode 100644 index 826975f..0000000 --- a/irlc/lectures/lec03/lecture_03_expectimax.py +++ /dev/null @@ -1,7 +0,0 @@ -# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. -from irlc.ex03multisearch.multisearch_agents import GymExpectimaxAgent -from irlc.lectures.lec03.lecture_03_minimax import gminmax - -if __name__ == "__main__": - d = 3 - gminmax(Agent=GymExpectimaxAgent,depth=d) diff --git a/irlc/lectures/lec02/lecture_02_frozen_lake.py b/irlc/lectures/lec03/lecture_03_frozen_lake.py similarity index 100% rename from irlc/lectures/lec02/lecture_02_frozen_lake.py rename to irlc/lectures/lec03/lecture_03_frozen_lake.py diff --git a/irlc/lectures/lec02/lecture_02_frozen_long_slippery.py b/irlc/lectures/lec03/lecture_03_frozen_long_slippery.py similarity index 100% rename from irlc/lectures/lec02/lecture_02_frozen_long_slippery.py rename to irlc/lectures/lec03/lecture_03_frozen_long_slippery.py diff --git a/irlc/lectures/lec04/lecture_04_harmonic.py b/irlc/lectures/lec03/lecture_03_harmonic.py similarity index 100% rename from irlc/lectures/lec04/lecture_04_harmonic.py rename to irlc/lectures/lec03/lecture_03_harmonic.py diff --git a/irlc/lectures/lec03/lecture_03_minimax.py b/irlc/lectures/lec03/lecture_03_minimax.py deleted file mode 100644 index eb8ee73..0000000 --- a/irlc/lectures/lec03/lecture_03_minimax.py +++ /dev/null @@ -1,35 +0,0 @@ -# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. -from irlc.ex01.agent import train -from irlc.pacman.pacman_environment import GymPacmanEnvironment -from irlc.utils.video_monitor import VideoMonitor -from irlc.ex03multisearch.multisearch_agents import GymMinimaxAgent - - -layout_str = """ -%%%%%%%%% -% % -% %%%% % -% % -% P % -%%%% % -%%%% .G % -%%%% % -%%%%%%%%% -""".strip() - -def gminmax(layout='smallClassic', layout_str=layout_str, Agent=None, depth=3, **kwargs): - zoom = 2 - env = GymPacmanEnvironment(layout=layout, layout_str=layout_str, zoom=zoom, **kwargs) - agent = Agent(env, depth=depth) - from irlc import PlayWrapper - agent = PlayWrapper(agent, env) - - env = VideoMonitor(env, agent=agent, agent_monitor_keys=tuple(), fps=10) - train(env, agent, num_episodes=30) - env.close() - -if __name__ == "__main__": - d = 3 - gminmax(layout='minimaxClassic', layout_str=layout_str, Agent=GymMinimaxAgent,depth=d) - # gminmax(layout='minimaxClassic', layout_str=layout_str, Agent=GymAlphaBetaAgent, depth=d) - # gminmax(layout='minimaxClassic', layout_str=layout_str, Agent=GymExpectimaxAgent,depth=d) diff --git a/irlc/lectures/lec03/lecture_03_squaresearch_bfs.py b/irlc/lectures/lec03/lecture_03_squaresearch_bfs.py deleted file mode 100644 index ac1e095..0000000 --- a/irlc/lectures/lec03/lecture_03_squaresearch_bfs.py +++ /dev/null @@ -1,12 +0,0 @@ -# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. -from irlc.lectures.chapter4search.yield_version.pacman_yield import BFSAgentYield -from irlc.lectures.chapter4search.search_tables import s_large - -# def tricksearchdot(layout='trickySearch', SAgent=None, agent_args=None, layout_str=None): -# stest(layout_str=layout_str, SAgent=SAgent, prob=GymFoodSearchProblem(), agent_args=agent_args, zoom=2, **dargs, fps=1000) # part 3 - -from irlc.lectures.lec03.lecture_03_tricksearch_bfs import tricksearchdot - -if __name__ == "__main__": - # agent_args = dict(heuristic=manhattanHeuristic,N=30) - tricksearchdot(SAgent=BFSAgentYield, agent_args=None, layout_str=s_large) diff --git a/irlc/lectures/lec03/lecture_03_tricksearch_astar.py b/irlc/lectures/lec03/lecture_03_tricksearch_astar.py deleted file mode 100644 index 6c65849..0000000 --- a/irlc/lectures/lec03/lecture_03_tricksearch_astar.py +++ /dev/null @@ -1,10 +0,0 @@ -# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. -# from irlc.ex03.pacsearch_agents import GymPositionSearchProblem, manhattanHeuristic, GymCornersProblem, cornersHeuristic, foodHeuristic, GymFoodSearchProblem, GymAnyFoodSearchProblem -from irlc.lectures.chapter4search.yield_version.pacman_yield import AStarAgentYield - -from irlc.lectures.lec03.lecture_03_tricksearch_bfs import tricksearchdot -from irlc.ex03multisearch.pacman_problem_foodsearch_astar import foodHeuristic - -if __name__ == "__main__": - agent_args = dict(heuristic=foodHeuristic) - tricksearchdot(SAgent=AStarAgentYield, agent_args=agent_args) diff --git a/irlc/lectures/lec03/lecture_03_tricksearch_bfs.py b/irlc/lectures/lec03/lecture_03_tricksearch_bfs.py deleted file mode 100644 index 89b7764..0000000 --- a/irlc/lectures/lec03/lecture_03_tricksearch_bfs.py +++ /dev/null @@ -1,21 +0,0 @@ -# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. -from irlc.lectures.chapter4search.yield_version.pacman_yield import stest, dargs -from irlc.ex03multisearch.pacman_problem_foodsearch import GymFoodSearchProblem -from irlc.lectures.chapter4search.yield_version.pacman_yield import BFSAgentYield - -layout_str = """ -%%%%%%%%%%%% -% % % -%.%.%.%% % % -% P % % -%%%%%%%%%% % -%. % -%%%%%%%%%%%% -""".strip() - -def tricksearchdot(layout_str=layout_str, SAgent=None, agent_args=None): - stest(layout_str=layout_str, SAgent=SAgent, prob=GymFoodSearchProblem(), agent_args=agent_args, zoom=2, **dargs, fps=1000) # part 3 - -if __name__ == "__main__": - # agent_args = dict(heuristic=manhattanHeuristic,N=30) - tricksearchdot(SAgent=BFSAgentYield, agent_args=None) diff --git a/irlc/lectures/lec03/lecture_03_tricksearch_dfs.py b/irlc/lectures/lec03/lecture_03_tricksearch_dfs.py deleted file mode 100644 index f3b2ac4..0000000 --- a/irlc/lectures/lec03/lecture_03_tricksearch_dfs.py +++ /dev/null @@ -1,10 +0,0 @@ -# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. -# from irlc.ex03.pacsearch_agents import GymPositionSearchProblem, manhattanHeuristic, GymCornersProblem, cornersHeuristic, foodHeuristic, GymFoodSearchProblem, GymAnyFoodSearchProblem - -from irlc.lectures.chapter4search.yield_version.pacman_yield import DFSAgentYield -from irlc.lectures.lec03.lecture_03_tricksearch_bfs import tricksearchdot - - -if __name__ == "__main__": - # agent_args = dict(heuristic=manhattanHeuristic,N=30) - tricksearchdot(SAgent=DFSAgentYield, agent_args=None) diff --git a/irlc/lectures/lec03/snapshot_base/openaigym.video.0.8068.video000000.meta.json b/irlc/lectures/lec03/snapshot_base/openaigym.video.0.8068.video000000.meta.json deleted file mode 100644 index 5dc734d..0000000 --- a/irlc/lectures/lec03/snapshot_base/openaigym.video.0.8068.video000000.meta.json +++ /dev/null @@ -1 +0,0 @@ -{"episode_id": 0, "content_type": "video/mp4"} \ No newline at end of file diff --git a/irlc/lectures/lec03/snapshot_base/openaigym.video.0.8068.video000000.mp4 b/irlc/lectures/lec03/snapshot_base/openaigym.video.0.8068.video000000.mp4 deleted file mode 100644 index 17e5e5fbd204f4f1c8bf240b166ab0a318db4744..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 48 xcmZQzU{FXasVvAW&d+6FU}6B#Kx~v)mTZ_?U}DI?z`&7Kl$r{p8FEt+O8`9Q4A1}o diff --git a/solutions/ex03/inventory_evaluation_TODO_1.py b/solutions/ex03/inventory_evaluation_TODO_1.py new file mode 100644 index 0000000..ec037ab --- /dev/null +++ b/solutions/ex03/inventory_evaluation_TODO_1.py @@ -0,0 +1,2 @@ + k = 0 + expected_number_of_items = sum([p * model.f(x, u, w, k=0) for w, p in model.Pw(x, u, k).items()]) \ No newline at end of file diff --git a/solutions/ex03/inventory_evaluation_TODO_2.py b/solutions/ex03/inventory_evaluation_TODO_2.py new file mode 100644 index 0000000..e2897b1 --- /dev/null +++ b/solutions/ex03/inventory_evaluation_TODO_2.py @@ -0,0 +1,12 @@ + model = InventoryDPModel() + N = model.N + J = [{} for _ in range(N + 1)] + J[N] = {x: model.gN(x) for x in model.S(model.N)} + for k in range(N - 1, -1, -1): + for x in model.S(k): + Qu = {u: sum(pw * (model.g(x, u, w, k) + J[k + 1][model.f(x, u, w, k)]) for w, pw in model.Pw(x, u, k).items()) for u + in model.A(x, k)} + + umin = pi[k][x] # min(Qu, key=Qu.get) + J[k][x] = Qu[umin] # Compute the expected cost function + J_pi_x0 = J[0][x0] \ No newline at end of file diff --git a/solutions/ex03/kuramoto_TODO_1.py b/solutions/ex03/kuramoto_TODO_1.py new file mode 100644 index 0000000..27e0fcd --- /dev/null +++ b/solutions/ex03/kuramoto_TODO_1.py @@ -0,0 +1 @@ + symbolic_f_list = [u[0] + sym.cos(x[0])] \ No newline at end of file diff --git a/solutions/ex03/kuramoto_TODO_2.py b/solutions/ex03/kuramoto_TODO_2.py new file mode 100644 index 0000000..0f1d611 --- /dev/null +++ b/solutions/ex03/kuramoto_TODO_2.py @@ -0,0 +1 @@ + f_value = cmodel.f(x, u, t=0) \ No newline at end of file diff --git a/solutions/ex03/kuramoto_TODO_3.py b/solutions/ex03/kuramoto_TODO_3.py new file mode 100644 index 0000000..f28ac99 --- /dev/null +++ b/solutions/ex03/kuramoto_TODO_3.py @@ -0,0 +1,7 @@ + Delta = tt[k + 1] - tt[k] + xn = xs[k] + k1 = np.asarray(f(xn, u)) + k2 = np.asarray(f(xn + Delta * k1/2, u)) + k3 = np.asarray(f(xn + Delta * k2/2, u)) + k4 = np.asarray(f(xn + Delta * k3, u)) + x_next = xn + 1/6 * Delta * (k1 + 2*k2 + 2*k3 + k4) \ No newline at end of file diff --git a/solutions/ex03/toy_2d_control_TODO_1.py b/solutions/ex03/toy_2d_control_TODO_1.py new file mode 100644 index 0000000..137a63b --- /dev/null +++ b/solutions/ex03/toy_2d_control_TODO_1.py @@ -0,0 +1,2 @@ + def sym_f(self, x, u, t=None): + return [x[1], sym.cos(x[0] + u[0])] \ No newline at end of file diff --git a/solutions/ex03/toy_2d_control_TODO_2.py b/solutions/ex03/toy_2d_control_TODO_2.py new file mode 100644 index 0000000..91c054a --- /dev/null +++ b/solutions/ex03/toy_2d_control_TODO_2.py @@ -0,0 +1,4 @@ + toy = Toy2DControl() + x0 = np.asarray([np.pi/2, 0]) + xs, us, ts, cost = toy.simulate( x0=x0, u_fun = u0, t0=0, tF=T) + wT = xs[-1][0] \ No newline at end of file -- GitLab