Skip to content
Snippets Groups Projects
Commit 5e943cbe authored by tuhe's avatar tuhe
Browse files

Updates to lecture examples and some documentation fixes

parent 4d78e8d8
Branches
No related tags found
No related merge requests found
Showing
with 15 additions and 148 deletions
...@@ -3,7 +3,7 @@ exam_tabular_examples ...@@ -3,7 +3,7 @@ exam_tabular_examples
#solutions/ex01 #solutions/ex01
#solutions/ex02 #solutions/ex02
solutions/ex03 #solutions/ex03
solutions/ex04 solutions/ex04
solutions/ex05 solutions/ex05
solutions/ex06 solutions/ex06
...@@ -29,8 +29,8 @@ solutions/ex13 ...@@ -29,8 +29,8 @@ solutions/ex13
# irlc/tests/tests_week01.py # irlc/tests/tests_week01.py
#irlc/tests/tests_week02.py #irlc/tests/tests_week02.py
irlc/tests/tests_week03.py #irlc/tests/tests_week03.py
irlc/tests/tests_week04.py #irlc/tests/tests_week04.py
irlc/tests/tests_week05.py irlc/tests/tests_week05.py
irlc/tests/tests_week06.py irlc/tests/tests_week06.py
irlc/tests/tests_week07.py irlc/tests/tests_week07.py
...@@ -66,8 +66,8 @@ irlc/exam/exam20*/solution ...@@ -66,8 +66,8 @@ irlc/exam/exam20*/solution
# irlc/lectures/lec01 # irlc/lectures/lec01
# irlc/lectures/lec02 # irlc/lectures/lec02
irlc/lectures/lec03 #irlc/lectures/lec03
irlc/lectures/lec04 #irlc/lectures/lec04
irlc/lectures/lec05 irlc/lectures/lec05
irlc/lectures/lec06 irlc/lectures/lec06
irlc/lectures/lec07 irlc/lectures/lec07
......
...@@ -26,7 +26,7 @@ def DP_stochastic(model: DPModel) -> tuple[list[dict], list[dict]]: ...@@ -26,7 +26,7 @@ def DP_stochastic(model: DPModel) -> tuple[list[dict], list[dict]]:
- ``pi`` - A list of dictionaries so that ``pi[k][x]`` represents :math:`\mu_k(x)` - ``pi`` - A list of dictionaries so that ``pi[k][x]`` represents :math:`\mu_k(x)`
""" """
""" r"""
In case you run into problems, I recommend following the hints in (Her25, Subsection 6.2.1) and focus on the In case you run into problems, I recommend following the hints in (Her25, Subsection 6.2.1) and focus on the
case without a noise term; once it works, you can add the w-terms. When you don't loop over noise terms, just specify case without a noise term; once it works, you can add the w-terms. When you don't loop over noise terms, just specify
them as w = None in env.f and env.g. them as w = None in env.f and env.g.
...@@ -37,7 +37,7 @@ def DP_stochastic(model: DPModel) -> tuple[list[dict], list[dict]]: ...@@ -37,7 +37,7 @@ def DP_stochastic(model: DPModel) -> tuple[list[dict], list[dict]]:
J[N] = {x: model.gN(x) for x in model.S(model.N)} J[N] = {x: model.gN(x) for x in model.S(model.N)}
for k in range(N-1, -1, -1): for k in range(N-1, -1, -1):
for x in model.S(k): for x in model.S(k):
""" r"""
Update pi[k][x] and Jstar[k][x] using the general DP algorithm given in (Her25, Algorithm 1). Update pi[k][x] and Jstar[k][x] using the general DP algorithm given in (Her25, Algorithm 1).
If you implement it using the pseudo-code, I recommend you define Q (from the algorithm) as a dictionary like the J-function such that If you implement it using the pseudo-code, I recommend you define Q (from the algorithm) as a dictionary like the J-function such that
...@@ -61,9 +61,9 @@ def DP_stochastic(model: DPModel) -> tuple[list[dict], list[dict]]: ...@@ -61,9 +61,9 @@ def DP_stochastic(model: DPModel) -> tuple[list[dict], list[dict]]:
return J, pi return J, pi
if __name__ == "__main__": # Test dp on small graph given in (Her25, Subsection 6.2.1) if __name__ == "__main__": # Test dp on the deterministic version of the inventory control environment.
print("Testing the deterministic DP algorithm on the small graph environment") print("Testing the deterministic DP algorithm on the small graph environment")
model = DeterministicInventoryDPModel() # Instantiate the small graph with target node 5 model = DeterministicInventoryDPModel() # Instantiate the inventory control environment
J, pi = DP_stochastic(model) J, pi = DP_stochastic(model)
# Print all optimal cost functions J_k(x_k) # Print all optimal cost functions J_k(x_k)
for k in range(len(J)): for k in range(len(J)):
......
...@@ -9,7 +9,10 @@ During the exam: ...@@ -9,7 +9,10 @@ During the exam:
- For the midterm, you can find the file on DTU Learn - For the midterm, you can find the file on DTU Learn
- The `zip` file will contain the toolbox code including solutions. It will also contain a directory: - The `zip` file will contain the toolbox code including solutions. It will also contain a directory:
```bash ```bash
irlc/exam/exam2024spring irlc/exam/exam2025spring
``` ```
- This directory contains the code you need to work on for the exam. Replace the directory on your local computer with this directory and you should be all set up - This directory contains the code you need to work on for the exam. Replace the directory on your local computer with this directory and you should be all set up
- The `.zip` file will also contain solutions to nearly all exercises. Use these if benefits you. - The `.zip` file will also contain solutions to nearly all exercises. Use these if benefits you.
## Midterms
The two 'midterms' were used for practice during the first year with written exams. There are no midterms in the current version of the course.
...@@ -10,8 +10,7 @@ def ppac(layout_str, name="pac"): ...@@ -10,8 +10,7 @@ def ppac(layout_str, name="pac"):
env = PacmanEnvironment(layout=None, layout_str=layout_str, animate_movement=True) env = PacmanEnvironment(layout=None, layout_str=layout_str, animate_movement=True)
agent = Agent(env) agent = Agent(env)
env, agent = interactive(env, agent) env, agent = interactive(env, agent)
# agent = PlayWrapper(agent, env)
# env = VideoMonitor(env)
stats, _ = train(env, agent, num_episodes=5, max_steps=8) stats, _ = train(env, agent, num_episodes=5, max_steps=8)
print("Accumulated reward for all episodes:", [s['Accumulated Reward'] for s in stats]) print("Accumulated reward for all episodes:", [s['Accumulated Reward'] for s in stats])
env.close() env.close()
......
...@@ -4,20 +4,14 @@ from irlc.ex02.dp_agent import DynamicalProgrammingAgent ...@@ -4,20 +4,14 @@ from irlc.ex02.dp_agent import DynamicalProgrammingAgent
from gymnasium.wrappers import TimeLimit from gymnasium.wrappers import TimeLimit
from irlc.pacman.pacman_environment import PacmanWinWrapper from irlc.pacman.pacman_environment import PacmanWinWrapper
from irlc.ex01.agent import train from irlc.ex01.agent import train
# from irlc import VideoMonitor
# from irlc.ex02.old.dp_pacman import DPPacmanModel
from irlc.lectures.chapter3dp.dp_pacman import DPPacmanModel from irlc.lectures.chapter3dp.dp_pacman import DPPacmanModel
# from irlc import PlayWrapper
from irlc import interactive from irlc import interactive
def simulate_1_game(layout_str): def simulate_1_game(layout_str):
N = 30 N = 30
env = PacmanEnvironment(layout=None, layout_str=layout_str, render_mode='human') env = PacmanEnvironment(layout=None, layout_str=layout_str, render_mode='human')
# env = VideoMonitor(env, fps=3)
model = DPPacmanModel(env, N=N, verbose=True) model = DPPacmanModel(env, N=N, verbose=True)
agent = DynamicalProgrammingAgent(env, model=model) agent = DynamicalProgrammingAgent(env, model=model)
# agent = PlayWrapper(agent, env)
env, agent = interactive(env, agent) env, agent = interactive(env, agent)
env = TimeLimit(env, max_episode_steps=N) env = TimeLimit(env, max_episode_steps=N)
env = PacmanWinWrapper(env) env = PacmanWinWrapper(env)
......
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. # This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc import Agent, interactive from irlc import train, interactive
from irlc.lectures.lec01.viz_inventory_environment import VizInventoryEnvironment from irlc.lectures.lec01.viz_inventory_environment import VizInventoryEnvironment
if __name__ == "__main__": if __name__ == "__main__":
env = VizInventoryEnvironment(render_mode='human') env = VizInventoryEnvironment(render_mode='human')
from irlc.ex02.inventory import InventoryDPModel from irlc.ex02.inventory import InventoryDPModel
from irlc.ex02.dp_agent import DynamicalProgrammingAgent from irlc.ex02.dp_agent import DynamicalProgrammingAgent
from irlc import train, interactive
agent = DynamicalProgrammingAgent(env, model=InventoryDPModel()) agent = DynamicalProgrammingAgent(env, model=InventoryDPModel())
env, agent = interactive(env, agent) env, agent = interactive(env, agent)
......
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc import Agent, train, savepdf
from irlc.pacman.pacman_environment import PacmanEnvironment
from irlc.ex03.dp_forward import dp_forward
from irlc.ex03.search_problem import SearchProblem
from irlc.ex03.search_problem import EnsureTerminalSelfTransitionsWrapper
from irlc.ex03.pacman_search import layout2, layout1

if __name__ == "__main__":
    # Render each exercise layout once and save a PDF snapshot for the notes.
    env = PacmanEnvironment(layout_str=layout1, render_mode='human')
    env.reset()
    savepdf("ex03_layout1", env=env)
    env.close()

    # Bug fix: this snapshot previously rendered layout1 again, so the file
    # named "ex03_layout2" showed the wrong maze. Use layout2, which was
    # imported but never used.
    env = PacmanEnvironment(layout_str=layout2, render_mode='human')
    env.reset()
    savepdf("ex03_layout2", env=env)
    env.close()
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.ex03multisearch.multisearch_alphabeta import GymAlphaBetaAgent
from irlc.lectures.lec03.lecture_03_minimax import gminmax

if __name__ == "__main__":
    # Interactive Pacman demo: minimax with alpha-beta pruning, search depth 3.
    gminmax(Agent=GymAlphaBetaAgent, depth=3)
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.lectures.lec03.lecture_03_dotsearch_dp import singledot
from irlc.lectures.chapter4search.yield_version.pacman_yield import AStarAgentYield
from irlc.ex03multisearch.pacman_problem_positionsearch_astar import manhattanHeuristic

if __name__ == "__main__":
    # A* search on the single-dot maze, guided by the Manhattan-distance heuristic.
    singledot(SAgent=AStarAgentYield, agent_args=dict(heuristic=manhattanHeuristic))
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.lectures.lec03.lecture_03_dotsearch_dp import singledot
from irlc.lectures.chapter4search.yield_version.pacman_yield import BFSAgentYield

if __name__ == "__main__":
    # Breadth-first search demo on the single-dot maze; BFS needs no heuristic.
    singledot(SAgent=BFSAgentYield)
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.lectures.lec03.lecture_03_dotsearch_dp import singledot
from irlc.lectures.chapter4search.yield_version.pacman_yield import DFSAgentYield

if __name__ == "__main__":
    # Depth-first search demo on the single-dot maze; DFS needs no heuristic.
    singledot(SAgent=DFSAgentYield)
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.lectures.chapter4search.yield_version.pacman_yield import stest, ForwardDPSearchAgent, dargs
# from irlc.ex03.pacsearch_agents import GymPositionSearchProblem, manhattanHeuristic, GymCornersProblem, cornersHeuristic, foodHeuristic, GymFoodSearchProblem, GymAnyFoodSearchProblem
from irlc.ex03multisearch.pacman_problem_positionsearch import GymPositionSearchProblem#, manhattanHeuristic


def singledot(layout='smallMaze', SAgent=None, agent_args=None, layout_str=None):
    """Run a search-agent demo on the single-dot position-search Pacman problem.

    :param layout: Name of the built-in maze layout to load.
    :param SAgent: Search-agent class to instantiate (BFS/DFS/A*/forward-DP).
    :param agent_args: Optional dict of keyword arguments forwarded to the agent.
    :param layout_str: Optional explicit layout string overriding ``layout``.
    """
    stest(layout=layout, layout_str=layout_str, SAgent=SAgent,
          prob=GymPositionSearchProblem(), agent_args=agent_args,
          zoom=2, **dargs, fps=30)


if __name__ == "__main__":
    # Demo: forward dynamic-programming search agent with horizon N=30.
    singledot(SAgent=ForwardDPSearchAgent, agent_args=dict(N=30))
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.ex03multisearch.multisearch_agents import GymExpectimaxAgent
from irlc.lectures.lec03.lecture_03_minimax import gminmax

if __name__ == "__main__":
    # Interactive Pacman demo: expectimax adversarial search at depth 3.
    gminmax(Agent=GymExpectimaxAgent, depth=3)
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.ex01.agent import train
from irlc.pacman.pacman_environment import GymPacmanEnvironment
from irlc.utils.video_monitor import VideoMonitor
from irlc.ex03multisearch.multisearch_agents import GymMinimaxAgent
# Hand-drawn maze: % = wall, P = Pacman start, G = ghost, . = food dot.
# NOTE(review): interior row spacing may have been mangled by text extraction —
# verify the layout columns against the repository before relying on it.
layout_str = """
%%%%%%%%%
% %
% %%%% %
% %
% P %
%%%% %
%%%% .G %
%%%% %
%%%%%%%%%
""".strip()
def gminmax(layout='smallClassic', layout_str=layout_str, Agent=None, depth=3, **kwargs):
    """Run an interactive adversarial-search Pacman demo.

    :param layout: Built-in layout name passed to the environment.
    :param layout_str: Explicit layout string (defaults to the maze above).
    :param Agent: Agent class; instantiated as ``Agent(env, depth=depth)``.
    :param depth: Search depth passed to the adversarial agent.
    :param kwargs: Extra keyword arguments forwarded to the environment.
    """
    zoom = 2  # render scale for the Pacman window
    env = GymPacmanEnvironment(layout=layout, layout_str=layout_str, zoom=zoom, **kwargs)
    agent = Agent(env, depth=depth)
    # NOTE(review): PlayWrapper/VideoMonitor is the older interaction API; other
    # lecture scripts in this commit use irlc.interactive instead — confirm
    # before migrating this one.
    from irlc import PlayWrapper
    agent = PlayWrapper(agent, env)
    env = VideoMonitor(env, agent=agent, agent_monitor_keys=tuple(), fps=10)
    train(env, agent, num_episodes=30)  # run up to 30 interactive episodes
    env.close()
if __name__ == "__main__":
    d = 3
    gminmax(layout='minimaxClassic', layout_str=layout_str, Agent=GymMinimaxAgent,depth=d)
    # gminmax(layout='minimaxClassic', layout_str=layout_str, Agent=GymAlphaBetaAgent, depth=d)
    # gminmax(layout='minimaxClassic', layout_str=layout_str, Agent=GymExpectimaxAgent,depth=d)
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.lectures.chapter4search.yield_version.pacman_yield import BFSAgentYield
from irlc.lectures.chapter4search.search_tables import s_large
from irlc.lectures.lec03.lecture_03_tricksearch_bfs import tricksearchdot

if __name__ == "__main__":
    # Breadth-first search demo on the large "tricky search" food layout.
    tricksearchdot(SAgent=BFSAgentYield, agent_args=None, layout_str=s_large)
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.lectures.chapter4search.yield_version.pacman_yield import AStarAgentYield
from irlc.lectures.lec03.lecture_03_tricksearch_bfs import tricksearchdot
from irlc.ex03multisearch.pacman_problem_foodsearch_astar import foodHeuristic

if __name__ == "__main__":
    # A* demo on the tricky-search layout using the food heuristic.
    tricksearchdot(SAgent=AStarAgentYield, agent_args=dict(heuristic=foodHeuristic))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment