From dc7923563b85d82fee0c96d2a9f18db9ef16e71b Mon Sep 17 00:00:00 2001 From: Tue Herlau <tuhe@dtu.dk> Date: Thu, 13 Feb 2025 23:24:45 +0100 Subject: [PATCH] Examples for week 2 --- .gitignore | 17 ++++----- irlc/lectures/lec02/__init__.py | 1 + .../lec02/lecture_02_dp_gridworld_short.py | 8 ++++ irlc/lectures/lec02/lecture_02_frozen_lake.py | 13 +++++++ .../lec02/lecture_02_frozen_long_slippery.py | 8 ++++ .../lec02/lecture_02_keyboard_pacman_g1.py | 20 ++++++++++ .../lec02/lecture_02_keyboard_pacman_g2.py | 6 +++ .../lec02/lecture_02_optimal_dp_g0.py | 38 +++++++++++++++++++ .../lec02/lecture_02_optimal_dp_g1.py | 7 ++++ .../lec02/lecture_02_optimal_dp_g2.py | 6 +++ 10 files changed, 115 insertions(+), 9 deletions(-) create mode 100644 irlc/lectures/lec02/__init__.py create mode 100644 irlc/lectures/lec02/lecture_02_dp_gridworld_short.py create mode 100644 irlc/lectures/lec02/lecture_02_frozen_lake.py create mode 100644 irlc/lectures/lec02/lecture_02_frozen_long_slippery.py create mode 100644 irlc/lectures/lec02/lecture_02_keyboard_pacman_g1.py create mode 100644 irlc/lectures/lec02/lecture_02_keyboard_pacman_g2.py create mode 100644 irlc/lectures/lec02/lecture_02_optimal_dp_g0.py create mode 100644 irlc/lectures/lec02/lecture_02_optimal_dp_g1.py create mode 100644 irlc/lectures/lec02/lecture_02_optimal_dp_g2.py diff --git a/.gitignore b/.gitignore index c8c09b0..f8ab102 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,8 @@ -**/__pycache__/* solutions/exam -solutions exam_tabular_examples -solutions/ex01 -solutions/ex02 +#solutions/ex01 +#solutions/ex02 solutions/ex03 solutions/ex04 solutions/ex05 @@ -52,8 +50,8 @@ irlc/project0/fruit*_complete*.py # irlc/exam/exam2024spring/*.zip # irlc/exam/exam2024spring/*.pdf -irlc/exam/exam202*/*.zip -irlc/exam/exam202*/*.pdf +#irlc/exam/exam202*/*.zip +#irlc/exam/exam202*/*.pdf irlc/exam/exam2024august/*.zip irlc/exam/exam2024august/*.pdf @@ -61,13 +59,13 @@ irlc/exam/exam2024august/*.pdf irlc/exam/exam2025*/*.zip 
irlc/exam/exam2025*/*.pdf -irlc/exam/exam2*/solution +irlc/exam/exam20*/solution # irlc/exam/midterm2023a # irlc/exam/midterm2023b # irlc/lectures/lec01 -irlc/lectures/lec02 +# irlc/lectures/lec02 irlc/lectures/lec03 irlc/lectures/lec04 irlc/lectures/lec05 @@ -81,4 +79,5 @@ irlc/lectures/lec12 irlc/lectures/lec13 - +# Always ignored. +**/__pycache__/* diff --git a/irlc/lectures/lec02/__init__.py b/irlc/lectures/lec02/__init__.py new file mode 100644 index 0000000..a56057c --- /dev/null +++ b/irlc/lectures/lec02/__init__.py @@ -0,0 +1 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. diff --git a/irlc/lectures/lec02/lecture_02_dp_gridworld_short.py b/irlc/lectures/lec02/lecture_02_dp_gridworld_short.py new file mode 100644 index 0000000..d2831e6 --- /dev/null +++ b/irlc/lectures/lec02/lecture_02_dp_gridworld_short.py @@ -0,0 +1,8 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.lectures.chapter1.dp_planning_agent import dp_visualization +from irlc.gridworld.gridworld_environments import FrozenLake + +if __name__ == "__main__": + env = FrozenLake(render_mode='human') + dp_visualization(env, N=4, num_episodes=10) + env.close() diff --git a/irlc/lectures/lec02/lecture_02_frozen_lake.py b/irlc/lectures/lec02/lecture_02_frozen_lake.py new file mode 100644 index 0000000..3a91f81 --- /dev/null +++ b/irlc/lectures/lec02/lecture_02_frozen_lake.py @@ -0,0 +1,13 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. 
+from irlc.gridworld.gridworld_environments import FrozenLake +from gymnasium.wrappers import TimeLimit +from irlc import Agent, interactive, train + +if __name__ == "__main__": + env = FrozenLake(is_slippery=True, living_reward=-1e-4, render_mode="human") + N = 40 + env, agent = interactive(env, Agent(env)) + env = TimeLimit(env, max_episode_steps=N) + num_episodes = 100 + train(env, agent, num_episodes=num_episodes) + env.close() diff --git a/irlc/lectures/lec02/lecture_02_frozen_long_slippery.py b/irlc/lectures/lec02/lecture_02_frozen_long_slippery.py new file mode 100644 index 0000000..217929b --- /dev/null +++ b/irlc/lectures/lec02/lecture_02_frozen_long_slippery.py @@ -0,0 +1,8 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.lectures.chapter1.dp_planning_agent import dp_visualization +from irlc.gridworld.gridworld_environments import FrozenLake + +if __name__ == "__main__": + env = FrozenLake(is_slippery=True, living_reward=-1e-4, render_mode='human') + dp_visualization(env, N=40, num_episodes=100) + env.close() diff --git a/irlc/lectures/lec02/lecture_02_keyboard_pacman_g1.py b/irlc/lectures/lec02/lecture_02_keyboard_pacman_g1.py new file mode 100644 index 0000000..717f430 --- /dev/null +++ b/irlc/lectures/lec02/lecture_02_keyboard_pacman_g1.py @@ -0,0 +1,20 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. 
+from irlc.pacman.pacman_environment import PacmanEnvironment
+from irlc.ex01.agent import train
+from irlc.ex01.agent import Agent
+from irlc import interactive
+from irlc.lectures.chapter3dp.dp_pacman import SS1tiny
+
+
+def ppac(layout_str, name="pac"):  # Build a Pacman env from `layout_str` and run it with a base Agent; `name` is not referenced in the body.
+    env = PacmanEnvironment(layout=None, layout_str=layout_str, animate_movement=True)
+    agent = Agent(env)
+    env, agent = interactive(env, agent)  # Both names are rebound to the pair returned by interactive(); NOTE(review): presumably this enables keyboard control -- confirm.
+    # agent = PlayWrapper(agent, env)
+    # env = VideoMonitor(env)
+    stats, _ = train(env, agent, num_episodes=5, max_steps=8)  # Only the first return value is kept; the second is discarded.
+    print("Accumulated reward for all episodes:", [s['Accumulated Reward'] for s in stats])  # Reads the 'Accumulated Reward' key of every entry in stats.
+    env.close()
+
+if __name__ == "__main__":
+    ppac(SS1tiny)
diff --git a/irlc/lectures/lec02/lecture_02_keyboard_pacman_g2.py b/irlc/lectures/lec02/lecture_02_keyboard_pacman_g2.py
new file mode 100644
index 0000000..46d285b
--- /dev/null
+++ b/irlc/lectures/lec02/lecture_02_keyboard_pacman_g2.py
@@ -0,0 +1,6 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.lectures.lec02.lecture_02_keyboard_pacman_g1 import ppac
+from irlc.lectures.chapter3dp.dp_pacman import SS2tiny
+
+if __name__ == "__main__":
+    ppac(SS2tiny)
diff --git a/irlc/lectures/lec02/lecture_02_optimal_dp_g0.py b/irlc/lectures/lec02/lecture_02_optimal_dp_g0.py
new file mode 100644
index 0000000..8c91497
--- /dev/null
+++ b/irlc/lectures/lec02/lecture_02_optimal_dp_g0.py
@@ -0,0 +1,38 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.pacman.pacman_environment import PacmanEnvironment
+from irlc.ex02.dp_agent import DynamicalProgrammingAgent
+from gymnasium.wrappers import TimeLimit
+from irlc.pacman.pacman_environment import PacmanWinWrapper
+from irlc.ex01.agent import train
+# from irlc import VideoMonitor
+# from irlc.ex02.old.dp_pacman import DPPacmanModel
+from irlc.lectures.chapter3dp.dp_pacman import DPPacmanModel
+# from irlc import PlayWrapper
+from irlc import interactive
+
+def simulate_1_game(layout_str):  # Run a DynamicalProgrammingAgent backed by a DPPacmanModel on the Pacman layout `layout_str`; returns None.
+    N = 30  # Used twice below: as N for the DP model and as max_episode_steps for TimeLimit.
+    env = PacmanEnvironment(layout=None, layout_str=layout_str, render_mode='human')
+
+    # env = VideoMonitor(env, fps=3)
+    model = DPPacmanModel(env, N=N, verbose=True)  # Built from the unwrapped env, before interactive()/TimeLimit/PacmanWinWrapper are applied.
+    agent = DynamicalProgrammingAgent(env, model=model)
+    # agent = PlayWrapper(agent, env)
+    env, agent = interactive(env, agent)  # Both names are rebound to the pair returned by interactive().
+    env = TimeLimit(env, max_episode_steps=N)
+    env = PacmanWinWrapper(env)  # Outermost wrapper; this is the env handed to train() and closed afterwards.
+    stats, trajectories = train(env, agent, num_episodes=100, verbose=False, return_trajectory=True)  # Neither result is used or returned.
+    env.close()
+
+
+SS0 = """
+%%%%%%%%%%
+% P . %
+% %%%%%. %
+% %
+% %%% %%%%
+%. .%
+%%%%%%%%%%
+"""  # NOTE(review): the rows above have unequal widths in this copy of the patch, so runs of spaces look collapsed -- verify against the repository before applying.
+if __name__ == "__main__":
+    simulate_1_game(layout_str=SS0)
diff --git a/irlc/lectures/lec02/lecture_02_optimal_dp_g1.py b/irlc/lectures/lec02/lecture_02_optimal_dp_g1.py
new file mode 100644
index 0000000..568b291
--- /dev/null
+++ b/irlc/lectures/lec02/lecture_02_optimal_dp_g1.py
@@ -0,0 +1,7 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.lectures.chapter3dp.dp_pacman import SS1tiny +from irlc.lectures.lec02.lecture_02_optimal_dp_g0 import simulate_1_game + + +if __name__ == "__main__": + simulate_1_game(layout_str=SS1tiny) diff --git a/irlc/lectures/lec02/lecture_02_optimal_dp_g2.py b/irlc/lectures/lec02/lecture_02_optimal_dp_g2.py new file mode 100644 index 0000000..32c4b59 --- /dev/null +++ b/irlc/lectures/lec02/lecture_02_optimal_dp_g2.py @@ -0,0 +1,6 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.lectures.chapter3dp.dp_pacman import SS2tiny +from irlc.lectures.lec02.lecture_02_optimal_dp_g1 import simulate_1_game + +if __name__ == "__main__": + simulate_1_game(layout_str=SS2tiny) -- GitLab