diff --git a/.gitignore b/.gitignore index c8c09b0b41fb0d69c2e1ea0549e5bec7353933f7..f8ab10216febb7f4f8b0408aa260e1f423c3c9e8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,8 @@ -**/__pycache__/* solutions/exam -solutions exam_tabular_examples -solutions/ex01 -solutions/ex02 +#solutions/ex01 +#solutions/ex02 solutions/ex03 solutions/ex04 solutions/ex05 @@ -52,8 +50,8 @@ irlc/project0/fruit*_complete*.py # irlc/exam/exam2024spring/*.zip # irlc/exam/exam2024spring/*.pdf -irlc/exam/exam202*/*.zip -irlc/exam/exam202*/*.pdf +#irlc/exam/exam202*/*.zip +#irlc/exam/exam202*/*.pdf irlc/exam/exam2024august/*.zip irlc/exam/exam2024august/*.pdf @@ -61,13 +59,13 @@ irlc/exam/exam2024august/*.pdf irlc/exam/exam2025*/*.zip irlc/exam/exam2025*/*.pdf -irlc/exam/exam2*/solution +irlc/exam/exam20*/solution # irlc/exam/midterm2023a # irlc/exam/midterm2023b # irlc/lectures/lec01 -irlc/lectures/lec02 +# irlc/lectures/lec02 irlc/lectures/lec03 irlc/lectures/lec04 irlc/lectures/lec05 @@ -81,4 +79,5 @@ irlc/lectures/lec12 irlc/lectures/lec13 - +# Always ignored. +**/__pycache__/* diff --git a/irlc/lectures/lec02/__init__.py b/irlc/lectures/lec02/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a56057c84d0ceac54aab1d40ba0f370c77fe10be --- /dev/null +++ b/irlc/lectures/lec02/__init__.py @@ -0,0 +1 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. diff --git a/irlc/lectures/lec02/lecture_02_dp_gridworld_short.py b/irlc/lectures/lec02/lecture_02_dp_gridworld_short.py new file mode 100644 index 0000000000000000000000000000000000000000..d2831e64a18df333041cb383950e84b0d4ebc289 --- /dev/null +++ b/irlc/lectures/lec02/lecture_02_dp_gridworld_short.py @@ -0,0 +1,8 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. 
If this file contains other copyright notices disregard this text.
+from irlc.lectures.chapter1.dp_planning_agent import dp_visualization
+from irlc.gridworld.gridworld_environments import FrozenLake
+
+if __name__ == "__main__":
+    env = FrozenLake(render_mode='human')
+    dp_visualization(env, N=4, num_episodes=10)  # NOTE(review): N is presumably the planning horizon - confirm in dp_planning_agent
+    env.close()
diff --git a/irlc/lectures/lec02/lecture_02_frozen_lake.py b/irlc/lectures/lec02/lecture_02_frozen_lake.py
new file mode 100644
index 0000000000000000000000000000000000000000..3a91f818af94df9f8b5dd3d7cbb7ce4b4b211012
--- /dev/null
+++ b/irlc/lectures/lec02/lecture_02_frozen_lake.py
@@ -0,0 +1,13 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.gridworld.gridworld_environments import FrozenLake
+from gymnasium.wrappers import TimeLimit
+from irlc import Agent, interactive, train
+
+if __name__ == "__main__":
+    env = FrozenLake(is_slippery=True, living_reward=-1e-4, render_mode="human")
+    N = 40  # per-episode step cap, passed to TimeLimit below
+    env, agent = interactive(env, Agent(env))  # interactive() hands back a replacement (env, agent) pair
+    env = TimeLimit(env, max_episode_steps=N)
+    num_episodes = 100
+    train(env, agent, num_episodes=num_episodes)
+    env.close()
diff --git a/irlc/lectures/lec02/lecture_02_frozen_long_slippery.py b/irlc/lectures/lec02/lecture_02_frozen_long_slippery.py
new file mode 100644
index 0000000000000000000000000000000000000000..217929b2a325c160cebed3363f23c0f5733f0e84
--- /dev/null
+++ b/irlc/lectures/lec02/lecture_02_frozen_long_slippery.py
@@ -0,0 +1,8 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.lectures.chapter1.dp_planning_agent import dp_visualization
+from irlc.gridworld.gridworld_environments import FrozenLake
+
+if __name__ == "__main__":
+    env = FrozenLake(is_slippery=True, living_reward=-1e-4, render_mode='human')
+    dp_visualization(env, N=40, num_episodes=100)
+    env.close()
diff --git a/irlc/lectures/lec02/lecture_02_keyboard_pacman_g1.py b/irlc/lectures/lec02/lecture_02_keyboard_pacman_g1.py
new file mode 100644
index 0000000000000000000000000000000000000000..717f4300157d125eb3059d488f1519d61badef09
--- /dev/null
+++ b/irlc/lectures/lec02/lecture_02_keyboard_pacman_g1.py
@@ -0,0 +1,30 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.pacman.pacman_environment import PacmanEnvironment
+from irlc.ex01.agent import train
+from irlc.ex01.agent import Agent
+from irlc import interactive
+from irlc.lectures.chapter3dp.dp_pacman import SS1tiny
+
+
+def ppac(layout_str, name="pac"):
+    """Run the base ``Agent`` on a Pacman layout via ``interactive`` and print the rewards.
+
+    Calls ``train`` with ``num_episodes=5`` and ``max_steps=8`` and prints the
+    ``'Accumulated Reward'`` entry of every returned episode statistic.
+
+    :param layout_str: Layout text handed to ``PacmanEnvironment(layout_str=...)``.
+    :param name: Not used by this function; kept so existing calls stay valid.
+    """
+    env = PacmanEnvironment(layout=None, layout_str=layout_str, animate_movement=True)
+    try:
+        agent = Agent(env)
+        env, agent = interactive(env, agent)
+        stats, _ = train(env, agent, num_episodes=5, max_steps=8)
+        print("Accumulated reward for all episodes:", [s['Accumulated Reward'] for s in stats])
+    finally:
+        # Close the environment even when training raises or is interrupted.
+        env.close()
+
+
+if __name__ == "__main__":
+    ppac(SS1tiny)
diff --git a/irlc/lectures/lec02/lecture_02_keyboard_pacman_g2.py b/irlc/lectures/lec02/lecture_02_keyboard_pacman_g2.py
new file mode 100644
index 0000000000000000000000000000000000000000..46d285bcd28e0585d1ca04420d0e232f32521567
--- /dev/null
+++ b/irlc/lectures/lec02/lecture_02_keyboard_pacman_g2.py
@@ -0,0 +1,6 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.lectures.lec02.lecture_02_keyboard_pacman_g1 import ppac
+from irlc.lectures.chapter3dp.dp_pacman import SS2tiny
+
+if __name__ == "__main__":
+    ppac(SS2tiny)
diff --git a/irlc/lectures/lec02/lecture_02_optimal_dp_g0.py b/irlc/lectures/lec02/lecture_02_optimal_dp_g0.py
new file mode 100644
index 0000000000000000000000000000000000000000..8c914974699e423122d2d1bf7429fd91048afe20
--- /dev/null
+++ b/irlc/lectures/lec02/lecture_02_optimal_dp_g0.py
@@ -0,0 +1,47 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.pacman.pacman_environment import PacmanEnvironment
+from irlc.ex02.dp_agent import DynamicalProgrammingAgent
+from gymnasium.wrappers import TimeLimit
+from irlc.pacman.pacman_environment import PacmanWinWrapper
+from irlc.ex01.agent import train
+# from irlc import VideoMonitor
+# from irlc.ex02.old.dp_pacman import DPPacmanModel
+from irlc.lectures.chapter3dp.dp_pacman import DPPacmanModel
+# from irlc import PlayWrapper
+from irlc import interactive
+
+def simulate_1_game(layout_str):
+    """Run the dynamic-programming agent on the Pacman layout ``layout_str``.
+
+    The environment is wrapped, in order, by ``interactive``, ``TimeLimit``
+    (``max_episode_steps=N`` with ``N = 30``) and ``PacmanWinWrapper`` before
+    ``train`` is called for 100 episodes.
+
+    :param layout_str: Layout text handed to ``PacmanEnvironment(layout_str=...)``.
+    """
+    N = 30
+    env = PacmanEnvironment(layout=None, layout_str=layout_str, render_mode='human')
+    try:
+        model = DPPacmanModel(env, N=N, verbose=True)
+        agent = DynamicalProgrammingAgent(env, model=model)
+        env, agent = interactive(env, agent)
+        env = TimeLimit(env, max_episode_steps=N)
+        env = PacmanWinWrapper(env)
+        # The value returned by train() is not needed here.
+        train(env, agent, num_episodes=100, verbose=False, return_trajectory=True)
+    finally:
+        # Close the environment even when planning or training raises.
+        env.close()
+
+
+SS0 = """
+%%%%%%%%%%
+% P . %
+% %%%%%. %
+% %
+% %%% %%%%
+%.
.%
+%%%%%%%%%%
+"""
+if __name__ == "__main__":
+    simulate_1_game(layout_str=SS0)
diff --git a/irlc/lectures/lec02/lecture_02_optimal_dp_g1.py b/irlc/lectures/lec02/lecture_02_optimal_dp_g1.py
new file mode 100644
index 0000000000000000000000000000000000000000..568b291d9532341e4dcad15fcb8473466a789f1e
--- /dev/null
+++ b/irlc/lectures/lec02/lecture_02_optimal_dp_g1.py
@@ -0,0 +1,7 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.lectures.chapter3dp.dp_pacman import SS1tiny
+from irlc.lectures.lec02.lecture_02_optimal_dp_g0 import simulate_1_game
+
+
+if __name__ == "__main__":
+    simulate_1_game(layout_str=SS1tiny)
diff --git a/irlc/lectures/lec02/lecture_02_optimal_dp_g2.py b/irlc/lectures/lec02/lecture_02_optimal_dp_g2.py
new file mode 100644
index 0000000000000000000000000000000000000000..32c4b590116bcfd1c2eb52e55efb8fd1832dd371
--- /dev/null
+++ b/irlc/lectures/lec02/lecture_02_optimal_dp_g2.py
@@ -0,0 +1,7 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.lectures.chapter3dp.dp_pacman import SS2tiny
+# simulate_1_game is defined in lecture_02_optimal_dp_g0; import it from there rather than via g1's re-export.
+from irlc.lectures.lec02.lecture_02_optimal_dp_g0 import simulate_1_game
+
+if __name__ == "__main__":
+    simulate_1_game(layout_str=SS2tiny)