From dc7923563b85d82fee0c96d2a9f18db9ef16e71b Mon Sep 17 00:00:00 2001
From: Tue Herlau <tuhe@dtu.dk>
Date: Thu, 13 Feb 2025 23:24:45 +0100
Subject: [PATCH] Examples for week 2

---
 .gitignore                                    | 17 ++++-----
 irlc/lectures/lec02/__init__.py               |  1 +
 .../lec02/lecture_02_dp_gridworld_short.py    |  8 ++++
 irlc/lectures/lec02/lecture_02_frozen_lake.py | 13 +++++++
 .../lec02/lecture_02_frozen_long_slippery.py  |  8 ++++
 .../lec02/lecture_02_keyboard_pacman_g1.py    | 20 ++++++++++
 .../lec02/lecture_02_keyboard_pacman_g2.py    |  6 +++
 .../lec02/lecture_02_optimal_dp_g0.py         | 38 +++++++++++++++++++
 .../lec02/lecture_02_optimal_dp_g1.py         |  7 ++++
 .../lec02/lecture_02_optimal_dp_g2.py         |  6 +++
 10 files changed, 115 insertions(+), 9 deletions(-)
 create mode 100644 irlc/lectures/lec02/__init__.py
 create mode 100644 irlc/lectures/lec02/lecture_02_dp_gridworld_short.py
 create mode 100644 irlc/lectures/lec02/lecture_02_frozen_lake.py
 create mode 100644 irlc/lectures/lec02/lecture_02_frozen_long_slippery.py
 create mode 100644 irlc/lectures/lec02/lecture_02_keyboard_pacman_g1.py
 create mode 100644 irlc/lectures/lec02/lecture_02_keyboard_pacman_g2.py
 create mode 100644 irlc/lectures/lec02/lecture_02_optimal_dp_g0.py
 create mode 100644 irlc/lectures/lec02/lecture_02_optimal_dp_g1.py
 create mode 100644 irlc/lectures/lec02/lecture_02_optimal_dp_g2.py

diff --git a/.gitignore b/.gitignore
index c8c09b0..f8ab102 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,10 +1,8 @@
-**/__pycache__/*
 solutions/exam
-solutions
 exam_tabular_examples
 
-solutions/ex01
-solutions/ex02
+#solutions/ex01
+#solutions/ex02
 solutions/ex03
 solutions/ex04
 solutions/ex05
@@ -52,8 +50,8 @@ irlc/project0/fruit*_complete*.py
 # irlc/exam/exam2024spring/*.zip
 # irlc/exam/exam2024spring/*.pdf
 
-irlc/exam/exam202*/*.zip
-irlc/exam/exam202*/*.pdf
+#irlc/exam/exam202*/*.zip
+#irlc/exam/exam202*/*.pdf
 
 irlc/exam/exam2024august/*.zip
 irlc/exam/exam2024august/*.pdf
@@ -61,13 +59,13 @@ irlc/exam/exam2024august/*.pdf
 irlc/exam/exam2025*/*.zip
 irlc/exam/exam2025*/*.pdf
 
-irlc/exam/exam2*/solution
+irlc/exam/exam20*/solution
 
 # irlc/exam/midterm2023a
 # irlc/exam/midterm2023b
 
 # irlc/lectures/lec01
-irlc/lectures/lec02
+# irlc/lectures/lec02
 irlc/lectures/lec03
 irlc/lectures/lec04
 irlc/lectures/lec05
@@ -81,4 +79,5 @@ irlc/lectures/lec12
 irlc/lectures/lec13
 
 
-
+# Always ignored.
+**/__pycache__/*
diff --git a/irlc/lectures/lec02/__init__.py b/irlc/lectures/lec02/__init__.py
new file mode 100644
index 0000000..a56057c
--- /dev/null
+++ b/irlc/lectures/lec02/__init__.py
@@ -0,0 +1 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
diff --git a/irlc/lectures/lec02/lecture_02_dp_gridworld_short.py b/irlc/lectures/lec02/lecture_02_dp_gridworld_short.py
new file mode 100644
index 0000000..d2831e6
--- /dev/null
+++ b/irlc/lectures/lec02/lecture_02_dp_gridworld_short.py
@@ -0,0 +1,8 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.lectures.chapter1.dp_planning_agent import dp_visualization
+from irlc.gridworld.gridworld_environments import FrozenLake
+
+if __name__ == "__main__":
+    env = FrozenLake(render_mode='human')  # only render_mode is passed; every other FrozenLake argument keeps its default
+    dp_visualization(env, N=4, num_episodes=10)  # presumably N is the DP planning horizon -- confirm in dp_planning_agent
+    env.close()
diff --git a/irlc/lectures/lec02/lecture_02_frozen_lake.py b/irlc/lectures/lec02/lecture_02_frozen_lake.py
new file mode 100644
index 0000000..3a91f81
--- /dev/null
+++ b/irlc/lectures/lec02/lecture_02_frozen_lake.py
@@ -0,0 +1,13 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.gridworld.gridworld_environments import FrozenLake
+from gymnasium.wrappers import TimeLimit
+from irlc import Agent, interactive, train
+
+if __name__ == "__main__":
+    env = FrozenLake(is_slippery=True, living_reward=-1e-4, render_mode="human")
+    N = 40  # per-episode step cap; handed to the TimeLimit wrapper below
+    env, agent = interactive(env, Agent(env))  # wraps the base Agent; presumably enables keyboard control -- confirm
+    env = TimeLimit(env, max_episode_steps=N)  # applied on top of the environment returned by interactive()
+    num_episodes = 100
+    train(env, agent, num_episodes=num_episodes)  # return value of train() is discarded
+    env.close()
diff --git a/irlc/lectures/lec02/lecture_02_frozen_long_slippery.py b/irlc/lectures/lec02/lecture_02_frozen_long_slippery.py
new file mode 100644
index 0000000..217929b
--- /dev/null
+++ b/irlc/lectures/lec02/lecture_02_frozen_long_slippery.py
@@ -0,0 +1,8 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.lectures.chapter1.dp_planning_agent import dp_visualization
+from irlc.gridworld.gridworld_environments import FrozenLake
+
+if __name__ == "__main__":
+    env = FrozenLake(is_slippery=True, living_reward=-1e-4, render_mode='human')  # slippery variant with a small negative living reward
+    dp_visualization(env, N=40, num_episodes=100)  # presumably N is the DP planning horizon -- confirm in dp_planning_agent
+    env.close()
diff --git a/irlc/lectures/lec02/lecture_02_keyboard_pacman_g1.py b/irlc/lectures/lec02/lecture_02_keyboard_pacman_g1.py
new file mode 100644
index 0000000..717f430
--- /dev/null
+++ b/irlc/lectures/lec02/lecture_02_keyboard_pacman_g1.py
@@ -0,0 +1,20 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.pacman.pacman_environment import PacmanEnvironment
+from irlc.ex01.agent import train
+from irlc.ex01.agent import Agent
+from irlc import interactive
+from irlc.lectures.chapter3dp.dp_pacman import SS1tiny
+
+
+def ppac(layout_str, name="pac"):
+    """Run 5 episodes (``max_steps=8``) on the Pacman layout ``layout_str`` and print each episode's accumulated reward; ``name`` is unused."""
+    env = PacmanEnvironment(layout=None, layout_str=layout_str, animate_movement=True)
+    agent = Agent(env)
+    # interactive() returns the (env, agent) pair that is trained below; presumably it adds keyboard control -- confirm.
+    env, agent = interactive(env, agent)
+    stats, _ = train(env, agent, num_episodes=5, max_steps=8)
+    print("Accumulated reward for all episodes:", [s['Accumulated Reward'] for s in stats])
+    env.close()
+
+if __name__ == "__main__":
+    ppac(SS1tiny)  # SS1tiny (from chapter3dp.dp_pacman) is passed as ppac's layout_str
diff --git a/irlc/lectures/lec02/lecture_02_keyboard_pacman_g2.py b/irlc/lectures/lec02/lecture_02_keyboard_pacman_g2.py
new file mode 100644
index 0000000..46d285b
--- /dev/null
+++ b/irlc/lectures/lec02/lecture_02_keyboard_pacman_g2.py
@@ -0,0 +1,6 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.lectures.lec02.lecture_02_keyboard_pacman_g1 import ppac
+from irlc.lectures.chapter3dp.dp_pacman import SS2tiny
+
+if __name__ == "__main__":
+    ppac(SS2tiny)  # reuses ppac from lecture_02_keyboard_pacman_g1 with the SS2tiny layout
diff --git a/irlc/lectures/lec02/lecture_02_optimal_dp_g0.py b/irlc/lectures/lec02/lecture_02_optimal_dp_g0.py
new file mode 100644
index 0000000..8c91497
--- /dev/null
+++ b/irlc/lectures/lec02/lecture_02_optimal_dp_g0.py
@@ -0,0 +1,38 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.pacman.pacman_environment import PacmanEnvironment
+from irlc.ex02.dp_agent import DynamicalProgrammingAgent
+from gymnasium.wrappers import TimeLimit
+from irlc.pacman.pacman_environment import PacmanWinWrapper
+from irlc.ex01.agent import train
+# from irlc import VideoMonitor
+# from irlc.ex02.old.dp_pacman import DPPacmanModel
+from irlc.lectures.chapter3dp.dp_pacman import DPPacmanModel
+# from irlc import PlayWrapper
+from irlc import interactive
+
+def simulate_1_game(layout_str):
+    """Build a Pacman environment from ``layout_str`` and run 100 episodes with a DynamicalProgrammingAgent."""
+    N = 30  # passed to DPPacmanModel as N and reused as the TimeLimit step cap
+    env = PacmanEnvironment(layout=None, layout_str=layout_str, render_mode='human')
+
+    # The model and agent are built from the unwrapped environment; the wrappers are applied afterwards.
+    model = DPPacmanModel(env, N=N, verbose=True)
+    agent = DynamicalProgrammingAgent(env, model=model)
+    env, agent = interactive(env, agent)  # NOTE(review): presumably adds keyboard control -- confirm
+    env = TimeLimit(env, max_episode_steps=N)
+    env = PacmanWinWrapper(env)
+    stats, trajectories = train(env, agent, num_episodes=100, verbose=False, return_trajectory=True)  # stats/trajectories are not used afterwards
+    env.close()
+
+
+SS0 = """
+%%%%%%%%%%
+% P  .   %
+% %%%%%. %
+%        %
+% %%% %%%%
+%.      .%
+%%%%%%%%%%
+"""  # layout string; presumably % = wall, P = Pacman start, . = food -- confirm against the Pacman layout parser
+if __name__ == "__main__":
+    simulate_1_game(layout_str=SS0)  # run the demo on the SS0 layout defined above
diff --git a/irlc/lectures/lec02/lecture_02_optimal_dp_g1.py b/irlc/lectures/lec02/lecture_02_optimal_dp_g1.py
new file mode 100644
index 0000000..568b291
--- /dev/null
+++ b/irlc/lectures/lec02/lecture_02_optimal_dp_g1.py
@@ -0,0 +1,7 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.lectures.chapter3dp.dp_pacman import SS1tiny
+from irlc.lectures.lec02.lecture_02_optimal_dp_g0 import simulate_1_game
+
+
+if __name__ == "__main__":
+    simulate_1_game(layout_str=SS1tiny)  # simulate_1_game comes from lecture_02_optimal_dp_g0; only the layout differs
diff --git a/irlc/lectures/lec02/lecture_02_optimal_dp_g2.py b/irlc/lectures/lec02/lecture_02_optimal_dp_g2.py
new file mode 100644
index 0000000..32c4b59
--- /dev/null
+++ b/irlc/lectures/lec02/lecture_02_optimal_dp_g2.py
@@ -0,0 +1,6 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.lectures.chapter3dp.dp_pacman import SS2tiny
+from irlc.lectures.lec02.lecture_02_optimal_dp_g0 import simulate_1_game  # import from the defining module, not via g1's re-export
+
+if __name__ == "__main__":
+    simulate_1_game(layout_str=SS2tiny)  # same demo as the g0/g1 scripts, run on the SS2tiny layout
-- 
GitLab