Updated lecture examples

4d78e8d8 · tuhe · 37aa3bb5 · 4d78e8d8 · 4d78e8d8 · 4d78e8d8
Commit 4d78e8d8 authored 4 months ago by tuhe
--- a/irlc/lectures/lec01/lecture_01_car_random.py
+++ b/irlc/lectures/lec01/lecture_01_car_random.py
@@ -8,5 +8,5 @@ if __name__ == "__main__":
    env.action_space.low[1] = 0  # To ensure we do not drive backwards.
    agent = Agent(env)
    env, agent = interactive(env, agent, autoplay=False)
-    stats, _ = train(env, agent, num_episodes=1, verbose=False)
+    stats, _ = train(env, agent, num_episodes=10, verbose=False)
    env.close()
--- a/irlc/lectures/lec03/__init__.py
+++ b/irlc/lectures/lec03/__init__.py
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
--- a/irlc/lectures/lec03/ex_03_search.py
+++ b/irlc/lectures/lec03/ex_03_search.py
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc import Agent, train, savepdf
+from irlc.pacman.pacman_environment import PacmanEnvironment
+from irlc.ex03.dp_forward import dp_forward
+from irlc.ex03.search_problem import SearchProblem
+from irlc.ex03.search_problem import EnsureTerminalSelfTransitionsWrapper
+from irlc.ex03.pacman_search import layout2, layout1
+if __name__ == "__main__":
+    # Render every Pacman layout once and store a PDF snapshot of its initial
+    # state. Each output file is paired with the layout it is named after, so
+    # "ex03_layout2" is produced from `layout2` rather than from `layout1`.
+    for pdf_name, layout in (("ex03_layout1", layout1), ("ex03_layout2", layout2)):
+        env = PacmanEnvironment(layout_str=layout, render_mode='human')
+        env.reset()
+        savepdf(pdf_name, env=env)
+        env.close()
--- a/irlc/lectures/lec03/lecture_03_alphab.py
+++ b/irlc/lectures/lec03/lecture_03_alphab.py
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.ex03multisearch.multisearch_alphabeta import GymAlphaBetaAgent
+from irlc.lectures.lec03.lecture_03_minimax import gminmax
+if __name__ == "__main__":
+    # Reuse the demo runner from lecture_03_minimax with the alpha-beta agent (depth=3).
+    gminmax(Agent=GymAlphaBetaAgent, depth=3)
--- a/irlc/lectures/lec03/lecture_03_dotsearch_astar_manhattan.py
+++ b/irlc/lectures/lec03/lecture_03_dotsearch_astar_manhattan.py
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.lectures.lec03.lecture_03_dotsearch_dp import singledot
+from irlc.lectures.chapter4search.yield_version.pacman_yield import AStarAgentYield
+from irlc.ex03multisearch.pacman_problem_positionsearch_astar import manhattanHeuristic
+if __name__ == "__main__":
+    # Single-dot search demo: A* agent, with the Manhattan heuristic passed as agent argument.
+    singledot(SAgent=AStarAgentYield, agent_args={"heuristic": manhattanHeuristic})
--- a/irlc/lectures/lec03/lecture_03_dotsearch_bfs.py
+++ b/irlc/lectures/lec03/lecture_03_dotsearch_bfs.py
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.lectures.lec03.lecture_03_dotsearch_dp import singledot
+from irlc.lectures.chapter4search.yield_version.pacman_yield import BFSAgentYield
+if __name__ == "__main__":
+    # Single-dot search demo with the BFS agent; no extra agent arguments are supplied.
+    singledot(SAgent=BFSAgentYield)
--- a/irlc/lectures/lec03/lecture_03_dotsearch_dfs.py
+++ b/irlc/lectures/lec03/lecture_03_dotsearch_dfs.py
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.lectures.lec03.lecture_03_dotsearch_dp import singledot
+from irlc.lectures.chapter4search.yield_version.pacman_yield import DFSAgentYield
+if __name__ == "__main__":
+    # Single-dot search demo with the DFS agent; no extra agent arguments are supplied.
+    singledot(SAgent=DFSAgentYield)
--- a/irlc/lectures/lec03/lecture_03_dotsearch_dp.py
+++ b/irlc/lectures/lec03/lecture_03_dotsearch_dp.py
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.lectures.chapter4search.yield_version.pacman_yield import stest, ForwardDPSearchAgent, dargs
+# from irlc.ex03.pacsearch_agents import GymPositionSearchProblem, manhattanHeuristic, GymCornersProblem, cornersHeuristic, foodHeuristic, GymFoodSearchProblem, GymAnyFoodSearchProblem
+from irlc.ex03multisearch.pacman_problem_positionsearch import GymPositionSearchProblem#, manhattanHeuristic
+def singledot(layout='smallMaze', SAgent=None, agent_args=None, layout_str=None):
+    """Run the `stest` search demo on a `GymPositionSearchProblem`.
+
+    `layout`, `layout_str`, `SAgent` and `agent_args` are forwarded to `stest`
+    unchanged, together with the shared `dargs` keyword defaults, `zoom=2`
+    and `fps=30`.
+    """
+    problem = GymPositionSearchProblem()
+    stest(
+        layout=layout,
+        layout_str=layout_str,
+        SAgent=SAgent,
+        prob=problem,
+        agent_args=agent_args,
+        zoom=2,
+        **dargs,
+        fps=30,
+    )
+if __name__ == "__main__":
+    # Single-dot demo with the forward-DP search agent, configured with N=30.
+    singledot(SAgent=ForwardDPSearchAgent, agent_args={"N": 30})
--- a/irlc/lectures/lec03/lecture_03_expectimax.py
+++ b/irlc/lectures/lec03/lecture_03_expectimax.py
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.ex03multisearch.multisearch_agents import GymExpectimaxAgent
+from irlc.lectures.lec03.lecture_03_minimax import gminmax
+if __name__ == "__main__":
+    # Reuse the demo runner from lecture_03_minimax with the expectimax agent (depth=3).
+    gminmax(Agent=GymExpectimaxAgent, depth=3)
--- a/irlc/lectures/lec03/lecture_03_minimax.py
+++ b/irlc/lectures/lec03/lecture_03_minimax.py
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.ex01.agent import train
+from irlc.pacman.pacman_environment import GymPacmanEnvironment
+from irlc.utils.video_monitor import VideoMonitor
+from irlc.ex03multisearch.multisearch_agents import GymMinimaxAgent
+layout_str = """
+%%%%%%%%%
+%       %
+% %%%%  %
+%       %
+%   P   %
+%%%%    %
+%%%% .G %
+%%%%    %
+%%%%%%%%%
+""".strip()
+def gminmax(layout='smallClassic', layout_str=layout_str, Agent=None, depth=3, **kwargs):
+    """Run a Pacman demo with a depth-limited search agent.
+
+    Builds a `GymPacmanEnvironment`, wraps the agent in `PlayWrapper`, wraps
+    the environment in `VideoMonitor` (fps=10) and trains for 30 episodes.
+
+    :param layout: Layout name forwarded to `GymPacmanEnvironment`.
+    :param layout_str: Layout string forwarded to `GymPacmanEnvironment`;
+        defaults to the module-level `layout_str`.
+    :param Agent: Agent class, instantiated as `Agent(env, depth=depth)`. Required.
+    :param depth: Value passed to the agent constructor as `depth`.
+    :param kwargs: Extra keyword arguments forwarded to `GymPacmanEnvironment`.
+    :raises TypeError: If no `Agent` class is supplied.
+    """
+    if Agent is None:
+        # Fail before an environment is created instead of with an opaque
+        # "'NoneType' object is not callable" afterwards.
+        raise TypeError("gminmax() requires an Agent class, e.g. Agent=GymMinimaxAgent")
+    from irlc import PlayWrapper  # Imported locally, as in the original implementation.
+    zoom = 2
+    env = GymPacmanEnvironment(layout=layout, layout_str=layout_str, zoom=zoom, **kwargs)
+    try:
+        agent = PlayWrapper(Agent(env, depth=depth), env)
+        env = VideoMonitor(env, agent=agent, agent_monitor_keys=tuple(), fps=10)
+        train(env, agent, num_episodes=30)
+    finally:
+        # Always release the environment, also when setup or training raises.
+        env.close()
+if __name__ == "__main__":
+    # Minimax demo on the custom layout above (depth=3). The alpha-beta and
+    # expectimax variants of this demo live in lecture_03_alphab.py and
+    # lecture_03_expectimax.py.
+    gminmax(layout='minimaxClassic', layout_str=layout_str, Agent=GymMinimaxAgent, depth=3)
--- a/irlc/lectures/lec03/lecture_03_squaresearch_bfs.py
+++ b/irlc/lectures/lec03/lecture_03_squaresearch_bfs.py
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.lectures.chapter4search.yield_version.pacman_yield import BFSAgentYield
+from irlc.lectures.chapter4search.search_tables import s_large
+# def tricksearchdot(layout='trickySearch', SAgent=None, agent_args=None, layout_str=None):
+#     stest(layout_str=layout_str, SAgent=SAgent, prob=GymFoodSearchProblem(), agent_args=agent_args, zoom=2, **dargs, fps=1000)  # part 3
+from irlc.lectures.lec03.lecture_03_tricksearch_bfs import tricksearchdot
+if __name__ == "__main__":
+    # BFS agent on the `s_large` layout, using the demo runner from lecture_03_tricksearch_bfs.
+    tricksearchdot(layout_str=s_large, SAgent=BFSAgentYield, agent_args=None)
--- a/irlc/lectures/lec03/lecture_03_tricksearch_astar.py
+++ b/irlc/lectures/lec03/lecture_03_tricksearch_astar.py
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+# from irlc.ex03.pacsearch_agents import GymPositionSearchProblem, manhattanHeuristic, GymCornersProblem, cornersHeuristic, foodHeuristic, GymFoodSearchProblem, GymAnyFoodSearchProblem
+from irlc.lectures.chapter4search.yield_version.pacman_yield import AStarAgentYield
+from irlc.lectures.lec03.lecture_03_tricksearch_bfs import tricksearchdot
+from irlc.ex03multisearch.pacman_problem_foodsearch_astar import foodHeuristic
+if __name__ == "__main__":
+    # Food-search demo: A* agent, with `foodHeuristic` passed as agent argument.
+    tricksearchdot(SAgent=AStarAgentYield, agent_args={"heuristic": foodHeuristic})
--- a/irlc/lectures/lec03/lecture_03_tricksearch_bfs.py
+++ b/irlc/lectures/lec03/lecture_03_tricksearch_bfs.py
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.lectures.chapter4search.yield_version.pacman_yield import stest, dargs
+from irlc.ex03multisearch.pacman_problem_foodsearch import  GymFoodSearchProblem
+from irlc.lectures.chapter4search.yield_version.pacman_yield import BFSAgentYield
+layout_str = """
+%%%%%%%%%%%%
+%      %   %
+%.%.%.%% % %
+%   P    % %
+%%%%%%%%%% %
+%.         %
+%%%%%%%%%%%%
+""".strip()
+def tricksearchdot(layout_str=layout_str, SAgent=None, agent_args=None):
+    """Run the `stest` search demo on a `GymFoodSearchProblem`.
+
+    `layout_str` (defaulting to the module-level layout), `SAgent` and
+    `agent_args` are forwarded to `stest` unchanged, together with the shared
+    `dargs` keyword defaults, `zoom=2` and `fps=1000`.
+    """
+    problem = GymFoodSearchProblem()
+    stest(
+        layout_str=layout_str,
+        SAgent=SAgent,
+        prob=problem,
+        agent_args=agent_args,
+        zoom=2,
+        **dargs,
+        fps=1000,
+    )
+if __name__ == "__main__":
+    # Food-search demo on the default layout with the BFS agent and no agent arguments.
+    tricksearchdot(agent_args=None, SAgent=BFSAgentYield)
--- a/irlc/lectures/lec03/lecture_03_tricksearch_dfs.py
+++ b/irlc/lectures/lec03/lecture_03_tricksearch_dfs.py
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+# from irlc.ex03.pacsearch_agents import GymPositionSearchProblem, manhattanHeuristic, GymCornersProblem, cornersHeuristic, foodHeuristic, GymFoodSearchProblem, GymAnyFoodSearchProblem
+from irlc.lectures.chapter4search.yield_version.pacman_yield import DFSAgentYield
+from irlc.lectures.lec03.lecture_03_tricksearch_bfs import tricksearchdot
+if __name__ == "__main__":
+    # Food-search demo on the default layout with the DFS agent and no agent arguments.
+    tricksearchdot(agent_args=None, SAgent=DFSAgentYield)
--- a/irlc/lectures/lec03/snapshot_base/openaigym.video.0.8068.video000000.meta.json
+++ b/irlc/lectures/lec03/snapshot_base/openaigym.video.0.8068.video000000.meta.json
+{"episode_id": 0, "content_type": "video/mp4"}
\ No newline at end of file
--- a/irlc/lectures/lec03/snapshot_base/openaigym.video.0.8068.video000000.mp4
+++ b/irlc/lectures/lec03/snapshot_base/openaigym.video.0.8068.video000000.mp4
--- a/irlc/lectures/lec04/__init__.py
+++ b/irlc/lectures/lec04/__init__.py
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
--- a/irlc/lectures/lec04/lecture_04_car_basic_pid.py
+++ b/irlc/lectures/lec04/lecture_04_car_basic_pid.py
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc import train
+from irlc.car.car_model import CarEnvironment
+from irlc.ex04.pid_car import PIDCarAgent
+if __name__ == "__main__":
+    # Car environment with noise_scale=0, Tmax=30 and max_laps=1, rendered in 'human' mode.
+    car_env = CarEnvironment(noise_scale=0, Tmax=30, max_laps=1, render_mode='human')
+    # PID agent with target velocity 0.2. An alternative configuration is
+    # v_target=1 with use_both_x5_x3=True; lowering v_target is recommended to
+    # make the problem simpler.
+    pid_agent = PIDCarAgent(car_env, v_target=.2, use_both_x5_x3=False)
+    # Run a single episode and also collect the trajectory.
+    stats, trajectories = train(car_env, pid_agent, num_episodes=1, return_trajectory=True)
+    car_env.close()
--- a/irlc/lectures/lec04/lecture_04_cartpole_A.py
+++ b/irlc/lectures/lec04/lecture_04_cartpole_A.py
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc import train
+from irlc.ex04.pid_cartpole import PIDCartpoleAgent, get_offbalance_cart
+if __name__ == "__main__":
+    # Cartpole environment obtained from `get_offbalance_cart(30)`.
+    cart_env = get_offbalance_cart(30)
+    # PID agent with Kp=120, Ki=0, Kd=10 and balance_to_x0 disabled.
+    pid_agent = PIDCartpoleAgent(cart_env, dt=cart_env.dt, Kp=120, Ki=0, Kd=10, balance_to_x0=False)
+    # One episode; reset=False is passed through to `train`.
+    _, trajectories = train(cart_env, pid_agent, num_episodes=1, reset=False)
+    cart_env.close()
--- a/irlc/lectures/lec04/lecture_04_cartpole_B.py
+++ b/irlc/lectures/lec04/lecture_04_cartpole_B.py
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc import train
+from irlc.ex04.pid_cartpole import PIDCartpoleAgent, get_offbalance_cart
+if __name__ == "__main__":
+    """
+    Second task: We will now also try to bring the cart towards x=0.
+    """
+    # Exercise stub: the agent construction below is a placeholder (`...` is
+    # passed where the controller parameters belong) and must be completed.
+    env = get_offbalance_cart(30)
+    agent = PIDCartpoleAgent(env, env.dt, ...)
+    # TODO: 1 lines missing.
+    # Until this `raise` is replaced by a real agent definition, the training
+    # call and `env.close()` below are never reached.
+    raise NotImplementedError("Define your agent here (including parameters)")
+    _, trajectories = train(env, agent, num_episodes=1, reset=False)  # Note reset=False to maintain initial conditions.
+    env.close()
+    env.close()