From 4d78e8d873e6a37365335c802151f8ce64eb8294 Mon Sep 17 00:00:00 2001 From: Tue Herlau <tuhe@dtu.dk> Date: Tue, 18 Feb 2025 11:13:26 +0100 Subject: [PATCH] Updated lecture examples --- irlc/lectures/lec01/lecture_01_car_random.py | 2 +- irlc/lectures/lec03/__init__.py | 1 + irlc/lectures/lec03/ex_03_search.py | 18 +++ irlc/lectures/lec03/lecture_03_alphab.py | 7 + .../lecture_03_dotsearch_astar_manhattan.py | 8 ++ .../lec03/lecture_03_dotsearch_bfs.py | 9 ++ .../lec03/lecture_03_dotsearch_dfs.py | 9 ++ .../lectures/lec03/lecture_03_dotsearch_dp.py | 12 ++ irlc/lectures/lec03/lecture_03_expectimax.py | 7 + irlc/lectures/lec03/lecture_03_minimax.py | 35 +++++ .../lec03/lecture_03_squaresearch_bfs.py | 12 ++ .../lec03/lecture_03_tricksearch_astar.py | 10 ++ .../lec03/lecture_03_tricksearch_bfs.py | 21 +++ .../lec03/lecture_03_tricksearch_dfs.py | 10 ++ ...enaigym.video.0.8068.video000000.meta.json | 1 + .../openaigym.video.0.8068.video000000.mp4 | Bin 0 -> 48 bytes irlc/lectures/lec04/__init__.py | 1 + .../lec04/lecture_04_car_basic_pid.py | 12 ++ irlc/lectures/lec04/lecture_04_cartpole_A.py | 9 ++ irlc/lectures/lec04/lecture_04_cartpole_B.py | 14 ++ irlc/lectures/lec04/lecture_04_harmonic.py | 14 ++ irlc/lectures/lec04/lecture_04_lunar.py | 12 ++ .../lec04/lecture_04_pendulum_random.py | 8 ++ irlc/lectures/lec04/lecture_04_pid_d.py | 5 + irlc/lectures/lec04/lecture_04_pid_iA.py | 5 + irlc/lectures/lec04/lecture_04_pid_iB.py | 5 + irlc/lectures/lec04/lecture_04_pid_p.py | 16 +++ irlc/tests/tests_week03.py | 88 ++++++++++++ irlc/tests/tests_week04.py | 131 ++++++++++++++++++ .../Exam5InventoryEvaluation.pkl | Bin 217 -> 217 bytes irlc/tests/unitgrade_data/Exam6Toy2d.pkl | Bin 283 -> 283 bytes .../ExamQuestion7FlowersStore.pkl | Bin 182 -> 182 bytes .../unitgrade_data/Problem1BobsFriend.pkl | Bin 170 -> 170 bytes .../Problem1DiscreteKuromoto.pkl | Bin 570 -> 570 bytes .../tests/unitgrade_data/Problem1Kuramoto.pkl | Bin 3014 -> 3014 bytes .../unitgrade_data/Problem2BobsPolicy.pkl | Bin 368 -> 368 bytes .../Problem2DeterministicDP.pkl | Bin 161 -> 161 bytes .../Problem2DeterministicInventory.pkl | Bin 128 -> 128 bytes .../Problem3InventoryInventoryEnvironment.pkl | Bin 323 -> 323 bytes irlc/tests/unitgrade_data/Problem3PID.pkl | Bin 334 -> 334 bytes .../unitgrade_data/Problem3StochasticDP.pkl | Bin 345 -> 345 bytes irlc/tests/unitgrade_data/Problem4DPAgent.pkl | Bin 121 -> 121 bytes .../unitgrade_data/Problem4InventoryTrain.pkl | Bin 242 -> 242 bytes .../tests/unitgrade_data/Problem4PIDAgent.pkl | Bin 4673 -> 4673 bytes .../Problem5PacmanHardcoded.pkl | Bin 125 -> 125 bytes .../Problem6ChessTournament.pkl | Bin 197 -> 197 bytes irlc/tests/unitgrade_data/Problem7PIDCar.pkl | Bin 419 -> 419 bytes 47 files changed, 481 insertions(+), 1 deletion(-) create mode 100644 irlc/lectures/lec03/__init__.py create mode 100644 irlc/lectures/lec03/ex_03_search.py create mode 100644 irlc/lectures/lec03/lecture_03_alphab.py create mode 100644 irlc/lectures/lec03/lecture_03_dotsearch_astar_manhattan.py create mode 100644 irlc/lectures/lec03/lecture_03_dotsearch_bfs.py create mode 100644 irlc/lectures/lec03/lecture_03_dotsearch_dfs.py create mode 100644 irlc/lectures/lec03/lecture_03_dotsearch_dp.py create mode 100644 irlc/lectures/lec03/lecture_03_expectimax.py create mode 100644 irlc/lectures/lec03/lecture_03_minimax.py create mode 100644 irlc/lectures/lec03/lecture_03_squaresearch_bfs.py create mode 100644 irlc/lectures/lec03/lecture_03_tricksearch_astar.py create mode 100644 irlc/lectures/lec03/lecture_03_tricksearch_bfs.py create mode 100644 irlc/lectures/lec03/lecture_03_tricksearch_dfs.py create mode 100644 irlc/lectures/lec03/snapshot_base/openaigym.video.0.8068.video000000.meta.json create mode 100644 irlc/lectures/lec03/snapshot_base/openaigym.video.0.8068.video000000.mp4 create mode 100644 irlc/lectures/lec04/__init__.py create mode 100644 irlc/lectures/lec04/lecture_04_car_basic_pid.py create mode 100644 irlc/lectures/lec04/lecture_04_cartpole_A.py create mode 100644 irlc/lectures/lec04/lecture_04_cartpole_B.py create mode 100644 irlc/lectures/lec04/lecture_04_harmonic.py create mode 100644 irlc/lectures/lec04/lecture_04_lunar.py create mode 100644 irlc/lectures/lec04/lecture_04_pendulum_random.py create mode 100644 irlc/lectures/lec04/lecture_04_pid_d.py create mode 100644 irlc/lectures/lec04/lecture_04_pid_iA.py create mode 100644 irlc/lectures/lec04/lecture_04_pid_iB.py create mode 100644 irlc/lectures/lec04/lecture_04_pid_p.py create mode 100644 irlc/tests/tests_week03.py create mode 100644 irlc/tests/tests_week04.py diff --git a/irlc/lectures/lec01/lecture_01_car_random.py b/irlc/lectures/lec01/lecture_01_car_random.py index fb25f46..76e673c 100644 --- a/irlc/lectures/lec01/lecture_01_car_random.py +++ b/irlc/lectures/lec01/lecture_01_car_random.py @@ -8,5 +8,5 @@ if __name__ == "__main__": env.action_space.low[1] = 0 # To ensure we do not drive backwards. agent = Agent(env) env, agent = interactive(env, agent, autoplay=False) - stats, _ = train(env, agent, num_episodes=1, verbose=False) + stats, _ = train(env, agent, num_episodes=10, verbose=False) env.close() diff --git a/irlc/lectures/lec03/__init__.py b/irlc/lectures/lec03/__init__.py new file mode 100644 index 0000000..a56057c --- /dev/null +++ b/irlc/lectures/lec03/__init__.py @@ -0,0 +1 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. diff --git a/irlc/lectures/lec03/ex_03_search.py b/irlc/lectures/lec03/ex_03_search.py new file mode 100644 index 0000000..7d5ce2c --- /dev/null +++ b/irlc/lectures/lec03/ex_03_search.py @@ -0,0 +1,18 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc import Agent, train, savepdf +from irlc.pacman.pacman_environment import PacmanEnvironment +from irlc.ex03.dp_forward import dp_forward +from irlc.ex03.search_problem import SearchProblem +from irlc.ex03.search_problem import EnsureTerminalSelfTransitionsWrapper +from irlc.ex03.pacman_search import layout2, layout1 + +if __name__ == "__main__": + env = PacmanEnvironment(layout_str=layout1, render_mode='human') + env.reset() + savepdf("ex03_layout1", env=env) + env.close() + + env = PacmanEnvironment(layout_str=layout1, render_mode='human') + env.reset() + savepdf("ex03_layout2", env=env) + env.close() diff --git a/irlc/lectures/lec03/lecture_03_alphab.py b/irlc/lectures/lec03/lecture_03_alphab.py new file mode 100644 index 0000000..fa81c07 --- /dev/null +++ b/irlc/lectures/lec03/lecture_03_alphab.py @@ -0,0 +1,7 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.ex03multisearch.multisearch_alphabeta import GymAlphaBetaAgent +from irlc.lectures.lec03.lecture_03_minimax import gminmax + +if __name__ == "__main__": + d = 3 + gminmax(Agent=GymAlphaBetaAgent,depth=d) diff --git a/irlc/lectures/lec03/lecture_03_dotsearch_astar_manhattan.py b/irlc/lectures/lec03/lecture_03_dotsearch_astar_manhattan.py new file mode 100644 index 0000000..ebea74a --- /dev/null +++ b/irlc/lectures/lec03/lecture_03_dotsearch_astar_manhattan.py @@ -0,0 +1,8 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.lectures.lec03.lecture_03_dotsearch_dp import singledot +from irlc.lectures.chapter4search.yield_version.pacman_yield import AStarAgentYield +from irlc.ex03multisearch.pacman_problem_positionsearch_astar import manhattanHeuristic + +if __name__ == "__main__": + agent_args = dict(heuristic=manhattanHeuristic) + singledot(SAgent=AStarAgentYield, agent_args=agent_args) diff --git a/irlc/lectures/lec03/lecture_03_dotsearch_bfs.py b/irlc/lectures/lec03/lecture_03_dotsearch_bfs.py new file mode 100644 index 0000000..2fafd77 --- /dev/null +++ b/irlc/lectures/lec03/lecture_03_dotsearch_bfs.py @@ -0,0 +1,9 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.lectures.lec03.lecture_03_dotsearch_dp import singledot +from irlc.lectures.chapter4search.yield_version.pacman_yield import BFSAgentYield + +if __name__ == "__main__": + # agent_args = dict(heuristic=manhattanHeuristic,N=30) + singledot(SAgent=BFSAgentYield) + + # singledot(SAgent=BFSAgentYield) diff --git a/irlc/lectures/lec03/lecture_03_dotsearch_dfs.py b/irlc/lectures/lec03/lecture_03_dotsearch_dfs.py new file mode 100644 index 0000000..276aa6b --- /dev/null +++ b/irlc/lectures/lec03/lecture_03_dotsearch_dfs.py @@ -0,0 +1,9 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.lectures.lec03.lecture_03_dotsearch_dp import singledot +from irlc.lectures.chapter4search.yield_version.pacman_yield import DFSAgentYield + +if __name__ == "__main__": + # agent_args = dict(heuristic=manhattanHeuristic,N=30) + singledot(SAgent=DFSAgentYield) + + # singledot(SAgent=BFSAgentYield) diff --git a/irlc/lectures/lec03/lecture_03_dotsearch_dp.py b/irlc/lectures/lec03/lecture_03_dotsearch_dp.py new file mode 100644 index 0000000..baff1ee --- /dev/null +++ b/irlc/lectures/lec03/lecture_03_dotsearch_dp.py @@ -0,0 +1,12 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.lectures.chapter4search.yield_version.pacman_yield import stest, ForwardDPSearchAgent, dargs +# from irlc.ex03.pacsearch_agents import GymPositionSearchProblem, manhattanHeuristic, GymCornersProblem, cornersHeuristic, foodHeuristic, GymFoodSearchProblem, GymAnyFoodSearchProblem +from irlc.ex03multisearch.pacman_problem_positionsearch import GymPositionSearchProblem#, manhattanHeuristic + + +def singledot(layout='smallMaze', SAgent=None, agent_args=None, layout_str=None): + stest(layout=layout, layout_str=layout_str, SAgent=SAgent, prob=GymPositionSearchProblem(), agent_args=agent_args, zoom=2, **dargs, fps=30) # part 3 + +if __name__ == "__main__": + agent_args = dict(N=30) + singledot(SAgent=ForwardDPSearchAgent, agent_args=agent_args) diff --git a/irlc/lectures/lec03/lecture_03_expectimax.py b/irlc/lectures/lec03/lecture_03_expectimax.py new file mode 100644 index 0000000..826975f --- /dev/null +++ b/irlc/lectures/lec03/lecture_03_expectimax.py @@ -0,0 +1,7 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.ex03multisearch.multisearch_agents import GymExpectimaxAgent +from irlc.lectures.lec03.lecture_03_minimax import gminmax + +if __name__ == "__main__": + d = 3 + gminmax(Agent=GymExpectimaxAgent,depth=d) diff --git a/irlc/lectures/lec03/lecture_03_minimax.py b/irlc/lectures/lec03/lecture_03_minimax.py new file mode 100644 index 0000000..eb8ee73 --- /dev/null +++ b/irlc/lectures/lec03/lecture_03_minimax.py @@ -0,0 +1,35 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.ex01.agent import train +from irlc.pacman.pacman_environment import GymPacmanEnvironment +from irlc.utils.video_monitor import VideoMonitor +from irlc.ex03multisearch.multisearch_agents import GymMinimaxAgent + + +layout_str = """ +%%%%%%%%% +% % +% %%%% % +% % +% P % +%%%% % +%%%% .G % +%%%% % +%%%%%%%%% +""".strip() + +def gminmax(layout='smallClassic', layout_str=layout_str, Agent=None, depth=3, **kwargs): + zoom = 2 + env = GymPacmanEnvironment(layout=layout, layout_str=layout_str, zoom=zoom, **kwargs) + agent = Agent(env, depth=depth) + from irlc import PlayWrapper + agent = PlayWrapper(agent, env) + + env = VideoMonitor(env, agent=agent, agent_monitor_keys=tuple(), fps=10) + train(env, agent, num_episodes=30) + env.close() + +if __name__ == "__main__": + d = 3 + gminmax(layout='minimaxClassic', layout_str=layout_str, Agent=GymMinimaxAgent,depth=d) + # gminmax(layout='minimaxClassic', layout_str=layout_str, Agent=GymAlphaBetaAgent, depth=d) + # gminmax(layout='minimaxClassic', layout_str=layout_str, Agent=GymExpectimaxAgent,depth=d) diff --git a/irlc/lectures/lec03/lecture_03_squaresearch_bfs.py b/irlc/lectures/lec03/lecture_03_squaresearch_bfs.py new file mode 100644 index 0000000..ac1e095 --- /dev/null +++ b/irlc/lectures/lec03/lecture_03_squaresearch_bfs.py @@ -0,0 +1,12 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.lectures.chapter4search.yield_version.pacman_yield import BFSAgentYield +from irlc.lectures.chapter4search.search_tables import s_large + +# def tricksearchdot(layout='trickySearch', SAgent=None, agent_args=None, layout_str=None): +# stest(layout_str=layout_str, SAgent=SAgent, prob=GymFoodSearchProblem(), agent_args=agent_args, zoom=2, **dargs, fps=1000) # part 3 + +from irlc.lectures.lec03.lecture_03_tricksearch_bfs import tricksearchdot + +if __name__ == "__main__": + # agent_args = dict(heuristic=manhattanHeuristic,N=30) + tricksearchdot(SAgent=BFSAgentYield, agent_args=None, layout_str=s_large) diff --git a/irlc/lectures/lec03/lecture_03_tricksearch_astar.py b/irlc/lectures/lec03/lecture_03_tricksearch_astar.py new file mode 100644 index 0000000..6c65849 --- /dev/null +++ b/irlc/lectures/lec03/lecture_03_tricksearch_astar.py @@ -0,0 +1,10 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +# from irlc.ex03.pacsearch_agents import GymPositionSearchProblem, manhattanHeuristic, GymCornersProblem, cornersHeuristic, foodHeuristic, GymFoodSearchProblem, GymAnyFoodSearchProblem +from irlc.lectures.chapter4search.yield_version.pacman_yield import AStarAgentYield + +from irlc.lectures.lec03.lecture_03_tricksearch_bfs import tricksearchdot +from irlc.ex03multisearch.pacman_problem_foodsearch_astar import foodHeuristic + +if __name__ == "__main__": + agent_args = dict(heuristic=foodHeuristic) + tricksearchdot(SAgent=AStarAgentYield, agent_args=agent_args) diff --git a/irlc/lectures/lec03/lecture_03_tricksearch_bfs.py b/irlc/lectures/lec03/lecture_03_tricksearch_bfs.py new file mode 100644 index 0000000..89b7764 --- /dev/null +++ b/irlc/lectures/lec03/lecture_03_tricksearch_bfs.py @@ -0,0 +1,21 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.lectures.chapter4search.yield_version.pacman_yield import stest, dargs +from irlc.ex03multisearch.pacman_problem_foodsearch import GymFoodSearchProblem +from irlc.lectures.chapter4search.yield_version.pacman_yield import BFSAgentYield + +layout_str = """ +%%%%%%%%%%%% +% % % +%.%.%.%% % % +% P % % +%%%%%%%%%% % +%. % +%%%%%%%%%%%% +""".strip() + +def tricksearchdot(layout_str=layout_str, SAgent=None, agent_args=None): + stest(layout_str=layout_str, SAgent=SAgent, prob=GymFoodSearchProblem(), agent_args=agent_args, zoom=2, **dargs, fps=1000) # part 3 + +if __name__ == "__main__": + # agent_args = dict(heuristic=manhattanHeuristic,N=30) + tricksearchdot(SAgent=BFSAgentYield, agent_args=None) diff --git a/irlc/lectures/lec03/lecture_03_tricksearch_dfs.py b/irlc/lectures/lec03/lecture_03_tricksearch_dfs.py new file mode 100644 index 0000000..f3b2ac4 --- /dev/null +++ b/irlc/lectures/lec03/lecture_03_tricksearch_dfs.py @@ -0,0 +1,10 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +# from irlc.ex03.pacsearch_agents import GymPositionSearchProblem, manhattanHeuristic, GymCornersProblem, cornersHeuristic, foodHeuristic, GymFoodSearchProblem, GymAnyFoodSearchProblem + +from irlc.lectures.chapter4search.yield_version.pacman_yield import DFSAgentYield +from irlc.lectures.lec03.lecture_03_tricksearch_bfs import tricksearchdot + + +if __name__ == "__main__": + # agent_args = dict(heuristic=manhattanHeuristic,N=30) + tricksearchdot(SAgent=DFSAgentYield, agent_args=None) diff --git a/irlc/lectures/lec03/snapshot_base/openaigym.video.0.8068.video000000.meta.json b/irlc/lectures/lec03/snapshot_base/openaigym.video.0.8068.video000000.meta.json new file mode 100644 index 0000000..5dc734d --- /dev/null +++ b/irlc/lectures/lec03/snapshot_base/openaigym.video.0.8068.video000000.meta.json @@ -0,0 +1 @@ +{"episode_id": 0, "content_type": "video/mp4"} \ No newline at end of file diff --git a/irlc/lectures/lec03/snapshot_base/openaigym.video.0.8068.video000000.mp4 b/irlc/lectures/lec03/snapshot_base/openaigym.video.0.8068.video000000.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..17e5e5fbd204f4f1c8bf240b166ab0a318db4744 GIT binary patch literal 48 xcmZQzU{FXasVvAW&d+6FU}6B#Kx~v)mTZ_?U}DI?z`&7Kl$r{p8FEt+O8`9Q4A1}o literal 0 HcmV?d00001 diff --git a/irlc/lectures/lec04/__init__.py b/irlc/lectures/lec04/__init__.py new file mode 100644 index 0000000..a56057c --- /dev/null +++ b/irlc/lectures/lec04/__init__.py @@ -0,0 +1 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. diff --git a/irlc/lectures/lec04/lecture_04_car_basic_pid.py b/irlc/lectures/lec04/lecture_04_car_basic_pid.py new file mode 100644 index 0000000..e364ba1 --- /dev/null +++ b/irlc/lectures/lec04/lecture_04_car_basic_pid.py @@ -0,0 +1,12 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc import train +from irlc.car.car_model import CarEnvironment +from irlc.ex04.pid_car import PIDCarAgent + +if __name__ == "__main__": + env = CarEnvironment(noise_scale=0, Tmax=30, max_laps=1, render_mode='human') + agent = PIDCarAgent(env, v_target=.2, use_both_x5_x3=False) + stats, trajectories = train(env, agent, num_episodes=1, return_trajectory=True) + env.close() + + # agent = PIDCarAgent(env, v_target=1, use_both_x5_x3=True) # I recommend lowering v_target to make the problem simpler. diff --git a/irlc/lectures/lec04/lecture_04_cartpole_A.py b/irlc/lectures/lec04/lecture_04_cartpole_A.py new file mode 100644 index 0000000..3117d00 --- /dev/null +++ b/irlc/lectures/lec04/lecture_04_cartpole_A.py @@ -0,0 +1,9 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc import train +from irlc.ex04.pid_cartpole import PIDCartpoleAgent, get_offbalance_cart + +if __name__ == "__main__": + env = get_offbalance_cart(30) + agent = PIDCartpoleAgent(env, dt=env.dt, Kp=120, Ki=0, Kd=10, balance_to_x0=False) + _, trajectories = train(env, agent, num_episodes=1, reset=False) + env.close() diff --git a/irlc/lectures/lec04/lecture_04_cartpole_B.py b/irlc/lectures/lec04/lecture_04_cartpole_B.py new file mode 100644 index 0000000..a57e095 --- /dev/null +++ b/irlc/lectures/lec04/lecture_04_cartpole_B.py @@ -0,0 +1,14 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc import train +from irlc.ex04.pid_cartpole import PIDCartpoleAgent, get_offbalance_cart + +if __name__ == "__main__": + """ + Second task: We will now also try to bring the cart towards x=0. + """ + env = get_offbalance_cart(30) + agent = PIDCartpoleAgent(env, env.dt, ...) + # TODO: 1 lines missing. + raise NotImplementedError("Define your agent here (including parameters)") + _, trajectories = train(env, agent, num_episodes=1, reset=False) # Note reset=False to maintain initial conditions. + env.close() diff --git a/irlc/lectures/lec04/lecture_04_harmonic.py b/irlc/lectures/lec04/lecture_04_harmonic.py new file mode 100644 index 0000000..7d74099 --- /dev/null +++ b/irlc/lectures/lec04/lecture_04_harmonic.py @@ -0,0 +1,14 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc import train +from irlc.ex04.model_harmonic import HarmonicOscilatorEnvironment +from irlc import Agent +import numpy as np + +class NullAgent(Agent): + def pi(self, x, k, info=None): + return np.asarray([0]) + +if __name__ == "__main__": + env = HarmonicOscilatorEnvironment(render_mode='human') + train(env, NullAgent(env), num_episodes=1, max_steps=200) + env.close() diff --git a/irlc/lectures/lec04/lecture_04_lunar.py b/irlc/lectures/lec04/lecture_04_lunar.py new file mode 100644 index 0000000..9f870d7 --- /dev/null +++ b/irlc/lectures/lec04/lecture_04_lunar.py @@ -0,0 +1,12 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.ex04.pid_lunar import get_lunar_lander +import gymnasium +from irlc import train + +if __name__ == "__main__": + env = gymnasium.make('LunarLanderContinuous-v3', render_mode='human') + env._max_episode_steps = 1000 # We don't want it to time out. + + agent = get_lunar_lander(env) + stats, traj = train(env, agent, return_trajectory=True, num_episodes=10) + env.close() diff --git a/irlc/lectures/lec04/lecture_04_pendulum_random.py b/irlc/lectures/lec04/lecture_04_pendulum_random.py new file mode 100644 index 0000000..58d0843 --- /dev/null +++ b/irlc/lectures/lec04/lecture_04_pendulum_random.py @@ -0,0 +1,8 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc import Agent, train +from irlc.ex04.model_pendulum import GymSinCosPendulumEnvironment + +if __name__ == "__main__": + env = GymSinCosPendulumEnvironment(Tmax=20, render_mode='human') + train(env, Agent(env), num_episodes=1) + env.close() diff --git a/irlc/lectures/lec04/lecture_04_pid_d.py b/irlc/lectures/lec04/lecture_04_pid_d.py new file mode 100644 index 0000000..8b05ff1 --- /dev/null +++ b/irlc/lectures/lec04/lecture_04_pid_d.py @@ -0,0 +1,5 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.lectures.lec04.lecture_04_pid_p import pidplot + +if __name__ == "__main__": + pidplot(Kp=40, Kd=100, Ki=0) diff --git a/irlc/lectures/lec04/lecture_04_pid_iA.py b/irlc/lectures/lec04/lecture_04_pid_iA.py new file mode 100644 index 0000000..3e8ac8b --- /dev/null +++ b/irlc/lectures/lec04/lecture_04_pid_iA.py @@ -0,0 +1,5 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.lectures.lec04.lecture_04_pid_p import pidplot + +if __name__ == "__main__": + pidplot(Kp=40, Kd=50, Ki=0, slope=2, target=0) diff --git a/irlc/lectures/lec04/lecture_04_pid_iB.py b/irlc/lectures/lec04/lecture_04_pid_iB.py new file mode 100644 index 0000000..bc10b3a --- /dev/null +++ b/irlc/lectures/lec04/lecture_04_pid_iB.py @@ -0,0 +1,5 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.lectures.lec04.lecture_04_pid_p import pidplot + +if __name__ == "__main__": + pidplot(Kp=40, Kd=50, Ki=10, slope=2, target=0) diff --git a/irlc/lectures/lec04/lecture_04_pid_p.py b/irlc/lectures/lec04/lecture_04_pid_p.py new file mode 100644 index 0000000..488d5fd --- /dev/null +++ b/irlc/lectures/lec04/lecture_04_pid_p.py @@ -0,0 +1,16 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.ex04.locomotive import LocomotiveEnvironment +from irlc.ex04.pid_locomotive_agent import PIDLocomotiveAgent +from irlc.ex01.agent import train + +def pidplot(Kp=40, Kd=0, Ki=0, slope=0, target=0): + dt = .04 + m = 70 + Tmax=20 + env = LocomotiveEnvironment(m=m, slope=slope, dt=dt, Tmax=Tmax, render_mode='human') + agent = PIDLocomotiveAgent(env, dt=dt, Kp=Kp, Ki=Ki, Kd=Kd, target=0) + train(env, agent, num_episodes=1) + env.close() + +if __name__ == "__main__": + pidplot(Kp=40, Kd=0, Ki=0) diff --git a/irlc/tests/tests_week03.py b/irlc/tests/tests_week03.py new file mode 100644 index 0000000..403e29a --- /dev/null +++ b/irlc/tests/tests_week03.py @@ -0,0 +1,88 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from unitgrade import Report +import irlc +from unitgrade import UTestCase +from irlc.ex03.kuramoto import KuramotoModel, f +import sympy as sym +import numpy as np + +class Problem1Kuramoto(UTestCase): + """ Test the Kuromoto Osscilator """ + def test_continious_model(self): + cmodel = KuramotoModel() + x, u = sym.symbols("x u") + expr = cmodel.sym_f([x], [u]) + # Check the expression has the right type. + self.assertIsInstance(expr, list) + # Evaluate the expression and check the result in a given point. + self.assertEqualC(expr[0].subs([(x, 0.2), (u, 0.93)])) + + def test_f(self): + self.assertLinf(f([0.1], [0.4]), tol=1e-6) + + + def test_RK4(self): + from irlc.ex03.kuramoto import rk4_simulate + + cmodel = KuramotoModel() + x0 = np.asarray(cmodel.x0_bound().low) # Get the starting state x=0. + u = 1.3 + xs, ts = rk4_simulate(x0, [u], t0=0, tF=20, N=100) + + # xs, us, ts = cmodel.simulate(x0, u_fun=u , t0=0, tF=20) + self.assertLinf(ts, tol=1e-6) + # self.assertLinf(us, tol=1e-6) + self.assertLinf(xs, tol=1e-6) + + # Test the same with a varying function: + xs, ts = rk4_simulate(x0, [u+1], t0=0, tF=10, N=50) + # xs, us, ts = cmodel.simulate(x0, u_fun=lambda x,t: np.sin(x + u) , t0=0, tF=10) + self.assertLinf(ts, tol=1e-6) + # self.assertLinf(us, tol=1e-6) + self.assertLinf(xs, tol=1e-6) + +class Exam5InventoryEvaluation(UTestCase): + def test_a_test_expected_items_next_day(self): + from irlc.ex03.inventory_evaluation import a_expected_items_next_day + self.assertAlmostEqual(a_expected_items_next_day(x=0, u=1), 0.1, places=5) + + def test_b_test_expected_items_next_day(self): + from irlc.ex03.inventory_evaluation import b_evaluate_policy + pi = self.get_pi() + self.assertAlmostEqual(b_evaluate_policy(pi, 1), 2.7, places=5) + + def get_pi(self): + from irlc.ex02.inventory import InventoryDPModel + model = InventoryDPModel() + pi = [{x: 1 if x == 0 else 0 for x in model.S(k)} for k in range(model.N)] + return pi + +class Exam6Toy2d(UTestCase): + def test_rk4_a(self): + from irlc.ex03.toy_2d_control import toy_simulation + w = toy_simulation(u0=0.4, T=5) + self.assertFalse(isinstance(w, np.ndarray), msg="Your toy_simulation function must return a float") + self.assertEqual(type(float(w)), float, msg="Your toy_simulation function must return a float") + self.assertLinf(w, tol=0.01, msg="Your simulation ended up at the wrong angle") + + def test_rk4_b(self): + from irlc.ex03.toy_2d_control import toy_simulation + w = toy_simulation(u0=-0.1, T=2) + self.assertFalse( isinstance(w, np.ndarray), msg="Your toy_simulation function must return a float") + self.assertEqual(type(float(w)), float, msg="Your toy_simulation function must return a float") + self.assertLinf(w, tol=0.01, msg="Your simulation ended up at the wrong angle") + + +class Week03Tests(Report): #240 total. + title = "Tests for week 03" + pack_imports = [irlc] + individual_imports = [] + questions = [ + (Problem1Kuramoto, 10), + (Exam5InventoryEvaluation, 10), + (Exam6Toy2d, 10), + ] + +if __name__ == '__main__': + from unitgrade import evaluate_report_student + evaluate_report_student(Week03Tests()) diff --git a/irlc/tests/tests_week04.py b/irlc/tests/tests_week04.py new file mode 100644 index 0000000..b032c0b --- /dev/null +++ b/irlc/tests/tests_week04.py @@ -0,0 +1,131 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from unitgrade import Report +from unitgrade import UTestCase +import irlc +from irlc.car.car_model import CarEnvironment +from irlc.ex04.pid_car import PIDCarAgent +from irlc import train +from irlc.ex04.pid_locomotive_agent import LocomotiveEnvironment, PIDLocomotiveAgent +from irlc.ex03.kuramoto import KuramotoModel, f +from irlc.ex04.discrete_kuramoto import fk, dfk_dx +import sympy as sym +import numpy as np + +class Problem1DiscreteKuromoto(UTestCase): + """ Test the Kuromoto Osscilator """ + def test_continious_model(self): + cmodel = KuramotoModel() + x, u = sym.symbols("x u") + expr = cmodel.sym_f([x], [u]) + # Check the expression has the right type. + self.assertIsInstance(expr, list) + # Evaluate the expression and check the result in a given point. + self.assertEqualC(expr[0].subs([(x, 0.2), (u, 0.93)])) + + def test_f(self): + self.assertLinf(f([0.1], [0.4]), tol=1e-6) + + def test_fk(self): + self.assertLinf(fk([0.1], [0.4]), tol=1e-6) + + def test_dfk_dx(self): + self.assertLinf(dfk_dx([0.1], [0.4]), tol=1e-6) + +class Problem3PID(UTestCase): + """ PID Control """ + + def test_pid_class(self, Kp=40, Ki=0, Kd=0, target=0, x=0): + dt = 0.08 + from irlc.ex04.pid import PID + pid = PID(Kp=Kp, Kd=Kd, Ki=Ki, target=target, dt=0.8) + u = pid.pi(x) + self.assertL2(u, tol=1e-4) + + def test_pid_Kp(self): + self.test_pid_class(40, 0, 0, 0, 1) + self.test_pid_class(10, 0, 0, 0, 2) + + + def test_pid_target(self): + self.test_pid_class(40, 0, 0, 3, 1) + self.test_pid_class(20, 0, 0, 0, 2) + + + def test_pid_all(self): + self.test_pid_class(4, 3, 8, 1, 1) + self.test_pid_class(40, 10, 3, 0, 2) + + +class Problem4PIDAgent(UTestCase): + """ PID Control """ + + def pid_locomotive(self, Kp=40, Ki=0, Kd=0, slope=0, target=0): + dt = 0.08 + env = LocomotiveEnvironment(m=10, slope=slope, dt=dt, Tmax=5) + agent = PIDLocomotiveAgent(env, dt=dt, Kp=Kp, Ki=Ki, Kd=Kd, target=target) + stats, traj = train(env, agent, return_trajectory=True, verbose=False) + self.assertL2(traj[0].state, tol=1e-4) + + def test_locomotive_flat(self): + self.pid_locomotive() + + def test_locomotive_Kd(self): + """ Test the derivative term """ + self.pid_locomotive(Kd = 10) + + def test_locomotive_Ki(self): + """ Test the integral term """ + self.pid_locomotive(Kd = 10, Ki=5, slope=5) + + + def test_locomotive_all(self): + """ Test all terms """ + self.pid_locomotive(Kp=35, Kd = 10, Ki=5, slope=5, target=1) + + + + +class Problem7PIDCar(UTestCase): + lt = -1 + + @classmethod + def setUpClass(cls) -> None: + env = CarEnvironment(noise_scale=0, Tmax=80, max_laps=2) + agent = PIDCarAgent(env, v_target=1.0) + stats, trajectories = train(env, agent, num_episodes=1, return_trajectory=True) + d = trajectories[0].state[:, 4] + lt = len(d) * env.dt / 2 + print("Lap time", lt) + cls.lt = lt + + def test_below_60(self): + """ Testing if lap time is < 60 """ + self.assertTrue(0 < self.__class__.lt < 60) + + def test_below_40(self): + """ Testing if lap time is < 60 """ + self.assertTrue(0 < self.__class__.lt < 40) + + + def test_below_30(self): + """ Testing if lap time is < 60 """ + self.assertTrue(0 < self.__class__.lt < 30) + + def test_below_22(self): + """ Testing if lap time is < 22 """ + self.assertTrue(0 < self.__class__.lt < 22) + +class Week04Tests(Report): + title = "Tests for week 04" + pack_imports = [irlc] + individual_imports = [] + questions = [ + (Problem1DiscreteKuromoto, 10), + (Problem3PID, 10), + (Problem4PIDAgent, 10), # ok + (Problem7PIDCar, 10), # ok + ] + +if __name__ == '__main__': + from unitgrade import evaluate_report_student + evaluate_report_student(Week04Tests()) diff --git a/irlc/tests/unitgrade_data/Exam5InventoryEvaluation.pkl b/irlc/tests/unitgrade_data/Exam5InventoryEvaluation.pkl index 39ab00d05f08228855c1778e9433e4c61a329a33..7ea10ed52c9791c2377088198836e6db33a6458b 100644 GIT binary patch delta 28 hcmcb~c$0C0Gn1^qL{|?c(>oLU9XZt$7(k#@4*-3%2Ri@& delta 28 hcmcb~c$0C0GZWW?iLM?@##<)#J93H#Fn~a*9sr9F2k!s? diff --git a/irlc/tests/unitgrade_data/Exam6Toy2d.pkl b/irlc/tests/unitgrade_data/Exam6Toy2d.pkl index 96d0e4d9b4fd352e8c93a686fccf7054a4812251..a03b1180f12e0d758b892bba3b90b86479d324df 100644 GIT binary patch delta 29 kcmbQuG@EIHDU*W0MDy9q+pc|>c-5A3`9}u^ASl%X0G-qdI{*Lx delta 29 jcmbQuG@EIHDHBJ-MDy9q%TzliUbW?19KVAB2uk$;l6eVC diff --git a/irlc/tests/unitgrade_data/ExamQuestion7FlowersStore.pkl b/irlc/tests/unitgrade_data/ExamQuestion7FlowersStore.pkl index 2aca5b7ac5f5f01abd4f81af3d043b6781859556..5ac1a3b16ed56331d78bfee48060770518f91c14 100644 GIT binary patch delta 27 gcmdnSxQ%gwE8~NSZe~nD4HN71Ijx>BfIz7p0D{p74gdfE delta 27 gcmdnSxQ%gwE2G0iH!~*3kcoBroU#WPK%i6)0C2?y@Bjb+ diff --git a/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl b/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl index f8993993fa7109ee295362187acbe079966a8976..a3ccc61da262af2509fb54e73cb13211d1985255 100644 GIT binary patch delta 28 hcmZ3*xQcOtEt90dM0-<4=82`+Ty`HD7(k#@4*+ms2L%8C delta 28 hcmZ3*xQcOtEt5gUM0-<4$%&=fT=x783?NXd2LNye28jRw diff --git a/irlc/tests/unitgrade_data/Problem1DiscreteKuromoto.pkl b/irlc/tests/unitgrade_data/Problem1DiscreteKuromoto.pkl index c843e1bd443b6168df4e428a83b7acb35d40e238..4bad30f14aecbb24fbb45cb685e1dff83fc3a989 100644 GIT binary patch delta 42 xcmdnRvWsPcGZUY~MAtsX_Y<#dX9_$uxtGz8xrgJy<a><4oE6U<7=WNu4**;O4<i5o delta 42 xcmdnRvWsPcGn072MAtsX>WNpjGx<AB?q&32Zk14&e2+1hvv`pM0}zzz0RTGX4NU+5 diff --git a/irlc/tests/unitgrade_data/Problem1Kuramoto.pkl b/irlc/tests/unitgrade_data/Problem1Kuramoto.pkl index a5d5081d8a4e66119e91491dcc272482aa65c02a..e23272d107553ef01432ebbab1f927cf2dce232c 100644 GIT binary patch delta 37 tcmX>meoTCVHIvAJiMGAW3o->Jp54Y2Fne<=qXie|^SkF77#J8z^#B#K4h;YR delta 37 tcmX>meoTCVHIvweiMGAW^H>Zfp54Y2l(@N-(SnQf!HiiA3=9mVdH@DX4FCWD diff --git a/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl b/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl index 155ac41c14a34472f76bb8cf851e03ef7ef244f4..2eb951e988eb8349ac9e93b038be29f198799c6c 100644 GIT binary patch delta 58 zcmeys^nq!DEfaUbM0+D<)&_=&tK}w65@YVY3}l%wpPch!vKph@Bt~WCt@mqyOh?Y6 LFPa#Dpi~b4T(1-6 delta 57 zcmeys^nq!DEt3|<M0+D9g@%c%<R?xNV_tBxVd82N=3{(nlhqjICNU~AA86nKG95XO L_fBE}f>J#IGTIUF diff --git a/irlc/tests/unitgrade_data/Problem2DeterministicDP.pkl b/irlc/tests/unitgrade_data/Problem2DeterministicDP.pkl index 64984f893686559f08898102997966328856fddb..c8d3953e5053023c2412a3d7208369da3257b522 100644 GIT binary patch delta 22 bcmZ3;xR7yz6BGA>i7p|W{w@q4P^t$2M==E~ delta 22 bcmZ3;xR7yz6BCESM3)dwcNqo{DAfZ1Ks*GQ diff --git a/irlc/tests/unitgrade_data/Problem2DeterministicInventory.pkl b/irlc/tests/unitgrade_data/Problem2DeterministicInventory.pkl index b29e17182a49435cc5fd0b1a1cd217497cdc34c7..dd94ff3d53e56b972caab0e514adf88a60a61109 100644 GIT binary patch delta 22 bcmZo*Y+#(=$t2Y<(OZY}j{^e;l<ENhMeqe^ delta 22 bcmZo*Y+#(=$;7u{qPGs`7Y7CqDAfZ1MzjTV diff --git a/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl b/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl index 8bcdcc94c44e2c11c162a4b19c4175a8bcb9c1a6..10ef0d0879a8acc8800b7907d7bfc9555633ed1d 100644 GIT binary patch delta 63 zcmX@ibeL&EAY;PBpmsJU?RK%7eiQGDGD(<C{HCRR-@kWy-X(zpQ`)8kO)1V`%#d!I TlEKn8#ohkc+6@dqP^t$24d)mb delta 63 zcmX@ibeL&EAmf6GLG5g(7rJiyTs!f;D3eLn#BW;4zv{B~lw1}#Fr{ru(3Ii~#tiAU TDH$wnQ{3&3C2wK?f>J#Ij?x;L diff --git a/irlc/tests/unitgrade_data/Problem3PID.pkl b/irlc/tests/unitgrade_data/Problem3PID.pkl index 38dbd0f745e0be5a20f30d32593682985001a463..e6bd0d04183b656cab531b03669f8f05246b7e66 100644 GIT binary patch delta 41 ucmX@dbdG6)856g|L<@T+)ddqP9hvkhChl=%(q)*;%;>~v*uVe+rFsDV=n8EB delta 41 vcmX@dbdG6)857rmi5B)uiVr4MIx^|(n7GH4N&muRW=1DYZ3zYtDAfZ1Ba;it diff --git a/irlc/tests/unitgrade_data/Problem3StochasticDP.pkl b/irlc/tests/unitgrade_data/Problem3StochasticDP.pkl index 2fb1dd65efebdd3c5491f61efc464ff4283da211..ebb615932b54984a7b955fcd8ca517fbd2637a96 100644 GIT binary patch delta 26 fcmcb~bdza<J(Fm|M8_OPwTbH%b5t>aK&c)8cJl{` delta 26 fcmcb~bdza<Jrjq+M8_OPg^BAHa|kkkK&c)8ZRQ5e diff --git a/irlc/tests/unitgrade_data/Problem4DPAgent.pkl b/irlc/tests/unitgrade_data/Problem4DPAgent.pkl index 290a59793d1494fc5fc50ec86e6cb959f53df6e3..471b04d04764c2c7ac0f9e5d7cf2eb6abda40484 100644 GIT binary patch delta 20 Zcmb=doM6Sobzq{638$g}0|=Dr0RTHU1ZMyM delta 20 Zcmb=doM6So@nE8j38%sg1`sIK0{}q~1q=WH diff --git a/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl b/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl index c9c41623d25d4dbd13005e9ae115a2fc96e2e41e..a2565317907506395af00b12ff1ca090bbd1c513 100644 GIT binary patch delta 47 zcmeyw_=$0X6Jx?e=N{#!7nE-QtP?merEN;kl;RA=47Ro@87ys6-0fGo|6l-uQau3p CWD_L- delta 47 zcmeyw_=$0X6QjdK=N{!5c|y$xP6-~E(l#Y%N^u5b23y;d43@Sj?)EDVE?@wHQau3D CMiL_c diff --git a/irlc/tests/unitgrade_data/Problem4PIDAgent.pkl b/irlc/tests/unitgrade_data/Problem4PIDAgent.pkl index 83f95e17e4c0613ff3572f0023d6547b5be787a0..032cf4b5a48e195fe36d6ceca7e86391d5ff21c9 100644 GIT binary patch delta 53 zcmX@8a!_T0H52!NjkY^knD;JQGdYJ-a<dR?I1BUckSCjSI5Sz8^fzq2&YQ@>c|c_X J0}zzz0RR*u5$*s0 delta 53 zcmX@8a!_T0H4~@9M%$e%%)7p&OwQqy+$_Wz&cb{!LuYdiXC@2N=LMUu^Cq%z?)Y_p I0SHR<00UMMg#Z8m diff --git a/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl b/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl index 322ff5b7a38f90b4446cd00c456f4ce713784afc..7bd2c4e13a274e17171d07440189adaaaa0dbb7e 100644 GIT binary patch delta 18 Ycmb=eo#4!vFww=3L+k(p5R~cx05$dmwg3PC delta 18 Ycmb=eo#4!PV4{m5hlc|L5R~cx0669Z$N&HU diff --git a/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl b/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl index 90c7b19e1b43df06497155d15b787aa102807bcd..2e417a7c35f1291647008d44d675e85d118ee917 100644 GIT binary patch delta 20 bcmX@gc$9I1GvkAaE|nZzmm3%u7)td3Qn&{f delta 20 ccmX@gc$9I1Gvk7ZE|na2ZZ$A4FqG;608%*zFaQ7m diff --git a/irlc/tests/unitgrade_data/Problem7PIDCar.pkl b/irlc/tests/unitgrade_data/Problem7PIDCar.pkl index 72d293c30b4036e1d9a4b5d49488d85c7e7b3178..c1d31f985ab53b13c82af6a1008087d60e1ef895 100644 GIT binary patch delta 35 ocmZ3?yqI}{C5wt_+k}a>aZC>zCe96JygKpYO3sT6U{I<D0O*<w7XSbN delta 37 qcmZ3?yqI}{C5y%lg^r1~ag28-eiWU!(Vp?j#E&aEFED^XsU83k9t~gs -- GitLab