diff --git a/irlc/lectures/lec01/lecture_01_car_random.py b/irlc/lectures/lec01/lecture_01_car_random.py index fb25f46648345dd7fe89790a21bb3cf14387367c..76e673cb79f732fce368fb4595613dd17681af06 100644 --- a/irlc/lectures/lec01/lecture_01_car_random.py +++ b/irlc/lectures/lec01/lecture_01_car_random.py @@ -8,5 +8,5 @@ if __name__ == "__main__": env.action_space.low[1] = 0 # To ensure we do not drive backwards. agent = Agent(env) env, agent = interactive(env, agent, autoplay=False) - stats, _ = train(env, agent, num_episodes=1, verbose=False) + stats, _ = train(env, agent, num_episodes=10, verbose=False) env.close() diff --git a/irlc/lectures/lec03/__init__.py b/irlc/lectures/lec03/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a56057c84d0ceac54aab1d40ba0f370c77fe10be --- /dev/null +++ b/irlc/lectures/lec03/__init__.py @@ -0,0 +1 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. diff --git a/irlc/lectures/lec03/ex_03_search.py b/irlc/lectures/lec03/ex_03_search.py new file mode 100644 index 0000000000000000000000000000000000000000..7d5ce2ca57e2fb179f264be0f519d6d334287b12 --- /dev/null +++ b/irlc/lectures/lec03/ex_03_search.py @@ -0,0 +1,18 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. 
+from irlc import Agent, train, savepdf +from irlc.pacman.pacman_environment import PacmanEnvironment +from irlc.ex03.dp_forward import dp_forward +from irlc.ex03.search_problem import SearchProblem +from irlc.ex03.search_problem import EnsureTerminalSelfTransitionsWrapper +from irlc.ex03.pacman_search import layout2, layout1 + +if __name__ == "__main__": + env = PacmanEnvironment(layout_str=layout1, render_mode='human') + env.reset() + savepdf("ex03_layout1", env=env) + env.close() + + env = PacmanEnvironment(layout_str=layout2, render_mode='human') + env.reset() + savepdf("ex03_layout2", env=env) + env.close() diff --git a/irlc/lectures/lec03/lecture_03_alphab.py b/irlc/lectures/lec03/lecture_03_alphab.py new file mode 100644 index 0000000000000000000000000000000000000000..fa81c07f7264c87577af4431c2be9339a152c139 --- /dev/null +++ b/irlc/lectures/lec03/lecture_03_alphab.py @@ -0,0 +1,7 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.ex03multisearch.multisearch_alphabeta import GymAlphaBetaAgent +from irlc.lectures.lec03.lecture_03_minimax import gminmax + +if __name__ == "__main__": + d = 3 + gminmax(Agent=GymAlphaBetaAgent,depth=d) diff --git a/irlc/lectures/lec03/lecture_03_dotsearch_astar_manhattan.py b/irlc/lectures/lec03/lecture_03_dotsearch_astar_manhattan.py new file mode 100644 index 0000000000000000000000000000000000000000..ebea74a0e4980b47a004271f184f81f38154fa9e --- /dev/null +++ b/irlc/lectures/lec03/lecture_03_dotsearch_astar_manhattan.py @@ -0,0 +1,8 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. 
+from irlc.lectures.lec03.lecture_03_dotsearch_dp import singledot +from irlc.lectures.chapter4search.yield_version.pacman_yield import AStarAgentYield +from irlc.ex03multisearch.pacman_problem_positionsearch_astar import manhattanHeuristic + +if __name__ == "__main__": + agent_args = dict(heuristic=manhattanHeuristic) + singledot(SAgent=AStarAgentYield, agent_args=agent_args) diff --git a/irlc/lectures/lec03/lecture_03_dotsearch_bfs.py b/irlc/lectures/lec03/lecture_03_dotsearch_bfs.py new file mode 100644 index 0000000000000000000000000000000000000000..2fafd77ced41a6c50ad917927cc70801ea29061a --- /dev/null +++ b/irlc/lectures/lec03/lecture_03_dotsearch_bfs.py @@ -0,0 +1,9 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.lectures.lec03.lecture_03_dotsearch_dp import singledot +from irlc.lectures.chapter4search.yield_version.pacman_yield import BFSAgentYield + +if __name__ == "__main__": + # agent_args = dict(heuristic=manhattanHeuristic,N=30) + singledot(SAgent=BFSAgentYield) + + # singledot(SAgent=BFSAgentYield) diff --git a/irlc/lectures/lec03/lecture_03_dotsearch_dfs.py b/irlc/lectures/lec03/lecture_03_dotsearch_dfs.py new file mode 100644 index 0000000000000000000000000000000000000000..276aa6bee3f60db8a9172d3dab1ba3ee463918f4 --- /dev/null +++ b/irlc/lectures/lec03/lecture_03_dotsearch_dfs.py @@ -0,0 +1,9 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. 
+from irlc.lectures.lec03.lecture_03_dotsearch_dp import singledot +from irlc.lectures.chapter4search.yield_version.pacman_yield import DFSAgentYield + +if __name__ == "__main__": + # agent_args = dict(heuristic=manhattanHeuristic,N=30) + singledot(SAgent=DFSAgentYield) + + # singledot(SAgent=BFSAgentYield) diff --git a/irlc/lectures/lec03/lecture_03_dotsearch_dp.py b/irlc/lectures/lec03/lecture_03_dotsearch_dp.py new file mode 100644 index 0000000000000000000000000000000000000000..baff1ee775c117f2d1cfb55948667899eba0db5e --- /dev/null +++ b/irlc/lectures/lec03/lecture_03_dotsearch_dp.py @@ -0,0 +1,12 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.lectures.chapter4search.yield_version.pacman_yield import stest, ForwardDPSearchAgent, dargs +# from irlc.ex03.pacsearch_agents import GymPositionSearchProblem, manhattanHeuristic, GymCornersProblem, cornersHeuristic, foodHeuristic, GymFoodSearchProblem, GymAnyFoodSearchProblem +from irlc.ex03multisearch.pacman_problem_positionsearch import GymPositionSearchProblem#, manhattanHeuristic + + +def singledot(layout='smallMaze', SAgent=None, agent_args=None, layout_str=None): + stest(layout=layout, layout_str=layout_str, SAgent=SAgent, prob=GymPositionSearchProblem(), agent_args=agent_args, zoom=2, **dargs, fps=30) # part 3 + +if __name__ == "__main__": + agent_args = dict(N=30) + singledot(SAgent=ForwardDPSearchAgent, agent_args=agent_args) diff --git a/irlc/lectures/lec03/lecture_03_expectimax.py b/irlc/lectures/lec03/lecture_03_expectimax.py new file mode 100644 index 0000000000000000000000000000000000000000..826975f29ec88a7aeaedf08eff8bf356980791f7 --- /dev/null +++ b/irlc/lectures/lec03/lecture_03_expectimax.py @@ -0,0 +1,7 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. 
If this file contains other copyright notices disregard this text. +from irlc.ex03multisearch.multisearch_agents import GymExpectimaxAgent +from irlc.lectures.lec03.lecture_03_minimax import gminmax + +if __name__ == "__main__": + d = 3 + gminmax(Agent=GymExpectimaxAgent,depth=d) diff --git a/irlc/lectures/lec03/lecture_03_minimax.py b/irlc/lectures/lec03/lecture_03_minimax.py new file mode 100644 index 0000000000000000000000000000000000000000..eb8ee7362072498ac35df5df4367f822898fd4bb --- /dev/null +++ b/irlc/lectures/lec03/lecture_03_minimax.py @@ -0,0 +1,35 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.ex01.agent import train +from irlc.pacman.pacman_environment import GymPacmanEnvironment +from irlc.utils.video_monitor import VideoMonitor +from irlc.ex03multisearch.multisearch_agents import GymMinimaxAgent + + +layout_str = """ +%%%%%%%%% +% % +% %%%% % +% % +% P % +%%%% % +%%%% .G % +%%%% % +%%%%%%%%% +""".strip() + +def gminmax(layout='smallClassic', layout_str=layout_str, Agent=None, depth=3, **kwargs): + zoom = 2 + env = GymPacmanEnvironment(layout=layout, layout_str=layout_str, zoom=zoom, **kwargs) + agent = Agent(env, depth=depth) + from irlc import PlayWrapper + agent = PlayWrapper(agent, env) + + env = VideoMonitor(env, agent=agent, agent_monitor_keys=tuple(), fps=10) + train(env, agent, num_episodes=30) + env.close() + +if __name__ == "__main__": + d = 3 + gminmax(layout='minimaxClassic', layout_str=layout_str, Agent=GymMinimaxAgent,depth=d) + # gminmax(layout='minimaxClassic', layout_str=layout_str, Agent=GymAlphaBetaAgent, depth=d) + # gminmax(layout='minimaxClassic', layout_str=layout_str, Agent=GymExpectimaxAgent,depth=d) diff --git a/irlc/lectures/lec03/lecture_03_squaresearch_bfs.py b/irlc/lectures/lec03/lecture_03_squaresearch_bfs.py new file mode 100644 index 
0000000000000000000000000000000000000000..ac1e0953cd661d8ae9d0c859b6470ff7b28af798 --- /dev/null +++ b/irlc/lectures/lec03/lecture_03_squaresearch_bfs.py @@ -0,0 +1,12 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.lectures.chapter4search.yield_version.pacman_yield import BFSAgentYield +from irlc.lectures.chapter4search.search_tables import s_large + +# def tricksearchdot(layout='trickySearch', SAgent=None, agent_args=None, layout_str=None): +# stest(layout_str=layout_str, SAgent=SAgent, prob=GymFoodSearchProblem(), agent_args=agent_args, zoom=2, **dargs, fps=1000) # part 3 + +from irlc.lectures.lec03.lecture_03_tricksearch_bfs import tricksearchdot + +if __name__ == "__main__": + # agent_args = dict(heuristic=manhattanHeuristic,N=30) + tricksearchdot(SAgent=BFSAgentYield, agent_args=None, layout_str=s_large) diff --git a/irlc/lectures/lec03/lecture_03_tricksearch_astar.py b/irlc/lectures/lec03/lecture_03_tricksearch_astar.py new file mode 100644 index 0000000000000000000000000000000000000000..6c658491e286be118f4d82e429add412cd680b40 --- /dev/null +++ b/irlc/lectures/lec03/lecture_03_tricksearch_astar.py @@ -0,0 +1,10 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. 
+# from irlc.ex03.pacsearch_agents import GymPositionSearchProblem, manhattanHeuristic, GymCornersProblem, cornersHeuristic, foodHeuristic, GymFoodSearchProblem, GymAnyFoodSearchProblem +from irlc.lectures.chapter4search.yield_version.pacman_yield import AStarAgentYield + +from irlc.lectures.lec03.lecture_03_tricksearch_bfs import tricksearchdot +from irlc.ex03multisearch.pacman_problem_foodsearch_astar import foodHeuristic + +if __name__ == "__main__": + agent_args = dict(heuristic=foodHeuristic) + tricksearchdot(SAgent=AStarAgentYield, agent_args=agent_args) diff --git a/irlc/lectures/lec03/lecture_03_tricksearch_bfs.py b/irlc/lectures/lec03/lecture_03_tricksearch_bfs.py new file mode 100644 index 0000000000000000000000000000000000000000..89b776456aa03640d6e75721ff7804ca3dbf8b6a --- /dev/null +++ b/irlc/lectures/lec03/lecture_03_tricksearch_bfs.py @@ -0,0 +1,21 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.lectures.chapter4search.yield_version.pacman_yield import stest, dargs +from irlc.ex03multisearch.pacman_problem_foodsearch import GymFoodSearchProblem +from irlc.lectures.chapter4search.yield_version.pacman_yield import BFSAgentYield + +layout_str = """ +%%%%%%%%%%%% +% % % +%.%.%.%% % % +% P % % +%%%%%%%%%% % +%. 
% +%%%%%%%%%%%% +""".strip() + +def tricksearchdot(layout_str=layout_str, SAgent=None, agent_args=None): + stest(layout_str=layout_str, SAgent=SAgent, prob=GymFoodSearchProblem(), agent_args=agent_args, zoom=2, **dargs, fps=1000) # part 3 + +if __name__ == "__main__": + # agent_args = dict(heuristic=manhattanHeuristic,N=30) + tricksearchdot(SAgent=BFSAgentYield, agent_args=None) diff --git a/irlc/lectures/lec03/lecture_03_tricksearch_dfs.py b/irlc/lectures/lec03/lecture_03_tricksearch_dfs.py new file mode 100644 index 0000000000000000000000000000000000000000..f3b2ac4ad2eeed59217fe591c95385576e69c7ec --- /dev/null +++ b/irlc/lectures/lec03/lecture_03_tricksearch_dfs.py @@ -0,0 +1,10 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +# from irlc.ex03.pacsearch_agents import GymPositionSearchProblem, manhattanHeuristic, GymCornersProblem, cornersHeuristic, foodHeuristic, GymFoodSearchProblem, GymAnyFoodSearchProblem + +from irlc.lectures.chapter4search.yield_version.pacman_yield import DFSAgentYield +from irlc.lectures.lec03.lecture_03_tricksearch_bfs import tricksearchdot + + +if __name__ == "__main__": + # agent_args = dict(heuristic=manhattanHeuristic,N=30) + tricksearchdot(SAgent=DFSAgentYield, agent_args=None) diff --git a/irlc/lectures/lec03/snapshot_base/openaigym.video.0.8068.video000000.meta.json b/irlc/lectures/lec03/snapshot_base/openaigym.video.0.8068.video000000.meta.json new file mode 100644 index 0000000000000000000000000000000000000000..5dc734d01281b1a52d401032ec7e9c6da2d4ea39 --- /dev/null +++ b/irlc/lectures/lec03/snapshot_base/openaigym.video.0.8068.video000000.meta.json @@ -0,0 +1 @@ +{"episode_id": 0, "content_type": "video/mp4"} \ No newline at end of file diff --git a/irlc/lectures/lec03/snapshot_base/openaigym.video.0.8068.video000000.mp4 b/irlc/lectures/lec03/snapshot_base/openaigym.video.0.8068.video000000.mp4 new 
file mode 100644 index 0000000000000000000000000000000000000000..17e5e5fbd204f4f1c8bf240b166ab0a318db4744 Binary files /dev/null and b/irlc/lectures/lec03/snapshot_base/openaigym.video.0.8068.video000000.mp4 differ diff --git a/irlc/lectures/lec04/__init__.py b/irlc/lectures/lec04/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a56057c84d0ceac54aab1d40ba0f370c77fe10be --- /dev/null +++ b/irlc/lectures/lec04/__init__.py @@ -0,0 +1 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. diff --git a/irlc/lectures/lec04/lecture_04_car_basic_pid.py b/irlc/lectures/lec04/lecture_04_car_basic_pid.py new file mode 100644 index 0000000000000000000000000000000000000000..e364ba19b2679d110704da3117880a032abd3487 --- /dev/null +++ b/irlc/lectures/lec04/lecture_04_car_basic_pid.py @@ -0,0 +1,12 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc import train +from irlc.car.car_model import CarEnvironment +from irlc.ex04.pid_car import PIDCarAgent + +if __name__ == "__main__": + env = CarEnvironment(noise_scale=0, Tmax=30, max_laps=1, render_mode='human') + agent = PIDCarAgent(env, v_target=.2, use_both_x5_x3=False) + stats, trajectories = train(env, agent, num_episodes=1, return_trajectory=True) + env.close() + + # agent = PIDCarAgent(env, v_target=1, use_both_x5_x3=True) # I recommend lowering v_target to make the problem simpler. diff --git a/irlc/lectures/lec04/lecture_04_cartpole_A.py b/irlc/lectures/lec04/lecture_04_cartpole_A.py new file mode 100644 index 0000000000000000000000000000000000000000..3117d0012a758365d1592e263138e7e25df78d11 --- /dev/null +++ b/irlc/lectures/lec04/lecture_04_cartpole_A.py @@ -0,0 +1,9 @@ +# This file may not be shared/redistributed without permission. 
Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc import train +from irlc.ex04.pid_cartpole import PIDCartpoleAgent, get_offbalance_cart + +if __name__ == "__main__": + env = get_offbalance_cart(30) + agent = PIDCartpoleAgent(env, dt=env.dt, Kp=120, Ki=0, Kd=10, balance_to_x0=False) + _, trajectories = train(env, agent, num_episodes=1, reset=False) + env.close() diff --git a/irlc/lectures/lec04/lecture_04_cartpole_B.py b/irlc/lectures/lec04/lecture_04_cartpole_B.py new file mode 100644 index 0000000000000000000000000000000000000000..a57e0950c43dd941464534354d377206b719097a --- /dev/null +++ b/irlc/lectures/lec04/lecture_04_cartpole_B.py @@ -0,0 +1,14 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc import train +from irlc.ex04.pid_cartpole import PIDCartpoleAgent, get_offbalance_cart + +if __name__ == "__main__": + """ + Second task: We will now also try to bring the cart towards x=0. + """ + env = get_offbalance_cart(30) + agent = PIDCartpoleAgent(env, env.dt, ...) + # TODO: 1 lines missing. + raise NotImplementedError("Define your agent here (including parameters)") + _, trajectories = train(env, agent, num_episodes=1, reset=False) # Note reset=False to maintain initial conditions. + env.close() diff --git a/irlc/lectures/lec04/lecture_04_harmonic.py b/irlc/lectures/lec04/lecture_04_harmonic.py new file mode 100644 index 0000000000000000000000000000000000000000..7d7409954d82b313805391ec1d35bdfc6ab5a054 --- /dev/null +++ b/irlc/lectures/lec04/lecture_04_harmonic.py @@ -0,0 +1,14 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. 
+from irlc import train +from irlc.ex04.model_harmonic import HarmonicOscilatorEnvironment +from irlc import Agent +import numpy as np + +class NullAgent(Agent): + def pi(self, x, k, info=None): + return np.asarray([0]) + +if __name__ == "__main__": + env = HarmonicOscilatorEnvironment(render_mode='human') + train(env, NullAgent(env), num_episodes=1, max_steps=200) + env.close() diff --git a/irlc/lectures/lec04/lecture_04_lunar.py b/irlc/lectures/lec04/lecture_04_lunar.py new file mode 100644 index 0000000000000000000000000000000000000000..9f870d738c6d2783178b8aa4f54525c1b964eb0a --- /dev/null +++ b/irlc/lectures/lec04/lecture_04_lunar.py @@ -0,0 +1,12 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.ex04.pid_lunar import get_lunar_lander +import gymnasium +from irlc import train + +if __name__ == "__main__": + env = gymnasium.make('LunarLanderContinuous-v3', render_mode='human') + env._max_episode_steps = 1000 # We don't want it to time out. + + agent = get_lunar_lander(env) + stats, traj = train(env, agent, return_trajectory=True, num_episodes=10) + env.close() diff --git a/irlc/lectures/lec04/lecture_04_pendulum_random.py b/irlc/lectures/lec04/lecture_04_pendulum_random.py new file mode 100644 index 0000000000000000000000000000000000000000..58d084308b202ff91b5d7b4a332904b1f88979f6 --- /dev/null +++ b/irlc/lectures/lec04/lecture_04_pendulum_random.py @@ -0,0 +1,8 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. 
+from irlc import Agent, train +from irlc.ex04.model_pendulum import GymSinCosPendulumEnvironment + +if __name__ == "__main__": + env = GymSinCosPendulumEnvironment(Tmax=20, render_mode='human') + train(env, Agent(env), num_episodes=1) + env.close() diff --git a/irlc/lectures/lec04/lecture_04_pid_d.py b/irlc/lectures/lec04/lecture_04_pid_d.py new file mode 100644 index 0000000000000000000000000000000000000000..8b05ff10e27da1bb65508aaf74b573559f5c15fd --- /dev/null +++ b/irlc/lectures/lec04/lecture_04_pid_d.py @@ -0,0 +1,5 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.lectures.lec04.lecture_04_pid_p import pidplot + +if __name__ == "__main__": + pidplot(Kp=40, Kd=100, Ki=0) diff --git a/irlc/lectures/lec04/lecture_04_pid_iA.py b/irlc/lectures/lec04/lecture_04_pid_iA.py new file mode 100644 index 0000000000000000000000000000000000000000..3e8ac8b51b1e51c3a8e6dd45feb1a0ef6cce5302 --- /dev/null +++ b/irlc/lectures/lec04/lecture_04_pid_iA.py @@ -0,0 +1,5 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.lectures.lec04.lecture_04_pid_p import pidplot + +if __name__ == "__main__": + pidplot(Kp=40, Kd=50, Ki=0, slope=2, target=0) diff --git a/irlc/lectures/lec04/lecture_04_pid_iB.py b/irlc/lectures/lec04/lecture_04_pid_iB.py new file mode 100644 index 0000000000000000000000000000000000000000..bc10b3aa93e5126cbc7ffccf8d604296d942ff56 --- /dev/null +++ b/irlc/lectures/lec04/lecture_04_pid_iB.py @@ -0,0 +1,5 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. 
+from irlc.lectures.lec04.lecture_04_pid_p import pidplot + +if __name__ == "__main__": + pidplot(Kp=40, Kd=50, Ki=10, slope=2, target=0) diff --git a/irlc/lectures/lec04/lecture_04_pid_p.py b/irlc/lectures/lec04/lecture_04_pid_p.py new file mode 100644 index 0000000000000000000000000000000000000000..488d5fd0f369296f2275e97c820a096a2fb37c6e --- /dev/null +++ b/irlc/lectures/lec04/lecture_04_pid_p.py @@ -0,0 +1,16 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.ex04.locomotive import LocomotiveEnvironment +from irlc.ex04.pid_locomotive_agent import PIDLocomotiveAgent +from irlc.ex01.agent import train + +def pidplot(Kp=40, Kd=0, Ki=0, slope=0, target=0): + dt = .04 + m = 70 + Tmax=20 + env = LocomotiveEnvironment(m=m, slope=slope, dt=dt, Tmax=Tmax, render_mode='human') + agent = PIDLocomotiveAgent(env, dt=dt, Kp=Kp, Ki=Ki, Kd=Kd, target=target) + train(env, agent, num_episodes=1) + env.close() + +if __name__ == "__main__": + pidplot(Kp=40, Kd=0, Ki=0) diff --git a/irlc/tests/tests_week03.py b/irlc/tests/tests_week03.py new file mode 100644 index 0000000000000000000000000000000000000000..403e29a53524dbc4250ec8b49a8dbd06bdc84e58 --- /dev/null +++ b/irlc/tests/tests_week03.py @@ -0,0 +1,88 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from unitgrade import Report +import irlc +from unitgrade import UTestCase +from irlc.ex03.kuramoto import KuramotoModel, f +import sympy as sym +import numpy as np + +class Problem1Kuramoto(UTestCase): + """ Test the Kuromoto Osscilator """ + def test_continious_model(self): + cmodel = KuramotoModel() + x, u = sym.symbols("x u") + expr = cmodel.sym_f([x], [u]) + # Check the expression has the right type. 
+ self.assertIsInstance(expr, list) + # Evaluate the expression and check the result in a given point. + self.assertEqualC(expr[0].subs([(x, 0.2), (u, 0.93)])) + + def test_f(self): + self.assertLinf(f([0.1], [0.4]), tol=1e-6) + + + def test_RK4(self): + from irlc.ex03.kuramoto import rk4_simulate + + cmodel = KuramotoModel() + x0 = np.asarray(cmodel.x0_bound().low) # Get the starting state x=0. + u = 1.3 + xs, ts = rk4_simulate(x0, [u], t0=0, tF=20, N=100) + + # xs, us, ts = cmodel.simulate(x0, u_fun=u , t0=0, tF=20) + self.assertLinf(ts, tol=1e-6) + # self.assertLinf(us, tol=1e-6) + self.assertLinf(xs, tol=1e-6) + + # Test the same with a varying function: + xs, ts = rk4_simulate(x0, [u+1], t0=0, tF=10, N=50) + # xs, us, ts = cmodel.simulate(x0, u_fun=lambda x,t: np.sin(x + u) , t0=0, tF=10) + self.assertLinf(ts, tol=1e-6) + # self.assertLinf(us, tol=1e-6) + self.assertLinf(xs, tol=1e-6) + +class Exam5InventoryEvaluation(UTestCase): + def test_a_test_expected_items_next_day(self): + from irlc.ex03.inventory_evaluation import a_expected_items_next_day + self.assertAlmostEqual(a_expected_items_next_day(x=0, u=1), 0.1, places=5) + + def test_b_test_expected_items_next_day(self): + from irlc.ex03.inventory_evaluation import b_evaluate_policy + pi = self.get_pi() + self.assertAlmostEqual(b_evaluate_policy(pi, 1), 2.7, places=5) + + def get_pi(self): + from irlc.ex02.inventory import InventoryDPModel + model = InventoryDPModel() + pi = [{x: 1 if x == 0 else 0 for x in model.S(k)} for k in range(model.N)] + return pi + +class Exam6Toy2d(UTestCase): + def test_rk4_a(self): + from irlc.ex03.toy_2d_control import toy_simulation + w = toy_simulation(u0=0.4, T=5) + self.assertFalse(isinstance(w, np.ndarray), msg="Your toy_simulation function must return a float") + self.assertEqual(type(float(w)), float, msg="Your toy_simulation function must return a float") + self.assertLinf(w, tol=0.01, msg="Your simulation ended up at the wrong angle") + + def test_rk4_b(self): + from 
irlc.ex03.toy_2d_control import toy_simulation + w = toy_simulation(u0=-0.1, T=2) + self.assertFalse( isinstance(w, np.ndarray), msg="Your toy_simulation function must return a float") + self.assertEqual(type(float(w)), float, msg="Your toy_simulation function must return a float") + self.assertLinf(w, tol=0.01, msg="Your simulation ended up at the wrong angle") + + +class Week03Tests(Report): #240 total. + title = "Tests for week 03" + pack_imports = [irlc] + individual_imports = [] + questions = [ + (Problem1Kuramoto, 10), + (Exam5InventoryEvaluation, 10), + (Exam6Toy2d, 10), + ] + +if __name__ == '__main__': + from unitgrade import evaluate_report_student + evaluate_report_student(Week03Tests()) diff --git a/irlc/tests/tests_week04.py b/irlc/tests/tests_week04.py new file mode 100644 index 0000000000000000000000000000000000000000..b032c0bc49423607e2113a55d244181aa7763ec7 --- /dev/null +++ b/irlc/tests/tests_week04.py @@ -0,0 +1,131 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from unitgrade import Report +from unitgrade import UTestCase +import irlc +from irlc.car.car_model import CarEnvironment +from irlc.ex04.pid_car import PIDCarAgent +from irlc import train +from irlc.ex04.pid_locomotive_agent import LocomotiveEnvironment, PIDLocomotiveAgent +from irlc.ex03.kuramoto import KuramotoModel, f +from irlc.ex04.discrete_kuramoto import fk, dfk_dx +import sympy as sym +import numpy as np + +class Problem1DiscreteKuromoto(UTestCase): + """ Test the Kuromoto Osscilator """ + def test_continious_model(self): + cmodel = KuramotoModel() + x, u = sym.symbols("x u") + expr = cmodel.sym_f([x], [u]) + # Check the expression has the right type. + self.assertIsInstance(expr, list) + # Evaluate the expression and check the result in a given point. 
+ self.assertEqualC(expr[0].subs([(x, 0.2), (u, 0.93)])) + + def test_f(self): + self.assertLinf(f([0.1], [0.4]), tol=1e-6) + + def test_fk(self): + self.assertLinf(fk([0.1], [0.4]), tol=1e-6) + + def test_dfk_dx(self): + self.assertLinf(dfk_dx([0.1], [0.4]), tol=1e-6) + +class Problem3PID(UTestCase): + """ PID Control """ + + def test_pid_class(self, Kp=40, Ki=0, Kd=0, target=0, x=0): + dt = 0.08 + from irlc.ex04.pid import PID + pid = PID(Kp=Kp, Kd=Kd, Ki=Ki, target=target, dt=0.8) + u = pid.pi(x) + self.assertL2(u, tol=1e-4) + + def test_pid_Kp(self): + self.test_pid_class(40, 0, 0, 0, 1) + self.test_pid_class(10, 0, 0, 0, 2) + + + def test_pid_target(self): + self.test_pid_class(40, 0, 0, 3, 1) + self.test_pid_class(20, 0, 0, 0, 2) + + + def test_pid_all(self): + self.test_pid_class(4, 3, 8, 1, 1) + self.test_pid_class(40, 10, 3, 0, 2) + + +class Problem4PIDAgent(UTestCase): + """ PID Control """ + + def pid_locomotive(self, Kp=40, Ki=0, Kd=0, slope=0, target=0): + dt = 0.08 + env = LocomotiveEnvironment(m=10, slope=slope, dt=dt, Tmax=5) + agent = PIDLocomotiveAgent(env, dt=dt, Kp=Kp, Ki=Ki, Kd=Kd, target=target) + stats, traj = train(env, agent, return_trajectory=True, verbose=False) + self.assertL2(traj[0].state, tol=1e-4) + + def test_locomotive_flat(self): + self.pid_locomotive() + + def test_locomotive_Kd(self): + """ Test the derivative term """ + self.pid_locomotive(Kd = 10) + + def test_locomotive_Ki(self): + """ Test the integral term """ + self.pid_locomotive(Kd = 10, Ki=5, slope=5) + + + def test_locomotive_all(self): + """ Test all terms """ + self.pid_locomotive(Kp=35, Kd = 10, Ki=5, slope=5, target=1) + + + + +class Problem7PIDCar(UTestCase): + lt = -1 + + @classmethod + def setUpClass(cls) -> None: + env = CarEnvironment(noise_scale=0, Tmax=80, max_laps=2) + agent = PIDCarAgent(env, v_target=1.0) + stats, trajectories = train(env, agent, num_episodes=1, return_trajectory=True) + d = trajectories[0].state[:, 4] + lt = len(d) * env.dt / 2 + 
print("Lap time", lt) + cls.lt = lt + + def test_below_60(self): + """ Testing if lap time is < 60 """ + self.assertTrue(0 < self.__class__.lt < 60) + + def test_below_40(self): + """ Testing if lap time is < 40 """ + self.assertTrue(0 < self.__class__.lt < 40) + + + def test_below_30(self): + """ Testing if lap time is < 30 """ + self.assertTrue(0 < self.__class__.lt < 30) + + def test_below_22(self): + """ Testing if lap time is < 22 """ + self.assertTrue(0 < self.__class__.lt < 22) + +class Week04Tests(Report): + title = "Tests for week 04" + pack_imports = [irlc] + individual_imports = [] + questions = [ + (Problem1DiscreteKuromoto, 10), + (Problem3PID, 10), + (Problem4PIDAgent, 10), # ok + (Problem7PIDCar, 10), # ok + ] + +if __name__ == '__main__': + from unitgrade import evaluate_report_student + evaluate_report_student(Week04Tests()) diff --git a/irlc/tests/unitgrade_data/Exam5InventoryEvaluation.pkl b/irlc/tests/unitgrade_data/Exam5InventoryEvaluation.pkl index 39ab00d05f08228855c1778e9433e4c61a329a33..7ea10ed52c9791c2377088198836e6db33a6458b 100644 Binary files a/irlc/tests/unitgrade_data/Exam5InventoryEvaluation.pkl and b/irlc/tests/unitgrade_data/Exam5InventoryEvaluation.pkl differ diff --git a/irlc/tests/unitgrade_data/Exam6Toy2d.pkl b/irlc/tests/unitgrade_data/Exam6Toy2d.pkl index 96d0e4d9b4fd352e8c93a686fccf7054a4812251..a03b1180f12e0d758b892bba3b90b86479d324df 100644 Binary files a/irlc/tests/unitgrade_data/Exam6Toy2d.pkl and b/irlc/tests/unitgrade_data/Exam6Toy2d.pkl differ diff --git a/irlc/tests/unitgrade_data/ExamQuestion7FlowersStore.pkl b/irlc/tests/unitgrade_data/ExamQuestion7FlowersStore.pkl index 2aca5b7ac5f5f01abd4f81af3d043b6781859556..5ac1a3b16ed56331d78bfee48060770518f91c14 100644 Binary files a/irlc/tests/unitgrade_data/ExamQuestion7FlowersStore.pkl and b/irlc/tests/unitgrade_data/ExamQuestion7FlowersStore.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl b/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl index 
f8993993fa7109ee295362187acbe079966a8976..a3ccc61da262af2509fb54e73cb13211d1985255 100644 Binary files a/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl and b/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem1DiscreteKuromoto.pkl b/irlc/tests/unitgrade_data/Problem1DiscreteKuromoto.pkl index c843e1bd443b6168df4e428a83b7acb35d40e238..4bad30f14aecbb24fbb45cb685e1dff83fc3a989 100644 Binary files a/irlc/tests/unitgrade_data/Problem1DiscreteKuromoto.pkl and b/irlc/tests/unitgrade_data/Problem1DiscreteKuromoto.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem1Kuramoto.pkl b/irlc/tests/unitgrade_data/Problem1Kuramoto.pkl index a5d5081d8a4e66119e91491dcc272482aa65c02a..e23272d107553ef01432ebbab1f927cf2dce232c 100644 Binary files a/irlc/tests/unitgrade_data/Problem1Kuramoto.pkl and b/irlc/tests/unitgrade_data/Problem1Kuramoto.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl b/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl index 155ac41c14a34472f76bb8cf851e03ef7ef244f4..2eb951e988eb8349ac9e93b038be29f198799c6c 100644 Binary files a/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl and b/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem2DeterministicDP.pkl b/irlc/tests/unitgrade_data/Problem2DeterministicDP.pkl index 64984f893686559f08898102997966328856fddb..c8d3953e5053023c2412a3d7208369da3257b522 100644 Binary files a/irlc/tests/unitgrade_data/Problem2DeterministicDP.pkl and b/irlc/tests/unitgrade_data/Problem2DeterministicDP.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem2DeterministicInventory.pkl b/irlc/tests/unitgrade_data/Problem2DeterministicInventory.pkl index b29e17182a49435cc5fd0b1a1cd217497cdc34c7..dd94ff3d53e56b972caab0e514adf88a60a61109 100644 Binary files a/irlc/tests/unitgrade_data/Problem2DeterministicInventory.pkl and b/irlc/tests/unitgrade_data/Problem2DeterministicInventory.pkl differ diff --git 
a/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl b/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl index 8bcdcc94c44e2c11c162a4b19c4175a8bcb9c1a6..10ef0d0879a8acc8800b7907d7bfc9555633ed1d 100644 Binary files a/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl and b/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem3PID.pkl b/irlc/tests/unitgrade_data/Problem3PID.pkl index 38dbd0f745e0be5a20f30d32593682985001a463..e6bd0d04183b656cab531b03669f8f05246b7e66 100644 Binary files a/irlc/tests/unitgrade_data/Problem3PID.pkl and b/irlc/tests/unitgrade_data/Problem3PID.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem3StochasticDP.pkl b/irlc/tests/unitgrade_data/Problem3StochasticDP.pkl index 2fb1dd65efebdd3c5491f61efc464ff4283da211..ebb615932b54984a7b955fcd8ca517fbd2637a96 100644 Binary files a/irlc/tests/unitgrade_data/Problem3StochasticDP.pkl and b/irlc/tests/unitgrade_data/Problem3StochasticDP.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem4DPAgent.pkl b/irlc/tests/unitgrade_data/Problem4DPAgent.pkl index 290a59793d1494fc5fc50ec86e6cb959f53df6e3..471b04d04764c2c7ac0f9e5d7cf2eb6abda40484 100644 Binary files a/irlc/tests/unitgrade_data/Problem4DPAgent.pkl and b/irlc/tests/unitgrade_data/Problem4DPAgent.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl b/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl index c9c41623d25d4dbd13005e9ae115a2fc96e2e41e..a2565317907506395af00b12ff1ca090bbd1c513 100644 Binary files a/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl and b/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem4PIDAgent.pkl b/irlc/tests/unitgrade_data/Problem4PIDAgent.pkl index 83f95e17e4c0613ff3572f0023d6547b5be787a0..032cf4b5a48e195fe36d6ceca7e86391d5ff21c9 100644 Binary files a/irlc/tests/unitgrade_data/Problem4PIDAgent.pkl and 
b/irlc/tests/unitgrade_data/Problem4PIDAgent.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl b/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl index 322ff5b7a38f90b4446cd00c456f4ce713784afc..7bd2c4e13a274e17171d07440189adaaaa0dbb7e 100644 Binary files a/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl and b/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl b/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl index 90c7b19e1b43df06497155d15b787aa102807bcd..2e417a7c35f1291647008d44d675e85d118ee917 100644 Binary files a/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl and b/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem7PIDCar.pkl b/irlc/tests/unitgrade_data/Problem7PIDCar.pkl index 72d293c30b4036e1d9a4b5d49488d85c7e7b3178..c1d31f985ab53b13c82af6a1008087d60e1ef895 100644 Binary files a/irlc/tests/unitgrade_data/Problem7PIDCar.pkl and b/irlc/tests/unitgrade_data/Problem7PIDCar.pkl differ