# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from unitgrade import UTestCase, Report
import numpy as np
import irlc
from irlc import train
from irlc.ex09.small_gridworld import SmallGridworldMDP
from irlc.ex09.policy_iteration import policy_iteration
from irlc.ex09.value_iteration import value_iteration
from irlc.gridworld.gridworld_environments import FrozenLake
from irlc.ex09.policy_evaluation import policy_evaluation

# NOTE(review): the ``assert...C`` / ``assertL2`` calls in this module receive only
# the computed value; the reference value is presumably read from the recorded
# answers in irlc/tests/unitgrade_data/<ClassName>.pkl. Assuming that lookup
# depends on the test name and on the order of the cached assertions (TODO
# confirm against unitgrade), test methods must not be renamed and cached
# assertions must not be reordered or removed.
#
# Test classes and test methods are documented with `#` comments rather than
# new docstrings: unittest reports the first docstring line as the test
# description, so adding a docstring would change the reported output.


# Warm-up exercises on the FrozenLake MDP: expected reward, V -> Q and Q -> V.
class Problem1_to_3_Warmup(UTestCase):
    def test_part1_average_reward(self):
        from irlc.ex09.mdp_warmup import expected_reward

        mdp = FrozenLake(living_reward=0.2).mdp  # Get the MDP of this environment.
        s0 = mdp.initial_state
        ## Part 1: Expected reward
        self.assertAlmostEqualC(expected_reward(mdp, s=s0, a=0), places=5)
        self.assertAlmostEqualC(expected_reward(mdp, s=s0, a=2), places=5)
        self.assertAlmostEqualC(expected_reward(mdp, s=(1, 2), a=0), places=5)
        # Same query again on a freshly constructed MDP. Kept as-is so that the
        # number and order of cached assertions stay unchanged.
        mdp = FrozenLake(living_reward=0.2).mdp  # Get the MDP of this environment.
        self.assertAlmostEqualC(expected_reward(mdp, s=s0, a=2), places=5)

    def test_part2_v2q(self):
        ## Part 2
        from irlc.ex09.mdp_warmup import value_function2q_function

        mdp = FrozenLake(living_reward=0.3).mdp
        # First let's create a non-trivial value function
        V = {s: 2 * (s[0] - s[1]) - 3.5 for s in sorted(mdp.nonterminal_states)}

        states = [(0, 1), (2, 3), (0, 3), (1, 3), (1, 2)]
        s0 = mdp.initial_state

        q_ = value_function2q_function(mdp, s=s0, gamma=0.9, v=V)
        # The result must be a dict with exactly one entry per action 0..3.
        self.assertIsInstance(q_, dict)
        self.assertEqual(sorted(q_.keys()), [0, 1, 2, 3])
        self.assertEqual(len(q_), 4)
        # State (1, 2) is expected to yield a single entry.
        self.assertEqual(len(value_function2q_function(mdp, s=(1, 2), gamma=0.9, v=V)), 1)
        self.assertAlmostEqualC(q_[0], places=4)
        self.assertAlmostEqualC(q_[2], places=4)

        # Check every available action in a handful of further states. The
        # states are visited in sorted order so the cached assertions line up.
        for s in sorted(states):
            q_ = value_function2q_function(mdp, s=s, gamma=0.9, v=V)
            for a in [0, 1, 2, 3]:
                if a in mdp.A(s):
                    self.assertAlmostEqualC(q_[a], places=4)

    # NOTE: despite the "part2" in its name this covers Part 3 (Q -> V). The name
    # is deliberately left unchanged (see the module-level note on renaming).
    def test_part2_q2v(self):
        ## Part 3
        from irlc.ex09.mdp_warmup import q_function2value_function

        mdp = FrozenLake(living_reward=0.2).mdp
        s0 = mdp.initial_state
        # Create a non-trivial Q-function for this problem.
        # The particular values are not important in this example.
        Q = {(s, a): (s[0] - s[1]) - 5 * a
             for s in mdp.nonterminal_states
             for a in mdp.A(s)}
        # Create a policy. In this case pi(a=1) = 0.4.
        pi = {0: 0.2,
              1: 0.4,
              2: 0.2,
              3: 0.2}
        self.assertAlmostEqualC(q_function2value_function(pi, Q, s=s0), places=4)


def train_recording(env, agent, trajectories):
    """Replay recorded trajectories through ``agent``.

    For every trajectory ``env.reset()`` is called once. Then, for each recorded
    step ``k``, ``agent.pi(s, k)`` is invoked (its return value is discarded) and
    ``agent.train`` is called with the recorded transition. ``done`` is True only
    for the last recorded step of a trajectory.

    Each trajectory must expose ``state``, ``action``, ``reward`` and ``info``
    sequences; ``state`` and ``info`` are indexed at ``k + 1`` and therefore need
    one more entry than ``action``.

    This helper is not referenced by the tests in this module.
    """
    for t in trajectories:
        env.reset()
        last_step = len(t.action) - 1
        for k in range(len(t.action)):
            s = t.state[k]
            r = t.reward[k]
            a = t.action[k]
            sp = t.state[k + 1]
            info = t.info[k]
            info_sp = t.info[k + 1]

            agent.pi(s, k)
            agent.train(s, a, r, sp, done=k == last_step, info_s=info, info_sp=info_sp)


# Shared base class for the tests that compare a whole value function.
class ValueFunctionTest(UTestCase):
    def check_value_function(self, mdp, V):
        """Check the values ``V[s]`` for all ``s`` in ``mdp.states`` with ``assertL2`` (tol=1e-3)."""
        self.assertL2(np.asarray([V[s] for s in mdp.states]), tol=1e-3)


class Problem4PolicyEvaluation(ValueFunctionTest):
    """ Iterative policy evaluation """
    def test_policy_evaluation(self):
        mdp = SmallGridworldMDP()
        # Uniformly random policy over the actions available in each state.
        pi = {s: {a: 1 / len(mdp.A(s)) for a in mdp.A(s)} for s in mdp.nonterminal_states}
        v = policy_evaluation(pi, mdp, gamma=0.91)
        self.check_value_function(mdp, v)

    def test_policy_evaluation_b(self):
        mdp = SmallGridworldMDP()
        # Deterministic policy: all probability mass on action 0.
        pi = {s: {a: 1 if a == 0 else 0 for a in mdp.A(s)} for s in mdp.nonterminal_states}
        v = policy_evaluation(pi, mdp, gamma=0.91)
        self.check_value_function(mdp, v)


class Problem5PolicyIteration(ValueFunctionTest):
    """ Iterative Policy iteration """
    def test_policy_iteration(self):
        env = SmallGridworldMDP()
        pi, v = policy_iteration(env, gamma=0.91)
        self.check_value_function(env, v)


class Problem6ValueIteration(ValueFunctionTest):
    """ Iterative value iteration """
    def test_value_iteration(self):
        env = SmallGridworldMDP()
        pi, v = value_iteration(env, gamma=0.91)
        self.check_value_function(env, v)


class Problem9Gambler(ValueFunctionTest):
    """ Gambler's problem """
    def test_gambler_value_function(self):
        from irlc.ex09.gambler import GamblerMDP

        env = GamblerMDP()
        pi, v = value_iteration(env, gamma=0.91)
        self.check_value_function(env, v)


# NOTE: a question for Jack's car rental (JackRentalMDP with max_cars=5, solved by
# value_iteration with gamma=.9, theta=1e-3, max_iters=1000) existed here only as
# commented-out code and is not part of Week09Tests.questions. The commented-out
# legacy QuestionGroup/GridworldDPItem variants of the tests in this file were
# removed; they remain available in version control.


class Problem8ValueIterationAgent(UTestCase):
    """ Value-iteration agent test """

    def _check_mean_accumulated_reward(self, env, num_episodes=1000, tol=1e-2):
        """Train a ValueIterationAgent on ``env`` and check its mean accumulated reward.

        The agent is built from ``env.mdp`` and trained for ``num_episodes``
        episodes; the mean of the per-episode 'Accumulated Reward' statistic is
        checked with ``assertL2`` using tolerance ``tol``.
        """
        from irlc.ex09.value_iteration_agent import ValueIterationAgent

        agent = ValueIterationAgent(env, mdp=env.mdp)
        stats, _ = train(env, agent, num_episodes=num_episodes)
        self.assertL2(np.mean([s['Accumulated Reward'] for s in stats]), tol=tol)

    def test_sutton_gridworld(self):
        from irlc.gridworld.gridworld_environments import SuttonCornerGridEnvironment

        self._check_mean_accumulated_reward(SuttonCornerGridEnvironment(living_reward=-1))

    def test_bookgrid_gridworld(self):
        from irlc.gridworld.gridworld_environments import BookGridEnvironment

        self._check_mean_accumulated_reward(BookGridEnvironment(living_reward=-1))


# Report bundling the week 09 questions; each entry is (question class, weight).
class Week09Tests(Report):
    title = "Tests for week 09"
    pack_imports = [irlc]
    individual_imports = []
    questions = [
        (Problem1_to_3_Warmup, 10),
        (Problem4PolicyEvaluation, 10),
        (Problem5PolicyIteration, 10),
        (Problem6ValueIteration, 10),
        (Problem8ValueIterationAgent, 10),
        (Problem9Gambler, 10),
    ]


if __name__ == '__main__':
    from unitgrade import evaluate_report_student
    evaluate_report_student(Week09Tests())
1872c37be157b1d23e330e90fb98df324bc707a7..9b175b43f74d0fcda46ed5150c8c7c7071ffe545 100644 Binary files a/irlc/tests/unitgrade_data/DirectMethods.pkl and b/irlc/tests/unitgrade_data/DirectMethods.pkl differ diff --git a/irlc/tests/unitgrade_data/DirectSolverQuestion.pkl b/irlc/tests/unitgrade_data/DirectSolverQuestion.pkl index 21e4c24c13dd49d445c4efe18438fe4a0b360513..343b7179775815208e5bcba235808d27ecaf5eba 100644 Binary files a/irlc/tests/unitgrade_data/DirectSolverQuestion.pkl and b/irlc/tests/unitgrade_data/DirectSolverQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/Exam5InventoryEvaluation.pkl b/irlc/tests/unitgrade_data/Exam5InventoryEvaluation.pkl index 288459bca52e824a5d9dabdcb4cf10e164f64114..26af5ecf71d06771737bf666e043228cdeb0b306 100644 Binary files a/irlc/tests/unitgrade_data/Exam5InventoryEvaluation.pkl and b/irlc/tests/unitgrade_data/Exam5InventoryEvaluation.pkl differ diff --git a/irlc/tests/unitgrade_data/Exam6Toy2d.pkl b/irlc/tests/unitgrade_data/Exam6Toy2d.pkl index 06341fef90fd2beed50cccac023bdd729b480a91..27985d2c70d9c619a927df1a9311b0dedaf28faf 100644 Binary files a/irlc/tests/unitgrade_data/Exam6Toy2d.pkl and b/irlc/tests/unitgrade_data/Exam6Toy2d.pkl differ diff --git a/irlc/tests/unitgrade_data/ExamQuestion7FlowersStore.pkl b/irlc/tests/unitgrade_data/ExamQuestion7FlowersStore.pkl index 7de7875d690be1fc4143070c2139bd34f61288ae..d47a7262321148b608adf93be0fd09c4824f561e 100644 Binary files a/irlc/tests/unitgrade_data/ExamQuestion7FlowersStore.pkl and b/irlc/tests/unitgrade_data/ExamQuestion7FlowersStore.pkl differ diff --git a/irlc/tests/unitgrade_data/GradientBanditQuestion.pkl b/irlc/tests/unitgrade_data/GradientBanditQuestion.pkl index 55e379fe474d7a967700bb9c83202905b8ebcbfa..4d5b7500dc9b2e7fec8a5e1783f2026156f00962 100644 Binary files a/irlc/tests/unitgrade_data/GradientBanditQuestion.pkl and b/irlc/tests/unitgrade_data/GradientBanditQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/ILQRAgentQuestion.pkl 
b/irlc/tests/unitgrade_data/ILQRAgentQuestion.pkl index 94b38667b6a59b2bdd827e9569ad5bce677cc91e..60e863514a92b0ed49d6e0a508e28fee4b13dc33 100644 Binary files a/irlc/tests/unitgrade_data/ILQRAgentQuestion.pkl and b/irlc/tests/unitgrade_data/ILQRAgentQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/ILQRPendulumQuestion.pkl b/irlc/tests/unitgrade_data/ILQRPendulumQuestion.pkl index af4efa1cc7fc8336bfab2d97317419f4573a58da..1a68e6a4c61615f654c923cc2161c1b51e1252ce 100644 Binary files a/irlc/tests/unitgrade_data/ILQRPendulumQuestion.pkl and b/irlc/tests/unitgrade_data/ILQRPendulumQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/NonstatiotnaryAgentQuestion.pkl b/irlc/tests/unitgrade_data/NonstatiotnaryAgentQuestion.pkl index 55e379fe474d7a967700bb9c83202905b8ebcbfa..4d5b7500dc9b2e7fec8a5e1783f2026156f00962 100644 Binary files a/irlc/tests/unitgrade_data/NonstatiotnaryAgentQuestion.pkl and b/irlc/tests/unitgrade_data/NonstatiotnaryAgentQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/PendulumQuestion.pkl b/irlc/tests/unitgrade_data/PendulumQuestion.pkl index 21e4c24c13dd49d445c4efe18438fe4a0b360513..343b7179775815208e5bcba235808d27ecaf5eba 100644 Binary files a/irlc/tests/unitgrade_data/PendulumQuestion.pkl and b/irlc/tests/unitgrade_data/PendulumQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl b/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl index 0a911216fa96ee726261d5fd6122f47c63b7becd..fa48f111d5c714f5015e0d38fed0574ed138a4fb 100644 Binary files a/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl and b/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem1DiscreteKuromoto.pkl b/irlc/tests/unitgrade_data/Problem1DiscreteKuromoto.pkl index 6174c0b3159b23350a66f8510986566388f9a9e9..9ce1577be4210acac290d13b30ecb25c2bc8c6cc 100644 Binary files a/irlc/tests/unitgrade_data/Problem1DiscreteKuromoto.pkl and b/irlc/tests/unitgrade_data/Problem1DiscreteKuromoto.pkl 
differ diff --git a/irlc/tests/unitgrade_data/Problem1Kuramoto.pkl b/irlc/tests/unitgrade_data/Problem1Kuramoto.pkl index 5da65912b9c77917947555ed5b62336969918a99..40be323303985937c830f555d411030c06d34d35 100644 Binary files a/irlc/tests/unitgrade_data/Problem1Kuramoto.pkl and b/irlc/tests/unitgrade_data/Problem1Kuramoto.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem1_to_3_Warmup.pkl b/irlc/tests/unitgrade_data/Problem1_to_3_Warmup.pkl new file mode 100644 index 0000000000000000000000000000000000000000..43b1807780fed0fce2f285873d9259c47915a715 Binary files /dev/null and b/irlc/tests/unitgrade_data/Problem1_to_3_Warmup.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl b/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl index 5d994baa391da54fd3a6e1c1a369b72a9df5f17a..eb4eb650c6b317f7918a5ac1659b5b4c3f6d6a51 100644 Binary files a/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl and b/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem2DeterministicDP.pkl b/irlc/tests/unitgrade_data/Problem2DeterministicDP.pkl index 4029b85e80a9ebbf315924351ada7ba445fcb24a..80c46cf2a55088fa63963112dcaf91d55888c204 100644 Binary files a/irlc/tests/unitgrade_data/Problem2DeterministicDP.pkl and b/irlc/tests/unitgrade_data/Problem2DeterministicDP.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem2DeterministicInventory.pkl b/irlc/tests/unitgrade_data/Problem2DeterministicInventory.pkl index 547769c9bb40f7e2f9e061a3d24943b7bf016ea1..02f1c422b92a9bbff073f4ceab60753965310751 100644 Binary files a/irlc/tests/unitgrade_data/Problem2DeterministicInventory.pkl and b/irlc/tests/unitgrade_data/Problem2DeterministicInventory.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl b/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl index f8b966396874d03b37f527e8166a7431bd63ce66..2b5c6f9bc563562e2152b7f53744a65ea5b43f98 100644 Binary files 
a/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl and b/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem3LQR.pkl b/irlc/tests/unitgrade_data/Problem3LQR.pkl index cd8f6f6cd8072c224d9de2763d5585bdba4a6d80..841522bceb9b08611b411ba2716d2af183339f60 100644 Binary files a/irlc/tests/unitgrade_data/Problem3LQR.pkl and b/irlc/tests/unitgrade_data/Problem3LQR.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem3PID.pkl b/irlc/tests/unitgrade_data/Problem3PID.pkl index 252cfd024c97e5da728820dacd87ab9910607247..636821ec8ce7350d4207e6c9d14ef7ebe8135044 100644 Binary files a/irlc/tests/unitgrade_data/Problem3PID.pkl and b/irlc/tests/unitgrade_data/Problem3PID.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem3StochasticDP.pkl b/irlc/tests/unitgrade_data/Problem3StochasticDP.pkl index 0e1fc83741cb9bd0877d29de2b3828b78bdd5b01..b772ddc285d774ffc919998fb84c4fbfb9e58c4d 100644 Binary files a/irlc/tests/unitgrade_data/Problem3StochasticDP.pkl and b/irlc/tests/unitgrade_data/Problem3StochasticDP.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem4DPAgent.pkl b/irlc/tests/unitgrade_data/Problem4DPAgent.pkl index 178368d13873f75c43be9a31cb3dbdb10d5fef36..abeb2698baffe4071706d26b41de533166e0cd65 100644 Binary files a/irlc/tests/unitgrade_data/Problem4DPAgent.pkl and b/irlc/tests/unitgrade_data/Problem4DPAgent.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl b/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl index 22065591b65be79d935c05472a7603be0e00bcdb..c3dddca21dff68192c921eb2679f12aae31d5f27 100644 Binary files a/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl and b/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem4LQRAgent.pkl b/irlc/tests/unitgrade_data/Problem4LQRAgent.pkl index 42b50d8f321a365c574de2e27cc5dead749dbee4..d28333e07c4cc38531db644776aa5666aa9d423f 100644 Binary files 
a/irlc/tests/unitgrade_data/Problem4LQRAgent.pkl and b/irlc/tests/unitgrade_data/Problem4LQRAgent.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem4PIDAgent.pkl b/irlc/tests/unitgrade_data/Problem4PIDAgent.pkl index 14b3e4b4c95270f0c2953a2cc41a66833ba99d7f..5eb16a6dc2bf9316926640bb060b68e052a40354 100644 Binary files a/irlc/tests/unitgrade_data/Problem4PIDAgent.pkl and b/irlc/tests/unitgrade_data/Problem4PIDAgent.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem4PolicyEvaluation.pkl b/irlc/tests/unitgrade_data/Problem4PolicyEvaluation.pkl new file mode 100644 index 0000000000000000000000000000000000000000..5cff598a8271d14e39c5b19d503084e49e0db927 Binary files /dev/null and b/irlc/tests/unitgrade_data/Problem4PolicyEvaluation.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl b/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl index 33dfa81f677fd061a0a39b2c51757d929785cd80..8c339845720431122e05901db839d432dfbe6f49 100644 Binary files a/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl and b/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem5PolicyIteration.pkl b/irlc/tests/unitgrade_data/Problem5PolicyIteration.pkl new file mode 100644 index 0000000000000000000000000000000000000000..9d24486ad6d7a659101f343a46a174a7a5c414c8 Binary files /dev/null and b/irlc/tests/unitgrade_data/Problem5PolicyIteration.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem5_6_Boeing.pkl b/irlc/tests/unitgrade_data/Problem5_6_Boeing.pkl index b61782009434e3024f670821a02eff567ea7220c..344cba45db9b19e7d1c241f8db3f8a33d30cc86d 100644 Binary files a/irlc/tests/unitgrade_data/Problem5_6_Boeing.pkl and b/irlc/tests/unitgrade_data/Problem5_6_Boeing.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl b/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl index 354e3485c6913c4ed2b0e90c1416d05becf63c1c..d73714e87077f359513abd9a96d8679f3a001cc5 100644 Binary 
files a/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl and b/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem6ValueIteration.pkl b/irlc/tests/unitgrade_data/Problem6ValueIteration.pkl new file mode 100644 index 0000000000000000000000000000000000000000..1f1dd0327b87e703efc548ac9ae8c830fb686ff5 Binary files /dev/null and b/irlc/tests/unitgrade_data/Problem6ValueIteration.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem7PIDCar.pkl b/irlc/tests/unitgrade_data/Problem7PIDCar.pkl index 2ff576403f28ebc1f96c87a40defa18f2263737b..693d3794e690c2158431aff5ecf09e4b16bd3a63 100644 Binary files a/irlc/tests/unitgrade_data/Problem7PIDCar.pkl and b/irlc/tests/unitgrade_data/Problem7PIDCar.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem7_8_PidLQR.pkl b/irlc/tests/unitgrade_data/Problem7_8_PidLQR.pkl index c0103b3e977fa2b98a34cf16e69b4168cf7d8d53..3d4c035314271e378418b77fb438734ddd7987cb 100644 Binary files a/irlc/tests/unitgrade_data/Problem7_8_PidLQR.pkl and b/irlc/tests/unitgrade_data/Problem7_8_PidLQR.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem8ValueIterationAgent.pkl b/irlc/tests/unitgrade_data/Problem8ValueIterationAgent.pkl new file mode 100644 index 0000000000000000000000000000000000000000..70d8eda754f751f57162a36381b27e192fa0601f Binary files /dev/null and b/irlc/tests/unitgrade_data/Problem8ValueIterationAgent.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem9Gambler.pkl b/irlc/tests/unitgrade_data/Problem9Gambler.pkl new file mode 100644 index 0000000000000000000000000000000000000000..b52c804782d7804492a93342a7e3a8940e620837 Binary files /dev/null and b/irlc/tests/unitgrade_data/Problem9Gambler.pkl differ diff --git a/irlc/tests/unitgrade_data/RendevouzItem.pkl b/irlc/tests/unitgrade_data/RendevouzItem.pkl index 2ea308be8ae3ae254027640d548e0f9972c8cfe6..23c941bdfaca6edfe522b74947632170b089b3a4 100644 Binary files a/irlc/tests/unitgrade_data/RendevouzItem.pkl and 
b/irlc/tests/unitgrade_data/RendevouzItem.pkl differ diff --git a/irlc/tests/unitgrade_data/UCBAgentQuestion.pkl b/irlc/tests/unitgrade_data/UCBAgentQuestion.pkl index 55e379fe474d7a967700bb9c83202905b8ebcbfa..4d5b7500dc9b2e7fec8a5e1783f2026156f00962 100644 Binary files a/irlc/tests/unitgrade_data/UCBAgentQuestion.pkl and b/irlc/tests/unitgrade_data/UCBAgentQuestion.pkl differ