diff --git a/irlc/tests/tests_week09.py b/irlc/tests/tests_week09.py
new file mode 100644
index 0000000000000000000000000000000000000000..74279f6b3d0a617b81ce36570501ea56a1a64fd9
--- /dev/null
+++ b/irlc/tests/tests_week09.py
@@ -0,0 +1,314 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from unitgrade import UTestCase, Report
+import numpy as np
+import irlc
+from irlc import train
+from irlc.ex09.small_gridworld import SmallGridworldMDP
+from irlc.ex09.policy_iteration import policy_iteration
+from irlc.ex09.value_iteration import value_iteration
+from irlc.gridworld.gridworld_environments import FrozenLake
+from irlc.ex09.policy_evaluation import policy_evaluation
+
+class Problem1_to_3_Warmup(UTestCase):
+    def test_part1_average_reward(self):
+        from irlc.ex09.mdp_warmup import expected_reward
+        mdp = FrozenLake(living_reward=0.2).mdp  # Get the MDP of this environment.
+        s0 = mdp.initial_state
+        ## Part 1: Expected reward
+        self.assertAlmostEqualC(expected_reward(mdp, s=s0, a=0), places=5)
+        self.assertAlmostEqualC(expected_reward(mdp, s=s0, a=2), places=5)
+        self.assertAlmostEqualC(expected_reward(mdp, s=(1,2), a=0), places=5)
+        mdp = FrozenLake(living_reward=0.2).mdp  # Get the MDP of this environment.
+        self.assertAlmostEqualC(expected_reward(mdp, s=s0, a=2), places=5)
+
+    def test_part2_v2q(self):
+        ## Part 2
+        # First let's create a non-trivial value function
+        V = {}
+        mdp = FrozenLake(living_reward=0.3).mdp
+
+        for k, s in enumerate(sorted(mdp.nonterminal_states)):
+            V[s] = 2 * (s[0] - s[1]) - 3.5
+
+        from irlc.ex09.mdp_warmup import value_function2q_function
+
+        states = [(0, 1), (2, 3), (0, 3), (1,3), (1, 2)]
+
+        s0 = mdp.initial_state
+
+        q_ = value_function2q_function(mdp, s=s0, gamma=0.9, v=V)
+        self.assertIsInstance(q_, dict)
+        self.assertEqual(list(sorted(q_.keys())), [0, 1, 2, 3] )
+
+        self.assertEqual(len(q_), 4)
+        self.assertEqual(len(value_function2q_function(mdp, s=(1,2), gamma=0.9, v=V)), 1)
+        self.assertAlmostEqualC(q_[0],places=4)
+        self.assertAlmostEqualC(q_[2], places=4)
+
+
+        for s in sorted(states):
+            q_ = value_function2q_function(mdp, s=s, gamma=0.9, v=V)
+            for a in [0, 1, 2, 3]:
+                if a in mdp.A(s):
+                    self.assertAlmostEqualC(q_[a], places=4)
+
+    def test_part2_q2v(self):
+        ## Part 3
+        mdp = FrozenLake(living_reward=0.2).mdp
+        from irlc.ex09.mdp_warmup import value_function2q_function, q_function2value_function
+        # Create a non-trivial Q-function for this problem.
+        Q = {}
+        s0 = mdp.initial_state
+
+        for k, s in enumerate(mdp.nonterminal_states):
+            for a in mdp.A(s):
+                Q[s, a] =  (s[0] - s[1]) - 5 * a  # The particular values are not important in this example
+        # Create a policy. In this case pi(a=3) = 0.4.
+        pi = {0: 0.2,
+              1: 0.4,
+              2: 0.2,
+              3: 0.2}
+        self.assertAlmostEqualC(q_function2value_function(pi, Q, s=s0), places=4)
+
+def train_recording(env, agent, trajectories):
+    for t in trajectories:
+        env.reset()
+        for k in range(len(t.action)):
+            s = t.state[k]
+            r = t.reward[k]
+            a = t.action[k]
+            sp = t.state[k+1]
+            info = t.info[k]
+            info_sp = t.info[k+1]
+
+            agent.pi(s,k)
+            agent.train(s, a, r, sp, done=k == len(t.action)-1, info_s = info, info_sp=info_sp)
+
+
+class ValueFunctionTest(UTestCase):
+    def check_value_function(self, mdp, V):
+        self.assertL2(np.asarray([V[s] for s in mdp.states]), tol=1e-3)
+
+class Problem5PolicyIteration(ValueFunctionTest):
+    """ Iterative policy iteration """
+    def test_policy_iteration(self):
+        env = SmallGridworldMDP()
+        pi, v = policy_iteration(env, gamma=0.91)
+        self.check_value_function(env, v)
+
+
+
+class Problem6ValueIteration(ValueFunctionTest):
+    """ Iterative value iteration """
+    def test_value_iteration(self):
+        env = SmallGridworldMDP()
+
+        pi, v = value_iteration(env, gamma=0.91)
+        self.check_value_function(env, v)
+
+
+
+class Problem4PolicyEvaluation(ValueFunctionTest):
+    """ Iterative policy evaluation """
+    def test_policy_evaluation(self):
+        mdp = SmallGridworldMDP()
+        pi = {s: {a: 1/len(mdp.A(s)) for a in mdp.A(s) } for s in mdp.nonterminal_states }
+        v = policy_evaluation(pi, mdp, gamma=0.91)
+        self.check_value_function(mdp, v)
+
+    def test_policy_evaluation_b(self):
+        mdp = SmallGridworldMDP()
+        pi = {s: {a: 1 if a == 0 else 0 for a in mdp.A(s) } for s in mdp.nonterminal_states }
+        v = policy_evaluation(pi, mdp, gamma=0.91)
+        self.check_value_function(mdp, v)
+
+
+
+
+class Problem9Gambler(ValueFunctionTest):
+    """ Gambler's problem """
+    def test_gambler_value_function(self):
+        # from irlc.ex09.small_gridworld import SmallGridworldMDP, plot_value_function
+        # from irlc.ex09.policy_iteration import policy_iteration
+        # from irlc.ex09.value_iteration import value_iteration
+        from irlc.ex09.gambler import GamblerMDP
+        env = GamblerMDP()
+        pi, v = value_iteration(env, gamma=0.91)
+        self.check_value_function(env, v)
+
+# class JackQuestion(ValueFunctionTest):
+#     """ Gambler's problem """
+#     def test_jacks_rental_value_function(self):
+#         # from irlc.ex09.small_gridworld import SmallGridworldMDP, plot_value_function
+#         # from irlc.ex09.policy_iteration import policy_iteration
+#         # from irlc.ex09.value_iteration import value_iteration
+#         # from irlc.ex09.gambler import GamblerEnv
+#         from irlc.ex09.jacks_car_rental import JackRentalMDP
+#         max_cars = 5
+#         env = JackRentalMDP(max_cars=max_cars, verbose=True)
+#         pi, V = value_iteration(env, gamma=.9, theta=1e-3, max_iters=1000, verbose=True)
+#         self.check_value_function(env, V)
+
+# class JackQuestion(QuestionGroup):
+#     title = "Jacks car rental problem"
+#
+#     class JackItem(GridworldDPItem):
+#         title = "Value function test"
+#         max_cars = 5
+#         tol = 0.01
+#
+#         def get_value_function(self):
+#             from irlc.ex09.value_iteration import value_iteration
+#             from irlc.ex09.jacks_car_rental import JackRentalMDP
+#             env = JackRentalMDP(max_cars=self.max_cars, verbose=True)
+#             pi, V = value_iteration(env, gamma=.9, theta=1e-3, max_iters=1000, verbose=True)
+#             return V, env
+
+
+        # return v, env
+    # pass
+# class DynamicalProgrammingGroup(QuestionGroup):
+#     title = "Dynamical Programming test"
+#
+#     class PolicyEvaluationItem(GridworldDPItem):
+#         title = "Iterative Policy evaluation"
+#
+#
+#
+#     class PolicyIterationItem(GridworldDPItem):
+#         title = "policy iteration"
+#         def get_value_function(self):
+#             from irlc.ex09.small_gridworld import SmallGridworldMDP
+#             from irlc.ex09.policy_iteration import policy_iteration
+#             env = SmallGridworldMDP()
+#             pi, v = policy_iteration(env, gamma=0.91)
+#             return v, env
+#     class ValueIteartionItem(GridworldDPItem):
+#         title = "value iteration"
+#
+#         def get_value_function(self):
+#             from irlc.ex09.value_iteration import value_iteration
+#             from irlc.ex09.small_gridworld import SmallGridworldMDP
+#             env = SmallGridworldMDP()
+#             policy, v = value_iteration(env, gamma=0.92, theta=1e-6)
+#             return v, env
+
+# class GamlerQuestion(QuestionGroup):
+#     title = "Gamblers problem"
+#     class GamlerItem(GridworldDPItem):
+#         title = "Value-function test"
+#         def get_value_function(self):
+#             # from irlc.ex09.small_gridworld import SmallGridworldMDP, plot_value_function
+#             # from irlc.ex09.policy_iteration import policy_iteration
+#             from irlc.ex09.value_iteration import value_iteration
+#             from irlc.ex09.gambler import GamblerEnv
+#             env = GamblerEnv()
+#             pi, v = value_iteration(env, gamma=0.91)
+#             return v, env
+
+# class JackQuestion(QuestionGroup):
+#     title ="Jacks car rental problem"
+#     class JackItem(GridworldDPItem):
+#         title = "Value function test"
+#         max_cars = 5
+#         tol = 0.01
+#         def get_value_function(self):
+#             from irlc.ex09.value_iteration import value_iteration
+#             from irlc.ex09.jacks_car_rental import JackRentalMDP
+#             env = JackRentalMDP(max_cars=self.max_cars, verbose=True)
+#             pi, V = value_iteration(env, gamma=.9, theta=1e-3, max_iters=1000, verbose=True)
+#             return V, env
+
+class Problem8ValueIterationAgent(UTestCase):
+    """ Value-iteration agent test """
+
+    def test_sutton_gridworld(self):
+        tol = 1e-2
+        from irlc.gridworld.gridworld_environments import SuttonCornerGridEnvironment
+        env = SuttonCornerGridEnvironment(living_reward=-1)
+        from irlc.ex09.value_iteration_agent import ValueIterationAgent
+        agent = ValueIterationAgent(env, mdp=env.mdp)
+        stats, _ = train(env, agent, num_episodes=1000)
+        self.assertL2(np.mean([s['Accumulated Reward'] for s in stats]), tol=tol)
+
+    def test_bookgrid_gridworld(self):
+        tol = 1e-2
+        from irlc.gridworld.gridworld_environments import BookGridEnvironment
+        env = BookGridEnvironment(living_reward=-1)
+        from irlc.ex09.value_iteration_agent import ValueIterationAgent
+        agent = ValueIterationAgent(env, mdp=env.mdp)
+        stats, _ = train(env, agent, num_episodes=1000)
+        self.assertL2(np.mean([s['Accumulated Reward'] for s in stats]), tol=tol)
+
+
+    #
+    #
+    #     pass
+    # class ValueAgentItem(GridworldDPItem):
+    #     title = "Evaluation on Suttons small gridworld"
+    #     tol = 1e-2
+    #     def get_env(self):
+    #         from irlc.gridworld.gridworld_environments import SuttonCornerGridEnvironment
+    #         return SuttonCornerGridEnvironment(living_reward=-1)
+    #
+    #     def compute_answer_print(self):
+    #         env = self.get_env()
+    #         from irlc.ex09.value_iteration_agent import ValueIterationAgent
+    #         agent = ValueIterationAgent(env, mdp=env.mdp)
+    #         # env = VideoMonitor(env, agent=agent, agent_monitor_keys=('v',))
+    #         stats, _ = train(env, agent, num_episodes=1000)
+    #         return np.mean( [s['Accumulated Reward'] for s in stats])
+    #
+    #     def process_output(self, res, txt, numbers):
+    #         return res
+
+    # class BookItem(ValueAgentItem):
+    #     title = "Evaluation on alternative gridworld (Bookgrid)"
+    #     def get_env(self):
+    #         from irlc.gridworld.gridworld_environments import BookGridEnvironment
+    #         return BookGridEnvironment(living_reward=-0.6)
+
+# class DPAgentRLQuestion(QuestionGroup):
+#     title = "Value-iteration agent test"
+#     class ValueAgentItem(GridworldDPItem):
+#         title = "Evaluation on Suttons small gridworld"
+#         tol = 1e-2
+#         def get_env(self):
+#             from irlc.gridworld.gridworld_environments import SuttonCornerGridEnvironment
+#             return SuttonCornerGridEnvironment(living_reward=-1)
+#
+#         def compute_answer_print(self):
+#             env = self.get_env()
+#             from irlc.ex09.value_iteration_agent import ValueIterationAgent
+#             agent = ValueIterationAgent(env, mdp=env.mdp)
+#             # env = VideoMonitor(env, agent=agent, agent_monitor_keys=('v',))
+#             stats, _ = train(env, agent, num_episodes=1000)
+#             return np.mean( [s['Accumulated Reward'] for s in stats])
+#
+#         def process_output(self, res, txt, numbers):
+#             return res
+#
+#     class BookItem(ValueAgentItem):
+#         title = "Evaluation on alternative gridworld (Bookgrid)"
+#         def get_env(self):
+#             from irlc.gridworld.gridworld_environments import BookGridEnvironment
+#             return BookGridEnvironment(living_reward=-0.6)
+
+class Week09Tests(Report):
+    title = "Tests for week 09"
+    pack_imports = [irlc]
+    individual_imports = []
+    questions = [ (Problem1_to_3_Warmup, 10),
+                  (Problem4PolicyEvaluation, 10),
+                  (Problem5PolicyIteration, 10),
+                  (Problem6ValueIteration, 10),
+                  (Problem8ValueIterationAgent, 10),
+                  (Problem9Gambler, 10),
+                  ]
+    # (JackQuestion, 10),
+    # (ValueFunctionTest, 20),
+
+
+if __name__ == '__main__':
+    from unitgrade import evaluate_report_student
+    evaluate_report_student(Week09Tests())
diff --git a/irlc/tests/unitgrade_data/BanditQuestion.pkl b/irlc/tests/unitgrade_data/BanditQuestion.pkl
index 55e379fe474d7a967700bb9c83202905b8ebcbfa..4d5b7500dc9b2e7fec8a5e1783f2026156f00962 100644
Binary files a/irlc/tests/unitgrade_data/BanditQuestion.pkl and b/irlc/tests/unitgrade_data/BanditQuestion.pkl differ
diff --git a/irlc/tests/unitgrade_data/CartpoleCostQuestion.pkl b/irlc/tests/unitgrade_data/CartpoleCostQuestion.pkl
index 21e4c24c13dd49d445c4efe18438fe4a0b360513..343b7179775815208e5bcba235808d27ecaf5eba 100644
Binary files a/irlc/tests/unitgrade_data/CartpoleCostQuestion.pkl and b/irlc/tests/unitgrade_data/CartpoleCostQuestion.pkl differ
diff --git a/irlc/tests/unitgrade_data/CartpoleTimeQuestion.pkl b/irlc/tests/unitgrade_data/CartpoleTimeQuestion.pkl
index 21e4c24c13dd49d445c4efe18438fe4a0b360513..343b7179775815208e5bcba235808d27ecaf5eba 100644
Binary files a/irlc/tests/unitgrade_data/CartpoleTimeQuestion.pkl and b/irlc/tests/unitgrade_data/CartpoleTimeQuestion.pkl differ
diff --git a/irlc/tests/unitgrade_data/DirectAgentPendulum.pkl b/irlc/tests/unitgrade_data/DirectAgentPendulum.pkl
index 8bcfd04385b49acb537aa90a6c1906443c00c348..0486fcbe41eac606ac7ef8045f455761dee11be9 100644
Binary files a/irlc/tests/unitgrade_data/DirectAgentPendulum.pkl and b/irlc/tests/unitgrade_data/DirectAgentPendulum.pkl differ
diff --git a/irlc/tests/unitgrade_data/DirectMethods.pkl b/irlc/tests/unitgrade_data/DirectMethods.pkl
index 1872c37be157b1d23e330e90fb98df324bc707a7..9b175b43f74d0fcda46ed5150c8c7c7071ffe545 100644
Binary files a/irlc/tests/unitgrade_data/DirectMethods.pkl and b/irlc/tests/unitgrade_data/DirectMethods.pkl differ
diff --git a/irlc/tests/unitgrade_data/DirectSolverQuestion.pkl b/irlc/tests/unitgrade_data/DirectSolverQuestion.pkl
index 21e4c24c13dd49d445c4efe18438fe4a0b360513..343b7179775815208e5bcba235808d27ecaf5eba 100644
Binary files a/irlc/tests/unitgrade_data/DirectSolverQuestion.pkl and b/irlc/tests/unitgrade_data/DirectSolverQuestion.pkl differ
diff --git a/irlc/tests/unitgrade_data/Exam5InventoryEvaluation.pkl b/irlc/tests/unitgrade_data/Exam5InventoryEvaluation.pkl
index 288459bca52e824a5d9dabdcb4cf10e164f64114..26af5ecf71d06771737bf666e043228cdeb0b306 100644
Binary files a/irlc/tests/unitgrade_data/Exam5InventoryEvaluation.pkl and b/irlc/tests/unitgrade_data/Exam5InventoryEvaluation.pkl differ
diff --git a/irlc/tests/unitgrade_data/Exam6Toy2d.pkl b/irlc/tests/unitgrade_data/Exam6Toy2d.pkl
index 06341fef90fd2beed50cccac023bdd729b480a91..27985d2c70d9c619a927df1a9311b0dedaf28faf 100644
Binary files a/irlc/tests/unitgrade_data/Exam6Toy2d.pkl and b/irlc/tests/unitgrade_data/Exam6Toy2d.pkl differ
diff --git a/irlc/tests/unitgrade_data/ExamQuestion7FlowersStore.pkl b/irlc/tests/unitgrade_data/ExamQuestion7FlowersStore.pkl
index 7de7875d690be1fc4143070c2139bd34f61288ae..d47a7262321148b608adf93be0fd09c4824f561e 100644
Binary files a/irlc/tests/unitgrade_data/ExamQuestion7FlowersStore.pkl and b/irlc/tests/unitgrade_data/ExamQuestion7FlowersStore.pkl differ
diff --git a/irlc/tests/unitgrade_data/GradientBanditQuestion.pkl b/irlc/tests/unitgrade_data/GradientBanditQuestion.pkl
index 55e379fe474d7a967700bb9c83202905b8ebcbfa..4d5b7500dc9b2e7fec8a5e1783f2026156f00962 100644
Binary files a/irlc/tests/unitgrade_data/GradientBanditQuestion.pkl and b/irlc/tests/unitgrade_data/GradientBanditQuestion.pkl differ
diff --git a/irlc/tests/unitgrade_data/ILQRAgentQuestion.pkl b/irlc/tests/unitgrade_data/ILQRAgentQuestion.pkl
index 94b38667b6a59b2bdd827e9569ad5bce677cc91e..60e863514a92b0ed49d6e0a508e28fee4b13dc33 100644
Binary files a/irlc/tests/unitgrade_data/ILQRAgentQuestion.pkl and b/irlc/tests/unitgrade_data/ILQRAgentQuestion.pkl differ
diff --git a/irlc/tests/unitgrade_data/ILQRPendulumQuestion.pkl b/irlc/tests/unitgrade_data/ILQRPendulumQuestion.pkl
index af4efa1cc7fc8336bfab2d97317419f4573a58da..1a68e6a4c61615f654c923cc2161c1b51e1252ce 100644
Binary files a/irlc/tests/unitgrade_data/ILQRPendulumQuestion.pkl and b/irlc/tests/unitgrade_data/ILQRPendulumQuestion.pkl differ
diff --git a/irlc/tests/unitgrade_data/NonstatiotnaryAgentQuestion.pkl b/irlc/tests/unitgrade_data/NonstatiotnaryAgentQuestion.pkl
index 55e379fe474d7a967700bb9c83202905b8ebcbfa..4d5b7500dc9b2e7fec8a5e1783f2026156f00962 100644
Binary files a/irlc/tests/unitgrade_data/NonstatiotnaryAgentQuestion.pkl and b/irlc/tests/unitgrade_data/NonstatiotnaryAgentQuestion.pkl differ
diff --git a/irlc/tests/unitgrade_data/PendulumQuestion.pkl b/irlc/tests/unitgrade_data/PendulumQuestion.pkl
index 21e4c24c13dd49d445c4efe18438fe4a0b360513..343b7179775815208e5bcba235808d27ecaf5eba 100644
Binary files a/irlc/tests/unitgrade_data/PendulumQuestion.pkl and b/irlc/tests/unitgrade_data/PendulumQuestion.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl b/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl
index 0a911216fa96ee726261d5fd6122f47c63b7becd..fa48f111d5c714f5015e0d38fed0574ed138a4fb 100644
Binary files a/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl and b/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem1DiscreteKuromoto.pkl b/irlc/tests/unitgrade_data/Problem1DiscreteKuromoto.pkl
index 6174c0b3159b23350a66f8510986566388f9a9e9..9ce1577be4210acac290d13b30ecb25c2bc8c6cc 100644
Binary files a/irlc/tests/unitgrade_data/Problem1DiscreteKuromoto.pkl and b/irlc/tests/unitgrade_data/Problem1DiscreteKuromoto.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem1Kuramoto.pkl b/irlc/tests/unitgrade_data/Problem1Kuramoto.pkl
index 5da65912b9c77917947555ed5b62336969918a99..40be323303985937c830f555d411030c06d34d35 100644
Binary files a/irlc/tests/unitgrade_data/Problem1Kuramoto.pkl and b/irlc/tests/unitgrade_data/Problem1Kuramoto.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem1_to_3_Warmup.pkl b/irlc/tests/unitgrade_data/Problem1_to_3_Warmup.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..43b1807780fed0fce2f285873d9259c47915a715
Binary files /dev/null and b/irlc/tests/unitgrade_data/Problem1_to_3_Warmup.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl b/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl
index 5d994baa391da54fd3a6e1c1a369b72a9df5f17a..eb4eb650c6b317f7918a5ac1659b5b4c3f6d6a51 100644
Binary files a/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl and b/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem2DeterministicDP.pkl b/irlc/tests/unitgrade_data/Problem2DeterministicDP.pkl
index 4029b85e80a9ebbf315924351ada7ba445fcb24a..80c46cf2a55088fa63963112dcaf91d55888c204 100644
Binary files a/irlc/tests/unitgrade_data/Problem2DeterministicDP.pkl and b/irlc/tests/unitgrade_data/Problem2DeterministicDP.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem2DeterministicInventory.pkl b/irlc/tests/unitgrade_data/Problem2DeterministicInventory.pkl
index 547769c9bb40f7e2f9e061a3d24943b7bf016ea1..02f1c422b92a9bbff073f4ceab60753965310751 100644
Binary files a/irlc/tests/unitgrade_data/Problem2DeterministicInventory.pkl and b/irlc/tests/unitgrade_data/Problem2DeterministicInventory.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl b/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl
index f8b966396874d03b37f527e8166a7431bd63ce66..2b5c6f9bc563562e2152b7f53744a65ea5b43f98 100644
Binary files a/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl and b/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem3LQR.pkl b/irlc/tests/unitgrade_data/Problem3LQR.pkl
index cd8f6f6cd8072c224d9de2763d5585bdba4a6d80..841522bceb9b08611b411ba2716d2af183339f60 100644
Binary files a/irlc/tests/unitgrade_data/Problem3LQR.pkl and b/irlc/tests/unitgrade_data/Problem3LQR.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem3PID.pkl b/irlc/tests/unitgrade_data/Problem3PID.pkl
index 252cfd024c97e5da728820dacd87ab9910607247..636821ec8ce7350d4207e6c9d14ef7ebe8135044 100644
Binary files a/irlc/tests/unitgrade_data/Problem3PID.pkl and b/irlc/tests/unitgrade_data/Problem3PID.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem3StochasticDP.pkl b/irlc/tests/unitgrade_data/Problem3StochasticDP.pkl
index 0e1fc83741cb9bd0877d29de2b3828b78bdd5b01..b772ddc285d774ffc919998fb84c4fbfb9e58c4d 100644
Binary files a/irlc/tests/unitgrade_data/Problem3StochasticDP.pkl and b/irlc/tests/unitgrade_data/Problem3StochasticDP.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem4DPAgent.pkl b/irlc/tests/unitgrade_data/Problem4DPAgent.pkl
index 178368d13873f75c43be9a31cb3dbdb10d5fef36..abeb2698baffe4071706d26b41de533166e0cd65 100644
Binary files a/irlc/tests/unitgrade_data/Problem4DPAgent.pkl and b/irlc/tests/unitgrade_data/Problem4DPAgent.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl b/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl
index 22065591b65be79d935c05472a7603be0e00bcdb..c3dddca21dff68192c921eb2679f12aae31d5f27 100644
Binary files a/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl and b/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem4LQRAgent.pkl b/irlc/tests/unitgrade_data/Problem4LQRAgent.pkl
index 42b50d8f321a365c574de2e27cc5dead749dbee4..d28333e07c4cc38531db644776aa5666aa9d423f 100644
Binary files a/irlc/tests/unitgrade_data/Problem4LQRAgent.pkl and b/irlc/tests/unitgrade_data/Problem4LQRAgent.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem4PIDAgent.pkl b/irlc/tests/unitgrade_data/Problem4PIDAgent.pkl
index 14b3e4b4c95270f0c2953a2cc41a66833ba99d7f..5eb16a6dc2bf9316926640bb060b68e052a40354 100644
Binary files a/irlc/tests/unitgrade_data/Problem4PIDAgent.pkl and b/irlc/tests/unitgrade_data/Problem4PIDAgent.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem4PolicyEvaluation.pkl b/irlc/tests/unitgrade_data/Problem4PolicyEvaluation.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..5cff598a8271d14e39c5b19d503084e49e0db927
Binary files /dev/null and b/irlc/tests/unitgrade_data/Problem4PolicyEvaluation.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl b/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl
index 33dfa81f677fd061a0a39b2c51757d929785cd80..8c339845720431122e05901db839d432dfbe6f49 100644
Binary files a/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl and b/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem5PolicyIteration.pkl b/irlc/tests/unitgrade_data/Problem5PolicyIteration.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..9d24486ad6d7a659101f343a46a174a7a5c414c8
Binary files /dev/null and b/irlc/tests/unitgrade_data/Problem5PolicyIteration.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem5_6_Boeing.pkl b/irlc/tests/unitgrade_data/Problem5_6_Boeing.pkl
index b61782009434e3024f670821a02eff567ea7220c..344cba45db9b19e7d1c241f8db3f8a33d30cc86d 100644
Binary files a/irlc/tests/unitgrade_data/Problem5_6_Boeing.pkl and b/irlc/tests/unitgrade_data/Problem5_6_Boeing.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl b/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl
index 354e3485c6913c4ed2b0e90c1416d05becf63c1c..d73714e87077f359513abd9a96d8679f3a001cc5 100644
Binary files a/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl and b/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem6ValueIteration.pkl b/irlc/tests/unitgrade_data/Problem6ValueIteration.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..1f1dd0327b87e703efc548ac9ae8c830fb686ff5
Binary files /dev/null and b/irlc/tests/unitgrade_data/Problem6ValueIteration.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem7PIDCar.pkl b/irlc/tests/unitgrade_data/Problem7PIDCar.pkl
index 2ff576403f28ebc1f96c87a40defa18f2263737b..693d3794e690c2158431aff5ecf09e4b16bd3a63 100644
Binary files a/irlc/tests/unitgrade_data/Problem7PIDCar.pkl and b/irlc/tests/unitgrade_data/Problem7PIDCar.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem7_8_PidLQR.pkl b/irlc/tests/unitgrade_data/Problem7_8_PidLQR.pkl
index c0103b3e977fa2b98a34cf16e69b4168cf7d8d53..3d4c035314271e378418b77fb438734ddd7987cb 100644
Binary files a/irlc/tests/unitgrade_data/Problem7_8_PidLQR.pkl and b/irlc/tests/unitgrade_data/Problem7_8_PidLQR.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem8ValueIterationAgent.pkl b/irlc/tests/unitgrade_data/Problem8ValueIterationAgent.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..70d8eda754f751f57162a36381b27e192fa0601f
Binary files /dev/null and b/irlc/tests/unitgrade_data/Problem8ValueIterationAgent.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem9Gambler.pkl b/irlc/tests/unitgrade_data/Problem9Gambler.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..b52c804782d7804492a93342a7e3a8940e620837
Binary files /dev/null and b/irlc/tests/unitgrade_data/Problem9Gambler.pkl differ
diff --git a/irlc/tests/unitgrade_data/RendevouzItem.pkl b/irlc/tests/unitgrade_data/RendevouzItem.pkl
index 2ea308be8ae3ae254027640d548e0f9972c8cfe6..23c941bdfaca6edfe522b74947632170b089b3a4 100644
Binary files a/irlc/tests/unitgrade_data/RendevouzItem.pkl and b/irlc/tests/unitgrade_data/RendevouzItem.pkl differ
diff --git a/irlc/tests/unitgrade_data/UCBAgentQuestion.pkl b/irlc/tests/unitgrade_data/UCBAgentQuestion.pkl
index 55e379fe474d7a967700bb9c83202905b8ebcbfa..4d5b7500dc9b2e7fec8a5e1783f2026156f00962 100644
Binary files a/irlc/tests/unitgrade_data/UCBAgentQuestion.pkl and b/irlc/tests/unitgrade_data/UCBAgentQuestion.pkl differ