From 3c712cf70d7b300b81a17c99a17dca6baee92ccf Mon Sep 17 00:00:00 2001
From: Tue Herlau <tuhe@dtu.dk>
Date: Thu, 3 Apr 2025 16:40:22 +0200
Subject: [PATCH] Tests for week 9

---
 irlc/tests/tests_week09.py                    | 314 ++++++++++++++++++
 irlc/tests/unitgrade_data/BanditQuestion.pkl  | Bin 96256 -> 96256 bytes
 .../unitgrade_data/CartpoleCostQuestion.pkl   | Bin 5447 -> 5447 bytes
 .../unitgrade_data/CartpoleTimeQuestion.pkl   | Bin 5447 -> 5447 bytes
 .../unitgrade_data/DirectAgentPendulum.pkl    | Bin 231 -> 231 bytes
 irlc/tests/unitgrade_data/DirectMethods.pkl   | Bin 1459 -> 1459 bytes
 .../unitgrade_data/DirectSolverQuestion.pkl   | Bin 5447 -> 5447 bytes
 .../Exam5InventoryEvaluation.pkl              | Bin 217 -> 217 bytes
 irlc/tests/unitgrade_data/Exam6Toy2d.pkl      | Bin 283 -> 283 bytes
 .../ExamQuestion7FlowersStore.pkl             | Bin 182 -> 182 bytes
 .../unitgrade_data/GradientBanditQuestion.pkl | Bin 96256 -> 96256 bytes
 .../unitgrade_data/ILQRAgentQuestion.pkl      | Bin 326 -> 326 bytes
 .../unitgrade_data/ILQRPendulumQuestion.pkl   | Bin 298 -> 298 bytes
 .../NonstatiotnaryAgentQuestion.pkl           | Bin 96256 -> 96256 bytes
 .../tests/unitgrade_data/PendulumQuestion.pkl | Bin 5447 -> 5447 bytes
 .../unitgrade_data/Problem1BobsFriend.pkl     | Bin 170 -> 170 bytes
 .../Problem1DiscreteKuromoto.pkl              | Bin 570 -> 570 bytes
 .../tests/unitgrade_data/Problem1Kuramoto.pkl | Bin 3014 -> 3014 bytes
 .../unitgrade_data/Problem1_to_3_Warmup.pkl   | Bin 0 -> 497 bytes
 .../unitgrade_data/Problem2BobsPolicy.pkl     | Bin 368 -> 368 bytes
 .../Problem2DeterministicDP.pkl               | Bin 161 -> 161 bytes
 .../Problem2DeterministicInventory.pkl        | Bin 128 -> 128 bytes
 .../Problem3InventoryInventoryEnvironment.pkl | Bin 323 -> 323 bytes
 irlc/tests/unitgrade_data/Problem3LQR.pkl     | Bin 2025 -> 2025 bytes
 irlc/tests/unitgrade_data/Problem3PID.pkl     | Bin 334 -> 334 bytes
 .../unitgrade_data/Problem3StochasticDP.pkl   | Bin 345 -> 345 bytes
 irlc/tests/unitgrade_data/Problem4DPAgent.pkl | Bin 121 -> 121 bytes
 .../unitgrade_data/Problem4InventoryTrain.pkl | Bin 242 -> 242 bytes
 .../tests/unitgrade_data/Problem4LQRAgent.pkl | Bin 443 -> 443 bytes
 .../tests/unitgrade_data/Problem4PIDAgent.pkl | Bin 4673 -> 4673 bytes
 .../Problem4PolicyEvaluation.pkl              | Bin 0 -> 621 bytes
 .../Problem5PacmanHardcoded.pkl               | Bin 125 -> 125 bytes
 .../Problem5PolicyIteration.pkl               | Bin 0 -> 402 bytes
 .../unitgrade_data/Problem5_6_Boeing.pkl      | Bin 4219 -> 4219 bytes
 .../Problem6ChessTournament.pkl               | Bin 197 -> 197 bytes
 .../unitgrade_data/Problem6ValueIteration.pkl | Bin 0 -> 400 bytes
 irlc/tests/unitgrade_data/Problem7PIDCar.pkl  | Bin 419 -> 419 bytes
 .../unitgrade_data/Problem7_8_PidLQR.pkl      | Bin 415 -> 415 bytes
 .../Problem8ValueIterationAgent.pkl           | Bin 0 -> 324 bytes
 irlc/tests/unitgrade_data/Problem9Gambler.pkl | Bin 0 -> 1083 bytes
 irlc/tests/unitgrade_data/RendevouzItem.pkl   | Bin 603 -> 603 bytes
 .../tests/unitgrade_data/UCBAgentQuestion.pkl | Bin 96256 -> 96256 bytes
 42 files changed, 314 insertions(+)
 create mode 100644 irlc/tests/tests_week09.py
 create mode 100644 irlc/tests/unitgrade_data/Problem1_to_3_Warmup.pkl
 create mode 100644 irlc/tests/unitgrade_data/Problem4PolicyEvaluation.pkl
 create mode 100644 irlc/tests/unitgrade_data/Problem5PolicyIteration.pkl
 create mode 100644 irlc/tests/unitgrade_data/Problem6ValueIteration.pkl
 create mode 100644 irlc/tests/unitgrade_data/Problem8ValueIterationAgent.pkl
 create mode 100644 irlc/tests/unitgrade_data/Problem9Gambler.pkl

diff --git a/irlc/tests/tests_week09.py b/irlc/tests/tests_week09.py
new file mode 100644
index 0000000..74279f6
--- /dev/null
+++ b/irlc/tests/tests_week09.py
@@ -0,0 +1,314 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from unitgrade import UTestCase, Report
+import numpy as np
+import irlc
+from irlc import train
+from irlc.ex09.small_gridworld import SmallGridworldMDP
+from irlc.ex09.policy_iteration import policy_iteration
+from irlc.ex09.value_iteration import value_iteration
+from irlc.gridworld.gridworld_environments import FrozenLake
+from irlc.ex09.policy_evaluation import policy_evaluation
+
+class Problem1_to_3_Warmup(UTestCase):
+    def test_part1_average_reward(self):
+        from irlc.ex09.mdp_warmup import expected_reward
+        mdp = FrozenLake(living_reward=0.2).mdp  # Get the MDP of this environment.
+        s0 = mdp.initial_state
+        ## Part 1: Expected reward
+        self.assertAlmostEqualC(expected_reward(mdp, s=s0, a=0), places=5)
+        self.assertAlmostEqualC(expected_reward(mdp, s=s0, a=2), places=5)
+        self.assertAlmostEqualC(expected_reward(mdp, s=(1,2), a=0), places=5)
+        mdp = FrozenLake(living_reward=0.2).mdp  # Get the MDP of this environment.
+        self.assertAlmostEqualC(expected_reward(mdp, s=s0, a=2), places=5)
+
+    def test_part2_v2q(self):
+        ## Part 2
+        # First let's create a non-trivial value function
+        V = {}
+        mdp = FrozenLake(living_reward=0.3).mdp
+
+        for k, s in enumerate(sorted(mdp.nonterminal_states)):
+            V[s] = 2 * (s[0] - s[1]) - 3.5
+
+        from irlc.ex09.mdp_warmup import value_function2q_function
+
+        states = [(0, 1), (2, 3), (0, 3), (1,3), (1, 2)]
+
+        s0 = mdp.initial_state
+
+        q_ = value_function2q_function(mdp, s=s0, gamma=0.9, v=V)
+        self.assertIsInstance(q_, dict)
+        self.assertEqual(list(sorted(q_.keys())), [0, 1, 2, 3] )
+
+        self.assertEqual(len(q_), 4)
+        self.assertEqual(len(value_function2q_function(mdp, s=(1,2), gamma=0.9, v=V)), 1)
+        self.assertAlmostEqualC(q_[0],places=4)
+        self.assertAlmostEqualC(q_[2], places=4)
+
+
+        for s in sorted(states):
+            q_ = value_function2q_function(mdp, s=s, gamma=0.9, v=V)
+            for a in [0, 1, 2, 3]:
+                if a in mdp.A(s):
+                    self.assertAlmostEqualC(q_[a], places=4)
+
+    def test_part2_q2v(self):
+        ## Part 3
+        mdp = FrozenLake(living_reward=0.2).mdp
+        from irlc.ex09.mdp_warmup import value_function2q_function, q_function2value_function
+        # Create a non-trivial Q-function for this problem.
+        Q = {}
+        s0 = mdp.initial_state
+
+        for k, s in enumerate(mdp.nonterminal_states):
+            for a in mdp.A(s):
+                Q[s, a] =  (s[0] - s[1]) - 5 * a  # The particular values are not important in this example
+        # Create a policy. In this case pi(a=1) = 0.4.
+        pi = {0: 0.2,
+              1: 0.4,
+              2: 0.2,
+              3: 0.2}
+        self.assertAlmostEqualC(q_function2value_function(pi, Q, s=s0), places=4)
+
+def train_recording(env, agent, trajectories):
+    for t in trajectories:
+        env.reset()
+        for k in range(len(t.action)):
+            s = t.state[k]
+            r = t.reward[k]
+            a = t.action[k]
+            sp = t.state[k+1]
+            info = t.info[k]
+            info_sp = t.info[k+1]
+
+            agent.pi(s,k)
+            agent.train(s, a, r, sp, done=k == len(t.action)-1, info_s = info, info_sp=info_sp)
+
+
+class ValueFunctionTest(UTestCase):
+    def check_value_function(self, mdp, V):
+        self.assertL2(np.asarray([V[s] for s in mdp.states]), tol=1e-3)
+
+class Problem5PolicyIteration(ValueFunctionTest):
+    """ Iterative Policy iteration """
+    def test_policy_iteration(self):
+        env = SmallGridworldMDP()
+        pi, v = policy_iteration(env, gamma=0.91)
+        self.check_value_function(env, v)
+
+
+
+class Problem6ValueIteration(ValueFunctionTest):
+    """ Iterative value iteration """
+    def test_value_iteration(self):
+        env = SmallGridworldMDP()
+        # Compute the policy and value function using value iteration.
+        pi, v = value_iteration(env, gamma=0.91)
+        self.check_value_function(env, v)
+
+
+
+class Problem4PolicyEvaluation(ValueFunctionTest):
+    """ Iterative policy evaluation """
+    def test_policy_evaluation(self):
+        mdp = SmallGridworldMDP()
+        pi = {s: {a: 1/len(mdp.A(s)) for a in mdp.A(s) } for s in mdp.nonterminal_states }
+        v = policy_evaluation(pi, mdp, gamma=0.91)
+        self.check_value_function(mdp, v)
+
+    def test_policy_evaluation_b(self):
+        mdp = SmallGridworldMDP()
+        pi = {s: {a: 1 if a == 0 else 0 for a in mdp.A(s) } for s in mdp.nonterminal_states }
+        v = policy_evaluation(pi, mdp, gamma=0.91)
+        self.check_value_function(mdp, v)
+
+
+
+
+class Problem9Gambler(ValueFunctionTest):
+    """ Gambler's problem """
+    def test_gambler_value_function(self):
+        # from irlc.ex09.small_gridworld import SmallGridworldMDP, plot_value_function
+        # from irlc.ex09.policy_iteration import policy_iteration
+        # from irlc.ex09.value_iteration import value_iteration
+        from irlc.ex09.gambler import GamblerMDP
+        env = GamblerMDP()
+        pi, v = value_iteration(env, gamma=0.91)
+        self.check_value_function(env, v)
+
+# class JackQuestion(ValueFunctionTest):
+#     """ Jack's car rental problem """
+#     def test_jacks_rental_value_function(self):
+#         # from irlc.ex09.small_gridworld import SmallGridworldMDP, plot_value_function
+#         # from irlc.ex09.policy_iteration import policy_iteration
+#         # from irlc.ex09.value_iteration import value_iteration
+#         # from irlc.ex09.gambler import GamblerEnv
+#         from irlc.ex09.jacks_car_rental import JackRentalMDP
+#         max_cars = 5
+#         env = JackRentalMDP(max_cars=max_cars, verbose=True)
+#         pi, V = value_iteration(env, gamma=.9, theta=1e-3, max_iters=1000, verbose=True)
+#         self.check_value_function(env, V)
+
+# class JackQuestion(QuestionGroup):
+#     title = "Jacks car rental problem"
+#
+#     class JackItem(GridworldDPItem):
+#         title = "Value function test"
+#         max_cars = 5
+#         tol = 0.01
+#
+#         def get_value_function(self):
+#             from irlc.ex09.value_iteration import value_iteration
+#             from irlc.ex09.jacks_car_rental import JackRentalMDP
+#             env = JackRentalMDP(max_cars=self.max_cars, verbose=True)
+#             pi, V = value_iteration(env, gamma=.9, theta=1e-3, max_iters=1000, verbose=True)
+#             return V, env
+
+
+        # return v, env
+    # pass
+# class DynamicalProgrammingGroup(QuestionGroup):
+#     title = "Dynamical Programming test"
+#
+#     class PolicyEvaluationItem(GridworldDPItem):
+#         title = "Iterative Policy evaluation"
+#
+#
+#
+#     class PolicyIterationItem(GridworldDPItem):
+#         title = "policy iteration"
+#         def get_value_function(self):
+#             from irlc.ex09.small_gridworld import SmallGridworldMDP
+#             from irlc.ex09.policy_iteration import policy_iteration
+#             env = SmallGridworldMDP()
+#             pi, v = policy_iteration(env, gamma=0.91)
+#             return v, env
+#     class ValueIteartionItem(GridworldDPItem):
+#         title = "value iteration"
+#
+#         def get_value_function(self):
+#             from irlc.ex09.value_iteration import value_iteration
+#             from irlc.ex09.small_gridworld import SmallGridworldMDP
+#             env = SmallGridworldMDP()
+#             policy, v = value_iteration(env, gamma=0.92, theta=1e-6)
+#             return v, env
+
+# class GamlerQuestion(QuestionGroup):
+#     title = "Gamblers problem"
+#     class GamlerItem(GridworldDPItem):
+#         title = "Value-function test"
+#         def get_value_function(self):
+#             # from irlc.ex09.small_gridworld import SmallGridworldMDP, plot_value_function
+#             # from irlc.ex09.policy_iteration import policy_iteration
+#             from irlc.ex09.value_iteration import value_iteration
+#             from irlc.ex09.gambler import GamblerEnv
+#             env = GamblerEnv()
+#             pi, v = value_iteration(env, gamma=0.91)
+#             return v, env
+
+# class JackQuestion(QuestionGroup):
+#     title ="Jacks car rental problem"
+#     class JackItem(GridworldDPItem):
+#         title = "Value function test"
+#         max_cars = 5
+#         tol = 0.01
+#         def get_value_function(self):
+#             from irlc.ex09.value_iteration import value_iteration
+#             from irlc.ex09.jacks_car_rental import JackRentalMDP
+#             env = JackRentalMDP(max_cars=self.max_cars, verbose=True)
+#             pi, V = value_iteration(env, gamma=.9, theta=1e-3, max_iters=1000, verbose=True)
+#             return V, env
+
+class Problem8ValueIterationAgent(UTestCase):
+    """ Value-iteration agent test """
+
+    def test_sutton_gridworld(self):
+        tol = 1e-2
+        from irlc.gridworld.gridworld_environments import SuttonCornerGridEnvironment
+        env = SuttonCornerGridEnvironment(living_reward=-1)
+        from irlc.ex09.value_iteration_agent import ValueIterationAgent
+        agent = ValueIterationAgent(env, mdp=env.mdp)
+        stats, _ = train(env, agent, num_episodes=1000)
+        self.assertL2(np.mean([s['Accumulated Reward'] for s in stats]), tol=tol)
+
+    def test_bookgrid_gridworld(self):
+        tol = 1e-2
+        from irlc.gridworld.gridworld_environments import BookGridEnvironment
+        env = BookGridEnvironment(living_reward=-1)
+        from irlc.ex09.value_iteration_agent import ValueIterationAgent
+        agent = ValueIterationAgent(env, mdp=env.mdp)
+        stats, _ = train(env, agent, num_episodes=1000)
+        self.assertL2(np.mean([s['Accumulated Reward'] for s in stats]), tol=tol)
+
+
+    #
+    #
+    #     pass
+    # class ValueAgentItem(GridworldDPItem):
+    #     title = "Evaluation on Suttons small gridworld"
+    #     tol = 1e-2
+    #     def get_env(self):
+    #         from irlc.gridworld.gridworld_environments import SuttonCornerGridEnvironment
+    #         return SuttonCornerGridEnvironment(living_reward=-1)
+    #
+    #     def compute_answer_print(self):
+    #         env = self.get_env()
+    #         from irlc.ex09.value_iteration_agent import ValueIterationAgent
+    #         agent = ValueIterationAgent(env, mdp=env.mdp)
+    #         # env = VideoMonitor(env, agent=agent, agent_monitor_keys=('v',))
+    #         stats, _ = train(env, agent, num_episodes=1000)
+    #         return np.mean( [s['Accumulated Reward'] for s in stats])
+    #
+    #     def process_output(self, res, txt, numbers):
+    #         return res
+
+    # class BookItem(ValueAgentItem):
+    #     title = "Evaluation on alternative gridworld (Bookgrid)"
+    #     def get_env(self):
+    #         from irlc.gridworld.gridworld_environments import BookGridEnvironment
+    #         return BookGridEnvironment(living_reward=-0.6)
+
+# class DPAgentRLQuestion(QuestionGroup):
+#     title = "Value-iteration agent test"
+#     class ValueAgentItem(GridworldDPItem):
+#         title = "Evaluation on Suttons small gridworld"
+#         tol = 1e-2
+#         def get_env(self):
+#             from irlc.gridworld.gridworld_environments import SuttonCornerGridEnvironment
+#             return SuttonCornerGridEnvironment(living_reward=-1)
+#
+#         def compute_answer_print(self):
+#             env = self.get_env()
+#             from irlc.ex09.value_iteration_agent import ValueIterationAgent
+#             agent = ValueIterationAgent(env, mdp=env.mdp)
+#             # env = VideoMonitor(env, agent=agent, agent_monitor_keys=('v',))
+#             stats, _ = train(env, agent, num_episodes=1000)
+#             return np.mean( [s['Accumulated Reward'] for s in stats])
+#
+#         def process_output(self, res, txt, numbers):
+#             return res
+#
+#     class BookItem(ValueAgentItem):
+#         title = "Evaluation on alternative gridworld (Bookgrid)"
+#         def get_env(self):
+#             from irlc.gridworld.gridworld_environments import BookGridEnvironment
+#             return BookGridEnvironment(living_reward=-0.6)
+
+class Week09Tests(Report):
+    title = "Tests for week 09"
+    pack_imports = [irlc]
+    individual_imports = []
+    questions = [ (Problem1_to_3_Warmup, 10),
+                  (Problem4PolicyEvaluation, 10),
+                  (Problem5PolicyIteration, 10),
+                  (Problem6ValueIteration, 10),
+                  (Problem8ValueIterationAgent, 10),
+                  (Problem9Gambler, 10),
+                  ]
+    # (JackQuestion, 10),
+    # (ValueFunctionTest, 20),
+
+
+if __name__ == '__main__':
+    from unitgrade import evaluate_report_student
+    evaluate_report_student(Week09Tests())
diff --git a/irlc/tests/unitgrade_data/BanditQuestion.pkl b/irlc/tests/unitgrade_data/BanditQuestion.pkl
index 55e379fe474d7a967700bb9c83202905b8ebcbfa..4d5b7500dc9b2e7fec8a5e1783f2026156f00962 100644
GIT binary patch
delta 103
zcmV-t0GR)P@CAVI1&}!c{{XQ&D=`81vu80}Faf!f#9=4`_JFhQVLmAWadfjTi$^H~
zwVp(ic(fq`_CT|nv<@)>R7|t0<sdNwpHOg@&K3bJ0f4vs76Ga-0#O0Cs7C=%FbI_3
J00000buNMuCjI~b

delta 103
zcmV-t0GR)P@CAVI1&}!c{XnrhD=`6?vu80}FagVx#9=4`^+2=kVLmAWZ78!Yi$^H~
zwk(*Fc(fq`^}w^6v<@)>TB5V7<sdNwpcE*V&K3bJ0l>HX76Ga-0#N3+s7C=%FbMOY
J00000buN5#C)@x4

diff --git a/irlc/tests/unitgrade_data/CartpoleCostQuestion.pkl b/irlc/tests/unitgrade_data/CartpoleCostQuestion.pkl
index 21e4c24c13dd49d445c4efe18438fe4a0b360513..343b7179775815208e5bcba235808d27ecaf5eba 100644
GIT binary patch
delta 171
zcmX@EbzEzLJ@bmF6%(Dz84paXn#CmeVX_$GF2)6uFET1A2;F`kz`($eVU)qvHYJ0j
zZA$GFD4V5iio4yZ$-+z~oRSxSN<fMxA7qo;oX2#InMr8DWIHxB1*03ABtSC0NZKK6
yu=W#^Ti8sPq(5xl#rBwkNyuRGe13K2&C3fWpX3i@e6U$sK$(?O$^dM5sU83rH8_m`

delta 171
zcmX@EbzEzLJ@dvr2@{>n8Ba{Cn#Cl>Fj<Uo7vqD;7a5flgfq_;FfcG=7-g`vP08SB
zn^HRk%4TVs;%;|jvM`egr_2GM5|E<F2ifE{=P{jQW)c*bY{#aiV9YRG0wm*$q#eQr
yYd<!*h0TOXj$!jIw#OVyf)11C^Q$v2JGfx-N&Y~_fX&hZ%B-9M7r=&>>Hz@pV>M?0

diff --git a/irlc/tests/unitgrade_data/CartpoleTimeQuestion.pkl b/irlc/tests/unitgrade_data/CartpoleTimeQuestion.pkl
index 21e4c24c13dd49d445c4efe18438fe4a0b360513..343b7179775815208e5bcba235808d27ecaf5eba 100644
GIT binary patch
delta 171
zcmX@EbzEzLJ@bmF6%(Dz84paXn#CmeVX_$GF2)6uFET1A2;F`kz`($eVU)qvHYJ0j
zZA$GFD4V5iio4yZ$-+z~oRSxSN<fMxA7qo;oX2#InMr8DWIHxB1*03ABtSC0NZKK6
yu=W#^Ti8sPq(5xl#rBwkNyuRGe13K2&C3fWpX3i@e6U$sK$(?O$^dM5sU83rH8_m`

delta 171
zcmX@EbzEzLJ@dvr2@{>n8Ba{Cn#Cl>Fj<Uo7vqD;7a5flgfq_;FfcG=7-g`vP08SB
zn^HRk%4TVs;%;|jvM`egr_2GM5|E<F2ifE{=P{jQW)c*bY{#aiV9YRG0wm*$q#eQr
yYd<!*h0TOXj$!jIw#OVyf)11C^Q$v2JGfx-N&Y~_fX&hZ%B-9M7r=&>>Hz@pV>M?0

diff --git a/irlc/tests/unitgrade_data/DirectAgentPendulum.pkl b/irlc/tests/unitgrade_data/DirectAgentPendulum.pkl
index 8bcfd04385b49acb537aa90a6c1906443c00c348..0486fcbe41eac606ac7ef8045f455761dee11be9 100644
GIT binary patch
delta 20
bcmaFP_?&TqJ)^@!hxr`M;sp#045fMiO8f=}

delta 20
ccmaFP_?&TqJ!8W}hxr^YS1B+sFqG;608^0$?*IS*

diff --git a/irlc/tests/unitgrade_data/DirectMethods.pkl b/irlc/tests/unitgrade_data/DirectMethods.pkl
index 1872c37be157b1d23e330e90fb98df324bc707a7..9b175b43f74d0fcda46ed5150c8c7c7071ffe545 100644
GIT binary patch
delta 42
wcmdnYy_tK0B@<W0MC(nA4IAGbVq_9E*u06UhLQ2XW_^|#Mvepq5Gd6H06De{r2qf`

delta 42
wcmdnYy_tK0CDY$26RkHfCTx6nh>=O0Ve=-Y8b-#1&H5}gj2sFKAW*6Y08Nz)rT_o{

diff --git a/irlc/tests/unitgrade_data/DirectSolverQuestion.pkl b/irlc/tests/unitgrade_data/DirectSolverQuestion.pkl
index 21e4c24c13dd49d445c4efe18438fe4a0b360513..343b7179775815208e5bcba235808d27ecaf5eba 100644
GIT binary patch
delta 171
zcmX@EbzEzLJ@bmF6%(Dz84paXn#CmeVX_$GF2)6uFET1A2;F`kz`($eVU)qvHYJ0j
zZA$GFD4V5iio4yZ$-+z~oRSxSN<fMxA7qo;oX2#InMr8DWIHxB1*03ABtSC0NZKK6
yu=W#^Ti8sPq(5xl#rBwkNyuRGe13K2&C3fWpX3i@e6U$sK$(?O$^dM5sU83rH8_m`

delta 171
zcmX@EbzEzLJ@dvr2@{>n8Ba{Cn#Cl>Fj<Uo7vqD;7a5flgfq_;FfcG=7-g`vP08SB
zn^HRk%4TVs;%;|jvM`egr_2GM5|E<F2ifE{=P{jQW)c*bY{#aiV9YRG0wm*$q#eQr
yYd<!*h0TOXj$!jIw#OVyf)11C^Q$v2JGfx-N&Y~_fX&hZ%B-9M7r=&>>Hz@pV>M?0

diff --git a/irlc/tests/unitgrade_data/Exam5InventoryEvaluation.pkl b/irlc/tests/unitgrade_data/Exam5InventoryEvaluation.pkl
index 288459bca52e824a5d9dabdcb4cf10e164f64114..26af5ecf71d06771737bf666e043228cdeb0b306 100644
GIT binary patch
delta 27
gcmcb~c$0C0Gt>726J0%+BmyS(J8>v5fIz7p0E=@6_5c6?

delta 27
gcmcb~c$0C0Gt&=;iLM?@vJ)ouJ8>j1fIz7p0EdkS`~Uy|

diff --git a/irlc/tests/unitgrade_data/Exam6Toy2d.pkl b/irlc/tests/unitgrade_data/Exam6Toy2d.pkl
index 06341fef90fd2beed50cccac023bdd729b480a91..27985d2c70d9c619a927df1a9311b0dedaf28faf 100644
GIT binary patch
delta 26
gcmbQuG@EIHDdU%kW^<YP4otji$5Hx#0SHR<0ElV{O#lD@

delta 26
hcmbQuG@EIHDdYEvW^<WtEtq)Kj^kPZ0}zzz0RW3532guX

diff --git a/irlc/tests/unitgrade_data/ExamQuestion7FlowersStore.pkl b/irlc/tests/unitgrade_data/ExamQuestion7FlowersStore.pkl
index 7de7875d690be1fc4143070c2139bd34f61288ae..d47a7262321148b608adf93be0fd09c4824f561e 100644
GIT binary patch
delta 27
gcmdnSxQ%gw3)2^diEd_0#y=+38E_~tfIz7p0DW}_eE<Le

delta 27
gcmdnSxQ%gw3)ANV6Wz?1Ob$$}GvHXj00O0Y0F2iNg#Z8m

diff --git a/irlc/tests/unitgrade_data/GradientBanditQuestion.pkl b/irlc/tests/unitgrade_data/GradientBanditQuestion.pkl
index 55e379fe474d7a967700bb9c83202905b8ebcbfa..4d5b7500dc9b2e7fec8a5e1783f2026156f00962 100644
GIT binary patch
delta 103
zcmV-t0GR)P@CAVI1&}!c{{XQ&D=`81vu80}Faf!f#9=4`_JFhQVLmAWadfjTi$^H~
zwVp(ic(fq`_CT|nv<@)>R7|t0<sdNwpHOg@&K3bJ0f4vs76Ga-0#O0Cs7C=%FbI_3
J00000buNMuCjI~b

delta 103
zcmV-t0GR)P@CAVI1&}!c{XnrhD=`6?vu80}FagVx#9=4`^+2=kVLmAWZ78!Yi$^H~
zwk(*Fc(fq`^}w^6v<@)>TB5V7<sdNwpcE*V&K3bJ0l>HX76Ga-0#N3+s7C=%FbMOY
J00000buN5#C)@x4

diff --git a/irlc/tests/unitgrade_data/ILQRAgentQuestion.pkl b/irlc/tests/unitgrade_data/ILQRAgentQuestion.pkl
index 94b38667b6a59b2bdd827e9569ad5bce677cc91e..60e863514a92b0ed49d6e0a508e28fee4b13dc33 100644
GIT binary patch
delta 22
ccmX@cbc|_&4byLiiFW@vPhG2E0D@9I0A-N}>;M1&

delta 22
ccmX@cbc|_&4b%4n6Yc(Uo;10^00gCa0Bu|dWdHyG

diff --git a/irlc/tests/unitgrade_data/ILQRPendulumQuestion.pkl b/irlc/tests/unitgrade_data/ILQRPendulumQuestion.pkl
index af4efa1cc7fc8336bfab2d97317419f4573a58da..1a68e6a4c61615f654c923cc2161c1b51e1252ce 100644
GIT binary patch
delta 22
ccmZ3*w2Enh9kV@)!bHc19ETklfS^<l088No?*IS*

delta 22
ccmZ3*w2Enh9kbn!0}~w|a-3Vl00gCa0AxuA?f?J)

diff --git a/irlc/tests/unitgrade_data/NonstatiotnaryAgentQuestion.pkl b/irlc/tests/unitgrade_data/NonstatiotnaryAgentQuestion.pkl
index 55e379fe474d7a967700bb9c83202905b8ebcbfa..4d5b7500dc9b2e7fec8a5e1783f2026156f00962 100644
GIT binary patch
delta 103
zcmV-t0GR)P@CAVI1&}!c{{XQ&D=`81vu80}Faf!f#9=4`_JFhQVLmAWadfjTi$^H~
zwVp(ic(fq`_CT|nv<@)>R7|t0<sdNwpHOg@&K3bJ0f4vs76Ga-0#O0Cs7C=%FbI_3
J00000buNMuCjI~b

delta 103
zcmV-t0GR)P@CAVI1&}!c{XnrhD=`6?vu80}FagVx#9=4`^+2=kVLmAWZ78!Yi$^H~
zwk(*Fc(fq`^}w^6v<@)>TB5V7<sdNwpcE*V&K3bJ0l>HX76Ga-0#N3+s7C=%FbMOY
J00000buN5#C)@x4

diff --git a/irlc/tests/unitgrade_data/PendulumQuestion.pkl b/irlc/tests/unitgrade_data/PendulumQuestion.pkl
index 21e4c24c13dd49d445c4efe18438fe4a0b360513..343b7179775815208e5bcba235808d27ecaf5eba 100644
GIT binary patch
delta 171
zcmX@EbzEzLJ@bmF6%(Dz84paXn#CmeVX_$GF2)6uFET1A2;F`kz`($eVU)qvHYJ0j
zZA$GFD4V5iio4yZ$-+z~oRSxSN<fMxA7qo;oX2#InMr8DWIHxB1*03ABtSC0NZKK6
yu=W#^Ti8sPq(5xl#rBwkNyuRGe13K2&C3fWpX3i@e6U$sK$(?O$^dM5sU83rH8_m`

delta 171
zcmX@EbzEzLJ@dvr2@{>n8Ba{Cn#Cl>Fj<Uo7vqD;7a5flgfq_;FfcG=7-g`vP08SB
zn^HRk%4TVs;%;|jvM`egr_2GM5|E<F2ifE{=P{jQW)c*bY{#aiV9YRG0wm*$q#eQr
yYd<!*h0TOXj$!jIw#OVyf)11C^Q$v2JGfx-N&Y~_fX&hZ%B-9M7r=&>>Hz@pV>M?0

diff --git a/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl b/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl
index 0a911216fa96ee726261d5fd6122f47c63b7becd..fa48f111d5c714f5015e0d38fed0574ed138a4fb 100644
GIT binary patch
delta 28
gcmZ3*xQcOtEt6owM0-=Fp9~Yrv^gIzfI+Dq0DLtEbpQYW

delta 28
hcmZ3*xQcOtEt5dPM0-=FUk($?v^k$QFn~a*9sq%E2pj+a

diff --git a/irlc/tests/unitgrade_data/Problem1DiscreteKuromoto.pkl b/irlc/tests/unitgrade_data/Problem1DiscreteKuromoto.pkl
index 6174c0b3159b23350a66f8510986566388f9a9e9..9ce1577be4210acac290d13b30ecb25c2bc8c6cc 100644
GIT binary patch
delta 41
vcmdnRvWsPc6SKX{hKa7dOg0h|uk2t9nB2?g&!{^2E@KF1Tq6Srl<ENhB@PT~

delta 41
vcmdnRvWsPc6SLjlhKa7dOxAlQUfIEzGP#%0pRr-`UB(d3cp(N5DAfZ1Py`LW

diff --git a/irlc/tests/unitgrade_data/Problem1Kuramoto.pkl b/irlc/tests/unitgrade_data/Problem1Kuramoto.pkl
index 5da65912b9c77917947555ed5b62336969918a99..40be323303985937c830f555d411030c06d34d35 100644
GIT binary patch
delta 32
ncmX>meoTCVHRGR&HvNoqCZ5^BxM6cEqa_zdD-Qz@l<ENh*sluu

delta 32
ncmX>meoTCVHRJDzHvNpE6VL2mytBEL(UOZ}ZWaR&l<ENh)9?!v

diff --git a/irlc/tests/unitgrade_data/Problem1_to_3_Warmup.pkl b/irlc/tests/unitgrade_data/Problem1_to_3_Warmup.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..43b1807780fed0fce2f285873d9259c47915a715
GIT binary patch
literal 497
zcmZo*nfi>80Ss!VX!M8#6y+!7q~;pNm*mGA$A>2t<(3vq>ES9)EeS1f&PgmTp3*j@
zhovMlHx<ZmxBCmxn!(s3S&~{@5?_#5RALyPSe9Cpn4TJ6lv<uxlmgPh2Gp5aR03oI
zZSiJsw?8=(1ZH_NBA86>5W^T4yqV#mr5TJFY;99ASb(<KyB!0o0NTa}x6LTN%%~7(
zZUzU;>;rOevk%A^gMf)QlluW7s3^1h0Rgb6u{VqR0dA-$D?$OAJKO|zcen{02qq^&
z0T;r}-0lY?&w#*LZyxspq7c7$^SU1pPXmE;Z$5+qeuQlT2sa2KTqXop015zJSO8c{
fU<-gkqcV5^0K@ix3dHVWpiW_!PO~jw!BRZ{KMt=*

literal 0
HcmV?d00001

diff --git a/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl b/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl
index 5d994baa391da54fd3a6e1c1a369b72a9df5f17a..eb4eb650c6b317f7918a5ac1659b5b4c3f6d6a51 100644
GIT binary patch
delta 56
zcmeys^nq!D9V5d;dn4x03=9)j%T1gl&cp&_m@pr>W-wWeQEn2WGV`IB2q4pubKxol
J1|TTa0{{eE5SRb}

delta 55
zcmeys^nq!D9iziUdn2YF2PUqPpEyaJN!VfHY7^$eQ-da}G0IJ1RAyd${sNHc$a&~r
K90L%P>Hz>msS{2B

diff --git a/irlc/tests/unitgrade_data/Problem2DeterministicDP.pkl b/irlc/tests/unitgrade_data/Problem2DeterministicDP.pkl
index 4029b85e80a9ebbf315924351ada7ba445fcb24a..80c46cf2a55088fa63963112dcaf91d55888c204 100644
GIT binary patch
delta 22
bcmZ3;xR7yzBeOlD!bF!)juQ+ZP^t$2LsA76

delta 22
bcmZ3;xR7yzBeUIChlwts919pgpi~b4Q2Pb}

diff --git a/irlc/tests/unitgrade_data/Problem2DeterministicInventory.pkl b/irlc/tests/unitgrade_data/Problem2DeterministicInventory.pkl
index 547769c9bb40f7e2f9e061a3d24943b7bf016ea1..02f1c422b92a9bbff073f4ceab60753965310751 100644
GIT binary patch
delta 22
bcmZo*Y+#(=!EE>cz(j9djs^x0DAfZ1P|*et

delta 22
ZcmZo*Y+#(=!EDcvFwt9=g8>Xm^#D2!1Tg>r

diff --git a/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl b/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl
index f8b966396874d03b37f527e8166a7431bd63ce66..2b5c6f9bc563562e2152b7f53744a65ea5b43f98 100644
GIT binary patch
delta 65
zcmX@ibeL&E0Mnm_i9zk`Oxo>YH_r)7d?3P9*)j2(mU8K236?NF!2?s;rUXqX&S1=t
VZkv+9(l*82ez_b60}zzz0RT!<7g+!R

delta 65
zcmX@ibeL&E0MqXS6NB2>C%&z*H;ogV_&|h7Nn+wRE#(<`Ld^%f1P@GUn-VmoID;`m
Vx@}4ZOWPE8`{lkT7=WNu4*+pI82JDI

diff --git a/irlc/tests/unitgrade_data/Problem3LQR.pkl b/irlc/tests/unitgrade_data/Problem3LQR.pkl
index cd8f6f6cd8072c224d9de2763d5585bdba4a6d80..841522bceb9b08611b411ba2716d2af183339f60 100644
GIT binary patch
delta 50
zcmaFK|B`=#Ib+R63l=8E1)FV|m>C%zHajw>FfnOf*u0!IfsskQV=^cE4Gs+k5Gd6H
E0DF!N6951J

delta 50
zcmaFK|B`=#IpdX$7A#DR9h+^Lm>C%>Hajw>FfnO)Y+lZqz{sR=VKOKC4GtFu5Gd6H
E0EV;;X#fBK

diff --git a/irlc/tests/unitgrade_data/Problem3PID.pkl b/irlc/tests/unitgrade_data/Problem3PID.pkl
index 252cfd024c97e5da728820dacd87ab9910607247..636821ec8ce7350d4207e6c9d14ef7ebe8135044 100644
GIT binary patch
delta 39
tcmX@dbdG6)8RPef<_?U46DypU#1~B5<H{t`FqxUriBtFh0|=Dr0RRIP3jF{8

delta 39
scmX@dbdG6)8RL(M<_?S^6DypUWEv*!ab*$>n9R)R#3>@c00O0Y0Qn9I2mk;8

diff --git a/irlc/tests/unitgrade_data/Problem3StochasticDP.pkl b/irlc/tests/unitgrade_data/Problem3StochasticDP.pkl
index 0e1fc83741cb9bd0877d29de2b3828b78bdd5b01..b772ddc285d774ffc919998fb84c4fbfb9e58c4d 100644
GIT binary patch
delta 27
gcmcb~bdza<J<~UaiH^C93KQ2a<a7*T0D)3H0ERdR{r~^~

delta 27
gcmcb~bdza<J=50%6CHCI1tzXv$mvwY00O0Y0F=!L)&Kwi

diff --git a/irlc/tests/unitgrade_data/Problem4DPAgent.pkl b/irlc/tests/unitgrade_data/Problem4DPAgent.pkl
index 178368d13873f75c43be9a31cb3dbdb10d5fef36..abeb2698baffe4071706d26b41de533166e0cd65 100644
GIT binary patch
delta 18
Wcmb=doM6q^Fww@8!+`+=O7#FTas(^@

delta 18
Xcmb=doM6r9Fww@8;{XE)l<ENhGcp8h

diff --git a/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl b/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl
index 22065591b65be79d935c05472a7603be0e00bcdb..c3dddca21dff68192c921eb2679f12aae31d5f27 100644
GIT binary patch
delta 48
zcmeyw_=$0XBh$|V6P<gMcgVHA4zv(FFr{ru(3Ii~#tgQ$DH$wnQ{3%);tntXL8%@9
D1i=#L

delta 48
zcmeyw_=$0XBh!zDiOxOBwf3fQyF3LCOlg}EG^IF$F@vpbN(M{Y6nFdHl?)(Iss{k(
Cz!9|o

diff --git a/irlc/tests/unitgrade_data/Problem4LQRAgent.pkl b/irlc/tests/unitgrade_data/Problem4LQRAgent.pkl
index 42b50d8f321a365c574de2e27cc5dead749dbee4..d28333e07c4cc38531db644776aa5666aa9d423f 100644
GIT binary patch
delta 32
lcmdnZyqkG~HRHF5HlLYm_8*vNCo$QJF@v*2nE?b!^#IR^3T*%Y

delta 32
mcmdnZyqkG~HRIQbHlLXrrX)<XlbGzqn88`x*1!M+rFsC&T?)ql

diff --git a/irlc/tests/unitgrade_data/Problem4PIDAgent.pkl b/irlc/tests/unitgrade_data/Problem4PIDAgent.pkl
index 14b3e4b4c95270f0c2953a2cc41a66833ba99d7f..5eb16a6dc2bf9316926640bb060b68e052a40354 100644
GIT binary patch
delta 52
zcmX@8a!_T04Wq+G+np@TOP9}>oWm)(S%@{9g?Y)DADeSHGg+7%3pQWpO=RI*@~VLW
H2uk$;|J4#1

delta 52
zcmX@8a!_T04P(Pb+np@T%W@qi=Wt4H7Ge!&VP0A@VRH^=CJWO>jm_726InQyCVXH3
Hf>J#I<|`2(

diff --git a/irlc/tests/unitgrade_data/Problem4PolicyEvaluation.pkl b/irlc/tests/unitgrade_data/Problem4PolicyEvaluation.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..5cff598a8271d14e39c5b19d503084e49e0db927
GIT binary patch
literal 621
zcmZo*nVQ7J00y;FG<qZgit>|kQgclL@^dniD_zSHb4n9SGV}AM^l%lYmV_2K=Oh*v
zPidRd!%~u&n+jyO+kHI%)}F!GBUX}HToPXZ)*7FR&<j+~2Go{XR07mcJH?x!M=Y;2
zx1drlJ~_WARWG+RrzA76s3@^=N)JzbQEGC2UU5lLX>!Sw$y0h*!78Wpu;-;fluQQd
zZ=K@Im^7t*O3)MyZ$@u{)+rf$J*+7ul?ACFElg<^Q#v~!vdle<HdFlk{Jj1H0hsV+
zD4CMf>D<7k+TUSTDt2I}calrzCb<Jm3(dhaBrq5luGRBwD6Ew`u<b4D!Zv-G1AR>Q
zAFNGPH~<s(JM(MLIv^irPGEo^SUpTXOkdTepi*<7Jun)kZVJ#D#TkqlY;99ASb*VZ
zpP>i~M+xlV7@q_Tz6=gX5N7aZ2!H}2LkKw-q<|j#+#s5H-9qgEj85Wt(QpIEhdJzJ
zb$9(uARi_UQ#awzSB;qdfBSc3bN*cLoE1YJrXQvc!vC-z-8`7b6k#5#+5@(`R1W|j
CSoYcg

literal 0
HcmV?d00001

diff --git a/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl b/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl
index 33dfa81f677fd061a0a39b2c51757d929785cd80..8c339845720431122e05901db839d432dfbe6f49 100644
GIT binary patch
delta 13
Rcmb=eolweQzyJcJdH@;Z0^0xp

delta 13
Scmb=eolwg0rhx$nO7#FEL<AQA

diff --git a/irlc/tests/unitgrade_data/Problem5PolicyIteration.pkl b/irlc/tests/unitgrade_data/Problem5PolicyIteration.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..9d24486ad6d7a659101f343a46a174a7a5c414c8
GIT binary patch
literal 402
zcmZo*ncB|C00y;FG<w7Xit>|kQgclM@^dniD?Lk6ixNvR^Yf<ka22PPgcdmGBo-G>
zX`9l+Qj(dQ3S_w3eLVoyox#{6T9R5^5?=t;8J`K)3RKMo)RkIP0#si+#hal=EUz@T
zpi(bBIlm}XFSj(OBr~z7D6w)%4^MniYI1&FaY<2Wa><m*Q+imzDyQ_Y=cPcDOa|(2
zo#M=xG^Kq?&=d`CMsI=EDH(h{tSKdx1*sq{OlcNVIy)e;%sq@YQ~dn=y#50LnDAyO
znUd7$+yHhn!-xI5vN?Y)c+PqNMnl9I7$zM0su9!wZ$FHN$)odO=D@_!^~2<0G)&zT
apfidy7&F+~rev@HgVBERkq;m>rFsAuqokPt

literal 0
HcmV?d00001

diff --git a/irlc/tests/unitgrade_data/Problem5_6_Boeing.pkl b/irlc/tests/unitgrade_data/Problem5_6_Boeing.pkl
index b61782009434e3024f670821a02eff567ea7220c..344cba45db9b19e7d1c241f8db3f8a33d30cc86d 100644
GIT binary patch
delta 42
xcmeyZ@LOSm4bu;Yjdsr&nJ2ARm}oD$*_O$Xk!h*I<_6}^JREC87=WNu4*+6d4fp^6

delta 42
xcmeyZ@LOSm4b%6Ajdsr&nfLCjm}oD$*_O$Xk;y$_a|82d9*!4(7=WNu4*-9M56S=l

diff --git a/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl b/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl
index 354e3485c6913c4ed2b0e90c1416d05becf63c1c..d73714e87077f359513abd9a96d8679f3a001cc5 100644
GIT binary patch
delta 23
dcmX@gc$9I1BeUH%hKVi}oOfToX8?jyJpgK%2txn>

delta 23
dcmX@gc$9I1BeOk&!bFz}&g<Iy8GxWv4**t)26_Mh

diff --git a/irlc/tests/unitgrade_data/Problem6ValueIteration.pkl b/irlc/tests/unitgrade_data/Problem6ValueIteration.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..1f1dd0327b87e703efc548ac9ae8c830fb686ff5
GIT binary patch
literal 400
zcmZo*ncB+800y;FG<w7Wit>|kQghA15_3vZJxfxH5=%1k^QQE06{nVj7C7f578g%x
zo6^Hll9`(dWVqXXg=o%T>=7wREiQ>K18I!UgzE$<W&>(UEh+&jubtw}&?A;tnp;q*
z7oVJ8l&Y6onp2XQSX7i)Ii-guz9=<0Kd-o?s5H4`%H%0MtYDQ>df4+)AW9|!^|wxO
zW=xvWJ|$?1hBu?PK<ktYz8=<;lFEWqkQSyiiz%HQ5LxCPMw=;qetus6fdEW+Gn7n8
z>U3^^xcS5WUD=#J7d&S@0HYz|3=9(vebtER|F<7T!{pKVFmqty==x#uFdC+A3eXwF
W8H^chZBsH>fT3t_RRC5}ss{kDp`k+n

literal 0
HcmV?d00001

diff --git a/irlc/tests/unitgrade_data/Problem7PIDCar.pkl b/irlc/tests/unitgrade_data/Problem7PIDCar.pkl
index 2ff576403f28ebc1f96c87a40defa18f2263737b..693d3794e690c2158431aff5ecf09e4b16bd3a63 100644
GIT binary patch
delta 37
qcmZ3?yqI}{B@1u!S%-<Xag3KHeiWU!(Vp?x#E&aE_b`A#sU83gJq;)T

delta 37
qcmZ3?yqI}{B@3U7(t?S$af}xyeiWU!(Vp?h#E&aE4={j1sU86M{|tBl

diff --git a/irlc/tests/unitgrade_data/Problem7_8_PidLQR.pkl b/irlc/tests/unitgrade_data/Problem7_8_PidLQR.pkl
index c0103b3e977fa2b98a34cf16e69b4168cf7d8d53..3d4c035314271e378418b77fb438734ddd7987cb 100644
GIT binary patch
delta 30
jcmbQwJfC@j4byjriFQAj8!snJwq%UutXawc0;PHYt<?#?

delta 30
kcmbQwJfC@j4b#7diFQAjn`SUfwq%UutbTET0SHR<0I~)OZU6uP

diff --git a/irlc/tests/unitgrade_data/Problem8ValueIterationAgent.pkl b/irlc/tests/unitgrade_data/Problem8ValueIterationAgent.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..70d8eda754f751f57162a36381b27e192fa0601f
GIT binary patch
literal 324
zcmZo*nQF<%00y;FG<u{1it>|kQgbcB5_3vZJxfxH5=%1k^BmJt^Gc@la22PPgcdmG
zBo-G>X`9l+Qj(dQ3S_w3eQ5xjkipm^T9R5^5?@?eQj(t+pI(%iQl4LwlLAuB2Go^W
zR03qyPVr{w5z8yhEvVFsPtGq&)ypl-DalMMDoU&bsw+-T%t<VoGI>f5D_9MXPARD@
zNCk;7rCChr>}a17G)2Rkxrfnail3jK*MA@Y6W$CZQ<9uHAl_j(0JJJ-N^u5b23y;d
z3>Kg}?bjVj07(MfDGqjLQht6m$dM>6&ESAIF@rZl7-pCN!Z2x=VOtiMfD9|u0{|28
Bd1n9s

literal 0
HcmV?d00001

diff --git a/irlc/tests/unitgrade_data/Problem9Gambler.pkl b/irlc/tests/unitgrade_data/Problem9Gambler.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..b52c804782d7804492a93342a7e3a8940e620837
GIT binary patch
literal 1083
zcmXw&YcSjg6vZtTMJp1O2O0!1szzs;rb2SO4LZ8@YN^(9*V47S*@t$Gq)R>1zpTc;
zQZ#QB8JY-IhK>pnLWm+Ii(qBRF7e2tNrlqI^nAGY%-r+)a6cTe_D{~*8k;&s+7m`J
z9w&}PPjT^!O;K+iN$B(Gf-tTZE0)hEb4Ws4kd#7K8^0qf?wb^oLYN8Yd_i>L77?8q
z%M#L~8A5ivAc@1?V06?hohMM+V`QKPVagV!a79kh@f;rADMiQ<B*pT0u_BT%jONkf
zIc&awCyW=6MI=GpOp=5yJ7FtQq~^0pFG?JlO9qqnfs{acHpzU8pe6`JT>1v0$@qxO
z&)d3c5fnEvC@3gkU9HU#s39QZ@;&Xfwm#mN-`C%{u;derT^<JV=G{s7XMWlwTqH!9
zzVQ0<_za{+T2Q&RxzKH{aP|ML09Ug@Eqw+^m>mddc2u0keto4_L@5Jp=<6@S@1Db|
za$C!JT*T{F`zwMy%VCor!)NiTAa2#D>M5y4?q_CoDS=m^>}5)(m^Bz~?>1U<zYe0+
zIJ}am#d>ko$5}>FIOZ9z<qb(8nTXoAvf2P{Yp%z3<3=og+awjg(}e8Npo7-;e@FXH
zyAbN7Hpq_}*bd9uaLT5t=;@($%#B+{Nz2=zsEn}Ud*1>coFB}+cMIu_&CV|vw^6=y
zz@K-b10~7D9b*=qm=!r@B-VAJ!M>;5t>g}@IX$KFv@Y!a)x1;b+70Kx<hA+v9)!gB
z?x>CGMe*mf)S&fVykA>?<l57F=x956f1s!jsr9lh58pqLkr3h=daxhQ&dT&&X7oc(
z9P}h%sUNHtwwbCM17Nfg_J5lX!tT_nbX)5nN=r5*!xdj5Q&TE~rsY_pWLgHNe@)LG
z-7^A*xH>T{c?1uZhh&l0Mv$6J*ZXm01o}A_OlSq8ptgS+LLDB(Z{5GtbX$$V<RZ&c
zN{+$*Hmf<8`Vc;oWQYOdA!v23i*L4!W2*VeGUw$uw3?jW{ljkp{WCF=g_{#N`O5A+
zjW;H-(`qTo(QgtK%Z_EzmPv$}a?^FDCo!4huyEL1j_n?%E*fGv3NQN`=G4luD4`4%
zX-}cy?UlO=U60`Q{IBAw@kgi*P_b2-3M5!Y^c6ZNP{AnIO=BpqFZdIMXQ=|APgJj&
zg9_;DdQ{$UpGL<AKBw2enMPLH;WImGrePdaa>7ez2BK6MLmoK;hsy;l?@KcXt?_o*
zTk#lk&bp+e?=ehzlArxxsKhx5EsLF~#86n~m}|KbHYUzC$0;ggB@;Iz8dTu!iSqVc
zP+_22_xLW4S$y?#7pHw_7P2ottje;UL(-vuu#z)#_`e~fx-onvg{hN6GPTvcN1==R
I#!cw-AGO%_)Bpeg

literal 0
HcmV?d00001

diff --git a/irlc/tests/unitgrade_data/RendevouzItem.pkl b/irlc/tests/unitgrade_data/RendevouzItem.pkl
index 2ea308be8ae3ae254027640d548e0f9972c8cfe6..23c941bdfaca6edfe522b74947632170b089b3a4 100644
GIT binary patch
delta 29
jcmcc3a+_s>1>^6HmY$4EGxkhwVLZpVHoJlW2uk$;ti1{^

delta 29
jcmcc3a+_s>1>>KMmY$4E?k$sB7|(GolgMBIf>J#Ip)?6S

diff --git a/irlc/tests/unitgrade_data/UCBAgentQuestion.pkl b/irlc/tests/unitgrade_data/UCBAgentQuestion.pkl
index 55e379fe474d7a967700bb9c83202905b8ebcbfa..4d5b7500dc9b2e7fec8a5e1783f2026156f00962 100644
GIT binary patch
delta 103
zcmV-t0GR)P@CAVI1&}!c{{XQ&D=`81vu80}Faf!f#9=4`_JFhQVLmAWadfjTi$^H~
zwVp(ic(fq`_CT|nv<@)>R7|t0<sdNwpHOg@&K3bJ0f4vs76Ga-0#O0Cs7C=%FbI_3
J00000buNMuCjI~b

delta 103
zcmV-t0GR)P@CAVI1&}!c{XnrhD=`6?vu80}FagVx#9=4`^+2=kVLmAWZ78!Yi$^H~
zwk(*Fc(fq`^}w^6v<@)>TB5V7<sdNwpcE*V&K3bJ0l>HX76Ga-0#N3+s7C=%FbMOY
J00000buN5#C)@x4

-- 
GitLab