diff --git a/.gitignore b/.gitignore
index 6a3f2a9c0333afa4f4e1e48a25605b1333527faf..d73cb8de4f3d0c9f3b15278b1874922df218895c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -60,8 +60,10 @@ irlc/exam/exam2024august/*.pdf
 irlc/exam/exam2025*/*.zip
 irlc/exam/exam2025*/*.pdf
 
-#irlc/exam/midterm2023a
-#irlc/exam/midterm2023b
+irlc/exam/exam2*/solution
+
+irlc/exam/midterm2023a
+irlc/exam/midterm2023b
 
 irlc/lectures/lec01
 irlc/lectures/lec02
diff --git a/irlc/car/car_model.py b/irlc/car/car_model.py
index 2d4567e9ceb693d52a774673a59b08797bc6efc0..2897d6268891cab9da635ca87a298293fdf7d4ad 100644
--- a/irlc/car/car_model.py
+++ b/irlc/car/car_model.py
@@ -91,10 +91,10 @@ class SymbolicBicycleModel(ControlModel):
 
     def x_bound(self) -> Box:
         return Box(np.asarray([-np.inf, -np.inf, -np.inf, -np.inf, -np.inf, -self.map.width]),
-                   np.asarray([self.v_max, np.inf, np.inf, np.inf, np.inf, self.map.width]))
+                   np.asarray([self.v_max, np.inf, np.inf, np.inf, np.inf, self.map.width]), dtype=np.float64)
 
     def u_bound(self) -> Box:
-        return Box(np.asarray([-0.5, -1]),np.asarray([0.5, 1]))
+        return Box(np.asarray([-0.5, -1]),np.asarray([0.5, 1]), dtype=np.float64)
 
     def render(self, x, render_mode='human'):
         if self.viewer == None:
diff --git a/irlc/ex01/__init__.py b/irlc/ex01/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..51d06d4927f23b2f23bf8b39f2b235f268d55ca8
--- /dev/null
+++ b/irlc/ex01/__init__.py
@@ -0,0 +1,2 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+"""This directory contains the exercises for week 1."""
diff --git a/irlc/ex01/agent.py b/irlc/ex01/agent.py
new file mode 100644
index 0000000000000000000000000000000000000000..6cca61aa985b22c27d8f17b8210c3e8056e04e1a
--- /dev/null
+++ b/irlc/ex01/agent.py
@@ -0,0 +1,385 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+"""The Agent class.
+
+References:
+  [Her25] Tue Herlau. Sequential decision making. (Freely available online), 2025.
+"""
+import typing
+import itertools
+import os
+import sys
+from collections import OrderedDict, namedtuple
+import numpy as np
+from tqdm import tqdm
+from irlc.utils.common import load_time_series, log_time_series
+from irlc.utils.irlc_plot import existing_runs
+import shutil
+from gymnasium import Env
+from dataclasses import dataclass
+
+class Agent: 
+    r"""The main agent class. See (Her25, Subsection 4.4.3) for additional details.
+
+    To use the agent class, you should first create an environment. In this case we will just create an instance of the
+    ``InventoryEnvironment`` (see (Her25, Subsection 4.2.3)).
+
+    :Example:
+
+        .. runblock:: pycon
+
+            >>> from irlc import Agent                                              # You can import directly from top-level package
+            >>> import numpy as np
+            >>> np.random.seed(42)                                                  # Fix the seed for reproducibility
+            >>> from irlc.ex01.inventory_environment import InventoryEnvironment
+            >>> env = InventoryEnvironment()                                        # Create an instance of the environment
+            >>> agent = Agent(env)                                                  # Create an instance of the agent.
+            >>> s0, info0 = env.reset()                                             # Always call reset to start the environment
+            >>> a0 = agent.pi(s0, k=0, info=info0)                                  # Tell the agent to compute action $a_{k=0}$
+            >>> print(f"In state {s0=}, the agent took the action {a0=}")
+    """
+    
+    def __init__(self, env: Env):
+        """Instantiate the Agent class.
+
+        The agent is given the gymnasium environment it must interact with. This allows the agent to know what the
+        action and observation space is.
+
+        :param env: The gymnasium ``Env`` instance the agent should interact with.
+        """
+        self.env = env   
+
+    def pi(self, s, k : int, info : typing.Optional[dict] =None):
+        r"""Evaluate the Agent's policy (i.e., compute the action the agent wants to take) at time step ``k`` in state ``s``.
+        
+        This corresponds to the environment being in state :math:`x_k`, and the function should compute the next
+        action the agent wishes to take:
+                
+        .. math::
+            u_k = \mu_k(x_k)
+        
+        This means that ``s`` = :math:`x_k` and ``k`` = :math:`k \in \{0, 1, ...\}`. The function should return an action that lies in the action-space
+        of the environment.
+        
+        The info dictionary:
+            The ``info``-dictionary contains possible extra information returned from the environment, for instance when calling the ``s, info = env.reset()`` function.
+            The main use in this course is in control, where the dictionary contains a value ``info['time_seconds']`` (which corresponds to the simulation time :math:`t` in seconds).
+            
+            We will also use the info dictionary to let the agent know certain actions are not available. This is done by setting the ``info['mask']``-key. 
+            Note that this is only relevant for reinforcement learning, and you should see the documentation/exercises for reinforcement learning for additional details.
+        
+        The default behavior of the agent is to return a random action. An example:
+        
+        .. runblock:: pycon
+        
+            >>> from irlc.pacman.pacman_environment import PacmanEnvironment
+            >>> from irlc import Agent
+            >>> env = PacmanEnvironment()
+            >>> s, info = env.reset()
+            >>> agent = Agent(env)            
+            >>> agent.pi(s, k=0, info=info) # get a random action
+            >>> agent.pi(s, k=0)            # If info is not specified, all actions are assumed permissible.
+
+        :param s: Current state the environment is in.
+        :param k: Current time step.
+        :param info: Optional info-dictionary from the environment. If it contains a ``'mask'`` entry, the mask is passed on to ``env.action_space.sample``.
+        :return: The action the agent wants to take in the given state at the given time. By default the agent returns a random action
+        """ 
+        if info is None or 'mask' not in info:
+            return self.env.action_space.sample()
+        else:
+            """ In the case where the actions available in each state differ, openAI deals with that by specifying a 
+            ``mask``-entry in the info-dictionary. The mask can then be passed on to the 
+            env.action_space.sample-function to make sure we don't sample illegal actions. I consider this the most 
+            difficult and annoying thing about openai gym."""
+            if info['mask'].max() > 1:  # Any mask entry larger than 1 is rejected before sampling
+                raise Exception("Bad mask!")
+            return self.env.action_space.sample(mask=info['mask']) 
+
+
+    def train(self, s, a, r, sp, done=False, info_s=None, info_sp=None): 
+        r"""Implement this function if the agent has to learn (be trained).
+
+        Note that you only have to implement this function from week 7 onwards -- before that, we are not interested in control methods that learn.
+        
+        The function takes a number of input arguments. You should imagine that
+         
+        * ``s`` is the current state :math:`x_k`
+        * ``a`` is the action the agent took in state ``s``, i.e. ``a`` :math:`= u_k = \mu_k(x_k)`
+        * ``r`` is the reward the agent got from that action
+        * ``sp`` (s-plus) is the state the environment then transitioned to, i.e. ``sp`` :math:`= x_{k+1}`
+        * ``done`` tells the agent if the environment has stopped
+        * ``info_s`` is the information-dictionary returned by the environment as it transitioned to ``s``
+        * ``info_sp`` is the information-dictionary returned by the environment as it transitioned to ``sp``.
+          
+        The following example will hopefully clarify it by showing how you would manually call the train-function once:
+          
+        :Example:      
+           
+            .. runblock:: pycon
+
+                >>> from irlc.ex01.inventory_environment import InventoryEnvironment    # import environment
+                >>> from irlc import Agent
+                >>> env = InventoryEnvironment()                                        # Create an instance of the environment
+                >>> agent = Agent(env)                                                  # Create an instance of the agent.
+                >>> s, info_s = env.reset()                                             # s is the current state
+                >>> a = agent.pi(s, k=0, info=info_s)                                   # The agent takes an action
+                >>> sp, r, done, _, info_sp = env.step(a)                               # Environment updates
+                >>> agent.train(s, a, r, sp, done, info_s, info_sp)                     # How the training function is called
+
+        
+        In control and dynamic programming, please recall that the reward is equal to minus the cost.
+        
+        :param s: Current state :math:`x_k`
+        :param a: Action taken :math:`u_k`
+        :param r: Reward obtained by taking action :math:`u_k` in state :math:`x_k`
+        :param sp: The state that the environment transitioned to, :math:`x_{k+1}`
+        :param done: Whether environment terminated when transitioning to ``sp``
+        :param info_s: The information dictionary corresponding to ``s`` returned by ``env.reset`` (when :math:`k=0`) and otherwise ``env.step``.
+        :param info_sp: The information-dictionary corresponding to ``sp`` returned by ``env.step``
+        :return: None
+        """
+        pass  
+
+    def __str__(self):
+        """**Optional:** A unique name for this agent. Used for labels when plotting, but can be kept like this."""
+        return super().__str__()
+
+    def extra_stats(self) -> dict:
+        """**Optional:** Implement this function if you wish to record extra information from the ``Agent`` while training.
+
+        You can safely ignore this method as it will only be used for control theory to create nicer plots """
+        return {}
+
+fields = ('time', 'state', 'action', 'reward')
+Trajectory = namedtuple('Trajectory', fields + ("env_info",))
+
+# Experimental dataclass for per-episode statistics. NOTE(review): in this file only the __main__ demo constructs it; train() still builds plain dictionaries.
+@dataclass
+class Stats:
+    episode: int
+    episode_length: int            # Divisor used by average_reward
+    accumulated_reward: float      # Numerator used by average_reward
+
+    total_steps: int
+    trajectory : typing.Optional[Trajectory] = None
+    agent_stats : typing.Optional[dict] = None
+
+    @property
+    def average_reward(self) -> float:
+        return self.accumulated_reward / self.episode_length  # Raises ZeroDivisionError when episode_length == 0
+
+# s = Stats(episode=0, episode_length=5, accumulated_reward=4, total_steps=2, trajectory=Trajectory())
+
+
+def train(env,
+          agent=None,
+          experiment_name=None,
+          num_episodes=1,
+          verbose=True,
+          reset=True, # If True we will call env.reset() upon episode start.
+          max_steps=1e10,
+          max_runs=None,
+          return_trajectory=True, # Return the current trajectories as a list
+          resume_stats=None, # Resume stat collection from last save.
+          log_interval=1, # Only log every log_interval episodes. Reduces size of log files.
+          delete_old_experiments=False, # Remove the old experiments folder. Useful while debugging a model (or to conserve disk space)
+          seed=None, # Attempt to set the seed of the random number generator to produce reproducible results.
+          ):
+    """This function implements the main training loop as described in (Her25, Subsection 4.4.4).
+
+    The loop will simulate the interaction between agent `agent` and the environment `env`.
+    The function has a lot of special functionality, so it is useful to consider the common cases. An example:
+
+    >>> stats, _ = train(env, agent, num_episodes=2)
+
+    Simulate interaction for two episodes (i.e. environment terminates two times and is reset).
+    `stats` will be a list of length two containing information from each run
+
+    >>> stats, trajectories = train(env, agent, num_episodes=2, return_trajectory=True)
+
+    `trajectories` will be a list of length two containing information from the two trajectories.
+
+    >>> stats, _ = train(env, agent, experiment_name='experiments/my_run', num_episodes=2)
+
+    Save `stats`, and trajectories, to a file which can easily be loaded/plotted (see course software for examples of this).
+    The file will be time-stamped so using several calls you can repeat the same experiment (run) many times.
+
+    >>> stats, _ = train(env, agent, experiment_name='experiments/my_run', num_episodes=2, max_runs=10)
+
+    As above, but do not perform more than 10 runs. Useful for repeated experiments.
+
+    :param env: A gymnasium ``Env`` instance (the environment)
+    :param agent: An ``Agent`` instance. If ``None``, a default ``Agent(env)`` (which selects random actions) is used.
+    :param experiment_name: The outcome of this experiment will be saved in a folder with this name. This will allow you to run multiple (repeated) experiment and visualize the results in a single plot, which is very important in reinforcement learning.
+    :param num_episodes: Number of episodes to simulate
+    :param verbose: Display progress bar
+    :param reset: Call ``env.reset()`` before simulation start. Default is ``True``. This is only useful in very rare cases.
+    :param max_steps: Terminate if this many steps have elapsed (for non-terminating environments)
+    :param max_runs: Maximum number of repeated experiments (requires ``experiment_name``)
+    :param return_trajectory: Return trajectories list (on by default; set to ``False`` to save memory)
+    :param resume_stats: Resume stat collection from last run (this requires the ``experiment_name`` variable to be set)
+    :param log_interval: Only log stats every ``log_interval`` episodes. Useful if you want to run really long experiments.
+    :param delete_old_experiments: If true, old saved experiments will be deleted. This is useful during debugging.
+    :param seed: An integer. The random number generator of the environment will be reset to this seed allowing for reproducible results.
+    :return: A tuple ``(stats, trajectories)``. ``stats`` is a list with one dictionary of statistics per logged episode; ``trajectories`` is a list of recorded trajectories (empty when ``return_trajectory`` is false).
+    """
+
+    from irlc import cache_write
+    from irlc import cache_read
+    # This flag is never set to True in this function; it only supplies the default for resume_stats below.
+    saveload_model = False
+    save_stats = True
+    if agent is None:
+        print("[train] No agent was specified. Using irlc.Agent(env) (this agent selects actions at random)")
+        agent = Agent(env)
+
+    if delete_old_experiments and experiment_name is not None and os.path.isdir(experiment_name):
+        shutil.rmtree(experiment_name)
+    # If the requested number of runs already exists, return the saved results instead of simulating again.
+    if experiment_name is not None and max_runs is not None and existing_runs(experiment_name) >= max_runs:
+        stats, recent = load_time_series(experiment_name=experiment_name)
+        if return_trajectory:
+            trajectories = cache_read(recent+"/trajectories.pkl")
+        else:
+            trajectories = []
+        return stats, trajectories
+    stats = []
+    steps = 0
+    ep_start = 0
+    resume_stats = saveload_model if resume_stats is None else resume_stats
+
+    recent = None
+    if resume_stats:
+        stats, recent = load_time_series(experiment_name=experiment_name)
+        if recent is not None:
+            ep_start, steps = stats[-1]['Episode']+1, stats[-1]['Steps']
+
+    trajectories = []
+    # Set once max_steps is reached, so that the episode loop stops as well as the inner step loop.
+    break_outer = False
+
+    with tqdm(total=num_episodes, disable=not verbose, file=sys.stdout, mininterval=int(num_episodes/100) if num_episodes>100 else None) as tq:
+        for i_episode in range(num_episodes):
+            if break_outer:
+                break
+            info_s = {}
+            if reset or i_episode > 0:
+                if seed is not None:
+                    s, info_s = env.reset(seed=seed)
+                    seed = None  # Only the first reset is seeded.
+                else:
+                    s, info_s = env.reset()
+            elif hasattr(env, "s"):  # No reset requested for the first episode: continue from the state stored on the environment.
+                s = env.s
+            elif hasattr(env, 'state'):
+                s = env.state
+            else:
+                s = env.model.s
+            # Rewards obtained in this episode; summed up for the stats entry below.
+            reward = []
+            trajectory = Trajectory(time=[], state=[], action=[], reward=[], env_info=[])
+            k = 0  # Time step within the current episode.
+            for _ in itertools.count():
+                # The policy is always called with the time step k.
+                a = agent.pi(s, k, info_s)
+                k = k + 1
+                sp, r, terminated, truncated, info_sp = env.step(a)
+                done = terminated or truncated
+
+                if info_sp is not None and 'mask' in info_sp and info_sp['mask'].max() > 1:
+                    print("bad")  # A mask entry larger than 1 is only reported here.
+
+                agent.train(s, a, r, sp, done, info_s, info_sp)
+
+                if return_trajectory:
+                    trajectory.time.append(np.asarray(info_s['time_seconds'] if 'time_seconds' in info_s else steps))
+                    trajectory.state.append(s)
+                    trajectory.action.append(a)
+                    trajectory.reward.append(np.asarray(r))
+                    trajectory.env_info.append(info_s)
+
+                reward.append(r)
+                steps += 1
+                # When the episode ends, the final state, info and time are recorded as well. The time is looked up
+                # in info_sp, falling back to the step counter when info_sp has no 'time_seconds' entry.
+
+                if done or steps >= max_steps:
+                    trajectory.state.append(sp)
+                    trajectory.env_info.append(info_sp)
+                    trajectory.time.append(np.asarray(info_sp['time_seconds'] if info_sp is not None and 'time_seconds' in info_sp else steps))
+                    break_outer = steps >= max_steps
+                    break
+                s = sp
+                info_s = info_sp
+            if return_trajectory:
+                # Best effort: for control environments the time/state/action/reward lists are stacked into arrays.
+                # Any failure (e.g. the import or np.stack raising) leaves the trajectory as plain lists.
+                try:
+                    from irlc.ex04.control_environment import ControlEnvironment
+                    if isinstance(env, ControlEnvironment): # TODO: this is too hacky. States/actions should be lists, and subsequent methods should stack.
+                        trajectory = Trajectory(**{field: np.stack([np.asarray(x_) for x_ in getattr(trajectory, field)]) for field in fields}, env_info=trajectory.env_info)
+                except Exception:
+                    # Deliberately ignored, see the comment above the try-block.
+                    pass
+
+                trajectories.append(trajectory)
+            # Only every log_interval'th episode produces a stats entry.
+            if (i_episode + 1) % log_interval == 0:
+                stats.append({"Episode": i_episode + ep_start,
+                              "Accumulated Reward": sum(reward),
+                              "Length": len(reward),
+                              "Steps": steps, # Useful for deep learning applications. This should be kept, or week 13 will have issues.
+                              **agent.extra_stats()})
+
+            rate = int(num_episodes / 100)
+            if rate > 0 and i_episode % rate == 0 and len(stats) > 0:
+                tq.set_postfix(ordered_dict=OrderedDict(list(OrderedDict(stats[-1]).items())[:5]))
+            tq.update()
+
+    sys.stderr.flush()
+
+    if resume_stats and save_stats and recent is not None:
+        os.remove(recent+"/log.txt")
+
+    if experiment_name is not None and save_stats:
+        path = log_time_series(experiment=experiment_name, list_obs=stats)
+        if return_trajectory:
+            cache_write(trajectories, path+"/trajectories.pkl")
+        logged_keys = ', '.join(stats[0].keys()) if stats else ''  # stats is empty when no episode reached log_interval
+        print(f"Training completed. Logging {experiment_name}: '{logged_keys}'")
+
+    # Post-processing: when the info-dictionaries carry a 'supersample' entry, the recorded trajectory is replaced
+    # by the concatenation of the supersampled pieces. For every entry except index 0 of env_info the first row is
+    # dropped (presumably because it repeats the last row of the previous piece -- TODO confirm).
+    from collections import defaultdict
+    for i, t in enumerate(trajectories):
+        nt = defaultdict(list)
+        if t.env_info is not None and len(t.env_info) > 1 and t.env_info[1] is not None and "supersample" in t.env_info[1]:
+            for f in fields:
+                for k, ei in enumerate(t.env_info):
+                    if 'supersample' not in ei:
+                        continue
+                    z = getattr(ei['supersample'], f).T
+                    if k > 0:
+                        z = z[1:]
+                    nt[f].append(z)
+
+            for f in fields:
+                nt[f] = np.concatenate(nt[f], axis=0)
+            trajectories[i] = Trajectory(**nt, env_info=[])
+
+    # Turn this into a single episodes-list (refactor later).
+    # NOTE: stats holds one entry per logged episode (see log_interval), trajectories one entry per simulated
+    # episode (when return_trajectory is true), so the two lists may differ in length.
+
+    return stats, trajectories
+
+
+if __name__ == "__main__":
+    # Small demo: simulate one episode of the inventory environment with the default (random) Agent and print the statistics.
+    from irlc.ex01.inventory_environment import InventoryEnvironment
+    env = InventoryEnvironment(N=10)
+    stats, traj = train(env, Agent(env))
+    print(stats)
+    s = Stats(episode=1, episode_length=2, accumulated_reward=4, total_steps=4, trajectory=None, agent_stats={})  # Hand-made example values
+    print(s)
diff --git a/irlc/ex01/bobs_friend.py b/irlc/ex01/bobs_friend.py
new file mode 100644
index 0000000000000000000000000000000000000000..0d515d8e5e2f8186451fe37b03aa8b83ea7f66ed
--- /dev/null
+++ b/irlc/ex01/bobs_friend.py
@@ -0,0 +1,59 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+import gymnasium
+import numpy as np
+from gymnasium.spaces.discrete import Discrete
+from irlc.ex01.agent import Agent, train
+
+class BobFriendEnvironment(gymnasium.Env):  # Exercise skeleton: reset() and step() are to be completed.
+    def __init__(self, x0=20):
+        self.x0 = x0                        # Stored starting value (the script below expects reset() to return 20 by default)
+        self.action_space = Discrete(2)     # Possible actions {0, 1} 
+
+    def reset(self):
+        # TODO: 1 lines missing.
+        raise NotImplementedError("Insert your solution and remove this error.")
+        return self.s, {}                   # reset() returns the pair (state, info-dictionary)
+
+    def step(self, a):
+        # TODO: 9 lines missing.
+        raise NotImplementedError("Insert your solution and remove this error.")
+        return s_next, reward, terminated, False, {}  # (next state, reward, terminated, truncated=False, info-dictionary)
+
+class AlwaysAction_u0(Agent):  # Constant policy (exercise skeleton)
+    def pi(self, s, k, info=None):  
+        """This agent should always take action u=0, whatever the state ``s`` and time step ``k`` are."""
+        # TODO: 1 lines missing.
+        raise NotImplementedError("Implement function body")
+
+class AlwaysAction_u1(Agent):  # Constant policy (exercise skeleton)
+    def pi(self, s, k, info=None):  
+        """This agent should always take action u=1, whatever the state ``s`` and time step ``k`` are."""
+        # TODO: 1 lines missing.
+        raise NotImplementedError("Implement function body")
+
+if __name__ == "__main__":
+    # Part A: call reset() and step() by hand and print the resulting state and reward.
+    env = BobFriendEnvironment()
+    x0, _ = env.reset()
+    print(f"Initial amount of money is x0 = {x0} (should be 20 kroner)")
+    print("Lets put it in the bank, we should end up in state x1=22 and get a reward of 2 kroner")
+    x1, reward, _, _, _ = env.step(0)
+    print("we got", x1, reward)
+    # Since we reset the environment, we should get the same result as before:
+    env.reset()
+    x1, reward, _, _, _ = env.step(0)
+    print("(once more) we got", x1, reward, "(should be the same as before)")
+
+    env.reset()  # We must call reset -- the environment has possibly been changed!
+    print("Lets lend it to our friend -- what happens will now be random")
+    x1, reward, _, _, _ = env.step(1)
+    print("we got", x1, reward)
+
+    # Part B: average the accumulated reward of each constant policy over 1000 episodes using train().
+    stats, _ = train(env, AlwaysAction_u0(env), num_episodes=1000)
+    average_u0 = np.mean([stat['Accumulated Reward'] for stat in stats])
+
+    stats, _ = train(env, AlwaysAction_u1(env), num_episodes=1000)
+    average_u1 = np.mean([stat['Accumulated Reward'] for stat in stats])
+    print(f"Average reward while taking action u=0 was {average_u0} (should be 2)")
+    print(f"Average reward while taking action u=1 was {average_u1} (should be 4)")
diff --git a/irlc/ex01/chess.py b/irlc/ex01/chess.py
new file mode 100644
index 0000000000000000000000000000000000000000..935e1fc1c4c40d121bcf249eb00b17e11e618c82
--- /dev/null
+++ b/irlc/ex01/chess.py
@@ -0,0 +1,99 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+"""This file contains code for the Chess Tournament problem."""
+import numpy as np
+from gymnasium.spaces.discrete import Discrete
+from gymnasium import Env
+
+class ChessTournament(Env):
+    """The ChessTournament gymnasium-environment which simulates a chess tournament.
+
+    In the problem, a chess tournament ends when a player wins two games in a row. The results
+    of each game are -1, 0, 1 corresponding to a loss, draw and win for player 1. See:
+    https://www.youtube.com/watch?v=5UQU1oBpAic
+
+    To implement this, we define the step-function such that one episode of the environment corresponds to playing
+    a chess tournament to completion. Once the environment completes, it returns a reward of +1 if the player won
+    the tournament, and otherwise 0.
+
+    Each step therefore corresponds to playing a single game in the tournament.
+    To implement this, we use a state corresponding to the sequence of games in the tournament:
+
+    >>> self.s = [0, -1, 1, 0, 0, 1]
+
+    In the self.step(action)-function, we ignore the action, simulate the outcome of a single game,
+    and append the outcome to self.s. We then compute whether the tournament has completed, and if so
+    a reward of 1 if we won.
+    """
+
+    def __init__(self, p_draw=3 / 4, p_win=2 / 3):
+        self.action_space = Discrete(1)  # A single dummy action; step() ignores the action
+        self.p_draw = p_draw  # presumably the probability that a single game is drawn (see the permuted lines in step)
+        self.p_win = p_win    # presumably the probability that player 1 wins a game that is not drawn -- TODO confirm
+        self.s = []  # A chess tournament is a sequence of won/lost games s = [0, -1, 1, 0, ...]
+
+    def reset(self): 
+        """Reset the tournament environment to begin to simulate a new tournament.
+
+        After each episode is complete, this function will reset :python:`self.s` and return the current state s and an empty dictionary.
+        :return:
+            - s - The initial state (what is it?)
+            - info - An empty dictionary, ``{}``
+        """
+        # TODO: 1 lines missing.
+        raise NotImplementedError("Implement function body")
+        return self.s, {}
+
+    def step(self, action):
+        """Play a single game in the current tournament.
+
+        The variable action is required by gymnasium but it is not used since no (player) actions occur in this problem.
+
+        The step-method should update `self.s` to be the next (new) state, compute the reward, and determine whether
+        the environment has terminated (:python:`done`).
+
+        :param action: This input is required by gymnasium but it is not used in this case.
+        :return: A tuple of the form :python:`(new_state, reward, done, False, {})`
+        """
+        game_outcome = None # should be -1, 0, or 1 depending on outcome of single game.
+        ## TODO: Oy veh, the following 7 lines below have been permuted. Uncomment, rearrange to the correct order and remove the error.
+        #-------------------------------------------------------------------------------------------------------------------------------
+        #     else:
+        # else:
+        #         game_outcome = 1
+        #     if np.random.rand() < self.p_win:
+        #         game_outcome = -1 
+        #     game_outcome = 0
+        # if np.random.rand() < self.p_draw: 
+        raise NotImplementedError("Compute game_outcome here")
+        self.s.append(game_outcome)
+
+        # done = True if the tournament has ended, otherwise False. Compute using self.s.
+        # TODO: 1 lines missing.
+        raise NotImplementedError("Compute 'done', whether the tournament has ended.")
+        # r = ... . Compute reward. Let r=1 if we won the tournament otherwise 0.
+        # TODO: 1 lines missing.
+        raise NotImplementedError("Compute the reward 'r' here.")
+        return self.s, r, done, False, {}
+
+def main():
+    """The main method of the chess-game problem.
+
+    This function will simulate T tournaments and estimate the average win probability for player 1 as p_win (answer to riddle) and also
+    the average length. Note the latter should be a 1-liner, but would require non-trivial computations to solve
+    analytically. Please see the :class:`gymnasium.Env` class for additional details.
+    """
+    T = 5000
+    from irlc import train, Agent
+    env = ChessTournament()
+    # Compute stats using the train function. Simulate the tournament for a total of T episodes.
+    # TODO: 1 lines missing.
+    raise NotImplementedError("Compute stats here using train(env, ...). Use num_episodes.")
+    p_win = np.mean([st['Accumulated Reward'] for st in stats])
+    avg_length = np.mean([st['Length'] for st in stats])
+
+    print("Agent: Estimated chance I won the tournament: ", p_win)  
+    print("Agent: Average tournament length", avg_length)  
+
+
+if __name__ == "__main__":
+    main()
diff --git a/irlc/ex01/inventory_environment.py b/irlc/ex01/inventory_environment.py
new file mode 100644
index 0000000000000000000000000000000000000000..b4fb9b0d7b81fbf071dfa19057a056c4ae4eaf83
--- /dev/null
+++ b/irlc/ex01/inventory_environment.py
@@ -0,0 +1,71 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+import numpy as np
+from gymnasium.spaces.discrete import Discrete
+from gymnasium import Env
+from irlc.ex01.agent import Agent, train
+
+class InventoryEnvironment(Env): 
+    def __init__(self, N=2):
+        self.N = N                               # planning horizon
+        self.action_space      = Discrete(3)     # Possible actions {0, 1, 2}
+        self.observation_space = Discrete(3)     # Possible observations {0, 1, 2}
+
+    def reset(self):
+        self.s = 0                               # reset initial state x0=0
+        self.k = 0                               # reset time step k=0
+        return self.s, {}                        # Return the state we reset to (and an empty dict)
+
+    def step(self, a):
+        w = np.random.choice(3, p=(.1, .7, .2))    # Random disturbance w in {0, 1, 2}, drawn with probabilities 0.1, 0.7 and 0.2
+        # TODO: 5 lines missing.
+        raise NotImplementedError("Insert your solution and remove this error.")
+        return s_next, reward, terminated, False, {}  # (next state, reward, terminated, truncated=False, empty info dict)
+
+class RandomAgent(Agent):  # Exercise skeleton: pi() is to be completed.
+    def pi(self, s, k, info=None): 
+        """Return the action to take in state ``s`` at time step ``k``."""
+        # TODO: 1 lines missing.
+        raise NotImplementedError("Implement function body")
+
+
+def simplified_train(env: Env, agent: Agent) -> float:  # Exercise skeleton: one rollout of at most 1000 steps, returns the accumulated reward J
+    s, _ = env.reset()
+    J = 0  # Accumulated reward for this rollout
+    for k in range(1000):
+        ## TODO: Oy veh, the following 7 lines below have been permuted. Uncomment, rearrange to the correct order and remove the error.
+        #-------------------------------------------------------------------------------------------------------------------------------
+        # if terminated or truncated:
+        # sp, r, terminated, truncated, metadata = env.step(a)
+        # a = agent.pi(s, k) 
+        # s = sp
+        # J += r
+        # agent.train(s, a, r, sp, terminated)
+        #     break 
+        raise NotImplementedError("Remove this exception after the above lines have been uncommented and rearranged.")
+    return J 
+
+def run_inventory():  # Compares the random policy evaluated with RandomAgent, the base Agent, and simplified_train
+    env = InventoryEnvironment() 
+    agent = RandomAgent(env)
+    stats, _ = train(env,agent,num_episodes=1,verbose=False)  # Perform one rollout.
+    print("Accumulated reward of first episode", stats[0]['Accumulated Reward']) 
+    # I recommend inspecting 'stats' in a debugger; why do you think it is a list of length 1?
+
+    stats, _ = train(env, agent, num_episodes=1000,verbose=False)  # do 1000 rollouts 
+    avg_reward = np.mean([stat['Accumulated Reward'] for stat in stats])
+    print("[RandomAgent class] Average cost of random policy J_pi_random(0)=", -avg_reward)  # cost = minus reward
+    # Try to inspect stats again in a debugger here. How long is the list now?
+
+    stats, _ = train(env, Agent(env), num_episodes=1000,verbose=False)  # Perform 1000 rollouts using Agent class 
+    avg_reward = np.mean([stat['Accumulated Reward'] for stat in stats])
+    print("[Agent class] Average cost of random policy J_pi_random(0)=", -avg_reward)  
+
+    """ Second part: Using the simplified training method. I.e. do not use train() below.
+     You can find some pretty strong hints about what goes on in simplified_train in the lecture slides for today. """
+    avg_reward_simplified_train = np.mean( [simplified_train(env, agent) for i in range(1000)]) 
+    print("[simplified train] Average cost of random policy J_pi_random(0) =", -avg_reward_simplified_train)  
+
+
+
+if __name__ == "__main__":
+    run_inventory()
diff --git a/irlc/ex01/pacman_hardcoded.py b/irlc/ex01/pacman_hardcoded.py
new file mode 100644
index 0000000000000000000000000000000000000000..62547565232907e67c339e90463d1c7a9cd6f121
--- /dev/null
+++ b/irlc/ex01/pacman_hardcoded.py
@@ -0,0 +1,60 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.pacman.pacman_environment import PacmanEnvironment
+from irlc import Agent, train, savepdf
+
+
+# Maze layouts can be specified using a string.
+layout = """
+%%%%%%%%%%
+%P.......%
+%.%%%%%%.%
+%.%    %.%
+%.%    %.%
+%.%    %.%
+%.%    %.%
+%.%%%%%%.%
+%........%
+%%%%%%%%%%
+"""
+
+# This is our first agent. Note it inherits from the Agent class. Use <ctrl>+click in pycharm to navigate to code definitions --
+# this is a very useful habit when you work with other people's code in general, and object-oriented code in particular.
+class GoAroundAgent(Agent):
+    def pi(self, x, k, info=None): 
+        """ Collect all dots in the maze in the smallest amount of time.
+        This function should return an action, check the output of the code below to see what actions you can potentially
+        return.
+        Remember Pacman only has to solve this single maze, so don't make the function general.
+
+        Hints:
+            - Insert a breakpoint in the function. Try to write self.env and self.env.action_space.actions in the interpreter. Where did self.env get set?
+            - Remember that k is the current step number.
+            - Ignore the info dictionary; you can probably also ignore the state x.
+            - The function should return a string (the actions are strings such as 'North')
+        """
+        # TODO: 7 lines missing.
+        raise NotImplementedError("Implement function body")
+        return 'West'
+
+if __name__ == "__main__":
+    # Create an environment with the given layout. render_mode='human' is just for a nicer visualization.
+    env = PacmanEnvironment(layout_str=layout, render_mode='human')
+    # This creates a visualization (Note this makes the environment slower) which can help us see what Pacman does
+    # This creates the GoAroundAgent instance
+    agent = GoAroundAgent(env)
+    # Uncomment the following line to input actions with the keyboard instead of letting the agent choose them (requires `from irlc import interactive`):
+    # env, agent = interactive(env, agent)
+    s, info = env.reset() # Reset (and start) the environment
+
+    savepdf("pacman_roundabout.pdf", env=env) # Saves a snapshot of the start layout
+    # The next two lines display two ways to get the available actions. The 'canonical' way using the
+    # env.action_space, and a way particular to Pacman by using the s.A() function on the state.
+    # You can read more about the functions in the state in project 1.
+    # print("Available actions at start:", env.action_space.actions) # This will list the available actions. 
+    print("Alternative way of getting actions:", s.A())  # See also project description
+
+    # Simulate the agent for one episode
+    stats, _ = train(env, agent, num_episodes=1)
+    # Print your obtained score.
+    print("Your obtained score was", stats[0]['Accumulated Reward'])
+    env.close()  # When working with visualizations, call env.close() to close windows it may have opened.
diff --git a/irlc/exam/exam2023spring/__init__.py b/irlc/exam/exam2023spring/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..a56057c84d0ceac54aab1d40ba0f370c77fe10be 100644
--- a/irlc/exam/exam2023spring/__init__.py
+++ b/irlc/exam/exam2023spring/__init__.py
@@ -0,0 +1 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
diff --git a/irlc/exam/exam2023spring/exam2023spring.pdf b/irlc/exam/exam2023spring/exam2023spring.pdf
deleted file mode 100644
index bdd6de3cfe5d79986fff304f59484f44e1dbd197..0000000000000000000000000000000000000000
Binary files a/irlc/exam/exam2023spring/exam2023spring.pdf and /dev/null differ
diff --git a/irlc/exam/exam2023spring/exam2023spring_problems_nosol.zip b/irlc/exam/exam2023spring/exam2023spring_problems_nosol.zip
deleted file mode 100644
index f2dc8fe47613b69a5fa8915b716f90f8ec51c988..0000000000000000000000000000000000000000
Binary files a/irlc/exam/exam2023spring/exam2023spring_problems_nosol.zip and /dev/null differ
diff --git a/irlc/exam/exam2023spring/readme.md b/irlc/exam/exam2023spring/readme.md
new file mode 100644
index 0000000000000000000000000000000000000000..c041c5216f5a12754b844c5d316e1d0e835ec09a
--- /dev/null
+++ b/irlc/exam/exam2023spring/readme.md
@@ -0,0 +1,2 @@
+This directory is purposefully left empty. During the exam, you will be given a `.zip` file with the content of this directory. 
+Replace this directory with the corresponding directory from the `.zip` file to begin working on the exam. 
diff --git a/irlc/exam/exam2024spring/__init__.py b/irlc/exam/exam2024spring/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..a56057c84d0ceac54aab1d40ba0f370c77fe10be 100644
--- a/irlc/exam/exam2024spring/__init__.py
+++ b/irlc/exam/exam2024spring/__init__.py
@@ -0,0 +1 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
diff --git a/irlc/exam/exam2024spring/readme.md b/irlc/exam/exam2024spring/readme.md
new file mode 100644
index 0000000000000000000000000000000000000000..c041c5216f5a12754b844c5d316e1d0e835ec09a
--- /dev/null
+++ b/irlc/exam/exam2024spring/readme.md
@@ -0,0 +1,2 @@
+This directory is purposefully left empty. During the exam, you will be given a `.zip` file with the content of this directory. 
+Replace this directory with the corresponding directory from the `.zip` file to begin working on the exam. 
diff --git a/irlc/exam/midterm2023a/__init__.py b/irlc/exam/midterm2023a/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..a56057c84d0ceac54aab1d40ba0f370c77fe10be 100644
--- a/irlc/exam/midterm2023a/__init__.py
+++ b/irlc/exam/midterm2023a/__init__.py
@@ -0,0 +1 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
diff --git a/irlc/exam/midterm2023a/midterm2023a.pdf b/irlc/exam/midterm2023a/midterm2023a.pdf
deleted file mode 100644
index 1f744703e6e059a56a52385921eac20ac1c407f4..0000000000000000000000000000000000000000
Binary files a/irlc/exam/midterm2023a/midterm2023a.pdf and /dev/null differ
diff --git a/irlc/exam/midterm2023a/midterm2023a_problems_nosol.zip b/irlc/exam/midterm2023a/midterm2023a_problems_nosol.zip
deleted file mode 100644
index 32b50ea36fd9ef796ce38b983407b626e6ff97ca..0000000000000000000000000000000000000000
Binary files a/irlc/exam/midterm2023a/midterm2023a_problems_nosol.zip and /dev/null differ
diff --git a/irlc/exam/midterm2023b/__init__.py b/irlc/exam/midterm2023b/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..a56057c84d0ceac54aab1d40ba0f370c77fe10be 100644
--- a/irlc/exam/midterm2023b/__init__.py
+++ b/irlc/exam/midterm2023b/__init__.py
@@ -0,0 +1 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
diff --git a/irlc/exam/midterm2023b/midterm2023b.pdf b/irlc/exam/midterm2023b/midterm2023b.pdf
deleted file mode 100644
index 8285b441e012d3dbb3ded27452268acf50c25534..0000000000000000000000000000000000000000
Binary files a/irlc/exam/midterm2023b/midterm2023b.pdf and /dev/null differ
diff --git a/irlc/exam/midterm2023b/midterm2023b_problems_nosol.zip b/irlc/exam/midterm2023b/midterm2023b_problems_nosol.zip
deleted file mode 100644
index 63d3d8a4589ecc9d862f1ad4b8bb5227355087fb..0000000000000000000000000000000000000000
Binary files a/irlc/exam/midterm2023b/midterm2023b_problems_nosol.zip and /dev/null differ
diff --git a/irlc/gridworld/gridworld_environments.py b/irlc/gridworld/gridworld_environments.py
index 6d72a4604efb618601f8f6fb51ff77a863be92b5..ec7497a9757fdc781ef55337b3f965ea055fffbf 100644
--- a/irlc/gridworld/gridworld_environments.py
+++ b/irlc/gridworld/gridworld_environments.py
@@ -130,7 +130,7 @@ class GridworldEnvironment(MDP2GymEnv):
             from irlc.gridworld.gridworld_graphics_display import GraphicsGridworldDisplay
             self.display_pygame = GraphicsGridworldDisplay(self.mdp, size=int(150 * self.zoom), frames_per_second=self.frames_per_second) # last item is grid size
 
-        agent = self.agent
+        agent = self.unwrapped.agent
         label = None
         method_label = agent.method if hasattr(agent, 'method') else ''
         if label is None and len(method_label) > 0:
diff --git a/irlc/project0/fruit_project_grade.py b/irlc/project0/fruit_project_grade.py
index 09f167b5c8a0f91562f2669a6894fdd19f0db404..45effc9d1199015fbb7986c0b3f76d7d0966054e 100644
--- a/irlc/project0/fruit_project_grade.py
+++ b/irlc/project0/fruit_project_grade.py
@@ -1,4 +1,4 @@
 # irlc/project0/fruit_project_tests.py
 ''' WARNING: Modifying, decompiling or otherwise tampering with this script, it's data or the resulting .token file will be investigated as a cheating attempt. '''
 import bz2, base64
-exec(bz2.decompress(base64.b64decode('')))
\ No newline at end of file
+exec(bz2.decompress(base64.b64decode('')))
\ No newline at end of file
diff --git a/irlc/project0/unitgrade_data/AdditionQuestion.pkl b/irlc/project0/unitgrade_data/AdditionQuestion.pkl
index 0c4041c8e0e84dd5d88fbdef9bb42dfe5acb86c1..fe9c2a23f2af8dc94ac2bd4bf81d5e54a9b68160 100644
Binary files a/irlc/project0/unitgrade_data/AdditionQuestion.pkl and b/irlc/project0/unitgrade_data/AdditionQuestion.pkl differ
diff --git a/irlc/project0/unitgrade_data/BasicClass.pkl b/irlc/project0/unitgrade_data/BasicClass.pkl
index 92432f3a7881a8671667fa274e3f2ad4ce75f0a7..47ed674dfcb1e6f259cee38efda6aaac9ceb15e8 100644
Binary files a/irlc/project0/unitgrade_data/BasicClass.pkl and b/irlc/project0/unitgrade_data/BasicClass.pkl differ
diff --git a/irlc/project0/unitgrade_data/MeanOfDie.pkl b/irlc/project0/unitgrade_data/MeanOfDie.pkl
index 1fc660ef2bdeb1a5a38c2b4da18a0e5cbe3e314f..da4fbd8a3572fb97700917d214f1198c6c5f3434 100644
Binary files a/irlc/project0/unitgrade_data/MeanOfDie.pkl and b/irlc/project0/unitgrade_data/MeanOfDie.pkl differ
diff --git a/irlc/project0/unitgrade_data/MisterfyQuestion.pkl b/irlc/project0/unitgrade_data/MisterfyQuestion.pkl
index 2fd6e50ff8e35454d3b2f7706f33086fc43d468b..3c7b548715ac22b52e6f066bc7131de0f81ff4c8 100644
Binary files a/irlc/project0/unitgrade_data/MisterfyQuestion.pkl and b/irlc/project0/unitgrade_data/MisterfyQuestion.pkl differ
diff --git a/irlc/project1/Latex/02465project1_handin.tex b/irlc/project1/Latex/02465project1_handin.tex
new file mode 100644
index 0000000000000000000000000000000000000000..f59e1d27e2cf427513a83618a9f3df9d071bd70b
--- /dev/null
+++ b/irlc/project1/Latex/02465project1_handin.tex
@@ -0,0 +1,107 @@
+\documentclass[12pt,twoside]{article}
+%\usepackage[table]{xcolor} % important to avoid options clash.
+%\input{02465shared_preamble}
+%\usepackage{cleveref}
+\usepackage{url}
+\usepackage{graphics}
+\usepackage{multicol}
+\usepackage{rotate}
+\usepackage{rotating}
+\usepackage{booktabs}
+\usepackage{hyperref}
+\usepackage{pifont}
+\usepackage{latexsym}
+\usepackage[english]{babel}
+\usepackage{epstopdf}
+\usepackage{etoolbox}
+\usepackage{amsmath}
+\usepackage{amssymb}
+\usepackage{multirow,epstopdf}
+\usepackage{fancyhdr}
+\usepackage{booktabs}
+\usepackage{xcolor}
+\newcommand\redt[1]{ {\textcolor[rgb]{0.60, 0.00, 0.00}{\textbf{ #1} } } }
+
+
+\newcommand{\m}[1]{\boldsymbol{ #1}}
+\newcommand{\yoursolution}{ \redt{(your solution here) } } 
+
+
+
+\title{ Report 1 hand-in }
+\date{ \today }
+\author{Alice (\texttt{s000001})\and  Bob (\texttt{s000002})\and Clara (\texttt{s000003}) } 
+
+\begin{document}
+\maketitle
+
+\begin{table}[ht!]
+\caption{Attribution table. Feel free to add/remove rows and columns}
+\begin{tabular}{llll}
+\toprule
+                                                        & Alice   & Bob    & Clara   \\
+\midrule
+ 1: A basic blaster-business                            & 0-100\%  & 0-100\% & 0-100\%  \\
+ 2: Warmup                                              & 0-100\%  & 0-100\% & 0-100\%  \\
+ 3: Manually computing $J_{N-1}$                        & 0-100\%  & 0-100\% & 0-100\%  \\
+ 4: Compute optimal policy and value function           & 0-100\%  & 0-100\% & 0-100\%  \\
+ 5: Kiosk2                                              & 0-100\%  & 0-100\% & 0-100\%  \\
+ 6: Explaining the policy                               & 0-100\%  & 0-100\% & 0-100\%  \\
+ 7: Policy explanation continued                        & 0-100\%  & 0-100\% & 0-100\%  \\
+ 8: Go east                                             & 0-100\%  & 0-100\% & 0-100\%  \\
+ 9: Describe the go-east problem                        & 0-100\%  & 0-100\% & 0-100\%  \\
+ 10: Predict consequence of actions                     & 0-100\%  & 0-100\% & 0-100\%  \\
+ 11: Possible future states                             & 0-100\%  & 0-100\% & 0-100\%  \\
+ 12: Shortest path                                      & 0-100\%  & 0-100\% & 0-100\%  \\
+ 13: Predict consequence of actions with one ghost      & 0-100\%  & 0-100\% & 0-100\%  \\
+ 14: Possible future states with one ghost              & 0-100\%  & 0-100\% & 0-100\%  \\
+ 15: Optimal one-ghost planning                         & 0-100\%  & 0-100\% & 0-100\%  \\
+ 16: Predict consequence of actions with several ghosts & 0-100\%  & 0-100\% & 0-100\%  \\
+ 17: Future states                                      & 0-100\%  & 0-100\% & 0-100\%  \\
+ 18: Optimal planning                                   & 0-100\%  & 0-100\% & 0-100\%  \\
+\bottomrule
+\end{tabular}
+\end{table}
+
+%\paragraph{Statement about collaboration:}
+%Please edit this section to reflect how you have used external resources. The following statement will in most cases suffice: 
+%\emph{The code in the irlc/project1 directory is entirely}
+
+%\paragraph{Main report:}
+Headings have been inserted in the document for readability. You only have to edit the part which says \yoursolution. 
+
+\section{The kiosk (\texttt{kiosk.py})}
+\subsubsection*{{\color{red}Problem 1:  A basic blaster-business}}
+
+\yoursolution 	
+\redt{To get you started: \begin{align}
+	N & = 14 \\
+	\mbox{for $k=0,\dots,N$: }\quad	\mathcal{S}_k & = \dots \\
+	\mbox{for $k=0,\dots,N-1$: }\quad \mathcal{A}_k(x_k) & = \dots \\
+	 & \vdots 
+\end{align} }
+
+\subsubsection*{{\color{red}Problem 3:  Manually computing $J_{N-1}$}}
+		
+	\yoursolution 	
+	$$
+	J_{N-1}(20)  = ...
+	$$
+	
+\subsubsection*{{\color{red}Problem 6:  Explaining the policy}}
+
+					The first policy... this can be explained by noting ... \yoursolution 
+
+\subsubsection*{{\color{red}Problem 7:  Policy explanation continued}}
+	
+	$$\mu_{N-1}(0) = ...$$
+\yoursolution 		
+
+\section{Avoid the droid (\texttt{pacman.py})} 
+\subsubsection*{{\color{red}Problem 9:  Describe the go-east problem}}
+	
+		The environment is an example of a .... \\		
+		The controller is an example of a ...
+		\yoursolution 	
+	
+\end{document}
\ No newline at end of file
diff --git a/irlc/project1/Latex/figures/kiosk1.pdf b/irlc/project1/Latex/figures/kiosk1.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..54c179fa1703c83e77398a3f6382d3e685fc8fd9
Binary files /dev/null and b/irlc/project1/Latex/figures/kiosk1.pdf differ
diff --git a/irlc/project1/Latex/figures/kiosk2.pdf b/irlc/project1/Latex/figures/kiosk2.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..07dd964485a357336d64c2393ce3fc97c8af1e14
Binary files /dev/null and b/irlc/project1/Latex/figures/kiosk2.pdf differ
diff --git a/irlc/project1/Latex/figures/your_answer.pdf b/irlc/project1/Latex/figures/your_answer.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..d8c092974e20aaaf1165958a53bdce3a2ebdbf8f
Binary files /dev/null and b/irlc/project1/Latex/figures/your_answer.pdf differ
diff --git a/irlc/project1/__init__.py b/irlc/project1/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a56057c84d0ceac54aab1d40ba0f370c77fe10be
--- /dev/null
+++ b/irlc/project1/__init__.py
@@ -0,0 +1 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
diff --git a/irlc/project1/kiosk.py b/irlc/project1/kiosk.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef5136b474551ef8f8d156bd5e8d5df7b32975a6
--- /dev/null
+++ b/irlc/project1/kiosk.py
@@ -0,0 +1,70 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+"""
+This project resembles the Inventory-control problem discussed in (Her25, Subsection 5.1.2) but with more complicated rules.
+If you are stuck, the inventory-control problem will be a good place to start.
+
+I recommend using the DP_stochastic function (as we did with the inventory-control example). This means
+your main problem is to build appropriate DPModel-classes to represent the different problems.
+
+References:
+  [Her25] Tue Herlau. Sequential decision making. (Freely available online), 2025.
+"""
+from irlc.ex02.dp_model import DPModel
+from irlc.ex02.dp import DP_stochastic
+import matplotlib.pyplot as plt
+from scipy.stats import binom
+from irlc import savepdf
+import numpy as np
+
+def plot_policy(pi, title, pdf):
+    """ Helper function to plot the policy functions pi, as generated by the DP_stochastic function. This function
+    can be used to visualize which actions are taken in which state (y-axis) at which time step (x-axis). """
+    N = len(pi)
+    W = max(pi[0].keys())
+    A = np.zeros((W, N))
+    for i in range(W):
+        for j in range(N):
+            A[i, j] = pi[j][i]
+    plt.imshow(A)
+    plt.title(title)
+    savepdf(pdf)
+    plt.show()
+
+# TODO: 51 lines missing.
+raise NotImplementedError("Insert your solution and remove this error.")
+
+def warmup_states(): 
+    # TODO: 1 lines missing.
+    raise NotImplementedError("return state set")
+
+def warmup_actions(): 
+    # TODO: 1 lines missing.
+    raise NotImplementedError("return action set")
+
+def solve_kiosk_1(): 
+    # TODO: 1 lines missing.
+    raise NotImplementedError("Return cost and policy here (same format as DP_stochastic)")
+
+def solve_kiosk_2(): 
+    # TODO: 1 lines missing.
+    raise NotImplementedError("Return cost and policy here (same format as DP_stochastic)")
+
+
+def main():
+    # Problem 14
+    print("Available states S_0:", warmup_states())
+    print("Available actions A_0(x_0):", warmup_actions())
+
+    J, pi = solve_kiosk_1() # Problem 16
+    print("Kiosk1: Expected profits: ", -J[0][0], " imperial credits")
+    plot_policy(pi, "Kiosk1", "Latex/figures/kiosk1")
+    plt.show()
+
+    J, pi = solve_kiosk_2() # Problem 17
+    print("Kiosk 2: Expected profits: ", -J[0][0], " imperial credits")
+    plot_policy(pi, "Kiosk2", "Latex/figures/kiosk2")
+    plt.show()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/irlc/project1/pacman.py b/irlc/project1/pacman.py
new file mode 100644
index 0000000000000000000000000000000000000000..9fb9dd9c53ac962e134cfd4a31024029cad507eb
--- /dev/null
+++ b/irlc/project1/pacman.py
@@ -0,0 +1,169 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from collections import defaultdict
+from irlc import train
+from irlc.ex02.dp_model import DPModel
+from irlc.ex02.dp import DP_stochastic
+from irlc.ex02.dp_agent import DynamicalProgrammingAgent
+from irlc.pacman.pacman_environment import PacmanEnvironment
+from irlc.pacman.gamestate import GameState
+
+east = """ 
+%%%%%%%%
+% P   .%
+%%%%%%%% """ 
+
+east2 = """
+%%%%%%%%
+%    P.%
+%%%%%%%% """
+
+SS2tiny = """
+%%%%%%
+%.P  %
+% GG.%
+%%%%%%
+"""
+
+SS0tiny = """
+%%%%%%
+%.P  %
+%   .%
+%%%%%%
+"""
+
+SS1tiny = """
+%%%%%%
+%.P  %
+%  G.%
+%%%%%%
+"""
+
+datadiscs = """
+%%%%%%%
+%    .%
+%.P%% %
+%.   .%
+%%%%%%%
+"""
+
+# TODO: 30 lines missing.
+raise NotImplementedError("Put your own code here")
+
+def p_next(x : GameState, u: str): 
+    """ Given the agent is in GameState x and takes action u, the game will transition to a new state xp.
+    The state xp will be random when there are ghosts. This function should return a dictionary of the form
+
+    {..., xp: p, ...}
+
+    of all possible next states xp and their probability -- you need to compute this probability.
+
+    Hints:
+        * In the above, xp should be a GameState, and p will be a float. These are generated using the functions in the GameState x.
+        * Start simple (zero ghosts). Then make it work with one ghost, and then finally with any number of ghosts.
+        * Remember the ghosts move at random. I.e. if a ghost has 3 available actions, it will choose one with probability 1/3
+        * The slightly tricky part is that when there are multiple ghosts, different actions by the individual ghosts may lead to the same final state
+        * Check the probabilities sum to 1. This will be your main way of debugging your code and catching issues relating to the previous point.
+    """
+    # TODO: 8 lines missing.
+    raise NotImplementedError("Return a dictionary {.., xp: p, ..} where xp is a possible next state and p the probability")
+    return states
+
+
+def go_east(map): 
+    """ Given a map-string map (see examples in the top of this file) that can be solved by only going east, this will return
+    a list of states Pacman will traverse. The list it returns should therefore be of the form:
+
+    [s0, s1, s2, ..., sn]
+
+    where each sk is a GameState object, the first element s0 is the start-configuration (corresponding to that in the Map),
+    and the last configuration sn is a won GameState obtained by going east.
+
+    Note this function should work independently of the number of required east-actions.
+
+    Hints:
+        * Use the PacmanEnvironment class. The report description will contain information about how to set it up, as will pacman_demo1.py
+        * Use this environment to get the first GameState, then use the recommended functions to go east
+    """
+    # TODO: 5 lines missing.
+    raise NotImplementedError("Return the list of states pacman will traverse if he goes east until he wins the map")
+    return states
+
+def get_future_states(x, N): 
+    # TODO: 4 lines missing.
+    raise NotImplementedError("return a list-of-list of future states [S_0, ... ,S_N]. Each S_k is a state space, i.e. a list of GameState objects.")
+    return state_spaces
+
+def win_probability(map, N=10): 
+    """ Assuming you get a reward of -1 on winning (and otherwise zero), the win probability is -J_pi(x_0). """
+    # TODO: 5 lines missing.
+    raise NotImplementedError("Return the chance of winning the given map within N steps or less.")
+    return win_probability
+
+def shortest_path(map, N=10): 
+    """ If each move has a cost of 1, the shortest path is the path with the lowest cost.
+    The actions should be the list of actions taken.
+    The states should be a list of states the agent visits. The first should be the initial state and the last
+    should be the won state. """
+    # TODO: 4 lines missing.
+    raise NotImplementedError("Return the cost of the shortest path, the list of actions taken, and the list of states.")
+    return actions, states
+
+
+def no_ghosts():
+    # Check the pacman_demo1.py file for help on the GameState class and how to get started.
+    # This function contains examples of calling your functions. However, you should use unitgrade to verify correctness.
+
+    ## Problem 1: Let's try to go East. Run this code to see if the states you return look sensible.
+    states = go_east(east)
+    for s in states:
+        print(str(s))
+
+    ## Problem 3: try the p_next function for a few empty environments. Does the result look sensible?
+    x, _ = PacmanEnvironment(layout_str=east).reset()
+    action = x.A()[0]
+    print(f"Transitions when taking action {action} in map: 'east'")
+    print(x)
+    print(p_next(x, action))  # use str(state) to get a nicer representation.
+
+    print(f"Transitions when taking action {action} in map: 'east2'")
+    x, _ = PacmanEnvironment(layout_str=east2).reset()
+    print(x)
+    print(p_next(x, action))
+
+    ## Problem 4
+    print(f"Checking states space S_1 for k=1 in SS0tiny:")
+    x, _ = PacmanEnvironment(layout_str=SS0tiny).reset()
+    states = get_future_states(x, N=10)
+    for s in states[1]: # Print all elements in S_1.
+        print(s)
+    print("States at time k=10, |S_10| =", len(states[10]))
+
+    ## Problem 6
+    N = 20  # Planning horizon
+    action, states = shortest_path(east, N)
+    print("east: Optimal action sequence:", action)
+
+    action, states = shortest_path(datadiscs, N)
+    print("datadiscs: Optimal action sequence:", action)
+
+    action, states = shortest_path(SS0tiny, N)
+    print("SS0tiny: Optimal action sequence:", action)
+
+
+def one_ghost():
+    # Win probability when planning using a single ghost. Notice this tends to increase with planning depth
+    wp = []
+    for n in range(10):
+        wp.append(win_probability(SS1tiny, N=n))
+    print(wp)
+    print("One ghost:", win_probability(SS1tiny, N=12))
+
+
+def two_ghosts():
+    # Win probability when planning using two ghosts
+    print("Two ghosts:", win_probability(SS2tiny, N=12))
+
+if __name__ == "__main__":
+    no_ghosts()
+    one_ghost()
+    two_ghosts()
diff --git a/irlc/project1/pacman_demo1.py b/irlc/project1/pacman_demo1.py
new file mode 100644
index 0000000000000000000000000000000000000000..bf74e07b095507c77acd59521cd892a40b11d1ac
--- /dev/null
+++ b/irlc/project1/pacman_demo1.py
@@ -0,0 +1,53 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.pacman.pacman_environment import PacmanEnvironment
+from irlc.project1.pacman import east, datadiscs, SS1tiny, SS2tiny
+from irlc import interactive, savepdf, Agent, train
+import matplotlib
+matplotlib.use('qtagg')
+
+count = """
+%%%%
+%P %
+%..%
+%%%%
+"""
+
+
+if __name__ == "__main__":
+    # Example interaction with an environment:
+    # Instantiate the map 'east' and get a GameState instance: 
+    env = PacmanEnvironment(layout_str=east, render_mode='human')
+    x, info = env.reset() # x is an irlc.pacman.gamestate.GameState object. See the online documentation for more examples.
+    print("Start configuration of board:")
+    print(x)
+    env.close() # If you use render_mode = 'human', I recommend you use env.close() at the end of the code to free up graphics resources.
+    # The GameState object `x` has a handful of useful functions. The important ones are:
+    # x.A()       # Action space
+    # x.f(action) # State resulting in taking action 'action' in state 'x'
+    # x.players() # Number of agents on board (at least 1)
+    # x.player()  # Whose turn it is (player = 0 is us)
+    # x.is_won()   # True if we have won
+    # x.is_lost()  # True if we have lost
+    # You can check if two GameState objects x1 and x2 are the same by simply doing x1 == x2. 
+    # There are other functions in the GameState class, but I advise against using them.
+    from irlc.pacman.pacman_environment import PacmanEnvironment, datadiscs
+    env = PacmanEnvironment(layout_str=datadiscs, render_mode='human')
+    s, _ = env.reset()
+
+    savepdf('pacman_east', env=env)
+    env.close()
+
+    env = PacmanEnvironment(layout_str=datadiscs, render_mode='human')
+    env.reset()
+    savepdf('pacman_datadiscs', env=env)
+    env.close()
+
+    env = PacmanEnvironment(layout_str=SS1tiny, render_mode='human')
+    env.reset()
+    savepdf('pacman_SS1tiny', env=env)
+    env.close()
+
+    env = PacmanEnvironment(layout_str=SS2tiny, render_mode='human')
+    env.reset()
+    savepdf('pacman_SS2tiny', env=env)
+    env.close()
diff --git a/irlc/project1/pacman_demo2.py b/irlc/project1/pacman_demo2.py
new file mode 100644
index 0000000000000000000000000000000000000000..a3bf61d9756b70f810ff3f962ee59cb1f478bc11
--- /dev/null
+++ b/irlc/project1/pacman_demo2.py
@@ -0,0 +1,11 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.pacman.pacman_environment import PacmanEnvironment
+from irlc.project1.pacman import east, datadiscs, SS1tiny, SS2tiny
+from irlc import interactive, savepdf, Agent, train
+
+if __name__ == "__main__":
+    env = PacmanEnvironment(layout_str=datadiscs, render_mode='human')
+    env, agent = interactive(env, Agent(env))
+    stats, trajectory = train(env, agent, num_episodes=1)
+    print("First state was\n", trajectory[0].state[0])
+    env.close()
diff --git a/irlc/project1/project1_grade.py b/irlc/project1/project1_grade.py
new file mode 100644
index 0000000000000000000000000000000000000000..0051778ca320d2cb0cd049b7b27d17004f964cf7
--- /dev/null
+++ b/irlc/project1/project1_grade.py
@@ -0,0 +1,4 @@
+# irlc/project1/project1_tests.py
+''' WARNING: Modifying, decompiling or otherwise tampering with this script, it's data or the resulting .token file will be investigated as a cheating attempt. '''
+import bz2, base64
+exec(bz2.decompress(base64.b64decode('')))
\ No newline at end of file
diff --git a/irlc/project1/project1_tests.py b/irlc/project1/project1_tests.py
new file mode 100644
index 0000000000000000000000000000000000000000..dd846223725ae942bc57f2ead0768d4e3bb12de1
--- /dev/null
+++ b/irlc/project1/project1_tests.py
@@ -0,0 +1,377 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from unitgrade import UTestCase, Report
+from irlc.pacman.gamestate import GameState
+from irlc.pacman.pacman_environment import PacmanEnvironment
+import numpy as np
+from unitgrade import hide
+
+def get_starting_state(name):  # name: a key understood by get_map, e.g. 'east'
+    s0, _ = PacmanEnvironment(layout_str=get_map(name)).reset()  # reset() yields a pair; only its first element is kept
+    return s0
+
+def get_map(name):  # Translate a layout name (string) into the layout object defined in irlc.project1.pacman.
+    from irlc.project1.pacman import east, east2, SS0tiny, datadiscs, SS1tiny, SS2tiny
+    names2maps = {'east': east,
+                  'east2': east2,
+                  'datadiscs': datadiscs,
+                  'SS0tiny': SS0tiny,
+                  'SS1tiny': SS1tiny,
+                  'SS2tiny': SS2tiny,
+                  }
+    return names2maps[name]  # raises KeyError for a name not listed above
+
+class Pacman1(UTestCase):
+    """ Problem 1: The go_east function """
+
+    def test_states_length(self):
+        from irlc.project1.pacman import go_east, east
+        self.title = "Checking number of states"
+        self.assertEqualC(len(go_east(east)))
+        # assert False
+
+
+    def test_first_state(self):
+        from irlc.project1.pacman import go_east, east
+        self.title = "Checking first state"
+        self.assertEqualC(str(go_east(east))[0]) # NOTE(review): this is the first CHARACTER of str(<whole return value>), not str() of the first state; str(go_east(east)[0]) was probably intended -- changing it presumably requires regenerating the cached expected value.
+
+    def test_all_states(self):
+        self.title = "Checking complete output"
+        from irlc.project1.pacman import go_east, east
+        self.assertEqualC(tuple(str(s) for s in go_east(east)))
+
+
+class Pacman3(UTestCase):
+    """ Problem 3: the p_next function without droids """
+    map = 'east'  # layout name, resolved through get_map; subclasses override it
+    action = 'East'  # action passed to p_next; subclasses may override it
+
+    def get_transitions(self):
+        from irlc.project1.pacman import p_next
+
+        state = get_starting_state(self.map)
+        state_transitions = p_next(state, self.action)
+        self.assertIsInstance(state_transitions, dict)
+        for x in state_transitions:  # Test if each new state is actually a GameState.
+            self.assertIsInstance(x, GameState)
+        dd = {s: np.round(p, 4) for s, p in state_transitions.items()}  # probabilities are rounded to 4 decimals before comparison
+        return dd
+
+    def test_dictionary_size(self):
+        """ Is the number of keys/values in the dictionary correct? """
+        # print(self.get_expected_test_value())
+        self.assertEqualC(len(self.get_transitions()))
+        # self.get_expected_value()
+
+
+    def test_probabilities(self):
+        """ Do the probabilities have the right values? """
+        self.assertEqualC(set(self.get_transitions().values()))  # a set: equal probabilities collapse to one entry
+
+    def test_states(self):
+        """ Does the dictionary contain the right states? """
+        self.assertEqualC(set(self.get_transitions().keys()))
+
+    def test_everything(self):
+        """ Test both states and probabilities """
+        self.assertEqualC(self.get_transitions())
+
+
+class Pacman4(UTestCase):
+    """ Problem 4: Compute the state spaces as a list [S_0, ..., S_N] on the map 'east' using N = 7 """
+    map = 'east'  # layout name, resolved through get_map; subclasses override it
+    N = 7  # horizon passed to get_future_states; subclasses override it
+
+    @property
+    def states(self):
+        return self.__class__.states_  # filled in by setUpClass
+
+    @property
+    def sizes(self):
+        return self.__class__.sizes_  # filled in by setUpClass
+
+    @classmethod
+    def setUpClass(cls):
+        from irlc.project1.pacman import get_future_states
+        states = get_future_states(get_starting_state(cls.map), cls.N)
+        assert isinstance(states, list)
+        for S in states:
+            assert isinstance(S, list)
+            for s in S:
+                assert isinstance(s, GameState)
+        cls.sizes_ = [len(S) for S in states]  # lengths of the raw lists, so duplicates inside an S_k are counted
+        cls.states_ = [set(S) for S in states]  # compared as sets, so the order within each S_k does not matter
+
+    def test_state_space_size_S0(self):
+        self.assertEqualC(self.sizes[0])
+
+    def test_state_space_size_S1(self):
+        self.assertEqualC(self.sizes[1])
+
+    def test_state_space_size_all(self):
+        self.assertEqualC(self.sizes)
+
+    def test_number_of_spaces(self):
+        """ Check the list of state spaces has the right length. It should be N+1 long (S_0, ..., S_N) """
+        self.assertEqualC(len(self.states))
+
+    def test_state_space_0(self):
+        """ Check the first element, the state space S0.
+
+        Hints:
+            * It should be a list containing a single GameState object (the starting state) """
+        self.assertEqualC(self.states[0])
+
+    def test_state_space_1(self):
+        """ Check the second element, the state space S1.
+
+        Hints:
+            * It should be a list containing the GameState objects you can go to in one step.
+            * You should be able to figure out what they are from the description of the game rules. Note pacman will not move if he walks into the walls. """
+        self.assertEqualC(self.states[1])
+
+    def test_state_spaces(self):
+        """ Test all state spaces S_0, ..., S_N
+
+        Hints:
+            * If this method breaks, find the first state space which is wrongly computed, and work out which states are missing or should not be there
+            * I anticipate the won/lost game configurations may become a source of problems. Note you don't have to specify these manually; they should follow by using the s.f(action)-function. """
+
+        self.assertEqualC(tuple(self.states))
+
+
+class Pacman6a(UTestCase):
+    """ Problem 6a: No ghost optimal path (get_shortest_path) in map 'east' using N=20 """
+    map = 'east'
+    N = 20
+
+    def get_shortest_path(self):
+        from irlc.project1.pacman import shortest_path
+        layout = get_map(self.map)
+        actions, states = shortest_path(layout, self.N)
+        return actions, states
+
+    def test_sequence_lengths(self):
+        """ Test the length of the state/action lists. """
+        actions, states = self.get_shortest_path()
+        print("self.map", self.map, 'actions', actions)
+        self.assertEqualC(len(actions))
+        self.assertEqualC(len(states))
+
+    def test_trajectory(self):
+        """ Test the state/action trajectory """
+        actions, states = self.get_shortest_path()
+        self.assertTrue(states[-1].is_won())
+
+        x0 = states[0]
+        for k, u in enumerate(actions):
+            x0 = x0.f(u)
+            self.assertTrue(x0 == states[k + 1])
+        self.assertEqualC(states[1])
+        # self.assertEqualC(J)
+
+class Pacman6b(Pacman6a):
+    """ Problem 6b: No ghost optimal path (get_shortest_path) in map 'SS0tiny' using N=20 """
+    map = 'SS0tiny'
+
+class Pacman6c(Pacman6a):
+    """ Problem 6c: No ghost optimal path (get_shortest_path) in map 'datadiscs' using N=20 """
+    map = 'datadiscs'
+
+## ONE GHOST
+class Pacman7a(Pacman3):
+    """ Problem 7a: the p_next function with one droid """
+    map = 'SS1tiny'
+    action = 'East'
+
+class Pacman7b(Pacman3):
+    """ Problem 7b: the p_next function with one droid """
+    map = 'SS1tiny'
+    action = 'West'
+
+class Pacman8a(Pacman4):
+    """ Problem 8a: Test the state spaces as a list [S_0, ..., S_N] on the map 'SS1tiny' using N = 4 """
+    map = 'SS1tiny'
+    N = 4
+
+class Pacman8b(Pacman4):
+    """ Problem 8b: Test the state spaces as a list [S_0, ..., S_N] on the map 'SS1tiny' using N = 6 """
+    map = 'SS1tiny'
+    N = 6
+    pass
+
+class Pacman9(UTestCase):
+    """ Problem 9: Testing winrate on the map SS1tiny (win_probability) """
+    map = 'SS1tiny'
+
+    def _win_rate(self, N):
+        self.title = f"Testing winrate in {N} steps"
+        from irlc.project1.pacman import win_probability
+        p = np.round(win_probability(get_map(self.map), N), 4)
+        print("win rate in N ", N, "steps was", p)
+        # print("Testing win rate", self.get_expected_test_value())
+        self.assertEqualC(p)
+
+    def test_win_rate_N4(self):
+        self._win_rate(N=4)
+
+    def test_win_rate_N5(self):
+        self._win_rate(N=5)
+
+    def test_win_rate_N6(self):
+        self._win_rate(N=6)
+
+
+# ## TWO GHOSTS
+class Pacman10(Pacman3): # p_next for two ghosts
+    """ Problem 10: Testing the p_next function using SS2tiny """
+    map = 'SS2tiny'
+    N = 4  # NOTE(review): none of the tests inherited from Pacman3 reads N; the action stays Pacman3's 'East'
+
+class Pacman11(Pacman4): # State-space lists
+    """ Problem 11: Test the state spaces as a list [S_0, ..., S_N]. on the map 'SS2tiny' using N = 3 """
+    map = 'SS2tiny'
+    N = 3
+
+class Pacman12(Pacman9): # Optimal planning for two ghost-droids.
+    """ Problem 12: Testing winrate on the map SS2tiny (win_probability) """
+    map = 'SS2tiny'
+    N = 2  # NOTE(review): unused -- the inherited tests pass N=4, 5 and 6 explicitly to _win_rate
+
+class Kiosk1(UTestCase):
+    """ Problem 14: Warmup check of S_0 and A_0(x_0) """
+    def test_warmup_states_length(self):
+        from irlc.project1.kiosk import warmup_states, warmup_actions
+        n = len(warmup_states())
+        self.title = f"Checking length of state space is {n}"
+        self.assertEqualC(n)
+
+    def test_warmup_actions_length(self):
+        from irlc.project1.kiosk import warmup_states, warmup_actions
+        n = len(warmup_actions())
+        self.title = f"Checking length of action space is {n}"
+        self.assertEqualC(n)
+
+
+    def test_warmup_states(self):
+        self.title = "Checking state space"
+        from irlc.project1.kiosk import warmup_states, warmup_actions
+        self.assertEqualC(set(warmup_states()))
+
+    def test_warmup_actions(self):
+        self.title = "Checking action space"
+        from irlc.project1.kiosk import warmup_states, warmup_actions
+        self.assertEqualC(set(warmup_actions()))
+
+
+class Kiosk2(UTestCase):
+    """ Problem 16: solve_kiosk_1 """
+
+    @classmethod
+    def setUpClass(cls) -> None:
+        from irlc.project1.kiosk import solve_kiosk_1
+        cls.J, cls.pi = solve_kiosk_1()  # solved once per class; J is indexed below as J[k][x]
+
+    def mk_title(self, k, x):
+        self.k = k
+        self.x = x
+
+        if self.k is not None:
+            if self.k != -1:
+                sk = f"N-{-self.k - 1}" if self.k < 0 else str(self.k)  # negative k counts from the end: -2 -> "N-1", -3 -> "N-2"
+            else:
+                sk = "N"  # k == -1 denotes the last entry of J
+            jp = "J_{" + sk + "}" if len(sk) > 1 else "J_"+sk  # braces only when the subscript is longer than one character
+        else:
+            jp = "J_k"
+        if self.x is not None:
+            xp = f"(x={self.x})"
+        else:
+            xp = "(x) for all x"
+        return "Checking cost-to-go " + jp + xp
+
+    def check_J(self, k, x):
+        J = [{k: v for k, v in J_.items()} for J_ in self.__class__.J]  # rebuild each J_k as a plain dict (the inner k is local to the comprehension)
+        t = self.mk_title(k, x)
+        if k is not None and x is not None:
+            t += f" = {J[k][x]}"
+        self.title = t
+
+        if k is not None:
+            J_ = J[k]
+            if x is not None:
+                self.assertAlmostEqualC(J_[x], msg=f"Failed test of J[{k}][{x}]", delta=1e-4)
+                # self.assertL2(J_[x], msg=f"Failed test of J[{k}][{x}]", tol=1e-5)
+            else:
+                for state in sorted(J_.keys()):
+                    self.assertAlmostEqualC(J_[state], msg=f"Failed test of J[{k}][{state}]", delta=1e-4)
+        else:  # k is None: check every state of every J_k, in sorted state order
+            for k, J_ in enumerate(J):
+                for state in sorted(J_.keys()):
+                    self.assertAlmostEqualC(J_[state], msg=f"Failed test of J[{k}][{state}]", delta=1e-4)
+
+    def test_case_1(self):
+        self.check_J(k=-1, x=10)
+
+    def test_case_2(self):
+        self.check_J(k=-2, x=20)
+
+    def test_case_3(self):
+        self.check_J(k=-2, x=0)
+
+    def test_case_4(self):
+        self.check_J(k=0, x=0)
+
+    def test_case_5(self):
+        self.check_J(k=1, x=4)
+
+    def test_case_6(self):
+        self.check_J(k=None, x=None)
+
+
+class Kiosk3(Kiosk2):
+    """ Problem 17: solve_kiosk_2 """
+    @classmethod
+    def setUpClass(cls) -> None:
+        from irlc.project1.kiosk import solve_kiosk_2
+        cls.J, cls.pi = solve_kiosk_2()
+
+
+class Project1(Report): # NOTE(review): the weights below sum to 160 (Pacman 100 + Kiosk 60), not the 240 previously stated here -- confirm.
+    title = "02465 project part 1: Dynamical Programming"
+    remote_url = "https://02465material.pages.compute.dtu.dk/02465public/_static/evaluation/"
+    import irlc
+    pack_imports = [irlc]
+    abbreviate_questions = True
+
+    pacman_questions = [  # (test class, weight) pairs; g = number of ghosts on the map
+        (Pacman1, 10), # east
+        (Pacman3, 10), # p_next (g=0)
+        (Pacman4, 10), # future_states (g=0)
+        (Pacman6a, 4), # shortest_path (g=0)
+        (Pacman6b, 3), # shortest_path (g=0)
+        (Pacman6c, 3), # shortest_path (g=0)
+        (Pacman7a, 5), # p_next (g=1)
+        (Pacman7b, 5), # p_next (g=1)
+        (Pacman8a, 5), # future_states (g=1)
+        (Pacman8b, 5), # future_states (g=1)
+        (Pacman9, 10),  # optimal planning (g=1)
+        (Pacman10, 10), # p_next (g=2)
+        (Pacman11, 10), # future_states (g=2)
+        (Pacman12, 10), # optimal planning (g=2)
+                 ]
+
+    kiosk_questions = [  # (test class, weight) pairs
+        (Kiosk1, 10),
+        (Kiosk2, 25),
+        (Kiosk3, 25),
+    ]
+
+    questions = []
+    questions += pacman_questions
+    questions += kiosk_questions
+
+if __name__ == '__main__':
+    from unitgrade import evaluate_report_student
+    evaluate_report_student(Project1())
+# 448, 409 # 303
diff --git a/irlc/project1/unitgrade_data/Kiosk1.pkl b/irlc/project1/unitgrade_data/Kiosk1.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..77935a972bb38c2f6a0b0049567d3c1dcd34b8a4
Binary files /dev/null and b/irlc/project1/unitgrade_data/Kiosk1.pkl differ
diff --git a/irlc/project1/unitgrade_data/Kiosk2.pkl b/irlc/project1/unitgrade_data/Kiosk2.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..b473df178f2b0028ec2a73a01397af4dd716452a
Binary files /dev/null and b/irlc/project1/unitgrade_data/Kiosk2.pkl differ
diff --git a/irlc/project1/unitgrade_data/Kiosk3.pkl b/irlc/project1/unitgrade_data/Kiosk3.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..b473df178f2b0028ec2a73a01397af4dd716452a
Binary files /dev/null and b/irlc/project1/unitgrade_data/Kiosk3.pkl differ
diff --git a/irlc/project1/unitgrade_data/Pacman1.pkl b/irlc/project1/unitgrade_data/Pacman1.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..77606c9d62da4c86930d00b431cead8276f1e032
Binary files /dev/null and b/irlc/project1/unitgrade_data/Pacman1.pkl differ
diff --git a/irlc/project1/unitgrade_data/Pacman10.pkl b/irlc/project1/unitgrade_data/Pacman10.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..f45af44a02f04cd02b950fd05012370184eb11a8
Binary files /dev/null and b/irlc/project1/unitgrade_data/Pacman10.pkl differ
diff --git a/irlc/project1/unitgrade_data/Pacman11.pkl b/irlc/project1/unitgrade_data/Pacman11.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..d7df2b409af8c9ceab168855a1b775e66fde86ec
Binary files /dev/null and b/irlc/project1/unitgrade_data/Pacman11.pkl differ
diff --git a/irlc/project1/unitgrade_data/Pacman12.pkl b/irlc/project1/unitgrade_data/Pacman12.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..a275d890067ce5ed9823bd69c29825457631d328
Binary files /dev/null and b/irlc/project1/unitgrade_data/Pacman12.pkl differ
diff --git a/irlc/project1/unitgrade_data/Pacman3.pkl b/irlc/project1/unitgrade_data/Pacman3.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..f45af44a02f04cd02b950fd05012370184eb11a8
Binary files /dev/null and b/irlc/project1/unitgrade_data/Pacman3.pkl differ
diff --git a/irlc/project1/unitgrade_data/Pacman4.pkl b/irlc/project1/unitgrade_data/Pacman4.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..d7df2b409af8c9ceab168855a1b775e66fde86ec
Binary files /dev/null and b/irlc/project1/unitgrade_data/Pacman4.pkl differ
diff --git a/irlc/project1/unitgrade_data/Pacman6a.pkl b/irlc/project1/unitgrade_data/Pacman6a.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..06b6acc653c46c834e0ea83451ac6ea41478074c
Binary files /dev/null and b/irlc/project1/unitgrade_data/Pacman6a.pkl differ
diff --git a/irlc/project1/unitgrade_data/Pacman6b.pkl b/irlc/project1/unitgrade_data/Pacman6b.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..06b6acc653c46c834e0ea83451ac6ea41478074c
Binary files /dev/null and b/irlc/project1/unitgrade_data/Pacman6b.pkl differ
diff --git a/irlc/project1/unitgrade_data/Pacman6c.pkl b/irlc/project1/unitgrade_data/Pacman6c.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..06b6acc653c46c834e0ea83451ac6ea41478074c
Binary files /dev/null and b/irlc/project1/unitgrade_data/Pacman6c.pkl differ
diff --git a/irlc/project1/unitgrade_data/Pacman7a.pkl b/irlc/project1/unitgrade_data/Pacman7a.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..f45af44a02f04cd02b950fd05012370184eb11a8
Binary files /dev/null and b/irlc/project1/unitgrade_data/Pacman7a.pkl differ
diff --git a/irlc/project1/unitgrade_data/Pacman7b.pkl b/irlc/project1/unitgrade_data/Pacman7b.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..f45af44a02f04cd02b950fd05012370184eb11a8
Binary files /dev/null and b/irlc/project1/unitgrade_data/Pacman7b.pkl differ
diff --git a/irlc/project1/unitgrade_data/Pacman8a.pkl b/irlc/project1/unitgrade_data/Pacman8a.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..d7df2b409af8c9ceab168855a1b775e66fde86ec
Binary files /dev/null and b/irlc/project1/unitgrade_data/Pacman8a.pkl differ
diff --git a/irlc/project1/unitgrade_data/Pacman8b.pkl b/irlc/project1/unitgrade_data/Pacman8b.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..d7df2b409af8c9ceab168855a1b775e66fde86ec
Binary files /dev/null and b/irlc/project1/unitgrade_data/Pacman8b.pkl differ
diff --git a/irlc/project1/unitgrade_data/Pacman9.pkl b/irlc/project1/unitgrade_data/Pacman9.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..a275d890067ce5ed9823bd69c29825457631d328
Binary files /dev/null and b/irlc/project1/unitgrade_data/Pacman9.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl b/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..073cc3abaf79e5c9ce49838c8f63933844682176
Binary files /dev/null and b/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl b/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..b85b7a79f12650ab08c5e1b1846b6dd94e07401e
Binary files /dev/null and b/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl b/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..b122de5cb99d8111d9f14630edf22a4f3b6f8caa
Binary files /dev/null and b/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl b/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..65e8a95eca52ccb875699d342f28839299a696b3
Binary files /dev/null and b/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl b/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..4b90516956cebe85cdf12481939260cb04f7c151
Binary files /dev/null and b/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl b/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..7bdaed17849541d8d61653d1e5dd5c82f35206e4
Binary files /dev/null and b/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl differ
diff --git a/irlc/utils/async_wrappers.py b/irlc/utils/async_wrappers.py
new file mode 100644
index 0000000000000000000000000000000000000000..8dbebf533885664ea41f1a9d01e5012a20ec4490
--- /dev/null
+++ b/irlc/utils/async_wrappers.py
@@ -0,0 +1,39 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from gymnasium.wrappers import TimeLimit
+
+from irlc.pacman.pacman_environment import PacmanWinWrapper
+
+
+
+
+class AsyncPacmanWinWrapper(PacmanWinWrapper):  # Adds an awaitable async_step to PacmanWinWrapper.
+    async def async_step(self, action):
+        observation, reward, done, truncated, info = await self.env.async_step(action)  # the wrapped env's reward is replaced below
+        if self.env.unwrapped.game.state.is_won():  # reward becomes a win indicator: 1 if the game state is won, otherwise 0
+            reward = 1
+        else:
+            reward = 0
+        return observation, reward, done, truncated, info
+
+
+class AsyncTimeLimit(TimeLimit):
+    # Adds an awaitable async_step to TimeLimit; the wrapped env must itself provide async_step.
+
+    async def async_step(self, action):
+        """Asynchronously steps through the environment and truncates once the number of steps elapsed reaches ``max_episode_steps``.
+
+        Args:
+            action: The environment step action
+
+        Returns:
+            The environment step ``(observation, reward, terminated, truncated, info)`` with `truncated=True`
+            if the number of steps elapsed >= max episode steps
+
+        """
+        observation, reward, terminated, truncated, info = await self.env.async_step(action)
+        self._elapsed_steps += 1
+
+        if self._elapsed_steps >= self._max_episode_steps:
+            truncated = True
+
+        return observation, reward, terminated, truncated, info
diff --git a/irlc/utils/bandit_graphics_environment.py b/irlc/utils/bandit_graphics_environment.py
new file mode 100644
index 0000000000000000000000000000000000000000..391f0ea5f31d49a4bc70edddc624eb8d27fd018c
--- /dev/null
+++ b/irlc/utils/bandit_graphics_environment.py
@@ -0,0 +1,340 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+import pygame.draw
+import pygame
+from irlc.ex08.bandits import StationaryBandit
+import time
+import numpy as np
+from irlc.pacman.pacman_resources import WHITE, BLACK, Ghost
+
+
+class BinaryBandit(StationaryBandit):  # Bandit with true/false rewards drawn with probability q_star[a].
+    def reset(self):
+        # self.q_star = np.random.rand(self.k) + self.q_star_mean
+        self.q_star = np.ones(self.k)/3  # every arm starts at 1/3 ...
+        self.q_star[np.random.randint(self.k)] *= 2  # ... and one uniformly chosen arm is doubled to 2/3
+        self.optimal_action = np.argmax(self.q_star)
+        self.previous_action = None
+        self.reward = None
+        return None, {}  # None in the first slot and an empty dict in the second
+
+    def bandit_step(self, a):
+        """ Return the reward/regret for action a for the simple bandit. Use self.q_star (see reset) """
+        reward = np.random.rand() < self.q_star[a]  # boolean: True with probability q_star[a]
+        regret = self.q_star[self.optimal_action] - self.q_star[a]  # zero exactly when a has the largest q_star
+        self.previous_action = a
+        self.reward = reward
+        return reward, regret
+
+
+class GraphicalBandit(BinaryBandit):  # BinaryBandit that redraws a BanditViewer after every reset, step and keypress.
+    viewer = None
+    metadata = {'render_modes': ['human', 'rgb_array'],
+                'render_fps': 20}
+
+    def get_keys_to_action(self):  # maps the digit keys 0-9 to the arm with the same index
+        return {(pygame.K_0,):0, (pygame.K_1,): 1, (pygame.K_2,): 2, (pygame.K_3,): 3, (pygame.K_4,): 4,
+                (pygame.K_5,): 5, (pygame.K_6,): 6, (pygame.K_7,): 7, (pygame.K_8,): 8, (pygame.K_9,): 9,
+                # (pygame.K_0,):0
+                }
+
+    def __init__(self, *args, render_mode='human', frames_per_second=None, **kwargs):
+        self.previous_reward, self.previous_action, self.agent = None, None, None  # presumably set first because the parent constructor may already call reset()/render() -- TODO confirm
+        super().__init__( *args, **kwargs)
+        self.render_mode = render_mode
+        self.viewer = None  # created lazily by render()
+        self.show_q_star = False  # NOTE(review): keypress() toggles the viewer's flag of the same name, not this one
+        self.show_q_ucb = False  # NOTE(review): same as above
+        self.frames_per_second = frames_per_second
+
+        print("press q to show true q values and u to show UCB upper bounds.")
+
+    def reset(self):
+        s, info = super().reset()
+        self.render()
+        return s, info
+
+    def step(self, action):
+        o = super().step(action)
+        self.previous_action = action
+        self.previous_reward = o[1]  # second element of the step result -- presumably the reward
+        self.render()
+        return o
+
+    def keypress(self, key):
+        # print(key)
+        if key.unicode == 'q':
+            self.viewer.show_q_star = not self.viewer.show_q_star  # toggle the true-q overlay on the viewer
+
+        if key.unicode == 'u':
+            self.viewer.show_q_ucb = not self.viewer.show_q_ucb  # toggle the UCB overlay on the viewer
+        self.render()
+
+    def render(self, mode='human', agent=None, prev_action=None, reward=None):  # NOTE: none of the arguments is read; what is drawn comes from self.agent / self.previous_* and self.render_mode
+        if self.viewer is None:
+            self.viewer = BanditViewer(self, frames_per_second=self.frames_per_second)
+        self.viewer.update(self.agent, self.previous_action, self.previous_reward)
+        return self.viewer.blit(render_mode=self.render_mode) #(return_rgb_array=mode == 'rgb_array')
+
+    def close(self):
+        self.viewer.close()  # NOTE(review): raises AttributeError if nothing was rendered yet (viewer is still None)
+
+class BanditViewer:
+    scale = 400  # Scale of a single bar.
+    width = 0.4 * scale  # width of a bar.
+    bar_height = scale
+
+    def __init__(self, bandit, frames_per_second=None):
+        bin_bandit = isinstance(bandit, BinaryBandit)
+        if bin_bandit:
+            ymin = 0 - 0.6 * self.scale
+            ymax = (1 + 0.4)* self.scale
+        else:
+            ymin = (min(bandit.q_star) - 0.6)*self.scale
+            ymax = (max(bandit.q_star) + 0.4)*self.scale
+
+        xmin = -self.width
+        xmax = (bandit.k * self.width  + self.width)
+        # super().__init__(screen_width=1300, xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax)
+        from irlc.utils.graphics_util_pygame import GraphicsUtilGym
+        dx = xmax-xmin
+        dy = ymax-ymin
+
+        # screen_width = 1300, xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax
+        self.ga = GraphicsUtilGym()
+        screen_width = 1300
+        self.ga.begin_graphics(screen_width, dy * (screen_width / dx), local_xmin_xmax_ymin_ymax=(xmin, xmax, ymax, ymin), frames_per_second=frames_per_second)
+        self.bandit = bandit
+        self.ghost = Ghost(self.ga, scale=100)
+
+        # self.ghost.surf = pygame.transform.scale(self.ghost.surf, (self.ghost.surf.get_width()*0.4, self.ghost.surf.get_height()*0.4) )
+        # self.ghost.rect = self.ghost.surf.get_rect()
+        # self.ghost.rect.x = self.width / 2
+        # self.ghost.rect.y = -self.scale
+
+        self.last_action = None
+        self.agent = None
+        self.last_reward = None
+
+        self.show_q_star = False
+        self.show_q_ucb = False
+        # self.ghost.group.scale(self.width*0.4)
+        # self.ghost.group.translate(self.width / 2, -self.scale)
+
+    def close(self):
+        self.ga.close()
+
+    def blit(self, render_mode='human'):
+        return self.ga.blit(render_mode=render_mode)
+
+    def master_render(self):
+        self.ga.draw_background()
+        # batch = pyglet.graphics.Batch()
+        # self.ghosts = pyglet.graphics.Batch()
+        # return
+        self.q_star = []
+        self.q_ucb_upper = []
+        self.Qs = []
+        # bgg = OrderedGroup(-1)
+        # self.bg = [shapes.Rectangle(xmin, ymin, xmax-xmin, ymax-ymin, color=(0,0,0), batch=batch, group=bgg)]
+
+        dd = self.width / 30
+        self.text_n = []
+        fz = int(self.width / 6)
+
+        dw = self.width / 4
+
+        # group = OrderedGroup(1)
+        for i in range(self.bandit.k):
+            x = i * self.width
+            # print(x)
+            # from pyglet import shapes
+            # from pyglet.shapes import Rectangle
+            # self.bg.append(shapes.Rectangle(20, 30, 100,100, color=(100,10,10), batch=batch,group=group))
+
+            # pygame.draw.rect()
+            # self.ga.polygon()
+            # self.ga.rectangle(self.ga.surf, WHITE, pygame.Rect)
+            self.ga.rectangle(WHITE, x-dd, -dd, self.width/2+dd*2, self.bar_height+dd*2, border=0)
+            self.ga.rectangle(BLACK, x, 0, self.width/2, self.bar_height, border=0)
+
+            if self.agent is not None and hasattr(self.agent, 'Q'):
+                # height =
+                # print(self.agent.Q[i] * self.bar_height)
+                self.ga.rectangle((150, 200, 150), x, 0, self.width / 2, self.agent.Q[i] * self.bar_height)  # q-values.
+
+            # self.ga.rectangle((150,200,150), x, 0, self.width / 2, 0, ) # q-values.
+
+            self.ga.text("sadf", (x + self.width / 4, self.bar_height + dw * 2), WHITE, contents=f"Arm  {i}",
+                         size=fz,
+                         style='bold')
+            if self.agent is not None:
+                self.ga.text("sadf", (x + self.width / 4, self.bar_height + dw), WHITE, contents= f"N = {int(self.agent.N[i] if hasattr(self.agent, 'N') else 0)}", size=fz,
+                             style='bold')
+
+
+
+            # return
+            # continue
+            # return
+            # self.bg.append(shapes.Rectangle(x-dd, -dd, self.width/2+dd*2, self.bar_height+dd*2, color=WHITE,batch=batch,group=group))
+            # self.bg.append(shapes.Rectangle(x, 0, self.width/2, self.bar_height, color=BLACK, batch=batch,group=group))
+            # return
+            # self.Qs.append(shapes.Rectangle(x, 0, self.width/2, .0, color=(150,200,150), batch=batch,group=group))
+
+            # q_star = shapes.Rectangle(x, 0, self.width / 2, dd, color=WHITE, batch=batch,group=group)
+            # self.q_star_visible = False
+            if self.show_q_star:
+
+                y =  self.bandit.q_star[i] * self.bar_height
+                self.ga.rectangle(WHITE, x, y, self.width / 2, dd)  # q-star
+                # print(x, y)
+
+            # q_star.visible = False
+            # self.q_star.append(q_star)
+            # self.q_ucb_visible = False
+            if self.show_q_ucb:
+                from irlc.ex08.ucb_agent import UCBAgent
+                # if :
+                if (hasattr(self.agent, 'c') or isinstance(self.agent, UCBAgent)) and hasattr(self.agent, 'N'):  # Required if reset has not been called.
+                    t = (sum(self.agent.N) + 1e-8)
+                    ub = self.agent.Q + self.agent.c * np.sqrt(np.log(t + 1) / (self.agent.N + 1e-8))
+                    self.ga.rectangle((200, 0, 0), x, ub[i] * self.bar_height, self.width / 2, dd)
+                # q_ucb = shapes.Rectangle(x, -1000, self.width / 2, dd, color=(200, 0, 0), batch=batch,group=group)
+            # q_ucb.visible = False
+            # self.q_ucb_upper.append(q_ucb)
+
+            # self.q_star = []
+            # for i, b in enumerate(self.q_star):
+            #     continue
+            #     b.y = self.bandit.q_star[i] * self.bar_height
+            # print(ub)
+            # self.bg.append(Label(f"Arm  {i}", font_name='Arial', x=x+self.width/4, y=self.bar_height + dw*2, anchor_x='center', bold=True, color=(255, 255, 255, 255), anchor_y='center', font_size=fz, batch=batch, group=group))
+            # self.text_n.append(Label(f"N = {0}", font_name='Arial', x=x+self.width/4, y=self.bar_height + dw, anchor_x='center', bold=True, color=(255, 255, 255, 255), anchor_y='center', font_size=fz, batch=batch, group=group))
+        if self.agent is not None:
+            self.ga.text("sadf", (self.width / 2, self.bar_height + dw * 3), WHITE,
+                         contents=f"{self.agent.method if hasattr(self.agent, 'method') else ''}", size=fz,
+                         style='bold', anchor='c')
+        reward = self.last_reward
+        action = self.last_action
+        self.ghost.set_direction(self.ghost.rand_eyes()) # Random eyes.
+        if reward is not None:
+            if reward <= 0:
+                self.ghost.kill()
+            else:
+                self.ghost.resurrect()
+            last_outcome_x =  (action+0.25)*self.width
+            last_outcome_y = reward
+            self.ga.circle("cc", (last_outcome_x, last_outcome_y), self.width / 10, fillColor=WHITE, outlineColor=None)
+
+            # self.last_outcome.x = (action+0.25)*self.width
+            # self.last_outcome.y = reward
+        else:
+            pass
+            # self.last_outcome.x = -self.scale
+
+        if action is None:
+            action = -0.5
+
+        y2 = -0.45*self.width - 0.25 * self.width
+        x2 = (action+.25) * self.width
+        self.ghost.set_position(x2, y2)
+        self.ghost.render()
+        return
+        # for i, b in enumerate(self.q_star):
+        #     b.y = self.bandit.q_star[i]* self.bar_height
+        #
+        #     from irlc.ex08.ucb_agent import UCBAgent
+        #     if isinstance(agent, UCBAgent) and hasattr(agent, 'N'): # Required if reset has not been called.
+        #         t = sum(agent.N)
+        #         ub = agent.Q + agent.c * np.sqrt(np.log(t + 1) / (agent.N + 1e-8))
+        #
+        #         for i, b in enumerate(self.q_ucb_upper):
+        #             b.y = ub[i]* self.bar_height
+        #
+        #         # print(ub)
+        #     s = 234
+
+        # self.last_outcome = shapes.Circle(-self.scale, 0, self.width/10, color=WHITE, batch=batch,group=group)
+        # self.batch = batch
+
+    def update(self, agent, action, reward):
+        """Store the latest agent, action and reward on the instance, then call ``master_render()``."""
+        self.last_action, self.last_reward = action, reward
+        self.agent = agent
+        self.master_render()
+
+        # return
+        # # return
+        #
+        # # if action is not None:
+        # #     y2 = -0.45*self.width
+        # #     x2 = (action+.25) * self.width
+        # #     self.ghost.set_position(x2, y2)
+        #
+        #
+        # # dd = self.width / 30
+        # self.text_n = []
+        # # group = OrderedGroup(1)
+        # for i in range(self.bandit.k):
+        #     x = i * self.width
+        #     # print(x)
+        #     # from pyglet import shapes
+        #     # from pyglet.shapes import Rectangle
+        #     # self.bg.append(shapes.Rectangle(20, 30, 100,100, color=(100,10,10), batch=batch,group=group))
+        #
+        #     # pygame.draw.rect()
+        #     # self.ga.polygon()
+        #     # self.ga.rectangle(self.ga.surf, WHITE, pygame.Rect)
+        #     self.ga.rectangle(WHITE, x - dd, -dd, self.width / 2 + dd * 2, self.bar_height + dd * 2, border=0)
+        #     self.ga.rectangle(BLACK, x, 0, self.width / 2, self.bar_height, border=0)
+        #
+        #
+        #     dw = self.width / 4
+        #     fz = int(self.width / 6)
+        #
+        #     # for i, b in enumerate(self.Qs):
+        #     if agent is not None:
+        #         if hasattr(agent, 'Q'):
+        #             height = agent.Q[i] * self.bar_height
+        #             self.ga.rectangle((150, 200, 150), x, 0, self.width / 2, height)  # q-values.
+        #
+        #         if hasattr(agent, 'N'):
+        #             nlabel = f"N = {int(agent.N[i])}"
+        #             self.ga.text("sadf", (x + self.width / 4, self.bar_height + dw), WHITE, contents=nlabel,
+        #                          size=fz,
+        #                          style='bold', anchor='c')
+        #
+        #             # self.text_n[i].text =
+        #
+        #     self.ga.text("sadf", (x + self.width / 4, self.bar_height + dw * 2), WHITE, contents=f"Arm  {i}",
+        #                  size=fz,
+        #                  style='bold', anchor='c')
+        #
+        # return
+
+
+    # def draw(self):
+    #     self.batch.draw()
+    #     self.ghosts.draw()
+
+
+if __name__ == "__main__":
+    # Demo/benchmark: run a UCB agent on the graphical bandit and print the time per step.
+    from irlc import train
+    from irlc import interactive
+    from irlc.ex08.ucb_agent import UCBAgent
+
+    env = GraphicalBandit(10, render_mode='human')
+    agent = UCBAgent(env)
+    # interactive() returns a new (env, agent) pair; both replace the originals from here on.
+    env, agent = interactive(env, agent)
+
+    n = 500  # Passed to train() as max_steps; also the divisor for the per-step time below.
+    t0 = time.perf_counter()  # Monotonic clock: unaffected by system clock adjustments.
+    try:
+        train(env, agent, max_steps=n, num_episodes=10, return_trajectory=False, verbose=False)
+        tpf = (time.perf_counter() - t0) / n
+    finally:
+        env.close()  # Always close the environment, even if training raises.
+    # A zero elapsed time would otherwise raise ZeroDivisionError; report infinite fps instead.
+    print("tpf", tpf, 'fps', 1 / tpf if tpf > 0 else float('inf'))
diff --git a/irlc/utils/graphics_util_pygame.py b/irlc/utils/graphics_util_pygame.py
index ca4b67d13ee2a411e6ea73ede7d8a5b09c47f8e5..025e6a8e26798448a92a8975cd4d9247af231df9 100644
--- a/irlc/utils/graphics_util_pygame.py
+++ b/irlc/utils/graphics_util_pygame.py
@@ -242,7 +242,9 @@ class GraphicsUtilGym:
 
     def draw_background(self, background_color=None):
         if background_color is None:
-            background_color = (0, 0, 0)
+            # background_color = (0, 0, 0)
+            background_color = self._bg_color
+
         self._bg_color = background_color
         x1, x2, y1, y2 = self._local_xmin_xmax_ymin_ymax
         corners = [ (x1, y1), (x2, y1), (x2, y2), (x1, y2)  ]
diff --git a/irlc/utils/player_wrapper.py b/irlc/utils/player_wrapper.py
index 74296f5f5eb5680650d0765b58c3a411b5edbf59..e84b48fec75e81c155a330565979af3a2cd6fe6d 100644
--- a/irlc/utils/player_wrapper.py
+++ b/irlc/utils/player_wrapper.py
@@ -127,13 +127,13 @@ async def _webassembly_interactive(env, agent, autoplay=False):
         #     s, info = env.reset()
         #     continue
 
-        sp, reward, done, _, info_sp = (await env.async_step(a)) if hasattr(env, 'async_step') else env.step(a)
+        sp, reward, done, truncated, info_sp = (await env.async_step(a)) if hasattr(env, 'async_step') else env.step(a)
 
         agent.train(s, a, reward, sp, done=done, info_s=info, info_sp=info_sp)
 
         step = step + 1
         k = k + 1
-        if done:
+        if done or truncated:
             sp, info_sp = env.reset()
             k = 0
 
@@ -211,6 +211,8 @@ class PlayWrapperPygame(AgentWrapper):
 
         agent.train = train_
         env.agent = agent
+        env.unwrapped.agent = agent
+
         env.reset = reset_
         env.step = step_
 
@@ -298,61 +300,61 @@ class PlayWrapperPygame(AgentWrapper):
                 if a is not None:
                     # print("Breaking", a)
                     break
-                if False:
-                    if event.type == pygame.QUIT:
-                        if hasattr(self, 'env'):
-                            self.env.close()
-                        time.sleep(0.1)
-                        pygame.display.quit()
-                        time.sleep(0.1)
-                        pygame.quit()
-                        time.sleep(0.1)
-                        sys.exit()
-
-                    # checking if keydown event happened or not
-                    if event.type == pygame.KEYDOWN:
-                        if event.key == pygame.K_SPACE:
-                            a = pi_action
-                            break
-                        elif (event.key,) in self.keys_to_action:
-                            a = self.keys_to_action[(event.key,)]
-                            if info is not None and 'mask' in info:
-                                from irlc.utils.common import DiscreteTextActionSpace
-                                if isinstance(self.env.action_space, DiscreteTextActionSpace):
-                                    aint = self.env.action_space.actions.index(a)
-                                else:
-                                    aint = a
-
-                                if info['mask'][aint] == 0:
-                                    # The action was masked. This means that this action is unavailable, and we should select another.
-                                    # The default is to select one of the available actions from the mask.
-                                    a = info['mask'].argmax()
-                                    if isinstance(self.env.action_space, DiscreteTextActionSpace):
-                                        a = self.env.action_space.actions[a]
-                                break
-                            else:
-                                break
-                        elif event.key == pygame.K_r:
-                            print("Pressing r")
-                            if hasattr(self, 'reset'):
-                                return PlayWrapperPygame.ACTION_FORCE_RESET
-                            #
-                            #     self.reset()
-                            #     self.env.reset()
-                            #
-                            # self.env.render()
-                        elif event.key == pygame.K_f:
-                            print("Pressing f")
-                            self.env.render()
-
-                        elif event.unicode == 'p':
-                            # unpause
-                            self.human_demand_autoplay = not self.human_demand_autoplay
-                            break
-                        else:
-                            # try to pass event on to the game.
-                            if hasattr(self.env, 'keypress'):
-                                self.env.keypress(event)
+                # if False:
+                #     if event.type == pygame.QUIT:
+                #         if hasattr(self, 'env'):
+                #             self.env.close()
+                #         time.sleep(0.1)
+                #         pygame.display.quit()
+                #         time.sleep(0.1)
+                #         pygame.quit()
+                #         time.sleep(0.1)
+                #         sys.exit()
+                #
+                #     # checking if keydown event happened or not
+                #     if event.type == pygame.KEYDOWN:
+                #         if event.key == pygame.K_SPACE:
+                #             a = pi_action
+                #             break
+                #         elif (event.key,) in self.keys_to_action:
+                #             a = self.keys_to_action[(event.key,)]
+                #             if info is not None and 'mask' in info:
+                #                 from irlc.utils.common import DiscreteTextActionSpace
+                #                 if isinstance(self.env.action_space, DiscreteTextActionSpace):
+                #                     aint = self.env.action_space.actions.index(a)
+                #                 else:
+                #                     aint = a
+                #
+                #                 if info['mask'][aint] == 0:
+                #                     # The action was masked. This means that this action is unavailable, and we should select another.
+                #                     # The default is to select one of the available actions from the mask.
+                #                     a = info['mask'].argmax()
+                #                     if isinstance(self.env.action_space, DiscreteTextActionSpace):
+                #                         a = self.env.action_space.actions[a]
+                #                 break
+                #             else:
+                #                 break
+                #         elif event.key == pygame.K_r:
+                #             print("Pressing r")
+                #             if hasattr(self, 'reset'):
+                #                 return PlayWrapperPygame.ACTION_FORCE_RESET
+                #             #
+                #             #     self.reset()
+                #             #     self.env.reset()
+                #             #
+                #             # self.env.render()
+                #         elif event.key == pygame.K_f:
+                #             print("Pressing f")
+                #             self.env.render()
+                #
+                #         elif event.unicode == 'p':
+                #             # unpause
+                #             self.human_demand_autoplay = not self.human_demand_autoplay
+                #             break
+                #         else:
+                #             # try to pass event on to the game.
+                #             if hasattr(self.env, 'keypress'):
+                #                 self.env.keypress(event)
 
             if self.human_demand_autoplay:
                 a = pi_action