diff --git a/irlc/ex04/__init__.py b/irlc/ex04/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..d084853c8794e0bebd41758f6e27fbf152bc134f
--- /dev/null
+++ b/irlc/ex04/__init__.py
@@ -0,0 +1,20 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+"""This directory contains the exercises for week 4."""
+
+speech = """
+Fate has ordained that the men who went to the moon to explore in peace will stay on the moon to rest in peace.
+
+These brave men, Neil Armstrong and Edwin Aldrin, know that there is no hope for their recovery. But they also know that there is hope for mankind in their sacrifice.
+
+These two men are laying down their lives in mankind’s most noble goal: the search for truth and understanding.
+
+They will be mourned by their families and friends; they will be mourned by their nation; they will be mourned by the people of the world; they will be mourned by a Mother Earth that dared send two of her sons into the unknown.
+
+In their exploration, they stirred the people of the world to feel as one; in their sacrifice, they bind more tightly the brotherhood of man.
+
+In ancient days, men looked at stars and saw their heroes in the constellations. In modern times, we do much the same, but our heroes are epic men of flesh and blood.
+
+Others will follow, and surely find their way home. Man’s search will not be denied. But these men were the first, and they will remain the foremost in our hearts.
+
+For every human being who looks up at the moon in the nights to come will know that there is some corner of another world that is forever mankind.
+"""
diff --git a/irlc/ex04/control_environment.py b/irlc/ex04/control_environment.py
new file mode 100644
index 0000000000000000000000000000000000000000..df927eeef2cd111586f9415f48e02798acae5a60
--- /dev/null
+++ b/irlc/ex04/control_environment.py
@@ -0,0 +1,185 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+import gymnasium as gym
+import numpy as np
+from irlc.ex03.control_model import ensure_policy
+from irlc.ex04.discrete_control_model import DiscreteControlModel
+
+
+class ControlEnvironment(gym.Env):
+    """
+    Helper class to convert a discretized model into an environment.
+    See the ``__init__`` function for how to create a new environment using this class. Once an environment has been
+    created, you can use it as any other gym environment:
+
+    .. runblock:: pycon
+
+        >>> from irlc.ex04.model_pendulum import GymSinCosPendulumEnvironment
+        >>> env = GymSinCosPendulumEnvironment(Tmax=4) # Specify we want it to run for a maximum of 4 seconds
+        >>> env.reset() # Reset both the time and state variable
+        >>> u = env.action_space.sample()
+        >>> next_state, cost, done, truncated, info = env.step(u)
+        >>> print("Current state: ", env.state)
+        >>> print("Current time", env.time)
+
+    In this example, we tell the environment to terminate after 4 seconds using ``Tmax`` (after which ``step`` returns ``done=True``).
+
+    .. Note::
+        The ``step``-method will use the (nearly exact) RK4 method to integrate the environment over a time span of ``dt``,
+        and **not** use the approximate ``model.f(x_k, u_k, k)``-method in the discrete model, which is based on
+        Euler discretization.
+        This is the correct behavior since we want the environment to reflect what happens in the real world and not
+        our approximation method.
+    """
+    metadata = {
+        'render_modes': ['human', 'rgb_array'],
+        'render_fps': 30
+    }
+    action_space = None
+    observation_space = None
+
+    def __init__(self, discrete_model: DiscreteControlModel, Tmax=None, supersample_trajectory=False, render_mode=None):
+        """
+        Creates a new instance. You should use this in conjunction with a discrete model to build a new class. An example:
+
+        .. runblock:: pycon
+
+            >>> from irlc.ex04.model_pendulum import DiscreteSinCosPendulumModel
+            >>> from irlc.ex04.control_environment import ControlEnvironment
+            >>> from gymnasium.spaces import Box
+            >>> import numpy as np
+            >>> class MyGymSinCosEnvironment(ControlEnvironment):
+            ...     def __init__(self, Tmax=5):
+            ...         discrete_model = DiscreteSinCosPendulumModel()
+            ...         self.action_space = Box(low=-np.inf, high=np.inf, shape=(1,), dtype=np.float64)
+            ...         self.observation_space = Box(low=-np.inf, high=np.inf, shape=(3,), dtype=np.float64)
+            ...         super().__init__(discrete_model, Tmax=Tmax)
+            >>>
+            >>> env = MyGymSinCosEnvironment()
+            >>> env.reset()
+            >>> env.step(env.action_space.sample())
+
+        :param discrete_model: The discrete model the environment is based on
+        :param Tmax: Time in seconds until the environment terminates (``step`` returns ``done=True``)
+        :param supersample_trajectory: Used to create nicer (smooth) trajectories. Don't worry about it.
+        :param render_mode: If ``human`` the environment will be rendered (inherited from ``Env``)
+        """
+        self.dt = discrete_model.dt  # Discretization time
+        self.state = None            # the current state
+        self.time = 0                # Current time (in seconds)
+        self.discrete_model = discrete_model
+        self.Tmax = Tmax
+
+        # Try to guess action/observation spaces unless they are already defined.
+        if self.observation_space is None:
+            self.observation_space = gym.spaces.Box(-np.inf, np.inf, shape=(discrete_model.state_size,) )
+
+        if self.action_space is None:
+            u_bound = self.discrete_model.continuous_model.u_bound()
+            self.action_space = gym.spaces.Box(low=np.asarray(self.discrete_model.phi_u(u_bound.low), dtype=np.float64),
+                                               high=np.asarray(self.discrete_model.phi_u(u_bound.high), dtype=np.float64),
+                                               dtype=np.float64,
+                                               )
+        self.state_labels = discrete_model.state_labels
+        self.action_labels = discrete_model.action_labels
+        self.supersample_trajectory = supersample_trajectory
+        self.render_mode = render_mode
+
+
+    def step(self, u):
+        r"""
+        This works similar to the gym ``Env.step``-function. ``u`` is an action in the action-space,
+        and the environment will then assume we (constantly) apply the action ``u`` from the current time step, :math:`t_k`, until
+        the next time step :math:`t_{k+1} = t_k + \Delta`, where :math:`\Delta` is equal to ``env.model.dt``.
+
+        During this period, the next state is computed using the relatively exact RK4 simulation, and the incurred cost will be
+        computed using Riemann integration.
+
+        .. math::
+            \int_{t_k}^{t_k+\Delta} c(x(t), u(t)) dt
+
+        .. Note::
+            The gym interface requires that we return a reward. The reward will therefore be equal to minus the integral of the cost function over the time step.
+
+            In case the environment terminates, the reward will also include the terminal cost :math:`c_F`.
+
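+        As a sketch of typical usage (building on the pendulum environment from the class example above), a full
+        rollout loop could look like:
+
+        .. runblock:: pycon
+
+            >>> from irlc.ex04.model_pendulum import GymSinCosPendulumEnvironment
+            >>> env = GymSinCosPendulumEnvironment(Tmax=4)
+            >>> x, info = env.reset()
+            >>> done = False
+            >>> while not done:
+            ...     x, reward, done, truncated, info = env.step(env.action_space.sample())
+            >>> env.close()
+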
+        :param u: The action we apply :math:`u`
+        :return:
+            - ``state`` - the state we arrive in
+            - ``reward`` - (minus) the total (integrated) cost incurred in this time period.
+            - ``done`` - ``True`` if the environment has finished, i.e. we reached ``env.Tmax``.
+            - ``truncated`` - ``True`` if the environment was forced to terminate prematurely. Assume it is ``False`` and ignore it.
+            - ``info`` - A dictionary of potential extra information. Includes ``info['time_seconds']``, which is the current time after the step function has completed.
+        """
+
+        u = np.clip(u, a_min=self.action_space.low, a_max=self.action_space.high)
+        self.discrete_model.continuous_model._u_prev = u # for rendering.
+        if not ((self.action_space.low <= u).all() and (u <= self.action_space.high).all()): #  u not in self.action_space:
+            raise Exception("Action", u, "not contained in action space", self.action_space)
+        # N=20 is a bit arbitrary; should probably be a parameter to the environment.
+
+        # Simulate the continuous model over one discrete time step, applying the coordinate transformations phi.
+        policy = ensure_policy(u)
+        x0 = self.state
+        t0 = self.time
+        tF = self.time + self.discrete_model.dt
+        policy_continuous = lambda x, t: self.discrete_model.phi_u_inv(policy(x, t))
+        x, u, tt, total_cost = self.discrete_model.continuous_model.simulate(self.discrete_model.phi_x_inv(x0), policy_continuous, t0, tF, N_steps=20, method='rk4')
+        # transform to discrete representations using phi.
+        xx = np.stack([np.asarray(self.discrete_model.phi_x(x_)).reshape((-1,)) for x_ in x])
+        uu = np.stack([np.asarray(self.discrete_model.phi_u(u_)).reshape((-1,)) for u_ in u])
+
+        self.state = xx[-1]
+        self.time = tt[-1]
+        cc = [self.discrete_model.cost.c(x, u, k=None) for x, u in zip(xx[:-1], uu[:-1])]
+        done = False
+        if self.time + self.discrete_model.dt/2 > self.Tmax:
+            cc[-1] += self.discrete_model.cost.cN(xx[-1])
+            done = True
+        info = {'dt': self.discrete_model.dt, 'time_seconds': self.time}  # Allow the train() function to figure out the simulation time step size
+        if self.supersample_trajectory:   # This is only for nice visualizations.
+            from irlc.ex01.agent import Trajectory
+            traj = Trajectory(time=tt, state=xx.T, action=uu.T, reward=np.asarray(cc), env_info=[])
+            info['supersample'] = traj # Supersample the trajectory
+        reward = -sum(cc)  # To be compatible with the Gym reward convention, we return the reward as minus the cost.
+        if not ( (self.observation_space.low <= self.state).all() and (self.state <= self.observation_space.high).all() ): #self.state not in self.observation_space:
+            print("> state", self.state)
+            print("> observation space", self.observation_space)
+            raise Exception("State no longer in observation space", self.state)
+        if self.render_mode == "human": # Render as in gym's cartpole
+            self.render()
+
+        return self.state, reward, done, False, info
+
+    def reset(self):
+        """
+        Reset the environment to the initial state. By default this is the value computed by ``self._get_initial_state()``.
+
+        :return:
+            - ``state`` - The initial state the environment has been reset to
+            - ``info`` - A dictionary with extra information, in this case that time begins at 0 seconds.
+        """
+        self.state = self._get_initial_state()
+        self.time = 0 # Reset internal time (seconds)
+        if self.render_mode == "human":
+            self.render()
+        return self.state, {'time_seconds': self.time}
+
+    def _get_initial_state(self) -> np.ndarray:
+        # This helper function returns an initial state. It will be used by the reset() function, and it is this function
+        # you should overwrite if you want to reset to a state which is not implied by the bounds.
+        if (self.discrete_model.continuous_model.x0_bound().low == self.discrete_model.continuous_model.x0_bound().high).all():
+            return np.asarray(self.discrete_model.phi_x(self.discrete_model.continuous_model.x0_bound().low))
+        else:
+            raise Exception("Since bounds do not agree I cannot return initial state.")
+
+    def render(self):
+        return self.discrete_model.render(x=self.state, render_mode=self.render_mode)
+
+    def close(self):
+        self.discrete_model.close()
diff --git a/irlc/ex04/discrete_control_cost.py b/irlc/ex04/discrete_control_cost.py
new file mode 100644
index 0000000000000000000000000000000000000000..e4e024a37fa4519da4d465c9e3b134ed916b7198
--- /dev/null
+++ b/irlc/ex04/discrete_control_cost.py
@@ -0,0 +1,195 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+"""
+Quadratic cost functions
+"""
+import numpy as np
+from irlc.ex03.control_cost import targ2matrices
+
+def nz(X, a, b=None):
+    """Helper: return X if it is not None; otherwise return a zero vector of shape (a,) or a zero matrix of shape (a, b)."""
+    return np.zeros((a,) if b is None else (a, b)) if X is None else X
+
+class DiscreteQRCost: #(DiscreteCost):
+    r"""
+    This class represents the cost function for a discrete-time model. In the simulations, we are going to assume
+    that the cost function takes the form:
+
+    .. math::
+        \sum_{k=0}^{N-1} c_k(x_k, u_k) + c_N(x_N)
+
+
+    And this class will specifically implement the two functions :math:`c` and :math:`c_N`.
+    They will be assumed to have the quadratic form:
+
+    .. math::
+        c_k(x_k, u_k) & = \frac{1}{2} x_k^T Q x_k + \frac{1}{2} u_k^T R u_k + u^T_k H x_k + q^T x_k + r^T u_k + q_0, \\\\
+        c_N(x_N) & = \frac{1}{2} x_N^T Q_N x_N + q_N^T x_N + q_{0,N}.
+
+    So what all of this boils down to is that the class just needs to store a collection of matrices and vectors.
+
+    You can add and scale cost-functions
+    **********************************************************
+
+    A convenient property of these cost functions is that they can be added and scaled. The following provides an
+    example:
+
+    .. runblock:: pycon
+
+        >>> from irlc.ex04.discrete_control_cost import DiscreteQRCost
+        >>> import numpy as np
+        >>> cost1 = DiscreteQRCost(np.eye(2), np.zeros((1,1))) # Set Q = I, R = 0
+        >>> cost2 = DiscreteQRCost(np.ones((2,2)), np.zeros((1,1))) # Set Q = the 2x2 all-ones matrix, R = 0
+        >>> print(cost1.Q) # Will be the identity matrix.
+        >>> cost = cost1 * 3 +  cost2 * 2
+        >>> print(cost.Q) # Will be 3*I + 2*(the all-ones matrix)
+
+    """
+    def __init__(self, Q, R, H=None,q=None,r=None,qc=0, QN=None, qN=None,qcN=0):
+        n, d = Q.shape[0], R.shape[0]
+        self.QN, self.qN = nz(QN,n,n), nz(qN,n)
+        self.Q, self.q = nz(Q, n, n), nz(q, n)
+        self.R, self.H, self.r = nz(R, d, d), nz(H, d, n), nz(r, d)
+        self.qc, self.qcN = qc, qcN
+        self.flds_term = ['QN', 'qN', 'qcN']
+        self.flds = ['Q', 'q', 'R', 'H', 'r', 'qc'] + self.flds_term
+
+    def c(self, x, u, k=None, compute_gradients=False):
+        """
+        Evaluate the (instantaneous) part of the function :math:`c_k(x_k,u_k)`. An example:
+
+        .. runblock:: pycon
+
+            >>> from irlc.ex04.discrete_control_cost import DiscreteQRCost
+            >>> import numpy as np
+            >>> cost = DiscreteQRCost(np.eye(2), np.eye(1)) # Set Q = I, R = I
+            >>> cost.c(x = np.asarray([1,2]), u=np.asarray([0]), compute_gradients=False) # should return 0.5 * x^T Q x = 0.5 * (1 + 4)
+
+        The function can also return the derivatives of the cost function if ``compute_gradients=True``.
+
+        :param x: The state :math:`x_k`
+        :param u: The action :math:`u_k`
+        :param k: The time step :math:`k` (this will be ignored)
+        :param compute_gradients: if ``True`` the function will compute gradients and Hessians.
+        :return:
+            - ``c`` - The cost as a ``float``
+            - If ``compute_gradients=True``, the function additionally returns the derivatives ``c_x``, ``c_u`` and the (Hessian) terms ``c_xx``, ``c_ux``, ``c_uu``.
+        """
+        c = 1/2 * (x @ self.Q @ x) + 1/2 * (u @ self.R @ u) + u @ self.H @ x + self.q @ x + self.r @ u + self.qc
+        c_x = 1/2 * (self.Q + self.Q.T) @ x + self.q
+        c_u = 1 / 2 * (self.R + self.R.T) @ u + self.r
+        c_ux = self.H
+        c_xx = self.Q
+        c_uu = self.R
+        if compute_gradients:
+            # this is useful for MPC when we apply an optimizer rather than LQR (iLQR)
+            return c, c_x, c_u, c_xx, c_ux, c_uu
+        else:
+            return c
+
+    def cN(self, x, compute_gradients=False):
+        """
+        Evaluate the terminal cost term :math:`c_N(x_N)`. An example:
+
+        .. runblock:: pycon
+
+            >>> from irlc.ex04.discrete_control_cost import DiscreteQRCost
+            >>> import numpy as np
+            >>> cost = DiscreteQRCost(np.eye(2), np.zeros((1,1)), QN=np.eye(2)) # Set Q = I, R = 0, QN = I
+            >>> c, Jx, Jxx = cost.cN(x=2*np.ones((2,)), compute_gradients=True)
+            >>> c # should return 0.5 * x_N^T * x_N = 0.5 * 8
+
+        :param x: Terminal state :math:`x_N`
+        :param compute_gradients: if ``True`` the function will compute gradients and Hessians of the cost function.
+        :return: The last (terminal) part of the cost-function :math:`c_N`
+        """
+        J = 1/2 * (x @ self.QN @ x) + self.qN @ x + self.qcN
+        if compute_gradients:
+            J_x = 1 / 2 * (self.QN + self.QN.T) @ x + self.qN
+            return J, J_x, self.QN
+        else:
+            return J
+
+    def __add__(self, c):
+        # Add two cost functions by adding each of their matrix/vector terms.
+        return DiscreteQRCost(**{k: self.__dict__[k] + c.__dict__[k] for k in self.flds})
+
+    def __mul__(self, c):
+        # Scale a cost function by multiplying each of its terms by the scalar c.
+        return DiscreteQRCost(**{k: self.__dict__[k] * c for k in self.flds})
+
+    def __str__(self):
+        title = "Discrete-time cost function"
+        label1 = "Non-zero terms in c_k(x_k, u_k)"
+        label2 = "Non-zero terms in c_N(x_N)"
+        terms1 = [s for s in self.flds if s not in self.flds_term]
+        terms2 = self.flds_term
+        from irlc.ex03.control_cost import _repr_cost
+        return _repr_cost(self, title, label1, label2, terms1, terms2)
+
+    @classmethod
+    def zero(cls, state_size, action_size):
+        """
+        Creates an all-zero cost function, i.e. all terms :math:`Q`, :math:`R` are set to zero.
+
+        .. runblock:: pycon
+
+            >>> from irlc.ex04.discrete_control_cost import DiscreteQRCost
+            >>> cost = DiscreteQRCost.zero(2, 1)
+            >>> cost.Q # 2x2 zero matrix
+            >>> cost.R # 1x1 zero matrix.
+
+        :param state_size: Dimension of the state vector :math:`n`
+        :param action_size: Dimension of the action vector :math:`d`
+        :return: A ``DiscreteQRCost`` with all zero terms.
+        """
+        return cls(Q=np.zeros((state_size, state_size)), R=np.zeros((action_size, action_size)))
+
+    def goal_seeking_terminal_cost(self, xN_target, QN=None):
+        """
+        Create a discrete cost function which is minimal when the final state :math:`x_N` is equal to a goal state :math:`x_N^*`.
+        Concretely, it will return a cost function of the form
+
+        .. math::
+            c_N(x_N) = \\frac{1}{2} (x^*_N - x_N)^\\top Q_N  (x^*_N - x_N)
+
+        .. runblock:: pycon
+
+            >>> from irlc.ex04.discrete_control_cost import DiscreteQRCost
+            >>> import numpy as np
+            >>> cost = DiscreteQRCost.zero(2, 1)
+            >>> cost += cost.goal_seeking_terminal_cost(xN_target=np.ones((2,)))
+            >>> print(cost.qN)
+            >>> print(cost)
+
+        :param xN_target: Target state :math:`x_N^*`
+        :param QN: Cost matrix :math:`Q_N`
+        :return: A ``DiscreteQRCost`` object corresponding to the goal-seeking cost function
+        """
+
+        if QN is None:
+            QN = np.eye(xN_target.size)
+        QN, qN, qcN = targ2matrices(xN_target, Q=QN)
+        return DiscreteQRCost(Q=QN*0, R=self.R*0, QN=QN, qN=qN, qcN=qcN)
+
+    def goal_seeking_cost(self, x_target, Q=None):
+        """
+        Create a discrete cost function which is minimal when the state :math:`x_k` is equal to a goal state :math:`x_k^*`.
+        Concretely, it will return a cost function of the form
+
+        .. math::
+            c_k(x_k, u_k) = \\frac{1}{2} (x^*_k - x_k)^\\top Q  (x^*_k - x_k)
+
+        .. runblock:: pycon
+
+            >>> from irlc.ex04.discrete_control_cost import DiscreteQRCost
+            >>> import numpy as np
+            >>> cost = DiscreteQRCost.zero(2, 1)
+            >>> cost += cost.goal_seeking_cost(x_target=np.ones((2,)))
+            >>> print(cost.q)
+            >>> print(cost)
+
+        :param x_target: Target state :math:`x_k^*`
+        :param Q: Cost matrix :math:`Q`
+        :return: A ``DiscreteQRCost`` object corresponding to the goal-seeking cost function
+        """
+        if Q is None:
+            Q = np.eye(x_target.size)
+        Q, q, qc = targ2matrices(x_target, Q=Q)
+        return DiscreteQRCost(Q=Q, R=self.R*0, q=q, qc=qc)
diff --git a/irlc/ex04/discrete_control_model.py b/irlc/ex04/discrete_control_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..00b0c79787d18ad6201598d58f7074575d02baf3
--- /dev/null
+++ b/irlc/ex04/discrete_control_model.py
@@ -0,0 +1,272 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+"""
+
+References:
+  [Her25] Tue Herlau. Sequential decision making. (Freely available online), 2025.
+"""
+from irlc.ex03.control_model import ControlModel
+import sympy as sym
+import numpy as np
+import sys
+# Add a few numpy functions to sympy.
+sympy_modules_ = ['numpy', {'atan': np.arctan, 'atan2': np.arctan2, 'atanh': np.arctanh}, 'sympy']
+sympy_modules = ['sympy']
+
+class DiscreteControlModel: 
+    """
+    A discretized model. To create a model of this type, first specify a symbolic model, then pass it along to the constructor.
+    Since the symbolic model will specify the dynamics as a symbolic function, the discretized model can automatically discretize it
+    and create functions for computing derivatives.
+
+    The class will also discretize the cost. Note that it is possible to specify coordinate transformations.
+    """
+    state_labels = None
+    action_labels = None
+
+    "This field represents the :class:`~irlc.ex04.continuous_time_model.ContinuousSymbolicModel` the discrete model is derived from."
+    continuous_model = None
+
+    def __init__(self, model: ControlModel, dt: float, cost=None, discretization_method=None): 
+        r"""
+        Create the discretized model.
+
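+        A minimal usage sketch (here using the Kuramoto model from exercise 3; any continuous-time model works the same way):
+
+        .. runblock:: pycon
+
+            >>> from irlc.ex03.kuramoto import KuramotoModel
+            >>> from irlc.ex04.discrete_control_model import DiscreteControlModel
+            >>> dmodel = DiscreteControlModel(KuramotoModel(), dt=0.5)  # Euler discretization by default
+            >>> print(dmodel.f([0.], [0.5]))  # One discrete step x_{k+1} = f_k(x_k, u_k)
+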
+        :param model: The continuous-time model to discretize.
+        :param dt: Discretization timestep :math:`\Delta`
+        :param cost: If this parameter is not specified, the cost will be derived (discretized) automatically from ``model``
+        :param discretization_method: Can be either ``'Euler'`` (default) or ``'ei'`` (exponential integration). The latter assumes that the model is linear.
+        """
+        self.dt = dt  
+        self.continuous_model = model   
+        if discretization_method is None:
+            from irlc.ex04.model_linear_quadratic import LinearQuadraticModel
+            if isinstance(model, LinearQuadraticModel):
+                discretization_method = 'Ei'
+            else:
+                discretization_method = 'Euler'
+        self.discretization_method = discretization_method.lower()
+
+        """ Initialize symbolic variables representing inputs and actions. """
+
+        uc = sym.symbols(f"uc:{model.action_size}") 
+        xc = sym.symbols(f"xc:{model.state_size}")
+
+        xd, ud = model.phi_x(xc), model.phi_u(uc) # Compute discrete symbolic variables x_k, u_k.
+
+        x = sym.symbols(f"x:{len(xd)}") 
+        u = sym.symbols(f"u:{len(ud)}")
+
+        """ x_next is a symbolic variable representing x_{k+1} = f_k(x_k, u_k) """
+        x_next = self._f_discrete_sym(x, u, dt=dt) 
+        """ compute the symbolic derivate of x_next wrt. z = (x,u): d x_{k+1}/dz """
+        dy_dz = sym.Matrix([[sym.diff(f, zi) for zi in list(x) + list(u)] for f in x_next])
+        """ Define (numpy) functions giving next state and the derivatives """
+        self._f_z_np = sym.lambdify((tuple(x), tuple(u)), dy_dz, modules=sympy_modules_) 
+        # Create a numpy function corresponding to the discretized model x_{k+1} = f_discrete(x_k, u_k) 
+        self._f_np = sym.lambdify((tuple(x), tuple(u)), x_next, modules=sympy_modules_)  
+        self._n = len(x)
+        self._d = len(u)
+
+        # Make action/state transformation
+
+        self.phi_x_inv = sym.lambdify( (x,), model.phi_x_inv(x), modules=sympy_modules_)
+        self.phi_u_inv = sym.lambdify( (u,), model.phi_u_inv(u), modules=sympy_modules_)
+
+        self.phi_x = sym.lambdify((xc,), model.phi_x(xc), modules=sympy_modules_)
+        self.phi_u = sym.lambdify((uc,), model.phi_u(uc), modules=sympy_modules_)
+
+        # set labels
+        if self.state_labels is None:
+            self.state_labels = self.continuous_model.state_labels
+
+        if self.action_labels is None:
+            self.action_labels = self.continuous_model.action_labels
+
+        if cost is None:
+            self.cost = model.get_cost().discretize(dt=dt) 
+        else:
+            self.cost = cost
+
+    @property
+    def state_size(self):
+        """
+        The dimension of the state vector :math:`x`, i.e. :math:`n`
+        :return: Dimension of the state vector :math:`n`
+        """
+        return self._n
+
+    @property
+    def action_size(self):
+        """
+        The dimension of the action vector :math:`u`, i.e. :math:`d`
+        :return: Dimension of the action vector :math:`d`
+        """
+        return self._d
+
+    def _f_discrete_sym(self, xs, us, dt):
+        """
+        This is a helper function. It computes the discretized dynamics as a symbolic object:
+
+        .. math::
+            x_{k+1}  = f_k(x_k, u_k, t_k)
+
+        The parameters correspond to states and actions and are lists of the form ``[x0, x1, ..]`` and ``[u0, u1, ..]``
+        where each element is a symbolic expression. The function returns a list of the form ``[f0, f1, ..]`` where
+        each element is a symbolic expression corresponding to a coordinate of :math:`f_k`.
+
+        :param xs: List of symbolic expressions corresponding to the coordinates of :math:`x_k`
+        :param us: List of symbolic expressions corresponding to the coordinates of :math:`u_k`
+        :param dt: The discretization time step :math:`\\Delta` (a number or a symbolic expression)
+        :return: A list of symbolic expressions corresponding to the coordinates of :math:`f_k`
+        """
+        # xc, uc = self.sym_discrete_xu2continious_xu(xs, us)
+        xc, uc = self.continuous_model.phi_x_inv(xs), self.continuous_model.phi_u_inv(us)
+        if self.discretization_method == 'euler':
+            xdot = self.continuous_model.sym_f(x=xc, u=uc)
+            xnext = [x_ + xdot_ * dt for x_, xdot_ in zip(xc, xdot)]
+        elif self.discretization_method == 'ei':  # Assume the continuous model is linear; a bit hacky, but use exact Exponential integration in that case
+            A = self.continuous_model.A
+            B = self.continuous_model.B
+            d = self.continuous_model.d
+            """ These are the matrices of the continuous-time problem.
+            > dx/dt = Ax + Bu + d
+            and should be discretized using the exact integration technique (see (Her25, Subsection 13.1.3) and (Her25, Subsection 13.1.6)); 
+            the precise formula you should implement is given in (Her25, eq. (13.19))
+            
+            Remember the output matrix should be symbolic (see Euler integration for examples) but you can assume there are no variable transformations for simplicity.            
+            """
+            from scipy.linalg import expm
+            """
+            expm computes the matrix exponential: 
+            > expm(A) = exp(A) 
+            """
+            Ad = expm(A * dt)
+            n = Ad.shape[0]
+            d = d.reshape((len(B), 1)) if d is not None else np.zeros((n, 1))
+            Bud = B @ sym.Matrix(uc) + d
+            x_next = sym.Matrix(Ad) @ sym.Matrix(xc) + dt * phi1(A * dt) @ Bud
+            xnext = list(x_next)
+        else:
+            raise Exception("Unknown discreetization method", self.discretization_method)
+        xnext = self.continuous_model.phi_x(xnext)
+        return xnext
+
+    def f(self, x, u, k=0): 
+        r"""
+        This function implements the dynamics :math:`f_k(x_k, u_k)` of the model. They can be evaluated as:
+
+        .. runblock:: pycon
+
+            >>> from irlc.ex04.model_pendulum import DiscreteSinCosPendulumModel
+            >>> model = DiscreteSinCosPendulumModel()
+            >>> x = [0, 1, 0.4]
+            >>> u = [1]
+            >>> print(model.f(x,u) )           # Computes x_{k+1} = f_k(x_k, u_k)
+
+        The model will by default be Euler discretized:
+
+        .. math::
+
+            x_{k+1} = f_k(x_k, u_k) = x_k + \Delta f(x_k, u_k)
+
+        except for :python:`LinearQuadraticModel`, which is discretized using exponential integration by default.
+
+
+        :param x: The state as a numpy array
+        :param u: The action as a numpy array
+        :param k: The time step as an integer (currently this has no effect)
+        :return: The next state :math:`x_{k+1}` as a numpy array.
+        """
+        fx = np.asarray( self._f_np(x, u) ) 
+        return fx 
+
+    def f_jacobian(self, x, u, k=0):
+        r"""Compute the Jacobians of the discretized dynamics.
+
+        The function will compute the two Jacobians of the discrete dynamics :math:`f_k` with respect to :math:`x` and :math:`u`:
+
+        .. math::
+            J_x f_k(x,u), \quad J_u f_k(x, u)
+
+        .. runblock:: pycon
+
+            >>> from irlc.ex04.model_pendulum import DiscreteSinCosPendulumModel
+            >>> model = DiscreteSinCosPendulumModel()
+            >>> x = [0, 1, 0.4]
+            >>> u = [0]
+            >>> f = model.f(x, u)
+            >>> Jx, Ju = model.f_jacobian(x,u)
+            >>> print("Jacobian Jx is\\n", Jx)
+            >>> print("Jacobian Ju is\\n", Ju)
+
+
+        :param x: The state as a numpy array
+        :param u: The action as a numpy array
+        :param k: The time step as an integer (currently this has no effect)
+        :return: The two Jacobians computed wrt. :math:`x` and :math:`u`.
+        """
+        J = self._f_z_np(x, u)
+        return J[:, :self.state_size], J[:, self.state_size:]
+
+
+    def render(self, x=None, render_mode="human"):
+        return self.continuous_model.render(x=self.phi_x_inv(x), render_mode=render_mode)
+
+    def close(self):
+        self.continuous_model.close()
+
+    def __str__(self):
+        """
+        Return a string representation of the model. This is a potentially helpful way to summarize the content of the
+        model. You can use it as:
+
+        .. runblock:: pycon
+
+            >>> from irlc.ex04.model_pendulum import DiscreteSinCosPendulumModel
+            >>> model = DiscreteSinCosPendulumModel()
+            >>> print(model)
+
+        :return: A string containing the details of the model.
+        """
+        split = "-"*20
+        s = [f"{self.__class__}"] + ['='*50]
+        s += [f"Dynamics (after discretization with Delta = {self.dt}):", split]
+        # t = sym.symbols("t")
+        x = sym.symbols(f"x0:{self.state_size}")
+        u = sym.symbols(f"u0:{self.action_size}")
+
+        f = self._f_discrete_sym(x, u, self.dt)
+        from irlc.ex03.control_model import typeset_eq
+        s += [typeset_eq(x, u, f)]
+
+        s += ["Continuous-time dynamics:", split]
+        xc = sym.symbols(f"x:{self.continuous_model.state_size}")
+        uc = sym.symbols(f"u:{self.continuous_model.action_size}")
+
+        s += [f"f_k({x}, {u}) = {str(self.continuous_model.sym_f(xc, uc))}", '']
+        s += ["Variable transformations:", split]
+        xd, ud = self.continuous_model.phi_x(xc), self.continuous_model.phi_u(uc)
+        s += [f' * phi_x( x(t) ) -> x_k = {xd}']
+        s += [f' * phi_u( u(t) ) -> u_k = {ud}', '']
+        s += ["Cost:", split, str(self.cost)]
+        return "\n".join(s)
+
+
+def phi1(A):
+    """ This is a helper function which computes the matrix expression
+
+    .. math::
+        A^{-1} (e^A - I)
+
+    and importantly deals with the potential numerical instability of this expression when :math:`A` is (nearly) singular.
+    """
+    from scipy.linalg import expm
+    from math import factorial
+    if np.linalg.cond(A) < 1 / sys.float_info.epsilon:
+        # A is well-conditioned: evaluate the expression directly by solving A C = e^A - I.
+        return np.linalg.solve(A, expm(A) - np.eye(len(A)))
+    else:
+        # A is (nearly) singular: fall back on the truncated series expansion
+        # A^{-1} (e^A - I) = sum_{k >= 1} A^{k-1} / k!
+        C = np.zeros_like(A)
+        for k in range(1, 20):
+            dC = np.linalg.matrix_power(A, k - 1) / factorial(k)
+            C += dC
+        assert sum(np.abs(dC.flat)) < 1e-10  # Check that the series has converged.
+        return C
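+
+
+if __name__ == "__main__":
+    # Small sanity check of phi1 (an illustrative sketch, not used by the exercises): for an
+    # invertible, well-conditioned A, phi1(A) should agree with the direct expression A^{-1} (e^A - I).
+    from scipy.linalg import expm
+    A = np.asarray([[0., 1.], [-1., 0.]])
+    print(phi1(A))
+    print(np.linalg.solve(A, expm(A) - np.eye(2)))  # Should print (nearly) the same matrix.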
diff --git a/irlc/ex04/discrete_kuramoto.py b/irlc/ex04/discrete_kuramoto.py
new file mode 100644
index 0000000000000000000000000000000000000000..a2d8affc519824bff59fcd41b44047e1401d86ee
--- /dev/null
+++ b/irlc/ex04/discrete_kuramoto.py
@@ -0,0 +1,101 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.ex04.discrete_control_model import DiscreteControlModel
+from irlc.ex04.control_environment import ControlEnvironment
+import numpy as np
+from irlc import train, Agent, savepdf
+import matplotlib.pyplot as plt
+from irlc.ex03.kuramoto import KuramotoModel, f
+
+
+def fk(x,u):
+    """ Computes the discrete (Euler 1-step integrated) version of the Kuromoto update with discretization time dt=0.5,i.e.
+
+    x_{k+1} = f_k(x,u).
+
+    Look at dmodel.f for inspiration. As usual, use a debugger and experiment. Note you have to specify input arguments as lists,
+    and the function should return a numpy ndarray.
+    """
+    dmodel = DiscreteControlModel(KuramotoModel(), dt=0.5)  # this is how we discretize the Kuramoto model.
+    # TODO: 1 lines missing.
+    raise NotImplementedError("Compute Euler discretized dynamics here using the dmodel.")
+    return f_euler
+
+def dfk_dx(x,u):
+    r""" Computes the derivative of the (Euler 1-step integrated) version of the Kuromoto update with discretization time dt=0.5,
+    i.e. if
+
+    .. math::
+
+        x_{k+1} = f_k(x,u)
+
+    this function should return
+
+    .. math::
+
+        \frac{\partial f_k}{\partial x }
+
+    (i.e. the Jacobian with respect to :math:`x`) as a numpy matrix.
+    Look at dmodel.f for inspiration, and note it has an input argument that is relevant.
+    As usual, use a debugger and experiment. Note you have to specify input arguments as lists,
+    and the function should return a two-dimensional numpy ndarray.
+
+    """
+    dmodel = DiscreteControlModel(KuramotoModel(), dt=0.5)
+    # The function dmodel.f accepts various parameters. Perhaps their names can give you an idea?
+    # TODO: 1 lines missing.
+    raise NotImplementedError("Insert your solution and remove this error.")
+    return f_euler_derivative
+
+
+if __name__ == "__main__":
+    # Part 1: Making a model
+    cmodel = KuramotoModel()
+    print(cmodel)
+    # Computing f_k
+    dmodel = DiscreteControlModel(KuramotoModel(), dt=0.5) 
+    print(dmodel) # This will print details about the discrete model.  
+
+    print("The Euler-discretized version, f_k(x,u) = x + Delta f(x,u), is") 
+    print("f_k(x=0,u=0) =", fk([0], [0]))
+    print("f_k(x=1,u=0.3) =", fk([1], [0.3]))
+
+    # Computing df_k / dx (The Jacobian).
+    print("The derivative of the Euler discretized version wrt. x is:")
+    print("df_k/dx(x=0,u=0) =", dfk_dx([0], [0])) 
+
+    # Part 2: The environment and simulation:
+    env = ControlEnvironment(dmodel, Tmax=20) # An environment that runs for 20 seconds. 
+    u = 1.3 # Action to take in each time step.
+
+    ts_step = []  # Current time (according to the environment, i.e. in increments of dt).
+    xs_step = []  # x_k obtained using the env.step-function in the environment.
+
+    x, _ = env.reset()       # Get starting state.
+    ts_step.append(env.time) # env.time keeps track of the clock-time in the environment.
+    xs_step.append(x)        # Initialize with first state
+
+    # Use
+    # > next_x, cost, terminated, truncated, metadata = env.step([u])
+    # to simulate a single step.
+    for _ in range(10000):
+        # TODO: 1 lines missing.
+        raise NotImplementedError("Insert your solution and remove this error.")
+        xs_step.append(next_x)
+        ts_step.append(env.time) # This is how you get the current time (in seconds) from the environment.
+
+        if terminated: # Obtain 'terminated' from the step-function. It will be true when Tmax=20 seconds has passed.
+            break
+
+    x0 = cmodel.x0_bound().low  # Get the starting state x0. We exploit that the bound on x0 is an equality constraint.
+    xs_rk4, us_rk4, ts_rk4, cost = cmodel.simulate(x0, u_fun=u, t0=0, tF=20, N_steps=100)
+
+    plt.plot(ts_rk4, xs_rk4, 'k-', label='RK4 (nearly exact)')
+    plt.plot(ts_step, xs_step, 'ro', label='RK4 (step-function in environment)')
+
+    # Use the train-function to plot the result of simulating a random agent.
+    stats, trajectories = train(env, Agent(env), return_trajectory=True) 
+    plt.plot(trajectories[0].time, trajectories[0].state, label='x(t) when using a random action sequence from agent') 
+    plt.legend()
+    savepdf('kuramoto_step')
+    plt.show(block=False)
+    print("The total cost obtained using random actions", -stats[0]['Accumulated Reward'])
diff --git a/irlc/ex04/locomotive.py b/irlc/ex04/locomotive.py
new file mode 100644
index 0000000000000000000000000000000000000000..6827d1cc451cd67eefb52cd7bf2a69e8cde21ce9
--- /dev/null
+++ b/irlc/ex04/locomotive.py
@@ -0,0 +1,105 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.ex04.discrete_control_model import DiscreteControlModel
+from irlc.ex04.control_environment import ControlEnvironment
+from irlc.ex04.model_harmonic import HarmonicOscilatorModel
+import numpy as np
+from irlc.utils.graphics_util_pygame import UpgradedGraphicsUtil
+from gymnasium.spaces import Box
+
+class LocomotiveModel(HarmonicOscilatorModel):
+    state_labels = ["x(t)", "v(t)"]
+    action_labels = ["u(t)"]
+
+
+    viewer = None
+    metadata = {
+        'render_modes': ['human', 'rgb_array'],
+        'render_fps': 20
+    }
+
+    def __init__(self, m=1., slope=0.0, target=0):
+        """
+        Slope is the uphill slope of the track (in degrees). E.g. slope=15 makes it harder for the engine.
+
+        :param m: Mass of the locomotive
+        :param slope: Uphill slope of the track in degrees
+        :param target: Target position of the locomotive
+        """
+        self.target = target
+        self.slope = slope
+        super().__init__(m=m, k=0., drag=-np.sin(slope/360*2*np.pi) * m * 9.82)  # Drag is the gravity component along the track (slope converted from degrees to radians).
+
+    def x0_bound(self) -> Box:
+        return Box(np.asarray([-1, 0]), np.asarray([-1,0]))
+
+    def u_bound(self) -> Box:
+        return Box(np.asarray([-100]), np.asarray([100])) # Min and Max engine power.
+
+    def render(self, x, render_mode="human"):
+        """ Initialize a viewer and update the states. """
+        if self.viewer is None:
+            self.viewer = LocomotiveViewer(self)
+        self.viewer.update(x, self.target)
+        import time
+        time.sleep(0.05)
+        return self.viewer.blit(render_mode=render_mode)
+
+    def close(self):
+        if self.viewer is not None:
+            self.viewer.close()
+
+class DiscreteLocomotiveModel(DiscreteControlModel):
+    def __init__(self, *args, dt=0.1, **kwargs):
+        model = LocomotiveModel(*args, **kwargs)
+        super().__init__(model=model, dt=dt)
+
+class LocomotiveEnvironment(ControlEnvironment):
+    def __init__(self, *args, dt=0.1, Tmax=5, render_mode=None, **kwargs):
+        model = DiscreteLocomotiveModel(*args, dt=dt, **kwargs)
+        super().__init__(discrete_model=model, Tmax=Tmax, render_mode=render_mode)
+
+
+class LocomotiveViewer(UpgradedGraphicsUtil):
+    def __init__(self, train):
+        self.train = train
+        width = 1100
+        self.scale = width / 4
+        self.dw = self.scale * 0.1
+        super().__init__(screen_width=width, xmin=-width / 2, xmax=width / 2, ymin=-width / 5, ymax=width / 5, title='Locomotive environment')
+        from irlc.utils.graphics_util_pygame import Object
+        self.locomotive = Object("locomotive.png", image_width=90, graphics=self)
+
+    def render(self):
+        # fugly rendering code.
+        dw = self.dw
+        scale = self.scale
+        train = self.train
+        red = (200, 40, 40)
+        from irlc.utils.graphics_util_pygame import rotate_around
+        ptrack = [(-2 * scale, -dw / 2*0),
+                  (-2 * scale, dw / 2),
+                  (2 * scale, dw / 2),
+                  (2 * scale, -dw / 2*0)]
+        ptrack.append( ptrack[-1])
+        ptrack = rotate_around(ptrack,(0,0), -self.train.slope)
+        self.draw_background(background_color=(255, 255, 255))
+        self.polygon("asdf", coords=ptrack, fillColor=(int(.7 * 255),) * 3, filled=True)
+        self.locomotive.rotate(self.train.slope)
+        p0 = (0,0)
+        self.locomotive.move_center_to_xy( *rotate_around( (self.scale * self.x[0], -self.locomotive.surf.get_height()//2), p0, -self.train.slope))
+        self.locomotive.blit(self.surf)
+        xx = 0*self.scale * self.x[0]
+        triangle = [(train.target * scale - dw / 2+ xx, dw/2), (train.target * scale + xx, -0*dw / 2),
+                    (train.target * scale + dw / 2 + xx, dw/2)]
+        triangle = rotate_around(triangle, p0, -self.train.slope)
+        ddw = dw/2
+        xx = self.scale * self.x[0]
+        trainloc = [(xx- ddw / 2, -ddw / 2), ( xx, -0 * ddw / 2), (xx + ddw / 2, -ddw / 2)]
+        trainloc = rotate_around(trainloc, p0, -self.train.slope)
+        self.trg = self.polygon("", coords=trainloc, fillColor=red, filled=True)
+        self.trg = self.polygon("", coords=triangle, fillColor=red, filled=True)
+
+    def update(self, x, xstar):
+        self.x = x #*self.scale
+        self.xstar = xstar
diff --git a/irlc/ex04/model_harmonic.py b/irlc/ex04/model_harmonic.py
new file mode 100644
index 0000000000000000000000000000000000000000..7dad567453c6eba1d8a86b624115126343cc9121
--- /dev/null
+++ b/irlc/ex04/model_harmonic.py
@@ -0,0 +1,113 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.ex04.model_linear_quadratic import LinearQuadraticModel
+from irlc.ex04.discrete_control_model import DiscreteControlModel
+from irlc.ex04.control_environment import ControlEnvironment
+import numpy as np
+from irlc.utils.graphics_util_pygame import UpgradedGraphicsUtil
+
+"""
+Simulate a Harmonic oscillator governed by equations:
+
+d^2 x1 / dt^2 = -k/m x1 + 1/m u(x1, t)
+
+where x1 is the position and u is our externally applied force (the control)
+k is the spring constant and m is the mass. See:
+
+https://en.wikipedia.org/wiki/Simple_harmonic_motion#Dynamics
+
+for more details.
+In the code, we will re-write the equations as:
+
+dx/dt = f(x, u),   u = u_fun(x, t)
+
+where x = [x1,x2] is now a vector and f is a function of x and the current control.
+here, x1 is the position (same as x in the first equation) and x2 is the velocity.
+
+A simulation of this system returns ts, xs, C
+
+where ts is the N time points t_0, ..., t_{N-1}, xs is a corresponding list [ ..., [x_1(t_k),x_2(t_k)], ...] and C is the cost.
+"""
+
+class HarmonicOscilatorModel(LinearQuadraticModel):
+    metadata = {
+        'render_modes': ['human', 'rgb_array'],
+        'render_fps': 20
+    }
+    """
+    See: https://books.google.dk/books?id=tXZDAAAAQBAJ&pg=PA147&lpg=PA147&dq=boeing+747+flight+0.322+model+longitudinal+flight&source=bl&ots=L2RpjCAWiZ&sig=ACfU3U2m0JsiHmUorwyq5REcOj2nlxZkuA&hl=en&sa=X&ved=2ahUKEwir7L3i6o3qAhWpl4sKHQV6CdcQ6AEwAHoECAoQAQ#v=onepage&q=boeing%20747%20flight%200.322%20model%20longitudinal%20flight&f=false
+    """
+    def __init__(self, k=1., m=1., drag=0.0, Q=None, R=None):
+        self.k = k
+        self.m = m
+        A = [[0, 1],
+             [-k/m, 0]]
+
+        B = [[0], [1/m]]
+        d = [[0], [drag/m]]
+
+        A, B, d = np.asarray(A), np.asarray(B), np.asarray(d)
+        if Q is None:
+            Q = np.eye(2)
+        if R is None:
+            R = np.eye(1)
+        self.viewer = None
+        super().__init__(A=A, B=B, Q=Q, R=R, d=d)
+
+    def render(self, x, render_mode="human"):
+        """ Render the environment. You don't have to understand this code.  """
+        if self.viewer is None:
+            self.viewer = HarmonicViewer(xstar=0) # target: x=0.
+        self.viewer.update(x)
+        import time
+        time.sleep(0.05)
+        return self.viewer.blit(render_mode=render_mode)
+
+    def close(self):
+        if self.viewer is not None:
+            self.viewer.close()
+
+
+class DiscreteHarmonicOscilatorModel(DiscreteControlModel): 
+    def __init__(self, dt=0.1, discretization_method=None, **kwargs):
+        model = HarmonicOscilatorModel(**kwargs)
+        super().__init__(model=model, dt=dt, discretization_method=discretization_method)
+
+
+class HarmonicOscilatorEnvironment(ControlEnvironment): 
+    def __init__(self, Tmax=80, supersample_trajectory=False, render_mode=None, **kwargs):
+        model = DiscreteHarmonicOscilatorModel(**kwargs)
+        self.dt = model.dt
+        super().__init__(discrete_model=model, Tmax=Tmax, render_mode=render_mode, supersample_trajectory=supersample_trajectory) 
+
+    def _get_initial_state(self) -> np.ndarray:
+        return np.asarray([1, 0])
+
+class HarmonicViewer(UpgradedGraphicsUtil):
+    def __init__(self, xstar = 0):
+        self.xstar = xstar
+        width = 1100
+        self.scale = width / 6
+        self.dw = self.scale * 0.1
+        super().__init__(screen_width=width, xmin=-width / 2, xmax=width / 2, ymin=-width / 5, ymax=width / 5, title='Harmonic Oscillator')
+
+    def render(self):
+        self.draw_background(background_color=(255, 255, 255))
+        dw = self.dw
+        self.rectangle(color=(0,0,0), x=-dw//2, y=-dw//2, width=dw, height=dw)
+        xx = np.linspace(0, 1)
+        y = np.sin(xx * 2 * np.pi * 5) * 0.1*self.scale * 0.5
+
+        for i in range(len(xx) - 1):
+            self.line("asfasf", here=(xx[i] * self.x[0] * self.scale, y[i]), there=(xx[i + 1] * self.x[0] * self.scale, y[i+1]),
+                      color=(0,0,0), width=2)
+        self.circle("asdf", pos=( self.x[0] * self.scale, 0), r=dw, fillColor=(0,0,0))
+        self.circle("asdf", pos=( self.x[0] * self.scale, 0), r=dw*0.9, fillColor=(int(.7 * 255),) * 3)
+
+    def update(self, x):
+        self.x = x
+
+if __name__ == "__main__":
+    from irlc import train
+    env = HarmonicOscilatorEnvironment(render_mode='human')
+    # train(env, NullAgent(env), num_episodes=1, max_steps=200)
+    # env.close()
diff --git a/irlc/ex04/model_linear_quadratic.py b/irlc/ex04/model_linear_quadratic.py
new file mode 100644
index 0000000000000000000000000000000000000000..912c2bbe9e6fdcbb783cfb4c330e4e496f5c7fdd
--- /dev/null
+++ b/irlc/ex04/model_linear_quadratic.py
@@ -0,0 +1,29 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+import sympy as sym
+from irlc.ex03.control_model import ControlModel
+from irlc.ex03.control_cost import SymbolicQRCost
+from gymnasium.spaces import Box
+
+class LinearQuadraticModel(ControlModel):
+    """
+    Implements a model with update equations
+
+    dx/dt = Ax + Bu + d
+    Cost = integral_0^{t_F} (1/2 x^T Q x + 1/2 u^T R u + q' x + qc) dt
+    """
+    def __init__(self, A, B, Q, R, q=None, qc=None, d=None):  
+        self._cost = SymbolicQRCost(R=R, Q=Q, q=q, qc=qc)
+        self.A, self.B, self.d = A, B, d
+        super().__init__()
+
+    def sym_f(self, x, u, t=None):  
+        xp = sym.Matrix(self.A) * sym.Matrix(x) + sym.Matrix(self.B) * sym.Matrix(u)
+        if self.d is not None:
+            xp += sym.Matrix(self.d)
+        return [x for xr in xp.tolist() for x in xr]  
+
+    def x0_bound(self) -> Box:
+        return Box(0, 0, shape=(self.state_size,))
+
+    def get_cost(self):
+        return self._cost
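+
+if __name__ == "__main__":
+    # Minimal usage sketch (illustrative): a double integrator dx/dt = [[0, 1], [0, 0]] x + [[0], [1]] u
+    # with quadratic cost matrices Q = I and R = I.
+    import numpy as np
+    model = LinearQuadraticModel(A=np.asarray([[0., 1.], [0., 0.]]), B=np.asarray([[0.], [1.]]),
+                                 Q=np.eye(2), R=np.eye(1))
+    print(model.sym_f([1., 2.], [0.5]))  # Evaluates A x + B u; here equal to [2.0, 0.5].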
diff --git a/irlc/ex04/model_pendulum.py b/irlc/ex04/model_pendulum.py
new file mode 100644
index 0000000000000000000000000000000000000000..43c5b81f389412a71046b71218baecc1f2c33382
--- /dev/null
+++ b/irlc/ex04/model_pendulum.py
@@ -0,0 +1,145 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+import sympy as sym
+from irlc.ex03.control_model import ControlModel
+from irlc.ex03.control_cost import SymbolicQRCost
+from irlc.ex04.discrete_control_model import DiscreteControlModel
+import gymnasium as gym
+from gymnasium.spaces.box import Box
+from irlc.ex04.control_environment import ControlEnvironment
+import numpy as np
+
+"""
+SEE: https://github.com/openai/gym/blob/master/gym/envs/classic_control/cartpole.py
+https://github.com/openai/gym/blob/master/gym/envs/classic_control/pendulum.py
+"""
+class PendulumModel(ControlModel):
+    state_labels= [r"$\theta$", r"$\frac{d \theta}{dt}$"]
+    action_labels = ['Torque $u$']
+    x_upright, x_down = np.asarray([0.0, 0.0]), np.asarray([np.pi, 0.0])
+
+    def __init__(self, l=1., m=.8, friction=0.0, max_torque=6.0, transform_coordinates=False): 
+        self.l, self.m, self.max_torque = l, m, max_torque
+        assert not transform_coordinates
+        super().__init__() 
+        self.friction = friction
+        self._u_prev = None                        # For rendering
+        self.cp_render = {}
+        assert friction == 0.0
+
+    def sym_f(self, x, u, t=None): 
+        l, m = self.l, self.m
+        g = 9.82
+        theta_dot = x[1]  # Parameterization: x = [theta, theta']
+        theta_dot_dot =  g/l * sym.sin(x[0]) + 1/(m*l**2) * u[0]
+        return [theta_dot, theta_dot_dot]
+
+    def get_cost(self) -> SymbolicQRCost:
+        return SymbolicQRCost(R=np.ones((1, 1)), Q=np.eye(2))  
+
+    def tF_bound(self) -> Box: 
+        return Box(0.5, 4, shape=(1,), dtype=float)
+
+    def t0_bound(self) -> Box:
+        return Box(0, 0, shape=(1,), dtype=float)
+
+    def x_bound(self) -> Box:
+        return Box(np.asarray( [-2 * np.pi, -np.inf]), np.asarray( [2 * np.pi, np.inf]), dtype=float)
+
+    def u_bound(self) -> Box:
+        return Box(np.asarray([-self.max_torque]), np.asarray([self.max_torque]), dtype=float)
+
+    def x0_bound(self) -> Box:
+        return Box(np.asarray( [np.pi, 0] ), np.asarray( [np.pi, 0]), dtype=float)
+
+    def xF_bound(self) -> Box:
+        return Box(np.asarray([0, 0]), np.asarray([0, 0]), dtype=float)         
+
+    def close(self):
+        for r in self.cp_render.values():
+            r.close()
+
+    def render(self, x, render_mode="human"):
+        if render_mode not in self.cp_render:
+            self.cp_render[render_mode] = gym.make("Pendulum-v1", render_mode=render_mode)  # This environment is only used for rendering.
+            self.cp_render[render_mode].max_time_limit = 10000
+            self.cp_render[render_mode].reset()
+        self.cp_render[render_mode].unwrapped.state = np.asarray(x)  # environment is wrapped
+        self.cp_render[render_mode].unwrapped.last_u = self._u_prev[0] if self._u_prev is not None else None
+        return self.cp_render[render_mode].render()
+
+class SinCosPendulumModel(PendulumModel):
+    def phi_x(self, x):
+        theta, theta_dot = x[0], x[1]
+        return [sym.sin(theta), sym.cos(theta), theta_dot]
+
+    def phi_x_inv(self, x):
+        sin_theta, cos_theta, theta_dot = x[0], x[1], x[2]
+        theta = sym.atan2(sin_theta, cos_theta)  # Obtain angle theta from sin(theta),cos(theta)
+        return [theta, theta_dot]
+
+    def phi_u(self, u):
+        # Map the bounded torque u in (-max_torque, max_torque) to an unbounded action via atanh.
+        return [sym.atanh(u[0] / self.max_torque)]
+
+    def phi_u_inv(self, u):
+        # Map an unbounded action back to a bounded torque via tanh.
+        return [sym.tanh(u[0]) * self.max_torque]
+
+    def u_bound(self) -> Box:
+        return Box(np.asarray([-np.inf]), np.asarray([np.inf]), dtype=float)
+
+def _pendulum_cost(model):
+    from irlc.ex04.discrete_control_cost import DiscreteQRCost
+    # Q penalizes the distance to the upright position, measured in (sin, cos, dtheta)-coordinates.
+    Q = np.eye(model.state_size)
+    Q[0, 1] = Q[1, 0] = model.l
+    Q[0, 0] = Q[1, 1] = model.l ** 2
+    Q[2, 2] = 0.0
+    R = np.array([[0.1]]) * 10
+    c0 = DiscreteQRCost(Q=np.zeros((model.state_size,model.state_size)), R=R)
+    c0 = c0 + c0.goal_seeking_cost(Q=Q, x_target=model.x_upright)
+    c0 = c0 + c0.goal_seeking_terminal_cost(xN_target=model.x_upright) * 1000
+    return c0 * 2
+
+
+class DiscreteSinCosPendulumModel(DiscreteControlModel): 
+    state_labels =  [r'$\sin(\theta)$', r'$\cos(\theta)$', r'$\dot{\theta}$']
+    action_labels = ['Torque $u$']
+
+    def __init__(self, dt=0.02, cost=None, **kwargs): 
+        model = SinCosPendulumModel(**kwargs) 
+        self.max_torque = model.max_torque
+        super().__init__(model=model, dt=dt, cost=cost) 
+        self.x_upright = np.asarray(self.phi_x(model.x_upright))
+        self.l = model.l # Pendulum length
+        if cost is None:  
+            cost = _pendulum_cost(self)
+        self.cost = cost  
+
+
+class ThetaPendulumEnvironment(ControlEnvironment):
+    def __init__(self, Tmax=5, render_mode=None):
+        dt = 0.02
+        discrete_model = DiscreteControlModel(PendulumModel(), dt=dt)
+        super().__init__(discrete_model, Tmax=Tmax, render_mode=render_mode)
+
+class GymSinCosPendulumEnvironment(ControlEnvironment): 
+    def __init__(self, *args, Tmax=5, supersample_trajectory=False, render_mode=None, **kwargs): 
+        discrete_model = DiscreteSinCosPendulumModel(*args, **kwargs) 
+        self.action_space = Box(low=-np.inf, high=np.inf, shape=(discrete_model.action_size,), dtype=float)
+        self.observation_space = Box(low=-np.inf, high=np.inf, shape=(discrete_model.state_size,), dtype=float)
+        super().__init__(discrete_model, Tmax=Tmax, supersample_trajectory=supersample_trajectory, render_mode=render_mode) 
+
+if __name__ == "__main__":
+    model = SinCosPendulumModel(l=1, m=1)
+    print(str(model))
+    print(f"Pendulum with l={model.l}, m={model.m}") 
+    x = [1,2]
+    u = [0] # Input state/action.
+    # x_dot = ...
+    # TODO: 1 lines missing.
+    raise NotImplementedError("Compute dx/dt = f(x, u, t=0) here using the model-class defined above")
+    # x_dot_numpy = ...
+    # TODO: 1 lines missing.
+    raise NotImplementedError("Compute dx/dt = f(x, u, t=0) here using numpy-expressions you write manually.")
+
+    print(f"Using model-class: dx/dt = f(x, u, t) = {x_dot}")
+    print(f"Using numpy: dx/dt = f(x, u, t) = {x_dot_numpy}") 
diff --git a/irlc/ex04/pid.py b/irlc/ex04/pid.py
new file mode 100644
index 0000000000000000000000000000000000000000..e59b08be60879827f0ce8e3131c70a11a3c3c7bb
--- /dev/null
+++ b/irlc/ex04/pid.py
@@ -0,0 +1,60 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+"""
+
+References:
+  [Her25] Tue Herlau. Sequential decision making. (Freely available online), 2025.
+"""
+from irlc import savepdf
+import numpy as np
+import matplotlib.pyplot as plt
+from irlc.ex04.locomotive import LocomotiveEnvironment
+
+class PID:
+    def __init__(self, dt, Kp, Ki, Kd, target):
+        self.Kp = Kp
+        self.Ki = Ki
+        self.Kd = Kd
+        self.dt = dt          # discretization time
+        self.target = target  # target, in our case just a number.
+        self.I = 0            # Internal variables for integral/derivative terms; use these or define your own.
+        self.e_prior = 0      # Previous value of the error. Used in the derivative term. Remember to update it in the pi-function.
+
+    def reset(self):
+        self.I = 0
+        self.e_prior = 0
+
+    def pi(self, x):
+        """
+        Policy for the PID class. x is always a scalar (float) and the output u is a scalar.
+        Should implement (Her25, Algorithm 19)
+
+        :param x: Input state (float)
+        :return: Action to take (float)
+        """
+        # TODO: 6 lines missing.
+        raise NotImplementedError("Compute u here.")
+        return u
+
+
+def pid_explicit():
+    env = LocomotiveEnvironment(m=70, slope=0, dt=0.05, Tmax=15) 
+    pid = PID(dt=0.05, Kp=40, Kd=0, Ki=0, target=0)
+    # Compute the first action using PID control:
+    print(f"When x_0 = 1 then the first action is u_0 = {pid.pi(x=1)} (and should be u_0 = -40.0)") 
+    x0, _ = env.reset()
+    x = [x0]
+    for _ in range(200): # Simulate for 200 steps, i.e. 0.05 * 200 = 10 seconds.
+        x_cur = x[-1] # x_cur is the last state [position, velocity]. Note that you only need to pass the position to your PID controller.
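+        # One possible line (a sketch): u = pid.pi(x_cur[0]), i.e. feed only the position to the controller.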
+        # TODO: 1 lines missing.
+        raise NotImplementedError("Compute action here using the pid class.")
+        u = np.clip(u, -100, 100) # clip actions.
+        xp_, reward, done, truncated, _ = env.step(u)
+        x.append(xp_)
+
+    x = np.stack(x)
+    plt.plot(x[:,0], 'k-', label="PID state trajectory")
+    savepdf("pid_basic")
+    plt.show(block=False)
+
+if __name__ == "__main__":
+    pid_explicit()
diff --git a/irlc/ex04/pid_car.py b/irlc/ex04/pid_car.py
new file mode 100644
index 0000000000000000000000000000000000000000..84627fd578c9601b4acf1a26ea79bd2bb63be21f
--- /dev/null
+++ b/irlc/ex04/pid_car.py
@@ -0,0 +1,61 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+import numpy as np
+from irlc import savepdf
+from irlc.ex04.pid import PID
+from irlc import Agent
+from irlc.ex04.control_environment import ControlEnvironment
+
+class PIDCarAgent(Agent):
+    def __init__(self, env: ControlEnvironment, v_target=0.5, use_both_x5_x3=True):
+        """
+        Define two PID controllers: one for the steering angle, the other for the velocity.
+
+            self.pid_angle = PID(dt=env.discrete_model.dt, Kp=x, ...)
+            self.pid_velocity = PID(dt=env.discrete_model.dt, Kp=z, ...)
+
+        I did not use Kd/Ki; however, you need to think a little about the targets.
+        """
+        # self.pid_angle = ...
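+        # They should have roughly this shape (a sketch; the gains and targets are the
+        # exercise -- target=v_target is a natural choice for the velocity controller):
+        #   self.pid_angle = PID(dt=env.discrete_model.dt, Kp=..., Ki=0, Kd=0, target=...)
+        #   self.pid_velocity = PID(dt=env.discrete_model.dt, Kp=..., Ki=0, Kd=0, target=v_target)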
+        ## TODO: Half of each line of code in the following 2 lines has been replaced by garbage. Make it work and remove the error.
+        #----------------------------------------------------------------------------------------------------------------------------
+        # self.pid_angle = PID(dt=env.discrete_m??????????????????????????????????????
+        # self.pid_velocity = PID(dt=env.discrete_mod???????????????????????????????????????????
+        raise NotImplementedError("Define PID controllers here.")
+        self.use_both_x5_x3 = use_both_x5_x3 # Using both x[3] and x[5] seems to make it a little easier to get a quick lap time, but you can just use x[5] to begin with.
+        super().__init__(env)
+
+    def pi(self, x, k, info=None):
+        """
+        Call the two PID controllers. The steering-angle controller should initially just be based on
+        x[5] (distance to the centerline), but you can later experiment with a linear combination of x[5] and x[3] as input.
+
+        Hints:
+            - To control the velocity, use x[0], the forward velocity of the car.
+            - Start out with a low value of v_target, then tune the controller and look at the animation.
+            - You can evaluate the PID controllers as self.pid_angle.pi(x_input).
+            - Remember the function must return a numpy ndarray with two elements.
+        """
+
+        # TODO: 2 lines missing.
+        raise NotImplementedError("Compute action here. No clipping necesary.")
+        return u
+
+
+if __name__ == "__main__":
+    from irlc.ex01.agent import train
+    from irlc.car.car_model import CarEnvironment
+    import matplotlib.pyplot as plt
+
+    env = CarEnvironment(noise_scale=0, Tmax=30, max_laps=1, render_mode='human')
+    agent = PIDCarAgent(env, v_target=1, use_both_x5_x3=True) # I recommend lowering v_target to make the problem simpler.
+
+    stats, trajectories = train(env, agent, num_episodes=1, return_trajectory=True)
+    env.close()
+    t = trajectories[0]
+    plt.clf()
+    plt.plot(t.time, t.state[:, 0], label="velocity")
+    plt.plot(t.time, t.state[:, 5], label="s (distance to center)")
+    plt.xlabel("Time/seconds")
+    plt.legend()
+    savepdf("pid_car_agent")
+    plt.show()
diff --git a/irlc/ex04/pid_locomotive_agent.py b/irlc/ex04/pid_locomotive_agent.py
new file mode 100644
index 0000000000000000000000000000000000000000..bb2083c454f86465e7edc4d055518898f88fcc5c
--- /dev/null
+++ b/irlc/ex04/pid_locomotive_agent.py
@@ -0,0 +1,70 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+import numpy as np
+import matplotlib.pyplot as plt
+from irlc.ex04.locomotive import LocomotiveEnvironment
+from irlc.ex04.pid import PID
+from irlc import Agent, train
+from irlc import savepdf
+from irlc.ex04.control_environment import ControlEnvironment
+
+class PIDLocomotiveAgent(Agent):
+    def __init__(self, env: ControlEnvironment, dt, Kp=1.0, Ki=0.0, Kd=0.0, target=0):
+        # self.pid = PID(dt=...)
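+        # (a sketch: the constructor arguments map directly, i.e. PID(dt=dt, Kp=Kp, Ki=Ki, Kd=Kd, target=target))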
+        # TODO: 1 lines missing.
+        raise NotImplementedError("Make a pid instance here.")
+        super().__init__(env)
+
+    def pi(self, x, k, info=None):
+        # TODO: 1 lines missing.
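+        # One possible line (a sketch): u = self.pid.pi(x[0]), with x = [position, velocity].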
+        raise NotImplementedError("Get the correct action using self.pid.pi(...). Same as previous exercise")
+        u = np.clip(u, self.env.action_space.low[0], self.env.action_space.high[0]) # Clip actions to ensure u is in the action space
+        return np.asarray([u]) # Must return actions as numpy ndarrays.
+
+def fixplt():
+    plt.legend()
+    plt.grid(True)
+    plt.box(False)
+    plt.xlabel('Time/seconds')
+    plt.ylabel('$x(t)$')
+
+def pid_locomotive():
+    dt = .08
+    m = 70
+    Tmax = 15
+
+    env = LocomotiveEnvironment(m=m, slope=0, dt=dt, Tmax=Tmax, render_mode='human')
+    Kp = 40
+    agent = PIDLocomotiveAgent(env, dt=dt, Kp=Kp, Ki=0, Kd=0, target=0)
+    stats, traj = train(env, agent, return_trajectory=True)
+    plt.plot(traj[0].time, traj[0].state[:, 0], '-', label=f"$K_p={Kp}$")
+    fixplt()
+    savepdf('pid_locomotive_Kp')
+    plt.show()
+
+    # Now include a derivative term:
+    Kp = 40
+    for Kd in [10, 50, 100]:
+        agent = PIDLocomotiveAgent(env, dt=dt, Kp=Kp, Ki=0, Kd=Kd, target=0)
+        stats, traj = train(env, agent, return_trajectory=True)
+        plt.plot(traj[0].time, traj[0].state[:, 0], '-', label=f"$K_p={Kp}, K_d={Kd}$")
+    fixplt()
+    savepdf('pid_locomotive_Kd')
+    plt.show()
+    env.close()
+
+    # Integral test: Include a slope term, so a PD controller is left with a steady-state error. For fun, let's also change the target.
+    env = LocomotiveEnvironment(m=m, slope=2, dt=dt, Tmax=20, target=1, render_mode='human')
+    Kd = 50
+    for Ki in [0, 10]:
+        agent = PIDLocomotiveAgent(env, dt=dt, Kp=Kp, Ki=Ki, Kd=Kd, target=1)
+        stats, traj = train(env, agent, return_trajectory=True)
+        x = traj[0].state
+        tt = traj[0].time
+        plt.plot(tt, x[:, 0], '-', label=f"$K_p={Kp}, K_i={Ki}, K_d={Kd}$")
+    fixplt()
+    savepdf('pid_locomotive_Ki')
+    plt.show()
+    env.close()
+
+if __name__ == '__main__':
+    pid_locomotive()
diff --git a/irlc/ex04/pid_lunar.py b/irlc/ex04/pid_lunar.py
new file mode 100644
index 0000000000000000000000000000000000000000..f450714b27165ca13130fdd83a9ceee2e99f550c
--- /dev/null
+++ b/irlc/ex04/pid_lunar.py
@@ -0,0 +1,136 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+"""
+For information about the Apollo 11 lunar lander see:
+https://eli40.com/lander/02-debrief/
+
+For code for the Gym LunarLander environment see:
+
+https://github.com/openai/gym/blob/master/gym/envs/box2d/lunar_lander.py
+
+This particular controller is inspired by:
+
+https://github.com/wfleshman/PID_Control/blob/master/pid.py
+
+However, I had better success with different parameters for the PID controller.
+"""
+import gymnasium as gym
+import matplotlib.pyplot as plt
+import numpy as np
+from irlc import train
+from irlc.ex04.pid import PID
+from irlc import Agent
+from irlc.ex04 import speech
+from irlc import savepdf
+from gymnasium.envs.box2d.lunar_lander import FPS
+
+class ApolloLunarAgent(Agent):
+    def __init__(self, env, dt, Kp_altitude=18, Kd_altitude=13, Kp_angle=-18, Kd_angle=-18):
+        """ Set up PID parameters for the two controllers (one controlling the altitude, another the angle of the lander) """
+        self.Kp_altitude = Kp_altitude
+        self.Kd_altitude = Kd_altitude
+        self.Kp_angle = Kp_angle
+        self.Kd_angle = Kd_angle
+        self.error_angle = []
+        self.error_altitude = []
+        self.dt = dt
+        super().__init__(env)
+
+    def pi(self, x, k, info=None):
+        """ From documentation: https://github.com/openai/gym/blob/master/gym/envs/box2d/lunar_lander.py
+             x (list): The state. Attributes:
+              x[0] is the horizontal coordinate
+              x[1] is the vertical coordinate
+              x[2] is the horizontal speed
+              x[3] is the vertical speed
+              x[4] is the angle
+              x[5] is the angular speed
+              x[6] 1 if first leg has contact, else 0
+              x[7] 1 if second leg has contact, else 0
+
+              Your implementation should follow what happens in:
+
+              https://github.com/wfleshman/PID_Control/blob/master/pid.py
+
+              I.e. you have to compute the targets for the angle and altitude as done in that code (and explained in its documentation).
+              Note the target for the PID controllers is 0.
+        """
+        if k == 0:
+            """ At time t=0 we set up the two PID controllers. You don't have to change these lines. """
+            self.pid_alt = PID(dt=self.dt, Kp=self.Kp_altitude, Kd=self.Kd_altitude, Ki=0, target=0)
+            self.pid_ang = PID(dt=self.dt, Kp=self.Kp_angle, Kd=self.Kd_angle, Ki=0, target=0)
+
+        """ Compute the PID control signals using two calls to the PID controllers such as: """
+        # alt_adj = self.pid_alt.pi(...)
+        # ang_adj = self.pid_ang.pi(...)
+        """ You need to specify the inputs to the controllers. Look at the code in the link above and implement a comparable control rule. 
+        The inputs you give to the controller will be simple functions of the coordinates of x, i.e. x[0], x[1], and so on.
+        """
+        # TODO: 2 lines missing.
+        raise NotImplementedError("Compute the alt_adj and ang_adj as in the gitlab repo (see code comment).")
+
+        u = np.array([alt_adj, ang_adj])
+        u = np.clip(u, -1, +1)
+
+        # If the legs are on the ground we made it; kill the engines.
+        if (x[6] or x[7]):
+            u[:] = 0
+        # Record stats.
+        self.error_altitude.append(self.pid_alt.e_prior)
+        self.error_angle.append(self.pid_ang.e_prior)
+        return u
+
+def get_lunar_lander(env):
+    dt = 1/FPS # Get time discretization from environment.
+    spars = ['Kp_altitude', 'Kd_altitude', 'Kp_angle', 'Kd_angle']
+    def x2pars(x2):
+        return {spars[i]: x2[i] for i in range(4)}
+    x_opt = np.asarray([52.23302414, 34.55938593, -80.68722976, -38.04571655])  # Tuned values of the four parameters above; see the module docstring.
+    agent = ApolloLunarAgent(env, dt=dt, **x2pars(x_opt))
+    return agent
+
+def lunar_single_mission():
+    env = gym.make('LunarLanderContinuous-v3', render_mode='human')
+    env._max_episode_steps = 1000  # We don't want it to time out.
+
+    agent = get_lunar_lander(env)
+    stats, traj = train(env, agent, return_trajectory=True, num_episodes=1)
+    env.close()
+    if traj[0].reward[-1] == 100:
+        print("A small step for man, a giant leap for mankind!")
+    elif traj[0].reward[-1] == -100:
+        print(speech)
+    else:
+        print("Environment timed out and the lunar module is just kind of floating around")
+
+    states = np.stack(traj[0].state)
+    plt.plot(states[:, 0], label='x')
+    plt.plot(states[:, 1], label='y')
+    plt.plot(states[:, 2], label='vx')
+    plt.plot(states[:, 3], label='vy')
+    plt.plot(states[:, 4], label='theta')
+    plt.plot(states[:, 5], label='vtheta')
+    plt.legend()
+    plt.grid()
+    plt.ylim(-1.1, 1.1)
+    plt.title('PID Control')
+    plt.ylabel('Value')
+    plt.xlabel('Steps')
+    savepdf("pid_lunar_trajectory")
+    plt.show(block=False)
+
+def lunar_average_performance():
+    env = gym.make('LunarLanderContinuous-v3', render_mode=None) # Set render_mode = 'human' to see what it does.
+    env._max_episode_steps = 1000  # To avoid the environment timing out after just 200 steps
+
+    agent = get_lunar_lander(env)
+    stats, traj = train(env, agent, return_trajectory=True, num_episodes=20)
+    env.close()
+
+    n_won = sum(t.reward[-1] == 100 for t in traj)
+    n_lost = sum(t.reward[-1] == -100 for t in traj)
+    print("Successful landings: ", n_won, "of 20")
+    print("Unsuccessful landings: ", n_lost, "of 20")
+
+if __name__ == "__main__":
+    lunar_single_mission() 
+    lunar_average_performance() 
diff --git a/irlc/ex04/pid_pendulum.py b/irlc/ex04/pid_pendulum.py
new file mode 100644
index 0000000000000000000000000000000000000000..82e865b853b734b3003f42a04df6cba43f3b9a2e
--- /dev/null
+++ b/irlc/ex04/pid_pendulum.py
@@ -0,0 +1,74 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+import numpy as np
+import matplotlib.pyplot as plt
+from irlc import Agent, savepdf
+from irlc.ex04.pid import PID
+from irlc.ex01.agent import train
+np.random.seed(32)  # Fix the random seed for reproducibility.
+
+class PIDPendulumAgent(Agent):
+    def __init__(self, env, dt, Kp=1.0, Ki=0.0, Kd=0.0, target_angle=0):
+        """ Balance_to_x0 = True implies the agent should also try to get the cartpole to x=0 (i.e. center).
+        If balance_to_x0 = False implies it is good enough for the agent to get the cart upright.
+        """
+        self.pid = PID(dt=dt, Kp = Kp, Ki=Ki, Kd=Kd, target=target_angle)
+        super().__init__(env)
+
+    def pi(self, x, k, info=None): 
+        """ Compute action using self.pid. YCartpoleou have to think about the inputs as they will depend on
+        whether balance_to_x0 is true or not.  """
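+        # A sketch of one possibility (assuming, as in the locomotive exercise, that
+        # the action must be returned as an ndarray):
+        #   u = self.pid.pi(x[0]); return np.asarray([u])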
+        # TODO: 2 lines missing.
+        raise NotImplementedError("Implement function body")
+        return u
+
+
+def get_offbalance_pendulum(waiting_steps=30):
+    from irlc.ex04.model_pendulum import ThetaPendulumEnvironment
+    env = ThetaPendulumEnvironment(Tmax=10, render_mode='human')
+
+    env.reset()
+    env.state[0] = 0  # Start upright (theta = 0)...
+    env.state[1] = 0  # ...and at rest (d theta/dt = 0).
+    for _ in range(waiting_steps):  # Apply a constant torque for waiting_steps steps to push the pendulum out of balance.
+        env.step(1)
+    return env
+
+def plot_trajectory(trajectory):
+    t = trajectory
+    plt.plot(t.time, t.state[:,0], label="Angle $\\theta$")
+    plt.plot(t.time, t.state[:,1], label="Angular speed $\\dot{\\theta}$")
+    plt.xlabel("Time")
+    plt.legend()
+
+
+target_angle = np.pi/6 # The target angle for the second task in the pendulum problem.
+if __name__ == "__main__":
+    """
+    First task: Bring the pendulum upright from a slightly off-balance position.
+    The only quantity we care about is the angle theta, which should be 0 (upright).
+    """
+    env = get_offbalance_pendulum(30)
+    ## TODO: Half of each line of code in the following 1 lines has been replaced by garbage. Make it work and remove the error.
+    #----------------------------------------------------------------------------------------------------------------------------
+    # agent = PIDPendulumAgent(env, dt=env.??????????????????????????????????????
+    raise NotImplementedError("Define your agent here (including parameters)")
+    _, trajectories = train(env, agent, num_episodes=1, return_trajectory=True, reset=False)  # Note reset=False to maintain initial conditions.
+    env.close()
+    plot_trajectory(trajectories[0])
+    savepdf("pid_pendulumA")
+    plt.show()
+
+    """
+    Second task: We will now try to get to a target angle of target_angle=np.pi/6.    
+    """
+    env = get_offbalance_pendulum(30)
+    ## TODO: Half of each line of code in the following 1 lines has been replaced by garbage. Make it work and remove the error.
+    #----------------------------------------------------------------------------------------------------------------------------
+    # agent = PIDPendulumAgent(env, dt=env.dt,?????????????????????????????????????????
+    raise NotImplementedError("Define your agent here (include the target_angle parameter to the agent!)")
+    _, trajectories = train(env, agent, num_episodes=1, return_trajectory=True, reset=False)  # Note reset=False to maintain initial conditions.
+    env.close()
+    plot_trajectory(trajectories[0])
+    print("Final state is x(t_F) =", trajectories[0].state[-1], f"goal [{target_angle:.2f}, 0]")
+    savepdf("pid_pendulumB")
+    plt.show()