diff --git a/irlc/ex06/__init__.py b/irlc/ex06/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..6e26755e5ec4fe79d8350778babe173741127191
--- /dev/null
+++ b/irlc/ex06/__init__.py
@@ -0,0 +1,2 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+"""This directory contains the exercises for week 6."""
diff --git a/irlc/ex06/boeing_lqr.py b/irlc/ex06/boeing_lqr.py
new file mode 100644
index 0000000000000000000000000000000000000000..e26f5e6bdc1a5de218418b09e2d16b4d799a32a0
--- /dev/null
+++ b/irlc/ex06/boeing_lqr.py
@@ -0,0 +1,85 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+"""
+
+References:
+  [Her25] Tue Herlau. Sequential decision making. (Freely available online), 2025.
+"""
+import numpy as np
+import matplotlib.pyplot as plt
+from irlc import savepdf
+from irlc import train
+from irlc.ex06.model_boeing import BoeingEnvironment
+from irlc.ex06.lqr_agent import LQRAgent
+from irlc.ex03.control_model import ControlModel
+import scipy
+
+
+def boeing_simulation():
+    env = BoeingEnvironment(Tmax=10)
+    model = env.discrete_model.continuous_model # get the model from the Boeing environment
+    dt = env.dt # Get the discretization time.
+    A, B, d = compute_A_B_d(model, dt)
+    # Use compute_Q_R_q to get the Q, R, and q matrices in the discretized system
+    # TODO: 1 lines missing.
+    raise NotImplementedError("Compute Q, R and q here")
+    ## TODO: Half of each line of code in the following 1 lines have been replaced by garbage. Make it work and remove the error.
+    #----------------------------------------------------------------------------------------------------------------------------
+    # agent = LQRAgent(env, A=A??????????????????????????
+    raise NotImplementedError("Use your LQRAgent to plan using the system matrices.")
+    stats, trajectories = train(env, agent, return_trajectory=True)
+    return stats, trajectories, env
+
+def compute_Q_R_q(model : ControlModel, dt : float):
+    cost = model.get_cost() # Get the continuous-time cost-function
+    # use print(cost) to see what it contains.
+    # Then get the discretized matrices using the techniques described in (Her25, Subsection 13.1.6).
+    # TODO: 3 lines missing.
+    raise NotImplementedError("Insert your solution and remove this error.")
+    return Q, R, q
+
+def compute_A_B_d(model : ControlModel, dt : float):
+    if model.d is None:
+        d = np.zeros((model.state_size,))  # Ensure d is set to a zero vector if it is not defined.
+    else:
+        d = model.d
+
+    A_discrete = scipy.linalg.expm(model.A * dt)  # This is the discrete A-matrix computed using the matrix exponential
+    # Now it is your job to define B_discrete and d_discrete.
+    # TODO: 2 lines missing.
+    raise NotImplementedError("Insert your solution and remove this error.")
+    return A_discrete, B_discrete, d_discrete.flatten()
+
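+def _discretization_sanity_check():
+    """Optional sanity check (a sketch, not part of the exercises): once compute_A_B_d has been
+    implemented, the exact discretization should be close to a forward-Euler discretization
+    A_euler = I + A*dt, B_euler = B*dt when dt is small. The continuous model is assumed to
+    expose the matrices .A and .B (the .A attribute is also used in compute_A_B_d above)."""
+    model = BoeingEnvironment(Tmax=10).discrete_model.continuous_model
+    dt = 0.001  # a small time step, so the Euler approximation should be accurate
+    A_d, B_d, _ = compute_A_B_d(model, dt)
+    A_euler = np.eye(model.state_size) + model.A * dt
+    B_euler = model.B * dt
+    print("Max difference between exact and Euler A:", np.abs(A_d - A_euler).max())
+    print("Max difference between exact and Euler B:", np.abs(B_d - B_euler).max())
+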
+def boeing_experiment():
+    _, trajectories, env = boeing_simulation()
+    model = env.discrete_model.continuous_model
+
+    dt = env.dt 
+    Q, R, q = compute_Q_R_q(model, dt)
+    print("Discretization time is", dt)
+    print("Original q-vector was:", model.get_cost().q)
+    print("Discretized version is:", q) 
+
+    t = trajectories[-1]
+    out = t.state @ model.P.T
+
+    plt.plot(t.time, out[:, 0], '-', label=env.observation_labels[0])
+    plt.plot(t.time, out[:, 1], '-', label=env.observation_labels[1])
+    plt.grid()
+    plt.legend()
+    plt.xlabel("Time/seconds")
+    plt.ylabel("Output")
+    savepdf("boing_lqr_output")
+    plt.show(block=False)
+    plt.close()
+
+    plt.plot(t.time[:-1], t.action[:, 0], '-', label=env.action_labels[0])
+    plt.plot(t.time[:-1], t.action[:, 1], '-', label=env.action_labels[1])
+    plt.xlabel("Time/seconds")
+    plt.ylabel("Control action")
+    plt.grid()
+    plt.legend()
+    savepdf("boing_lqr_action")
+    plt.show()
+
+if __name__ == "__main__":
+    boeing_experiment()
diff --git a/irlc/ex06/dlqr.py b/irlc/ex06/dlqr.py
new file mode 100644
index 0000000000000000000000000000000000000000..d8b790331f228223a82544f52ba7627a3e36668a
--- /dev/null
+++ b/irlc/ex06/dlqr.py
@@ -0,0 +1,207 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+"""
+
+References:
+  [Her25] Tue Herlau. Sequential decision making. (Freely available online), 2025.
+"""
+import numpy as np
+import matplotlib.pyplot as plt
+from irlc import bmatrix
+from irlc import savepdf
+
+
+
+def LQR(A : list,  # Dynamic
+        B : list,  # Dynamics
+        d : list =None,  # Dynamics (optional)
+        Q : list=None,
+        R: list=None,
+        H : list=None,
+        q : list=None,
+        r : list=None,
+        qc : list=None,
+        QN : np.ndarray =None, # Terminal cost term
+        qN : np.ndarray=None,  # Terminal cost term
+        qcN : np.ndarray =None,  # Terminal cost term.
+        mu : float =0 # regularization parameter which will only be relevant next week.
+        ):
+    r"""
+    Implement the LQR as defined in (Her25, Algorithm 22). I recommend viewing this documentation online (documentation for week 6).
+
+    When you solve this exercise, look at the algorithm in the book. Since the LQR problem is on the form:
+
+    .. math::
+
+        x_{k+1} = A_k x_k + B_k u_k + d_k
+
+    For :math:`k=0,\dots,N-1` this means there are :math:`N` matrices :math:`A_k`. This is implemented by assuming that
+    :python:`A` (i.e., the input argument) is a :python:`list` of length :math:`N` so that :python:`A[k]` corresponds
+    to :math:`A_k`.
+
+    Similar conventions are used for the cost term (please see the lecture notes or the online documentation for their meaning). Recall it has the form:
+
+    .. math::
+
+        c(x_k, u_k) = \frac{1}{2} \mathbf x_k^\top Q_k \mathbf x_k + \frac{1}{2} \mathbf q_k^\top \mathbf x_k + q_k + \cdots
+
+    When the function is called, the vector :math:`\textbf{q}_k` corresponds to :python:`q` and the constant :math:`q_k` corresponds to :python:`qc` (q-constant).
+
+    .. note::
+
+        Only the terms :python:`A` and :python:`B` are required. The rest of the terms will default to 0-matrices.
+
+    The LQR algorithm will ultimately compute a control law of the form:
+
+    .. math::
+
+        \mathbf u_k = L_k \mathbf x_k + \mathbf l_k
+
+    And a cost-to-go function as:
+
+    .. math::
+
+        J_k(\mathbf x_k) = \frac{1}{2} \mathbf x_k^\top V_k \mathbf x_k + \mathbf v_k^\top \mathbf x_k + v_{c,k}
+
+    The lists :python:`L` and :python:`l` contain :math:`N` terms, while :python:`V`, :python:`v` and :python:`vc` contain :math:`N+1` terms. The function then returns :python:`(L, l), (V, v, vc)` so that :python:`L[k]` corresponds to :math:`L_k`.
+
+    :param A: A list of :python:`np.ndarray` containing all terms :math:`A_k`
+    :param B: A list of :python:`np.ndarray` containing all terms :math:`B_k`
+    :param d: A list of :python:`np.ndarray` containing all terms  :math:`\mathbf d_k` (optional)
+    :param Q: A list of :python:`np.ndarray` containing all terms  :math:`Q_k` (optional)
+    :param R: A list of :python:`np.ndarray` containing all terms  :math:`R_k` (optional)
+    :param H: A list of :python:`np.ndarray` containing all terms  :math:`H_k` (optional)
+    :param q: A list of :python:`np.ndarray` containing all terms  :math:`\mathbf q_k` (optional)
+    :param r: A list of :python:`np.ndarray` containing all terms  :math:`\mathbf r_k` (optional)
+    :param qc: A list of :python:`float` containing all terms  :math:`q_k` (i.e., constant terms) (optional)
+    :param QN: A :python:`np.ndarray` containing the terminal cost term :math:`Q_N` (optional)
+    :param qN: A :python:`np.ndarray` containing the terminal cost term :math:`\mathbf q_N` (optional)
+    :param qcN: A :python:`np.ndarray` containing the terminal cost term :math:`q_N`
+    :param mu: A regularization term which is useful for iterative-LQR (next week). Defaults to 0.
+    :return: A tuple of the form :python:`(L, l), (V, v, vc)` corresponding to the control and cost-matrices.
+    """
+    N = len(A)
+    n,m = B[0].shape
+    # Initialize empty lists for control matrices and cost terms
+    L, l = [None]*N, [None]*N
+    V, v, vc = [None]*(N+1), [None]*(N+1), [None]*(N+1)
+    # Initialize constant cost-function terms to zero if not specified.
+    # They will be initialized to zero, meaning they have no effect on the update rules.
+    QN = np.zeros((n,n)) if QN is None else QN
+    qN = np.zeros((n,)) if qN is None else qN
+    qcN = 0 if qcN is None else qcN
+    H, q, qc, r = init_mat(H,m,n,N=N), init_mat(q,n,N=N), init_mat(qc,1,N=N), init_mat(r,m,N=N)
+    d = init_mat(d,n, N=N)
+    """ In the next line, you should initialize the last cost-term. This is similar to how we in DP had the initialization step
+    > J_N(x_N) = g_N(x_N)
+    Except that since x_N is no longer discrete, we store it as matrices/vectors representing a second-order polynomial, i.e.    
+    > J_N(X_N) = 1/2 * x_N' V[N] x_N + v[N]' x_N + vc[N]
+    """
+    # TODO: 1 lines missing.
+    raise NotImplementedError("Initialize V[N], v[N], vc[N] here")
+
+    In = np.eye(n)
+    for k in range(N-1,-1,-1):
+        # When you update S_uu and S_ux remember to add regularization as the terms ... (V[k+1] + mu * In) ...
+        # Note that to compute x = A^{-1} y in a numerically stable manner, you should use
+        # >>> x = np.linalg.solve(A, y)
+        # (a tiny illustration of solve vs. an explicit inverse is sketched after this function)
+        # The terms you need to update will be, in turn:
+        # Suu = ...
+        # Sux = ...
+        # Su = ...
+        # L[k] = ...
+        # l[k] = ...
+        # V[k] = ...
+        # V[k] = ...
+        # v[k] = ...
+        # vc[k] = ...
+        ## TODO: Half of each line of code in the following 4 lines have been replaced by garbage. Make it work and remove the error.
+        #----------------------------------------------------------------------------------------------------------------------------
+        # Suu = R[k] + B[k].T @ (????????????????????????
+        # Sux = H[k] + B[k].T @ (????????????????????????
+        # Su = r[k] + B[k].T @ v[k + 1?????????????????????????????
+        # L[k] = -np.linal?????????????????
+        raise NotImplementedError("Insert your solution and remove this error.")
+        l[k] = -np.linalg.solve(Suu, Su) # You get this for free. Notice how we use np.linalg.solve(A,x) to compute A^{-1} x
+        V[k] = Q[k] + A[k].T @ V[k+1] @ A[k] - L[k].T @ Suu @ L[k]
+        V[k] = 0.5 * (V[k] + V[k].T)  # I recommend putting this here to keep V symmetric (and numerically well-behaved)
+        # You get these for free: Compare to the code in the algorithm.
+        v[k] = q[k] + A[k].T @ (v[k+1]  + V[k+1] @ d[k]) + Sux.T @ l[k]
+        vc[k] = vc[k+1] + qc[k] + d[k].T @ v[k+1] + 1/2*( d[k].T @ V[k+1] @ d[k] ) + 1/2*l[k].T @ Su
+
+    return (L,l), (V,v,vc)
+
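+# A small, optional illustration of the numerical advice given in the backward recursion above:
+# x = A^{-1} y should be computed with np.linalg.solve rather than by forming an explicit
+# inverse. Both give the same result on well-conditioned problems, but solve is faster and
+# numerically more robust. This sketch is not used by the LQR algorithm itself.
+def _solve_vs_inverse_example():
+    rng = np.random.default_rng(0)
+    A_demo = rng.standard_normal((3, 3)) + 3 * np.eye(3)  # a well-conditioned example matrix
+    y_demo = rng.standard_normal((3,))
+    x_solve = np.linalg.solve(A_demo, y_demo)   # preferred
+    x_inv = np.linalg.inv(A_demo) @ y_demo      # avoid: explicit inverse
+    assert np.allclose(x_solve, x_inv)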
+
+def init_mat(X, a, b=None, N=None):
+    """
+    Helper function. Check if X is None, and if so return a list
+      [A, A,....]
+    which is N long and where each A is a (a x b) zero-matrix, else returns X repeated N times:
+     [X, X, ...]
+    """
+    M0 = np.zeros((a,) if b is None else (a, b))
+    if X is not None:
+        return [m if m is not None else M0 for m in X]
+    else:
+        return [M0] * N
+
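+# Optional illustration of init_mat (a sketch, never called by the algorithm): with X=None you
+# get N zero-matrices of the requested shape; with a list, any None entries are replaced by zeros.
+def _init_mat_example():
+    zs = init_mat(None, 2, N=3)
+    assert len(zs) == 3 and all(z.shape == (2,) for z in zs)
+    mixed = init_mat([None, np.ones((2,))], 2, N=2)
+    assert np.allclose(mixed[0], 0) and np.allclose(mixed[1], 1)
+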
+def lqr_rollout(x0,A,B,d,L,l):
+    """
+    Compute a rollout (states and actions) given solution from LQR controller function.
+
+    x0 is a vector (starting state), and A, B, d and L, l are lists of system/control matrices.
+    """
+    x, states,actions = x0, [x0], []
+    n,m = B[0].shape
+    N = len(L)
+    d = init_mat(d,n,1,N)  # Initialize as a list of zero matrices [ np.zeros((n,1)), np.zeros((n,1)), ...]
+    l = init_mat(l,m,1,N)  # Initialize as a list of zero matrices [ np.zeros((m,1)), np.zeros((m,1)), ...]
+
+    for k in range(N):
+        u = L[k] @ x + l[k]
+        x = A[k] @ x + B[k] @ u + d[k]
+        actions.append(u)
+        states.append(x)
+    return states, actions
+
+if __name__ ==  "__main__":
+    """
+    Solve this problem (see also lecture notes for the same example)
+    http://cse.lab.imtlucca.it/~bemporad/teaching/ac/pdf/AC2-04-LQR-Kalman.pdf
+    """
+    N = 20
+    A = np.ones((2,2))
+    A[1,0] = 0
+    B = np.asarray([[0], [1]])
+    Q = np.zeros((2,2))
+    R = np.ones((1,1))
+
+    print("System matrices A, B, Q, R")
+    print(bmatrix(A))  
+    print(bmatrix(B))  
+    print(bmatrix(Q))  
+    print(bmatrix(R))  
+
+    for rho in [0.1, 10, 100]:
+        Q[0,0] = 1/rho
+        (L,l), (V,v,vc) = LQR(A=[A]*N, B=[B]*N, d=None, Q=[Q]*N, R=[R]*N, QN=Q)
+
+        x0 = np.asarray( [[1],[0]])
+        trajectory, actions = lqr_rollout(x0,A=[A]*N, B=[B]*N, d=None,L=L,l=l)
+
+        xs = np.concatenate(trajectory, axis=1)[0,:]
+
+        plt.plot(xs, 'o-', label=f'rho={rho}')
+
+        k = 10
+        print(f"Control matrix in u_k = L_k x_k + l_k at k={k}:", L[k])
+    for k in [N-1,N-2,0]:
+        print(f"L[{k}] is:", L[k].round(4))
+    plt.title("Double integrator")
+    plt.xlabel('Steps $k$')
+    plt.ylabel('$x_1 = $ x[0]')
+    plt.legend()
+    plt.grid()
+    savepdf("dlqr_double_integrator")
+    plt.show()
diff --git a/irlc/ex06/dlqr_check.py b/irlc/ex06/dlqr_check.py
new file mode 100644
index 0000000000000000000000000000000000000000..3d86db35853cffea347fbb5a5666a7ba9ec47681
--- /dev/null
+++ b/irlc/ex06/dlqr_check.py
@@ -0,0 +1,40 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+import numpy as np
+from irlc.ex06.dlqr import LQR
+
+def urnd(sz):
+    return np.random.uniform(-1, 1, sz)
+
+def check_LQR():
+    np.random.seed(42)
+    n,m,N = 3,2,4
+    """
+    Create a randomized, nonsense control problem and solve it. Since the seed is fixed we can expect the same solution.
+    """
+    # system terms
+    A = [urnd((n, n)) for _ in range(N)]
+    B = [urnd((n, m)) for _ in range(N)]
+    d = [urnd((n,)) for _ in range(N)]
+    # cost terms
+    Q = [urnd((n, n)) for _ in range(N)]
+    R = [urnd((m, m)) for _ in range(N)]
+    H = [urnd((m, n)) for _ in range(N)]
+    q = [urnd((n,)) for _ in range(N)]
+    qc = [urnd(()) for _ in range(N)]
+    r = [urnd((m,)) for _ in range(N)]
+    # terminal costs
+    QN = urnd((n, n))
+    qN = urnd((n,))
+    qcN = urnd(())
+    return LQR(A=A, B=B, d=d, Q=Q, R=R, H=H, q=q, r=r, qc=qc, QN=QN, qN=qN, qcN=qcN, mu=0)
+
+
+if __name__ == "__main__":
+    (L, l), (V, v, vc) = check_LQR()
+    N = len(V)-1
+    print(", ".join([f"l[{k}]={l[k].round(4)}" for k in [N - 1, N - 2, 0]]))  
+    print("\n".join([f"L[{k}]={L[k].round(4)}" for k in [N - 1, N - 2, 0]]))
+
+    print("\n".join([f"V[{k}]={V[k].round(4)}" for k in [0]]))
+    print(", ".join([f"v[{k}]={v[k].round(4)}" for k in [N, N - 1, 0]]))
+    print(", ".join([f"vc[{k}]={vc[k].round(4)}" for k in [N, N - 1, 0]]))  
diff --git a/irlc/ex06/lqr_agent.py b/irlc/ex06/lqr_agent.py
new file mode 100644
index 0000000000000000000000000000000000000000..f62ec55971e3757bb84a7ac113a24fee99462c4e
--- /dev/null
+++ b/irlc/ex06/lqr_agent.py
@@ -0,0 +1,54 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.ex04.locomotive import LocomotiveEnvironment
+from irlc import train, plot_trajectory, savepdf, Agent
+from irlc.ex06.dlqr import LQR
+from irlc.ex04.control_environment import ControlEnvironment
+import numpy as np
+import matplotlib.pyplot as plt
+
+class LQRAgent(Agent):
+    def __init__(self, env : ControlEnvironment, A, B, Q, R, d=None, q=None):
+        N = int((env.Tmax / env.dt)) # Obtain the planning horizon
+        """ Define A, B as the list of A/B matrices here. I.e. x[t+1] = A x[t] + B x[t] + d.
+        You should use the function model.f to do this, which has build-in functionality to compute Jacobians which will be equal to A, B """
+        """ Define self.L, self.l here as the (lists of) control matrices. """
+        ## TODO: Half of each line of code in the following 1 lines have been replaced by garbage. Make it work and remove the error.
+        #----------------------------------------------------------------------------------------------------------------------------
+        # (self.L, self.l), _ = LQR(A=[A]*N, B=[B]*N, d=[d]*N if d is not No???????????????????????????????????????????????????????????????????
+        raise NotImplementedError("Insert your solution and remove this error.")
+        self.dt = env.dt
+        super().__init__(env)
+
+    def pi(self,x, k, info=None):
+        """
+        Compute the action here using u = L_k x + l_k.
+        You should use self.L, self.l to get the control matrices (i.e. L_k = self.L[k] ),
+        """
+        # TODO: 1 lines missing.
+        raise NotImplementedError("Compute current action here")
+        return u
+
+
+if __name__ == "__main__":
+    # Make a guess at the system matrices for planning. We will return on how to compute these exactly in a later exercise.
+    A = np.ones((2, 2))
+    A[1, 0] = 0
+    B = np.asarray([[0], [1]])
+    Q = np.eye(2)*3
+    R = np.ones((1, 1))*2
+    q = np.asarray([-1.1, 0 ])
+
+    # Create and test our LQRAgent.
+    env = LocomotiveEnvironment(render_mode='human', Tmax=10, slope=1)
+    agent = LQRAgent(env, A=A, B=B, Q=Q, R=R, q=q)
+    stats, traj = train(env, agent, num_episodes=1)
+
+    env.reset()
+    savepdf("locomotive_snapshot.pdf", env=env) # Make a plot for the exercise file.
+    env.state_labels = ["x(t)", "v(t)"]
+    env.action_labels = ["u(t)"]
+    plot_trajectory(traj[0], env)
+    plt.show(block=True)
+    savepdf("lqr_agent")
+    plt.show()
+    env.close()
diff --git a/irlc/ex06/lqr_pid.py b/irlc/ex06/lqr_pid.py
new file mode 100644
index 0000000000000000000000000000000000000000..136cae2ba5a2bebcf45880daec13e853a7bee9b4
--- /dev/null
+++ b/irlc/ex06/lqr_pid.py
@@ -0,0 +1,79 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+import matplotlib.pyplot as plt
+import numpy as np
+from irlc import savepdf, train
+from irlc.ex04.pid_locomotive_agent import PIDLocomotiveAgent
+from irlc.ex06.lqr_agent import LQRAgent
+from irlc.ex04.model_harmonic import HarmonicOscilatorEnvironment
+from irlc.ex06.boeing_lqr import compute_A_B_d, compute_Q_R_q
+
+class ConstantLQRAgent(LQRAgent):
+    # TODO: 3 lines missing.
+    raise NotImplementedError("Complete this agent here. You need to update the policy-function: def pi(self, ..).")
+
+def get_Kp_Kd(L0):
+    # TODO: 1 lines missing.
+    raise NotImplementedError("Use lqr_agent.L to define Kp and Kd.")
+    return Kp, Kd
+
+
+if __name__ == "__main__":
+    Delta = 0.06  # Time discretization constant
+    # Define a harmonic oscillator environment. Pass render_mode='human' to see a visualization.
+    env = HarmonicOscilatorEnvironment(Tmax=8, dt=Delta, m=0.5, R=np.eye(1) * 8, render_mode=None)  # set render_mode='human' to see the oscillator.
+    model = env.discrete_model.continuous_model # Get the ControlModel corresponding to this environment.
+
+
+    # Compute the discretized A, B and d matrices using the helper functions we defined in the Boeing problem.
+    # Note that these are for the discrete environment: x_{k+1} = A x_k + B u_k + d
+    A, B, d = compute_A_B_d(model, Delta)
+    Q, R, q = compute_Q_R_q(model, Delta)
+
+    # Run the LQR agent
+    lqr_agent = LQRAgent(env, A=A, B=B, d=d, Q=Q, R=R, q=q)
+    _, traj1 = train(env, lqr_agent, return_trajectory=True)
+
+    # Part 1. Build an agent that always takes actions u_k = L_0 x_k + l_0
+    constant_agent = ConstantLQRAgent(env, A=A, B=B, d=d, Q=Q, R=R, q=q)
+    # Check that its policy is independent of $k$:
+    x0, _ = env.reset() 
+    print(f"Initial state is {x0=}")
+    print(f"Action at time step k=0 {constant_agent.pi(x0, k=0)=}")
+    print(f"Action at time step k=5 (should be the same) {constant_agent.pi(x0, k=0)=}") 
+
+    _, traj2 = train(env, constant_agent, return_trajectory=True)
+
+    # Part 2. Use the L and l matrices (see lqr_agent.L and lqr_agent.l)
+    # to select Kp and Kd in a PID agent. Then let's use the Locomotive agent to see the effect of the controller.
+    # Use render_mode='human' to see its effect.
+    # We only need to use L.
+    # Hint: compare the form of the LQR and PID controller and use that to select Kp and Kd.
+    Kp, Kd = get_Kp_Kd(lqr_agent.L[0]) # Use lqr_agent.L to define Kp and Kd.
+
+    # Define and run the PID agent.
+    pid_agent = PIDLocomotiveAgent(env, env.dt, Kp=Kp, Kd=Kd)
+    _, traj3 = train(env, pid_agent, return_trajectory=True)
+
+    # Plot all actions and state sequences.
+    plt.figure(figsize=(10,5))
+    plt.grid()
+    plt.plot(traj1[0].time[:-1], traj1[0].action, label="Optimal LQR action sequence")
+    plt.plot(traj2[0].time[:-1], traj2[0].action, '.-', label="Constant LQR action sequence")
+    plt.plot(traj3[0].time[:-1], traj3[0].action, label="PID agent action sequence")
+    plt.xlabel("Time / Seconds")
+    plt.ylabel("Action / Newtons")
+    plt.ylim([-.2, .2])
+    plt.legend()
+    savepdf("pid_lqr_actions")
+    plt.show(block=True)
+
+    plt.figure(figsize=(10, 5))
+    plt.grid()
+    plt.plot(traj1[0].time, traj1[0].state[:, 0], label="Optimal LQR states x(t)")
+    plt.plot(traj2[0].time, traj2[0].state[:, 0], label="Constant LQR states x(t)")
+    plt.plot(traj3[0].time, traj3[0].state[:, 0], label="PID agent states x(t)")
+    plt.xlabel("Time / Seconds")
+    plt.ylabel("Position x(t) / Meters")
+    plt.ylim([-1, 1])
+    plt.legend()
+    savepdf("pid_lqr_states")
diff --git a/irlc/ex06/model_boeing.py b/irlc/ex06/model_boeing.py
new file mode 100644
index 0000000000000000000000000000000000000000..57e0a0c7a3664a45005437b4576038021c60f4ef
--- /dev/null
+++ b/irlc/ex06/model_boeing.py
@@ -0,0 +1,62 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+import numpy as np
+from irlc.ex04.discrete_control_model import DiscreteControlModel
+from irlc.ex04.control_environment import ControlEnvironment
+from irlc.ex04.model_linear_quadratic import LinearQuadraticModel
+
+class BoeingModel(LinearQuadraticModel):
+    """
+    Boeing 747 level flight example.
+
+    See: https://books.google.dk/books?id=tXZDAAAAQBAJ&pg=PA147&lpg=PA147&dq=boeing+747+flight+0.322+model+longitudinal+flight&source=bl&ots=L2RpjCAWiZ&sig=ACfU3U2m0JsiHmUorwyq5REcOj2nlxZkuA&hl=en&sa=X&ved=2ahUKEwir7L3i6o3qAhWpl4sKHQV6CdcQ6AEwAHoECAoQAQ#v=onepage&q=boeing%20747%20flight%200.322%20model%20longitudinal%20flight&f=false
+    Also: https://web.stanford.edu/~boyd/vmls/vmls-slides.pdf
+    """
+    state_labels = ["Longitudinal velocity (x) ft/sec", "Velocity in y-axis ft/sec", "Angular velocity",
+                    "angle wrt. horizontal"]
+    action_labels = ['Elevator', "Throttle"]
+    observation_labels = ["Airspeed", "Climb rate"]
+
+    def __init__(self, output=None):
+        if output is None:
+            output = [10, 0]
+        A = [[-0.003, 0.039, 0, -0.322],
+             [-0.065, -.319, 7.74, 0],
+             [.02, -.101, -0.429, 0],
+             [0, 0, 1, 0]]
+        B = [[.01, 1],
+             [-.18, -.04],
+             [-1.16, .598],
+             [0, 0]]
+
+        A, B = np.asarray(A), np.asarray(B)
+        self.u0 = 7.74  # speed in hundred feet/seconds
+        self.P = np.asarray([[1, 0, 0, 0], [0, -1, 0, 7.74]])  # Projection of state into airspeed
+
+        dt = 0.1 # Scale the cost by this factor.
+
+        # Set up the cost:
+        self.Q_obs = np.eye(2)
+        Q = self.P.T @ self.Q_obs @ self.P / dt
+        R = np.eye(2) / dt
+        q = -np.asarray(output) @ self.Q_obs @ self.P / dt
+        super().__init__(A=A, B=B, Q=Q, R=R, q=q)
+
+    def state2outputs(self, x):
+        return self.P @ x
+
+class DiscreteBoeingModel(DiscreteControlModel):
+    def __init__(self, output=None):
+        model = BoeingModel(output=output)
+        dt = 0.1
+        super().__init__(model=model, dt=dt)
+
+
+class BoeingEnvironment(ControlEnvironment):
+    @property
+    def observation_labels(self):
+        return self.discrete_model.continuous_model.observation_labels
+
+    def __init__(self, Tmax=10):
+        model = DiscreteBoeingModel()
+        super().__init__(discrete_model=model, Tmax=Tmax)
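+
+if __name__ == "__main__":
+    # Optional sketch (not required by the exercises): instantiate the Boeing environment and
+    # inspect the continuous-time model behind it. The .A attribute is also used in
+    # irlc/ex06/boeing_lqr.py; the .B attribute is assumed to exist analogously.
+    env = BoeingEnvironment(Tmax=10)
+    boeing = env.discrete_model.continuous_model
+    print("Discretization time dt =", env.dt)
+    print("Continuous-time A matrix:\n", boeing.A)
+    print("Continuous-time B matrix:\n", boeing.B)
+    print("State labels:", boeing.state_labels)
+    print("Observation labels:", boeing.observation_labels)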
diff --git a/irlc/ex06/model_rendevouz.py b/irlc/ex06/model_rendevouz.py
new file mode 100644
index 0000000000000000000000000000000000000000..ffdb923b608a3079c27b1df41eb6405f29fdf41c
--- /dev/null
+++ b/irlc/ex06/model_rendevouz.py
@@ -0,0 +1,99 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+import numpy as np
+from irlc.utils.graphics_util_pygame import UpgradedGraphicsUtil
+from irlc.ex04.discrete_control_model import DiscreteControlModel
+from irlc.ex04.control_environment import ControlEnvironment
+from irlc.ex04.model_linear_quadratic import LinearQuadraticModel
+from gymnasium.spaces import Box
+
+"""
+SEE: https://github.com/anassinator/ilqr/blob/master/examples/rendezvous.ipynb
+"""
+class ContiniousRendevouzModel(LinearQuadraticModel): 
+    state_labels= ["x0", "y0", "x1", "y1", 'Vx0', "Vy0", "Vx1", "Vy1"]
+    action_labels = ['Fx0', 'Fy0', "Fx1", "Fy1"]
+    x0 = np.array([0, 0, 10, 10, 0, -5, 5, 0])  # Initial state.
+
+    def __init__(self, m=10.0, alpha=0.1, simple_bounds=None, cost=None): 
+        m00 = np.zeros((4,4))
+        mI = np.eye(4)
+        A = np.block( [ [m00, mI], [m00, -alpha/m*mI] ] )
+        B = np.block( [ [m00], [mI/m]] )
+        state_size = len(self.x0)
+        action_size = 4
+        self.m = m
+        self.alpha = alpha
+        Q = np.eye(state_size)
+        Q[0, 2] = Q[2, 0] = -1
+        Q[1, 3] = Q[3, 1] = -1
+        R = 0.1 * np.eye(action_size)
+        self.viewer = None
+        super().__init__(A=A, B=B, Q=Q*20, R=R*20)
+
+    def x0_bound(self) -> Box:
+        return Box(self.x0, self.x0) # self.bounds['x0_low'] = self.bounds['x0_high'] = list(self.x0)
+
+    def render(self, x, render_mode="human"):
+        """ Render the environment. You don't have to understand this code.  """
+        if self.viewer is None:
+            self.viewer = HarmonicViewer(xstar=0, x0=self.x0) # target: x=0.
+        self.viewer.update(x)
+        import time
+        time.sleep(0.05)
+        return self.viewer.blit(render_mode=render_mode)
+
+    def close(self):
+        pass
+
+
+class DiscreteRendevouzModel(DiscreteControlModel): 
+    def __init__(self, dt=0.1, cost=None, transform_actions=True, **kwargs):
+        model = ContiniousRendevouzModel(**kwargs)
+        super().__init__(model=model, dt=dt, cost=cost) 
+
+class RendevouzEnvironment(ControlEnvironment): 
+    def __init__(self, Tmax=20, render_mode=None, **kwargs):
+        discrete_model = DiscreteRendevouzModel(**kwargs)
+        super().__init__(discrete_model, Tmax=Tmax, render_mode=render_mode) 
+
+class HarmonicViewer(UpgradedGraphicsUtil):
+    def __init__(self, xstar = 0, x0=None):
+        self.xstar = xstar
+        width = 800
+        self.x0 = x0
+        sz = 20
+        self.scale = width/(2*sz)
+        self.p1h = []
+        self.p2h = []
+        super().__init__(screen_width=width, xmin=-sz, xmax=sz, ymin=-sz, ymax=sz, title='Rendevouz environment')
+
+    def render(self):
+        self.draw_background(background_color=(255, 255, 255))
+        # dw = self.dw
+        p1 = self.x[:2]
+        p2 = self.x[2:4]
+        self.p1h.append(p1)
+        self.p2h.append(p2)
+        self.circle("asdf", pos=p1, r=.5 * self.scale, fillColor=(200, 0, 0))
+        self.circle("asdf", pos=p2, r=.5 * self.scale, fillColor=(0, 0, 200) )
+        if len(self.p1h) > 2:
+            self.polyline('...', np.stack(self.p1h)[:,0], np.stack(self.p1h)[:,1], width=1, color=(200, 0, 0))
+            self.polyline('...', np.stack(self.p2h)[:,0], np.stack(self.p2h)[:,1], width=1, color=(0, 0, 200))
+
+        if tuple(self.x) == tuple(self.x0):
+            self.p1h = []
+            self.p2h = []
+
+
+    def update(self, x):
+        self.x = x
+
+
+if __name__ == "__main__":
+    from irlc import Agent, train
+    env = RendevouzEnvironment(render_mode='human')
+
+    stats, traj = train(env, Agent(env), num_episodes=1)
+    pass
diff --git a/irlc/ex07/__init__.py b/irlc/ex07/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..1e8044b811c2884a4486857534492a8d7a83575b
--- /dev/null
+++ b/irlc/ex07/__init__.py
@@ -0,0 +1,2 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+"""This directory contains the exercises for week 7."""
diff --git a/irlc/ex07/ilqr.py b/irlc/ex07/ilqr.py
new file mode 100644
index 0000000000000000000000000000000000000000..d259999214f970e4b8486523b8f5ebcfd0814428
--- /dev/null
+++ b/irlc/ex07/ilqr.py
@@ -0,0 +1,275 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+"""
+
+References:
+  [Her25] Tue Herlau. Sequential decision making. (Freely available online), 2025.
+  [TET12] Yuval Tassa, Tom Erez, and Emanuel Todorov. Synthesis and stabilization of complex behaviors through online trajectory optimization. In 2012 IEEE/RSJ International Conference on Intelligent Robots and Systems, 4906–4913. IEEE, 2012. (See tassa2012.pdf).
+  [Har20] James Harrison. Optimal and learning-based control combined course notes. (See AA203combined.pdf), 2020.
+"""
+r"""
+This implements two methods: The basic ILQR method, described in (Her25, Algorithm 24), and the linesearch-based method
+described in (Her25, Algorithm 25).
+
+If you are interested, you can consult (TET12) (which contains generalization to DDP) and (Har20, Alg 1).
+"""
+import warnings
+import numpy as np
+from irlc.ex06.dlqr import LQR
+from irlc.ex04.discrete_control_model import DiscreteControlModel
+
+def ilqr_basic(model : DiscreteControlModel, N, x0, us_init : list = None, n_iterations=500, verbose=True):
+    r"""
+    Implements the basic ilqr algorithm, i.e. (Her25, Algorithm 24). Our notation (x_bar, etc.) will be consistent with the lecture slides
+    """
+    mu, alpha = 1, 1 # Hyperparameters. For now, just let them have defaults and don't change them
+    # Create a random initial state-sequence
+    n, m = model.state_size, model.action_size
+    u_bar = [np.random.uniform(-1, 1,(model.action_size,)) for _ in range(N)] if us_init is None else us_init
+    x_bar = [x0] + [np.zeros(n, )] * N
+    """
+    Initialize nominal trajectory xs, us using us and x0 (i.e. simulate system from x0 using action sequence us). 
+    The simplest way to do this is to call forward_pass with all-zero sequence of control vector/matrix l, L.
+    """
+    # TODO: 2 lines missing.
+    raise NotImplementedError("Initialize x_bar, u_bar here")
+    J_hist = []
+    for i in range(n_iterations):
+        """
+        Compute derivatives around trajectory and cost estimate J of trajectory. To do so, use the get_derivatives
+        function. Remember the functions will return lists of derivatives.
+        """
+        # TODO: 2 lines missing.
+        raise NotImplementedError("Compute J and derivatives A_k = f_x, B_k = f_u, ....")
+        """  Backward pass: Obtain feedback law matrices l, L using the backward_pass function.
+        """
+        # TODO: 1 lines missing.
+        raise NotImplementedError("Compute L, l = .... here")
+        """ Forward pass: Given L, l matrices computed above, simulate new (optimal) action sequence. 
+        In the lecture slides, this is similar to how we compute u^*_k and x_k
+        Once they are computed, iterate the iLQR algorithm by setting x_bar, u_bar equal to these values
+        """
+        # TODO: 1 lines missing.
+        raise NotImplementedError("Compute x_bar, u_bar = ...")
+        if verbose:
+            print(f"{i}> J={J:4g}, change in cost since last iteration {0 if i == 0 else J-J_hist[-1]:4g}")
+        J_hist.append(J)
+    return x_bar, u_bar, J_hist, L, l
+
+def ilqr_linesearch(model : DiscreteControlModel, N, x0, n_iterations, us_init=None, tol=1e-6, verbose=True):
+    r"""
+    For linesearch implement method described in (Her25, Algorithm 25) (we will use regular iLQR, not DDP!)
+    """
+    # The range of alpha-values to try out in the linesearch
+    # plus parameters relevant for regularization scheduling.
+    alphas = 1.1 ** (-np.arange(10) ** 2)  # alphas = [1, 1.1^{-2}, ...]
+    mu_min = 1e-6
+    mu_max = 1e10
+    Delta_0 = 2
+    mu = 1.0
+    Delta = Delta_0
+
+    n, m = model.state_size, model.action_size
+    u_bar = [np.random.uniform(-1, 1, (model.action_size,)) for _ in range(N)] if us_init is None else us_init
+    x_bar = [x0] + [np.zeros(n, )] * (N)
+    # Initialize nominal trajectory xs, us (same as in basic linesearch)
+    # TODO: 2 lines missing.
+    raise NotImplementedError("Copy-paste code from previous solution")
+    J_hist = []
+
+    converged = False
+    for i in range(n_iterations):
+        alpha_was_accepted = False
+        """ Step 1: Compute derivatives around trajectory and cost estimate of trajectory.
+        (copy-paste from basic implementation). In our implementation, J_bar = J_{u^star}(x_0) """
+        # TODO: 2 lines missing.
+        raise NotImplementedError("Obtain derivatives f_x, f_u, ... as well as cost of trajectory J_bar = ...")
+        try:
+            """
+            Step 2: Backward pass to obtain control law (l, L). Same as before so more copy-paste
+            """
+            # TODO: 1 lines missing.
+            raise NotImplementedError("Obtain l, L = ... in backward pass")
+            """
+            Step 3: Forward pass and alpha scheduling.
+            Decrease alpha until the condition J_new < J_prime is satisfied. Apply the regularization scheduling as needed. """
+            for alpha in alphas:
+                x_hat, u_hat = forward_pass(model, x_bar, u_bar, L=L, l=l, alpha=alpha) # Simulate trajectory using this alpha
+                # TODO: 1 lines missing.
+                raise NotImplementedError("Compute J_new = ... as the cost of trajectory x_hat, u_hat")
+
+                if J_new < J_prime:
+                    """ Linesearch proposed trajectory accepted! Set current trajectory equal to x_hat, u_hat. """
+                    if np.abs((J_prime - J_new) / J_prime) < tol:
+                        converged = True  # Method does not seem to decrease J; converged. Break and return.
+
+                    J_prime = J_new
+                    x_bar, u_bar = x_hat, u_hat
+                    '''
+                    The update was accepted and you should change the regularization term mu, 
+                     and the related scheduling term Delta.                   
+                    '''
+                    # TODO: 1 lines missing.
+                    raise NotImplementedError("Delta, mu = ...")
+                    alpha_was_accepted = True # accept this alpha
+                    break
+        except np.linalg.LinAlgError as e:
+            # Matrix in dlqr was not positive-definite and this diverged
+            warnings.warn(str(e))
+
+        if not alpha_was_accepted:
+            ''' No alphas were accepted, which is not too hot. Regularization should change
+            '''
+            # TODO: 1 lines missing.
+            raise NotImplementedError("Delta, mu = ...")
+
+            if mu_max and mu >= mu_max:
+                raise Exception("Exceeded max regularization term; we are stuffed.")
+
+        dJ = 0 if i == 0 else J_prime-J_hist[-1]
+        info = "converged" if converged else ("accepted" if alpha_was_accepted else "failed")
+        if verbose:
+            print(f"{i}> J={J_prime:4g}, decrease in cost {dJ:4g} ({info}).\nx[N]={x_bar[-1].round(2)}")
+        J_hist.append(J_prime)
+        if converged:
+            break
+    return x_bar, u_bar, J_hist, L, l
+
+def backward_pass(A : list, B : list, c_x : list, c_u : list, c_xx : list, c_ux : list, c_uu : list, mu=1):
+    r"""Given all derivatives, apply the LQR algorithm to get the control law.
+
+    The input arguments are described in the online documentation and the lecture notes. You should use them to call your
+    implementation of the :func:`~irlc.ex06.dlqr.LQR` method. Note that you should give a value of all inputs except for the ``d``-term.
+
+    :param A: linearization of the dynamics matrices :math:`A_k`.
+    :param B:  linearization of the dynamics matrices :math:`B_k`.
+    :param c_x: Cost terms corresponding to :math:`\mathbf{q}_k`
+    :param c_u: Cost terms corresponding to :math:`\mathbf{r}_k`
+    :param c_xx: Cost terms corresponding to :math:`Q_k`
+    :param c_ux: Cost terms corresponding to :math:`H_k`
+    :param c_uu: Cost terms corresponding to :math:`R_k`
+    :param mu: Regularization parameter for the LQR method
+    :return: The control law :math:`L_k, \mathbf{l}_k` as two lists.
+    """
+    Q, QN = c_xx[:-1], c_xx[-1] # An example.
+    # TODO: 4 lines missing.
+    raise NotImplementedError("Insert your solution and remove this error.")
+    # Define the inputs using the linearization inputs.
+    (L, l), (V, v, vc) = LQR(A=A, B=B, R=R, Q=Q, QN=QN, H=H, q=q, qN=qN, r=r, mu=mu)
+    return L, l
+
+def cost_of_trajectory(model : DiscreteControlModel, xs : list, us : list) -> float:
+    r"""Helper function which computes the cost of the trajectory.
+
+    The cost is defined as:
+
+    .. math::
+
+        c_N( \bar {\mathbf x}_N ) + \sum_{k=0}^{N-1} c_k(\bar {\mathbf x}_k, \bar {\mathbf u}_k)
+
+    and to compute it, you should use the two helper methods ``model.cost.c`` and ``model.cost.cN``
+    (see :func:`~irlc.ex04.discrete_control_cost.DiscreteQRCost.c` and :func:`~irlc.ex04.discrete_control_cost.DiscreteQRCost.cN`).
+
+    :param model: The control model used to compute the cost.
+    :param xs: A list of length :math:`N+1` of the form :math:`\begin{bmatrix}\bar {\mathbf x}_0 & \dots & \bar {\mathbf x}_N \end{bmatrix}`
+    :param us: A list of length :math:`N` of the form :math:`\begin{bmatrix}\bar {\mathbf u}_0 & \dots & \bar {\mathbf u}_{N-1} \end{bmatrix}`
+    :return: The cost as a number.
+    """
+    N = len(us)
+    JN = model.cost.cN(xs[-1])
+    return sum(map(lambda args:  model.cost.c(*args), zip(xs[:-1], us, range(N)))) + JN
+
+def get_derivatives(model : DiscreteControlModel, x_bar : list, u_bar : list):
+    """Compute all the derivatives used in the model.
+
+    The return type should match the meaning in (Her25, Subequation 17.8) and in the online documentation.
+
+    - ``c`` should be a list of length :math:`N+1`
+    - ``c_x`` should be a list of length :math:`N+1`
+    - ``c_xx`` should be a list of length :math:`N+1`
+    - ``c_u`` should be a list of length :math:`N`
+    - ``c_uu`` should be a list of length :math:`N`
+    - ``c_ux`` should be a list of length :math:`N`
+    - ``A`` should be a list of length :math:`N`
+    - ``B`` should be a list of length :math:`N`
+
+    Use the model to compute these terms. For instance, this will compute the first terms ``A[0]`` and ``B[0]``::
+
+        A0, B0 = model.f_jacobian(x_bar[0], u_bar[0], 0)
+
+    Meanwhile, to compute the first terms of the cost-functions you should use::
+
+        c[0], c_x[0], c_u[0], c_xx[0], c_ux[0], c_uu[0] = model.cost.c(x_bar[0], u_bar[0], k=0, compute_gradients=True)
+
+    :param model: The model to use when computing the derivatives of the cost
+    :param x_bar: The nominal state-trajectory
+    :param u_bar: The nominal action-trajectory
+    :return: Lists of all derivatives computed around the nominal trajectory (see the lecture notes).
+    """
+    N = len(u_bar)
+    """ Compute A_k, B_k (lists of matrices of length N) as the jacobians of the dynamics. To do so, 
+    recall from the online documentation that: 
+        x, f_x, f_u = model.f(x, u, k, compute_jacobian=True)
+    """
+    A = [None]*N
+    B = [None]*N
+    c = [None] * (N+1)
+    c_x = [None] * (N + 1)
+    c_xx = [None] * (N + 1)
+
+    c_u = [None] * (N+1)
+    c_ux = [None] * (N + 1)
+    c_uu = [None] * (N + 1)
+    # Now update each entry correctly (i.e., ensure there are no None elements left).
+    # TODO: 4 lines missing.
+    raise NotImplementedError("Insert your solution and remove this error.")
+    """ Compute derivatives of the cost function. For terms not including u these should be of length N+1 
+    (because of gN!), for the other lists of length N
+    recall model.cost.c has output:
+        c[i], c_x[i], c_u[i], c_xx[i], c_ux[i], c_uu[i] = model.cost.c(x, u, i, compute_gradients=True)
+    """
+    # TODO: 2 lines missing.
+    raise NotImplementedError("Insert your solution and remove this error.")
+    # Concatenate the derivatives associated with the last time point N.
+    cN, c_xN, c_xxN = model.cost.cN(x_bar[N], compute_gradients=True)
+    # TODO: 3 lines missing.
+    raise NotImplementedError("Update c, c_x and c_xx with the terminal terms.")
+    return A, B, c, c_x, c_u, c_xx, c_ux, c_uu
+
+def forward_pass(model : DiscreteControlModel, x_bar : list, u_bar : list, L : list, l : list, alpha=1.0):
+    r"""Simulates the effect of the controller on the model
+
+    We assume the system starts in :math:`\mathbf{x}_0 = \bar {\mathbf x}_0`, and then simulate the effect of
+    generating actions using the closed-loop policy
+
+    .. math::
+
+        \mathbf{u}_k = \bar {\mathbf u}_k + \alpha \mathbf{l}_k + L_k (\mathbf{x}_k - \bar { \mathbf x}_k)
+
+    (see  (Her25, eq. (17.16))).
+
+    :param model: The model used to compute the dynamics.
+    :param x_bar: A nominal list of states
+    :param u_bar: A nominal list of actions
+    :param L: A list of control matrices :math:`L_k`
+    :param l: A list of control vectors :math:`\mathbf{l}_k`
+    :param alpha: The linesearch parameter.
+    :return: A list of length :math:`N+1` of simulated states and a list of length :math:`N` of simulated actions.
+    """
+    N = len(u_bar)
+    x = [None] * (N+1)
+    u_star = [None] * N
+    x[0] = x_bar[0].copy()
+
+    for i in range(N):
+        r""" Compute using (Her25, eq. (17.16))
+        u_{i} = ...
+        """
+        # TODO: 1 lines missing.
+        raise NotImplementedError("u_star[i] = ....")
+        """ Remember to compute 
+        x_{i+1} = f_k(x_i, u_i^*)        
+        here:
+        """
+        # TODO: 1 lines missing.
+        raise NotImplementedError("x[i+1] = ...")
+    return x, u_star
diff --git a/irlc/ex07/ilqr_agent.py b/irlc/ex07/ilqr_agent.py
new file mode 100644
index 0000000000000000000000000000000000000000..eb8c1464087000a4efbbba2b803eb6776378739e
--- /dev/null
+++ b/irlc/ex07/ilqr_agent.py
@@ -0,0 +1,56 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+"""
+
+References:
+  [Her25] Tue Herlau. Sequential decision making. (Freely available online), 2025.
+"""
+from irlc.ex06.model_rendevouz import RendevouzEnvironment
+from irlc.ex07.ilqr_rendovouz_basic import ilqr
+from irlc import train
+from irlc import Agent
+import numpy as np
+
+class ILQRAgent(Agent):
+    def __init__(self, env, discrete_model, N=250, ilqr_iterations=10, use_ubar=False, use_linesearch=True):
+        super().__init__(env)
+        self.dt = discrete_model.dt
+        # x0 = discrete_model.reset()
+        x0,_ = env.reset()
+        x0 = np.asarray(x0) # Get the initial state. We will take this from the environment.
+        xs, us, self.J_hist, L, l = ilqr(discrete_model, N, x0, n_iter=ilqr_iterations, use_linesearch=use_linesearch)
+        self.ubar = us
+        self.xbar = xs
+        self.L = L
+        self.l = l
+        self.use_ubar = use_ubar # Should policy use open-loop u-bar (suboptimal) or closed-loop L_k, l_k?
+
+    def pi(self, x, k, info=None):
+        if self.use_ubar:
+            u = self.ubar[k]
+        else:
+            if k >= len(self.ubar):
+                print(k, len(self.ubar))
+                k = len(self.ubar)-1
+            # See (Her25, eq. (17.16))
+            # TODO: 1 lines missing.
+            raise NotImplementedError("Generate action using the control matrices.")
+        return u
+
+def solve_rendevouz():
+    env = RendevouzEnvironment()
+    N = int(env.Tmax / env.dt)
+    agent = ILQRAgent(env, env.discrete_model, N=N)
+    stats, trajectories = train(env, agent, num_episodes=1, return_trajectory=True)
+    env.close()
+    return stats, trajectories, agent
+
+if __name__ == "__main__":
+    from irlc.ex07.ilqr_rendovouz_basic import plot_vehicles
+    import matplotlib.pyplot as plt
+    stats, trajectories, agent = solve_rendevouz()
+    t = trajectories[0].state
+    xb = agent.xbar
+    plot_vehicles(t[:,0], t[:,1], t[:,2], t[:,3], linespec=':', legend=("RK4 policy simulation", "RK4 policy simulation"))
+    plot_vehicles(xb[:,0], xb[:,1], xb[:,2], xb[:,3], linespec='-')
+    plt.legend()
+    plt.show()
diff --git a/irlc/ex07/ilqr_cartpole.py b/irlc/ex07/ilqr_cartpole.py
new file mode 100644
index 0000000000000000000000000000000000000000..d2463a56ff438c600d4e4047d1e349757716dfc7
--- /dev/null
+++ b/irlc/ex07/ilqr_cartpole.py
@@ -0,0 +1,83 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+import matplotlib.pyplot as plt
+import numpy as np
+from irlc.ex05.model_cartpole import GymSinCosCartpoleEnvironment
+import time
+from irlc.ex07.ilqr_rendovouz_basic import ilqr
+from irlc import savepdf
+
+# Number of steps.
+N = 100
+def cartpole(use_linesearch):
+    env = GymSinCosCartpoleEnvironment(render_mode='human')
+    x0, info = env.reset()
+    xs, us, J_hist, L, l = ilqr(env.discrete_model, N, x0, n_iter=300, use_linesearch=use_linesearch)
+    plot_cartpole(env, xs, us, use_linesearch=use_linesearch)
+
+def plot_cartpole(env, xs, us, J_hist=None, use_linesearch=True):
+    animate(xs, env)
+    env.close()
+    # Transform actions/states using built-in functions.
+    def gapply(f, xm):
+        usplit = np.split(xm, len(xm))
+        u2 = [f(u.flat) for u in usplit]
+        us = np.stack(u2)
+        return us
+
+    us = gapply(env.discrete_model.phi_u_inv, us)
+    xs = gapply(env.discrete_model.phi_x_inv, xs)
+
+    t = np.arange(N + 1) * env.dt
+    x = xs[:, 0]
+    theta = np.unwrap(xs[:, 2])  # Makes for smoother plots.
+    theta_dot = xs[:, 3]
+    pdf_ex = '_linesearch' if use_linesearch else ''
+    ev = 'cartpole_'
+
+    plt.plot(theta, theta_dot)
+    plt.xlabel("theta (rad)")
+    plt.ylabel("theta_dot (rad/s)")
+    plt.title("Orientation Phase Plot")
+    plt.grid()
+    savepdf(f"{ev}theta{pdf_ex}")
+    plt.show()
+
+    _ = plt.plot(t[:-1], us)
+    _ = plt.xlabel("time (s)")
+    _ = plt.ylabel("Force (N)")
+    _ = plt.title("Action path")
+    plt.grid()
+    savepdf(f"{ev}action{pdf_ex}")
+    plt.show()
+
+    _ = plt.plot(t, x)
+    _ = plt.xlabel("time (s)")
+    _ = plt.ylabel("Position (m)")
+    _ = plt.title("Cart position")
+    plt.grid()
+    savepdf(f"{ev}position{pdf_ex}")
+    plt.show()
+    if J_hist is not None:
+        _ = plt.plot(J_hist)
+        _ = plt.xlabel("Iteration")
+        _ = plt.ylabel("Total cost")
+        _ = plt.title("Total cost-to-go")
+        plt.grid()
+        savepdf(f"{ev}J{pdf_ex}")
+        plt.show()
+
+def animate(xs0, env):
+    render = True
+    if render:
+        for i in range(2):
+            render_(xs0, env.discrete_model)
+            time.sleep(1)
+        # env.viewer.close()
+
+def render_(xs, env):
+    for i in range(xs.shape[0]):
+        x = xs[i]
+        env.render(x=x)
+
+if __name__ == "__main__":
+    cartpole(use_linesearch=True)
diff --git a/irlc/ex07/ilqr_cartpole_agent.py b/irlc/ex07/ilqr_cartpole_agent.py
new file mode 100644
index 0000000000000000000000000000000000000000..cd82bd29a258b4eaf04b3f4f2ee3e64b74bf8af2
--- /dev/null
+++ b/irlc/ex07/ilqr_cartpole_agent.py
@@ -0,0 +1,43 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+import numpy as np
+from irlc.ex07.ilqr_agent import ILQRAgent
+from irlc import train
+from irlc import savepdf
+import matplotlib.pyplot as plt
+from irlc.ex05.model_cartpole import GymSinCosCartpoleEnvironment
+
+def cartpole_experiment(N=12, use_linesearch=True, figex="", animate=True):
+    np.random.seed(2)
+    Tmax = .9
+    dt = Tmax/N
+    env = GymSinCosCartpoleEnvironment(dt=dt, Tmax=Tmax, supersample_trajectory=True, render_mode='human' if animate else None)
+    agent = ILQRAgent(env, env.discrete_model, N=N, ilqr_iterations=200, use_linesearch=use_linesearch)
+    stats, trajectories = train(env, agent, num_episodes=1, return_trajectory=True)
+
+    agent.use_ubar = True
+    stats2, trajectories2 = train(env, agent, num_episodes=1, return_trajectory=True)
+    env.close()
+
+    xb = agent.xbar
+    tb = np.arange(N+1)*dt
+    plt.figure(figsize=(8,6))
+    F = 3
+    plt.plot(trajectories[0].time, trajectories[0].state[:,F], 'k-', label='Closed-loop $\\pi$')
+    plt.plot(trajectories2[0].time, trajectories2[0].state[:,F], '-', label='Open-loop $\\bar{u}_k$')
+
+    plt.plot(tb, xb[:,F], '.-', label="iLQR prediction $\\bar{x}_k$")
+    plt.xlabel("Time/seconds")
+    plt.ylabel("$\cos(\\theta)$")
+    plt.title(f"Cartpole environment $T={N}$")
+
+    plt.grid()
+    plt.legend()
+    ev = "pendulum"
+    savepdf(f"irlc_cartpole_theta_N{N}_{use_linesearch}{figex}")
+    plt.show()
+
+def plt_cartpole():
+    cartpole_experiment(N=50, use_linesearch=True, animate=True)
+
+if __name__ == '__main__':
+    plt_cartpole()
diff --git a/irlc/ex07/ilqr_pendulum.py b/irlc/ex07/ilqr_pendulum.py
new file mode 100644
index 0000000000000000000000000000000000000000..5bcc82e7d94d072bf5ce529e08a99bb6f0647895
--- /dev/null
+++ b/irlc/ex07/ilqr_pendulum.py
@@ -0,0 +1,68 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+import numpy as np
+from irlc.ex04.model_pendulum import DiscreteSinCosPendulumModel
+import matplotlib.pyplot as plt
+import time
+from irlc.ex07.ilqr_rendovouz_basic import ilqr
+from irlc import savepdf
+
+def pendulum(use_linesearch):
+    print("> Using iLQR to solve Pendulum swingup task. Using linesearch?", use_linesearch)
+    dt = 0.02
+    model = DiscreteSinCosPendulumModel(dt, cost=None)
+    N = 250
+    # This rather clunky line gets us the initial state; we transform the bound by the variable transformation.
+    x0 = np.asarray(model.phi_x(model.continuous_model.x0_bound().low))
+
+    n_iter = 200 # Use 200 iLQR iterations.
+    # xs, us, J_hist, L, l = ilqr(model, ...) Write a function call like this, but with the correct parameters.
+    # TODO: 1 lines missing.
+    raise NotImplementedError("Call iLQR here (see hint above).")
+
+    render = True
+    if render:
+        for i in range(2):
+            render_(xs, model)
+            time.sleep(2) # Sleep for two seconds between simulations.
+    model.close()
+    xs = np.asarray([model.phi_x_inv(x) for x, u in zip(xs, us)]) # Convert to radians. We use the built-in functions to change coordinates.
+    xs, us = np.asarray(xs), np.asarray(us)
+
+    t = np.arange(N) * dt
+    theta = np.unwrap(xs[:, 0])  # Makes for smoother plots.
+    theta_dot = xs[:, 1]
+
+    pdf_ex = '_linesearch' if use_linesearch else ''
+    stitle = "(using linesearch)" if use_linesearch else "(not using linesearch) "
+    ev = 'pendulum_'
+    _ = plt.plot(theta, theta_dot)
+    _ = plt.xlabel("$\\theta$ (rad)")
+    _ = plt.ylabel("$d\\theta/dt$ (rad/s)")
+    _ = plt.title(f"Phase Plot {stitle}")
+    plt.grid()
+    savepdf(f"{ev}theta{pdf_ex}")
+    plt.show()
+
+    _ = plt.plot(t, us)
+    _ = plt.xlabel("time (s)")
+    _ = plt.ylabel("Force (N)")
+    _ = plt.title(f"Action path {stitle}")
+    plt.grid()
+    savepdf(f"{ev}action{pdf_ex}")
+    plt.show()
+
+    _ = plt.plot(J_hist)
+    _ = plt.xlabel("Iteration")
+    _ = plt.ylabel("Total cost")
+    _ = plt.title(f"Total cost-to-go {stitle}")
+    plt.grid()
+    savepdf(f"{ev}J{pdf_ex}")
+    plt.show()
+
+def render_(xs, env):
+    for i in range(xs.shape[0]):
+        env.render(xs[i])
+
+if __name__ == "__main__":
+    pendulum(use_linesearch=False)
+    pendulum(use_linesearch=True)
diff --git a/irlc/ex07/ilqr_pendulum_agent.py b/irlc/ex07/ilqr_pendulum_agent.py
new file mode 100644
index 0000000000000000000000000000000000000000..6daf971f986520f92ecbab832b5a7d0c104f97f9
--- /dev/null
+++ b/irlc/ex07/ilqr_pendulum_agent.py
@@ -0,0 +1,63 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+import numpy as np
+from irlc.ex04.model_pendulum import GymSinCosPendulumEnvironment
+from irlc.ex07.ilqr_agent import ILQRAgent
+from irlc import train
+from irlc import savepdf
+import matplotlib.pyplot as plt
+
+Tmax = 3
+def pen_experiment(N=12, use_linesearch=True,figex="", animate=True):
+    dt = Tmax / N
+    env = GymSinCosPendulumEnvironment(dt, Tmax=Tmax, supersample_trajectory=True, render_mode="human" if animate else None)
+    agent = ILQRAgent(env, env.discrete_model, N=N, ilqr_iterations=200, use_linesearch=use_linesearch)
+
+    stats, trajectories = train(env, agent, num_episodes=1, return_trajectory=True)
+
+    agent.use_ubar = True
+    stats2, trajectories2 = train(env, agent, num_episodes=1, return_trajectory=True)
+    env.close()
+
+    plot_pendulum_trajectory(env, trajectories[0], label='Closed-loop $\\pi$')
+    xb = agent.xbar
+    tb = np.arange(N+1)*dt
+    plt.figure(figsize=(12, 6))
+    plt.plot(trajectories[0].time, trajectories[0].state[:,1], '-', label='Closed-loop $\\pi(x_k)$')
+
+    plt.plot(trajectories2[0].time, trajectories2[0].state[:,1], '-', label='Open-loop $\\bar{u}_k$')
+    plt.plot(tb, xb[:,1], 'o-', label="iLQR prediction $\\bar{x}_k$")
+    plt.grid()
+    plt.legend()
+    ev = "pendulum"
+    savepdf(f"irlc_pendulum_theta_N{N}_{use_linesearch}{figex}")
+    plt.show()
+
+    ## Plot J
+    plt.figure(figsize=(6, 6))
+    plt.semilogy(agent.J_hist, 'k.-')
+    plt.xlabel("iLQR Iterations")
+    plt.ylabel("Cost function estimate $J$")
+    # plt.title("Last value: {")
+    plt.grid()
+    savepdf(f"irlc_pendulum_J_N{N}_{use_linesearch}{figex}")
+    plt.show()
+
+def plot_pendulum_trajectory(env, traj, style='k.-', label=None, action=False, **kwargs):
+    if action:
+        y = traj.action[:, 0]
+        y = np.clip(y, env.action_space.low[0], env.action_space.high[0])
+    else:
+        y = traj.state[:, 1]
+
+    plt.plot(traj.time[:-1] if action else traj.time, y, style, label=label, **kwargs)
+    plt.xlabel("Time/seconds")
+    if action:
+        plt.ylabel("Torque $u$")
+    else:
+        plt.ylabel(r"$\cos(\theta)$")
+    plt.grid()
+    pass
+
+N = 50
+if __name__ == "__main__":
+    pen_experiment(N=N, use_linesearch=True)
diff --git a/irlc/ex07/ilqr_rendevoyz.py b/irlc/ex07/ilqr_rendevoyz.py
new file mode 100644
index 0000000000000000000000000000000000000000..8cd6cdc30de69f057cb7d024bbdb46f6530e146e
--- /dev/null
+++ b/irlc/ex07/ilqr_rendevoyz.py
@@ -0,0 +1,5 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.ex07.ilqr_rendovouz_basic import plot_rendevouz
+
+if __name__ == "__main__":
+    plot_rendevouz(use_linesearch=True)
diff --git a/irlc/ex07/ilqr_rendovouz_basic.py b/irlc/ex07/ilqr_rendovouz_basic.py
new file mode 100644
index 0000000000000000000000000000000000000000..fcd245e8166c6a9ede99c969c827268d2db40664
--- /dev/null
+++ b/irlc/ex07/ilqr_rendovouz_basic.py
@@ -0,0 +1,99 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+import numpy as np
+import matplotlib.pyplot as plt
+from irlc import savepdf
+from irlc.ex07.ilqr import ilqr_basic, ilqr_linesearch
+from irlc.ex06.model_rendevouz import DiscreteRendevouzModel
+from irlc.ex04.control_environment import ControlEnvironment
+from irlc.ex04.discrete_control_model import DiscreteControlModel
+# plt.ion()
+
+
+def ilqr(model : DiscreteControlModel, N, x0, n_iter, use_linesearch, verbose=True):
+    if not use_linesearch:
+        xs, us, J_hist, L, l = ilqr_basic(model, N, x0, n_iterations=n_iter,verbose=verbose) 
+    else:
+        xs, us, J_hist, L, l = ilqr_linesearch(model, N, x0, n_iterations=n_iter, tol=1e-6,verbose=verbose)
+    xs, us = np.stack(xs), np.stack(us)
+    return xs, us, J_hist, L, l
+
+def plot_vehicles(x_0, y_0, x_1, y_1, linespec='-', legend=("Vehicle 1", "Vehicle 2")):
+    _ = plt.title("Trajectory of the two omnidirectional vehicles")
+    _ = plt.plot(x_0, y_0, "r"+linespec, label=legend[0])
+    _ = plt.plot(x_1, y_1, "b"+linespec, label=legend[1])
+
+Tmax = 20
+def solve_rendovouz(use_linesearch=False):
+    model = DiscreteRendevouzModel()
+    x0 = np.asarray(model.continuous_model.x0_bound().low) # Starting position
+    N = int(Tmax/model.dt)
+    return ilqr(model, N, x0, n_iter=10, use_linesearch=use_linesearch), model
+
+def plot_rendevouz(use_linesearch=False):
+    (xs, us, J_hist, _, _), env = solve_rendovouz(use_linesearch=use_linesearch)
+    N = int(Tmax / env.dt)
+    dt = env.dt
+    x_0 = xs[:, 0]
+    y_0 = xs[:, 1]
+    x_1 = xs[:, 2]
+    y_1 = xs[:, 3]
+    x_0_dot = xs[:, 4]
+    y_0_dot = xs[:, 5]
+    x_1_dot = xs[:, 6]
+    y_1_dot = xs[:, 7]
+
+    pdf_ex = '_linesearch' if use_linesearch else ''
+    ev = 'rendevouz_'
+    plot_vehicles(x_0, y_0, x_1, y_1, linespec='-', legend=("Vehicle 1", "Vehicle 2"))
+    plt.legend()
+    savepdf(f'{ev}trajectory{pdf_ex}')
+    plt.show()
+
+    t = np.arange(N + 1) * dt
+    _ = plt.plot(t, x_0, "r")
+    _ = plt.plot(t, x_1, "b")
+    _ = plt.xlabel("Time (s)")
+    _ = plt.ylabel("x (m)")
+    _ = plt.title("X positional paths")
+    _ = plt.legend(["Vehicle 1", "Vehicle 2"])
+    savepdf(f'{ev}vehicles_x_pos{pdf_ex}')
+    plt.show()
+
+    _ = plt.plot(t, y_0, "r")
+    _ = plt.plot(t, y_1, "b")
+    _ = plt.xlabel("Time (s)")
+    _ = plt.ylabel("y (m)")
+    _ = plt.title("Y positional paths")
+    _ = plt.legend(["Vehicle 1", "Vehicle 2"])
+    savepdf(f'{ev}vehicles_y_pos{pdf_ex}')
+    plt.show()
+
+    _ = plt.plot(t, x_0_dot, "r")
+    _ = plt.plot(t, x_1_dot, "b")
+    _ = plt.xlabel("Time (s)")
+    _ = plt.ylabel("x_dot (m)")
+    _ = plt.title("X velocity paths")
+    _ = plt.legend(["Vehicle 1", "Vehicle 2"])
+    savepdf(f'{ev}vehicles_vx{pdf_ex}')
+    plt.show()
+
+    _ = plt.plot(t, y_0_dot, "r")
+    _ = plt.plot(t, y_1_dot, "b")
+    _ = plt.xlabel("Time (s)")
+    _ = plt.ylabel("y_dot (m)")
+    _ = plt.title("Y velocity paths")
+    _ = plt.legend(["Vehicle 1", "Vehicle 2"])
+    savepdf(f'{ev}vehicles_vy{pdf_ex}')
+    plt.show()
+
+    _ = plt.plot(J_hist)
+    _ = plt.xlabel("Iteration")
+    _ = plt.ylabel("Total cost")
+    _ = plt.title("Total cost-to-go")
+    savepdf(f'{ev}cost_to_go{pdf_ex}')
+    plt.show()
+    pass
+
+
+if __name__ == "__main__":
+    plot_rendevouz(use_linesearch=False)
diff --git a/irlc/ex07/linearization_agent.py b/irlc/ex07/linearization_agent.py
new file mode 100644
index 0000000000000000000000000000000000000000..bffbe892da547974b2bc66a3c0d3b0807676ce8a
--- /dev/null
+++ b/irlc/ex07/linearization_agent.py
@@ -0,0 +1,67 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+"""
+
+References:
+  [Her25] Tue Herlau. Sequential decision making. (Freely available online), 2025.
+"""
+from irlc.ex06.dlqr import LQR
+from irlc import Agent
+from irlc.ex05.model_cartpole import GymSinCosCartpoleEnvironment
+from irlc import train, savepdf
+import matplotlib.pyplot as plt
+import numpy as np
+from irlc.ex04.control_environment import ControlEnvironment
+from irlc.ex04.discrete_control_model import DiscreteControlModel
+
+class LinearizationAgent(Agent):
+    """ Implement the simple linearization procedure described in (Her25, Algorithm 23) which expands around a single fixed point. """
+    def __init__(self, env: ControlEnvironment, model : DiscreteControlModel, xbar=None, ubar=None):
+        self.model = model
+        N = 50  # Plan on this horizon. The control matrices will converge fairly quickly.
+        """ Define A, B, d as the list of A/B matrices here. I.e. x[t+1] = A x[t] + B u[t] + d.
+        You should use the function model.f to do this, which has build-in functionality to compute Jacobians which will be equal to A, B.
+        It is important that you linearize around xbar, ubar. See (Her25, Section 17.1) for further details. """
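+        # A possible outline, left as a sketch only: the Jacobian-returning call signature of
+        # model.f (the compute_jacobian keyword) and the repetition of the matrices N times for
+        # LQR are assumptions about the API, not verified facts.
+        #
+        #     xp, A, B = model.f(xbar, ubar, k=0, compute_jacobian=True)  # hypothetical keyword
+        #     d = xp - A @ xbar - B @ ubar   # so x[t+1] = A x[t] + B u[t] + d holds at (xbar, ubar)
+        #     A, B, d = [A] * N, [B] * N, [d] * N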
+        # TODO: 4 lines missing.
+        raise NotImplementedError("Insert your solution and remove this error.")
+        Q, q, R = self.model.cost.Q, self.model.cost.q, self.model.cost.R
+        """ Define self.L, self.l here as the (lists of) control matrices. """
+        # TODO: 1 lines missing.
+        raise NotImplementedError("Compute control matrices L, l here using LQR(...)")
+        super().__init__(env)
+
+    def pi(self, x, k, info=None):
+        """
+        Compute the action here using u_k = L_0 x_k + l_0. The control matrix/vector L_0 can be found as the output from LQR, i.e.
+        L_0 = L[0] and l_0 = l[0].
+
+        The reason we use L_0, l_0 (and not L_k, l_k) is that the LQR problem is only an approximation of the true dynamics
+        around the fixed point; re-using the first-step control law at every time step gives a stationary controller that can
+        balance the pendulum for an arbitrarily long time.
+        """
+        # TODO: 1 lines missing.
+        raise NotImplementedError("Compute current action here")
+        return u
+
+
+def get_offbalance_cart(waiting_steps=30, sleep_time=0.1):
+    env = GymSinCosCartpoleEnvironment(Tmax=3, render_mode='human')
+    env.reset()
+    import time
+    time.sleep(sleep_time)
+    env.state = env.discrete_model.x_upright
+    env.state[-1] = 0.01 # a bit of angular speed.
+    for _ in range(waiting_steps):  # Simulate the environment for 30 steps to get things out of balance.
+        env.step(1)
+        time.sleep(sleep_time)
+    return env
+
+
+if __name__ == "__main__":
+    np.random.seed(42) # I don't think these results are seed-dependent but let's make sure.
+    from irlc import plot_trajectory
+    env = get_offbalance_cart(4) # Simulate a few steps to get the cart off-balance. Same idea as PID control.
+    agent = LinearizationAgent(env, model=env.discrete_model, xbar=env.discrete_model.x_upright, ubar=env.action_space.sample()*0)
+    _, trajectories = train(env, agent, num_episodes=1, return_trajectory=True, reset=False)  # Note reset=False to maintain initial conditions.
+    plot_trajectory(trajectories[0], env, xkeys=[0,2, 3], ukeys=[0])
+    env.close()
+    savepdf("linearization_cartpole")
+    plt.show()
diff --git a/irlc/lectures/lec05/__init__.py b/irlc/lectures/lec05/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a56057c84d0ceac54aab1d40ba0f370c77fe10be
--- /dev/null
+++ b/irlc/lectures/lec05/__init__.py
@@ -0,0 +1 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
diff --git a/irlc/lectures/lec05/lecture_05_carpole_random.py b/irlc/lectures/lec05/lecture_05_carpole_random.py
new file mode 100644
index 0000000000000000000000000000000000000000..e82a89bbe407251ed4d9b02d2881c33ccfefa20b
--- /dev/null
+++ b/irlc/lectures/lec05/lecture_05_carpole_random.py
@@ -0,0 +1,9 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc import Agent, train
+from irlc.ex05.model_cartpole import GymSinCosCartpoleEnvironment
+
+if __name__ == "__main__":
+
+    env = GymSinCosCartpoleEnvironment(Tmax=20, render_mode='human')
+    train(env, Agent(env), num_episodes=1)
+    env.close()
diff --git a/irlc/lectures/lec05/lecture_05_cartpole_kelly.py b/irlc/lectures/lec05/lecture_05_cartpole_kelly.py
new file mode 100644
index 0000000000000000000000000000000000000000..1bc3bcce247df45f93d74fdbb4cc731c8f7b539d
--- /dev/null
+++ b/irlc/lectures/lec05/lecture_05_cartpole_kelly.py
@@ -0,0 +1,10 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.ex05.direct_cartpole_kelly import compute_solutions
+from irlc.ex05.direct_plot import plot_solutions
+import matplotlib.pyplot as plt
+
+if __name__ == "__main__":
+    env, solutions = compute_solutions()
+    print("Did we succeed?", solutions[-1]['solver']['success'])
+    plot_solutions(env, solutions, animate=True, pdf=None, animate_all=True, animate_repeats=3)
+    env.close()
diff --git a/irlc/lectures/lec05/lecture_05_cartpole_time.py b/irlc/lectures/lec05/lecture_05_cartpole_time.py
new file mode 100644
index 0000000000000000000000000000000000000000..ebd6e873e6e2036dcd6f6e13c6ee24c124694813
--- /dev/null
+++ b/irlc/lectures/lec05/lecture_05_cartpole_time.py
@@ -0,0 +1,11 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.ex05.direct_cartpole_time import compute_solutions
+from irlc.ex05.direct_plot import plot_solutions
+import matplotlib.pyplot as plt
+
+if __name__ == "__main__":
+    env, solutions = compute_solutions()
+    print("Did we succeed?", solutions[-1]['solver']['success'])
+    plot_solutions(env, solutions, animate=True, pdf=None, animate_all=True, animate_repeats=3)
+    env.close()
+    pass
diff --git a/irlc/lectures/lec06/__init__.py b/irlc/lectures/lec06/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a56057c84d0ceac54aab1d40ba0f370c77fe10be
--- /dev/null
+++ b/irlc/lectures/lec06/__init__.py
@@ -0,0 +1 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
diff --git a/irlc/lectures/lec06/lecture6_lqr_locomotive.py b/irlc/lectures/lec06/lecture6_lqr_locomotive.py
new file mode 100644
index 0000000000000000000000000000000000000000..2c9ddf3c2dec5a84476a66a11861d89a9ff08f7b
--- /dev/null
+++ b/irlc/lectures/lec06/lecture6_lqr_locomotive.py
@@ -0,0 +1,37 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+import matplotlib.pyplot as plt
+import numpy as np
+from irlc import savepdf, train
+from irlc.ex04.pid_locomotive_agent import PIDLocomotiveAgent
+from irlc.ex06.lqr_agent import LQRAgent
+from irlc.ex04.model_harmonic import HarmonicOscilatorEnvironment
+from irlc.ex06.boeing_lqr import compute_A_B_d, compute_Q_R_q
+from irlc.ex07.linearization_agent import LinearizationAgent
+from irlc.ex06.lqr_pid import ConstantLQRAgent
+from irlc.ex04.locomotive import LocomotiveEnvironment
+from irlc.ex03.control_cost import SymbolicQRCost
+import matplotlib
+#matplotlib.use('qtagg')
+dt = .04
+m = 70
+Tmax=10
+slope = 0
+
+env = LocomotiveEnvironment(m=m, slope=slope, dt=dt, Tmax=Tmax, render_mode='human')
+
+model = env.discrete_model
+model.cost = SymbolicQRCost(Q=np.eye(2)*100, R=np.eye(1)).discretize(dt=dt)
+agent = LinearizationAgent(env, model=model, xbar=env.observation_space.sample(), ubar=env.action_space.sample())
+_, traj = train(env, agent, num_episodes=1, return_trajectory=True)
+env.close()
+if False:
+    from irlc import plot_trajectory, savepdf
+    import matplotlib.pyplot as plt
+    plt.figure()
+    plot_trajectory(trajectory=traj[0], env=env, xkeys=[0, 1], ukeys=[])
+    savepdf('lqr_pid_locomotive_state.pdf')
+    plot_trajectory(trajectory=traj[0], env=env, ukeys=[0], xkeys=[])
+    savepdf('lqr_pid_locomotive_action.pdf')
+    env.close()
diff --git a/irlc/lectures/lec06/lecture_06_cartpole_ilqr.py b/irlc/lectures/lec06/lecture_06_cartpole_ilqr.py
new file mode 100644
index 0000000000000000000000000000000000000000..dc5633578e01195c85c72b5e6d29080d5029c038
--- /dev/null
+++ b/irlc/lectures/lec06/lecture_06_cartpole_ilqr.py
@@ -0,0 +1,47 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+import numpy as np
+from irlc.ex07.ilqr_agent import ILQRAgent
+from irlc import train
+from irlc.ex05.model_cartpole import GymSinCosCartpoleEnvironment
+# from irlc import VideoMonitor
+
+def cartpole_experiment(N=12, use_linesearch=True, figex="", animate=True):
+    np.random.seed(2)
+    Tmax = .9
+    dt = Tmax/N
+
+    env = GymSinCosCartpoleEnvironment(dt=dt, Tmax=Tmax, supersample_trajectory=True, render_mode='human')
+    agent = ILQRAgent(env, env.discrete_model, N=N, ilqr_iterations=200, use_linesearch=use_linesearch)
+    # if animate:
+    #     env =VideoMonitor(env)
+    stats, trajectories = train(env, agent, num_episodes=3, return_trajectory=True)
+
+    # agent.use_ubar = True
+    # stats2, trajectories2 = train(env, agent, num_episodes=1, return_trajectory=True)
+    # env.close()
+    env.close()
+
+def plt_cartpole():
+    cartpole_experiment(N=50, use_linesearch=True, animate=True)
+
+if __name__ == '__main__':
+    np.random.seed(42)
+    plt_cartpole()
+
+    # xb = agent.xbar
+    # tb = np.arange(N+1)*dt
+    # plt.figure(figsize=(8,6))
+    # F = 3
+    # # plt.plot(trajectories[0].time, trajectories[0].state[:,F], 'k-', label='Closed-loop $\\pi$')
+    # # plt.plot(trajectories2[0].time, trajectories2[0].state[:,F], '-', label='Open-loop $\\bar{u}_k$')
+    #
+    # plt.plot(tb, xb[:,F], '.-', label="iLQR prediction $\\bar{x}_k$")
+    # plt.xlabel("Time/seconds")
+    # plt.ylabel("$\cos(\\theta)$")
+    # plt.title(f"Pendulum environment $T={N}$")
+    #
+    # plt.grid()
+    # plt.legend()
+    # ev = "pendulum"
+    # savepdf(f"irlc_cartpole_theta_N{N}_{use_linesearch}{figex}")
+    # plt.show()
diff --git a/irlc/lectures/lec06/lecture_06_linearize.py b/irlc/lectures/lec06/lecture_06_linearize.py
new file mode 100644
index 0000000000000000000000000000000000000000..311dd27eb55cfca8fc3ef90fb25941155b7cadbd
--- /dev/null
+++ b/irlc/lectures/lec06/lecture_06_linearize.py
@@ -0,0 +1,6 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.ex07.linearization_agent import get_offbalance_cart
+
+if __name__ == "__main__":
+    env = get_offbalance_cart(waiting_steps=20, sleep_time=0.1)
+    env.close()
diff --git a/irlc/lectures/lec06/lecture_06_linearize_b.py b/irlc/lectures/lec06/lecture_06_linearize_b.py
new file mode 100644
index 0000000000000000000000000000000000000000..b582957f75442e2162edaccc09ff4af51cdfeb13
--- /dev/null
+++ b/irlc/lectures/lec06/lecture_06_linearize_b.py
@@ -0,0 +1,18 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc import plot_trajectory, train
+from irlc.ex07.linearization_agent import get_offbalance_cart, LinearizationAgent
+import numpy as np
+import matplotlib
+matplotlib.use("tkagg")
+import matplotlib.pyplot as plt
+
+
+if __name__ == "__main__":
+    np.random.seed(42) # I don't think these results are seed-dependent but let's make sure.
+    env = get_offbalance_cart(4, sleep_time=0.08) # Simulate for a little time to get an off-balance cart. Increase 4-->10 to get failure.
+    agent = LinearizationAgent(env, model=env.discrete_model, xbar=env.discrete_model.x_upright, ubar=env.action_space.sample()*0)
+    _, trajectories = train(env, agent, num_episodes=1, return_trajectory=True, reset=False)  # Note reset=False to maintain initial conditions.
+    plt.figure()
+    plot_trajectory(trajectories[0], env, xkeys=[0, 2, 3], ukeys=[0])
+    plt.show()
+    env.close()
diff --git a/irlc/lectures/lec06/lecture_06_pendulum_bilqr_L.py b/irlc/lectures/lec06/lecture_06_pendulum_bilqr_L.py
new file mode 100644
index 0000000000000000000000000000000000000000..e0cb2ca23b92b51c2b23db190b67ca172404f5de
--- /dev/null
+++ b/irlc/lectures/lec06/lecture_06_pendulum_bilqr_L.py
@@ -0,0 +1,7 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+import numpy as np
+from irlc.lectures.lec06.lecture_06_pendulum_bilqr_ubar import pen_experiment
+
+if __name__ == "__main__":
+    np.random.seed(2) # (!)
+    pen_experiment(N=50, use_linesearch=False, use_ubar=False)
diff --git a/irlc/lectures/lec06/lecture_06_pendulum_bilqr_ubar.py b/irlc/lectures/lec06/lecture_06_pendulum_bilqr_ubar.py
new file mode 100644
index 0000000000000000000000000000000000000000..d61a8ddc17ba23ba3c440108f5eaa4dba70a8530
--- /dev/null
+++ b/irlc/lectures/lec06/lecture_06_pendulum_bilqr_ubar.py
@@ -0,0 +1,66 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+import numpy as np
+from irlc.ex04.model_pendulum import GymSinCosPendulumEnvironment
+from irlc.ex07.ilqr_agent import ILQRAgent
+from irlc import train
+from irlc import savepdf
+import matplotlib.pyplot as plt
+
+Tmax = 3
+def pen_experiment(N=12, use_linesearch=True,figex="", animate=True, use_ubar=False):
+    dt = Tmax / N
+    env = GymSinCosPendulumEnvironment(dt, Tmax=Tmax, supersample_trajectory=True, render_mode='human' if animate else None)
+    agent = ILQRAgent(env, env.discrete_model, N=N, ilqr_iterations=200, use_linesearch=use_linesearch)
+    # if animate:
+    #     env = VideoMonitor(env)
+
+    if use_ubar:
+        agent.use_ubar = True
+    stats2, trajectories = train(env, agent, num_episodes=1, return_trajectory=True)
+    env.close()
+
+    plot_pendulum_trajectory(trajectories[0], label=f'Use linesearch? {use_linesearch}. Use u-bar? {use_ubar}')
+    plt.legend()
+    plt.show()
+
+    plt.figure(figsize=(6, 6))
+    plt.semilogy(agent.J_hist, 'k.-')
+    plt.xlabel("iLQR Iterations")
+    plt.ylabel("Cost function estimate $J$")
+    # plt.title("Last value: {")
+    plt.grid()
+    # savepdf(f"irlc_pendulum_J_N{N}_{use_linesearch}{figex}")
+    plt.show()
+    #
+    # plt.show()
+    # xb = agent.xbar
+    # tb = np.arange(N+1)*dt
+    # plt.figure(figsize=(12, 6))
+    # plt.plot(trajectories2[0].time, trajectories2[0].state[:,1], '-', label='Open-loop $\\bar{u}_k$')
+    # plt.plot(tb, xb[:,1], 'o-', label="iLQR prediction $\\bar{x}_k$")
+    # plt.grid()
+    # plt.legend()
+    # ev = "pendulum"
+    # savepdf(f"irlc_pendulum_theta_N{N}_{use_linesearch}{figex}")
+    # plt.show()
+
+    ## Plot J
+
+#
+def plot_pendulum_trajectory(traj, style='k-', label=None, action=False, **kwargs):
+    y = traj.state[:, 1] if not action else traj.action[:,0]
+    plt.plot(traj.time[:-1] if action else traj.time, y, style, label=label, **kwargs)
+
+    plt.xlabel("Time/seconds")
+    if action:
+        plt.ylabel("Torque $u$")
+    else:
+        plt.ylabel(r"$\cos(\theta)$")
+    plt.grid()
+    pass
+
+N = 50
+
+if __name__ == "__main__":
+    np.random.seed(2) # (!)
+    pen_experiment(N=N, use_linesearch=False, use_ubar=True)
diff --git a/irlc/lectures/lec06/lecture_06_pendulum_ilqr_L.py b/irlc/lectures/lec06/lecture_06_pendulum_ilqr_L.py
new file mode 100644
index 0000000000000000000000000000000000000000..6e475bf6335497c98536f9850ce9934c53d2d4fd
--- /dev/null
+++ b/irlc/lectures/lec06/lecture_06_pendulum_ilqr_L.py
@@ -0,0 +1,5 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+if __name__ == "__main__":
+    from irlc.lectures.lec06.lecture_06_pendulum_bilqr_ubar import pen_experiment
+    N = 50
+    pen_experiment(N=N, use_linesearch=True, use_ubar=False)
diff --git a/irlc/lectures/lec06/lecture_06_pendulum_ilqr_ubar.py b/irlc/lectures/lec06/lecture_06_pendulum_ilqr_ubar.py
new file mode 100644
index 0000000000000000000000000000000000000000..b44a35cc127904eee0bb318cab2b383388cd4110
--- /dev/null
+++ b/irlc/lectures/lec06/lecture_06_pendulum_ilqr_ubar.py
@@ -0,0 +1,5 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+if __name__ == "__main__":
+    from irlc.lectures.lec06.lecture_06_pendulum_bilqr_ubar import pen_experiment
+    N = 50
+    pen_experiment(N=N, use_linesearch=True, use_ubar=True)
diff --git a/irlc/lectures/lec07/__init__.py b/irlc/lectures/lec07/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a56057c84d0ceac54aab1d40ba0f370c77fe10be
--- /dev/null
+++ b/irlc/lectures/lec07/__init__.py
@@ -0,0 +1 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
diff --git a/irlc/lectures/lec07/lecture_07_boing_lqr.py b/irlc/lectures/lec07/lecture_07_boing_lqr.py
new file mode 100644
index 0000000000000000000000000000000000000000..7a140752a73aa016366a0c2cd371f66504c2c08e
--- /dev/null
+++ b/irlc/lectures/lec07/lecture_07_boing_lqr.py
@@ -0,0 +1,19 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.ex04.model_boeing import BoeingEnvironment
+from irlc.ex07.lqr_learning_agents import learning_lqr, learning_lqr_mpc, learning_lqr_mpc_local
+from irlc.ex07.learning_agent_mpc_optimize import learning_optimization_mpc_local
+
+if __name__ == "__main__":
+    env = BoeingEnvironment(output=[10, 0])
+
+    # Part A: LQR and global regression
+    learning_lqr(env)
+
+    # Part B: LQR+MPC
+    # learning_lqr_mpc(env)
+    #
+    # # Part C: LQR+MPC and local regression
+    # learning_lqr_mpc_local(env)
+    #
+    # # Part D: Optimization+MPC and local regression
+    # learning_optimization_mpc_local(env)
diff --git a/irlc/lectures/lec07/lecture_07_boing_lqr_mpc.py b/irlc/lectures/lec07/lecture_07_boing_lqr_mpc.py
new file mode 100644
index 0000000000000000000000000000000000000000..2c4a72274d3f0fd68c713147bcc4910a9e1b879c
--- /dev/null
+++ b/irlc/lectures/lec07/lecture_07_boing_lqr_mpc.py
@@ -0,0 +1,14 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.ex04.model_boeing import BoeingEnvironment
+from irlc.ex07.lqr_learning_agents import learning_lqr, learning_lqr_mpc, learning_lqr_mpc_local
+from irlc.ex07.learning_agent_mpc_optimize import learning_optimization_mpc_local
+
+if __name__ == "__main__":
+    env = BoeingEnvironment(output=[10, 0])
+    learning_lqr_mpc(env)
+
+    # # Part C: LQR+MPC and local regression
+    # learning_lqr_mpc_local(env)
+    #
+    # # Part D: Optimization+MPC and local regression
+    # learning_optimization_mpc_local(env)
diff --git a/irlc/lectures/lec07/lecture_07_boing_lqr_mpc_local.py b/irlc/lectures/lec07/lecture_07_boing_lqr_mpc_local.py
new file mode 100644
index 0000000000000000000000000000000000000000..22376d13457883a8f9b9f89becbffa9e46aedbb9
--- /dev/null
+++ b/irlc/lectures/lec07/lecture_07_boing_lqr_mpc_local.py
@@ -0,0 +1,9 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.ex04.model_boeing import BoeingEnvironment
+from irlc.ex07.lqr_learning_agents import learning_lqr, learning_lqr_mpc, learning_lqr_mpc_local
+from irlc.ex07.learning_agent_mpc_optimize import learning_optimization_mpc_local
+
+if __name__ == "__main__":
+    env = BoeingEnvironment(output=[10, 0])
+    learning_lqr_mpc_local(env)
+    # learning_optimization_mpc_local(env)
diff --git a/irlc/lectures/lec07/lecture_07_boing_lqr_mpc_optim.py b/irlc/lectures/lec07/lecture_07_boing_lqr_mpc_optim.py
new file mode 100644
index 0000000000000000000000000000000000000000..4ed3f3e080238b559fb656e1f5640a1374109bfc
--- /dev/null
+++ b/irlc/lectures/lec07/lecture_07_boing_lqr_mpc_optim.py
@@ -0,0 +1,8 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.ex04.model_boeing import BoeingEnvironment
+from irlc.ex07.lqr_learning_agents import learning_lqr, learning_lqr_mpc, learning_lqr_mpc_local
+from irlc.ex07.learning_agent_mpc_optimize import learning_optimization_mpc_local
+
+if __name__ == "__main__":
+    env = BoeingEnvironment(output=[10, 0])
+    learning_optimization_mpc_local(env)
diff --git a/irlc/lectures/lec07/lecture_07_lmpc.py b/irlc/lectures/lec07/lecture_07_lmpc.py
new file mode 100644
index 0000000000000000000000000000000000000000..5fff87cb5ed8a3581de2f2c8c6ec6ae94957aeb4
--- /dev/null
+++ b/irlc/lectures/lec07/lecture_07_lmpc.py
@@ -0,0 +1,5 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.ex07.lmpc_run import main
+
+if __name__ == "__main__":
+    main(show_episode=True)
diff --git a/irlc/lectures/lec07/lecture_07_pendulum_mpc_lqr.py b/irlc/lectures/lec07/lecture_07_pendulum_mpc_lqr.py
new file mode 100644
index 0000000000000000000000000000000000000000..8867c0afeac9588af42a796e93ec70def74d2a07
--- /dev/null
+++ b/irlc/lectures/lec07/lecture_07_pendulum_mpc_lqr.py
@@ -0,0 +1,4 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+if __name__ == "__main__":
+    from irlc.ex07.mpc_pendulum_experiment_lqr import main_pendulum_lqr
+    main_pendulum_lqr()
diff --git a/irlc/lectures/lec07/lecture_07_pendulum_mpc_optm.py b/irlc/lectures/lec07/lecture_07_pendulum_mpc_optm.py
new file mode 100644
index 0000000000000000000000000000000000000000..9eff242ac8034a7e3f4ce8aa79e95d15f86d822b
--- /dev/null
+++ b/irlc/lectures/lec07/lecture_07_pendulum_mpc_optm.py
@@ -0,0 +1,4 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+if __name__ == "__main__":
+    from irlc.ex07.mpc_pendulum_experiment_optim import main_pendulum
+    main_pendulum()
diff --git a/irlc/lectures/lec07/lecture_07_pendulum_simple.py b/irlc/lectures/lec07/lecture_07_pendulum_simple.py
new file mode 100644
index 0000000000000000000000000000000000000000..337b16585b8cd3c0a709a49b5eb7b378a08c2757
--- /dev/null
+++ b/irlc/lectures/lec07/lecture_07_pendulum_simple.py
@@ -0,0 +1,41 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.ex04.model_pendulum import GymSinCosPendulumEnvironment
+from irlc.utils.video_monitor import VideoMonitor
+from irlc.ex04.discrete_control_cost import goal_seeking_qr_cost, DiscreteQRCost
+from irlc.ex01.agent import train
+from irlc.ex07.lqr_learning_agents import MPCLocalLearningLQRAgent, MPCLearningAgent
+from irlc import plot_trajectory, main_plot
+import matplotlib.pyplot as plt
+import numpy as np
+from irlc.ex07.mpc_pendulum_experiment_lqr import mk_mpc_pendulum_env
+
+L = 12
+def main_pendulum_lqr_simple(Tmax=10):
+
+    """ Run Local LQR/MPC agent using the parameters
+    L = 12  
+    neighboorhood_size = 50
+    min_buffer_size = 50 
+    """
+    env_pendulum = mk_mpc_pendulum_env()
+
+    # agent = .... (instantiate agent here)
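+    # A hedged sketch of the instantiation. The keyword names below simply mirror the
+    # parameters listed in the docstring above; the actual constructor signature of
+    # MPCLocalLearningLQRAgent may differ.
+    #
+    #     agent = MPCLocalLearningLQRAgent(env_pendulum, L=L, neighboorhood_size=50, min_buffer_size=50)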
+    # TODO: 1 lines missing.
+    raise NotImplementedError("Instantiate your agent here")
+    env_pendulum = VideoMonitor(env_pendulum)
+
+    experiment_name = f"pendulum{L}_lqr"
+    stats, trajectories = train(env_pendulum, agent, experiment_name=experiment_name, num_episodes=16,return_trajectory=True)
+    plt.show()
+    for k in range(len(trajectories)):
+        plot_trajectory(trajectories[k], env_pendulum)
+        plt.title(f"Trajectory {k}")
+        plt.show()
+
+    env_pendulum.close()
+    main_plot(experiment_name)
+    plt.show()
+
+if __name__ == "__main__":
+    np.random.seed(1)
+    main_pendulum_lqr_simple()
diff --git a/irlc/lectures/lec07/pendulum12/2021-03-19_08-21-20.207/log.txt b/irlc/lectures/lec07/pendulum12/2021-03-19_08-21-20.207/log.txt
new file mode 100644
index 0000000000000000000000000000000000000000..dc592cfe3c7ae897391e9ce88d0c1454848920e8
--- /dev/null
+++ b/irlc/lectures/lec07/pendulum12/2021-03-19_08-21-20.207/log.txt
@@ -0,0 +1,17 @@
+Episode	Accumulated Reward	Average Reward	Length	Steps
+0	-4895.647575826604	-39.165180606612836	125	125
+1	-4401.3119492497035	-35.21049559399765	125	250
+2	-2909.118791375824	-23.272950331006577	125	375
+3	-1355.8374521458716	-10.846699617166973	125	500
+4	-1392.1376132555315	-11.13710090604426	125	625
+5	-2377.229711438541	-19.017837691508326	125	750
+6	-1605.205601135333	-12.841644809082668	125	875
+7	-1142.6024308594363	-9.140819446875495	125	1000
+8	-1110.17238007953	-8.881379040636237	125	1125
+9	-1415.0153227915457	-11.320122582332363	125	1250
+10	-1084.806186007314	-8.678449488058519	125	1375
+11	-1204.58673322474	-9.636693865797913	125	1500
+12	-1582.8902992149344	-12.66312239371947	125	1625
+13	-1234.3968104401945	-9.875174483521556	125	1750
+14	-2290.3685781570866	-18.322948625256696	125	1875
+15	-1317.928485283048	-10.543427882264387	125	2000
diff --git a/irlc/lectures/lec07/pendulum12/2021-03-19_08-21-20.207/trajectories.pkl b/irlc/lectures/lec07/pendulum12/2021-03-19_08-21-20.207/trajectories.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..34930a77fba816ca18036b42921823ab18cefc24
Binary files /dev/null and b/irlc/lectures/lec07/pendulum12/2021-03-19_08-21-20.207/trajectories.pkl differ
diff --git a/irlc/lectures/lec07/pendulum12/2022-03-17_14-16-10.758/log.txt b/irlc/lectures/lec07/pendulum12/2022-03-17_14-16-10.758/log.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3005052ea2c0aa4f28a74ca66b23b716fa4a925d
--- /dev/null
+++ b/irlc/lectures/lec07/pendulum12/2022-03-17_14-16-10.758/log.txt
@@ -0,0 +1,17 @@
+Episode	Accumulated Reward	Average Reward	Length	Steps
+0	-5062.646915554269	-40.50117532443415	125	125
+1	-4545.443228168109	-36.36354582534487	125	250
+2	-3992.451522701582	-31.939612181612656	125	375
+3	-2660.945115772302	-21.287560926178415	125	500
+4	-1089.641113544413	-8.71712890835529	125	625
+5	-1794.3862709577143	-14.35509016766171	125	750
+6	-1599.3332228782826	-12.79466578302626	125	875
+7	-1999.3347944176303	-15.994678355341035	125	1000
+8	-1240.1770407677993	-9.921416326142396	125	1125
+9	-1128.717151496786	-9.029737211974288	125	1250
+10	-1148.8528175884883	-9.19082254070789	125	1375
+11	-1199.5840778420286	-9.596672622736232	125	1500
+12	-1147.0703774473068	-9.176563019578447	125	1625
+13	-1245.4139074019245	-9.963311259215399	125	1750
+14	-1257.9333517907346	-10.063466814325885	125	1875
+15	-1309.9607605947551	-10.479686084758042	125	2000
diff --git a/irlc/lectures/lec07/pendulum12/2022-03-17_14-16-10.758/trajectories.pkl b/irlc/lectures/lec07/pendulum12/2022-03-17_14-16-10.758/trajectories.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..c2b34f7154ed75b8dfa9e69bcd3de982d0cd0eb1
Binary files /dev/null and b/irlc/lectures/lec07/pendulum12/2022-03-17_14-16-10.758/trajectories.pkl differ
diff --git a/irlc/lectures/lec07/pendulum12_lqr/2023-03-17_08-13-45.172/log.txt b/irlc/lectures/lec07/pendulum12_lqr/2023-03-17_08-13-45.172/log.txt
new file mode 100644
index 0000000000000000000000000000000000000000..643cee39646b4d0a0ee0189cf724ae538a4cd508
--- /dev/null
+++ b/irlc/lectures/lec07/pendulum12_lqr/2023-03-17_08-13-45.172/log.txt
@@ -0,0 +1,17 @@
+Episode	Accumulated Reward	Length	Steps
+0	-5042.020051692956	125	0
+1	-4627.801003133058	125	1
+2	-3635.9503089227105	125	2
+3	-2459.2456085370436	125	3
+4	-1142.0454750510762	125	4
+5	-1563.5408392433658	125	5
+6	-1718.6689962603696	125	6
+7	-1215.2631997008277	125	7
+8	-1172.9345344478274	125	8
+9	-1108.3729746371948	125	9
+10	-1012.6787060036193	125	10
+11	-1715.9593847985013	125	11
+12	-1009.5943996400636	125	12
+13	-1082.3121757069966	125	13
+14	-1248.0530347172762	125	14
+15	-1496.6826680867007	125	15
diff --git a/irlc/lectures/lec07/pendulum12_lqr/2023-03-17_08-13-45.172/trajectories.pkl b/irlc/lectures/lec07/pendulum12_lqr/2023-03-17_08-13-45.172/trajectories.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..3e9f2d19af2a1f4bdcdbc7a8922852a440795c46
Binary files /dev/null and b/irlc/lectures/lec07/pendulum12_lqr/2023-03-17_08-13-45.172/trajectories.pkl differ
diff --git a/irlc/tests/tests_week05.py b/irlc/tests/tests_week05.py
new file mode 100644
index 0000000000000000000000000000000000000000..4a7f813840b6670d6caa99c16576d2b90ff7572c
--- /dev/null
+++ b/irlc/tests/tests_week05.py
@@ -0,0 +1,114 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from unitgrade import Report
+from irlc.ex05.direct_agent import train_direct_agent
+from unitgrade import UTestCase
+import irlc
+from irlc.ex05.direct import run_direct_small_problem
+
+
+class DirectMethods(UTestCase):
+    title = "Direct methods z, z0, z_lb/z_ub definitions+"
+
+    @classmethod
+    def setUpClass(cls) -> None:
+        env, solution = run_direct_small_problem()
+        cls.solution = solution[-1]
+
+    def test_z_variable_vector(self):
+        self.assertEqualC(str(DirectMethods.solution['inputs']['z']))
+
+    def test_z0_initial_state(self):
+        self.assertL2(DirectMethods.solution['inputs']['z0'], tol=1e-6)
+
+    def test_zU_upper_bound(self):
+        self.assertL2(DirectMethods.solution['inputs']['z_ub'], tol=1e-6)
+
+    def test_zL_lower_bound(self):
+        self.assertL2(DirectMethods.solution['inputs']['z_lb'], tol=1e-6)
+
+
+class DirectAgentPendulum(UTestCase):
+    """ Direct agent: Test of pendulum environment """
+    def test_pendulum(self):
+        stats,_,_ = train_direct_agent(animate=False)
+        return self.assertL2(stats[0]['Accumulated Reward'], tol=0.03)
+
+class DirectSolverQuestion(UTestCase):
+    """ Test the Direct solver on the Pendulum using run_direct_small_problem() """
+    @classmethod
+    def setUpClass(cls):
+        cls.solution = cls.compute_solution()
+
+    @classmethod
+    def compute_solution(cls):
+        from irlc.ex05.direct import run_direct_small_problem
+        env, solution = run_direct_small_problem()
+        return solution
+        # cls.solution = solution
+
+    def test_solver_success(self):
+        self.assertTrue(self.__class__.solution[-1]['solver']['success'])
+
+    def test_solver_fun(self):
+        self.assertL2(self.__class__.solution[-1]['solver']['fun'], tol=0.01)
+
+    def test_constraint_violation(self):
+        self.assertL2(self.__class__.solution[-1]['eqC_val'], tol=0.01)
+
+
+class PendulumQuestion(DirectSolverQuestion):
+    """ Direct solver on the pendulum problem """
+    @classmethod
+    def compute_solution(cls):
+        from irlc.ex05.direct_pendulum import compute_pendulum_solutions
+        return compute_pendulum_solutions()[1]
+
+
+class CartpoleTimeQuestion(DirectSolverQuestion):
+    """ Direct solver on the cartpole (minimum time) task """
+    @classmethod
+    def compute_solution(cls):
+        from irlc.ex05.direct_cartpole_time import compute_solutions
+        return compute_solutions()[1]
+
+
+class CartpoleCostQuestion(DirectSolverQuestion):
+    """ Direct solver on the cartpole (kelly) task """
+    @classmethod
+    def compute_solution(cls):
+        from irlc.ex05.direct_cartpole_kelly import compute_solutions
+        return compute_solutions()[1]
+
+class BrachistochroneQuestion(DirectSolverQuestion):
+    """ Brachistochrone (unconstrained) """
+
+    @classmethod
+    def compute_solution(cls):
+        from irlc.ex05.direct_brachistochrone import compute_constrained_solutions
+        return compute_constrained_solutions()[1]
+
+class BrachistochroneConstrainedQuestion(DirectSolverQuestion):
+    """ Brachistochrone (constrained) """
+    @classmethod
+    def compute_solution(cls):
+        from irlc.ex05.direct_brachistochrone import compute_constrained_solutions
+        return compute_constrained_solutions()[1]
+
+class Week05Tests(Report):
+    title = "Tests for week 05"
+    pack_imports = [irlc]
+    individual_imports = []
+    questions = [
+        (DirectMethods, 10),                        # ok
+        (DirectSolverQuestion, 10),                 # ok
+        (PendulumQuestion, 5),                      # ok
+        (DirectAgentPendulum, 10),                  # ok
+        (CartpoleTimeQuestion, 5),                  # ok
+        (CartpoleCostQuestion, 5),                  # ok
+        (BrachistochroneQuestion, 5),               # ok
+        (BrachistochroneConstrainedQuestion, 10),   # ok
+                 ]
+
+if __name__ == '__main__':
+    from unitgrade import evaluate_report_student
+    evaluate_report_student(Week05Tests())
diff --git a/irlc/tests/tests_week06.py b/irlc/tests/tests_week06.py
new file mode 100644
index 0000000000000000000000000000000000000000..a72463838f7fba08f8db8d4bf789532d313e7e2d
--- /dev/null
+++ b/irlc/tests/tests_week06.py
@@ -0,0 +1,147 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.ex06.model_boeing import BoeingEnvironment
+from unitgrade import UTestCase, Report
+import irlc
+from irlc import train
+import numpy as np
+from irlc.ex04.locomotive import LocomotiveEnvironment
+from irlc.ex04.model_harmonic import HarmonicOscilatorEnvironment
+
+matrices = ['L', 'l', 'V', 'v', 'vc']
+
+class Problem3LQR(UTestCase):
+    title = "LQR, full check of implementation"
+
+    @classmethod
+    def setUpClass(cls):
+        # def init(self):
+        from irlc.ex06.dlqr_check import check_LQR
+        (cls.L, cls.l), (cls.V, cls.v, cls.vc) = check_LQR()
+        # self.M = list(zip(matrices, [L, l, V, v, vc]))
+
+    def chk_item(self, m_list):
+        self.assertIsInstance(m_list, list)
+        self.assertEqualC(len(m_list))
+        for m in m_list:
+            self.assertIsInstance(m, np.ndarray)
+            self.assertEqualC(m.shape)
+            self.assertL2(m, tol=1e-6)
+
+    def test_L(self):
+        self.chk_item(self.__class__.L)
+
+    def test_l(self):
+        self.chk_item(self.__class__.l)
+
+    def test_V(self):
+        self.chk_item(self.__class__.V)
+
+    def test_v(self):
+        self.chk_item(self.__class__.v)
+
+    def test_vc(self):
+        vc = self.__class__.vc
+        self.assertIsInstance(vc, list)
+        for d in vc:
+            self.assertL2(d, tol=1e-6)
+
+        self.chk_item(self.__class__.l)
+
+class Problem4LQRAgent(UTestCase):
+    def _mkagent(self, val=0.):
+        A = np.ones((2, 2))* (1+val)
+        A[1, 0] = 0
+        B = np.asarray([[0], [1]])
+        Q = np.eye(2) * (3+val)
+        R = np.ones((1, 1)) * 2
+        q = np.asarray([-1.1 + val, 0])
+        from irlc.ex06.lqr_agent import LQRAgent
+        env = LocomotiveEnvironment(render_mode=None, Tmax=5, slope=1)
+        agent = LQRAgent(env, A=A, B=B, Q=Q, R=R, q=q)
+        return agent
+
+    def test_policy_lqr_a(self):
+        agent = self._mkagent(0)
+        self.assertL2(agent.pi(np.asarray([1, 0]), k=0))
+        self.assertL2(agent.pi(np.asarray([1, 0]), k=5))
+
+    def test_policy_lqr_b(self):
+        agent = self._mkagent(0.2)
+        self.assertL2(agent.pi(np.asarray([1, 0]), k=0))
+        self.assertL2(agent.pi(np.asarray([1, 0]), k=5))
+
+class Problem5_6_Boeing(UTestCase):
+
+    def test_compute_A_B_d(self):
+        from irlc.ex06.boeing_lqr import compute_A_B_d, compute_Q_R_q
+        model = BoeingEnvironment(Tmax=10).discrete_model.continuous_model
+        A, B, d = compute_A_B_d(model, dt=0.2)
+        self.assertL2(A)
+        self.assertL2(B)
+        self.assertL2(d)
+
+    def test_compute_Q_R_q(self):
+        from irlc.ex06.boeing_lqr import compute_A_B_d, compute_Q_R_q
+        model = BoeingEnvironment(Tmax=10).discrete_model.continuous_model
+        Q, R, q = compute_Q_R_q(model, dt=0.2)
+        self.assertL2(Q)
+        self.assertL2(R)
+        self.assertL2(q)
+
+    def test_boing_path(self):
+        from irlc.ex06.boeing_lqr import boeing_simulation
+        stats, trajectories, env = boeing_simulation()
+        self.assertL2(trajectories[-1].state, tol=1e-6)
+
+
+class Problem7_8_PidLQR(UTestCase):
+    def test_constant_lqr_agent(self):
+        Delta = 0.06  # Time discretization constant
+        # Define a harmonic oscillator environment. Use .., render_mode='human' to see a visualization.
+        env = HarmonicOscilatorEnvironment(Tmax=8, dt=Delta, m=0.5, R=np.eye(1) * 8,
+                                           render_mode=None)  # set render_mode='human' to see the oscillator.
+        model = env.discrete_model.continuous_model  # Get the ControlModel corresponding to this environment.
+
+        from irlc.ex06.boeing_lqr import compute_A_B_d, compute_Q_R_q
+        from irlc.ex06.lqr_pid import ConstantLQRAgent
+        A, B, d = compute_A_B_d(model, Delta)
+        Q, R, q = compute_Q_R_q(model, Delta)
+        x0, _ = env.reset()
+
+        # Run the LQR agent
+        lqr_agent = ConstantLQRAgent(env, A=A, B=B, d=d, Q=Q, R=R, q=q)
+        self.assertLinf(lqr_agent.pi(x0, k=0), tol=1e-3)
+        self.assertLinf(lqr_agent.pi(x0, k=10), tol=1e-3)
+
+
+    def test_KpKd(self):
+        Delta = 0.06  # Time discretization constant
+        # Define a harmonic oscillator environment. Use .., render_mode='human' to see a visualization.
+        env = HarmonicOscilatorEnvironment(Tmax=8, dt=Delta, m=0.5, R=np.eye(1) * 8,
+                                           render_mode=None)  # set render_mode='human' to see the oscillator.
+        model = env.discrete_model.continuous_model  # Get the ControlModel corresponding to this environment.
+        from irlc.ex06.boeing_lqr import compute_A_B_d, compute_Q_R_q
+        from irlc.ex06.lqr_pid import ConstantLQRAgent, get_Kp_Kd
+        A, B, d = compute_A_B_d(model, Delta)
+        Q, R, q = compute_Q_R_q(model, Delta)
+        lqr_agent = ConstantLQRAgent(env, A=A, B=B, d=d, Q=Q, R=R, q=q)
+        Kp, Kd = get_Kp_Kd(lqr_agent.L[0])
+        self.assertAlmostEqualC(Kp, places=3)
+        self.assertAlmostEqualC(Kd, places=3)
+
+
+
+
+class Week06Tests(Report):
+    title = "Tests for week 06"
+    pack_imports = [irlc]
+    individual_imports = []
+    questions = [
+        (Problem3LQR, 10),
+        (Problem4LQRAgent, 10),
+        (Problem5_6_Boeing, 10),
+        (Problem7_8_PidLQR, 10),
+                 ]
+if __name__ == '__main__':
+    from unitgrade import evaluate_report_student
+    evaluate_report_student(Week06Tests())
diff --git a/irlc/tests/tests_week07.py b/irlc/tests/tests_week07.py
new file mode 100644
index 0000000000000000000000000000000000000000..1f46427025ca7e635d340fafa678f4a7e2c309a7
--- /dev/null
+++ b/irlc/tests/tests_week07.py
@@ -0,0 +1,62 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from unitgrade import Report
+import irlc
+from unitgrade import UTestCase
+import numpy as np
+from irlc import Agent, train
+
+class RendevouzItem(UTestCase):
+    def test_rendevouz_without_linesearch(self):
+        """ Rendevouz with iLQR (no linesearch) """
+        from irlc.ex07.ilqr_rendovouz_basic import solve_rendovouz
+        (xs, us, J_hist, l, L), env = solve_rendovouz(use_linesearch=False)
+        # print(J_hist[-1])
+        self.assertL2(xs[-1], tol=1e-2)
+
+    def test_rendevouz_with_linesearch(self):
+        """ Rendevouz with iLQR (with linesearch) """
+        from irlc.ex07.ilqr_rendovouz_basic import solve_rendovouz
+        (xs, us, J_hist, l, L), env = solve_rendovouz(use_linesearch=True)
+        # print(J_hist[-1])
+        self.assertL2(xs[-1], tol=1e-2)
+        # return l, L, xs
+
+
+
+
+
+class ILQRAgentQuestion(UTestCase):
+    """ iLQR Agent on Rendevouz """
+    def test_ilqr_agent(self):
+        from irlc.ex07.ilqr_agent import solve_rendevouz
+        stats, trajectories, agent = solve_rendevouz()
+        self.assertL2(trajectories[-1].state[-1], tol=1e-2)
+
+
+class ILQRPendulumQuestion(UTestCase):
+    """ iLQR Agent on Pendulum """
+
+    def test_ilqr_agent_pendulum(self):
+        from irlc.ex07.ilqr_pendulum_agent import Tmax, N
+        from irlc.ex04.model_pendulum import GymSinCosPendulumEnvironment
+        from irlc.ex07.ilqr_agent import ILQRAgent
+        dt = Tmax / N
+        env = GymSinCosPendulumEnvironment(dt, Tmax=Tmax, supersample_trajectory=True)
+        agent = ILQRAgent(env, env.discrete_model, N=N, ilqr_iterations=200, use_linesearch=True)
+        stats, trajectories = train(env, agent, num_episodes=1, return_trajectory=True)
+        state = trajectories[-1].state[-1]
+        self.assertL2(state, tol=2e-2)
+
+class Week07Tests(Report): #240 total.
+    title = "Tests for week 07"
+    pack_imports = [irlc]
+    individual_imports = []
+    questions = [
+        (RendevouzItem, 10),  # ok
+        (ILQRAgentQuestion, 10),  # ok
+        (ILQRPendulumQuestion, 10),  # ok
+                 ]
+
+if __name__ == '__main__':
+    from unitgrade import evaluate_report_student
+    evaluate_report_student(Week07Tests())
diff --git a/irlc/tests/unitgrade_data/BrachistochroneConstrainedQuestion.pkl b/irlc/tests/unitgrade_data/BrachistochroneConstrainedQuestion.pkl
index 76f27814d4eaaa7c2990d3dab02904f443993ef0..1d1b594d8ccdde8336eb3cd174e105beaf5eaf6f 100644
Binary files a/irlc/tests/unitgrade_data/BrachistochroneConstrainedQuestion.pkl and b/irlc/tests/unitgrade_data/BrachistochroneConstrainedQuestion.pkl differ
diff --git a/irlc/tests/unitgrade_data/BrachistochroneQuestion.pkl b/irlc/tests/unitgrade_data/BrachistochroneQuestion.pkl
index 76f27814d4eaaa7c2990d3dab02904f443993ef0..1d1b594d8ccdde8336eb3cd174e105beaf5eaf6f 100644
Binary files a/irlc/tests/unitgrade_data/BrachistochroneQuestion.pkl and b/irlc/tests/unitgrade_data/BrachistochroneQuestion.pkl differ
diff --git a/irlc/tests/unitgrade_data/CartpoleCostQuestion.pkl b/irlc/tests/unitgrade_data/CartpoleCostQuestion.pkl
index 76f27814d4eaaa7c2990d3dab02904f443993ef0..1d1b594d8ccdde8336eb3cd174e105beaf5eaf6f 100644
Binary files a/irlc/tests/unitgrade_data/CartpoleCostQuestion.pkl and b/irlc/tests/unitgrade_data/CartpoleCostQuestion.pkl differ
diff --git a/irlc/tests/unitgrade_data/CartpoleTimeQuestion.pkl b/irlc/tests/unitgrade_data/CartpoleTimeQuestion.pkl
index 76f27814d4eaaa7c2990d3dab02904f443993ef0..1d1b594d8ccdde8336eb3cd174e105beaf5eaf6f 100644
Binary files a/irlc/tests/unitgrade_data/CartpoleTimeQuestion.pkl and b/irlc/tests/unitgrade_data/CartpoleTimeQuestion.pkl differ
diff --git a/irlc/tests/unitgrade_data/DirectAgentPendulum.pkl b/irlc/tests/unitgrade_data/DirectAgentPendulum.pkl
index 49ce42c0ea11e0f6f45b5cd1aadccde7c4fb4274..e9d2ed475214b22bc52f1cc0dfc8c04c71d9a2b9 100644
Binary files a/irlc/tests/unitgrade_data/DirectAgentPendulum.pkl and b/irlc/tests/unitgrade_data/DirectAgentPendulum.pkl differ
diff --git a/irlc/tests/unitgrade_data/DirectMethods.pkl b/irlc/tests/unitgrade_data/DirectMethods.pkl
index 7ccccf4713076674e8553270c2282b44501b6cc8..f81ab2560bf4752a237712f1df94fc8ae01ac0ce 100644
Binary files a/irlc/tests/unitgrade_data/DirectMethods.pkl and b/irlc/tests/unitgrade_data/DirectMethods.pkl differ
diff --git a/irlc/tests/unitgrade_data/DirectSolverQuestion.pkl b/irlc/tests/unitgrade_data/DirectSolverQuestion.pkl
index 76f27814d4eaaa7c2990d3dab02904f443993ef0..1d1b594d8ccdde8336eb3cd174e105beaf5eaf6f 100644
Binary files a/irlc/tests/unitgrade_data/DirectSolverQuestion.pkl and b/irlc/tests/unitgrade_data/DirectSolverQuestion.pkl differ
diff --git a/irlc/tests/unitgrade_data/Exam5InventoryEvaluation.pkl b/irlc/tests/unitgrade_data/Exam5InventoryEvaluation.pkl
index 12902e7ce8615fda0d3a74fbd97811e28766dded..a511917ab3f43109f1d3fe37b025f2fde713339f 100644
Binary files a/irlc/tests/unitgrade_data/Exam5InventoryEvaluation.pkl and b/irlc/tests/unitgrade_data/Exam5InventoryEvaluation.pkl differ
diff --git a/irlc/tests/unitgrade_data/Exam6Toy2d.pkl b/irlc/tests/unitgrade_data/Exam6Toy2d.pkl
index d40a08b78e3defd5e720b05e0948e20d9ad0f3b8..f927b5ae7578002c5a8840c91705f5d4c7d806f0 100644
Binary files a/irlc/tests/unitgrade_data/Exam6Toy2d.pkl and b/irlc/tests/unitgrade_data/Exam6Toy2d.pkl differ
diff --git a/irlc/tests/unitgrade_data/ExamQuestion7FlowersStore.pkl b/irlc/tests/unitgrade_data/ExamQuestion7FlowersStore.pkl
index 50149f3db947fddf5f0faf61a7f098e1fc8a3167..6555641d17ccd50bdc906dab13481cdad59254cc 100644
Binary files a/irlc/tests/unitgrade_data/ExamQuestion7FlowersStore.pkl and b/irlc/tests/unitgrade_data/ExamQuestion7FlowersStore.pkl differ
diff --git a/irlc/tests/unitgrade_data/ILQRAgentQuestion.pkl b/irlc/tests/unitgrade_data/ILQRAgentQuestion.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..4af21ecb688c99771e1897bc53ecbae1bc667b8f
Binary files /dev/null and b/irlc/tests/unitgrade_data/ILQRAgentQuestion.pkl differ
diff --git a/irlc/tests/unitgrade_data/ILQRPendulumQuestion.pkl b/irlc/tests/unitgrade_data/ILQRPendulumQuestion.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..1019b5db1c0a0ad9b3e14545aaf80055652fcb66
Binary files /dev/null and b/irlc/tests/unitgrade_data/ILQRPendulumQuestion.pkl differ
diff --git a/irlc/tests/unitgrade_data/PendulumQuestion.pkl b/irlc/tests/unitgrade_data/PendulumQuestion.pkl
index 76f27814d4eaaa7c2990d3dab02904f443993ef0..1d1b594d8ccdde8336eb3cd174e105beaf5eaf6f 100644
Binary files a/irlc/tests/unitgrade_data/PendulumQuestion.pkl and b/irlc/tests/unitgrade_data/PendulumQuestion.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl b/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl
index 62c9f65ce0617aa3054d6affdad0c711fc5685c7..cb3e05e507f46a771194aa47e9d478a38ea6dc4a 100644
Binary files a/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl and b/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem1DiscreteKuromoto.pkl b/irlc/tests/unitgrade_data/Problem1DiscreteKuromoto.pkl
index 9b686f94b8f574600bc3a37c18a45c4d0a299430..5884cbbaec323589db6f77d93e93aab42b835c82 100644
Binary files a/irlc/tests/unitgrade_data/Problem1DiscreteKuromoto.pkl and b/irlc/tests/unitgrade_data/Problem1DiscreteKuromoto.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem1Kuramoto.pkl b/irlc/tests/unitgrade_data/Problem1Kuramoto.pkl
index cf285336e8f17dae0bab7e7270a857852e1ba420..ef1b528ea3cb8460738583d5cf526ff19f05dc78 100644
Binary files a/irlc/tests/unitgrade_data/Problem1Kuramoto.pkl and b/irlc/tests/unitgrade_data/Problem1Kuramoto.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl b/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl
index 7023764ccbc227ca96f484b63afdabaa14f9483e..25f36de2f85c245c47e3f76ccfd13411d7dbb190 100644
Binary files a/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl and b/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem2DeterministicDP.pkl b/irlc/tests/unitgrade_data/Problem2DeterministicDP.pkl
index ad9b4003fc5167a0e32093f6add6c9564bb3abd6..c6a6d86c444884d85398ab5d68b4a8f2d731b17c 100644
Binary files a/irlc/tests/unitgrade_data/Problem2DeterministicDP.pkl and b/irlc/tests/unitgrade_data/Problem2DeterministicDP.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem2DeterministicInventory.pkl b/irlc/tests/unitgrade_data/Problem2DeterministicInventory.pkl
index 7316c88b930645edd82ccc62f437a80b134c2230..d7b664c9873a3f229a3ada6b5c0794429402f080 100644
Binary files a/irlc/tests/unitgrade_data/Problem2DeterministicInventory.pkl and b/irlc/tests/unitgrade_data/Problem2DeterministicInventory.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl b/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl
index efa02d1d079347d5441874bc597c4e187d31bc71..ed6643cb8af26e6368d19072746fd41ae4c60ab0 100644
Binary files a/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl and b/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem3LQR.pkl b/irlc/tests/unitgrade_data/Problem3LQR.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..604d5c20678f95431aae0b2c24d839ff98a641a0
Binary files /dev/null and b/irlc/tests/unitgrade_data/Problem3LQR.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem3PID.pkl b/irlc/tests/unitgrade_data/Problem3PID.pkl
index 8c6beb7a36898495ec6e53b18524f5710a900457..535051b828a3939a46f439310b29198a5e080a0a 100644
Binary files a/irlc/tests/unitgrade_data/Problem3PID.pkl and b/irlc/tests/unitgrade_data/Problem3PID.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem3StochasticDP.pkl b/irlc/tests/unitgrade_data/Problem3StochasticDP.pkl
index 929b45dc8d15fa1f4ae023ee1b060091fb8247af..04713f394a37f31128be4aead3269f8c3c2e4695 100644
Binary files a/irlc/tests/unitgrade_data/Problem3StochasticDP.pkl and b/irlc/tests/unitgrade_data/Problem3StochasticDP.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem4DPAgent.pkl b/irlc/tests/unitgrade_data/Problem4DPAgent.pkl
index b98533150232f5c5315c7b14033536777ce90a8f..54dc9c584545f20c53ce6eab91442e358d24233e 100644
Binary files a/irlc/tests/unitgrade_data/Problem4DPAgent.pkl and b/irlc/tests/unitgrade_data/Problem4DPAgent.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl b/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl
index 6228dc12a502006cdf951286048d93936bea9dfe..2e0efe223d9500337a95c76732a6fcfc3cbb2872 100644
Binary files a/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl and b/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem4LQRAgent.pkl b/irlc/tests/unitgrade_data/Problem4LQRAgent.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..b7f0a6a6d9def299660158f65d0b85af24d92699
Binary files /dev/null and b/irlc/tests/unitgrade_data/Problem4LQRAgent.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem4PIDAgent.pkl b/irlc/tests/unitgrade_data/Problem4PIDAgent.pkl
index b3daba6609d47b6033fa62fa6963bb89fcae2543..70a85f4bcf6fc78c263690ba8b0491e7799bf59c 100644
Binary files a/irlc/tests/unitgrade_data/Problem4PIDAgent.pkl and b/irlc/tests/unitgrade_data/Problem4PIDAgent.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl b/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl
index 04ac143368d1c4eb5280b4166b9da0f4e3eb3701..3968cbf7284d8d716e19ed9972ad53d4d3172bf7 100644
Binary files a/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl and b/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem5_6_Boeing.pkl b/irlc/tests/unitgrade_data/Problem5_6_Boeing.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..4650e0a6d3fa65b094ffb89356fae69744d7793a
Binary files /dev/null and b/irlc/tests/unitgrade_data/Problem5_6_Boeing.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl b/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl
index ebeda311cb01e4d60077598cb655e5368f5bcdda..e7da887928def8fb94b9e7e3d034e373928ce32e 100644
Binary files a/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl and b/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem7PIDCar.pkl b/irlc/tests/unitgrade_data/Problem7PIDCar.pkl
index bfe010445cb8af018fe5e956ef043b34a34dba89..e22d2e24d83e87606e137a33987ca83d3c67a210 100644
Binary files a/irlc/tests/unitgrade_data/Problem7PIDCar.pkl and b/irlc/tests/unitgrade_data/Problem7PIDCar.pkl differ
diff --git a/irlc/tests/unitgrade_data/Problem7_8_PidLQR.pkl b/irlc/tests/unitgrade_data/Problem7_8_PidLQR.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..d455290c459b3de4a4a31f3905f4f131de9a2b78
Binary files /dev/null and b/irlc/tests/unitgrade_data/Problem7_8_PidLQR.pkl differ
diff --git a/irlc/tests/unitgrade_data/RendevouzItem.pkl b/irlc/tests/unitgrade_data/RendevouzItem.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..91c3ae562ba9fce0668664f1fa58979cd070b2a7
Binary files /dev/null and b/irlc/tests/unitgrade_data/RendevouzItem.pkl differ