diff --git a/irlc/ex06/__init__.py b/irlc/ex06/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6e26755e5ec4fe79d8350778babe173741127191 --- /dev/null +++ b/irlc/ex06/__init__.py @@ -0,0 +1,2 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +"""This directory contains the exercises for week 6.""" diff --git a/irlc/ex06/boeing_lqr.py b/irlc/ex06/boeing_lqr.py new file mode 100644 index 0000000000000000000000000000000000000000..e26f5e6bdc1a5de218418b09e2d16b4d799a32a0 --- /dev/null +++ b/irlc/ex06/boeing_lqr.py @@ -0,0 +1,85 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +""" + +References: + [Her25] Tue Herlau. Sequential decision making. (Freely available online), 2025. +""" +import numpy as np +import matplotlib.pyplot as plt +from irlc import savepdf +from irlc import train +from irlc.ex06.model_boeing import BoeingEnvironment +from irlc.ex06.lqr_agent import LQRAgent +from irlc.ex03.control_model import ControlModel +import scipy + + +def boeing_simulation(): + env = BoeingEnvironment(Tmax=10) + model = env.discrete_model.continuous_model # get the model from the Boeing environment + dt = env.dt # Get the discretization time. + A, B, d = compute_A_B_d(model, dt) + # Use compute_Q_R_q to get the Q, R, and q matrices in the discretized system + # TODO: 1 lines missing. + raise NotImplementedError("Compute Q, R and q here") + ## TODO: Half of each line of code in the following 1 lines have been replaced by garbage. Make it work and remove the error. + #---------------------------------------------------------------------------------------------------------------------------- + # agent = LQRAgent(env, A=A?????????????????????????? + raise NotImplementedError("Use your LQRAgent to plan using the system matrices.") + stats, trajectories = train(env, agent, return_trajectory=True) + return stats, trajectories, env + +def compute_Q_R_q(model : ControlModel, dt : float): + cost = model.get_cost() # Get the continuous-time cost-function + # use print(cost) to see what it contains. + # Then get the discretized matrices using the techniques described in (Her25, Subsection 13.1.6). + # TODO: 3 lines missing. + raise NotImplementedError("Insert your solution and remove this error.") + return Q, R, q + +def compute_A_B_d(model : ControlModel, dt : float): + if model.d is None: + d = np.zeros((model.state_size,)) # Ensure d is set to a zero vector if it is not defined. + else: + d = model.d + + A_discrete = scipy.linalg.expm(model.A * dt) # This is the discrete A-matrix computed using the matrix exponential + # Now it is your job to define B_discrete and d_discrete. + # TODO: 2 lines missing. 
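+    # A hedged sketch of one possible approach (not necessarily the intended solution): with a
+    # zero-order hold on u over [0, dt], the exact discretization of x' = A x + B u + d is
+    #   x_{k+1} = expm(A*dt) x_k + M B u_k + M d,   where   M = int_0^dt expm(A*s) ds,
+    # so B_discrete = M @ B and d_discrete = M @ d. Assuming model.B exposes the continuous
+    # B-matrix (analogous to model.A above), M could for instance be evaluated numerically as
+    #   M = scipy.integrate.quad_vec(lambda s: scipy.linalg.expm(model.A * s), 0, dt)[0]
+    # (this would additionally require importing scipy.integrate).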
+ raise NotImplementedError("Insert your solution and remove this error.") + return A_discrete, B_discrete, d_discrete.flatten() + +def boeing_experiment(): + _, trajectories, env = boeing_simulation() + model = env.discrete_model.continuous_model + + dt = env.dt + Q, R, q = compute_Q_R_q(model, dt) + print("Discretization time is", dt) + print("Original q-vector was:", model.get_cost().q) + print("Discretized version is:", q) + + t = trajectories[-1] + out = t.state @ model.P.T + + plt.plot(t.time, out[:, 0], '-', label=env.observation_labels[0]) + plt.plot(t.time, out[:, 1], '-', label=env.observation_labels[1]) + plt.grid() + plt.legend() + plt.xlabel("Time/seconds") + plt.ylabel("Output") + savepdf("boing_lqr_output") + plt.show(block=False) + plt.close() + + plt.plot(t.time[:-1], t.action[:, 0], '-', label=env.action_labels[0]) + plt.plot(t.time[:-1], t.action[:, 1], '-', label=env.action_labels[1]) + plt.xlabel("Time/seconds") + plt.ylabel("Control action") + plt.grid() + plt.legend() + savepdf("boing_lqr_action") + plt.show() + +if __name__ == "__main__": + boeing_experiment() diff --git a/irlc/ex06/dlqr.py b/irlc/ex06/dlqr.py new file mode 100644 index 0000000000000000000000000000000000000000..d8b790331f228223a82544f52ba7627a3e36668a --- /dev/null +++ b/irlc/ex06/dlqr.py @@ -0,0 +1,207 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +""" + +References: + [Her25] Tue Herlau. Sequential decision making. (Freely available online), 2025. +""" +import numpy as np +import matplotlib.pyplot as plt +from irlc import bmatrix +from irlc import savepdf + + + +def LQR(A : list, # Dynamic + B : list, # Dynamics + d : list =None, # Dynamics (optional) + Q : list=None, + R: list=None, + H : list=None, + q : list=None, + r : list=None, + qc : list=None, + QN : np.ndarray =None, # Terminal cost term + qN : np.ndarray=None, # Terminal cost term + qcN : np.ndarray =None, # Terminal cost term. + mu : float =0 # regularization parameter which will only be relevant next week. + ): + r""" + Implement the LQR as defined in (Her25, Algorithm 22). I recommend viewing this documentation online (documentation for week 6). + + When you solve this exercise, look at the algorithm in the book. Since the LQR problem is on the form: + + .. math:: + + x_{k+1} = A_k x_k + B_k u_k + d_k + + For :math:`k=0,\dots,N-1` this means there are :math:`N` matrices :math:`A_k`. This is implemented by assuming that + :python:`A` (i.e., the input argument) is a :python:`list` of length :math:`N` so that :python:`A[k]` corresponds + to :math:`A_k`. + + Similar conventions are used for the cost term (please see the lecture notes or the online documentation for their meaning). Recall it has the form: + + .. math:: + + c(x_k, u_k) = \frac{1}{2} \mathbf x_k^\top Q_k \mathbf x_k + \frac{1}{2} \mathbf q_k^\top \mathbf x_k + q_k + \cdots + + When the function is called, the vector :math:`\textbf{q}_k` corresponds to :python:`q` and the constant :math:`q_k` correspond to :python:`qc` (q-constant). + + .. note:: + + Only the terms :python:`A` and :python:`B` are required. The rest of the terms will default to 0-matrices. + + The LQR algorithm will ultimately compute a control law of the form: + + .. math:: + + \mathbf u_k = L_k \mathbf x_k + \mathbf l_k + + And a cost-to-go function as: + + .. 
math:: + + J_k(x_k) = \frac{1}{2} \mathbf x_k^\top V_k \mathbf x_k + v_k^\top \mathbf x_k + v_k + + Again there are :math:`N-1` terms. The function then return :python:`return (L, l), (V, v, vc)` so that :python:`L[k]` corresponds to :math:`L_k`. + + :param A: A list of :python:`np.ndarray` containing all terms :math:`A_k` + :param B: A list of :python:`np.ndarray` containing all terms :math:`B_k` + :param d: A list of :python:`np.ndarray` containing all terms :math:`\mathbf d_k` (optional) + :param Q: A list of :python:`np.ndarray` containing all terms :math:`Q_k` (optional) + :param R: A list of :python:`np.ndarray` containing all terms :math:`R_k` (optional) + :param H: A list of :python:`np.ndarray` containing all terms :math:`H_k` (optional) + :param q: A list of :python:`np.ndarray` containing all terms :math:`\mathbf q_k` (optional) + :param r: A list of :python:`np.ndarray` containing all terms :math:`\mathbf r_k` (optional) + :param qc: A list of :python:`float` containing all terms :math:`q_k` (i.e., constant terms) (optional) + :param QN: A :python:`np.ndarray` containing the terminal cost term :math:`Q_N` (optional) + :param qN: A :python:`np.ndarray` containing the terminal cost term :math:`\mathbf q_N` (optional) + :param qcN: A :python:`np.ndarray` containing the terminal cost term :math:`q_N` + :param mu: A regularization term which is useful for iterative-LQR (next week). Default to 0. + :return: A tuple of the form :python:`(L, l), (V, v, vc)` corresponding to the control and cost-matrices. + """ + N = len(A) + n,m = B[0].shape + # Initialize empty lists for control matrices and cost terms + L, l = [None]*N, [None]*N + V, v, vc = [None]*(N+1), [None]*(N+1), [None]*(N+1) + # Initialize constant cost-function terms to zero if not specified. + # They will be initialized to zero, meaning they have no effect on the update rules. + QN = np.zeros((n,n)) if QN is None else QN + qN = np.zeros((n,)) if qN is None else qN + qcN = 0 if qcN is None else qcN + H, q, qc, r = init_mat(H,m,n,N=N), init_mat(q,n,N=N), init_mat(qc,1,N=N), init_mat(r,m,N=N) + d = init_mat(d,n, N=N) + """ In the next line, you should initialize the last cost-term. This is similar to how we in DP had the initialization step + > J_N(x_N) = g_N(x_N) + Except that since x_N is no longer discrete, we store it as matrices/vectors representing a second-order polynomial, i.e. + > J_N(X_N) = 1/2 * x_N' V[N] x_N + v[N]' x_N + vc[N] + """ + # TODO: 1 lines missing. + raise NotImplementedError("Initialize V[N], v[N], vc[N] here") + + In = np.eye(n) + for k in range(N-1,-1,-1): + # When you update S_uu and S_ux remember to add regularization as the terms ... (V[k+1] + mu * In) ... + # Note that that to find x such that + # >>> x = A^{-1} y this + # in a numerically stable manner this should be done as + # >>> x = np.linalg.solve(A, y) + # The terms you need to update will be, in turn: + # Suu = ... + # Sux = ... + # Su = ... + # L[k] = ... + # l[k] = ... + # V[k] = ... + # V[k] = ... + # v[k] = ... + # vc[k] = ... + ## TODO: Half of each line of code in the following 4 lines have been replaced by garbage. Make it work and remove the error. + #---------------------------------------------------------------------------------------------------------------------------- + # Suu = R[k] + B[k].T @ (???????????????????????? + # Sux = H[k] + B[k].T @ (???????????????????????? + # Su = r[k] + B[k].T @ v[k + 1????????????????????????????? + # L[k] = -np.linal????????????????? 
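+        # A hedged sketch of the standard backward-pass terms -- check the exact form against
+        # (Her25, Algorithm 22) before relying on it:
+        #   Suu  = R[k] + B[k].T @ (V[k+1] + mu * In) @ B[k]
+        #   Sux  = H[k] + B[k].T @ (V[k+1] + mu * In) @ A[k]
+        #   Su   = r[k] + B[k].T @ v[k+1] + B[k].T @ V[k+1] @ d[k]
+        #   L[k] = -np.linalg.solve(Suu, Sux)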
+ raise NotImplementedError("Insert your solution and remove this error.") + l[k] = -np.linalg.solve(Suu, Su) # You get this for free. Notice how we use np.lingalg.solve(A,x) to compute A^{-1} x + V[k] = Q[k] + A[k].T @ V[k+1] @ A[k] - L[k].T @ Suu @ L[k] + V[k] = 0.5 * (V[k] + V[k].T) # I recommend putting this here to keep V positive semidefinite + # You get these for free: Compare to the code in the algorithm. + v[k] = q[k] + A[k].T @ (v[k+1] + V[k+1] @ d[k]) + Sux.T @ l[k] + vc[k] = vc[k+1] + qc[k] + d[k].T @ v[k+1] + 1/2*( d[k].T @ V[k+1] @ d[k] ) + 1/2*l[k].T @ Su + + return (L,l), (V,v,vc) + + +def init_mat(X, a, b=None, N=None): + """ + Helper function. Check if X is None, and if so return a list + [A, A,....] + which is N long and where each A is a (a x b) zero-matrix, else returns X repeated N times: + [X, X, ...] + """ + M0 = np.zeros((a,) if b is None else (a, b)) + if X is not None: + return [m if m is not None else M0 for m in X] + else: + return [M0] * N + +def lqr_rollout(x0,A,B,d,L,l): + """ + Compute a rollout (states and actions) given solution from LQR controller function. + + x0 is a vector (starting state), and A, B, d and L, l are lists of system/control matrices. + """ + x, states,actions = x0, [x0], [] + n,m = B[0].shape + N = len(L) + d = init_mat(d,n,1,N) # Initialize as a list of zero matrices [ np.zeros((n,1)), np.zeros((n,1)), ...] + l = init_mat(l,m,1,N) # Initialize as a list of zero matrices [ np.zeros((m,1)), np.zeros((m,1)), ...] + + for k in range(N): + u = L[k] @ x + l[k] + x = A[k] @ x + B[k] @ u + d[k] + actions.append(u) + states.append(x) + return states, actions + +if __name__ == "__main__": + """ + Solve this problem (see also lecture notes for the same example) + http://cse.lab.imtlucca.it/~bemporad/teaching/ac/pdf/AC2-04-LQR-Kalman.pdf + """ + N = 20 + A = np.ones((2,2)) + A[1,0] = 0 + B = np.asarray([[0], [1]]) + Q = np.zeros((2,2)) + R = np.ones((1,1)) + + print("System matrices A, B, Q, R") + print(bmatrix(A)) + print(bmatrix(B)) + print(bmatrix(Q)) + print(bmatrix(R)) + + for rho in [0.1, 10, 100]: + Q[0,0] = 1/rho + (L,l), (V,v,vc) = LQR(A=[A]*N, B=[B]*N, d=None, Q=[Q]*N, R=[R]*N, QN=Q) + + x0 = np.asarray( [[1],[0]]) + trajectory, actions = lqr_rollout(x0,A=[A]*N, B=[B]*N, d=None,L=L,l=l) + + xs = np.concatenate(trajectory, axis=1)[0,:] + + plt.plot(xs, 'o-', label=f'rho={rho}') + + k = 10 + print(f"Control matrix in u_k = L_k x_k + l_k at k={k}:", L[k]) + for k in [N-1,N-2,0]: + print(f"L[{k}] is:", L[k].round(4)) + plt.title("Double integrator") + plt.xlabel('Steps $k$') + plt.ylabel('$x_1 = $ x[0]') + plt.legend() + plt.grid() + savepdf("dlqr_double_integrator") + plt.show() diff --git a/irlc/ex06/dlqr_check.py b/irlc/ex06/dlqr_check.py new file mode 100644 index 0000000000000000000000000000000000000000..3d86db35853cffea347fbb5a5666a7ba9ec47681 --- /dev/null +++ b/irlc/ex06/dlqr_check.py @@ -0,0 +1,40 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +import numpy as np +from irlc.ex06.dlqr import LQR + +def urnd(sz): + return np.random.uniform(-1, 1, sz) + +def check_LQR(): + np.random.seed(42) + n,m,N = 3,2,4 + """ + Create a randomized, nonsense control problem and solve it. Since seed is fixed we can expect same solution. 
+ """ + # system tersm + A = [urnd((n, n)) for _ in range(N)] + B = [urnd((n, m)) for _ in range(N)] + d = [urnd((n,)) for _ in range(N)] + # cost terms + Q = [urnd((n, n)) for _ in range(N)] + R = [urnd((m, m)) for _ in range(N)] + H = [urnd((m, n)) for _ in range(N)] + q = [urnd((n,)) for _ in range(N)] + qc = [urnd(()) for _ in range(N)] + r = [urnd((m,)) for _ in range(N)] + # terminal costs + QN = urnd((n, n)) + qN = urnd((n,)) + qcN = urnd(()) + return LQR(A=A, B=B, d=d, Q=Q, R=R, H=H, q=q, r=r, qc=qc, QN=QN, qN=qN, qcN=qcN, mu=0) + + +if __name__ == "__main__": + (L, l), (V, v, vc) = check_LQR() + N = len(V)-1 + print(", ".join([f"l[{k}]={l[k].round(4)}" for k in [N - 1, N - 2, 0]])) + print("\n".join([f"L[{k}]={L[k].round(4)}" for k in [N - 1, N - 2, 0]])) + + print("\n".join([f"V[{k}]={V[k].round(4)}" for k in [0]])) + print(", ".join([f"v[{k}]={v[k].round(4)}" for k in [N, N - 1, 0]])) + print(", ".join([f"vc[{k}]={vc[k].round(4)}" for k in [N, N - 1, 0]])) diff --git a/irlc/ex06/lqr_agent.py b/irlc/ex06/lqr_agent.py new file mode 100644 index 0000000000000000000000000000000000000000..f62ec55971e3757bb84a7ac113a24fee99462c4e --- /dev/null +++ b/irlc/ex06/lqr_agent.py @@ -0,0 +1,54 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.ex04.locomotive import LocomotiveEnvironment +from irlc import train, plot_trajectory, savepdf, Agent +from irlc.ex06.dlqr import LQR +from irlc.ex04.control_environment import ControlEnvironment +import numpy as np +import matplotlib.pyplot as plt + +class LQRAgent(Agent): + def __init__(self, env : ControlEnvironment, A, B, Q, R, d=None, q=None): + N = int((env.Tmax / env.dt)) # Obtain the planning horizon + """ Define A, B as the list of A/B matrices here. I.e. x[t+1] = A x[t] + B x[t] + d. + You should use the function model.f to do this, which has build-in functionality to compute Jacobians which will be equal to A, B """ + """ Define self.L, self.l here as the (lists of) control matrices. """ + ## TODO: Half of each line of code in the following 1 lines have been replaced by garbage. Make it work and remove the error. + #---------------------------------------------------------------------------------------------------------------------------- + # (self.L, self.l), _ = LQR(A=[A]*N, B=[B]*N, d=[d]*N if d is not No??????????????????????????????????????????????????????????????????? + raise NotImplementedError("Insert your solution and remove this error.") + self.dt = env.dt + super().__init__(env) + + def pi(self,x, k, info=None): + """ + Compute the action here using u = L_k x + l_k. + You should use self.L, self.l to get the control matrices (i.e. L_k = self.L[k] ), + """ + # TODO: 1 lines missing. + raise NotImplementedError("Compute current action here") + return u + + +if __name__ == "__main__": + # Make a guess at the system matrices for planning. We will return on how to compute these exactly in a later exercise. + A = np.ones((2, 2)) + A[1, 0] = 0 + B = np.asarray([[0], [1]]) + Q = np.eye(2)*3 + R = np.ones((1, 1))*2 + q = np.asarray([-1.1, 0 ]) + + # Create and test our LQRAgent. + env = LocomotiveEnvironment(render_mode='human', Tmax=10, slope=1) + agent = LQRAgent(env, A=A, B=B, Q=Q, R=R, q=q) + stats, traj = train(env, agent, num_episodes=1) + + env.reset() + savepdf("locomotive_snapshot.pdf", env=env) # Make a plot for the exercise file. 
+    env.state_labels = ["x(t)", "v(t)"]
+    env.action_labels = ["u(t)"]
+    plot_trajectory(traj[0], env)
+    plt.show(block=True)
+    savepdf("lqr_agent")
+    plt.show()
+    env.close()
diff --git a/irlc/ex06/lqr_pid.py b/irlc/ex06/lqr_pid.py
new file mode 100644
index 0000000000000000000000000000000000000000..136cae2ba5a2bebcf45880daec13e853a7bee9b4
--- /dev/null
+++ b/irlc/ex06/lqr_pid.py
@@ -0,0 +1,79 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+import matplotlib.pyplot as plt
+import numpy as np
+from irlc import savepdf, train
+from irlc.ex04.pid_locomotive_agent import PIDLocomotiveAgent
+from irlc.ex06.lqr_agent import LQRAgent
+from irlc.ex04.model_harmonic import HarmonicOscilatorEnvironment
+from irlc.ex06.boeing_lqr import compute_A_B_d, compute_Q_R_q
+
+class ConstantLQRAgent(LQRAgent):
+    # TODO: 3 lines missing.
+    raise NotImplementedError("Complete this agent here. You need to update the policy-function: def pi(self, ..).")
+
+def get_Kp_Kd(L0):
+    # TODO: 1 lines missing.
+    raise NotImplementedError("Use lqr_agent.L to define Kp and Kd.")
+    return Kp, Kd
+
+
+if __name__ == "__main__":
+    Delta = 0.06 # Time discretization constant
+    # Define a harmonic oscillator environment. Use .., render_mode='human' to see a visualization.
+    env = HarmonicOscilatorEnvironment(Tmax=8, dt=Delta, m=0.5, R=np.eye(1) * 8, render_mode=None) # set render_mode='human' to see the oscillator.
+    model = env.discrete_model.continuous_model # Get the ControlModel corresponding to this environment.
+
+
+    # Compute the discretized A, B and d matrices using the helper functions we defined in the Boeing problem.
+    # Note that these are for the discrete environment: x_{k+1} = A x_k + B u_k + d
+    A, B, d = compute_A_B_d(model, Delta)
+    Q, R, q = compute_Q_R_q(model, Delta)
+
+    # Run the LQR agent
+    lqr_agent = LQRAgent(env, A=A, B=B, d=d, Q=Q, R=R, q=q)
+    _, traj1 = train(env, lqr_agent, return_trajectory=True)
+
+    # Part 1. Build an agent that always takes actions u_k = L_0 x_k + l_0
+    constant_agent = ConstantLQRAgent(env, A=A, B=B, d=d, Q=Q, R=R, q=q)
+    # Check that its policy is independent of $k$:
+    x0, _ = env.reset()
+    print(f"Initial state is {x0=}")
+    print(f"Action at time step k=0 {constant_agent.pi(x0, k=0)=}")
+    print(f"Action at time step k=5 (should be the same) {constant_agent.pi(x0, k=5)=}")
+
+    _, traj2 = train(env, constant_agent, return_trajectory=True)
+
+    # Part 2. Use the L and l matrices (see lqr_agent.L and lqr_agent.l)
+    # to select Kp and Kd in a PID agent. Then let's use the Locomotive agent to see the effect of the controller.
+    # Use render_mode='human' to see its effect.
+    # We only need to use L.
+    # Hint: compare the form of the LQR and PID controller and use that to select Kp and Kd.
+    Kp, Kd = get_Kp_Kd(lqr_agent.L[0]) # Use lqr_agent.L to define Kp and Kd.
+
+    # Define and run the PID agent.
+    pid_agent = PIDLocomotiveAgent(env, env.dt, Kp=Kp, Kd=Kd)
+    _, traj3 = train(env, pid_agent, return_trajectory=True)
+
+    # Plot all actions and state sequences.
+ plt.figure(figsize=(10,5)) + plt.grid() + plt.plot(traj1[0].time[:-1], traj1[0].action, label="Optimal LQR action sequence") + plt.plot(traj2[0].time[:-1], traj2[0].action, '.-', label="Constant LQR action sequence") + plt.plot(traj3[0].time[:-1], traj3[0].action, label="PID agent action sequence") + plt.xlabel("Time / Seconds") + plt.ylabel("Action / Newtons") + plt.ylim([-.2, .2]) + plt.legend() + savepdf("pid_lqr_actions") + plt.show(block=True) + + plt.figure(figsize=(10, 5)) + plt.grid() + plt.plot(traj1[0].time, traj1[0].state[:, 0], label="Optimal LQR states x(t)") + plt.plot(traj2[0].time, traj2[0].state[:, 0], label="Constant LQR states x(t)") + plt.plot(traj3[0].time, traj3[0].state[:, 0], label="PID agent states x(t)") + plt.xlabel("Time / Seconds") + plt.ylabel("Position x(t) / Meters") + plt.ylim([-1, 1]) + plt.legend() + savepdf("pid_lqr_states") diff --git a/irlc/ex06/model_boeing.py b/irlc/ex06/model_boeing.py new file mode 100644 index 0000000000000000000000000000000000000000..57e0a0c7a3664a45005437b4576038021c60f4ef --- /dev/null +++ b/irlc/ex06/model_boeing.py @@ -0,0 +1,62 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +import numpy as np +from irlc.ex04.discrete_control_model import DiscreteControlModel +from irlc.ex04.control_environment import ControlEnvironment +from irlc.ex04.model_linear_quadratic import LinearQuadraticModel + +class BoeingModel(LinearQuadraticModel): + """ + Boeing 747 level flight example. + + See: https://books.google.dk/books?id=tXZDAAAAQBAJ&pg=PA147&lpg=PA147&dq=boeing+747+flight+0.322+model+longitudinal+flight&source=bl&ots=L2RpjCAWiZ&sig=ACfU3U2m0JsiHmUorwyq5REcOj2nlxZkuA&hl=en&sa=X&ved=2ahUKEwir7L3i6o3qAhWpl4sKHQV6CdcQ6AEwAHoECAoQAQ#v=onepage&q=boeing%20747%20flight%200.322%20model%20longitudinal%20flight&f=false + Also: https://web.stanford.edu/~boyd/vmls/vmls-slides.pdf + """ + state_labels = ["Longitudinal velocity (x) ft/sec", "Velocity in y-axis ft/sec", "Angular velocity", + "angle wrt. horizontal"] + action_labels = ['Elevator', "Throttle"] + observation_labels = ["Airspeed", "Climb rate"] + + def __init__(self, output=None): + if output is None: + output = [10, 0] + # output = [10, 0] + A = [[-0.003, 0.039, 0, -0.322], + [-0.065, -.319, 7.74, 0], + [.02, -.101, -0.429, 0], + [0, 0, 1, 0]] + B = [[.01, 1], + [-.18, -.04], + [-1.16, .598], + [0, 0]] + + A, B = np.asarray(A), np.asarray(B) + self.u0 = 7.74 # speed in hundred feet/seconds + self.P = np.asarray([[1, 0, 0, 0], [0, -1, 0, 7.74]]) # Projection of state into airspeed + + dt = 0.1 # Scale the cost by this factor. 
+ + # Set up the cost: + self.Q_obs = np.eye(2) + Q = self.P.T @ self.Q_obs @ self.P / dt + R = np.eye(2) / dt + q = -np.asarray(output) @ self.Q_obs @ self.P / dt + super().__init__(A=A, B=B, Q=Q, R=R, q=q) + + def state2outputs(self, x): + return self.P @ x + +class DiscreteBoeingModel(DiscreteControlModel): + def __init__(self, output=None): + model = BoeingModel(output=output) + dt = 0.1 + super().__init__(model=model, dt=dt) + + +class BoeingEnvironment(ControlEnvironment): + @property + def observation_labels(self): + return self.discrete_model.continuous_model.observation_labels + + def __init__(self, Tmax=10): + model = DiscreteBoeingModel() + super().__init__(discrete_model=model, Tmax=Tmax) diff --git a/irlc/ex06/model_rendevouz.py b/irlc/ex06/model_rendevouz.py new file mode 100644 index 0000000000000000000000000000000000000000..ffdb923b608a3079c27b1df41eb6405f29fdf41c --- /dev/null +++ b/irlc/ex06/model_rendevouz.py @@ -0,0 +1,99 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +import numpy as np +from irlc.utils.graphics_util_pygame import UpgradedGraphicsUtil +from irlc.ex04.discrete_control_model import DiscreteControlModel +from irlc.ex04.control_environment import ControlEnvironment +from irlc.ex04.model_linear_quadratic import LinearQuadraticModel +from gymnasium.spaces import Box + +""" +SEE: https://github.com/anassinator/ilqr/blob/master/examples/rendezvous.ipynb +""" +class ContiniousRendevouzModel(LinearQuadraticModel): + state_labels= ["x0", "y0", "x1", "y1", 'Vx0', "Vy0", "Vx1", "Vy1"] + action_labels = ['Fx0', 'Fy0', "Fx1", "Fy1"] + x0 = np.array([0, 0, 10, 10, 0, -5, 5, 0]) # Initial state. + + def __init__(self, m=10.0, alpha=0.1, simple_bounds=None, cost=None): + m00 = np.zeros((4,4)) + mI = np.eye(4) + A = np.block( [ [m00, mI], [m00, -alpha/m*mI] ] ) + B = np.block( [ [m00], [mI/m]] ) + state_size = len(self.x0) + action_size = 4 + self.m = m + self.alpha = alpha + Q = np.eye(state_size) + Q[0, 2] = Q[2, 0] = -1 + Q[1, 3] = Q[3, 1] = -1 + R = 0.1 * np.eye(action_size) + self.viewer = None + super().__init__(A=A, B=B, Q=Q*20, R=R*20) + + def x0_bound(self) -> Box: + return Box(self.x0, self.x0) # self.bounds['x0_low'] = self.bounds['x0_high'] = list(self.x0) + + def render(self, x, render_mode="human"): + """ Render the environment. You don't have to understand this code. """ + if self.viewer is None: + self.viewer = HarmonicViewer(xstar=0, x0=self.x0) # target: x=0. 
+ self.viewer.update(x) + import time + time.sleep(0.05) + return self.viewer.blit(render_mode=render_mode) + + def close(self): + pass + + +class DiscreteRendevouzModel(DiscreteControlModel): + def __init__(self, dt=0.1, cost=None, transform_actions=True, **kwargs): + model = ContiniousRendevouzModel(**kwargs) + super().__init__(model=model, dt=dt, cost=cost) + +class RendevouzEnvironment(ControlEnvironment): + def __init__(self, Tmax=20, render_mode=None, **kwargs): + discrete_model = DiscreteRendevouzModel(**kwargs) + super().__init__(discrete_model, Tmax=Tmax, render_mode=render_mode) + +class HarmonicViewer(UpgradedGraphicsUtil): + def __init__(self, xstar = 0, x0=None): + self.xstar = xstar + width = 800 + self.x0 = x0 + sz = 20 + self.scale = width/(2*sz) + self.p1h = [] + self.p2h = [] + super().__init__(screen_width=width, xmin=-sz, xmax=sz, ymin=-sz, ymax=sz, title='Rendevouz environment') + + def render(self): + self.draw_background(background_color=(255, 255, 255)) + # dw = self.dw + p1 = self.x[:2] + p2 = self.x[2:4] + self.p1h.append(p1) + self.p2h.append(p2) + self.circle("asdf", pos=p1, r=.5 * self.scale, fillColor=(200, 0, 0)) + self.circle("asdf", pos=p2, r=.5 * self.scale, fillColor=(0, 0, 200) ) + if len(self.p1h) > 2: + self.polyline('...', np.stack(self.p1h)[:,0], np.stack(self.p1h)[:,1], width=1, color=(200, 0, 0)) + self.polyline('...', np.stack(self.p2h)[:,0], np.stack(self.p2h)[:,1], width=1, color=(0, 0, 200)) + + if tuple(self.x) == tuple(self.x0): + self.p1h = [] + self.p2h = [] + + + def update(self, x): + self.x = x + + +if __name__ == "__main__": + from irlc import Agent, train + env = RendevouzEnvironment(render_mode='human') + from irlc.ex06.lqr_agent import LQRAgent + a2 = LQRAgent(env=env) + + stats, traj = train(env, Agent(env), num_episodes=1) + pass diff --git a/irlc/ex07/__init__.py b/irlc/ex07/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1e8044b811c2884a4486857534492a8d7a83575b --- /dev/null +++ b/irlc/ex07/__init__.py @@ -0,0 +1,2 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +"""This directory contains the exercises for week 7.""" diff --git a/irlc/ex07/ilqr.py b/irlc/ex07/ilqr.py new file mode 100644 index 0000000000000000000000000000000000000000..d259999214f970e4b8486523b8f5ebcfd0814428 --- /dev/null +++ b/irlc/ex07/ilqr.py @@ -0,0 +1,275 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +""" + +References: + [Her25] Tue Herlau. Sequential decision making. (Freely available online), 2025. + [TET12] Yuval Tassa, Tom Erez, and Emanuel Todorov. Synthesis and stabilization of complex behaviors through online trajectory optimization. In 2012 IEEE/RSJ International Conference on Intelligent Robots and Systems, 4906–4913. IEEE, 2012. (See tassa2012.pdf). + [Har20] James Harrison. Optimal and learning-based control combined course notes. (See AA203combined.pdf), 2020. +""" +r""" +This implements two methods: The basic ILQR method, described in (Her25, Algorithm 24), and the linesearch-based method +described in (Her25, Algorithm 25). + +If you are interested, you can consult (TET12) (which contains generalization to DDP) and (Har20, Alg 1). 
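+
+Roughly, both methods iterate the same three steps: (i) linearize the dynamics and quadratize the
+cost around the current nominal trajectory (x_bar, u_bar) using get_derivatives, (ii) run the LQR
+backward pass on the linearized/quadratized problem to obtain a time-varying affine feedback law
+(L, l), and (iii) simulate the controlled system with forward_pass to obtain a new nominal
+trajectory. The linesearch variant additionally scales the open-loop term by alpha and adapts the
+regularization parameter mu when the backward pass fails or the cost does not decrease.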
+""" +import warnings +import numpy as np +from irlc.ex06.dlqr import LQR +from irlc.ex04.discrete_control_model import DiscreteControlModel + +def ilqr_basic(model : DiscreteControlModel, N, x0, us_init : list = None, n_iterations=500, verbose=True): + r""" + Implements the basic ilqr algorithm, i.e. (Her25, Algorithm 24). Our notation (x_bar, etc.) will be consistent with the lecture slides + """ + mu, alpha = 1, 1 # Hyperparameters. For now, just let them have defaults and don't change them + # Create a random initial state-sequence + n, m = model.state_size, model.action_size + u_bar = [np.random.uniform(-1, 1,(model.action_size,)) for _ in range(N)] if us_init is None else us_init + x_bar = [x0] + [np.zeros(n, )] * N + """ + Initialize nominal trajectory xs, us using us and x0 (i.e. simulate system from x0 using action sequence us). + The simplest way to do this is to call forward_pass with all-zero sequence of control vector/matrix l, L. + """ + # TODO: 2 lines missing. + raise NotImplementedError("Initialize x_bar, u_bar here") + J_hist = [] + for i in range(n_iterations): + """ + Compute derivatives around trajectory and cost estimate J of trajectory. To do so, use the get_derivatives + function. Remember the functions will return lists of derivatives. + """ + # TODO: 2 lines missing. + raise NotImplementedError("Compute J and derivatives A_k = f_x, B_k = f_u, ....") + """ Backward pass: Obtain feedback law matrices l, L using the backward_pass function. + """ + # TODO: 1 lines missing. + raise NotImplementedError("Compute L, l = .... here") + """ Forward pass: Given L, l matrices computed above, simulate new (optimal) action sequence. + In the lecture slides, this is similar to how we compute u^*_k and x_k + Once they are computed, iterate the iLQR algorithm by setting x_bar, u_bar equal to these values + """ + # TODO: 1 lines missing. + raise NotImplementedError("Compute x_bar, u_bar = ...") + if verbose: + print(f"{i}> J={J:4g}, change in cost since last iteration {0 if i == 0 else J-J_hist[-1]:4g}") + J_hist.append(J) + return x_bar, u_bar, J_hist, L, l + +def ilqr_linesearch(model : DiscreteControlModel, N, x0, n_iterations, us_init=None, tol=1e-6, verbose=True): + r""" + For linesearch implement method described in (Her25, Algorithm 25) (we will use regular iLQR, not DDP!) + """ + # The range of alpha-values to try out in the linesearch + # plus parameters relevant for regularization scheduling. + alphas = 1.1 ** (-np.arange(10) ** 2) # alphas = [1, 1.1^{-2}, ...] + mu_min = 1e-6 + mu_max = 1e10 + Delta_0 = 2 + mu = 1.0 + Delta = Delta_0 + + n, m = model.state_size, model.action_size + u_bar = [np.random.uniform(-1, 1, (model.action_size,)) for _ in range(N)] if us_init is None else us_init + x_bar = [x0] + [np.zeros(n, )] * (N) + # Initialize nominal trajectory xs, us (same as in basic linesearch) + # TODO: 2 lines missing. + raise NotImplementedError("Copy-paste code from previous solution") + J_hist = [] + + converged = False + for i in range(n_iterations): + alpha_was_accepted = False + """ Step 1: Compute derivatives around trajectory and cost estimate of trajectory. + (copy-paste from basic implementation). In our implementation, J_bar = J_{u^star}(x_0) """ + # TODO: 2 lines missing. + raise NotImplementedError("Obtain derivatives f_x, f_u, ... as well as cost of trajectory J_bar = ...") + try: + """ + Step 2: Backward pass to obtain control law (l, L). Same as before so more copy-paste + """ + # TODO: 1 lines missing. + raise NotImplementedError("Obtain l, L = ... 
in backward pass") + """ + Step 3: Forward pass and alpha scheduling. + Decrease alpha and check condition |J^new < J'|. Apply the regularization scheduling as needed. """ + for alpha in alphas: + x_hat, u_hat = forward_pass(model, x_bar, u_bar, L=L, l=l, alpha=alpha) # Simulate trajectory using this alpha + # TODO: 1 lines missing. + raise NotImplementedError("Compute J_new = ... as the cost of trajectory x_hat, u_hat") + + if J_new < J_prime: + """ Linesearch proposed trajectory accepted! Set current trajectory equal to x_hat, u_hat. """ + if np.abs((J_prime - J_new) / J_prime) < tol: + converged = True # Method does not seem to decrease J; converged. Break and return. + + J_prime = J_new + x_bar, u_bar = x_hat, u_hat + ''' + The update was accepted and you should change the regularization term mu, + and the related scheduling term Delta. + ''' + # TODO: 1 lines missing. + raise NotImplementedError("Delta, mu = ...") + alpha_was_accepted = True # accept this alpha + break + except np.linalg.LinAlgError as e: + # Matrix in dlqr was not positive-definite and this diverged + warnings.warn(str(e)) + + if not alpha_was_accepted: + ''' No alphas were accepted, which is not too hot. Regularization should change + ''' + # TODO: 1 lines missing. + raise NotImplementedError("Delta, mu = ...") + + if mu_max and mu >= mu_max: + raise Exception("Exceeded max regularization term; we are stuffed.") + + dJ = 0 if i == 0 else J_prime-J_hist[-1] + info = "converged" if converged else ("accepted" if alpha_was_accepted else "failed") + if verbose: + print(f"{i}> J={J_prime:4g}, decrease in cost {dJ:4g} ({info}).\nx[N]={x_bar[-1].round(2)}") + J_hist.append(J_prime) + if converged: + break + return x_bar, u_bar, J_hist, L, l + +def backward_pass(A : list, B : list, c_x : list, c_u : list, c_xx : list, c_ux : list, c_uu : list, mu=1): + r"""Given all derivatives, apply the LQR algorithm to get the control law. + + The input arguments are described in the online documentation and the lecture notes. You should use them to call your + implementation of the :func:`~irlc.ex06.dlqr.LQR` method. Note that you should give a value of all inputs except for the ``d``-term. + + :param A: linearization of the dynamics matrices :math:`A_k`. + :param B: linearization of the dynamics matrices :math:`B_k`. + :param c_x: Cost terms corresponding to :math:`\mathbf{q}_k` + :param c_u: Cost terms corresponding to :math:`\mathbf{r}_k` + :param c_xx: Cost terms corresponding to :math:`Q_k` + :param c_ux: Cost terms corresponding to :math:`H_k` + :param c_uu: Cost terms corresponding to :math:`R_k` + :param mu: Regularization parameter for the LQR method + :return: The control law :math:`L_k, \mathbf{l}_k` as two lists. + """ + Q, QN = c_xx[:-1], c_xx[-1] # An example. + # TODO: 4 lines missing. + raise NotImplementedError("Insert your solution and remove this error.") + # Define the inputs using the linearization inputs. + (L, l), (V, v, vc) = LQR(A=A, B=B, R=R, Q=Q, QN=QN, H=H, q=q, qN=qN, r=r, mu=mu) + return L, l + +def cost_of_trajectory(model : DiscreteControlModel, xs : list, us : list) -> float: + r"""Helper function which computes the cost of the trajectory. + + The cost is defined as: + + .. 
math:: + + c_N( \bar {\mathbf x}_N, \bar {\mathbf u}_) + \sum_{k=0}^{N-1} c_k(\bar {\mathbf x}_k, \bar {\mathbf u}_k) + + and to compute it, you should use the two helper methods ``model.cost.c`` and ``model.cost.cN`` + (see :func:`~irlc.ex04.discrete_control_cost.DiscreteQRCost.c` and :func:`~irlc.ex04.discrete_control_cost.DiscreteQRCost.cN`). + + :param model: The control model used to compute the cost. + :param xs: A list of length :math:`N+1` of the form :math:`\begin{bmatrix}\bar {\mathbf x}_0 & \dots & \bar {\mathbf x}_N \end{bmatrix}` + :param us: A list of length :math:`N` of the form :math:`\begin{bmatrix}\bar {\mathbf x}_0 & \dots & \bar {\mathbf x}_{N-1} \end{bmatrix}` + :return: The cost as a number. + """ + N = len(us) + JN = model.cost.cN(xs[-1]) + return sum(map(lambda args: model.cost.c(*args), zip(xs[:-1], us, range(N)))) + JN + +def get_derivatives(model : DiscreteControlModel, x_bar : list, u_bar : list): + """Compute all the derivatives used in the model. + + The return type should match the meaning in (Her25, Subequation 17.8) and in the online documentation. + + - ``c`` should be a list of length :math:`N+1` + - ``c_x`` should be a list of length :math:`N+1` + - ``c_xx`` should be a list of length :math:`N+1` + - ``c_u`` should be a list of length :math:`N` + - ``c_uu`` should be a list of length :math:`N` + - ``c_ux`` should be a list of length :math:`N` + - ``A`` should be a list of length :math:`N` + - ``B`` should be a list of length :math:`N` + + Use the model to compute these terms. For instance, this will compute the first terms ``A[0]`` and ``B[0]``:: + + A0, B0 = model.f_jacobian(x_bar[0], u_bar[0], 0) + + Meanwhile, to compute the first terms of the cost-functions you should use:: + + c[0], c_x[0], c_u[0], c_xx[0], c_ux[0], c_uu[0] = model.cost.c(x_bar[0], u_bar[0], k=0, compute_gradients=True) + + :param model: The model to use when computing the derivatives of the cost + :param x_bar: The nominal state-trajectory + :param u_bar: The nominal action-trajectory + :return: Lists of all derivatives computed around the nominal trajectory (see the lecture notes). + """ + N = len(u_bar) + """ Compute A_k, B_k (lists of matrices of length N) as the jacobians of the dynamics. To do so, + recall from the online documentation that: + x, f_x, f_u = model.f(x, u, k, compute_jacobian=True) + """ + A = [None]*N + B = [None]*N + c = [None] * (N+1) + c_x = [None] * (N + 1) + c_xx = [None] * (N + 1) + + c_u = [None] * (N+1) + c_ux = [None] * (N + 1) + c_uu = [None] * (N + 1) + # Now update each entry correctly (i.e., ensure there are no None elements left). + # TODO: 4 lines missing. + raise NotImplementedError("Insert your solution and remove this error.") + """ Compute derivatives of the cost function. For terms not including u these should be of length N+1 + (because of gN!), for the other lists of length N + recall model.cost.c has output: + c[i], c_x[i], c_u[i], c_xx[i], c_ux[i], c_uu[i] = model.cost.c(x, u, i, compute_gradients=True) + """ + # TODO: 2 lines missing. + raise NotImplementedError("Insert your solution and remove this error.") + # Concatenate the derivatives associated with the last time point N. + cN, c_xN, c_xxN = model.cost.cN(x_bar[N], compute_gradients=True) + # TODO: 3 lines missing. 
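+    # A hedged sketch (one possibility; check against the lengths listed in the docstring above):
+    # the terminal cost contributes only state-dependent terms at index N, e.g.
+    #   c[N], c_x[N], c_xx[N] = cN, c_xN, c_xxN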
+ raise NotImplementedError("Update c, c_x and c_xx with the terminal terms.") + return A, B, c, c_x, c_u, c_xx, c_ux, c_uu + +def forward_pass(model : DiscreteControlModel, x_bar : list, u_bar : list, L : list, l : list, alpha=1.0): + r"""Simulates the effect of the controller on the model + + We assume the system starts in :math:`\mathbf{x}_0 = \bar {\mathbf x}_0`, and then simulate the effect of + generating actions using the closed-loop policy + + .. math:: + + \mathbf{u}_k = \bar {\mathbf u}_k + \alpha \mathbf{l}_k + L_k (\mathbf{x}_k - \bar { \mathbf x}_k) + + (see (Her25, eq. (17.16))). + + :param model: The model used to compute the dynamics. + :param x_bar: A nominal list of states + :param u_bar: A nominal list of actions (not used by the method) + :param L: A list of control matrices :math:`L_k` + :param l: A list of control vectors :math:`\mathbf{l}_k` + :param alpha: The linesearch parameter. + :return: A list of length :math:`N+1` of simulated states and a list of length :math:`N` of simulated actions. + """ + N = len(u_bar) + x = [None] * (N+1) + u_star = [None] * N + x[0] = x_bar[0].copy() + + for i in range(N): + r""" Compute using (Her25, eq. (17.16)) + u_{i} = ... + """ + # TODO: 1 lines missing. + raise NotImplementedError("u_star[i] = ....") + """ Remember to compute + x_{i+1} = f_k(x_i, u_i^*) + here: + """ + # TODO: 1 lines missing. + raise NotImplementedError("x[i+1] = ...") + return x, u_star diff --git a/irlc/ex07/ilqr_agent.py b/irlc/ex07/ilqr_agent.py new file mode 100644 index 0000000000000000000000000000000000000000..eb8c1464087000a4efbbba2b803eb6776378739e --- /dev/null +++ b/irlc/ex07/ilqr_agent.py @@ -0,0 +1,56 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +""" + +References: + [Her25] Tue Herlau. Sequential decision making. (Freely available online), 2025. +""" +from irlc.ex06.model_rendevouz import RendevouzEnvironment +from irlc.ex07.ilqr_rendovouz_basic import ilqr +from irlc import train +from irlc import Agent +import numpy as np + +class ILQRAgent(Agent): + def __init__(self, env, discrete_model, N=250, ilqr_iterations=10, use_ubar=False, use_linesearch=True): + super().__init__(env) + self.dt = discrete_model.dt + # x0 = discrete_model.reset() + x0,_ = env.reset() + x0 = np.asarray(x0) # Get the initial state. We will take this from the environment. + xs, us, self.J_hist, L, l = ilqr(discrete_model, N, x0, n_iter=ilqr_iterations, use_linesearch=use_linesearch) + self.ubar = us + self.xbar = xs + self.L = L + self.l = l + self.use_ubar = use_ubar # Should policy use open-loop u-bar (suboptimal) or closed-loop L_k, l_k? + + def pi(self, x, k, info=None): + if self.use_ubar: + u = self.ubar[k] + else: + if k >= len(self.ubar): + print(k, len(self.ubar)) + k = len(self.ubar)-1 + # See (Her25, eq. (17.16)) + # TODO: 1 lines missing. 
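+            # A hedged sketch based on (Her25, eq. (17.16)) as stated in the forward_pass docstring,
+            # with alpha = 1 (self.xbar, self.ubar, self.L and self.l are set in __init__ above):
+            #   u = self.ubar[k] + self.l[k] + self.L[k] @ (x - self.xbar[k])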
+ raise NotImplementedError("Generate action using the control matrices.") + return u + +def solve_rendevouz(): + env = RendevouzEnvironment() + N = int(env.Tmax / env.dt) + agent = ILQRAgent(env, env.discrete_model, N=N) + stats, trajectories = train(env, agent, num_episodes=1, return_trajectory=True) + env.close() + return stats, trajectories, agent + +if __name__ == "__main__": + from irlc.ex07.ilqr_rendovouz_basic import plot_vehicles + import matplotlib.pyplot as plt + stats, trajectories, agent = solve_rendevouz() + t =trajectories[0].state + xb = agent.xbar + plot_vehicles(t[:,0], t[:,1], t[:,2], t[:,3], linespec=':', legend=("RK4 policy simulation", "RK4 policy simulation")) + plot_vehicles(xb[:,0], xb[:,1], xb[:,2], xb[:,3], linespec='-') + plt.legend() + plt.show() diff --git a/irlc/ex07/ilqr_cartpole.py b/irlc/ex07/ilqr_cartpole.py new file mode 100644 index 0000000000000000000000000000000000000000..d2463a56ff438c600d4e4047d1e349757716dfc7 --- /dev/null +++ b/irlc/ex07/ilqr_cartpole.py @@ -0,0 +1,83 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +import matplotlib.pyplot as plt +import numpy as np +from irlc.ex05.model_cartpole import GymSinCosCartpoleEnvironment +import time +from irlc.ex07.ilqr_rendovouz_basic import ilqr +from irlc import savepdf + +# Number of steps. +N = 100 +def cartpole(use_linesearch): + env = GymSinCosCartpoleEnvironment(render_mode='human') + x0, info = env.reset() + xs, us, J_hist, L, l = ilqr(env.discrete_model, N, x0, n_iter=300, use_linesearch=use_linesearch) + plot_cartpole(env, xs, us, use_linesearch=use_linesearch) + +def plot_cartpole(env, xs, us, J_hist=None, use_linesearch=True): + animate(xs, env) + env.close() + # Transform actions/states using build-in functions. + def gapply(f, xm): + usplit = np.split(xm, len(xm)) + u2 = [f(u.flat) for u in usplit] + us = np.stack(u2) + return us + + us = gapply(env.discrete_model.phi_u_inv, us) + xs = gapply(env.discrete_model.phi_x_inv, xs) + + t = np.arange(N + 1) * env.dt + x = xs[:, 0] + theta = np.unwrap(xs[:, 2]) # Makes for smoother plots. 
+ theta_dot = xs[:, 3] + pdf_ex = '_linesearch' if use_linesearch else '' + ev = 'cartpole_' + + plt.plot(theta, theta_dot) + plt.xlabel("theta (rad)") + plt.ylabel("theta_dot (rad/s)") + plt.title("Orientation Phase Plot") + plt.grid() + savepdf(f"{ev}theta{pdf_ex}") + plt.show() + + _ = plt.plot(t[:-1], us) + _ = plt.xlabel("time (s)") + _ = plt.ylabel("Force (N)") + _ = plt.title("Action path") + plt.grid() + savepdf(f"{ev}action{pdf_ex}") + plt.show() + + _ = plt.plot(t, x) + _ = plt.xlabel("time (s)") + _ = plt.ylabel("Position (m)") + _ = plt.title("Cart position") + plt.grid() + savepdf(f"{ev}position{pdf_ex}") + plt.show() + if J_hist is not None: + _ = plt.plot(J_hist) + _ = plt.xlabel("Iteration") + _ = plt.ylabel("Total cost") + _ = plt.title("Total cost-to-go") + plt.grid() + savepdf(f"{ev}J{pdf_ex}") + plt.show() + +def animate(xs0, env): + render = True + if render: + for i in range(2): + render_(xs0, env.discrete_model) + time.sleep(1) + # env.viewer.close() + +def render_(xs, env): + for i in range(xs.shape[0]): + x = xs[i] + env.render(x=x) + +if __name__ == "__main__": + cartpole(use_linesearch=True) diff --git a/irlc/ex07/ilqr_cartpole_agent.py b/irlc/ex07/ilqr_cartpole_agent.py new file mode 100644 index 0000000000000000000000000000000000000000..cd82bd29a258b4eaf04b3f4f2ee3e64b74bf8af2 --- /dev/null +++ b/irlc/ex07/ilqr_cartpole_agent.py @@ -0,0 +1,43 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +import numpy as np +from irlc.ex07.ilqr_agent import ILQRAgent +from irlc import train +from irlc import savepdf +import matplotlib.pyplot as plt +from irlc.ex05.model_cartpole import GymSinCosCartpoleEnvironment + +def cartpole_experiment(N=12, use_linesearch=True, figex="", animate=True): + np.random.seed(2) + Tmax = .9 + dt = Tmax/N + env = GymSinCosCartpoleEnvironment(dt=dt, Tmax=Tmax, supersample_trajectory=True, render_mode='human' if animate else None) + agent = ILQRAgent(env, env.discrete_model, N=N, ilqr_iterations=200, use_linesearch=use_linesearch) + stats, trajectories = train(env, agent, num_episodes=1, return_trajectory=True) + + agent.use_ubar = True + stats2, trajectories2 = train(env, agent, num_episodes=1, return_trajectory=True) + env.close() + + xb = agent.xbar + tb = np.arange(N+1)*dt + plt.figure(figsize=(8,6)) + F = 3 + plt.plot(trajectories[0].time, trajectories[0].state[:,F], 'k-', label='Closed-loop $\\pi$') + plt.plot(trajectories2[0].time, trajectories2[0].state[:,F], '-', label='Open-loop $\\bar{u}_k$') + + plt.plot(tb, xb[:,F], '.-', label="iLQR rediction $\\bar{x}_k$") + plt.xlabel("Time/seconds") + plt.ylabel("$\cos(\\theta)$") + plt.title(f"Cartpole environment $T={N}$") + + plt.grid() + plt.legend() + ev = "pendulum" + savepdf(f"irlc_cartpole_theta_N{N}_{use_linesearch}{figex}") + plt.show() + +def plt_cartpole(): + cartpole_experiment(N=50, use_linesearch=True, animate=True) + +if __name__ == '__main__': + plt_cartpole() diff --git a/irlc/ex07/ilqr_pendulum.py b/irlc/ex07/ilqr_pendulum.py new file mode 100644 index 0000000000000000000000000000000000000000..5bcc82e7d94d072bf5ce529e08a99bb6f0647895 --- /dev/null +++ b/irlc/ex07/ilqr_pendulum.py @@ -0,0 +1,68 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. 
+import numpy as np +from irlc.ex04.model_pendulum import DiscreteSinCosPendulumModel +import matplotlib.pyplot as plt +import time +from irlc.ex07.ilqr_rendovouz_basic import ilqr +from irlc import savepdf + +def pendulum(use_linesearch): + print("> Using iLQR to solve Pendulum swingup task. Using linesearch?", use_linesearch) + dt = 0.02 + model = DiscreteSinCosPendulumModel(dt, cost=None) + N = 250 + # This rather clunky line gets us the initial state; we transform the bound by the variable transformation. + x0 = np.asarray(model.phi_x(model.continuous_model.x0_bound().low)) + + n_iter = 200 # Use 200 iLQR iterations. + # xs, us, J_hist, L, l = ilqr(model, ...) Write a function call like this, but with the correct parametesr + # TODO: 1 lines missing. + raise NotImplementedError("Call iLQR here (see hint above).") + + render = True + if render: + for i in range(2): + render_(xs, model) + time.sleep(2) # Sleep for two seconds between simulations. + model.close() + xs = np.asarray([model.phi_x_inv(x) for x, u in zip(xs, us)]) # Convert to Radians. We use the build-in functions to change coordinates. + xs, us = np.asarray(xs), np.asarray(us) + + t = np.arange(N) * dt + theta = np.unwrap(xs[:, 0]) # Makes for smoother plots. + theta_dot = xs[:, 1] + + pdf_ex = '_linesearch' if use_linesearch else '' + stitle = "(using linesearch)" if use_linesearch else "(not using linesearch) " + ev = 'pendulum_' + _ = plt.plot(theta, theta_dot) + _ = plt.xlabel("$\\theta$ (rad)") + _ = plt.ylabel("$d\\theta/dt$ (rad/s)") + _ = plt.title(f"Phase Plot {stitle}") + plt.grid() + savepdf(f"{ev}theta{pdf_ex}") + plt.show() + + _ = plt.plot(t, us) + _ = plt.xlabel("time (s)") + _ = plt.ylabel("Force (N)") + _ = plt.title(f"Action path {stitle}") + plt.grid() + savepdf(f"{ev}action{pdf_ex}") + plt.show() + + _ = plt.plot(J_hist) + _ = plt.xlabel("Iteration") + _ = plt.ylabel("Total cost") + _ = plt.title(f"Total cost-to-go {stitle}") + plt.grid() + savepdf(f"{ev}J{pdf_ex}") + plt.show() + +def render_(xs, env): + for i in range(xs.shape[0]): + env.render(xs[i]) + +if __name__ == "__main__": + pendulum(use_linesearch=False) + pendulum(use_linesearch=True) diff --git a/irlc/ex07/ilqr_pendulum_agent.py b/irlc/ex07/ilqr_pendulum_agent.py new file mode 100644 index 0000000000000000000000000000000000000000..6daf971f986520f92ecbab832b5a7d0c104f97f9 --- /dev/null +++ b/irlc/ex07/ilqr_pendulum_agent.py @@ -0,0 +1,63 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. 
+import numpy as np +from irlc.ex04.model_pendulum import GymSinCosPendulumEnvironment +from irlc.ex07.ilqr_agent import ILQRAgent +from irlc import train +from irlc import savepdf +import matplotlib.pyplot as plt + +Tmax = 3 +def pen_experiment(N=12, use_linesearch=True,figex="", animate=True): + dt = Tmax / N + env = GymSinCosPendulumEnvironment(dt, Tmax=Tmax, supersample_trajectory=True, render_mode="human" if animate else None) + agent = ILQRAgent(env, env.discrete_model, N=N, ilqr_iterations=200, use_linesearch=use_linesearch) + + stats, trajectories = train(env, agent, num_episodes=1, return_trajectory=True) + + agent.use_ubar = True + stats2, trajectories2 = train(env, agent, num_episodes=1, return_trajectory=True) + env.close() + + plot_pendulum_trajectory(env, trajectories[0], label='Closed-loop $\\pi$') + xb = agent.xbar + tb = np.arange(N+1)*dt + plt.figure(figsize=(12, 6)) + plt.plot(trajectories[0].time, trajectories[0].state[:,1], '-', label='Closed-loop $\\pi(x_k)$') + + plt.plot(trajectories2[0].time, trajectories2[0].state[:,1], '-', label='Open-loop $\\bar{u}_k$') + plt.plot(tb, xb[:,1], 'o-', label="iLQR prediction $\\bar{x}_k$") + plt.grid() + plt.legend() + ev = "pendulum" + savepdf(f"irlc_pendulum_theta_N{N}_{use_linesearch}{figex}") + plt.show() + + ## Plot J + plt.figure(figsize=(6, 6)) + plt.semilogy(agent.J_hist, 'k.-') + plt.xlabel("iLQR Iterations") + plt.ylabel("Cost function estimate $J$") + # plt.title("Last value: {") + plt.grid() + savepdf(f"irlc_pendulum_J_N{N}_{use_linesearch}{figex}") + plt.show() + +def plot_pendulum_trajectory(env, traj, style='k.-', label=None, action=False, **kwargs): + if action: + y = traj.action[:, 0] + y = np.clip(y, env.action_space.low[0], env.action_space.high[0]) + else: + y = traj.state[:, 1] + + plt.plot(traj.time[:-1] if action else traj.time, y, style, label=label, **kwargs) + plt.xlabel("Time/seconds") + if action: + plt.ylabel("Torque $u$") + else: + plt.ylabel(r"$\cos(\theta)$") + plt.grid() + pass + +N = 50 +if __name__ == "__main__": + pen_experiment(N=N, use_linesearch=True) diff --git a/irlc/ex07/ilqr_rendevoyz.py b/irlc/ex07/ilqr_rendevoyz.py new file mode 100644 index 0000000000000000000000000000000000000000..8cd6cdc30de69f057cb7d024bbdb46f6530e146e --- /dev/null +++ b/irlc/ex07/ilqr_rendevoyz.py @@ -0,0 +1,5 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.ex07.ilqr_rendovouz_basic import plot_rendevouz + +if __name__ == "__main__": + plot_rendevouz(use_linesearch=True) diff --git a/irlc/ex07/ilqr_rendovouz_basic.py b/irlc/ex07/ilqr_rendovouz_basic.py new file mode 100644 index 0000000000000000000000000000000000000000..fcd245e8166c6a9ede99c969c827268d2db40664 --- /dev/null +++ b/irlc/ex07/ilqr_rendovouz_basic.py @@ -0,0 +1,99 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. 
+import numpy as np +import matplotlib.pyplot as plt +from irlc import savepdf +from irlc.ex07.ilqr import ilqr_basic, ilqr_linesearch +from irlc.ex06.model_rendevouz import DiscreteRendevouzModel +from irlc.ex04.control_environment import ControlEnvironment +from irlc.ex04.discrete_control_model import DiscreteControlModel +# plt.ion() + + +def ilqr(model : DiscreteControlModel, N, x0, n_iter, use_linesearch, verbose=True): + if not use_linesearch: + xs, us, J_hist, L, l = ilqr_basic(model, N, x0, n_iterations=n_iter,verbose=verbose) + else: + xs, us, J_hist, L, l = ilqr_linesearch(model, N, x0, n_iterations=n_iter, tol=1e-6,verbose=verbose) + xs, us = np.stack(xs), np.stack(us) + return xs, us, J_hist, L, l + +def plot_vehicles(x_0, y_0, x_1, y_1, linespec='-', legend=("Vehicle 1", "Vehicle 2")): + _ = plt.title("Trajectory of the two omnidirectional vehicles") + _ = plt.plot(x_0, y_0, "r"+linespec, label=legend[0]) + _ = plt.plot(x_1, y_1, "b"+linespec, label=legend[1]) + +Tmax = 20 +def solve_rendovouz(use_linesearch=False): + model = DiscreteRendevouzModel() + x0 = np.asarray(model.continuous_model.x0_bound().low) # Starting position + N = int(Tmax/model.dt) + return ilqr(model, N, x0, n_iter=10, use_linesearch=use_linesearch), model + +def plot_rendevouz(use_linesearch=False): + (xs, us, J_hist, _, _), env = solve_rendovouz(use_linesearch=use_linesearch) + N = int(Tmax / env.dt) + dt = env.dt + x_0 = xs[:, 0] + y_0 = xs[:, 1] + x_1 = xs[:, 2] + y_1 = xs[:, 3] + x_0_dot = xs[:, 4] + y_0_dot = xs[:, 5] + x_1_dot = xs[:, 6] + y_1_dot = xs[:, 7] + + pdf_ex = '_linesearch' if use_linesearch else '' + ev = 'rendevouz_' + plot_vehicles(x_0, y_0, x_1, y_1, linespec='-', legend=("Vehicle 1", "Vehicle 2")) + plt.legend() + savepdf(f'{ev}trajectory{pdf_ex}') + plt.show() + + t = np.arange(N + 1) * dt + _ = plt.plot(t, x_0, "r") + _ = plt.plot(t, x_1, "b") + _ = plt.xlabel("Time (s)") + _ = plt.ylabel("x (m)") + _ = plt.title("X positional paths") + _ = plt.legend(["Vehicle 1", "Vehicle 2"]) + savepdf(f'{ev}vehicles_x_pos{pdf_ex}') + plt.show() + + _ = plt.plot(t, y_0, "r") + _ = plt.plot(t, y_1, "b") + _ = plt.xlabel("Time (s)") + _ = plt.ylabel("y (m)") + _ = plt.title("Y positional paths") + _ = plt.legend(["Vehicle 1", "Vehicle 2"]) + savepdf(f'{ev}vehicles_y_pos{pdf_ex}') + plt.show() + + _ = plt.plot(t, x_0_dot, "r") + _ = plt.plot(t, x_1_dot, "b") + _ = plt.xlabel("Time (s)") + _ = plt.ylabel("x_dot (m)") + _ = plt.title("X velocity paths") + _ = plt.legend(["Vehicle 1", "Vehicle 2"]) + savepdf(f'{ev}vehicles_vx{pdf_ex}') + plt.show() + + _ = plt.plot(t, y_0_dot, "r") + _ = plt.plot(t, y_1_dot, "b") + _ = plt.xlabel("Time (s)") + _ = plt.ylabel("y_dot (m)") + _ = plt.title("Y velocity paths") + _ = plt.legend(["Vehicle 1", "Vehicle 2"]) + savepdf(f'{ev}vehicles_vy{pdf_ex}') + plt.show() + + _ = plt.plot(J_hist) + _ = plt.xlabel("Iteration") + _ = plt.ylabel("Total cost") + _ = plt.title("Total cost-to-go") + savepdf(f'{ev}cost_to_go{pdf_ex}') + plt.show() + pass + + +if __name__ == "__main__": + plot_rendevouz(use_linesearch=False) diff --git a/irlc/ex07/linearization_agent.py b/irlc/ex07/linearization_agent.py new file mode 100644 index 0000000000000000000000000000000000000000..bffbe892da547974b2bc66a3c0d3b0807676ce8a --- /dev/null +++ b/irlc/ex07/linearization_agent.py @@ -0,0 +1,67 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. 
+""" + +References: + [Her25] Tue Herlau. Sequential decision making. (Freely available online), 2025. +""" +from irlc.ex06.dlqr import LQR +from irlc import Agent +from irlc.ex05.model_cartpole import GymSinCosCartpoleEnvironment +from irlc import train, savepdf +import matplotlib.pyplot as plt +import numpy as np +from irlc.ex04.control_environment import ControlEnvironment +from irlc.ex04.discrete_control_model import DiscreteControlModel + +class LinearizationAgent(Agent): + """ Implement the simple linearization procedure described in (Her25, Algorithm 23) which expands around a single fixed point. """ + def __init__(self, env: ControlEnvironment, model : DiscreteControlModel, xbar=None, ubar=None): + self.model = model + N = 50 # Plan on this horizon. The control matrices will converge fairly quickly. + """ Define A, B, d as the list of A/B matrices here. I.e. x[t+1] = A x[t] + B u[t] + d. + You should use the function model.f to do this, which has build-in functionality to compute Jacobians which will be equal to A, B. + It is important that you linearize around xbar, ubar. See (Her25, Section 17.1) for further details. """ + # TODO: 4 lines missing. + raise NotImplementedError("Insert your solution and remove this error.") + Q, q, R = self.model.cost.Q, self.model.cost.q, self.model.cost.R + """ Define self.L, self.l here as the (lists of) control matrices. """ + # TODO: 1 lines missing. + raise NotImplementedError("Compute control matrices L, l here using LQR(...)") + super().__init__(env) + + def pi(self, x, k, info=None): + """ + Compute the action here using u_k = L_0 x_k + l_0. The control matrix/vector L_0 can be found as the output from LQR, i.e. + L_0 = L[0] and l_0 = l[0]. + + The reason we use L_0, l_0 (and not L_k, l_k) is because the LQR problem itself is an approximation of the true dynamics + and this controller will be able to balance the pendulum for an infinite amount of time. + """ + # TODO: 1 lines missing. + raise NotImplementedError("Compute current action here") + return u + + +def get_offbalance_cart(waiting_steps=30, sleep_time=0.1): + env = GymSinCosCartpoleEnvironment(Tmax=3, render_mode='human') + env.reset() + import time + time.sleep(sleep_time) + env.state = env.discrete_model.x_upright + env.state[-1] = 0.01 # a bit of angular speed. + for _ in range(waiting_steps): # Simulate the environment for 30 steps to get things out of balance. + env.step(1) + time.sleep(sleep_time) + return env + + +if __name__ == "__main__": + np.random.seed(42) # I don't think these results are seed-dependent but let's make sure. + from irlc import plot_trajectory + env = get_offbalance_cart(4) # Simulate for 4 seconds to get the cart off-balance. Same idea as PID control. + agent = LinearizationAgent(env, model=env.discrete_model, xbar=env.discrete_model.x_upright, ubar=env.action_space.sample()*0) + _, trajectories = train(env, agent, num_episodes=1, return_trajectory=True, reset=False) # Note reset=False to maintain initial conditions. + plot_trajectory(trajectories[0], env, xkeys=[0,2, 3], ukeys=[0]) + env.close() + savepdf("linearization_cartpole") + plt.show() diff --git a/irlc/lectures/lec05/__init__.py b/irlc/lectures/lec05/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a56057c84d0ceac54aab1d40ba0f370c77fe10be --- /dev/null +++ b/irlc/lectures/lec05/__init__.py @@ -0,0 +1 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. 
If this file contains other copyright notices disregard this text. diff --git a/irlc/lectures/lec05/lecture_05_carpole_random.py b/irlc/lectures/lec05/lecture_05_carpole_random.py new file mode 100644 index 0000000000000000000000000000000000000000..e82a89bbe407251ed4d9b02d2881c33ccfefa20b --- /dev/null +++ b/irlc/lectures/lec05/lecture_05_carpole_random.py @@ -0,0 +1,9 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc import Agent, train +from irlc.ex05.model_cartpole import GymSinCosCartpoleEnvironment + +if __name__ == "__main__": + + env = GymSinCosCartpoleEnvironment(Tmax=20, render_mode='human') + train(env, Agent(env), num_episodes=1) + env.close() diff --git a/irlc/lectures/lec05/lecture_05_cartpole_kelly.py b/irlc/lectures/lec05/lecture_05_cartpole_kelly.py new file mode 100644 index 0000000000000000000000000000000000000000..1bc3bcce247df45f93d74fdbb4cc731c8f7b539d --- /dev/null +++ b/irlc/lectures/lec05/lecture_05_cartpole_kelly.py @@ -0,0 +1,10 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.ex05.direct_cartpole_kelly import compute_solutions +from irlc.ex05.direct_plot import plot_solutions +import matplotlib.pyplot as plt + +if __name__ == "__main__": + env, solutions = compute_solutions() + print("Did we succeed?", solutions[-1]['solver']['success']) + plot_solutions(env, solutions, animate=True, pdf=None, animate_all=True, animate_repeats=3) + env.close() diff --git a/irlc/lectures/lec05/lecture_05_cartpole_time.py b/irlc/lectures/lec05/lecture_05_cartpole_time.py new file mode 100644 index 0000000000000000000000000000000000000000..ebd6e873e6e2036dcd6f6e13c6ee24c124694813 --- /dev/null +++ b/irlc/lectures/lec05/lecture_05_cartpole_time.py @@ -0,0 +1,11 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.ex05.direct_cartpole_time import compute_solutions +from irlc.ex05.direct_plot import plot_solutions +import matplotlib.pyplot as plt + +if __name__ == "__main__": + env, solutions = compute_solutions() + print("Did we succeed?", solutions[-1]['solver']['success']) + plot_solutions(env, solutions, animate=True, pdf=None, animate_all=True, animate_repeats=3) + env.close() + pass diff --git a/irlc/lectures/lec06/__init__.py b/irlc/lectures/lec06/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a56057c84d0ceac54aab1d40ba0f370c77fe10be --- /dev/null +++ b/irlc/lectures/lec06/__init__.py @@ -0,0 +1 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. diff --git a/irlc/lectures/lec06/lecture6_lqr_locomotive.py b/irlc/lectures/lec06/lecture6_lqr_locomotive.py new file mode 100644 index 0000000000000000000000000000000000000000..2c9ddf3c2dec5a84476a66a11861d89a9ff08f7b --- /dev/null +++ b/irlc/lectures/lec06/lecture6_lqr_locomotive.py @@ -0,0 +1,37 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. 
+import matplotlib.pyplot as plt +import numpy as np +from irlc import savepdf, train +from irlc.ex04.pid_locomotive_agent import PIDLocomotiveAgent +from irlc.ex06.lqr_agent import LQRAgent +from irlc.ex04.model_harmonic import HarmonicOscilatorEnvironment +from irlc.ex06.boeing_lqr import compute_A_B_d, compute_Q_R_q +from irlc.ex07.linearization_agent import LinearizationAgent +from irlc.ex06.lqr_pid import ConstantLQRAgent +from irlc.ex04.locomotive import LocomotiveEnvironment +from irlc.ex04.pid_locomotive_agent import PIDLocomotiveAgent +from irlc.ex01.agent import train +from irlc.ex03.control_cost import SymbolicQRCost +import matplotlib +#matplotlib.use('qtagg') +dt = .04 +m = 70 +Tmax=10 +slope = 0 + +env = LocomotiveEnvironment(m=m, slope=slope, dt=dt, Tmax=Tmax, render_mode='human') + +model = env.discrete_model +model.cost = SymbolicQRCost(Q=np.eye(2)*100, R=np.eye(1)).discretize(dt=dt) +agent = LinearizationAgent(env, model=model, xbar=env.observation_space.sample(), ubar=env.action_space.sample()) +_, traj = train(env, agent, num_episodes=1) +env.close() +if False: + from irlc import plot_trajectory, savepdf + import matplotlib.pyplot as plt + plt.figure() + plot_trajectory(trajectory=traj[0], env=env, xkeys=[0, 1], ukeys=[]) + savepdf('lqr_pid_locomotive_state.pdf') + plot_trajectory(trajectory=traj[0], env=env, ukeys=[0], xkeys=[]) + savepdf('lqr_pid_locomotive_action.pdf') + env.close() diff --git a/irlc/lectures/lec06/lecture_06_cartpole_ilqr.py b/irlc/lectures/lec06/lecture_06_cartpole_ilqr.py new file mode 100644 index 0000000000000000000000000000000000000000..dc5633578e01195c85c72b5e6d29080d5029c038 --- /dev/null +++ b/irlc/lectures/lec06/lecture_06_cartpole_ilqr.py @@ -0,0 +1,47 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. 
+import numpy as np +from irlc.ex07.ilqr_agent import ILQRAgent +from irlc import train +from irlc.ex05.model_cartpole import GymSinCosCartpoleEnvironment +# from irlc import VideoMonitor + +def cartpole_experiment(N=12, use_linesearch=True, figex="", animate=True): + np.random.seed(2) + Tmax = .9 + dt = Tmax/N + + env = GymSinCosCartpoleEnvironment(dt=dt, Tmax=Tmax, supersample_trajectory=True, render_mode='human') + agent = ILQRAgent(env, env.discrete_model, N=N, ilqr_iterations=200, use_linesearch=use_linesearch) + # if animate: + # env =VideoMonitor(env) + stats, trajectories = train(env, agent, num_episodes=3, return_trajectory=True) + + # agent.use_ubar = True + # stats2, trajectories2 = train(env, agent, num_episodes=1, return_trajectory=True) + # env.close() + env.close() + +def plt_cartpole(): + cartpole_experiment(N=50, use_linesearch=True, animate=True) + +if __name__ == '__main__': + np.random.seed(42) + plt_cartpole() + + # xb = agent.xbar + # tb = np.arange(N+1)*dt + # plt.figure(figsize=(8,6)) + # F = 3 + # # plt.plot(trajectories[0].time, trajectories[0].state[:,F], 'k-', label='Closed-loop $\\pi$') + # # plt.plot(trajectories2[0].time, trajectories2[0].state[:,F], '-', label='Open-loop $\\bar{u}_k$') + # + # plt.plot(tb, xb[:,F], '.-', label="iLQR rediction $\\bar{x}_k$") + # plt.xlabel("Time/seconds") + # plt.ylabel("$\cos(\\theta)$") + # plt.title(f"Pendulum environment $T={N}$") + # + # plt.grid() + # plt.legend() + # ev = "pendulum" + # savepdf(f"irlc_cartpole_theta_N{N}_{use_linesearch}{figex}") + # plt.show() diff --git a/irlc/lectures/lec06/lecture_06_linearize.py b/irlc/lectures/lec06/lecture_06_linearize.py new file mode 100644 index 0000000000000000000000000000000000000000..311dd27eb55cfca8fc3ef90fb25941155b7cadbd --- /dev/null +++ b/irlc/lectures/lec06/lecture_06_linearize.py @@ -0,0 +1,6 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.ex07.linearization_agent import get_offbalance_cart + +if __name__ == "__main__": + env = get_offbalance_cart(waiting_steps=20, sleep_time=0.1) + env.close() diff --git a/irlc/lectures/lec06/lecture_06_linearize_b.py b/irlc/lectures/lec06/lecture_06_linearize_b.py new file mode 100644 index 0000000000000000000000000000000000000000..b582957f75442e2162edaccc09ff4af51cdfeb13 --- /dev/null +++ b/irlc/lectures/lec06/lecture_06_linearize_b.py @@ -0,0 +1,18 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc import plot_trajectory, train +from irlc.ex07.linearization_agent import get_offbalance_cart, LinearizationAgent +import numpy as np +import matplotlib +matplotlib.use("tkagg") +import matplotlib.pyplot as plt + + +if __name__ == "__main__": + np.random.seed(42) # I don't think these results are seed-dependent but let's make sure. + env = get_offbalance_cart(4, sleep_time=0.08) # Simulate for a little time to get an off-balance cart. Increase 4-->10 to get failure. + agent = LinearizationAgent(env, model=env.discrete_model, xbar=env.discrete_model.x_upright, ubar=env.action_space.sample()*0) + _, trajectories = train(env, agent, num_episodes=1, return_trajectory=True, reset=False) # Note reset=False to maintain initial conditions. 
+ plt.figure() + plot_trajectory(trajectories[0], env, xkeys=[0, 2, 3], ukeys=[0]) + plt.show() + env.close() diff --git a/irlc/lectures/lec06/lecture_06_pendulum_bilqr_L.py b/irlc/lectures/lec06/lecture_06_pendulum_bilqr_L.py new file mode 100644 index 0000000000000000000000000000000000000000..e0cb2ca23b92b51c2b23db190b67ca172404f5de --- /dev/null +++ b/irlc/lectures/lec06/lecture_06_pendulum_bilqr_L.py @@ -0,0 +1,7 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +import numpy as np +from irlc.lectures.lec06.lecture_06_pendulum_bilqr_ubar import pen_experiment + +if __name__ == "__main__": + np.random.seed(2) # (!) + pen_experiment(N=50, use_linesearch=False, use_ubar=False) diff --git a/irlc/lectures/lec06/lecture_06_pendulum_bilqr_ubar.py b/irlc/lectures/lec06/lecture_06_pendulum_bilqr_ubar.py new file mode 100644 index 0000000000000000000000000000000000000000..d61a8ddc17ba23ba3c440108f5eaa4dba70a8530 --- /dev/null +++ b/irlc/lectures/lec06/lecture_06_pendulum_bilqr_ubar.py @@ -0,0 +1,66 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +import numpy as np +from irlc.ex04.model_pendulum import GymSinCosPendulumEnvironment +from irlc.ex07.ilqr_agent import ILQRAgent +from irlc import train +from irlc import savepdf +import matplotlib.pyplot as plt + +Tmax = 3 +def pen_experiment(N=12, use_linesearch=True,figex="", animate=True, use_ubar=False): + dt = Tmax / N + env = GymSinCosPendulumEnvironment(dt, Tmax=Tmax, supersample_trajectory=True, render_mode='human' if animate else None) + agent = ILQRAgent(env, env.discrete_model, N=N, ilqr_iterations=200, use_linesearch=use_linesearch) + # if animate: + # env = VideoMonitor(env) + + if use_ubar: + agent.use_ubar = True + stats2, trajectories = train(env, agent, num_episodes=1, return_trajectory=True) + env.close() + + plot_pendulum_trajectory(trajectories[0], label=f'Use linesearch? {use_linesearch}. Use u-bar? {use_ubar}') + plt.legend() + plt.show() + + plt.figure(figsize=(6, 6)) + plt.semilogy(agent.J_hist, 'k.-') + plt.xlabel("iLQR Iterations") + plt.ylabel("Cost function estimate $J$") + # plt.title("Last value: {") + plt.grid() + # savepdf(f"irlc_pendulum_J_N{N}_{use_linesearch}{figex}") + plt.show() + # + # plt.show() + # xb = agent.xbar + # tb = np.arange(N+1)*dt + # plt.figure(figsize=(12, 6)) + # plt.plot(trajectories2[0].time, trajectories2[0].state[:,1], '-', label='Open-loop $\\bar{u}_k$') + # plt.plot(tb, xb[:,1], 'o-', label="iLQR prediction $\\bar{x}_k$") + # plt.grid() + # plt.legend() + # ev = "pendulum" + # savepdf(f"irlc_pendulum_theta_N{N}_{use_linesearch}{figex}") + # plt.show() + + ## Plot J + +# +def plot_pendulum_trajectory(traj, style='k-', label=None, action=False, **kwargs): + y = traj.state[:, 1] if not action else traj.action[:,0] + plt.plot(traj.time[:-1] if action else traj.time, y, style, label=label, **kwargs) + + plt.xlabel("Time/seconds") + if action: + plt.ylabel("Torque $u$") + else: + plt.ylabel("$\cos(\\theta)$") + plt.grid() + pass + +N = 50 + +if __name__ == "__main__": + np.random.seed(2) # (!) 
+ pen_experiment(N=N, use_linesearch=False, use_ubar=True) diff --git a/irlc/lectures/lec06/lecture_06_pendulum_ilqr_L.py b/irlc/lectures/lec06/lecture_06_pendulum_ilqr_L.py new file mode 100644 index 0000000000000000000000000000000000000000..6e475bf6335497c98536f9850ce9934c53d2d4fd --- /dev/null +++ b/irlc/lectures/lec06/lecture_06_pendulum_ilqr_L.py @@ -0,0 +1,5 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +if __name__ == "__main__": + from irlc.lectures.lec06.lecture_06_pendulum_bilqr_ubar import pen_experiment + N = 50 + pen_experiment(N=N, use_linesearch=True, use_ubar=False) diff --git a/irlc/lectures/lec06/lecture_06_pendulum_ilqr_ubar.py b/irlc/lectures/lec06/lecture_06_pendulum_ilqr_ubar.py new file mode 100644 index 0000000000000000000000000000000000000000..b44a35cc127904eee0bb318cab2b383388cd4110 --- /dev/null +++ b/irlc/lectures/lec06/lecture_06_pendulum_ilqr_ubar.py @@ -0,0 +1,5 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +if __name__ == "__main__": + from irlc.lectures.lec06.lecture_06_pendulum_bilqr_ubar import pen_experiment + N = 50 + pen_experiment(N=N, use_linesearch=True, use_ubar=True) diff --git a/irlc/lectures/lec07/__init__.py b/irlc/lectures/lec07/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a56057c84d0ceac54aab1d40ba0f370c77fe10be --- /dev/null +++ b/irlc/lectures/lec07/__init__.py @@ -0,0 +1 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. diff --git a/irlc/lectures/lec07/lecture_07_boing_lqr.py b/irlc/lectures/lec07/lecture_07_boing_lqr.py new file mode 100644 index 0000000000000000000000000000000000000000..7a140752a73aa016366a0c2cd371f66504c2c08e --- /dev/null +++ b/irlc/lectures/lec07/lecture_07_boing_lqr.py @@ -0,0 +1,19 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.ex04.model_boeing import BoeingEnvironment +from irlc.ex07.lqr_learning_agents import learning_lqr, learning_lqr_mpc, learning_lqr_mpc_local +from irlc.ex07.learning_agent_mpc_optimize import learning_optimization_mpc_local + +if __name__ == "__main__": + env = BoeingEnvironment(output=[10, 0]) + + # Part A: LQR and global regression + learning_lqr(env) + + # Part B: LQR+MPC + # learning_lqr_mpc(env) + # + # # Part C: LQR+MPC and local regression + # learning_lqr_mpc_local(env) + # + # # Part D: Optimization+MPC and local regression + # learning_optimization_mpc_local(env) diff --git a/irlc/lectures/lec07/lecture_07_boing_lqr_mpc.py b/irlc/lectures/lec07/lecture_07_boing_lqr_mpc.py new file mode 100644 index 0000000000000000000000000000000000000000..2c4a72274d3f0fd68c713147bcc4910a9e1b879c --- /dev/null +++ b/irlc/lectures/lec07/lecture_07_boing_lqr_mpc.py @@ -0,0 +1,14 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. 
+from irlc.ex04.model_boeing import BoeingEnvironment +from irlc.ex07.lqr_learning_agents import learning_lqr, learning_lqr_mpc, learning_lqr_mpc_local +from irlc.ex07.learning_agent_mpc_optimize import learning_optimization_mpc_local + +if __name__ == "__main__": + env = BoeingEnvironment(output=[10, 0]) + learning_lqr_mpc(env) + + # # Part C: LQR+MPC and local regression + # learning_lqr_mpc_local(env) + # + # # Part D: Optimization+MPC and local regression + # learning_optimization_mpc_local(env) diff --git a/irlc/lectures/lec07/lecture_07_boing_lqr_mpc_local.py b/irlc/lectures/lec07/lecture_07_boing_lqr_mpc_local.py new file mode 100644 index 0000000000000000000000000000000000000000..22376d13457883a8f9b9f89becbffa9e46aedbb9 --- /dev/null +++ b/irlc/lectures/lec07/lecture_07_boing_lqr_mpc_local.py @@ -0,0 +1,9 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.ex04.model_boeing import BoeingEnvironment +from irlc.ex07.lqr_learning_agents import learning_lqr, learning_lqr_mpc, learning_lqr_mpc_local +from irlc.ex07.learning_agent_mpc_optimize import learning_optimization_mpc_local + +if __name__ == "__main__": + env = BoeingEnvironment(output=[10, 0]) + learning_lqr_mpc_local(env) + # learning_optimization_mpc_local(env) diff --git a/irlc/lectures/lec07/lecture_07_boing_lqr_mpc_optim.py b/irlc/lectures/lec07/lecture_07_boing_lqr_mpc_optim.py new file mode 100644 index 0000000000000000000000000000000000000000..4ed3f3e080238b559fb656e1f5640a1374109bfc --- /dev/null +++ b/irlc/lectures/lec07/lecture_07_boing_lqr_mpc_optim.py @@ -0,0 +1,8 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.ex04.model_boeing import BoeingEnvironment +from irlc.ex07.lqr_learning_agents import learning_lqr, learning_lqr_mpc, learning_lqr_mpc_local +from irlc.ex07.learning_agent_mpc_optimize import learning_optimization_mpc_local + +if __name__ == "__main__": + env = BoeingEnvironment(output=[10, 0]) + learning_optimization_mpc_local(env) diff --git a/irlc/lectures/lec07/lecture_07_lmpc.py b/irlc/lectures/lec07/lecture_07_lmpc.py new file mode 100644 index 0000000000000000000000000000000000000000..5fff87cb5ed8a3581de2f2c8c6ec6ae94957aeb4 --- /dev/null +++ b/irlc/lectures/lec07/lecture_07_lmpc.py @@ -0,0 +1,5 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.ex07.lmpc_run import main + +if __name__ == "__main__": + main(show_episode=True) diff --git a/irlc/lectures/lec07/lecture_07_pendulum_mpc_lqr.py b/irlc/lectures/lec07/lecture_07_pendulum_mpc_lqr.py new file mode 100644 index 0000000000000000000000000000000000000000..8867c0afeac9588af42a796e93ec70def74d2a07 --- /dev/null +++ b/irlc/lectures/lec07/lecture_07_pendulum_mpc_lqr.py @@ -0,0 +1,4 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. 
+if __name__ == "__main__": + from irlc.ex07.mpc_pendulum_experiment_lqr import main_pendulum_lqr + main_pendulum_lqr() diff --git a/irlc/lectures/lec07/lecture_07_pendulum_mpc_optm.py b/irlc/lectures/lec07/lecture_07_pendulum_mpc_optm.py new file mode 100644 index 0000000000000000000000000000000000000000..9eff242ac8034a7e3f4ce8aa79e95d15f86d822b --- /dev/null +++ b/irlc/lectures/lec07/lecture_07_pendulum_mpc_optm.py @@ -0,0 +1,4 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +if __name__ == "__main__": + from irlc.ex07.mpc_pendulum_experiment_optim import main_pendulum + main_pendulum() diff --git a/irlc/lectures/lec07/lecture_07_pendulum_simple.py b/irlc/lectures/lec07/lecture_07_pendulum_simple.py new file mode 100644 index 0000000000000000000000000000000000000000..337b16585b8cd3c0a709a49b5eb7b378a08c2757 --- /dev/null +++ b/irlc/lectures/lec07/lecture_07_pendulum_simple.py @@ -0,0 +1,41 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.ex04.model_pendulum import GymSinCosPendulumEnvironment +from irlc.utils.video_monitor import VideoMonitor +from irlc.ex04.discrete_control_cost import goal_seeking_qr_cost, DiscreteQRCost +from irlc.ex01.agent import train +from irlc.ex07.lqr_learning_agents import MPCLocalLearningLQRAgent, MPCLearningAgent +from irlc import plot_trajectory, main_plot +import matplotlib.pyplot as plt +import numpy as np +from irlc.ex07.mpc_pendulum_experiment_lqr import mk_mpc_pendulum_env + +L = 12 +def main_pendulum_lqr_simple(Tmax=10): + + """ Run Local LQR/MPC agent using the parameters + L = 12 + neighboorhood_size = 50 + min_buffer_size = 50 + """ + env_pendulum = mk_mpc_pendulum_env() + + # agent = .... (instantiate agent here) + # TODO: 1 lines missing. 
+ raise NotImplementedError("Instantiate your agent here") + env_pendulum = VideoMonitor(env_pendulum) + + experiment_name = f"pendulum{L}_lqr" + stats, trajectories = train(env_pendulum, agent, experiment_name=experiment_name, num_episodes=16,return_trajectory=True) + plt.show() + for k in range(len(trajectories)): + plot_trajectory(trajectories[k], env_pendulum) + plt.title(f"Trajectory {k}") + plt.show() + + env_pendulum.close() + main_plot(experiment_name) + plt.show() + +if __name__ == "__main__": + np.random.seed(1) + main_pendulum_lqr_simple() diff --git a/irlc/lectures/lec07/pendulum12/2021-03-19_08-21-20.207/log.txt b/irlc/lectures/lec07/pendulum12/2021-03-19_08-21-20.207/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..dc592cfe3c7ae897391e9ce88d0c1454848920e8 --- /dev/null +++ b/irlc/lectures/lec07/pendulum12/2021-03-19_08-21-20.207/log.txt @@ -0,0 +1,17 @@ +Episode Accumulated Reward Average Reward Length Steps +0 -4895.647575826604 -39.165180606612836 125 125 +1 -4401.3119492497035 -35.21049559399765 125 250 +2 -2909.118791375824 -23.272950331006577 125 375 +3 -1355.8374521458716 -10.846699617166973 125 500 +4 -1392.1376132555315 -11.13710090604426 125 625 +5 -2377.229711438541 -19.017837691508326 125 750 +6 -1605.205601135333 -12.841644809082668 125 875 +7 -1142.6024308594363 -9.140819446875495 125 1000 +8 -1110.17238007953 -8.881379040636237 125 1125 +9 -1415.0153227915457 -11.320122582332363 125 1250 +10 -1084.806186007314 -8.678449488058519 125 1375 +11 -1204.58673322474 -9.636693865797913 125 1500 +12 -1582.8902992149344 -12.66312239371947 125 1625 +13 -1234.3968104401945 -9.875174483521556 125 1750 +14 -2290.3685781570866 -18.322948625256696 125 1875 +15 -1317.928485283048 -10.543427882264387 125 2000 diff --git a/irlc/lectures/lec07/pendulum12/2021-03-19_08-21-20.207/trajectories.pkl b/irlc/lectures/lec07/pendulum12/2021-03-19_08-21-20.207/trajectories.pkl new file mode 100644 index 0000000000000000000000000000000000000000..34930a77fba816ca18036b42921823ab18cefc24 Binary files /dev/null and b/irlc/lectures/lec07/pendulum12/2021-03-19_08-21-20.207/trajectories.pkl differ diff --git a/irlc/lectures/lec07/pendulum12/2022-03-17_14-16-10.758/log.txt b/irlc/lectures/lec07/pendulum12/2022-03-17_14-16-10.758/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..3005052ea2c0aa4f28a74ca66b23b716fa4a925d --- /dev/null +++ b/irlc/lectures/lec07/pendulum12/2022-03-17_14-16-10.758/log.txt @@ -0,0 +1,17 @@ +Episode Accumulated Reward Average Reward Length Steps +0 -5062.646915554269 -40.50117532443415 125 125 +1 -4545.443228168109 -36.36354582534487 125 250 +2 -3992.451522701582 -31.939612181612656 125 375 +3 -2660.945115772302 -21.287560926178415 125 500 +4 -1089.641113544413 -8.71712890835529 125 625 +5 -1794.3862709577143 -14.35509016766171 125 750 +6 -1599.3332228782826 -12.79466578302626 125 875 +7 -1999.3347944176303 -15.994678355341035 125 1000 +8 -1240.1770407677993 -9.921416326142396 125 1125 +9 -1128.717151496786 -9.029737211974288 125 1250 +10 -1148.8528175884883 -9.19082254070789 125 1375 +11 -1199.5840778420286 -9.596672622736232 125 1500 +12 -1147.0703774473068 -9.176563019578447 125 1625 +13 -1245.4139074019245 -9.963311259215399 125 1750 +14 -1257.9333517907346 -10.063466814325885 125 1875 +15 -1309.9607605947551 -10.479686084758042 125 2000 diff --git a/irlc/lectures/lec07/pendulum12/2022-03-17_14-16-10.758/trajectories.pkl b/irlc/lectures/lec07/pendulum12/2022-03-17_14-16-10.758/trajectories.pkl new file mode 100644 
index 0000000000000000000000000000000000000000..c2b34f7154ed75b8dfa9e69bcd3de982d0cd0eb1 Binary files /dev/null and b/irlc/lectures/lec07/pendulum12/2022-03-17_14-16-10.758/trajectories.pkl differ diff --git a/irlc/lectures/lec07/pendulum12_lqr/2023-03-17_08-13-45.172/log.txt b/irlc/lectures/lec07/pendulum12_lqr/2023-03-17_08-13-45.172/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..643cee39646b4d0a0ee0189cf724ae538a4cd508 --- /dev/null +++ b/irlc/lectures/lec07/pendulum12_lqr/2023-03-17_08-13-45.172/log.txt @@ -0,0 +1,17 @@ +Episode Accumulated Reward Length Steps +0 -5042.020051692956 125 0 +1 -4627.801003133058 125 1 +2 -3635.9503089227105 125 2 +3 -2459.2456085370436 125 3 +4 -1142.0454750510762 125 4 +5 -1563.5408392433658 125 5 +6 -1718.6689962603696 125 6 +7 -1215.2631997008277 125 7 +8 -1172.9345344478274 125 8 +9 -1108.3729746371948 125 9 +10 -1012.6787060036193 125 10 +11 -1715.9593847985013 125 11 +12 -1009.5943996400636 125 12 +13 -1082.3121757069966 125 13 +14 -1248.0530347172762 125 14 +15 -1496.6826680867007 125 15 diff --git a/irlc/lectures/lec07/pendulum12_lqr/2023-03-17_08-13-45.172/trajectories.pkl b/irlc/lectures/lec07/pendulum12_lqr/2023-03-17_08-13-45.172/trajectories.pkl new file mode 100644 index 0000000000000000000000000000000000000000..3e9f2d19af2a1f4bdcdbc7a8922852a440795c46 Binary files /dev/null and b/irlc/lectures/lec07/pendulum12_lqr/2023-03-17_08-13-45.172/trajectories.pkl differ diff --git a/irlc/tests/tests_week05.py b/irlc/tests/tests_week05.py new file mode 100644 index 0000000000000000000000000000000000000000..4a7f813840b6670d6caa99c16576d2b90ff7572c --- /dev/null +++ b/irlc/tests/tests_week05.py @@ -0,0 +1,114 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. 
+from unitgrade import Report +from irlc.ex05.direct_agent import train_direct_agent +from unitgrade import UTestCase +import irlc +from irlc.ex05.direct import run_direct_small_problem + + +class DirectMethods(UTestCase): + title = "Direct methods z, z0, z_lb/z_ub definitions+" + + @classmethod + def setUpClass(cls) -> None: + env, solution = run_direct_small_problem() + cls.solution = solution[-1] + + def test_z_variable_vector(self): + self.assertEqualC(str(DirectMethods.solution['inputs']['z'])) + + def test_z0_initial_state(self): + self.assertL2(DirectMethods.solution['inputs']['z0'], tol=1e-6) + + def test_zU_upper_bound(self): + self.assertL2(DirectMethods.solution['inputs']['z_ub'], tol=1e-6) + + def test_zL_lower_bound(self): + self.assertL2(DirectMethods.solution['inputs']['z_lb'], tol=1e-6) + + +class DirectAgentPendulum(UTestCase): + """ Direct agent: Test of pendulum environment """ + def test_pendulum(self): + stats,_,_ = train_direct_agent(animate=False) + return self.assertL2(stats[0]['Accumulated Reward'], tol=0.03) + +class DirectSolverQuestion(UTestCase): + """ Test the Direct solver on the Pendulum using run_direct_small_problem() """ + @classmethod + def setUpClass(cls): + cls.solution = cls.compute_solution() + + @classmethod + def compute_solution(cls): + from irlc.ex05.direct import run_direct_small_problem + env, solution = run_direct_small_problem() + return solution + # cls.solution = solution + + def test_solver_success(self): + self.assertTrue(self.__class__.solution[-1]['solver']['success']) + + def test_solver_fun(self): + self.assertL2(self.__class__.solution[-1]['solver']['fun'], tol=0.01) + + def test_constraint_violation(self): + self.assertL2(self.__class__.solution[-1]['eqC_val'], tol=0.01) + + +class PendulumQuestion(DirectSolverQuestion): + """ Direct solver on the pendulum problem """ + @classmethod + def compute_solution(cls): + from irlc.ex05.direct_pendulum import compute_pendulum_solutions + return compute_pendulum_solutions()[1] + + +class CartpoleTimeQuestion(DirectSolverQuestion): + """ Direct solver on the cartpole (minimum time) task """ + @classmethod + def compute_solution(cls): + from irlc.ex05.direct_cartpole_time import compute_solutions + return compute_solutions()[1] + + +class CartpoleCostQuestion(DirectSolverQuestion): + """ Direct solver on the cartpole (kelly) task """ + @classmethod + def compute_solution(cls): + from irlc.ex05.direct_cartpole_kelly import compute_solutions + return compute_solutions()[1] + +class BrachistochroneQuestion(DirectSolverQuestion): + """ Brachistochrone (unconstrained) """ + + @classmethod + def compute_solution(cls): + from irlc.ex05.direct_brachistochrone import compute_constrained_solutions + return compute_constrained_solutions()[1] + +class BrachistochroneConstrainedQuestion(DirectSolverQuestion): + """ Brachistochrone (constrained) """ + @classmethod + def compute_solution(cls): + from irlc.ex05.direct_brachistochrone import compute_constrained_solutions + return compute_constrained_solutions()[1] + +class Week05Tests(Report): + title = "Tests for week 05" + pack_imports = [irlc] + individual_imports = [] + questions = [ + (DirectMethods, 10), # ok + (DirectSolverQuestion, 10), # ok + (PendulumQuestion, 5), # ok + (DirectAgentPendulum, 10), # ok + (CartpoleTimeQuestion, 5), # ok + (CartpoleCostQuestion, 5), # ok + (BrachistochroneQuestion, 5), # ok + (BrachistochroneConstrainedQuestion, 10), # ok + ] + +if __name__ == '__main__': + from unitgrade import evaluate_report_student + 
evaluate_report_student(Week05Tests()) diff --git a/irlc/tests/tests_week06.py b/irlc/tests/tests_week06.py new file mode 100644 index 0000000000000000000000000000000000000000..a72463838f7fba08f8db8d4bf789532d313e7e2d --- /dev/null +++ b/irlc/tests/tests_week06.py @@ -0,0 +1,147 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.ex06.model_boeing import BoeingEnvironment +from unitgrade import UTestCase, Report +import irlc +from irlc import train +import numpy as np +from irlc.ex04.locomotive import LocomotiveEnvironment +from irlc.ex04.model_harmonic import HarmonicOscilatorEnvironment + +matrices = ['L', 'l', 'V', 'v', 'vc'] + +class Problem3LQR(UTestCase): + title = "LQR, full check of implementation" + + @classmethod + def setUpClass(cls): + # def init(self): + from irlc.ex06.dlqr_check import check_LQR + (cls.L, cls.l), (cls.V, cls.v, cls.vc) = check_LQR() + # self.M = list(zip(matrices, [L, l, V, v, vc])) + + def chk_item(self, m_list): + self.assertIsInstance(m_list, list) + self.assertEqualC(len(m_list)) + for m in m_list: + self.assertIsInstance(m, np.ndarray) + self.assertEqualC(m.shape) + self.assertL2(m, tol=1e-6) + + def test_L(self): + self.chk_item(self.__class__.L) + + def test_l(self): + self.chk_item(self.__class__.l) + + def test_V(self): + self.chk_item(self.__class__.V) + + def test_v(self): + self.chk_item(self.__class__.v) + + def test_vc(self): + vc = self.__class__.vc + self.assertIsInstance(vc, list) + for d in vc: + self.assertL2(d, tol=1e-6) + + self.chk_item(self.__class__.l) + +class Problem4LQRAgent(UTestCase): + def _mkagent(self, val=0.): + A = np.ones((2, 2))* (1+val) + A[1, 0] = 0 + B = np.asarray([[0], [1]]) + Q = np.eye(2) * (3+val) + R = np.ones((1, 1)) * 2 + q = np.asarray([-1.1 + val, 0]) + from irlc.ex06.lqr_agent import LQRAgent + env = LocomotiveEnvironment(render_mode=None, Tmax=5, slope=1) + agent = LQRAgent(env, A=A, B=B, Q=Q, R=R, q=q) + return agent + + def test_policy_lqr_a(self): + agent = self._mkagent(0) + self.assertL2(agent.pi(np.asarray([1, 0]), k=0)) + self.assertL2(agent.pi(np.asarray([1, 0]), k=5)) + + def test_policy_lqr_b(self): + agent = self._mkagent(0.2) + self.assertL2(agent.pi(np.asarray([1, 0]), k=0)) + self.assertL2(agent.pi(np.asarray([1, 0]), k=5)) + +class Problem5_6_Boeing(UTestCase): + + def test_compute_A_B_d(self): + from irlc.ex06.boeing_lqr import compute_A_B_d, compute_Q_R_q + model = BoeingEnvironment(Tmax=10).discrete_model.continuous_model + A, B, d = compute_A_B_d(model, dt=0.2) + self.assertL2(A) + self.assertL2(B) + self.assertL2(d) + + def test_compute_Q_R_q(self): + from irlc.ex06.boeing_lqr import compute_A_B_d, compute_Q_R_q + model = BoeingEnvironment(Tmax=10).discrete_model.continuous_model + Q, R, q = compute_Q_R_q(model, dt=0.2) + self.assertL2(Q) + self.assertL2(R) + self.assertL2(q) + + def test_boing_path(self): + from irlc.ex06.boeing_lqr import boeing_simulation + stats, trajectories, env = boeing_simulation() + self.assertL2(trajectories[-1].state, tol=1e-6) + + +class Problem7_8_PidLQR(UTestCase): + def test_constant_lqr_agent(self): + Delta = 0.06 # Time discretization constant + # Define a harmonic osscilator environment. Use .., render_mode='human' to see a visualization. + env = HarmonicOscilatorEnvironment(Tmax=8, dt=Delta, m=0.5, R=np.eye(1) * 8, + render_mode=None) # set render_mode='human' to see the oscillator. 
+ model = env.discrete_model.continuous_model # Get the ControlModel corresponding to this environment. + + from irlc.ex06.boeing_lqr import compute_A_B_d, compute_Q_R_q + from irlc.ex06.lqr_pid import ConstantLQRAgent + A, B, d = compute_A_B_d(model, Delta) + Q, R, q = compute_Q_R_q(model, Delta) + x0, _ = env.reset() + + # Run the LQR agent + lqr_agent = ConstantLQRAgent(env, A=A, B=B, d=d, Q=Q, R=R, q=q) + self.assertLinf(lqr_agent.pi(x0, k=0), tol=1e-3) + self.assertLinf(lqr_agent.pi(x0, k=10), tol=1e-3) + + + def test_KpKd(self): + Delta = 0.06 # Time discretization constant + # Define a harmonic osscilator environment. Use .., render_mode='human' to see a visualization. + env = HarmonicOscilatorEnvironment(Tmax=8, dt=Delta, m=0.5, R=np.eye(1) * 8, + render_mode=None) # set render_mode='human' to see the oscillator. + model = env.discrete_model.continuous_model # Get the ControlModel corresponding to this environment. + from irlc.ex06.boeing_lqr import compute_A_B_d, compute_Q_R_q + from irlc.ex06.lqr_pid import ConstantLQRAgent, get_Kp_Kd + A, B, d = compute_A_B_d(model, Delta) + Q, R, q = compute_Q_R_q(model, Delta) + lqr_agent = ConstantLQRAgent(env, A=A, B=B, d=d, Q=Q, R=R, q=q) + Kp, Kd = get_Kp_Kd(lqr_agent.L[0]) + self.assertAlmostEqualC(Kp, places=3) + self.assertAlmostEqualC(Kd, places=3) + + + + +class Week06Tests(Report): + title = "Tests for week 06" + pack_imports = [irlc] + individual_imports = [] + questions = [ + (Problem3LQR, 10), + (Problem4LQRAgent, 10), + (Problem5_6_Boeing, 10), + (Problem7_8_PidLQR, 10), + ] +if __name__ == '__main__': + from unitgrade import evaluate_report_student + evaluate_report_student(Week06Tests()) diff --git a/irlc/tests/tests_week07.py b/irlc/tests/tests_week07.py new file mode 100644 index 0000000000000000000000000000000000000000..1f46427025ca7e635d340fafa678f4a7e2c309a7 --- /dev/null +++ b/irlc/tests/tests_week07.py @@ -0,0 +1,62 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. 
+from unitgrade import Report +import irlc +from unitgrade import UTestCase +import numpy as np +from irlc import Agent, train + +class RendevouzItem(UTestCase): + def test_rendevouz_without_linesearch(self): + """ Rendevouz with iLQR (no linesearch) """ + from irlc.ex07.ilqr_rendovouz_basic import solve_rendovouz + (xs, us, J_hist, l, L), env = solve_rendovouz(use_linesearch=False) + # print(J_hist[-1]) + self.assertL2(xs[-1], tol=1e-2) + + def test_rendevouz_with_linesearch(self): + """ Rendevouz with iLQR (with linesearch) """ + from irlc.ex07.ilqr_rendovouz_basic import solve_rendovouz + (xs, us, J_hist, l, L), env = solve_rendovouz(use_linesearch=True) + # print(J_hist[-1]) + self.assertL2(xs[-1], tol=1e-2) + # return l, L, xs + + + + + +class ILQRAgentQuestion(UTestCase): + """ iLQR Agent on Rendevouz """ + def test_ilqr_agent(self): + from irlc.ex07.ilqr_agent import solve_rendevouz + stats, trajectories, agent = solve_rendevouz() + self.assertL2(trajectories[-1].state[-1], tol=1e-2) + + +class ILQRPendulumQuestion(UTestCase): + """ iLQR Agent on Pendulum """ + + def test_ilqr_agent_pendulum(self): + from irlc.ex07.ilqr_pendulum_agent import Tmax, N + from irlc.ex04.model_pendulum import GymSinCosPendulumEnvironment + from irlc.ex07.ilqr_agent import ILQRAgent + dt = Tmax / N + env = GymSinCosPendulumEnvironment(dt, Tmax=Tmax, supersample_trajectory=True) + agent = ILQRAgent(env, env.discrete_model, N=N, ilqr_iterations=200, use_linesearch=True) + stats, trajectories = train(env, agent, num_episodes=1, return_trajectory=True) + state = trajectories[-1].state[-1] + self.assertL2(state, tol=2e-2) + +class Week07Tests(Report): #240 total. + title = "Tests for week 07" + pack_imports = [irlc] + individual_imports = [] + questions = [ + (RendevouzItem, 10), # ok + (ILQRAgentQuestion, 10), # ok + (ILQRPendulumQuestion, 10), # ok + ] + +if __name__ == '__main__': + from unitgrade import evaluate_report_student + evaluate_report_student(Week07Tests()) diff --git a/irlc/tests/unitgrade_data/BrachistochroneConstrainedQuestion.pkl b/irlc/tests/unitgrade_data/BrachistochroneConstrainedQuestion.pkl index 76f27814d4eaaa7c2990d3dab02904f443993ef0..1d1b594d8ccdde8336eb3cd174e105beaf5eaf6f 100644 Binary files a/irlc/tests/unitgrade_data/BrachistochroneConstrainedQuestion.pkl and b/irlc/tests/unitgrade_data/BrachistochroneConstrainedQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/BrachistochroneQuestion.pkl b/irlc/tests/unitgrade_data/BrachistochroneQuestion.pkl index 76f27814d4eaaa7c2990d3dab02904f443993ef0..1d1b594d8ccdde8336eb3cd174e105beaf5eaf6f 100644 Binary files a/irlc/tests/unitgrade_data/BrachistochroneQuestion.pkl and b/irlc/tests/unitgrade_data/BrachistochroneQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/CartpoleCostQuestion.pkl b/irlc/tests/unitgrade_data/CartpoleCostQuestion.pkl index 76f27814d4eaaa7c2990d3dab02904f443993ef0..1d1b594d8ccdde8336eb3cd174e105beaf5eaf6f 100644 Binary files a/irlc/tests/unitgrade_data/CartpoleCostQuestion.pkl and b/irlc/tests/unitgrade_data/CartpoleCostQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/CartpoleTimeQuestion.pkl b/irlc/tests/unitgrade_data/CartpoleTimeQuestion.pkl index 76f27814d4eaaa7c2990d3dab02904f443993ef0..1d1b594d8ccdde8336eb3cd174e105beaf5eaf6f 100644 Binary files a/irlc/tests/unitgrade_data/CartpoleTimeQuestion.pkl and b/irlc/tests/unitgrade_data/CartpoleTimeQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/DirectAgentPendulum.pkl b/irlc/tests/unitgrade_data/DirectAgentPendulum.pkl index 
49ce42c0ea11e0f6f45b5cd1aadccde7c4fb4274..e9d2ed475214b22bc52f1cc0dfc8c04c71d9a2b9 100644 Binary files a/irlc/tests/unitgrade_data/DirectAgentPendulum.pkl and b/irlc/tests/unitgrade_data/DirectAgentPendulum.pkl differ diff --git a/irlc/tests/unitgrade_data/DirectMethods.pkl b/irlc/tests/unitgrade_data/DirectMethods.pkl index 7ccccf4713076674e8553270c2282b44501b6cc8..f81ab2560bf4752a237712f1df94fc8ae01ac0ce 100644 Binary files a/irlc/tests/unitgrade_data/DirectMethods.pkl and b/irlc/tests/unitgrade_data/DirectMethods.pkl differ diff --git a/irlc/tests/unitgrade_data/DirectSolverQuestion.pkl b/irlc/tests/unitgrade_data/DirectSolverQuestion.pkl index 76f27814d4eaaa7c2990d3dab02904f443993ef0..1d1b594d8ccdde8336eb3cd174e105beaf5eaf6f 100644 Binary files a/irlc/tests/unitgrade_data/DirectSolverQuestion.pkl and b/irlc/tests/unitgrade_data/DirectSolverQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/Exam5InventoryEvaluation.pkl b/irlc/tests/unitgrade_data/Exam5InventoryEvaluation.pkl index 12902e7ce8615fda0d3a74fbd97811e28766dded..a511917ab3f43109f1d3fe37b025f2fde713339f 100644 Binary files a/irlc/tests/unitgrade_data/Exam5InventoryEvaluation.pkl and b/irlc/tests/unitgrade_data/Exam5InventoryEvaluation.pkl differ diff --git a/irlc/tests/unitgrade_data/Exam6Toy2d.pkl b/irlc/tests/unitgrade_data/Exam6Toy2d.pkl index d40a08b78e3defd5e720b05e0948e20d9ad0f3b8..f927b5ae7578002c5a8840c91705f5d4c7d806f0 100644 Binary files a/irlc/tests/unitgrade_data/Exam6Toy2d.pkl and b/irlc/tests/unitgrade_data/Exam6Toy2d.pkl differ diff --git a/irlc/tests/unitgrade_data/ExamQuestion7FlowersStore.pkl b/irlc/tests/unitgrade_data/ExamQuestion7FlowersStore.pkl index 50149f3db947fddf5f0faf61a7f098e1fc8a3167..6555641d17ccd50bdc906dab13481cdad59254cc 100644 Binary files a/irlc/tests/unitgrade_data/ExamQuestion7FlowersStore.pkl and b/irlc/tests/unitgrade_data/ExamQuestion7FlowersStore.pkl differ diff --git a/irlc/tests/unitgrade_data/ILQRAgentQuestion.pkl b/irlc/tests/unitgrade_data/ILQRAgentQuestion.pkl new file mode 100644 index 0000000000000000000000000000000000000000..4af21ecb688c99771e1897bc53ecbae1bc667b8f Binary files /dev/null and b/irlc/tests/unitgrade_data/ILQRAgentQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/ILQRPendulumQuestion.pkl b/irlc/tests/unitgrade_data/ILQRPendulumQuestion.pkl new file mode 100644 index 0000000000000000000000000000000000000000..1019b5db1c0a0ad9b3e14545aaf80055652fcb66 Binary files /dev/null and b/irlc/tests/unitgrade_data/ILQRPendulumQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/PendulumQuestion.pkl b/irlc/tests/unitgrade_data/PendulumQuestion.pkl index 76f27814d4eaaa7c2990d3dab02904f443993ef0..1d1b594d8ccdde8336eb3cd174e105beaf5eaf6f 100644 Binary files a/irlc/tests/unitgrade_data/PendulumQuestion.pkl and b/irlc/tests/unitgrade_data/PendulumQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl b/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl index 62c9f65ce0617aa3054d6affdad0c711fc5685c7..cb3e05e507f46a771194aa47e9d478a38ea6dc4a 100644 Binary files a/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl and b/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem1DiscreteKuromoto.pkl b/irlc/tests/unitgrade_data/Problem1DiscreteKuromoto.pkl index 9b686f94b8f574600bc3a37c18a45c4d0a299430..5884cbbaec323589db6f77d93e93aab42b835c82 100644 Binary files a/irlc/tests/unitgrade_data/Problem1DiscreteKuromoto.pkl and b/irlc/tests/unitgrade_data/Problem1DiscreteKuromoto.pkl differ diff --git 
a/irlc/tests/unitgrade_data/Problem1Kuramoto.pkl b/irlc/tests/unitgrade_data/Problem1Kuramoto.pkl index cf285336e8f17dae0bab7e7270a857852e1ba420..ef1b528ea3cb8460738583d5cf526ff19f05dc78 100644 Binary files a/irlc/tests/unitgrade_data/Problem1Kuramoto.pkl and b/irlc/tests/unitgrade_data/Problem1Kuramoto.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl b/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl index 7023764ccbc227ca96f484b63afdabaa14f9483e..25f36de2f85c245c47e3f76ccfd13411d7dbb190 100644 Binary files a/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl and b/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem2DeterministicDP.pkl b/irlc/tests/unitgrade_data/Problem2DeterministicDP.pkl index ad9b4003fc5167a0e32093f6add6c9564bb3abd6..c6a6d86c444884d85398ab5d68b4a8f2d731b17c 100644 Binary files a/irlc/tests/unitgrade_data/Problem2DeterministicDP.pkl and b/irlc/tests/unitgrade_data/Problem2DeterministicDP.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem2DeterministicInventory.pkl b/irlc/tests/unitgrade_data/Problem2DeterministicInventory.pkl index 7316c88b930645edd82ccc62f437a80b134c2230..d7b664c9873a3f229a3ada6b5c0794429402f080 100644 Binary files a/irlc/tests/unitgrade_data/Problem2DeterministicInventory.pkl and b/irlc/tests/unitgrade_data/Problem2DeterministicInventory.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl b/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl index efa02d1d079347d5441874bc597c4e187d31bc71..ed6643cb8af26e6368d19072746fd41ae4c60ab0 100644 Binary files a/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl and b/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem3LQR.pkl b/irlc/tests/unitgrade_data/Problem3LQR.pkl new file mode 100644 index 0000000000000000000000000000000000000000..604d5c20678f95431aae0b2c24d839ff98a641a0 Binary files /dev/null and b/irlc/tests/unitgrade_data/Problem3LQR.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem3PID.pkl b/irlc/tests/unitgrade_data/Problem3PID.pkl index 8c6beb7a36898495ec6e53b18524f5710a900457..535051b828a3939a46f439310b29198a5e080a0a 100644 Binary files a/irlc/tests/unitgrade_data/Problem3PID.pkl and b/irlc/tests/unitgrade_data/Problem3PID.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem3StochasticDP.pkl b/irlc/tests/unitgrade_data/Problem3StochasticDP.pkl index 929b45dc8d15fa1f4ae023ee1b060091fb8247af..04713f394a37f31128be4aead3269f8c3c2e4695 100644 Binary files a/irlc/tests/unitgrade_data/Problem3StochasticDP.pkl and b/irlc/tests/unitgrade_data/Problem3StochasticDP.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem4DPAgent.pkl b/irlc/tests/unitgrade_data/Problem4DPAgent.pkl index b98533150232f5c5315c7b14033536777ce90a8f..54dc9c584545f20c53ce6eab91442e358d24233e 100644 Binary files a/irlc/tests/unitgrade_data/Problem4DPAgent.pkl and b/irlc/tests/unitgrade_data/Problem4DPAgent.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl b/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl index 6228dc12a502006cdf951286048d93936bea9dfe..2e0efe223d9500337a95c76732a6fcfc3cbb2872 100644 Binary files a/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl and b/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem4LQRAgent.pkl b/irlc/tests/unitgrade_data/Problem4LQRAgent.pkl new file mode 100644 index 
0000000000000000000000000000000000000000..b7f0a6a6d9def299660158f65d0b85af24d92699 Binary files /dev/null and b/irlc/tests/unitgrade_data/Problem4LQRAgent.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem4PIDAgent.pkl b/irlc/tests/unitgrade_data/Problem4PIDAgent.pkl index b3daba6609d47b6033fa62fa6963bb89fcae2543..70a85f4bcf6fc78c263690ba8b0491e7799bf59c 100644 Binary files a/irlc/tests/unitgrade_data/Problem4PIDAgent.pkl and b/irlc/tests/unitgrade_data/Problem4PIDAgent.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl b/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl index 04ac143368d1c4eb5280b4166b9da0f4e3eb3701..3968cbf7284d8d716e19ed9972ad53d4d3172bf7 100644 Binary files a/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl and b/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem5_6_Boeing.pkl b/irlc/tests/unitgrade_data/Problem5_6_Boeing.pkl new file mode 100644 index 0000000000000000000000000000000000000000..4650e0a6d3fa65b094ffb89356fae69744d7793a Binary files /dev/null and b/irlc/tests/unitgrade_data/Problem5_6_Boeing.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl b/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl index ebeda311cb01e4d60077598cb655e5368f5bcdda..e7da887928def8fb94b9e7e3d034e373928ce32e 100644 Binary files a/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl and b/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem7PIDCar.pkl b/irlc/tests/unitgrade_data/Problem7PIDCar.pkl index bfe010445cb8af018fe5e956ef043b34a34dba89..e22d2e24d83e87606e137a33987ca83d3c67a210 100644 Binary files a/irlc/tests/unitgrade_data/Problem7PIDCar.pkl and b/irlc/tests/unitgrade_data/Problem7PIDCar.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem7_8_PidLQR.pkl b/irlc/tests/unitgrade_data/Problem7_8_PidLQR.pkl new file mode 100644 index 0000000000000000000000000000000000000000..d455290c459b3de4a4a31f3905f4f131de9a2b78 Binary files /dev/null and b/irlc/tests/unitgrade_data/Problem7_8_PidLQR.pkl differ diff --git a/irlc/tests/unitgrade_data/RendevouzItem.pkl b/irlc/tests/unitgrade_data/RendevouzItem.pkl new file mode 100644 index 0000000000000000000000000000000000000000..91c3ae562ba9fce0668664f1fa58979cd070b2a7 Binary files /dev/null and b/irlc/tests/unitgrade_data/RendevouzItem.pkl differ