From 62809ce7538f0c0ec69fb0a3be623653709f08ea Mon Sep 17 00:00:00 2001 From: Tue Herlau <tuhe@dtu.dk> Date: Sun, 9 Feb 2025 21:15:19 +0100 Subject: [PATCH] Solutions to exercises --- solutions/ex00/fruit_homework_TODO_1.py | 1 + solutions/ex00/fruit_homework_TODO_2.py | 1 + solutions/ex00/fruit_homework_TODO_3.py | 1 + solutions/ex00/fruit_homework_TODO_4.py | 1 + solutions/ex01/bobs_friend_TODO_1.py | 3 +++ solutions/ex01/bobs_friend_TODO_2.py | 9 +++++++++ solutions/ex01/bobs_friend_TODO_3.py | 1 + solutions/ex01/bobs_friend_TODO_4.py | 1 + solutions/ex01/chess_TODO_1.py | 1 + solutions/ex01/chess_TODO_2.py | 7 +++++++ solutions/ex01/chess_TODO_3.py | 1 + solutions/ex01/chess_TODO_4.py | 1 + solutions/ex01/chess_TODO_5.py | 1 + solutions/ex01/inventory_environment_TODO_1.py | 5 +++++ solutions/ex01/inventory_environment_TODO_2.py | 1 + solutions/ex01/inventory_environment_TODO_3.py | 7 +++++++ solutions/ex01/pacman_hardcoded_TODO_1.py | 7 +++++++ 17 files changed, 49 insertions(+) create mode 100644 solutions/ex00/fruit_homework_TODO_1.py create mode 100644 solutions/ex00/fruit_homework_TODO_2.py create mode 100644 solutions/ex00/fruit_homework_TODO_3.py create mode 100644 solutions/ex00/fruit_homework_TODO_4.py create mode 100644 solutions/ex01/bobs_friend_TODO_1.py create mode 100644 solutions/ex01/bobs_friend_TODO_2.py create mode 100644 solutions/ex01/bobs_friend_TODO_3.py create mode 100644 solutions/ex01/bobs_friend_TODO_4.py create mode 100644 solutions/ex01/chess_TODO_1.py create mode 100644 solutions/ex01/chess_TODO_2.py create mode 100644 solutions/ex01/chess_TODO_3.py create mode 100644 solutions/ex01/chess_TODO_4.py create mode 100644 solutions/ex01/chess_TODO_5.py create mode 100644 solutions/ex01/inventory_environment_TODO_1.py create mode 100644 solutions/ex01/inventory_environment_TODO_2.py create mode 100644 solutions/ex01/inventory_environment_TODO_3.py create mode 100644 solutions/ex01/pacman_hardcoded_TODO_1.py diff --git a/solutions/ex00/fruit_homework_TODO_1.py b/solutions/ex00/fruit_homework_TODO_1.py new file mode 100644 index 0000000..b498ceb --- /dev/null +++ b/solutions/ex00/fruit_homework_TODO_1.py @@ -0,0 +1 @@ + return a+b \ No newline at end of file diff --git a/solutions/ex00/fruit_homework_TODO_2.py b/solutions/ex00/fruit_homework_TODO_2.py new file mode 100644 index 0000000..f546843 --- /dev/null +++ b/solutions/ex00/fruit_homework_TODO_2.py @@ -0,0 +1 @@ + return ["mr " + a for a in animals] \ No newline at end of file diff --git a/solutions/ex00/fruit_homework_TODO_3.py b/solutions/ex00/fruit_homework_TODO_3.py new file mode 100644 index 0000000..5be72c6 --- /dev/null +++ b/solutions/ex00/fruit_homework_TODO_3.py @@ -0,0 +1 @@ + return sum([x * p for x, p in p_dict.items()]) \ No newline at end of file diff --git a/solutions/ex00/fruit_homework_TODO_4.py b/solutions/ex00/fruit_homework_TODO_4.py new file mode 100644 index 0000000..84c3b39 --- /dev/null +++ b/solutions/ex00/fruit_homework_TODO_4.py @@ -0,0 +1 @@ + return self.prices[fruit] \ No newline at end of file diff --git a/solutions/ex01/bobs_friend_TODO_1.py b/solutions/ex01/bobs_friend_TODO_1.py new file mode 100644 index 0000000..2d03d7c --- /dev/null +++ b/solutions/ex01/bobs_friend_TODO_1.py @@ -0,0 +1,3 @@ + + self.s = self.x0 + \ No newline at end of file diff --git a/solutions/ex01/bobs_friend_TODO_2.py b/solutions/ex01/bobs_friend_TODO_2.py new file mode 100644 index 0000000..9caf28a --- /dev/null +++ b/solutions/ex01/bobs_friend_TODO_2.py @@ -0,0 +1,9 @@ + terminated = True + if a == 0: + s_next = self.s * 1.1 + else: + if np.random.rand() < 1/4: + s_next = 0 + else: + s_next = self.s + 12 + reward = s_next - self.s \ No newline at end of file diff --git a/solutions/ex01/bobs_friend_TODO_3.py b/solutions/ex01/bobs_friend_TODO_3.py new file mode 100644 index 0000000..8399f7f --- /dev/null +++ b/solutions/ex01/bobs_friend_TODO_3.py @@ -0,0 +1 @@ + return 0 \ No newline at end of file diff --git a/solutions/ex01/bobs_friend_TODO_4.py b/solutions/ex01/bobs_friend_TODO_4.py new file mode 100644 index 0000000..36a268f --- /dev/null +++ b/solutions/ex01/bobs_friend_TODO_4.py @@ -0,0 +1 @@ + return 1 \ No newline at end of file diff --git a/solutions/ex01/chess_TODO_1.py b/solutions/ex01/chess_TODO_1.py new file mode 100644 index 0000000..f8752f9 --- /dev/null +++ b/solutions/ex01/chess_TODO_1.py @@ -0,0 +1 @@ + self.s = [] \ No newline at end of file diff --git a/solutions/ex01/chess_TODO_2.py b/solutions/ex01/chess_TODO_2.py new file mode 100644 index 0000000..9b82990 --- /dev/null +++ b/solutions/ex01/chess_TODO_2.py @@ -0,0 +1,7 @@ + if np.random.rand() < self.p_draw: + game_outcome = 0 + else: + if np.random.rand() < self.p_win: + game_outcome = 1 + else: + game_outcome = -1 \ No newline at end of file diff --git a/solutions/ex01/chess_TODO_3.py b/solutions/ex01/chess_TODO_3.py new file mode 100644 index 0000000..29e1443 --- /dev/null +++ b/solutions/ex01/chess_TODO_3.py @@ -0,0 +1 @@ + done = len(self.s) >= 2 and self.s[-1] == self.s[-2] and self.s[-1] != 0 \ No newline at end of file diff --git a/solutions/ex01/chess_TODO_4.py b/solutions/ex01/chess_TODO_4.py new file mode 100644 index 0000000..d45e38a --- /dev/null +++ b/solutions/ex01/chess_TODO_4.py @@ -0,0 +1 @@ + r = self.s[-1] == 1 if done else 0 \ No newline at end of file diff --git a/solutions/ex01/chess_TODO_5.py b/solutions/ex01/chess_TODO_5.py new file mode 100644 index 0000000..c270359 --- /dev/null +++ b/solutions/ex01/chess_TODO_5.py @@ -0,0 +1 @@ + stats, _ = train(env, Agent(env), num_episodes=T) \ No newline at end of file diff --git a/solutions/ex01/inventory_environment_TODO_1.py b/solutions/ex01/inventory_environment_TODO_1.py new file mode 100644 index 0000000..5f5a775 --- /dev/null +++ b/solutions/ex01/inventory_environment_TODO_1.py @@ -0,0 +1,5 @@ + s_next = max(0, min(2, self.s-w+a)) # next state; x_{k+1} = f_k(x_k, u_k, w_k) + reward = -(a + (self.s + a - w)**2) # reward = -cost = -g_k(x_k, u_k, w_k) + terminated = self.k == self.N-1 # Have we terminated? (i.e. is k==N-1) + self.s = s_next # update environment state + self.k += 1 # update current time step \ No newline at end of file diff --git a/solutions/ex01/inventory_environment_TODO_2.py b/solutions/ex01/inventory_environment_TODO_2.py new file mode 100644 index 0000000..bebe04b --- /dev/null +++ b/solutions/ex01/inventory_environment_TODO_2.py @@ -0,0 +1 @@ + return np.random.choice(3) # Return a random action \ No newline at end of file diff --git a/solutions/ex01/inventory_environment_TODO_3.py b/solutions/ex01/inventory_environment_TODO_3.py new file mode 100644 index 0000000..0855951 --- /dev/null +++ b/solutions/ex01/inventory_environment_TODO_3.py @@ -0,0 +1,7 @@ + a = agent.pi(s, k) + sp, r, terminated, truncated, metadata = env.step(a) + agent.train(s, a, sp, r, terminated) + s = sp + J += r + if terminated or truncated: + break \ No newline at end of file diff --git a/solutions/ex01/pacman_hardcoded_TODO_1.py b/solutions/ex01/pacman_hardcoded_TODO_1.py new file mode 100644 index 0000000..5c532d7 --- /dev/null +++ b/solutions/ex01/pacman_hardcoded_TODO_1.py @@ -0,0 +1,7 @@ + if k < 7: + return 'South' + elif k < 14: + return 'East' + elif k < 21: + return 'North' + elif k < 28: \ No newline at end of file -- GitLab