diff --git a/solutions/ex00/fruit_homework_TODO_1.py b/solutions/ex00/fruit_homework_TODO_1.py new file mode 100644 index 0000000000000000000000000000000000000000..b498ceb06d3fa5ab7a183da45428d83a52e1a5cb --- /dev/null +++ b/solutions/ex00/fruit_homework_TODO_1.py @@ -0,0 +1 @@ + return a+b \ No newline at end of file diff --git a/solutions/ex00/fruit_homework_TODO_2.py b/solutions/ex00/fruit_homework_TODO_2.py new file mode 100644 index 0000000000000000000000000000000000000000..f546843734ee5aa45c7345b9bd8d3bfdca5600ff --- /dev/null +++ b/solutions/ex00/fruit_homework_TODO_2.py @@ -0,0 +1 @@ + return ["mr " + a for a in animals] \ No newline at end of file diff --git a/solutions/ex00/fruit_homework_TODO_3.py b/solutions/ex00/fruit_homework_TODO_3.py new file mode 100644 index 0000000000000000000000000000000000000000..5be72c6f38c133f5b1a2f5a09acff6d2eefda3ee --- /dev/null +++ b/solutions/ex00/fruit_homework_TODO_3.py @@ -0,0 +1 @@ + return sum([x * p for x, p in p_dict.items()]) \ No newline at end of file diff --git a/solutions/ex00/fruit_homework_TODO_4.py b/solutions/ex00/fruit_homework_TODO_4.py new file mode 100644 index 0000000000000000000000000000000000000000..84c3b39c208f1eb0dbda3f5f8001c82e9af2cb4b --- /dev/null +++ b/solutions/ex00/fruit_homework_TODO_4.py @@ -0,0 +1 @@ + return self.prices[fruit] \ No newline at end of file diff --git a/solutions/ex01/bobs_friend_TODO_1.py b/solutions/ex01/bobs_friend_TODO_1.py new file mode 100644 index 0000000000000000000000000000000000000000..2d03d7c5a8beff870e8ced0000a122c5b9952d75 --- /dev/null +++ b/solutions/ex01/bobs_friend_TODO_1.py @@ -0,0 +1,3 @@ + + self.s = self.x0 + \ No newline at end of file diff --git a/solutions/ex01/bobs_friend_TODO_2.py b/solutions/ex01/bobs_friend_TODO_2.py new file mode 100644 index 0000000000000000000000000000000000000000..9caf28ad6988a09d81ea82bcebf257c4fd4caf0c --- /dev/null +++ b/solutions/ex01/bobs_friend_TODO_2.py @@ -0,0 +1,9 @@ + terminated = True + if a == 0: + s_next = self.s * 1.1 + else: + if np.random.rand() < 1/4: + s_next = 0 + else: + s_next = self.s + 12 + reward = s_next - self.s \ No newline at end of file diff --git a/solutions/ex01/bobs_friend_TODO_3.py b/solutions/ex01/bobs_friend_TODO_3.py new file mode 100644 index 0000000000000000000000000000000000000000..8399f7fba970e6acf6b370dadd567754b5f8bc7e --- /dev/null +++ b/solutions/ex01/bobs_friend_TODO_3.py @@ -0,0 +1 @@ + return 0 \ No newline at end of file diff --git a/solutions/ex01/bobs_friend_TODO_4.py b/solutions/ex01/bobs_friend_TODO_4.py new file mode 100644 index 0000000000000000000000000000000000000000..36a268f2fa289cbbb6a96ae75376e8e1cc5ea729 --- /dev/null +++ b/solutions/ex01/bobs_friend_TODO_4.py @@ -0,0 +1 @@ + return 1 \ No newline at end of file diff --git a/solutions/ex01/chess_TODO_1.py b/solutions/ex01/chess_TODO_1.py new file mode 100644 index 0000000000000000000000000000000000000000..f8752f92f9185d2b2e05cd157a1b5372b6b9560b --- /dev/null +++ b/solutions/ex01/chess_TODO_1.py @@ -0,0 +1 @@ + self.s = [] \ No newline at end of file diff --git a/solutions/ex01/chess_TODO_2.py b/solutions/ex01/chess_TODO_2.py new file mode 100644 index 0000000000000000000000000000000000000000..9b829905c94cfd57d43edfcfeecdfe8bd36039b6 --- /dev/null +++ b/solutions/ex01/chess_TODO_2.py @@ -0,0 +1,7 @@ + if np.random.rand() < self.p_draw: + game_outcome = 0 + else: + if np.random.rand() < self.p_win: + game_outcome = 1 + else: + game_outcome = -1 \ No newline at end of file diff --git a/solutions/ex01/chess_TODO_3.py b/solutions/ex01/chess_TODO_3.py new file mode 100644 index 0000000000000000000000000000000000000000..29e14434d6508e15f098119b60f2e0e6e15390d9 --- /dev/null +++ b/solutions/ex01/chess_TODO_3.py @@ -0,0 +1 @@ + done = len(self.s) >= 2 and self.s[-1] == self.s[-2] and self.s[-1] != 0 \ No newline at end of file diff --git a/solutions/ex01/chess_TODO_4.py b/solutions/ex01/chess_TODO_4.py new file mode 100644 index 0000000000000000000000000000000000000000..d45e38a4592d6c99a05d741916eb6655c2babebf --- /dev/null +++ b/solutions/ex01/chess_TODO_4.py @@ -0,0 +1 @@ + r = self.s[-1] == 1 if done else 0 \ No newline at end of file diff --git a/solutions/ex01/chess_TODO_5.py b/solutions/ex01/chess_TODO_5.py new file mode 100644 index 0000000000000000000000000000000000000000..c270359f1626954b313683214d93beb8a786521b --- /dev/null +++ b/solutions/ex01/chess_TODO_5.py @@ -0,0 +1 @@ + stats, _ = train(env, Agent(env), num_episodes=T) \ No newline at end of file diff --git a/solutions/ex01/inventory_environment_TODO_1.py b/solutions/ex01/inventory_environment_TODO_1.py new file mode 100644 index 0000000000000000000000000000000000000000..5f5a775b790f2dfb76f573b6c1b4ee7b4a8442fb --- /dev/null +++ b/solutions/ex01/inventory_environment_TODO_1.py @@ -0,0 +1,5 @@ + s_next = max(0, min(2, self.s-w+a)) # next state; x_{k+1} = f_k(x_k, u_k, w_k) + reward = -(a + (self.s + a - w)**2) # reward = -cost = -g_k(x_k, u_k, w_k) + terminated = self.k == self.N-1 # Have we terminated? (i.e. is k==N-1) + self.s = s_next # update environment state + self.k += 1 # update current time step \ No newline at end of file diff --git a/solutions/ex01/inventory_environment_TODO_2.py b/solutions/ex01/inventory_environment_TODO_2.py new file mode 100644 index 0000000000000000000000000000000000000000..bebe04bc25189f1dad2fbd10c30070cd823b5b9e --- /dev/null +++ b/solutions/ex01/inventory_environment_TODO_2.py @@ -0,0 +1 @@ + return np.random.choice(3) # Return a random action \ No newline at end of file diff --git a/solutions/ex01/inventory_environment_TODO_3.py b/solutions/ex01/inventory_environment_TODO_3.py new file mode 100644 index 0000000000000000000000000000000000000000..0855951dfbae56cfc204cab87b19954e2e4bf074 --- /dev/null +++ b/solutions/ex01/inventory_environment_TODO_3.py @@ -0,0 +1,7 @@ + a = agent.pi(s, k) + sp, r, terminated, truncated, metadata = env.step(a) + agent.train(s, a, sp, r, terminated) + s = sp + J += r + if terminated or truncated: + break \ No newline at end of file diff --git a/solutions/ex01/pacman_hardcoded_TODO_1.py b/solutions/ex01/pacman_hardcoded_TODO_1.py new file mode 100644 index 0000000000000000000000000000000000000000..5c532d7fae997f5c6a4d7e8383dbbb14af8a12e8 --- /dev/null +++ b/solutions/ex01/pacman_hardcoded_TODO_1.py @@ -0,0 +1,7 @@ + if k < 7: + return 'South' + elif k < 14: + return 'East' + elif k < 21: + return 'North' + elif k < 28: \ No newline at end of file