From 62809ce7538f0c0ec69fb0a3be623653709f08ea Mon Sep 17 00:00:00 2001
From: Tue Herlau <tuhe@dtu.dk>
Date: Sun, 9 Feb 2025 21:15:19 +0100
Subject: [PATCH] Solutions to exercises

---
 solutions/ex00/fruit_homework_TODO_1.py        | 1 +
 solutions/ex00/fruit_homework_TODO_2.py        | 1 +
 solutions/ex00/fruit_homework_TODO_3.py        | 1 +
 solutions/ex00/fruit_homework_TODO_4.py        | 1 +
 solutions/ex01/bobs_friend_TODO_1.py           | 3 +++
 solutions/ex01/bobs_friend_TODO_2.py           | 9 +++++++++
 solutions/ex01/bobs_friend_TODO_3.py           | 1 +
 solutions/ex01/bobs_friend_TODO_4.py           | 1 +
 solutions/ex01/chess_TODO_1.py                 | 1 +
 solutions/ex01/chess_TODO_2.py                 | 7 +++++++
 solutions/ex01/chess_TODO_3.py                 | 1 +
 solutions/ex01/chess_TODO_4.py                 | 1 +
 solutions/ex01/chess_TODO_5.py                 | 1 +
 solutions/ex01/inventory_environment_TODO_1.py | 5 +++++
 solutions/ex01/inventory_environment_TODO_2.py | 1 +
 solutions/ex01/inventory_environment_TODO_3.py | 7 +++++++
 solutions/ex01/pacman_hardcoded_TODO_1.py      | 7 +++++++
 17 files changed, 49 insertions(+)
 create mode 100644 solutions/ex00/fruit_homework_TODO_1.py
 create mode 100644 solutions/ex00/fruit_homework_TODO_2.py
 create mode 100644 solutions/ex00/fruit_homework_TODO_3.py
 create mode 100644 solutions/ex00/fruit_homework_TODO_4.py
 create mode 100644 solutions/ex01/bobs_friend_TODO_1.py
 create mode 100644 solutions/ex01/bobs_friend_TODO_2.py
 create mode 100644 solutions/ex01/bobs_friend_TODO_3.py
 create mode 100644 solutions/ex01/bobs_friend_TODO_4.py
 create mode 100644 solutions/ex01/chess_TODO_1.py
 create mode 100644 solutions/ex01/chess_TODO_2.py
 create mode 100644 solutions/ex01/chess_TODO_3.py
 create mode 100644 solutions/ex01/chess_TODO_4.py
 create mode 100644 solutions/ex01/chess_TODO_5.py
 create mode 100644 solutions/ex01/inventory_environment_TODO_1.py
 create mode 100644 solutions/ex01/inventory_environment_TODO_2.py
 create mode 100644 solutions/ex01/inventory_environment_TODO_3.py
 create mode 100644 solutions/ex01/pacman_hardcoded_TODO_1.py

diff --git a/solutions/ex00/fruit_homework_TODO_1.py b/solutions/ex00/fruit_homework_TODO_1.py
new file mode 100644
index 0000000..b498ceb
--- /dev/null
+++ b/solutions/ex00/fruit_homework_TODO_1.py
@@ -0,0 +1 @@
+    return a+b  # result of applying + to the two arguments
\ No newline at end of file
diff --git a/solutions/ex00/fruit_homework_TODO_2.py b/solutions/ex00/fruit_homework_TODO_2.py
new file mode 100644
index 0000000..f546843
--- /dev/null
+++ b/solutions/ex00/fruit_homework_TODO_2.py
@@ -0,0 +1 @@
+    return ["mr " + a for a in animals]  # new list: "mr " prepended to every element of animals
\ No newline at end of file
diff --git a/solutions/ex00/fruit_homework_TODO_3.py b/solutions/ex00/fruit_homework_TODO_3.py
new file mode 100644
index 0000000..5be72c6
--- /dev/null
+++ b/solutions/ex00/fruit_homework_TODO_3.py
@@ -0,0 +1 @@
+    return sum(value * weight for value, weight in p_dict.items())  # sum of key * value over all entries of p_dict
\ No newline at end of file
diff --git a/solutions/ex00/fruit_homework_TODO_4.py b/solutions/ex00/fruit_homework_TODO_4.py
new file mode 100644
index 0000000..84c3b39
--- /dev/null
+++ b/solutions/ex00/fruit_homework_TODO_4.py
@@ -0,0 +1 @@
+        return self.prices[fruit]  # index self.prices by fruit; presumably a dict, so an unknown fruit would raise KeyError — confirm
\ No newline at end of file
diff --git a/solutions/ex01/bobs_friend_TODO_1.py b/solutions/ex01/bobs_friend_TODO_1.py
new file mode 100644
index 0000000..2d03d7c
--- /dev/null
+++ b/solutions/ex01/bobs_friend_TODO_1.py
@@ -0,0 +1,3 @@
+        
+        self.s = self.x0  # set the current state to the stored value x0
+        
\ No newline at end of file
diff --git a/solutions/ex01/bobs_friend_TODO_2.py b/solutions/ex01/bobs_friend_TODO_2.py
new file mode 100644
index 0000000..9caf28a
--- /dev/null
+++ b/solutions/ex01/bobs_friend_TODO_2.py
@@ -0,0 +1,9 @@
+        terminated = True  # set unconditionally in this fragment
+        if a == 0:
+            s_next = self.s * 1.1  # action 0: the state grows by 10%
+        else:
+            if np.random.rand() < 1/4:  # any other action: with probability 1/4 ...
+                s_next = 0  # ... the state drops to 0
+            else:
+                s_next = self.s + 12  # ... otherwise it increases by 12
+        reward = s_next - self.s  # reward is the change in state
\ No newline at end of file
diff --git a/solutions/ex01/bobs_friend_TODO_3.py b/solutions/ex01/bobs_friend_TODO_3.py
new file mode 100644
index 0000000..8399f7f
--- /dev/null
+++ b/solutions/ex01/bobs_friend_TODO_3.py
@@ -0,0 +1 @@
+        return 0  # constant result; presumably an action index — confirm against the enclosing method
\ No newline at end of file
diff --git a/solutions/ex01/bobs_friend_TODO_4.py b/solutions/ex01/bobs_friend_TODO_4.py
new file mode 100644
index 0000000..36a268f
--- /dev/null
+++ b/solutions/ex01/bobs_friend_TODO_4.py
@@ -0,0 +1 @@
+        return 1  # constant result; presumably an action index — confirm against the enclosing method
\ No newline at end of file
diff --git a/solutions/ex01/chess_TODO_1.py b/solutions/ex01/chess_TODO_1.py
new file mode 100644
index 0000000..f8752f9
--- /dev/null
+++ b/solutions/ex01/chess_TODO_1.py
@@ -0,0 +1 @@
+        self.s = []  # the state starts as an empty list
\ No newline at end of file
diff --git a/solutions/ex01/chess_TODO_2.py b/solutions/ex01/chess_TODO_2.py
new file mode 100644
index 0000000..9b82990
--- /dev/null
+++ b/solutions/ex01/chess_TODO_2.py
@@ -0,0 +1,7 @@
+        if np.random.rand() < self.p_draw: # with probability p_draw the outcome is 0
+            game_outcome = 0
+        else:
+            if np.random.rand() < self.p_win:  # otherwise a second, independent draw decides: 1 with probability p_win
+                game_outcome = 1
+            else:
+                game_outcome = -1 # ... and -1 in the remaining case
\ No newline at end of file
diff --git a/solutions/ex01/chess_TODO_3.py b/solutions/ex01/chess_TODO_3.py
new file mode 100644
index 0000000..29e1443
--- /dev/null
+++ b/solutions/ex01/chess_TODO_3.py
@@ -0,0 +1 @@
+        done = len(self.s) >= 2 and self.s[-1] == self.s[-2] and self.s[-1] != 0 # done once the two most recent entries of self.s are equal and non-zero
\ No newline at end of file
diff --git a/solutions/ex01/chess_TODO_4.py b/solutions/ex01/chess_TODO_4.py
new file mode 100644
index 0000000..d45e38a
--- /dev/null
+++ b/solutions/ex01/chess_TODO_4.py
@@ -0,0 +1 @@
+        r = int(self.s[-1] == 1) if done else 0  # 1 only when done and the last entry of self.s is 1; int() keeps the reward an int rather than a bool
\ No newline at end of file
diff --git a/solutions/ex01/chess_TODO_5.py b/solutions/ex01/chess_TODO_5.py
new file mode 100644
index 0000000..c270359
--- /dev/null
+++ b/solutions/ex01/chess_TODO_5.py
@@ -0,0 +1 @@
+    stats, _ = train(env, Agent(env), num_episodes=T) # call train with a fresh Agent for T episodes; the second return value is discarded
\ No newline at end of file
diff --git a/solutions/ex01/inventory_environment_TODO_1.py b/solutions/ex01/inventory_environment_TODO_1.py
new file mode 100644
index 0000000..5f5a775
--- /dev/null
+++ b/solutions/ex01/inventory_environment_TODO_1.py
@@ -0,0 +1,5 @@
+        s_next = max(0, min(2, self.s-w+a))           # next state, clamped to [0, 2]; x_{k+1} = f_k(x_k, u_k, w_k)
+        reward = -(a + (self.s + a - w)**2)           # reward = -cost = -g_k(x_k, u_k, w_k); the squared term uses the un-clamped s + a - w
+        terminated = self.k == self.N-1               # terminated when the current step k equals N-1 (checked before k is incremented)
+        self.s = s_next                               # update environment state
+        self.k += 1                                   # advance the current time step
\ No newline at end of file
diff --git a/solutions/ex01/inventory_environment_TODO_2.py b/solutions/ex01/inventory_environment_TODO_2.py
new file mode 100644
index 0000000..bebe04b
--- /dev/null
+++ b/solutions/ex01/inventory_environment_TODO_2.py
@@ -0,0 +1 @@
+        return np.random.choice(3) # Return a random action: np.random.choice(3) draws uniformly from {0, 1, 2}
\ No newline at end of file
diff --git a/solutions/ex01/inventory_environment_TODO_3.py b/solutions/ex01/inventory_environment_TODO_3.py
new file mode 100644
index 0000000..0855951
--- /dev/null
+++ b/solutions/ex01/inventory_environment_TODO_3.py
@@ -0,0 +1,7 @@
+        a = agent.pi(s, k) # ask the agent for an action given the current state s and step k
+        sp, r, terminated, truncated, metadata = env.step(a)  # apply the action; env.step returns a 5-tuple
+        agent.train(s, a, sp, r, terminated)  # hand the transition (s, a, sp, r, terminated) to the agent
+        s = sp  # the next state becomes the current state
+        J += r  # accumulate the reward into J
+        if terminated or truncated:  # stop as soon as the environment reports either flag
+            break 
\ No newline at end of file
diff --git a/solutions/ex01/pacman_hardcoded_TODO_1.py b/solutions/ex01/pacman_hardcoded_TODO_1.py
new file mode 100644
index 0000000..5c532d7
--- /dev/null
+++ b/solutions/ex01/pacman_hardcoded_TODO_1.py
@@ -0,0 +1,7 @@
+        if k < 7:  # the returned direction depends only on which band of 7 steps k falls in
+            return 'South'
+        elif k < 14:
+            return 'East'
+        elif k < 21:
+            return 'North'
+        elif k < 28: # NOTE(review): the snippet ends here with no body for this branch — presumably it continues in the exercise file; confirm
\ No newline at end of file
-- 
GitLab