diff --git a/solutions/ex09/gambler_TODO_1.py b/solutions/ex09/gambler_TODO_1.py
new file mode 100644
index 0000000000000000000000000000000000000000..5edd9179f3c7d5cecdf4ed62874f892d3b277a49
--- /dev/null
+++ b/solutions/ex09/gambler_TODO_1.py
@@ -0,0 +1,2 @@
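+        # A state is terminal when the gambler is broke (0) or has reached the goal capital.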
+        return state in [0, self.goal]
\ No newline at end of file
diff --git a/solutions/ex09/gambler_TODO_2.py b/solutions/ex09/gambler_TODO_2.py
new file mode 100644
index 0000000000000000000000000000000000000000..63c4cf777199a52f2eea1c333f5eb98a692b6e61
--- /dev/null
+++ b/solutions/ex09/gambler_TODO_2.py
@@ -0,0 +1,2 @@
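+        # Available stakes: bet at least 1, at most the current capital s, and never more than is needed to reach the goal.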
+        return list(range(1, min(s, self.goal - s) + 1))
\ No newline at end of file
diff --git a/solutions/ex09/gambler_TODO_3.py b/solutions/ex09/gambler_TODO_3.py
new file mode 100644
index 0000000000000000000000000000000000000000..b4e0a660fca2209ea59af40b1b18bc7aad4e9c39
--- /dev/null
+++ b/solutions/ex09/gambler_TODO_3.py
@@ -0,0 +1,5 @@
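+        # Distribution over (next_state, reward) outcomes when betting a in state s.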
+        r = 1 if s + a == self.goal else 0  # Reward 1 only for the transition that reaches the goal capital
+        WIN = (s + a, r)   # heads: the stake is won
+        LOSS = (s - a, 0)  # tails: the stake is lost
+        outcome_dict = {WIN: self.p_heads, LOSS: 1 - self.p_heads} if WIN != LOSS else {WIN: 1.}
\ No newline at end of file
diff --git a/solutions/ex09/mdp_warmup_TODO_1.py b/solutions/ex09/mdp_warmup_TODO_1.py
new file mode 100644
index 0000000000000000000000000000000000000000..b8ee7db7c1bc9e038ea07feff49720d151785490
--- /dev/null
+++ b/solutions/ex09/mdp_warmup_TODO_1.py
@@ -0,0 +1,2 @@
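+    # One-step lookahead: Q(s, a) = sum_{(s', r)} p(s', r | s, a) * (r + gamma * v(s')), with no future value from terminal s'.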
+    q_dict = {a: sum(p * (r + (gamma * v[sp] if not mdp.is_terminal(sp) else 0)) for (sp, r), p in mdp.Psr(s, a).items()) for a in mdp.A(s)}
\ No newline at end of file
diff --git a/solutions/ex09/mdp_warmup_TODO_2.py b/solutions/ex09/mdp_warmup_TODO_2.py
new file mode 100644
index 0000000000000000000000000000000000000000..f605ec389d9213a9f951ad32bf2d96ed93f83424
--- /dev/null
+++ b/solutions/ex09/mdp_warmup_TODO_2.py
@@ -0,0 +1 @@
+    raise NotImplementedError("Insert your solution and remove this error.")
\ No newline at end of file
diff --git a/solutions/ex09/mdp_warmup_TODO_3.py b/solutions/ex09/mdp_warmup_TODO_3.py
new file mode 100644
index 0000000000000000000000000000000000000000..c8f9a461ff9e0126baa93f2f1abbf35d365a52d6
--- /dev/null
+++ b/solutions/ex09/mdp_warmup_TODO_3.py
@@ -0,0 +1,2 @@
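+    # Expected immediate reward: E[r | s, a] = sum_{(s', r)} p(s', r | s, a) * r.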
+    expected_reward = sum(r * p for (sp, r), p in mdp.Psr(s, a).items())
\ No newline at end of file
diff --git a/solutions/ex09/mdp_warmup_TODO_4.py b/solutions/ex09/mdp_warmup_TODO_4.py
new file mode 100644
index 0000000000000000000000000000000000000000..bb8d2810f8b9e635f333b72539f47e18035d818e
--- /dev/null
+++ b/solutions/ex09/mdp_warmup_TODO_4.py
@@ -0,0 +1,2 @@
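+    # V(s) = sum_a pi(a | s) * Q(s, a), where policy maps each action to its probability.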
+    V_s = sum(Q[s, a] * p for a, p in policy.items())
\ No newline at end of file
diff --git a/solutions/ex09/policy_evaluation_TODO_1.py b/solutions/ex09/policy_evaluation_TODO_1.py
new file mode 100644
index 0000000000000000000000000000000000000000..290d5ab43f92e519b08e9adbd01916b4f70cd81b
--- /dev/null
+++ b/solutions/ex09/policy_evaluation_TODO_1.py
@@ -0,0 +1,3 @@
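+            # Bellman expectation backup: v(s) <- sum_a pi(a | s) * q(s, a); v_ keeps the old value (e.g. for a convergence check).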
+            q = value_function2q_function(mdp, s, gamma, v)
+            v_, v[s] = v[s], sum(q[a] * pi_a for a, pi_a in pi[s].items())
\ No newline at end of file
diff --git a/solutions/ex09/policy_iteration_TODO_1.py b/solutions/ex09/policy_iteration_TODO_1.py
new file mode 100644
index 0000000000000000000000000000000000000000..00c8a957fb3b6a2650aec38a6da1f8fe541ea5ee
--- /dev/null
+++ b/solutions/ex09/policy_iteration_TODO_1.py
@@ -0,0 +1,7 @@
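+        # Policy improvement: sweep the non-terminal states in random order and make pi greedy w.r.t. the current V.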
+        for s in [mdp.nonterminal_states[i] for i in np.random.permutation(len(mdp.nonterminal_states))]:
+            old_a = pi[s]  # The action the current policy takes in s
+            Qs = value_function2q_function(mdp, s, gamma, V)
+            pi[s] = max(Qs, key=Qs.get)  # Greedy action w.r.t. the current value function
+            if old_a != pi[s]:
+                policy_stable = False
\ No newline at end of file
diff --git a/solutions/ex09/value_iteration_TODO_1.py b/solutions/ex09/value_iteration_TODO_1.py
new file mode 100644
index 0000000000000000000000000000000000000000..d07abe42531f4a01ecb6e6ef69b30abdaef4c0b1
--- /dev/null
+++ b/solutions/ex09/value_iteration_TODO_1.py
@@ -0,0 +1,3 @@
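+            # Bellman optimality backup: V(s) <- max_a Q(s, a), or 0 if s has no actions; Delta tracks the largest change.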
+            v, V[s] = V[s], (max(value_function2q_function(mdp, s, gamma, V).values()) if len(mdp.A(s)) > 0 else 0)
+            Delta = max(Delta, np.abs(v - V[s]))
\ No newline at end of file
diff --git a/solutions/ex09/value_iteration_TODO_2.py b/solutions/ex09/value_iteration_TODO_2.py
new file mode 100644
index 0000000000000000000000000000000000000000..89339fefc3a2ea8871c75d63d26586b2e07f0f1b
--- /dev/null
+++ b/solutions/ex09/value_iteration_TODO_2.py
@@ -0,0 +1,3 @@
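+        # Greedy policy extraction; the tiny 1e-8 * a penalty breaks ties among equal-valued integer actions in favor of the smallest action.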
+        Q = {a: v - (1e-8 * a if isinstance(a, int) else 0) for a, v in value_function2q_function(mdp, s, gamma, V).items()}
+        pi[s] = max(Q, key=Q.get)
\ No newline at end of file
diff --git a/solutions/ex09/value_iteration_agent_TODO_1.py b/solutions/ex09/value_iteration_agent_TODO_1.py
new file mode 100644
index 0000000000000000000000000000000000000000..49090726ea7151cfdb717088aa6b371b78baee41
--- /dev/null
+++ b/solutions/ex09/value_iteration_agent_TODO_1.py
@@ -0,0 +1,2 @@
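+        # Plan once at construction time: store the value-iteration policy and value function.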
+        self.policy, self.v = value_iteration(mdp, gamma=gamma, **kwargs)
\ No newline at end of file
diff --git a/solutions/ex09/value_iteration_agent_TODO_2.py b/solutions/ex09/value_iteration_agent_TODO_2.py
new file mode 100644
index 0000000000000000000000000000000000000000..5a41f1466ec31a8bbec8921330340cf1693f5de2
--- /dev/null
+++ b/solutions/ex09/value_iteration_agent_TODO_2.py
@@ -0,0 +1,2 @@
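+            # Act greedily by looking up the precomputed policy in the current state.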
+            action = self.policy[s]
\ No newline at end of file