Skip to content
Snippets Groups Projects
Commit d9906643 authored by tuhe's avatar tuhe
Browse files

Solutions for week 9

parent 374f8067
No related branches found
No related tags found
No related merge requests found
Showing with 24 additions and 0 deletions
# Terminal test: true exactly when `state` equals 0 or `self.goal`.
return state in [0, self.goal]
\ No newline at end of file
# Available actions in s: the integers 1 .. min(s, self.goal - s), inclusive.
# The list is empty when s == 0 or s == self.goal, since the min is then 0.
return list( range(1, min(s, self.goal - s) + 1))
\ No newline at end of file
# Reward is 1 only when this move lands exactly on the goal. Compare against
# self.goal (the attribute used for the terminal test and the action range)
# instead of a literal 100, so instances with another goal are still rewarded.
r = 1 if s + a == self.goal else 0
# Outcomes are keyed by (next_state, reward).
WIN = (s + a, r)
LOSS = (s - a, 0)
# When WIN and LOSS are the same tuple the two dict keys would collide and one
# probability would be lost, so return a single outcome with probability 1.
outcome_dict = {WIN: self.p_heads, LOSS: 1 - self.p_heads} if WIN != LOSS else {WIN: 1.}
\ No newline at end of file
# For every action a in mdp.A(s), compute the expected one-step return
#   sum over ((sp, r), p) in mdp.Psr(s, a) of  p * (r + gamma * v[sp]),
# where the gamma * v[sp] term is replaced by 0 when sp is terminal, so v is
# never indexed at a terminal state.
q_dict = {a: sum([p*(r+ (gamma*v[sp] if not mdp.is_terminal(sp) else 0)) for (sp,r), p in mdp.Psr(s,a).items()]) for a in mdp.A(s)}
\ No newline at end of file
# NOTE(review): this hunk still contains the exercise placeholder; no solution
# code is visible here. Confirm whether a solution was meant to replace it.
raise NotImplementedError("Insert your solution and remove this error.")
\ No newline at end of file
# Expected immediate reward of taking a in s: sum of r * p over the
# ((sp, r), p) entries of mdp.Psr(s, a). The next state sp is not used.
expected_reward = sum( [r * p for (sp, r), p in mdp.Psr(s, a).items() ] )
\ No newline at end of file
# Weighted sum of Q[s, a] over the (action, weight) pairs in `policy`.
# Presumably `policy` maps each action to its probability in s - confirm.
V_s = sum( [Q[s,a] * p for a, p in policy.items()] )
\ No newline at end of file
# Action values for s under the current value estimate v.
q = value_function2q_function(mdp, s, gamma, v)
# The right-hand side is evaluated first, so v_ receives the OLD v[s] and
# v[s] is overwritten in place with the pi[s]-weighted sum of q.
v_, v[s] = v[s], sum( [q[a] * pi_a for a,pi_a in pi[s].items()] )
\ No newline at end of file
# Visit every non-terminal state once, in a random order drawn by
# np.random.permutation.
for s in [mdp.nonterminal_states[i] for i in np.random.permutation(len(mdp.nonterminal_states))]:
old_a = pi[s] # The best action we would take under the current policy
# Action values for s under the current value estimate V.
Qs = value_function2q_function(mdp, s, gamma, V)
# Greedy action; on ties max() keeps the first maximal key in dict order.
pi[s] = max(Qs, key=Qs.get)
# Any changed action marks the policy as not yet stable.
if old_a != pi[s]:
policy_stable = False
\ No newline at end of file
# Parsed as: v, V[s] = V[s], (max(...) if len(mdp.A(s)) > 0 else 0).
# v keeps the old V[s]; V[s] becomes the largest action value in s, or 0 when
# s has no actions (max() over an empty collection would raise).
v, V[s] = V[s], max(value_function2q_function(mdp, s, gamma, V).values()) if len(mdp.A(s)) > 0 else 0
# Track the largest absolute change seen so far.
Delta = max(Delta, np.abs(v - V[s]))
\ No newline at end of file
# Action values for s, with 1e-8 * a subtracted when the action is an int.
# The tiny penalty makes the argmax below prefer the smaller integer action
# among (near-)equal values; non-int actions are left unchanged.
Q = {a: v-(1e-8*a if isinstance(a, int) else 0) for a,v in value_function2q_function(mdp, s, gamma, V).items()}
# Greedy action with respect to the adjusted values.
pi[s] = max(Q, key=Q.get)
\ No newline at end of file
# Run value iteration once and keep both returned objects on the instance.
# Assumes value_iteration returns (policy, value function) in that order - TODO confirm.
self.policy, self.v = value_iteration(mdp, gamma=gamma, **kwargs)
\ No newline at end of file
# Look up the stored action for state s; raises KeyError/IndexError if s is
# not present in self.policy.
action = self.policy[s]
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment