# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from unitgrade import Report
import irlc
# from irlc.ex01.frozen_lake import FrozenAgentDownRight
import gymnasium as gym
from unitgrade import UTestCase
from irlc.ex01.inventory_environment import InventoryEnvironment, simplified_train, RandomAgent
from unitgrade import Capturing2
import numpy as np
from gymnasium.envs.toy_text.frozen_lake import RIGHT, DOWN # The down and right-actions; may be relevant.
from irlc.ex01.pacman_hardcoded import GoAroundAgent, layout
from irlc.pacman.pacman_environment import PacmanEnvironment
from irlc import Agent, train
from irlc.ex01.bobs_friend import BobFriendEnvironment, AlwaysAction_u1, AlwaysAction_u0
class Problem1BobsFriend(UTestCase):
    # Checks of BobFriendEnvironment with its default constructor arguments:
    # the state handed back by reset() and the outcome of one step with
    # action 0.

    def test_a_env_basic(self):
        # reset() is unpacked as a two-element result; only the first
        # element (the state) is checked.
        environment = BobFriendEnvironment()
        initial_state, _ = environment.reset()
        self.assertEqual(
            initial_state,
            20,
            msg="Reset must return the initial state, i.e. the amount of money we start out with",
        )

    def test_a_env_u0(self):
        # step() is unpacked as a five-element result; the last two elements
        # are ignored by this test.
        environment = BobFriendEnvironment()
        environment.reset()
        next_state, reward, terminated, _, _ = environment.step(0)
        self.assertEqual(reward, 2, msg="When taking action u0, we must get a reward of 2.")
        self.assertEqual(next_state, 22, msg="When taking action u0, we must end in state x1=22")
        self.assertEqual(terminated, True, msg="After taking an action, the environment must terminate")
class Problem2BobsPolicy(UTestCase):
    # Checks of action 1 in BobFriendEnvironment and of the average
    # accumulated reward obtained by the AlwaysAction_u0 / AlwaysAction_u1
    # agents, for the default start state and for x0=200.
    #
    # The failure messages below state the values the assertions actually
    # check, so a failing run points at the right expectation.

    def test_a_env_u1(self):
        # A single step with action 1 must give one of two outcomes and end
        # the episode.
        env = BobFriendEnvironment()
        env.reset()
        s1, r, done, _, _ = env.step(1)
        # assertIn reports the offending value on failure.
        self.assertIn(r, (12, -20), msg="When taking action u1, we must get a reward of -20 or 12.")
        self.assertIn(s1, (0, 32), msg="When taking action u1, we must end in state x1=0 or x1=32")
        self.assertEqual(done, True, msg="After taking an action, the environment must terminate")

    def test_b_always_action_u0(self):
        # Average accumulated reward over 1000 episodes, always taking u=0.
        env = BobFriendEnvironment()
        stats, _ = train(env, AlwaysAction_u0(env), num_episodes=1000)
        avg = np.mean([stat['Accumulated Reward'] for stat in stats])
        self.assertL2(avg, 2, msg="Average reward when we always take action u=0 must be 2.")

    def test_b_always_action_u1(self):
        # The outcome of u=1 varies between episodes (see test_a_env_u1), so
        # more episodes and a tolerance are used.
        env = BobFriendEnvironment()
        stats, _ = train(env, AlwaysAction_u1(env), num_episodes=10000)
        avg = np.mean([stat['Accumulated Reward'] for stat in stats])
        self.assertL2(avg, 4, tol=0.5, msg="Average reward when we always take action u=1 must be about 4.")

    def test_b_always_action_u1_starting_200(self):
        # Same as test_b_always_action_u1, but starting from x0=200.
        env = BobFriendEnvironment(x0=200)
        stats, _ = train(env, AlwaysAction_u1(env), num_episodes=10000)
        avg = np.mean([stat['Accumulated Reward'] for stat in stats])
        self.assertL2(
            avg,
            -42,
            tol=4,
            msg="Average reward when we always take action u=1 starting from x0=200 must be about -42.",
        )

    def test_b_always_action_u0_starting_200(self):
        # Same as test_b_always_action_u0, but starting from x0=200.
        env = BobFriendEnvironment(x0=200)
        stats, _ = train(env, AlwaysAction_u0(env), num_episodes=10000)
        avg = np.mean([stat['Accumulated Reward'] for stat in stats])
        self.assertL2(
            avg,
            20,
            msg="Average reward when we always take action u=0 starting from x0=200 must be 20.",
        )
class Problem5PacmanHardcoded(UTestCase):
    """ Test the hardcoded pacman agent """

    def test_pacman(self):
        # Run GoAroundAgent for one episode on the imported layout and require
        # the recorded episode length to be below 100.
        env = PacmanEnvironment(layout_str=layout)
        agent = GoAroundAgent(env)
        stats, _ = train(env, agent, num_episodes=1)
        # assertLess shows the actual length on failure, rather than the
        # uninformative "False != True".
        self.assertLess(stats[0]['Length'], 100)
class Problem6ChessTournament(UTestCase):
    def test_chess(self):
        """ Test the correct result in the little chess-tournament """
        from irlc.ex01.chess import main

        # Run the chess script while capturing what it prints.
        with Capturing2() as captured:
            main()

        # Extract the numbers from the console output.
        extracted = captured.numbers
        print("Numbers extracted from console output was")
        print(extracted)
        # The second-to-last extracted number is compared against 26/33.
        expected = 26/33
        self.assertLinf(extracted[-2], expected, tol=0.05)
class Problem3InventoryInventoryEnvironment(UTestCase):
    # Average accumulated reward over 2000 episodes of InventoryEnvironment,
    # once with the base Agent and once with RandomAgent.
    #
    # NOTE(review): assertLinf is called without an explicit expected value;
    # presumably the test framework supplies the reference -- confirm against
    # unitgrade's documentation.

    def test_environment(self):
        inventory_env = InventoryEnvironment()
        episodes, _ = train(inventory_env, Agent(inventory_env), num_episodes=2000, verbose=False)
        accumulated = [episode['Accumulated Reward'] for episode in episodes]
        self.assertLinf(np.mean(accumulated), tol=0.6)

    def test_random_agent(self):
        inventory_env = InventoryEnvironment()
        episodes, _ = train(inventory_env, RandomAgent(inventory_env), num_episodes=2000, verbose=False)
        accumulated = [episode['Accumulated Reward'] for episode in episodes]
        self.assertLinf(np.mean(accumulated), tol=0.6)
class Problem4InventoryTrain(UTestCase):
    # Averages the value returned by simplified_train over 1000 calls that
    # share one environment and one base Agent.

    def test_simplified_train(self):
        inventory_env = InventoryEnvironment()
        base_agent = Agent(inventory_env)
        outcomes = [simplified_train(inventory_env, base_agent) for _ in range(1000)]
        # NOTE(review): no explicit expected value is passed; presumably the
        # framework supplies the reference -- confirm.
        self.assertLinf(np.mean(outcomes), tol=0.5)
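# NOTE: disabled test, kept for reference; the FrozenAgentDownRight import it
# needs is also commented out at the top of this file.
# class FrozenLakeTest(UTestCase):
#     def test_frozen_lake(self):
#         env = gym.make("FrozenLake-v1")
#         agent = FrozenAgentDownRight(env)
#         s = env.reset()
#         for k in range(10):
#             self.assertEqual(agent.pi(s, k), DOWN if k % 2 == 0 else RIGHT)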
class Week01Tests(Report):
    # Report for week 01: lists the question classes together with the
    # integer paired with each of them.
    # NOTE(review): an earlier comment on this class read "240 total", but the
    # values listed below add up to 60 -- confirm which is intended.
    title = "Tests for week 01"
    pack_imports = [irlc]
    individual_imports = []
    questions = [
        (Problem1BobsFriend, 10),
        (Problem2BobsPolicy, 10),
        (Problem3InventoryInventoryEnvironment, 10),
        (Problem4InventoryTrain, 10),
        (Problem5PacmanHardcoded, 10),
        (Problem6ChessTournament, 10),  # Week 1: Everything
    ]
# Script entry point: build the week-01 report and pass it to unitgrade's
# student evaluation routine.
if __name__ == '__main__':
    from unitgrade import evaluate_report_student

    report = Week01Tests()
    evaluate_report_student(report)