# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
#
# Unitgrade tests for week 01 (irlc/tests/tests_week01.py): Bob's friend, the inventory environment,
# the hardcoded Pacman agent and the chess tournament.
#
# NOTE(review): the commit that added this module also updated
# irlc/tests/unitgrade_data/<QuestionClass>.pkl for each of the six question classes below
# (Problem1BobsFriend ... Problem6ChessTournament). Presumably these hold the stored expected values
# used by the assertLinf calls that pass no explicit target -- confirm against unitgrade.
# NOTE(review): explanations below are `#` comments rather than new docstrings, because unitgrade
# appears to derive display titles from class/method docstrings -- confirm before converting them.
from unitgrade import Report
import irlc
# from irlc.ex01.frozen_lake import FrozenAgentDownRight
import gymnasium as gym
from unitgrade import UTestCase
from irlc.ex01.inventory_environment import InventoryEnvironment, simplified_train, RandomAgent
from unitgrade import Capturing2
import numpy as np
from gymnasium.envs.toy_text.frozen_lake import RIGHT, DOWN  # The down and right-actions; may be relevant.
from irlc.ex01.pacman_hardcoded import GoAroundAgent, layout
from irlc.pacman.pacman_environment import PacmanEnvironment
from irlc import Agent, train
from irlc.ex01.bobs_friend import BobFriendEnvironment, AlwaysAction_u1, AlwaysAction_u0


def _mean_accumulated_reward(stats):
    # Average the 'Accumulated Reward' entry over the per-episode records returned by train().
    return np.mean([stat['Accumulated Reward'] for stat in stats])


class Problem1BobsFriend(UTestCase):
    # Checks reset() and the deterministic action u0 of BobFriendEnvironment.

    def test_a_env_basic(self):
        env = BobFriendEnvironment()
        s0, _ = env.reset()
        self.assertEqual(s0, 20, msg="Reset must return the initial state, i.e. the amount of money we start out with")

    def test_a_env_u0(self):
        env = BobFriendEnvironment()
        env.reset()
        s1, r, done, _, _ = env.step(0)
        self.assertEqual(r, 2, msg="When taking action u0, we must get a reward of 2.")
        self.assertEqual(s1, 22, msg="When taking action u0, we must end in state x1=22")
        self.assertEqual(done, True, msg="After taking an action, the environment must terminate")


class Problem2BobsPolicy(UTestCase):
    # Checks the random action u1 and the average reward of the two constant policies.

    def test_a_env_u1(self):
        env = BobFriendEnvironment()
        env.reset()
        s1, r, done, _, _ = env.step(1)
        # Action u1 has two accepted outcomes; the messages list exactly the values that are checked.
        self.assertIn(r, (12, -20), msg="When taking action u1, we must get a reward of -20 or 12.")
        self.assertIn(s1, (0, 32), msg="When taking action u1, we must end in state x1=0 or x1=32")
        self.assertEqual(done, True, msg="After taking an action, the environment must terminate")

    def test_b_always_action_u0(self):
        env = BobFriendEnvironment()
        stats, _ = train(env, AlwaysAction_u0(env), num_episodes=1000)
        avg = _mean_accumulated_reward(stats)
        self.assertL2(avg, 2, msg="Average reward when we always take action u=0 must be 2.")

    def test_b_always_action_u1(self):
        env = BobFriendEnvironment()
        stats, _ = train(env, AlwaysAction_u1(env), num_episodes=10000)
        avg = _mean_accumulated_reward(stats)
        # The outcome of u1 is random, hence the explicit tolerance around the target.
        self.assertL2(avg, 4, tol=0.5, msg="Average reward when we always take action u=1 must be about 4.")

    def test_b_always_action_u1_starting_200(self):
        env = BobFriendEnvironment(x0=200)
        stats, _ = train(env, AlwaysAction_u1(env), num_episodes=10000)
        avg = _mean_accumulated_reward(stats)
        self.assertL2(avg, -42, tol=4,
                      msg="Average reward when we always take action u=1 and start with x0=200 must be about -42.")

    def test_b_always_action_u0_starting_200(self):
        env = BobFriendEnvironment(x0=200)
        stats, _ = train(env, AlwaysAction_u0(env), num_episodes=10000)
        avg = _mean_accumulated_reward(stats)
        self.assertL2(avg, 20,
                      msg="Average reward when we always take action u=0 and start with x0=200 must be 20.")


class Problem5PacmanHardcoded(UTestCase):
    """ Test the hardcoded pacman agent """
    def test_pacman(self):
        env = PacmanEnvironment(layout_str=layout)
        agent = GoAroundAgent(env)
        stats, _ = train(env, agent, num_episodes=1)
        # assertLess reports the actual episode length on failure (instead of "False != True").
        self.assertLess(stats[0]['Length'], 100, msg="The episode must have a length below 100.")


class Problem6ChessTournament(UTestCase):
    def test_chess(self):
        """ Test the correct result in the little chess-tournament """
        from irlc.ex01.chess import main
        with Capturing2() as c:
            main()
        # Extract the numbers from the console output.
        print("Numbers extracted from console output was")
        print(c.numbers)
        # Fail with a readable message rather than an IndexError if too little was printed.
        self.assertGreaterEqual(len(c.numbers), 2, msg="main() must print at least two numbers to the console.")
        # The second-to-last printed number is compared against 26/33.
        self.assertLinf(c.numbers[-2], 26/33, tol=0.05)


class Problem3InventoryInventoryEnvironment(UTestCase):
    # Average accumulated reward on InventoryEnvironment for the base Agent and for RandomAgent.
    # No explicit target is passed to assertLinf; presumably the value is compared against
    # unitgrade's stored expected result -- confirm.

    def test_environment(self):
        env = InventoryEnvironment()
        # This test uses the base irlc Agent; RandomAgent is covered by test_random_agent.
        stats, _ = train(env, Agent(env), num_episodes=2000, verbose=False)
        avg_reward = _mean_accumulated_reward(stats)
        self.assertLinf(avg_reward, tol=0.6)

    def test_random_agent(self):
        env = InventoryEnvironment()
        stats, _ = train(env, RandomAgent(env), num_episodes=2000, verbose=False)
        avg_reward = _mean_accumulated_reward(stats)
        self.assertLinf(avg_reward, tol=0.6)


class Problem4InventoryTrain(UTestCase):
    # Average of simplified_train(env, agent) over 1000 calls, using the base Agent.

    def test_simplified_train(self):
        env = InventoryEnvironment()
        agent = Agent(env)
        avg_reward_simplified_train = np.mean([simplified_train(env, agent) for _ in range(1000)])
        self.assertLinf(avg_reward_simplified_train, tol=0.5)

# class FrozenLakeTest(UTestCase):
#     def test_frozen_lake(self):
#         env = gym.make("FrozenLake-v1")
#         agent = FrozenAgentDownRight(env)
#         s = env.reset()
#         for k in range(10):
#             self.assertEqual(agent.pi(s, k), DOWN if k % 2 == 0 else RIGHT)


# NOTE(review): the original comment here said "240 total", but the six weights below sum to 60 --
# confirm which figure is intended.
class Week01Tests(Report):
    title = "Tests for week 01"
    pack_imports = [irlc]
    individual_imports = []
    # (question class, weight) pairs, listed in problem order.
    questions = [
        (Problem1BobsFriend, 10),
        (Problem2BobsPolicy, 10),
        (Problem3InventoryInventoryEnvironment, 10),
        (Problem4InventoryTrain, 10),
        (Problem5PacmanHardcoded, 10),
        (Problem6ChessTournament, 10),  # Week 1: Everything
    ]

if __name__ == '__main__':
    from unitgrade import evaluate_report_student
    evaluate_report_student(Week01Tests())