Skip to content
Snippets Groups Projects
Commit 922ab3b9 authored by tuhe
Browse files

Added the tests for week 1

parent 44d77a11
No related branches found
No related tags found
No related merge requests found
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from unitgrade import Report
import irlc
# from irlc.ex01.frozen_lake import FrozenAgentDownRight
import gymnasium as gym
from unitgrade import UTestCase
from irlc.ex01.inventory_environment import InventoryEnvironment, simplified_train, RandomAgent
from unitgrade import Capturing2
import numpy as np
from gymnasium.envs.toy_text.frozen_lake import RIGHT, DOWN # The down and right-actions; may be relevant.
from irlc.ex01.pacman_hardcoded import GoAroundAgent, layout
from irlc.pacman.pacman_environment import PacmanEnvironment
from irlc import Agent, train
from irlc.ex01.bobs_friend import BobFriendEnvironment, AlwaysAction_u1, AlwaysAction_u0
class Problem1BobsFriend(UTestCase):
    # Checks the reset/step behaviour of BobFriendEnvironment for the action u=0.
    # NOTE(review): plain comments are used instead of docstrings in case the
    # grading framework displays docstrings as test titles -- confirm.

    def test_a_env_basic(self):
        # reset() returns a (state, info) pair; only the state is inspected.
        initial_state, _ = BobFriendEnvironment().reset()
        self.assertEqual(initial_state, 20, msg="Reset must return the initial state, i.e. the amount of money we start out with")

    def test_a_env_u0(self):
        # Take action 0 once from a freshly reset environment and check the
        # reward, the resulting state and the termination flag.
        environment = BobFriendEnvironment()
        environment.reset()
        next_state, reward, terminated, _, _ = environment.step(0)
        self.assertEqual(reward, 2, msg="When taking action u0, we must get a reward of 2.")
        self.assertEqual(next_state, 22, msg="When taking action u0, we must end in state x1=22")
        self.assertEqual(terminated, True, msg="After taking an action, the environment must terminate")
class Problem2BobsPolicy(UTestCase):
    # Tests the action u=1 of BobFriendEnvironment and the average accumulated
    # reward obtained by the two constant policies AlwaysAction_u0 / AlwaysAction_u1.
    # The failure messages below state exactly the values each assertion checks.

    def test_a_env_u1(self):
        # A single step with action 1 must give one of two outcomes:
        # reward 12 (ending in state 32) or reward -20 (ending in state 0).
        env = BobFriendEnvironment()
        env.reset()
        s1, r, done, _, _ = env.step(1)
        print(r)
        # assertIn reports the offending value on failure, unlike assertTrue(a or b).
        self.assertIn(r, (12, -20), msg="When taking action u1, we must get a reward of -20 or 12.")
        self.assertIn(s1, (0, 32), msg="When taking action u1, we must end in state x1=0 or x1=32")
        self.assertEqual(done, True, msg="After taking an action, the environment must terminate")

    def test_b_always_action_u0(self):
        # Average accumulated reward over 1000 episodes of always choosing u=0.
        env = BobFriendEnvironment()
        stats, _ = train(env, AlwaysAction_u0(env), num_episodes=1000)
        avg = np.mean([stat['Accumulated Reward'] for stat in stats])
        self.assertL2(avg, 2, msg="Average reward when we always take action u=0 must be 2.")

    def test_b_always_action_u1(self):
        # Average accumulated reward over 10000 episodes of always choosing u=1;
        # the outcome is random, hence the tolerance.
        env = BobFriendEnvironment()
        stats, _ = train(env, AlwaysAction_u1(env), num_episodes=10000)
        avg = np.mean([stat['Accumulated Reward'] for stat in stats])
        self.assertL2(avg, 4, tol=0.5, msg="Average reward when we always take action u=1 must be about 4.")

    def test_b_always_action_u1_starting_200(self):
        # Same as test_b_always_action_u1, but the environment is created with x0=200.
        env = BobFriendEnvironment(x0=200)
        stats, _ = train(env, AlwaysAction_u1(env), num_episodes=10000)
        avg = np.mean([stat['Accumulated Reward'] for stat in stats])
        self.assertL2(avg, -42, tol=4, msg="Average reward when we always take action u=1 starting from x0=200 must be about -42.")

    def test_b_always_action_u0_starting_200(self):
        # Same as test_b_always_action_u0, but the environment is created with x0=200.
        env = BobFriendEnvironment(x0=200)
        stats, _ = train(env, AlwaysAction_u0(env), num_episodes=10000)
        avg = np.mean([stat['Accumulated Reward'] for stat in stats])
        self.assertL2(avg, 20, msg="Average reward when we always take action u=0 starting from x0=200 must be 20.")
class Problem5PacmanHardcoded(UTestCase):
    """ Test the hardcoded pacman agent """

    def test_pacman(self):
        # Run GoAroundAgent for a single episode on the exercise layout and
        # require that the recorded episode length is below 100.
        env = PacmanEnvironment(layout_str=layout)
        agent = GoAroundAgent(env)
        stats, _ = train(env, agent, num_episodes=1)
        # assertLess reports the actual length on failure, whereas
        # assertEqual(length < 100, True) would only say "False != True".
        self.assertLess(stats[0]['Length'], 100)
class Problem6ChessTournament(UTestCase):
    # Runs the chess exercise script and checks a number it prints.

    def test_chess(self):
        """ Test the correct result in the little chess-tournament """
        from irlc.ex01.chess import main as run_chess

        # Everything main() writes to the console is collected by Capturing2.
        with Capturing2() as captured:
            run_chess()

        # Show the numbers parsed from the console output to aid debugging.
        print("Numbers extracted from console output was")
        print(captured.numbers)

        # The second-to-last printed number is compared against 26/33.
        expected = 26/33
        second_to_last = captured.numbers[-2]
        self.assertLinf(second_to_last, expected, tol=0.05)
class Problem3InventoryInventoryEnvironment(UTestCase):
    # Checks the average accumulated reward over 2000 episodes in
    # InventoryEnvironment, once with the base Agent and once with RandomAgent.
    # NOTE(review): assertLinf is called without an expected value; presumably
    # the framework compares against a stored reference -- confirm.

    def test_environment(self):
        # Episodes are generated with the base Agent class.
        inventory_env = InventoryEnvironment()
        episodes, _ = train(inventory_env, Agent(inventory_env), num_episodes=2000, verbose=False)
        accumulated = [episode['Accumulated Reward'] for episode in episodes]
        self.assertLinf(np.mean(accumulated), tol=0.6)

    def test_random_agent(self):
        # Episodes are generated with the exercise's RandomAgent.
        inventory_env = InventoryEnvironment()
        episodes, _ = train(inventory_env, RandomAgent(inventory_env), num_episodes=2000, verbose=False)
        accumulated = [episode['Accumulated Reward'] for episode in episodes]
        self.assertLinf(np.mean(accumulated), tol=0.6)
class Problem4InventoryTrain(UTestCase):
    # Checks the mean of 1000 calls to simplified_train on the inventory environment.

    def test_simplified_train(self):
        inventory_env = InventoryEnvironment()
        base_agent = Agent(inventory_env)
        # The same environment and agent objects are reused for every call.
        returns = [simplified_train(inventory_env, base_agent) for _ in range(1000)]
        mean_return = np.mean(returns)
        self.assertLinf(mean_return, tol=0.5)
# class FrozenLakeTest(UTestCase):
# def test_frozen_lake(self):
# env = gym.make("FrozenLake-v1")
# agent = FrozenAgentDownRight(env)
# s = env.reset()
# for k in range(10):
# self.assertEqual(agent.pi(s, k), DOWN if k % 2 == 0 else RIGHT)
class Week01Tests(Report):
    # Report definition for week 1: six questions, each paired with the number 10
    # (presumably its weight), so the numbers sum to 60.
    title = "Tests for week 01"
    pack_imports = [irlc]
    individual_imports = []
    # Each entry is a (test class, number) pair, listed in problem order.
    questions = [
        (Problem1BobsFriend, 10),
        (Problem2BobsPolicy, 10),
        (Problem3InventoryInventoryEnvironment, 10),
        (Problem4InventoryTrain, 10),
        (Problem5PacmanHardcoded, 10),
        (Problem6ChessTournament, 10),
    ]
if __name__ == '__main__':
    # When run as a script, evaluate the week-1 report in student mode.
    from unitgrade import evaluate_report_student

    week01_report = Week01Tests()
    evaluate_report_student(week01_report)
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment