Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
import numpy as np
import pygame
from irlc.ex01.inventory_environment import InventoryEnvironment
from irlc.utils.graphics_util_pygame import formatColor
class VizInventoryEnvironment(InventoryEnvironment):
    """Variant of the inventory environment that also drives a pygame visualization.

    Most of the extra code wires up rendering and key bindings. The one
    behavioral difference is in ``step``: the transition taken at ``k == N - 1``
    is reported with ``terminated=False`` and the environment instead remembers
    that it is in the terminal state (``in_term_state``). The *following* call to
    ``step`` returns the unchanged state with reward 0 and ``terminated=True``.
    This gives callers one extra step in which the final state can be rendered
    before they reset the environment.
    """
    metadata = {'render_modes': ['human', 'rgb_array'],
                'render_fps': 30
                }

    def __init__(self, N=2, render_mode='human'):
        """Create the environment.

        :param N: Planning horizon, forwarded to ``InventoryEnvironment``.
        :param render_mode: Passed on to the viewer's ``blit`` when rendering.
        """
        super(VizInventoryEnvironment, self).__init__(N)
        self.render_mode = render_mode
        self.viewer = None            # Created lazily on the first call to render().
        self.in_term_state = False    # True between the last real transition and the extra terminal step.

    def get_keys_to_action(self):
        """Return a dict mapping 1-tuples of pygame key codes (0, 1, 2, ...) to action indices.

        One entry is produced per action in ``self.action_space``; only the
        keys ``0`` through ``3`` are available.
        """
        digit_keys = [pygame.K_0, pygame.K_1, pygame.K_2, pygame.K_3]
        return {(digit_keys[i],): i for i in range(self.action_space.n)}

    def reset(self):
        """Reset the underlying environment, clear the per-step display data and render.

        :return: The ``(state, info)`` pair returned by the parent ``reset``.
        """
        s, info = super().reset()
        self.s = s
        self.action = None
        self.w = None
        self.reward = None
        # A reset may arrive while the delayed terminal step is still pending;
        # without clearing the flag the next episode would terminate on its first step.
        self.in_term_state = False
        self.render()
        return s, info

    def step(self, a):
        """Apply action ``a`` and return ``(state, reward, terminated, truncated, info)``.

        If the previous call performed the final transition, this call changes
        nothing except the step counter and returns reward 0 with
        ``terminated=True``. Otherwise a disturbance ``w`` is sampled from
        {0, 1, 2} with probabilities (.1, .7, .2), the state is updated and
        clamped to [0, 2], and ``terminated`` is always reported as ``False``.
        """
        self.action = a
        print(f"Step using {a=}")  # NOTE(review): debug trace kept to preserve existing output.
        if self.in_term_state:
            self.reward = 0
            self.k += 1
            self.in_term_state = False
            return self.s, 0, True, False, {}
        else:
            # s_next, reward, terminated, trunctated, info = super().step(a)
            w = np.random.choice(3, p=(.1, .7, .2))  # Generate random disturbance
            self.w = w
            s_next = max(0, min(2, self.s - w + a))
            reward = -(a + (self.s + a - w) ** 2)  # reward = -cost = -g_k(x_k, u_k, w_k)
            terminated = self.k == self.N - 1  # Have we terminated? (i.e. is k==N-1)
            self.s = s_next  # update environment state
            self.k += 1
            if terminated:
                # Delay the terminated signal by one step (see class docstring).
                self.in_term_state = True
            self.reward = reward
            return s_next, reward, False, False, {}

    def render(self, mode='human', agent=None, prev_action=None, reward=None):
        """Draw the current episode and return the result of the viewer's ``blit``.

        The parameters are accepted for interface compatibility and are not
        used; the values shown come from the attributes recorded by ``reset``
        and ``step``.
        """
        if self.viewer is None:
            self.viewer = InventoryViewer(self, frames_per_second=self.metadata['render_fps'])
        print(f"render: {self.action=}")  # NOTE(review): debug trace kept to preserve existing output.
        # `agent` is never assigned by this class (presumably attached from outside —
        # TODO confirm), so fall back to None instead of raising AttributeError.
        current_agent = getattr(self, 'agent', None)
        self.viewer.update(current_agent, state=self.s, k=self.k, action=self.action, reward=self.reward, w=self.w, restart=self.action is None)
        return self.viewer.blit(render_mode=self.render_mode)  # (return_rgb_array=mode == 'rgb_array')

    def close(self):
        """Close the viewer if one was created; safe to call repeatedly or before any render."""
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None
from irlc.pacman.pacman_resources import WHITE, BLACK, Ghost
from irlc.utils.graphics_util_pygame import GraphicsUtilGym
class InventoryViewer:
    """Draws an inventory episode as a row of ``Factory`` panels, one per time step."""
    scale = 400  # Scale of a single bar.
    width = 0.4 * scale  # Width of a bar.

    def __init__(self, inventory: InventoryEnvironment, frames_per_second=None):
        """Open the graphics window sized for ``inventory.N`` steps.

        :param inventory: Environment being visualized; its ``N`` determines the horizontal extent.
        :param frames_per_second: Forwarded to ``begin_graphics``.
        """
        # print("BEGINNING GRAPHICS")
        self.k = 0
        self.states = []
        self.actions = []
        self.factories = []
        self.inventory = inventory

        # Extent of the local (world) coordinate system.
        x_lo, x_hi = -0.2, inventory.N*2 + 1.4
        y_lo, y_hi = -0.4, 1.4
        span_x = x_hi - x_lo
        span_y = y_hi - y_lo

        self.ga = GraphicsUtilGym()
        window_width = 1300
        pixels_per_unit = window_width / span_x
        self.ga.begin_graphics(
            window_width,
            span_y * pixels_per_unit,
            # The two y-limits are passed in (ymax, ymin) order.
            local_xmin_xmax_ymin_ymax=(x_lo, x_hi, y_hi, y_lo),
            frames_per_second=frames_per_second,
            color=formatColor(0, 0, 0),
            title=f"Inventory environment using N={inventory.N}",
        )
        self.last_action = None
        self.agent = None
        self.last_reward = None
        self.scale = pixels_per_unit  # Overrides the class-level default.

    def close(self):
        """Close the underlying graphics window."""
        self.ga.close()

    def blit(self, render_mode='human'):
        """Forward to the graphics adaptor's ``blit`` and return its result."""
        return self.ga.blit(render_mode=render_mode)

    def master_render(self):
        """Redraw the background and every panel, plus episode statistics when available."""
        self.ga.draw_background()
        for panel in self.factories:
            panel.render()

        # Statistics are only shown when the environment carries `_interactive_data`
        # containing an 'avg_reward_per_episode' entry.
        if not hasattr(self.inventory, '_interactive_data'):
            return
        stats = self.inventory._interactive_data
        if 'avg_reward_per_episode' not in stats:
            return

        avg_reward = stats['avg_reward_per_episode']
        episodes = stats['completed_episodes']
        self.ga.text("sadf", (0.1, -0.1), WHITE,
                     contents=f"Completed episodes = {episodes}",
                     size=12, style='bold', anchor='w')
        self.ga.text("sadf", (0.1, -0.2), WHITE,
                     contents=f"Average reward per episode = {avg_reward:.2f}",
                     size=12, style='bold', anchor='w')

    def update(self, agent, k, state, action, reward, w, restart=False):
        """Record the latest transition and redraw.

        :param agent: Stored on the viewer.
        :param k: Current step index; a new panel is appended when none exists for it yet.
        :param state: State shown in any panel created by this call.
        :param action: Action attached to the panel at index ``k - 1``.
        :param reward: Reward attached to the panel at index ``k - 1``.
        :param w: Disturbance attached to the panel at index ``k - 1``.
        :param restart: When true, discard all panels and start over with a single panel for step 0.
        """
        self.agent = agent
        if restart:
            # print("Restarting the sim now..")
            self.factories = [Factory(graphics_adaptor=self.ga, x=0, y=0, k=0, state=state)]

        if len(self.factories) <= k:
            new_panel = Factory(graphics_adaptor=self.ga, x=k*2, y=0, k=k, state=state)
            self.factories.append(new_panel)

        if len(self.factories) <= self.inventory.N+1:
            # print("Setting actions.")
            # Index k-1 wraps to the last panel when k == 0.
            previous = self.factories[k-1]
            previous.action = action
            previous.w = w
            previous.reward = reward

        self.master_render()
class Factory:
    """One panel of the visualization: the stock at step ``k`` plus the transition leaving it."""

    def __init__(self, graphics_adaptor, x, y, order=1, scale=10., k=1, state=2):
        """Store the drawing parameters.

        :param graphics_adaptor: Object used for all drawing calls.
        :param x: Left edge of the panel in local coordinates.
        :param y: Stored as given; ``render`` draws at a fixed vertical position.
        :param order: Accepted but not stored.
        :param scale: Stored as given.
        :param k: Step index shown in the captions.
        :param state: Number of stock units drawn as circles.
        """
        self.ga = graphics_adaptor
        self.x, self.y = x, y
        self.scale = scale
        self.s = state
        self.k = k
        # Filled in later by the viewer once the transition out of this step is known.
        self.action = None
        self.reward = None
        self.w = None

    def render(self):
        """Draw the panel: box, captions, stock circles, and (when set) action, disturbance and reward."""
        ga = self.ga
        left = self.x

        # Box outline with the day and state captions.
        ga.rectangle(color=WHITE, x=left, y=0, width=1, height=1, border=1)
        ga.text("sadf", (left + 0.5, 1.1), WHITE,
                contents=f"day k = {self.k}", size=12, style='bold', anchor='c')
        ga.text("sadf", (left + 0.5, 0.8), WHITE,
                contents=f"state s_{self.k} = {self.s}", size=12, style='bold', anchor='c')

        box_width = 1
        lift = 0.1
        radius = box_width / 3 / 2
        # One circle per unit in stock, centred horizontally within the box.
        for slot in range(self.s):
            centre_x = left + radius * 2 * slot + radius + box_width * (3 - self.s) / 3 / 2
            ga.circle("sadf", (centre_x, radius), 0.8 * radius * ga.scale(), fillColor=WHITE)
        pixel_scale = ga.scale()

        has_action = self.action is not None
        if has_action:
            ga.text("sdaf", (left + 1.5, 0.8 + lift), WHITE,
                    contents=f"action = {self.action}", size=12, style="bold", anchor="c")
            # self.ga.line("sadf", (self.x+1.1, 0.5 + dh), (self.x+1.8, 0.5+dh), color=WHITE, width=2)
            # Arrow pointing to the next panel: a shaft and two short strokes at the tip.
            tip_x = left + 1.9
            tip_y = 0.5 + lift
            ga.line("sadf", (left + 1.1, 0.5 + lift), (tip_x, tip_y), color=WHITE, width=2)
            for barb_y in (tip_y - 0.05, tip_y + 0.05):
                ga.line("sadf", (tip_x, tip_y), (tip_x - 0.05, barb_y), color=WHITE, width=2)

        from irlc.utils.graphics_util_pygame import Object

        if has_action:
            # One truck image per unit ordered.
            for idx in range(self.action):
                truck = Object(file="truck.jpg", graphics=ga, image_width=0.25 * pixel_scale)
                self.truck = truck
                truck.move_center_to_xy(left + 1 + 0.2 + idx * 0.2, 0.6 + lift)
                truck.flip()
                truck.blit(ga.surf)

        if self.w is not None:
            ga.text("asdf", (left + 1.5, lift+0.05), WHITE,
                    contents=f"w_{self.k} = {self.w}", size=12, style="bold", anchor="c")
            # One customer image per unit of the disturbance w.
            for idx in range(self.w):
                customer = Object(file="customer.jpg", graphics=ga, image_width=0.25*pixel_scale)
                self.customer = customer
                customer.move_center_to_xy(left + 1 + 0.2 + idx * 0.2, 0.3 + lift)
                customer.blit(ga.surf)

        if self.reward is not None:
            ga.text("asdf", (left + 1.5, lift-0.02), WHITE,
                    contents=f"reward = {self.reward}", size=12, style="bold", anchor="c")