Commit 6633f4c0 authored by Håkon Harnes

semi-completed assignment 8

parent 3f960421
......@@ -24,7 +24,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 42,
"metadata": {
"scrolled": true
},
......@@ -38,7 +38,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 43,
"metadata": {
"scrolled": true
},
......@@ -49,7 +49,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 44,
"metadata": {
"scrolled": true
},
......@@ -57,20 +57,19 @@
"source": [
"# Hyperparameters \n",
"BUCKETS = (8, 8) \n",
"EPISODES = 100000\n",
"EPISODES = 5000\n",
"MIN_LEARNING_RATE = 0.1\n",
"MIN_EPSILON = 0.5\n",
"DISCOUNT = 0.95\n",
"MIN_EPSILON = 0.1\n",
"DISCOUNT = 1.0\n",
"DECAY = 500\n",
"\n",
"# Visualization variables \n",
"SHOW_ENV = 10000\n",
"SHOW_STATS = 1000"
"SHOW_STATS = 500"
]
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 45,
"metadata": {
"scrolled": true
},
......@@ -82,7 +81,7 @@
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 46,
"metadata": {
"scrolled": true
},
......@@ -94,7 +93,7 @@
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": 47,
"metadata": {
"scrolled": true
},
......@@ -115,7 +114,7 @@
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": 48,
"metadata": {
"scrolled": true
},
......@@ -131,7 +130,7 @@
},
{
"cell_type": "code",
"execution_count": 30,
"execution_count": 49,
"metadata": {
"scrolled": true
},
......@@ -144,7 +143,7 @@
},
{
"cell_type": "code",
"execution_count": 31,
"execution_count": 50,
"metadata": {
"scrolled": true
},
......@@ -157,7 +156,7 @@
},
{
"cell_type": "code",
"execution_count": 32,
"execution_count": 51,
"metadata": {},
"outputs": [],
"source": [
......@@ -168,59 +167,102 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 52,
"metadata": {
"scrolled": false
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Episode Score\n",
"500\t 11.2%\n",
"1000\t 45.8%\n",
"1500\t 91.4%\n",
"2000\t 99.0%\n",
"2500\t 99.6%\n",
"3000\t 100.0%\n",
"3500\t 100.0%\n",
"4000\t 100.0%\n",
"4500\t 100.0%\n",
"5000\t 100.0%\n"
]
}
],
"source": [
"successfulEpisode = -1 \n",
"print('Episode Score')\n",
"completionCount = 0 \n",
"\n",
"for episode in range(EPISODES):\n",
" render = episode % SHOW_ENV == 0 \n",
" \n",
" # Resets the state \n",
" current_state = discretize_state(env.reset())\n",
"\n",
" current_state = tuple(env.reset()) \n",
" \n",
" # Updates learning rate and epsilon \n",
" learning_rate = get_learning_rate(episode)\n",
" epsilon = get_epsilon(episode)\n",
" \n",
" # Plays the game \n",
" # Runs through an episode \n",
" done = False\n",
" while not done:\n",
" \n",
" # Renders the current state \n",
" if render:\n",
" env.render(np.argmax(q_table[current_state]))\n",
" \n",
" action = choose_action(current_state) # Chooses action\n",
" obs, reward, done, _ = env.step(action) # Performs action \n",
" new_state = discretize_state(obs) # Discretizes state\n",
" new_state = tuple(obs) # Discretizes new state\n",
" update_q(current_state, action, reward, new_state) # Updates Q-Table\n",
" current_state = new_state # Updates the current state\n",
" \n",
" if reward == 1.0: \n",
" successfulEpisode = episode \n",
" print(f'Completed @ {episode}')\n",
" \n",
" if reward == 10.0: completionCount += 1 \n",
" \n",
" # Prints some statistics for every 50th episode \n",
" if episode % SHOW_STATS == 0: print(f'Episode {episode}')\n",
"\n",
"# Prints the result \n",
"if successfulEpisode > 0:\n",
" print(f'\\nCompleted on episode {successfulEpisode}')\n",
"else:\n",
" print('\\nUnable to complete game')"
" # Prints some statistics \n",
" if (episode + 1) % SHOW_STATS == 0: \n",
" print(f'{episode + 1}\\t {round((completionCount / SHOW_STATS) * 100, 2)}%')\n",
" completionCount = 0 "
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[1, 0]\n",
"[1, 0]\n",
"[2, 0]\n",
"[3, 0]\n",
"[4, 0]\n",
"[5, 0]\n",
"[6, 0]\n",
"[6, 1]\n",
"[6, 2]\n",
"[6, 3]\n",
"[6, 4]\n",
"[6, 5]\n",
"[6, 6]\n"
]
}
],
"source": [
"current_state = tuple(env.reset()) \n",
"\n",
"done = False \n",
"while not done:\n",
" \n",
" # Chooses and performs action\n",
" action = choose_action(current_state) \n",
" obs, reward, done, _ = env.step(action) \n",
" \n",
" # Sets new state\n",
" new_state = tuple(obs)\n",
" current_state = new_state \n",
" \n",
" # Renders frame \n",
" env.render()\n",
" print(obs)"
]
}
],
"metadata": {
......@@ -228,18 +270,6 @@
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.12"
}
},
"nbformat": 4,
......
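The training cell relies on four helpers (get_learning_rate, get_epsilon, choose_action, update_q) defined in notebook cells outside this diff. Below is a minimal sketch of plausible definitions, assuming a log-decay schedule keyed to DECAY and a standard one-step Q-learning backup; the decay shape, the module-level epsilon/learning_rate globals, and ACTION_COUNT are assumptions, not the committed code.

# Sketch of the helper cells the training loop assumes (assumed, not committed).
import math
import random

import numpy as np

BUCKETS = (8, 8)
ACTION_COUNT = 4                # UP, DOWN, LEFT, RIGHT
MIN_LEARNING_RATE = 0.1
MIN_EPSILON = 0.1
DISCOUNT = 1.0
DECAY = 500

q_table = np.zeros(BUCKETS + (ACTION_COUNT,))
epsilon = 1.0                   # overwritten each episode by get_epsilon()
learning_rate = 1.0             # overwritten each episode by get_learning_rate()

def get_epsilon(episode):
    # Assumed log-decay toward MIN_EPSILON over roughly DECAY episodes
    return max(MIN_EPSILON, min(1.0, 1.0 - math.log10((episode + 1) / DECAY)))

def get_learning_rate(episode):
    # Assumed to share the epsilon decay shape, floored at MIN_LEARNING_RATE
    return max(MIN_LEARNING_RATE, min(1.0, 1.0 - math.log10((episode + 1) / DECAY)))

def choose_action(state):
    # Epsilon-greedy selection over the Q-Table
    if random.random() < epsilon:
        return random.randrange(ACTION_COUNT)
    return int(np.argmax(q_table[state]))

def update_q(state, action, reward, new_state):
    # One-step Q-learning backup toward reward plus discounted best successor value
    q_table[state][action] += learning_rate * (
        reward + DISCOUNT * np.max(q_table[new_state]) - q_table[state][action]
    )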
......@@ -23,17 +23,19 @@ class GridWorld:
    LEFT = 2
    RIGHT = 3

    def __init__(self, WINDOW_SIZE, RECTANGLE_COUNT, GRID_GAP):
        # Initializes parameters
        self.DISPLAY = pygame.display.set_mode((WINDOW_SIZE, WINDOW_SIZE))
        self.ROW_COUNT = self.COL_COUNT = int(math.sqrt(RECTANGLE_COUNT))
        self.OBSTACLE_POSITION = [self.ROW_COUNT - 4, self.COL_COUNT - 2]
        self.GOAL_POSITION = [self.ROW_COUNT - 2, self.COL_COUNT - 2]
        self.RECTANGLE_SIZE = WINDOW_SIZE / self.COL_COUNT
        self.RECTANGLE_COUNT = RECTANGLE_COUNT
        self.MAX_MOVES = RECTANGLE_COUNT;
        self.moves = 0;
        self.GRID_GAP = GRID_GAP
        self.MAX_MOVES = (2/3) * self.RECTANGLE_COUNT
        self.moves = 0

        # Fills the display (background) as black
        self.DISPLAY.fill(self.BLACK)
......@@ -46,32 +48,24 @@ class GridWorld:
    def reset(self):
        self.PLAYER_POSITION = [0, 0]
        self.moves = 0;
        self.moves = 0
        return self.PLAYER_POSITION

    def render(self, action):
    def render(self):
        pygame.init()
        self.drawGrid(action)
        self.drawGrid()
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                sys.exit()
        pygame.display.update()
        pygame.time.delay(1000)
    def drawGrid(self, action):
        # Finds the next position according to the Q-Table
        if action == self.UP: qTablePos = [ self.PLAYER_POSITION[0] - 1, self.PLAYER_POSITION[1] ]
        if action == self.DOWN: qTablePos = [ self.PLAYER_POSITION[0] + 1, self.PLAYER_POSITION[1] ]
        if action == self.LEFT: qTablePos = [ self.PLAYER_POSITION[0], self.PLAYER_POSITION[1] - 1]
        if action == self.RIGHT: qTablePos = [ self.PLAYER_POSITION[0], self.PLAYER_POSITION[1] + 1]
    def drawGrid(self):
        for row in range(self.ROW_COUNT):
            for col in range(self.COL_COUNT):
                rectangle = pygame.Rect(col * self.RECTANGLE_SIZE, row * self.RECTANGLE_SIZE,
......@@ -81,37 +75,37 @@ class GridWorld:
                    color = self.RED
                elif [row, col] == self.GOAL_POSITION:
                    color = self.GREEN
                elif [row, col] == qTablePos:
                    color = self.LIGHT_RED
                elif [row, col] == self.OBSTACLE_POSITION:
                    color = self.BLACK
                else:
                    color = self.WHITE

                pygame.draw.rect(self.DISPLAY, color, rectangle)
                # pygame.draw.polygon(self.DISPLAY, self.BLACK, [[col * self.RECTANGLE_SIZE, row * self.RECTANGLE_SIZE], [0, 100], [100, 50]])
    def step(self, action):
        self.moves += 1
        newPos = self.PLAYER_POSITION.copy()
        oldPos = self.PLAYER_POSITION.copy()
        done = False

        # Moves the player
        if action == self.UP: self.PLAYER_POSITION[0] -= 1
        if action == self.DOWN: self.PLAYER_POSITION[0] += 1
        if action == self.LEFT: self.PLAYER_POSITION[1] -= 1
        if action == self.RIGHT: self.PLAYER_POSITION[1] += 1

        # Checks if new position is valid
        if self.PLAYER_POSITION[0] < 0 or self.PLAYER_POSITION[0] >= self.ROW_COUNT: done = True
        elif self.PLAYER_POSITION[1] < 0 or self.PLAYER_POSITION[1] >= self.COL_COUNT: done = True
        else: done = False

        # Checks if the player has reached the goal
        if self.PLAYER_POSITION == self.GOAL_POSITION:
            reward = 1.0
            done = True
        else: reward = 0.0

        # Makes sure the solution doesn't use too many moves
        self.moves += 1
        if self.moves > self.MAX_MOVES:
            reward = 0.0
            done = True
        if action == self.UP: newPos[0] = self.PLAYER_POSITION[0] - 1
        if action == self.DOWN: newPos[0] = self.PLAYER_POSITION[0] + 1
        if action == self.LEFT: newPos[1] = self.PLAYER_POSITION[1] - 1
        if action == self.RIGHT: newPos[1] = self.PLAYER_POSITION[1] + 1

        # Checks if new position is valid
        if 0 <= newPos[0] < self.ROW_COUNT and 0 <= newPos[1] < self.COL_COUNT:
            self.PLAYER_POSITION = newPos

        if self.PLAYER_POSITION == self.GOAL_POSITION: reward = 10.0; done = True
        elif self.PLAYER_POSITION == self.OBSTACLE_POSITION: reward = -10.0; done = True
        else: reward = -1.0

        if self.moves >= self.MAX_MOVES: done = True

        return self.PLAYER_POSITION, reward, done, {}
        return self.PLAYER_POSITION, reward, done, {}
\ No newline at end of file
from gridworld import GridWorld

env = GridWorld(800, 64, 1)
env.reset()

# Walks RIGHT until the episode ends; step() returns (state, reward, done, info),
# so done must be captured or the loop never terminates
done = False
while not done:
    env.render()
    obs, reward, done, _ = env.step(3)   # 3 = RIGHT
    print(obs, reward, done)
\ No newline at end of file
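Alongside the scripted walk above, a sketch (not part of the commit) of a full random-policy episode against GridWorld's gym-style API; it mainly illustrates the reward scheme in the rewritten step(): +10 at the goal, -10 at the obstacle, -1 per move, with the episode cut off at MAX_MOVES. The 4-action range is taken from the UP/DOWN/LEFT/RIGHT constants.

import random

from gridworld import GridWorld

env = GridWorld(800, 64, 1)
state = env.reset()

# Accumulates the return of one random-policy episode
total_reward, done = 0.0, False
while not done:
    action = random.randrange(4)               # UP/DOWN/LEFT/RIGHT = 0..3
    state, reward, done, _ = env.step(action)
    total_reward += reward

print('episode return:', total_reward)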
import numpy as np
# global variables
BOARD_ROWS = 3
BOARD_COLS = 4
WIN_STATE = (0, 3)
LOSE_STATE = (1, 3)
START = (2, 0)
DETERMINISTIC = True
class State:
    def __init__(self, state=START):
        self.board = np.zeros([BOARD_ROWS, BOARD_COLS])
        self.board[1, 1] = -1
        self.state = state
        self.isEnd = False
        self.determine = DETERMINISTIC

    def giveReward(self):
        if self.state == WIN_STATE:
            return 1
        elif self.state == LOSE_STATE:
            return -1
        else:
            return 0

    def isEndFunc(self):
        if (self.state == WIN_STATE) or (self.state == LOSE_STATE):
            self.isEnd = True
    def nxtPosition(self, action):
        """
        action: up, down, left, right
        -------------
        0 | 1 | 2 | 3 |
        1 |
        2 |
        return next position
        """
        if self.determine:
            if action == "up":
                nxtState = (self.state[0] - 1, self.state[1])
            elif action == "down":
                nxtState = (self.state[0] + 1, self.state[1])
            elif action == "left":
                nxtState = (self.state[0], self.state[1] - 1)
            else:
                nxtState = (self.state[0], self.state[1] + 1)

            # if next state legal
            if (nxtState[0] >= 0) and (nxtState[0] <= (BOARD_ROWS - 1)):
                if (nxtState[1] >= 0) and (nxtState[1] <= (BOARD_COLS - 1)):
                    if nxtState != (1, 1):
                        return nxtState
            return self.state
    def showBoard(self):
        self.board[self.state] = 1
        for i in range(0, BOARD_ROWS):
            print('-----------------')
            out = '| '
            for j in range(0, BOARD_COLS):
                if self.board[i, j] == 1:
                    token = '*'
                if self.board[i, j] == -1:
                    token = 'z'
                if self.board[i, j] == 0:
                    token = '0'
                out += token + ' | '
            print(out)
        print('-----------------')
# Agent of player
class Agent:
    def __init__(self):
        self.states = []
        self.actions = ["up", "down", "left", "right"]
        self.State = State()
        self.lr = 0.2
        self.exp_rate = 0.3

        # initial state reward
        self.state_values = {}
        for i in range(BOARD_ROWS):
            for j in range(BOARD_COLS):
                self.state_values[(i, j)] = 0  # set initial value to 0
    def chooseAction(self):
        # choose action with most expected value
        mx_nxt_reward = float('-inf')  # so the first greedy candidate always wins, even with negative values
        action = ""
        if np.random.uniform(0, 1) <= self.exp_rate:
            action = np.random.choice(self.actions)
        else:
            # greedy action
            for a in self.actions:
                # value of the (deterministic) next position for this action
                nxt_reward = self.state_values[self.State.nxtPosition(a)]
                if nxt_reward >= mx_nxt_reward:
                    action = a
                    mx_nxt_reward = nxt_reward
        return action
    def takeAction(self, action):
        position = self.State.nxtPosition(action)
        return State(state=position)

    def reset(self):
        self.states = []
        self.State = State()
    def play(self, rounds=10):
        i = 0
        while i < rounds:
            # to the end of game back propagate reward
            if self.State.isEnd:
                # back propagate
                reward = self.State.giveReward()
                # explicitly assign end state to reward values
                self.state_values[self.State.state] = reward  # this is optional
                print("Game End Reward", reward)
                for s in reversed(self.states):
                    reward = self.state_values[s] + self.lr * (reward - self.state_values[s])
                    self.state_values[s] = round(reward, 3)
                self.reset()
                i += 1
            else:
                action = self.chooseAction()
                # append trace
                self.states.append(self.State.nxtPosition(action))
                print("current position {} action {}".format(self.State.state, action))
                # by taking the action, it reaches the next state
                self.State = self.takeAction(action)
                # mark is end
                self.State.isEndFunc()
                print("nxt state", self.State.state)
                print("---------------------")
    def showValues(self):
        for i in range(0, BOARD_ROWS):
            print('----------------------------------')
            out = '| '
            for j in range(0, BOARD_COLS):
                out += str(self.state_values[(i, j)]).ljust(6) + ' | '
            print(out)
        print('----------------------------------')
if __name__ == "__main__":
ag = Agent()
ag.play(50)
print(ag.showValues())
\ No newline at end of file
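The backward sweep in Agent.play() applies the update V(s) <- V(s) + lr * (target - V(s)) from the terminal reward back to the start of the episode, so each state's value is pulled a fraction lr toward the (already updated) value of its successor. A standalone illustration; the three-state trajectory and initial values are invented for the example:

# Illustration of the backward value update in Agent.play();
# the trajectory and initial values are invented for the example.
lr = 0.2
state_values = {(2, 0): 0.0, (2, 1): 0.0, (1, 3): 0.0}
episode = [(2, 0), (2, 1), (1, 3)]    # visited states; (1, 3) is LOSE_STATE

reward = -1.0                         # giveReward() at the terminal state
for s in reversed(episode):
    reward = state_values[s] + lr * (reward - state_values[s])
    state_values[s] = round(reward, 3)

print(state_values)                   # {(2, 0): -0.008, (2, 1): -0.04, (1, 3): -0.2}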