Mohammad Al Nayef / AI program to play super mario · Commits

Commit 8460c296, authored Nov 24, 2020 by Lasse Seivaag

    Final changes before delivery.

Parent: 3d2431e4 · Changes: 5
LoadBalancer.py → NetworkingCommons.py (view file @ 8460c296)
@@ -10,6 +10,33 @@ import pickle

```python
import numpy as np
from MarioConfig import MarioConfig

def reward_function(total_x, total_y, score, ticks):
    return total_x ** 1.9 + total_y ** 2.3 + score ** 1.5 + ticks ** 1.1

def NES(npop, learning_rate, sigma, layers, W, b, Nw, Nb, R):
    #print("NES INPUT:")
    #print("npop: ", npop)
    #print("learning rate: ", learning_rate)
    #print("len(W): ", len(W))
    #print("len(b) ", len(b))
    #print("W[0].shape: ", W[0].shape)
    #print("b[0].shape: ", b[0].shape)
    #print("len(R): ", len(R))
    #print("W: ", W)
    #print("b: ", b)
    #print("Nw: ", Nw)
    #print("Nb: ", Nb)
    #print("R: ", R)
    # Standardize the rewards into advantages, guarding against zero variance.
    A = None
    Rstd = np.std(R)
    if Rstd == 0:
        A = R - np.mean(R)
    else:
        A = (R - np.mean(R)) / np.std(R)
    # Move each layer along its reward-weighted noise (gradient-ascent step).
    for i in range(len(layers)):
        W[i] = W[i] + (learning_rate / (npop * sigma)) * np.dot(Nw[i].transpose(1, 2, 0), A)
        b[i] = b[i] + (learning_rate / (npop * sigma)) * np.dot(Nb[i].T, A)

client_server_packets = {
    "client_hello": 0,
    "client_cpu_cores": 1,
    # … (remaining packet ids collapsed in the diff view)
```
@@ -73,28 +100,3 @@ class ServerClientUpdateConfig():

```python
        self.nodeids = nodeids
        self.population = population
        self.npop_total = npop_total

##################################
# Server stuff
##################################
class Server(Thread):
    def __init__(self):
        super(Server, self).__init__()
        self.server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)  # IPv4 TCP
        self.server.bind(('', 15025))  # Bind to all interfaces, port 15025
        self.connection_lock = Lock()
        self.clients = []
        self.done = False

    def run(self):
        try:
            self.server.listen(3)
            while not self.done:
                (client, address) = self.server.accept()
                # Provided workers have static IPs, we could use a whitelist
                self.connection_lock.acquire()
                self.clients.append((client, address))
                self.connection_lock.release()
        except:  # bare except: any socket error silently ends the accept loop
            print("wtf")
```
\ No newline at end of file
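The NES function above is a natural-evolution-strategies update: per-agent rewards R are standardized into advantages A, and each layer moves along its reward-weighted noise. Below is a minimal sketch of one generation; every name except NES itself is illustrative, and the perturbation shapes (npop, in, out) are inferred from the transpose(1, 2, 0) in the update.

```python
# Hypothetical one-generation driver for NES; shapes inferred from the
# update step above (Nw[i]: (npop, in, out), Nb[i]: (npop, out)).
import numpy as np

rng = np.random.default_rng(0)
layers = ((16, 9), (9, 5))                 # same two-layer layout the project uses
npop, sigma, learning_rate = 8, 0.1, 0.03  # illustrative hyperparameters

W = [rng.standard_normal(shape) for shape in layers]
b = [rng.standard_normal(shape[1]) for shape in layers]

# Sample one noise tensor per population member, per layer.
Nw = [rng.standard_normal((npop,) + shape) for shape in layers]
Nb = [rng.standard_normal((npop, shape[1])) for shape in layers]

# Stand-in for the fitness each perturbed agent earned in the emulator.
R = rng.standard_normal(npop)

NES(npop, learning_rate, sigma, layers, W, b, Nw, Nb, R)  # mutates W and b in place
```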
cnn_test.py → Replay.py (view file @ 8460c296)
@@ -139,7 +139,7 @@ def demo_agent(mario, cnn, R, map_to_use=None, make_movie=False):

```python
    if map_to_use is not None:
        stage_string = map_to_use
    else:
        stage_string = "SuperMarioBros-4-1-v0"  # old default stage
        stage_string = "SuperMarioBros-2-3-v0"  # new default stage
    env = gym_super_mario_bros.make(stage_string)
```
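For reference, the stage string selects the world and level in gym_super_mario_bros. A hedged sketch of the surrounding environment setup, assuming the nes_py JoypadSpace wrapper that the package is normally paired with (the action-set import matches WorkerV2.py below):

```python
# Minimal random-agent loop; uses the 4-tuple step API of the gym
# versions this 2020-era project targets.
import gym_super_mario_bros
from nes_py.wrappers import JoypadSpace
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT

env = gym_super_mario_bros.make("SuperMarioBros-2-3-v0")  # world 2, stage 3
env = JoypadSpace(env, SIMPLE_MOVEMENT)

state = env.reset()
for _ in range(500):
    state, reward, done, info = env.step(env.action_space.sample())
    if done:
        state = env.reset()
env.close()
```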
TaskMasterV3.py (view file @ 8460c296)

@@ -5,14 +5,75 @@ import numpy as np
```python
import torch
import sys
from copy import *
from shutil import copy2
from pathlib import Path
from math import *
from MarioConfig import *
from LoadBalancer import *
from AIManagerCopy import NES, save_agent, load_agent

class TaskManagerGlobals():
    def __init__(self, base_weights=None, reward=None):

from NetworkingCommons import *

#####################################################
# Saved Mario AI instance and the cnn it used       #
# Saved in folder with population and index         #
# So it can be replayed later                       #
#####################################################
def save_agent(mario, index, population, reward, stage_string):
    directory = Path("ai_training/models/pop{}-ind{}".format(population, index))
    directory.mkdir(parents=True, exist_ok=True)
    torch.save(mario.state_dict(),
               "ai_training/models/pop{}-ind{}/ind{}-{}-reward{}.pth".format(population, index, index, stage_string, reward))
    copy2("cnn_training/models/best.pth",
          "ai_training/models/pop{}-ind{}/cnn.pth".format(population, index))

#####################################################
# Loads Mario AI and its accompanying CNN           #
# Returns (Mario, CNN)                              #
#####################################################
def load_agent(population, index, reward, stage_string, cnn_output):
    layers = ((cnn_output, 9), (9, 5))
    W = []
    b = []
    Nw = []
    Nb = []
    Ws = []
    Bs = []
    for layer in layers:
        W.append(torch.rand(layer))
        b.append(torch.rand((layer[1])))
    for j in range(len(layers)):
        Ws.append(W[j].float())
        Bs.append(b[j].float())
    directory = Path("ai_training/models/pop{}-ind{}".format(population, index))
    directory.mkdir(parents=True, exist_ok=True)
    mario = Mario(Ws, Bs, True)
    mario.load_state_dict(torch.load(
        "ai_training/models/pop{}-ind{}/ind{}-{}-reward{}.pth".format(population, index, index, stage_string, reward)))
    mario.eval()
    cnn = CNN(4)
    cnn.load_state_dict(torch.load(
        "ai_training/models/pop{}-ind{}/cnn.pth".format(population, index),
        map_location=torch.device('cpu')))
    cnn.eval()
    return mario, cnn
```
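The two paths above bake population, index, stage and reward into the checkpoint filename, so a load must repeat exactly the values used at save time. An illustrative round trip, assuming a trained `mario` and an existing cnn_training/models/best.pth (all argument values hypothetical):

```python
# Save under pop 3, individual 7; reward and stage become part of the name.
save_agent(mario, index=7, population=3, reward=1234.5,
           stage_string="SuperMarioBros-2-3-v0")

# Reloading requires the same values, plus the CNN output width that
# determines the first layer's input shape.
mario2, cnn = load_agent(3, 7, 1234.5, "SuperMarioBros-2-3-v0", cnn_output=16)
```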
```python
class Server(Thread):
    def __init__(self):
        super(Server, self).__init__()
        self.server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)  # IPv4 TCP
        self.server.bind(('', 15025))  # Bind to all interfaces, port 15025
        self.connection_lock = Lock()
        self.clients = []
        self.done = False

    def run(self):
        try:
            self.server.listen(3)
            while not self.done:
                (client, address) = self.server.accept()
                # Provided workers have static IPs, we could use a whitelist
                self.connection_lock.acquire()
                self.clients.append((client, address))
                self.connection_lock.release()
        except:  # bare except: any socket error silently ends the accept loop
            print("wtf")

class TaskMasterGlobals():
    def __init__(self):
        #############################
        # TaskMaster global variables
        #############################
```

@@ -42,21 +103,22 @@ class TaskManagerGlobals():
```python
        self.mario_vision = ((self.from_y, self.to_y), (self.from_x, self.to_x))
        self.layers = ((self.cnn_shape, 9), (9, 5))
        self.old_max = -1    # old line
        self.old_max = -1.0  # new line
        self.W = []
        self.b = []
        for layer in self.layers:
            self.W.append(torch.tensor(self.globalrandom.randn(layer[0], layer[1])))
            self.b.append(torch.tensor(self.globalrandom.randn(layer[1])))
        if base_weights is not None:
            self.old_max = reward
            self.hyperparameters = MarioHyperparameters(base_weights[0], base_weights[1], self.sigma, self.learning_rate)
        else:
            self.hyperparameters = MarioHyperparameters(self.W, self.b, self.sigma, self.learning_rate)
        self.hyperparameters = MarioHyperparameters(self.W, self.b, self.sigma, self.learning_rate)  # old line, superseded by the branch above
        self.reset_pop_values()
        # per population variables #

    def load_previous_agent(self, base_weights, reward):
        if base_weights is not None:
            self.old_max = float(reward)
            self.hyperparameters = MarioHyperparameters(base_weights[0], base_weights[1], self.sigma, self.learning_rate)

    def reset_pop_values(self):
        self.R = []
```
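Note that the base weights are drawn from a dedicated `self.globalrandom` generator rather than the global numpy state, presumably so any run or node that seeds it the same way derives identical starting weights. A minimal sketch of that idea with a seeded numpy RandomState (the seed value is an assumption, not taken from this project):

```python
import numpy as np
import torch

layers = ((16, 9), (9, 5))
globalrandom = np.random.RandomState(1234)  # hypothetical fixed seed

# Two processes seeding identically would build identical W and b.
W = [torch.tensor(globalrandom.randn(n_in, n_out)) for n_in, n_out in layers]
b = [torch.tensor(globalrandom.randn(n_out)) for _, n_out in layers]
```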
@@ -405,19 +467,19 @@ def task_manager_main():
```python
if __name__ == "__main__":
    base_weights = None
    reward = -1.0
    global tm_globals
    tm_globals = TaskMasterGlobals()
    if len(sys.argv) > 1:
        pop = sys.argv[1]
        ind = sys.argv[2]
        reward = sys.argv[3]
        stage = sys.argv[4]
        mario, _ = load_agent(pop, ind, reward, stage)                         # old call
        mario, _ = load_agent(pop, ind, reward, stage, tm_globals.cnn_shape)   # new call, passes the CNN output shape
        Ws, bs = mario.get_layers()
        base_weights = (Ws, bs)
        tm_globals.load_previous_agent(base_weights, reward)
        print("Starting at pop-ind with reward {}-{}-{}".format(pop, ind, reward))
    global tm_globals
    tm_globals = TaskManagerGlobals(base_weights, float(reward))
    global server
    server = Server()
    server.daemon = True
```
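As the argument handling above implies, a run can be resumed from a saved agent by passing population, individual, reward and stage on the command line, e.g. `python TaskMasterV3.py 3 7 1234.5 SuperMarioBros-2-3-v0` (values illustrative); with no arguments, training starts from the randomly initialized base weights.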
WorkerV2.py (view file @ 8460c296)

@@ -16,8 +16,7 @@ import gym_super_mario_bros
```python
from gym_super_mario_bros.actions import RIGHT_ONLY, SIMPLE_MOVEMENT, COMPLEX_MOVEMENT
from MarioConfig import *
from LoadBalancer import *
from AIManagerCopy import NES, reward_function
from NetworkingCommons import *

#########################################################
# Agent training, meant to be used with multiprocessing #
```

@@ -349,7 +348,7 @@ class Worker():
```python
    def send_client_cpu_cores(self):
        #print("send_client_cpu_cores start")
        packet_id = client_server_packets["client_cpu_cores"].to_bytes(1, byteorder='little')
        cpu_cores = 32  # old value
        cpu_cores = 28  # new value
        payload = pickle.dumps(ClientConfigPacket(cpu_cores))
        length = len(payload).to_bytes(4, byteorder='little')
        self.out_buffer.append(packet_id + length + payload)
```
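The wire format here is a 1-byte packet id, a 4-byte little-endian payload length, then a pickled payload. A hypothetical receive-side helper for that framing (not part of this commit; `sock` is any connected socket):

```python
import pickle

def read_exact(sock, n):
    """Read exactly n bytes from the socket, or raise if it closes early."""
    buf = b''
    while len(buf) < n:
        chunk = sock.recv(n - len(buf))
        if not chunk:
            raise ConnectionError("socket closed mid-packet")
        buf += chunk
    return buf

def read_packet(sock):
    packet_id = read_exact(sock, 1)[0]
    length = int.from_bytes(read_exact(sock, 4), byteorder='little')
    payload = pickle.loads(read_exact(sock, length))
    return packet_id, payload
```

Unpickling data off the network is only reasonable here because, as the whitelist comment in Server.run suggests, workers are trusted peers on a private cluster.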
graph_reward.py (view file @ 8460c296)
```python
import numpy as np
import pickle as pl
import matplotlib.pyplot as plt
from pathlib import Path
import glob
import re
import sys
import os

if sys.argv.__contains__("max"):
    pops = []
    rewards = []
    all_paths = []
    all_numbers = []
    best_high = []
    for path in glob.glob("ai_training/models/*/*.pth"):
        if not path.__contains__("cnn.pth"):
            all_paths.append(path)
    for i in range(len(all_paths)):
        scores = re.findall(r"[^a-zA-Z]+\d.pth", all_paths[i])
        pop = all_paths[i].split("pop")[1].split("-")[0]
        if len(scores) == 1:
            score = ''.join(scores[0])
            all_numbers.append((int(pop), float(score[:-4])))
    for pop, number in all_numbers:
        best_high.append((pop, number))
    best_high = sorted(best_high, reverse=False, key=lambda x: x[0])

    popsMax = []
    rewardsMax = []
    all_pathsMax = []
    all_numbersMax = []
    best_highMax = []
    for path in glob.glob("ai_training/models/*/*.pth"):
        if not path.__contains__("cnn.pth"):
            all_pathsMax.append(path)
    for i in range(len(all_pathsMax)):
        scores = re.findall(r"[^a-zA-Z]+\d.pth", all_pathsMax[i])
        pop = all_pathsMax[i].split("pop")[1].split("-")[0]
        if len(scores) == 1:
            score = ''.join(scores[0])
            all_numbersMax.append((int(pop), float(score[:-4])))
    for pop, number in all_numbersMax:
        best_highMax.append((pop, number))

    for pop, reward in best_high:
        pops.append(pop)
        rewards.append(reward)
    plt.xlim(0, max(pops))
    plt.ylim(0, max(rewards))
    # naming the x axis
    plt.xlabel('Population')
    # naming the y axis
    plt.ylabel('Max Reward')
    plt.plot(pops, rewards, scalex=False, scaley=False)

    best_high = sorted(best_highMax, reverse=False, key=lambda x: x[0])
    for pop, reward in best_high:
        popsMax.append(pop)
        rewardsMax.append(reward)
    plt.xlim(0, max(popsMax))
    plt.ylim(0, max(rewardsMax))
    plt.plot(popsMax, rewardsMax, 'b', scalex=False, scaley=False)
    # giving a title to my graph
    plt.title('The relationship between population and max rewards')
    plt.show()

if sys.argv.__contains__("average"):
    pops = []
    rewards = []
    all_paths = []
    data = []
    for path in glob.glob("ai_training/models/*/*.pickle"):
        all_paths.append(path)
    for i in range(len(all_paths)):
        infile = open(all_paths[i], 'rb')
        R = pl.load(infile)
        infile.close()
        avg_reward = (sum(R) / len(R))
        pop = all_paths[i].split("pop")[1].split("-")[0]
        data.append((int(pop), avg_reward))
    data = sorted(data, reverse=False, key=lambda x: x[0])
    for pop, reward in data:
        pops.append(pop)
        rewards.append(reward)
    plt.xlim(0, len(pops))
    plt.ylim(0, max(rewards))
    # naming the x axis
    plt.xlabel('Population')
    # naming the y axis
    plt.ylabel('Average rewards')
    plt.plot(pops, rewards, scalex=False, scaley=False)
    # giving a title to my graph
    plt.title('The relationship between population and average rewards')
    plt.show()

#if sys.argv.__contains__("average"):
pops = []
rewards = []
all_paths = []
data = []
for path in glob.glob("ai_training/models/*/*.pickle"):
    all_paths.append(path)
for i in range(len(all_paths)):
    infile = open(all_paths[i], 'rb')
    R = pl.load(infile)
    infile.close()
    avg_reward = (sum(R) / len(R))
    pop = all_paths[i].split("pop")[1].split("-")[0]
    data.append((int(pop), avg_reward))
data = sorted(data, reverse=False, key=lambda x: x[0])
for pop, reward in data:
    pops.append(pop)
    rewards.append(reward)
plt.xlim(0, len(pops))
# NOTE: relies on rewardsMax computed in the "max" branch above.
plt.ylim(0, max(max(rewards), max(rewardsMax)))
# naming the x axis
plt.xlabel('Population')
# naming the y axis
plt.ylabel('Rewards/Fitness')
plt.plot(pops, rewards, 'r', scalex=False, scaley=False)
# giving a title to my graph
plt.title('The relationship between population and rewards/fitness')
plt.figtext(.83, .99, "Red is average\nBlue is max", verticalalignment='top',
            bbox=dict(facecolor='yellow', alpha=0.1))
plt.show()
```
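The reward recovery above leans on the checkpoint naming scheme from save_agent: the regex grabs the trailing non-letter run ending in a digit plus ".pth", and the last four characters (".pth") are then stripped off. Worked on a hypothetical filename:

```python
import re

name = "ai_training/models/pop12-ind3/ind3-SuperMarioBros-2-3-v0-reward4521.7.pth"
scores = re.findall(r"[^a-zA-Z]+\d.pth", name)  # ['4521.7.pth']
reward = float(scores[0][:-4])                  # 4521.7
pop = name.split("pop")[1].split("-")[0]        # '12'
```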