Commit 8460c296 authored by Lasse Seivaag

Final changes before delivery.

parent 3d2431e4
@@ -10,6 +10,33 @@ import pickle
import numpy as np
from MarioConfig import MarioConfig
def reward_function(total_x, total_y, score, ticks):
return total_x**1.9 + total_y**2.3 + score**1.5 + ticks**1.1
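For intuition, the exponents make horizontal progress dominate the other terms; a rough magnitude check with made-up values (not from any training run):

print(500 ** 1.9)   # x-progress term, ~1.3e5
print(400 ** 1.5)   # score term, = 8.0e3
print(2000 ** 1.1)  # ticks term, ~4.3e3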
def NES(npop, learning_rate, sigma, layers, W, b, Nw, Nb, R):
#print("NES INPUT:")
#print("npop: ", npop)
#print("learning rate: ", learning_rate)
#print("len(W): ", len(W))
#print("len(b) ", len(b))
#print("W[0].shape: ", W[0].shape)
#print("b[0].shape: ", b[0].shape)
#print("len(R): ", len(R))
#print("W: ", W)
#print("b: ", b)
#print("Nw: ", Nw)
#print("Nb: ", Nb)
#print("R: ", R)
    # Standardize the rewards; guard against a zero std when all rewards are equal
    Rstd = np.std(R)
    if Rstd == 0:
        A = R - np.mean(R)
    else:
        A = (R - np.mean(R)) / Rstd
for i in range(len(layers)):
W[i] = W[i] + (learning_rate/(npop*sigma)) * np.dot(Nw[i].transpose(1,2,0), A)
b[i] = b[i] + (learning_rate/(npop*sigma)) * np.dot(Nb[i].T, A)
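The loop above is the standard evolution-strategies update: the standardized rewards A weight the per-individual noise samples, so each parameter moves by learning_rate/(npop*sigma) times the reward-weighted noise sum. A minimal self-contained sketch of the same rule on a toy 1-D objective (all names and numbers here are illustrative, not project code):

import numpy as np

def toy_fitness(w):
    return -(w - 3.0) ** 2  # maximized at w = 3

npop, sigma, learning_rate = 50, 0.1, 0.02
w = np.zeros(1)
rng = np.random.default_rng(0)
for _ in range(200):
    N = rng.standard_normal((npop, 1))                        # per-individual noise
    R = np.array([toy_fitness(w + sigma * n)[0] for n in N])  # perturbed rewards
    A = (R - R.mean()) / (R.std() if R.std() > 0 else 1.0)    # standardized, as above
    w = w + (learning_rate / (npop * sigma)) * (N.T @ A)      # same step as NES()
print(w)  # ~ [3.]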
client_server_packets = {
"client_hello": 0,
"client_cpu_cores": 1,
@@ -73,28 +100,3 @@ class ServerClientUpdateConfig():
self.nodeids = nodeids
self.population = population
self.npop_total = npop_total
- ##################################
- # Server stuff
- ##################################
- class Server(Thread):
-     def __init__(self):
-         super(Server, self).__init__()
-         self.server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) #IPv4 TCP
-         self.server.bind(('', 15025)) #Listen to all interfaces, port 15025
-         self.connection_lock = Lock()
-         self.clients = []
-         self.done = False
-     def run(self):
-         try:
-             self.server.listen(3)
-             while not self.done:
-                 (client, address) = self.server.accept() # Provided workers have static IP we can use a whitelist
-                 self.connection_lock.acquire()
-                 self.clients.append((client,address))
-                 self.connection_lock.release()
-         except:
-             print("wtf")
\ No newline at end of file
@@ -139,7 +139,7 @@ def demo_agent(mario, cnn, R, map_to_use=None, make_movie=False):
if map_to_use is not None:
stage_string = map_to_use
else:
stage_string = "SuperMarioBros-4-1-v0"
stage_string = "SuperMarioBros-2-3-v0"
env = gym_super_mario_bros.make(stage_string)
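    # (Stage ids follow gym-super-mario-bros' "SuperMarioBros-<world>-<stage>-v<version>"
    # naming, so the new default demo stage above is World 2-3.)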
...
@@ -5,14 +5,75 @@ import numpy as np
import torch
import sys
from copy import *
from shutil import copy2
from pathlib import Path
from math import *
from MarioConfig import *
from LoadBalancer import *
- from AIManagerCopy import NES, save_agent, load_agent
- class TaskManagerGlobals():
-     def __init__(self, base_weights=None, reward=None):
from NetworkingCommons import *
#####################################################
# Saves a Mario AI instance and the CNN it used,    #
# in a folder named by population and index,        #
# so the agent can be replayed later.               #
#####################################################
def save_agent(mario, index, population, reward, stage_string):
directory = Path("ai_training/models/pop{}-ind{}".format(population, index))
directory.mkdir(parents=True, exist_ok=True)
torch.save(mario.state_dict(), "ai_training/models/pop{}-ind{}/ind{}-{}-reward{}.pth".format(population, index, index, stage_string, reward))
copy2("cnn_training/models/best.pth", "ai_training/models/pop{}-ind{}/cnn.pth".format(population, index))
#####################################################
# Loads a Mario AI and its accompanying CNN.        #
# Returns (mario, cnn).                             #
#####################################################
def load_agent(population, index, reward, stage_string, cnn_output):
layers = ((cnn_output,9), (9, 5))
    # Placeholder weights with the right shapes; load_state_dict below
    # overwrites them with the saved parameters.
    Ws = []
    Bs = []
    for layer in layers:
        Ws.append(torch.rand(layer).float())
        Bs.append(torch.rand(layer[1]).float())
directory = Path("ai_training/models/pop{}-ind{}".format(population, index))
directory.mkdir(parents=True, exist_ok=True)
mario = Mario(Ws, Bs, True)
mario.load_state_dict(torch.load("ai_training/models/pop{}-ind{}/ind{}-{}-reward{}.pth".format(population, index, index, stage_string, reward)))
mario.eval()
cnn = CNN(4)
cnn.load_state_dict(torch.load("ai_training/models/pop{}-ind{}/cnn.pth".format(population, index), map_location=torch.device('cpu')))
cnn.eval()
return mario, cnn
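A hedged usage sketch of the save/load round trip (the population, index, reward and cnn_output values are illustrative; note that reward and stage_string must match what was passed to save_agent, since both are baked into the .pth file name):

save_agent(mario, 3, 42, 1337.0, "SuperMarioBros-2-3-v0")
mario2, cnn2 = load_agent(42, 3, 1337.0, "SuperMarioBros-2-3-v0", cnn_output=6400)  # 6400 is a guessed CNN output size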
class Server(Thread):
def __init__(self):
super(Server, self).__init__()
self.server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) #IPv4 TCP
self.server.bind(('', 15025)) #Listen to all interfaces, port 15025
self.connection_lock = Lock()
self.clients = []
self.done = False
def run(self):
try:
self.server.listen(3)
while not self.done:
                (client, address) = self.server.accept() # If workers have static IPs, we could whitelist them here
self.connection_lock.acquire()
self.clients.append((client,address))
self.connection_lock.release()
        except OSError as e:
            print("Server accept loop terminated:", e)
class TaskMasterGlobals():
def __init__(self):
#############################
# TaskMaster global variables
#############################
@@ -42,21 +103,22 @@ class TaskManagerGlobals():
self.mario_vision = ((self.from_y, self.to_y), (self.from_x, self.to_x))
self.layers = ((self.cnn_shape,9), (9, 5))
-        self.old_max = -1
self.old_max = -1.0
self.W = []
self.b = []
for layer in self.layers:
self.W.append(torch.tensor(self.globalrandom.randn(layer[0], layer[1])))
self.b.append(torch.tensor(self.globalrandom.randn(layer[1])))
-        if base_weights is not None:
-            self.old_max = reward
-            self.hyperparameters = MarioHyperparameters(base_weights[0], base_weights[1], self.sigma, self.learning_rate)
-        else:
-            self.hyperparameters = MarioHyperparameters(self.W, self.b, self.sigma, self.learning_rate)
self.hyperparameters = MarioHyperparameters(self.W, self.b, self.sigma, self.learning_rate)
self.reset_pop_values()
# per population variables #
def load_previous_agent(self, base_weights, reward):
if base_weights is not None:
self.old_max = float(reward)
self.hyperparameters = MarioHyperparameters(base_weights[0], base_weights[1], self.sigma, self.learning_rate)
def reset_pop_values(self):
self.R = []
@@ -405,19 +467,19 @@ def task_manager_main():
if __name__ == "__main__":
base_weights = None
reward = -1.0
global tm_globals
tm_globals = TaskMasterGlobals()
if len(sys.argv) > 1:
pop = sys.argv[1]
ind = sys.argv[2]
reward = sys.argv[3]
stage = sys.argv[4]
-        mario, _ = load_agent(pop, ind, reward, stage)
mario, _ = load_agent(pop, ind, reward, stage, tm_globals.cnn_shape)
Ws, bs = mario.get_layers()
base_weights = (Ws, bs)
tm_globals.load_previous_agent(base_weights, reward)
print("Starting at pop-ind with reward {}-{}-{}".format(pop,ind,reward))
-    global tm_globals
-    tm_globals = TaskManagerGlobals(base_weights, float(reward))
global server
server = Server()
server.daemon = True
...
@@ -16,8 +16,7 @@ import gym_super_mario_bros
from gym_super_mario_bros.actions import RIGHT_ONLY, SIMPLE_MOVEMENT, COMPLEX_MOVEMENT
from MarioConfig import *
from LoadBalancer import *
- from AIManagerCopy import NES, reward_function
from NetworkingCommons import *
#########################################################
# Agent training, meant to be used with multiprocessing #
@@ -349,7 +348,7 @@ class Worker():
def send_client_cpu_cores(self):
#print("send_client_cpu_cores start")
packet_id = client_server_packets["client_cpu_cores"].to_bytes(1, byteorder='little')
-        cpu_cores = 32
cpu_cores = 28
payload = pickle.dumps(ClientConfigPacket(cpu_cores))
length = len(payload).to_bytes(4, byteorder='little')
self.out_buffer.append(packet_id+length+payload)
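Each packet sent here is framed as a 1-byte packet id, a 4-byte little-endian payload length, and then the pickled payload. A sketch of a matching reader for the other end of the socket (recv_exact and recv_packet are hypothetical helpers, not part of this repo):

import pickle

def recv_exact(sock, n):
    # Read exactly n bytes, or raise if the peer disconnects mid-packet.
    buf = b''
    while len(buf) < n:
        chunk = sock.recv(n - len(buf))
        if not chunk:
            raise ConnectionError("socket closed mid-packet")
        buf += chunk
    return buf

def recv_packet(sock):
    packet_id = recv_exact(sock, 1)[0]
    length = int.from_bytes(recv_exact(sock, 4), byteorder='little')
    payload = pickle.loads(recv_exact(sock, length))
    return packet_id, payload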
...
import pickle as pl
import matplotlib.pyplot as plt
import glob
import re
import sys
if sys.argv.__contains__("max"):
pops = []
rewards = []
all_paths = []
all_numbers = []
best_high = []
    for path in glob.glob("ai_training/models/*/*.pth"):
        if "cnn.pth" not in path:
            all_paths.append(path)
    for i in range(len(all_paths)):
        scores = re.findall(r"[^a-zA-Z]+\d\.pth", all_paths[i])
        pop = all_paths[i].split("pop")[1].split("-")[0]
        if len(scores) == 1:
            all_numbers.append((int(pop), float(scores[0][:-4])))
for pop, number in all_numbers:
best_high.append((pop, number))
best_high = sorted(best_high, reverse=False, key=lambda x: x[0])
    # The blue series below is the same max-reward data; reuse it instead
    # of re-globbing and re-parsing every file.
    popsMax = []
    rewardsMax = []
    best_highMax = list(best_high)
for pop, reward in best_high:
pops.append(pop)
rewards.append(reward)
plt.xlim(0, max(pops))
plt.ylim(0, max(rewards))
# naming the x axis
plt.xlabel('Population')
# naming the y axis
plt.ylabel('Max Reward')
plt.plot(pops, rewards, scalex=False, scaley=False)
best_high = sorted(best_highMax, reverse=False, key=lambda x: x[0])
for pop, reward in best_high:
popsMax.append(pop)
rewardsMax.append(reward)
plt.xlim(0, max(popsMax))
plt.ylim(0, max(rewardsMax))
plt.plot(popsMax, rewardsMax, 'b', scalex=False, scaley=False)
# giving a title to my graph
plt.title('The relationship between population and max rewards')
plt.show()
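A worked example of the filename parsing used above (the path is illustrative):

example = "ai_training/models/pop42-ind3/ind3-SuperMarioBros-2-3-v0-reward1337.5.pth"
match = re.findall(r"[^a-zA-Z]+\d\.pth", example)[0]  # "1337.5.pth"
print(example.split("pop")[1].split("-")[0])          # "42"  (population)
print(float(match[:-4]))                              # 1337.5 (reward)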
if sys.argv.__contains__("average"):
pops = []
rewards = []
all_paths = []
data = []
for path in glob.glob("ai_training/models/*/*.pickle"):
all_paths.append(path)
for i in range(len(all_paths)):
        with open(all_paths[i], 'rb') as infile:
            R = pl.load(infile)
avg_reward = (sum(R) / len(R))
pop = all_paths[i].split("pop")[1].split("-")[0]
data.append((int(pop), avg_reward))
data = sorted(data, reverse=False, key=lambda x: x[0])
for pop, reward in data:
pops.append(pop)
rewards.append(reward)
    plt.xlim(0, max(pops))
plt.ylim(0, max(rewards))
# naming the x axis
plt.xlabel('Population')
# naming the y axis
plt.ylabel('Average rewards')
plt.plot(pops, rewards, scalex=False, scaley=False)
# giving a title to my graph
plt.title('The relationship between population and average rewards')
plt.show()
# Combined figure: needs both the average data computed here and the max
# data from the "max" branch above, so require both flags.
if "max" in sys.argv and "average" in sys.argv:
    pops = []
    rewards = []
    all_paths = []
    data = []
    for path in glob.glob("ai_training/models/*/*.pickle"):
        all_paths.append(path)
    for i in range(len(all_paths)):
        with open(all_paths[i], 'rb') as infile:
            R = pl.load(infile)
        avg_reward = sum(R) / len(R)
        pop = all_paths[i].split("pop")[1].split("-")[0]
        data.append((int(pop), avg_reward))
    data = sorted(data, reverse=False, key=lambda x: x[0])
    for pop, reward in data:
        pops.append(pop)
        rewards.append(reward)
    plt.xlim(0, max(pops))
    plt.ylim(0, max(max(rewards), max(rewardsMax)))
    # naming the x axis
    plt.xlabel('Population')
    # naming the y axis
    plt.ylabel('Rewards/Fitness')
    plt.plot(pops, rewards, 'r', scalex=False, scaley=False)
    plt.plot(popsMax, rewardsMax, 'b', scalex=False, scaley=False)
    # giving a title to my graph
    plt.title('The relationship between population and rewards/fitness')
    plt.figtext(.83, .99, "Red is average\nBlue is max", verticalalignment='top', bbox=dict(facecolor='yellow', alpha=0.1))
    plt.show()
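Presumably the script is run with the metric names as command-line arguments (the file name plot_rewards.py is a stand-in, since it is not shown in this diff):

#   python plot_rewards.py max          -> max reward per population
#   python plot_rewards.py average      -> average reward per population
#   python plot_rewards.py max average  -> combined red/blue figure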