Skip to content
Snippets Groups Projects
Commit 7c8df5bc authored by magnubau's avatar magnubau
Browse files

Started implementing multithreading in ALSMF.ipynb. Currently not working.

parent 96bb5456
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
from main import * from main import *
import os import os
from collections import Counter from collections import Counter
from sklearn.decomposition import NMF from sklearn.decomposition import NMF
import numpy as np import numpy as np
# Folder that holds the raw data files read by load_data().
DATA_FOLDER = "active1000"
files = os.listdir(DATA_FOLDER)
# Index of one sample file; only used to build `filepath` below.
ARBITRARY_INDEX = 0
filepath = os.path.join(DATA_FOLDER, files[ARBITRARY_INDEX])
# load_data / load_dataset are provided by the star-import of `main`.
data = load_data(DATA_FOLDER)
# NOTE(review): presumably a dense user x item matrix -- confirm against
# main.load_dataset.
ratings = load_dataset(data)
``` ```
%% Output %% Output
================================================================================================= =================================================================================================
Factorization method: bmf Factorization method: bmf
Initialization method: nndsvd Initialization method: nndsvd
Basis matrix W: Basis matrix W:
Mixture (Coefficient) matrix H0: Mixture (Coefficient) matrix H0:
Distance (Euclidean): 431720.4844872935 Distance (Euclidean): 431662.364916604
Actual number of iterations: 12 Actual number of iterations: 12
Sparseness basis: 0.1187, Sparseness mixture: 0.5198 Sparseness basis: 0.1184, Sparseness mixture: 0.5134
Explained variance: 0.20623274555185334 Explained variance: 0.20633960499844817
Residual sum of squares: 431720.4844872936 Residual sum of squares: 431662.364916604
================================================================================================= =================================================================================================
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import as_completed from concurrent.futures import as_completed
class ALSMF():
    """Alternating-least-squares matrix factorisation with confidence weights.

    The user-item matrix ``data`` is approximated by
    ``userFeatures.dot(itemFeatures.T)``.  Every cell is weighted by
    ``confidence = 1 + a * data`` and both factor matrices are L2-regularised
    with strength ``lambd``.

    Parameters
    ----------
    data : 2-D array indexed as ``data[user, item]``.
        NOTE(review): assumed to be a dense numpy array -- confirm against
        the loader that produces it.
    featureCount : number of latent features per user / item.
    a : scale applied to ``data`` when building the confidence matrix.
    lambd : L2 regularisation strength.
    """

    def __init__(self, data, featureCount, a=40, lambd=10):
        self.data = data
        self.featureCount = featureCount
        self.a = a
        self.lambd = lambd
        self.userCount = data.shape[0]
        self.itemCount = data.shape[1]
        # Both factor matrices are stored row-wise: one row per user / item.
        self.userFeatures = np.random.rand(self.userCount, self.featureCount)
        self.itemFeatures = np.random.rand(self.itemCount, self.featureCount)
        self.confidence = 1 + a * self.data
        # One regularised, confidence-weighted loss value per train() iteration.
        self.loss = []

    def MSE(self):
        """Return the summed squared reconstruction error.

        Despite the name this is the sum (not the mean) of the squared
        differences between ``data`` and ``userFeatures . itemFeatures^T``.
        """
        # itemFeatures is (itemCount, featureCount), so it has to be
        # transposed before it can be multiplied with userFeatures.
        matrixProduct = np.matmul(self.userFeatures, self.itemFeatures.T)
        return np.sum((self.data - matrixProduct) ** 2)

    @staticmethod
    def _solveFactor(fixedFactors, gram, confidenceVec, dataVec, l2Reg):
        """Solve the regularised weighted least-squares system for one row.

        Computes ``(G + F^T (C - I) F + l2Reg)^-1 F^T C d`` where ``F`` is
        ``fixedFactors``, ``G = F^T F`` is ``gram``, ``C = diag(confidenceVec)``
        and ``d`` is ``dataVec``.  The diagonal matrix is never materialised:
        scaling the columns of ``F^T`` by broadcasting gives the same product
        while only allocating a (featureCount, n) temporary.
        """
        weighted = fixedFactors.T * (confidenceVec - 1)
        lhs = gram + weighted.dot(fixedFactors) + l2Reg
        rhs = fixedFactors.T.dot(confidenceVec * dataVec)
        # solve() avoids forming the explicit inverse.
        return np.linalg.solve(lhs, rhs)

    def updateItemFeatures(self):
        """Recompute every row of ``itemFeatures`` with ``userFeatures`` fixed."""
        l2Reg = self.lambd * np.identity(self.featureCount)
        fixedUsers = self.userFeatures.T.dot(self.userFeatures)
        for i in range(self.itemCount):
            itemConf = np.asarray(self.confidence[:, i], dtype=float).ravel()
            itemData = np.asarray(self.data[:, i], dtype=float).ravel()
            self.itemFeatures[i] = self._solveFactor(
                self.userFeatures, fixedUsers, itemConf, itemData, l2Reg)

    def updateSingleUser(self, i, fixedItems=None, l2Reg=None):
        """Return the new feature vector of user ``i`` (nothing is stored).

        ``fixedItems`` (``itemFeatures^T . itemFeatures``) and ``l2Reg`` can
        be passed in by a caller that already computed them; when omitted
        they are derived from the current state.
        """
        if l2Reg is None:
            l2Reg = self.lambd * np.identity(self.featureCount)
        if fixedItems is None:
            fixedItems = self.itemFeatures.T.dot(self.itemFeatures)
        userConf = np.asarray(self.confidence[i, :], dtype=float).ravel()
        userData = np.asarray(self.data[i, :], dtype=float).ravel()
        return self._solveFactor(
            self.itemFeatures, fixedItems, userConf, userData, l2Reg)

    def updateUserFeatures(self):
        """Recompute every row of ``userFeatures`` using a thread pool."""
        # Identical for every user, so compute them once up front.
        l2Reg = self.lambd * np.identity(self.featureCount)
        fixedItems = self.itemFeatures.T.dot(self.itemFeatures)

        def solveUser(i):
            return self.updateSingleUser(i, fixedItems, l2Reg)

        with ThreadPoolExecutor(max_workers=10) as executor:
            # map() yields results in input order, so enumerate() recovers
            # the user index that belongs to each solved vector.
            for i, el in enumerate(executor.map(solveUser, range(self.userCount))):
                self.userFeatures[i] = el

    def train(self, iterations=10):
        """Run ``iterations`` alternating updates and record the loss.

        After every iteration the confidence-weighted squared error plus the
        L2 penalty is appended to ``self.loss`` and the summed squared error
        is printed.
        """
        for i in range(iterations):
            print("iteration: " + str(i))
            self.updateUserFeatures()
            print("User features done")
            self.updateItemFeatures()
            print("Item features done")
            residual = self.data - self.userFeatures.dot(self.itemFeatures.T)
            tempLoss = (self.confidence * residual ** 2).sum()
            tempL2 = (pow(self.userFeatures, 2).sum() + pow(self.itemFeatures, 2).sum())
            # The penalty is lambd * (||U||^2 + ||V||^2).
            self.loss.append(tempLoss + self.lambd * tempL2)
            print("MSE: " + str(self.MSE()))
# https://everdark.github.io/k9/notebooks/ml/matrix_factorization/matrix_factorization.nb.html
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# 5 latent features, confidence scale a=40, regularisation lambd=10.
model = ALSMF(ratings, 5, 40, 10)
model.train()
``` ```
%% Output %% Output
iteration: 0 iteration: 0
Jobber med user features 1
1000
20344
Jobber med user features 2
Jobber med user features 3
Jobber med user features 1
1000
20344
Jobber med user features 2
--------------------------------------------------------------------------- ---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last) MemoryError Traceback (most recent call last)
d:\Projects\RecommenderSystems\recommender-system\ALSMF.ipynb Cell 3' in <cell line: 2>() d:\Projects\RecommenderSystems\recommender-system\ALSMF.ipynb Cell 3' in <cell line: 2>()
<a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000002?line=0'>1</a> model = ALSMF(ratings, 5, 40, 10) <a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000002?line=0'>1</a> model = ALSMF(ratings, 5, 40, 10)
----> <a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000002?line=1'>2</a> model.train() ----> <a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000002?line=1'>2</a> model.train()
d:\Projects\RecommenderSystems\recommender-system\ALSMF.ipynb Cell 2' in ALSMF.train(self, iterations) d:\Projects\RecommenderSystems\recommender-system\ALSMF.ipynb Cell 2' in ALSMF.train(self, iterations)
<a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000001?line=49'>50</a> for i in range(iterations): <a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000001?line=54'>55</a> for i in range(iterations):
<a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000001?line=50'>51</a> print("iteration: " + str(i)) <a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000001?line=55'>56</a> print("iteration: " + str(i))
---> <a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000001?line=51'>52</a> self.updateUserFeatures(l2Reg) ---> <a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000001?line=56'>57</a> self.updateUserFeatures()
<a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000001?line=52'>53</a> print("User features done") <a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000001?line=57'>58</a> print("User features done")
<a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000001?line=53'>54</a> self.updateItemFeatures(l2Reg) <a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000001?line=58'>59</a> self.updateItemFeatures()
d:\Projects\RecommenderSystems\recommender-system\ALSMF.ipynb Cell 2' in ALSMF.updateUserFeatures(self, l2Reg) d:\Projects\RecommenderSystems\recommender-system\ALSMF.ipynb Cell 2' in ALSMF.updateUserFeatures(self)
<a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000001?line=40'>41</a> userConf = np.diag(self.confidence[i, :]) <a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000001?line=48'>49</a> def updateUserFeatures(self):
<a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000001?line=41'>42</a> print("Jobber med user features 2") <a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000001?line=49'>50</a> with ThreadPoolExecutor(max_workers = 10) as executor:
---> <a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000001?line=42'>43</a> w_u = fixedItems + self.itemFeatures.T.dot(userConf - np.identity(self.itemCount)).dot(self.itemFeatures) + l2Reg ---> <a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000001?line=50'>51</a> for i, el in executor.map(self.updateSingleUser, range(self.userCount)): self.userFeatures[i] = el
<a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000001?line=43'>44</a> print("Jobber med user features 3") File C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.9_3.9.3312.0_x64__qbz5n2kfra8p0\lib\concurrent\futures\_base.py:609, in Executor.map.<locals>.result_iterator()
<a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000001?line=44'>45</a> self.userFeatures[i] = np.linalg.inv(w_u).dot(self.itemFeatures.T.dot(userConf).dot(self.data[i,:])) <a href='file:///c%3A/Program%20Files/WindowsApps/PythonSoftwareFoundation.Python.3.9_3.9.3312.0_x64__qbz5n2kfra8p0/lib/concurrent/futures/_base.py?line=605'>606</a> while fs:
KeyboardInterrupt: <a href='file:///c%3A/Program%20Files/WindowsApps/PythonSoftwareFoundation.Python.3.9_3.9.3312.0_x64__qbz5n2kfra8p0/lib/concurrent/futures/_base.py?line=606'>607</a> # Careful not to keep a reference to the popped future
<a href='file:///c%3A/Program%20Files/WindowsApps/PythonSoftwareFoundation.Python.3.9_3.9.3312.0_x64__qbz5n2kfra8p0/lib/concurrent/futures/_base.py?line=607'>608</a> if timeout is None:
--> <a href='file:///c%3A/Program%20Files/WindowsApps/PythonSoftwareFoundation.Python.3.9_3.9.3312.0_x64__qbz5n2kfra8p0/lib/concurrent/futures/_base.py?line=608'>609</a> yield fs.pop().result()
<a href='file:///c%3A/Program%20Files/WindowsApps/PythonSoftwareFoundation.Python.3.9_3.9.3312.0_x64__qbz5n2kfra8p0/lib/concurrent/futures/_base.py?line=609'>610</a> else:
<a href='file:///c%3A/Program%20Files/WindowsApps/PythonSoftwareFoundation.Python.3.9_3.9.3312.0_x64__qbz5n2kfra8p0/lib/concurrent/futures/_base.py?line=610'>611</a> yield fs.pop().result(end_time - time.monotonic())
File C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.9_3.9.3312.0_x64__qbz5n2kfra8p0\lib\concurrent\futures\_base.py:446, in Future.result(self, timeout)
<a href='file:///c%3A/Program%20Files/WindowsApps/PythonSoftwareFoundation.Python.3.9_3.9.3312.0_x64__qbz5n2kfra8p0/lib/concurrent/futures/_base.py?line=443'>444</a> raise CancelledError()
<a href='file:///c%3A/Program%20Files/WindowsApps/PythonSoftwareFoundation.Python.3.9_3.9.3312.0_x64__qbz5n2kfra8p0/lib/concurrent/futures/_base.py?line=444'>445</a> elif self._state == FINISHED:
--> <a href='file:///c%3A/Program%20Files/WindowsApps/PythonSoftwareFoundation.Python.3.9_3.9.3312.0_x64__qbz5n2kfra8p0/lib/concurrent/futures/_base.py?line=445'>446</a> return self.__get_result()
<a href='file:///c%3A/Program%20Files/WindowsApps/PythonSoftwareFoundation.Python.3.9_3.9.3312.0_x64__qbz5n2kfra8p0/lib/concurrent/futures/_base.py?line=446'>447</a> else:
<a href='file:///c%3A/Program%20Files/WindowsApps/PythonSoftwareFoundation.Python.3.9_3.9.3312.0_x64__qbz5n2kfra8p0/lib/concurrent/futures/_base.py?line=447'>448</a> raise TimeoutError()
File C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.9_3.9.3312.0_x64__qbz5n2kfra8p0\lib\concurrent\futures\_base.py:391, in Future.__get_result(self)
<a href='file:///c%3A/Program%20Files/WindowsApps/PythonSoftwareFoundation.Python.3.9_3.9.3312.0_x64__qbz5n2kfra8p0/lib/concurrent/futures/_base.py?line=388'>389</a> if self._exception:
<a href='file:///c%3A/Program%20Files/WindowsApps/PythonSoftwareFoundation.Python.3.9_3.9.3312.0_x64__qbz5n2kfra8p0/lib/concurrent/futures/_base.py?line=389'>390</a> try:
--> <a href='file:///c%3A/Program%20Files/WindowsApps/PythonSoftwareFoundation.Python.3.9_3.9.3312.0_x64__qbz5n2kfra8p0/lib/concurrent/futures/_base.py?line=390'>391</a> raise self._exception
<a href='file:///c%3A/Program%20Files/WindowsApps/PythonSoftwareFoundation.Python.3.9_3.9.3312.0_x64__qbz5n2kfra8p0/lib/concurrent/futures/_base.py?line=391'>392</a> finally:
<a href='file:///c%3A/Program%20Files/WindowsApps/PythonSoftwareFoundation.Python.3.9_3.9.3312.0_x64__qbz5n2kfra8p0/lib/concurrent/futures/_base.py?line=392'>393</a> # Break a reference cycle with the exception in self._exception
<a href='file:///c%3A/Program%20Files/WindowsApps/PythonSoftwareFoundation.Python.3.9_3.9.3312.0_x64__qbz5n2kfra8p0/lib/concurrent/futures/_base.py?line=393'>394</a> self = None
File C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.9_3.9.3312.0_x64__qbz5n2kfra8p0\lib\concurrent\futures\thread.py:58, in _WorkItem.run(self)
<a href='file:///c%3A/Program%20Files/WindowsApps/PythonSoftwareFoundation.Python.3.9_3.9.3312.0_x64__qbz5n2kfra8p0/lib/concurrent/futures/thread.py?line=54'>55</a> return
<a href='file:///c%3A/Program%20Files/WindowsApps/PythonSoftwareFoundation.Python.3.9_3.9.3312.0_x64__qbz5n2kfra8p0/lib/concurrent/futures/thread.py?line=56'>57</a> try:
---> <a href='file:///c%3A/Program%20Files/WindowsApps/PythonSoftwareFoundation.Python.3.9_3.9.3312.0_x64__qbz5n2kfra8p0/lib/concurrent/futures/thread.py?line=57'>58</a> result = self.fn(*self.args, **self.kwargs)
<a href='file:///c%3A/Program%20Files/WindowsApps/PythonSoftwareFoundation.Python.3.9_3.9.3312.0_x64__qbz5n2kfra8p0/lib/concurrent/futures/thread.py?line=58'>59</a> except BaseException as exc:
<a href='file:///c%3A/Program%20Files/WindowsApps/PythonSoftwareFoundation.Python.3.9_3.9.3312.0_x64__qbz5n2kfra8p0/lib/concurrent/futures/thread.py?line=59'>60</a> self.future.set_exception(exc)
d:\Projects\RecommenderSystems\recommender-system\ALSMF.ipynb Cell 2' in ALSMF.updateSingleUser(self, i)
<a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000001?line=41'>42</a> userConf = np.diag(self.confidence[i, :])
<a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000001?line=42'>43</a> #print("Jobber med user features 2")
---> <a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000001?line=43'>44</a> w_u = fixedItems + self.itemFeatures.T.dot(userConf - np.identity(self.itemCount)).dot(self.itemFeatures) + l2Reg
<a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000001?line=44'>45</a> #print("Jobber med user features 3")
<a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000001?line=45'>46</a> return np.linalg.inv(w_u).dot(self.itemFeatures.T.dot(userConf).dot(self.data[i,:]))
MemoryError: Unable to allocate 3.08 GiB for an array with shape (20344, 20344) and data type float64
......
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
from main import * from main import *
import os import os
from collections import Counter from collections import Counter
from sklearn.decomposition import NMF from sklearn.decomposition import NMF
import numpy as np import numpy as np
# Folder that holds the raw data files read by load_data().
DATA_FOLDER = "active1000"
files = os.listdir(DATA_FOLDER)
# Index of one sample file; only used to build `filepath` below.
ARBITRARY_INDEX = 0
filepath = os.path.join(DATA_FOLDER, files[ARBITRARY_INDEX])
# load_data / load_dataset are provided by the star-import of `main`.
data = load_data(DATA_FOLDER)
# NOTE(review): presumably a dense user x item matrix -- confirm against
# main.load_dataset.
ratings = load_dataset(data)
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
class MFAlgorithm():
    """Matrix factorisation of a user-item matrix trained by gradient steps.

    ``data`` (userCount x itemCount) is approximated by
    ``userFeatures`` (userCount x featureCount) times
    ``itemFeatures`` (featureCount x itemCount).

    Parameters
    ----------
    data : 2-D array indexed as ``data[user, item]``.
    featureCount : number of latent features.
    """

    def __init__(self, data, featureCount):
        self.data = data
        self.featureCount = featureCount
        self.userCount = data.shape[0]
        self.itemCount = data.shape[1]
        # Randomly initialise the feature matrices for users and items.
        self.userFeatures = np.random.uniform(
            low=0.1, high=0.9, size=(self.userCount, self.featureCount))
        self.itemFeatures = np.random.uniform(
            low=0.1, high=0.9, size=(self.featureCount, self.itemCount))

    def MSE(self):
        """Return the summed squared reconstruction error.

        Compares the dot product of each user-feature row and feature-item
        column with the matching user-item cell.  Despite the name the
        squared differences are summed, not averaged.
        """
        matrixProduct = np.matmul(self.userFeatures, self.itemFeatures)
        return np.sum((self.data - matrixProduct) ** 2)

    def singleGradient(self, userRowIndex, itemCollumnIndex, userIndex=None, itemIndex=None):
        """Return the descent direction of one user-item cell for one feature.

        Exactly one of ``userIndex`` / ``itemIndex`` must be given; it is the
        feature index of the user-feature or feature-item cell that is being
        differentiated.  The value is ``2 * (rating - prediction) * partner``
        where ``partner`` is the matching entry of the other factor, i.e. the
        negative gradient of the squared error.

        Raises
        ------
        ValueError
            If both or neither of ``userIndex`` / ``itemIndex`` are given.
        """
        if userIndex is not None and itemIndex is not None:
            raise ValueError("Too many elements")
        if userIndex is None and itemIndex is None:
            raise ValueError("not enough elements")
        userRow = self.userFeatures[userRowIndex, :]
        itemCollumn = self.itemFeatures[:, itemCollumnIndex]
        UIRating = float(self.data[userRowIndex, itemCollumnIndex])
        prediction = float(np.dot(userRow, itemCollumn))
        if userIndex is not None:
            partner = float(itemCollumn[userIndex])
        else:
            partner = float(userRow[itemIndex])
        return (UIRating - prediction) * partner * 2

    def userFeatureGradient(self, userRow, userIndex):
        """Return the mean of singleGradient over all items for one user cell."""
        ratings = np.asarray(self.data[userRow, :], dtype=float).ravel()
        residual = ratings - self.userFeatures[userRow, :].dot(self.itemFeatures)
        total = 2 * float(np.dot(residual, self.itemFeatures[userIndex, :]))
        return total / self.itemCount

    def itemFeatureGradient(self, itemCollumn, itemIndex):
        """Return the mean of singleGradient over all users for one item cell."""
        ratings = np.asarray(self.data[:, itemCollumn], dtype=float).ravel()
        residual = ratings - self.userFeatures.dot(self.itemFeatures[:, itemCollumn])
        total = 2 * float(np.dot(residual, self.userFeatures[:, itemIndex]))
        # The sum runs over users, so the mean divides by the user count.
        return total / self.userCount

    def updateUserFeatures(self, learningRate):
        """Take one step on every user-feature cell, in place."""
        # Cells are updated one at a time, so each gradient already sees the
        # updates made earlier in the same sweep.
        for i in range(self.userCount):
            for j in range(self.featureCount):
                self.userFeatures[i, j] += learningRate * self.userFeatureGradient(userRow=i, userIndex=j)

    def updateItemFeatures(self, learningRate):
        """Take one step on every feature-item cell, in place."""
        for i in range(self.featureCount):
            for j in range(self.itemCount):
                self.itemFeatures[i, j] += learningRate * self.itemFeatureGradient(itemCollumn=j, itemIndex=i)

    def trainModel(self, learningRate=0.5, iterations=100):
        """Alternate user and item sweeps, printing progress after each one."""
        for i in range(iterations):
            print(i)
            self.updateUserFeatures(learningRate=learningRate)
            print("user feature done")
            self.updateItemFeatures(learningRate=learningRate)
            print("item feature done")
            print("MSE: " + str(self.MSE()))

    def predict(self):
        """Return the (userCount, itemCount) matrix of predicted ratings."""
        return np.matmul(self.userFeatures, self.itemFeatures)
# Factorise the ratings matrix with 20 latent features.
model = MFAlgorithm(ratings, 20)
model.trainModel()
``` ```
%% Output %% Output
0 0
user feature done user feature done
print item feature done item feature done
MSE: 5327020.971941057
--------------------------------------------------------------------------- 1
TypeError Traceback (most recent call last) user feature done
d:\Projects\RecommenderSystems\recommender-system\MFAlgorithm.ipynb Cell 2' in <cell line: 89>() item feature done
<a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/MFAlgorithm.ipynb#ch0000001?line=84'>85</a> return predictions MSE: 2806226.5704981834
<a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/MFAlgorithm.ipynb#ch0000001?line=87'>88</a> model = MFAlgorithm(ratings, 20) 2
---> <a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/MFAlgorithm.ipynb#ch0000001?line=88'>89</a> model.trainModel() user feature done
d:\Projects\RecommenderSystems\recommender-system\MFAlgorithm.ipynb Cell 2' in MFAlgorithm.trainModel(self, learningRate, iterations) item feature done
<a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/MFAlgorithm.ipynb#ch0000001?line=74'>75</a> self.updateItemFeatures(learningRate=learningRate) MSE: 1824206.2380513693
<a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/MFAlgorithm.ipynb#ch0000001?line=75'>76</a> print("print item feature done") 3
---> <a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/MFAlgorithm.ipynb#ch0000001?line=76'>77</a> print("MSE: " + self.MSE()) user feature done
TypeError: can only concatenate str (not "numpy.float64") to str item feature done
MSE: 1400020.315815593
4
user feature done
item feature done
MSE: 1193679.49768956
5
user feature done
item feature done
MSE: 1077231.1141030204
6
user feature done
item feature done
MSE: 1000329.976728513
7
user feature done
item feature done
MSE: 942611.4961759587
8
user feature done
item feature done
MSE: 895570.9313176401
9
user feature done
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment