Skip to content
Snippets Groups Projects
Commit 97cf5191 authored by Matt's avatar Matt
Browse files

Finito 2

parents 74acc629 96bb5456
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id: tags:
``` python
from main import *
import os
from collections import Counter
from sklearn.decomposition import NMF
import numpy as np
# Folder holding the raw data files; it is handed to load_data below.
DATA_FOLDER = "active1000"
ARBITRARY_INDEX = 0

# Path of one file in the folder: whichever entry os.listdir returns at
# position ARBITRARY_INDEX.
files = os.listdir(DATA_FOLDER)
filepath = os.path.join(DATA_FOLDER, files[ARBITRARY_INDEX])

# load_data / load_dataset come from the star import of `main`.
# NOTE(review): presumably `ratings` is the 2-D user-item matrix consumed by
# the models below -- confirm against main.py.
data = load_data(DATA_FOLDER)
ratings = load_dataset(data)
```
%% Output
=================================================================================================
Factorization method: bmf
Initialization method: nndsvd
Basis matrix W:
Mixture (Coefficient) matrix H0:
Distance (Euclidean): 431720.4844872935
Actual number of iterations: 12
Sparseness basis: 0.1187, Sparseness mixture: 0.5198
Explained variance: 0.20623274555185334
Residual sum of squares: 431720.4844872936
=================================================================================================
%% Cell type:code id: tags:
``` python
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import as_completed
class ALSMF():
    """Matrix factorization fitted with confidence-weighted alternating least squares.

    The model approximates ``data`` by ``userFeatures.dot(itemFeatures.T)``.
    Every cell is weighted by ``confidence = 1 + a * data`` and both factor
    matrices are L2-regularized with strength ``lambd``. Each half-step solves
    the closed-form weighted ridge regression for one row of one factor
    matrix while the other factor matrix is held fixed.

    Parameters
    ----------
    data : 2-D array of shape (users, items)
        Observed user-item values. Assumed to be a plain numpy array
        (row/column slicing and element-wise arithmetic are used on it).
    featureCount : int
        Number of latent features per user and per item.
    a : float
        Scale applied to ``data`` when building the confidence weights.
    lambd : float
        L2 regularization strength.
    """

    def __init__(self, data, featureCount, a = 40, lambd = 10):
        self.data = data
        self.featureCount = featureCount
        self.a = a
        self.lambd = lambd
        self.userCount = data.shape[0]
        self.itemCount = data.shape[1]
        # Both factor matrices are stored row-per-entity: (users, k) and (items, k).
        self.userFeatures = np.random.rand(self.userCount, self.featureCount)
        self.itemFeatures = np.random.rand(self.itemCount, self.featureCount)
        self.confidence = 1 + a*self.data
        # One regularized, confidence-weighted loss value per training iteration.
        self.loss = []

    def MSE(self):
        """
        Return the summed squared error between ``data`` and the reconstruction.

        Despite the name, the value is the *sum* (not the mean) of the squared
        differences over all user-item cells.
        """
        # itemFeatures is (items, k), so it has to be transposed to get a
        # (users, items) reconstruction.
        predictions = self.userFeatures.dot(self.itemFeatures.T)
        return np.sum((self.data - predictions)**2)

    def _solveRow(self, fixedFeatures, confidenceRow, observedRow, l2Reg):
        """Solve the weighted ridge regression for a single user or item row.

        Computes ``(F^T C F + l2Reg)^-1 F^T C p`` where ``F`` is the fixed
        factor matrix, ``C = diag(confidenceRow)`` and ``p = observedRow``.
        ``F^T C`` is formed by broadcasting, so the dense diagonal matrix is
        never materialized.
        """
        weighted = fixedFeatures.T * confidenceRow  # F^T C, shape (k, n)
        lhs = weighted.dot(fixedFeatures) + l2Reg
        rhs = weighted.dot(observedRow)
        # Solving the linear system is cheaper and more stable than inverting lhs.
        return np.linalg.solve(lhs, rhs)

    def updateItemFeatures(self, l2Reg):
        """Recompute every item row with the user features held fixed.

        ``l2Reg`` is the (k, k) regularization matrix added to the normal
        equations.
        """
        for i in range(self.itemCount):
            self.itemFeatures[i] = self._solveRow(
                self.userFeatures, self.confidence[:, i], self.data[:, i], l2Reg)

    def updateUserFeatures(self, l2Reg):
        """Recompute every user row with the item features held fixed.

        ``l2Reg`` is the (k, k) regularization matrix added to the normal
        equations.
        """
        for i in range(self.userCount):
            self.userFeatures[i] = self._solveRow(
                self.itemFeatures, self.confidence[i, :], self.data[i, :], l2Reg)

    def train(self, iterations = 10):
        """Run ``iterations`` alternating sweeps (users first, then items).

        After each sweep the regularized weighted loss is appended to
        ``self.loss`` and progress, including the summed squared error, is
        printed.
        """
        l2Reg = self.lambd * np.identity(self.featureCount)
        for i in range(iterations):
            print("iteration: " + str(i))
            self.updateUserFeatures(l2Reg)
            print("User features done")
            self.updateItemFeatures(l2Reg)
            print("Item features done")
            residual = self.data - self.userFeatures.dot(self.itemFeatures.T)
            tempLoss = (self.confidence * residual**2).sum()
            tempL2 = (self.userFeatures**2).sum() + (self.itemFeatures**2).sum()
            # The penalty is lambd times the squared norms, not lambd plus them.
            self.loss.append(tempLoss + self.lambd * tempL2)
            print("MSE: " + str(self.MSE()))
#https://everdark.github.io/k9/notebooks/ml/matrix_factorization/matrix_factorization.nb.html
```
%% Cell type:code id: tags:
``` python
# Fit a 5-feature ALS factorization of the ratings matrix with the default
# number of training iterations.
model = ALSMF(data=ratings, featureCount=5, a=40, lambd=10)
model.train()
```
%% Output
iteration: 0
Jobber med user features 1
1000
20344
Jobber med user features 2
Jobber med user features 3
Jobber med user features 1
1000
20344
Jobber med user features 2
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
d:\Projects\RecommenderSystems\recommender-system\ALSMF.ipynb Cell 3' in <cell line: 2>()
<a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000002?line=0'>1</a> model = ALSMF(ratings, 5, 40, 10)
----> <a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000002?line=1'>2</a> model.train()
d:\Projects\RecommenderSystems\recommender-system\ALSMF.ipynb Cell 2' in ALSMF.train(self, iterations)
<a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000001?line=49'>50</a> for i in range(iterations):
<a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000001?line=50'>51</a> print("iteration: " + str(i))
---> <a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000001?line=51'>52</a> self.updateUserFeatures(l2Reg)
<a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000001?line=52'>53</a> print("User features done")
<a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000001?line=53'>54</a> self.updateItemFeatures(l2Reg)
d:\Projects\RecommenderSystems\recommender-system\ALSMF.ipynb Cell 2' in ALSMF.updateUserFeatures(self, l2Reg)
<a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000001?line=40'>41</a> userConf = np.diag(self.confidence[i, :])
<a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000001?line=41'>42</a> print("Jobber med user features 2")
---> <a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000001?line=42'>43</a> w_u = fixedItems + self.itemFeatures.T.dot(userConf - np.identity(self.itemCount)).dot(self.itemFeatures) + l2Reg
<a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000001?line=43'>44</a> print("Jobber med user features 3")
<a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/ALSMF.ipynb#ch0000001?line=44'>45</a> self.userFeatures[i] = np.linalg.inv(w_u).dot(self.itemFeatures.T.dot(userConf).dot(self.data[i,:]))
KeyboardInterrupt:
%% Cell type:code id: tags:
``` python
from main import *
import os
from collections import Counter
from sklearn.decomposition import NMF
import numpy as np
# Folder holding the raw data files; it is handed to load_data below.
DATA_FOLDER = "active1000"
ARBITRARY_INDEX = 0

# Path of one file in the folder: whichever entry os.listdir returns at
# position ARBITRARY_INDEX.
files = os.listdir(DATA_FOLDER)
filepath = os.path.join(DATA_FOLDER, files[ARBITRARY_INDEX])

# load_data / load_dataset come from the star import of `main`.
# NOTE(review): presumably `ratings` is the 2-D user-item matrix consumed by
# the model below -- confirm against main.py.
data = load_data(DATA_FOLDER)
ratings = load_dataset(data)
```
%% Cell type:code id: tags:
``` python
class MFAlgorithm():
    """Matrix factorization fitted by coordinate-wise gradient steps on squared error.

    ``data`` (users x items) is approximated by
    ``np.matmul(userFeatures, itemFeatures)`` where ``userFeatures`` is
    (users, k) and ``itemFeatures`` is (k, items).

    Parameters
    ----------
    data : 2-D array of shape (users, items)
        Observed user-item values. Assumed to be a plain numpy array.
    featureCount : int
        Number of latent features ``k``.
    """

    def __init__(self, data, featureCount):
        self.data = data
        self.featureCount = featureCount
        self.userCount = data.shape[0]
        self.itemCount = data.shape[1]
        # Randomly initialize the feature matrices for users and items.
        self.userFeatures = np.random.uniform(low = 0.1, high = 0.9, size = (self.userCount, self.featureCount))
        self.itemFeatures = np.random.uniform(low = 0.1, high = 0.9, size = (self.featureCount, self.itemCount))

    def MSE(self):
        """
        Return the summed squared error between ``data`` and the reconstruction.

        Despite the name, the value is the *sum* (not the mean) of the squared
        differences over all user-item cells.
        """
        predictions = np.matmul(self.userFeatures, self.itemFeatures)
        return np.sum((self.data - predictions)**2)

    def singleGradient(self, userRowIndex, itemCollumnIndex, userIndex=None, itemIndex=None):
        """
        Descent direction contributed by one user-item cell to one feature weight.

        Exactly one of ``userIndex`` / ``itemIndex`` must be given; both are
        *feature* indices. ``userIndex`` selects the weight
        ``userFeatures[userRowIndex, userIndex]``; ``itemIndex`` selects the
        weight ``itemFeatures[itemIndex, itemCollumnIndex]``.

        The returned value is ``2 * (rating - prediction) * partner``, i.e. the
        negative of the squared-error derivative, so callers *add* it.

        Raises
        ------
        ValueError
            If both or neither of ``userIndex`` / ``itemIndex`` are supplied.
        """
        if userIndex is not None and itemIndex is not None:
            raise ValueError("Too many elements")
        if userIndex is None and itemIndex is None:
            raise ValueError("not enough elements")

        userRow = self.userFeatures[userRowIndex, :]
        itemCollumn = self.itemFeatures[:, itemCollumnIndex]
        error = float(self.data[userRowIndex, itemCollumnIndex]) - float(np.dot(userRow, itemCollumn))
        if userIndex is not None:
            partner = float(itemCollumn[userIndex])
        else:
            partner = float(userRow[itemIndex])
        return error * partner * 2

    def userFeatureGradient(self, userRow, userIndex):
        """Average descent direction for ``userFeatures[userRow, userIndex]`` over all items."""
        # Residuals of this user's whole row, computed in one vectorized step.
        residuals = np.asarray(self.data[userRow, :], dtype=float) - self.userFeatures[userRow, :].dot(self.itemFeatures)
        total = np.dot(residuals, self.itemFeatures[userIndex, :]) * 2
        return float(total / self.itemCount)

    def itemFeatureGradient(self, itemCollumn, itemIndex):
        """Average descent direction for ``itemFeatures[itemIndex, itemCollumn]`` over all users."""
        # Residuals of this item's whole column, computed in one vectorized step.
        residuals = np.asarray(self.data[:, itemCollumn], dtype=float) - self.userFeatures.dot(self.itemFeatures[:, itemCollumn])
        total = np.dot(residuals, self.userFeatures[:, itemIndex]) * 2
        # The sum runs over users, so the average must divide by userCount.
        return float(total / self.userCount)

    def updateUserFeatures(self, learningRate):
        """Take one step on every user-feature weight, one weight at a time."""
        for i in range(self.userCount):
            for j in range(self.featureCount):
                self.userFeatures[i, j] += learningRate*self.userFeatureGradient(userRow=i, userIndex=j)

    def updateItemFeatures(self, learningRate):
        """Take one step on every item-feature weight, one weight at a time."""
        for i in range(self.featureCount):
            for j in range(self.itemCount):
                self.itemFeatures[i, j] += learningRate*self.itemFeatureGradient(itemCollumn=j, itemIndex=i)

    def trainModel(self, learningRate=0.5, iterations = 100):
        """Alternate user and item sweeps ``iterations`` times, printing progress."""
        for i in range(iterations):
            print(i)
            self.updateUserFeatures(learningRate=learningRate)
            print("user feature done")
            self.updateItemFeatures(learningRate=learningRate)
            print("item feature done")
            print("MSE: " + str(self.MSE()))

    def predict(self):
        """Return the full (users, items) matrix of predicted values."""
        return np.matmul(self.userFeatures, self.itemFeatures)
# Fit a 20-feature factorization of the ratings matrix with the default
# learning rate and iteration count.
model = MFAlgorithm(data=ratings, featureCount=20)
model.trainModel()
```
%% Output
0
user feature done
print item feature done
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
d:\Projects\RecommenderSystems\recommender-system\MFAlgorithm.ipynb Cell 2' in <cell line: 89>()
<a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/MFAlgorithm.ipynb#ch0000001?line=84'>85</a> return predictions
<a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/MFAlgorithm.ipynb#ch0000001?line=87'>88</a> model = MFAlgorithm(ratings, 20)
---> <a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/MFAlgorithm.ipynb#ch0000001?line=88'>89</a> model.trainModel()
d:\Projects\RecommenderSystems\recommender-system\MFAlgorithm.ipynb Cell 2' in MFAlgorithm.trainModel(self, learningRate, iterations)
<a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/MFAlgorithm.ipynb#ch0000001?line=74'>75</a> self.updateItemFeatures(learningRate=learningRate)
<a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/MFAlgorithm.ipynb#ch0000001?line=75'>76</a> print("print item feature done")
---> <a href='vscode-notebook-cell:/d%3A/Projects/RecommenderSystems/recommender-system/MFAlgorithm.ipynb#ch0000001?line=76'>77</a> print("MSE: " + self.MSE())
TypeError: can only concatenate str (not "numpy.float64") to str
from main import *
import os
from collections import Counter
from sklearn.decomposition import NMF
import numpy as np
# Folder holding the raw data files; it is handed to load_data below.
DATA_FOLDER = "active1000"
ARBITRARY_INDEX = 0

# Path of one file in the folder: whichever entry os.listdir returns at
# position ARBITRARY_INDEX.
files = os.listdir(DATA_FOLDER)
filepath = os.path.join(DATA_FOLDER, files[ARBITRARY_INDEX])

# load_data / load_dataset come from the star import of `main`.
# NOTE(review): presumably `ratings` is the 2-D user-item matrix consumed by
# the model below -- confirm against main.py.
data = load_data(DATA_FOLDER)
ratings = load_dataset(data)
#print(data)
#print(ratings)
#print(type(ratings))
class MFAlgorithm():
    """Matrix factorization fitted by coordinate-wise gradient steps on squared error.

    ``data`` (users x items) is approximated by
    ``np.matmul(userFeatures, itemFeatures)`` where ``userFeatures`` is
    (users, k) and ``itemFeatures`` is (k, items).

    Parameters
    ----------
    data : 2-D array of shape (users, items)
        Observed user-item values. Assumed to be a plain numpy array.
    featureCount : int
        Number of latent features ``k``.
    """

    def __init__(self, data, featureCount):
        self.data = data
        self.featureCount = featureCount
        self.userCount = data.shape[0]
        self.itemCount = data.shape[1]
        # Randomly initialize the feature matrices for users and items.
        self.userFeatures = np.random.uniform(low = 0.1, high = 0.9, size = (self.userCount, self.featureCount))
        self.itemFeatures = np.random.uniform(low = 0.1, high = 0.9, size = (self.featureCount, self.itemCount))

    def MSE(self):
        """
        Return the summed squared error between ``data`` and the reconstruction.

        Despite the name, the value is the *sum* (not the mean) of the squared
        differences over all user-item cells.
        """
        predictions = np.matmul(self.userFeatures, self.itemFeatures)
        return np.sum((self.data - predictions)**2)

    def singleGradient(self, userRowIndex, itemCollumnIndex, userIndex=None, itemIndex=None):
        """
        Descent direction contributed by one user-item cell to one feature weight.

        Exactly one of ``userIndex`` / ``itemIndex`` must be given; both are
        *feature* indices. ``userIndex`` selects the weight
        ``userFeatures[userRowIndex, userIndex]``; ``itemIndex`` selects the
        weight ``itemFeatures[itemIndex, itemCollumnIndex]``.

        The returned value is ``2 * (rating - prediction) * partner``, i.e. the
        negative of the squared-error derivative, so callers *add* it.

        Raises
        ------
        ValueError
            If both or neither of ``userIndex`` / ``itemIndex`` are supplied.
        """
        if userIndex is not None and itemIndex is not None:
            raise ValueError("Too many elements")
        if userIndex is None and itemIndex is None:
            raise ValueError("not enough elements")

        userRow = self.userFeatures[userRowIndex, :]
        itemCollumn = self.itemFeatures[:, itemCollumnIndex]
        error = float(self.data[userRowIndex, itemCollumnIndex]) - float(np.dot(userRow, itemCollumn))
        if userIndex is not None:
            partner = float(itemCollumn[userIndex])
        else:
            partner = float(userRow[itemIndex])
        return error * partner * 2

    def userFeatureGradient(self, userRow, userIndex):
        """Average descent direction for ``userFeatures[userRow, userIndex]`` over all items."""
        # Residuals of this user's whole row, computed in one vectorized step.
        residuals = np.asarray(self.data[userRow, :], dtype=float) - self.userFeatures[userRow, :].dot(self.itemFeatures)
        total = np.dot(residuals, self.itemFeatures[userIndex, :]) * 2
        return float(total / self.itemCount)

    def itemFeatureGradient(self, itemCollumn, itemIndex):
        """Average descent direction for ``itemFeatures[itemIndex, itemCollumn]`` over all users."""
        # Residuals of this item's whole column, computed in one vectorized step.
        residuals = np.asarray(self.data[:, itemCollumn], dtype=float) - self.userFeatures.dot(self.itemFeatures[:, itemCollumn])
        total = np.dot(residuals, self.userFeatures[:, itemIndex]) * 2
        # The sum runs over users, so the average must divide by userCount.
        return float(total / self.userCount)

    def updateUserFeatures(self, learningRate):
        """Take one step on every user-feature weight, one weight at a time."""
        for i in range(self.userCount):
            for j in range(self.featureCount):
                self.userFeatures[i, j] += learningRate*self.userFeatureGradient(userRow=i, userIndex=j)

    def updateItemFeatures(self, learningRate):
        """Take one step on every item-feature weight, one weight at a time."""
        for i in range(self.featureCount):
            for j in range(self.itemCount):
                self.itemFeatures[i, j] += learningRate*self.itemFeatureGradient(itemCollumn=j, itemIndex=i)

    def trainModel(self, learningRate=0.1, iterations = 1000):
        """Alternate user and item sweeps, printing the summed squared error every 50th iteration."""
        for i in range(iterations):
            self.updateUserFeatures(learningRate=learningRate)
            self.updateItemFeatures(learningRate=learningRate)
            if i % 50 == 0:
                print(self.MSE())
# Fit a 2-feature factorization of the ratings matrix with the default
# learning rate and iteration count.
model = MFAlgorithm(data=ratings, featureCount=2)
model.trainModel()
#https://towardsdatascience.com/recommender-systems-in-python-from-scratch-643c8fc4f704
#nmf = NMF()
#ratings = numpy.array(ratings)
#W = nmf.fit_transform(ratings)
#H = nmf.components_
#nR = numpy.dot(W, H)
#print(nR)
File added
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment