Skip to content
Snippets Groups Projects
Commit b287213b authored by magnubau's avatar magnubau
Browse files
parents 7c8df5bc 97cf5191
No related branches found
No related tags found
No related merge requests found
File added
File added
This diff is collapsed.
myfunc.py 0 → 100644
import matplotlib.animation as ani
import matplotlib.cm as cmx
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import numpy as np
import json
import os
import pandas as pd
import statistics
from sklearn.metrics.pairwise import cosine_similarity
from matplotlib.patches import Ellipse
from PIL import Image
from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
def load_data(path):
    """
    Load events from files and convert to dataframe.

    Every regular file directly inside ``path`` is read as JSON Lines (one
    JSON document per line). Sub-directories are not descended into.

    Args:
        path: Directory that contains the event files.

    Returns:
        pandas.DataFrame built from every decoded, non-null JSON value.
        Files are visited in sorted name order so the row order is
        reproducible across file systems.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    events = []
    # os.listdir returns entries in arbitrary order; sort for determinism.
    for entry in sorted(os.listdir(path)):
        file_name = os.path.join(path, entry)
        if not os.path.isfile(file_name):
            continue
        # The context manager closes each handle as soon as the file is read
        # instead of leaving it to the garbage collector.
        with open(file_name) as handle:
            for line in handle:
                line = line.strip()
                if not line:
                    # Blank lines (e.g. a trailing newline) carry no event.
                    continue
                obj = json.loads(line)
                if obj is not None:
                    events.append(obj)
    return pd.DataFrame(events)
def load_dataset(df):
    """
    Convert dataframe to user-item-interaction matrix, which is used for
    Matrix Factorization based recommendation.
    In the rating matrix, clicked events are referred to as 1 and all others
    as 0.

    Args:
        df: Event dataframe with ``userId``, ``documentId`` and ``time``
            columns. It is not modified.

    Returns:
        2-D float numpy array of shape (n_users, n_items). Rows follow the
        sorted order of ``userId``; columns follow the order in which each
        ``documentId`` first appears once events are sorted by
        (userId, time). An input without usable events yields a (0, 0) array.
    """
    # An event without a user or without a document cannot be placed in the
    # matrix, so both kinds of rows are discarded up front.
    events = df.dropna(subset=['userId', 'documentId'])
    events = events.drop_duplicates(subset=['userId', 'documentId'])
    events = events.sort_values(by=['userId', 'time'])

    # factorize assigns consecutive 0-based codes in order of first
    # appearance, which is exactly the numbering the matrix needs.
    user_idx, users = pd.factorize(events['userId'])
    item_idx, items = pd.factorize(events['documentId'])

    ratings = np.zeros((len(users), len(items)))
    # One vectorised assignment marks every (user, item) click.
    ratings[user_idx, item_idx] = 1.0
    return ratings
def get_corr_list(user, M):
    """
    Rank every other column of ``M`` by cosine similarity to ``user``.

    Args:
        user: Column label of the reference user in ``M``.
        M: pandas-style table with one column per user (the rows are
            presumably items; confirm against the caller).

    Returns:
        List of ``(column_label, similarity)`` tuples for every column other
        than ``user``, sorted by similarity in descending order. The sort is
        stable, so equally similar columns keep their order from ``M``.

    Raises:
        KeyError: If ``user`` is not a column of ``M``.
    """
    # The reference vector does not change between iterations, so it is
    # extracted and reshaped once instead of once per compared column.
    target = M[user].to_numpy().reshape(1, -1)
    corr_list = []
    for other in M.columns:
        if other == user:
            continue
        candidate = M[other].to_numpy().reshape(1, -1)
        # cosine_similarity returns a 1x1 matrix for two row vectors.
        corr_list.append((other, cosine_similarity(target, candidate)[0][0]))
    corr_list.sort(key=lambda tup: tup[1], reverse=True)
    return corr_list
def get_rec(user, train_df, test_df, num, simular_users_count, filter=True):
    """
    Recommend the documents clicked most often by a user's nearest neighbours.

    Args:
        user: Pair ``(user_id, neighbours)``. ``neighbours`` is a sequence of
            tuples whose first element is a user id, most similar first
            (presumably the output of ``get_corr_list``). The first element
            is taken to be the user's own id - confirm against callers.
        train_df: Events dataframe (``userId``, ``documentId``) from which
            the neighbours' clicks are counted.
        test_df: Events dataframe (``userId``, ``documentId``) from which the
            user's own clicks are read when ``filter`` is true.
        num: Maximum number of documents to return.
        simular_users_count: Number of leading neighbours to use.
        filter: When true, documents the user clicked in ``test_df`` are
            excluded from the result.

    Returns:
        pandas.Series indexed by ``documentId`` holding click counts among
        the selected neighbours, in descending order, truncated to ``num``.
    """
    neighbour_ids = [pair[0] for pair in user[1][:simular_users_count]]
    neighbour_clicks = train_df[train_df['userId'].isin(neighbour_ids)]
    if filter:
        # Match on the user's id (user[0]). Comparing ``userId`` against the
        # whole (id, neighbours) pair can never match, which would leave the
        # filter without any effect.
        own_rows = test_df[test_df['userId'].isin([user[0]])]
        already_clicked = own_rows['documentId'].to_list()
        neighbour_clicks = neighbour_clicks[
            ~neighbour_clicks['documentId'].isin(already_clicked)
        ]
    titles = neighbour_clicks['documentId'].value_counts()
    return titles[:num]
def get_MSE(reality, prediction, articles):
    """
    Mean squared error between actual and predicted clicks over a catalogue.

    Both ``reality`` and ``prediction`` are turned into 0/1 vectors with one
    position per entry of ``articles`` (1 when the article id occurs in the
    iterable), and the mean squared difference of the two vectors is
    returned. Ids that do not occur in ``articles`` are ignored.

    Args:
        reality: Iterable of article ids that were actually clicked.
        prediction: Iterable of article ids that were recommended.
        articles: Non-empty sequence (list, numpy array, pandas Series) of
            all article ids, defining the vector positions.

    Returns:
        The error as a float in [0, 1].

    Raises:
        ValueError: If ``articles`` is empty.
    """
    catalogue = np.asarray(articles)
    if catalogue.size == 0:
        raise ValueError("articles must contain at least one item")
    # list(...) lets sets and other non-array iterables be used safely:
    # np.isin would otherwise treat a set as a single scalar element.
    # Y_true = Y (original values)
    y_true = np.isin(catalogue, list(reality)).astype(float)
    # Y_pred = Y' (calculated values)
    y_pred = np.isin(catalogue, list(prediction)).astype(float)
    # Calculation of Mean Squared Error (MSE)
    return float(np.mean((y_true - y_pred) ** 2))
def get_MSE2(reality, prediction, articles):
    """
    Mean squared error restricted to articles that were clicked or predicted.

    Works like a catalogue-wide MSE on 0/1 click vectors, but only the
    positions where the actual or the predicted vector is 1 are kept, so the
    many articles that are neither clicked nor recommended do not dilute the
    error. Ids that do not occur in ``articles`` are ignored.

    Args:
        reality: Iterable of article ids that were actually clicked.
        prediction: Iterable of article ids that were recommended.
        articles: Sequence (list, numpy array, pandas Series) of all article
            ids, defining the vector positions.

    Returns:
        The error as a float in [0, 1]. When no article is either clicked or
        predicted there is nothing to disagree on and 0.0 is returned.
    """
    catalogue = np.asarray(articles)
    if catalogue.size == 0:
        return 0.0
    # list(...) lets sets and other non-array iterables be used safely:
    # np.isin would otherwise treat a set as a single scalar element.
    # Y_true = Y (original values)
    y_true = np.isin(catalogue, list(reality)).astype(float)
    # Y_pred = Y' (calculated values)
    y_pred = np.isin(catalogue, list(prediction)).astype(float)
    # Keep only the positions where at least one of the two vectors is set.
    relevant = (y_true == 1) | (y_pred == 1)
    if not relevant.any():
        # Averaging over zero positions is undefined; report "no error".
        return 0.0
    # Calculation of Mean Squared Error (MSE)
    return float(np.mean((y_true[relevant] - y_pred[relevant]) ** 2))
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment