Skip to content
Snippets Groups Projects
Commit 6ba8b274 authored by nilstes's avatar nilstes
Browse files

Final kmeans code

parent ee6945f7
No related branches found
No related tags found
No related merge requests found
import pandas as pd import pandas as pd
import numpy as np
from sklearn.cluster import KMeans from sklearn.cluster import KMeans
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
from io import StringIO from io import StringIO
from mpl_toolkits.mplot3d import Axes3D
csvTrain = StringIO("""User,Beatles,Beach Boys,Metallica csvTrain = StringIO("""User,Beatles,Beach Boys,Metallica
Nils,9,8,4 Nils,9,8,4
Anita,8,8,5 Anita,8,8,5
Tore,4,2,1 Tore,4,2,1
Solveig,3,2,2 Solveig,3,2,2
Vibeke,2,4,2 Vibeke,2,4,2
Kristine,9,8,8 Kristine,9,8,8
Ola,8,8,8 Ola,8,8,8
Anne,8,9,7 Anne,8,9,7
Per,1,1,6 Per,1,1,6
Sissel,2,1,9 Sissel,2,1,9
Ole,1,2,7 Ole,1,2,7
...@@ -27,29 +23,15 @@ Sigurd,1,1,8 ...@@ -27,29 +23,15 @@ Sigurd,1,1,8
# Load the ratings table from the in-memory CSV (columns: User plus one
# rating column per band) and show the first rows as a sanity check.
df = pd.read_csv(csvTrain, sep=',')
print(df.head())

# Keep the user names aside, then drop that column so only the numeric
# rating columns remain as the feature matrix for clustering.
users = df['User']
df = df.drop('User', axis=1)
data = df.values
# Create models with different k and record each model's inertia, i.e. the
# within-cluster sum of squares (WCSS).
k_values = range(1, 7)
wcss = []
for k in k_values:
    # n_init and random_state are pinned so the curve is reproducible
    # from run to run (k-means starts from random centroids otherwise).
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=0).fit(data)
    wcss.append(kmeans.inertia_)

# Plot the elbow analysis to find optimal k
plt.plot(k_values, wcss)
plt.title('The elbow method')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')  # within cluster sum of squares
plt.show()
# Use k=4
k = 4
# n_init and random_state are pinned so the cluster labels (and therefore
# the point colours below) are the same on every run.
kmeans = KMeans(n_clusters=k, n_init=10, random_state=0).fit(data)

# Plot 3d graph: one axis per band, points coloured by assigned cluster.
fig = plt.figure(figsize=(12, 10))  # figsize sets the diagram size
ax = fig.add_subplot(111, projection='3d')
ax.scatter(data[:, 0], data[:, 1], data[:, 2], c=kmeans.labels_, s=40)
ax.set_xlabel('Beatles')
ax.set_ylabel('Beach Boys')
ax.set_zlabel('Metallica')
plt.show()
# Create models with different k and record each model's inertia, i.e. the
# within-cluster sum of squares (WCSS).
k_values = range(1, 7)
wcss = []
for k in k_values:
    # n_init and random_state are pinned so the curve is reproducible
    # from run to run (k-means starts from random centroids otherwise).
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=0).fit(data)
    wcss.append(kmeans.inertia_)

# Print the cluster index assigned to a new rating vector.
# NOTE(review): at this point `kmeans` is the last model fitted by the loop
# above (k=6), not the k=4 model - confirm that is intended.
print(kmeans.predict([[1,2,3]]))

# Plot the elbow analysis to find optimal k
plt.plot(k_values, wcss)
plt.title('The elbow method')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')  # within cluster sum of squares
plt.show()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment