-
Notifications
You must be signed in to change notification settings - Fork 0
/
contentbasedrec.py
45 lines (39 loc) · 1.8 KB
/
contentbasedrec.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
class ContentBasedRec:
    """Content-based recommender that ranks titles by text similarity.

    The data frame must provide three columns: ``title``, ``index`` and
    ``combined_features``. Similarity between two rows is the cosine
    similarity of the token-count vectors built from ``combined_features``.

    NOTE(review): values of the ``index`` column are used directly as
    positions into the similarity matrix, so they are assumed to equal each
    row's 0-based position in ``df`` -- confirm against the code that
    builds the frame.
    """

    def __init__(self, name, df):
        """Store the query title and keep handles to the columns used later.

        Args:
            name: Title to find recommendations for.
            df: pandas DataFrame with ``title``, ``index`` and
                ``combined_features`` columns.

        Raises:
            KeyError: If ``df`` lacks one of the required columns.
        """
        self.name = name
        self.df = df
        self.title = df['title']
        self.index = df['index']
        self.combined_features = df['combined_features']

    def cosine_sim(self):
        """Return the pairwise cosine-similarity matrix of all rows.

        Returns:
            pandas.DataFrame: Square frame with default integer labels;
            column ``i`` holds the similarity of every row to row ``i``.
        """
        # Function-local imports: scikit-learn and pandas are only loaded
        # when similarities are actually computed.
        from sklearn.feature_extraction.text import CountVectorizer
        from sklearn.metrics.pairwise import cosine_similarity
        import pandas as pd

        # Convert the combined features into a matrix of token counts.
        count_matrix = CountVectorizer().fit_transform(self.combined_features)
        return pd.DataFrame(cosine_similarity(count_matrix))

    def title_index(self):
        """Return the ``index`` value of the first row titled ``self.name``.

        Raises:
            IndexError: If no row in the data frame has that title.
        """
        matches = self.df[self.title == self.name]["index"].values
        if matches.size == 0:
            # Same exception type as the bare ``.values[0]`` lookup would
            # raise, but with a message that names the missing title.
            raise IndexError(
                "title {!r} not found in the data frame".format(self.name)
            )
        return matches[0]

    def similar_movies(self):
        """Return ``(position, score)`` pairs sorted by descending score.

        The scores are the similarity-matrix column selected by
        ``title_index()``; ``position`` is the enumeration position of each
        score within that column.

        Raises:
            IndexError: If the query title is not in the data frame.
        """
        # Resolve the title first so an unknown title fails before the
        # similarity matrix is computed.
        query_position = self.title_index()
        scores = self.cosine_sim()[query_position]
        return sorted(enumerate(scores), key=lambda pair: pair[1], reverse=True)

    def movie_list(self, limit=15):
        """Return up to ``limit`` distinct titles, most similar first.

        The queried title itself is part of the ranking (it is compared
        against itself too), so it can appear in the result.

        Args:
            limit: Maximum number of distinct titles to return. Defaults
                to 15.

        Returns:
            list: Distinct titles in descending similarity order. Shorter
            than ``limit`` when the data holds fewer distinct titles;
            empty when ``limit`` is not positive.

        Raises:
            IndexError: If the query title is not in the data frame, or a
                ranked position has no row with a matching ``index`` value.
        """
        recommendations = []
        if limit <= 0:
            return recommendations

        for position, _score in self.similar_movies():
            title = self.df[self.index == position]["title"].values[0]
            if title in recommendations:
                continue  # the same title may occur in several rows
            recommendations.append(title)
            if len(recommendations) >= limit:
                break

        # Return what was collected even when fewer than ``limit`` distinct
        # titles exist, rather than falling through without a value.
        print('Here are a few recommendations for you')
        return recommendations