-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathrecommmender.py
45 lines (31 loc) · 1.3 KB
/
recommmender.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
'''Simple recommender system:
Recommends the top 10 movies, ranked by weighted rating score,
among movies whose vote count is at or above the 75th percentile
of the 45,000 movies in the Full MovieLens dataset (movies_metadata.csv)'''
# Import Pandas
import pandas as pd
# Load the movie metadata. low_memory=False disables chunked parsing so
# column dtypes are inferred from the whole file at once.
data = pd.read_csv('movies_metadata.csv', low_memory=False)
# Print the first 5 rows
print(data.head(5))
# C: mean of the 'vote_average' column over the whole dataset
C = data['vote_average'].mean()
print("Average rating on scale of 10: ", C)
# m: minimum number of votes required to be in the chart, taken as the
# 75th percentile of the 'vote_count' column
m = data['vote_count'].quantile(0.75)
print("Minimum number of votes required to be in the chart:", m)
# Keep only the movies with at least m votes. Filter first and copy the
# selected rows afterwards: `movies` is still independent of `data` (so
# adding columns to it later is safe), but the full dataset is never
# duplicated just to be thrown away.
movies = data.loc[data['vote_count'] >= m].copy()
# Define function to compute weighted rating of each movie
def weightedRating(x, m=m, C=C):
    '''Return the weighted rating of x using the IMDB formula.

    WR = v / (v + m) * R  +  m / (v + m) * C

    x: object indexable by 'vote_count' (v) and 'vote_average' (R).
    m: minimum-votes threshold; defaults to the module-level m as it was
       when this function was defined.
    C: mean vote; defaults to the module-level C as it was when this
       function was defined.
    '''
    votes = x['vote_count']
    rating = x['vote_average']
    # Both terms share the same denominator: votes received plus threshold.
    total = votes + m
    rating_part = votes / total * rating
    prior_part = m / total * C
    return rating_part + prior_part
# Add a 'score' column holding each movie's weighted rating.
# weightedRating only indexes its argument by column name and does
# arithmetic on the result, so passing the whole DataFrame computes every
# score in one vectorized operation instead of one Python-level call per
# row through apply(axis=1).
movies['score'] = weightedRating(movies)
# Sort movies by score, highest first
movies = movies.sort_values('score', ascending=False)
# Print the top 10 movies
print(movies[['title', 'vote_count', 'vote_average', 'score']].head(10))