Movie Recommendation System with Sentiment Analysis of the Reviews (Part 7)
File_7 main.py
This is the main Python file. Here I create a web app using Python's Flask framework.
import numpy as np
import pandas as pd
from flask import Flask, render_template, request
from sklearn.feature_extraction.text import CountVectorizer
import json
import bs4 as bs
import urllib.request
import pickle
import requests
from datetime import date, datetime
Here the machine-learning model and the vectorizer are loaded from their pickle files.
# Load the pickled sentiment classifier and the vectorizer that turns review
# text into the feature matrix the classifier expects.
# NOTE: unpickling can execute arbitrary code, so these files must only ever
# come from a trusted source.
filename = 'svm_model.pkl'
with open(filename, 'rb') as model_file:
    svm = pickle.load(model_file)
# The file name 'tranform.pkl' is reproduced exactly as written; it has to
# match the name of the file on disk.
with open('tranform.pkl', 'rb') as vectorizer_file:
    vectorizer = pickle.load(vectorizer_file)


# converting list of string to list (eg. "["abc","def"]" to ["abc","def"])
def convert_to_list(my_list):
    """Convert a stringified list of strings into a real list.

    Example: '["abc","def"]' -> ['abc', 'def'].

    The text is split on the '","' separator and the opening '["' and
    closing '"]' delimiters are removed.  No unescaping of the items is
    performed.  An empty list ('[]') yields [].

    Args:
        my_list: The stringified list.

    Returns:
        A list with one string per item.
    """
    # Without this guard '[]' would come back as the bogus item '[]'.
    if my_list.strip() == '[]':
        return []
    items = my_list.split('","')
    # Strip only the outer delimiters; a blanket replace() would also eat
    # any '["' or '"]' that happens to occur inside the first/last item.
    if items[0].startswith('["'):
        items[0] = items[0][2:]
    if items[-1].endswith('"]'):
        items[-1] = items[-1][:-2]
    return items


# convert list of numbers to list (eg. "[1,2,3]" to [1,2,3])
def convert_to_list_num(my_list):
    """Split a stringified list of numbers into its items.

    Example: "[1,2,3]" -> ['1', '2', '3'].

    The items are returned as strings; no numeric conversion is done.

    Args:
        my_list: The stringified list.

    Returns:
        A list with one string per comma-separated item.
    """
    items = my_list.split(',')
    items[0] = items[0].replace("[", "")
    items[-1] = items[-1].replace("]", "")
    return items


def get_suggestions():
    """Return the capitalized titles from the 'movie_title' column of main_data.csv."""
    data = pd.read_csv('main_data.csv')
    return list(data['movie_title'].str.capitalize())


app = Flask(__name__)
@app.route("/")
@app.route("/home")
def home():
    """Render the home page together with the titles used for auto-typing."""
    suggestions = get_suggestions()
    return render_template('home.html', suggestions=suggestions)


def _unescape(text):
    """Replace the literal sequences backslash-n and backslash-quote with a newline and a quote."""
    return text.replace(r'\n', '\n').replace(r'\"', '\"')


def _fetch_imdb_reviews(imdb_id):
    """Extract the user reviews of one movie from the IMDB site.

    Returns a list of review texts.  The list is empty when the page cannot
    be fetched or when no review element with a single text node is found.
    """
    from urllib.error import URLError

    url = 'https://www.imdb.com/title/{}/reviews?ref_=tt_ov_rt'.format(imdb_id)
    try:
        # The context manager closes the HTTP response once it has been read.
        with urllib.request.urlopen(url) as response:
            sauce = response.read()
    except URLError as err:
        # A failed scrape should not take the whole page down; show the
        # movie without reviews instead.
        app.logger.warning("Could not fetch IMDB reviews for %s: %s", imdb_id, err)
        return []
    soup = bs.BeautifulSoup(sauce, 'lxml')
    soup_result = soup.find_all("div", {"class": "text show-more__control"})
    # .string is None for elements that hold more than one child node.
    return [str(block.string) for block in soup_result if block.string]


def _classify_reviews(reviews):
    """Map every review text to 'Positive' or 'Negative' using the loaded model."""
    movie_reviews = {}
    for review in reviews:
        # The vectorizer turns the review into a matrix for the model.
        movie_vector = vectorizer.transform(np.array([review]))
        pred = svm.predict(movie_vector)
        movie_reviews[review] = 'Positive' if pred[0] else 'Negative'
    return movie_reviews


@app.route("/recommend", methods=["POST"])
def recommend():
    """Render the recommendation page for the movie the user searched for.

    This view is invoked when the user searches for a movie; the data arrives
    as form fields of an AJAX POST request.  A missing form field makes
    ``request.form[...]`` raise, and a ``rel_date`` that is not in
    ``YYYY-MM-DD`` format makes ``datetime.strptime`` raise ``ValueError``.

    Returns:
        The rendered 'recommend.html' template.
    """
    title = request.form['title']
    cast_ids = request.form['cast_ids']
    cast_names = request.form['cast_names']
    cast_chars = request.form['cast_chars']
    cast_bdays = request.form['cast_bdays']
    cast_bios = request.form['cast_bios']
    cast_places = request.form['cast_places']
    cast_profiles = request.form['cast_profiles']
    imdb_id = request.form['imdb_id']
    poster = request.form['poster']
    genres = request.form['genres']
    overview = request.form['overview']
    vote_average = request.form['rating']
    vote_count = request.form['vote_count']
    rel_date = request.form['rel_date']
    release_date = request.form['release_date']
    runtime = request.form['runtime']
    status = request.form['status']
    rec_movies = request.form['rec_movies']
    rec_posters = request.form['rec_posters']
    rec_movies_org = request.form['rec_movies_org']
    rec_year = request.form['rec_year']
    rec_vote = request.form['rec_vote']

    # The auto-typing suggestions come from get_suggestions(), which home()
    # passes to its template.  This view never hands them to
    # 'recommend.html', so the CSV is not read again here.

    # The data received from the API arrives as strings in a different
    # format, so these functions convert it to Python lists.
    # for converting string to list
    rec_movies_org = convert_to_list(rec_movies_org)
    rec_movies = convert_to_list(rec_movies)
    rec_posters = convert_to_list(rec_posters)
    cast_names = convert_to_list(cast_names)
    cast_chars = convert_to_list(cast_chars)
    cast_profiles = convert_to_list(cast_profiles)
    cast_bdays = convert_to_list(cast_bdays)
    cast_bios = convert_to_list(cast_bios)
    cast_places = convert_to_list(cast_places)
    cast_ids = convert_to_list_num(cast_ids)
    rec_vote = convert_to_list_num(rec_vote)
    rec_year = convert_to_list_num(rec_year)

    # rendering the string to python string
    cast_bios = [_unescape(bio) for bio in cast_bios]
    cast_chars = [_unescape(char) for char in cast_chars]

    # combining all list as a dictionary
    # zip() stops at the shortest list, so lists of unequal length can no
    # longer raise IndexError.
    movie_cards = {
        rec_poster: [rec_movie, rec_movie_org, vote, year]
        for rec_poster, rec_movie, rec_movie_org, vote, year
        in zip(rec_posters, rec_movies, rec_movies_org, rec_vote, rec_year)
    }
    casts = {
        name: [cast_id, char, profile]
        for name, cast_id, char, profile
        in zip(cast_names, cast_ids, cast_chars, cast_profiles)
    }
    cast_details = {
        name: [cast_id, profile, bday, place, bio]
        for name, cast_id, profile, bday, place, bio
        in zip(cast_names, cast_ids, cast_profiles, cast_bdays, cast_places, cast_bios)
    }

    # Using urllib.request, the user reviews for this particular movie are
    # extracted from the IMDB site.  After extraction each review is passed
    # to the vectorizer to be transformed into a matrix, and the model then
    # predicts whether the review is positive or negative.  Reviews and
    # sentiments are combined into a dictionary to display as a table.
    movie_reviews = _classify_reviews(_fetch_imdb_reviews(imdb_id))

    # Getting current date to check if the movie is released or not
    movie_rel_date = ""
    curr_date = ""
    if rel_date:
        # Today at midnight, as a datetime so it is comparable with
        # movie_rel_date.
        curr_date = datetime.combine(date.today(), datetime.min.time())
        movie_rel_date = datetime.strptime(rel_date, '%Y-%m-%d')

    # Passing all the data to HTML file for displaying
    return render_template(
        'recommend.html',
        title=title,
        poster=poster,
        overview=overview,
        vote_average=vote_average,
        vote_count=vote_count,
        release_date=release_date,
        movie_rel_date=movie_rel_date,
        curr_date=curr_date,
        runtime=runtime,
        status=status,
        genres=genres,
        movie_cards=movie_cards,
        reviews=movie_reviews,
        casts=casts,
        cast_details=cast_details,
    )
To run the Flask app, save the file as main.py and execute it:
# Start Flask's built-in server only when the file is executed directly.
if __name__ == '__main__':
    # NOTE: debug=True turns on the interactive debugger and the reloader;
    # it is meant for local development, not for a public deployment.
    app.run(debug=True)
Click here to view the complete code. Click here to see the web app.