# Filtering out the warnings
import warnings
warnings.filterwarnings('ignore')


# Importing the required libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


# Load the movie dataset into a DataFrame with 'read_csv'.
# The path is relative to the current working directory; replace it with the
# location of your copy of the dataset.
movies = pd.read_csv('Movie+Assignment+Data.csv')


# Check the number of rows and columns in the dataframe.
# (A bare expression like this is only displayed when run as a notebook cell.)
movies.shape

(100, 62)


# Look at the first 5 records in the dataset
movies.head()


# Check the column-wise info of the dataframe:
# column names, non-null counts and dtypes (printed by 'info' itself)
movies.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 62 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Title                   100 non-null    object 
 1   title_year              100 non-null    int64  
 2   budget                  100 non-null    int64  
 3   Gross                   100 non-null    int64  
 4   actor_1_name            100 non-null    object 
 5   actor_2_name            100 non-null    object 
 6   actor_3_name            100 non-null    object 
 7   actor_1_facebook_likes  100 non-null    int64  
 8   actor_2_facebook_likes  99 non-null     float64
 9   actor_3_facebook_likes  98 non-null     float64
 10  IMDb_rating             100 non-null    float64
 11  genre_1                 100 non-null    object 
 12  genre_2                 97 non-null     object 
 13  genre_3                 74 non-null     object 
 14  MetaCritic              95 non-null     float64
 15  Runtime                 100 non-null    int64  
 16  CVotes10                100 non-null    int64  
 17  CVotes09                100 non-null    int64  
 18  CVotes08                100 non-null    int64  
 19  CVotes07                100 non-null    int64  
 20  CVotes06                100 non-null    int64  
 21  CVotes05                100 non-null    int64  
 22  CVotes04                100 non-null    int64  
 23  CVotes03                100 non-null    int64  
 24  CVotes02                100 non-null    int64  
 25  CVotes01                100 non-null    int64  
 26  CVotesMale              100 non-null    int64  
 27  CVotesFemale            100 non-null    int64  
 28  CVotesU18               100 non-null    int64  
 29  CVotesU18M              100 non-null    int64  
 30  CVotesU18F              100 non-null    int64  
 31  CVotes1829              100 non-null    int64  
 32  CVotes1829M             100 non-null    int64  
 33  CVotes1829F             100 non-null    int64  
 34  CVotes3044              100 non-null    int64  
 35  CVotes3044M             100 non-null    int64  
 36  CVotes3044F             100 non-null    int64  
 37  CVotes45A               100 non-null    int64  
 38  CVotes45AM              100 non-null    int64  
 39  CVotes45AF              100 non-null    int64  
 40  CVotes1000              100 non-null    int64  
 41  CVotesUS                100 non-null    int64  
 42  CVotesnUS               100 non-null    int64  
 43  VotesM                  100 non-null    float64
 44  VotesF                  100 non-null    float64
 45  VotesU18                100 non-null    float64
 46  VotesU18M               100 non-null    float64
 47  VotesU18F               100 non-null    float64
 48  Votes1829               100 non-null    float64
 49  Votes1829M              100 non-null    float64
 50  Votes1829F              100 non-null    float64
 51  Votes3044               100 non-null    float64
 52  Votes3044M              100 non-null    float64
 53  Votes3044F              100 non-null    float64
 54  Votes45A                100 non-null    float64
 55  Votes45AM               100 non-null    float64
 56  Votes45AF               100 non-null    float64
 57  Votes1000               100 non-null    float64
 58  VotesUS                 100 non-null    float64
 59  VotesnUS                100 non-null    float64
 60  content_rating          100 non-null    object 
 61  Country                 100 non-null    object 
dtypes: float64(21), int64(32), object(9)
memory usage: 48.6+ KB


# Summary statistics for the numeric columns
movies.describe()


# Express 'budget' and 'Gross' in millions of dollars instead of dollars.
# A plain vectorised division yields the same float values as converting
# each element individually.
for money_col in ('budget', 'Gross'):
    movies[money_col] = movies[money_col] / 1000000


movies.head(3)


# 'profit' (in million $) is what is left of the gross after the budget
movies['profit'] = movies['Gross'].sub(movies['budget'])
movies.head(2)


# Order the movies by 'profit' with 'sort_values'; ascending=False puts the
# most profitable movie first.
movies_sort = movies.sort_values('profit', ascending=False)
movies_sort.head()


# The top 10 profitable movies are the first ten rows (positions 0-9) of the
# sorted frame.
top_10_movies = movies_sort.head(10)
top_10_movies


# Scatter plot of profit (x axis) against budget (y axis)
axis_label_font = {'fontsize':14, 'fontweight':5, 'color':'Black'}
title_font = {'fontsize':24, 'fontweight':8, 'color':'Black'}

plt.style.use('ggplot')
plt.figure(figsize=(12,8))
plt.scatter(movies['profit'], movies['budget'])
plt.xlabel('Profit', fontdict=axis_label_font)
plt.ylabel('Budget', fontdict=axis_label_font)
plt.title('Profit vs Budget', fontdict=title_font)
plt.show()


# Movies that lost money, i.e. whose profit is below zero
made_a_loss = movies['profit'].lt(0)
negative_profit = movies.loc[made_a_loss]
negative_profit.head()


# List every column now present in the dataframe
movies.columns

Index(['Title', 'title_year', 'budget', 'Gross', 'actor_1_name',
       'actor_2_name', 'actor_3_name', 'actor_1_facebook_likes',
       'actor_2_facebook_likes', 'actor_3_facebook_likes', 'IMDb_rating',
       'genre_1', 'genre_2', 'genre_3', 'MetaCritic', 'Runtime', 'CVotes10',
       'CVotes09', 'CVotes08', 'CVotes07', 'CVotes06', 'CVotes05', 'CVotes04',
       'CVotes03', 'CVotes02', 'CVotes01', 'CVotesMale', 'CVotesFemale',
       'CVotesU18', 'CVotesU18M', 'CVotesU18F', 'CVotes1829', 'CVotes1829M',
       'CVotes1829F', 'CVotes3044', 'CVotes3044M', 'CVotes3044F', 'CVotes45A',
       'CVotes45AM', 'CVotes45AF', 'CVotes1000', 'CVotesUS', 'CVotesnUS',
       'VotesM', 'VotesF', 'VotesU18', 'VotesU18M', 'VotesU18F', 'Votes1829',
       'Votes1829M', 'Votes1829F', 'Votes3044', 'Votes3044M', 'Votes3044F',
       'Votes45A', 'Votes45AM', 'Votes45AF', 'Votes1000', 'VotesUS',
       'VotesnUS', 'content_rating', 'Country', 'profit'],
      dtype='object')


# Summary statistics of the IMDb rating, to see its scale before comparing it
# with the MetaCritic score
movies.IMDb_rating.describe()

count    100.000000
mean       7.883000
std        0.247433
min        7.500000
25%        7.700000
50%        7.800000
75%        8.100000
max        8.800000
Name: IMDb_rating, dtype: float64


# Bring MetaCritic down by a factor of 10 so it is comparable with IMDb_rating
movies['MetaCritic'] = movies['MetaCritic'].div(10)


# Confirm the new range of the rescaled column
metacritic_scores = movies['MetaCritic']
print(metacritic_scores.min())
print(metacritic_scores.max())

6.2
10.0


# Average rating per movie: row-wise mean of the MetaCritic and IMDb ratings
rating_columns = ['MetaCritic', 'IMDb_rating']
movies['Avg_rating'] = movies.loc[:, rating_columns].mean(axis='columns')


movies['Avg_rating'].describe()

count    100.000000
mean       7.851000
std        0.478211
min        6.950000
25%        7.500000
50%        7.800000
75%        8.100000
max        8.950000
Name: Avg_rating, dtype: float64


# Movies ordered from the highest to the lowest average rating
movies.sort_values('Avg_rating', ascending=False)


# "Universal acclaim": the MetaCritic and IMDb ratings differ by less than 0.5
# and the average rating is at least 8; best-rated movies listed first.
rating_gap = (movies['MetaCritic'] - movies['IMDb_rating']).abs()
ratings_agree = rating_gap < 0.5
highly_rated = movies['Avg_rating'] >= 8
Universal_Acclaim = movies[ratings_agree & highly_rated].sort_values('Avg_rating', ascending=False)


Universal_Acclaim


# Combined Facebook likes of the three listed actors of each movie
lead_likes = movies['actor_1_facebook_likes']
second_likes = movies['actor_2_facebook_likes']
third_likes = movies['actor_3_facebook_likes']
movies['Total_facebook_likes'] = lead_likes + second_likes + third_likes


# The five movies whose actor trio has the most combined likes
ranked_by_likes = movies.sort_values('Total_facebook_likes', ascending=False)
top_5_trios = ranked_by_likes.head(5)
top_5_trios


# Actor names of those five trios as a list of [actor_1, actor_2, actor_3]
top_5_trios[['actor_1_name','actor_2_name','actor_3_name']].values.tolist()

[['Dev Patel', 'Nicole Kidman', 'Rooney Mara'],
 ['Leonardo DiCaprio', 'Tom Hardy', 'Joseph Gordon-Levitt'],
 ['Jennifer Lawrence', 'Peter Dinklage', 'Hugh Jackman'],
 ['Casey Affleck', 'Michelle Williams ', 'Kyle Chandler'],
 ['Tom Hardy', 'Christian Bale', 'Joseph Gordon-Levitt']]


# Keep only the trios in which no actor has fewer than half the Facebook likes
# of either of the other two actors.
#
# The likes are read from top_5_trios itself, so the boolean mask shares the
# index of the frame it filters (no implicit reindexing of a mask built from
# the full 'movies' frame).
likes_1 = top_5_trios['actor_1_facebook_likes']
likes_2 = top_5_trios['actor_2_facebook_likes']
likes_3 = top_5_trios['actor_3_facebook_likes']

# True where at least one actor falls below half of another actor's likes.
# Every comparison is "likes_x < likes_y / 2"; the division must stay inside
# the parentheses because '/' binds tighter than '|'.
unbalanced_trio = (
    (likes_1 < likes_2 / 2)
    | (likes_1 < likes_3 / 2)
    | (likes_2 < likes_1 / 2)
    | (likes_2 < likes_3 / 2)
    | (likes_3 < likes_1 / 2)
    | (likes_3 < likes_2 / 2)
)
trios_satisfy_above_cond = top_5_trios[~unbalanced_trio]
trios_satisfy_above_cond


trios_satisfy_above_cond[['actor_1_name','actor_2_name','actor_3_name']]


# top_5_trios is sorted by total likes, so the first remaining row is the most
# popular trio that satisfies the condition
trios_satisfy_above_cond[['actor_1_name','actor_2_name','actor_3_name']].head(1)


# Distribution plot of the movie runtimes
runtimes = movies['Runtime']
sns.displot(runtimes)
plt.title('Distribution of movies runtime', fontsize=18)
plt.show()


# The ten R-rated movies with the highest 'CVotesU18' value
r_rated = movies[movies['content_rating'] == 'R']
PopularR = r_rated.sort_values('CVotesU18', ascending=False).head(10)
PopularR


# Create the dataframe df_by_genre.
# The regex keeps every column whose name contains 'genre', 'CVotes' or
# 'Votes' (the 'CVotes' alternative is already covered by 'Votes').
df_by_genre = movies.filter(regex= 'genre|CVotes|Votes')
df_by_genre.shape

(100, 47)


# Create a column cnt and initialize it to 1, so that summing it per group
# gives the number of movies in that group
df_by_genre['cnt'] = 1


# Group the movies by each of the three genre columns and total the numeric
# columns. numeric_only=True is stated explicitly: the two genre columns that
# are not the grouping key hold strings and must not take part in the sum
# (recent pandas versions no longer drop such columns silently).
df_by_g1 = df_by_genre.groupby(by=['genre_1']).sum(numeric_only=True)
df_by_g2 = df_by_genre.groupby(by=['genre_2']).sum(numeric_only=True)
df_by_g3 = df_by_genre.groupby(by=['genre_3']).sum(numeric_only=True)


print(df_by_g1.shape)
print(df_by_g2.shape)
print(df_by_g3.shape)

(8, 45)
(19, 45)
(15, 45)


df_by_g1.head(2)


# Combine the three per-genre totals into one frame. fill_value=0 makes a
# genre that is absent from one of the frames contribute zero rather than NaN.
df_add = df_by_g1
for other_genre_totals in (df_by_g2, df_by_g3):
    df_add = df_add.add(other_genre_totals, fill_value=0)


df_add.shape

(20, 45)


# Extract genres with at least 10 occurrences.
# .copy() gives an independent frame, so the column assignments below modify
# genre_top10 itself rather than a slice of df_add.
genre_top10 = df_add[df_add['cnt'] >= 10].copy()
genre_top10


# Turn the per-genre totals into per-movie means: divide every column except
# 'cnt' by the genre's movie count, row by row (axis=0 aligns on the index).
columns_names = [name for name in genre_top10.columns if not name.startswith('cnt')]

genre_top10[columns_names] = genre_top10[columns_names].div(genre_top10['cnt'], axis=0)
genre_top10


# Rounding off the columns of Votes to two decimals
Votes_col = [name for name in genre_top10.columns if name.startswith('Votes')]

genre_top10[Votes_col] = genre_top10[Votes_col].round(2)
genre_top10


# Converting CVotes to int type (the fractional part is truncated)
CVotes_col = [name for name in genre_top10.columns if name.startswith('CVotes')]

genre_top10[CVotes_col] = genre_top10[CVotes_col].astype('int64')
genre_top10


# Bar chart of how many movies fall in each genre, with the count written
# above every bar
plt.figure(figsize=[10,6])
count_plt = sns.barplot(x=genre_top10.index, y=genre_top10.cnt)
for bar in count_plt.patches:
    bar_height = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width() / 2.
    # Label sits 9 points above the top centre of the bar
    count_plt.annotate(f'{bar_height:.1f}',
                       (bar_centre, bar_height),
                       ha='center', va='center',
                       xytext=(0, 9),
                       textcoords='offset points')
plt.title('Genres vs Count', fontsize=22)
plt.xlabel("Genres", fontsize=15, color='Black')
plt.ylabel("Count", fontsize=15, color='Black')
plt.xticks(color='Black')
plt.yticks(color='Black')
plt.show()


# 1st set of heat maps for CVotes-related columns
male_cvote_cols = ['CVotesU18M', 'CVotes1829M', 'CVotes3044M', 'CVotes45AM']
female_cvote_cols = ['CVotesU18F', 'CVotes1829F', 'CVotes3044F', 'CVotes45AF']

# Several columns must be selected from a GroupBy with a list; selecting them
# with a bare tuple is rejected by pandas 2.x.
# mean() may return floats even for integer input, so the result is cast back
# to int64 to stay compatible with the fmt='d' annotations used below.
male_cvotes = genre_top10.groupby(by=[genre_top10.index])[male_cvote_cols].mean().astype('int64')
male_cvotes


female_cvotes = genre_top10.groupby(by=[genre_top10.index])[female_cvote_cols].mean().astype('int64')
female_cvotes


plt.figure(figsize=(16,10))
plt.suptitle('HeatMaps for Cvotes columns Male vs Female', fontsize=24)

# Left panel: male voters by age group
plt.subplot(1,2,1)
sns.heatmap(male_cvotes, annot=True, fmt='d', linewidths=0.5)
plt.xlabel('Age group of Male',fontsize=18, color='Black')
plt.ylabel('Genres', fontsize=18,color='Black')
plt.xticks(rotation=340, fontsize=12,color='Black')
plt.yticks(rotation=360, fontsize=12, color='Black')

# Right panel: female voters by age group; the y tick labels are hidden
# because the genres are already labelled on the left panel
plt.subplot(1,2,2)
sns.heatmap(female_cvotes, annot=True, fmt='d', linewidths=0.5)
plt.xlabel('Age group of Female',fontsize=18, color='Black')
plt.ylabel('Genres', fontsize=18,color='Black')
plt.xticks(rotation=340, fontsize=12, color='Black')
plt.yticks([])
plt.show()


# 2nd set of heat maps for Votes-related columns
male_vote_cols = ['VotesU18M', 'Votes1829M', 'Votes3044M', 'Votes45AM']
female_vote_cols = ['VotesU18F', 'Votes1829F', 'Votes3044F', 'Votes45AF']

# Several columns must be selected from a GroupBy with a list; selecting them
# with a bare tuple is rejected by pandas 2.x.
male_votes = genre_top10.groupby(by=[genre_top10.index])[male_vote_cols].mean()
female_votes = genre_top10.groupby(by=[genre_top10.index])[female_vote_cols].mean()

plt.figure(figsize=(16,10))
plt.suptitle('HeatMaps for Votes columns Male vs Female', fontsize=24)

# Left panel: male voters by age group
plt.subplot(1,2,1)
sns.heatmap(male_votes, annot=True, linewidths=0.5)
plt.xlabel('Age group of Male',fontsize=18, color='Black')
plt.ylabel('Genres', fontsize=18,color='Black')
plt.xticks(rotation=340, fontsize=12,color='Black')
plt.yticks(rotation=360, fontsize=12, color='Black')

# Right panel: female voters by age group; the y tick labels are hidden
# because the genres are already labelled on the left panel
plt.subplot(1,2,2)
sns.heatmap(female_votes, annot=True, linewidths=0.5)
plt.xlabel('Age group of Female',fontsize=18, color='Black')
plt.ylabel('Genres', fontsize=18,color='Black')
plt.xticks(rotation=340, fontsize=12, color='Black')
plt.yticks([])
plt.show()


# Creating IFUS column: 'USA' when the movie's Country is exactly 'USA',
# otherwise 'non-USA'.
# An equality test is used on purpose: a substring test such as
# `x in "USA"` would also accept values like '', 'US' or 'SA'.
movies['IFUS'] = np.where(movies['Country'] == 'USA', 'USA', 'non-USA')


# Box plot - 1: CVotesUS and CVotesnUS (y) vs IFUS (x), side by side
count_panels = [
    ('CVotesUS', 'Votes from US Voters'),
    ('CVotesnUS', 'Votes from Non-US Voters'),
]

plt.figure(figsize=(16,8))
plt.suptitle('Number of Votes for US and Non-US movies by US and Non-US voters', fontsize=24)
for panel_no, (votes_column, y_label) in enumerate(count_panels, start=1):
    # One panel per voter group, left to right
    plt.subplot(1, 2, panel_no)
    sns.boxplot(data=movies, x='IFUS', y=votes_column)
    plt.xlabel('Movie', fontsize=18, color='Black')
    plt.ylabel(y_label, fontsize=18, color='Black')
    plt.xticks(fontsize=14, color='Black')
plt.show()


# Box plot - 2: VotesUS and VotesnUS (y) vs IFUS (x), side by side
rating_panels = [
    ('VotesUS', 'Average rating from US Voters'),
    ('VotesnUS', 'Average rating from Non-US Voters'),
]

plt.figure(figsize=(16,8))
plt.suptitle('Average rating for US and Non-US movies by US and Non-US voters', fontsize=24)
for panel_no, (rating_column, y_label) in enumerate(rating_panels, start=1):
    # One panel per voter group, left to right
    plt.subplot(1, 2, panel_no)
    sns.boxplot(data=movies, x='IFUS', y=rating_column)
    plt.xlabel('Movie', fontsize=18, color='Black')
    plt.ylabel(y_label, fontsize=18, color='Black')
    plt.xticks(fontsize=14, color='Black')
plt.show()


# Genres ordered by their CVotes1000 value, largest first
genre_top10_sort = genre_top10.sort_values(by='CVotes1000', ascending=False)


# Bar plot of CVotes1000 per genre, with the value written above every bar
plt.figure(figsize=(16,8))
bar_plot = sns.barplot(data=genre_top10_sort, x=genre_top10_sort.index, y='CVotes1000')
for bar in bar_plot.patches:
    bar_height = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width() / 2.
    # Label sits 9 points above the top centre of the bar
    bar_plot.annotate(f'{bar_height:.1f}',
                      (bar_centre, bar_height),
                      ha='center', va='center',
                      xytext=(0, 9),
                      textcoords='offset points')

plt.title('Votes from top 1000 Voters across genres', fontsize=24)
plt.xlabel('Genres', fontsize=18, color='Black')
plt.ylabel('Top 1000 Voters', fontsize=18, color='Black')
plt.xticks(color='Black')
plt.yticks(color='Black')
plt.show()

	Title	title_year	budget	Gross	actor_1_name	actor_2_name	actor_3_name	actor_1_facebook_likes	actor_2_facebook_likes	actor_3_facebook_likes	...	Votes3044M	Votes3044F	Votes45A	Votes45AM	Votes45AF	Votes1000	VotesUS	VotesnUS	content_rating	Country
0	La La Land	2016	30000000	151101803	Ryan Gosling	Emma Stone	Amiée Conn	14000	19000.0	NaN	...	7.9	7.8	7.6	7.6	7.5	7.1	8.3	8.1	PG-13	USA
1	Zootopia	2016	150000000	341268248	Ginnifer Goodwin	Jason Bateman	Idris Elba	2800	28000.0	27000.0	...	7.8	8.1	7.8	7.8	8.1	7.6	8.0	8.0	PG	USA
2	Lion	2016	12000000	51738905	Dev Patel	Nicole Kidman	Rooney Mara	33000	96000.0	9800.0	...	7.9	8.2	8.0	7.9	8.4	7.1	8.1	8.0	PG-13	Australia
3	Arrival	2016	47000000	100546139	Amy Adams	Jeremy Renner	Forest Whitaker	35000	5300.0	NaN	...	7.8	7.8	7.6	7.6	7.7	7.3	8.0	7.9	PG-13	USA
4	Manchester by the Sea	2016	9000000	47695371	Casey Affleck	Michelle Williams	Kyle Chandler	518	71000.0	3300.0	...	7.7	7.7	7.6	7.6	7.6	7.1	7.9	7.8	R	USA

	title_year	budget	Gross	actor_1_facebook_likes	actor_2_facebook_likes	actor_3_facebook_likes	IMDb_rating	MetaCritic	Runtime	CVotes10	...	Votes1829F	Votes3044	Votes3044M	Votes3044F	Votes45A	Votes45AM	Votes45AF	Votes1000	VotesUS	VotesnUS
count	100.000000	1.000000e+02	1.000000e+02	100.000000	99.000000	98.000000	100.000000	95.000000	100.000000	100.000000	...	100.000000	100.000000	100.000000	100.000000	100.00000	100.000000	100.000000	100.000000	100.000000	100.000000
mean	2012.820000	7.838400e+07	1.468679e+08	13407.270000	7377.303030	3002.153061	7.883000	78.252632	126.420000	73212.160000	...	7.982000	7.732000	7.723000	7.780000	7.65100	7.624000	7.770000	7.274000	7.958000	7.793000
std	1.919491	7.445295e+07	1.454004e+08	10649.037862	13471.568216	6940.301133	0.247433	9.122066	19.050799	82669.594746	...	0.321417	0.251814	0.260479	0.282128	0.21485	0.213258	0.301344	0.361987	0.232327	0.264099
min	2010.000000	3.000000e+06	2.238380e+05	39.000000	12.000000	0.000000	7.500000	62.000000	91.000000	6420.000000	...	7.300000	7.300000	7.200000	7.200000	7.10000	7.100000	7.000000	6.400000	7.500000	7.300000
25%	2011.000000	1.575000e+07	4.199752e+07	1000.000000	580.000000	319.750000	7.700000	72.000000	114.750000	30587.000000	...	7.700000	7.600000	7.500000	7.600000	7.50000	7.475000	7.500000	7.100000	7.800000	7.600000
50%	2013.000000	4.225000e+07	1.070266e+08	13000.000000	1000.000000	626.500000	7.800000	78.000000	124.000000	54900.500000	...	8.000000	7.700000	7.700000	7.800000	7.65000	7.600000	7.800000	7.300000	7.950000	7.750000
75%	2014.000000	1.500000e+08	2.107548e+08	20000.000000	11000.000000	1000.000000	8.100000	83.500000	136.250000	80639.000000	...	8.200000	7.900000	7.900000	8.000000	7.80000	7.800000	7.925000	7.500000	8.100000	7.925000
max	2016.000000	2.600000e+08	9.366622e+08	35000.000000	96000.000000	46000.000000	8.800000	100.000000	180.000000	584839.000000	...	8.800000	8.700000	8.700000	8.500000	8.10000	8.100000	8.500000	8.200000	8.700000	8.800000

	Title	title_year	budget	Gross	actor_1_name	actor_2_name	actor_3_name	actor_1_facebook_likes	actor_2_facebook_likes	actor_3_facebook_likes	...	Votes3044M	Votes3044F	Votes45A	Votes45AM	Votes45AF	Votes1000	VotesUS	VotesnUS	content_rating	Country
0	La La Land	2016	30.0	151.101803	Ryan Gosling	Emma Stone	Amiée Conn	14000	19000.0	NaN	...	7.9	7.8	7.6	7.6	7.5	7.1	8.3	8.1	PG-13	USA
1	Zootopia	2016	150.0	341.268248	Ginnifer Goodwin	Jason Bateman	Idris Elba	2800	28000.0	27000.0	...	7.8	8.1	7.8	7.8	8.1	7.6	8.0	8.0	PG	USA
2	Lion	2016	12.0	51.738905	Dev Patel	Nicole Kidman	Rooney Mara	33000	96000.0	9800.0	...	7.9	8.2	8.0	7.9	8.4	7.1	8.1	8.0	PG-13	Australia

	Title	title_year	budget	Gross	actor_1_name	actor_2_name	actor_3_name	actor_1_facebook_likes	actor_2_facebook_likes	actor_3_facebook_likes	...	Votes3044F	Votes45A	Votes45AM	Votes45AF	Votes1000	VotesUS	VotesnUS	content_rating	Country	profit
0	La La Land	2016	30.0	151.101803	Ryan Gosling	Emma Stone	Amiée Conn	14000	19000.0	NaN	...	7.8	7.6	7.6	7.5	7.1	8.3	8.1	PG-13	USA	121.101803
1	Zootopia	2016	150.0	341.268248	Ginnifer Goodwin	Jason Bateman	Idris Elba	2800	28000.0	27000.0	...	8.1	7.8	7.8	8.1	7.6	8.0	8.0	PG	USA	191.268248

	Title	title_year	budget	Gross	actor_1_name	actor_2_name	actor_3_name	actor_1_facebook_likes	actor_2_facebook_likes	actor_3_facebook_likes	...	Votes3044F	Votes45A	Votes45AM	Votes45AF	Votes1000	VotesUS	VotesnUS	content_rating	Country	profit
97	Star Wars: Episode VII - The Force Awakens	2015	245.0	936.662225	Doug Walker	Rob Walker	0	131	12.0	0.0	...	8.2	7.9	7.8	8.2	7.7	8.2	7.9	PG-13	USA	691.662225
11	The Avengers	2012	220.0	623.279547	Chris Hemsworth	Robert Downey Jr.	Scarlett Johansson	26000	21000.0	19000.0	...	8.1	7.9	7.9	8.1	7.4	8.3	7.9	PG-13	USA	403.279547
47	Deadpool	2016	58.0	363.024263	Ryan Reynolds	Ed Skrein	Stefan Kapicic	16000	805.0	361.0	...	7.9	7.8	7.8	7.9	7.3	8.1	7.9	R	USA	305.024263
32	The Hunger Games: Catching Fire	2013	130.0	424.645577	Jennifer Lawrence	Josh Hutcherson	Sandra Ellis Lafferty	34000	14000.0	523.0	...	7.9	7.3	7.2	7.9	6.7	7.7	7.4	PG-13	USA	294.645577
12	Toy Story 3	2010	200.0	414.984497	Tom Hanks	John Ratzenberger	Don Rickles	15000	1000.0	721.0	...	8.3	8.1	8.1	8.1	8.1	8.5	8.3	G	USA	214.984497

IMDb Movie

1. Reading the data

2. Data Analysis

3. Demographic Analysis

	Title	title_year	budget	Gross	actor_1_name	actor_2_name	actor_3_name	actor_1_facebook_likes	actor_2_facebook_likes	actor_3_facebook_likes	...	Votes3044F	Votes45A	Votes45AM	Votes45AF	Votes1000	VotesUS	VotesnUS	content_rating	Country	profit
7	Tangled	2010	260.0	200.807262	Brad Garrett	Donna Murphy	M.C. Gainey	799	553.0	284.0	...	8.0	7.7	7.6	7.9	6.9	7.9	7.7	PG	USA	-59.192738
17	Edge of Tomorrow	2014	178.0	100.189501	Tom Cruise	Lara Pulver	Noah Taylor	10000	854.0	509.0	...	7.7	7.8	7.8	7.8	7.5	8.0	7.8	PG-13	USA	-77.810499
22	Hugo	2011	170.0	73.820094	ChloÃ« Grace Moretz	Christopher Lee	Ray Winstone	17000	16000.0	1000.0	...	7.4	7.5	7.5	7.6	7.4	7.7	7.5	PG	USA	-96.179906
28	X-Men: First Class	2011	160.0	146.405371	Jennifer Lawrence	Michael Fassbender	Oliver Platt	34000	13000.0	1000.0	...	7.8	7.6	7.5	7.7	7.3	7.8	7.7	PG-13	USA	-13.594629
39	The Little Prince	2015	81.2	1.339152	Jeff Bridges	James Franco	Mackenzie Foy	12000	11000.0	6000.0	...	7.9	7.5	7.4	7.9	6.6	7.7	7.7	PG	France	-79.860848

	Title	title_year	budget	Gross	actor_1_name	actor_2_name	actor_3_name	actor_1_facebook_likes	actor_2_facebook_likes	actor_3_facebook_likes	...	Votes45A	Votes45AM	Votes45AF	Votes1000	VotesUS	VotesnUS	content_rating	Country	profit	Avg_rating
94	Boyhood	2014	4.0	25.359200	Ellar Coltrane	Lorelei Linklater	Libby Villari	230	193.0	127.0	...	7.7	7.7	7.7	7.2	8.0	7.9	R	USA	21.359200	8.95
69	12 Years a Slave	2013	20.0	56.667870	QuvenzhanÃ© Wallis	Scoot McNairy	Taran Killam	2000	660.0	500.0	...	7.8	7.8	8.1	7.7	8.3	8.0	R	USA	36.667870	8.85
18	Inside Out	2015	175.0	356.454367	Amy Poehler	Mindy Kaling	Phyllis Smith	1000	767.0	384.0	...	7.9	7.9	7.9	7.6	8.2	8.1	PG	USA	181.454367	8.80
0	La La Land	2016	30.0	151.101803	Ryan Gosling	Emma Stone	Amiée Conn	14000	19000.0	NaN	...	7.6	7.6	7.5	7.1	8.3	8.1	PG-13	USA	121.101803	8.75
12	Toy Story 3	2010	200.0	414.984497	Tom Hanks	John Ratzenberger	Don Rickles	15000	1000.0	721.0	...	8.1	8.1	8.1	8.1	8.5	8.3	G	USA	214.984497	8.75
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
59	Kick-Ass	2010	30.0	48.043505	Elizabeth McGovern	Deborah Twiss	Michael Rispoli	553	488.0	385.0	...	7.6	7.7	7.4	7.1	7.8	7.7	R	UK	18.043505	7.15
98	Harry Potter and the Deathly Hallows: Part I	2010	150.0	296.347721	Rupert Grint	Toby Jones	Alfred Enoch	10000	2000.0	1000.0	...	7.4	7.3	8.0	6.7	7.9	7.5	PG-13	UK	146.347721	7.10
99	Tucker and Dale vs Evil	2010	5.0	0.223838	Katrina Bowden	Tyler Labine	Chelan Simmons	948	779.0	440.0	...	7.5	7.4	7.7	7.1	7.7	7.5	R	Canada	-4.776162	7.05
42	Fury	2014	68.0	85.707116	Brad Pitt	Logan Lerman	Jim Parrack	11000	8000.0	697.0	...	7.4	7.4	7.4	6.8	7.6	7.5	R	USA	17.707116	7.00
44	Les MisÃ©rables	2012	61.0	148.775460	Hugh Jackman	Eddie Redmayne	Anne Hathaway	20000	13000.0	11000.0	...	7.4	7.3	7.7	6.6	7.6	7.5	PG-13	USA	87.775460	6.95

	Title	title_year	budget	Gross	actor_1_name	actor_2_name	actor_3_name	actor_1_facebook_likes	actor_2_facebook_likes	actor_3_facebook_likes	...	Votes45A	Votes45AM	Votes45AF	Votes1000	VotesUS	VotesnUS	content_rating	Country	profit	Avg_rating
95	Whiplash	2014	3.3	13.092000	J.K. Simmons	Melissa Benoist	Chris Mulkey	24000	970.0	535.0	...	8.1	8.1	8.2	8.0	8.6	8.4	R	USA	9.792000	8.65
35	Django Unchained	2012	100.0	162.804648	Leonardo DiCaprio	Christoph Waltz	Ato Essandoh	29000	11000.0	265.0	...	8.0	8.0	8.1	7.8	8.4	8.4	R	USA	62.804648	8.25
93	Dallas Buyers Club	2013	5.0	27.296514	Matthew McConaughey	Jennifer Garner	Denis O'Hare	11000	3000.0	896.0	...	7.8	7.8	8.0	7.2	8.0	7.9	R	USA	22.296514	8.20
97	Star Wars: Episode VII - The Force Awakens	2015	245.0	936.662225	Doug Walker	Rob Walker	0	131	12.0	0.0	...	7.9	7.8	8.2	7.7	8.2	7.9	PG-13	USA	691.662225	8.10
3	Arrival	2016	47.0	100.546139	Amy Adams	Jeremy Renner	Forest Whitaker	35000	5300.0	NaN	...	7.6	7.6	7.7	7.3	8.0	7.9	PG-13	USA	53.546139	8.05
33	The Martian	2015	108.0	228.430993	Matt Damon	Donald Glover	Benedict Wong	13000	801.0	372.0	...	8.0	7.9	8.2	7.8	8.1	7.9	PG-13	USA	120.430993	8.00
43	Gone Girl	2014	61.0	167.735396	Patrick Fugit	Sela Ward	Emily Ratajkowski	835	812.0	625.0	...	7.7	7.7	7.7	7.6	8.1	8.1	R	USA	106.735396	8.00

	Title	title_year	budget	Gross	actor_1_name	actor_2_name	actor_3_name	actor_1_facebook_likes	actor_2_facebook_likes	actor_3_facebook_likes	...	Votes45AM	Votes45AF	Votes1000	VotesUS	VotesnUS	content_rating	Country	profit	Avg_rating	Total_facebook_likes
27	Inception	2010	160.0	292.568851	Leonardo DiCaprio	Tom Hardy	Joseph Gordon-Levitt	29000	27000.0	23000.0	...	8.1	8.0	8.2	8.7	8.8	PG-13	USA	132.568851	8.1	79000.0
14	X-Men: Days of Future Past	2014	200.0	233.914986	Jennifer Lawrence	Peter Dinklage	Hugh Jackman	34000	22000.0	20000.0	...	7.7	7.9	7.4	8.1	7.9	PG-13	USA	33.914986	7.7	76000.0
8	The Dark Knight Rises	2012	250.0	448.130642	Tom Hardy	Christian Bale	Joseph Gordon-Levitt	27000	23000.0	23000.0	...	7.9	7.9	7.8	8.4	8.4	PG-13	USA	198.130642	8.1	73000.0

	CVotes10	CVotes09	CVotes08	CVotes07	CVotes06	CVotes05	CVotes04	CVotes03	CVotes02	CVotes01	...	Votes3044	Votes3044M	Votes3044F	Votes45A	Votes45AM	Votes45AF	Votes1000	VotesUS	VotesnUS	cnt
genre_1
Action	2928407	3261919	4247693	2662020	986774	364234	156150	89483	61975	162426	...	209.1	208.8	210.0	206.5	206.0	209.0	197.2	215.8	209.5	27
Adventure	1058779	1179818	1560541	966275	365486	136985	58559	33174	22018	48100	...	92.7	92.6	93.5	92.0	91.6	93.8	88.9	95.3	93.5	12

	CVotes10	CVotes09	CVotes08	CVotes07	CVotes06	CVotes05	CVotes04	CVotes03	CVotes02	CVotes01	...	Votes3044	Votes3044M	Votes3044F	Votes45A	Votes45AM	Votes45AF	Votes1000	VotesUS	VotesnUS	cnt
Action	3166467.0	3547429.0	4677755.0	2922126.0	1075354.0	393484.0	166970.0	95004.0	65573.0	171247.0	...	240.0	239.5	241.8	237.0	236.4	240.4	226.2	247.6	240.6	31.0
Adventure	3594659.0	4014192.0	5262328.0	3281981.0	1212075.0	438970.0	183070.0	103318.0	69737.0	173858.0	...	294.6	293.7	299.2	291.7	290.4	298.0	280.6	303.5	296.2	38.0
Animation	681562.0	798227.0	1153214.0	722782.0	251076.0	83069.0	30718.0	15733.0	10026.0	25193.0	...	85.4	84.9	87.8	84.5	84.1	86.7	80.0	87.6	86.1	11.0
Biography	852003.0	1401608.0	2231078.0	1332980.0	425595.0	138648.0	53718.0	29510.0	20613.0	51297.0	...	139.1	138.9	139.8	138.5	137.9	141.7	130.1	142.7	139.9	18.0
Comedy	1383616.0	1774987.0	2506851.0	1591069.0	600287.0	226852.0	97469.0	56218.0	39391.0	88367.0	...	177.4	177.4	178.3	175.0	174.7	177.1	165.4	182.6	178.9	23.0
Crime	574526.0	967118.0	1419495.0	821390.0	278391.0	98690.0	42271.0	24713.0	16985.0	37217.0	...	84.9	85.4	83.7	83.9	83.8	84.5	81.3	87.8	85.8	11.0
Drama	3404438.0	4935375.0	7107053.0	4319700.0	1529356.0	552312.0	235475.0	135126.0	94185.0	211308.0	...	501.3	501.1	501.8	496.8	495.3	503.2	469.5	515.9	506.0	65.0
Romance	549959.0	689492.0	1069280.0	712841.0	281289.0	110901.0	48913.0	27698.0	19200.0	40075.0	...	98.9	98.9	99.6	97.8	97.5	98.9	89.9	101.8	100.1	13.0
Sci-Fi	2325284.0	2530855.0	3002994.0	1802098.0	671811.0	254175.0	111925.0	65904.0	46171.0	114435.0	...	133.6	133.5	133.2	131.1	130.8	131.5	127.9	137.5	134.0	17.0
Thriller	1081701.0	1465491.0	1993378.0	1175799.0	416046.0	149953.0	65281.0	37940.0	25767.0	57630.0	...	100.6	100.7	100.1	99.6	99.3	100.7	96.2	103.1	101.5	13.0

	CVotes10	CVotes09	CVotes08	CVotes07	CVotes06	CVotes05	CVotes04	CVotes03	CVotes02	CVotes01	...	Votes3044	Votes3044M	Votes3044F	Votes45A	Votes45AM	Votes45AF	Votes1000	VotesUS	VotesnUS	cnt
Action	102144.096774	114433.193548	150895.322581	94262.129032	34688.838710	12693.032258	5386.129032	3064.645161	2115.258065	5524.096774	...	7.741935	7.725806	7.800000	7.645161	7.625806	7.754839	7.296774	7.987097	7.761290	31.0
Adventure	94596.289474	105636.631579	138482.315789	86367.921053	31896.710526	11551.842105	4817.631579	2718.894737	1835.184211	4575.210526	...	7.752632	7.728947	7.873684	7.676316	7.642105	7.842105	7.384211	7.986842	7.794737	38.0
Animation	61960.181818	72566.090909	104837.636364	65707.454545	22825.090909	7551.727273	2792.545455	1430.272727	911.454545	2290.272727	...	7.763636	7.718182	7.981818	7.681818	7.645455	7.881818	7.272727	7.963636	7.827273	11.0
Biography	47333.500000	77867.111111	123948.777778	74054.444444	23644.166667	7702.666667	2984.333333	1639.444444	1145.166667	2849.833333	...	7.727778	7.716667	7.766667	7.694444	7.661111	7.872222	7.227778	7.927778	7.772222	18.0
Comedy	60157.217391	77173.347826	108993.521739	69176.913043	26099.434783	9863.130435	4237.782609	2444.260870	1712.652174	3842.043478	...	7.713043	7.713043	7.752174	7.608696	7.595652	7.700000	7.191304	7.939130	7.778261	23.0
Crime	52229.636364	87919.818182	129045.000000	74671.818182	25308.272727	8971.818182	3842.818182	2246.636364	1544.090909	3383.363636	...	7.718182	7.763636	7.609091	7.627273	7.618182	7.681818	7.390909	7.981818	7.800000	11.0
Drama	52375.969231	75928.846154	109339.276923	66456.923077	23528.553846	8497.107692	3622.692308	2078.861538	1449.000000	3250.892308	...	7.712308	7.709231	7.720000	7.643077	7.620000	7.741538	7.223077	7.936923	7.784615	65.0
Romance	42304.538462	53037.846154	82252.307692	54833.923077	21637.615385	8530.846154	3762.538462	2130.615385	1476.923077	3082.692308	...	7.607692	7.607692	7.661538	7.523077	7.500000	7.607692	6.915385	7.830769	7.700000	13.0
Sci-Fi	136781.411765	148873.823529	176646.705882	106005.764706	39518.294118	14951.470588	6583.823529	3876.705882	2715.941176	6731.470588	...	7.858824	7.852941	7.835294	7.711765	7.694118	7.735294	7.523529	8.088235	7.882353	17.0
Thriller	83207.769231	112730.076923	153336.769231	90446.076923	32003.538462	11534.846154	5021.615385	2918.461538	1982.076923	4433.076923	...	7.738462	7.746154	7.700000	7.661538	7.638462	7.746154	7.400000	7.930769	7.807692	13.0

	CVotes10	CVotes09	CVotes08	CVotes07	CVotes06	CVotes05	CVotes04	CVotes03	CVotes02	CVotes01	...	Votes3044	Votes3044M	Votes3044F	Votes45A	Votes45AM	Votes45AF	Votes1000	VotesUS	VotesnUS	cnt
Action	102144	114433	150895	94262	34688	12693	5386	3064	2115	5524	...	7.74	7.73	7.80	7.65	7.63	7.75	7.30	7.99	7.76	31.0
Adventure	94596	105636	138482	86367	31896	11551	4817	2718	1835	4575	...	7.75	7.73	7.87	7.68	7.64	7.84	7.38	7.99	7.79	38.0
Animation	61960	72566	104837	65707	22825	7551	2792	1430	911	2290	...	7.76	7.72	7.98	7.68	7.65	7.88	7.27	7.96	7.83	11.0
Biography	47333	77867	123948	74054	23644	7702	2984	1639	1145	2849	...	7.73	7.72	7.77	7.69	7.66	7.87	7.23	7.93	7.77	18.0
Comedy	60157	77173	108993	69176	26099	9863	4237	2444	1712	3842	...	7.71	7.71	7.75	7.61	7.60	7.70	7.19	7.94	7.78	23.0
Crime	52229	87919	129045	74671	25308	8971	3842	2246	1544	3383	...	7.72	7.76	7.61	7.63	7.62	7.68	7.39	7.98	7.80	11.0
Drama	52375	75928	109339	66456	23528	8497	3622	2078	1449	3250	...	7.71	7.71	7.72	7.64	7.62	7.74	7.22	7.94	7.78	65.0
Romance	42304	53037	82252	54833	21637	8530	3762	2130	1476	3082	...	7.61	7.61	7.66	7.52	7.50	7.61	6.92	7.83	7.70	13.0
Sci-Fi	136781	148873	176646	106005	39518	14951	6583	3876	2715	6731	...	7.86	7.85	7.84	7.71	7.69	7.74	7.52	8.09	7.88	17.0
Thriller	83207	112730	153336	90446	32003	11534	5021	2918	1982	4433	...	7.74	7.75	7.70	7.66	7.64	7.75	7.40	7.93	7.81	13.0

	CVotesU18M	CVotes1829M	CVotes3044M	CVotes45AM
Action	1916	164703	132836	24092
Adventure	1900	146808	115795	21910
Animation	1486	103695	75824	12966
Biography	886	114043	92158	18039
Comedy	1178	109016	86346	15979
Crime	932	122690	102671	18799
Drama	915	105203	87644	17422
Romance	535	79024	62253	12054
Sci-Fi	2382	197123	160141	30141
Thriller	1327	156573	129421	24421

	CVotesU18F	CVotes1829F	CVotes3044F	CVotes45AF
Action	525	36996	20627	4269
Adventure	601	39896	20750	4267
Animation	664	39314	17751	3063
Biography	265	29157	17354	4126
Comedy	459	37509	18841	3621
Crime	227	28310	17292	3658
Drama	317	29896	16964	3806
Romance	474	37926	17350	3381
Sci-Fi	613	45269	25620	5340
Thriller	322	36989	21922	4654