# Build an IPython HTML object that embeds a small jQuery script plus a form.
# - code_toggle() hides every `div.input` element while code_show is true and
#   shows them otherwise, then flips the code_show flag.
# - code_toggle is registered to run once when the document is ready.
# - The form's submit button calls code_toggle() again on each click.
from IPython.display import HTML
HTML('''<script>
code_show=true;
function code_toggle() {
if (code_show){
$('div.input').hide();
} else {
$('div.input').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')
A collection of movies found on the Netflix, Hulu, Prime Video and Disney+ streaming platforms.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
import seaborn as sns
from sklearn.preprocessing import StandardScaler, normalize
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from jupyterthemes import jtplot
# Apply the jupyterthemes 'monokai' plot style in the 'notebook' context,
# with axis ticks enabled and the grid disabled.
jtplot.style(theme='monokai', context='notebook', ticks=True, grid=False)
The dataset was downloaded from the Kaggle dataset website: https://www.kaggle.com/ruchi798/movies-on-netflix-prime-video-hulu-and-disney
# Load the movie catalogue; the CSV is read from the current working directory.
data = pd.read_csv('MoviesOnStreamingPlatforms_updated.csv')

# Sum the numeric columns per release year. numeric_only=True keeps text
# columns out of the aggregation; without it, pandas >= 2.0 also tries to
# "sum" the string columns.
# NOTE(review): the platform columns are presumably 0/1 availability flags,
# which would make each sum a movie count -- confirm against the CSV.
data1 = data.groupby('Year').sum(numeric_only=True).reset_index()

# One stacked subplot per platform, all sharing the Year axis.
platforms = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
fig = make_subplots(rows=len(platforms), cols=1,
                    shared_xaxes=True,
                    vertical_spacing=0.05)
for row, platform in enumerate(platforms, start=1):
    fig.add_trace(go.Scatter(x=data1['Year'], y=data1[platform], name=platform),
                  row=row, col=1)
fig.update_layout(height=600, width=600,
                  title_text="Summary of Movies Produced Based on Years Released")
fig.show()
# Bar chart of the per-year Runtime totals held in data1.
bar_plots = [
    go.Bar(x=data1['Year'], y=data1['Runtime'], name='Runtime'),
]
layout = go.Layout(
    title=go.layout.Title(text='Total Runtime Annually', x=0.5),
    # The Runtime sums are plotted as-is (no division by 60), and the
    # notebook's commentary quotes them in minutes, so label the axis in
    # minutes rather than hours.
    yaxis_title='Total Runtime (min.)',
    xaxis_tickmode='array',
)
fig = go.Figure(data=bar_plots, layout=layout)
fig.show()
The graph above shows that movies released in 2017 recorded the highest total runtime, i.e. 125.822k min.
# Sum the numeric columns per IMDb rating value. numeric_only=True keeps text
# columns out of the aggregation; without it, pandas >= 2.0 also tries to
# "sum" the string columns.
data2 = data.groupby('IMDb').sum(numeric_only=True).reset_index()

# One stacked subplot per platform, all sharing the IMDb rating axis.
platforms = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
fig = make_subplots(rows=len(platforms), cols=1,
                    shared_xaxes=True,
                    vertical_spacing=0.05)
for row, platform in enumerate(platforms, start=1):
    fig.add_trace(go.Scatter(x=data2['IMDb'], y=data2[platform], name=platform),
                  row=row, col=1)
fig.update_layout(height=600, width=600,
                  title_text="Summary of IMDb Ratings")
fig.show()
The graphs above show that, in general, movies streaming on Netflix, Hulu, Prime Video and Disney+ have IMDb ratings between 4 and 8.
# Sum the numeric columns per Rotten Tomatoes score. numeric_only=True keeps
# text columns out of the aggregation; without it, pandas >= 2.0 also tries
# to "sum" the string columns.
data3 = data.groupby('Rotten Tomatoes').sum(numeric_only=True).reset_index()

# The score is a string with a trailing "%"; strip the sign (literal match,
# not a regex) and convert to float so the axis is numeric.
data3['Rotten Tomatoes'] = (
    data3['Rotten Tomatoes'].str.replace("%", "", regex=False).astype(float)
)
# The group keys were ordered as strings; re-order them numerically
# (highest score first) so the line traces are drawn in score order.
data3 = data3.sort_values(by='Rotten Tomatoes', ascending=False)

# One stacked subplot per platform, all sharing the score axis.
platforms = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
fig = make_subplots(rows=len(platforms), cols=1,
                    shared_xaxes=True,
                    vertical_spacing=0.05)
for row, platform in enumerate(platforms, start=1):
    fig.add_trace(go.Scatter(x=data3['Rotten Tomatoes'], y=data3[platform], name=platform),
                  row=row, col=1)
fig.update_layout(height=600, width=600,
                  title_text="Summary of Rotten Tomatoes Ratings")
fig.show()
# Sum the numeric columns per Runtime value. numeric_only=True keeps text
# columns out of the aggregation; without it, pandas >= 2.0 also tries to
# "sum" the string columns.
data4 = data.groupby('Runtime').sum(numeric_only=True).reset_index()

# One stacked subplot per platform, all sharing the Runtime axis.
platforms = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
fig = make_subplots(rows=len(platforms), cols=1,
                    shared_xaxes=True,
                    vertical_spacing=0.05)
for row, platform in enumerate(platforms, start=1):
    fig.add_trace(go.Scatter(x=data4['Runtime'], y=data4[platform], name=platform),
                  row=row, col=1)
fig.update_layout(height=600, width=600,
                  title_text="Movies Runtime Summary")
fig.show()
The graphs above show that Netflix, Hulu, Prime Video and Disney+ each have the most movies at a runtime of 90 min.
# Sum the numeric columns per Age rating label. numeric_only=True keeps text
# columns out of the aggregation; without it, pandas >= 2.0 also tries to
# "sum" the string columns.
data5 = data.groupby('Age').sum(numeric_only=True).reset_index()

# One stacked subplot per platform, all sharing the Age rating axis.
platforms = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
fig = make_subplots(rows=len(platforms), cols=1,
                    shared_xaxes=True,
                    vertical_spacing=0.05)
for row, platform in enumerate(platforms, start=1):
    fig.add_trace(go.Scatter(x=data5['Age'], y=data5[platform], name=platform),
                  row=row, col=1)
fig.update_layout(height=600, width=600,
                  title_text="Movies Age Ratings Summary")
fig.show()