import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
import seaborn as sn
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
import plotly.express as px
import plotly.graph_objects as go
import dataframe_image as dfi
import geopandas as gpd
import adjustText as aT
from mpl_toolkits.axes_grid1 import make_axes_locatable
#import plotly.offline as py
#%matplotlib notebook
#py.init_notebook_mode(connected=True)
# Malaysia's 14th General Election (GE14) was held on 9 May 2018. This report presents the overall
# results for the parliamentary seats, as published at https://election.thestar.com.my/.
# It does not cover the results for the state seats, although those elections were held concurrently.
# The objective of this report is to show the main parliamentary results for the whole of Malaysia,
# including a breakdown at state level. The final section of the report shows predicted election
# results based on a set of selected criteria.
# Timeline of Malaysian general elections, written one election per row and
# then transposed into the column-oriented dict that the DataFrame is built from.
intro_columns = ['Year', 'GE', 'Dissolution', 'Nomination', 'Polling Date', 'Day']
intro_rows = [
    (1959, 'GE1', '27 Jun 1959', '15 Jul 1959', '19 Aug 1959', 'Wednesday'),
    (1964, 'GE2', '1 Mac 1964', '21 Mac 1964', '25 Apr 1964', 'Saturday'),
    (1969, 'GE3', '20 Mac 1969', '5 Apr 1969', '10 May 1969', 'Saturday'),
    (1974, 'GE4', '31 Jul 1974', '8 Aug 1974', '24 Aug 1974', 'Saturday'),
    (1978, 'GE5', '12 Jun 1978', '21 Jun 1978', '8 Jul 1978', 'Saturday'),
    (1982, 'GE6', '29 Mac 1982', '7 Apr 1982', '22 Apr 1982', 'Thursday'),
    (1986, 'GE7', '19 Jul 1986', '24 Jul 1986', '3 Aug 1986', 'Sunday'),
    (1990, 'GE8', '4 Oct 1990', '11 Oct 1990', '21 Oct 1990', 'Sunday'),
    (1995, 'GE9', '6 Apr 1995', '15 Apr 1995', '25 Apr 1995', 'Tuesday'),
    (1999, 'GE10', '10 Nov 1999', '20 Nov 1999', '29 Nov 1999', 'Monday'),
    (2004, 'GE11', '4 Mac 2004', '13 Mac 2004', '21 Mac 2004', 'Sunday'),
    (2008, 'GE12', '13 Feb 2008', '24 Feb 2008', '8 Mac 2008', 'Sunday'),
    (2013, 'GE13', '3 Apr 2013', '20 Apr 2013', '5 May 2013', 'Sunday'),
    (2018, 'GE14', '7 Apr 2018', '28 Apr 2018', '9 May 2018', 'Wednesday'),
    (2023, 'GE15', 'TBA', 'TBA', 'TBA', 'TBA'),
]

# zip(*rows) turns the row tuples back into one sequence per column.
data = {column: list(values) for column, values in zip(intro_columns, zip(*intro_rows))}
df_intro = pd.DataFrame(data, columns=intro_columns)
df_intro
# Load the GE14 data set from the working directory.
df = pd.read_csv('ge14.csv')

# Derive one "<group> Votes" column per group as turnout * group share / 100.
# (The division by 100 suggests the group columns hold percentages - confirm against the CSV.)
for group in ('Malay', 'Chinese', 'Indian', 'Sabahan', 'Sarawakian', 'Org Asli', 'Others'):
    df[group + ' Votes'] = df['Voter Turnout'] * df[group] / 100
# Seats won per party. Only 'Win' is aggregated: summing the whole frame would
# also "sum" text columns such as 'State', which pandas >= 2.0 no longer drops
# silently, and every column except 'Win' was thrown away anyway.
df_win = df.groupby('Party')['Win'].sum().reset_index(name='Total Seats Win')

# Seats contested per party = number of rows carrying that party label.
df_party = df['Party'].value_counts()
df_party = df_party.rename_axis('Party').reset_index(name='Total Seats Contested')

# Combine both counts and express wins as a percentage of seats contested.
df_party = df_party.merge(df_win, on='Party')
df_party['Percentage Win'] = (df_party['Total Seats Win'] / df_party['Total Seats Contested']) * 100

# Notebook display only: rounded and ranked by win rate (df_party itself is left unrounded).
df_party.round().sort_values('Percentage Win', ascending=False)
#dfi.export(df_party, 'df_party.png')
# Grouped bar chart: for every party, seats contested next to seats won.
seat_metrics = ['Total Seats Contested', 'Total Seats Win']
bar_plots = [
    go.Bar(x=df_party['Party'], y=df_party[metric], name=metric)
    for metric in seat_metrics
]
layout = go.Layout(
    title=go.layout.Title(text='GE14: Seats Contested VS Seats Win', x=0.5),
    yaxis_title='Total Seats',
    xaxis_tickmode='array',
)
fig = go.Figure(data=bar_plots, layout=layout)
fig.show()
# Per-party totals of every numeric column. numeric_only=True keeps text
# columns (e.g. 'State') out of the aggregation; pandas >= 2.0 would otherwise
# try to sum them as well.
df_race = df.groupby('Party').sum(numeric_only=True).reset_index()

race_vote_columns = [
    'Malay Votes', 'Chinese Votes', 'Indian Votes', 'Sabahan Votes',
    'Sarawakian Votes', 'Org Asli Votes', 'Others Votes',
]

# The same data is drawn twice: first with plotly's default (grouped) bars,
# then stacked. Only the extra layout options differ between the two figures.
for extra_layout in ({}, {'barmode': 'stack'}):
    bar_plots = [
        go.Bar(x=df_race['Party'], y=df_race[column], name=column)
        for column in race_vote_columns
    ]
    layout = go.Layout(
        title=go.layout.Title(text='GE14: Race Composition By Party Win Seats', x=0.5),
        yaxis_title='Total Votes',
        xaxis_tickmode='array',
    )
    fig = go.Figure(data=bar_plots, layout=layout)
    # Order parties by their summed bar height, largest first.
    fig.update_layout(xaxis={'categoryorder': 'total descending'}, **extra_layout)
    fig.show()
def _seats_won_by_party(results, state, column):
    """Return a (Party, <column>) frame with the summed 'Win' per party in one state.

    results -- frame with 'State', 'Party' and 'Win' columns
    state   -- value of 'State' to filter on
    column  -- name given to the summed 'Win' column in the returned frame

    Only 'Win' is aggregated, so text columns never take part in the sum.
    """
    in_state = results[results['State'] == state]
    return in_state.groupby('Party')['Win'].sum().reset_index(name=column)


# 'State' label in the data -> column name used in the combined table.
# They are identical except for 'WP', which is spelled out.
state_columns = {
    'Johor': 'Johor',
    'Kedah': 'Kedah',
    'Kelantan': 'Kelantan',
    'Melaka': 'Melaka',
    'Negeri Sembilan': 'Negeri Sembilan',
    'Pahang': 'Pahang',
    'Penang': 'Penang',
    'Perak': 'Perak',
    'Perlis': 'Perlis',
    'Sabah': 'Sabah',
    'Sarawak': 'Sarawak',
    'Selangor': 'Selangor',
    'Terengganu': 'Terengganu',
    'WP': 'Wilayah Persekutuan',
}
state_frames = [
    _seats_won_by_party(df, state, column)
    for state, column in state_columns.items()
]

# Later cells refer to the per-state frames by these names.
(df_joh, df_ked, df_kel, df_mel, df_neg, df_pah, df_pen, df_prk, df_per,
 df_sab, df_sar, df_sel, df_ter, df_wil) = state_frames

# Outer-merge on 'Party' so a party absent from a state is kept (as NaN there).
df_states = state_frames[0]
for state_frame in state_frames[1:]:
    df_states = df_states.merge(state_frame, on='Party', how='outer')

# Row total across all state columns, and each party's share of all seats won.
df_states['Total'] = df_states.sum(numeric_only=True, axis=1)
df_states['Percentage'] = ((df_states['Total'] / df_states['Total'].sum()) * 100).round(1)

# Notebook display only: df_states itself keeps its NaNs and its row order.
df_states.fillna(0).sort_values('Total', ascending=False)
# Single-series bar chart of each party's seat total across all states.
total_seats_trace = go.Bar(
    x=df_states['Party'],
    y=df_states['Total'],
    name='Total Seats Win',
)
bar_plots = [total_seats_trace]
layout = go.Layout(
    title=go.layout.Title(text='GE14: Total Seat Win Based On Party', x=1),
    yaxis_title='Total Seats Win',
    xaxis_tickmode='array',
)
fig = go.Figure(data=bar_plots, layout=layout)
# Largest party first.
fig.update_layout(barmode='stack', xaxis={'categoryorder': 'total descending'})
fig.show()
# One trace per state column of df_states, drawn twice:
#   1. default (grouped) bars with the y axis fixed to 0-14
#   2. stacked bars with the y axis fixed to 0-60
state_names = [
    'Johor', 'Kedah', 'Kelantan', 'Melaka', 'Negeri Sembilan', 'Pahang', 'Penang',
    'Perak', 'Perlis', 'Sabah', 'Sarawak', 'Selangor', 'Terengganu', 'Wilayah Persekutuan',
]

for y_axis_max, extra_layout in ((14, {}), (60, {'barmode': 'stack'})):
    bar_plots = [
        go.Bar(x=df_states['Party'], y=df_states[state_name], name=state_name)
        for state_name in state_names
    ]
    layout = go.Layout(
        title=go.layout.Title(
            text='GE14: Total Seats Win By Each Party At Respective States', x=1),
        yaxis_title='Total Seats Win',
        xaxis_tickmode='array',
    )
    fig = go.Figure(data=bar_plots, layout=layout)
    fig.update_yaxes(range=[0, y_axis_max])
    fig.update_layout(xaxis={'categoryorder': 'total descending'}, **extra_layout)
    fig.show()
# One bar chart of seats won per party for every state.
# Each entry pairs a per-state frame with the name of its seat-count column,
# which is also the state name shown in the chart title.
# Perlis is included here: its frame (df_per) is built alongside the others but
# previously had no chart of its own.
state_results = [
    (df_joh, 'Johor'),
    (df_ked, 'Kedah'),
    (df_kel, 'Kelantan'),
    (df_mel, 'Melaka'),
    (df_neg, 'Negeri Sembilan'),
    (df_pah, 'Pahang'),
    (df_pen, 'Penang'),
    (df_prk, 'Perak'),
    (df_per, 'Perlis'),
    (df_sab, 'Sabah'),
    (df_sar, 'Sarawak'),
    (df_sel, 'Selangor'),
    (df_ter, 'Terengganu'),
    (df_wil, 'Wilayah Persekutuan'),
]

for state_frame, state_name in state_results:
    bar_plots = [
        go.Bar(x=state_frame['Party'], y=state_frame[state_name], name='Total Seats Win')
    ]
    layout = go.Layout(
        title=go.layout.Title(
            text='GE14: ' + state_name + ' Parlimentary Seats Results', x=1),
        yaxis_title='Total Seats Win',
        xaxis_tickmode='array',
    )
    fig = go.Figure(data=bar_plots, layout=layout)
    # Parties ordered by seats won, largest first.
    fig.update_layout(barmode='stack', xaxis={'categoryorder': 'total descending'})
    if state_name == 'Melaka':
        # Kept from the original cells: only the Melaka chart re-centres its title.
        fig.update_layout(title_x=0.5)
    fig.show()
# For every state keep only the row of the party with the most seats won.
# idxmax() returns an index *label*, so it is paired with .loc (label-based)
# rather than .iloc (position-based). On ties idxmax() keeps the first row.
df_joh = df_joh.loc[df_joh['Johor'].idxmax()]
df_ked = df_ked.loc[df_ked['Kedah'].idxmax()]
df_kel = df_kel.loc[df_kel['Kelantan'].idxmax()]
df_mel = df_mel.loc[df_mel['Melaka'].idxmax()]
df_neg = df_neg.loc[df_neg['Negeri Sembilan'].idxmax()]
df_pah = df_pah.loc[df_pah['Pahang'].idxmax()]
df_pen = df_pen.loc[df_pen['Penang'].idxmax()]
df_prk = df_prk.loc[df_prk['Perak'].idxmax()]
df_per = df_per.loc[df_per['Perlis'].idxmax()]
df_sab = df_sab.loc[df_sab['Sabah'].idxmax()]
df_sar = df_sar.loc[df_sar['Sarawak'].idxmax()]
df_sel = df_sel.loc[df_sel['Selangor'].idxmax()]
df_ter = df_ter.loc[df_ter['Terengganu'].idxmax()]
df_wil = df_wil.loc[df_wil['Wilayah Persekutuan'].idxmax()]

# Winning rows listed in the same order as the 'States' labels below
# (Wilayah Persekutuan is listed as 'Kuala Lumpur', Melaka as 'Malacca').
winning_rows = [df_joh, df_ked, df_kel, df_wil, df_mel, df_neg, df_pah, df_pen,
                df_prk, df_per, df_sab, df_sar, df_sel, df_ter]

data = {'States': ['Johor', 'Kedah', 'Kelantan', 'Kuala Lumpur', 'Malacca', 'Negeri Sembilan',
                   'Pahang', 'Penang', 'Perak', 'Perlis', 'Sabah', 'Sarawak', 'Selangor',
                   'Terengganu'],
        # Read the party by label: integer keys on a string-labelled Series are
        # deprecated positional access and stop working in pandas 3.0.
        'Majority Party Win': [row['Party'] for row in winning_rows]}
df_map = pd.DataFrame(data, columns=['States', 'Majority Party Win'])
# 'name' is the key the map cell merges on.
df_map.rename(columns={'States': 'name'}, inplace=True)
# Read the Malaysia polygons and attach each state's majority party by 'name'.
shapefile_path = r'/Users/zahiruddinzahidanishah/Google Drive/Python/Geopandas/Malaysia_Polygon.shp'
mas = gpd.read_file(shapefile_path)
mas = mas.merge(df_map, on='name')

# A copy whose active geometry is each polygon's centroid, used to place labels.
mas["center"] = mas["geometry"].centroid
mas_points = mas.copy()
mas_points.set_geometry("center", inplace = True)

# Choropleth coloured by majority party, with a white background.
fig, ax = plt.subplots(1, figsize=(16, 9))
map_axes = mas.plot(
    column='Majority Party Win',
    ax=ax,
    cmap='Set2',
    edgecolor='black',
    linewidth=0.5,
    legend=True,
    legend_kwds={'loc': 'lower right'},
)
map_axes.set_facecolor('w')
#ax.set_axis_off()

# One text label per state at its centroid; adjust_text then repositions the labels.
texts = [
    plt.text(lon, lat, state_label, fontsize = 9, color='black')
    for lon, lat, state_label in zip(mas_points.geometry.x, mas_points.geometry.y, mas_points["name"])
]
aT.adjust_text(
    texts,
    force_points=0.3,
    force_text=0.8,
    expand_points=(1, 1),
    expand_text=(1, 1),
    arrowprops=dict(arrowstyle="-", color='grey', lw=0.5),
)

ax.set_title("Majority Party Win", fontsize=30, color='black')
ax.set_xlabel('longitude', color='black')
ax.set_ylabel('latitude', color='black')
for side in ('top', 'right'):
    ax.spines[side].set_visible(True)
# (display label, source column) for every group; note that the 'Org Asli Votes'
# column is shown under the label 'Orang Asli'.
voter_groups = [
    ('Malay', 'Malay Votes'),
    ('Chinese', 'Chinese Votes'),
    ('Indian', 'Indian Votes'),
    ('Sabahan', 'Sabahan Votes'),
    ('Sarawakian', 'Sarawakian Votes'),
    ('Orang Asli', 'Org Asli Votes'),
    ('Others', 'Others Votes'),
]

# Column totals rounded to whole numbers, plus each group's share of the grand total.
data = {
    'Races': [label for label, _ in voter_groups],
    'Total': [df[column].sum().round() for _, column in voter_groups],
}
df_voter = pd.DataFrame(data, columns=['Races', 'Total'])
df_voter['Percentage'] = ((df_voter['Total'] / df_voter['Total'].sum()) * 100).round(1)
#df_voter.loc['Total']= df_voter.sum(numeric_only=True, axis=0)
df_voter

# Bar chart of the totals per group.
voters_trace = go.Bar(x=df_voter['Races'], y=df_voter['Total'], name='Total Voters')
bar_plots = [voters_trace]
layout = go.Layout(
    title=go.layout.Title(text='GE14: Total Voters Compositions', x=1),
    yaxis_title='Total Voters',
    xaxis_tickmode='array',
)
fig = go.Figure(data=bar_plots, layout=layout)
fig.show()
def _print_quality_report(frame):
    """Print whether *frame* has any nulls, any duplicated rows, and its shape."""
    print(frame.isnull().values.any())
    print(frame.duplicated().values.any())
    print(frame.shape)


# Data quality before filtering.
_print_quality_report(df)
df.head(3)

# Keep only the rows whose 'Win' value is non-zero, then re-check.
df = df[df['Win'].ne(0)]
df.head(3).round()
_print_quality_report(df)
df.columns

# Features and target for the party classifier.
# NOTE(review): 'Win' is part of the features although rows with Win == 0 were
# just removed - if it is a 0/1 flag it is now constant; confirm it is wanted.
feature_columns = [
    'Win', 'Candidate Votes', 'Registered Voters', 'Majority', 'Spoilt votes',
    'Voter Turnout', 'Malay Votes', 'Chinese Votes', 'Indian Votes',
    'Sabahan Votes', 'Sarawakian Votes', 'Org Asli Votes', 'Others Votes',
]
X = df[feature_columns]
y = df['Party']

# 65 % / 35 % train-test split with a fixed seed, then a default logistic regression.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.35, random_state=0)
logistic_regression = LogisticRegression()
logistic_regression.fit(X_train, y_train)
y_pred = logistic_regression.predict(X_test)
#confusion_matrix = pd.crosstab(y_test, y_pred, rownames=['Actual'], colnames=['Predicted'])
#sn.heatmap(confusion_matrix, annot=True)

# Accuracy on the held-out rows, as a percentage with one decimal.
accuracy_percent = round(metrics.accuracy_score(y_test, y_pred)*100, 1)
print('Prediction table shows the prediction results with', accuracy_percent, '% accuracy')
X_test['Prediction'] = y_pred
# Feature columns for every remaining row; an explicit copy so that adding the
# 'Prediction' column below never writes into a slice of df.
df2 = df[['Win','Candidate Votes', 'Registered Voters', 'Majority', 'Spoilt votes',
          'Voter Turnout', 'Malay Votes', 'Chinese Votes', 'Indian Votes',
          'Sabahan Votes', 'Sarawakian Votes', 'Org Asli Votes', 'Others Votes']].copy()
# NOTE(review): this predicts on all rows of df, which includes the rows the
# model was trained on - confirm that is intended for the comparison below.
y_pred = logistic_regression.predict(df2)
df2['Prediction'] = y_pred

# Aggregate seats per party before plotting. Passing one bar per row with
# repeated party labels relies on the bar mode to add them up, and plotly's
# default 'group' mode does not sum bars that share an x value.
actual_seats = df.groupby('Party')['Win'].sum()
predicted_seats = df2.groupby('Prediction')['Win'].sum()
bar_plots = [
    go.Bar(x=actual_seats.index, y=actual_seats.values, name='GE14 Actual Result'),
    go.Bar(x=predicted_seats.index, y=predicted_seats.values, name='Prediction Result'),
]
layout = go.Layout(
    title=go.layout.Title(text='GE14 Election Result Vs Predictions', x=0.5),
    yaxis_title='Winning Seats',
    xaxis_tickmode='array',
)
fig = go.Figure(data=bar_plots, layout=layout)
fig.show()

# Actual versus predicted row counts per party, side by side.
df3 = df['Party'].value_counts().rename_axis('Party').reset_index(name='Win Actual')
df4 = df2['Prediction'].value_counts().rename_axis('Party').reset_index(name='Win Prediction')
# Outer merge keeps parties that appear on only one side (NaN on the other).
result = df3.merge(df4, on=['Party'], how='outer')
result.loc['Total'] = result.sum(numeric_only=True, axis=0)
# Label the totals row; otherwise its 'Party' cell is NaN and shows up as 0 below.
result.loc['Total', 'Party'] = 'Total'
# Notebook display: parties missing on one side are shown with a count of 0.
result.fillna(0)