Covid-19 Analysis using Data Science

A novel strain of coronavirus — SARS-CoV-2 — was first detected in December 2019 in Wuhan, a city in China’s Hubei province with a population of 11 million, after an outbreak of pneumonia without an obvious cause. The virus has now spread to over 200 countries and territories across the globe, and was characterised as a pandemic by the World Health Organization (WHO).

Let’s get our environment ready with the libraries we’ll need and then import the data!

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
# Apply matplotlib's 'ggplot' style sheet to the figures created from here on.
plt.style.use('ggplot')

Let’s load all the data from the different sources that will be useful for our analysis!

# Read covid_19.csv from the home directory and preview its first five rows.
all_data = pd.read_csv(filepath_or_buffer='~/covid_19.csv')
all_data.head(n=5)

Worldometer Data

# Load the Worldometer table. read_csv already parses empty fields as NaN by
# default, so no explicit '' -> NaN replacement is required afterwards.
worldometer_data = pd.read_csv('~/worldometer_data.csv')
# Zero-fill the numeric columns only: a blanket fillna(0) would also write the
# integer 0 into text columns, leaving them with mixed str/int values.
numeric_cols = worldometer_data.select_dtypes(include='number').columns
worldometer_data[numeric_cols] = worldometer_data[numeric_cols].fillna(0)
worldometer_data.head()

Country Wise

# Load the latest per-country table. read_csv already parses empty fields as
# NaN by default, so no explicit '' -> NaN replacement is required afterwards.
country_wise = pd.read_csv('~/country_wise_latest.csv')
# Zero-fill the numeric columns only: a blanket fillna(0) would also write the
# integer 0 into text columns, leaving them with mixed str/int values.
numeric_cols = country_wise.select_dtypes(include='number').columns
country_wise[numeric_cols] = country_wise[numeric_cols].fillna(0)
country_wise.head()

Day Wise

# Read day_wise.csv and convert its 'Date' column to datetimes in one chain,
# then preview the first rows.
day_wise = pd.read_csv('~/day_wise.csv').assign(
    Date=lambda frame: pd.to_datetime(frame['Date'])
)
day_wise.head()

Full Grouped

# Read full_grouped.csv, then replace the textual 'Date' column with parsed
# datetimes (the animated map later relies on the .dt accessor of this column).
raw_grouped = pd.read_csv('~/full_grouped.csv')
full_grouped = raw_grouped.assign(Date=pd.to_datetime(raw_grouped['Date']))
full_grouped.head()

Let’s see the latest information about the number of Deaths, Recovered and Active cases in the world!

# Last row of the daily table, restricted to the date and the three counts.
temp = day_wise.loc[:, ['Date', 'Deaths', 'Recovered', 'Active']].tail(1)
# Date of that row as an array of 'dd-mm-yy' strings (dt[0] is used in the
# title of the bar chart that follows).
dt = temp['Date'].dt.strftime('%d-%m-%y').values
# Reshape to long form (one row per category) and order by value, largest first.
temp = (
    temp.melt(id_vars='Date', value_vars=['Active', 'Deaths', 'Recovered'])
        .sort_values(by='value', ascending=False)
)
temp

Let’s visualise the total Cases until 19-07-2020

plt.figure(figsize=(25,15))
# Colour every bar by the category it represents, not by its position: `temp`
# is sorted by value, so a positional colour list would hand the wrong colour
# to a category whenever the ranking of Active/Deaths/Recovered changes.
category_colors = {'Recovered': 'green', 'Active': 'orange', 'Deaths': 'red'}
bars = plt.bar(temp.variable, temp.value,
               color=[category_colors[name] for name in temp.variable], alpha = 0.7)
# hide all tick marks and the y tick labels; keep the category labels on the x axis
plt.tick_params(top=False, bottom=False, left=False, right=False, labelleft=False, labelbottom=True)
# remove the frame of the chart
for spine in plt.gca().spines.values():
    spine.set_visible(False)

# direct label each bar with its value expressed in thousands ('K')
for bar in bars:
    plt.gca().text(bar.get_x() + bar.get_width()/2, bar.get_height()-5, str(int(bar.get_height()/1000)) + 'K',
                   ha='center', fontsize=19)

plt.xticks(temp.variable.values,fontsize=15)
# dt[0] is the formatted date of the row being plotted; the trailing ';' keeps
# a notebook from echoing the returned Text object.
plt.title('Total Cases Until {}'.format(dt[0]), fontsize=25);

Monthly Cases

# Switch to the 'fivethirtyeight' style sheet for figures created from here on.
plt.style.use('fivethirtyeight')
# One bar per row of day_wise: the 'Confirmed' value against its date, drawn
# on a logarithmic y axis.
fig, ax = plt.subplots(figsize=(14, 7))
ax.bar(day_wise['Date'], day_wise['Confirmed'], width=0.8, alpha=0.7)
ax.set_yscale("log")
ax.set_title('Monthly Confirmed Cases')

Monthly Active & New Cases

# Two side-by-side panels sharing the same dates: 'Active' on the left and
# 'New cases' on the right, each on its own logarithmic y axis.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(17, 6))
dates = day_wise['Date']

ax1.bar(dates, day_wise['Active'], width=0.8, alpha=0.8, color='#FF8552')
ax1.set_yscale("log")
ax1.set_title("Monthly Active Cases")

ax2.bar(dates, day_wise['New cases'], width=0.8, alpha=0.8, color='#F4D35E')
ax2.set_yscale("log")
ax2.set_title("Monthly New Cases")

Monthly Deaths & Recovered

# 'Deaths' (left) and 'Recovered' (right) per date; sharey=True gives both
# panels one common logarithmic y axis.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(17, 6), sharey=True)
dates = day_wise['Date']

ax1.bar(dates, day_wise['Deaths'], width=0.7, alpha=0.8, color='red')
ax1.set_yscale("log")
ax1.set_title('Monthly Deaths');

ax2.bar(dates, day_wise['Recovered'], width=0.7, alpha=0.8, color='#588157')
ax2.set_yscale("log")
ax2.set_title('Monthly Recovered');

Monthly New Deaths & Recovered

# 'New deaths' (left) and 'New recovered' (right) per date; sharey=True gives
# both panels one common logarithmic y axis.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(17, 6), sharey=True)
dates = day_wise['Date']

ax1.bar(dates, day_wise['New deaths'], width=0.7, alpha=0.7, color='red')
ax1.set_yscale("log")
ax1.set_title('Monthly New Deaths');

ax2.bar(dates, day_wise['New recovered'], width=0.7, alpha=0.7, color='green')
ax2.set_yscale("log")
ax2.set_title('Monthly New Recovered')

Number of Deaths / 100 Recovered

# Bar per date of the 'Deaths / 100 Recovered' column, logarithmic y axis.
fig, ax = plt.subplots(figsize=(15, 7))
ax.bar(day_wise['Date'], day_wise['Deaths / 100 Recovered'], width=0.7, alpha=0.7)

ax.set_yscale("log")
ax.set_title('Number of Deaths / 100 Recovered');

Number of Recovered / 100 Cases

# Bar per date of the 'Recovered / 100 Cases' column, logarithmic y axis.
fig, ax = plt.subplots(figsize=(15, 7))
ax.bar(day_wise['Date'], day_wise['Recovered / 100 Cases'],
       color='#ff2e63', width=0.7, alpha=0.7)

ax.set_yscale("log")
ax.set_title('Number of Recovered / 100 Cases');

Number of Countries Affected

# Bar per date of the 'No. of countries' column, logarithmic y axis.
fig, ax = plt.subplots(figsize=(15, 7))
ax.bar(day_wise['Date'], day_wise['No. of countries'],
       color='#fe9801', width=0.7, alpha=0.7)

ax.set_yscale("log")
ax.set_title('Number of Countries Affected');

Top 15 Countries

Confirmed Cases

plt.figure(figsize=(20, 15))
# Take the 15 largest 'Confirmed' values, then order them ascending so the
# largest country ends up as the last bar drawn.
temp = (
    country_wise[['Country/Region', 'Confirmed']]
    .sort_values(by='Confirmed', ascending=False)
    .head(15)
    .sort_values(by='Confirmed')
)

chart_axes = plt.gca()
bars = chart_axes.barh(temp['Country/Region'], temp['Confirmed'], color=['gray'])
# highlight the last (largest) bar
bars[-1].set_color('#D63412')
chart_axes.tick_params(labelbottom=False, bottom=False, top=False)
chart_axes.set_title('Confirmed Cases in Top15 Countries', fontsize=23)
chart_axes.ticklabel_format(useOffset=False, style='plain', axis='x')

# strip the frame around the chart
for border in chart_axes.spines.values():
    border.set_visible(False)

# write each bar's value (in thousands) just past the end of the bar
for row, count in enumerate(temp['Confirmed']):
    chart_axes.text(count + 3, row - .25, f"{int(count / 1000)}K", color='black', fontsize=15)

Active Cases

plt.figure(figsize=(20, 15))
# Take the 15 largest 'Active' values, then order them ascending so the
# largest country ends up as the last bar drawn.
temp = (
    country_wise[['Country/Region', 'Active']]
    .sort_values(by='Active', ascending=False)
    .head(15)
    .sort_values(by='Active')
)

chart_axes = plt.gca()
bars = chart_axes.barh(temp['Country/Region'], temp['Active'], color=['gray'])
# highlight the last (largest) bar
bars[-1].set_color('#db971a')
chart_axes.tick_params(labelbottom=False, bottom=False, top=False)
chart_axes.set_title('Active Cases in Top15 Countries', fontsize=23)
chart_axes.ticklabel_format(useOffset=False, style='plain', axis='x')

# strip the frame around the chart
for border in chart_axes.spines.values():
    border.set_visible(False)

# write each bar's value (in thousands) just past the end of the bar
for row, count in enumerate(temp['Active']):
    chart_axes.text(count + 3, row - .25, f"{int(count / 1000)}K", color='black', fontsize=15)

Deaths

plt.figure(figsize=(20, 15))
# Take the 15 largest 'Deaths' values, then order them ascending so the
# largest country ends up as the last bar drawn.
temp = (
    country_wise[['Country/Region', 'Deaths']]
    .sort_values(by='Deaths', ascending=False)
    .head(15)
    .sort_values(by='Deaths')
)

chart_axes = plt.gca()
bars = chart_axes.barh(temp['Country/Region'], temp['Deaths'], color=['gray'])
# highlight the last (largest) bar
bars[-1].set_color('red')
chart_axes.tick_params(labelbottom=False, bottom=False, top=False)
chart_axes.set_title('Deaths in Top15 Countries', fontsize=23)
chart_axes.ticklabel_format(useOffset=False, style='plain', axis='x')

# strip the frame around the chart
for border in chart_axes.spines.values():
    border.set_visible(False)

# write each bar's value (in thousands) just past the end of the bar
for row, count in enumerate(temp['Deaths']):
    chart_axes.text(count + 3, row - .25, f"{int(count / 1000)}K", color='black', fontsize=15)

Recovered Cases

plt.figure(figsize=(20, 15))
# Take the 15 largest 'Recovered' values, then order them ascending so the
# largest country ends up as the last bar drawn.
temp = (
    country_wise[['Country/Region', 'Recovered']]
    .sort_values(by='Recovered', ascending=False)
    .head(15)
    .sort_values(by='Recovered')
)

chart_axes = plt.gca()
bars = chart_axes.barh(temp['Country/Region'], temp['Recovered'], color=['gray'])
# highlight the last (largest) bar
bars[-1].set_color('green')
chart_axes.tick_params(labelbottom=False, bottom=False, top=False)
chart_axes.set_title('Recovered Cases in Top15 Countries', fontsize=23)
chart_axes.ticklabel_format(useOffset=False, style='plain', axis='x')

# strip the frame around the chart
for border in chart_axes.spines.values():
    border.set_visible(False)

# write each bar's value (in thousands) just past the end of the bar
for row, count in enumerate(temp['Recovered']):
    chart_axes.text(count + 3, row - .25, f"{int(count / 1000)}K", color='black', fontsize=15)

Confirmed Cases in the world

import plotly.express as px
import plotly.graph_objs as go
def plot_map(df, col, color_scale="Portland"):
    """Display a world choropleth coloured by the values of one column.

    Rows whose ``col`` value is not greater than zero are left out of the map.

    Args:
        df: DataFrame with a "Country/Region" column (matched to the map via
            plotly's 'country names' location mode) and a column named ``col``.
        col: Name of the column to plot; also used as the figure title and as
            extra hover data.
        color_scale: Continuous colour scale passed to plotly express.
            Defaults to "Portland", the scale this helper has always used.

    Returns:
        None. The figure is rendered with ``fig.show()``.
    """
    # A go.Layout(autosize=False, width=5000, height=2000) object used to be
    # built here but was never attached to the figure, so it had no effect;
    # the dead local has been removed.
    positive_rows = df[df[col] > 0]
    fig = px.choropleth(positive_rows, locations="Country/Region", locationmode='country names',
                        color=col, hover_name="Country/Region",
                        title=col, hover_data=[col], color_continuous_scale=color_scale)
    fig.show()
plot_map(country_wise, 'Confirmed')

Deaths in the world

import plotly.graph_objs as go
def plot_map_death(df, col, color_scale="curl"):
    """Display a world choropleth coloured by the values of one column.

    Rows whose ``col`` value is not greater than zero are left out of the map.

    Args:
        df: DataFrame with a "Country/Region" column (matched to the map via
            plotly's 'country names' location mode) and a column named ``col``.
        col: Name of the column to plot; also used as the figure title and as
            extra hover data.
        color_scale: Continuous colour scale passed to plotly express.
            Defaults to "curl", the scale this helper has always used.

    Returns:
        None. The figure is rendered with ``fig.show()``.
    """
    # A go.Layout(autosize=False, width=5000, height=2000) object used to be
    # built here but was never attached to the figure, so it had no effect;
    # the dead local has been removed.
    positive_rows = df[df[col] > 0]
    fig = px.choropleth(positive_rows, locations="Country/Region", locationmode='country names',
                        color=col, hover_name="Country/Region",
                        title=col, hover_data=[col], color_continuous_scale=color_scale)
    fig.show()
plot_map_death(country_wise, 'Deaths')

Cases Over Time

# Keep only rows with at least one confirmed case before taking the logarithm:
# np.log(0) evaluates to -inf and emits a RuntimeWarning, which would feed
# non-finite values into the colour scale. This is the same `> 0` filter that
# plot_map applies to its input.
reported = full_grouped[full_grouped["Confirmed"] > 0]
# Animated choropleth: one frame per date (formatted as YYYY-MM-DD), coloured
# by the natural log of the confirmed count.
fig = px.choropleth(reported, locations="Country/Region",
                    color=np.log(reported["Confirmed"]),
                    locationmode='country names', hover_name="Country/Region",
                    animation_frame=reported["Date"].dt.strftime('%Y-%m-%d'),
                    title='Cases over time / Please Click on the play button', color_continuous_scale=px.colors.sequential.Oryel)
# Hide the colour bar.
fig.update(layout_coloraxis_showscale=False)
fig.show()

Deaths Vs Confirmed

# The 20 rows of country_wise with the highest 'Deaths' values.
top_by_deaths = country_wise.sort_values('Deaths', ascending=False).head(20)
# Log-log scatter of Deaths against Confirmed; each point is labelled with its
# country and sized by its confirmed count.
fig = px.scatter(
    top_by_deaths,
    x='Confirmed',
    y='Deaths',
    color='Country/Region',
    size='Confirmed',
    text='Country/Region',
    log_x=True,
    log_y=True,
    height=700,
    title='Deaths vs Confirmed (Scale is in log10)',
)
fig.update_traces(textposition='top center')
# Hide the legend and show a range slider under the x axis.
fig.update_layout(showlegend=False, xaxis_rangeslider_visible=True)
fig.show()

Symptoms

# NOTE(review): `symptoms` is not defined in the visible part of this file.
# Presumably it is a table with 'symptom' and 'percentage' columns and nine
# rows (the explode tuple below has nine entries) -- confirm where it is built.
plt.figure(figsize=(8, 13))
# Wedge offsets: the first five wedges stay in place, the last four are pulled out.
explode = (0,) * 5 + (0.5,) * 4
plt.pie(symptoms['percentage'], explode=explode, autopct='%0.1f%%',
        shadow=True, startangle=340, radius=2)
plt.legend(symptoms['symptom'], loc='lower right', bbox_to_anchor=(1.5, 1.05))

plt.show()

.

3301cookie-checkCovid-19 Analysis using Data Science

Leave a Reply

Your email address will not be published. Required fields are marked *