In [1]:

import os
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd

In [2]:

# Question: does the Covid-19 death rate correlate with GDP per capita? Do more people die in poorer countries?
# Both CSV files are read from a single data directory. Set the COVID_GDP_DATA_DIR environment
# variable to point at your own copy of the files; when it is unset, the original author's
# local folder is used, so the default behaviour is unchanged.
DATA_DIR = Path(os.environ.get("COVID_GDP_DATA_DIR", r"C:\Users\Jonny\Documents\Projects\covidToGDP"))
# GDP per capita per country - https://www.focus-economics.com/economic-indicator/gdp-per-capita
gdpPerCapita = pd.read_csv(DATA_DIR / "gdpPerCapita.csv")
# Covid-19 death statistics per country - https://ourworldindata.org/explorers/coronavirus-data-explorer?facet=none&pickerSort=desc&pickerMetric=location&Metric=Confirmed+deaths&Interval=Cumulative&Relative+to+Population=true&Color+by+test+positivity=false
covidDeathRates = pd.read_csv(DATA_DIR / "covidDeathsPerCountry.csv")

In [3]:

# Peek at the first rows of the GDP data to check whether it needs any cleaning.
gdpPerCapita.head(n=5)
# Nothing to clean here: there are no extra rows and every value is in USD ($).
# Read a row as: Luxembourg's GDP per capita is $101,207 per year
# (the country's annual output averaged over its population).

Out[3]:

	Country	gdpPerCapita
0	Luxembourg	101207
1	Switzerland	85682
2	Cayman Islands	83536
3	Ireland	78558
4	Norway	75059

In [4]:

# Reduce the Covid data to one row per location.
# Keep only 'location' and 'total_deaths_per_million', taking the MAX of
# 'total_deaths_per_million' within each location.
# The column is named when the frame is created (to_frame(name=...)) rather than by assigning
# into columns.values afterwards, which writes into an Index that pandas treats as immutable.
# NOTE: this rebinds covidDeathRates to the aggregated frame, so re-running this cell
# requires re-running the cell that loads the CSV first.
covidDeathRates = (
    covidDeathRates
    .groupby('location')['total_deaths_per_million']
    .max()
    .to_frame(name="Covid Death Rate Per Million Rate")
)

In [5]:

# Each location is now paired with its maximum total deaths per million people (its rate).
covidDeathRates.head(n=5)

Out[5]:

	Covid Death Rate Per Million Rate
location
Afghanistan	193.220
Africa	184.726
Albania	1217.223
Algeria	154.091
Andorra	2003.775

In [6]:

# Order the locations by death rate, ascending, for a quick look at who reports the lowest rates.
covidDeathRates.sort_values(
    by='Covid Death Rate Per Million Rate',
    ascending=True,
).head(n=5)
# North Korea comes out with the lowest reported death rate :/

Out[6]:

	Covid Death Rate Per Million Rate
location
North Korea	2.395
Burundi	3.101
China	3.616
Chad	11.410
South Sudan	12.125

In [7]:

# Combine the two datasets so each country carries both its GDP per capita and its
# Covid death rate. The inner join keeps only names that appear in both sources
# ('location' on the Covid side, 'Country' on the GDP side).
# The result is stored already sorted by death rate, which gives a more readable plot.
mergedData = (
    covidDeathRates
    .merge(gdpPerCapita, how="inner", left_on="location", right_on="Country")
    .sort_values(by='Covid Death Rate Per Million Rate')
)

In [8]:

# Plot both metrics per country and see what we can visualize.
# Countries appear along the x-axis in mergedData's row order.
fig, ax1 = plt.subplots(figsize=(14, 6))
# Second y-axis sharing the same x-axis: the two metrics are on very different scales.
ax2 = ax1.twinx()
ax1.set_title('Covid-19 deaths per million vs. GDP per capita, by country')
ax1.set_xlabel('Country')
# Axis labels are colour-matched to their lines and state the actual units.
ax1.set_ylabel('Covid deaths per million people', color='blue')
ax2.set_ylabel('GDP per capita (USD)', color='red')
ax1.plot(mergedData['Country'], mergedData['Covid Death Rate Per Million Rate'], label="Covid deaths per million people", color='blue')
ax2.plot(mergedData['Country'], mergedData['gdpPerCapita'], label="GDP per capita (USD)", color='red')
# One tick per country: rotate and shrink the labels so they do not overlap.
ax1.tick_params(axis='x', labelrotation=90, labelsize=6)
plt.show()

In [9]:

# The plot hints at some relationship: countries with low death rates also tend to have
# low GDP per capita. Let's quantify it with the correlation coefficient.
# Only the two numeric columns are passed to corr(): mergedData also holds the text
# 'Country' column, and DataFrame.corr() in pandas >= 2.0 raises on non-numeric columns
# instead of silently skipping them.
correlation_matrix = mergedData[['Covid Death Rate Per Million Rate', 'gdpPerCapita']].corr()
print(correlation_matrix)

                                   Covid Death Rate Per Million Rate  \
Covid Death Rate Per Million Rate                           1.000000   
gdpPerCapita                                                0.161727   

                                   gdpPerCapita  
Covid Death Rate Per Million Rate      0.161727  
gdpPerCapita                           1.000000

In [10]:

#Okay, so the correlation between GDP Per Capita and Covid Death Rates is not great: we have a correlation of 0.1617.
#This indicates a weak positive correlation.
#The sources of this data are solid, but differences in reporting across countries may be affecting the data.
#Is there any way that we could get better data?

Recent Posts

Categories

Related