import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats

%matplotlib inline

# Notebook-wide plotting and warning configuration.
# Apply the ggplot style so the grid makes numeric ranges easy to read.
plt.style.use('ggplot')

# Keep the minus sign from rendering as a broken glyph on the axes.
mpl.rcParams['axes.unicode_minus'] = False

# Hide all warnings for the rest of the session.
# NOTE(review): this also hides pandas deprecation / chained-assignment
# warnings raised by later cells.
import warnings
warnings.filterwarnings('ignore') 

# Previously tried alternative style: style.use('fivethirtyeight')
# NOTE(review): seaborn's theme is set after the ggplot style above, so it
# presumably overrides parts of that style - confirm which one is intended.
sns.set(style='whitegrid',color_codes=True)

# Load the two input tables: the WHO daily report and the population table.
data, population = (
    pd.read_csv(csv_path)
    for csv_path in (
        './Corona19/Practice/WHO_report.csv',
        './Corona19/Practice/total_population.csv',
    )
)
# Print both (rows, columns) shapes, separated by a newline.
print(data.shape, '\n', population.shape, sep=' ')

(2867, 9) 
 (289, 11)

# EDA: inspect missing values / column names, then keep only the columns
# used by the analysis.
data.isnull().sum()
data.columns
# .copy() so later column assignments write to an independent frame rather
# than to a slice of the originally loaded one.
data = data[['date', 'location', 'new_cases', 'new_deaths']].copy()

population.columns
population = population[['Region, subregion, country or area *', '2020']].copy()
population.columns = ['location', 'population']
# The population column is text: strip the spaces used as digit separators
# and turn the '...' placeholder into 0 before casting to int.
# regex=False makes both replacements literal on every pandas version; the
# original '\.\.\.' pattern only worked while str.replace defaulted to regex
# matching, and is an invalid escape sequence in a non-raw string.
population['population'] = (
    population['population']
    .str.replace(' ', '', regex=False)
    .str.replace('...', '0', regex=False)
    .astype(int)
)
population.dtypes

location      object
population     int32
dtype: object

# Running totals of cases and deaths per location.
# GroupBy.cumsum() returns a Series aligned with the original row index, so
# it can be assigned straight back as a column. The previous
# groupby(...).apply(lambda x: x.cumsum()) form yields a result indexed by
# (location, original index) on pandas >= 2.0, which cannot be assigned.
# NOTE(review): assumes rows are already in date order within each location.
data['total_cases'] = data.groupby('location')['new_cases'].cumsum()
data['total_death'] = data.groupby('location')['new_deaths'].cumsum()

# Left-join the population onto every daily row, drop the locations that have
# no population entry, and zero-fill whatever is still missing.
data = (
    pd.merge(left=data, right=population, on='location', how='left')
    .loc[lambda merged: pd.notnull(merged['population'])]
    .fillna(0)
)
data.head(10)

# Group the rows by location and keep the last row of each group.
# NOTE(review): the last row is presumably the most recent date - this relies
# on the input being in chronological order.
data_gbyl = data.groupby(by=['location'])
data_gby_last = data_gbyl.last()
# Look at a random sample of 10 locations.
data_gby_last.sample(n=10)

1-1. 인구대비 확진자 상위 5개국

# Rank locations by cumulative cases relative to population and show the
# five highest.
# NOTE(review): population values look like thousands (San Marino = 34), so
# the ratio would be cases per 1,000 people - confirm against the data source.
data_gby_last = (
    data_gby_last
    .assign(**{
        'cases/population':
            data_gby_last['total_cases'] / data_gby_last['population'],
    })
    .sort_values(by='cases/population', ascending=False)
)
data_gby_last.head(5)

1-2. 인구대비 사망자 상위 5개국

# Rank locations by cumulative deaths relative to population and show the
# five highest.
# NOTE(review): population values look like thousands, so the ratio would be
# deaths per 1,000 people - confirm against the data source.
data_gby_last = (
    data_gby_last
    .assign(**{
        'deaths/population':
            data_gby_last['total_death'] / data_gby_last['population'],
    })
    .sort_values(by='deaths/population', ascending=False)
)
data_gby_last.head(5)

1-3. 5개국 국가별 확진자 누적 그래프 그리기

# Keep the daily rows of the five countries with the most cases per capita
# and reshape them into a location x date table of cumulative cases.
top5 = data.reset_index()
# The country names are hard-coded from the cases/population ranking.
top5 = top5[top5['location'].str.contains('San Marino|Iceland|Italy|Switzerland|Norway')]
# pivot() arguments are keyword-only on pandas >= 2.0, so name them.
pvotTop5 = top5.pivot(index='location', columns='date', values='total_cases')
# A missing date for a country means no cases were recorded yet.
pvotTop5 = pvotTop5.fillna(0)
pvotTop5

# Flip the table so dates become the rows and each country a column.
pvotTop5 = pvotTop5.transpose()
pvotTop5.head()

# Plot the whole transposed table at once: one line per country column.
plt.figure(figsize=(15, 7))
# Rotate the x tick labels so the dates stay readable.
plt.xticks(rotation = 45)
plt.plot(pvotTop5)
plt.show()

# Move the date index into a regular column so it can be passed as x.
pvotTop5_Nindex = pvotTop5.reset_index()
pvotTop5_Nindex
plt.figure(figsize=(15, 7))
plt.xticks(rotation=45)
# Draw one cumulative-case line per country, in a fixed order.
for country in ('Iceland', 'Norway', 'Italy', 'San Marino', 'Switzerland'):
    plt.plot(pvotTop5_Nindex['date'], pvotTop5_Nindex[country])
plt.show()

# Re-apply the five-country filter and order the long-format rows by date.
top5 = (
    top5
    .loc[top5['location'].str.contains('San Marino|Iceland|Italy|Switzerland|Norway')]
    .sort_values('date')
)
top5

# Draw the same cumulative-case curves with seaborn from the long-format
# top5 frame: one line per location, coloured by the hue column.
plt.figure(figsize=(15, 7))
# Rotate the x tick labels so the dates stay readable.
plt.xticks(rotation = 45)
# Legend font-size experiments, left disabled:
# plt.legend(fontsize=50) # using a size in points
# plt.legend(fontsize="xx-large")
sns.lineplot(x = 'date', y = 'total_cases', data =top5, hue = 'location')

<AxesSubplot:xlabel='date', ylabel='total_cases'>

	date	location	population
0	2020-02-25	Afghanistan	38928.0
1	2020-02-26	Afghanistan	38928.0
2	2020-02-27	Afghanistan	38928.0
3	2020-02-28	Afghanistan	38928.0
4	2020-02-29	Afghanistan	38928.0
5	2020-03-01	Afghanistan	38928.0
6	2020-03-02	Afghanistan	38928.0
7	2020-03-03	Afghanistan	38928.0
8	2020-03-04	Afghanistan	38928.0
9	2020-03-05	Afghanistan	38928.0

	date	new_cases	new_deaths	total_cases	total_death	population
location
Brunei	2020-03-17	0.0	0.0	49.0	0.0	437.0
Ireland	2020-03-17	54.0	0.0	222.0	1.0	4938.0
Nepal	2020-03-17	0.0	0.0	0.0	0.0	29137.0
Bahrain	2020-03-17	8.0	0.0	221.0	0.0	1702.0
Bahamas	2020-03-17	0.0	0.0	0.0	0.0	393.0
Cyprus	2020-03-17	0.0	0.0	31.0	0.0	1207.0
Serbia	2020-03-17	29.0	0.0	69.0	0.0	8737.0
Liberia	2020-03-17	0.0	0.0	0.0	0.0	5058.0
Qatar	2020-03-17	38.0	0.0	438.0	0.0	2881.0
Germany	2020-03-17	1174.0	1.0	6011.0	11.0	83784.0

	date	new_cases	new_deaths	total_cases	total_death	population	cases/population
location
San Marino	2020-03-17	10.0	4.0	101.0	8.0	34.0	2.970588
Iceland	2020-03-17	61.0	0.0	197.0	0.0	341.0	0.577713
Italy	2020-03-17	3233.0	694.0	27978.0	2501.0	60462.0	0.462737
Switzerland	2020-03-17	0.0	1.0	2199.0	13.0	8655.0	0.254073
Norway	2020-03-17	92.0	2.0	1168.0	2.0	5421.0	0.215458

	date	new_cases	new_deaths	total_cases	total_death	population	cases/population	deaths/population
location
San Marino	2020-03-17	10.0	4.0	101.0	8.0	34.0	2.970588	0.235294
Italy	2020-03-17	3233.0	694.0	27978.0	2501.0	60462.0	0.462737	0.041365
Iran	2020-03-17	0.0	0.0	14989.0	851.0	83993.0	0.178455	0.010132
Spain	2020-03-17	1438.0	21.0	9190.0	308.0	46755.0	0.196557	0.006588
France	2020-03-17	1193.0	21.0	6570.0	147.0	65274.0	0.100653	0.002252

date	2020-01-31	2020-02-01	2020-02-02	2020-02-03	2020-02-04	2020-02-05	2020-02-06	2020-02-07	2020-02-08	2020-02-09	...	2020-03-08	2020-03-09	2020-03-10	2020-03-11	2020-03-12	2020-03-13	2020-03-14	2020-03-15	2020-03-16	2020-03-17
location
Iceland	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	43.0	53.0	53.0	59.0	59.0	59.0	59.0	136.0	136.0	197.0
Italy	0.0	0.0	0.0	0.0	0.0	0.0	0.0	1.0	1.0	1.0	...	5881.0	7373.0	9170.0	10147.0	12460.0	15111.0	17658.0	21155.0	24745.0	27978.0
Norway	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	146.0	168.0	191.0	276.0	488.0	488.0	749.0	906.0	1076.0	1168.0
San Marino	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	26.0	36.0	48.0	62.0	62.0	62.0	65.0	91.0	91.0	101.0
Switzerland	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	263.0	331.0	331.0	490.0	644.0	857.0	1124.0	1358.0	2199.0	2199.0

DenverAlmighty의 기술 블로그

[ADP 실기 준비] 코로나19 - 인구대비 상위 5개국 구하기 + 시각화

1-1. 인구대비 확진자 상위 5개국

1-2. 인구대비 사망자 상위 5개국

1-3. 5개국 국가별 확진자 누적 그래프 그리기

'Data > Data Analytics' 카테고리의 다른 글

티스토리툴바

	index	date	location	new_cases	new_deaths	total_cases	total_death	population
1166	1264	2020-01-31	Italy	0.0	0.0	0.0	0.0	60462.0
1167	1265	2020-02-01	Italy	0.0	0.0	0.0	0.0	60462.0
1168	1266	2020-02-02	Italy	0.0	0.0	0.0	0.0	60462.0
1169	1267	2020-02-03	Italy	0.0	0.0	0.0	0.0	60462.0
1170	1268	2020-02-04	Italy	0.0	0.0	0.0	0.0	60462.0
...	...	...	...	...	...	...	...	...
1709	1812	2020-03-17	Norway	92.0	2.0	1168.0	2.0	5421.0
1212	1310	2020-03-17	Italy	3233.0	694.0	27978.0	2501.0	60462.0
1010	1067	2020-03-17	Iceland	61.0	0.0	197.0	0.0	341.0
2063	2184	2020-03-17	San Marino	10.0	4.0	101.0	8.0	34.0
2367	2488	2020-03-17	Switzerland	0.0	1.0	2199.0	13.0	8655.0

비트코인 최대 하락률과 최대 상승률 분석하기 (1)	2025.05.24
[데이터 분석] 정말 비행기가 가장 안전한 교통 수단일까? 2. 데이터 전처리 (0)	2024.12.30
[데이터 분석] 정말 비행기가 가장 안전한 교통 수단일까? 1. 데이터 수집 (0)	2024.12.30
[데이터 분석] 정말 비행기가 가장 안전한 교통 수단일까? 0. 분석 계기 (1)	2024.12.30
[2023 빅콘테스트] 클래식 공연 활성화를 위한 효과적 가격 모델 수립 (0)	2024.05.01

[ADP 실기 준비] 코로나19 - 인구대비 상위 5개국 구하기 + 시각화

1-1. 인구대비 확진자 상위 5개국

1-2. 인구대비 사망자 상위 5개국

1-3. 5개국 국가별 확진자 누적 그래프 그리기

'Data > Data Analytics' 카테고리의 다른 글

관련글

티스토리툴바