Regional covid-19 mortality in Belgium per gender and age
Combines the mortality number of the last 10 year with those of covid-19 this year.
# Import pandas for data wrangling and Altair for plotting
import pandas as pd
import altair as alt
df_tot_sc = pd.read_excel('https://epistat.sciensano.be/Data/COVID19BE.xlsx')
df_inhab = pd.read_excel('https://statbel.fgov.be/sites/default/files/files/opendata/bevolking%20naar%20woonplaats%2C%20nationaliteit%20burgelijke%20staat%20%2C%20leeftijd%20en%20geslacht/TF_SOC_POP_STRUCT_2019.xlsx')
df_inhab
inhab_provence = df_inhab['TX_PROV_DESCR_NL'].dropna().unique()
inhab_provence
sc_provence = df_tot_sc['PROVINCE'].unique()
sc_provence
[p.split()[1] for p in inhab_provence]
map_statbel_provence_to_sc_provence = {'Provincie Antwerpen':'Antwerpen', 'Provincie Vlaams-Brabant':'VlaamsBrabant',
'Provincie Waals-Brabant':'BrabantWallon', 'Provincie West-Vlaanderen':'WestVlaanderen',
'Provincie Oost-Vlaanderen':'OostVlaanderen', 'Provincie Henegouwen':'Hainaut',
'Provincie Luik':'Liège', 'Provincie Limburg':'Limburg', 'Provincie Luxemburg':'Luxembourg',
'Provincie Namen':'Namur'}
df_inhab['sc_provence'] = df_inhab['TX_PROV_DESCR_NL'].map(map_statbel_provence_to_sc_provence)
df_tot_sc['AGEGROUP'].unique()
df_inhab['AGEGROUP'] =pd.cut(df_inhab['CD_AGE'], bins=[0,10,20,30,40,50,60,70,80,90,200], labels=['0-9','10-19','20-29','30-39','40-49','50-59','60-69','70-79','80-89','90+'], include_lowest=True)
df_inhab_gender_prov = df_inhab.groupby(['sc_provence', 'CD_SEX', 'AGEGROUP'])['MS_POPULATION'].sum().reset_index()
df_inhab_gender_prov_cases = pd.merge(df_inhab_gender_prov, df_tot_sc.dropna(), left_on=['sc_provence', 'AGEGROUP', 'CD_SEX'], right_on=['PROVINCE', 'AGEGROUP', 'SEX'])
df_inhab_gender_prov_cases.head()
df_plot = df_inhab_gender_prov_cases.groupby(['SEX', 'AGEGROUP', 'PROVINCE']).agg(CASES = ('CASES', 'sum'), MS_POPULATION=('MS_POPULATION', 'first')).reset_index()
df_plot
df_plot['PROVINCE'].unique()
alt.Chart(df_plot).mark_bar().encode(x='AGEGROUP:N', y='CASES', color='SEX:N', column='PROVINCE:N')
df_plot['percentage'] = df_plot['CASES'] / df_plot['MS_POPULATION']
alt.Chart(df_plot).mark_bar().encode(x='AGEGROUP:N', y='percentage', color='SEX:N', column='PROVINCE:N')
Let's add a colorscale the makes the male blue and female number pink.
color_scale = alt.Scale(domain=['M', 'F'],
range=['#1f77b4', '#e377c2'])
alt.Chart(df_plot).mark_bar().encode(
x='AGEGROUP:N',
y='percentage',
color=alt.Color('SEX:N', scale=color_scale, legend=None),
column='PROVINCE:N')
The graph's get to wide. Let's use faceting to make two rows.
Inspired and based on https://altair-viz.github.io/gallery/us_population_pyramid_over_time.html
#slider = alt.binding_range(min=1850, max=2000, step=10)
# select_province = alt.selection_single(name='PROVINCE', fields=['PROVINCE'],
# bind=slider, init={'PROVINCE': 'Antwerpen'})
color_scale = alt.Scale(domain=['Male', 'Female'],
range=['#1f77b4', '#e377c2'])
select_province = alt.selection_multi(fields=['PROVINCE'], bind='legend')
base = alt.Chart(df_plot).add_selection(
select_province
).transform_filter(
select_province
).transform_calculate(
gender=alt.expr.if_(alt.datum.SEX == 'M', 'Male', 'Female')
).properties(
width=250
)
left = base.transform_filter(
alt.datum.gender == 'Female'
).encode(
y=alt.Y('AGEGROUP:O', axis=None),
x=alt.X('percentage:Q', axis=alt.Axis(format='.0%'),
title='Percentage',
sort=alt.SortOrder('descending'),
),
color=alt.Color('gender:N', scale=color_scale, legend=None),
).mark_bar().properties(title='Female')
middle = base.encode(
y=alt.Y('AGEGROUP:O', axis=None),
text=alt.Text('AGEGROUP:O'),
).mark_text().properties(width=20)
right = base.transform_filter(
alt.datum.gender == 'Male'
).encode(
y=alt.Y('AGEGROUP:O', axis=None),
x=alt.X('percentage:Q', title='Percentage', axis=alt.Axis(format='.0%'),),
color=alt.Color('gender:N', scale=color_scale, legend=None)
).mark_bar().properties(title='Male')
# legend = alt.Chart(df_plot).mark_text().encode(
# y=alt.Y('PROVINCE:N', axis=None),
# text=alt.Text('PROVINCE:N'),
# color=alt.Color('PROVINCE:N', legend=alt.Legend(title="Provincie"))
# )
alt.concat(left, middle, right, spacing=5)
#legend=alt.Legend(title="Species by color")
provinces = df_plot['PROVINCE'].unique()
select_province = alt.selection_single(
name='Select', # name the selection 'Select'
fields=['PROVINCE'], # limit selection to the Major_Genre field
init={'PROVINCE': 'Antwerpen'}, # use first genre entry as initial value
bind=alt.binding_select(options=provinces) # bind to a menu of unique provence values
)
base = alt.Chart(df_plot).add_selection(
select_province
).transform_filter(
select_province
).transform_calculate(
gender=alt.expr.if_(alt.datum.SEX == 'M', 'Male', 'Female')
).properties(
width=250
)
left = base.transform_filter(
alt.datum.gender == 'Female'
).encode(
y=alt.Y('AGEGROUP:O', axis=None),
x=alt.X('percentage:Q', axis=alt.Axis(format='.0%'),
title='Percentage',
sort=alt.SortOrder('descending'),
scale=alt.Scale(domain=(0.0, 0.1), clamp=True)
),
color=alt.Color('gender:N', scale=color_scale, legend=None),
tooltip=[alt.Tooltip('percentage', format='.1%')]
).mark_bar().properties(title='Female')
middle = base.encode(
y=alt.Y('AGEGROUP:O', axis=None),
text=alt.Text('AGEGROUP:O'),
).mark_text().properties(width=20)
right = base.transform_filter(
alt.datum.gender == 'Male'
).encode(
y=alt.Y('AGEGROUP:O', axis=None),
x=alt.X('percentage:Q', title='Percentage', axis=alt.Axis(format='.1%'), scale=alt.Scale(domain=(0.0, 0.1), clamp=True)),
color=alt.Color('gender:N', scale=color_scale, legend=None),
tooltip=[alt.Tooltip('percentage', format='.1%')]
).mark_bar().properties(title='Male')
alt.concat(left, middle, right, spacing=5).properties(title='Percentage of covid-19 cases per province, gender and age grup in Belgium')
# https://epistat.wiv-isp.be/covid/
# Dataset of mortality by date, age, sex, and region
df_dead_sc = pd.read_csv('https://epistat.sciensano.be/Data/COVID19BE_MORT.csv')
df_dead_sc.head()
df_dead_sc['REGION'].value_counts()
df_dead_sc['AGEGROUP'].value_counts()
df_inhab['AGEGROUP_sc'] =pd.cut(df_inhab['CD_AGE'], bins=[0,24,44,64,74,84,200], labels=['0-24','25-44','45-64','65-74','75-84','85+'], include_lowest=True)
df_inhab.groupby('AGEGROUP_sc').agg(lowest_age=('CD_AGE', 'min'), highest_age=('CD_AGE', max))
df_inhab.head()
df_dead_sc['REGION'].unique()
df_inhab['TX_RGN_DESCR_NL'].value_counts()
df_inhab_gender_prov = df_inhab.groupby(['TX_RGN_DESCR_NL', 'CD_SEX', 'AGEGROUP_sc'])['MS_POPULATION'].sum().reset_index()
region_sc_to_region_inhad = {'Flanders':'Vlaams Gewest', 'Wallonia':'Waals Gewest', 'Brussels':'Brussels Hoofdstedelijk Gewest'}
df_dead_sc['TX_RGN_DESCR_NL'] = df_dead_sc['REGION'].map(region_sc_to_region_inhad)
df_dead_sc.groupby(['TX_RGN_DESCR_NL', 'AGEGROUP', 'SEX'])['DEATHS'].sum()
df_dead_sc_region_agegroup_gender = df_dead_sc.groupby(['TX_RGN_DESCR_NL', 'AGEGROUP', 'SEX'])['DEATHS'].sum().reset_index()
df_inhab_gender_prov_deaths = pd.merge(df_inhab_gender_prov, df_dead_sc_region_agegroup_gender, left_on=['TX_RGN_DESCR_NL', 'AGEGROUP_sc', 'CD_SEX'], right_on=['TX_RGN_DESCR_NL', 'AGEGROUP', 'SEX'])
df_inhab_gender_prov_deaths['MS_POPULATION'].sum()
df_inhab_gender_prov_deaths['DEATHS'].sum()
df_inhab_gender_prov_deaths
df_inhab_gender_prov_deaths['percentage'] = df_inhab_gender_prov_deaths['DEATHS']/df_inhab_gender_prov_deaths['MS_POPULATION']
df_plot = df_inhab_gender_prov_deaths
regions = df_plot['TX_RGN_DESCR_NL'].unique()
select_province = alt.selection_single(
name='Select', # name the selection 'Select'
fields=['TX_RGN_DESCR_NL'], # limit selection to the Major_Genre field
init={'TX_RGN_DESCR_NL': 'Vlaams Gewest'}, # use first genre entry as initial value
bind=alt.binding_select(options=regions) # bind to a menu of unique provence values
)
base = alt.Chart(df_plot).add_selection(
select_province
).transform_filter(
select_province
).transform_calculate(
gender=alt.expr.if_(alt.datum.SEX == 'M', 'Male', 'Female')
).properties(
width=250
)
left = base.transform_filter(
alt.datum.gender == 'Female'
).encode(
y=alt.Y('AGEGROUP:O', axis=None),
x=alt.X('percentage:Q', axis=alt.Axis(format='.2%'),
title='Percentage',
sort=alt.SortOrder('descending'),
# scale=alt.Scale(domain=(0.0, 0.02), clamp=True)
),
color=alt.Color('gender:N', scale=color_scale, legend=None),
tooltip=[alt.Tooltip('percentage', format='.2%')]
).mark_bar().properties(title='Female')
middle = base.encode(
y=alt.Y('AGEGROUP:O', axis=None),
text=alt.Text('AGEGROUP:O'),
).mark_text().properties(width=20)
right = base.transform_filter(
alt.datum.gender == 'Male'
).encode(
y=alt.Y('AGEGROUP:O', axis=None),
# x=alt.X('percentage:Q', title='Percentage', axis=alt.Axis(format='.2%'), scale=alt.Scale(domain=(0.0, 0.02), clamp=True)),
x=alt.X('percentage:Q', title='Percentage', axis=alt.Axis(format='.2%')),
color=alt.Color('gender:N', scale=color_scale, legend=None),
tooltip=[alt.Tooltip('percentage', format='.2%')]
).mark_bar().properties(title='Male')
alt.concat(left, middle, right, spacing=5).properties(title='Percentage of covid-19 deaths per province, gender and age group relative to number of inhabitants in Belgium')