1
0
Fork 0
rki-impfparser/plot.py
2021-01-17 20:43:58 +01:00

289 lines
12 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/python
# vim: set fileencoding=utf-8 :
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import datetime
import re
import requests as req
import locale
import os.path
import shutil
# German locale: used by the '{:n}' number formatting and the '%B' month
# names further down in this script.
locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')

# Output locations; both values carry their trailing slash.
site_folder = 'site/'
data_folder = 'data/'

# Reference values for the extrapolation: population of Germany and the
# share of it assumed for herd immunity.
einwohner_deutschland = 83_190_556
herd_immunity = 0.7

# Creation date shown in the plots and timestamp used for the raw download.
today = datetime.date.today()
print_today = today.isoformat()
filename_now = '{:%Y%m%d%H%M%S}'.format(datetime.datetime.now())

# Figure size in inches corresponding to DIN A4 landscape.
plt.rcParams["figure.figsize"] = [11.69, 8.27]
# Download the current RKI workbook and keep a timestamped raw copy of it.
# The data folder is created on demand so a fresh checkout does not fail.
os.makedirs(data_folder, exist_ok=True)
data_filename = os.path.join(data_folder, '{}_Impfquotenmonitoring.xlsx'.format(filename_now))
# A timeout keeps the script from hanging forever on a stalled connection.
r = req.get(
    'https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Daten/Impfquotenmonitoring.xlsx?__blob=publicationFile',
    timeout=60,
)
# Abort on HTTP errors instead of storing an error page as .xlsx and
# handing it to the Excel parser.
r.raise_for_status()
with open(data_filename, 'wb') as outfile:
    outfile.write(r.content)

# sheet_name=None loads every sheet into a dict keyed by sheet name.
rki_file = pd.read_excel(data_filename, sheet_name=None, engine='openpyxl')
raw_data = rki_file['Impfungen_proTag']
# Per-day vaccination records. The last sheet row is skipped (presumably a
# totals row -- confirm against the workbook) and rows with missing values
# are dropped.
impfungen = raw_data[:-1].dropna()
dates = impfungen['Datum']
daily = impfungen['Gesamtzahl Impfungen']
cumulative = np.cumsum(daily)

# Positional access: after dropna() the label 0 is not guaranteed to exist,
# and the last entry is already read positionally.
first_date = dates.iloc[0]
last_date = dates.iloc[-1]
last_date_day_rate = daily.iloc[-1]

# Totals so far and what is still missing to cover the whole population.
total_vaccinations = int(np.sum(daily))
total_vaccinations_percentage = float(total_vaccinations) / einwohner_deutschland
to_be_vaccinated = einwohner_deutschland - total_vaccinations
days_since_start = (last_date.date() - first_date.date()).days

# --- Extrapolation with the mean daily rate over all reported days ---
mean_vaccinations_daily = np.mean(daily)
mean_vaccinations_daily_int = int(np.round(mean_vaccinations_daily))
days_extrapolated = int(np.ceil(to_be_vaccinated / mean_vaccinations_daily))
# One entry per extrapolated day, starting at the first reported date.
extrapolated_dates = np.array([first_date + datetime.timedelta(days=i) for i in range(days_extrapolated)])
extrapolated_vaccinations = mean_vaccinations_daily * range(1, days_extrapolated + 1)
mean_vaccinations_daily_done = extrapolated_dates[-1]
# Computed by date arithmetic (same value as indexing extrapolated_dates)
# so that it cannot run past the end of the array.
mean_vaccinations_daily_herd_immunity = first_date + datetime.timedelta(days=int(np.ceil(days_extrapolated * herd_immunity)))

# --- Extrapolation with the rate of the most recent reported day ---
# NOTE(review): these dates are counted from the first reported date while
# the seven-day variant below counts from the last one -- confirm which
# reference date is intended.
days_extrapolated_with_todays_rate = int(np.ceil(to_be_vaccinated / last_date_day_rate))
last_date_day_rate_done = first_date + datetime.timedelta(days=days_extrapolated_with_todays_rate)
last_date_day_rate_herd_immunity = first_date + datetime.timedelta(days=int(np.ceil(days_extrapolated_with_todays_rate * herd_immunity)))

# --- Extrapolation with the mean rate of the last seven reported days ---
mean_vaccinations_last_seven_days = np.mean(daily[-7:])
mean_vaccinations_last_seven_days_int = int(np.round(mean_vaccinations_last_seven_days))
days_extrapolated_last_seven_days = int(np.ceil(to_be_vaccinated / mean_vaccinations_last_seven_days))
# Line through (last date, total so far) with the seven-day slope; it has
# days_extrapolated entries so it can be plotted against extrapolated_dates.
extrapolated_vaccinations_last_seven_days = total_vaccinations + mean_vaccinations_last_seven_days * range(-days_since_start, days_extrapolated - days_since_start)
mean_vaccinations_last_seven_days_done = last_date + datetime.timedelta(days=days_extrapolated_last_seven_days)
mean_vaccinations_last_seven_days_herd_immunity = last_date + datetime.timedelta(days=int(np.ceil(days_extrapolated_last_seven_days * herd_immunity)))

# Running mean: cumulative total divided by the number of reported days so far.
mean_vaccinations_daily_up_to_date = np.round(cumulative / range(1, len(cumulative) + 1))
# Alternative (disabled): derive the data status from the data itself.
#stand = dates.iloc[-1]
#print_stand = stand.isoformat()

# Read the data status from the official statement on the explanation
# sheet (second row, first column).
stand = rki_file['Erläuterung'].iloc[1, 0]
# Raw string so the \d escapes reach the regex engine unchanged; the dots
# of the date are escaped so only a literal '.' is accepted.
stand_regex = re.compile(r'^Datenstand: (\d\d\.\d\d\.\d\d\d\d, \d\d:\d\d) Uhr$')
m = stand_regex.match(stand)
if m is None:
    # Fail with a clear message instead of an AttributeError on None.
    raise ValueError('Unexpected data status line: {!r}'.format(stand))
stand_date = datetime.datetime.strptime(m.group(1), '%d.%m.%Y, %H:%M')
print_stand = stand_date.isoformat()
# Timestamp used to name the archive folder of this data status.
filename_stand = stand_date.strftime("%Y%m%d%H%M%S")
# Details of the individual federal states.
# The details sheet is whichever sheet is neither the explanation nor the
# per-day sheet.
details_sheet_name = set(rki_file.keys()).difference({'Erläuterung', 'Impfungen_proTag'}).pop()
details_sheet = rki_file[details_sheet_name]


def _first_17_rows(column):
    """Return the first 17 entries of ``column`` from the details sheet."""
    return details_sheet[column].iloc[:17]


# NOTE(review): the hosting page flagged invisible Unicode characters in this
# file -- verify that these column headers match the sheet exactly.
land_names = _first_17_rows('Bundesland')
total_vaccinations_by_land = _first_17_rows('Impfungen kumulativ')
vaccination_per_mille_by_land = _first_17_rows('Impfungen pro 1.000 Einwohner')
vaccination_reason_age_by_land = _first_17_rows('Indikation nach Alter*')
vaccination_reason_job_by_land = _first_17_rows('Berufliche Indikation*')
vaccination_reason_medical_by_land = _first_17_rows('Medizinische Indikation*')
vaccination_reason_oldhome_by_land = _first_17_rows('Pflegeheim-bewohnerIn*')

# Filled further down: raw and display-formatted details per state name.
details_per_land = dict()
details_per_land_formatted = dict()
def row_to_details(i):
    """Collect the numeric details of row ``i`` of the details sheet.

    Returns a dict with the cumulative vaccinations, their share in percent
    (per-mille value divided by 10) and, for each vaccination reason, the
    count plus its rounded percentage of the row's cumulative vaccinations.
    """
    total = total_vaccinations_by_land[i]
    reasons = (
        ('age', vaccination_reason_age_by_land),
        ('job', vaccination_reason_job_by_land),
        ('medical', vaccination_reason_medical_by_land),
        ('oldhome', vaccination_reason_oldhome_by_land),
    )

    details = {
        'total_vaccinations': int(total),
        'total_vaccinations_percentage': vaccination_per_mille_by_land[i] / 10,
    }
    for reason, column in reasons:
        count = column[i]
        key = 'vaccination_reason_' + reason
        details[key] = int(count)
        details[key + '_percentage'] = np.round(count / total * 100)
    return details
def row_to_details_formatted(i):
    """Collect the details of row ``i`` of the details sheet as strings.

    Same keys as ``row_to_details``; every value is rendered with the
    locale-aware ``n`` format. Counts additionally have every '.' removed
    from the formatted text.
    """
    total = total_vaccinations_by_land[i]

    def plain_count(value):
        # Locale-formatted integer with all '.' characters stripped.
        return '{:n}'.format(int(value)).replace('.', '')

    def share_of_total(value):
        # Rounded percentage of the row's cumulative vaccinations.
        return '{:n}'.format(np.round(value / total * 100))

    formatted = {
        'total_vaccinations': plain_count(total),
        'total_vaccinations_percentage': '{:.3n}'.format(np.round(vaccination_per_mille_by_land[i] / 10, 2)),
    }
    reasons = (
        ('age', vaccination_reason_age_by_land),
        ('job', vaccination_reason_job_by_land),
        ('medical', vaccination_reason_medical_by_land),
        ('oldhome', vaccination_reason_oldhome_by_land),
    )
    for reason, column in reasons:
        count = column[i]
        key = 'vaccination_reason_' + reason
        formatted[key] = plain_count(count)
        formatted[key + '_percentage'] = share_of_total(count)
    return formatted
# All rows except the last one are individual states; they are stored under
# their state name, once with raw numbers and once formatted for display.
details_per_land.update(
    {land_names[i]: row_to_details(i) for i in range(len(land_names) - 1)}
)
details_per_land_formatted.update(
    {land_names[i]: row_to_details_formatted(i) for i in range(len(land_names) - 1)}
)

# Row 16 is handled separately as the overall total.
details_total = row_to_details(16)
details_total_formatted = row_to_details_formatted(16)
# Every data status gets its own archive folder below the site folder.
archive_folder = site_folder + 'archive/' + filename_stand
if os.path.isdir(archive_folder):
    print('Archive folder {} already exists'.format(archive_folder))
else:
    # makedirs also creates a missing 'archive/' parent, which os.mkdir
    # would reject with FileNotFoundError on a fresh checkout.
    os.makedirs(archive_folder)
def plot_extrapolation_portion(percentage):
    """Plot daily and cumulative vaccinations with their linear extrapolation.

    ``percentage`` is the share of the population (0..1) up to which the
    axes of the cumulative plot are scaled. The figure is written as PDF and
    PNG into the archive folder and into the site folder. Nothing is drawn
    when the archived PDF already exists.
    """
    print_percentage = int(percentage * 100)
    plot_name = 'extrapolated_to_{}_percent'.format(print_percentage)
    archive_plot_filename = '{}/{}'.format(archive_folder, plot_name)
    latest_plot_filename = '{}/{}'.format(site_folder, plot_name)

    # An archived PDF means this data status has been plotted before.
    if os.path.isfile(archive_plot_filename + '.pdf'):
        print('Plot {} already exists'.format(archive_plot_filename))
        return

    _fig, ax = plt.subplots(1)

    title_template = (
        'Tägliche Impfquote, kumulierte Impfungen und lineare Extrapolation bis {:n} % der Bevölkerung Deutschlands\n'
        'Erstellung: {}, Datenquelle: RKI, Stand: {}\n'
        'Impfungen gesamt: {:n} ({:n} %), Durchschnittliche Impfrate: {:n} Impfungen/Tag'
    )
    plt.title(title_template.format(
        print_percentage,
        print_today,
        print_stand,
        total_vaccinations,
        np.round(total_vaccinations_percentage * 100, 2),
        mean_vaccinations_daily_int,
    ))

    # Second y axis for the cumulative values, sharing the date axis.
    ax2 = ax.twinx()

    # Left axis: daily vaccinations and the running mean.
    ax.bar(dates, daily, label='Tägliche Impfungen', color='blue')
    ax.plot(
        dates,
        mean_vaccinations_daily_up_to_date,
        color='violet',
        label='Durchschnittliche Impfquote\nbis zu diesem Tag (inkl.)',
    )

    # Right axis: limit the view to the requested share of the population
    # and to the matching part of the extrapolated time span.
    ax2.set_ylim([0, einwohner_deutschland * percentage])
    ax2.set_xlim(xmax=dates[0] + datetime.timedelta(days=percentage * days_extrapolated))
    ax2.grid(True)

    ax2.plot(dates, cumulative, color='red', label='Kumulierte Impfungen')
    label_overall_mean = 'Extrap. kumulierte Impfungen (Øgesamt)\n{:n} Impfungen/Tag'.format(mean_vaccinations_daily_int)
    ax2.plot(extrapolated_dates, extrapolated_vaccinations, color='orange', label=label_overall_mean)
    label_seven_days = 'Extrap. kumulierte Impfungen (Ø7 Tage)\n{:n} Impfungen/Tag'.format(mean_vaccinations_last_seven_days_int)
    ax2.plot(extrapolated_dates, extrapolated_vaccinations_last_seven_days, color='goldenrod', label=label_seven_days)

    # Legends, plain (non-scientific) tick labels and axis titles.
    ax.legend(loc='upper left')
    ax.get_yaxis().get_major_formatter().set_scientific(False)
    ax.set_xlabel('Datum')
    ax.set_ylabel('Tägliche Impfungen')

    ax2.legend(loc='center right')
    ax2.get_yaxis().get_major_formatter().set_scientific(False)
    ax2.set_ylabel('Kumulierte Impfungen')

    # Archive copy first, then the "latest" copy; each as PDF and PNG.
    for target in (archive_plot_filename, latest_plot_filename):
        for extension in ('.pdf', '.png'):
            plt.savefig(target + extension)
    plt.close()

    print('Created plot {} as pdf and png'.format(archive_plot_filename))
# Create the extrapolation plot scaled to 10 %, 70 % and 100 % of the population.
for _portion in (0.1, 0.7, 1.0):
    plot_extrapolation_portion(_portion)
def render_dashboard():
    """Render the XHTML dashboard and archive it together with its stylesheet.

    The template ``dashboard_template.xhtml`` is rendered into the site
    folder as ``index.xhtml``. Afterwards the rendered page and the
    stylesheet are copied into the archive folder of the current data
    status. Nothing is rendered when the archived dashboard already exists.
    """
    # Paths are derived from the shared folder variables so that they always
    # agree with the locations the plots are written to.
    dashboard_filename = site_folder + 'index.xhtml'
    dashboard_archive_filename = archive_folder + '/index.xhtml'
    stylesheet_filename = site_folder + 'rki-dashboard.css'
    stylesheet_archive_filename = archive_folder + '/rki-dashboard.css'

    if os.path.isfile(dashboard_archive_filename):
        print('Dashboard {} already exists'.format(dashboard_archive_filename))
        return

    # Imported here, after the early return, so jinja2 is only loaded when a
    # dashboard actually has to be rendered.
    from jinja2 import Environment, FileSystemLoader, select_autoescape

    env = Environment(
        loader=FileSystemLoader('./'),
        autoescape=select_autoescape(['html', 'xml', 'xhtml'])
    )

    # Textual date formats; '%B' yields the month name of the active locale.
    df = '%d. %B %Y'
    dtf = '%d. %B %Y, %H:%M:%S Uhr'

    template = env.get_template('dashboard_template.xhtml')

    template.stream(
        stand = stand_date.strftime(dtf),
        filename_stand = filename_stand,
        einwohner_deutschland = '{:n}'.format(einwohner_deutschland).replace('.', ''),
        herd_immunity = '{:n}'.format(int(herd_immunity * 100)),
        total_vaccinations = '{:n}'.format(total_vaccinations).replace('.', ''),
        total_vaccinations_percentage = '{:.3n}'.format(total_vaccinations_percentage * 100),
        days_since_start = days_since_start,
        last_date = last_date.strftime(df),
        last_date_day_rate = '{:n}'.format(last_date_day_rate).replace('.', ''),
        mean_vaccinations_daily = '{:n}'.format(mean_vaccinations_daily_int).replace('.', ''),
        mean_vaccinations_daily_herd_immunity = mean_vaccinations_daily_herd_immunity.strftime(df),
        mean_vaccinations_daily_done = mean_vaccinations_daily_done.strftime(df),
        last_date_day_rate_herd_immunity = last_date_day_rate_herd_immunity.strftime(df),
        last_date_day_rate_done = last_date_day_rate_done.strftime(df),
        mean_vaccinations_last_seven_days = '{:n}'.format(mean_vaccinations_last_seven_days_int).replace('.', ''),
        mean_vaccinations_last_seven_days_herd_immunity = mean_vaccinations_last_seven_days_herd_immunity.strftime(df),
        mean_vaccinations_last_seven_days_done = mean_vaccinations_last_seven_days_done.strftime(df),
        # States are passed sorted by name.
        details_per_land = dict(sorted(details_per_land_formatted.items(), key=lambda item: item[0])),
        details_total = details_total_formatted
    ).dump(dashboard_filename)

    # Keep a copy of the page and its stylesheet next to the archived plots.
    shutil.copyfile(dashboard_filename, dashboard_archive_filename)
    shutil.copyfile(stylesheet_filename, stylesheet_archive_filename)

    print('Created dashboard')
# Render the dashboard page (skipped inside the function when the archived
# copy already exists).
render_dashboard()