#!/usr/bin/python
# vim: set fileencoding=utf-8 :
"""Download the RKI vaccination monitoring sheet and build plots and a dashboard.

The script is executed top to bottom:

1. download the current ``Impfquotenmonitoring.xlsx`` into ``data_folder``,
2. derive totals, means and linear extrapolations for first and second
   vaccinations,
3. write several plots (pdf and png) into ``site_folder`` and into an archive
   folder named after the official data status,
4. render ``dashboard_template.xhtml`` into the site and archive folders.
"""

import datetime
import locale
import os.path
import re
import shutil

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests as req
from matplotlib.dates import date2num

# German locale: used by the '{:n}' number formats and by '%B' month names.
locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')

site_folder = 'site/'
data_folder = 'data/'

einwohner_deutschland = 83190556
herd_immunity = 0.7

today = datetime.date.today()
print_today = today.isoformat()
filename_now = datetime.datetime.now().strftime("%Y%m%d%H%M%S")

# DIN A4 plots (size in inches)
plt.rcParams["figure.figsize"] = [11.69, 8.27]

# Download
os.makedirs(data_folder, exist_ok=True)
data_filename = os.path.join(data_folder, '{}_Impfquotenmonitoring.xlsx'.format(filename_now))

r = req.get(
    'https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Daten/Impfquotenmonitoring.xlsx?__blob=publicationFile',
    timeout=60
)
# Fail early instead of storing an HTTP error page as an .xlsx file.
r.raise_for_status()

with open(data_filename, 'wb') as outfile:
    outfile.write(r.content)

#data_filename = 'data/20210118151908_Impfquotenmonitoring.xlsx'

rki_file = pd.read_excel(data_filename, sheet_name=None, engine='openpyxl')

raw_data = rki_file['Impfungen_proTag']

# The last row is dropped, as are rows without a date.
# NOTE(review): the last row is presumably a totals row - confirm against the sheet.
impfungen = raw_data[:-1].dropna(subset=['Datum'])

dates = impfungen['Datum']

start_of_reporting_date = dates.iloc[0].date()


def calculate_vaccination_data(data):
    """Derive totals, means and linear extrapolations for one vaccination series.

    Args:
        data: pandas Series of daily vaccination counts, aligned with the
            module-level ``dates`` series (same index labels).

    Returns:
        dict with the daily and cumulative series, totals, start/last dates
        and three extrapolations (all-time mean rate, last daily rate and
        seven-day mean rate), each in the format returned by ``extrapolate``.

    Raises:
        ValueError: if one of the rates used for extrapolation is zero,
            negative or not finite.
    """
    cumulative = np.cumsum(data)

    total = int(np.sum(data))
    total_percentage = float(total) / einwohner_deutschland * 100

    mean_all_time = np.mean(data)
    mean_seven_days = np.mean(data.iloc[-7:])

    to_be_vaccinated = einwohner_deutschland - total

    first_reporting_timestamp = dates.iloc[0]
    last_date = dates.iloc[-1].date()
    # first_valid_index() returns an index label, hence the label-based lookup.
    start_of_vaccination_date = dates.loc[data.first_valid_index()].date()
    days_since_start_of_vaccination = (last_date - start_of_vaccination_date).days
    days_since_start_of_reporting = (last_date - start_of_reporting_date).days

    def extrapolate(rate, to_be_vaccinated):
        """Extrapolate the cumulative total linearly with ``rate`` per day.

        The line covers every day from the first reporting date up to the
        day on which the whole population would be vaccinated, and passes
        through ``total`` on the last reported date.
        """
        if not np.isfinite(rate) or rate <= 0:
            raise ValueError(
                'Cannot extrapolate with a non-positive or undefined vaccination rate: {}'.format(rate)
            )

        # Days still needed, counted from the last reported date.
        days_extrapolated = max(0, int(np.ceil(to_be_vaccinated / rate)))
        days_until_herd_immunity = max(
            0,
            int(np.ceil((einwohner_deutschland * herd_immunity - total) / rate))
        )
        # Keeps the index below inside the array even for herd_immunity > 1.
        days_until_herd_immunity = min(days_until_herd_immunity, days_extrapolated)

        # Offsets relative to the last reported date; offset 0 maps to `total`.
        day_offsets = np.arange(-days_since_start_of_reporting, days_extrapolated + 1)

        extrapolated_dates = np.array([
            first_reporting_timestamp + datetime.timedelta(days=i)
            for i in range(len(day_offsets))
        ])

        date_done = extrapolated_dates[-1]
        date_herd_immunity = extrapolated_dates[days_since_start_of_reporting + days_until_herd_immunity]

        extrapolated_vaccinations = total + rate * day_offsets

        return {
            'rate': rate,
            'rate_int': int(np.round(rate)),
            'days_extrapolated': days_extrapolated,
            'dates': extrapolated_dates,
            'date_done': date_done,
            'date_done_str': date_done.strftime('%d. %B %Y'),
            'date_herd_immunity': date_herd_immunity,
            'date_herd_immunity_str': date_herd_immunity.strftime('%d. %B %Y'),
            'extrapolated_vaccinations': extrapolated_vaccinations
        }

    extrapolation_mean_all_time = extrapolate(mean_all_time, to_be_vaccinated)
    extrapolation_last_rate = extrapolate(data.iloc[-1], to_be_vaccinated)
    extrapolation_mean_seven_days = extrapolate(mean_seven_days, to_be_vaccinated)

    # Running mean: cumulative total divided by the number of rows so far.
    mean_vaccination_rates_daily = np.round(cumulative / np.arange(1, len(cumulative) + 1))

    return {
        'daily': data,
        'cumulative': cumulative,
        'total': total,
        'total_percentage': total_percentage,
        'to_be_vaccinated': to_be_vaccinated,
        'last_date': last_date,
        'last_date_str': last_date.strftime('%d. %B %Y'),
        'days_since_start': days_since_start_of_vaccination + 1,  # Shift from zero to one-based-index
        'start_of_vaccination_date': start_of_vaccination_date,
        'start_of_vaccination_date_str': start_of_vaccination_date.strftime('%d. %B %Y'),
        'extrapolation_mean_all_time': extrapolation_mean_all_time,
        'extrapolation_last_rate': extrapolation_last_rate,
        'extrapolation_mean_seven_days': extrapolation_mean_seven_days,
        'mean_vaccination_rates_daily': mean_vaccination_rates_daily
    }


data_first_vaccination = calculate_vaccination_data(impfungen['Erstimpfung'])
data_second_vaccination = calculate_vaccination_data(impfungen['Zweitimpfung'])

# Read the data status from the data itself
#stand = dates.iloc[-1]
#print_stand = stand.isoformat()

# Read the data status from the official notes sheet
stand = rki_file['Erläuterung'].iloc[1, 0]

stand_regex = re.compile(r'^Datenstand: (\d\d\.\d\d\.\d\d\d\d, \d\d:\d\d) Uhr$')
m = stand_regex.match(str(stand))
if m is None:
    raise ValueError('Unexpected format of the data status cell: {!r}'.format(stand))
stand_date = datetime.datetime.strptime(m.group(1), '%d.%m.%Y, %H:%M')
print_stand = stand_date.isoformat()

filename_stand = stand_date.strftime("%Y%m%d%H%M%S")

# Disabled per-state evaluation, kept verbatim as an inert string literal.
'''
# Infos der einzelnen Länder
details_sheet_name = (set(rki_file.keys()) - {'Erläuterung', 'Impfungen_proTag'}).pop()
details_sheet = rki_file[details_sheet_name]

regionalcodes = details_sheet['RS'].iloc[0:17]
land_names = details_sheet['Bundesland'].iloc[0:17]

total_vaccinations_by_land = details_sheet['Impfungen kumulativ'].iloc[0:17]
vaccination_per_mille_by_land = details_sheet['Impfungen pro 1.000 Einwohner'].iloc[0:17]

vaccination_reason_age_by_land = details_sheet['Indikation nach Alter*'].iloc[0:17]
vaccination_reason_job_by_land = details_sheet['Berufliche Indikation*'].iloc[0:17]
vaccination_reason_medical_by_land = details_sheet['Medizinische Indikation*'].iloc[0:17]
vaccination_reason_oldhome_by_land = details_sheet['Pflegeheim-bewohnerIn*'].iloc[0:17]

details_per_land = {}
details_per_land_formatted = {}

# Regionalcodes der Länder zu Abkürzung und Name (Plus gesamt)
laendernamen = [
    ('SH', 'Schleswig-Holstein'),
    ('HH', 'Hamburg'),
    ('NI', 'Niedersachsen'),
    ('HB', 'Bremen'),
    ('NW', 'Nordrhein-Westfalen'),
    ('HE', 'Hessen'),
    ('RP', 'Rheinland-Pfalz'),
    ('BW', 'Baden-Württemberg'),
    ('BY', 'Bayern'),
    ('SL', 'Saarland'),
    ('BE', 'Berlin'),
    ('BB', 'Brandenburg'),
    ('MV', 'Mecklenburg-Vorpommern'),
    ('SN', 'Sachsen'),
    ('ST', 'Sachsen-Anhalt'),
    ('TH', 'Thüringen'),
    ('𝚺', 'Gesamt')
]

def row_to_details(i):
    regionalcode = regionalcodes[i] if i != 16 else 16

    print(laendernamen[regionalcode])

    shortname, name = laendernamen[regionalcode]

    return {
        'name': name,
        'shortname': shortname,
        'total_vaccinations': int(total_vaccinations_by_land[i]),
        'total_vaccinations_percentage': vaccination_per_mille_by_land[i] / 10,
        'vaccination_reason_age': int(vaccination_reason_age_by_land[i]),
        'vaccination_reason_age_percentage': np.round(vaccination_reason_age_by_land[i] / total_vaccinations_by_land[i] * 100),
        'vaccination_reason_job': int(vaccination_reason_job_by_land[i]),
        'vaccination_reason_job_percentage': np.round(vaccination_reason_job_by_land[i] / total_vaccinations_by_land[i] * 100),
        'vaccination_reason_medical': int(vaccination_reason_medical_by_land[i]),
        'vaccination_reason_medical_percentage': np.round(vaccination_reason_medical_by_land[i] / total_vaccinations_by_land[i] * 100),
        'vaccination_reason_oldhome': int(vaccination_reason_oldhome_by_land[i]),
        'vaccination_reason_oldhome_percentage': np.round(vaccination_reason_oldhome_by_land[i] / total_vaccinations_by_land[i] * 100),
    }

def row_to_details_formatted(i):
    regionalcode = regionalcodes[i] if i != 16 else 16

    print(laendernamen[regionalcode])

    shortname, name = laendernamen[regionalcode]

    return {
        'name': name,
        'shortname': shortname,
        'total_vaccinations': '{:n}'.format(int(total_vaccinations_by_land[i])).replace('.', ' '),
        'total_vaccinations_percentage': '{:.3n}'.format(np.round(vaccination_per_mille_by_land[i] / 10, 2)),
        'vaccination_reason_age': '{:n}'.format(int(vaccination_reason_age_by_land[i])).replace('.', ' '),
        'vaccination_reason_age_percentage': '{:n}'.format(np.round(vaccination_reason_age_by_land[i] / total_vaccinations_by_land[i] * 100)),
        'vaccination_reason_job': '{:n}'.format(int(vaccination_reason_job_by_land[i])).replace('.', ' '),
        'vaccination_reason_job_percentage': '{:n}'.format(np.round(vaccination_reason_job_by_land[i] / total_vaccinations_by_land[i] * 100)),
        'vaccination_reason_medical': '{:n}'.format(int(vaccination_reason_medical_by_land[i])).replace('.', ' '),
        'vaccination_reason_medical_percentage': '{:n}'.format(np.round(vaccination_reason_medical_by_land[i] / total_vaccinations_by_land[i] * 100)),
        'vaccination_reason_oldhome': '{:n}'.format(int(vaccination_reason_oldhome_by_land[i])).replace('.', ' '),
        'vaccination_reason_oldhome_percentage': '{:n}'.format(np.round(vaccination_reason_oldhome_by_land[i] / total_vaccinations_by_land[i] * 100))
    }

for i in range(len(land_names) - 1):
    details_per_land[land_names[i]] = row_to_details(i)
    details_per_land_formatted[land_names[i]] = row_to_details_formatted(i)

details_total = row_to_details(16)
details_total_formatted = row_to_details_formatted(16)
'''

archive_folder = site_folder + 'archive/' + filename_stand

if os.path.isdir(archive_folder):
    print('Archive folder {} already exists'.format(archive_folder))
else:
    # makedirs also creates a missing 'archive/' parent on the first run.
    os.makedirs(archive_folder)


def _plot_filenames(plot_name):
    """Return the extension-less (archive, latest) file names for a plot."""
    archive_plot_filename = '{}/{}'.format(archive_folder, plot_name)
    latest_plot_filename = '{}/{}'.format(site_folder, plot_name)
    return archive_plot_filename, latest_plot_filename


def _plot_already_exists(archive_plot_filename):
    """Report and return whether the archived pdf of a plot already exists."""
    if os.path.isfile(archive_plot_filename + '.pdf'):
        print('Plot {} already exists'.format(archive_plot_filename))
        return True
    return False


def _save_plot(archive_plot_filename, latest_plot_filename):
    """Save the current figure as pdf and png to both locations and close it."""
    for base_filename in (archive_plot_filename, latest_plot_filename):
        for extension in ('.pdf', '.png'):
            plt.savefig(base_filename + extension)

    plt.close()
    print('Created plot {} as pdf and png'.format(archive_plot_filename))


def plot_extrapolation_portion(percentage):
    """Plot daily rates, cumulative totals and extrapolations.

    Args:
        percentage: fraction of the population (e.g. ``0.1`` or ``1.0``) that
            defines the upper limit of the cumulative axis and the right
            limit of the time axis.
    """
    print_percentage = int(percentage * 100)
    archive_plot_filename, latest_plot_filename = _plot_filenames(
        'extrapolated_to_{}_percent'.format(print_percentage)
    )

    if _plot_already_exists(archive_plot_filename):
        return

    fig, ax = plt.subplots(1)

    # rate_int rather than the float rate: '{:n}' on a float switches to
    # exponent notation for large values.
    plt.title(
        'Tägliche Impfrate (Erst- und Zweitimpfung), kumulierte Impfungen und lineare Extrapolation bis {:n} % der Bevölkerung Deutschlands\n'
        'Datenquelle: RKI, Stand: {}. Erstellung: {}, Ersteller: Benedikt Bastin, Lizenz: CC BY-SA 4.0\n'
        'Erstimpfungen: {:n} ({:n} %), Durchschnittliche Impfrate: {:n} Impfungen/Tag (läuft seit {:n} Tagen)\n'
        'Zweitimpfungen: {:n} ({:n} %), Durchschnittliche Impfrate: {:n} Impfungen/Tag (läuft seit {:n} Tagen)'.format(
            print_percentage,
            print_stand, print_today,
            data_first_vaccination['total'], np.round(data_first_vaccination['total_percentage'], 2), data_first_vaccination['extrapolation_mean_all_time']['rate_int'], data_first_vaccination['days_since_start'],
            data_second_vaccination['total'], np.round(data_second_vaccination['total_percentage'], 2), data_second_vaccination['extrapolation_mean_all_time']['rate_int'], data_second_vaccination['days_since_start']
        )
    )

    ax2 = ax.twinx()

    ax.bar(dates, data_first_vaccination['daily'], label='Tägliche Erstimpfungen', color='blue')
    ax.bar(dates, data_second_vaccination['daily'], label='Tägliche Zweitimpfungen', color='lightblue')
    ax.plot(dates, data_first_vaccination['mean_vaccination_rates_daily'], color='violet', label='Durchschnittliche Erstimpfrate\nbis zu diesem Tag (inkl.)')
    ax.plot(dates, data_second_vaccination['mean_vaccination_rates_daily'], color='magenta', label='Durchschnittliche Zweitimpfrate\nbis zu diesem Tag (inkl.)')

    ax2.set_ylim([0, einwohner_deutschland * percentage])

    # Right limit: the day on which the all-time-mean extrapolation of the
    # first vaccinations reaches the top of the cumulative axis.
    first_extrapolation = data_first_vaccination['extrapolation_mean_all_time']
    days_until_target = max(
        0.0,
        float(np.ceil(
            (einwohner_deutschland * percentage - data_first_vaccination['total']) / first_extrapolation['rate']
        ))
    )
    ax2.set_xlim(right=dates.iloc[-1] + datetime.timedelta(days=days_until_target))

    ax2.grid(True)
    ax2.plot(dates, data_first_vaccination['cumulative'], color='red', label='Kumulierte Erstimpfungen')
    ax2.plot(dates, data_second_vaccination['cumulative'], color='indianred', label='Kumulierte Zweitimpfungen')

    ax2.plot(data_first_vaccination['extrapolation_mean_all_time']['dates'], data_first_vaccination['extrapolation_mean_all_time']['extrapolated_vaccinations'], color='orange', label='Extrap. kumulierte Erstimpfungen (Ø gesamt)\n{:n} Impfungen/Tag'.format(data_first_vaccination['extrapolation_mean_all_time']['rate_int']))
    ax2.plot(data_first_vaccination['extrapolation_mean_seven_days']['dates'], data_first_vaccination['extrapolation_mean_seven_days']['extrapolated_vaccinations'], color='goldenrod', label='Extrap. kumulierte Erstimpfungen (Ø 7 Tage)\n{:n} Impfungen/Tag'.format(data_first_vaccination['extrapolation_mean_seven_days']['rate_int']))

    ax2.plot(data_second_vaccination['extrapolation_mean_all_time']['dates'], data_second_vaccination['extrapolation_mean_all_time']['extrapolated_vaccinations'], color='orange', label='Extrap. kumulierte Zweitimpfungen (Ø gesamt)\n{:n} Impfungen/Tag'.format(data_second_vaccination['extrapolation_mean_all_time']['rate_int']))
    ax2.plot(data_second_vaccination['extrapolation_mean_seven_days']['dates'], data_second_vaccination['extrapolation_mean_seven_days']['extrapolated_vaccinations'], color='goldenrod', label='Extrap. kumulierte Zweitimpfungen (Ø 7 Tage)\n{:n} Impfungen/Tag'.format(data_second_vaccination['extrapolation_mean_seven_days']['rate_int']))

    ax.legend(loc='upper left')
    ax.get_yaxis().get_major_formatter().set_scientific(False)
    ax.set_xlabel('Datum')
    ax.set_ylabel('Tägliche Impfungen')

    ax2.legend(loc='lower right')
    ax2.get_yaxis().get_major_formatter().set_scientific(False)
    # Estimated percentage for herd immunity
    #ax2.axline((0, einwohner_deutschland * 0.7), slope=0, color='green')
    ax2.set_ylabel('Kumulierte Impfungen')

    _save_plot(archive_plot_filename, latest_plot_filename)


plot_extrapolation_portion(0.1)
#plot_extrapolation_portion(0.7)
plot_extrapolation_portion(1.0)


def plot_vaccination_bar_graph_total_time():
    """Plot daily first and second vaccinations as stacked bars."""
    archive_plot_filename, latest_plot_filename = _plot_filenames('vaccination_bar_graph_total_time')

    if _plot_already_exists(archive_plot_filename):
        return

    fig, ax = plt.subplots(1)

    plt.title(
        'Tägliche Impfrate (Erst- und Zweitimpfung übereinander)\n'
        'Datenquelle: RKI, Stand: {}. Erstellung: {}, Ersteller: Benedikt Bastin, Lizenz: CC BY-SA 4.0\n'.format(
            print_stand, print_today
        )
    )

    ax.grid()

    ax.bar(dates, data_first_vaccination['daily'], label='Tägliche Erstimpfungen', color='blue')
    ax.bar(dates, data_second_vaccination['daily'], label='Tägliche Zweitimpfungen', color='lightblue', bottom=data_first_vaccination['daily'])

    ax.set_ylim([0, np.max(data_first_vaccination['daily']) + np.max(data_second_vaccination['daily'])])

    ax.legend(loc='upper left')
    ax.get_yaxis().get_major_formatter().set_scientific(False)
    ax.set_xlabel('Datum')
    ax.set_ylabel('Tägliche Impfungen')

    _save_plot(archive_plot_filename, latest_plot_filename)


plot_vaccination_bar_graph_total_time()


def plot_vaccination_bar_graph_total_time_two_bars():
    """Plot daily first and second vaccinations as two bars side by side."""
    archive_plot_filename, latest_plot_filename = _plot_filenames('vaccination_bar_graph_total_time_two_bars')

    if _plot_already_exists(archive_plot_filename):
        return

    fig, ax = plt.subplots(1)

    plt.title(
        'Tägliche Impfrate (Erst- und Zweitimpfung nebeneinander)\n'
        'Datenquelle: RKI, Stand: {}. Erstellung: {}, Ersteller: Benedikt Bastin, Lizenz: CC BY-SA 4.0\n'.format(
            print_stand, print_today
        )
    )

    ax.grid()

    # Numeric dates allow shifting the two bar series by a fraction of a day.
    date_numbers = date2num(dates)

    ax.bar(date_numbers - 0.2, data_first_vaccination['daily'], width=0.4, label='Tägliche Erstimpfungen', color='blue')
    ax.bar(date_numbers + 0.2, data_second_vaccination['daily'], width=0.4, label='Tägliche Zweitimpfungen', color='lightblue')

    ax.set_ylim([0, np.max(data_first_vaccination['daily']) + np.max(data_second_vaccination['daily'])])

    ax.legend(loc='upper left')
    ax.xaxis_date()
    ax.get_yaxis().get_major_formatter().set_scientific(False)
    ax.set_xlabel('Datum')
    ax.set_ylabel('Tägliche Impfungen')

    _save_plot(archive_plot_filename, latest_plot_filename)


plot_vaccination_bar_graph_total_time_two_bars()


def plot_vaccination_bar_graph_compare_both_vaccinations():
    """Plot second vaccinations next to first vaccinations shifted by 21 days."""
    archive_plot_filename, latest_plot_filename = _plot_filenames('vaccination_bar_graph_compare_both_vaccinations')

    if _plot_already_exists(archive_plot_filename):
        return

    fig, ax = plt.subplots(1)

    plt.title(
        'Tägliche Impfrate (Erst- und Zweitimpfung um 21 Tage versetzt)\n'
        'Datenquelle: RKI, Stand: {}. Erstellung: {}, Ersteller: Benedikt Bastin, Lizenz: CC BY-SA 4.0\n'.format(
            print_stand, print_today
        )
    )

    ax.grid()

    # First vaccinations are drawn 21 days later, next to the second
    # vaccinations of that day.
    date_numbers_first = date2num(dates + datetime.timedelta(days=21))
    date_numbers_second = date2num(dates)

    ax.bar(date_numbers_first - 0.2, data_first_vaccination['daily'], width=0.4, label='Tägliche Erstimpfungen', color='blue')
    ax.bar(date_numbers_second + 0.2, data_second_vaccination['daily'], width=0.4, label='Tägliche Zweitimpfungen', color='lightblue')

    ax.set_ylim([0, np.max([np.max(data_first_vaccination['daily']), np.max(data_second_vaccination['daily'])])])

    ax.legend(loc='upper left')
    ax.xaxis_date()
    ax.get_yaxis().get_major_formatter().set_scientific(False)
    ax.set_xlabel('Datum')
    ax.set_ylabel('Tägliche Impfungen')

    _save_plot(archive_plot_filename, latest_plot_filename)


plot_vaccination_bar_graph_compare_both_vaccinations()


def plot_cumulative_two_vaccinations():
    """Plot the cumulative first and second vaccinations as filled areas."""
    archive_plot_filename, latest_plot_filename = _plot_filenames('cumulative_two_vaccinations')

    if _plot_already_exists(archive_plot_filename):
        return

    fig, ax = plt.subplots(1)

    plt.title(
        'Kumulative Impfrate (Erst- und Zweitimpfung)\n'
        'Datenquelle: RKI, Stand: {}. Erstellung: {}, Ersteller: Benedikt Bastin, Lizenz: CC BY-SA 4.0\n'.format(
            print_stand, print_today
        )
    )

    ax.grid()

    first_vaccinations_cumulative = data_first_vaccination['cumulative']
    second_vaccinations_cumulative = data_second_vaccination['cumulative']

    ax.fill_between(dates, first_vaccinations_cumulative, label='Erstimpfungen', color='blue')
    ax.fill_between(dates, second_vaccinations_cumulative, label='Zweitimpfungen', color='lightblue')

    ax.set_ylim([0, first_vaccinations_cumulative.iloc[-1]])

    ax.legend(loc='upper left')
    ax.xaxis_date()
    ax.get_yaxis().get_major_formatter().set_scientific(False)
    ax.set_xlabel('Datum')
    # This axis shows cumulative values, not daily ones.
    ax.set_ylabel('Kumulierte Impfungen')

    _save_plot(archive_plot_filename, latest_plot_filename)


plot_cumulative_two_vaccinations()


def render_dashboard():
    """Render the dashboard template and copy it and its stylesheet to the archive.

    Does nothing if the archived dashboard for the current data status
    already exists.
    """
    dashboard_filename = os.path.join(site_folder, 'index.xhtml')
    dashboard_archive_filename = os.path.join(archive_folder, 'index.xhtml')
    stylesheet_filename = os.path.join(site_folder, 'rki-dashboard.css')
    stylesheet_archive_filename = os.path.join(archive_folder, 'rki-dashboard.css')

    if os.path.isfile(dashboard_archive_filename):
        print('Dashboard {} already exists'.format(dashboard_archive_filename))
        return

    # Imported here so that jinja2 is only required when a dashboard is rendered.
    from jinja2 import Environment, FileSystemLoader, select_autoescape

    env = Environment(
        loader=FileSystemLoader('./'),
        autoescape=select_autoescape(['html', 'xml', 'xhtml'])
    )

    german_text_datetime_format = '%d. %B %Y, %H:%M:%S Uhr'

    template = env.get_template('dashboard_template.xhtml')

    template.stream(
        stand = stand_date.strftime(german_text_datetime_format),
        filename_stand = filename_stand,
        einwohner_deutschland = '{:n}'.format(einwohner_deutschland).replace('.', ' '),
        herd_immunity = '{:n}'.format(int(herd_immunity * 100)),
        data_first_vaccination = data_first_vaccination,
        data_second_vaccination = data_second_vaccination,
        #details_per_land = dict(sorted(details_per_land_formatted.items(), key=lambda item: item[0])),
        #details_total = details_total_formatted
    ).dump(dashboard_filename)

    shutil.copyfile(dashboard_filename, dashboard_archive_filename)
    shutil.copyfile(stylesheet_filename, stylesheet_archive_filename)

    print('Created dashboard')


render_dashboard()