Epidemic modeling - Part 8

Studying real-world data
modeling
SEIR
epidemiology
stochastic
COVID-19
real-world
Author

Jeffrey Post

Published

June 2, 2020

#collapse_hide
# This code wrangles the data from JHU
!pip install plotly==4.9.0
!pip install dash==1.16.0
import pandas as pd
import numpy as np

import math

from scipy import signal

import plotly.graph_objects as go
import plotly.express as px

from scipy.stats import expon
from scipy.stats import gamma
from scipy.stats import weibull_min

from numpy.random import default_rng
rng = default_rng()

import dash
import dash_core_components as dcc
import dash_html_components as html

import datetime


# Import confirmed cases
conf_df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')

#Import deaths data
deaths_df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv')

# Import recovery data
rec_df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv')

#iso_alpha = pd.read_csv('https://raw.githubusercontent.com/jeffufpost/sars-cov-2-world-tracker/master/data/iso_alpha.csv', index_col=0, header=0).T.iloc[0]
iso_alpha = pd.read_csv('https://raw.githubusercontent.com/jeffufpost/sars-cov-2-world-tracker/master/data/iso_alpha.csv', index_col=0, header=0)

# Wrangle the data

#print("Wrangling data by country.......")
# Consolidate countries (i.e. French overseas territories are counted in France, etc.)
conf_df = conf_df.groupby("Country/Region").sum()

deaths_df = deaths_df.groupby("Country/Region").sum()

rec_df = rec_df.groupby("Country/Region").sum()

# Remove Lat and Long columns
conf_df = conf_df.iloc[:,2:]
deaths_df = deaths_df.iloc[:,2:]
rec_df = rec_df.iloc[:,2:]

# Convert country names to the format expected by pycountry
pycountry_renames = {'Congo (Brazzaville)': 'Congo', 'Congo (Kinshasa)': 'Congo, the Democratic Republic of the', 'Burma': 'Myanmar', 'Korea, South': 'Korea, Republic of', 'Laos': "Lao People's Democratic Republic", 'Taiwan*': 'Taiwan', "West Bank and Gaza": "Palestine, State of"}
conf_df = conf_df.rename(index=pycountry_renames)
deaths_df = deaths_df.rename(index=pycountry_renames)
rec_df = rec_df.rename(index=pycountry_renames)

# Convert dates to datetime format
conf_df.columns = pd.to_datetime(conf_df.columns).date
deaths_df.columns = pd.to_datetime(deaths_df.columns).date
rec_df.columns = pd.to_datetime(rec_df.columns).date

# Create a per day dataframe
#print("Creating new per day dataframes......")
# Create per-day dataframes for cases, deaths, and recoveries via pd.DataFrame.diff
conf_df_pd = conf_df.diff(axis=1)
deaths_df_pd = deaths_df.diff(axis=1)
rec_df_pd = rec_df.diff(axis=1)

#print("Create infected dataframe = conf - deaths - recoveries")
inf_df = conf_df - deaths_df - rec_df

conf_df_pd.iloc[:,0] = 0
rec_df_pd.iloc[:,0] = 0
deaths_df_pd.iloc[:,0] = 0
inf_df.iloc[:,0] = 0

#print("Adding dataframes of 1st, 2nd, and 3rd derivatives of number of infected")
firstdev = inf_df.apply(np.gradient, axis=1)
seconddev = firstdev.apply(np.gradient)
thirddev = seconddev.apply(np.gradient)

#print("Create series of first date above 100 confirmed cases.....")
# Create a column containing date at which 100 confirmed cases were reached, NaN if not reached yet
fda100 = conf_df[conf_df > 100].apply(pd.Series.first_valid_index, axis=1)

# Create dataframe for probability plot
probevent = iso_alpha.join(inf_df)
probevent['prev'] = probevent.iloc[:,-1] / probevent['SP.POP.TOTL']
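
Before moving on, a quick look at the wrangled dataframes can help; this is just an illustrative sanity check, and the exact numbers depend on the day the JHU CSVs are downloaded.

#collapse_hide
# Quick sanity check of the wrangled data (output varies with download date)
print(conf_df.shape)                      # (countries, days)
print(conf_df.loc['Austria'].tail(3))     # cumulative confirmed cases
print(conf_df_pd.loc['Austria'].tail(3))  # daily new confirmed cases
print(inf_df.loc['Austria'].tail(3))      # current number of active cases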
#collapse_hide
# This code is the lowpass filter 
def lowpass(x, fc=0.05, pad=0): # default cutoff frequency of 0.05; lower values smooth more (e.g. 0.02 works better for France)

  # Pad the tail with the mean of the last three values to reduce edge effects
  if pad == 1:
    for i in range(100):
      x = np.append(x, (x[-1]+x[-2]+x[-3])/3)

  fs = 1  # Sampling frequency: one sample per day
  signala = x.T

  w = fc / (fs / 2) # Normalize the frequency to the Nyquist rate
  b, a = signal.butter(5, w, 'low') # 5th-order Butterworth low-pass filter
  return signal.filtfilt(b, a, signala), w
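
As a minimal usage sketch (assuming the dataframes built above), smoothing a country's daily-case series looks like this; the second return value is the cutoff frequency normalized to the Nyquist rate:

#collapse_hide
# Smooth Austria's daily cases with the default cutoff, padding the tail
# to reduce edge artifacts from filtfilt
smoothed, w = lowpass(conf_df_pd.loc['Austria'], fc=0.05, pad=1)
print(w)             # normalized cutoff: 0.05 / (1/2) = 0.1
print(smoothed[:5])  # first few smoothed values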
#collapse_hide
# This code creates the impulse responses
days = np.arange(100)
cdf = pd.DataFrame({
    'T_Latent': gamma.cdf(days, 1.8,loc=0.9,scale=(5.2-1.8)/0.9), 
    'T_Infectious': weibull_min.cdf(days, 2.3,loc=2,scale=20.11)
    })
# Differencing the CDFs gives the daily probability mass, i.e. the discrete impulse responses
h_L = cdf.diff().T_Latent
h_I = cdf.diff().T_Infectious
h_L[0] = 0  # diff leaves NaN in the first slot
h_I[0] = 0
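
As a quick sanity check (a sketch; nothing below depends on it), each discretized impulse response should sum to roughly 1 over the 100-day window, and the mean delays implied by the chosen distributions can be read off scipy directly:

#collapse_hide
# Each impulse response should integrate to ~1 over the 100-day window
print(h_L.sum(), h_I.sum())

# Mean latent and infectious delays implied by the chosen distributions
print(gamma.mean(1.8, loc=0.9, scale=(5.2-1.8)/0.9))  # E[T_Latent]
print(weibull_min.mean(2.3, loc=2, scale=20.11))      # E[T_Infectious], roughly 20 days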
#collapse_hide
# This code is for the iterative deconvolution
# Let's define an iteration function:
def iter_deconv(alpha, impulse_response, input_signal, delay, comparator):
  # Convolve the current guess with the impulse response and compare with the target
  conv=signal.fftconvolve(impulse_response, input_signal, mode='full')
  # Shift the residual by the known delay before applying the correction
  correction=np.roll(comparator-conv[:len(comparator)], delay)
  # Update the guess, low-pass filtering to keep noise from being amplified
  input_signal=np.floor(lowpass(alpha*correction+input_signal, 0.05, 0)[0])
  input_signal[input_signal<0]=0 # counts cannot be negative
  return input_signal

# Define a function to return MSE between two signals as a measure of goodness of fit
def msecalc(A, B):
  return ((A - B)**2).mean(axis=0)
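
This update rule is essentially a Van Cittert-style iterative deconvolution with relaxation factor alpha, regularized by the low-pass filter (to limit noise amplification) and by clamping at zero (counts cannot be negative). Here is a minimal synthetic demo of the scheme, with a made-up smooth pulse as the hidden signal; it is illustrative only and not part of the original analysis.

#collapse_hide
# Synthetic demo: blur a made-up pulse with h_L, then try to recover it
true_signal = 100 * np.exp(-0.5 * ((np.arange(200) - 60) / 8)**2)
blurred = signal.fftconvolve(h_L, true_signal, mode='full')[:200]

# Delay introduced by h_L, estimated exactly as in the country cells below
d = int(true_signal.argmax() - blurred.argmax())

# Initial guess: the blurred signal shifted back by the delay
guess = np.roll(blurred, d)
for k in range(8):
  guess = iter_deconv(2, h_L, guess, d, blurred)
  print(k + 1, msecalc(blurred, signal.fftconvolve(h_L, guess, mode='full')[:200]))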

Motivation for write-up

This is the 8th part of a multi-part blog series on modeling in epidemiology.

While we have studied the stochastic SEIR model in detail, we have not yet compared it to actual real-world data.

The goal of this 8th installment is to expand on the 7th blog post and use deconvolution techniques on real-world data to estimate the various parameters.

Most notably, we want to calculate \(R\) from the data made available by JHU. Let’s see how to go about this.

Data available

We will use the JHU data, which is updated daily and displayed graphically on my tracker here.

A lot of countries have had difficulty reporting reliable data, but a few have done so rather well.

We will have a closer look at these countries:

  • Austria
  • France
  • Germany
  • Switzerland
  • United States

The JHU datasets include the following data:

  • Daily cumulative confirmed cases
  • Daily cumulative recoveries
  • Daily cumulative deaths

From these we can calculate the following:

  • Daily new confirmed cases: \(I_{pd}\)
  • Daily new recoveries
  • Daily new deaths
  • Current number of cases: \(I\)

Analysis

  • We have seen previously how we can get \(E_{pd}\) from deconvolution of \(I_{pd}\).

  • We also know that: \[E_{pd} = \beta ~ I ~ \frac{S}{N}\] \[\leftrightarrow \beta = \frac{N ~ E_{pd}}{I ~ S}\]

  • And we know: \[R_0 = \frac{\beta}{\gamma}\] and \[R=R_0\frac{S}{N}\]

  • So: \[E_{pd}=\gamma~R~I\] \[\leftrightarrow R = \frac{E_{pd}}{\gamma~I}\]

This means we can find \(R\) by deconvolution of \(I_{pd}\).

Our aim is to find this graphically in the real-world data.
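
To make the formula concrete, here is a toy calculation with invented numbers: if \(\gamma = 1/20.11\), and on a given day we estimate \(E_{pd} = 1000\) new exposures while \(I = 30000\) people are currently infected, then

\[R = \frac{E_{pd}}{\gamma~I} = \frac{1000}{\frac{1}{20.11} \times 30000} \approx 0.67\]

so the epidemic would be receding on that day.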

Real-world analysis

Generic analysis

Let’s first have a look at the general way we will go about the analysis (i.e. not country-specific).

  • We have daily data \(I_{pd}\) and \(R_{pd}\)
  • We have our assumed \(T_L\) and \(T_I\)

Note that, depending on the data we have, \(R_{pd}\) can be:

  • the sum of deaths and recoveries
  • deaths only
  • recoveries only

Analysis steps (a compact sketch of this pipeline, using the helper functions defined above, follows this list):

  1. The first thing we want to check is whether \(h_I\circledast I_{pd} [j]\) gives us something close to \(R_{pd}\). If not, why not? (What kind of \(R_{pd}\) should we use?)
  2. Can we get an estimated \(E_{pd}\) by deconvolution of \(I_{pd}\)? What cutoff frequency should we use?
  3. What can that tell us about \(R\) and \(\beta\)?
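
Here is that sketch: a single function stringing together the helpers defined above. The name estimate_R and its default parameters are introduced here purely for illustration; the country cells below do the same thing step by step.

#collapse_hide
# Compact sketch of the per-country pipeline (estimate_R is an illustrative
# name, not from the original post; assumes lowpass, iter_deconv, msecalc,
# h_L and the wrangled dataframes defined above)
def estimate_R(country, fc=0.05, alpha=2, gam=1/20.11):
  n_obs = len(conf_df_pd.loc[country])
  # Step 1 input: smoothed, padded daily confirmed cases
  Ipd = np.floor(lowpass(conf_df_pd.loc[country], fc, 1)[0])
  Ipd[Ipd < 0] = 0
  Ipd = np.append(Ipd, np.full(100, Ipd[-1]))  # pad the tail with the last value

  # Step 2: deconvolve Ipd by h_L to estimate Epd
  delay = int(Ipd.argmax() - signal.fftconvolve(Ipd, h_L, mode='full').argmax())
  Epd = np.roll(Ipd, delay)  # initial guess: Ipd shifted back by the delay
  def fit_mse(e):  # MSE over the observed date range, as in the cells below
    return msecalc(Ipd[:n_obs], signal.fftconvolve(h_L, e, mode='full')[:n_obs])
  mse = fit_mse(Epd)
  while True:
    nxt = iter_deconv(alpha, h_L, Epd, delay, Ipd)
    nxt_mse = fit_mse(nxt)
    if nxt_mse >= mse:
      break  # MSE stopped improving: keep the previous iterate
    Epd, mse = nxt, nxt_mse

  # Step 3: R = Epd / (gamma * I) over the observed date range
  return Epd[:n_obs] / (gam * inf_df.loc[country])

# Example (same parameters as the Austria section below):
# R_austria = estimate_R('Austria', fc=0.05)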

Austria

Let’s first have a look at the data from Austria.

Use fc = 0.05

1. Checking \(h_I\)

#collapse_hide
fig = go.Figure(data=[    
    go.Bar(name='Ipd', x=conf_df_pd.loc['Austria'].index, y=conf_df_pd.loc['Austria']),
    go.Scatter(name='Ipd=lowpass(Ipd)', x=conf_df_pd.loc['Austria'].index, y=lowpass(conf_df_pd.loc['Austria'], 0.05, 1)[0]),
    go.Bar(name='Rpd', x=rec_df_pd.loc['Austria'].index, y=rec_df_pd.loc['Austria']),
    go.Scatter(name='Rpd=lowpass(Rpd)', x=rec_df_pd.loc['Austria'].index, y=lowpass(rec_df_pd.loc['Austria'], 0.05, 1)[0]),
    go.Scatter(name='Rpd=conv(Ipd)', x=conf_df_pd.loc['Austria'].index, y=signal.fftconvolve(h_I, conf_df_pd.loc['Austria'], mode='full'))
])

fig.update_layout(
    xaxis_title = 'Day',
    yaxis_title = 'Count',
    title={
        'text':r'$\text{Austria: Actual } R_{pd} \text{ vs. } h_I[j]\circledast I_{pd}[j]$',
        'x':0.5,
        'xanchor':'center'
    }
)

fig.show()

We can see the actual \(R_{pd}\) leads \(h_I[j]\circledast I_{pd}[j]\) by about 5 days.

There is a 20-day lag between peak \(I_{pd}\) and peak \(h_I[j]\circledast I_{pd}[j]\), as expected since \(E[T_I] \approx 20\) days.

But there is only a 15-day lag between peak \(I_{pd}\) and the actual \(R_{pd}\).

There are a few possibilities for why this is the case, including that maybe we haven’t assumed the correct distribution for \(T_I\).

However, there is another possible reason: testing, especially in the early days, took time, and a patient showing symptoms could wait several days before being tested. This reporting delay alone may explain the 5-day difference.

2. Estimating \(E_{pd}\) by deconvolution of \(I_{pd}\)

#collapse_hide

# Setting up for deconvolution of Ipd

#regularization parameter
alpha=2

# Set up the smoothed Ipd we want to compare our guess with
Ipd=np.floor(lowpass(conf_df_pd.loc['Austria'], 0.05, 1)[0])
Ipd[Ipd<0]=0


# Pad with last value
i=0
while i < 100:
  Ipd=np.append(Ipd, Ipd[-1])
  i=i+1

# Find delay caused by h_L
delay=Ipd.argmax()-signal.fftconvolve(Ipd, h_L, mode='full').argmax()

# We want the initial guess to simply be Ipd shifted back by the delay
initial_guess=np.roll(Ipd,delay)
Enext = initial_guess

# An array to record MSE between the result we want and our iterated guess
mse=np.array([])
mse=np.append(mse, 10000000)
mse=np.append(mse, msecalc(Ipd[:len(conf_df_pd.loc['Austria'])], signal.fftconvolve(h_L, Enext, mode='full')[:len(conf_df_pd.loc['Austria'])]))

itercount=0
while mse[-1] < mse[-2]:
  itercount=itercount+1
  Enext=iter_deconv(alpha, h_L, Enext, delay, Ipd)
  mse=np.append(mse, msecalc(Ipd[:len(conf_df_pd.loc['Austria'])], signal.fftconvolve(h_L, Enext, mode='full')[:len(conf_df_pd.loc['Austria'])]))
  print("Iteration #" + str(itercount) +": MSE= "+str(mse[itercount]))
print("Iteration #" + str(itercount+1) +": MSE= "+str(mse[-1])+" so we use the result of the previous iteration.")
Iteration #1: MSE= 602.3866158407542
Iteration #2: MSE= 137.5220623354217
Iteration #3: MSE= 95.28871087778828
Iteration #4: MSE= 84.74331769219238
Iteration #5: MSE= 58.26385652331345
Iteration #6: MSE= 57.88558587706432
Iteration #7: MSE= 45.65200740982705
Iteration #8: MSE= 47.295703271338205 so we use the result of the previous iteration.
#collapse_hide
# We can keep iterating until the lowest MSE is reached

#change alpha if you like
#alpha=2

i=0
while i < 10:
  itercount=itercount+1
  Enext=iter_deconv(alpha, h_L, Enext, delay, Ipd)
  print(msecalc(Ipd[:len(conf_df_pd.loc['Austria'])], signal.fftconvolve(h_L, Enext, mode='full')[:len(conf_df_pd.loc['Austria'])]))
  i=i+1
35.024578436645875
34.802404169985934
35.024578436645875
34.802404169985934
35.024578436645875
34.802404169985934
35.024578436645875
34.802404169985934
35.024578436645875
34.802404169985934
#collapse_hide
fig = go.Figure(data=[    
    go.Scatter(name='Epd=deconv(Ipd)', x=inf_df.loc['Austria'].index, y=Enext),
    go.Scatter(name='Ipd=conv(deconv(Ipd))', x=inf_df.loc['Austria'].index, y=signal.fftconvolve(h_L, Enext, mode='full')),
    go.Bar(name='Ipd', x=inf_df.loc['Austria'].index, y=conf_df_pd.loc['Austria']),
    go.Scatter(name='Ipd=lowpass(Ipd)', x=inf_df.loc['Austria'].index, y=lowpass(conf_df_pd.loc['Austria'], 0.05, 1)[0])
])

fig.update_layout(
    
    xaxis_title = 'Day',
    yaxis_title = 'Count',
    title={
        'text':r'$\text{Austria: Actual } I_{pd} \text{ vs. convolution of deconvolution of } I_{pd}$',
        'x':0.5,
        'xanchor':'center'
    }
)

fig.show()

We can see that our estimate \(\hat{E}_{pd}\) must be close to the true \(E_{pd}\), as \(I_{pd}\) is almost identical to \(\hat{E}_{pd} \circledast h_L\).

Of course, this holds only as long as our estimate of \(h_L\) is close to reality.

3. \(\beta\) and \(R\) from \(E_{pd}\) and \(I\)

As described above:

\[R = \frac{E_{pd}}{\gamma~I}\]

#collapse_hide

# Calculate R
gam = 1/15 # Note: 1/15 rather than 1/20.11 here, consistent with the actual Rpd leading the h_I convolution by ~5 days above
R = Enext[:len(inf_df.loc['Austria'])]*(1/gam)/inf_df.loc['Austria']

fig = go.Figure(data=[    
    go.Scatter(name='R', x=inf_df.loc['Austria'].index, y=R),
    go.Scatter(name='Epd=deconv(Ipd)', x=inf_df.loc['Austria'].index, y=Enext),
    go.Scatter(name='Inf', x=inf_df.loc['Austria'].index, y=inf_df.loc['Austria']),
])

fig.update_layout(
    
    xaxis_title = 'Day',
    yaxis_title = 'R',
    title={
        'text':r'$\text{Austria: R }$',
        'x':0.5,
        'xanchor':'center'
    }
)

fig.show()

France

It is better to use fc = 0.02 here.

1. Checking \(h_I\)

#collapse_hide
fig = go.Figure(data=[    
    go.Bar(name='Ipd', x=conf_df_pd.loc['France'].index, y=conf_df_pd.loc['France']),
    go.Scatter(name='Ipd=lowpass2(Ipd)', x=conf_df_pd.loc['France'].index, y=lowpass(conf_df_pd.loc['France'], 0.02, 1)[0]),
    go.Bar(name='Rpd', x=rec_df_pd.loc['France'].index, y=rec_df_pd.loc['France']),
    go.Scatter(name='Rpd=lowpass(Rpd)', x=rec_df_pd.loc['France'].index, y=lowpass(rec_df_pd.loc['France'], 0.05, 1)[0]),
    go.Scatter(name='Rpd=conv(Ipd)', x=conf_df_pd.loc['France'].index, y=signal.fftconvolve(h_I, conf_df_pd.loc['France'], mode='full'))
])

fig.update_layout(
    xaxis_title = 'Day',
    yaxis_title = 'Count',
    title={
        'text':r'$\text{France: Actual } R_{pd} \text{ vs. } h_I[j]\circledast I_{pd}[j]$',
        'x':0.5,
        'xanchor':'center'
    }
)

fig.show()

The data is not very good, which is the reason for using fc = 0.02; it helps below:

2. Estimating \(E_{pd}\) by deconvolution of \(I_{pd}\)

#collapse_hide

# Setting up for deconvolution of Ipd

#regularization parameter
alpha=2

# Set up the smoothed Ipd we want to compare our guess with
Ipd=np.floor(lowpass(conf_df_pd.loc['France'], 0.02, 1)[0])
Ipd[Ipd<0]=0


# Pad the tail with the mean of the last three values
i=0
while i < 100:
  Ipd=np.append(Ipd, (Ipd[-1]+Ipd[-2]+Ipd[-3])/3)
  i=i+1

# Find delay caused by h_L
delay=Ipd.argmax()-signal.fftconvolve(Ipd, h_L, mode='full').argmax()

# We want the initial guess to simply be Ipd shifted back by the delay
initial_guess=np.roll(Ipd,delay)
Enext = initial_guess

# An array to record MSE between the result we want and our iterated guess
mse=np.array([])
mse=np.append(mse, 10000000)
mse=np.append(mse, msecalc(Ipd[:len(conf_df_pd.loc['France'])], signal.fftconvolve(h_L, Enext, mode='full')[:len(conf_df_pd.loc['France'])]))

itercount=0
while mse[-1] < mse[-2]:
  itercount=itercount+1
  Enext=iter_deconv(alpha, h_L, Enext, delay, Ipd)
  mse=np.append(mse, msecalc(Ipd[:len(conf_df_pd.loc['France'])], signal.fftconvolve(h_L, Enext, mode='full')[:len(conf_df_pd.loc['France'])]))
  print("Iteration #" + str(itercount) +": MSE= "+str(mse[itercount]))
print("Iteration #" + str(itercount+1) +": MSE= "+str(mse[-1])+" so we use the result of the previous iteration.")
Iteration #1: MSE= 20606.281237526222
Iteration #2: MSE= 13526.285668534732
Iteration #3: MSE= 10574.377475718808
Iteration #4: MSE= 8110.23542907448
Iteration #5: MSE= 7027.411896952288
Iteration #6: MSE= 6166.3965954792575
Iteration #7: MSE= 5621.1228898175195
Iteration #8: MSE= 5225.169278268174
Iteration #9: MSE= 4785.661680709303
Iteration #10: MSE= 4545.017011595534
Iteration #11: MSE= 4158.892779723382
Iteration #12: MSE= 4001.5961767280764
Iteration #13: MSE= 3669.5048358308395
Iteration #14: MSE= 3563.157463148674
Iteration #15: MSE= 3289.181492376996
Iteration #16: MSE= 3218.1987768323515
Iteration #17: MSE= 2988.8539703808074
Iteration #18: MSE= 2944.054349881666
Iteration #19: MSE= 2751.886100263989
Iteration #20: MSE= 2722.317605737591
Iteration #21: MSE= 2558.207523350096
Iteration #22: MSE= 2538.612302912937
Iteration #23: MSE= 2392.481423469871
Iteration #24: MSE= 2381.424278845392
Iteration #25: MSE= 2252.452737178472
Iteration #26: MSE= 2248.127189416129
Iteration #27: MSE= 2132.364086049821
Iteration #28: MSE= 2132.3036292501274
Iteration #29: MSE= 2030.0926377297346
Iteration #30: MSE= 2034.180323331884 so we use the result of the previous iteration.
#collapse_hide
# We can keep iterating until the lowest MSE is reached

#change alpha if you like
alpha=2

i=0
while i < 10:
  itercount=itercount+1
  Enext=iter_deconv(alpha, h_L, Enext, delay, Ipd)
  print(msecalc(Ipd[:len(conf_df_pd.loc['France'])], signal.fftconvolve(h_L, Enext, mode='full')[:len(conf_df_pd.loc['France'])]))
  i=i+1
188.30632450243417
188.7970643390086
188.30632450243417
188.7970643390086
188.30632450243417
188.7970643390086
188.30632450243417
188.7970643390086
188.30632450243417
188.7970643390086
#collapse_hide
fig = go.Figure(data=[    
    go.Scatter(name='Epd=deconv(Ipd)', x=inf_df.loc['France'].index, y=Enext),
    go.Scatter(name='Ipd=conv(deconv(Ipd))', x=inf_df.loc['France'].index, y=signal.fftconvolve(h_L, Enext, mode='full')),
    go.Bar(name='Ipd', x=inf_df.loc['France'].index, y=conf_df_pd.loc['France']),
    go.Scatter(name='Ipd=lowpass(Ipd)', x=inf_df.loc['France'].index, y=lowpass(conf_df_pd.loc['France'], 0.02, 1)[0])
])

fig.update_layout(
    
    xaxis_title = 'Day',
    yaxis_title = 'Count',
    title={
        'text':r'$\text{France: Actual } I_{pd} \text{ vs. convolution of deconvolution of } I_{pd}$',
        'x':0.5,
        'xanchor':'center'
    }
)

fig.show()

3. \(\beta\) and \(R\) from \(E_{pd}\) and \(I\)

As described above:

\[R = \frac{E_{pd}}{\gamma~I}\]

#collapse_hide

# Calculate R
gam = 1/20.11 # As above, gamma = 1/20.11
R = Enext[:len(inf_df.loc['France'])]*(1/gam)/inf_df.loc['France']

fig = go.Figure(data=[    
    go.Scatter(name='R', x=inf_df.loc['France'].index, y=R),
    go.Scatter(name='Epd=deconv(Ipd)', x=inf_df.loc['France'].index, y=Enext),
    go.Scatter(name='Inf', x=inf_df.loc['France'].index, y=inf_df.loc['France']),
])

fig.update_layout(
    
    xaxis_title = 'Day',
    yaxis_title = 'R',
    title={
        'text':r'$\text{France: R }$',
        'x':0.5,
        'xanchor':'center'
    }
)

fig.show()

After March 15th we see a rapid decline in \(R\), with \(R<1\) from April 12th onward; however, the last two weeks of May have seen an increase, and it is now close to 1 in early June.

Germany

Use fc = 0.05

1. Checking \(h_I\)

#collapse_hide
fig = go.Figure(data=[    
    go.Bar(name='Ipd', x=conf_df_pd.loc['Germany'].index, y=conf_df_pd.loc['Germany']),
    go.Scatter(name='Ipd=lowpass(Ipd)', x=conf_df_pd.loc['Germany'].index, y=lowpass(conf_df_pd.loc['Germany'], 0.05, 1)[0]),
    go.Bar(name='Rpd', x=rec_df_pd.loc['Germany'].index, y=rec_df_pd.loc['Germany']),
    go.Scatter(name='Rpd=lowpass(Rpd)', x=rec_df_pd.loc['Germany'].index, y=lowpass(rec_df_pd.loc['Germany'], 0.05, 1)[0]),
    go.Scatter(name='Rpd=conv(Ipd)', x=conf_df_pd.loc['Germany'].index, y=signal.fftconvolve(h_I, conf_df_pd.loc['Germany'], mode='full'))
])

fig.update_layout(
    xaxis_title = 'Day',
    yaxis_title = 'Count',
    title={
        'text':r'$\text{Germany: Actual } R_{pd} \text{ vs. } h_I[j]\circledast I_{pd}[j]$',
        'x':0.5,
        'xanchor':'center'
    }
)

fig.show()

We can see the actual \(R_{pd}\) lags \(h_I[j]\circledast I_{pd}[j]\) by about 10 days, although the two curves are still fairly close.

2. Estimating \(E_{pd}\) by deconvolution of \(I_{pd}\)

#collapse_hide

# Setting up for deconvolution of Ipd

#regularization parameter
alpha=2

# Set up the smoothed Ipd we want to compare our guess with
Ipd=np.floor(lowpass(conf_df_pd.loc['Germany'], 0.05, 1)[0])
Ipd[Ipd<0]=0


# Pad with last value
i=0
while i < 100:
  Ipd=np.append(Ipd, Ipd[-1])
  i=i+1

# Find delay caused by h_L
delay=Ipd.argmax()-signal.fftconvolve(Ipd, h_L, mode='full').argmax()

# We want the initial guess to simply be Ipd shifted back by the delay
initial_guess=np.roll(Ipd,delay)
Enext = initial_guess

# An array to record MSE between the result we want and our iterated guess
mse=np.array([])
mse=np.append(mse, 10000000)
mse=np.append(mse, msecalc(Ipd[:len(conf_df_pd.loc['Germany'])], signal.fftconvolve(h_L, Enext, mode='full')[:len(conf_df_pd.loc['Germany'])]))

itercount=0
while mse[-1] < mse[-2]:
  itercount=itercount+1
  Enext=iter_deconv(alpha, h_L, Enext, delay, Ipd)
  mse=np.append(mse, msecalc(Ipd[:len(conf_df_pd.loc['Germany'])], signal.fftconvolve(h_L, Enext, mode='full')[:len(conf_df_pd.loc['Germany'])]))
  print("Iteration #" + str(itercount) +": MSE= "+str(mse[itercount]))
print("Iteration #" + str(itercount+1) +": MSE= "+str(mse[-1])+" so we use the result of the previous iteration.")
Iteration #1: MSE= 19182.550164399338
Iteration #2: MSE= 5638.48775032635
Iteration #3: MSE= 3246.2710584964543
Iteration #4: MSE= 2220.8292019558216
Iteration #5: MSE= 1734.0114023879353
Iteration #6: MSE= 1406.1497289577862
Iteration #7: MSE= 1184.4461818454367
Iteration #8: MSE= 1030.202328804179
Iteration #9: MSE= 896.2409457402528
Iteration #10: MSE= 809.4351741573042
Iteration #11: MSE= 717.199971974659
Iteration #12: MSE= 665.2713091464134
Iteration #13: MSE= 597.8790194675906
Iteration #14: MSE= 563.7280585832057
Iteration #15: MSE= 514.1662272707379
Iteration #16: MSE= 488.615995245544
Iteration #17: MSE= 450.7962144016111
Iteration #18: MSE= 432.2572271458473
Iteration #19: MSE= 404.57784046391833
Iteration #20: MSE= 390.7347771267719
Iteration #21: MSE= 369.1301222679626
Iteration #22: MSE= 358.17563961008307
Iteration #23: MSE= 340.32015028482033
Iteration #24: MSE= 330.90810363317763
Iteration #25: MSE= 317.76318508683784
Iteration #26: MSE= 310.0059778323316
Iteration #27: MSE= 299.59228791783625
Iteration #28: MSE= 293.56728048512196
Iteration #29: MSE= 285.3683914705494
Iteration #30: MSE= 280.481496274307
Iteration #31: MSE= 273.53802530941925
Iteration #32: MSE= 269.521074875222
Iteration #33: MSE= 264.4061918959138
Iteration #34: MSE= 260.3478006318621
Iteration #35: MSE= 255.7827146378307
Iteration #36: MSE= 253.58018538676694
Iteration #37: MSE= 249.94632990679477
Iteration #38: MSE= 247.73167727107818
Iteration #39: MSE= 244.17252878049058
Iteration #40: MSE= 242.3365776903206
Iteration #41: MSE= 240.3281460532849
Iteration #42: MSE= 237.68591618463944
Iteration #43: MSE= 236.09886767500404
Iteration #44: MSE= 234.00127371907027
Iteration #45: MSE= 232.41433578896027
Iteration #46: MSE= 229.92902289872004
Iteration #47: MSE= 229.06701011549225
Iteration #48: MSE= 226.27080566070512
Iteration #49: MSE= 226.32595033054216 so we use the result of the previous iteration.
#collapse_hide
# We can keep iterating until the lowest MSE is reached

#change alpha if you like
alpha=2

i=0
while i < 10:
  itercount=itercount+1
  Enext=iter_deconv(alpha, h_L, Enext, delay, Ipd)
  print(msecalc(Ipd[:len(conf_df_pd.loc['Germany'])], signal.fftconvolve(h_L, Enext, mode='full')[:len(conf_df_pd.loc['Germany'])]))
  i=i+1
205.9650472667082
208.05908470510605
205.9650472667082
208.05908470510605
205.9650472667082
208.05908470510605
205.9650472667082
208.05908470510605
205.9650472667082
208.05908470510605
#collapse_hide
fig = go.Figure(data=[    
    go.Scatter(name='Epd=deconv(Ipd)', x=inf_df.loc['Germany'].index, y=Enext),
    go.Scatter(name='Ipd=conv(deconv(Ipd))', x=inf_df.loc['Germany'].index, y=signal.fftconvolve(h_L, Enext, mode='full')),
    go.Bar(name='Ipd', x=inf_df.loc['Germany'].index, y=conf_df_pd.loc['Germany']),
    go.Scatter(name='Ipd=lowpass(Ipd)', x=inf_df.loc['Germany'].index, y=lowpass(conf_df_pd.loc['Germany'], 0.05, 1)[0])
])

fig.update_layout(
    
    xaxis_title = 'Day',
    yaxis_title = 'Count',
    title={
        'text':r'$\text{Germany: Actual } I_{pd} \text{ vs. convolution of deconvolution of } I_{pd}$',
        'x':0.5,
        'xanchor':'center'
    }
)

fig.show()

We can see that our estimate \(\hat{E}_{pd}\) must be close to the true \(E_{pd}\), as \(I_{pd}\) is almost identical to \(\hat{E}_{pd} \circledast h_L\).

Of course, this holds only as long as our estimate of \(h_L\) is close to reality.

3. \(\beta\) and \(R\) from \(E_{pd}\) and \(I\)

As described above:

\[R = \frac{E_{pd}}{\gamma~I}\]

#collapse_hide

# Calculate R
gam = 1/20.11 # As above, gamma = 1/20.11
R = Enext[:len(inf_df.loc['Germany'])]*(1/gam)/inf_df.loc['Germany']

fig = go.Figure(data=[    
    go.Scatter(name='R', x=inf_df.loc['Germany'].index, y=R),
    go.Scatter(name='Epd=deconv(Ipd)', x=inf_df.loc['Germany'].index, y=Enext),
    go.Scatter(name='Inf', x=inf_df.loc['Germany'].index, y=inf_df.loc['Germany']),
])

fig.update_layout(
    
    xaxis_title = 'Day',
    yaxis_title = 'R',
    title={
        'text':r'$\text{Germany: R }$',
        'x':0.5,
        'xanchor':'center'
    }
)

fig.show()

We can see \(R<1\) from April 3rd onward; the peak in \(E_{pd}\), however, comes slightly earlier, around March 23rd.

Switzerland

1. Checking \(h_I\)

#collapse_hide
fig = go.Figure(data=[    
    go.Bar(name='Ipd', x=conf_df_pd.loc['Switzerland'].index, y=conf_df_pd.loc['Switzerland']),
    go.Scatter(name='Ipd=lowpass(Ipd)', x=conf_df_pd.loc['Switzerland'].index, y=lowpass(conf_df_pd.loc['Switzerland'], 0.05, 1)[0]),
    go.Bar(name='Rpd', x=rec_df_pd.loc['Switzerland'].index, y=rec_df_pd.loc['Switzerland']),
    go.Scatter(name='Rpd=lowpass(Rpd)', x=rec_df_pd.loc['Switzerland'].index, y=lowpass(rec_df_pd.loc['Switzerland'], 0.05, 1)[0]),
    go.Scatter(name='Rpd=conv(Ipd)', x=conf_df_pd.loc['Switzerland'].index, y=signal.fftconvolve(h_I, conf_df_pd.loc['Switzerland'], mode='full'))
])

fig.update_layout(
    xaxis_title = 'Day',
    yaxis_title = 'Count',
    title={
        'text':r'$\text{Switzerland: Actual } R_{pd} \text{ vs. } h_I[j]\circledast I_{pd}[j]$',
        'x':0.5,
        'xanchor':'center'
    }
)

fig.show()

We can see the actual \(R_{pd}\) leads \(h_I[j]\circledast I_{pd}[j]\) by about 7 days.

2. Estimating \(E_{pd}\) by deconvolution of \(I_{pd}\)

#collapse_hide

# Setting up for deconvolution of Ipd

#regularization parameter
alpha=2

# Set up the smoothed Ipd we want to compare our guess with
Ipd=np.floor(lowpass(conf_df_pd.loc['Switzerland'], 0.05, 1)[0])
Ipd[Ipd<0]=0


# Pad with last value
i=0
while i < 100:
  Ipd=np.append(Ipd, Ipd[-1])
  i=i+1

# Find delay caused by h_L
delay=Ipd.argmax()-signal.fftconvolve(Ipd, h_L, mode='full').argmax()

# We want the initial guess to simply be Ipd shifted back by the delay
initial_guess=np.roll(Ipd,delay)
Enext = initial_guess

# An array to record MSE between the result we want and our iterated guess
mse=np.array([])
mse=np.append(mse, 10000000)
mse=np.append(mse, msecalc(Ipd[:len(conf_df_pd.loc['Switzerland'])], signal.fftconvolve(h_L, Enext, mode='full')[:len(conf_df_pd.loc['Switzerland'])]))

itercount=0
while mse[-1] < mse[-2]:
  itercount=itercount+1
  Enext=iter_deconv(alpha, h_L, Enext, delay, Ipd)
  mse=np.append(mse, msecalc(Ipd[:len(conf_df_pd.loc['Switzerland'])], signal.fftconvolve(h_L, Enext, mode='full')[:len(conf_df_pd.loc['Switzerland'])]))
  print("Iteration #" + str(itercount) +": MSE= "+str(mse[itercount]))
print("Iteration #" + str(itercount+1) +": MSE= "+str(mse[-1])+" so we use the result of the previous iteration.")
Iteration #1: MSE= 864.5294935911032
Iteration #2: MSE= 232.47979293395576
Iteration #3: MSE= 173.17995448248922
Iteration #4: MSE= 142.7278443215308
Iteration #5: MSE= 102.95126255601593
Iteration #6: MSE= 97.14103443111384
Iteration #7: MSE= 73.32554749188243
Iteration #8: MSE= 73.40930143937898 so we use the result of the previous iteration.
#collapse_hide
# We can keep iterating until the lowest MSE is reached

#change alpha if you like
alpha=2

i=0
while i < 10:
  itercount=itercount+1
  Enext=iter_deconv(alpha, h_L, Enext, delay, Ipd)
  print(msecalc(Ipd[:len(conf_df_pd.loc['Switzerland'])], signal.fftconvolve(h_L, Enext, mode='full')[:len(conf_df_pd.loc['Switzerland'])]))
  i=i+1
35.2303147406422
35.62189380732824
35.229698402255934
35.613648731822856
35.229698402255934
35.613648731822856
35.229698402255934
35.613648731822856
35.229698402255934
35.613648731822856
#collapse_hide
fig = go.Figure(data=[    
    go.Scatter(name='Epd=deconv(Ipd)', x=inf_df.loc['Switzerland'].index, y=Enext),
    go.Scatter(name='Ipd=conv(deconv(Ipd))', x=inf_df.loc['Switzerland'].index, y=signal.fftconvolve(h_L, Enext, mode='full')),
    go.Bar(name='Ipd', x=inf_df.loc['Switzerland'].index, y=conf_df_pd.loc['Switzerland']),
    go.Scatter(name='Ipd=lowpass(Ipd)', x=inf_df.loc['Switzerland'].index, y=lowpass(conf_df_pd.loc['Switzerland'], 0.05, 1)[0])
])

fig.update_layout(
    
    xaxis_title = 'Day',
    yaxis_title = 'Count',
    title={
        'text':r'$\text{Switzerland: Actual } I_{pd} \text{ vs. convolution of deconvolution of } I_{pd}$',
        'x':0.5,
        'xanchor':'center'
    }
)

fig.show()

We can see that our estimate \(\hat{E}_{pd}\) must be close to the true \(E_{pd}\), as \(I_{pd}\) is almost identical to \(\hat{E}_{pd} \circledast h_L\).

Of course, this holds only as long as our estimate of \(h_L\) is close to reality.

3. \(\beta\) and \(R\) from \(E_{pd}\) and \(I\)

As described above:

\[R = \frac{E_{pd}}{\gamma~I}\]

#collapse_hide

# Calculate R
gam = 1/20.11 # As above, gamma = 1/20.11
R = Enext[:len(inf_df.loc['Switzerland'])]*(1/gam)/inf_df.loc['Switzerland']

fig = go.Figure(data=[    
    go.Scatter(name='R', x=inf_df.loc['Switzerland'].index, y=R),
    go.Scatter(name='Epd=deconv(Ipd)', x=inf_df.loc['Switzerland'].index, y=Enext),
    go.Scatter(name='Inf', x=inf_df.loc['Switzerland'].index, y=inf_df.loc['Switzerland']),
])

fig.update_layout(
    
    xaxis_title = 'Day',
    yaxis_title = 'R',
    title={
        'text':r'$\text{Switzerland: R }$',
        'x':0.5,
        'xanchor':'center'
    }
)

fig.show()

United States

1. Checking \(h_I\)

#collapse_hide
fig = go.Figure(data=[    
    go.Bar(name='Ipd', x=conf_df_pd.loc['US'].index, y=conf_df_pd.loc['US']),
    go.Scatter(name='Ipd=lowpass(Ipd)', x=conf_df_pd.loc['US'].index, y=lowpass(conf_df_pd.loc['US'], 0.05, 1)[0]),
    go.Bar(name='Rpd', x=rec_df_pd.loc['US'].index, y=rec_df_pd.loc['US']),
    go.Scatter(name='Rpd=lowpass(Rpd)', x=rec_df_pd.loc['US'].index, y=lowpass(rec_df_pd.loc['US'], 0.05, 1)[0]),
    go.Scatter(name='Rpd=conv(Ipd)', x=conf_df_pd.loc['US'].index, y=signal.fftconvolve(h_I, conf_df_pd.loc['US'], mode='full'))
])

fig.update_layout(
    xaxis_title = 'Day',
    yaxis_title = 'Count',
    title={
        'text':r'$\text{US: Actual } R_{pd} \text{ vs. } h_I[j]\circledast I_{pd}[j]$',
        'x':0.5,
        'xanchor':'center'
    }
)

fig.show()

The US data is not very good.

2. Estimating \(E_{pd}\) by deconvolution of \(I_{pd}\)

#collapse_hide

# Setting up for deconvolution of Ipd

#regularization parameter
alpha=2

# Set up the smoothed Ipd we want to compare our guess with
Ipd=np.floor(lowpass(conf_df_pd.loc['US'], 0.05, 1)[0])
Ipd[Ipd<0]=0


# Pad with last value
i=0
while i < 100:
  Ipd=np.append(Ipd, Ipd[-1])
  i=i+1

# Find delay caused by h_L
delay=Ipd.argmax()-signal.fftconvolve(Ipd, h_L, mode='full').argmax()

# Guard against an implausible delay estimate from the noisy US data:
# if the argmax-based estimate exceeds 20 days in magnitude, fall back to -15
if (np.abs(delay)>20):
  delay = -15

# We want the initial guess to simply be Ipd shifted back by the delay
initial_guess=np.roll(Ipd,delay)
Enext = initial_guess

# An array to record MSE between the result we want and our iterated guess
mse=np.array([])
mse=np.append(mse, 10000000)
mse=np.append(mse, msecalc(Ipd[:len(conf_df_pd.loc['US'])], signal.fftconvolve(h_L, Enext, mode='full')[:len(conf_df_pd.loc['US'])]))

itercount=0
while mse[-1] < mse[-2]:
  itercount=itercount+1
  Enext=iter_deconv(alpha, h_L, Enext, delay, Ipd)
  mse=np.append(mse, msecalc(Ipd[:len(conf_df_pd.loc['US'])], signal.fftconvolve(h_L, Enext, mode='full')[:len(conf_df_pd.loc['US'])]))
  print("Iteration #" + str(itercount) +": MSE= "+str(mse[itercount]))
print("Iteration #" + str(itercount+1) +": MSE= "+str(mse[-1])+" so we use the result of the previous iteration.")
Iteration #1: MSE= 1302852.439800496
Iteration #2: MSE= 561276.1316916037
Iteration #3: MSE= 392265.42495631176
Iteration #4: MSE= 318143.92169379967
Iteration #5: MSE= 272333.0641150245
Iteration #6: MSE= 233533.32988425402
Iteration #7: MSE= 202941.06092517954
Iteration #8: MSE= 179715.6638386193
Iteration #9: MSE= 159674.82822623185
Iteration #10: MSE= 144041.10302527325
Iteration #11: MSE= 129965.13942407207
Iteration #12: MSE= 118818.8007805406
Iteration #13: MSE= 108715.87370526823
Iteration #14: MSE= 100634.66153928495
Iteration #15: MSE= 93446.25080919724
Iteration #16: MSE= 87452.88477850877
Iteration #17: MSE= 82485.3263728532
Iteration #18: MSE= 77847.33446293023
Iteration #19: MSE= 74602.08153637181
Iteration #20: MSE= 70781.73306311587
Iteration #21: MSE= 68800.8673361004
Iteration #22: MSE= 65387.86700604501
Iteration #23: MSE= 64292.13662703821
Iteration #24: MSE= 61036.839684856765
Iteration #25: MSE= 60524.93042299802
Iteration #26: MSE= 57292.73138597839
Iteration #27: MSE= 57176.48413202111
Iteration #28: MSE= 53959.94059590202
Iteration #29: MSE= 54120.03347178106 so we use the result of the previous iteration.
#collapse_hide
# We can keep iterating until the lowest MSE is reached

#change alpha if you like
alpha=2

i=0
while i < 10:
  itercount=itercount+1
  Enext=iter_deconv(alpha, h_L, Enext, delay, Ipd)
  print(msecalc(Ipd[:len(conf_df_pd.loc['US'])], signal.fftconvolve(h_L, Enext, mode='full')[:len(conf_df_pd.loc['US'])]))
  i=i+1
33182.952031012894
33274.1027461995
33182.952031012894
33274.1027461995
33182.952031012894
33274.1027461995
33182.952031012894
33274.1027461995
33182.952031012894
33274.1027461995
#collapse_hide
fig = go.Figure(data=[    
    go.Scatter(name='Epd=deconv(Ipd)', x=inf_df.loc['US'].index, y=Enext),
    go.Scatter(name='Ipd=conv(deconv(Ipd))', x=inf_df.loc['US'].index, y=signal.fftconvolve(h_L, Enext, mode='full')),
    go.Bar(name='Ipd', x=inf_df.loc['US'].index, y=conf_df_pd.loc['US']),
    go.Scatter(name='Ipd=lowpass(Ipd)', x=inf_df.loc['US'].index, y=lowpass(conf_df_pd.loc['US'], 0.05, 1)[0])
])

fig.update_layout(
    
    xaxis_title = 'Day',
    yaxis_title = 'Count',
    title={
        'text':r'$\text{US: Actual } I_{pd} \text{ vs. convolution of deconvolution of } I_{pd}$',
        'x':0.5,
        'xanchor':'center'
    }
)

fig.show()

3. \(\beta\) and \(R\) from \(E_{pd}\) and \(I\)

As described above:

\[R = \frac{E_{pd}}{\gamma~I}\]

#collapse_hide

# Calculate R
gam = 1/20.11 # As above, gamma = 1/20.11
R = Enext[:len(inf_df.loc['US'])]*(1/gam)/inf_df.loc['US']

fig = go.Figure(data=[    
    go.Scatter(name='R', x=inf_df.loc['US'].index, y=R),
    go.Scatter(name='Epd=deconv(Ipd)', x=inf_df.loc['US'].index, y=Enext),
    go.Scatter(name='Inf', x=inf_df.loc['US'].index, y=inf_df.loc['US']),
])

fig.update_layout(
    
    xaxis_title = 'Day',
    yaxis_title = 'R',
    title={
        'text':r'$\text{US: R }$',
        'x':0.5,
        'xanchor':'center'
    }
)

fig.show()

We can see that \(R\) declined rapidly after March 12th, dropping below 1 on May 30th, but it has since climbed back to close to 1.