Epidemic modeling - Part 8
Studying real-world data
- Motivation for write-up
- Data available
- Analysis
- Real-world analysis
#collapse_hide
# This code wrangles the data from JHU
!pip install plotly==4.9.0
!pip install dash==1.16.0
import pandas as pd
import numpy as np
import math
from scipy import signal
import plotly.graph_objects as go
import plotly.express as px
from scipy.stats import expon
from scipy.stats import gamma
from scipy.stats import weibull_min
from numpy.random import default_rng
rng = default_rng()
import dash
import dash_core_components as dcc
import dash_html_components as html
import datetime
# Import confirmed cases
conf_df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')
#Import deaths data
deaths_df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv')
# Import recovery data
rec_df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv')
iso_alpha = pd.read_csv('https://raw.githubusercontent.com/jeffufpost/sars-cov-2-world-tracker/master/data/iso_alpha.csv', index_col=0, header=0)
# Wrangle the data
# Consolidate countries (e.g. French overseas territories are counted under France)
conf_df = conf_df.groupby("Country/Region")
conf_df = conf_df.sum().reset_index()
conf_df = conf_df.set_index('Country/Region')
deaths_df = deaths_df.groupby("Country/Region")
deaths_df = deaths_df.sum().reset_index()
deaths_df = deaths_df.set_index('Country/Region')
rec_df = rec_df.groupby("Country/Region")
rec_df = rec_df.sum().reset_index()
rec_df = rec_df.set_index('Country/Region')
# Remove Lat and Long columns
conf_df = conf_df.iloc[:,2:]
deaths_df = deaths_df.iloc[:,2:]
rec_df = rec_df.iloc[:,2:]
# Convert country names to correct format for search with pycountry
conf_df = conf_df.rename(index={'Congo (Brazzaville)': 'Congo', 'Congo (Kinshasa)': 'Congo, the Democratic Republic of the', 'Burma': 'Myanmar', 'Korea, South': 'Korea, Republic of', 'Laos': "Lao People's Democratic Republic", 'Taiwan*': 'Taiwan', "West Bank and Gaza":"Palestine, State of"})
# Convert country names to correct format for search with pycountry
deaths_df = deaths_df.rename(index={'Congo (Brazzaville)': 'Congo', 'Congo (Kinshasa)': 'Congo, the Democratic Republic of the', 'Burma': 'Myanmar', 'Korea, South': 'Korea, Republic of', 'Laos': "Lao People's Democratic Republic", 'Taiwan*': 'Taiwan', "West Bank and Gaza":"Palestine, State of"})
# Convert country names to correct format for search with pycountry
rec_df = rec_df.rename(index={'Congo (Brazzaville)': 'Congo', 'Congo (Kinshasa)': 'Congo, the Democratic Republic of the', 'Burma': 'Myanmar', 'Korea, South': 'Korea, Republic of', 'Laos': "Lao People's Democratic Republic", 'Taiwan*': 'Taiwan', "West Bank and Gaza":"Palestine, State of"})
# Convert dates to datetime format
conf_df.columns = pd.to_datetime(conf_df.columns).date
deaths_df.columns = pd.to_datetime(deaths_df.columns).date
rec_df.columns = pd.to_datetime(rec_df.columns).date
# Create per-day dataframes for cases, deaths, and recoveries via pd.DataFrame.diff
conf_df_pd = conf_df.diff(axis=1)
deaths_df_pd = deaths_df.diff(axis=1)
rec_df_pd = rec_df.diff(axis=1)
#print("Create infected dataframe = conf - deaths - recoveries")
inf_df = conf_df - deaths_df - rec_df
conf_df_pd.iloc[:,0] = 0
rec_df_pd.iloc[:,0] = 0
deaths_df_pd.iloc[:,0] = 0
inf_df.iloc[:,0] = 0
#print("Adding dataframes of 1st, 2nd, and 3rd derivatives of number of infected")
firstdev = inf_df.apply(np.gradient, axis=1)
seconddev = firstdev.apply(np.gradient)
thirddev = seconddev.apply(np.gradient)
#print("Create series of first date above 100 confirmed cases.....")
# Create a column containing date at which 100 confirmed cases were reached, NaN if not reached yet
fda100 = conf_df[conf_df > 100].apply(pd.Series.first_valid_index, axis=1)
# Create dataframe for probability plot
probevent = iso_alpha.join(inf_df)
probevent['prev'] = probevent.iloc[:,-1] / probevent['SP.POP.TOTL']
#collapse_hide
# This code is the lowpass filter
def lowpass(x, fc=0.05, pad=0):
    # fc is the cutoff frequency: 0.05 works for most countries, but noisier
    # series need a lower cutoff (e.g. 0.02 for France).
    # pad=1 extends the series by 100 days using the mean of the last three
    # values, which reduces edge artifacts from the zero-phase filtering.
    if pad == 1:
        i = 0
        while i < 100:
            x = np.append(x, (x[-1] + x[-2] + x[-3]) / 3)
            i = i + 1
    fs = 1  # sampling frequency: one sample per day
    w = fc / (fs / 2)  # normalize the cutoff to the Nyquist frequency
    b, a = signal.butter(5, w, 'low')  # 5th-order Butterworth low-pass
    return signal.filtfilt(b, a, x), w
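For reference, here is how the filter is invoked throughout this post (a usage sketch; `conf_df_pd` is the per-day confirmed-cases dataframe built above):
#collapse_hide
# Usage sketch: smooth Austria's noisy daily case counts with the default cutoff.
# The function returns the filtered signal and the normalized cutoff frequency w.
smoothed, w = lowpass(conf_df_pd.loc['Austria'], fc=0.05, pad=1)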
#collapse_hide
# This code creates the impulse responses
days = np.arange(100)
cdf = pd.DataFrame({
    'T_Latent': gamma.cdf(days, 1.8, loc=0.9, scale=(5.2-1.8)/0.9),
    'T_Infectious': weibull_min.cdf(days, 2.3, loc=2, scale=20.11)
})
h_L = cdf.diff().T_Latent
h_I = cdf.diff().T_Infectious
h_L[0] = 0
h_I[0] = 0
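As a quick sanity check (a short sketch, not part of the original pipeline), each impulse response is a discretized probability density obtained by differencing a CDF, so it should sum to roughly 1 over the 100-day window:
#collapse_hide
# Each impulse response should sum to ~1 (slightly less if the distribution's
# tail extends beyond 100 days).
print("h_L sums to:", h_L.sum())
print("h_I sums to:", h_I.sum())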
#collapse_hide
# This code is for the iterative deconvolution
# Define one iteration step (a Van Cittert-style additive correction):
# convolve the current guess with the impulse response, compare against the
# target, and feed a delayed, low-passed correction back into the guess.
def iter_deconv(alpha, impulse_response, input_signal, delay, comparator):
    conv = signal.fftconvolve(impulse_response, input_signal, mode='full')
    correction = np.roll(comparator - conv[:len(comparator)], delay)
    input_signal = np.floor(lowpass(alpha*correction + input_signal, 0.05, 0)[0])
    input_signal[input_signal < 0] = 0  # daily counts cannot be negative
    return input_signal

# Define a function returning the MSE between two signals as a measure of goodness of fit
def msecalc(A, B):
    return ((A - B)**2).mean(axis=0)
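To see what the iteration does, here is a small synthetic test (purely illustrative, with made-up numbers): we convolve a known input with $h_L$ and check that repeated calls to `iter_deconv` drive down the MSE between the re-convolved guess and the observation, at least over the first few iterations:
#collapse_hide
# Illustrative synthetic test of the iterative deconvolution (hypothetical data).
true_input = np.zeros(200)
true_input[40:80] = 1000  # a made-up rectangular pulse of daily exposures
observed = signal.fftconvolve(h_L, true_input, mode='full')[:200]
d = int(true_input.argmax() - observed.argmax())  # delay introduced by h_L
guess = np.roll(observed, d)  # initial guess: the delayed observation
for k in range(5):
    guess = iter_deconv(2, h_L, guess, d, observed)
    fit = signal.fftconvolve(h_L, guess, mode='full')[:200]
    print("Iteration " + str(k+1) + ": MSE = " + str(msecalc(observed, fit)))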
This is the 8th part of a multi-part blog series on modeling in epidemiology.
While we have studied the stochastic SEIR model in detail, we have not yet compared it to actual real-world data.
The goal of this 8th installment is to build on the 7th blog post and use deconvolution techniques on real-world data to estimate the various parameters.
Most notably, we want to calculate $R$ from the data made available by JHU; let's see how to go about this.
Data available
We will use the JHU data, which is updated daily and displayed graphically on my tracker here.
Many countries have had difficulty reporting reliable data, but a few have done so rather well.
We will take a closer look at these countries:
- Austria
- France
- Germany
- Switzerland
- United States
The JHU datasets include the following data:
- Daily cumulative confirmed cases
- Daily cumulative recoveries
- Daily cumulative deaths
From which we can calculate the following:
- Daily new confirmed cases: $I_{pd}$
- Daily new recoveries
- Daily new deaths
- Current number of cases: $I$
Analysis
- We have seen previously how we can get $E_{pd}$ from deconvolution of $I_{pd}$.
- We also know that: $$E_{pd} = \beta ~ I ~ \frac{S}{N}$$ $$\leftrightarrow \beta = \frac{N ~ E_{pd}}{I ~ S}$$
- And we know: $$R_0 = \frac{\beta}{\gamma}$$ and $$R=R_0\frac{S}{N}$$
- So: $$E_{pd}=\gamma~R~I$$ $$\leftrightarrow R = \frac{E_{pd}}{\gamma~I}$$
This means we can find $R$ by deconvolution of $I_{pd}$.
Our aim is to find this graphically in the real-world data.
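In code, once $E_{pd}$ has been estimated by deconvolution, recovering $R$ is a one-liner (a sketch with placeholder names; the actual per-country computations appear further down):
#collapse_hide
# Sketch: R = Epd / (gamma * I). `Epd_est` and `I_current` are placeholders for
# the deconvolved daily exposures and the current number of infected.
def estimate_R(Epd_est, I_current, gamma=1/20.11):
    return Epd_est / (gamma * I_current)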
Generic analysis
Let's first have a look at the general way we will go about the analysis (i.e. not country-specific).
- We have daily data $I_{pd}$ and $R_{pd}$
- We have our assumed $T_L$ and $T_I$
Note: depending on the data available, $R_{pd}$ can be:
- sum of deaths and recoveries
- only deaths
- only recoveries
Analysis steps:
- The first thing we want to check is whether $h_I[j]\circledast I_{pd}[j]$ gives us something close to $R_{pd}$. If not, why not? (What kind of $R_{pd}$ should we use?) See the sketch just after this list.
- Can we get an estimated $E_{pd}$ by deconvolution of $I_{pd}$? What cutoff frequency should we use?
- What can that tell us about $R$ and $\beta$?
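In code, the first check is just a forward convolution (a generic sketch using the dataframes built above; `country` is a placeholder for any JHU country name):
#collapse_hide
# Generic sketch of step 1: predict Rpd as h_I convolved with Ipd, truncated to
# the length of the observed series, for comparison against rec_df_pd.
def predicted_rpd(country):
    ipd = conf_df_pd.loc[country]
    return signal.fftconvolve(h_I, ipd, mode='full')[:len(ipd)]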
#collapse_hide
fig = go.Figure(data=[
go.Bar(name='Ipd', x=conf_df_pd.loc['Austria'].index, y=conf_df_pd.loc['Austria']),
go.Scatter(name='Ipd=lowpass(Ipd)', x=conf_df_pd.loc['Austria'].index, y=lowpass(conf_df_pd.loc['Austria'], 0.05, 1)[0]),
go.Bar(name='Rpd', x=rec_df_pd.loc['Austria'].index, y=rec_df_pd.loc['Austria']),
go.Scatter(name='Rpd=lowpass(Rpd)', x=rec_df_pd.loc['Austria'].index, y=lowpass(rec_df_pd.loc['Austria'], 0.05, 1)[0]),
go.Scatter(name='Rpd=conv(Ipd)', x=conf_df_pd.loc['Austria'].index, y=signal.fftconvolve(h_I, conf_df_pd.loc['Austria'], mode='full'))
])
fig.update_layout(
xaxis_title = 'Day',
yaxis_title = 'Count',
title={
'text':r'$\text{Austria: Actual } R_{pd} \text{ vs. } h_I[j]\circledast I_{pd}[j]$',
'x':0.5,
'xanchor':'center'
}
)
fig.show()
We can see the actual $R_{pd}$ leads $h_I[j]\circledast I_{pd}[j]$ by about 5 days.
There is a 20-day lag between peak $I_{pd}$ and peak $h_I[j]\circledast I_{pd}[j]$, as expected since $E[T_I] = 20.11$.
But there is only a 15-day lag between peak $I_{pd}$ and the actual $R_{pd}$.
There are a few possibilities for why this is the case, including that we may not have assumed the correct distribution for $T_I$.
There is, however, another plausible explanation: testing, especially in the early days, was slow, and a patient showing symptoms often had to wait several days before being tested. This alone could explain the 5-day difference.
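One way to probe this (an illustrative check, not part of the original analysis) is to scan over shifts and see which lag best aligns the convolution with the reported $R_{pd}$; a best shift near -5 days would be consistent with the 5-day lead:
#collapse_hide
# Illustrative check: which shift (in days) best aligns h_I * Ipd with the
# reported Rpd for Austria?
rpd = lowpass(rec_df_pd.loc['Austria'], 0.05, 0)[0]
pred = signal.fftconvolve(h_I, conf_df_pd.loc['Austria'], mode='full')[:len(rpd)]
shifts = np.arange(-15, 16)
errors = [msecalc(rpd, np.roll(pred, s)) for s in shifts]
print("Best shift:", shifts[int(np.argmin(errors))], "days")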
#collapse_hide
# Setting up for deconvolution of Ipd
# Regularization parameter
alpha = 2
# Set up the target Ipd we want to compare our guess with
Ipd = np.floor(lowpass(conf_df_pd.loc['Austria'], 0.05, 1)[0])
Ipd[Ipd < 0] = 0
# Pad with the last value
i = 0
while i < 100:
    Ipd = np.append(Ipd, Ipd[-1])
    i = i + 1
# Find the delay caused by h_L
delay = Ipd.argmax() - signal.fftconvolve(Ipd, h_L, mode='full').argmax()
# The initial guess is simply the delayed target
initial_guess = np.roll(Ipd, delay)
Enext = initial_guess
# An array to record the MSE between the target and our iterated guess
mse = np.array([])
mse = np.append(mse, 10000000)
mse = np.append(mse, msecalc(Ipd[:len(conf_df_pd.loc['Austria'])], signal.fftconvolve(h_L, Enext, mode='full')[:len(conf_df_pd.loc['Austria'])]))
itercount = 0
while mse[-1] < mse[-2]:
    itercount = itercount + 1
    Enext = iter_deconv(alpha, h_L, Enext, delay, Ipd)
    mse = np.append(mse, msecalc(Ipd[:len(conf_df_pd.loc['Austria'])], signal.fftconvolve(h_L, Enext, mode='full')[:len(conf_df_pd.loc['Austria'])]))
    print("Iteration #" + str(itercount) + ": MSE = " + str(mse[itercount]))
print("Iteration #" + str(itercount+1) + ": MSE = " + str(mse[-1]) + " so we use the result of the previous iteration.")
#collapse_hide
# We can keep iterating until the MSE reaches its minimum
# Change alpha if you like
#alpha = 2
i = 0
while i < 10:
    itercount = itercount + 1
    Enext = iter_deconv(alpha, h_L, Enext, delay, Ipd)
    print(msecalc(Ipd[:len(conf_df_pd.loc['Austria'])], signal.fftconvolve(h_L, Enext, mode='full')[:len(conf_df_pd.loc['Austria'])]))
    i = i + 1
#collapse_hide
fig = go.Figure(data=[
go.Scatter(name='Epd=deconv(Ipd)', x=inf_df.loc['Austria'].index, y=Enext),
go.Scatter(name='Ipd=conv(deconv(Ipd))', x=inf_df.loc['Austria'].index, y=signal.fftconvolve(h_L, Enext, mode='full')),
go.Bar(name='Ipd', x=inf_df.loc['Austria'].index, y=conf_df_pd.loc['Austria']),
go.Scatter(name='Ipd=lowpass(Ipd)', x=inf_df.loc['Austria'].index, y=lowpass(conf_df_pd.loc['Austria'], 0.05, 1)[0])
])
fig.update_layout(
xaxis_title = 'Day',
yaxis_title = 'Count',
title={
'text':r'$\text{Austria: Actual } I_{pd} \text{ vs. convolution of deconvolution of } I_{pd}$',
'x':0.5,
'xanchor':'center'
}
)
fig.show()
We can see that our estimate $\hat{E}_{pd}$ must be close to the true $E_{pd}$, since $I_{pd}$ is almost identical to $\hat{E}_{pd} \circledast h_L$.
Of course, this holds only as long as our estimate of $h_L$ is close to reality.
#collapse_hide
# Calculate R
gam = 1/15  # For Austria we use gamma = 1/15 rather than 1/20.11, matching the ~15-day lag to the actual Rpd observed above
R = Enext[:len(inf_df.loc['Austria'])]*(1/gam)/inf_df.loc['Austria']
fig = go.Figure(data=[
go.Scatter(name='R', x=inf_df.loc['Austria'].index, y=R),
go.Scatter(name='Epd=deconv(Ipd)', x=inf_df.loc['Austria'].index, y=Enext),
go.Scatter(name='Inf', x=inf_df.loc['Austria'].index, y=inf_df.loc['Austria']),
])
fig.update_layout(
xaxis_title = 'Day',
yaxis_title = 'R',
title={
'text':r'$\text{Austria: R }$',
'x':0.5,
'xanchor':'center'
}
)
fig.show()
#collapse_hide
fig = go.Figure(data=[
go.Bar(name='Ipd', x=conf_df_pd.loc['France'].index, y=conf_df_pd.loc['France']),
go.Scatter(name='Ipd=lowpass2(Ipd)', x=conf_df_pd.loc['France'].index, y=lowpass(conf_df_pd.loc['France'], 0.02, 1)[0]),
go.Bar(name='Rpd', x=rec_df_pd.loc['France'].index, y=rec_df_pd.loc['France']),
go.Scatter(name='Rpd=lowpass(Rpd)', x=rec_df_pd.loc['France'].index, y=lowpass(rec_df_pd.loc['France'], 0.05, 1)[0]),
go.Scatter(name='Rpd=conv(Ipd)', x=conf_df_pd.loc['France'].index, y=signal.fftconvolve(h_I, conf_df_pd.loc['France'], mode='full'))
])
fig.update_layout(
xaxis_title = 'Day',
yaxis_title = 'Count',
title={
'text':r'$\text{France: Actual } R_{pd} \text{ vs. } h_I[j]\circledast I_{pd}[j]$',
'x':0.5,
'xanchor':'center'
}
)
fig.show()
The French data is quite noisy, which is why we use fc = 0.02 here; the lower cutoff also helps the deconvolution below:
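To see the effect of the cutoff choice (an illustrative comparison, not part of the original pipeline), we can plot the two smoothed versions side by side:
#collapse_hide
# Illustrative comparison of two cutoff frequencies on France's daily cases.
ipd_fr = conf_df_pd.loc['France']
fig = go.Figure(data=[
    go.Scatter(name='fc=0.05', x=ipd_fr.index, y=lowpass(ipd_fr, 0.05, 1)[0]),
    go.Scatter(name='fc=0.02', x=ipd_fr.index, y=lowpass(ipd_fr, 0.02, 1)[0])
])
fig.show()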
#collapse_hide
# Setting up for deconvolution of Ipd
# Regularization parameter
alpha = 2
# Set up the target Ipd we want to compare our guess with
Ipd = np.floor(lowpass(conf_df_pd.loc['France'], 0.02, 1)[0])
Ipd[Ipd < 0] = 0
# Pad with the mean of the last three values
i = 0
while i < 100:
    Ipd = np.append(Ipd, (Ipd[-1] + Ipd[-2] + Ipd[-3]) / 3)
    i = i + 1
# Find the delay caused by h_L
delay = Ipd.argmax() - signal.fftconvolve(Ipd, h_L, mode='full').argmax()
# The initial guess is simply the delayed target
initial_guess = np.roll(Ipd, delay)
Enext = initial_guess
# An array to record the MSE between the target and our iterated guess
mse = np.array([])
mse = np.append(mse, 10000000)
mse = np.append(mse, msecalc(Ipd[:len(conf_df_pd.loc['France'])], signal.fftconvolve(h_L, Enext, mode='full')[:len(conf_df_pd.loc['France'])]))
itercount = 0
while mse[-1] < mse[-2]:
    itercount = itercount + 1
    Enext = iter_deconv(alpha, h_L, Enext, delay, Ipd)
    mse = np.append(mse, msecalc(Ipd[:len(conf_df_pd.loc['France'])], signal.fftconvolve(h_L, Enext, mode='full')[:len(conf_df_pd.loc['France'])]))
    print("Iteration #" + str(itercount) + ": MSE = " + str(mse[itercount]))
print("Iteration #" + str(itercount+1) + ": MSE = " + str(mse[-1]) + " so we use the result of the previous iteration.")
#collapse_hide
# We can keep iterating until the MSE reaches its minimum
# Change alpha if you like
alpha = 2
i = 0
while i < 10:
    itercount = itercount + 1
    Enext = iter_deconv(alpha, h_L, Enext, delay, Ipd)
    print(msecalc(Ipd[:len(conf_df_pd.loc['France'])], signal.fftconvolve(h_L, Enext, mode='full')[:len(conf_df_pd.loc['France'])]))
    i = i + 1
#collapse_hide
fig = go.Figure(data=[
go.Scatter(name='Epd=deconv(Ipd)', x=inf_df.loc['France'].index, y=Enext),
go.Scatter(name='Ipd=conv(deconv(Ipd))', x=inf_df.loc['France'].index, y=signal.fftconvolve(h_L, Enext, mode='full')),
go.Bar(name='Ipd', x=inf_df.loc['France'].index, y=conf_df_pd.loc['France']),
go.Scatter(name='Ipd=lowpass(Ipd)', x=inf_df.loc['France'].index, y=lowpass(conf_df_pd.loc['France'], 0.02, 1)[0])
])
fig.update_layout(
xaxis_title = 'Day',
yaxis_title = 'Count',
title={
'text':r'$\text{France: Actual } I_{pd} \text{ vs. convolution of deconvolution of } I_{pd}$',
'x':0.5,
'xanchor':'center'
}
)
fig.show()
#collapse_hide
# Calculate R
gam = 1/20.11  # gamma = 1/E[T_I] = 1/20.11
R = Enext[:len(inf_df.loc['France'])]*(1/gam)/inf_df.loc['France']
fig = go.Figure(data=[
go.Scatter(name='R', x=inf_df.loc['France'].index, y=R),
go.Scatter(name='Epd=deconv(Ipd)', x=inf_df.loc['France'].index, y=Enext),
go.Scatter(name='Inf', x=inf_df.loc['France'].index, y=inf_df.loc['France']),
])
fig.update_layout(
xaxis_title = 'Day',
yaxis_title = 'R',
title={
'text':r'$\text{France: R }$',
'x':0.5,
'xanchor':'center'
}
)
fig.show()
After March 15th we see a rapid decline in $R$, which fell below 1 around April 12th; however, the last two weeks of May saw an increase, and $R$ is close to 1 again in early June.
#collapse_hide
fig = go.Figure(data=[
go.Bar(name='Ipd', x=conf_df_pd.loc['Germany'].index, y=conf_df_pd.loc['Germany']),
go.Scatter(name='Ipd=lowpass(Ipd)', x=conf_df_pd.loc['Germany'].index, y=lowpass(conf_df_pd.loc['Germany'], 0.05, 1)[0]),
go.Bar(name='Rpd', x=rec_df_pd.loc['Germany'].index, y=rec_df_pd.loc['Germany']),
go.Scatter(name='Rpd=lowpass(Rpd)', x=rec_df_pd.loc['Germany'].index, y=lowpass(rec_df_pd.loc['Germany'], 0.05, 1)[0]),
go.Scatter(name='Rpd=conv(Ipd)', x=conf_df_pd.loc['Germany'].index, y=signal.fftconvolve(h_I, conf_df_pd.loc['Germany'], mode='full'))
])
fig.update_layout(
xaxis_title = 'Day',
yaxis_title = 'Count',
title={
'text':r'$\text{Germany: Actual } R_{pd} \text{ vs. } h_I[j]\circledast I_{pd}[j]$',
'x':0.5,
'xanchor':'center'
}
)
fig.show()
We can see the actual $R_{pd}$ lags $h_I[j]\circledast I_{pd}[j]$ by about 10 days, although the two curves are still fairly close.
#collapse_hide
# Setting up for deconvolution of Ipd
# Regularization parameter
alpha = 2
# Set up the target Ipd we want to compare our guess with
Ipd = np.floor(lowpass(conf_df_pd.loc['Germany'], 0.05, 1)[0])
Ipd[Ipd < 0] = 0
# Pad with the last value
i = 0
while i < 100:
    Ipd = np.append(Ipd, Ipd[-1])
    i = i + 1
# Find the delay caused by h_L
delay = Ipd.argmax() - signal.fftconvolve(Ipd, h_L, mode='full').argmax()
# The initial guess is simply the delayed target
initial_guess = np.roll(Ipd, delay)
Enext = initial_guess
# An array to record the MSE between the target and our iterated guess
mse = np.array([])
mse = np.append(mse, 10000000)
mse = np.append(mse, msecalc(Ipd[:len(conf_df_pd.loc['Germany'])], signal.fftconvolve(h_L, Enext, mode='full')[:len(conf_df_pd.loc['Germany'])]))
itercount = 0
while mse[-1] < mse[-2]:
    itercount = itercount + 1
    Enext = iter_deconv(alpha, h_L, Enext, delay, Ipd)
    mse = np.append(mse, msecalc(Ipd[:len(conf_df_pd.loc['Germany'])], signal.fftconvolve(h_L, Enext, mode='full')[:len(conf_df_pd.loc['Germany'])]))
    print("Iteration #" + str(itercount) + ": MSE = " + str(mse[itercount]))
print("Iteration #" + str(itercount+1) + ": MSE = " + str(mse[-1]) + " so we use the result of the previous iteration.")
#collapse_hide
# We can keep iterating until the MSE reaches its minimum
# Change alpha if you like
alpha = 2
i = 0
while i < 10:
    itercount = itercount + 1
    Enext = iter_deconv(alpha, h_L, Enext, delay, Ipd)
    print(msecalc(Ipd[:len(conf_df_pd.loc['Germany'])], signal.fftconvolve(h_L, Enext, mode='full')[:len(conf_df_pd.loc['Germany'])]))
    i = i + 1
#collapse_hide
fig = go.Figure(data=[
go.Scatter(name='Epd=deconv(Ipd)', x=inf_df.loc['Germany'].index, y=Enext),
go.Scatter(name='Ipd=conv(deconv(Ipd))', x=inf_df.loc['Germany'].index, y=signal.fftconvolve(h_L, Enext, mode='full')),
go.Bar(name='Ipd', x=inf_df.loc['Germany'].index, y=conf_df_pd.loc['Germany']),
go.Scatter(name='Ipd=lowpass(Ipd)', x=inf_df.loc['Germany'].index, y=lowpass(conf_df_pd.loc['Germany'], 0.05, 1)[0])
])
fig.update_layout(
xaxis_title = 'Day',
yaxis_title = 'Count',
title={
'text':r'$\text{Germany: Actual } I_{pd} \text{ vs. convolution of deconvolution of } I_{pd}$',
'x':0.5,
'xanchor':'center'
}
)
fig.show()
We can see that our estimate $\hat{E}_{pd}$ must be close to the true $E_{pd}$, since $I_{pd}$ is almost identical to $\hat{E}_{pd} \circledast h_L$.
Of course, this holds only as long as our estimate of $h_L$ is close to reality.
#collapse_hide
# Calculate R
gam = 1/20.11  # gamma = 1/E[T_I] = 1/20.11
R = Enext[:len(inf_df.loc['Germany'])]*(1/gam)/inf_df.loc['Germany']
fig = go.Figure(data=[
go.Scatter(name='R', x=inf_df.loc['Germany'].index, y=R),
go.Scatter(name='Epd=deconv(Ipd)', x=inf_df.loc['Germany'].index, y=Enext),
go.Scatter(name='Inf', x=inf_df.loc['Germany'].index, y=inf_df.loc['Germany']),
])
fig.update_layout(
xaxis_title = 'Day',
yaxis_title = 'R',
title={
'text':r'$\text{Germany: R }$',
'x':0.5,
'xanchor':'center'
}
)
fig.show()
We can see that $R$ has been below 1 since April 3rd; the peak in $E_{pd}$ came slightly earlier, on March 23rd.
#collapse_hide
fig = go.Figure(data=[
go.Bar(name='Ipd', x=conf_df_pd.loc['Switzerland'].index, y=conf_df_pd.loc['Switzerland']),
go.Scatter(name='Ipd=lowpass(Ipd)', x=conf_df_pd.loc['Switzerland'].index, y=lowpass(conf_df_pd.loc['Switzerland'], 0.05, 1)[0]),
go.Bar(name='Rpd', x=rec_df_pd.loc['Switzerland'].index, y=rec_df_pd.loc['Switzerland']),
go.Scatter(name='Rpd=lowpass(Rpd)', x=rec_df_pd.loc['Switzerland'].index, y=lowpass(rec_df_pd.loc['Switzerland'], 0.05, 1)[0]),
go.Scatter(name='Rpd=conv(Ipd)', x=conf_df_pd.loc['Switzerland'].index, y=signal.fftconvolve(h_I, conf_df_pd.loc['Switzerland'], mode='full'))
])
fig.update_layout(
xaxis_title = 'Day',
yaxis_title = 'Count',
title={
'text':r'$\text{Switzerland: Actual } R_{pd} \text{ vs. } h_I[j]\circledast I_{pd}[j]$',
'x':0.5,
'xanchor':'center'
}
)
fig.show()
We can see the actual $R_{pd}$ leads $h_I[j]\circledast I_{pd}[j]$ by about 7 days.
#collapse_hide
# Setting up for deconvolution of Ipd
# Regularization parameter
alpha = 2
# Set up the target Ipd we want to compare our guess with
Ipd = np.floor(lowpass(conf_df_pd.loc['Switzerland'], 0.05, 1)[0])
Ipd[Ipd < 0] = 0
# Pad with the last value
i = 0
while i < 100:
    Ipd = np.append(Ipd, Ipd[-1])
    i = i + 1
# Find the delay caused by h_L
delay = Ipd.argmax() - signal.fftconvolve(Ipd, h_L, mode='full').argmax()
# The initial guess is simply the delayed target
initial_guess = np.roll(Ipd, delay)
Enext = initial_guess
# An array to record the MSE between the target and our iterated guess
mse = np.array([])
mse = np.append(mse, 10000000)
mse = np.append(mse, msecalc(Ipd[:len(conf_df_pd.loc['Switzerland'])], signal.fftconvolve(h_L, Enext, mode='full')[:len(conf_df_pd.loc['Switzerland'])]))
itercount = 0
while mse[-1] < mse[-2]:
    itercount = itercount + 1
    Enext = iter_deconv(alpha, h_L, Enext, delay, Ipd)
    mse = np.append(mse, msecalc(Ipd[:len(conf_df_pd.loc['Switzerland'])], signal.fftconvolve(h_L, Enext, mode='full')[:len(conf_df_pd.loc['Switzerland'])]))
    print("Iteration #" + str(itercount) + ": MSE = " + str(mse[itercount]))
print("Iteration #" + str(itercount+1) + ": MSE = " + str(mse[-1]) + " so we use the result of the previous iteration.")
#collapse_hide
# We can keep iterating until the MSE reaches its minimum
# Change alpha if you like
alpha = 2
i = 0
while i < 10:
    itercount = itercount + 1
    Enext = iter_deconv(alpha, h_L, Enext, delay, Ipd)
    print(msecalc(Ipd[:len(conf_df_pd.loc['Switzerland'])], signal.fftconvolve(h_L, Enext, mode='full')[:len(conf_df_pd.loc['Switzerland'])]))
    i = i + 1
#collapse_hide
fig = go.Figure(data=[
go.Scatter(name='Epd=deconv(Ipd)', x=inf_df.loc['Switzerland'].index, y=Enext),
go.Scatter(name='Ipd=conv(deconv(Ipd))', x=inf_df.loc['Switzerland'].index, y=signal.fftconvolve(h_L, Enext, mode='full')),
go.Bar(name='Ipd', x=inf_df.loc['Switzerland'].index, y=conf_df_pd.loc['Switzerland']),
go.Scatter(name='Ipd=lowpass(Ipd)', x=inf_df.loc['Switzerland'].index, y=lowpass(conf_df_pd.loc['Switzerland'], 0.05, 1)[0])
])
fig.update_layout(
xaxis_title = 'Day',
yaxis_title = 'Count',
title={
'text':r'$\text{Switzerland: Actual } I_{pd} \text{ vs. convolution of deconvolution of } I_{pd}$',
'x':0.5,
'xanchor':'center'
}
)
fig.show()
We can see that our estimate $\hat{E}_{pd}$ must be close to the true $E_{pd}$, since $I_{pd}$ is almost identical to $\hat{E}_{pd} \circledast h_L$.
Of course, this holds only as long as our estimate of $h_L$ is close to reality.
#collapse_hide
# Calculate R
gam = 1/20.11  # gamma = 1/E[T_I] = 1/20.11
R = Enext[:len(inf_df.loc['Switzerland'])]*(1/gam)/inf_df.loc['Switzerland']
fig = go.Figure(data=[
go.Scatter(name='R', x=inf_df.loc['Switzerland'].index, y=R),
go.Scatter(name='Epd=deconv(Ipd)', x=inf_df.loc['Switzerland'].index, y=Enext),
go.Scatter(name='Inf', x=inf_df.loc['Switzerland'].index, y=inf_df.loc['Switzerland']),
])
fig.update_layout(
xaxis_title = 'Day',
yaxis_title = 'R',
title={
'text':r'$\text{Switzerland: R }$',
'x':0.5,
'xanchor':'center'
}
)
fig.show()
#collapse_hide
fig = go.Figure(data=[
go.Bar(name='Ipd', x=conf_df_pd.loc['US'].index, y=conf_df_pd.loc['US']),
go.Scatter(name='Ipd=lowpass(Ipd)', x=conf_df_pd.loc['US'].index, y=lowpass(conf_df_pd.loc['US'], 0.05, 1)[0]),
go.Bar(name='Rpd', x=rec_df_pd.loc['US'].index, y=rec_df_pd.loc['US']),
go.Scatter(name='Rpd=lowpass(Rpd)', x=rec_df_pd.loc['US'].index, y=lowpass(rec_df_pd.loc['US'], 0.05, 1)[0]),
go.Scatter(name='Rpd=conv(Ipd)', x=conf_df_pd.loc['US'].index, y=signal.fftconvolve(h_I, conf_df_pd.loc['US'], mode='full'))
])
fig.update_layout(
xaxis_title = 'Day',
yaxis_title = 'Count',
title={
'text':r'$\text{US: Actual } R_{pd} \text{ vs. } h_I[j]\circledast I_{pd}[j]$',
'x':0.5,
'xanchor':'center'
}
)
fig.show()
The US data is not very good, which will complicate the deconvolution below.
#collapse_hide
# Setting up for deconvolution of Ipd
# Regularization parameter
alpha = 2
# Set up the target Ipd we want to compare our guess with
Ipd = np.floor(lowpass(conf_df_pd.loc['US'], 0.05, 1)[0])
Ipd[Ipd < 0] = 0
# Pad with the last value
i = 0
while i < 100:
    Ipd = np.append(Ipd, Ipd[-1])
    i = i + 1
# Find the delay caused by h_L
delay = Ipd.argmax() - signal.fftconvolve(Ipd, h_L, mode='full').argmax()
# The noisy US data can yield a spurious delay estimate, so clamp it
if np.abs(delay) > 20:
    delay = -15
# The initial guess is simply the delayed target
initial_guess = np.roll(Ipd, delay)
Enext = initial_guess
# An array to record the MSE between the target and our iterated guess
mse = np.array([])
mse = np.append(mse, 10000000)
mse = np.append(mse, msecalc(Ipd[:len(conf_df_pd.loc['US'])], signal.fftconvolve(h_L, Enext, mode='full')[:len(conf_df_pd.loc['US'])]))
itercount = 0
while mse[-1] < mse[-2]:
    itercount = itercount + 1
    Enext = iter_deconv(alpha, h_L, Enext, delay, Ipd)
    mse = np.append(mse, msecalc(Ipd[:len(conf_df_pd.loc['US'])], signal.fftconvolve(h_L, Enext, mode='full')[:len(conf_df_pd.loc['US'])]))
    print("Iteration #" + str(itercount) + ": MSE = " + str(mse[itercount]))
print("Iteration #" + str(itercount+1) + ": MSE = " + str(mse[-1]) + " so we use the result of the previous iteration.")
#collapse_hide
# We can keep iterating until the MSE reaches its minimum
# Change alpha if you like
alpha = 2
i = 0
while i < 10:
    itercount = itercount + 1
    Enext = iter_deconv(alpha, h_L, Enext, delay, Ipd)
    print(msecalc(Ipd[:len(conf_df_pd.loc['US'])], signal.fftconvolve(h_L, Enext, mode='full')[:len(conf_df_pd.loc['US'])]))
    i = i + 1
#collapse_hide
fig = go.Figure(data=[
go.Scatter(name='Epd=deconv(Ipd)', x=inf_df.loc['US'].index, y=Enext),
go.Scatter(name='Ipd=conv(deconv(Ipd))', x=inf_df.loc['US'].index, y=signal.fftconvolve(h_L, Enext, mode='full')),
go.Bar(name='Ipd', x=inf_df.loc['US'].index, y=conf_df_pd.loc['US']),
go.Scatter(name='Ipd=lowpass(Ipd)', x=inf_df.loc['US'].index, y=lowpass(conf_df_pd.loc['US'], 0.05, 1)[0])
])
fig.update_layout(
xaxis_title = 'Day',
yaxis_title = 'Count',
title={
'text':r'$\text{US: Actual } I_{pd} \text{ vs. convolution of deconvolution of } I_{pd}$',
'x':0.5,
'xanchor':'center'
}
)
fig.show()
#collapse_hide
# Calculate R
gam = 1/20.11  # gamma = 1/E[T_I] = 1/20.11
R = Enext[:len(inf_df.loc['US'])]*(1/gam)/inf_df.loc['US']
fig = go.Figure(data=[
go.Scatter(name='R', x=inf_df.loc['US'].index, y=R),
go.Scatter(name='Epd=deconv(Ipd)', x=inf_df.loc['US'].index, y=Enext),
go.Scatter(name='Inf', x=inf_df.loc['US'].index, y=inf_df.loc['US']),
])
fig.update_layout(
xaxis_title = 'Day',
yaxis_title = 'R',
title={
'text':r'$\text{US: R }$',
'x':0.5,
'xanchor':'center'
}
)
fig.show()
We can see $R$ declined rapidly after March 12th, dropping below 1 around May 30th, but it has since climbed back close to 1.