Adjusting shape of a data array to perform optimization in SciPy

I have some code that performs an optimization to infer a parameter:

import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
from scipy.optimize import root
from scipy.optimize import minimize
import pandas as pd
    
    
d = {'Week': [1, 2,3,4,5,6,7,8,9,10,11], 'incidence': [206.1705794,2813.420201,11827.9453,30497.58655,10757.66954,7071.878779,3046.752723,1314.222882,765.9763902,201.3800578,109.8982006]}
df = pd.DataFrame(data=d)
    
def peak_infections(beta, df):
    
    # Weeks for which the ODE system will be solved
    weeks = df.Week.to_numpy()
    
    # Total population, N.
    N = 100000
    # Initial number of infected and recovered individuals, I0 and R0.
    I0, R0 = 10, 0
    # Everyone else, S0, is susceptible to infection initially.
    S0 = N - I0 - R0
    J0 = I0
    # Contact rate, beta, and mean recovery rate, gamma, (in 1/days).
    #reproductive no. R zero is beta/gamma
    gamma = 1/7 * 7 #rate should be in weeks now
    # A grid of time points 
    t = np.linspace(0, weeks[-1], weeks[-1] + 1)
    
    # The SIR model differential equations.
    def deriv(y, t, N, beta, gamma):
        S, I, R, J = y
        dS = ((-beta * S * I) / N)
        dI = ((beta * S * I) / N) - (gamma * I)
        dR = (gamma * I)
        dJ = ((beta * S * I) / N)
        return dS, dI, dR, dJ
    
    # Initial conditions are S0, I0, R0
    # Integrate the SIR equations over the time grid, t.
    solve = odeint(deriv, (S0, I0, R0, J0), t, args=(N, beta, gamma))
    S, I, R, J = solve.T
    
    return I/N
    
def residual(x, df):
    
    # Total population, N.
    N = 100000
    incidence = df.incidence.to_numpy()/N
    return np.sum((peak_infections(x, df)[1:] - incidence) ** 2)
    
x0 = 0.5
res = minimize(residual, x0, args=(df), method="Nelder-Mead").x
print(res)

However, it does not give the correct value, so in the line d = {'Week': [1, 2,3,4,5,6,7,8,9,10,11], 'incidence': [206.1705794,2813.420201,11827.9453,30497.58655,10757.66954,7071.878779,3046.752723,1314.222882,765.9763902,201.3800578,109.8982006]} I do not want the weeks numbered 1, 2, 3, ...; I want to use days instead, so that Python has clearer information to work with. I want to slice the linspace of days into weekly intervals. However, I am running into a shape-alignment problem:

import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
from scipy.optimize import root
from scipy.optimize import minimize
import pandas as pd
    
time = np.linspace(0, 77, 77 + 1)
d = {'Week': [time[7],time[14],time[21],time[28],time[35],time[42],time[49],time[56],time[63],time[70],time[77]], 'incidence': [206.1705794,2813.420201,11827.9453,30497.58655,10757.66954,7071.878779,3046.752723,1314.222882,765.9763902,201.3800578,109.8982006]}
#d = {'Week': [1, 2,3,4,5,6,7,8,9,10,11], 'incidence': [206.1705794,2813.420201,11827.9453,30497.58655,10757.66954,7071.878779,3046.752723,1314.222882,765.9763902,201.3800578,109.8982006]}
df = pd.DataFrame(data=d)
    
def peak_infections(beta, df):
    
    # Weeks for which the ODE system will be solved
    weeks = df.Week.to_numpy()
    
    # Total population, N.
    N = 100000
    # Initial number of infected and recovered individuals, I0 and R0.
    I0, R0 = 10, 0
    # Everyone else, S0, is susceptible to infection initially.
    S0 = N - I0 - R0
    J0 = I0
    # Contact rate, beta, and mean recovery rate, gamma, (in 1/days).
    #reproductive no. R zero is beta/gamma
    gamma = 1/7 * 7 #rate should be in weeks now
    # A grid of time points 
    t = np.linspace(0, 77, 77 + 1)
    
    # The SIR model differential equations.
    def deriv(y, t, N, beta, gamma):
        S, I, R, J = y
        dS = ((-beta * S * I) / N)
        dI = ((beta * S * I) / N) - (gamma * I)
        dR = (gamma * I)
        dJ = ((beta * S * I) / N)
        return dS, dI, dR, dJ
    
    # Initial conditions are S0, I0, R0
    # Integrate the SIR equations over the time grid, t.
    solve = odeint(deriv, (S0, I0, R0, J0), t, args=(N, beta, gamma))
    S, I, R, J = solve.T
    
    return I/N
    
def residual(x, df):
    
    # Total population, N.
    N = 100000
    incidence = df.incidence.to_numpy()/N
    return np.sum((peak_infections(x, df)[1:] - incidence) ** 2)
    
x0 = 0.5
res = minimize(residual, x0, args=(df), method="Nelder-Mead").x
print(res)

What I tried here was to recreate the dataframe by slicing time, i.e. 77 days, which is 11 weeks. It still returns a shape error, 77 versus 11 elements, in my residual function at the line return np.sum((peak_infections(x, df)[1:] - incidence) ** 2). Where is my approach going wrong?
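As a quick check of where the shapes come from (using the code above, with the initial guess beta = 0.5 purely for illustration): on the daily grid the solver returns one value per day, so peak_infections gives 78 values against 11 incidence values. Indexing the daily solution at the Week days (cast to int, since on this grid the array index equals the day) is the kind of weekly slicing I had in mind:

I_over_N = peak_infections(0.5, df)            # daily solution, shape (78,)
obs = df.incidence.to_numpy() / 100000         # observations, shape (11,)
print(I_over_N.shape, obs.shape)               # (78,) vs (11,): the mismatch

week_days = df.Week.to_numpy().astype(int)     # [ 7, 14, ..., 77]
weekly_I = I_over_N[week_days]                 # shape (11,), matches obs
print(weekly_I.shape)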

------------EDIT---------- Updated code

import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
from scipy.optimize import minimize
import pandas as pd

t = np.arange(7,84,7)
d = {'Week': t, 'incidence': [206.1705794,2813.420201,11827.9453,30497.58655,10757.66954,7071.878779,3046.752723,1314.222882,765.9763902,201.3800578,109.8982006]}
#d = {'Week': [time[7],time[14],time[21],time[28],time[35],time[42],time[49],time[56],time[63],time[70],time[77]], 'incidence': [206.1705794,2813.420201,11827.9453,30497.58655,10757.66954,7071.878779,3046.752723,1314.222882,765.9763902,201.3800578,109.8982006]}
#d = {'Week': [1, 2,3,4,5,6,7,8,9,10,11], 'incidence': [206.1705794,2813.420201,11827.9453,30497.58655,10757.66954,7071.878779,3046.752723,1314.222882,765.9763902,201.3800578,109.8982006]}
df = pd.DataFrame(data=d)

def peak_infections(beta, df):

    # Weeks for which the ODE system will be solved
    weeks = df.Week.to_numpy()

    # Total population, N.
    N = 100000
    # Initial number of infected and recovered individuals, I0 and R0.
    I0, R0 = 10, 0
    # Everyone else, S0, is susceptible to infection initially.
    S0 = N - I0 - R0
    J0 = I0
    # Contact rate, beta, and mean recovery rate, gamma, (in 1/days).
    #reproductive no. R zero is beta/gamma
    gamma = 1/7 * 7 #rate should be in weeks now
    # A grid of time points 
    t = np.linspace(0, 77, 77 + 1)

    # The SIR model differential equations.
    def deriv(y, t, N, beta, gamma):
        S, I, R, J = y
        dS = ((-beta * S * I) / N)
        dI = ((beta * S * I) / N) - (gamma * I)
        dR = (gamma * I)
        dJ = ((beta * S * I) / N)
        return dS, dI, dR, dJ

    # Initial conditions are S0, I0, R0
    # Integrate the SIR equations over the time grid, t.
    solve = odeint(deriv, (S0, I0, R0, J0), t, args=(N, beta, gamma))
    S, I, R, J = solve.T

    return I/N

def residual(x, df):

    # Total population, N.
    N = 100000
    incidence = df.incidence.to_numpy()/N
    return np.sum((peak_infections(x, df) - incidence) ** 2)

x0 = 0.5
res = minimize(residual, x0, args=(df), method="Nelder-Mead").x
print(res)

Your problem arises on line 52: solving peak_infections(x, df)[1:] gives you 77 values, while you have 11 incidence values, as you mentioned.

This happens because you are solving your ODE over t (line 29), which has 78 values. To avoid this, build a time vector with 11 values (one per week) inside the peak_infections function, like this:

t = np.linspace(0, 77, 77 + 1)
t = [t[7],t[14],t[21],t[28],t[35],t[42],t[49],t[56],t[63],t[70],t[77]]

or, building it from scratch:

t = np.arange(7,84,7)

and change the residual function as follows (do not slice with peak_infections(x, df)[1:]):

def residual(x, df):

    # Total population, N.
    N = 100000
    incidence = df.incidence.to_numpy()/N
    return np.sum((peak_infections(x, df) - incidence) ** 2)

This will solve your problem, because now you are comparing NumPy arrays of shape (11,) and (11,), which does not produce an error.
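For completeness, a compact sketch of the corrected version (same model and data as above, with the weekly time vector built inside peak_infections and no slicing in residual) could look roughly like this; note that odeint treats the first element of t, here day 7, as the initial time for the initial conditions:

import numpy as np
from scipy.integrate import odeint
from scipy.optimize import minimize

N = 100000
incidence = np.array([206.1705794, 2813.420201, 11827.9453, 30497.58655,
                      10757.66954, 7071.878779, 3046.752723, 1314.222882,
                      765.9763902, 201.3800578, 109.8982006]) / N

def peak_infections(beta):
    I0, R0 = 10, 0
    S0 = N - I0 - R0
    J0 = I0
    gamma = 1/7 * 7               # per week, as in the question; if t is read in
                                  # days you may prefer gamma = 1/7 instead
    t = np.arange(7, 84, 7)       # 11 time points: days 7, 14, ..., 77
                                  # odeint uses t[0] (= 7) as the initial time

    def deriv(y, t, N, beta, gamma):
        S, I, R, J = y
        dS = -beta * S * I / N
        dI = beta * S * I / N - gamma * I
        dR = gamma * I
        dJ = beta * S * I / N
        return dS, dI, dR, dJ

    S, I, R, J = odeint(deriv, (S0, I0, R0, J0), t, args=(N, beta, gamma)).T
    return I / N                  # shape (11,), one value per observed week

def residual(x):
    beta = np.ravel(x)[0]         # Nelder-Mead passes a length-1 array
    return np.sum((peak_infections(beta) - incidence) ** 2)   # (11,) vs (11,)

res = minimize(residual, 0.5, method="Nelder-Mead").x
print(res)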