调整数据数组的形状以在 SciPy 中执行优化
Adjusting shape of a data array to perform optimization in SciPy
我有一个执行优化以推断参数的代码:
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
from scipy.optimize import root
from scipy.optimize import minimize
import pandas as pd
d = {'Week': [1, 2,3,4,5,6,7,8,9,10,11], 'incidence': [206.1705794,2813.420201,11827.9453,30497.58655,10757.66954,7071.878779,3046.752723,1314.222882,765.9763902,201.3800578,109.8982006]}
df = pd.DataFrame(data=d)
def peak_infections(beta, df):
# Weeks for which the ODE system will be solved
weeks = df.Week.to_numpy()
# Total population, N.
N = 100000
# Initial number of infected and recovered individuals, I0 and R0.
I0, R0 = 10, 0
# Everyone else, S0, is susceptible to infection initially.
S0 = N - I0 - R0
J0 = I0
# Contact rate, beta, and mean recovery rate, gamma, (in 1/days).
#reproductive no. R zero is beta/gamma
gamma = 1/7 * 7 #rate should be in weeks now
# A grid of time points
t = np.linspace(0, weeks[-1], weeks[-1] + 1)
# The SIR model differential equations.
def deriv(y, t, N, beta, gamma):
S, I, R, J = y
dS = ((-beta * S * I) / N)
dI = ((beta * S * I) / N) - (gamma * I)
dR = (gamma * I)
dJ = ((beta * S * I) / N)
return dS, dI, dR, dJ
# Initial conditions are S0, I0, R0
# Integrate the SIR equations over the time grid, t.
solve = odeint(deriv, (S0, I0, R0, J0), t, args=(N, beta, gamma))
S, I, R, J = solve.T
return I/N
def residual(x, df):
# Total population, N.
N = 100000
incidence = df.incidence.to_numpy()/N
return np.sum((peak_infections(x, df)[1:] - incidence) ** 2)
x0 = 0.5
res = minimize(residual, x0, args=(df), method="Nelder-Mead").x
print(res)
但是,它没有给出正确的值,所以在 d = {'Week': [1, 2,3,4,5,6,7,8,9,10,11], 'incidence': [206.1705794,2813.420201,11827.9453,30497.58655,10757.66954,7071.878779,3046.752723,1314.222882,765.9763902,201.3800578,109.8982006]}
行中我不想将周数设为 1,2,3... 我想用天数代替 Python 有更清晰的信息可供使用。我想将天数 linspace
分割为每周间隔。但是,我遇到了一些形状对齐问题:
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
from scipy.optimize import root
from scipy.optimize import minimize
import pandas as pd
time = np.linspace(0, 77, 77 + 1)
d = {'Week': [time[7],time[14],time[21],time[28],time[35],time[42],time[49],time[56],time[63],time[70],time[77]], 'incidence': [206.1705794,2813.420201,11827.9453,30497.58655,10757.66954,7071.878779,3046.752723,1314.222882,765.9763902,201.3800578,109.8982006]}
#d = {'Week': [1, 2,3,4,5,6,7,8,9,10,11], 'incidence': [206.1705794,2813.420201,11827.9453,30497.58655,10757.66954,7071.878779,3046.752723,1314.222882,765.9763902,201.3800578,109.8982006]}
df = pd.DataFrame(data=d)
def peak_infections(beta, df):
# Weeks for which the ODE system will be solved
weeks = df.Week.to_numpy()
# Total population, N.
N = 100000
# Initial number of infected and recovered individuals, I0 and R0.
I0, R0 = 10, 0
# Everyone else, S0, is susceptible to infection initially.
S0 = N - I0 - R0
J0 = I0
# Contact rate, beta, and mean recovery rate, gamma, (in 1/days).
#reproductive no. R zero is beta/gamma
gamma = 1/7 * 7 #rate should be in weeks now
# A grid of time points
t = np.linspace(0, 77, 77 + 1)
# The SIR model differential equations.
def deriv(y, t, N, beta, gamma):
S, I, R, J = y
dS = ((-beta * S * I) / N)
dI = ((beta * S * I) / N) - (gamma * I)
dR = (gamma * I)
dJ = ((beta * S * I) / N)
return dS, dI, dR, dJ
# Initial conditions are S0, I0, R0
# Integrate the SIR equations over the time grid, t.
solve = odeint(deriv, (S0, I0, R0, J0), t, args=(N, beta, gamma))
S, I, R, J = solve.T
return I/N
def residual(x, df):
# Total population, N.
N = 100000
incidence = df.incidence.to_numpy()/N
return np.sum((peak_infections(x, df)[1:] - incidence) ** 2)
x0 = 0.5
res = minimize(residual, x0, args=(df), method="Nelder-Mead").x
print(res)
我在这里尝试的方法是通过切片 time
重新创建数据框,即 77 天,也就是 11 周。它仍然 returns 形状错误,77 对 11 个元素出现在我的函数 residual
行 return np.sum((peak_infections(x, df)[1:] - incidence) ** 2)
中。我的方法哪里出了问题?
------------编辑----------
更新代码
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
from scipy.optimize import minimize
import pandas as pd
t = np.arange(7,84,7)
d = {'Week': t, 'incidence': [206.1705794,2813.420201,11827.9453,30497.58655,10757.66954,7071.878779,3046.752723,1314.222882,765.9763902,201.3800578,109.8982006]}
#d = {'Week': [time[7],time[14],time[21],time[28],time[35],time[42],time[49],time[56],time[63],time[70],time[77]], 'incidence': [206.1705794,2813.420201,11827.9453,30497.58655,10757.66954,7071.878779,3046.752723,1314.222882,765.9763902,201.3800578,109.8982006]}
#d = {'Week': [1, 2,3,4,5,6,7,8,9,10,11], 'incidence': [206.1705794,2813.420201,11827.9453,30497.58655,10757.66954,7071.878779,3046.752723,1314.222882,765.9763902,201.3800578,109.8982006]}
df = pd.DataFrame(data=d)
def peak_infections(beta, df):
# Weeks for which the ODE system will be solved
weeks = df.Week.to_numpy()
# Total population, N.
N = 100000
# Initial number of infected and recovered individuals, I0 and R0.
I0, R0 = 10, 0
# Everyone else, S0, is susceptible to infection initially.
S0 = N - I0 - R0
J0 = I0
# Contact rate, beta, and mean recovery rate, gamma, (in 1/days).
#reproductive no. R zero is beta/gamma
gamma = 1/7 * 7 #rate should be in weeks now
# A grid of time points
t = np.linspace(0, 77, 77 + 1)
# The SIR model differential equations.
def deriv(y, t, N, beta, gamma):
S, I, R, J = y
dS = ((-beta * S * I) / N)
dI = ((beta * S * I) / N) - (gamma * I)
dR = (gamma * I)
dJ = ((beta * S * I) / N)
return dS, dI, dR, dJ
# Initial conditions are S0, I0, R0
# Integrate the SIR equations over the time grid, t.
solve = odeint(deriv, (S0, I0, R0, J0), t, args=(N, beta, gamma))
S, I, R, J = solve.T
return I/N
def residual(x, df):
# Total population, N.
N = 100000
incidence = df.incidence.to_numpy()/N
return np.sum((peak_infections(x, df) - incidence) ** 2)
x0 = 0.5
res = minimize(residual, x0, args=(df), method="Nelder-Mead").x
print(res)
您的问题出现在第 52 行,您通过求解 peak_infections(x, df)[1:]
得到 77 个值,并且您有 11 个 incidence
值,如您所提到的。
出现这种情况是因为您正在 t
(第 29 行)求解您的颂歌,它有 78 个值。为避免这种情况,请在 peak_infections
函数中生成一个包含 7 个值的时间向量,如下所示:
t = np.linspace(0, 77, 77 + 1)
t = [t[7],t[14],t[21],t[28],t[35],t[42],t[49],t[56],t[63],t[70],t[77]]
或全新的:
t = np.arange(7,84,7)
并按如下方式更改 residual
函数(不要分割 peak_infections(x, df)[1:]
):
def residual(x, df):
# Total population, N.
N = 100000
incidence = df.incidence.to_numpy()/N
return np.sum((peak_infections(x, df) - incidence) ** 2)
这将解决您的问题,因为现在您正在比较具有形状 (7,) 和 (7,) 的 NumPy 数组,这不会产生错误。
我有一个执行优化以推断参数的代码:
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
from scipy.optimize import root
from scipy.optimize import minimize
import pandas as pd
d = {'Week': [1, 2,3,4,5,6,7,8,9,10,11], 'incidence': [206.1705794,2813.420201,11827.9453,30497.58655,10757.66954,7071.878779,3046.752723,1314.222882,765.9763902,201.3800578,109.8982006]}
df = pd.DataFrame(data=d)
def peak_infections(beta, df):
# Weeks for which the ODE system will be solved
weeks = df.Week.to_numpy()
# Total population, N.
N = 100000
# Initial number of infected and recovered individuals, I0 and R0.
I0, R0 = 10, 0
# Everyone else, S0, is susceptible to infection initially.
S0 = N - I0 - R0
J0 = I0
# Contact rate, beta, and mean recovery rate, gamma, (in 1/days).
#reproductive no. R zero is beta/gamma
gamma = 1/7 * 7 #rate should be in weeks now
# A grid of time points
t = np.linspace(0, weeks[-1], weeks[-1] + 1)
# The SIR model differential equations.
def deriv(y, t, N, beta, gamma):
S, I, R, J = y
dS = ((-beta * S * I) / N)
dI = ((beta * S * I) / N) - (gamma * I)
dR = (gamma * I)
dJ = ((beta * S * I) / N)
return dS, dI, dR, dJ
# Initial conditions are S0, I0, R0
# Integrate the SIR equations over the time grid, t.
solve = odeint(deriv, (S0, I0, R0, J0), t, args=(N, beta, gamma))
S, I, R, J = solve.T
return I/N
def residual(x, df):
# Total population, N.
N = 100000
incidence = df.incidence.to_numpy()/N
return np.sum((peak_infections(x, df)[1:] - incidence) ** 2)
x0 = 0.5
res = minimize(residual, x0, args=(df), method="Nelder-Mead").x
print(res)
但是,它没有给出正确的值,所以在 d = {'Week': [1, 2,3,4,5,6,7,8,9,10,11], 'incidence': [206.1705794,2813.420201,11827.9453,30497.58655,10757.66954,7071.878779,3046.752723,1314.222882,765.9763902,201.3800578,109.8982006]}
行中我不想将周数设为 1,2,3... 我想用天数代替 Python 有更清晰的信息可供使用。我想将天数 linspace
分割为每周间隔。但是,我遇到了一些形状对齐问题:
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
from scipy.optimize import root
from scipy.optimize import minimize
import pandas as pd
time = np.linspace(0, 77, 77 + 1)
d = {'Week': [time[7],time[14],time[21],time[28],time[35],time[42],time[49],time[56],time[63],time[70],time[77]], 'incidence': [206.1705794,2813.420201,11827.9453,30497.58655,10757.66954,7071.878779,3046.752723,1314.222882,765.9763902,201.3800578,109.8982006]}
#d = {'Week': [1, 2,3,4,5,6,7,8,9,10,11], 'incidence': [206.1705794,2813.420201,11827.9453,30497.58655,10757.66954,7071.878779,3046.752723,1314.222882,765.9763902,201.3800578,109.8982006]}
df = pd.DataFrame(data=d)
def peak_infections(beta, df):
# Weeks for which the ODE system will be solved
weeks = df.Week.to_numpy()
# Total population, N.
N = 100000
# Initial number of infected and recovered individuals, I0 and R0.
I0, R0 = 10, 0
# Everyone else, S0, is susceptible to infection initially.
S0 = N - I0 - R0
J0 = I0
# Contact rate, beta, and mean recovery rate, gamma, (in 1/days).
#reproductive no. R zero is beta/gamma
gamma = 1/7 * 7 #rate should be in weeks now
# A grid of time points
t = np.linspace(0, 77, 77 + 1)
# The SIR model differential equations.
def deriv(y, t, N, beta, gamma):
S, I, R, J = y
dS = ((-beta * S * I) / N)
dI = ((beta * S * I) / N) - (gamma * I)
dR = (gamma * I)
dJ = ((beta * S * I) / N)
return dS, dI, dR, dJ
# Initial conditions are S0, I0, R0
# Integrate the SIR equations over the time grid, t.
solve = odeint(deriv, (S0, I0, R0, J0), t, args=(N, beta, gamma))
S, I, R, J = solve.T
return I/N
def residual(x, df):
# Total population, N.
N = 100000
incidence = df.incidence.to_numpy()/N
return np.sum((peak_infections(x, df)[1:] - incidence) ** 2)
x0 = 0.5
res = minimize(residual, x0, args=(df), method="Nelder-Mead").x
print(res)
我在这里尝试的方法是通过切片 time
重新创建数据框,即 77 天,也就是 11 周。它仍然 returns 形状错误,77 对 11 个元素出现在我的函数 residual
行 return np.sum((peak_infections(x, df)[1:] - incidence) ** 2)
中。我的方法哪里出了问题?
------------编辑---------- 更新代码
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
from scipy.optimize import minimize
import pandas as pd
t = np.arange(7,84,7)
d = {'Week': t, 'incidence': [206.1705794,2813.420201,11827.9453,30497.58655,10757.66954,7071.878779,3046.752723,1314.222882,765.9763902,201.3800578,109.8982006]}
#d = {'Week': [time[7],time[14],time[21],time[28],time[35],time[42],time[49],time[56],time[63],time[70],time[77]], 'incidence': [206.1705794,2813.420201,11827.9453,30497.58655,10757.66954,7071.878779,3046.752723,1314.222882,765.9763902,201.3800578,109.8982006]}
#d = {'Week': [1, 2,3,4,5,6,7,8,9,10,11], 'incidence': [206.1705794,2813.420201,11827.9453,30497.58655,10757.66954,7071.878779,3046.752723,1314.222882,765.9763902,201.3800578,109.8982006]}
df = pd.DataFrame(data=d)
def peak_infections(beta, df):
# Weeks for which the ODE system will be solved
weeks = df.Week.to_numpy()
# Total population, N.
N = 100000
# Initial number of infected and recovered individuals, I0 and R0.
I0, R0 = 10, 0
# Everyone else, S0, is susceptible to infection initially.
S0 = N - I0 - R0
J0 = I0
# Contact rate, beta, and mean recovery rate, gamma, (in 1/days).
#reproductive no. R zero is beta/gamma
gamma = 1/7 * 7 #rate should be in weeks now
# A grid of time points
t = np.linspace(0, 77, 77 + 1)
# The SIR model differential equations.
def deriv(y, t, N, beta, gamma):
S, I, R, J = y
dS = ((-beta * S * I) / N)
dI = ((beta * S * I) / N) - (gamma * I)
dR = (gamma * I)
dJ = ((beta * S * I) / N)
return dS, dI, dR, dJ
# Initial conditions are S0, I0, R0
# Integrate the SIR equations over the time grid, t.
solve = odeint(deriv, (S0, I0, R0, J0), t, args=(N, beta, gamma))
S, I, R, J = solve.T
return I/N
def residual(x, df):
# Total population, N.
N = 100000
incidence = df.incidence.to_numpy()/N
return np.sum((peak_infections(x, df) - incidence) ** 2)
x0 = 0.5
res = minimize(residual, x0, args=(df), method="Nelder-Mead").x
print(res)
您的问题出现在第 52 行,您通过求解 peak_infections(x, df)[1:]
得到 77 个值,并且您有 11 个 incidence
值,如您所提到的。
出现这种情况是因为您正在 t
(第 29 行)求解您的颂歌,它有 78 个值。为避免这种情况,请在 peak_infections
函数中生成一个包含 7 个值的时间向量,如下所示:
t = np.linspace(0, 77, 77 + 1)
t = [t[7],t[14],t[21],t[28],t[35],t[42],t[49],t[56],t[63],t[70],t[77]]
或全新的:
t = np.arange(7,84,7)
并按如下方式更改 residual
函数(不要分割 peak_infections(x, df)[1:]
):
def residual(x, df):
# Total population, N.
N = 100000
incidence = df.incidence.to_numpy()/N
return np.sum((peak_infections(x, df) - incidence) ** 2)
这将解决您的问题,因为现在您正在比较具有形状 (7,) 和 (7,) 的 NumPy 数组,这不会产生错误。