迭代二维回归相关截断 normal/laplace 分布
iterative 2-D regression dependent truncated normal/laplace distribution
我需要你的帮助来创建一个截断正态分布(最好是截断拉普拉斯分布),它线性依赖于另一个拉普拉斯分布变量。
到目前为止,不幸的是,这是我在导出的 y 分布中使用 NaN 所取得的成果:
import numpy as np
from matplotlib import pyplot as plt
from scipy.stats import gaussian_kde, truncnorm
slope = 0.2237
intercept = 1.066
spread = 4.8719
def dependency(x):
    """Return slope*x + intercept plus Laplace noise, NaN-ing out-of-band values.

    Samples falling outside the per-x [lower, upper] band are replaced with
    NaN, so the returned array may contain NaNs the caller must filter.
    """
    trend = slope * x + intercept
    band_low = slope / spread * 3 * x
    band_high = slope * spread / 3 * x + 2 * intercept
    noisy = trend + np.random.laplace(loc=0, scale=spread, size=len(trend))
    # Mark every sample that escaped the band as NaN ("the desperate solution").
    out_of_band = (noisy < band_low) | (noisy > band_high)
    noisy[out_of_band] = np.nan
    return noisy
# --- Sample x from a truncated normal, derive y -----------------------------
# NOTE: renamed from `max`/`min` — those shadowed the builtins.
X_MAX = 100
X_MIN = 1
X_MEAN = 40
X_SIGMA = 25
# truncnorm takes its bounds in standard-deviation units around loc.
x = truncnorm((X_MIN - X_MEAN) / X_SIGMA, (X_MAX - X_MEAN) / X_SIGMA,
              loc=X_MEAN, scale=X_SIGMA).rvs(5000)
y = dependency(x)

# --- Plotting ---------------------------------------------------------------
xx = np.linspace(np.nanmin(x), np.nanmax(x), 100)
yy = slope * xx + intercept
lower = slope / spread * 3 * xx
upper = slope * spread / 3 * xx + 2 * intercept

# Drop the NaN samples introduced by dependency() before running the KDE.
mask = ~np.isnan(y) & ~np.isnan(x)
x = x[mask]
y = y[mask]

# Colour points by local density, plotting the densest points last.
xy = np.vstack([x, y])
z = gaussian_kde(xy)(xy)
idz = z.argsort()
x, y, z = x[idz], y[idz], z[idz]

fig, ax = plt.subplots(figsize=(5, 5))
plt.plot(xx, upper, 'r-.', label='upper constraint')
plt.plot(xx, lower, 'r--', label='lower constraint')
ax.scatter(x, y, c=z, s=3)
plt.xlabel(r'$\bf X_{laplace}$')
plt.ylabel(r'$\bf Y_{{derived}}$')
plt.plot(xx, yy, 'r', label='regression model')
plt.legend()
plt.tight_layout()
plt.show()
我最终想要得到的是一个没有 NaN 的 y 分布,这样对于每个 x 都有一个对应的 y 在 upper/lower 阈值范围内。也就是说,回归线周围的 lower/upper-truncated 分布。
期待创意!
谢谢你,并致以最诚挚的问候。
你想要做的基本上是对超出范围的值进行重试。你可以定义一个函数逐个处理每个值,也可以先生成初始值,然后循环"修正"其中超出范围的值,如下所示:
import numpy as np
from matplotlib import pyplot as plt
from scipy.stats import gaussian_kde, truncnorm
slope = 0.2237
intercept = 1.066
spread = 4.8719
def truncated_noise(y, lower, upper, scale=None):
    """Rejection-sample a Laplace-noised value of *y* strictly inside (lower, upper).

    Slow but effective: repeatedly draws y + Laplace(0, scale) until the draw
    lands inside the interval.  *scale* defaults to the module-level `spread`.

    Returns a plain float.  (The original returned a shape-(1,) array from
    `size=1`; assigning that into a scalar array slot is deprecated in
    NumPy >= 1.25 and an error in NumPy 2.)

    NOTE(review): loops forever if (lower, upper) carries ~zero probability
    mass around y — callers should ensure the band is reachable.
    """
    if scale is None:
        scale = spread
    while True:
        candidate = y + np.random.laplace(loc=0, scale=scale)
        if lower < candidate < upper:
            return candidate
def refine_noise(y, y_lin_noise, lower, upper):
    """Redraw (in place) every noisy sample that violates its [lower, upper] band."""
    # Indices of band violations, visited in ascending order — same order as
    # a plain index loop, so the random stream is consumed identically.
    bad = np.flatnonzero((y_lin_noise > upper) | (y_lin_noise < lower))
    for idx in bad:
        y_lin_noise[idx] = truncated_noise(y[idx], lower[idx], upper[idx])
    return y_lin_noise
def dependency(x):
    """Return slope*x + intercept plus Laplace noise, truncated into the band.

    Any draw outside the per-x [lower, upper] band is resampled by
    refine_noise, so the result contains no NaNs.
    """
    trend = slope * x + intercept
    band_low = slope / spread * 3 * x
    band_high = slope * spread / 3 * x + 2 * intercept
    noisy = trend + np.random.laplace(loc=0, scale=spread, size=len(trend))
    return refine_noise(trend, noisy, band_low, band_high)
# --- Sample x from a truncated normal, derive y -----------------------------
# NOTE: renamed from `max`/`min` — those shadowed the builtins.
X_MAX = 100
X_MIN = 1
X_MEAN = 40
X_SIGMA = 25
# truncnorm takes its bounds in standard-deviation units around loc.
x = truncnorm((X_MIN - X_MEAN) / X_SIGMA, (X_MAX - X_MEAN) / X_SIGMA,
              loc=X_MEAN, scale=X_SIGMA).rvs(5000)
y = dependency(x)

# --- Plotting ---------------------------------------------------------------
xx = np.linspace(np.nanmin(x), np.nanmax(x), 100)
yy = slope * xx + intercept
lower = slope / spread * 3 * xx
upper = slope * spread / 3 * xx + 2 * intercept

# Colour points by local density, plotting the densest points last.
xy = np.vstack([x, y])
z = gaussian_kde(xy)(xy)
idz = z.argsort()
x, y, z = x[idz], y[idz], z[idz]

fig, ax = plt.subplots(figsize=(5, 5))
plt.plot(xx, upper, 'r-.', label='upper constraint')
plt.plot(xx, lower, 'r--', label='lower constraint')
ax.scatter(x, y, c=z, s=3)
plt.xlabel(r'$\bf X_{laplace}$')
plt.ylabel(r'$\bf Y_{{derived}}$')
plt.plot(xx, yy, 'r', label='regression model')
plt.legend()
plt.tight_layout()
plt.show()
我不知道你想用它做什么,但我知道这不再是随机拉普拉斯分布,所以如果你的函数需要它,请不要这样做。
我需要你的帮助来创建一个截断正态分布(最好是截断拉普拉斯分布),它线性依赖于另一个拉普拉斯分布变量。
到目前为止,不幸的是,这是我在导出的 y 分布中使用 NaN 所取得的成果:
import numpy as np
from matplotlib import pyplot as plt
from scipy.stats import gaussian_kde, truncnorm
slope = 0.2237
intercept = 1.066
spread = 4.8719
def dependency(x):
    """Return slope*x + intercept plus Laplace noise, NaN-ing out-of-band values.

    Samples falling outside the per-x [lower, upper] band are replaced with
    NaN, so the returned array may contain NaNs the caller must filter.
    """
    trend = slope * x + intercept
    band_low = slope / spread * 3 * x
    band_high = slope * spread / 3 * x + 2 * intercept
    noisy = trend + np.random.laplace(loc=0, scale=spread, size=len(trend))
    # Mark every sample that escaped the band as NaN ("the desperate solution").
    out_of_band = (noisy < band_low) | (noisy > band_high)
    noisy[out_of_band] = np.nan
    return noisy
# --- Sample x from a truncated normal, derive y -----------------------------
# NOTE: renamed from `max`/`min` — those shadowed the builtins.
X_MAX = 100
X_MIN = 1
X_MEAN = 40
X_SIGMA = 25
# truncnorm takes its bounds in standard-deviation units around loc.
x = truncnorm((X_MIN - X_MEAN) / X_SIGMA, (X_MAX - X_MEAN) / X_SIGMA,
              loc=X_MEAN, scale=X_SIGMA).rvs(5000)
y = dependency(x)

# --- Plotting ---------------------------------------------------------------
xx = np.linspace(np.nanmin(x), np.nanmax(x), 100)
yy = slope * xx + intercept
lower = slope / spread * 3 * xx
upper = slope * spread / 3 * xx + 2 * intercept

# Drop the NaN samples introduced by dependency() before running the KDE.
mask = ~np.isnan(y) & ~np.isnan(x)
x = x[mask]
y = y[mask]

# Colour points by local density, plotting the densest points last.
xy = np.vstack([x, y])
z = gaussian_kde(xy)(xy)
idz = z.argsort()
x, y, z = x[idz], y[idz], z[idz]

fig, ax = plt.subplots(figsize=(5, 5))
plt.plot(xx, upper, 'r-.', label='upper constraint')
plt.plot(xx, lower, 'r--', label='lower constraint')
ax.scatter(x, y, c=z, s=3)
plt.xlabel(r'$\bf X_{laplace}$')
plt.ylabel(r'$\bf Y_{{derived}}$')
plt.plot(xx, yy, 'r', label='regression model')
plt.legend()
plt.tight_layout()
plt.show()
我最终想要得到的是一个没有 NaN 的 y 分布,这样对于每个 x 都有一个对应的 y 在 upper/lower 阈值范围内。也就是说,回归线周围的 lower/upper-truncated 分布。
期待创意!
谢谢你,并致以最诚挚的问候。
你想要做的基本上是对超出范围的值进行重试。你可以定义一个函数逐个处理每个值,也可以先生成初始值,然后循环"修正"其中超出范围的值,如下所示:
import numpy as np
from matplotlib import pyplot as plt
from scipy.stats import gaussian_kde, truncnorm
slope = 0.2237
intercept = 1.066
spread = 4.8719
def truncated_noise(y, lower, upper, scale=None):
    """Rejection-sample a Laplace-noised value of *y* strictly inside (lower, upper).

    Slow but effective: repeatedly draws y + Laplace(0, scale) until the draw
    lands inside the interval.  *scale* defaults to the module-level `spread`.

    Returns a plain float.  (The original returned a shape-(1,) array from
    `size=1`; assigning that into a scalar array slot is deprecated in
    NumPy >= 1.25 and an error in NumPy 2.)

    NOTE(review): loops forever if (lower, upper) carries ~zero probability
    mass around y — callers should ensure the band is reachable.
    """
    if scale is None:
        scale = spread
    while True:
        candidate = y + np.random.laplace(loc=0, scale=scale)
        if lower < candidate < upper:
            return candidate
def refine_noise(y, y_lin_noise, lower, upper):
    """Redraw (in place) every noisy sample that violates its [lower, upper] band."""
    # Indices of band violations, visited in ascending order — same order as
    # a plain index loop, so the random stream is consumed identically.
    bad = np.flatnonzero((y_lin_noise > upper) | (y_lin_noise < lower))
    for idx in bad:
        y_lin_noise[idx] = truncated_noise(y[idx], lower[idx], upper[idx])
    return y_lin_noise
def dependency(x):
    """Return slope*x + intercept plus Laplace noise, truncated into the band.

    Any draw outside the per-x [lower, upper] band is resampled by
    refine_noise, so the result contains no NaNs.
    """
    trend = slope * x + intercept
    band_low = slope / spread * 3 * x
    band_high = slope * spread / 3 * x + 2 * intercept
    noisy = trend + np.random.laplace(loc=0, scale=spread, size=len(trend))
    return refine_noise(trend, noisy, band_low, band_high)
# --- Sample x from a truncated normal, derive y -----------------------------
# NOTE: renamed from `max`/`min` — those shadowed the builtins.
X_MAX = 100
X_MIN = 1
X_MEAN = 40
X_SIGMA = 25
# truncnorm takes its bounds in standard-deviation units around loc.
x = truncnorm((X_MIN - X_MEAN) / X_SIGMA, (X_MAX - X_MEAN) / X_SIGMA,
              loc=X_MEAN, scale=X_SIGMA).rvs(5000)
y = dependency(x)

# --- Plotting ---------------------------------------------------------------
xx = np.linspace(np.nanmin(x), np.nanmax(x), 100)
yy = slope * xx + intercept
lower = slope / spread * 3 * xx
upper = slope * spread / 3 * xx + 2 * intercept

# Colour points by local density, plotting the densest points last.
xy = np.vstack([x, y])
z = gaussian_kde(xy)(xy)
idz = z.argsort()
x, y, z = x[idz], y[idz], z[idz]

fig, ax = plt.subplots(figsize=(5, 5))
plt.plot(xx, upper, 'r-.', label='upper constraint')
plt.plot(xx, lower, 'r--', label='lower constraint')
ax.scatter(x, y, c=z, s=3)
plt.xlabel(r'$\bf X_{laplace}$')
plt.ylabel(r'$\bf Y_{{derived}}$')
plt.plot(xx, yy, 'r', label='regression model')
plt.legend()
plt.tight_layout()
plt.show()
我不知道你想用它做什么,但我知道这不再是随机拉普拉斯分布,所以如果你的函数需要它,请不要这样做。