概率分布循环

Question

我有这个数据集

DF:

Type        Value       Average     SD      Q.
S           AA+         3           1       30
S           AA          2           1       30
S           A           1           1       30
S           B          -2           1       30
S           BB -       -3           1       30
F           AA+         2       0.75        30
F           AA          1       0.75        30
F           A           0       0.75        30
F           B          -1       0.75        30
F           BB -       -2       0.75        30

我想在循环中按类型（Type）和值（Value）的每种组合分别生成服从正态分布的随机样本，像这样：

# Signature of rnorm(): n draws from a normal distribution with the given
# mean and standard deviation.
rnorm(n, mean = 0, sd = 1)  

# NOTE(review): here whole columns of DF are passed. Per ?rnorm, when n has
# length > 1 its length is used as the number of draws, so this call returns
# one value per row of DF rather than Q. values per row.
rnorm(DF$Q., DF$Average, DF$SD)

我有唯一值列表

    # Distinct entries of the two grouping columns of DF.
    type_list <- unique(DF[["Type"]])
    Value_list <- unique(DF[["Value"]])


现在我尝试用循环来实现：

  # Simulate one normal sample for every (Type, Value) pair found in DF.
  # The result is a named list: each element is a numeric vector of Q. draws
  # from N(Average, SD) for that pair, e.g. probability_distributions[["S AA+"]].
  probability_distributions <- list()
  for (type in type_list) {
    for (value in Value_list) {
      # DF$Q., DF$Average and DF$SD are plain vectors, so they take a single
      # index: locate the (first) row holding this Type/Value pair.
      row <- match(TRUE, DF$Type == type & DF$Value == value)
      if (is.na(row)) {
        next  # this combination does not occur in DF, nothing to simulate
      }

      pd <- rnorm(DF$Q.[row], mean = DF$Average[row], sd = DF$SD[row])

      # Assign by name so every sample stays a separate list element instead
      # of being flattened into (or nested inside) the previous results.
      probability_distributions[[paste(type, value)]] <- pd
    }
  }

我想要这样的东西

List:

    S AA+ 
1   -3.837712 
2   -3.690301 
3   -3.837331
4    -2.302341 ....

还有另外 10 个列表

Answer 1

你可以这样做：

# Draw one sample per row of DF: Q. values from N(Average, SD).
# Map() walks the three columns in parallel and returns an empty list for a
# zero-row DF, whereas iterating over seq(nrow(DF)) would visit the
# non-existent rows 1 and 0. Each element is named "<Type> <Value>",
# e.g. "S AA+".
setNames(
  Map(rnorm, n = DF$Q., mean = DF$Average, sd = DF$SD),
  paste(DF$Type, DF$Value)
)

#> $`S AA+`
#>  [1] 1.9993416 2.2418212 2.3885642 1.6832777 0.9925850 3.7680021 3.4057613 2.1682841
#>  [9] 3.9580758 4.8107193 0.4499855 4.1502551 4.2278937 3.1521032 2.4769162 1.8984066
#> [17] 2.1202697 2.3632997 0.8940686 2.3537416 3.9867934 2.2450999 4.5652049 2.9499507
#> [25] 3.1110287 3.4754710 2.0609961 2.2259544 2.5764415 4.7728795
#> 
#> $`S AA`
#>  [1]  2.42655066  1.90552983  0.88340457  1.91256485  2.39252609  1.63937783
#>  [7]  1.11201564  1.05358345  1.78008844  2.34222012  2.96992413  0.66454686
#> [13]  1.19912052 -0.04679634  2.11005622  2.71610037  2.25412060  3.22876219
#> [19]  2.58340401  1.41287523  3.44666536  2.44339404  2.57794689  1.07816504
#> [25]  1.75067329  0.77810135  1.92746035  3.36490125  1.54246898  2.06520022
#> 
#> $`S A`
#>  [1]  1.783980336 -0.587708073 -0.369364313  0.072480197  1.300841560  1.080468946
#>  [7]  2.246746831 -0.234449100  0.560930706 -1.101593953  2.618756171  2.084328491
#> [13]  0.359199093  0.747180174  0.865170727  2.795355992  1.038396717  1.412998289
#> [19]  1.699572123  1.689790945 -0.671059465  1.740048308  0.075875101  0.968311427
#> [25] -0.927792982  1.214303030 -0.005038866 -1.178953492 -0.672549131 -0.420722136
#> 
#> $`S B`
#>  [1] -1.44946836 -1.68956682 -1.31492609 -2.53191049 -1.81821454 -1.58840382
#>  [7] -2.08505905 -3.10670620 -0.87086640 -0.33198438  0.01910293 -1.10745196
#> [13] -2.18720468 -2.12769742 -2.30533014 -2.26286684 -2.05146864 -3.97266336
#> [19] -1.98877175 -1.76465514 -2.95036985 -3.75714798 -2.35996065 -5.12158956
#> [25] -0.32745289 -1.30945018 -2.97667032 -1.98486582 -2.16545418 -3.66021337
#> 
#> $`S BB-`
#>  [1] -3.878434 -2.034184 -4.155821 -3.751396 -3.745084 -1.772948 -3.190858 -2.445689
#>  [9] -2.228567 -3.380067 -4.128551 -2.829898 -3.358542 -1.557062 -3.519947 -3.310642
#> [17] -2.317263 -3.663578 -3.017951 -2.503409 -3.404275 -4.211649 -2.687256 -3.279862
#> [25] -5.019855 -2.730421 -2.868201 -4.678771 -3.525880 -3.175125
#> 
#> $`F AA+`
#>  [1] 1.5825269 2.4138863 0.8179614 1.2842804 1.6626024 3.0829298 1.8835594 1.2337108
#>  [9] 3.1538523 1.8266180 2.4139429 2.8413455 2.3071590 2.9751961 1.4068090 3.1989646
#> [17] 0.6328248 1.2684777 1.5601545 2.1748322 1.6449135 2.4373332 2.3150221 2.5091457
#> [25] 3.1118458 0.9310370 2.7274812 1.8009007 1.3976708 0.6672244
#> 
#> $`F AA`
#>  [1]  0.363222331  0.775391336 -0.455183359  0.729975409  1.382579640  0.026522186
#>  [7]  0.996364448 -0.008639176  0.961236861  1.671137345 -0.634911705  2.729812324
#> [13]  0.124187233  1.705322289  2.559326197  0.292131983  0.493409391  1.766237746
#> [19]  0.386872427  0.282159449  2.185839460  2.324832101  0.829723631  2.710832646
#> [25]  2.427810412  0.948533848  0.389605646  0.495058514  2.051522848  1.405012456
#> 
#> $`F A`
#>  [1]  0.481705627 -0.009539824  0.137159665 -0.366385935  0.851427552 -0.244538267
#>  [7]  1.493896900  0.440079671  0.741249918  1.106717951 -0.035215035 -1.325648324
#> [13] -0.457225479  0.444942684  0.902540415  0.156192620  0.629354519  0.707281075
#> [19]  0.771069839  0.560672883 -0.143570299  0.768517623 -0.378166481  0.261411645
#> [25] -0.382030406  0.358368343  0.375739047 -0.079185388  0.020481554  0.325286853
#> 
#> $`F B`
#>  [1] -1.84829836 -0.23448482 -0.79804428 -0.58858852 -1.12706587 -2.40883019
#>  [7] -0.43876960 -1.19507511 -0.53630451  0.53595272 -1.86671863  0.01470606
#> [13] -1.27564149 -1.04373285 -1.39916357 -1.37387536 -1.86260468 -0.90531931
#> [19] -0.64535208 -1.13989391 -2.21446484 -1.30206928 -0.69039082 -1.54053955
#> [25] -1.44254892 -1.87721996 -0.55640752 -1.50147921  1.37595324 -0.24022044
#> 
#> $`F BB-`
#>  [1] -2.1244905 -1.9981876 -2.5379765 -1.7889965 -1.9945386 -3.2972752 -3.3826052
#>  [8] -1.5490977 -3.7384532 -2.2852341 -0.7586263 -3.2471413 -1.7789528 -0.8225739
#> [15] -1.6160398 -1.0355732 -2.7256361 -1.5257080 -1.8626910 -0.9874129 -0.1345042
#> [22] -1.9140037 -1.1781947 -2.3020324 -2.2693023 -2.4145912 -3.0160062 -1.9449959
#> [29] -2.0292414 -1.4585976

Answer 2

请务必提供可以直接复制粘贴运行的可复现示例（reproducible example）。这里我先替你构造了一个最小示例。

library(dplyr)
library(data.table)
# Minimal example input: three types, each with its own value, SD and mean.
Type <- seq_len(3L)
Value <- seq.int(11L, 13L)
SD <- seq(from = 0.1, to = 0.3, by = 0.1)
mean <- c(1, 11, 21)
df <- data.frame(Type = Type, Value = Value, SD = SD, mean = mean)
> df
  Type Value  SD mean
1    1    11 0.1    1
2    2    12 0.2   11
3    3    13 0.3   21

你想为 Type 和 Value 的每一种可能组合各生成一组 rnorm 随机数；在我的最小示例中，一共有 9 种组合。expand.grid 可以替你完成枚举组合的繁重工作。

# Number of Type x Value combinations in the minimal example.
expand.grid(unique(df$Type), unique(df$Value)) %>% nrow()
#> [1] 9

首先，你需要构建一个包含所有唯一组合的 data.frame，并为每一行填入正确的均值（mean）和 SD。然后只需调用 lapply，即可得到包含预期结果的 list。

# Get the unique combinations of Type and Value. The columns are read from
# df; expand.grid() varies its first argument fastest.
unique.df <- expand.grid(unique(df$Type), unique(df$Value))
colnames(unique.df) <- c("Type", "Value")
# I suppose the mean and SD are determined by the type, so look them up in
# df by Type.
unique.df$mean <- df$mean[match(unique.df$Type, df$Type)]
unique.df$SD <- df$SD[match(unique.df$Type, df$Type)]
# Convert to a list holding one single-row data.frame per combination,
# keeping the row names as element names. seq_len() also copes with a
# zero-row unique.df, where seq(nrow(...)) would give c(1, 0).
unique.list <- setNames(
  split(unique.df, seq_len(nrow(unique.df))),
  rownames(unique.df)
)
# One sample per combination; Value is passed as the number of draws.
probability_distributions <- lapply(
  unique.list,
  function(x) rnorm(x[["Value"]], x[["mean"]], x[["SD"]])
)

请注意，我首先创建了一个数据框，其中包含调用该函数所需的全部正确信息。

> head(unique.df)
  Type Value probability_distributions mean  SD
1    1    11                 11.046879    1 0.1
2    2    11                  9.803352   11 0.2
3    3    11                 11.799637   21 0.3

然后我把 unique.df 这个 data.frame 按行拆分成一个列表，并保留行名作为列表元素的名称（原文此处附有解释该做法的链接）。

最后，无需任何显式循环，我就可以对这个列表调用 lapply。大部分困难在于知道如何在计算结果之前先整理好数据；这样就可以避免循环，并使用 lapply 这类相对优雅的写法。

完整代码：

library(dplyr)

# Minimal reproducible input: three types, each with its own value, SD and
# mean.
Type <- 1:3
Value <- 11:13
SD <- seq(0.1, 0.3, by = 0.1)
mean <- c(1, 11, 21)
df <- data.frame(Type, Value, SD, mean)

# Get the unique combinations of Type and Value (3 x 3 = 9 rows here).
unique.df <- expand.grid(unique(df$Type), unique(df$Value))
colnames(unique.df) <- c("Type", "Value")

# I suppose the mean and SD are determined by the type, so look them up in
# df by Type.
unique.df$mean <- df$mean[match(unique.df$Type, df$Type)]
unique.df$SD <- df$SD[match(unique.df$Type, df$Type)]

# Convert to a list holding one single-row data.frame per combination,
# keeping the row names as element names.
unique.list <- setNames(
  split(unique.df, seq_len(nrow(unique.df))),
  rownames(unique.df)
)

# One sample per combination; Value is passed as the number of draws.
probability_distributions <- lapply(
  unique.list,
  function(x) rnorm(x[["Value"]], x[["mean"]], x[["SD"]])
)

概率分布循环

Probability distribution looping

loops

for-loop

r

distribution