将每行中大于零的值放入一个列表的列表中

Put the values in a row which are larger than zero in a list of lists

我有如下数据:

# Example input (dput output): one row per `rn` label, one integer count
# column per interval, and a numeric `Sum_col` holding the row total.
dat_in <- structure(
  list(
    rn = c("W", "M"),
    `      0` = c(0L, 0L),
    `[      0,     25)` = c(5L, 0L),
    `[     25,     50)` = c(0L, 0L),
    `[     25,    100)` = c(38L, 3L),
    `[     50,    100)` = c(0L, 0L),
    `[    100,    250)` = c(43L, 5L),
    `[    100,    500)` = c(0L, 0L),
    `[    250,    500)` = c(27L, 12L),
    `[    500,   1000)` = c(44L, 0L),
    `[    500,1000000]` = c(0L, 53L),
    `[   1000,   1500)` = c(0L, 0L),
    `[   1000,1000000]` = c(20L, 0L),
    `[   1500,   3000)` = c(0L, 0L),
    `[   3000,1000000]` = c(0L, 0L),
    Sum_col = c(177, 73)
  ),
  row.names = 1:2,
  class = c("data.table", "data.frame")
)

  rn       0 [      0,     25) [     25,     50) [     25,    100) [     50,    100) [    100,    250) [    100,    500) [    250,    500) [    500,   1000)
1  W       0                 5                 0                38                 0                43                 0                27                44
2  M       0                 0                 0                 3                 0                 5                 0                12                 0
  [    500,1000000] [   1000,   1500) [   1000,1000000] [   1500,   3000) [   3000,1000000] Sum_col
1                 0                 0                20                 0                 0     177
2                53                 0                 0                 0                 0      73

我想创建一个列表的列表,其中每个元素包含对应行中的非零值。对于第一行和第二行,结果应为:

# Desired result: a two-row data frame with a single list column `freq`,
# where each element holds the non-zero values of the matching input row.
dat_out <- structure(
  list(
    freq = list(
      a = c(5, 38, 43, 27, 44, 20, 177),
      b = c(3, 5, 12, 53, 73)
    )
  ),
  row.names = c(NA, -2L),
  class = "data.frame"
)

                        freq
1 5, 38, 43, 27, 44, 20, 177
2           3, 5, 12, 53, 73

最好的方法是什么?

您可以尝试类似的方法:

library(data.table)
library(magrittr)

# For every row label: pick the matching row(s), flip them into columns,
# and keep the strictly positive entries as a numeric vector.
lapply(dat_in$rn, function(label) {
  flipped <- transpose(dat_in[rn == label]) # V1 holds the first matching row
  values <- as.numeric(flipped[2:.N, V1])   # entry 1 is the rn label; skip it
  values[which(values > 0)]
})

[[1]]
[1]   5  38  43  27  44  20 177

[[2]]
[1]  3  5 12 53 73

这是一个简洁的解决方案:

# Example input (dput output): one row per `rn` label, one integer count
# column per interval, and a numeric `Sum_col` holding the row total.
dat_in <- structure(
  list(
    rn = c("W", "M"),
    `      0` = c(0L, 0L),
    `[      0,     25)` = c(5L, 0L),
    `[     25,     50)` = c(0L, 0L),
    `[     25,    100)` = c(38L, 3L),
    `[     50,    100)` = c(0L, 0L),
    `[    100,    250)` = c(43L, 5L),
    `[    100,    500)` = c(0L, 0L),
    `[    250,    500)` = c(27L, 12L),
    `[    500,   1000)` = c(44L, 0L),
    `[    500,1000000]` = c(0L, 53L),
    `[   1000,   1500)` = c(0L, 0L),
    `[   1000,1000000]` = c(20L, 0L),
    `[   1500,   3000)` = c(0L, 0L),
    `[   3000,1000000]` = c(0L, 0L),
    Sum_col = c(177, 73)
  ),
  row.names = 1:2,
  class = c("data.table", "data.frame")
)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
# Step 1: per row, gather every column except `rn` into one list-column entry.
# Step 2: per row again, keep only the strictly positive values.
out <- dat_in |>
  rowwise() |>
  summarise(
    rn = rn,
    freq = list(c_across(-rn))
  ) |>
  rowwise() |>
  mutate(freq = list(freq[which(freq > 0)]))
out
#> # A tibble: 2 × 2
#> # Rowwise: 
#>   rn    freq     
#>   <chr> <list>   
#> 1 W     <dbl [7]>
#> 2 M     <dbl [5]>
out$freq
#> [[1]]
#> [1]   5  38  43  27  44  20 177
#> 
#> [[2]]
#> [1]  3  5 12 53 73

由 reprex 包 (v2.0.1) 创建于 2022-04-22

使用 base R,您可以使用循环,但这可能会很慢,具体取决于数据集的大小:

# Collect the non-zero values of every row into a list, one element per row.
out <- vector("list", nrow(dat_in)) # preallocated storage
# seq_len() yields no iterations for a zero-row input, whereas 1:nrow()
# would run over c(1, 0).
for (i in seq_len(nrow(dat_in))) {
  vec <- as.numeric(dat_in[i, -1]) # numbers from the row, first column dropped
  out[[i]] <- vec[vec != 0] # keep the non-zero numbers
}

您可以使用 apply 执行相同的操作,这可能会更快:

# Row-wise extraction of the non-zero values, with the first column dropped.
# Each row reaches the function as a vector `x`; as.numeric() converts the
# remaining entries back to numbers.
# simplify = FALSE (R >= 4.1) keeps the result a list even when every row
# happens to yield the same number of values; the default would collapse
# that case into a matrix or vector.
out <- apply(dat_in, 1, function(x) {
  vec <- as.numeric(x[-1])
  vec[vec != 0]
}, simplify = FALSE)

使用 toString(注意:结果是以逗号分隔的字符串,且不包含 Sum_col 列)。

# Collapse the non-zero counts of each row into one comma-separated string.
# The first and last columns (rn and Sum_col) are dropped before collapsing.
# as.data.frame() comes first because single-index `[` on a data.table
# selects rows, not columns, when data.table is attached.
apply(
  as.data.frame(dat_in)[-c(1, length(dat_in))],
  1,
  \(x) toString(x[x != 0])
) |>
  as.data.frame() |> setNames('freq')
#                    freq
# 1 5, 38, 43, 27, 44, 20
# 2          3, 5, 12, 53