将每行中大于零的值放入列表的列表中
Put the values in a row which are larger than zero in a list of lists
我有如下数据:
# Example input: one row per group label (`rn`), one integer count column per
# interval label, and `Sum_col` holding the row totals (177 and 73).
dat_in <- structure(
  list(
    rn = c("W", "M"),
    ` 0` = c(0L, 0L),
    `[ 0, 25)` = c(5L, 0L),
    `[ 25, 50)` = c(0L, 0L),
    `[ 25, 100)` = c(38L, 3L),
    `[ 50, 100)` = c(0L, 0L),
    `[ 100, 250)` = c(43L, 5L),
    `[ 100, 500)` = c(0L, 0L),
    `[ 250, 500)` = c(27L, 12L),
    `[ 500, 1000)` = c(44L, 0L),
    `[ 500,1000000]` = c(0L, 53L),
    `[ 1000, 1500)` = c(0L, 0L),
    `[ 1000,1000000]` = c(20L, 0L),
    `[ 1500, 3000)` = c(0L, 0L),
    `[ 3000,1000000]` = c(0L, 0L),
    Sum_col = c(177, 73)
  ),
  row.names = 1:2,
  class = c("data.table", "data.frame")
)
rn 0 [ 0, 25) [ 25, 50) [ 25, 100) [ 50, 100) [ 100, 250) [ 100, 500) [ 250, 500) [ 500, 1000)
1 W 0 5 0 38 0 43 0 27 44
2 M 0 0 0 3 0 5 0 12 0
[ 500,1000000] [ 1000, 1500) [ 1000,1000000] [ 1500, 3000) [ 3000,1000000] Sum_col
1 0 0 20 0 0 177
2 53 0 0 0 0 73
我想创建一个列表的列表,其中每个元素包含对应行中的非零值。对于第一行和第二行,结果应为:
# Desired result: a data frame with a single list-column `freq`; each element
# holds the non-zero values of one input row.
dat_out <- structure(
  list(
    freq = list(
      a = c(5, 38, 43, 27, 44, 20, 177),
      b = c(3, 5, 12, 53, 73)
    )
  ),
  row.names = c(NA, -2L),
  class = "data.frame"
)
freq
1 5, 38, 43, 27, 44, 20, 177
2 3, 5, 12, 53, 73
最好的方法是什么?
您可以尝试类似的方法:
library(data.table)
library(magrittr)
# For each label in `rn`: isolate the matching row, flip it into a single
# column, drop the first entry (the label itself), convert the rest to numbers
# and keep only the values greater than zero.
lapply(dat_in$rn, function(label) {
  flipped <- dat_in[rn == label] %>% transpose()
  counts <- flipped[2:.N, V1] %>% as.numeric()
  counts[which(counts > 0)]
})
[[1]]
[1] 5 38 43 27 44 20 177
[[2]]
[1] 3 5 12 53 73
这是一个简洁的解决方案:
# Example input: one row per group label (`rn`), one integer count column per
# interval label, and `Sum_col` holding the row totals (177 and 73).
dat_in <- structure(
  list(
    rn = c("W", "M"),
    ` 0` = c(0L, 0L),
    `[ 0, 25)` = c(5L, 0L),
    `[ 25, 50)` = c(0L, 0L),
    `[ 25, 100)` = c(38L, 3L),
    `[ 50, 100)` = c(0L, 0L),
    `[ 100, 250)` = c(43L, 5L),
    `[ 100, 500)` = c(0L, 0L),
    `[ 250, 500)` = c(27L, 12L),
    `[ 500, 1000)` = c(44L, 0L),
    `[ 500,1000000]` = c(0L, 53L),
    `[ 1000, 1500)` = c(0L, 0L),
    `[ 1000,1000000]` = c(20L, 0L),
    `[ 1500, 3000)` = c(0L, 0L),
    `[ 3000,1000000]` = c(0L, 0L),
    Sum_col = c(177, 73)
  ),
  row.names = 1:2,
  class = c("data.table", "data.frame")
)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Step 1: collapse every row into `rn` plus a list-column that stores all of
# the remaining values of that row.
# Step 2: go row by row again and trim each stored vector to its entries that
# are greater than zero.
out <- local({
  per_row <- summarise(rowwise(dat_in), rn = rn, freq = list(c_across(-rn)))
  mutate(rowwise(per_row), freq = list(freq[which(freq > 0)]))
})
out
#> # A tibble: 2 × 2
#> # Rowwise:
#> rn freq
#> <chr> <list>
#> 1 W <dbl [7]>
#> 2 M <dbl [5]>
out$freq
#> [[1]]
#> [1] 5 38 43 27 44 20 177
#>
#> [[2]]
#> [1] 3 5 12 53 73
由 reprex package (v2.0.1) 创建于 2022-04-22
使用 base R,您可以使用循环,但这可能会很慢,具体取决于数据集的大小:
# Collect the non-zero values of every row into a list, one element per row.
# The list is allocated at its final length up front rather than being grown
# inside the loop.
out <- vector("list", nrow(dat_in))
# seq_len() gives an empty sequence for a zero-row table, whereas 1:nrow()
# would count down through c(1, 0) and index rows that do not exist.
for (i in seq_len(nrow(dat_in))) {
  vec <- as.numeric(dat_in[i, -1])  # row values without the first column (rn)
  out[[i]] <- vec[vec != 0]         # keep only the non-zero entries
}
您可以使用 apply 执行相同的操作,这可能会更快:
# Collect the non-zero values of every row as a list with one numeric vector
# per row.
# The first column (rn) is dropped before apply() sees the data: apply()
# converts a data frame to a matrix, and with a character column present every
# number would be turned into text first.
# simplify = FALSE (R >= 4.1) keeps the result a list even when all rows happen
# to contain the same number of non-zero values; the default would collapse
# that case into a matrix.
out <- apply(as.matrix(as.data.frame(dat_in)[-1]), 1, function(x) {
  vec <- as.numeric(x)  # also strips the column names carried by the row
  vec[vec != 0]
}, simplify = FALSE)
使用 toString(此处排除了第一列 rn 和最后一列 Sum_col):
# Drop the first and last columns, join each row's non-zero values into one
# comma-separated string, and return the strings as a one-column data frame
# named `freq`.
setNames(
  as.data.frame(
    apply(
      dat_in[-c(1, length(dat_in))],
      1,
      function(x) toString(x[x != 0])
    )
  ),
  "freq"
)
# freq
# 1 5, 38, 43, 27, 44, 20
# 2 3, 5, 12, 53
我有如下数据:
# Example input: one row per group label (`rn`), one integer count column per
# interval label, and `Sum_col` holding the row totals (177 and 73).
dat_in <- structure(
  list(
    rn = c("W", "M"),
    ` 0` = c(0L, 0L),
    `[ 0, 25)` = c(5L, 0L),
    `[ 25, 50)` = c(0L, 0L),
    `[ 25, 100)` = c(38L, 3L),
    `[ 50, 100)` = c(0L, 0L),
    `[ 100, 250)` = c(43L, 5L),
    `[ 100, 500)` = c(0L, 0L),
    `[ 250, 500)` = c(27L, 12L),
    `[ 500, 1000)` = c(44L, 0L),
    `[ 500,1000000]` = c(0L, 53L),
    `[ 1000, 1500)` = c(0L, 0L),
    `[ 1000,1000000]` = c(20L, 0L),
    `[ 1500, 3000)` = c(0L, 0L),
    `[ 3000,1000000]` = c(0L, 0L),
    Sum_col = c(177, 73)
  ),
  row.names = 1:2,
  class = c("data.table", "data.frame")
)
rn 0 [ 0, 25) [ 25, 50) [ 25, 100) [ 50, 100) [ 100, 250) [ 100, 500) [ 250, 500) [ 500, 1000)
1 W 0 5 0 38 0 43 0 27 44
2 M 0 0 0 3 0 5 0 12 0
[ 500,1000000] [ 1000, 1500) [ 1000,1000000] [ 1500, 3000) [ 3000,1000000] Sum_col
1 0 0 20 0 0 177
2 53 0 0 0 0 73
我想创建一个列表的列表,其中每个元素包含对应行中的非零值。对于第一行和第二行,结果应为:
# Desired result: a data frame with a single list-column `freq`; each element
# holds the non-zero values of one input row.
dat_out <- structure(
  list(
    freq = list(
      a = c(5, 38, 43, 27, 44, 20, 177),
      b = c(3, 5, 12, 53, 73)
    )
  ),
  row.names = c(NA, -2L),
  class = "data.frame"
)
freq
1 5, 38, 43, 27, 44, 20, 177
2 3, 5, 12, 53, 73
最好的方法是什么?
您可以尝试类似的方法:
library(data.table)
library(magrittr)
# For each label in `rn`: isolate the matching row, flip it into a single
# column, drop the first entry (the label itself), convert the rest to numbers
# and keep only the values greater than zero.
lapply(dat_in$rn, function(label) {
  flipped <- dat_in[rn == label] %>% transpose()
  counts <- flipped[2:.N, V1] %>% as.numeric()
  counts[which(counts > 0)]
})
[[1]]
[1] 5 38 43 27 44 20 177
[[2]]
[1] 3 5 12 53 73
这是一个简洁的解决方案:
# Example input: one row per group label (`rn`), one integer count column per
# interval label, and `Sum_col` holding the row totals (177 and 73).
dat_in <- structure(
  list(
    rn = c("W", "M"),
    ` 0` = c(0L, 0L),
    `[ 0, 25)` = c(5L, 0L),
    `[ 25, 50)` = c(0L, 0L),
    `[ 25, 100)` = c(38L, 3L),
    `[ 50, 100)` = c(0L, 0L),
    `[ 100, 250)` = c(43L, 5L),
    `[ 100, 500)` = c(0L, 0L),
    `[ 250, 500)` = c(27L, 12L),
    `[ 500, 1000)` = c(44L, 0L),
    `[ 500,1000000]` = c(0L, 53L),
    `[ 1000, 1500)` = c(0L, 0L),
    `[ 1000,1000000]` = c(20L, 0L),
    `[ 1500, 3000)` = c(0L, 0L),
    `[ 3000,1000000]` = c(0L, 0L),
    Sum_col = c(177, 73)
  ),
  row.names = 1:2,
  class = c("data.table", "data.frame")
)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Step 1: collapse every row into `rn` plus a list-column that stores all of
# the remaining values of that row.
# Step 2: go row by row again and trim each stored vector to its entries that
# are greater than zero.
out <- local({
  per_row <- summarise(rowwise(dat_in), rn = rn, freq = list(c_across(-rn)))
  mutate(rowwise(per_row), freq = list(freq[which(freq > 0)]))
})
out
#> # A tibble: 2 × 2
#> # Rowwise:
#> rn freq
#> <chr> <list>
#> 1 W <dbl [7]>
#> 2 M <dbl [5]>
out$freq
#> [[1]]
#> [1] 5 38 43 27 44 20 177
#>
#> [[2]]
#> [1] 3 5 12 53 73
由 reprex package (v2.0.1) 创建于 2022-04-22
使用 base R,您可以使用循环,但这可能会很慢,具体取决于数据集的大小:
# Collect the non-zero values of every row into a list, one element per row.
# The list is allocated at its final length up front rather than being grown
# inside the loop.
out <- vector("list", nrow(dat_in))
# seq_len() gives an empty sequence for a zero-row table, whereas 1:nrow()
# would count down through c(1, 0) and index rows that do not exist.
for (i in seq_len(nrow(dat_in))) {
  vec <- as.numeric(dat_in[i, -1])  # row values without the first column (rn)
  out[[i]] <- vec[vec != 0]         # keep only the non-zero entries
}
您可以使用 apply 执行相同的操作,这可能会更快:
# Collect the non-zero values of every row as a list with one numeric vector
# per row.
# The first column (rn) is dropped before apply() sees the data: apply()
# converts a data frame to a matrix, and with a character column present every
# number would be turned into text first.
# simplify = FALSE (R >= 4.1) keeps the result a list even when all rows happen
# to contain the same number of non-zero values; the default would collapse
# that case into a matrix.
out <- apply(as.matrix(as.data.frame(dat_in)[-1]), 1, function(x) {
  vec <- as.numeric(x)  # also strips the column names carried by the row
  vec[vec != 0]
}, simplify = FALSE)
使用 toString(此处排除了第一列 rn 和最后一列 Sum_col):
# Drop the first and last columns, join each row's non-zero values into one
# comma-separated string, and return the strings as a one-column data frame
# named `freq`.
setNames(
  as.data.frame(
    apply(
      dat_in[-c(1, length(dat_in))],
      1,
      function(x) toString(x[x != 0])
    )
  ),
  "freq"
)
# freq
# 1 5, 38, 43, 27, 44, 20
# 2 3, 5, 12, 53