将长数据 table 重塑为宽数据列表 tables

Reshape long data table to list of wide data tables

我的问题是这里提出的问题的扩展 How to reshape data from long to wide format 所以我会用类似的方式表达它。

区别是我想把一个长数据table重新排列成一个list宽数据tables。

dat <- data.table(
    sim = rep(c(1,2), each=4),
    time = rep(1:4, 2),
    value1 = rnorm(8),
    value2 = rnorm(8)
    )

dat
   sim  time      value1  value2
1    1     1      0.3407  0.5167
2    1     2     -0.7033  0.8416
3    1     3     -0.3795 -0.4717
4    1     4     -0.7460  0.8479
5    2     1      0.8981 -0.7163
6    2     2     -0.3347 -0.6849
7    2     3      0.5013  0.8941
8    2     4     -0.1745  0.0795

我想重塑它,以便我有一个 list 的宽数据 table 命名为 value1value2 ... value99 等...

l = list()


l[["value1"]]

    sim        1       2       3       4
1     1   0.3407 -0.7033 -0.3795 -0.7460
5     2  -0.8981 -0.3347 -0.5013 -0.1745

l[["value2"]]

    sim        1       2       3       4
1     1   0.5167  0.8416 -0.4717  0.8479
5     2  -0.7163 -0.6849  0.8941  0.0795

我对这个问题的解决方案是创建结果的嵌套数据集。我已经提供了方法的简要说明,然后是 reprex。

我会使用 pivot_wider()pivot_longer() 重塑数据。 pivot_longer 首先用于使每一行仅包含 1 个值,并带有时间、模拟以及值一或二的标签。然后使用 pivot_wider 每行将包含每次带有模拟标签的值以及它们是哪一组值。 (值 1 或值 2)。

最后,我们使用 nest 嵌套数据框,它将每组值的所有数据存储在数据框中。如有必要,nested_vals$data 可以将其作为数据帧数组访问,其中 nested_vals 是我们将嵌套数据帧分配给的对象。

library(tidyverse)

#Setup data
dat <- data.frame(
  sim = rep(c(1,2), each=4),
  time = rep(1:4, 2),
  value1 = rnorm(8),
  value2 = rnorm(8)
)

# Construct nested dataframe
nested_vals <- dat %>%
  # Format dataset in tidy format
  pivot_longer(cols = c(value1, value2)) %>% 
  # Move the name of the data to the beginning of the dataframe
  relocate(name) %>% 
  # Pivot to matrix form as requested (i.e. times as columns, sims as rows)
  pivot_wider(id_cols = c(name, sim), names_from = time, values_from = value) %>% 
  # Nest results by name
  nest(-name)
#> Warning: All elements of `...` must be named.
#> Did you want `data = c(sim, `1`, `2`, `3`, `4`)`?

nested_vals
#> # A tibble: 2 x 2
#>   name   data                
#>   <chr>  <list>              
#> 1 value1 <tibble[,5] [2 x 5]>
#> 2 value2 <tibble[,5] [2 x 5]>

nested_vals$data[[2]]
#> # A tibble: 2 x 5
#>     sim     `1`      `2`    `3`    `4`
#>   <dbl>   <dbl>    <dbl>  <dbl>  <dbl>
#> 1     1  0.0639 0.250    -1.28   0.850
#> 2     2 -1.90   0.000421  0.704 -0.164

reprex package (v2.0.0)

于 2021-04-07 创建

一个 tidyverse 解决方案可以是:

library(dplyr)
library(purrr)
library(tidyr)

dat_longer <- dat %>%
  tidyr::pivot_longer(starts_with("value"), names_to="col_name", values_to="values")

list_wide <- purrr::map(unique(dat_longer[["col_name"]]),
                         ~dat_longer %>%
                           dplyr::filter(col_name==.x) %>%
                           tidyr::pivot_wider(values_from = "values", names_from="time") %>% 
                           select(-col_name)) %>% 
  purrr::set_names(unique(dat_longer[["col_name"]]))

$value1
# A tibble: 2 x 5
    sim    `1`    `2`    `3`    `4`
  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
1     1 -0.710 -0.334 -0.370  0.777
2     2  0.130  0.877  1.24  -0.202

$value2
# A tibble: 2 x 5
    sim    `1`    `2`    `3`    `4`
  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
1     1 -0.719 -0.909 0.0821 -0.158
2     2 -0.706  1.51  0.234   1.09 

两个变体。

data.table

library(data.table)
tmp <- dcast(melt(as.data.table(dat), id = c("sim", "time")), sim + variable ~ time)
tmp <- split(tmp, tmp$variable)
tmp <- lapply(tmp, set, i = NULL, j = "variable", value = NULL)
tmp
# $value1
#      sim             1          2          3          4
#    <num>         <num>      <num>      <num>      <num>
# 1:     1  1.0458737762 -0.4845954  0.1891288 0.05100633
# 2:     2 -0.0002406689  1.8093820 -0.8253280 1.14547045
# $value2
#      sim           1          2           3         4
#    <num>       <num>      <num>       <num>     <num>
# 1:     1  0.03157319 -0.8352058 -0.06876365 0.7467717
# 2:     2 -0.42551873 -0.7720822  0.15276411 0.9885968

我也经常把magrittr::%>%data.table一起使用,所以可以转换成

library(data.table)
library(magrittr) # if %>% is not already available
as.data.table(dat) %>%
  melt(., id = c("sim", "time")) %>%
  dcast(., sim + variable ~ time) %>%
  split(., .$variable) %>%
  lapply(., set, i = NULL, j = "variable", value = NULL)
# $value1
#      sim             1          2          3          4
#    <num>         <num>      <num>      <num>      <num>
# 1:     1  1.0458737762 -0.4845954  0.1891288 0.05100633
# 2:     2 -0.0002406689  1.8093820 -0.8253280 1.14547045
# $value2
#      sim           1          2           3         4
#    <num>       <num>      <num>       <num>     <num>
# 1:     1  0.03157319 -0.8352058 -0.06876365 0.7467717
# 2:     2 -0.42551873 -0.7720822  0.15276411 0.9885968

整洁宇宙

library(dplyr)
library(tidyr) # pivot_longer, pivot_wider
dat %>%
  pivot_longer(., -c(sim, time)) %>%
  pivot_wider(., names_from = time, values_from = value) %>%
  split(., .$name) %>%
  lapply(., select, -name)
# $value1
# # A tibble: 2 x 5
#     sim       `1`    `2`    `3`    `4`
#   <dbl>     <dbl>  <dbl>  <dbl>  <dbl>
# 1     1  1.05     -0.485  0.189 0.0510
# 2     2 -0.000241  1.81  -0.825 1.15  
# $value2
# # A tibble: 2 x 5
#     sim     `1`    `2`     `3`   `4`
#   <dbl>   <dbl>  <dbl>   <dbl> <dbl>
# 1     1  0.0316 -0.835 -0.0688 0.747
# 2     2 -0.426  -0.772  0.153  0.989

另一种方式,使用单管道语法

library(tidyverse)
dat %>% pivot_longer(c(value1, value2)) %>%
  group_split(name) %>% setNames(map(., ~.x[[3]][1])) %>%
  map(~ .x %>% pivot_wider(id_cols = sim, names_from = time, values_from = value))

$value1
# A tibble: 2 x 5
    sim    `1`     `2`    `3`    `4`
  <dbl>  <dbl>   <dbl>  <dbl>  <dbl>
1     1 -0.851 -0.0484 -0.656 -0.121
2     2 -0.645  1.59   -0.274  0.445

$value2
# A tibble: 2 x 5
    sim   `1`    `2`    `3`   `4`
  <dbl> <dbl>  <dbl>  <dbl> <dbl>
1     1  1.46 -1.62  -0.672 1.43 
2     2  1.65  0.790  0.495 0.162

另一种方法:

library(dplyr)
library(tidyr)
wide_dat <- dat %>% pivot_wider(id_cols = sim, names_from = time, values_from = starts_with('value'))
lapply(lapply(split.default(wide_dat[-1], sub('_\d','',names(wide_dat[-1]))), function(x) cbind(wide_dat[1],x)), setNames, c('sim', 1:4))
$value1
  sim          1         2        3         4
1   1 -0.1704969 0.2820143 1.181898 2.2377396
2   2  2.1920534 0.8214070 0.421177 0.7601796

$value2
  sim          1         2          3          4
1   1  0.1760887 0.3440053 -0.8435849  0.6729751
2   2 -0.1714095 1.5125986 -0.5739871 -0.9648294