使用 R 将行转换为新列

Question

样本是这样的：

# A tibble: 10 x 18
   trip_id position_lon_1 position_lat_1 position_lon_2 position_lat_2 position_lon_3 position_lat_3 position_lon_4 position_lat_4 position_lon_5
     <int>          <dbl>          <dbl>          <dbl>          <dbl>          <dbl>          <dbl>          <dbl>          <dbl>          <dbl>
 1       1           101.           13.7           101.           13.7           101.           13.7           101.           13.7           101.
 2       2           101.           13.7           101.           13.7           101.           13.7           101.           13.7           101.
 3       3           101.           13.8           101.           13.7           101.           13.7           101.           13.7           101.
 4       4           101.           13.8           101.           13.8           101.           13.8           101.           13.7           101.
 5       5           101.           13.8           101.           13.8           101.           13.8           101.           13.8           101.
 6       6           101.           13.7           101.           13.7           101.           13.7           101.           13.7           101.
 7       7           101.           13.7           101.           13.7           101.           13.7           101.           13.7           101.
 8    1977           101.           13.7           101.           13.7           101.           13.7           101.           13.7           101.
 9    1978           100.           13.7           101.           13.7           101.           13.7           101.           13.7           101.
10    1979           101.           13.8           101.           13.8           101.           13.8           101.           13.7           101.
# ... with 8 more variables: position_lat_5 <dbl>, position_lon_6 <dbl>, position_lat_6 <dbl>, start_time1 <dbl>, end_time1 <dbl>, length <dbl>,
#   purpose <chr>, clusterNum <dbl>

example <- structure(list(trip_id = c(1, 2, 3, 4, 5, 6, 7, 1977, 1978, 1979), position_lon_1 = c(100.5423, 100.53881, 100.5824, 100.53724,100.52715, 100.50491, 100.51813, 100.57668, 100.49228, 100.52679), position_lat_1 = c(13.7444, 13.71867, 13.75446, 13.76453,13.76981, 13.73128, 13.73133, 13.73969, 13.7447, 13.76592), position_lon_2 = c(100.54767,100.53822, 100.57764, 100.53057, 100.53335, 100.50947, 100.52216,100.57951, 100.50324, 100.52661), position_lat_2 = c(13.74409,13.72446, 13.74911, 13.76827, 13.75848, 13.72012, 13.73132, 13.73407,13.74693, 13.76381), position_lon_3 = c(100.54828, 100.54081,100.58012, 100.53755, 100.53717, 100.53354, 100.52927, 100.58263,100.51611, 100.52553), position_lat_3 = c(13.74762, 13.72764,13.74582, 13.75122, 13.76324, 13.72289, 13.73678, 13.73423, 13.73462,13.75167), position_lon_4 = c(100.54839, 100.55916, 100.58443,100.52854, 100.53642, 100.53843, 100.53061, 100.58309, 100.5224,100.53361), position_lat_4 = c(13.7487, 13.72082, 13.7449, 13.73238,13.76578, 13.73429, 13.74567, 13.73556, 13.72415, 13.74591),position_lon_5 = c(100.54841, 100.54585, 100.59693, 100.51346,100.54466, 100.54338, 100.53061, 100.58449, 100.5156, 100.53465), position_lat_5 = c(13.74902, 13.74356, 13.74322, 13.73616,13.76328, 13.76289, 13.74567, 13.73958, 13.71989, 13.74595), position_lon_6 = c(100.54841, 100.54585, 100.59693, 100.51346,100.54466, 100.54338, 100.53061, 100.58449, 100.5156, 100.53465), position_lat_6 = c(13.74902, 13.74356, 13.74322, 13.73616,13.76328, 13.76289, 13.74567, 13.73958, 13.71989, 13.74595), start_time1 = c(6.72472222222222, 6.82194444444444, 7.57361111111111,6.56611111111111, 6.31111111111111, 6.29916666666667, 6.59555555555556,16.3666666666667, 16.1305555555556, 16.0111111111111), end_time1 = c(6.82027777777778,7.30527777777778, 7.79777777777778, 7.13111111111111, 6.66111111111111,6.89916666666667, 6.77388888888889, 16.55, 16.6925, 16.7827777777778), length = c(835.421673262, 2857.08440862, 2003.94967386,4057.83640791, 2026.71013038, 5435.16534982, 2083.02205084,844.774412796, 3727.75857424, 2367.29993395), purpose = c("Bank/ATM","Business", "Infrastructure/Hydro", "Service", "Bank/ATM","Automobile", "Food", "Food", "Food", "Business"), clusterNum = c(2,2, 1, 3, 2, 2, 2, 4, 3, 1)), row.names = c(NA, -10L), class = c("tbl_df","tbl", "data.frame"))

我想通过将行数据转换为新列来创建新的数据框。这就是我预期的输出结果；

R 中的任何包和函数都可以实现这一点吗？感谢您的帮助。

Answer 1

这可能对你有用

library(tidyverse)
example %>%
  pivot_longer(cols = starts_with("position"), 
               names_to = c(".value", "position"),
               names_pattern = "^position_(.*)_(.)",
               values_drop_na = TRUE)

Answer 2

我希望这是你的想法：

library(dplyr)
library(tidyr)

example %>%
  pivot_longer(starts_with("position"), names_to = c(NA, ".value", "position"), 
               names_pattern = "(\w+)_(\w+)_(\d)")

# A tibble: 60 x 9
   trip_id start_time1 end_time1 length purpose  clusterNum position   lon   lat
     <dbl>       <dbl>     <dbl>  <dbl> <chr>         <dbl> <chr>    <dbl> <dbl>
 1       1        6.72      6.82   835. Bank/ATM          2 1         101.  13.7
 2       1        6.72      6.82   835. Bank/ATM          2 2         101.  13.7
 3       1        6.72      6.82   835. Bank/ATM          2 3         101.  13.7
 4       1        6.72      6.82   835. Bank/ATM          2 4         101.  13.7
 5       1        6.72      6.82   835. Bank/ATM          2 5         101.  13.7
 6       1        6.72      6.82   835. Bank/ATM          2 6         101.  13.7
 7       2        6.82      7.31  2857. Business          2 1         101.  13.7
 8       2        6.82      7.31  2857. Business          2 2         101.  13.7
 9       2        6.82      7.31  2857. Business          2 3         101.  13.7
10       2        6.82      7.31  2857. Business          2 4         101.  13.7
# ... with 50 more rows

Answer 3

您正在寻找的是一种将列转换为行的方法（而不是相反），但要保持“位置”标识符并将经度和纬度存储在单独的列中。 tidyr 的 pivot_longer 函数可以让你做到这一点！查看文档 (here) 以获取对不同参数的详细解释。

library(tidyverse)

example %>% 
  pivot_longer(
    starts_with("position"),names_sep = "_", names_to = c(NA, ".value", "pos")
  )
#> # A tibble: 60 x 9
#>    trip_id start_time1 end_time1 length purpose  clusterNum pos     lon   lat
#>      <dbl>       <dbl>     <dbl>  <dbl> <chr>         <dbl> <chr> <dbl> <dbl>
#>  1       1        6.72      6.82   835. Bank/ATM          2 1      101.  13.7
#>  2       1        6.72      6.82   835. Bank/ATM          2 2      101.  13.7
#>  3       1        6.72      6.82   835. Bank/ATM          2 3      101.  13.7
#>  4       1        6.72      6.82   835. Bank/ATM          2 4      101.  13.7
#>  5       1        6.72      6.82   835. Bank/ATM          2 5      101.  13.7
#>  6       1        6.72      6.82   835. Bank/ATM          2 6      101.  13.7
#>  7       2        6.82      7.31  2857. Business          2 1      101.  13.7
#>  8       2        6.82      7.31  2857. Business          2 2      101.  13.7
#>  9       2        6.82      7.31  2857. Business          2 3      101.  13.7
#> 10       2        6.82      7.31  2857. Business          2 4      101.  13.7
#> # ... with 50 more rows

Answer 4

为了完整起见，这里有一个使用 melt() 和 measure() 函数的解决方案（data.table 1.14.1 版新增）：

library(data.table) # development version 1.14.1 used
long <- melt(setDT(example), measure.vars = 
               measure(value.name, position, pattern = "(lon|lat)_(\d)"))[order(trip_id)]
head(long, 12)

    trip_id start_time1 end_time1    length  purpose clusterNum position      lon      lat
 1:       1    6.724722  6.820278  835.4217 Bank/ATM          2        1 100.5423 13.74440
 2:       1    6.724722  6.820278  835.4217 Bank/ATM          2        2 100.5477 13.74409
 3:       1    6.724722  6.820278  835.4217 Bank/ATM          2        3 100.5483 13.74762
 4:       1    6.724722  6.820278  835.4217 Bank/ATM          2        4 100.5484 13.74870
 5:       1    6.724722  6.820278  835.4217 Bank/ATM          2        5 100.5484 13.74902
 6:       1    6.724722  6.820278  835.4217 Bank/ATM          2        6 100.5484 13.74902
 7:       2    6.821944  7.305278 2857.0844 Business          2        1 100.5388 13.71867
 8:       2    6.821944  7.305278 2857.0844 Business          2        2 100.5382 13.72446
 9:       2    6.821944  7.305278 2857.0844 Business          2        3 100.5408 13.72764
10:       2    6.821944  7.305278 2857.0844 Business          2        4 100.5592 13.72082
11:       2    6.821944  7.305278 2857.0844 Business          2        5 100.5459 13.74356
12:       2    6.821944  7.305278 2857.0844 Business          2        6 100.5459 13.74356

如果需要，可以通过

更改列顺序

setcolorder(long, c("trip_id", "lon", "lat", "position"))[]
head(long, 12)

    trip_id      lon      lat position start_time1 end_time1    length  purpose clusterNum
 1:       1 100.5423 13.74440        1    6.724722  6.820278  835.4217 Bank/ATM          2
 2:       1 100.5477 13.74409        2    6.724722  6.820278  835.4217 Bank/ATM          2
 3:       1 100.5483 13.74762        3    6.724722  6.820278  835.4217 Bank/ATM          2
 4:       1 100.5484 13.74870        4    6.724722  6.820278  835.4217 Bank/ATM          2
 5:       1 100.5484 13.74902        5    6.724722  6.820278  835.4217 Bank/ATM          2
 6:       1 100.5484 13.74902        6    6.724722  6.820278  835.4217 Bank/ATM          2
 7:       2 100.5388 13.71867        1    6.821944  7.305278 2857.0844 Business          2
 8:       2 100.5382 13.72446        2    6.821944  7.305278 2857.0844 Business          2
 9:       2 100.5408 13.72764        3    6.821944  7.305278 2857.0844 Business          2
10:       2 100.5592 13.72082        4    6.821944  7.305278 2857.0844 Business          2
11:       2 100.5459 13.74356        5    6.821944  7.305278 2857.0844 Business          2
12:       2 100.5459 13.74356        6    6.821944  7.305278 2857.0844 Business          2

使用 R 将行转换为新列

Converting rows into new columns using R

r

reshape

tidyr