Pivot_Longer 基于列值
Pivot_Longer based on column value
我有一个如下所示的数据框:
我正在努力让它看起来像这样:
team points
Chicago Fire 1.725424
Club de Foot Montreal 1.0349628
Chicago Fire 1.591609
Columbus Crew 1.16115715
.
.
.
这意味着有选择地使用 pivot_longer
。
我现在的代码是这样的:
# Asker's original attempt (kept verbatim; this is the version that fails).
expected_points <- simulate_results %>%
# Expected points per side: 3 * win probability + 1 * draw probability.
mutate(home_expected_points= 3* home_win_prob + 1* draw_prob, away_expected_points = 3* away_win_prob + 1* draw_prob) %>%
# Keep only the two team-name columns and the two expected-points columns.
select(home,away, away_expected_points,home_expected_points) %>%
# Tries to stack all four columns into a single `points` column. `home`/`away`
# hold team names while the *_expected_points columns hold numbers, which is
# the character/double mixing error described in the question.
pivot_longer(,cols = c("home","away","away_expected_points","home_expected_points"), values_to = 'points',names_to = 'type')
expected_points
但是这段代码把 df 中的双精度(double)列和字符(character)列合并到同一列,因此报错。这个转换能一步完成吗?
下面是 pivot 之前 df 的前几行:
# Six-row sample of the data before pivoting (looks like dput() output):
# two character columns (`home`, `away`) and two double columns
# (`away_expected_points`, `home_expected_points`).
# Note the class: this is a grouped_df, grouped by `home`.
structure(list(home = c("Chicago Fire", "Chicago Fire", "Chicago Fire",
"Chicago Fire", "Chicago Fire", "Chicago Fire"), away = c("Club de Foot Montreal",
"Columbus Crew", "DC United", "Houston Dynamo", "Los Angeles Galaxy",
"New England Revolution"), away_expected_points = c(1.03496281825711,
1.16115715218849, 1.07116563833606, 1.09794662072245, 1.50393127764973,
1.22115923847215), home_expected_points = c(1.72542415785488,
1.59160932991963, 1.67152015760113, 1.64871325211506, 1.25278573638545,
1.52350845382982)), class = c("grouped_df", "tbl_df", "tbl",
"data.frame"), row.names = c(NA, -6L), groups = structure(list(
home = "Chicago Fire", .rows = structure(list(1:6), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -1L), .drop = TRUE))
您需要分两步进行 pivot(宽表转长表):
- 合并
home
和 away
- 合并
away_expected_points
和 home_expected_points
下面是一个例子,其中我把你提供的数据命名为 df:
library(tidyr)

# Two independent reshapes of `df`:
#   1. stack the `home` / `away` team names into `team`, recording the source
#      column of each name in `home_away`;
#   2. stack every column whose name contains "expected_points" into `points`,
#      discarding the source column name.
# The second reshape knows nothing about the first, so every team row is
# paired with BOTH points columns (6 rows * 2 * 2 = 24 rows in the result).
df %>%
  pivot_longer(
    c(home, away),
    values_to = "team",
    names_to = "home_away"
  ) %>%
  pivot_longer(
    contains("expected_points"),
    values_to = "points",
    names_to = NULL
  )
#> # A tibble: 24 x 3
#> home_away team points
#> <chr> <chr> <dbl>
#> 1 home Chicago Fire 1.03
#> 2 home Chicago Fire 1.73
#> 3 away Club de Foot Montreal 1.03
#> 4 away Club de Foot Montreal 1.73
#> 5 home Chicago Fire 1.16
#> 6 home Chicago Fire 1.59
#> 7 away Columbus Crew 1.16
#> 8 away Columbus Crew 1.59
#> 9 home Chicago Fire 1.07
#> 10 home Chicago Fire 1.67
#> # ... with 14 more rows
PS:如果您不想要 home_away
列,您可以将行 names_to = "home_away"
更改为 names_to = NULL
。
使用 pmap_dfr()
的替代方法。
library(tidyverse)

# Build the long table row by row: pmap_dfr() calls the function once per row
# of `df`, passing the columns as named arguments, and row-binds the results.
# Columns are matched by name rather than by position (`..1`, `..4`), so the
# result does not silently change if the column order of `df` changes; `...`
# absorbs any additional columns `df` may carry.
pmap_dfr(
  df,
  function(home, away, home_expected_points, away_expected_points, ...) {
    # One row for the home side, one for the away side, in that order.
    tibble(
      team = c(home, away),
      points = c(home_expected_points, away_expected_points)
    )
  }
)
# # A tibble: 12 x 2
# team points
# <chr> <dbl>
# 1 Chicago Fire 1.73
# 2 Club de Foot Montreal 1.03
# 3 Chicago Fire 1.59
# 4 Columbus Crew 1.16
# 5 Chicago Fire 1.67
# 6 DC United 1.07
# 7 Chicago Fire 1.65
# 8 Houston Dynamo 1.10
# 9 Chicago Fire 1.25
# 10 Los Angeles Galaxy 1.50
# 11 Chicago Fire 1.52
# 12 New England Revolution 1.22
也可以像 jpiversen 建议的那样,连续使用两次 pivot_longer() 得到相同的输出。
library(tidyverse)

# Pivot twice, then keep only the rows where the points column belongs to the
# same side (home/away) as the team name.
df %>%
  # `df` arrives grouped by `home`; drop the grouping so nothing downstream is
  # computed per home team by accident.
  ungroup() %>%
  # Stack team names, remembering whether each came from `home` or `away`.
  pivot_longer(c(home, away), names_to = "side", values_to = "team") %>%
  # Stack the points columns, remembering which column each value came from.
  # Select them by their full suffix instead of "any column ending in s".
  pivot_longer(
    ends_with("expected_points"),
    names_to = "points_type",
    values_to = "points"
  ) %>%
  # Match by name instead of by row position: a home team keeps
  # home_expected_points, an away team keeps away_expected_points.
  filter(points_type == paste0(side, "_expected_points")) %>%
  select(team, points)
# # A tibble: 12 x 2
# team points
# <chr> <dbl>
# 1 Chicago Fire 1.73
# 2 Club de Foot Montreal 1.03
# 3 Chicago Fire 1.59
# 4 Columbus Crew 1.16
# 5 Chicago Fire 1.67
# 6 DC United 1.07
# 7 Chicago Fire 1.65
# 8 Houston Dynamo 1.10
# 9 Chicago Fire 1.25
# 10 Los Angeles Galaxy 1.50
# 11 Chicago Fire 1.52
# 12 New England Revolution 1.22
我有一个如下所示的数据框:
我正在努力让它看起来像这样:
team points
Chicago Fire 1.725424
Club de Foot Montreal 1.0349628
Chicago Fire 1.591609
Columbus Crew 1.16115715
.
.
.
这意味着有选择地使用 pivot_longer
。
我现在的代码是这样的:
# Asker's original attempt (kept verbatim; this is the version that fails).
expected_points <- simulate_results %>%
# Expected points per side: 3 * win probability + 1 * draw probability.
mutate(home_expected_points= 3* home_win_prob + 1* draw_prob, away_expected_points = 3* away_win_prob + 1* draw_prob) %>%
# Keep only the two team-name columns and the two expected-points columns.
select(home,away, away_expected_points,home_expected_points) %>%
# Tries to stack all four columns into a single `points` column. `home`/`away`
# hold team names while the *_expected_points columns hold numbers, which is
# the character/double mixing error described in the question.
pivot_longer(,cols = c("home","away","away_expected_points","home_expected_points"), values_to = 'points',names_to = 'type')
expected_points
但是这段代码把 df 中的双精度(double)列和字符(character)列合并到同一列,因此报错。这个转换能一步完成吗?
下面是 pivot 之前 df 的前几行:
# Six-row sample of the data before pivoting (looks like dput() output):
# two character columns (`home`, `away`) and two double columns
# (`away_expected_points`, `home_expected_points`).
# Note the class: this is a grouped_df, grouped by `home`.
structure(list(home = c("Chicago Fire", "Chicago Fire", "Chicago Fire",
"Chicago Fire", "Chicago Fire", "Chicago Fire"), away = c("Club de Foot Montreal",
"Columbus Crew", "DC United", "Houston Dynamo", "Los Angeles Galaxy",
"New England Revolution"), away_expected_points = c(1.03496281825711,
1.16115715218849, 1.07116563833606, 1.09794662072245, 1.50393127764973,
1.22115923847215), home_expected_points = c(1.72542415785488,
1.59160932991963, 1.67152015760113, 1.64871325211506, 1.25278573638545,
1.52350845382982)), class = c("grouped_df", "tbl_df", "tbl",
"data.frame"), row.names = c(NA, -6L), groups = structure(list(
home = "Chicago Fire", .rows = structure(list(1:6), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -1L), .drop = TRUE))
您需要分两步进行 pivot(宽表转长表):
- 合并
home
和away
- 合并
away_expected_points
和home_expected_points
下面是一个例子,其中我把你提供的数据命名为 df:
library(tidyr)

# Two independent reshapes of `df`:
#   1. stack the `home` / `away` team names into `team`, recording the source
#      column of each name in `home_away`;
#   2. stack every column whose name contains "expected_points" into `points`,
#      discarding the source column name.
# The second reshape knows nothing about the first, so every team row is
# paired with BOTH points columns (6 rows * 2 * 2 = 24 rows in the result).
df %>%
  pivot_longer(
    c(home, away),
    values_to = "team",
    names_to = "home_away"
  ) %>%
  pivot_longer(
    contains("expected_points"),
    values_to = "points",
    names_to = NULL
  )
#> # A tibble: 24 x 3
#> home_away team points
#> <chr> <chr> <dbl>
#> 1 home Chicago Fire 1.03
#> 2 home Chicago Fire 1.73
#> 3 away Club de Foot Montreal 1.03
#> 4 away Club de Foot Montreal 1.73
#> 5 home Chicago Fire 1.16
#> 6 home Chicago Fire 1.59
#> 7 away Columbus Crew 1.16
#> 8 away Columbus Crew 1.59
#> 9 home Chicago Fire 1.07
#> 10 home Chicago Fire 1.67
#> # ... with 14 more rows
PS:如果您不想要 home_away
列,您可以将行 names_to = "home_away"
更改为 names_to = NULL
。
使用 pmap_dfr()
的替代方法。
library(tidyverse)

# Build the long table row by row: pmap_dfr() calls the function once per row
# of `df`, passing the columns as named arguments, and row-binds the results.
# Columns are matched by name rather than by position (`..1`, `..4`), so the
# result does not silently change if the column order of `df` changes; `...`
# absorbs any additional columns `df` may carry.
pmap_dfr(
  df,
  function(home, away, home_expected_points, away_expected_points, ...) {
    # One row for the home side, one for the away side, in that order.
    tibble(
      team = c(home, away),
      points = c(home_expected_points, away_expected_points)
    )
  }
)
# # A tibble: 12 x 2
# team points
# <chr> <dbl>
# 1 Chicago Fire 1.73
# 2 Club de Foot Montreal 1.03
# 3 Chicago Fire 1.59
# 4 Columbus Crew 1.16
# 5 Chicago Fire 1.67
# 6 DC United 1.07
# 7 Chicago Fire 1.65
# 8 Houston Dynamo 1.10
# 9 Chicago Fire 1.25
# 10 Los Angeles Galaxy 1.50
# 11 Chicago Fire 1.52
# 12 New England Revolution 1.22
也可以像 jpiversen 建议的那样,连续使用两次 pivot_longer() 得到相同的输出。
library(tidyverse)

# Pivot twice, then keep only the rows where the points column belongs to the
# same side (home/away) as the team name.
df %>%
  # `df` arrives grouped by `home`; drop the grouping so nothing downstream is
  # computed per home team by accident.
  ungroup() %>%
  # Stack team names, remembering whether each came from `home` or `away`.
  pivot_longer(c(home, away), names_to = "side", values_to = "team") %>%
  # Stack the points columns, remembering which column each value came from.
  # Select them by their full suffix instead of "any column ending in s".
  pivot_longer(
    ends_with("expected_points"),
    names_to = "points_type",
    values_to = "points"
  ) %>%
  # Match by name instead of by row position: a home team keeps
  # home_expected_points, an away team keeps away_expected_points.
  filter(points_type == paste0(side, "_expected_points")) %>%
  select(team, points)
# # A tibble: 12 x 2
# team points
# <chr> <dbl>
# 1 Chicago Fire 1.73
# 2 Club de Foot Montreal 1.03
# 3 Chicago Fire 1.59
# 4 Columbus Crew 1.16
# 5 Chicago Fire 1.67
# 6 DC United 1.07
# 7 Chicago Fire 1.65
# 8 Houston Dynamo 1.10
# 9 Chicago Fire 1.25
# 10 Los Angeles Galaxy 1.50
# 11 Chicago Fire 1.52
# 12 New England Revolution 1.22