如何通过在 R 中分隔年份列来重建我的数据集
How can I rebuild my dataset by separating year column in R
我有这个数据集
但我想使用 R
得到这样的数据集
我们可能会使用 pivot_longer
library(tidyr)
library(dplyr)
# Reshape the wide GDP_GR<year> columns of df1 into one row per country-year.
# The ".value" sentinel turns the first capture group (the non-digit prefix,
# "GDP_GR") into the name of the value column, while the second capture group
# (the digits) fills the "Year" column.
# Backslashes are doubled because "\d" is not a valid escape sequence inside
# an R string literal and would stop the code from parsing.
pivot_longer(
  df1,
  cols = starts_with("GDP"),
  names_to = c(".value", "Year"),
  names_pattern = "([^\\d]+)(\\d+)"
) %>%
  # Give the pivoted value column its display name.
  rename(`Growth rate` = GDP_GR)
输出：
# A tibble: 4 × 4
`Country Name` `Country Code` Year `Growth rate`
<chr> <chr> <chr> <dbl>
1 Afghanistan AFG 2011 NA
2 Afghanistan AFG 2012 1234143668
3 Albania ALB 2011 2703864872
4 Albania ALB 2012 -4429023858
或者不使用 rename 步骤：
# Same reshape without a separate rename step: the capture group in
# names_pattern keeps only the digits as "Year", and values_to names the
# value column directly.
# "\\D" and "\\d" are written with doubled backslashes because a single
# backslash before D or d is not a valid escape in an R string literal.
pivot_longer(
  df1,
  cols = starts_with("GDP"),
  names_to = "Year",
  values_to = "Growth rate",
  names_pattern = "\\D+(\\d+)"
)
# A tibble: 4 × 4
`Country Name` `Country Code` Year `Growth rate`
<chr> <chr> <chr> <dbl>
1 Afghanistan AFG 2011 NA
2 Afghanistan AFG 2012 1234143668
3 Albania ALB 2011 2703864872
4 Albania ALB 2012 -4429023858
数据
# Example input: two countries with one wide GDP growth column per year.
# Built with structure() so the tibble classes and compact row names are set
# explicitly.
df1 <- structure(
  list(
    `Country Name` = c("Afghanistan", "Albania"),
    `Country Code` = c("AFG", "ALB"),
    GDP_GR2011 = c(NA, 2703864872),
    GDP_GR2012 = c(1234143668, -4429023858)
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -2L)
)
这是你想要的吗?
# Attach dplyr, which provides %>% and arrange().
library(dplyr)
# Sort the data by country, then by year. A column name containing a space is
# non-syntactic and has to be wrapped in backticks, otherwise the call does
# not parse.
# NOTE(review): assumes the column is spelled `Country Name`, as in the df1
# example data - adjust to match the actual column name in df.
df <- df %>% arrange(`Country Name`, Year)
library(tidyr)
library(dplyr)
数据
# One-row example: a single country with three wide GDP growth columns.
# The 2011 value is a bare NA, so that column is stored as logical.
df <- tibble(
  country_name = "Afghanistan",
  country_code = "AFG",
  GDP_GR2011 = NA,
  GDP_GR2012 = 12.3414,
  GDP_GR2013 = 2.79713
)
# A tibble: 1 x 5
country_name country_code GDP_GR2011 GDP_GR2012 GDP_GR2013
<chr> <chr> <lgl> <dbl> <dbl>
1 Afghanistan AFG NA 12.3 2.80
做法：
# Pivot every GDP_GR<year> column into (year, growth_rate) pairs.
# names_prefix strips the leading "GDP_GR" so only the digits remain, and
# as.numeric converts them into a <dbl> year. This avoids parse_number(),
# which belongs to readr and is not attached by library(tidyr) or
# library(dplyr).
df %>%
  pivot_longer(
    cols = starts_with("GDP_"),
    names_prefix = "GDP_GR",
    names_to = "year",
    names_transform = list(year = as.numeric),
    values_to = "growth_rate"
  )
# A tibble: 3 x 4
country_name country_code year growth_rate
<chr> <chr> <dbl> <dbl>
1 Afghanistan AFG 2011 NA
2 Afghanistan AFG 2012 12.3
3 Afghanistan AFG 2013 2.80
这里有一个稍微不同的方式:
library(dplyr)
library(stringr)
library(tidyr)
# Stack the GDP_ columns into Year / Growth_Rate pairs, reduce each former
# column name to its first run of digits, then keep only the Afghanistan rows.
pivot_longer(
  df,
  cols = contains("GDP_"),
  names_to = "Year",
  values_to = "Growth_Rate"
) %>%
  mutate(Year = str_extract(Year, "[0-9]+")) %>%
  filter(Country.Name == "Afghanistan")
输出:
Country.Name Country.Code Year Growth_Rate
<chr> <chr> <chr> <chr>
1 Afghanistan AFG 2011 NA
2 Afghanistan AFG 2012 12,34143668
数据:
# Example data: eight countries, with the growth-rate columns stored as
# character strings that use a decimal comma.
# The value is assigned to `df` because the pipeline for this answer reads
# from `df`. The final parenthesis closes structure(); without it the
# expression is incomplete and does not parse.
df <- structure(
  list(
    Country.Name = c(
      "Afghanistan", "Albania", "Algeria", "American Samoa", "Andorra",
      "Angola", "Antigua and Barbuda", "Argentina"
    ),
    Country.Code = c("AFG", "ALB", "DZA", "ASM", "AND", "AGO", "ATG", "ARG"),
    GDP_GR2011 = c(
      NA, "270,3864872", "265,051384", NA, "153,9593311", "1124,470493",
      "37,66672643", "86,54338008"
    ),
    GDP_GR2012 = c(
      "12,34143668", "-4,429023858", "4,522681887", "12,28070175",
      "-12,13475102", "14,54800321", "5,477240023", "2,983815492"
    )
  ),
  class = "data.frame",
  row.names = c(NA, -8L)
)
我有这个数据集
但我想使用 R
得到这样的数据集。
我们可能会使用 pivot_longer
library(tidyr)
library(dplyr)
# Reshape the wide GDP_GR<year> columns of df1 into one row per country-year.
# The ".value" sentinel turns the first capture group (the non-digit prefix,
# "GDP_GR") into the name of the value column, while the second capture group
# (the digits) fills the "Year" column.
# Backslashes are doubled because "\d" is not a valid escape sequence inside
# an R string literal and would stop the code from parsing.
pivot_longer(
  df1,
  cols = starts_with("GDP"),
  names_to = c(".value", "Year"),
  names_pattern = "([^\\d]+)(\\d+)"
) %>%
  # Give the pivoted value column its display name.
  rename(`Growth rate` = GDP_GR)
输出：
# A tibble: 4 × 4
`Country Name` `Country Code` Year `Growth rate`
<chr> <chr> <chr> <dbl>
1 Afghanistan AFG 2011 NA
2 Afghanistan AFG 2012 1234143668
3 Albania ALB 2011 2703864872
4 Albania ALB 2012 -4429023858
或者不使用 rename 步骤：
# Same reshape without a separate rename step: the capture group in
# names_pattern keeps only the digits as "Year", and values_to names the
# value column directly.
# "\\D" and "\\d" are written with doubled backslashes because a single
# backslash before D or d is not a valid escape in an R string literal.
pivot_longer(
  df1,
  cols = starts_with("GDP"),
  names_to = "Year",
  values_to = "Growth rate",
  names_pattern = "\\D+(\\d+)"
)
# A tibble: 4 × 4
`Country Name` `Country Code` Year `Growth rate`
<chr> <chr> <chr> <dbl>
1 Afghanistan AFG 2011 NA
2 Afghanistan AFG 2012 1234143668
3 Albania ALB 2011 2703864872
4 Albania ALB 2012 -4429023858
数据
# Example input: two countries with one wide GDP growth column per year.
# Built with structure() so the tibble classes and compact row names are set
# explicitly.
df1 <- structure(
  list(
    `Country Name` = c("Afghanistan", "Albania"),
    `Country Code` = c("AFG", "ALB"),
    GDP_GR2011 = c(NA, 2703864872),
    GDP_GR2012 = c(1234143668, -4429023858)
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -2L)
)
这是你想要的吗?
# Attach dplyr, which provides %>% and arrange().
library(dplyr)
# Sort the data by country, then by year. A column name containing a space is
# non-syntactic and has to be wrapped in backticks, otherwise the call does
# not parse.
# NOTE(review): assumes the column is spelled `Country Name`, as in the df1
# example data - adjust to match the actual column name in df.
df <- df %>% arrange(`Country Name`, Year)
library(tidyr)
library(dplyr)
数据
# One-row example: a single country with three wide GDP growth columns.
# The 2011 value is a bare NA, so that column is stored as logical.
df <- tibble(
  country_name = "Afghanistan",
  country_code = "AFG",
  GDP_GR2011 = NA,
  GDP_GR2012 = 12.3414,
  GDP_GR2013 = 2.79713
)
# A tibble: 1 x 5
country_name country_code GDP_GR2011 GDP_GR2012 GDP_GR2013
<chr> <chr> <lgl> <dbl> <dbl>
1 Afghanistan AFG NA 12.3 2.80
做法：
# Pivot every GDP_GR<year> column into (year, growth_rate) pairs.
# names_prefix strips the leading "GDP_GR" so only the digits remain, and
# as.numeric converts them into a <dbl> year. This avoids parse_number(),
# which belongs to readr and is not attached by library(tidyr) or
# library(dplyr).
df %>%
  pivot_longer(
    cols = starts_with("GDP_"),
    names_prefix = "GDP_GR",
    names_to = "year",
    names_transform = list(year = as.numeric),
    values_to = "growth_rate"
  )
# A tibble: 3 x 4
country_name country_code year growth_rate
<chr> <chr> <dbl> <dbl>
1 Afghanistan AFG 2011 NA
2 Afghanistan AFG 2012 12.3
3 Afghanistan AFG 2013 2.80
这里有一个稍微不同的方式:
library(dplyr)
library(stringr)
library(tidyr)
# Stack the GDP_ columns into Year / Growth_Rate pairs, reduce each former
# column name to its first run of digits, then keep only the Afghanistan rows.
pivot_longer(
  df,
  cols = contains("GDP_"),
  names_to = "Year",
  values_to = "Growth_Rate"
) %>%
  mutate(Year = str_extract(Year, "[0-9]+")) %>%
  filter(Country.Name == "Afghanistan")
输出:
Country.Name Country.Code Year Growth_Rate
<chr> <chr> <chr> <chr>
1 Afghanistan AFG 2011 NA
2 Afghanistan AFG 2012 12,34143668
数据:
# Example data: eight countries, with the growth-rate columns stored as
# character strings that use a decimal comma.
# The value is assigned to `df` because the pipeline for this answer reads
# from `df`. The final parenthesis closes structure(); without it the
# expression is incomplete and does not parse.
df <- structure(
  list(
    Country.Name = c(
      "Afghanistan", "Albania", "Algeria", "American Samoa", "Andorra",
      "Angola", "Antigua and Barbuda", "Argentina"
    ),
    Country.Code = c("AFG", "ALB", "DZA", "ASM", "AND", "AGO", "ATG", "ARG"),
    GDP_GR2011 = c(
      NA, "270,3864872", "265,051384", NA, "153,9593311", "1124,470493",
      "37,66672643", "86,54338008"
    ),
    GDP_GR2012 = c(
      "12,34143668", "-4,429023858", "4,522681887", "12,28070175",
      "-12,13475102", "14,54800321", "5,477240023", "2,983815492"
    )
  ),
  class = "data.frame",
  row.names = c(NA, -8L)
)