如何通过在 R 中分隔年份列来重建我的数据集

How can I rebuild my dataset by separating year column in R

我有这个数据集

但我想使用 R

得到这样的数据集

我们可以使用 pivot_longer:

library(tidyr)
library(dplyr)
# Reshape the wide GDP_GR<year> columns of df1 into one row per country and
# year. names_pattern splits each column name into two capture groups: the
# non-digit prefix, which ".value" turns into the value column name (here
# "GDP_GR"), and the trailing digits, which become the Year column.
# The backslashes are doubled because "\d" is not a valid escape sequence
# inside an R string literal; a single backslash is a parse error.
pivot_longer(df1, cols = starts_with("GDP"), names_to = c(".value", "Year"),
   names_pattern = "([^\\d]+)(\\d+)") %>%
   # Give the value column its final, human-readable name.
   rename(`Growth rate` = GDP_GR)

输出:

# A tibble: 4 × 4
  `Country Name` `Country Code` Year  `Growth rate`
  <chr>          <chr>          <chr>         <dbl>
1 Afghanistan    AFG            2011             NA
2 Afghanistan    AFG            2012     1234143668
3 Albania        ALB            2011     2703864872
4 Albania        ALB            2012    -4429023858

或没有rename步骤

# Same reshape in a single call: values_to names the value column directly,
# and the single capture group in names_pattern keeps only the digits of each
# column name as Year. The backslashes are doubled because "\D" and "\d" are
# not valid escape sequences inside an R string literal.
pivot_longer(df1, cols = starts_with("GDP"), names_to = "Year",
     values_to = "Growth rate", names_pattern = "\\D+(\\d+)")
# A tibble: 4 × 4
  `Country Name` `Country Code` Year  `Growth rate`
  <chr>          <chr>          <chr>         <dbl>
1 Afghanistan    AFG            2011             NA
2 Afghanistan    AFG            2012     1234143668
3 Albania        ALB            2011     2703864872
4 Albania        ALB            2012    -4429023858

数据

# Example input: two countries in wide format, one GDP_GR<year> column per
# year. Built with structure() so the tibble classes are set without calling
# a tibble constructor.
df1 <- structure(
  list(
    `Country Name` = c("Afghanistan", "Albania"),
    `Country Code` = c("AFG", "ALB"),
    GDP_GR2011 = c(NA, 2703864872),
    GDP_GR2012 = c(1234143668, -4429023858)
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -2L)
)

这是你想要的吗?

library(dplyr)
# Sort the rows by country, then by year within each country.
# The column name contains a space, so it must be wrapped in backticks.
# NOTE(review): assumes df is already in long format with `Country Name`
# and Year columns, as in the reshaped output shown earlier; confirm.
df <- df %>% arrange(`Country Name`, Year)
library(tidyr)
library(dplyr)

数据

# One-row example in wide format: one GDP_GR<year> column per year.
df <- tibble(
  country_name = "Afghanistan",
  country_code = "AFG",
  GDP_GR2011 = NA, # plain NA is logical, so this column prints as <lgl>
  GDP_GR2012 = 12.3414,
  GDP_GR2013 = 2.79713
)

# A tibble: 1 x 5
  country_name country_code GDP_GR2011 GDP_GR2012 GDP_GR2013
  <chr>        <chr>        <lgl>           <dbl>      <dbl>
1 Afghanistan  AFG          NA               12.3       2.80

这样做如何:

   # Reshape the GDP_<...> columns into one row per year. names_transform
   # converts each former column name (e.g. "GDP_GR2011") to the number it
   # contains, so year becomes numeric instead of character.
   # parse_number() is exported by readr, not by tidyr or dplyr, so it is
   # namespace-qualified to work without attaching readr.
   df %>%
      pivot_longer(
        cols = starts_with("GDP_"),
        names_to = "year",
        names_transform = list(year = readr::parse_number),
        values_to = "growth_rate"
      )

# A tibble: 3 x 4
  country_name country_code  year growth_rate
  <chr>        <chr>        <dbl>       <dbl>
1 Afghanistan  AFG           2011       NA   
2 Afghanistan  AFG           2012       12.3 
3 Afghanistan  AFG           2013        2.80

这里有一个稍微不同的方式:

library(dplyr)
library(stringr)
library(tidyr)
# Stack every column whose name contains "GDP_" into Year / Growth_Rate
# pairs, reduce Year to the first run of digits in the former column name,
# and keep only the rows for Afghanistan.
df %>%
  pivot_longer(
    cols = contains("GDP_"),
    names_to = "Year",
    values_to = "Growth_Rate"
  ) %>%
  mutate(Year = str_extract(Year, "[0-9]+")) %>%
  filter(Country.Name == "Afghanistan")

输出:

  Country.Name Country.Code Year  Growth_Rate
  <chr>        <chr>        <chr> <chr>      
1 Afghanistan  AFG          2011  NA         
2 Afghanistan  AFG          2012  12,34143668

数据:

structure(list(Country.Name = c("Afghanistan", "Albania", "Algeria", 
"American Samoa", "Andorra", "Angola", "Antigua and Barbuda", 
"Argentina"), Country.Code = c("AFG", "ALB", "DZA", "ASM", "AND", 
"AGO", "ATG", "ARG"), GDP_GR2011 = c(NA, "270,3864872", "265,051384", 
NA, "153,9593311", "1124,470493", "37,66672643", "86,54338008"
), GDP_GR2012 = c("12,34143668", "-4,429023858", "4,522681887", 
"12,28070175", "-12,13475102", "14,54800321", "5,477240023", 
"2,983815492")), class = "data.frame", row.names = c(NA, -8L)