如何在 r 中的多列之间进行线性插值?
How to linearly interpolate between multiple columns in r?
我尝试了几种不同的方法,但似乎无法弄清楚。
这是我的数据:
NEI_all_years <-structure(list(state = c("AK", "AK", "AK", "AK", "AK", "AK"),
pollutant = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Ammonia",
"Carbon Dioxide", "Carbon Monoxide", "Methane", "Nitrogen Oxides",
"Nitrous Oxide", "PM10 Filterable", "PM10 Primary (Filt + Cond)",
"PM2.5 Filterable", "PM2.5 Primary (Filt + Cond)", "Sulfur Dioxide",
"Volatile Organic Compounds"), class = "factor"), CEDS_Sector = c("1A1b_Pet-refining",
"1A1g_Other-energy-transf", "1A2_Industrial_fuel_combustion",
"1A2_Industrial_fuel_combustion", "1A2_Industrial_fuel_combustion",
"1A2_Industrial_fuel_combustion"), CEDS_Fuel = c("natural_gas",
"natural_gas", "biomass", "diesel_oil", "hard_coal", "heavy_oil"
), emissions2011 = c(0.00576, 0.00043, 0.000237024333990807,
0.00188652040085416, 0.000193489271882478, 0.00013881276046503
), emissions2014 = c(0.00583, 0, 5.32360487697781e-06, 0.00124625603613746,
7.0333234068145e-05, 0), emissions2017 = c(0.0089604, 2.34e-05,
0.000388967014684649, 0.000837643732771449, 8.95609282040459e-05,
2.86528055941436e-05), tier1_description = structure(c(5L,
5L, 5L, 5L, 5L, 5L), .Label = c("", "CHEMICAL & ALLIED PRODUCT MFG",
"DOMESTIC SHIPPING", "FUEL COMB. ELEC. UTIL.", "FUEL COMB. INDUSTRIAL",
"FUEL COMB. OTHER", "HIGHWAY VEHICLES", "METALS PROCESSING",
"MISCELLANEOUS", "OFF-HIGHWAY", "OTHER INDUSTRIAL PROCESSES",
"OTHER NATURAL", "PETROLEUM & RELATED INDUSTRIES", "SOLVENT UTILIZATION",
"STORAGE & TRANSPORT", "WASTE DISPOSAL & RECYCLING"), class = "factor"),
unit = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "TON", class = "factor")), row.names = c(NA,
6L), class = "data.frame")
看起来像这样(示例):
state pollutant CEDS_Sector CEDS_Fuel emissions2011 emissions2014 emissions2017 tier1_description unit
AK Ammonia 1A1b_Pet-refining natural gas 11 14 17 FUEL COMB. INDUSTRIAL TON
AK Ammonia 1A1g_Other-energy-transf natural gas 10 22 30 FUEL COMB. INDUSTRIAL TON
AK Ammonia 1A2_Industrial_fuel_combustion biomass 15 48 50 FUEL COMB. INDUSTRIAL TON
我需要对排放量进行插值,这样我就可以根据 emissions2011
、emissions2014
和 emissions2017
列获得从 2010 年到 2020 年的每一年的数据。
我们可以将数据重塑为 'long' 格式,然后使用 complete
(来自 tidyr
)扩展数据集并在 'emissions' 上使用 na.approx
library(dplyr)
library(tidyr)
library(zoo)
NEI_all_years %>%
pivot_longer(cols = starts_with('emission'), names_to = c(".value", "year"),
names_sep="(?<=[a-z])(?=[0-9])") %>%
mutate(year = as.integer(year)) %>%
group_by(across(state:unit)) %>%
complete(year = 2010:2020) %>%
mutate(emissions = na.approx(emissions, na.rm = FALSE, rule = 2))
或者我们可以使用 forecast
中的 na.interp
而不是 na.approx
library(forecast)
NEI_all_years %>%
pivot_longer(cols = starts_with('emission'), names_to = c(".value", "year"),
names_sep="(?<=[a-z])(?=[0-9])") %>%
mutate(year = as.integer(year)) %>%
group_by(across(state:unit)) %>%
complete(year = 2010:2020) %>%
mutate(emissions = na.interp(emissions))
# A tibble: 66 x 8
# Groups: state, pollutant, CEDS_Sector, CEDS_Fuel, tier1_description, unit [6]
# state pollutant CEDS_Sector CEDS_Fuel tier1_description unit year emissions
# <chr> <fct> <chr> <chr> <fct> <fct> <int> <ts>
# 1 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2010 0.005760000
# 2 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2011 0.005760000
# 3 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2012 0.005783333
# 4 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2013 0.005806667
# 5 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2014 0.005830000
# 6 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2015 0.006873467
# 7 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2016 0.007916933
# 8 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2017 0.008960400
# 9 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2018 0.008960400
#10 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2019 0.008960400
# … with 56 more rows
如果我们需要进行线性外推,一个选项是
# remotes::install_github("skgrange/threadr")
library(threadr)
NEI_all_years %>%
pivot_longer(cols = starts_with('emission'), names_to = c(".value", "year"),
names_sep="(?<=[a-z])(?=[0-9])") %>%
mutate(year = as.integer(year)) %>%
group_by(across(state:unit)) %>%
complete(year = 2010:2020) %>%
mutate(emissions = na_extrapolate(na.approx(emissions, na.rm = FALSE)))
# A tibble: 66 x 8
# Groups: state, pollutant, CEDS_Sector, CEDS_Fuel, tier1_description, unit [6]
# state pollutant CEDS_Sector CEDS_Fuel tier1_description unit year emissions
# <chr> <fct> <chr> <chr> <fct> <fct> <int> <dbl>
# 1 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2010 0.00574
# 2 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2011 0.00576
# 3 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2012 0.00578
# 4 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2013 0.00581
# 5 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2014 0.00583
# 6 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2015 0.00687
# 7 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2016 0.00792
# 8 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2017 0.00896
# 9 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2018 0.0100
#10 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2019 0.0110
# … with 56 more rows
我尝试了几种不同的方法,但似乎无法弄清楚。
这是我的数据:
NEI_all_years <-structure(list(state = c("AK", "AK", "AK", "AK", "AK", "AK"),
pollutant = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Ammonia",
"Carbon Dioxide", "Carbon Monoxide", "Methane", "Nitrogen Oxides",
"Nitrous Oxide", "PM10 Filterable", "PM10 Primary (Filt + Cond)",
"PM2.5 Filterable", "PM2.5 Primary (Filt + Cond)", "Sulfur Dioxide",
"Volatile Organic Compounds"), class = "factor"), CEDS_Sector = c("1A1b_Pet-refining",
"1A1g_Other-energy-transf", "1A2_Industrial_fuel_combustion",
"1A2_Industrial_fuel_combustion", "1A2_Industrial_fuel_combustion",
"1A2_Industrial_fuel_combustion"), CEDS_Fuel = c("natural_gas",
"natural_gas", "biomass", "diesel_oil", "hard_coal", "heavy_oil"
), emissions2011 = c(0.00576, 0.00043, 0.000237024333990807,
0.00188652040085416, 0.000193489271882478, 0.00013881276046503
), emissions2014 = c(0.00583, 0, 5.32360487697781e-06, 0.00124625603613746,
7.0333234068145e-05, 0), emissions2017 = c(0.0089604, 2.34e-05,
0.000388967014684649, 0.000837643732771449, 8.95609282040459e-05,
2.86528055941436e-05), tier1_description = structure(c(5L,
5L, 5L, 5L, 5L, 5L), .Label = c("", "CHEMICAL & ALLIED PRODUCT MFG",
"DOMESTIC SHIPPING", "FUEL COMB. ELEC. UTIL.", "FUEL COMB. INDUSTRIAL",
"FUEL COMB. OTHER", "HIGHWAY VEHICLES", "METALS PROCESSING",
"MISCELLANEOUS", "OFF-HIGHWAY", "OTHER INDUSTRIAL PROCESSES",
"OTHER NATURAL", "PETROLEUM & RELATED INDUSTRIES", "SOLVENT UTILIZATION",
"STORAGE & TRANSPORT", "WASTE DISPOSAL & RECYCLING"), class = "factor"),
unit = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "TON", class = "factor")), row.names = c(NA,
6L), class = "data.frame")
看起来像这样(示例):
state pollutant CEDS_Sector CEDS_Fuel emissions2011 emissions2014 emissions2017 tier1_description unit
AK Ammonia 1A1b_Pet-refining natural gas 11 14 17 FUEL COMB. INDUSTRIAL TON
AK Ammonia 1A1g_Other-energy-transf natural gas 10 22 30 FUEL COMB. INDUSTRIAL TON
AK Ammonia 1A2_Industrial_fuel_combustion biomass 15 48 50 FUEL COMB. INDUSTRIAL TON
我需要对排放量进行插值,这样我就可以根据 emissions2011
、emissions2014
和 emissions2017
列获得从 2010 年到 2020 年的每一年的数据。
我们可以将数据重塑为 'long' 格式,然后使用 complete
(来自 tidyr
)扩展数据集并在 'emissions' 上使用 na.approx
library(dplyr)
library(tidyr)
library(zoo)
NEI_all_years %>%
pivot_longer(cols = starts_with('emission'), names_to = c(".value", "year"),
names_sep="(?<=[a-z])(?=[0-9])") %>%
mutate(year = as.integer(year)) %>%
group_by(across(state:unit)) %>%
complete(year = 2010:2020) %>%
mutate(emissions = na.approx(emissions, na.rm = FALSE, rule = 2))
或者我们可以使用 forecast
na.interp
而不是 na.approx
library(forecast)
NEI_all_years %>%
pivot_longer(cols = starts_with('emission'), names_to = c(".value", "year"),
names_sep="(?<=[a-z])(?=[0-9])") %>%
mutate(year = as.integer(year)) %>%
group_by(across(state:unit)) %>%
complete(year = 2010:2020) %>%
mutate(emissions = na.interp(emissions))
# A tibble: 66 x 8
# Groups: state, pollutant, CEDS_Sector, CEDS_Fuel, tier1_description, unit [6]
# state pollutant CEDS_Sector CEDS_Fuel tier1_description unit year emissions
# <chr> <fct> <chr> <chr> <fct> <fct> <int> <ts>
# 1 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2010 0.005760000
# 2 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2011 0.005760000
# 3 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2012 0.005783333
# 4 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2013 0.005806667
# 5 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2014 0.005830000
# 6 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2015 0.006873467
# 7 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2016 0.007916933
# 8 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2017 0.008960400
# 9 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2018 0.008960400
#10 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2019 0.008960400
# … with 56 more rows
如果我们需要进行线性外推,一个选项是
# remotes::install_github("skgrange/threadr")
library(threadr)
NEI_all_years %>%
pivot_longer(cols = starts_with('emission'), names_to = c(".value", "year"),
names_sep="(?<=[a-z])(?=[0-9])") %>%
mutate(year = as.integer(year)) %>%
group_by(across(state:unit)) %>%
complete(year = 2010:2020) %>%
mutate(emissions = na_extrapolate(na.approx(emissions, na.rm = FALSE)))
# A tibble: 66 x 8
# Groups: state, pollutant, CEDS_Sector, CEDS_Fuel, tier1_description, unit [6]
# state pollutant CEDS_Sector CEDS_Fuel tier1_description unit year emissions
# <chr> <fct> <chr> <chr> <fct> <fct> <int> <dbl>
# 1 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2010 0.00574
# 2 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2011 0.00576
# 3 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2012 0.00578
# 4 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2013 0.00581
# 5 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2014 0.00583
# 6 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2015 0.00687
# 7 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2016 0.00792
# 8 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2017 0.00896
# 9 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2018 0.0100
#10 AK Ammonia 1A1b_Pet-refining natural_gas FUEL COMB. INDUSTRIAL TON 2019 0.0110
# … with 56 more rows