如何使用 streamstats 包在 R 中描绘多个分水岭？

Question

有一个正在开发中的 R 包，我想使用它，叫做 streamstats。它的作用是为沿水体的纬度和经度点划定一个分水岭（在美国境内），并提供分水岭特征，例如流域面积和各种土地覆盖的比例。我想做的是从几个纬度和经度位置的数据框中提取一些感兴趣的分水岭特征。

我可以让这个包对单个位置完成我想做的事情：

devtools::install_github("markwh/streamstats")
library(streamstats)

setTimeout(120)

# Three sample points, one per row: state code plus decimal-degree
# latitude/longitude (passed to delineateWatershed() with crs = 4326 below).
x <- c("state", "lat", "long")
dat1 <- setNames(
  data.frame(
    c("NJ", "NY", "VA"),
    c(40.99194, 42.02458, 38.04235),
    c(-74.28000, -75.11928, -79.88144),
    stringsAsFactors = FALSE
  ),
  x
)

# Work on a single location first: row 1 of dat1 (the NJ point).
test_dat <- dat1[1, ]

# Delineate the watershed for that point; coordinates are passed as
# longitude (x) / latitude (y) with crs = 4326.
ws1 <- delineateWatershed(
  xlocation = test_dat$long, ylocation = test_dat$lat, crs = 4326,
  includeparameters = "true", includeflowtypes = "true"
)

# Use the point's own state as the region code rather than a hard-coded
# value, so the request stays consistent with the row being processed.
chars1 <- computeChars(workspaceID = ws1$workspaceID, rcode = test_dat$state)
chars1$parameters

然而，我想要的是能够一次向 delineateWatershed 函数传入多个位置（即 dat1 中的全部 3 个位置），并把 chars1$parameters 输出中的 DRNAREA、FOREST、LC11DEV 和 LC11IMP 这几个变量合并到一个数据框中。也许这可以通过 for 循环来实现？

理想的输出应该是这样的

  state      lat      long DRNAREA FOREST LC11DEV LC11IMP
1    NJ 40.99194 -74.28000     160   66.2   26.20    5.50
2    NY 42.02458 -75.11928     457   89.3    2.52    0.18
3    VA 38.04235 -79.88144     158     NA    4.63    0.20

Answer 1

我会把你现有的代码封装成一个函数，然后使用 purrr::pmap_df() 遍历 dat1 中的每一行，再把所有结果合并在一起。另请参阅这个相关内容。

library(dplyr)
library(purrr)
library(tidyr)
library(streamstats)

setTimeout(120)

# Two sample points, one per row: state code plus decimal-degree
# latitude/longitude (passed to delineateWatershed() with crs = 4326 below).
dat1 <- data.frame(
  state = c("NJ", "NY"),
  lat = c(40.99194, 42.02458),
  long = c(-74.28000, -75.11928),
  stringsAsFactors = FALSE
)
dat1
#>   state      lat      long
#> 1    NJ 40.99194 -74.28000
#> 2    NY 42.02458 -75.11928

定义流域划分函数

# Delineate the watershed for one point and return its selected
# characteristics in long format (one row per characteristic code).
#
# rcode_in: region/state code forwarded to computeChars() as `rcode`.
# lat_y:    latitude of the point (passed as `ylocation`, crs = 4326).
# long_x:   longitude of the point (passed as `xlocation`, crs = 4326).
#
# Returns the `parameters` table from computeChars(), restricted to the codes
# DRNAREA, FOREST, LC11DEV and LC11IMP, with ID (workspace ID), state, long
# and lat columns set from the inputs.
catchment_delineation <- function(rcode_in, lat_y, long_x) {
  # Progress output so a multi-point run shows which point is in flight.
  print(paste0("Processing for lat = ", lat_y, " and long = ", long_x))

  wanted_codes <- c("DRNAREA", "FOREST", "LC11DEV", "LC11IMP")

  basin <- delineateWatershed(
    xlocation = long_x, ylocation = lat_y, crs = 4326,
    includeparameters = "true", includeflowtypes = "true"
  )
  basin_chars <- computeChars(workspaceID = basin$workspaceID, rcode = rcode_in)

  # Keep only the characteristics of interest, then tag each row with the
  # workspace and the location it belongs to.
  kept <- filter(basin_chars$parameters, code %in% wanted_codes)
  mutate(
    kept,
    ID = basin$workspaceID,
    state = rcode_in,
    long = long_x,
    lat = lat_y
  )
}

将函数应用于 dat1 数据框中的每一行

# pmap passes each row's values as arguments named after dat1's columns, so
# bind them to the function's parameters by name. This keeps lat/long/state
# from being swapped if dat1's column order changes; `...` absorbs any extra
# columns.
catchment_df <- pmap_df(
  dat1,
  function(state, lat, long, ...) {
    catchment_delineation(rcode_in = state, lat_y = lat, long_x = long)
  }
)
#> https://streamstats.usgs.gov/streamstatsservices/watershed.geojson?rcode=NJ&xlocation=-74.28&ylocation=40.99194&includeparameters=true&includeflowtypes=true&includefeatures=true&crs=4326https://streamstats.usgs.gov/streamstatsservices/parameters.json?rcode=NJ&workspaceID=NJ20210923064141811000&includeparameters=truehttps://streamstats.usgs.gov/streamstatsservices/watershed.geojson?rcode=NY&xlocation=-75.11928&ylocation=42.02458&includeparameters=true&includeflowtypes=true&includefeatures=true&crs=4326https://streamstats.usgs.gov/streamstatsservices/parameters.json?rcode=NY&workspaceID=NY20210923064248530000&includeparameters=true

# Long format: one row per workspace ID and characteristic code.
catchment_df
#>                       ID                            name
#> 1 NJ20210923064141811000                   Drainage Area
#> 2 NJ20210923064141811000                  Percent Forest
#> 3 NJ20210923064141811000 Percent Developed from NLCD2011
#> 4 NJ20210923064141811000     Percent_Impervious_NLCD2011
#> 5 NY20210923064248530000                   Drainage Area
#> 6 NY20210923064248530000                  Percent Forest
#> 7 NY20210923064248530000 Percent Developed from NLCD2011
#> 8 NY20210923064248530000     Percent_Impervious_NLCD2011
#>                                                                          description
#> 1                                            Area that drains to a point on a stream
#> 2                                               Percentage of area covered by forest
#> 3                  Percentage of developed (urban) land from NLCD 2011 classes 21-24
#> 4 Average percentage of impervious area determined from NLCD 2011 impervious dataset
#> 5                                            Area that drains to a point on a stream
#> 6                                               Percentage of area covered by forest
#> 7                  Percentage of developed (urban) land from NLCD 2011 classes 21-24
#> 8 Average percentage of impervious area determined from NLCD 2011 impervious dataset
#>      code         unit  value state      long      lat
#> 1 DRNAREA square miles 160.00    NJ -74.28000 40.99194
#> 2  FOREST      percent  66.20    NJ -74.28000 40.99194
#> 3 LC11DEV      percent  26.20    NJ -74.28000 40.99194
#> 4 LC11IMP      percent   5.50    NJ -74.28000 40.99194
#> 5 DRNAREA square miles 457.00    NY -75.11928 42.02458
#> 6  FOREST      percent  89.30    NY -75.11928 42.02458
#> 7 LC11DEV      percent   2.52    NY -75.11928 42.02458
#> 8 LC11IMP      percent   0.18    NY -75.11928 42.02458

将结果重塑为所需格式

# Keep the location columns plus the code/value pair, then spread the codes
# into one column each so every location ends up on a single row.
catchment_reshape <- pivot_wider(
  select(catchment_df, state, long, lat, code, value),
  names_from = code,
  values_from = value
)
catchment_reshape
#> # A tibble: 2 x 7
#>   state  long   lat DRNAREA FOREST LC11DEV LC11IMP
#>   <chr> <dbl> <dbl>   <dbl>  <dbl>   <dbl>   <dbl>
#> 1 NJ    -74.3  41.0     160   66.2   26.2     5.5 
#> 2 NY    -75.1  42.0     457   89.3    2.52    0.18

<sup>由 reprex 包 (v2.0.1) 于 2021-09-22 创建</sup>

Answer 2

既然你提到了使用 for 循环，我想不妨就用 for 循环来解决这个问题。

这是您的数据：

library(dplyr)
library(purrr)
library(tidyr)
library(streamstats)

setTimeout(120)

# Two sample points, one per row: state code plus decimal-degree
# latitude/longitude (passed to delineateWatershed() with crs = 4326 below).
dat1 <- data.frame(
  state = c("NJ", "NY"),
  lat = c(40.99194, 42.02458),
  long = c(-74.28000, -75.11928),
  stringsAsFactors = FALSE
)
dat1

创建一个空列表来存储分水岭特征：

# Preallocate one slot per row of dat1 instead of growing the list in a loop.
water_shed <- vector("list", nrow(dat1))

遍历 dat1，并返回每个经纬度位置的分水岭属性：

# Delineate one watershed per row of dat1. seq_len() gives an empty sequence
# for a zero-row dat1, whereas 1:nrow(dat1) would iterate over c(1, 0).
for (i in seq_len(nrow(dat1))) {
  water_shed[[i]] <- delineateWatershed(
    xlocation = dat1$long[i], ylocation = dat1$lat[i], crs = 4326,
    includeparameters = "true", includeflowtypes = "true"
  )
}

现在创建一个列表来存储流域属性：

# Preallocate one slot per delineated watershed instead of growing the list.
ws_properties <- vector("list", length(water_shed))

遍历 water_shed，并返回每个位置的参数：

# Compute the characteristics of each delineated watershed.
for (i in seq_along(water_shed)) {
  # Pass only this location's state: dat1$state is the whole column, which
  # would send every state code with each request.
  ws_properties[[i]] <- computeChars(
    workspaceID = water_shed[[i]]$workspaceID,
    rcode = dat1$state[i]
  )
}

最后，为您想要的输出创建一个数据框，并把分水岭属性列表中每个位置的属性逐一填入其中：

# Characteristic codes to keep; each becomes one output column.
wanted_codes <- c("DRNAREA", "FOREST", "LC11DEV", "LC11IMP")

# Zero-row template: fixes the column order and types, and guarantees every
# wanted column exists even if no location reports that characteristic.
ws_template <- data.frame(
  state = character(), long = numeric(), lat = numeric(),
  DRNAREA = numeric(), FOREST = numeric(),
  LC11DEV = numeric(), LC11IMP = numeric(),
  stringsAsFactors = FALSE
)

# Build one wide row per location.
ws_rows <- lapply(seq_along(ws_properties), function(i) {
  ws_properties[[i]]$parameters %>%
    filter(code %in% wanted_codes) %>%
    mutate(
      state = dat1$state[i],
      long = dat1$long[i],
      lat = dat1$lat[i]
    ) %>%
    select(state, long, lat, code, value) %>%
    pivot_wider(names_from = code, values_from = value)
})

# bind_rows() matches columns by name and fills absent ones with NA, so a
# location that lacks one of the codes still yields a complete row instead of
# failing on a column-count mismatch.
ws_properties_df <- bind_rows(ws_template, ws_rows)

期望的输出：

如何使用 streamstats 包在 R 中描绘多个分水岭？

How can I delineate multiple watersheds in R using the streamstats package?

r

purrr

tidyverse