st_union (sf) 个具有 data.table 的多边形
st_union (sf) polygons with data.table
不久前,我将库 sf 与库 data.table 一起使用,没有出现特殊问题。但是现在我尝试了一个以错误结束的代码。当我在 data.table 中按 id 合并多边形时,结果无法绘制或写入 shp。解决方法是转换为 sp 并返回到 sf,但是对于很多多边形,这非常耗时。小例子:
library(data.table)
library(sf)
a=structure(list(Id = c(1L, 2L, 3L, 3L, 1L, 1L, 2L), geometry = structure(list(
structure(list(structure(c(455311.710673953, 455314.345317508,
455315.716716433, 455316.87150159, 455312.03568616, 455311.710673953,
376691.889842887, 376694.12745275, 376692.828258414, 376689.327648062,
376689.255443473, 376691.889842887), .Dim = c(6L, 2L))), class = c("XY",
"POLYGON", "sfg")), structure(list(structure(c(455316.87150159,
455315.716716433, 455319.072917605, 455321.382671023, 455316.87150159,
376689.327648062, 376692.828258414, 376693.694408316, 376690.518443961,
376689.327648062), .Dim = c(5L, 2L))), class = c("XY", "POLYGON",
"sfg")), structure(list(structure(c(455312.93790784, 455316.87150159,
455317.485088015, 455313.370891238, 455312.93790784, 376686.224010367,
376685.646434684, 376682.57880773, 376682.542858023, 376686.224010367
), .Dim = c(5L, 2L))), class = c("XY", "POLYGON", "sfg")),
structure(list(structure(c(455318.062480594, 455322.068278933,
455321.815715457, 455314.634074832, 455318.062480594, 376684.52766027,
376682.903819937, 376679.980419059, 376681.027049918, 376684.52766027
), .Dim = c(5L, 2L))), class = c("XY", "POLYGON", "sfg")),
structure(list(structure(c(455321.346477176, 455323.548076297,
455324.414104129, 455322.104472781, 455321.346477176, 376688.028453727,
376688.569652457, 376686.404430289, 376685.466014762, 376688.028453727
), .Dim = c(5L, 2L))), class = c("XY", "POLYGON", "sfg")),
structure(list(structure(c(455319.302470828, 455321.743510867,
455323.90891614, 455321.382671023, 455319.302470828, 376693.379039664,
376694.27186193, 376691.456859488, 376690.518443961, 376693.379039664
), .Dim = c(5L, 2L))), class = c("XY", "POLYGON", "sfg")),
structure(list(structure(c(455315.538127566, 455316.87150159,
455318.105693484, 455318.856486941, 455315.35587659, 455315.538127566,
376689.307811637, 376689.327648062, 376689.653453727, 376687.667430777,
376687.667430777, 376689.307811637), .Dim = c(6L, 2L))), class = c("XY",
"POLYGON", "sfg"))), n_empty = 0L, crs = structure(list(input = NA_character_,
wkt = NA_character_), class = "crs"), class = c("sfc_POLYGON",
"sfc"), precision = 0, bbox = structure(c(xmin = 455311.710673953,
ymin = 376679.980419059, xmax = 455324.414104129, ymax = 376694.27186193
), class = "bbox"))), row.names = c(NA, -7L), class = c("sf",
"data.frame"), sf_column = "geometry", agr = structure(c(Id = NA_integer_), class = "factor", .Label = c("constant",
"aggregate", "identity")))
setDT(a)
################original data
#############################
#plot work ok
plot(a$geometry)
##write to shp work ok
st_write(a,"ab.shp", delete_layer=T)
################union polygons by id
####################################
aa<-a[,.(geometry=st_union(geometry)), by=Id] %>% sf::st_as_sf()
##plot error: "Error in CPL_geos_is_empty(st_geometry(x)) : Not a matrix."
plot(aa$geometry)
## write error: "Error in CPL_write_ogr(obj, dsn, layer, driver, as.character(dataset_options), :Not a matrix."
st_write(aa,"abc.shp", delete_layer=T)
#####################transform from data.table to sf
####################################################
aa=st_as_sf(aa)
##plot error "Error in CPL_geos_is_empty(st_geometry(x)) : Not a matrix."
plot(aa)
## write to shp with error: "Error in CPL_write_ogr(obj, dsn, layer, driver, as.character(dataset_options), :Not a matrix."
st_write(aa,"abc.shp", delete_layer=T)
############################transform sf to sp and back
#######################################################
aa=st_as_sf(as(st_as_sf(aa),"Spatial"))
##plot work ok
plot(aa)
##write to shp work ok
st_write(a,"ab.shp", delete_layer=T)
编辑:
我通过 https://github.com/Rdatatable/data.table/issues/4681 找到了部分解决方案基础。如果在 st_union 之后我添加了一行代码,如 aa=aa[1:nrow(aa),] 绘图和写作按预期工作。
setDT(a)
aa<-a[,.(geometry=st_union(geometry)), by=Id] %>% sf::st_as_sf()
aa=aa[1:nrow(aa),]
plot(aa)
st_write(aa,"aa.shp")
因为你的 id 1 没有重叠,由多个多边形组成,所以它把它强制为多边形而不是多边形。其他人只有两个重叠的多边形,因此将它们分解为一个多边形。我使用 dplyr 方法对此进行了测试,并收到了类似的错误。您可以在下面的 data.table 中看到这一点。
aa<- a[,.(geometry=st_union(geometry)), by=Id]
# Id geometry
#1: 1 <XY[3]> <--- three polygons
#2: 2 <XY[1]>
#3: 3 <XY[1]>
# Geometry set for 3 features
# geometry type: MULTIPOLYGON <--- Multipolygon conflicting with polygon geometries
# dimension: XY
# bbox: xmin: 455311.7 ymin: 376685.5 xmax: 455324.4 ymax: 376694.3
# CRS: NA
# MULTIPOLYGON (((455321.3 376688, 455323.5 37668...
# POLYGON ((455315.5 376689.3, 455316.9 376689.3,...
# POLYGON ((455318.1 376684.5, 455322.1 376682.9,...
这可能是一个错误。如果满足您的需要,另一种方法是使用 st_combine
而不是 st_union
。或者您可以尝试将错误的几何图形恢复为正确的几何图形。
编辑:仔细观察,您可能不希望只使用 st_combine
方法。似乎 st_combine
会组合但不会合并几何图形。一种解决方法是先使用 st_combine
,然后再使用 st_union
。这是可行的,因为它按组组合所有功能,然后按功能合并它们。
# first way with st_combine
# EDIT this didn't work with just st_combine added the st_union at the end
aa<-a[,.(geometry=st_combine(geometry)), by=Id] %>% sf::st_as_sf() %>% st_union(by_feature=TRUE)
plot(aa$geometry)
# The hackier way
aa<- a[,.(geometry=st_union(geometry)), by=Id] %>% sf::st_as_sf()
newgeom <- aa[which(st_geometry_type(aa) == 'POLYGON'), ] %>% st_cast("MULTIPOLYGON")
fixedaa <- rbind(aa[which(st_geometry_type(aa) != 'POLYGON'), ], newgeom)
不久前,我将库 sf 与库 data.table 一起使用,没有出现特殊问题。但是现在我尝试了一个以错误结束的代码。当我在 data.table 中按 id 合并多边形时,结果无法绘制或写入 shp。解决方法是转换为 sp 并返回到 sf,但是对于很多多边形,这非常耗时。小例子:
library(data.table)
library(sf)
a=structure(list(Id = c(1L, 2L, 3L, 3L, 1L, 1L, 2L), geometry = structure(list(
structure(list(structure(c(455311.710673953, 455314.345317508,
455315.716716433, 455316.87150159, 455312.03568616, 455311.710673953,
376691.889842887, 376694.12745275, 376692.828258414, 376689.327648062,
376689.255443473, 376691.889842887), .Dim = c(6L, 2L))), class = c("XY",
"POLYGON", "sfg")), structure(list(structure(c(455316.87150159,
455315.716716433, 455319.072917605, 455321.382671023, 455316.87150159,
376689.327648062, 376692.828258414, 376693.694408316, 376690.518443961,
376689.327648062), .Dim = c(5L, 2L))), class = c("XY", "POLYGON",
"sfg")), structure(list(structure(c(455312.93790784, 455316.87150159,
455317.485088015, 455313.370891238, 455312.93790784, 376686.224010367,
376685.646434684, 376682.57880773, 376682.542858023, 376686.224010367
), .Dim = c(5L, 2L))), class = c("XY", "POLYGON", "sfg")),
structure(list(structure(c(455318.062480594, 455322.068278933,
455321.815715457, 455314.634074832, 455318.062480594, 376684.52766027,
376682.903819937, 376679.980419059, 376681.027049918, 376684.52766027
), .Dim = c(5L, 2L))), class = c("XY", "POLYGON", "sfg")),
structure(list(structure(c(455321.346477176, 455323.548076297,
455324.414104129, 455322.104472781, 455321.346477176, 376688.028453727,
376688.569652457, 376686.404430289, 376685.466014762, 376688.028453727
), .Dim = c(5L, 2L))), class = c("XY", "POLYGON", "sfg")),
structure(list(structure(c(455319.302470828, 455321.743510867,
455323.90891614, 455321.382671023, 455319.302470828, 376693.379039664,
376694.27186193, 376691.456859488, 376690.518443961, 376693.379039664
), .Dim = c(5L, 2L))), class = c("XY", "POLYGON", "sfg")),
structure(list(structure(c(455315.538127566, 455316.87150159,
455318.105693484, 455318.856486941, 455315.35587659, 455315.538127566,
376689.307811637, 376689.327648062, 376689.653453727, 376687.667430777,
376687.667430777, 376689.307811637), .Dim = c(6L, 2L))), class = c("XY",
"POLYGON", "sfg"))), n_empty = 0L, crs = structure(list(input = NA_character_,
wkt = NA_character_), class = "crs"), class = c("sfc_POLYGON",
"sfc"), precision = 0, bbox = structure(c(xmin = 455311.710673953,
ymin = 376679.980419059, xmax = 455324.414104129, ymax = 376694.27186193
), class = "bbox"))), row.names = c(NA, -7L), class = c("sf",
"data.frame"), sf_column = "geometry", agr = structure(c(Id = NA_integer_), class = "factor", .Label = c("constant",
"aggregate", "identity")))
setDT(a)
################original data
#############################
#plot work ok
plot(a$geometry)
##write to shp work ok
st_write(a,"ab.shp", delete_layer=T)
################union polygons by id
####################################
aa<-a[,.(geometry=st_union(geometry)), by=Id] %>% sf::st_as_sf()
##plot error: "Error in CPL_geos_is_empty(st_geometry(x)) : Not a matrix."
plot(aa$geometry)
## write error: "Error in CPL_write_ogr(obj, dsn, layer, driver, as.character(dataset_options), :Not a matrix."
st_write(aa,"abc.shp", delete_layer=T)
#####################transform from data.table to sf
####################################################
aa=st_as_sf(aa)
##plot error "Error in CPL_geos_is_empty(st_geometry(x)) : Not a matrix."
plot(aa)
## write to shp with error: "Error in CPL_write_ogr(obj, dsn, layer, driver, as.character(dataset_options), :Not a matrix."
st_write(aa,"abc.shp", delete_layer=T)
############################transform sf to sp and back
#######################################################
aa=st_as_sf(as(st_as_sf(aa),"Spatial"))
##plot work ok
plot(aa)
##write to shp work ok
st_write(a,"ab.shp", delete_layer=T)
编辑:
我通过 https://github.com/Rdatatable/data.table/issues/4681 找到了部分解决方案基础。如果在 st_union 之后我添加了一行代码,如 aa=aa[1:nrow(aa),] 绘图和写作按预期工作。
setDT(a)
aa<-a[,.(geometry=st_union(geometry)), by=Id] %>% sf::st_as_sf()
aa=aa[1:nrow(aa),]
plot(aa)
st_write(aa,"aa.shp")
因为你的 id 1 没有重叠,由多个多边形组成,所以它把它强制为多边形而不是多边形。其他人只有两个重叠的多边形,因此将它们分解为一个多边形。我使用 dplyr 方法对此进行了测试,并收到了类似的错误。您可以在下面的 data.table 中看到这一点。
aa<- a[,.(geometry=st_union(geometry)), by=Id]
# Id geometry
#1: 1 <XY[3]> <--- three polygons
#2: 2 <XY[1]>
#3: 3 <XY[1]>
# Geometry set for 3 features
# geometry type: MULTIPOLYGON <--- Multipolygon conflicting with polygon geometries
# dimension: XY
# bbox: xmin: 455311.7 ymin: 376685.5 xmax: 455324.4 ymax: 376694.3
# CRS: NA
# MULTIPOLYGON (((455321.3 376688, 455323.5 37668...
# POLYGON ((455315.5 376689.3, 455316.9 376689.3,...
# POLYGON ((455318.1 376684.5, 455322.1 376682.9,...
这可能是一个错误。如果满足您的需要,另一种方法是使用 st_combine
而不是 st_union
。或者您可以尝试将错误的几何图形恢复为正确的几何图形。
编辑:仔细观察,您可能不希望只使用 st_combine
方法。似乎 st_combine
会组合但不会合并几何图形。一种解决方法是先使用 st_combine
,然后再使用 st_union
。这是可行的,因为它按组组合所有功能,然后按功能合并它们。
# first way with st_combine
# EDIT this didn't work with just st_combine added the st_union at the end
aa<-a[,.(geometry=st_combine(geometry)), by=Id] %>% sf::st_as_sf() %>% st_union(by_feature=TRUE)
plot(aa$geometry)
# The hackier way
aa<- a[,.(geometry=st_union(geometry)), by=Id] %>% sf::st_as_sf()
newgeom <- aa[which(st_geometry_type(aa) == 'POLYGON'), ] %>% st_cast("MULTIPOLYGON")
fixedaa <- rbind(aa[which(st_geometry_type(aa) != 'POLYGON'), ], newgeom)