如何通过 shapefile 多边形聚合地理编码数据以使用 R 进行可视化?
How can I aggregate geocoded data by a shapefile polygon for visualisation using R?
我有一个地理编码数据集,我试图将其聚合成多边形,以便我可以将结果绘制为一系列不同级别(例如郊区、当地政府区域等)的等值线图。
为此,我采用了一种方法 - shown here - 它使用 sp
包中的 over
函数将数据与空间对象连接起来并找到哪个多边形我的坐标(来自单独的文件)落入其中。然后我使用 ggplot2
强化了要绘制的空间对象。
总的来说,我似乎已经让大部分过程正常工作,但是正如您从结果图表中看到的那样。当我将坐标与多边形匹配时,我显然没有做正确的事情。多边形(表示郊区)应该是完整的形状。我无法弄清楚我的工作流程中的哪一部分导致了这个混乱。
谁能建议我在这里可能出问题的地方?有没有比使用 over
更好的方法来解决多边形中的点问题?
可以从澳大利亚统计局网站here下载shapefile(文件:"State Suburbs ASGS Non ABS Structures Ed 2011 Digital Boundaries in ESRI Shapefile Format")。我在 google sheet 中保存了一些地理编码的示例数据,可以通过下面的 运行 代码访问。
我最初的尝试是在下面的代码中:
## LOAD REQUIRED PACKAGES
library(googlesheets)
library(dplyr)
library(ggplot2)
library(sp)
library(rgdal)
library(maptools)
## READ DATA FROM GOOGLE SHEETS FILE
googleDocKey <- "1IyXSC0dtOCh1xGFiBG38nKzK2nO8wKUECRCEhvtZVS0"
geoCodedData <- googleDocKey %>% gs_key()
geoData <- geoCodedData %>% gs_read(ws = "geoData", range = cell_limits())
suburbList <- geoCodedData %>% gs_read(ws = "suburbList", range = cell_limits())
## SET COORDINATES FROM GEOCODED DATA
geoData <- as.data.frame(geoData)
coordinates(geoData) <- c("Longitude","Latitude")
## LOAD AUSTRALIA SHAPEFILE AND SUBSET FOR NSW
## YOU WILL NEED TO DOWNLOAD THIS FILE FROM THE ABS MANUALLY (LINK ABOVE)
ausSuburbs <- readOGR(dsn ="02 - Shapefiles", layer="SSC_2011_AUST")
suburbList$SSC_CODE_2011 <- as.numeric(suburbList$SSC_CODE_2011)
nswSuburbList <- suburbList %>%
filter(SSC_CODE_2011 < 20000) %>%
filter(SSC_CODE_2011 > 9999) %>%
select(SSC_CODE_2011)
nswSuburbs <- ausSuburbs[ausSuburbs$SSC_CODE %in% nswSuburbList$SSC_CODE_2011, ]
nswSuburbs <- nswSuburbs[!nswSuburbs$SSC_CODE %in% 11408,] # exclude Lord Howe Island
## TELL R THAT THE COORDINATES IN THE SHAPEFILE MATCH THOSE IN THE SPATIAL POINTS DATA FRAME
proj4string(geoData) <- proj4string(nswSuburbs)
## ASSIGN UNIQUE IDENTIFIER TO EACH SPATIAL OBJECT
nswSuburbs@data$id <- rownames(nswSuburbs@data)
nswSuburbs@data <- mutate(nswSuburbs@data, id_poly = as.numeric(rownames(nswSuburbs@data)))
geoData@data <- mutate(geoData@data, id_shape = as.numeric(rownames(geoData@data)))
## GET THE SUBURB THAT THE POINT IS LOCATED IN
gpsSuburb <- over(geoData, nswSuburbs)
## ADD 'id_shape' TO THE DATA FRAME
gpsSuburbID <- mutate(gpsSuburb, id_shape = as.numeric(rownames(gpsSuburb)))
## AGGREGATE DROP BEAR DATA BY SUBURB
gpsSuburbJoin <- left_join(geoData@data, gpsSuburbID, by = c("id_shape" = "id_shape"))
gpsSuburbData <- gpsSuburbJoin %>%
group_by(SSC_CODE) %>%
summarise(DropBearSightings = sum(DropBearSightings))
gpsSuburbData <- as.data.frame(gpsSuburbData)
## CONVERT SHAPEFILE TO DATA FRAME TO ALLOW DATA TO BE JOINED TO IT
nswPoints <- fortify(nswSuburbs, region="id")
nswData <- merge(nswPoints, nswSuburbs, by="id", stringsAsFactors=FALSE)
nswData$id <- as.numeric(nswData$id)
nswSuburbMapData <- merge(nswData, gpsSuburbData, by="SSC_CODE", stringsAsFactors=FALSE)
nswSuburbMapData <- nswSuburbMapData[order(nswSuburbMapData$id, nswSuburbMapData$id),]
## SET THEME FOR GGPLOT
theme_clean <- function(base_size = 12) {
require(grid)
theme_grey(base_size) %+replace%
theme(
axis.title = element_blank(),
axis.text = element_blank(),
panel.background = element_blank(),
panel.grid = element_blank(),
axis.ticks.length = unit(0,"cm"),
axis.ticks.margin = unit(0,"cm"),
panel.margin = unit(0,"lines"),
plot.margin = unit(c(0, 0, 0, 0), "lines"),
complete = TRUE
)}
## PLOT TEST MAP USING GGPLOT
dropBearMap <- ggplot(nswSuburbMapData) +
aes(long, lat, group=group, fill=DropBearSightings) +
geom_polygon() +
coord_map(projection = "mercator", xlim = c(140.0, 154.0), ylim = c(-38.0, -27.0)) +
theme_clean()
dropBearMap
#ggsave("dropBearMap.png", type = "cairo-png")
对于如何解决此问题的任何建议,我将不胜感激。干杯!
好吧,我的第一个答案是 wayyyy off...我对 dplyr 没有太多经验,并且在编辑数据槽时过于紧张。问题就简单多了。合并功能打乱了需要在绘图之前恢复的强化形状文件的顺序,因此:
nswSuburbMapData <- nswSuburbMapData[order(nswSuburbMapData$id, nswSuburbMapData$id),]
需要变成这样:
nswSuburbMapData <- nswSuburbMapData[order(nswSuburbMapData$order),]
绘制时会产生这个:
您可能需要对地图进行一些额外的更改才能更有用,但这应该是正确表示的数据。
我有一个地理编码数据集,我试图将其聚合成多边形,以便我可以将结果绘制为一系列不同级别(例如郊区、当地政府区域等)的等值线图。
为此,我采用了一种方法 - shown here - 它使用 sp
包中的 over
函数将数据与空间对象连接起来并找到哪个多边形我的坐标(来自单独的文件)落入其中。然后我使用 ggplot2
强化了要绘制的空间对象。
总的来说,我似乎已经让大部分过程正常工作,但是正如您从结果图表中看到的那样。当我将坐标与多边形匹配时,我显然没有做正确的事情。多边形(表示郊区)应该是完整的形状。我无法弄清楚我的工作流程中的哪一部分导致了这个混乱。
over
更好的方法来解决多边形中的点问题?
可以从澳大利亚统计局网站here下载shapefile(文件:"State Suburbs ASGS Non ABS Structures Ed 2011 Digital Boundaries in ESRI Shapefile Format")。我在 google sheet 中保存了一些地理编码的示例数据,可以通过下面的 运行 代码访问。
我最初的尝试是在下面的代码中:
## LOAD REQUIRED PACKAGES
library(googlesheets)
library(dplyr)
library(ggplot2)
library(sp)
library(rgdal)
library(maptools)
## READ DATA FROM GOOGLE SHEETS FILE
googleDocKey <- "1IyXSC0dtOCh1xGFiBG38nKzK2nO8wKUECRCEhvtZVS0"
geoCodedData <- googleDocKey %>% gs_key()
geoData <- geoCodedData %>% gs_read(ws = "geoData", range = cell_limits())
suburbList <- geoCodedData %>% gs_read(ws = "suburbList", range = cell_limits())
## SET COORDINATES FROM GEOCODED DATA
geoData <- as.data.frame(geoData)
coordinates(geoData) <- c("Longitude","Latitude")
## LOAD AUSTRALIA SHAPEFILE AND SUBSET FOR NSW
## YOU WILL NEED TO DOWNLOAD THIS FILE FROM THE ABS MANUALLY (LINK ABOVE)
ausSuburbs <- readOGR(dsn ="02 - Shapefiles", layer="SSC_2011_AUST")
suburbList$SSC_CODE_2011 <- as.numeric(suburbList$SSC_CODE_2011)
nswSuburbList <- suburbList %>%
filter(SSC_CODE_2011 < 20000) %>%
filter(SSC_CODE_2011 > 9999) %>%
select(SSC_CODE_2011)
nswSuburbs <- ausSuburbs[ausSuburbs$SSC_CODE %in% nswSuburbList$SSC_CODE_2011, ]
nswSuburbs <- nswSuburbs[!nswSuburbs$SSC_CODE %in% 11408,] # exclude Lord Howe Island
## TELL R THAT THE COORDINATES IN THE SHAPEFILE MATCH THOSE IN THE SPATIAL POINTS DATA FRAME
proj4string(geoData) <- proj4string(nswSuburbs)
## ASSIGN UNIQUE IDENTIFIER TO EACH SPATIAL OBJECT
nswSuburbs@data$id <- rownames(nswSuburbs@data)
nswSuburbs@data <- mutate(nswSuburbs@data, id_poly = as.numeric(rownames(nswSuburbs@data)))
geoData@data <- mutate(geoData@data, id_shape = as.numeric(rownames(geoData@data)))
## GET THE SUBURB THAT THE POINT IS LOCATED IN
gpsSuburb <- over(geoData, nswSuburbs)
## ADD 'id_shape' TO THE DATA FRAME
gpsSuburbID <- mutate(gpsSuburb, id_shape = as.numeric(rownames(gpsSuburb)))
## AGGREGATE DROP BEAR DATA BY SUBURB
gpsSuburbJoin <- left_join(geoData@data, gpsSuburbID, by = c("id_shape" = "id_shape"))
gpsSuburbData <- gpsSuburbJoin %>%
group_by(SSC_CODE) %>%
summarise(DropBearSightings = sum(DropBearSightings))
gpsSuburbData <- as.data.frame(gpsSuburbData)
## CONVERT SHAPEFILE TO DATA FRAME TO ALLOW DATA TO BE JOINED TO IT
nswPoints <- fortify(nswSuburbs, region="id")
nswData <- merge(nswPoints, nswSuburbs, by="id", stringsAsFactors=FALSE)
nswData$id <- as.numeric(nswData$id)
nswSuburbMapData <- merge(nswData, gpsSuburbData, by="SSC_CODE", stringsAsFactors=FALSE)
nswSuburbMapData <- nswSuburbMapData[order(nswSuburbMapData$id, nswSuburbMapData$id),]
## SET THEME FOR GGPLOT
theme_clean <- function(base_size = 12) {
require(grid)
theme_grey(base_size) %+replace%
theme(
axis.title = element_blank(),
axis.text = element_blank(),
panel.background = element_blank(),
panel.grid = element_blank(),
axis.ticks.length = unit(0,"cm"),
axis.ticks.margin = unit(0,"cm"),
panel.margin = unit(0,"lines"),
plot.margin = unit(c(0, 0, 0, 0), "lines"),
complete = TRUE
)}
## PLOT TEST MAP USING GGPLOT
dropBearMap <- ggplot(nswSuburbMapData) +
aes(long, lat, group=group, fill=DropBearSightings) +
geom_polygon() +
coord_map(projection = "mercator", xlim = c(140.0, 154.0), ylim = c(-38.0, -27.0)) +
theme_clean()
dropBearMap
#ggsave("dropBearMap.png", type = "cairo-png")
对于如何解决此问题的任何建议,我将不胜感激。干杯!
好吧,我的第一个答案是 wayyyy off...我对 dplyr 没有太多经验,并且在编辑数据槽时过于紧张。问题就简单多了。合并功能打乱了需要在绘图之前恢复的强化形状文件的顺序,因此:
nswSuburbMapData <- nswSuburbMapData[order(nswSuburbMapData$id, nswSuburbMapData$id),]
需要变成这样:
nswSuburbMapData <- nswSuburbMapData[order(nswSuburbMapData$order),]
绘制时会产生这个:
您可能需要对地图进行一些额外的更改才能更有用,但这应该是正确表示的数据。