在 R 中解析 Ordnance Survey(英国地形测量局)SPARQL GET 请求
Parse Ordnance Survey SPARQL GET request in R
我对这个主题还比较陌生,请多包涵;
我正在从 R 访问 Ordnance Survey 的 SPARQL 端点(Endpoint)以获取他们的 RDF 数据。我在解析返回的 GML 几何属性时遇到了问题。
我的 SPARQL 查询(以此为例)返回爱丁堡南部(Edinburgh South)选区的几何图形,以及几个属性(名称、代码和 URI);
# Attach the SPARQL client and the XML parser. library() is used rather than
# require() so that a missing package stops the script here instead of
# failing later with "could not find function".
library(SPARQL)
library(XML)

# Ordnance Survey linked-data SPARQL endpoint.
endpoint <- "http://data.ordnancesurvey.co.uk/datasets/os-linked-data/apis/sparql"

# Query text: selects the name, GSS code, URI and GML geometry (?g) of the
# Westminster constituency whose preferred label is "Edinburgh South".
# The literal is assigned directly; wrapping a single string in paste0()
# added nothing.
query <- "PREFIX geom: <http://data.ordnancesurvey.co.uk/ontology/geometry/>
PREFIX geog: <http://data.ordnancesurvey.co.uk/ontology/admingeo/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT ?WestminsterConstituencyName ?gssCode ?uri ?g
WHERE
{
?uri a geog:WestminsterConstituency;
skos:prefLabel ?WestminsterConstituencyName;
geog:gssCode ?gssCode;
geom:extent ?geom .
?geom geom:asGML ?g.
}
HAVING(?WestminsterConstituencyName=\"Edinburgh South\")"
在 R 中,我想提取这些结果并用 leaflet 将其绘制到地图上。我目前有两种方法可以从上面的查询中获取结果;
方法 1:通过 httr 包使用 GET 请求并生成 XML 数据;
# Packages used by the snippets in this script.
packs <- c("sp","stringr","rgdal","leaflet","gsubfn","XML","SPARQL","plyr","RColorBrewer","utils","httr")
# Attach every package with library() so a missing one raises an error
# immediately (require() would only return FALSE). invisible() suppresses
# the list that lapply() would otherwise print.
invisible(lapply(packs, library, character.only = TRUE))

# Build the request URL: the query is percent-encoded (including reserved
# characters) and the endpoint is asked for XML output.
request <- paste0("http://data.ordnancesurvey.co.uk/datasets/os-linked-data/apis/sparql?query=",URLencode(query, reserved = TRUE),"&output=xml")

# GET request from the endpoint.
data <- GET(request)
# Fail with the HTTP status instead of trying to parse an error page as XML.
stop_for_status(data)
# Decode the body with an explicit encoding (avoids httr's encoding guess)
# and parse it into an XML document.
data.xml <- xmlParse(content(data, as = "text", encoding = "UTF-8"), asText = TRUE)
方法 2:使用 SPARQL 包返回数据框;
# Send the query to the endpoint and keep only the `results` element of
# the list that SPARQL() returns.
qd <- SPARQL(url = endpoint, query = query)[["results"]]
无论使用哪种方法,返回的几何(作为 XML 变量或数据框列)都带有全部 GML 标签:
"<gml:Polygon><gml:outerBoundaryIs><gml:LinearRing><gml:coordinates>LONG LIST OF COORDINATE PAIRS</gml:coordinates></gml:LinearRing></gml:outerBoundaryIs></gml:Polygon>"^^<http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral>
当使用方法 2 时,我可以执行以下变通方法来创建多边形,但它看起来很丑陋;
# British National Grid as a proj4 string.
BNG <- "+proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +datum=OSGB36 +units=m +no_defs"

# Attribute columns (everything except the geometry column "g"); attached to
# the SpatialPolygonsDataFrame at the end. drop = FALSE keeps a data frame
# even if only one attribute column remains.
data <- qd[, colnames(qd) != "g", drop = FALSE]

# Geometry column of the results (GML literal text).
geom <- qd[, "g"]

# Keep only the text between the <gml:coordinates> tags.
# The replacement has to be "\\1" (back-reference to the capture group).
# The previous "\1" is an escape for the control character \001, so the
# whole string was replaced by that character and no coordinates survived.
geom.sub <- sub(".*<gml:coordinates> *(.*?) *</gml:coordinates>.*", "\\1", geom)

# Split into "x,y" pairs (space separated), then into x and y (comma
# separated).
# NOTE(review): unlist() pools the pairs of every row into one ring, and the
# greedy leading ".*" keeps only the last <gml:coordinates> element of each
# literal, so multi-row results and multipart/holed polygons are not
# represented correctly by this workaround.
s <- strsplit(as.character(geom.sub), " ")
coords <- data.frame(coords = unlist(s), stringsAsFactors = FALSE)
l <- strsplit(coords$coords, ",")

# One row per pair, two numeric columns (base R instead of plyr::ldply).
df <- as.data.frame(do.call(rbind, l), stringsAsFactors = FALSE)
colnames(df) <- c("x", "y")
df[] <- lapply(df, as.numeric)

# Create the polygon (ID 1), wrap it in a SpatialPolygons object in the BNG
# projection and attach the attribute columns.
geom.list <- Polygons(list(Polygon(df)), 1)
final <- SpatialPolygons(list(geom.list), proj4string = CRS(BNG))
final.df <- SpatialPolygonsDataFrame(final, data)
方法 1 返回一个像这样的 XML 文件;
<?xml version="1.0"?>
<sparql xmlns="http://www.w3.org/2005/sparql-results#">
<head>
<variable name="WestminsterConstituencyName"/>
<variable name="gssCode"/>
<variable name="uri"/>
<variable name="g"/>
</head>
<results>
<result>
<binding name="WestminsterConstituencyName">
<literal>Edinburgh South</literal>
</binding>
<binding name="gssCode">
<literal>S14000024</literal>
</binding>
<binding name="uri">
<uri>http://data.ordnancesurvey.co.uk/id/7000000000033932</uri>
</binding>
<binding name="g">
<literal datatype="http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral"><gml:Polygon><gml:outerBoundaryIs><gml:LinearRing><gml:coordinates>LOTS OF COORDINATE PAIRS HERE</gml:coordinates></gml:LinearRing></gml:outerBoundaryIs></gml:Polygon></literal>
</binding>
</result>
</results>
</sparql>
但我不知道如何根据 XML 结果制作多边形(可写入 shapefile)甚至 JSON。我更喜欢使用 XML,因为我也想访问其他 XML 资源。
另外,方法 2 的处理(字符串拆分等)真的可靠吗?如果数据并不总是符合这种格式怎么办?多部分多边形(multipart polygon)的信息会丢失吗?(我认为会。)有没有更“正规”的方法?
感谢您的帮助。
我不知道这是否符合您对(纯粹的?)XML 的兴趣,但它确实能绘制出地图。我将 WestminsterConstituencyName 参数化了,因此您甚至可以将其转换为函数。
我对 GML 一无所知,但我决定按原样使用它,而不是将坐标解析为数据框之类更通用的结构。经过一些研究,我相信 rgdal 可以充当 OS 的 GML 数据与 leaflet 所需输入之间的桥梁。SPARQL 查询中的 str() 字符串转换只是省去了在 R 中删除引号和字面量类型后缀的步骤。我猜 rgdal 也许能够直接处理 OS 返回的 GML 多边形,但把多边形包装进一段定义了图层和要素的 GML 中,对我来说更清楚。
合理的下一步是尝试为 readOGR 使用文本连接,这样您就不需要先保存再打开文件。
这是一个 Shiny 实现。我没有尝试任何性能改进,例如缓存。
# Attach packages with library() so a missing one stops the script at once
# (require() would only return FALSE).
library(SPARQL)
library(XML)
library(leaflet)
# requires some system libraries
# I followed this for ubuntu
# http://www.sarasafavi.com/installing-gdalogr-on-ubuntu.html
# might also need to do something like
# sudo apt-get install libgdal-dev libproj-dev
library(rgdal)

# Constituency to look up; spliced into the SPARQL query below.
WestminsterConstituency <- "Edinburgh South"
endpoint <-
  "http://data.ordnancesurvey.co.uk/datasets/os-linked-data/apis/sparql"

# str(?g) returns the GML as a plain string, so no quotes or datatype
# suffix have to be stripped in R.
query <- paste0(
  "PREFIX geom: <http://data.ordnancesurvey.co.uk/ontology/geometry/>
PREFIX geog: <http://data.ordnancesurvey.co.uk/ontology/admingeo/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT ?WestminsterConstituencyName ?gssCode ?uri (str(?g) as ?gstr)
WHERE
{
?uri a geog:WestminsterConstituency;
skos:prefLabel ?WestminsterConstituencyName;
geog:gssCode ?gssCode;
geom:extent ?geom .
?geom geom:asGML ?g.
}
HAVING(?WestminsterConstituencyName='",
  WestminsterConstituency,
  "')"
)

qd <- SPARQL(endpoint, query)$results
# Stop with a clear message when the query matched nothing, rather than
# failing inside the XML parser on empty input.
if (NROW(qd) == 0L) {
  stop("No Westminster constituency named '", WestminsterConstituency,
       "' was returned by the endpoint", call. = FALSE)
}

# Parse the returned GML polygon text.
xmlres <-
  xmlTreeParse(qd$gstr, asText = TRUE, useInternalNodes = TRUE)

# Minimal OGR-style GML feature collection: one layer
# ("WestminsterConstituency") holding one feature with an empty geometry
# slot that is filled in below.
template.text <- '<?xml version="1.0" encoding="utf-8" ?>
<ogr:FeatureCollection
xmlns:ogr="http://ogr.maptools.org/"
xmlns:gml="http://www.opengis.net/gml">
<gml:featureMember>
<WestminsterConstituency fid="0">
<ogr:geometryProperty>
</ogr:geometryProperty>
</WestminsterConstituency>
</gml:featureMember>
</ogr:FeatureCollection>'

# (the node-adding approach was taken from an external reference whose link
# is missing here)
template.xml <- xmlTreeParse(template.text,
                             useInternalNodes = TRUE,
                             asText = TRUE)
template.top <- xmlRoot(template.xml)

# Add a NAME attribute node to the feature element.
name.node <- newXMLNode("NAME", WestminsterConstituency)
template.node <- xmlElementsByTagName(el = template.top,
                                      name = "WestminsterConstituency",
                                      recursive = TRUE)
addChildren(template.node[[1]], kids = list(name.node))

# Insert the parsed polygon into the geometryProperty element.
template.node <- xmlElementsByTagName(el = template.top,
                                      name = "geometryProperty",
                                      recursive = TRUE)
addChildren(template.node[[1]], kids = list(xmlres))

# Write the assembled GML to disk so that readOGR() can open it.
saveXML(template.top, "expanded.gml")

# The code below needs WC, so fail here with a clear message if the GML
# driver is unavailable. Previously the read was skipped (or its error
# swallowed by try()) and the script failed later at proj4string(WC).
if (!("GML" %in% ogrDrivers()$name)) {
  stop("This GDAL build has no GML driver; cannot read 'expanded.gml'",
       call. = FALSE)
}
WC <- readOGR(dsn = "expanded.gml", layer = "WestminsterConstituency")
print(summary(WC))

# http://www.alex-singleton.com/R-Tutorial-Materials/7-converting-coordinates.pdf
ukgrid <- "+init=epsg:27700"
latlong <- "+init=epsg:4326"

# http://gis.stackexchange.com/questions/123212/assign-crs-to-shapefile-in-r
# Declare the data as EPSG:27700, then reproject to EPSG:4326 for leaflet.
proj4string(WC) <- CRS(ukgrid)
WC.LL <- spTransform(WC, CRS(latlong))

# Base map tiles plus the constituency polygon.
m <- leaflet()
m <- addTiles(m)
m <- addPolygons(data = WC.LL, map = m)
m
我对这个主题还比较陌生,请多包涵;
我正在从 R 访问 Ordnance Survey 的 SPARQL 端点(Endpoint)以获取他们的 RDF 数据。我在解析返回的 GML 几何属性时遇到了问题。
我的 SPARQL 查询(以此为例)返回爱丁堡南部(Edinburgh South)选区的几何图形,以及几个属性(名称、代码和 URI);
# Attach the SPARQL client and the XML parser. library() is used rather than
# require() so that a missing package stops the script here instead of
# failing later with "could not find function".
library(SPARQL)
library(XML)

# Ordnance Survey linked-data SPARQL endpoint.
endpoint <- "http://data.ordnancesurvey.co.uk/datasets/os-linked-data/apis/sparql"

# Query text: selects the name, GSS code, URI and GML geometry (?g) of the
# Westminster constituency whose preferred label is "Edinburgh South".
# The literal is assigned directly; wrapping a single string in paste0()
# added nothing.
query <- "PREFIX geom: <http://data.ordnancesurvey.co.uk/ontology/geometry/>
PREFIX geog: <http://data.ordnancesurvey.co.uk/ontology/admingeo/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT ?WestminsterConstituencyName ?gssCode ?uri ?g
WHERE
{
?uri a geog:WestminsterConstituency;
skos:prefLabel ?WestminsterConstituencyName;
geog:gssCode ?gssCode;
geom:extent ?geom .
?geom geom:asGML ?g.
}
HAVING(?WestminsterConstituencyName=\"Edinburgh South\")"
在 R 中,我想提取这些结果并用 leaflet 将其绘制到地图上。我目前有两种方法可以从上面的查询中获取结果;
方法 1:通过 httr 包使用 GET 请求并生成 XML 数据;
# Packages used by the snippets in this script.
packs <- c("sp","stringr","rgdal","leaflet","gsubfn","XML","SPARQL","plyr","RColorBrewer","utils","httr")
# Attach every package with library() so a missing one raises an error
# immediately (require() would only return FALSE). invisible() suppresses
# the list that lapply() would otherwise print.
invisible(lapply(packs, library, character.only = TRUE))

# Build the request URL: the query is percent-encoded (including reserved
# characters) and the endpoint is asked for XML output.
request <- paste0("http://data.ordnancesurvey.co.uk/datasets/os-linked-data/apis/sparql?query=",URLencode(query, reserved = TRUE),"&output=xml")

# GET request from the endpoint.
data <- GET(request)
# Fail with the HTTP status instead of trying to parse an error page as XML.
stop_for_status(data)
# Decode the body with an explicit encoding (avoids httr's encoding guess)
# and parse it into an XML document.
data.xml <- xmlParse(content(data, as = "text", encoding = "UTF-8"), asText = TRUE)
方法 2:使用 SPARQL 包返回数据框;
# Send the query to the endpoint and keep only the `results` element of
# the list that SPARQL() returns. The assignment is on its own line: when
# it shared a line with the comment it was part of the comment, so `qd`
# was never created.
qd <- SPARQL(endpoint, query)$results
无论使用哪种方法,返回的几何(作为 XML 变量或数据框列)都带有全部 GML 标签:
"<gml:Polygon><gml:outerBoundaryIs><gml:LinearRing><gml:coordinates>LONG LIST OF COORDINATE PAIRS</gml:coordinates></gml:LinearRing></gml:outerBoundaryIs></gml:Polygon>"^^<http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral>
当使用方法 2 时,我可以执行以下变通方法来创建多边形,但它看起来很丑陋;
# British National Grid as a proj4 string.
BNG <- "+proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +datum=OSGB36 +units=m +no_defs"

# Attribute columns (everything except the geometry column "g"); attached to
# the SpatialPolygonsDataFrame at the end. drop = FALSE keeps a data frame
# even if only one attribute column remains.
data <- qd[, colnames(qd) != "g", drop = FALSE]

# Geometry column of the results (GML literal text).
geom <- qd[, "g"]

# Keep only the text between the <gml:coordinates> tags.
# The replacement has to be "\\1" (back-reference to the capture group).
# The previous "\1" is an escape for the control character \001, so the
# whole string was replaced by that character and no coordinates survived.
geom.sub <- sub(".*<gml:coordinates> *(.*?) *</gml:coordinates>.*", "\\1", geom)

# Split into "x,y" pairs (space separated), then into x and y (comma
# separated).
# NOTE(review): unlist() pools the pairs of every row into one ring, and the
# greedy leading ".*" keeps only the last <gml:coordinates> element of each
# literal, so multi-row results and multipart/holed polygons are not
# represented correctly by this workaround.
s <- strsplit(as.character(geom.sub), " ")
coords <- data.frame(coords = unlist(s), stringsAsFactors = FALSE)
l <- strsplit(coords$coords, ",")

# One row per pair, two numeric columns (base R instead of plyr::ldply).
df <- as.data.frame(do.call(rbind, l), stringsAsFactors = FALSE)
colnames(df) <- c("x", "y")
df[] <- lapply(df, as.numeric)

# Create the polygon (ID 1), wrap it in a SpatialPolygons object in the BNG
# projection and attach the attribute columns.
geom.list <- Polygons(list(Polygon(df)), 1)
final <- SpatialPolygons(list(geom.list), proj4string = CRS(BNG))
final.df <- SpatialPolygonsDataFrame(final, data)
方法 1 返回一个像这样的 XML 文件;
<?xml version="1.0"?>
<sparql xmlns="http://www.w3.org/2005/sparql-results#">
<head>
<variable name="WestminsterConstituencyName"/>
<variable name="gssCode"/>
<variable name="uri"/>
<variable name="g"/>
</head>
<results>
<result>
<binding name="WestminsterConstituencyName">
<literal>Edinburgh South</literal>
</binding>
<binding name="gssCode">
<literal>S14000024</literal>
</binding>
<binding name="uri">
<uri>http://data.ordnancesurvey.co.uk/id/7000000000033932</uri>
</binding>
<binding name="g">
<literal datatype="http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral"><gml:Polygon><gml:outerBoundaryIs><gml:LinearRing><gml:coordinates>LOTS OF COORDINATE PAIRS HERE</gml:coordinates></gml:LinearRing></gml:outerBoundaryIs></gml:Polygon></literal>
</binding>
</result>
</results>
</sparql>
但我不知道如何根据 XML 结果制作多边形(可写入 shapefile)甚至 JSON。我更喜欢使用 XML,因为我也想访问其他 XML 资源。
另外,方法 2 的处理(字符串拆分等)真的可靠吗?如果数据并不总是符合这种格式怎么办?多部分多边形(multipart polygon)的信息会丢失吗?(我认为会。)有没有更“正规”的方法?
感谢您的帮助。
我不知道这是否符合您对(纯粹的?)XML 的兴趣,但它确实能绘制出地图。我将 WestminsterConstituencyName 参数化了,因此您甚至可以将其转换为函数。
我对 GML 一无所知,但我决定按原样使用它,而不是将坐标解析为数据框之类更通用的结构。经过一些研究,我相信 rgdal 可以充当 OS 的 GML 数据与 leaflet 所需输入之间的桥梁。SPARQL 查询中的 str() 字符串转换只是省去了在 R 中删除引号和字面量类型后缀的步骤。我猜 rgdal 也许能够直接处理 OS 返回的 GML 多边形,但把多边形包装进一段定义了图层和要素的 GML 中,对我来说更清楚。
合理的下一步是尝试为 readOGR 使用文本连接,这样您就不需要先保存再打开文件。
这是一个 Shiny 实现。我没有尝试任何性能改进,例如缓存。
# Attach packages with library() so a missing one stops the script at once
# (require() would only return FALSE).
library(SPARQL)
library(XML)
library(leaflet)
# requires some system libraries
# I followed this for ubuntu
# http://www.sarasafavi.com/installing-gdalogr-on-ubuntu.html
# might also need to do something like
# sudo apt-get install libgdal-dev libproj-dev
library(rgdal)

# Constituency to look up; spliced into the SPARQL query below.
WestminsterConstituency <- "Edinburgh South"
endpoint <-
  "http://data.ordnancesurvey.co.uk/datasets/os-linked-data/apis/sparql"

# str(?g) returns the GML as a plain string, so no quotes or datatype
# suffix have to be stripped in R.
query <- paste0(
  "PREFIX geom: <http://data.ordnancesurvey.co.uk/ontology/geometry/>
PREFIX geog: <http://data.ordnancesurvey.co.uk/ontology/admingeo/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT ?WestminsterConstituencyName ?gssCode ?uri (str(?g) as ?gstr)
WHERE
{
?uri a geog:WestminsterConstituency;
skos:prefLabel ?WestminsterConstituencyName;
geog:gssCode ?gssCode;
geom:extent ?geom .
?geom geom:asGML ?g.
}
HAVING(?WestminsterConstituencyName='",
  WestminsterConstituency,
  "')"
)

qd <- SPARQL(endpoint, query)$results
# Stop with a clear message when the query matched nothing, rather than
# failing inside the XML parser on empty input.
if (NROW(qd) == 0L) {
  stop("No Westminster constituency named '", WestminsterConstituency,
       "' was returned by the endpoint", call. = FALSE)
}

# Parse the returned GML polygon text.
xmlres <-
  xmlTreeParse(qd$gstr, asText = TRUE, useInternalNodes = TRUE)

# Minimal OGR-style GML feature collection: one layer
# ("WestminsterConstituency") holding one feature with an empty geometry
# slot that is filled in below.
template.text <- '<?xml version="1.0" encoding="utf-8" ?>
<ogr:FeatureCollection
xmlns:ogr="http://ogr.maptools.org/"
xmlns:gml="http://www.opengis.net/gml">
<gml:featureMember>
<WestminsterConstituency fid="0">
<ogr:geometryProperty>
</ogr:geometryProperty>
</WestminsterConstituency>
</gml:featureMember>
</ogr:FeatureCollection>'

# (the node-adding approach was taken from an external reference whose link
# is missing here)
template.xml <- xmlTreeParse(template.text,
                             useInternalNodes = TRUE,
                             asText = TRUE)
template.top <- xmlRoot(template.xml)

# Add a NAME attribute node to the feature element.
name.node <- newXMLNode("NAME", WestminsterConstituency)
template.node <- xmlElementsByTagName(el = template.top,
                                      name = "WestminsterConstituency",
                                      recursive = TRUE)
addChildren(template.node[[1]], kids = list(name.node))

# Insert the parsed polygon into the geometryProperty element.
template.node <- xmlElementsByTagName(el = template.top,
                                      name = "geometryProperty",
                                      recursive = TRUE)
addChildren(template.node[[1]], kids = list(xmlres))

# Write the assembled GML to disk so that readOGR() can open it.
saveXML(template.top, "expanded.gml")

# The code below needs WC, so fail here with a clear message if the GML
# driver is unavailable. Previously the read was skipped (or its error
# swallowed by try()) and the script failed later at proj4string(WC).
if (!("GML" %in% ogrDrivers()$name)) {
  stop("This GDAL build has no GML driver; cannot read 'expanded.gml'",
       call. = FALSE)
}
WC <- readOGR(dsn = "expanded.gml", layer = "WestminsterConstituency")
print(summary(WC))

# http://www.alex-singleton.com/R-Tutorial-Materials/7-converting-coordinates.pdf
ukgrid <- "+init=epsg:27700"
latlong <- "+init=epsg:4326"

# http://gis.stackexchange.com/questions/123212/assign-crs-to-shapefile-in-r
# Declare the data as EPSG:27700, then reproject to EPSG:4326 for leaflet.
proj4string(WC) <- CRS(ukgrid)
WC.LL <- spTransform(WC, CRS(latlong))

# Base map tiles plus the constituency polygon.
m <- leaflet()
m <- addTiles(m)
m <- addPolygons(data = WC.LL, map = m)
m