在 R 中解析 Ordnance Survey(英国地形测量局)的 SPARQL GET 请求

Parse Ordnance Survey SPARQL GET request in R

我对这个主题还比较陌生,请多包涵;

我正在从 R 访问 Ordnance Survey 的 SPARQL 端点(Endpoint)以获取他们的 RDF 数据。我在解析返回的 GML 几何属性时遇到了问题。

我的 SPARQL 查询(以此为例)返回爱丁堡南部(Edinburgh South)选区的几何图形,以及几个属性(名称、代码和 URI);

require(SPARQL)
require(XML)

# SPARQL endpoint of the Ordnance Survey linked-data service
endpoint <- "http://data.ordnancesurvey.co.uk/datasets/os-linked-data/apis/sparql"

# Query text: for every Westminster constituency, bind its preferred label,
# GSS code, URI and the GML form of its extent geometry, then keep only the
# one labelled "Edinburgh South".
query <-
"PREFIX geom: <http://data.ordnancesurvey.co.uk/ontology/geometry/>
PREFIX geog: <http://data.ordnancesurvey.co.uk/ontology/admingeo/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

SELECT ?WestminsterConstituencyName ?gssCode ?uri ?g
WHERE 
{ 
?uri a geog:WestminsterConstituency;
skos:prefLabel ?WestminsterConstituencyName;
geog:gssCode ?gssCode;
geom:extent ?geom .
?geom geom:asGML ?g.
}
HAVING(?WestminsterConstituencyName=\"Edinburgh South\")"

在 R 中,我想提取这些结果并用 Leaflet 将其绘制成地图。我目前有两种方法可以获取上述查询的结果;

  1. 通过 httr 包使用 GET 请求并生成 XML 数据;

    # Packages used across the examples. library() stops with an error when a
    # package is missing, whereas require() only returns FALSE and lets the
    # script fail later with a less helpful message.
    packs <- c("sp","stringr","rgdal","leaflet","gsubfn","XML","SPARQL","plyr","RColorBrewer","utils","httr")
    invisible(lapply(packs, library, character.only = TRUE))
    
    # Build the GET URL from the endpoint defined earlier: the query is
    # percent-encoded (reserved characters included) and output=xml asks the
    # endpoint for XML results.
    request <- paste0(endpoint, "?query=", URLencode(query, reserved = TRUE), "&output=xml")
    
    # GET request from the endpoint; stop on an HTTP error status so that an
    # error page is never handed to the XML parser.
    data <- GET(request)
    stop_for_status(data)

    # Parse the response body (as text) into an XML document
    data.xml <- xmlParse(content(data, as = "text"), asText = TRUE)
    
  2. 使用 SPARQL 包返回数据框;

    # Send the query to the endpoint through the SPARQL package and keep only
    # the "results" element of the list it returns.
    qd <- SPARQL(url = endpoint, query = query)[["results"]]
    

无论使用哪种方法,返回的几何(作为 XML 变量或数据框列)都带有全部 GML 标签:

"<gml:Polygon><gml:outerBoundaryIs><gml:LinearRing><gml:coordinates>LONG LIST OF COORDINATE PAIRS</gml:coordinates></gml:LinearRing></gml:outerBoundaryIs></gml:Polygon>"^^<http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral>

当使用方法 2 时,我可以执行以下变通方法来创建多边形,但它看起来很丑陋;

# British National Grid (BNG) proj4 string
BNG <- "+proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +datum=OSGB36 +units=m +no_defs"

# Attribute columns (everything except the geometry) to attach to the
# SpatialPolygonsDataFrame later; drop = FALSE keeps a data frame even when
# only one attribute column is left.
data <- qd[, colnames(qd) != "g", drop = FALSE]

# Geometry column: GML text with the coordinate list inside <gml:coordinates>
geom <- qd[, "g"]

# Keep only the text between the <gml:coordinates> tags. The backreference
# has to be written "\\1" in R source: a bare "\1" is the control character
# U+0001, so every match would be replaced by that single character.
geom.sub <- sub(".*<gml:coordinates> *(.*?) *</gml:coordinates>.*", "\\1", geom)

# Split the list into "x,y" tokens (space separated) and discard empty tokens
# left behind by repeated spaces, which would otherwise become NA rows.
s <- unlist(strsplit(as.character(geom.sub), " ", fixed = TRUE))
s <- s[nzchar(s)]

# Split every token at the comma and stack the pairs into a two-column
# numeric data frame.
l <- strsplit(s, ",", fixed = TRUE)
df <- as.data.frame(do.call(rbind, l), stringsAsFactors = FALSE)
colnames(df) <- c("x", "y")
df[] <- lapply(df, as.numeric)

# create Polygon(s)
# NOTE: all coordinates end up in one ring of one polygon, so this handles a
# single result row with a single outer boundary only; multi-part geometries
# or inner rings are not represented.
geom.list <- Polygons(list(Polygon(df)), 1)
final <- SpatialPolygons(list(geom.list), proj4string = CRS(BNG))
final.df <- SpatialPolygonsDataFrame(final, data)

方法 1 返回一个像这样的 XML 文件;

<?xml version="1.0"?>
<sparql xmlns="http://www.w3.org/2005/sparql-results#">
  <head>
    <variable name="WestminsterConstituencyName"/>
    <variable name="gssCode"/>
    <variable name="uri"/>
    <variable name="g"/>
  </head>
  <results>
    <result>
      <binding name="WestminsterConstituencyName">
        <literal>Edinburgh South</literal>
      </binding>
      <binding name="gssCode">
        <literal>S14000024</literal>
      </binding>
      <binding name="uri">
        <uri>http://data.ordnancesurvey.co.uk/id/7000000000033932</uri>
      </binding>
      <binding name="g">
        <literal datatype="http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral">&lt;gml:Polygon&gt;&lt;gml:outerBoundaryIs&gt;&lt;gml:LinearRing&gt;&lt;gml:coordinates&gt;LOTS OF COORDINATE PAIRS HERE&lt;/gml:coordinates&gt;&lt;/gml:LinearRing&gt;&lt;/gml:outerBoundaryIs&gt;&lt;/gml:Polygon&gt;</literal>
      </binding>
    </result>
  </results>
</sparql>

但我不知道如何根据 XML 结果生成多边形(可写入 shapefile)甚至 JSON。我更喜欢使用 XML,因为我也想访问其他 XML 资源。

另外,方法 2 的处理方式(字符串拆分等)真的可行吗?如果数据并不总是符合预期格式怎么办?多部分多边形的信息会丢失吗?(我认为会。)有没有更“正规”的方法?

感谢您的帮助。

我不知道这是否符合您对(plain vanilla 的?)XML 的兴趣,但它确实能绘制出地图。我将 WestminsterConstituencyName 参数化了,因此您甚至可以把它改写成函数。

我对 GML 一无所知,但我决定按原样使用它,而不是把坐标解析成数据框之类更通用的结构。经过一些研究,我相信 rgdal 可以充当 OS 的 GML 数据与 Leaflet 所期望的输入之间的桥梁。在 SPARQL 中做字符串转换(str())只是为了省去在 R 中删除引号和字符串字面量类型的步骤。我猜 rgdal 也许能够直接处理 OS 返回的 GML 多边形,但对我来说,把多边形包装进定义了图层和要素的 GML 中会更清楚。

合理的下一步是尝试为 readOGR 使用文本连接,这样您就不需要保存然后打开文件。

这是一个 Shiny 实现。我没有尝试任何性能改进,例如缓存。

require(SPARQL)
require(XML)
require(leaflet)

# requires some system libraries
# I followed this for ubuntu
# http://www.sarasafavi.com/installing-gdalogr-on-ubuntu.html
# might also need to do something like
# sudo apt-get install libgdal-dev libproj-dev
library(rgdal)

# Constituency to look up. This is the only value that needs changing to map
# a different constituency.
WestminsterConstituency <- "Edinburgh South"

endpoint <-
  "http://data.ordnancesurvey.co.uk/datasets/os-linked-data/apis/sparql"

# The name is spliced into a single-quoted SPARQL string literal below, so
# backslashes and single quotes are escaped first; otherwise a name such as
# "O'Neil" would end the literal early and break the query.
constituency.literal <-
  gsub("\\", "\\\\", WestminsterConstituency, fixed = TRUE)
constituency.literal <-
  gsub("'", "\\'", constituency.literal, fixed = TRUE)

# Same query as before, except that str(?g) returns the GML as a plain string
# (?gstr) and the constituency label is taken from the variable above.
query <- paste0(
  "PREFIX geom: <http://data.ordnancesurvey.co.uk/ontology/geometry/>
  PREFIX geog: <http://data.ordnancesurvey.co.uk/ontology/admingeo/>
  PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

  SELECT ?WestminsterConstituencyName ?gssCode ?uri (str(?g) as ?gstr)
  WHERE
  {
  ?uri a geog:WestminsterConstituency;
  skos:prefLabel ?WestminsterConstituencyName;
  geog:gssCode ?gssCode;
  geom:extent ?geom .
  ?geom geom:asGML ?g.
  }
  HAVING(?WestminsterConstituencyName='",
  constituency.literal,
  "')"
)

# Run the query and keep the "results" element of the returned list
qd <- SPARQL(url = endpoint, query = query)[["results"]]

# Parse the GML text held in the gstr column into an internal XML document
xmlres <- xmlTreeParse(
  file = qd[["gstr"]],
  useInternalNodes = TRUE,
  asText = TRUE
)

# Skeleton of an OGR-style GML feature collection: a single
# WestminsterConstituency feature whose geometryProperty element is left
# empty here and filled in below.
template.text <- '<?xml version="1.0" encoding="utf-8" ?>
<ogr:FeatureCollection
xmlns:ogr="http://ogr.maptools.org/"
xmlns:gml="http://www.opengis.net/gml">
<gml:featureMember>
<WestminsterConstituency fid="0">
<ogr:geometryProperty>

</ogr:geometryProperty>
</WestminsterConstituency>
</gml:featureMember>
</ogr:FeatureCollection>'

# Parse the skeleton and take its root element.
# (The node-adding approach was borrowed from an external example whose link
# is not preserved here.)
template.xml <- xmlTreeParse(template.text, asText = TRUE, useInternalNodes = TRUE)
template.top <- xmlRoot(template.xml)

# Add the constituency name as a <NAME> child of the feature element
name.node <- newXMLNode("NAME", WestminsterConstituency)
template.node <- xmlElementsByTagName(
  template.top,
  "WestminsterConstituency",
  recursive = TRUE
)
addChildren(template.node[[1]], kids = list(name.node))

# Insert the parsed GML geometry under the geometryProperty element
template.node <- xmlElementsByTagName(
  template.top,
  "geometryProperty",
  recursive = TRUE
)
addChildren(template.node[[1]], kids = list(xmlres))

# Write the completed document to disk so that it can be read back by OGR
saveXML(template.top, file = "expanded.gml")

# Reading the file needs OGR's GML driver. Stop with a clear message when it
# is not available instead of carrying on and failing later because WC was
# never created.
if (!("GML" %in% ogrDrivers()$name)) {
  stop("The GML driver is not available in ogrDrivers(); cannot read 'expanded.gml'",
       call. = FALSE)
}

# Errors raised by readOGR() are deliberately left to propagate: every step
# below needs a valid WC object, so there is nothing useful to continue with.
WC <- readOGR(dsn = "expanded.gml", layer = "WestminsterConstituency")
summary(WC)

# http://www.alex-singleton.com/R-Tutorial-Materials/7-converting-coordinates.pdf
# EPSG:27700 is the British National Grid, EPSG:4326 is WGS84 longitude/latitude
ukgrid  <-  "+init=epsg:27700"
latlong  <-  "+init=epsg:4326"

# http://gis.stackexchange.com/questions/123212/assign-crs-to-shapefile-in-r
# The layer is read without a CRS, so declare the coordinates as British
# National Grid first, then reproject to longitude/latitude for leaflet.
proj4string(WC) <- CRS(ukgrid)
WC.LL <- spTransform(WC, CRS(latlong))

# Build the map in one chain: empty leaflet widget, default tile layer, then
# the reprojected constituency polygon. Evaluating m displays the map.
m <- leaflet() %>%
  addTiles() %>%
  addPolygons(data = WC.LL)
m