从以米为单位的 x 和 y 猜测坐标系,并近似对应的 long 和 lat
guess coordinate system from x and y in meters, and approximate corresponding long and lat
我有这个比利时位置坐标的数据框:
data
# # A tibble: 11 x 4
# LON LAT x y
# <dbl> <dbl> <dbl> <dbl>
# 1 3.618942 50.68165 96227.01 152551.2
# 2 3.473466 50.55899 86074.26 138702.0
# 3 3.442395 50.69979 84369.88 154860.5
# 4 3.293505 50.68766 73127.74 153413.9
# 5 3.352688 50.68009 77876.00 153115.7
# 6 3.567919 50.52372 93229.16 134975.7
# 7 3.333666 50.54388 76183.82 137373.9
# 8 3.394737 50.61322 80806.11 145230.0
# 9 3.410566 50.53073 82041.22 135743.9
# 10 3.613925 50.59610 96907.40 143057.9
# 11 3.502580 50.74147 88860.56 159399.0
我知道 x
和 y
以米为单位,但我不知道使用的是什么约定,我通过挖掘 public 比利时数据的数据库得到它们但是无法从可用信息中弄清楚这一点。
table中的经度和纬度不完全匹配(x
和y
是区域中心的坐标,LAT
和LON
是这些地区居民样本坐标的平均值),但它们很好地说明了两者之间的转换应该是什么。
如何找出x
和y
编码的坐标系?
我查看了程序包 sp
并构建了一些与通用系统 (UTM) 之间的转换函数,这些函数是围绕 @josh-Obrien 对 this question 的回答构建的,但是这些似乎是另一扇门的钥匙。
请在下面查看它们,以防它们可以 adapted/used 解决方案(可能通过某种方式循环 sp::CRS
的参数)?。
我知道库 rgdal
也可以使用与 sp
.
类似的语法进行坐标转换
数据
data <- structure(list(
LON = c(3.6189419546606, 3.47346614446389, 3.44239459327957,
3.29350462630471, 3.35268808777572, 3.56791893543689, 3.33366611318681,
3.39473714826007, 3.41056562146275, 3.61392544406354, 3.50258),
LAT = c(50.6816466868977, 50.5589876530483, 50.6997902260753,
50.6876572958438, 50.6800941411327, 50.523718459466, 50.5438833669109,
50.6132227223641, 50.5307279235646, 50.5960956577015, 50.7414748843137),
x = c(96227.0052771935, 86074.2589595734, 84369.8773101478,
73127.7357132523, 77875.9986049107, 93229.1592839806, 76183.8151614011,
80806.111537044, 82041.2236842105, 96907.4010078463, 88860.5615808823),
y = c(152551.212026743, 138702.046875, 154860.466229839, 153413.886429398,
153115.726084184, 134975.700053095, 137373.913804945, 145229.97987092,
135743.853978207, 143057.883184524, 159399.019607843)),
class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -11L),
.Names = c("LON", "LAT", "x", "y"))
转换函数
#' add UTM coordinates (x and y in km) from WGS84 coordinates (long and lat)
#'
#' @param data a data frame
#' @param out_names names of x and y columns in output data frame
#' @param in_names names of longlat columns in input, by default searches for
#' an unambiguous set of columns with names starting with "lat" and "lon"
#' (case insensitive)
#' @param zone UTM zone, for Belgium it's 31 (the default), see
#' http://www.dmap.co.uk/utmworld.htm
#'
#' @return a data frame with 2 more columns
#' @export
#'
#' @examples
#' xy <- data.frame(ID = 1:2, longitude = c(118, 119), latitude = c(10, 50))
# add_longlat(add_utm(xy))
add_utm <- function(data, out_names = c("x","y"), in_names = NULL, zone = 31){
nms <- names(data)
if(length(temp <- intersect(out_names,nms)))
stop(paste("data already contains",paste(temp,collapse =" and ")))
if(is.null(in_names)){
lon_col <- grep("^lon",nms, ignore.case = TRUE,value = TRUE)
lat_col <- grep("^lat",nms, ignore.case = TRUE,value = TRUE)
if((n <- length(lon_col)) != 1)
stop(sprintf(
"%s columns have names starting with 'lon' (case insensitive)", n))
if((n <- length(lat_col)) != 1)
stop(sprintf(
"%s columns have names starting with 'lon' (case insensitive)", n))
in_names <- c(lon_col, lat_col)
}
new_data <- data[in_names]
sp::coordinates(new_data) <- names(new_data)
sp::proj4string(new_data) <- sp::CRS("+proj=longlat +datum=WGS84") ## for example
res <- sp::spTransform(new_data, sp::CRS(sprintf("+proj=utm +zone=%s ellps=WGS84",zone)))
res <- setNames(as.data.frame(res), out_names)
cbind(data,res)
}
#' add WGS84 coordinates (long and lat) from UTM coordinates (x and y in km)
#'
#' @param data a data frame
#' @param out_names names of longitude and latitude columns in output data frame
#' @param in_names names of longlat columns in input, by default searches for
#' an unambiguous set of columns with names starting with "x" and "y"
#' (case insensitive)
#' @param zone UTM zone, for Belgium it's 31 (the default), see
#' http://www.dmap.co.uk/utmworld.htm
#'
#' @return a data frame with 2 more columns
#' @export
#'
#' @examples
#' xy <- data.frame(ID = 1:2, longitude = c(118, 119), latitude = c(10, 50))
# add_longlat(add_utm(xy))
add_longlat <- function(data, out_names = c("LON","LAT"), in_names = NULL, zone = 31){
nms <- names(data)
if(length(temp <- intersect(out_names,nms)))
stop(paste("data already contains",paste(temp,collapse =" and ")))
if(is.null(in_names)){
lon_col <- grep("^x",nms, ignore.case = TRUE,value = TRUE)
lat_col <- grep("^y",nms, ignore.case = TRUE,value = TRUE)
if((n <- length(lon_col)) != 1)
stop(sprintf(
"%s columns have names starting with 'x' (case insensitive)", n))
if((n <- length(lat_col)) != 1)
stop(sprintf(
"%s columns have names starting with 'y' (case insensitive)", n))
in_names <- c(lon_col, lat_col)
}
new_data <- data[in_names]
sp::coordinates(new_data) <- names(new_data)
sp::proj4string(new_data) <- sp::CRS(sprintf("+proj=utm +zone=%s ellps=WGS84",zone)) ## for example
res <- sp::spTransform(new_data, sp::CRS("+proj=longlat +datum=WGS84"))
res <- setNames(as.data.frame(res), out_names)
cbind(data,res)
}
我认为 可能 是 Belgian Lambert 1972 reference system or something close to it, with EPSG code 31370. I did this basically by searching through a list of the first few EPSG codes from a search for Belgian ones,假设 x_y 坐标在那个 CRS 中,然后转换为 WGS84 以与纬度和经度。这是我的代码;您可以看到 31370 的均方根误差约为 490m,低于我搜索过的其他误差。 (你显然可以扩大搜索范围,我尝试了一些,但这是我找到的最接近的)。请注意使用 possibly
来处理以下事实:当没有找到 EPSG 代码的转换时,代码会出错,因为我找不到所有可用代码的简单列表。我也不知道这里的预期精度是多少,因为你说经纬度点与未知的 crs 点不完全相同。根据区域的大小和似是而非的采样模式,470 米可能是一个决定性因素或表明我们还很遥远的指标...
library(tidyverse)
library(sf)
#> Linking to GEOS 3.6.1, GDAL 2.1.3, PROJ 4.9.3
data <- structure(list(LON = c(3.6189419546606, 3.47346614446389, 3.44239459327957, 3.29350462630471, 3.35268808777572, 3.56791893543689, 3.33366611318681, 3.39473714826007, 3.41056562146275, 3.61392544406354, 3.50258), LAT = c(50.6816466868977, 50.5589876530483, 50.6997902260753, 50.6876572958438, 50.6800941411327, 50.523718459466, 50.5438833669109, 50.6132227223641, 50.5307279235646, 50.5960956577015, 50.7414748843137), x = c(96227.0052771935, 86074.2589595734, 84369.8773101478, 73127.7357132523, 77875.9986049107, 93229.1592839806, 76183.8151614011, 80806.111537044, 82041.2236842105, 96907.4010078463, 88860.5615808823), y = c(152551.212026743, 138702.046875, 154860.466229839, 153413.886429398, 153115.726084184, 134975.700053095, 137373.913804945, 145229.97987092, 135743.853978207, 143057.883184524, 159399.019607843)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -11L), .Names = c("LON", "LAT", "x", "y"))
epsg_belgium <- c(3447, 3812, 31370, 31300, 21500, 4809, 4313, 4215, 25832, 8370, 6190, 5710, 25831)
guess_crs <- function(tbl, lon_lat, x_y, epsg_codes) {
if(! requireNamespace("lwgeom"))
stop("Package 'lwgeom' must be installed to use `guess_crs`")
wgs84 <- tbl %>%
st_as_sf(coords = lon_lat, crs = 4326)
compare_crs <- function(epsg) {
tbl %>%
st_as_sf(coords = x_y, crs = epsg) %>%
st_transform(4326) %>%
st_distance(wgs84, by_element = TRUE) %>%
as.numeric %>%
`^`(2) %>%
mean %>%
sqrt
}
poss_commpare <- possibly(compare_crs, otherwise = Inf)
tibble(
crs = epsg_codes,
rms_dist = map_dbl(epsg_codes, poss_commpare)
) %>%
arrange(rms_dist)
}
guess_crs(data, c("LON", "LAT"), c("x", "y"), epsg_belgium)
#> # A tibble: 13 x 2
#> crs rms_dist
#> <dbl> <dbl>
#> 1 31370 492.
#> 2 6190 492.
#> 3 21500 533.
#> 4 31300 1101.
#> 5 3447 1695.
#> 6 3812 706664.
#> 7 25832 5466924.
#> 8 25831 5478500.
#> 9 4809 9862261.
#> 10 4215 9882639.
#> 11 8370 Inf
#> 12 5710 Inf
#> 13 4313 NA
由 reprex package (v0.2.1)
于 2019-02-08 创建
我有这个比利时位置坐标的数据框:
data
# # A tibble: 11 x 4
# LON LAT x y
# <dbl> <dbl> <dbl> <dbl>
# 1 3.618942 50.68165 96227.01 152551.2
# 2 3.473466 50.55899 86074.26 138702.0
# 3 3.442395 50.69979 84369.88 154860.5
# 4 3.293505 50.68766 73127.74 153413.9
# 5 3.352688 50.68009 77876.00 153115.7
# 6 3.567919 50.52372 93229.16 134975.7
# 7 3.333666 50.54388 76183.82 137373.9
# 8 3.394737 50.61322 80806.11 145230.0
# 9 3.410566 50.53073 82041.22 135743.9
# 10 3.613925 50.59610 96907.40 143057.9
# 11 3.502580 50.74147 88860.56 159399.0
我知道 x
和 y
以米为单位,但我不知道使用的是什么约定,我通过挖掘 public 比利时数据的数据库得到它们但是无法从可用信息中弄清楚这一点。
table中的经度和纬度不完全匹配(x
和y
是区域中心的坐标,LAT
和LON
是这些地区居民样本坐标的平均值),但它们很好地说明了两者之间的转换应该是什么。
如何找出x
和y
编码的坐标系?
我查看了程序包 sp
并构建了一些与通用系统 (UTM) 之间的转换函数,这些函数是围绕 @josh-Obrien 对 this question 的回答构建的,但是这些似乎是另一扇门的钥匙。
请在下面查看它们,以防它们可以 adapted/used 解决方案(可能通过某种方式循环 sp::CRS
的参数)?。
我知道库 rgdal
也可以使用与 sp
.
数据
data <- structure(list(
LON = c(3.6189419546606, 3.47346614446389, 3.44239459327957,
3.29350462630471, 3.35268808777572, 3.56791893543689, 3.33366611318681,
3.39473714826007, 3.41056562146275, 3.61392544406354, 3.50258),
LAT = c(50.6816466868977, 50.5589876530483, 50.6997902260753,
50.6876572958438, 50.6800941411327, 50.523718459466, 50.5438833669109,
50.6132227223641, 50.5307279235646, 50.5960956577015, 50.7414748843137),
x = c(96227.0052771935, 86074.2589595734, 84369.8773101478,
73127.7357132523, 77875.9986049107, 93229.1592839806, 76183.8151614011,
80806.111537044, 82041.2236842105, 96907.4010078463, 88860.5615808823),
y = c(152551.212026743, 138702.046875, 154860.466229839, 153413.886429398,
153115.726084184, 134975.700053095, 137373.913804945, 145229.97987092,
135743.853978207, 143057.883184524, 159399.019607843)),
class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -11L),
.Names = c("LON", "LAT", "x", "y"))
转换函数
#' add UTM coordinates (x and y in km) from WGS84 coordinates (long and lat)
#'
#' @param data a data frame
#' @param out_names names of x and y columns in output data frame
#' @param in_names names of longlat columns in input, by default searches for
#' an unambiguous set of columns with names starting with "lat" and "lon"
#' (case insensitive)
#' @param zone UTM zone, for Belgium it's 31 (the default), see
#' http://www.dmap.co.uk/utmworld.htm
#'
#' @return a data frame with 2 more columns
#' @export
#'
#' @examples
#' xy <- data.frame(ID = 1:2, longitude = c(118, 119), latitude = c(10, 50))
# add_longlat(add_utm(xy))
add_utm <- function(data, out_names = c("x","y"), in_names = NULL, zone = 31){
nms <- names(data)
if(length(temp <- intersect(out_names,nms)))
stop(paste("data already contains",paste(temp,collapse =" and ")))
if(is.null(in_names)){
lon_col <- grep("^lon",nms, ignore.case = TRUE,value = TRUE)
lat_col <- grep("^lat",nms, ignore.case = TRUE,value = TRUE)
if((n <- length(lon_col)) != 1)
stop(sprintf(
"%s columns have names starting with 'lon' (case insensitive)", n))
if((n <- length(lat_col)) != 1)
stop(sprintf(
"%s columns have names starting with 'lon' (case insensitive)", n))
in_names <- c(lon_col, lat_col)
}
new_data <- data[in_names]
sp::coordinates(new_data) <- names(new_data)
sp::proj4string(new_data) <- sp::CRS("+proj=longlat +datum=WGS84") ## for example
res <- sp::spTransform(new_data, sp::CRS(sprintf("+proj=utm +zone=%s ellps=WGS84",zone)))
res <- setNames(as.data.frame(res), out_names)
cbind(data,res)
}
#' add WGS84 coordinates (long and lat) from UTM coordinates (x and y in km)
#'
#' @param data a data frame
#' @param out_names names of longitude and latitude columns in output data frame
#' @param in_names names of longlat columns in input, by default searches for
#' an unambiguous set of columns with names starting with "x" and "y"
#' (case insensitive)
#' @param zone UTM zone, for Belgium it's 31 (the default), see
#' http://www.dmap.co.uk/utmworld.htm
#'
#' @return a data frame with 2 more columns
#' @export
#'
#' @examples
#' xy <- data.frame(ID = 1:2, longitude = c(118, 119), latitude = c(10, 50))
# add_longlat(add_utm(xy))
add_longlat <- function(data, out_names = c("LON","LAT"), in_names = NULL, zone = 31){
nms <- names(data)
if(length(temp <- intersect(out_names,nms)))
stop(paste("data already contains",paste(temp,collapse =" and ")))
if(is.null(in_names)){
lon_col <- grep("^x",nms, ignore.case = TRUE,value = TRUE)
lat_col <- grep("^y",nms, ignore.case = TRUE,value = TRUE)
if((n <- length(lon_col)) != 1)
stop(sprintf(
"%s columns have names starting with 'x' (case insensitive)", n))
if((n <- length(lat_col)) != 1)
stop(sprintf(
"%s columns have names starting with 'y' (case insensitive)", n))
in_names <- c(lon_col, lat_col)
}
new_data <- data[in_names]
sp::coordinates(new_data) <- names(new_data)
sp::proj4string(new_data) <- sp::CRS(sprintf("+proj=utm +zone=%s ellps=WGS84",zone)) ## for example
res <- sp::spTransform(new_data, sp::CRS("+proj=longlat +datum=WGS84"))
res <- setNames(as.data.frame(res), out_names)
cbind(data,res)
}
我认为 可能 是 Belgian Lambert 1972 reference system or something close to it, with EPSG code 31370. I did this basically by searching through a list of the first few EPSG codes from a search for Belgian ones,假设 x_y 坐标在那个 CRS 中,然后转换为 WGS84 以与纬度和经度。这是我的代码;您可以看到 31370 的均方根误差约为 490m,低于我搜索过的其他误差。 (你显然可以扩大搜索范围,我尝试了一些,但这是我找到的最接近的)。请注意使用 possibly
来处理以下事实:当没有找到 EPSG 代码的转换时,代码会出错,因为我找不到所有可用代码的简单列表。我也不知道这里的预期精度是多少,因为你说经纬度点与未知的 crs 点不完全相同。根据区域的大小和似是而非的采样模式,470 米可能是一个决定性因素或表明我们还很遥远的指标...
library(tidyverse)
library(sf)
#> Linking to GEOS 3.6.1, GDAL 2.1.3, PROJ 4.9.3
data <- structure(list(LON = c(3.6189419546606, 3.47346614446389, 3.44239459327957, 3.29350462630471, 3.35268808777572, 3.56791893543689, 3.33366611318681, 3.39473714826007, 3.41056562146275, 3.61392544406354, 3.50258), LAT = c(50.6816466868977, 50.5589876530483, 50.6997902260753, 50.6876572958438, 50.6800941411327, 50.523718459466, 50.5438833669109, 50.6132227223641, 50.5307279235646, 50.5960956577015, 50.7414748843137), x = c(96227.0052771935, 86074.2589595734, 84369.8773101478, 73127.7357132523, 77875.9986049107, 93229.1592839806, 76183.8151614011, 80806.111537044, 82041.2236842105, 96907.4010078463, 88860.5615808823), y = c(152551.212026743, 138702.046875, 154860.466229839, 153413.886429398, 153115.726084184, 134975.700053095, 137373.913804945, 145229.97987092, 135743.853978207, 143057.883184524, 159399.019607843)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -11L), .Names = c("LON", "LAT", "x", "y"))
epsg_belgium <- c(3447, 3812, 31370, 31300, 21500, 4809, 4313, 4215, 25832, 8370, 6190, 5710, 25831)
guess_crs <- function(tbl, lon_lat, x_y, epsg_codes) {
if(! requireNamespace("lwgeom"))
stop("Package 'lwgeom' must be installed to use `guess_crs`")
wgs84 <- tbl %>%
st_as_sf(coords = lon_lat, crs = 4326)
compare_crs <- function(epsg) {
tbl %>%
st_as_sf(coords = x_y, crs = epsg) %>%
st_transform(4326) %>%
st_distance(wgs84, by_element = TRUE) %>%
as.numeric %>%
`^`(2) %>%
mean %>%
sqrt
}
poss_commpare <- possibly(compare_crs, otherwise = Inf)
tibble(
crs = epsg_codes,
rms_dist = map_dbl(epsg_codes, poss_commpare)
) %>%
arrange(rms_dist)
}
guess_crs(data, c("LON", "LAT"), c("x", "y"), epsg_belgium)
#> # A tibble: 13 x 2
#> crs rms_dist
#> <dbl> <dbl>
#> 1 31370 492.
#> 2 6190 492.
#> 3 21500 533.
#> 4 31300 1101.
#> 5 3447 1695.
#> 6 3812 706664.
#> 7 25832 5466924.
#> 8 25831 5478500.
#> 9 4809 9862261.
#> 10 4215 9882639.
#> 11 8370 Inf
#> 12 5710 Inf
#> 13 4313 NA
由 reprex package (v0.2.1)
于 2019-02-08 创建