在数据框中按行程计算距离

Calculate distances by trip in dataframe

我有一个数据框,其中有一只企鹅在福克兰群岛进行了 46 次旅行,我计算了几个参数,假设每次觅食旅行都是在企鹅距离海岸超过 1000 米时停止,并在返回时停止<1000 米 penguin 1 data in .xlsx .Rda al dataframe stored:

library(sp)
>p.1 

 date               | lon          | lat          | trip | distancetocoast
2014-06-11 19:02:00 | -58,3508585  | -51,88438373 | 1    | 2236.067977
2014-06-12 01:02:00 | -58,35589725 | -51,88349529 | 1    | 1000
2014-06-12 13:02:00 | -58,27224941 | -51,91903677 | 1    | 7211,102551
2014-06-12 19:02:00 | -58,27974654 | -51,90535003 | 1    | 5830,951895
2014-06-13 01:02:00 | -58,32331901 | -51,89410464 | 1    | 3605,551275
2014-06-13 07:02:00 | -58,35833139 | -51,88809227 | 1    | 1414,213562
2014-06-13 13:02:00 | -58,35617673 | -51,88156281 | 1    | 1000
2014-06-13 19:02:00 | -58,34055711 | -51,89002367 | 1    | 2236,067977
2014-06-14 01:02:00 | -58,34982536 | -51,8715761  | 2    | 1000
2014-06-14 13:02:00 | -58,3073814  | -51,92722937 | 2    | 7071,067812
2014-06-14 19:02:00 | -58,34581314 | -51,86761133 | 3    | 1000
2014-06-15 01:02:00 | -58,34050624 | -51,88382088 | 3    | 1414,213562
2014-06-15 13:02:00 | -58,2974691  | -51,91795326 | 3    | 6324,55532
2014-06-15 19:02:00 | -58,19881901 | -51,95172233 | 3    | 13000
2014-06-16 01:02:00 | -58,1348416  | -51,98673766 | 3    | 18788,29423
2014-06-16 07:02:00 | -57,99399544 | -52,06988191 | 3    | 28861,73938
2014-06-16 13:02:00 | -58,00469754 | -52,02795069 | 3    | 26627,05391
2014-06-16 19:02:00 | -57,92758675 | -52,02184666 | 3    | 29000
2014-06-17 01:02:00 | -57,91658235 | -51,99748699 | 3    | 28284,27125
2014-06-17 07:02:00 | -57,77015528 | -51,99031797 | 3    | 30805,8436
2014-06-17 13:02:00 | -57,99601712 | -51,91519551 | 3    | 17804,49381
2014-06-17 19:02:00 | -58,06820013 | -51,92972737 | 3    | 14866,06875
2014-06-18 01:02:00 | -58,19845185 | -51,89522513 | 3    | 7615,773106
2014-06-18 07:02:00 | -58,35241361 | -51,88015998 | 3    | 1000
2014-06-18 13:02:00 | -58,35603546 | -51,88336878 | 3    | 1000
2014-06-18 19:02:00 | -58,33350332 | -51,87308427 | 3    | 1000
2014-06-19 01:02:00 | -58,33839581 | -51,87846631 | 3    | 1414,213562
2014-06-19 07:02:00 | -58,42661519 | -51,80902388 | 4    | 0
2014-06-19 13:02:00 | -58,30461883 | -51,93745837 | 4    | 7810,249676
2014-06-19 19:02:00 | -58,18362875 | -51,96475914 | 4    | 14317,82106

我使用这段代码计算矩阵中每个点的距离:

distancebetwenpoints=spDists(locs1_utm, longlat=FALSE)
p.1$dist=distancebetwenpoints   #INCLUDE COLUMN DISTANCE TO THE COLONY OF EACH POINT

然而,这在数据框 p.1 中包含一个 779 值的巨大矩阵,我尝试了其他方法但我无法使其工作,并且使用该矩阵我无法真正正确地研究统计数据,因此,如何才能我计算企鹅每次旅行的距离?

FalklandCRS = CRS("+proj=utm +zone=21 +south +datum=WGS84 +units=m +no_defs") 
locs1 = sp::SpatialPointsDataFrame(coords = cbind(subset(p.1$lon, p.1$id == 1), subset(p.1$lat, p.1$id == 1)), data = subset(p.1, p.1$id == 1), proj4string=CRS("+proj=longlat +datum=WGS84"))
locs1_utm = sp::spTransform(locs1, FalklandCRS)

head(p.1)
id                date       lon       lat   lon.025     lon.5   lon.975   lat.025     lat.5   lat.975
1  1 2014-06-11 19:02:00 -58.35086 -51.88438 -58.35466 -58.35112 -58.34588 -51.88563 -51.88442 -51.88295
2  1 2014-06-12 01:02:00 -58.35590 -51.88350 -58.36226 -58.35594 -58.34932 -51.88602 -51.88350 -51.88068
4  1 2014-06-12 13:02:00 -58.27225 -51.91904 -58.32643 -58.26749 -58.24544 -51.93304 -51.91970 -51.90113
5  1 2014-06-12 19:02:00 -58.27975 -51.90535 -58.51877 -58.27893 -58.02546 -52.05056 -51.90636 -51.75887
6  1 2014-06-13 01:02:00 -58.32332 -51.89410 -58.56753 -58.32094 -58.09604 -52.04776 -51.89361 -51.74586
7  1 2014-06-13 07:02:00 -58.35833 -51.88809 -58.37411 -58.35856 -58.34158 -51.89519 -51.88818 -51.88029
  bathy    bathy2 distancetocoast
1    -1  4.080409        2236.068
2    -1  4.080409        1000.000
4    -6 -5.849781        7211.103
5    -4 -5.308407        5830.952
6    -1 -2.060174        3605.551
7    -1 -1.450360        1414.214

我冒昧地从头开始创建了一个解决方法

library(lubridate)
library(data.table)

dt<-data.table(
  date=ymd_hms(c("2014-06-11 19:02:00"
             ,"2014-06-12 01:02:00"
             ,"2014-06-12 13:02:00"
             ,"2014-06-12 19:02:00"
             ,"2014-06-13 01:02:00"
             ,"2014-06-13 07:02:00"
             ,"2014-06-13 13:02:00"
             ,"2014-06-13 19:02:00"
             ,"2014-06-14 01:02:00"
             ,"2014-06-14 13:02:00"
             ,"2014-06-14 19:02:00"
             ,"2014-06-15 01:02:00"
             ,"2014-06-15 13:02:00"
             ,"2014-06-15 19:02:00"
             ,"2014-06-16 01:02:00"
             ,"2014-06-16 07:02:00"
             ,"2014-06-16 13:02:00"
             ,"2014-06-16 19:02:00"
             ,"2014-06-17 01:02:00"
             ,"2014-06-17 07:02:00"
             ,"2014-06-17 13:02:00"
             ,"2014-06-17 19:02:00"
             ,"2014-06-18 01:02:00"
             ,"2014-06-18 07:02:00"
             ,"2014-06-18 13:02:00"
             ,"2014-06-18 19:02:00"
             ,"2014-06-19 01:02:00"
             ,"2014-06-19 07:02:00"
             ,"2014-06-19 13:02:00"
             ,"2014-06-19 19:02:00"))
  ,lon=c(-58.3508585 
         ,-58.35589725
         ,-58.27224941
         ,-58.27974654
         ,-58.32331901
         ,-58.35833139
         ,-58.35617673
         ,-58.34055711
     ,-58.34982536
     ,-58.3073814 
     ,-58.34581314
     ,-58.34050624
     ,-58.2974691 
     ,-58.19881901
     ,-58.1348416 
     ,-57.99399544
     ,-58.00469754
     ,-57.92758675
     ,-57.91658235
     ,-57.77015528
     ,-57.99601712
     ,-58.06820013
     ,-58.19845185
     ,-58.35241361
     ,-58.35603546
     ,-58.33350332
     ,-58.33839581
     ,-58.42661519
     ,-58.30461883
     ,-58.18362875)
  ,lat=c(-51.88438373 
     ,-51.88349529 
     ,-51.91903677 
     ,-51.90535003 
     ,-51.89410464 
     ,-51.88809227 
     ,-51.88156281 
     ,-51.89002367 
     ,-51.8715761  
     ,-51.92722937 
     ,-51.86761133 
     ,-51.88382088 
     ,-51.91795326 
     ,-51.95172233 
     ,-51.98673766 
     ,-52.06988191 
     ,-52.02795069 
     ,-52.02184666 
     ,-51.99748699 
     ,-51.99031797 
     ,-51.91519551 
     ,-51.92972737 
     ,-51.89522513 
     ,-51.88015998 
     ,-51.88336878 
     ,-51.87308427 
     ,-51.87846631 
     ,-51.80902388 
     ,-51.93745837 
     ,-51.96475914 )
  ,trip=c( 1 
       , 1 
       , 1 
       , 1 
       , 1 
       , 1 
       , 1 
       , 1 
       , 2 
       , 2 
       , 3 
       , 3 
       , 3 
       , 3 
       , 3 
       , 3 
       , 3 
       , 3 
       , 3 
       , 3 
       , 3 
       , 3 
       , 3 
       , 3 
       , 3 
       , 3 
       , 3 
       , 4 
       , 4 
       , 4 )
  ,distance2coast=c(2236.067977
                ,1000
                ,7211.102551
                ,5830.951895
                ,3605.551275
                ,1414.213562
                ,1000
                ,2236.067977
                ,1000
                ,7071.067812
                ,1000
                ,1414.213562
                ,6324.55532
                ,13000
                ,18788.29423
                ,28861.73938
                ,26627.05391
                ,29000
                ,28284.27125
                ,30805.8436
                ,17804.49381
                ,14866.06875
                ,7615.773106
                ,1000
                ,1000
                ,1000
                ,1414.213562
                ,0
                ,7810.249676
                ,14317.82106)

)

计算球面上各点距离的函数:

distanceOfPoints <- function(long1, lat1, long2, lat2) {
  R <- 6371 
  d <- acos(sin(lat1)*sin(lat2) + cos(lat1)*cos(lat2) * cos(long2-long1)) * R
  return(d) #km
}
##yet another function for the distance calculation between geodetic coordinates

gcd.hf <- function(long1, lat1, long2, lat2) {
  R <- 6371 # Earth mean radius [km]
   delta.long <- (long2 - long1)
   delta.lat <- (lat2 - lat1)
  a <- sin(delta.lat/2)^2 + cos(lat1) * cos(lat2) * sin(delta.long/2)^2
  c <- 2 * asin(min(1,sqrt(a)))
  d = R * c
  return(d) # Distance in km
}

此外:

dt<-dt[order(date)]#order chronologically

然后我们根据 "trip":

将数据分成列表
dt_split<-split(dt,by="trip")
names(dt_split)<-paste0("trip",names(dt_split))#and we give name to each element

一个考虑连续行并计算累积距离的简单函数。

calcDistancePerTrip<-function(y){#y is dataframe-element of list
  l1<-lapply(seq_along(1:nrow(y)),function(x){
    dist<-distanceOfPoints(long1 = y[,.(lon)][x],long2= y[,.(lon)][x+1],lat1 = y[,.(lat)][x],lat2 = y[,.(lat)][x+1])
    #you can always use the gcd.hf for the dist calculation
    return(dist)
  })
  l1<-as.numeric(unlist(l1))#list to numeric vector
  tot_dist<-sum(l1,na.rm = T)#cumulative distance per trip 
  return(tot_dist)
}

最后我们 "loop" 它覆盖了初始列表:

lapply(dt_split,calcDistancePerTrip)

屈服于:

$trip1
[1] 531.6949

$trip2
[1] 354.8975

$trip3
[1] 3067.865

$trip4
[1] 1012.005

我发现从 data.frame 获取所有信息的最佳方法是使用 adehabitat 包。 因此,我使用了名为 locs_utm 的 "SpatialPointDataframe"(与上面我称为 locs1_utm 的方法相同),将其转换为具有 "x" 和 "y" 列名称的坐标,制作了一个名为 BirdTrips 的列,该列具有唯一值,因此我可以稍后将其用作 burst 和 split 获取每次旅行的距离和时间,将 id 列与 trip 列相结合。

gent$trip[gent$trip<10]=paste(0,gent$trip[gent$trip<10],sep="")#so that trip 1,2,3 becomes 01,02,03...
gent$BirdTrip=paste(gent$id,gent$trip,sep=".")  #combine the animal id with the trip so you have 1.01,1.02, 1.03 and soon
gent$BirdTrip=as.factor(gent$BirdTrip)

BirdTrip
  1.01
  1.01
  ...
  2.01


foo = as.data.frame(locs_utm) #transform locs_utm-->SpatialPointDataframe into data.frame
foo = cbind.data.frame(foo$coords.x1, foo$coords.x2)  #convert lon lat, columns in coordinatse
names(foo) = c("x", "y")  #convert the columns in "x" and "y" to be able to use the as.ltraj and convert thedata in ltraj forat

tr=as.ltraj(xy=foo,
            date=p.1$date,
            id=p.1$id,
            burst=p.1$BirdTrip)

class(tr)    #[1] "ltraj" "list" 
tr=ld(tr)   #transform the ltraj into data.frame

为了将所有信息收集到一个 data.frame 我使用 as.POSIXct 和 tappply 函数创建了一些向量来获取行程的长度、行程的开始和结束日期以及最大位移。

ts = as.data.frame(as.POSIXct(tapply(tr$date, tr$burst, min, na.rm = T), origin = "1970-01-01")) #trip start
te = as.data.frame(as.POSIXct(tapply(tr$date, tr$burst, max, na.rm = T), origin = "1970-01-01")) #trip end
th = as.data.frame(tapply(tr$dt, tr$burst, sum, na.rm = T)/3600) #trip hours
tl = as.data.frame(tapply(tr$dist, list(tr$burst), sum, na.rm = T)/1000) #trip kms-->LENGTH
td = as.data.frame(sqrt(tapply(tr$R2n, list(tr$burst), max, na.rm = T))/1000) #max displacement kms

resumen = cbind.data.frame(ts[,1], te[,1], as.numeric(th[,1]), as.numeric(tl[,1]), as.numeric(td[,1]))
names(resumen) = c("Start", "End", "Hours", "Length", "Displacement")
View(resumen)