查找数据中的突然斜率

Find abrupt slopes in data

我有一个带有 X 和 Y 坐标的人行道表面的高分辨率垂直轮廓,我正在寻找 Y 的突然增加,这可能是由于绊倒危险造成的(归类为增加 6 毫米)。我在 pracma 中使用 findpeaks 命令,但它没有按照我的意愿执行(或者我没有正确使用它)。我需要做的是检测 Y 在指定的 X 距离上至少增加 6 毫米,在本例中假设为 100 毫米,并记录增加的最大值 Y。本质上是最高点的'trip hazard'.

这是数据(XY 的单位是毫米)

    x <- seq (0, 2080, by = 10)

y<- c(1.21, 1.67,   2.10,   2.50,   2.88,   3.24,   3.56,   3.85,   4.11,   4.33,   4.53,   4.70,   4.84,   4.94,   4.99,   4.98,   4.95,   4.95,   4.91,   4.82,   4.80,   4.95,   5.20,   5.39,   5.44,   5.44,   5.48,   5.58,   5.73,
5.93,   6.17,   6.60,   7.13,   7.45,   7.52,   7.53,   7.49,   7.11,   6.46,   6.03,   6.01,   6.16,   6.38,   6.57,   6.78,   7.05,   7.22,   7.14,   6.94,   6.82,   6.80,   6.79,   6.79,   6.86,   7.01,   7.17,   7.26,   7.26,   
7.21,   7.14,   7.13,   7.13,   7.04,   6.89,   6.72,   6.43,   5.90,   5.17,   4.42,   3.80,   3.30,   2.81,   2.38,   2.01,   1.69,   1.45,   1.29,   1.20,   1.17,   1.25,   1.44,   1.65,   1.80,   1.94,   2.11,   2.24,   2.19,   
2.04,   1.97,   2.05,   2.17,   2.29,   2.39,   2.50,   2.61,   2.70,   2.69,   2.62,   2.61,   2.71,   2.84,   2.97,   3.20,   3.50,   3.71,   3.79,   3.80,   3.77,   3.73,   3.67,   3.60,   3.52,   3.40,   3.24,   3.12,   3.10,   
3.14,   3.13,   3.06,   2.96,   2.83,   2.65,   2.32,   1.90,   1.64,   1.62, 1.66, 1.71,   1.85,   2.11,   2.30,   2.37,   2.42,   2.47,   2.53,   2.56,   2.56,   2.59,   2.83,   3.19,   3.43,   3.43,   3.33,   3.19,   2.96,   
2.64,   2.34,   2.18,   2.18,   2.22,   2.27,   2.46,   2.78,   2.96,   2.93,   2.83,   2.68,   2.43,   2.05, 1.65, 1.30,   0.98,   0.66,   0.41,   0.15,   -0.11,  -0.26,  -0.28,  -0.24,  -0.09,  0.30,   0.88,   1.51,
2.06,   2.56,   3.06,   3.49,   3.65,   3.67,   3.92,   4.36,   4.83,   5.47,   6.52,   7.88,   9.30,   10.48,  11.40,  12.24,  13.03,  13.65,  14.12,  14.65,  15.24,  15.81,  16.43,  17.16,  17.97,  18.76,
19.45,  20.04,  20.59,  21.04,  21.39,  21.67,  21.86,  21.95,  21.95,  21.87)

data<- data.frame(x,y)

这是我现在使用的代码

    plot(x, y, ylim=c(0, 30), xlim = c(0, 2200), cex=0.2, type='o')
    grid()

## FROM LEFT TO RIGHT

peaks_1<-data.frame(findpeaks(data$y, minpeakheight = 6, threshold = 0, 
                              nups = 10, ndowns = 0,  minpeakdistance = 1, sortstr=F))

## FROM RIGHT TO LEFT

peaks_2<-data.frame(findpeaks(data$y, minpeakheight = 6, threshold = 0, 
          nups = 0, ndowns = 10,  minpeakdistance = 1, sortstr=F))


peaks<-rbind(peaks_1, peaks_2)

colnames(peaks)<-c("y", "X2", "X3", "X4")

peak_points<- data.frame(merge(peaks, data, by='y'))


    ## NOTE: I HAVE ROUNDED THE RAW DATA FOR THIS EXAMPLE AND SO WHEN THE DATA ARE MERGED, 
    ## IT PRODUCES THREE ADDITIONAL VALUES WHICH WE WILL MANUALLY REMOVE HERE

peak_points<- peak_points[-c(1, 2, 5),]

points(peak_points$x, peak_points$y,pch=19, cex=1,col='maroon')

右边的那个(21.95 毫米)似乎是正确的,也许中间的那个(7.13 毫米),但是左边的那个不对(7.53 毫米)。有没有一种方法可以使用 pracma(或其他任何方法)来指定 nups 命令的最小增加量?

您可以通过对多个 poly 项式进行 step 明智回归来大致计算位置。我们使用最合适的方法来估计 yhat.

fit <- step(lm(as.formula(paste("y ~ ", paste0("I(x^", 1:(length(x)/2.3), ")",
                                               collapse=" + ")))))
yhat <- fit$fitted.values

现在我们可以计算二阶导数了;在大于零的地方我们有局部最小值,在它小于零的地方我们有局部最大值。

lmin <- which(c(FALSE, diff(diff(yhat) > 0) > 0))
lmax <- which(c(FALSE, diff(diff(yhat) < 0) > 0))
lmax <- lmax[lmax > min(lmin)]  ## delete lmax appearing before first lmin

现在我们从lmin中减去lmax,差值> 6就是我们要找的POS

mp <- - mapply(`-`, yhat[lmin], yhat[lmax])
POS <- x[as.numeric(names(mp[mp > 6]))]

看起来像这样:

plot(x, y, cex=0.2, type='o', main="Trip hazard")
grid()
lines(x, yhat, col=6, lty=2)
abline(v=x[lmin], lwd=1, lty=3, col=3)
abline(v=x[lmax], lwd=1, lty=3, col=4)
abline(v=POS, col="red", lwd=2)
legend("topleft", legend=c("y", "yhat", "lmin", "lmax", "POS"), 
       lwd=c(1, 1, 1, 1, 2), lty=c(1, 2, 3, 3, 1), col=c(1, 6, 3, 4, "red"))

我编写了一个程序,其输出是旅行危险的不同点和终点。它采用三个参数:数据增量(您感兴趣的区间内有多少 x 数据点)、海拔阈值和数据集。从那里,它将产生输出,指定高程变化大于允许的位置,并根据输出显示哪个方向。

get.vector.right <- function(i, increment, data){
  return(data$y[i:(i+increment)])
}

get.vector.left <- function(i, increment, data){
  return(data$y[(i - increment):i])
}

get.vector.right.abridged <- function(i, increment, data){
  return(data$y[i : nrow(data)])
}

get.vector.left.abridged <- function(i, increment, data){
  return(data$y[1 : i])
}

print.warning <- function(data, i, increment, direction){
  if(direction == "right"){
    print(paste0("Steep change in vertical distance noted between ", data$x[i], " and ", data$x[(i + increment)]))
  } else if(direction == "left"){
    print(paste0("Steep change in vertical distance noted between ", data$x[i], " and ", data$x[(i - increment)]))
  }
}

check.right.up <- function(vector, increment, vertical.distance, data, i){
  if(max(vector) - vector[1] >= vertical.distance){
    print.warning(data, i, increment, "right")
  }
}

check.right.down <- function(vector, increment, vertical.distance, data, i){
  if(vector[1] - min(vector) >= vertical.distance){
    print.warning(data, i, increment, "right")
  }
}

check.left.up <- function(vector, increment, vertical.distance, data, i){
  if(max(vector) - vector[length(vector)] >= vertical.distance){
    print.warning(data, i, increment, "left")
  }
}

check.left.down <- function(vector, increment, vertical.distance, data, i){
  if(vector[length(vector)] - min(vector) >= vertical.distance){
    print.warning(data, i, increment, "left")
  }
}

check.function <- function(left.vector, right.vector, increment, vertical.distance, data, i){
  check.left.up(left.vector, increment, vertical.distance, data, i)
  check.left.down(left.vector, increment, vertical.distance, data, i)
  check.right.up(right.vector, increment, vertical.distance, data, i)
  check.right.down(right.vector, increment, vertical.distance, data, i)
}



trip.function <- function(increment, vertical.distance, data){
  for(i in 1:nrow(data)){
    if(data$x[i] == min(data$x)){
      get.vector.right(i, increment, data) -> right.vector
      check.right.up(right.vector, increment, vertical.distance, data, i)
      check.right.down(right.vector, increment, vertical.distance, data, i)
    } else if (data$x[i] == max(data$x)){
      get.vector.left(i, increment, data) -> left.vector
      check.left.up(left.vector, increment, vertical.distance, data, i)
      check.left.down(left.vector, increment, vertical.distance, data, i)
    } else {
      if(nrow(data[1:i, ]) <= increment){
        get.vector.left.abridged(i, increment, data) -> left.abridged.vector
        get.vector.right(i, increment, data) -> right.vector
        check.function(left.abridged.vector, right.vector, increment, vertical.distance, data, i)
      } else if (nrow(data[i:nrow(data), ]) <= increment){
        get.vector.right.abridged(i, increment, data) -> right.abridged.vector
        get.vector.left(i, increment, data) -> left.vector
        check.function(left.vector, right.abridged.vector, increment, vertical.distance, data, i)
      } else {
        get.vector.left(i, increment, data) -> left.vector
        get.vector.right(i, increment, data) -> right.vector
        check.function(left.vector, right.vector, increment, vertical.distance, data, i)
      }
    }
    rm(right.vector, left.vector, left.abridged.vector, right.abridged.vector)
  }
}

因此,如果您想知道100mm以内是否有6mm的变化,您可以键入(假设x轴上的10个数据点代表100mm,y轴以mm为单位记录):

trip.function(10, 6, data)

输出为:

[1] "Steep change in vertical distance noted between 1750 and 1850"
[1] "Steep change in vertical distance noted between 1760 and 1860"
[1] "Steep change in vertical distance noted between 1770 and 1870"
[1] "Steep change in vertical distance noted between 1780 and 1880"
[1] "Steep change in vertical distance noted between 1790 and 1890"
[1] "Steep change in vertical distance noted between 1800 and 1900"
[1] "Steep change in vertical distance noted between 1810 and 1910"
[1] "Steep change in vertical distance noted between 1820 and 1920"
[1] "Steep change in vertical distance noted between 1830 and 1930"
[1] "Steep change in vertical distance noted between 1840 and 1940"
[1] "Steep change in vertical distance noted between 1850 and 1750"
[1] "Steep change in vertical distance noted between 1850 and 1950"
[1] "Steep change in vertical distance noted between 1860 and 1760"
[1] "Steep change in vertical distance noted between 1860 and 1960"
[1] "Steep change in vertical distance noted between 1870 and 1770"
[1] "Steep change in vertical distance noted between 1870 and 1970"
[1] "Steep change in vertical distance noted between 1880 and 1780"
[1] "Steep change in vertical distance noted between 1880 and 1980"
[1] "Steep change in vertical distance noted between 1890 and 1790"
[1] "Steep change in vertical distance noted between 1890 and 1990"
[1] "Steep change in vertical distance noted between 1900 and 1800"
[1] "Steep change in vertical distance noted between 1900 and 2000"
[1] "Steep change in vertical distance noted between 1910 and 1810"
[1] "Steep change in vertical distance noted between 1910 and 2010"
[1] "Steep change in vertical distance noted between 1920 and 1820"
[1] "Steep change in vertical distance noted between 1920 and 2020"
[1] "Steep change in vertical distance noted between 1930 and 1830"
[1] "Steep change in vertical distance noted between 1930 and 2030"
[1] "Steep change in vertical distance noted between 1940 and 1840"
[1] "Steep change in vertical distance noted between 1950 and 1850"
[1] "Steep change in vertical distance noted between 1960 and 1860"
[1] "Steep change in vertical distance noted between 1970 and 1870"
[1] "Steep change in vertical distance noted between 1980 and 1880"
[1] "Steep change in vertical distance noted between 1990 and 1890"
[1] "Steep change in vertical distance noted between 2000 and 1900"
[1] "Steep change in vertical distance noted between 2010 and 1910"
[1] "Steep change in vertical distance noted between 2020 and 1920"
[1] "Steep change in vertical distance noted between 2030 and 1930"

数字的顺序表示哪个方向:2030 and 1930表示从x = 2030移动到x = 1930(向左移动),反之亦然。

这是一个简单的暴力破解方法;如果你的数据集不是太大,应该足够了。

# All the code below assumes that `data` is already sorted by x

# Flag every point within the range of the trip hazard
data$trip_hazard = F

# Iterate over every pair of points
for(i in 1:(nrow(data) - 1)) {
  for(j in (i + 1):nrow(data)) {
    # Get the x-coordinates of the points
    x1 = data$x[i]
    x2 = data$x[j]
    # If the points are no more than 100 mm apart, check whether there's a trip
    # hazard between them
    if(x2 - x1 <= 100) {
      # Get the y-coordinates of the points
      y1 = data$y[i]
      y2 = data$y[j]
      # If there's a rise or fall of at least 6 mm, we have a trip hazard; flag
      # all the points in the range accordingly
      if(abs(y2 - y1) >= 6) {
        data$trip_hazard[i:j] = T
      }
    }
    # If the points are more than 100 mm apart, we don't need to keep checking
    # points that are even further apart
    else {
      break
    }
  }
}

# Get the maximum y-value within each trip hazard
library(dplyr)
library(tidyr)
data = data %>%
  mutate(range_id = ifelse(trip_hazard != coalesce(lag(trip_hazard),
                                                   !trip_hazard),
                           x, NA)) %>%
  fill(range_id) %>%
  group_by(range_id) %>%
  mutate(peak = trip_hazard & y == max(y)) %>%
  ungroup() %>%
  dplyr::select(-range_id)

# Plot the sidewalk (repeated from question)
plot(x, y, ylim = c(0, 30), xlim = c(0, 2200), cex = 0.2, type = "o")
# Plot the trip hazards in red
points(data$x[data$trip_hazard], data$y[data$trip_hazard],
       lwd = 4, col = "red", type = "l")
# Plot the highest point within each trip hazard
points(data$x[data$peak], data$y[data$peak], pch = 19, cex = 2, col = "red")