一个更快的函数来降低光栅 R 的分辨率

A faster function to lower the resolution of a raster R

我正在使用栅格包来降低大栅格的分辨率,使用这样的函数聚合

require(raster)    
x <- matrix(rpois(1000000, 2),1000)

a <-raster(x)
plot(a)

agg.fun <- function(x,...) 
    if(sum(x)==0){
        return(NA)
    } else {
        which.max(table(x))
    }

a1<-aggregate(a,fact=10,fun=agg.fun)
plot(a1)

我必须聚合的光栅图像要大得多 34000x34000 所以我想知道是否有更快的方法来实现 agg.fun 函数。

试试这个:

fasterAgg.Fun <- function(x,...) {
    myRle.Alt <- function (x1) {
        n1 <- length(x1)
        y1 <- x1[-1L] != x1[-n1]
        i <- c(which(y1), n1)
        which.max(diff(c(0L, i)))
    }

    if (sum(x)==0) {
        return(NA)
    } else {
        myRle.Alt(sort(x, method="quick"))
    }
}

library(rbenchmark)
benchmark(FasterAgg=aggregate(a, fact=10, fun=fasterAgg.Fun),
            AggFun=aggregate(a, fact=10, fun=agg.fun),
            replications=10,
            columns = c("test", "replications", "elapsed", "relative"),
            order = "relative")
       test replications elapsed relative
1 FasterAgg           10  12.896    1.000
2    AggFun           10  30.454    2.362

对于更大的测试对象,我们有:

x <- matrix(rpois(10^8,2),10000)
a <- raster(x)
system.time(a2 <- aggregate(a, fact=10, fun=fasterAgg.Fun))
   user  system elapsed 
111.271  22.225 133.943  

system.time(a1 <- aggregate(a, fact=10, fun=agg.fun))
   user  system elapsed 
282.170  24.327 308.112

如果您想要@digEmAll 在上面的评论中所说的实际值,只需将 myRle.Alt 中的 return 值从 which.max(diff(c(0L, i))) 更改为 x1[i][which.max(diff(c(0L, i)))]

您可以为此使用 gdalUtils::gdalwarp。对我来说,对于具有 1,000,000 个像元的栅格,它的效率低于 @JosephWood 的 fasterAgg.Fun,但对于 Joseph 的更大示例,它要快得多。它要求光栅存在于磁盘上,因此如果您的光栅在内存中,则将写入时间考虑在内。

下面,我使用了 fasterAgg.Fun 的修改 returns 最频繁的 value,而不是它在块中的索引。

library(raster)
x <- matrix(rpois(10^8, 2), 10000)
a <- raster(x)

fasterAgg.Fun <- function(x,...) {
    myRle.Alt <- function (x1) {
        n1 <- length(x1)
        y1 <- x1[-1L] != x1[-n1]
        i <- c(which(y1), n1)
        x1[i][which.max(diff(c(0L, i)))]
    }

    if (sum(x)==0) {
        return(NA)
    } else {
        myRle.Alt(sort(x, method="quick"))
    }
}
system.time(a2 <- aggregate(a, fact=10, fun=fasterAgg.Fun))

##   user  system elapsed 
##  67.42    8.82   76.38

library(gdalUtils)
writeRaster(a, f <- tempfile(fileext='.tif'), datatype='INT1U')
system.time(a3 <- gdalwarp(f, f2 <- tempfile(fileext='.tif'), r='mode', 
                           multi=TRUE, tr=res(a)*10, output_Raster=TRUE))

##   user  system elapsed 
##   0.00    0.00    2.93

注意,有并列时模式的定义略有不同:gdalwarp select是最高值,而上面传递给aggregate的函数(通过 which.max 的行为)select 最低(例如,参见 which.max(table(c(1, 1, 2, 2, 3, 4))))。

此外,将栅格数据存储为整数也很重要(如果适用)。如果数据存储为浮点数(writeRaster 默认值),例如,上面的 gdalwarp 操作在我的系统上需要大约 14 秒。有关可用类型,请参阅 ?dataType

为了好玩,我还创建了一个 Rcpp 函数(不比@JosephWood 快多少):

########### original function 
#(modified to return most frequent value instead of index)
agg.fun <- function(x,...){
    if(sum(x)==0){
        return(NA)
    } else {
        as.integer(names(which.max(table(x))))
    }
}
########### @JosephWood function
fasterAgg.Fun <- function(x,...) {
    myRle.Alt <- function (x1) {
        n1 <- length(x1)
        y1 <- x1[-1L] != x1[-n1]
        i <- c(which(y1), n1)
        x1[i][which.max(diff(c(0L, i)))]
    }

    if (sum(x)==0) {
        return(NA)
    } else {
        myRle.Alt(sort(x, method="quick"))
    }
}
########### Rcpp function
library(Rcpp)
library(inline)

aggrRcpp <- cxxfunction(signature(values='integer'), '
                Rcpp::IntegerVector v(clone(values));
                std::sort(v.begin(),v.end());
                int n = v.size();
                double sum = 0;
                int currentValue = 0, currentCount = 0, maxValue = 0, maxCount = 0;
                for(int i=0; i < n; i++) {
                    int value = v[i];
                    sum += value;
                    if(i==0 || currentValue != value){
                      if(currentCount > maxCount){
                        maxCount = currentCount;
                        maxValue = currentValue;
                      }
                      currentValue = value;
                      currentCount = 0;
                    }else{
                      currentCount++;
                    }
                }
                if(sum == 0){
                  return Rcpp::IntegerVector::create(NA_INTEGER);
                }
                if(currentCount > maxCount){
                  maxCount = currentCount;
                  maxValue = currentValue;
                }
                return wrap( maxValue ) ;
        ', plugin="Rcpp", verbose=FALSE, 
        includes='')
# wrap it to support "..." argument
aggrRcppW <- function(x,...)aggrRcpp(x);

基准:

require(raster)   
set.seed(123)
x <- matrix(rpois(10^8, 2), 10000)
a <- raster(x)

system.time(a1<-aggregate(a,fact=100,fun=agg.fun))
#   user  system elapsed 
#  35.13    0.44   35.87 
system.time(a2<-aggregate(a,fact=100,fun=fasterAgg.Fun))
#   user  system elapsed 
#   8.20    0.34    8.59
system.time(a3<-aggregate(a,fact=100,fun=aggrRcppW))
#   user  system elapsed 
#   5.77    0.39    6.22 

###########  all equal ?
all(TRUE,all.equal(a1,a2),all.equal(a2,a3))
# > [1] TRUE

如果您的目标是聚合,您不需要 max 函数吗?

library(raster)    
x <- matrix(rpois(1000000, 2),1000)
a <- aggregate(a,fact=10,fun=max)