If 语句错误/不应用 if 语句

If statement error / not applying if statement

我正在尝试编写一段代码,其功能类似于 zoo/xts 中的 rollapply 等函数,但更符合我的需要。我先用一些非常简单的示例数据编写了代码,一切正常。但是现在当我在 edhec 数据上运行它时,却收到了一个错误。我不清楚原因,但猜测它与 if 语句有关。有谁能诊断出我收到错误的原因吗?

#rm(list=ls()) #Clear environment
# Emit a form feed to clear the console (the effect of CTRL + L in RStudio).
cat("\014")

library(xts)
library(lubridate)

# Vectorised parity test: TRUE wherever x leaves no remainder when divided by 2.
is.even <- function(x) {
  remainder <- x %% 2
  remainder == 0
}

# Round x up to the next multiple of `to` (values already on a multiple are
# returned unchanged).
roundUp <- function(x, to = 2) {
  whole_multiples <- x %/% to
  # TRUE (counted as 1) whenever x is not already an exact multiple of `to`.
  has_remainder <- as.logical(x %% to)
  to * (whole_multiples + has_remainder)
}

# Slice `data` into year-range subsets and print them.
#
#   data   time series to slice (the examples pass xts objects).
#   window number of years covered by each subset.
#   slide  number of years between the starts of consecutive subsets.
#
# The list of subsets is printed; print() hands the same list back invisibly.
functionTest <- function(data, window, slide){

  nyears_t = nyears(data)

  # Intended to run only when the number of years is odd.
  # NOTE(review): the comparison sits inside the call, so is.even() receives
  # the logical `nyears_t == FALSE`. For any non-zero year count that is FALSE,
  # and FALSE %% 2 == 0 is TRUE, so this branch runs for odd and even counts
  # alike. The intended test is presumably `is.even(nyears_t) == FALSE`.
  if(is.even(nyears_t == FALSE)) {
    nyears_t <- roundUp(nyears_t)
    data_extend <- data

    # NOTE(review): length(data) counts every cell of a multi-column object,
    # not its rows. Indexing past the last row yields NA, and the `:` below
    # then stops with "NA/NaN argument"; nrow(data) gives the row count.
    start_extend <- .indexyear(data)[length(data)]+ 1900 + 1
    end_extend <- start_extend + length(data) - 1
    # Re-stamp the copy with consecutive years starting after the last one.
    # presumably lubridate's update() for date-times -- TODO confirm
    index(data_extend) <- update(index(data),year=start_extend:end_extend)

    # Append the re-stamped copy so later subsets can run past the real data.
    data <- rbind(data, data_extend)

    warning("WARNING! The function has looped to the start of the timeseries. The final list(s) 
            will contain years that do not exist in the dataset. Please modify.")
  }

  # Number of subsets; fractional when `slide` does not divide nyears_t.
  nslides = nyears_t/slide

  # Table of first and last year for every subset.
  year_1 = (.indexyear(data)[1]+1900)

  start <- seq(from = year_1, by = slide, length.out = nslides)
  end <- start + window - 1 

  mat <- matrix(c(start, end), ncol = 2, dimnames = list(c(1:nslides), c("start", "end")))

  # Extract each subset with a "startyear/endyear" range string.
  subsetlist <- vector('list')

  for(i in 1:nslides){
    subset <- data[paste0(mat[i,1], "/", mat[i,2])]
    subsetlist[[i]] <- subset
  }
  print(subsetlist)
}

我在制作上面的函数时使用的示例代码:

# Toy series: one timestamp per year from 2000 through 2008, with the values
# 1, 2, 3, ... in order.
a <- seq(
  from = as.POSIXct("2000", format = "%Y"),
  to = as.POSIXct("2008", format = "%Y"),
  by = "year"
)
a <- as.xts(seq_along(a), order.by = a)
a

functionTest(data = a, window = 3, slide = 2)

我正在测试并收到错误的示例代码:

> data(edhec, package = "PerformanceAnalytics")
> edhec <- edhec[,1:3]
> edhec <- edhec["/2007"]
> head(edhec)
           Convertible Arbitrage CTA Global Distressed Securities
1997-01-31                0.0119     0.0393                0.0178
1997-02-28                0.0123     0.0298                0.0122
1997-03-31                0.0078    -0.0021               -0.0012
1997-04-30                0.0086    -0.0170                0.0030
1997-05-31                0.0156    -0.0015                0.0233
1997-06-30                0.0212     0.0085                0.0217
> functionTest(data = edhec, window = 3, slide = 2)
 Show Traceback

 Rerun with Debug
 Error in start_extend:end_extend : NA/NaN argument 
> 

更新:

在对 if 语句做了如下更新之后(感谢 Joshua Ulrich,见下面的代码),代码现在可以运行了。但是,if 语句仍然存在问题——无论数据集中的年数是偶数还是奇数,它似乎都会执行。虽然这不会影响函数的准确性,但在处理大型数据集时可能会成为问题。如果有人对此有任何想法,将不胜感激。除此之外一切都很好!谢谢

# Fragment from inside the function body; `tz` is not defined in this
# fragment and must come from the enclosing function.
# NOTE(review): the comparison is still inside the is.even() call, so
# is.even() tests the logical `nyears_t == FALSE` rather than the year count
# and the branch runs for odd and even counts alike. The intended test is
# presumably `is.even(nyears_t) == FALSE`.
if(is.even(nyears_t == FALSE)) {
    nyears_t <- roundUp(nyears_t)
    data_extend <- data

    # Not used again within this fragment.
    start_extend <- .indexyear(data)[nrow(data)] + 1900 + 1
    end_extend <- start_extend + nyears(data) - 1

    # Shift every timestamp forward by the number of years in the data.
    dates <- index(data)
    tmp <- as.POSIXlt(dates)
    tmp$year <- tmp$year + nyears(data)
    dates2 <- as.POSIXct(tmp, tz = tz)
    index(data_extend) <- dates2

    # Append the shifted copy to the original series.
    data <- rbind(data, data_extend)

    warning("WARNING! The function has looped to the start of the timeseries. The final list(s) 
            will contain years that do not exist in the dataset. Please modify.")
  }

对矩阵调用 length(xts/zoo 对象的 coredata 就是一个矩阵)会得到元素的总数(即底层向量的长度)。您应该改用 nrow:

# Use the row count rather than length(): on a multi-column object length()
# counts every cell, so it cannot be used to locate the last row.
start_extend <- .indexyear(data)[nrow(data)] + 1900 + 1
end_extend <- start_extend + nrow(data) - 1

如果您不确定 data 是矩阵还是向量,那么您应该使用 NROW 而不是 nrow。对向量调用 nrow 会返回 NULL,而当 x 是向量时,NROW 会返回 length(x)。

我现在已经得到了能达到预期效果的完整答案。谢谢 @Joshua 的帮助——没有它,我想我无法修复这段代码。为了能在大数据上运行它,我还不得不做一些额外的改动。

供感兴趣的人参考,这是我的完整可运行代码(不包括我额外的自定义函数):

# Split an xts series into overlapping multi-year slices.
#
# Arguments:
#   data   xts object to slice.
#   window length of each slice, in calendar years.
#   slide  number of years between the starts of consecutive slices.
#   tz     time zone used for the extended index and for the slice boundaries.
#
# When nyears(data) is odd, a copy of the series with its index shifted forward
# by nyears(data) years is appended, so late slices wrap around to the start of
# the data and contain years that do not exist in the input.
#
# Returns a list of xts objects, one per slice. The list is printed, and
# print() hands it back invisibly.
bootOffset <- function(data, window, slide, tz = "GMT") {
  stopifnot(
    is.numeric(window), length(window) == 1L, window >= 1,
    is.numeric(slide), length(slide) == 1L, slide >= 1
  )

  n_years <- nyears(data)

  # Odd number of years only: extend the series with a time-shifted copy.
  if (!is.even(n_years)) {
    shifted <- as.POSIXlt(index(data))
    shifted$year <- shifted$year + n_years

    data_extend <- data
    index(data_extend) <- as.POSIXct(shifted, tz = tz)
    data <- rbind(data, data_extend)

    n_years <- roundUp(n_years)
  }

  # Round up explicitly. seq() rounds a fractional length.out up while `1:n`
  # truncates, so a `slide` that does not divide the year count used to give
  # mismatched lengths and an error.
  n_slides <- ceiling(n_years / slide)

  first_year <- .indexyear(data)[1] + 1900
  start_years <- seq(from = first_year, by = slide, length.out = n_slides)
  end_years <- start_years + window - 1

  # Boundaries are built in `tz`, matching the extended index, instead of the
  # session time zone. The end is the last second of Dec 31: a bare "-12-31"
  # parses as 00:00:00 on that day and can exclude observations stamped on
  # Dec 31.
  subsetlist <- lapply(seq_len(n_slides), function(i) {
    window(
      data,
      start = as.POSIXct(paste0(start_years[i], "-01-01"), tz = tz),
      end = as.POSIXct(paste0(end_years[i], "-12-31 23:59:59"), tz = tz)
    )
  })

  print(subsetlist)
}

并确认这些结果符合预期:

# Load the edhec dataset from PerformanceAnalytics and keep its first three
# columns.
data(edhec, package = "PerformanceAnalytics")
edhec <- edhec[, 1:3]
edhec08 <- edhec["/2008"]
edhec07 <- edhec["/2007"]

# Keep the results: the check further down inspects boot08 and boot07.
boot08 <- bootOffset(data = edhec08, #EVEN
                     window = 4,
                     slide = 3)

boot07 <- bootOffset(data = edhec07, #ODD
                     window = 4,
                     slide = 3)
#
> bootOffset.Check <- function(boot){
+ dates <- lapply(boot, year)
+ dates <- lapply(dates, unique)
+ dates <- lapply(dates, `length<-`, max(lengths(dates)))
+ as.data.frame(dates, 
+ col.names = paste0("boot_", 1:length(boot)))
+ 
+ }
> 
> nyears(edhec08)
[1] 12
> bootOffset.Check(boot08) #EVEN number of years
  boot_1 boot_2 boot_3 boot_4
1   1997   2000   2003   2006
2   1998   2001   2004   2007
3   1999   2002   2005   2008
4   2000   2003   2006     NA
> 
> nyears(edhec07)
[1] 11
> bootOffset.Check(boot07) #ODD number of years
  boot_1 boot_2 boot_3 boot_4
1   1997   2000   2003   2006
2   1998   2001   2004   2007
3   1999   2002   2005   2008
4   2000   2003   2006   2009
>