将 List 转换为 zoo 并在 List 上使用 rollapply

convert List to zoo and use rollapply on the List

我想将列表对象转换为 zoo,然后在 zoo 对象上应用 rollapply。下面转载的简短示例(我有 90,000 个这样的文件要处理,使用 UNIX:))。假设我的列表有两个数据框。

1) 我想将每个数据框中的日期转换为这种格式:

dates <- as.Date(paste0(mylist$year, "-", mylist$month, "-", mylist$day), format="%Y-%m-%d")

z <- zoo(mylist, order.by=mylist[,1])

我知道 lapply 可以做到这一点,但我试过了但没有成功。

获得 zoo 对象后,我想使用 rollapply:

library(hydroTSM)#for daily2annual function but aggregate can do 

    x.3max <- rollapply(data=zooobject, width=3, FUN=sum, fill=NA, partial= TRUE,
                         align="center")
    # Maximum value per year of 3-day total rainfall for each one of the simulations
    z.3max.annual <- daily2annual(z.3max,  FUN=max,na.rm=TRUE)#dates=1

上面的代码所做的是将 3 天 window 集中在 zooobject 中数据帧的每一列上,并对值求和。提取 3 天总数的每年最大值。

      mylist<- list(a,a)
mylist<-lapply(mylist, function(x) x[x[["Month"]] %in% c(12,1,2),])# extract data for DJF for individual sites


    library(zoo)
       a= structure(list(Year = c(1975L, 1975L, 1975L, 1975L, 1975L, 1975L
), Month = c(1L, 1L, 1L, 1L, 1L, 1L), Site = structure(c(1L, 
1L, 1L, 1L, 1L, 1L), .Label = "G100", class = "factor"), Day = 1:6, 
    sim01 = c(28.49, 29.04, 27.62, 28.43, 28.69, 29.16), sim02 = c(29.49, 
    30.04, 28.62, 29.43, 29.69, 30.16), sim03 = c(30.49, 31.04, 
    29.62, 30.43, 30.69, 31.16), sim04 = c(31.49, 32.04, 30.62, 
    31.43, 31.69, 32.16), sim05 = c(32.49, 33.04, 31.62, 32.43, 
    32.69, 33.16), sim06 = c(33.49, 34.04, 32.62, 33.43, 33.69, 
    34.16), sim07 = c(34.49, 35.04, 33.62, 34.43, 34.69, 35.16
    ), sim08 = c(35.49, 36.04, 34.62, 35.43, 35.69, 36.16), sim09 = c(36.49, 
    37.04, 35.62, 36.43, 36.69, 37.16), sim10 = c(37.49, 38.04, 
    36.62, 37.43, 37.69, 38.16), sim11 = c(38.49, 39.04, 37.62, 
    38.43, 38.69, 39.16), sim12 = c(39.49, 40.04, 38.62, 39.43, 
    39.69, 40.16), sim13 = c(40.49, 41.04, 39.62, 40.43, 40.69, 
    41.16), sim14 = c(41.49, 42.04, 40.62, 41.43, 41.69, 42.16
    ), sim15 = c(42.49, 43.04, 41.62, 42.43, 42.69, 43.16), sim16 = c(43.49, 
    44.04, 42.62, 43.43, 43.69, 44.16), sim17 = c(44.49, 45.04, 
    43.62, 44.43, 44.69, 45.16), sim18 = c(45.49, 46.04, 44.62, 
    45.43, 45.69, 46.16), sim19 = c(46.49, 47.04, 45.62, 46.43, 
    46.69, 47.16), sim20 = c(47.49, 48.04, 46.62, 47.43, 47.69, 
    48.16)), .Names = c("Year", "Month", "Site", "Day", "sim01", 
"sim02", "sim03", "sim04", "sim05", "sim06", "sim07", "sim08", 
"sim09", "sim10", "sim11", "sim12", "sim13", "sim14", "sim15", 
"sim16", "sim17", "sim18", "sim19", "sim20"), row.names = c(NA, 
6L), class = "data.frame")

输出应类似于:

Year Site Sim01... 
1975 G100 ...
1976 G100 ...
1977 G100 ...

只需要 c(12,1,2) 月份的值。

这会生成一个动物园对象列表,Lz,然后对列表的每个组件执行 rollapply,给出 L2。最后 L3 汇总了一年中每一列的 max

library(zoo)

mylist <- list(a, a) # a is given at bottom of question

Lz <- lapply(mylist, read.zoo, index = 1:3, format = "%Y %m %d")
L2 <- lapply(Lz, rollapply, 3, sum, partial = TRUE)
L3 <- lapply(L2, function(z) aggregate(z, as.numeric(format(time(z), "%Y")), max))

给予:

> L3

[[1]]
     sim01 sim02 sim03 sim04 sim05  sim06  sim07  sim08  sim09  sim10  sim11
1975 86.28 89.28 92.28 95.28 98.28 101.28 104.28 107.28 110.28 113.28 116.28
      sim12  sim13  sim14  sim15  sim16  sim17  sim18  sim19  sim20
1975 119.28 122.28 125.28 128.28 131.28 134.28 137.28 140.28 143.28

[[2]]
     sim01 sim02 sim03 sim04 sim05  sim06  sim07  sim08  sim09  sim10  sim11
1975 86.28 89.28 92.28 95.28 98.28 101.28 104.28 107.28 110.28 113.28 116.28
      sim12  sim13  sim14  sim15  sim16  sim17  sim18  sim19  sim20
1975 119.28 122.28 125.28 128.28 131.28 134.28 137.28 140.28 143.28

已解决

lst1 <- lapply(list.files(pattern=".csv"),function(x) read.table(x,header=TRUE,sep="")) # read all files and data and replace -999.9 with NA

lst2<-lapply(lst1, function(x) x[x[["Month"]] %in% c(6,7,8),])#c(6,7,8) extract data for DJF for individual sites
names(lst2)<-list.files(pattern=".csv")
lapply(lst2,tail,4)
lst3<-lapply(lst2, function(x) x[!(names(x) %in% c("Site"))])
Lz <- lapply(lst3, read.zoo, index = 1:3, format = "%Y %m %d")

L2 <- lapply(Lz, rollapply, 3, sum, partial = TRUE)
L3 <- lapply(L2, function(z) aggregate(z, as.numeric(format(time(z), "%Y")), max))

mapply(
  write.table,
  x=L3, file=paste(names(L3), "csv", sep="."),
  MoreArgs=list(row.names=FALSE, sep=",")
) # write files to folder keeping the list names as file names