R 中时间序列的 运行 window 相关系数
Running window of correlation coefficients for time series in R
我想用R中的运行ning window计算12个变量的相关系数
我的数据存储在一个带有 %m.%d.%Y %H:%M:%S 索引的动物园对象中,12 个变量中的每一个都有 1343 个观测值。我不知道我要使用什么 window 大小,但如果需要我可以更改它。
@Joshua Ulrich 已发布 here 如何使用 rollapplyr 计算滚动相关性,但此示例只有两个变量。由于我有限的 R 经验,我不确定如何将应用族函数之一合并到 运行 所有 12 个变量之间的相关性。
任何指点将不胜感激
我的数据如下所示:
> head(wideRawXTS)
DO0182U09A3 DO0182U09B3 DO0182U09C3 DO0182U21A1 DO0182U21A2 DO0182U21A3 DO0182U21B1 DO0182U21B2 DO0182U21B3
2017-01-20 16:30:00 -101.50 -103.37 -103.86 -104.78 -104.95 -105.33 -102.50 -99.43 -104.05
2017-01-20 16:45:00 -101.32 -102.75 -104.22 -104.51 -103.94 -105.29 -102.82 -101.99 -103.94
2017-01-20 17:00:00 -101.45 -103.30 -103.93 -104.70 -104.82 -105.13 -103.72 -103.95 -104.25
2017-01-20 17:15:00 -100.91 -95.92 -99.22 -103.83 -104.72 -105.19 -103.57 -101.36 -104.09
2017-01-20 17:30:00 -100.91 -103.04 -104.09 -102.15 -104.91 -105.18 -103.88 -104.09 -103.96
2017-01-20 17:45:00 -100.97 -103.67 -104.12 -105.07 -104.23 -97.48 -103.92 -103.89 -104.01
DO0182U21C1 DO0182U21C2 DO0182U21C3
2017-01-20 16:30:00 -104.51 -104.42 -105.17
2017-01-20 16:45:00 -104.74 -104.65 -105.25
2017-01-20 17:00:00 -105.02 -105.04 -105.32
2017-01-20 17:15:00 -103.90 -102.95 -105.16
2017-01-20 17:30:00 -104.75 -105.07 -105.23
2017-01-20 17:45:00 -105.08 -105.14 -104.89
#Importing Data
rawDF <- read.csv("RTWP_DO0182_14Day_Raw.csv",
header = F,
col.names = c("Period Start Time",
"PLMNname",
"RNCname",
"WBTSname",
"WBTSID",
"WCELname",
"WCELID",
"Overload",
"AverageRTWP",
"DC_RTWP_High_PRX",
"RTWP_Threshold"),
colClasses = c("character",
"NULL", #drops PLMN name
"NULL", #drops RNC name
"character",
"integer",
"character",
"integer",
"NULL", #drops Overload
"numeric",
"NULL", #drops DC_RTWP_High_PRX
"NULL"), #drops RTWP_Threshold
strip.white=TRUE,
na.strings = c("NA",
"NULL"),
as.is = T,
skip=2)
#convert period.start.time to POSIXlt
rawDF$Period.Start.Time <- as.POSIXlt(strptime(rawDF$Period.Start.Time,
format="%m.%d.%Y %H:%M:%S"))
#dcast the long data frame to a wide data frame
wideRawDF <- dcast(rawDF,
Period.Start.Time ~ WCELname,
value.var = "AverageRTWP")
#assign the date times as rownames for converting to XTS
rownames(wideRawDF) = wideRawDF[[1]]
#drops the duplicate Period Start Time column since date times are rownames
wideRawDF$Period.Start.Time <- NULL
#Convert wideRawDF to XTS time series
wideRawDF <- as.xts(wideRawDF)
#NA values replaced by interpolated values
wideRawDF <- na.approx(wideRawDF, na.rm = FALSE)
#DF is centered by subtracting the column means and scaled by dividing the
#centered columns by their standard deviations
wideRawDFscaled <- scale(wideRawDF, center = TRUE, scale = TRUE)
#window <- 20
#cors <- combn(names(wideRawDFscaled),2,
# function(p) rollapply(wideRawDFscaled, window ,
# function(x) cor(x[,p[1]],x[,p[2]]),
# by.column = FALSE))
#colnames(cors) <- combn(names(wideRawDFscaled),2,paste,collapse=".")
#Running window of correlation coefficients
Cor <- function(x) {
corr <- cor(wideRawDFscaled)
out <- as.data.frame.table(corr)[lower.tri(corr), ]
with(out, setNames(Freq, paste(Var1, Var2)))
}
slidingCor <- rollapplyr(wideRawDFscaled, 6, Cor, by.column = FALSE)
您可以使用 combn
将相关性应用于每对列。调整您引用的答案:
window <- 20
cors <- combn(colnames(wideRawXTS),2,
function(p) rollapply(wideRawXTS, window ,
function(x) cor(x[,p[1]],x[,p[2]]),
by.column=FALSE))
colnames(cors) <- combn(colnames(wideRawXTS),2, paste, collapse=".")
假设注释中可重复显示 3 列输入并使用宽度为 3 的 window 进行说明,定义相关函数 Cor
接受矩阵并计算其相关矩阵,提取下三角部分以消除冗余和添加列名。现在将其与 rollapplyr
:
一起使用
library(xts)
Cor <- function(x) {
corr <- cor(x)
out <- as.data.frame.table(corr)[lower.tri(corr), ]
with(out, setNames(Freq, paste(Var1, Var2)))
}
rollapplyr(xx, 3, Cor, by.column = FALSE)
给予:
DO0182U09B3.DO0182U09A3 DO0182U09C3.DO0182U09A3 DO0182U09C3.DO0182U09B3
2017-01-20 16:30:00 NA NA NA
2017-01-20 16:45:00 NA NA NA
2017-01-20 17:00:00 0.98573 -0.99613 -0.99671
2017-01-20 17:15:00 0.98629 0.95983 0.99297
2017-01-20 17:30:00 0.52664 0.47475 0.99820
2017-01-20 17:45:00 0.56204 0.50460 0.99769
注意: 输入 xx
的可重现形式是:
xx <- structure(c(-101.5, -101.32, -101.45, -100.91, -100.91, -100.97,
-103.37, -102.75, -103.3, -95.92, -103.04, -103.67, -103.86,
-104.22, -103.93, -99.22, -104.09, -104.12), .Dim = c(6L, 3L), .Dimnames = list(
NULL, c("DO0182U09A3", "DO0182U09B3", "DO0182U09C3")), index = structure(c(1484947800,
1484948700, 1484949600, 1484950500, 1484951400, 1484952300), tzone = "", tclass = c("POSIXct",
"POSIXt")), class = c("xts", "zoo"), .indexCLASS = c("POSIXct",
"POSIXt"), tclass = c("POSIXct", "POSIXt"), .indexTZ = "", tzone = "")
我想用R中的运行ning window计算12个变量的相关系数
我的数据存储在一个带有 %m.%d.%Y %H:%M:%S 索引的动物园对象中,12 个变量中的每一个都有 1343 个观测值。我不知道我要使用什么 window 大小,但如果需要我可以更改它。
@Joshua Ulrich 已发布 here 如何使用 rollapplyr 计算滚动相关性,但此示例只有两个变量。由于我有限的 R 经验,我不确定如何将应用族函数之一合并到 运行 所有 12 个变量之间的相关性。
任何指点将不胜感激
我的数据如下所示:
> head(wideRawXTS)
DO0182U09A3 DO0182U09B3 DO0182U09C3 DO0182U21A1 DO0182U21A2 DO0182U21A3 DO0182U21B1 DO0182U21B2 DO0182U21B3
2017-01-20 16:30:00 -101.50 -103.37 -103.86 -104.78 -104.95 -105.33 -102.50 -99.43 -104.05
2017-01-20 16:45:00 -101.32 -102.75 -104.22 -104.51 -103.94 -105.29 -102.82 -101.99 -103.94
2017-01-20 17:00:00 -101.45 -103.30 -103.93 -104.70 -104.82 -105.13 -103.72 -103.95 -104.25
2017-01-20 17:15:00 -100.91 -95.92 -99.22 -103.83 -104.72 -105.19 -103.57 -101.36 -104.09
2017-01-20 17:30:00 -100.91 -103.04 -104.09 -102.15 -104.91 -105.18 -103.88 -104.09 -103.96
2017-01-20 17:45:00 -100.97 -103.67 -104.12 -105.07 -104.23 -97.48 -103.92 -103.89 -104.01
DO0182U21C1 DO0182U21C2 DO0182U21C3
2017-01-20 16:30:00 -104.51 -104.42 -105.17
2017-01-20 16:45:00 -104.74 -104.65 -105.25
2017-01-20 17:00:00 -105.02 -105.04 -105.32
2017-01-20 17:15:00 -103.90 -102.95 -105.16
2017-01-20 17:30:00 -104.75 -105.07 -105.23
2017-01-20 17:45:00 -105.08 -105.14 -104.89
#Importing Data
rawDF <- read.csv("RTWP_DO0182_14Day_Raw.csv",
header = F,
col.names = c("Period Start Time",
"PLMNname",
"RNCname",
"WBTSname",
"WBTSID",
"WCELname",
"WCELID",
"Overload",
"AverageRTWP",
"DC_RTWP_High_PRX",
"RTWP_Threshold"),
colClasses = c("character",
"NULL", #drops PLMN name
"NULL", #drops RNC name
"character",
"integer",
"character",
"integer",
"NULL", #drops Overload
"numeric",
"NULL", #drops DC_RTWP_High_PRX
"NULL"), #drops RTWP_Threshold
strip.white=TRUE,
na.strings = c("NA",
"NULL"),
as.is = T,
skip=2)
#convert period.start.time to POSIXlt
rawDF$Period.Start.Time <- as.POSIXlt(strptime(rawDF$Period.Start.Time,
format="%m.%d.%Y %H:%M:%S"))
#dcast the long data frame to a wide data frame
wideRawDF <- dcast(rawDF,
Period.Start.Time ~ WCELname,
value.var = "AverageRTWP")
#assign the date times as rownames for converting to XTS
rownames(wideRawDF) = wideRawDF[[1]]
#drops the duplicate Period Start Time column since date times are rownames
wideRawDF$Period.Start.Time <- NULL
#Convert wideRawDF to XTS time series
wideRawDF <- as.xts(wideRawDF)
#NA values replaced by interpolated values
wideRawDF <- na.approx(wideRawDF, na.rm = FALSE)
#DF is centered by subtracting the column means and scaled by dividing the
#centered columns by their standard deviations
wideRawDFscaled <- scale(wideRawDF, center = TRUE, scale = TRUE)
#window <- 20
#cors <- combn(names(wideRawDFscaled),2,
# function(p) rollapply(wideRawDFscaled, window ,
# function(x) cor(x[,p[1]],x[,p[2]]),
# by.column = FALSE))
#colnames(cors) <- combn(names(wideRawDFscaled),2,paste,collapse=".")
#Running window of correlation coefficients
Cor <- function(x) {
corr <- cor(wideRawDFscaled)
out <- as.data.frame.table(corr)[lower.tri(corr), ]
with(out, setNames(Freq, paste(Var1, Var2)))
}
slidingCor <- rollapplyr(wideRawDFscaled, 6, Cor, by.column = FALSE)
您可以使用 combn
将相关性应用于每对列。调整您引用的答案:
window <- 20
cors <- combn(colnames(wideRawXTS),2,
function(p) rollapply(wideRawXTS, window ,
function(x) cor(x[,p[1]],x[,p[2]]),
by.column=FALSE))
colnames(cors) <- combn(colnames(wideRawXTS),2, paste, collapse=".")
假设注释中可重复显示 3 列输入并使用宽度为 3 的 window 进行说明,定义相关函数 Cor
接受矩阵并计算其相关矩阵,提取下三角部分以消除冗余和添加列名。现在将其与 rollapplyr
:
library(xts)
Cor <- function(x) {
corr <- cor(x)
out <- as.data.frame.table(corr)[lower.tri(corr), ]
with(out, setNames(Freq, paste(Var1, Var2)))
}
rollapplyr(xx, 3, Cor, by.column = FALSE)
给予:
DO0182U09B3.DO0182U09A3 DO0182U09C3.DO0182U09A3 DO0182U09C3.DO0182U09B3
2017-01-20 16:30:00 NA NA NA
2017-01-20 16:45:00 NA NA NA
2017-01-20 17:00:00 0.98573 -0.99613 -0.99671
2017-01-20 17:15:00 0.98629 0.95983 0.99297
2017-01-20 17:30:00 0.52664 0.47475 0.99820
2017-01-20 17:45:00 0.56204 0.50460 0.99769
注意: 输入 xx
的可重现形式是:
xx <- structure(c(-101.5, -101.32, -101.45, -100.91, -100.91, -100.97,
-103.37, -102.75, -103.3, -95.92, -103.04, -103.67, -103.86,
-104.22, -103.93, -99.22, -104.09, -104.12), .Dim = c(6L, 3L), .Dimnames = list(
NULL, c("DO0182U09A3", "DO0182U09B3", "DO0182U09C3")), index = structure(c(1484947800,
1484948700, 1484949600, 1484950500, 1484951400, 1484952300), tzone = "", tclass = c("POSIXct",
"POSIXt")), class = c("xts", "zoo"), .indexCLASS = c("POSIXct",
"POSIXt"), tclass = c("POSIXct", "POSIXt"), .indexTZ = "", tzone = "")