将大型 R 数据框转换为 dgCMatrix
Convert large R data frame to dgCMatrix
我正在尝试将 R 中的大型数据框转换为 dgCMatrix,但由于内存需求大得不合理而失败。
library(Matrix)
dim(my_df)
[1] 22865 442010
# Convert to regular matrix
my.M <- as.matrix(my_df)
class(my.M)
[1] "matrix"
以下代码用于模拟一个类似的矩阵(行名和列名很重要,需要保留)[根据评论更新;警告:需要约 80 GB 内存]
# Simulate a dense 22865 x 442010 numeric matrix: replicate() evaluates the
# expression once per column, and abs(rnorm(...)) fills every column with the
# absolute values of standard-normal draws.
# NOTE(review): that is roughly 1.01e10 cells, more than the 2^31 - 1 limit of
# a 32-bit integer; presumably related to the 'Realloc' failures quoted
# below -- confirm.
my.M <- replicate(n = 442010, expr = abs(rnorm(n = 22865, mean = 0, sd = 1)))
# Size check; presumably left commented out together with the value it printed.
#object.size(my.M)
#80852469416 bytes
# Attach the column and row names that must be preserved: a fixed prefix
# followed by the running column / row number.
colnames(my.M) <- c(paste("IncidentBarcode_XXXX",seq(1:ncol(my.M)),sep=""))
rownames(my.M) <- c(paste("group",seq(1:nrow(my.M)),sep=""))
sparse.M <- Matrix(my.M, sparse = T )
Error in (if (is.logical(from)) .m2lgC else .m2dgC)(from) :
'Realloc' could not re-allocate memory (18446744072179369984 bytes)
dgc.Matrix <- as(my.M, "dgCMatrix")
Error in asMethod(object) :
'Realloc' could not re-allocate memory (18446744072179369984 bytes)
我也尝试过之前提到的方法(如 @priya 在评论中所述):
dgr.Matrix <- as(my.M, "dgRMatrix")
*** caught segfault ***
address 0x7ed8fe848868, cause 'memory not mapped'
matSparse <- sparseMatrix(
+ i = rownames(my_df),
+ j = colnames(my_df),
+ dims = c(nrow(my_df), ncol(my_df)),
+ dimnames = list(rownames(my_df), colnames(my_df))
+ )
Error in i + !(m.i || i1) : non-numeric argument to binary operator
你可以尝试按行或按列拆分这个大型数据框(矩阵),把每一块分别转换为 dgCMatrix,然后再将它们合并起来。
# Convert the dense matrix to sparse form in row chunks, so that no single
# conversion has to process the whole matrix at once.
nsplit <- 10
# split() applied directly to a matrix flattens it into plain vectors, which
# loses the column structure and the dimnames. Split the row indices instead.
rowChunks <- split(seq_len(nrow(my.M)), cut(seq_len(nrow(my.M)), nsplit))
splitMxList <- lapply(rowChunks, function(rows) {
  # drop = FALSE keeps a one-row chunk as a matrix; subsetting carries the
  # row and column names along.
  Matrix(my.M[rows, , drop = FALSE], sparse = TRUE)
})
# Stack the sparse chunks back together in their original row order.
# NOTE(review): the combined result must still fit into a single sparse
# matrix, so this presumably only helps when the data contain many zeros.
sparse.M <- Reduce(rbind, splitMxList)
我正在尝试将 R 中的大型数据框转换为 dgCMatrix,但由于内存需求大得不合理而失败。
library(Matrix)
dim(my_df)
[1] 22865 442010
# Convert to regular matrix
my.M <- as.matrix(my_df)
class(my.M)
[1] "matrix"
以下代码用于模拟一个类似的矩阵(行名和列名很重要,需要保留)[根据评论更新;警告:需要约 80 GB 内存]
# Simulate a dense 22865 x 442010 numeric matrix: replicate() evaluates the
# expression once per column, and abs(rnorm(...)) fills every column with the
# absolute values of standard-normal draws.
# NOTE(review): that is roughly 1.01e10 cells, more than the 2^31 - 1 limit of
# a 32-bit integer; presumably related to the 'Realloc' failures quoted
# below -- confirm.
my.M <- replicate(n = 442010, expr = abs(rnorm(n = 22865, mean = 0, sd = 1)))
# Size check; presumably left commented out together with the value it printed.
#object.size(my.M)
#80852469416 bytes
# Attach the column and row names that must be preserved: a fixed prefix
# followed by the running column / row number.
colnames(my.M) <- c(paste("IncidentBarcode_XXXX",seq(1:ncol(my.M)),sep=""))
rownames(my.M) <- c(paste("group",seq(1:nrow(my.M)),sep=""))
sparse.M <- Matrix(my.M, sparse = T )
Error in (if (is.logical(from)) .m2lgC else .m2dgC)(from) :
'Realloc' could not re-allocate memory (18446744072179369984 bytes)
dgc.Matrix <- as(my.M, "dgCMatrix")
Error in asMethod(object) :
'Realloc' could not re-allocate memory (18446744072179369984 bytes)
我也尝试过之前提到的方法(如 @priya 在评论中所述):
dgr.Matrix <- as(my.M, "dgRMatrix")
*** caught segfault ***
address 0x7ed8fe848868, cause 'memory not mapped'
matSparse <- sparseMatrix(
+ i = rownames(my_df),
+ j = colnames(my_df),
+ dims = c(nrow(my_df), ncol(my_df)),
+ dimnames = list(rownames(my_df), colnames(my_df))
+ )
Error in i + !(m.i || i1) : non-numeric argument to binary operator
你可以尝试按行或按列拆分这个大型数据框(矩阵),把每一块分别转换为 dgCMatrix,然后再将它们合并起来。
# Convert the dense matrix to sparse form in row chunks, so that no single
# conversion has to process the whole matrix at once.
nsplit <- 10
# split() applied directly to a matrix flattens it into plain vectors, which
# loses the column structure and the dimnames. Split the row indices instead.
rowChunks <- split(seq_len(nrow(my.M)), cut(seq_len(nrow(my.M)), nsplit))
splitMxList <- lapply(rowChunks, function(rows) {
  # drop = FALSE keeps a one-row chunk as a matrix; subsetting carries the
  # row and column names along.
  Matrix(my.M[rows, , drop = FALSE], sparse = TRUE)
})
# Stack the sparse chunks back together in their original row order.
# NOTE(review): the combined result must still fit into a single sparse
# matrix, so this presumably only helps when the data contain many zeros.
sparse.M <- Reduce(rbind, splitMxList)