※ 引述《ntpuisbest (阿龙)》之铭言:
: 我有个list
: 长这样
: https://imgur.com/glWVNGb
: 他是一个模拟得到的VCOV variance covariance矩阵
: 我希望做到对应的座标取平均
: 也就是
: https://imgur.com/tcpNNnY
: 希望回传的也是一个三乘三的list
: 不知道怎么做
: 发现lapply 也没用QAQ
给四个方法参考XD
library(abind)
# Example data: an unnamed list of three 3 x 3 matrices filled with
# standard-normal draws.
l <- lapply(seq_len(3L), function(i) matrix(rnorm(9L), nrow = 3L))
# method 1: bind the matrices along a third dimension with abind(), then
# take the mean across that dimension for every (row, column) cell.
apply(X = abind(l, along = 3L), MARGIN = c(1L, 2L), FUN = mean)
#             [,1]       [,2]       [,3]
# [1,]  0.08595378 -0.9663702 -0.7770976
# [2,]  0.13758227  0.4697197  0.2799617
# [3,] -0.57574027 -0.4079516 -0.9508097
# method 2: flatten the list into one vector, reshape it into a
# rows x cols x length(l) array, and average over the third dimension.
# The array dimensions are taken from the data instead of being fixed at
# 3 x 3 x 3, so the snippet still works when the matrix size or the number
# of matrices changes (assumes every matrix in l has the same dimensions).
apply(array(unlist(l), dim = c(dim(l[[1L]]), length(l))), 1:2, mean)
#             [,1]       [,2]       [,3]
# [1,]  0.08595378 -0.9663702 -0.7770976
# [2,]  0.13758227  0.4697197  0.2799617
# [3,] -0.57574027 -0.4079516 -0.9508097
# method 3: fold the list with element-wise addition, then divide the sum
# by the number of matrices.
Reduce(function(acc, m) acc + m, l) / length(l)
#             [,1]       [,2]       [,3]
# [1,]  0.08595378 -0.9663702 -0.7770976
# [2,]  0.13758227  0.4697197  0.2799617
# [3,] -0.57574027 -0.4079516 -0.9508097
# method 4: accumulate the element-wise sum with an explicit loop, then
# divide by the number of matrices.
# seq_along(l)[-1L] visits elements 2..length(l) and is empty when l has a
# single element; 2L:length(l) would instead count down (2, 1) and index
# past the end of the list.
out <- l[[1L]]
for (i in seq_along(l)[-1L]) {
  out <- out + l[[i]]
}
out / length(l)
#             [,1]       [,2]       [,3]
# [1,]  0.08595378 -0.9663702 -0.7770976
# [2,]  0.13758227  0.4697197  0.2799617
# [3,] -0.57574027 -0.4079516 -0.9508097
我做了一下benchmark.... 正如我推文所说的,for比较快XD
# benchmark
# Element-wise mean of a list of matrices, computed with a plain for loop.
#
# Args:
#   l: a non-empty list of numeric matrices (or arrays) that can be added
#      together element-wise, i.e. that share the same dimensions.
#
# Returns:
#   An object shaped like l[[1]] holding the element-wise average of all
#   elements of l.
#
# Raises:
#   An error when l is empty.
forFunc <- function(l) {
  if (length(l) == 0L) {
    stop("`l` must contain at least one matrix", call. = FALSE)
  }
  out <- l[[1L]]
  # seq_along(l)[-1L] is empty for a one-element list, whereas
  # 2L:length(l) would yield c(2, 1) and fail on l[[2]].
  for (i in seq_along(l)[-1L]) {
    out <- out + l[[i]]
  }
  out / length(l)
}
library(microbenchmark)
# Benchmark data: 3000 matrices of 200 x 200 standard-normal draws.
l <- replicate(3e3, matrix(rnorm(200^2), 200), FALSE)
print(object.size(l), units = "Gb") # 0.9 Gb
# Time the four approaches, 20 runs each.
# method2 sizes the third array dimension with length(l): with a hard-coded
# 3, array() kept only the first three matrices and dropped the rest, so it
# averaged (and timed) only a small fraction of the data.
microbenchmark(
  method1 = apply(abind(l, along = 3L), 1:2, mean),
  method2 = apply(
    array(unlist(l), dim = c(nrow(l[[1]]), ncol(l[[1]]), length(l))),
    1:2, mean
  ),
  method3 = Reduce("+", l) / length(l),
  method4 = forFunc(l),
  times = 20L
)
# NOTE(review): the timings below were recorded with the original method2
# (third dimension fixed at 3); the method2 row should be re-measured.
# Unit: milliseconds
#     expr       min        lq      mean    median        uq       max neval
#  method1 2481.3607 2681.3842 2730.1426 2776.8715 2803.6407 2821.4399    20
#  method2  474.9360  485.1195  531.9193  488.4900  582.2409  670.2529    20
#  method3  123.0389  124.6572  144.5512  126.9948  132.6468  310.0517    20
#  method4  121.3197  123.1581  127.6650  126.5533  131.4164  139.4469    20
内存使用方面,abind是里面最花内存的
虽然使用上满简单的,但不建议使用abind
# memory usage
library(data.table)
library(profmem)
# Profile the memory allocated by each of the four methods with profmem();
# one profile per method is stored in memUsageList.
memUsageList <- vector("list", 4L)
memUsageList[[1]] <- profmem({apply(abind(l, along = 3L), 1:2, mean)})
# The third array dimension is length(l); a hard-coded 3 made array() keep
# only the first three matrices, under-reporting what method 2 allocates.
memUsageList[[2]] <- profmem({
  apply(
    array(unlist(l), dim = c(nrow(l[[1]]), ncol(l[[1]]), length(l))),
    1:2, mean
  )
})
memUsageList[[3]] <- profmem({Reduce("+", l) / length(l)})
memUsageList[[4]] <- profmem({forFunc(l)})
# total() gives the bytes allocated in each profile; dividing by 2^20
# converts bytes to Mb. vapply() keeps the result a plain numeric vector.
data.table(methods = paste0("method", 1:4),
           "memory (Mb)" = vapply(memUsageList, total, numeric(1L)) / 2^20)
# NOTE(review): the figures below were recorded with the original method2
# (third dimension fixed at 3); the method2 row should be re-measured.
#    methods memory (Mb)
# 1: method1   5076.6124
# 2: method2    918.5794
# 3: method3    915.6992
# 4: method4    915.6418
以上,供你参考