※ 引述《f496328mm (为什么会流泪)》之铭言:
: > sapply(c(1:10), function(x) actv_fun(data,bo_matrix,x))
: [1] 0.5 0.5 0.5 3.0 1.5 17.5 9.0 0.5 2.5 2.5
: > parSapply(cl,c(1:10), function(x) actv_fun(data,bo_matrix,x))
: Error in checkForRemoteErrors(val) :
: 6 nodes produced errors; first error: 没有这个函数 "actv_fun"
:
: 一样的东西 sapply都可以执行
: 但是为什么用到parSapply
: 却会出现没有这个函数??
: sapply不是可以用吗?
: 该不会parSapply只能用内定的函数吧?
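parSapply会出错,是因为每个worker都是全新的R process,里面没有你在主程序定义的actv_fun(还有data、bo_matrix),要先用clusterExport把它们送到各个worker才行(写法见下面范例)。
我自己通常都用doSNOW搭配plyr跟foreach来做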
举个例子,我要平行来parse xml档案,里面用到xml2, purrr, stringr跟pipeR:
(可以到这里下载几个xml.gz,放在vd资料夹里面测试看看:
http://tisvcloud.freeway.gov.tw/history/vd/20160501/ )
library(xml2)
library(foreach)
library(doSNOW)
library(plyr)
library(purrr)
library(stringr)
library(pipeR)
# Use about 90% of the available cores (tune to taste). The count is clamped
# to at least 1 because floor(1 * 0.9) is 0 on a single-core machine and
# detectCores() may return NA; either value would make makeCluster() fail.
# (MPI clusters are not covered here.)
nWorkers <- max(1L, floor(parallel::detectCores() * 0.9), na.rm = TRUE)
cl <- makeCluster(nWorkers, type = "SOCK")
registerDoSNOW(cl)
# Remember to call stopCluster(cl) once all parallel work is finished,
# otherwise the worker processes stay alive.

# Attach the required packages on every worker: each worker is a fresh R
# session that does not inherit the packages attached in the master session.
clusterEnv <- clusterEvalQ(cl, {
  library(plyr)
  library(stringr)
  library(purrr)
  library(pipeR)
  library(xml2)
})

# Export functions / variables to the workers. Nothing needs exporting in this
# example, so the call below is only a template: pass the NAMES of the objects
# as a character vector.
# clusterExport(cl, c("x", "y", "some_variables", "some_functions"))
# List the gzipped XML files in the "vd" folder. Both dots are escaped and the
# pattern is anchored at the end so that only names ending in ".xml.gz" match.
vdFiles <- list.files("vd", "\\.xml\\.gz$", full.names = TRUE)
# Parse every file in parallel and stack the per-file results into one
# character matrix. To run llply in parallel it is enough to pass
# .parallel = TRUE after a backend has been registered (registerDoSNOW above).
vdValueDataTable <- llply(vdFiles, function(xmlFileName) {
  # Read the document and descend two levels below the root. A file that
  # cannot be read or parsed is skipped (NULL) instead of aborting the run.
  vdNodes <- tryCatch(
    xmlFileName %>>% read_xml(encoding = "UTF-8") %>>%
      xml_children %>>% xml_children,
    error = function(e) NULL
  )
  if (length(vdNodes) == 0) {
    return(NULL)
  }

  # Three nested levels are flattened to one row per innermost node:
  #   vdNodes   -> attributes vdid, status, datacollecttime
  #   laneNodes -> attributes vsrdir, vsrid, speed, laneoccupy
  #   carNodes  -> attributes carid, volume
  laneNodes <- xml_children(vdNodes)
  carNodes <- xml_children(laneNodes)
  if (length(carNodes) == 0) {
    return(NULL)
  }

  # For every innermost node, the index of its parent and grandparent. The
  # actual child counts are used rather than a fixed "3 per lane", which gives
  # the same result when every lane has 3 children and stays aligned otherwise.
  # NOTE(review): relies on xml_children() returning the children grouped by
  # parent in input order, as the original code already did.
  laneOfCar <- rep(seq_along(laneNodes), times = xml_length(laneNodes))
  vdOfLane <- rep(seq_along(vdNodes), times = xml_length(vdNodes))
  vdOfCar <- vdOfLane[laneOfCar]

  # drop = FALSE keeps the results as matrices even when a single row is
  # selected; otherwise they collapse to plain vectors.
  vdAttrs <- do.call(rbind, xml_attrs(vdNodes))[vdOfCar, , drop = FALSE]
  laneAttrs <- do.call(rbind, xml_attrs(laneNodes))[laneOfCar, , drop = FALSE]
  carAttrs <- do.call(rbind, xml_attrs(carNodes))
  combined <- cbind(carAttrs, vdAttrs, laneAttrs)

  # Keep rows with status "0", no "-99" in laneoccupy / volume / speed,
  # and a volume other than "0".
  keep <- combined[, "laneoccupy"] != "-99" &
    combined[, "volume"] != "-99" &
    combined[, "speed"] != "-99" &
    combined[, "volume"] != "0" &
    combined[, "status"] == "0"

  # Without drop = FALSE a file with exactly one surviving row would yield a
  # vector and the column selection would fail with
  # "incorrect number of dimensions".
  combined[keep, c("vdid", "carid", "datacollecttime", "speed",
                   "laneoccupy", "volume"), drop = FALSE]
}, .parallel = TRUE) %>>% do.call(what = rbind)