【发布时间】:2018-06-20 11:07:46
【问题描述】:
我正在使用parallel 包中的mclapply 在高性能集群上使用lme4 包估计混合 glmer 模型。我遇到了described here 的问题。我应用了添加mc.preschedule=F 的建议修复,但问题仍然存在。代码设置为described here。
我不确定如何解决它,有什么想法吗?我应该切换到另一种并行化方法吗?如果有,怎么做?
这是我的代码,但基本上它遵循链接文章的逻辑:
rm(list = ls())
require(lme4)
require(parallel)
load(file="//share//home//eborbath//ess_rescaled.Rda") # load data
# paralelizing function
f_lmer_mc = function(data, calls, mc.cores) {
require(parallel)
if (is.data.frame(data))
data = replicate(length(calls), data, simplify = F)
for (i in 1:length(data)) attr(data[[i]], "cll") = calls[i]
m.list = mclapply(data, function(i) eval(parse(text = attr(i, "cll"))),
mc.cores = mc.cores, mc.preschedule = FALSE)
return(m.list)
}
##########
# Models #
##########
controls <- c("gender", "agea", "eduyrs", "domicil", "unemployed", "rideol", "union", "pid", "hincfel")
values <- c("conformity", "universalism", "security")
issues <- c("gincdif", "freehms")
agr.ctrl <- c("gdp_wb_ppp", "wb_vae")
lr.agr <- c("lr_rsquar_std", "ri_l2_std")
val.agr <- c("mean_univ", "mean_conf", "mean_secur")
end <- "1 + (1|cntry/countryyear), data=i, control=glmerControl(optimizer='bobyqa', optCtrl = list(maxfun = 1e9)), family=binomial(link='logit'))"
models = c(paste0("glmer(protest ~", paste(c(controls, end), collapse="+")),
paste0("glmer(protest ~", paste(c(controls, values, end), collapse="+")),
paste0("glmer(protest ~", paste(c(controls, values, issues, end), collapse="+")),
paste0("glmer(protest ~ region+", paste(c(controls, values, issues, end), collapse="+")),
paste0("glmer(protest ~ region+", paste(c(controls, values, issues, agr.ctrl, end), collapse="+")),
paste0("glmer(protest ~ region+", paste(c(controls, values, issues, agr.ctrl, lr.agr, end), collapse="+")),
paste0("glmer(protest ~ region+", paste(c(controls, values, issues, agr.ctrl, lr.agr, val.agr, end), collapse="+")), # until here it's only main effects
paste0("glmer(protest ~ region*rideol + region+", paste(c(controls, values, issues, agr.ctrl, lr.agr, val.agr, end), collapse="+")),
paste0("glmer(protest ~ region*rideol*year + region+year+", paste(c(controls, values, issues, agr.ctrl, lr.agr, val.agr, end), collapse="+")),
paste0("glmer(protest ~ region*rideol*year_num + region+year_num+", paste(c(controls, values, issues, agr.ctrl, lr.agr, val.agr, end), collapse="+")),
paste0("glmer(protest ~ region*soc_pop_eleches + region+soc_pop_eleches+", paste(c(controls, values, issues, agr.ctrl, lr.agr, val.agr, end), collapse="+")), # now come the expl. models
paste0("glmer(protest ~ region*rideol*soc_pop_eleches + region+soc_pop_eleches+", paste(c(controls, values, issues, agr.ctrl, lr.agr, val.agr, end), collapse="+")),
paste0("glmer(protest ~ region*ri_l2_std + region+", paste(c(controls, values, issues, agr.ctrl, lr.agr, val.agr, end), collapse="+")),
paste0("glmer(protest ~ region*ri_l2_std*rideol + region+", paste(c(controls, values, issues, agr.ctrl, lr.agr, val.agr, end), collapse="+")),
paste0("glmer(protest ~ region*lr_rsquar_std + region+", paste(c(controls, values, issues, agr.ctrl, lr.agr, val.agr, end), collapse="+")),
paste0("glmer(protest ~ region*lr_rsquar_std*rideol + region+", paste(c(controls, values, issues, agr.ctrl, lr.agr, val.agr, end), collapse="+")),
paste0("glmer(protest ~ region+gov_genlr", paste(c(controls, values, issues, agr.ctrl, lr.agr, val.agr, end), collapse="+")),
paste0("glmer(protest ~ region*gov_genlr + region+gov_genlr", paste(c(controls, values, issues, agr.ctrl, lr.agr, val.agr, end), collapse="+")),
paste0("glmer(protest ~ region*gov_genlr*rideol + region+gov_genlr", paste(c(controls, values, issues, agr.ctrl, lr.agr, val.agr, end), collapse="+")),
paste0("glmer(protest ~ region+pol_lrecon", paste(c(controls, values, issues, agr.ctrl, lr.agr, val.agr, end), collapse="+")),
paste0("glmer(protest ~ region+pol_galtan", paste(c(controls, values, issues, agr.ctrl, lr.agr, val.agr, end), collapse="+")),
paste0("glmer(protest ~ region+pol_galtan+pol_lrecon", paste(c(controls, values, issues, agr.ctrl, lr.agr, val.agr, end), collapse="+")),
paste0("glmer(protest ~ region*pol_lrecon+region+pol_galtan+pol_lrecon", paste(c(controls, values, issues, agr.ctrl, lr.agr, val.agr, end), collapse="+")),
paste0("glmer(protest ~ region*pol_galtan+region+pol_galtan+pol_lrecon", paste(c(controls, values, issues, agr.ctrl, lr.agr, val.agr, end), collapse="+")),
paste0("glmer(protest ~ region*pol_lrecon*rideol+region+pol_galtan+pol_lrecon", paste(c(controls, values, issues, agr.ctrl, lr.agr, val.agr, end), collapse="+")),
paste0("glmer(protest ~ region*pol_galtan*rideol+region+pol_galtan+pol_lrecon", paste(c(controls, values, issues, agr.ctrl, lr.agr, val.agr, end), collapse="+")))
m.list = f_lmer_mc(data, models, 24)
m.1 <- c(m.list[1:3])
m.2 <- c(m.list[4:6])
m.3 <- c(m.list[7:9])
m.4 <- c(m.list[10:12])
m.5 <- c(m.list[13:15])
m.6 <- c(m.list[16:18])
m.7 <- c(m.list[19:21])
m.8 <- c(m.list[22:24])
m.9 <- c(m.list[25:26])
save(m.1, data, file='m_1.RData')
save(m.2, data, file='m_2.RData')
save(m.3, data, file='m_3.RData')
save(m.4, data, file='m_4.RData')
save(m.5, data, file='m_5.RData')
save(m.6, data, file='m_6.RData')
save(m.7, data, file='m_7.RData')
save(m.8, data, file='m_8.RData')
save(m.9, data, file='m_9.RData')
这是相关的错误信息:
Error in sendMaster(try(eval(expr, env), silent = TRUE)) :
long vectors not supported yet: fork.c:378
Calls: f_lmer_mc ... mclapply -> lapply -> FUN -> mcparallel -> sendMaster
谢谢!
更新:
数据是公开可用的European Social Survey 的清理版本。您可以从here (1.8 MB) 下载文件
【问题讨论】:
-
这在没有数据的情况下很难分析。你能用随机数据重现这个问题吗?否则,您可以在本地运行一些模型并检查生成的对象。这些对象通常包含大型数据集,这些数据集在进一步向下不是严格必要的。在这种情况下,您可以在将对象发送回主对象之前将其从对象中删除。
-
谢谢!你是对的,这是一个大型数据集。在我的机器上运行这些模型中的任何一个都需要几个小时。我已通过帖子末尾的链接提供了数据。
-
我看到您正在复制数据集,然后将其发送到所有进程。我有一段时间没有做过并行的事情了,但你可能不需要这样做;小插图说“通过 mclapply,我们使用的所有包和对象都自动在工作人员身上可用。”如果是这样,那将负责处理流程,而 Ralf Stubner 的建议有望负责返回。
-
老实说,我不太确定如何实施这些建议...意味着在发送回之前清理 lme4 输出或依靠 mcapply 处理流程?
标签: r parallel-processing lme4 mclapply