【问题标题】:R doParallel: couldn't find function(R doParallel:找不到函数)
【发布时间】:2019-07-08 19:23:29
【问题描述】:

我已经定义了以下函数:

# Run a cross-validated random-forest fit ("rf" method of train()) for every
# combination of data set, target and "a"/"b" subset.
#
# Args:
#   dat:             list of data frames; each one is indexed by a `vari`
#                    column that is compared against "a" and "b".
#   targets:         character vector of response names; each becomes the
#                    left-hand side of a model formula.
#   predictors_name: character vector of predictor names; joined with " + "
#                    to form the right-hand side of the formula.
#
# Returns:
#   Nothing useful: `model` is overwritten on every iteration and the last
#   expression of the function is the outer `for` loop.
cv_model <- function(dat, targets, predictors_name){

  # Packages are attached inside the function, i.e. in the calling R session.
  library(randomForest)
  library(caret)
  library(MLmetrics)
  library(Metrics)

  # set up error measures
  # Summary function handed to trainControl(); `data` carries `pred` and `obs`
  # columns and the result is a named numeric vector of five metrics.
  sumfct <- function(data, lev = NULL, model = NULL){
    # NOTE(review): unqualified call. This is the function named in the
    # reported "couldn't find function" error once a parallel backend is
    # registered; presumably the workers do not have MLmetrics attached.
    mape <- MAPE(y_pred = data$pred, y_true = data$obs)
    # NOTE(review): mean() takes `na.rm`, not `na.omit`; this argument is
    # presumably a typo and has no effect on the result -- confirm.
    RMSE <- sqrt(mean((data$pred - data$obs)^2, na.omit = TRUE))
    MAE <- mean(abs(data$obs - data$pred))
    BIAS <- mean(data$obs - data$pred)
    # NOTE(review): R2 is presumably caret::R2 -- TODO confirm.
    Rsquared <- R2(pred = data$pred, obs = data$obs, formula = "corr",    na.rm = FALSE)
    c(MAPE = mape, RMSE = RMSE, MAE = MAE, BIAS = BIAS, Rsquared = Rsquared)
  }

  # Outer loop: one pass per data frame in `dat`.
  for (k in 1:length(dat)) {

    # Split the current data frame by `vari` and drop the `vari` column itself.
    a <- dat[[k]][dat[[k]]$vari == "a", -c(which(names(dat[[k]]) == "vari"))]
    b <- dat[[k]][dat[[k]]$vari == "b", -c(which(names(dat[[k]]) == "vari"))]
    ab <- list(a, b)

    for (i in 1:length(targets)) {
      for (j in 1:length(ab)) {


        # specify trainControl
        # 10-fold CV repeated 10 times, scored with the custom summary above.
        # NOTE(review): `savePred` presumably partial-matches trainControl's
        # `savePredictions` argument -- confirm.
        control <- trainControl(method="repeatedcv", number=10,   repeats=10, search="grid", savePred =T,
                                summaryFunction = sumfct)

        # Candidate mtry values: 1 up to the number of predictor names.
        tunegrid <- expand.grid(mtry=c(1:length(predictors_name)))




        # Same seed before every fit.
        set.seed(42)
        # Formula is built as "<target> ~ <pred1> + <pred2> + ...".
        model <- train(formula(paste0(targets[i], 
                                      " ~ ", 
                                      paste(predictors_name, sep = '',    collapse = ' + '))),
                       data = ab[[j]],
                       method="rf",
                       ntree = 25, 
                       metric= "RMSE", 
                       tuneGrid=tunegrid, 
                       trControl=control)


      }
    }
  }

}

根据本教程 (https://topepo.github.io/caret/parallel-processing.html),我可以通过调用 library(doParallel); cl <- makePSOCKcluster(2); registerDoParallel(cl) 来并行化我的代码。然后,当我按如下方式使用 doParallel 时:

# Response and predictor columns handed to cv_model().
targets <- "weight"
predictors_name <- c("Time", "Chick")

# Plain data-frame copy of ChickWeight; rows 1-10 and 320-350 are tagged "a",
# every remaining row is tagged "b".
dat <- as.data.frame(ChickWeight)
dat$vari <- ifelse(seq_len(nrow(dat)) %in% c(1:10, 320:350), "a", "b")

# Two data sets: rows 1-300 and rows 301-500.
d <- list(dat[seq_len(300), ], dat[seq(301, 500), ])

## start a two-worker PSOCK cluster and register it as the parallel backend
library(doParallel)
cl <- makePSOCKcluster(2)
registerDoParallel(cl)

cv_model(predictors_name = predictors_name, targets = targets, dat = d)

# shut the workers down again
stopCluster(cl)

出现错误消息:couldn't find function "MAPE"

如何在不使用 foreach 语法的情况下解决此问题?

【问题讨论】:

    标签: r for-loop r-caret doparallel


    【解决方案1】:

    如果在调用函数时用 package::function 的形式显式指定包名,代码就可以正常运行。也许还有更优雅的解决方案,但我就是这样让代码无错误地运行的:

    # Fit cross-validated random forests for every combination of data set,
    # target variable and "a"/"b" subset.
    #
    # Args:
    #   dat:             list of data frames, each with a `vari` column whose
    #                    values "a" / "b" define the two subsets to model.
    #   targets:         character vector of response column names.
    #   predictors_name: character vector of predictor column names.
    #
    # Returns:
    #   Invisibly, a nested list of fitted caret `train` objects indexed as
    #   [[data set]][[target]][[subset]]; the subset level is named "a"/"b".
    #
    # Functions used by the summary function are namespace-qualified, so the
    # code also runs on parallel workers where the packages are not attached.
    cv_model <- function(dat, targets, predictors_name){

      # Kept for backward compatibility: callers may rely on these packages
      # being attached after the call.
      library(randomForest)
      library(caret)
      library(MLmetrics)
      library(Metrics)

      # Error measures computed on the held-out predictions of each resample.
      # `data` carries the `obs` and `pred` columns; `lev` and `model` are part
      # of the signature used for caret summary functions and are unused here.
      sumfct <- function(data, lev = NULL, model = NULL){
        residual <- data$obs - data$pred
        mape <- MLmetrics::MAPE(y_pred = data$pred, y_true = data$obs)
        # `na.rm` is the argument mean() understands; a misspelled name would
        # be swallowed by `...` and silently ignored.
        RMSE <- sqrt(mean(residual^2, na.rm = TRUE))
        MAE <- mean(abs(residual))
        BIAS <- mean(residual)
        Rsquared <- caret::R2(pred = data$pred, obs = data$obs,
                              formula = "corr", na.rm = FALSE)
        c(MAPE = mape, RMSE = RMSE, MAE = MAE, BIAS = BIAS, Rsquared = Rsquared)
      }

      # Resampling scheme and tuning grid do not depend on the loop variables,
      # so they are built once.
      control <- caret::trainControl(method = "repeatedcv", number = 10,
                                     repeats = 10, search = "grid",
                                     savePredictions = TRUE,
                                     summaryFunction = sumfct)
      tunegrid <- expand.grid(mtry = seq_along(predictors_name))

      fits <- vector("list", length(dat))
      names(fits) <- names(dat)

      for (k in seq_along(dat)) {
        current <- dat[[k]]
        if (!"vari" %in% names(current)) {
          stop("dat[[", k, "]] has no 'vari' column", call. = FALSE)
        }

        # Select by name so the split column is dropped safely, and use %in%
        # so rows with a missing `vari` are left out instead of becoming
        # all-NA rows.
        keep_cols <- setdiff(names(current), "vari")
        subsets <- list(
          a = current[current$vari %in% "a", keep_cols, drop = FALSE],
          b = current[current$vari %in% "b", keep_cols, drop = FALSE]
        )

        per_target <- vector("list", length(targets))
        names(per_target) <- targets

        for (i in seq_along(targets)) {
          # <target> ~ <predictor 1> + <predictor 2> + ...
          model_formula <- stats::reformulate(predictors_name,
                                              response = targets[i])

          per_subset <- vector("list", length(subsets))
          names(per_subset) <- names(subsets)

          for (j in seq_along(subsets)) {
            # Same seed before every fit.
            set.seed(42)
            per_subset[[j]] <- caret::train(model_formula,
                                            data = subsets[[j]],
                                            method = "rf",
                                            ntree = 25,
                                            metric = "RMSE",
                                            tuneGrid = tunegrid,
                                            trControl = control)
          }

          per_target[[i]] <- per_subset
        }

        fits[[k]] <- per_target
      }

      invisible(fits)
    }
    
    predictors_name <- c("Time", "Chick", "Diet")
    targets <- "weight"

    # Work on a plain data-frame copy of ChickWeight and tag rows 1-10 and
    # 320-350 as subset "a"; every other row belongs to subset "b".
    dat <- as.data.frame(ChickWeight)
    dat$vari <- ifelse(seq_len(nrow(dat)) %in% c(1:10, 320:350), "a", "b")

    # Two data sets: the first 300 rows and everything after them.
    d <- list(dat[seq_len(300), ], dat[seq(301, nrow(dat)), ])

    ## use 2 of the cores
    library(doParallel)
    cl <- makePSOCKcluster(2)

    # Run inside tryCatch() so the workers are shut down even when the model
    # fitting fails part-way through.
    tryCatch(
      {
        registerDoParallel(cl)
        cv_model(dat = d, targets = targets, predictors_name = predictors_name)
      },
      finally = {
        # end parallel computing
        stopCluster(cl)
        # Fall back to the sequential backend; otherwise later foreach-based
        # calls would still try to use the stopped cluster.
        foreach::registerDoSEQ()
      }
    )
    

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 1970-01-01
      • 2018-05-16
      • 2015-11-06
      • 2018-05-29
      • 2014-04-29
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      相关资源
      最近更新 更多