【问题标题】:Julia @parallel for loop with return statement(Julia:带有 return 语句的 @parallel for 循环)
【发布时间】:2017-04-15 16:30:25
【问题描述】:

如何在函数中编写一个并行 for 循环,使得某个条件一旦满足,所有工作进程(worker)立即停止,并且函数随即返回?

即像这样:

# Pseudocode from the question: `{... statement ...}` and `{condition}` are
# placeholders, so this snippet is not runnable as written.
function test(n)
  @sync @parallel for i in 1:1000
    {... statement ...}
    if {condition}
      # Desired behavior per the question: every worker stops looping and
      # `test` itself returns this value.
      return test(n+1)
    end
  end
end

也就是说,所有工作进程都停止执行该 for 循环,并且只有主进程返回?(然后其他进程再开始处理下一个 for 循环?)

【问题讨论】:

    标签: for-loop parallel-processing julia


    【解决方案1】:

    这个问题似乎是执行“易并行”(embarrassingly parallel)搜索任务时的一种基本模式。@parallel for 构造很适合对工作进行划分,但它不具备单进程控制流中 for 循环那样可以用 break 提前终止的短路逻辑。

    为了演示如何在 Julia 中做到这一点,请考虑一个玩具问题:找出一把带有多个转轮的密码锁的密码。可以用某种方法检查每个转轮的每个位置是否正确(每次检查耗时 combodelay,参见下面的代码)。找到一个转轮的正确数字后,再搜索下一个转轮。其高层伪代码与提问者(OP)问题中给出的代码片段类似。

    以下是实现这一点的可运行代码(适用于 Julia 0.5 和 0.6)。代码中有一些注释解释细节,并且代码以单个代码块的形式给出,便于复制粘贴。

    # Combination lock problem parameters.
    # Each search below iterates over the candidate positions 1:wheel_max.
    const wheel_max = 1000  # number of positions on each wheel
    # Declared with @everywhere, unlike the other constants in this block.
    @everywhere const magic_number = [55,10,993]  # secret combination, one entry per wheel
    const wheel_count = length(magic_number)  # number of wheels
    # Passed to sleep() to simulate the cost of checking one candidate.
    const combodelay = 0.01 # delay time to check single combination
    
    # Parallel short-circuit parameters.
    # find_combo polls the shared result only while
    # check_to_work_ratio * (time spent polling) < (time spent working).
    const check_to_work_ratio = 160  # ratio to limit short-circuit overhead
    
    # Search wheel number `wheel` in parallel, letting each process stop early
    # once it notices that the matching position has been recorded, then recurse
    # into the next wheel. `combo` collects the positions found so far and is
    # mutated by push!. Returns (combo, true) when every wheel has been solved,
    # or (combo, false) when no position in 1:wheel_max matches this wheel.
    function find_combo(wheel,combo=Int[])
      # one-slot shared array: 0 means "not found yet", otherwise the position
      hit = SharedArray{Int}(1)
      hit[1] = 0
      # reset the per-process counters that throttle polling of `hit`
      @sync begin
        @everywhere global localdone = false
        @everywhere global checktime = 0.0
        @everywhere global worktime = 0.0
      end
      # do the parallel work
      @sync @parallel for i in 1:wheel_max
        global localdone, checktime, worktime
        # look at the shared slot only if polling has stayed cheap relative
        # to the time spent on real work
        if !localdone && check_to_work_ratio*checktime < worktime
          tic()
          localdone = hit[1]>0
          checktime += toq()
        end
        # a match is already known: stop looping here
        if localdone
          break
        end
        # otherwise check one more candidate
        tic()
        sleep(combodelay) # simulated work delay, {..statement..} from OP
        if i==magic_number[wheel]    # {condition} from OP
          hit[1] = i
          localdone = true
        end
        worktime += toq()
      end
      # the shared slot tells whether this wheel was solved
      position = hit[1]
      if position <= 0
        return (combo,false)
      end
      push!(combo,position)
      if wheel<wheel_count
        return find_combo(wheel+1,combo)
      end
      return (combo,true)
    end
    
    # Sequential reference search: try positions 1:wheel_max for wheel number
    # `wheel`, stop at the first match, then recurse into the next wheel.
    # `combo` collects the positions found so far and is mutated by push!.
    # Returns (combo, true) when every wheel has been solved, or (combo, false)
    # when no position matches the current wheel.
    function find_combo_noparallel(wheel,combo=Int[])
      # Record the match in its own variable instead of reading the loop
      # variable after the loop. From Julia 1.0 on, a `for` loop variable is
      # always new and local to the loop, so an outer `i = 0` would stay 0 and
      # 0 would be pushed into `combo`. This form behaves the same on all versions.
      found_at = 0   # 0 means "not found"; valid positions start at 1
      for i in 1:wheel_max
        sleep(combodelay)   # simulated cost of checking one candidate
        if i==magic_number[wheel]
          found_at = i
          break
        end
      end
      if found_at>0
        push!(combo,found_at)
        return wheel<wheel_count ? 
          find_combo_noparallel(wheel+1,combo) : (combo,true)
      else
        return (combo,false)
      end
    end
    
    # Parallel search without any early exit: the loop body runs for every
    # position in 1:wheel_max, and a match is recorded in a shared slot. Then
    # recurse into the next wheel. `combo` collects the positions found so far
    # and is mutated by push!. Returns (combo, true) when every wheel has been
    # solved, or (combo, false) when no position matches the current wheel.
    function find_combo_nostop(wheel,combo=Int[])
      # one-slot shared array: 0 means "not found", otherwise the position
      hit = SharedArray{Int}(1)
      hit[1] = 0
      @sync @parallel for i in 1:wheel_max
        sleep(combodelay)
        i==magic_number[wheel] && (hit[1] = i)
      end
      position = hit[1]
      position > 0 || return (combo,false)
      push!(combo,position)
      if wheel<wheel_count
        return find_combo_nostop(wheel+1,combo)
      end
      return (combo,true)
    end
    
    # Correctness pass: run each variant once starting from wheel 1 and require
    # that it recovers the whole secret combination.
    result = find_combo(1)
    println("parallel with short-circuit stopping:       $result")
    @assert result == (magic_number, true)
    
    result = find_combo_noparallel(1)
    println("single process with short-circuit stopping: $result")
    @assert result == (magic_number, true)
    
    result = find_combo_nostop(1)
    println("parallel without short-circuit stopping:    $result")
    @assert result == (magic_number, true)
    
    println("\ntimings")
    
    # Timing pass: every function has already been called once above, so none
    # of the @time measurements below is a first call.
    print("parallel with short-circuit stopping        ")
    @time find_combo(1);
    print("single process with short-circuit stopping  ")
    @time find_combo_noparallel(1)
    print("parallel without short-circuit stopping     ")
    @time find_combo_nostop(1)
    
    # NOTE(review): presumably here so that pasting the script into a REPL does
    # not echo the last timed result - confirm.
    nothing
    

    可能会有更好看的实现,一些元编程可以隐藏一些短路机制。但这应该是一个好的开始。

    结果应大致如下所示:

    parallel with short-circuit stopping:       ([55,10,993],true)
    single process with short-circuit stopping: ([55,10,993],true)
    parallel without short-circuit stopping:    ([55,10,993],true)
    
    timings
    parallel with short-circuit stopping          4.473687 seconds
    single process with short-circuit stopping   11.963329 seconds
    parallel without short-circuit stopping      11.316780 seconds
    

    以上结果是在演示中使用 3 个工作进程测得的。实际问题通常会有更多进程,每个进程的工作量也更大,此时短路的好处会更加明显。

    【讨论】:

    • 抱歉,现在才把它标记为正确答案!使用 SharedArray 确实是一种可行的方法!