【问题标题】:R Triangular MeshR三角网
【发布时间】:2018-01-18 12:05:53
【问题描述】:

我正在为ggtern 包进行一些开发,并且我正在尝试生成一个有效的算法来处理三元热图。具体来说,我使用以下帖子 (Ternary Heatmap) 作为起点。

考虑下面的函数,它基于上述链接(部分):

# Produce Points for Triangular Mesh
triMesh = function( n = 1){
  n    = max(as.integer(n[1]),1)
  p    = data.frame()
  cnt  = 0
  inc  = 1.0/n
  stp  = seq(0,1,length.out = n + 1 )

  for (ix in seq_along(stp)){
    z <- stp[ix]
    y <- 1 - z
    x <- 0
    while ( y >= 0 ) {
      p   <- rbind(p, c(cnt, x, y, z))
      y   <- y - inc #Step x down
      x   <- x + inc #Step y up
      cnt <- cnt + 1          #Increment Count
    }
  }
  colnames(p) = c("IDPoint","x","y","z")
  p = round(p[with(p,order(y,x,-z)),],6)
  rownames(p) = 1:nrow(p) - 1
  p
}

这是我的版本,在语法上更简洁:

# Produce Points for Triangular Mesh
triMesh2 = function( n = 1 ){
  n   = as.integer(max(n[1],1))

  #Nested plyr calls
  result = ldply(0:n,function(y){ ##OUTER
    ldply(0:(n-y),function(x){    ##INNER
      data.frame(x,y,z = n -x -y) ##DIFF
    })
  })

  result        = data.frame( 1:nrow(result)-1,result/n)
  names(result) = c('IDPoint','x','y','z')
  result
}

现在,使用微基准测试,第一个算法的完成速度提高了几倍:

> microbenchmark(triMesh(10))
Unit: milliseconds
        expr      min      lq     mean   median       uq      max neval
 triMesh(10) 6.447525 6.91798 8.432698 7.334905 8.727805 23.37242   100

> microbenchmark(triMesh2(10))
Unit: milliseconds
         expr      min       lq     mean   median       uq     max neval
 triMesh2(10) 27.26659 29.34891 32.50808 31.43524 34.92925 51.8585   100
> 

我想知道是否有人可以将第二种算法的性能提高到第一种算法附近(或更好)...

干杯

【问题讨论】:

    标签: r optimization plyr microbenchmark


    【解决方案1】:

    通常简单地使用向量会更快:

    triMesh3 <- function(n = 1){
      n <- as.integer(max(n[1], 1))
      result <- lapply(0:n, function(y){ 
        l <- lapply(0:(n - y), function(x){    
          c(x = x, y = y, z = n - x - y) 
        })
        Reduce(rbind, l)
      })
      result <- Reduce(rbind, result)
      row.names(result) <- NULL
      result <- cbind(1:nrow(result) - 1, result/n)
      result <- as.data.frame(result)
      names(result) <- c('IDPoint', 'x', 'y', 'z')
      result
    }
    
    all.equal(triMesh3(12), triMesh2(12))
    # [1] TRUE
    microbenchmark::microbenchmark(triMesh3(10),
                                   triMesh2(10), 
                                   triMesh(10), times = 100, unit = "relative")
    # Unit: relative
    #         expr      min       lq     mean   median       uq      max neval cld
    # triMesh3(10)  1.00000  1.00000  1.00000  1.00000  1.00000  1.00000   100 a  
    # triMesh2(10) 92.16829 89.07131 86.66111 88.32173 85.68915 63.29785   100   c
    #  triMesh(10) 30.60108 29.70537 29.61635 29.83430 30.11924 32.40393   100  b 
    

    @CPak你打了我一点,我也想更新我的答案:

    triMesh4_minem <- function(n = 1){
      n <- as.integer(max(n[1], 1))
      y1 <- 0:n
      ys <- n - y1 + 1
      y <- sapply(1:(n + 1), function(x) y1[1:ys[x]])
      y <- unlist(y)
      x <- rep(y1, times = ys)
      result2 <- cbind(1:(length(x)) - 1, y/n, x/n, (n - y - x)/n)
      result <- as.data.frame.matrix(result2)
      names(result) <- c('IDPoint', 'x', 'y', 'z')
      result
    }
    all.equal(triMesh4_minem(2), triMesh4_cpack(2))
    # [1] TRUE
    
    microbenchmark::microbenchmark(triMesh4_minem(1e4),
                                   triMesh4_cpack(1e4),
                                   times = 10, unit = "relative")
    # Unit: relative
    #                 expr      min       lq     mean   median       uq      max neval cld
    # triMesh4_minem(10000) 2.659507 2.572209 2.121967 1.965973 1.906203 1.905907    10   b
    # triMesh4_cpack(10000) 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000    10  a 
    

    【讨论】:

    • 非常感谢,即使你最后得到了类似的结果,由于 N 的缩放问题,我已经把它交给了 CPak。
    【解决方案2】:

    我想提供另一种生成数据的方法。 Reduce(...) 的问题在于它通常不能随着 N 的增加而很好地扩展

    triMesh4 <- function(n=1) {
        n <- as.integer(max(n[1], 1))
        temp <- seq(0, n, 1)
        df <- data.frame(
                    x = unlist(sapply((n+1):1, function(i) temp[1:i])),
                    y = rep(0:n, (n+1):1)
                )
        df$z <- n - df$x - df$y
        df <- cbind(0:(nrow(df)-1), df / n)
        names(df) <- c('IDPoint', 'x', 'y', 'z')
        return(df)
    }
    
    all.equal(triMesh3(12), triMesh4(12))
    # [1] TRUE
    
    library(microbenchmark)
    N <- c(12, 16, 100)
    lapply(N, function(i) microbenchmark(triMesh3(i), triMesh4(i), times=10L, unit="relative"))
    # [[1]]
    # Unit: relative
            # expr      min       lq     mean   median       uq      max neval
     # triMesh3(i) 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000    10
     # triMesh4(i) 1.484984 1.472767 1.466758 1.474142 1.470987 1.392629    10
    
    # [[2]]
    # Unit: relative
            # expr      min       lq     mean   median       uq       max neval
     # triMesh3(i) 1.075225 1.081014 1.017441 1.024083 1.015504 0.8398393    10
     # triMesh4(i) 1.000000 1.000000 1.000000 1.000000 1.000000 1.0000000    10
    
    # [[3]]
    # Unit: relative
            # expr      min       lq     mean  median       uq      max neval
     # triMesh3(i) 23.67992 23.33367 22.79632 23.2149 21.89245 21.32084    10
     # triMesh4(i)  1.00000  1.00000  1.00000  1.0000  1.00000  1.00000    10
    

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2016-08-12
      • 2010-10-26
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2022-06-10
      相关资源
      最近更新 更多