【问题标题】:codility GenomicRangeQuery algorithm comparsion speed Java vs Swiftcodility GenomicRangeQuery 算法比较速度 Java vs Swift
【发布时间】:2016-06-25 00:25:23
【问题描述】:

我将解决 GenomicRangeQuery 任务的代码从 Java 改写为 Swift。 Jave 中的代码获得 100/100 分,但 Swift 中的代码未通过所有性能测试。我试图理解为什么,因为代码中的逻辑是相同的。我想知道为什么 Swift 代码执行这么长时间。我是否在我不知道的快速代码中使用了一些非常慢的部分。请查看从here 复制的这段 Java 代码。

class Solution {
  public int[] solveGenomicRange(String S, int[] P, int[] Q) {
    //used jagged array to hold the prefix sums of each A, C and G genoms
    //we don't need to get prefix sums of T, you will see why.
    int[][] genoms = new int[3][S.length()+1];
    //if the char is found in the index i, then we set it to be 1 else they are 0
    // 3 short values are needed for this reason
    short a, c, g;
    for (int i=0; i<S.length(); i++) {
      a = 0; c = 0; g = 0;
      if ('A' == (S.charAt(i))) {
        a=1;
      }
      if ('C' == (S.charAt(i))) {
        c=1;
      }
      if ('G' == (S.charAt(i))) {
        g=1;
      }
      //here we calculate prefix sums. To learn what's prefix sums look at here https://codility.com/media/train/3-PrefixSums.pdf
      genoms[0][i+1] = genoms[0][i] + a;
      genoms[1][i+1] = genoms[1][i] + c;
      genoms[2][i+1] = genoms[2][i] + g;
    }

    int[] result = new int[P.length];
    //here we go through the provided P[] and Q[] arrays as intervals
    for (int i=0; i<P.length; i++) {
      int fromIndex = P[i];
      //we need to add 1 to Q[i],
      //because our genoms[0][0], genoms[1][0] and genoms[2][0]
      //have 0 values by default, look above genoms[0][i+1] = genoms[0][i] + a;
      int toIndex = Q[i]+1;
      if (genoms[0][toIndex] - genoms[0][fromIndex] > 0) {
        result[i] = 1;
      } else if (genoms[1][toIndex] - genoms[1][fromIndex] > 0) {
        result[i] = 2;
      } else if (genoms[2][toIndex] - genoms[2][fromIndex] > 0) {
        result[i] = 3;
      } else {
        result[i] = 4;
      }
    }
    return result;
  }
}

这里将相同的代码重写为 Swift 2.1

public func solution(inout S:String, inout _ P:[Int], inout _ Q:[Int]) -> [Int] {
  let len = S.characters.count

  //used jagged array to hold the prefix sums of each A, C and G genoms
  //we don't need to get prefix sums of T, you will see why.
  var genoms = [[Int]](count: 3, repeatedValue: [Int](count: len+1, repeatedValue: 0))

  //if the char is found in the index i, then we set it to be 1 else they are 0
  // 3 short values are needed for this reason
  var a,c,g:Int
  for i in 0..<len {
    a=0; c=0; g=0
    let char = S[S.startIndex.advancedBy(i)]
    switch char {
    case "A": a=1;
    case "C": c=1;
    case "G": g=1;
    default: ()
    }

    //here we calculate prefix sums. To learn what's prefix sums look at here https://codility.com/media/train/3-PrefixSums.pdf
    genoms[0][i+1] = genoms[0][i] + a
    genoms[1][i+1] = genoms[1][i] + c
    genoms[2][i+1] = genoms[2][i] + g

  }

  var result: [Int] = [Int](count: P.count, repeatedValue: 0)
  //here we go through the provided P[] and Q[] arrays as intervals
  for i in 0..<P.count {
    let fromIndex = P[i]
    //we need to add 1 to Q[i],
    //because our genoms[0][0], genoms[1][0] and genoms[2][0]
    //have 0 values by default, look above genoms[0][i+1] = genoms[0][i] + a;
    let toIndex = Q[i] + 1

    if (genoms[0][toIndex] - genoms[0][fromIndex] > 0) {
      result[i] = 1;
    } else if (genoms[1][toIndex] - genoms[1][fromIndex] > 0) {
      result[i] = 2;
    } else if (genoms[2][toIndex] - genoms[2][fromIndex] > 0) {
      result[i] = 3;
    } else {
      result[i] = 4;
    }
  }
  return result
}

有人知道为什么当 Java 代码通过所有测试时,这段 Swift 代码无法通过所有性能测试吗?我想我在 Swift 中遇到了一些敏感的瓶颈,但我不知道在哪里。

如果有人不了解代码,这是任务的link

【问题讨论】:

  • S.startIndex.advancedBy(i) 可能会很慢。您可以尝试使用字节数组而不是字符串。不过更好的是,使用 Profiler 来确定周期将花费在哪里 - Instruments 尤其擅长此操作。
  • 你是对的。非常感谢!遗憾的是,使用普通 String api 按索引访问一个字符是如此缓慢。我创建了这样的字节数组 let seq = Array(S.utf8) 并对其进行操作。
  • 另外,考虑一下:对于 Swift:“检测到的时间复杂度:O(N * M)”,对于 Java:“检测到的时间复杂度:O(N + M)”,其中@987654327 @ 是 DNA 序列的字符数,M 是查询数。很明显,与 Java 相比,Swift 的算法。你现在的任务是找出为什么 ;)
  • 这是 Swift 中获得 100% 的解决方案 stackoverflow.com/questions/55160288/…

标签: performance swift2 prefix-sum


【解决方案1】:

这个用于解决 GenomicRangeQuery 问题的 Java 代码在编码方面的得分为 100%。 它使用 4 个简单的数组来进行前缀求和。 我把它贴在这里作为一种替代方法。 时间复杂度为 O(n+m)

public int[] solution4(String S, int[] P, int[] Q){

    char[]chars=S.toCharArray();
    int n=chars.length;

    int[]contaA=new int[n+1];
    int[]contaC=new int[n+1];
    int[]contaG=new int[n+1];
    int[]contaT=new int[n+1];

    for (int i=1;i<n+1;i++){
        contaA[i]=contaA[i-1];
        contaC[i]=contaC[i-1];
        contaG[i]=contaG[i-1];
        contaT[i]=contaT[i-1];
        if (chars[i-1]=='A')contaA[i]+=1;
        if (chars[i-1]=='C')contaC[i]+=1;
        if (chars[i-1]=='G')contaG[i]+=1;
        if (chars[i-1]=='T')contaT[i]+=1;
    }

    int[] arrayContadores=new int[P.length];

    for (int i=0;i<P.length;i++){
        int primeiro=P[i];
        int ultimo=Q[i];

        int A=contaFatia(contaA,primeiro,ultimo);
        int C=contaFatia(contaC,primeiro,ultimo);
        int G=contaFatia(contaG,primeiro,ultimo);
        int T=contaFatia(contaT,primeiro,ultimo);

        if (A>0){arrayContadores[i]=1;
        }else if (C>0) {
            arrayContadores[i] = 2;
        }else if(G>0){
            arrayContadores[i]=3;
        }else if (T>0){
            arrayContadores[i]=4;
        }

    }
    return arrayContadores;
}


public int contaFatia(int[]P,int x,int y){
    return P[y+1]-P[x];
}

【讨论】:

【解决方案2】:
public func solution(_ S : inout String, _ P : inout [Int], _ Q : inout [Int]) -> [Int] {

   var retArr = [Int]()
   var chrArr = [Character]()


   for chr in S {
       chrArr.append(chr)
   }


   for i in 0..<P.count {

       var minFactor = 4

       if P[i] - Q[i] == 0 {
           if chrArr[P[i]] == "A"{
               minFactor = 1
           }else if chrArr[P[i]] == "C"{
               minFactor = 2
           }else if chrArr[P[i]] == "G"{
               minFactor = 3
           }
       }else {
           for j in P[i]...Q[i] {

               if chrArr[j] == "A"{
                   minFactor = 1
                       break
               }else if chrArr[j] == "C"{
                       minFactor = 2
               }else if chrArr[j] == "G"{
                       if minFactor > 2 {
                           minFactor = 3
                           }
                   }
               }
       }

       retArr.append(minFactor)
   }

   return retArr
}

【讨论】:

    【解决方案3】:

    我在 Swift 中玩了一段时间,试图找到正确的解决方案。这是我最近的一次。

    public func solution(_ S : inout String, _ P : inout [Int], _ Q : inout [Int]) -> [Int] {
        let N = S.count + 1
        var outerImpacts: ContiguousArray<ContiguousArray<Int>> = []
        outerImpacts.reserveCapacity(N)
        for i in 0..<N {
            if i > 0 {
                var innerImpacts = outerImpacts[i - 1]
                switch S[S.index(S.startIndex, offsetBy: i - 1)] {
                case "A":
                    innerImpacts[0] += 1
                case "C":
                    innerImpacts[1] += 1
                case "G":
                    innerImpacts[2] += 1
                case "T":
                    innerImpacts[3] += 1
                default:
                    break
                }
                outerImpacts.append(innerImpacts)
            } else {
                outerImpacts.append(ContiguousArray<Int>(repeating: 0, count: 4))
            }
        }
    
        let M: Int = P.count
        var minimalImpacts: [Int] = []
        minimalImpacts.reserveCapacity(M)
        for i in 0..<M {
            for j in 0..<4 where (outerImpacts[Q[i] + 1][j] - outerImpacts[P[i]][j]) > 0 {
                minimalImpacts.append(j + 1)
                break
            }
        }
    
        return minimalImpacts
    }
    

    【讨论】:

      猜你喜欢
      • 2020-08-06
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2019-03-07
      • 2020-05-10
      • 1970-01-01
      • 1970-01-01
      • 2011-10-21
      相关资源
      最近更新 更多