【问题标题】:All Longest Common Subsequences所有最长公共子序列
【发布时间】:2011-09-10 01:21:31
【问题描述】:

[注意:我事先搜索过,找不到解决所有子序列的 LCS 问题的建议。]

我在编写“最长公共子序列”问题的解决方案时遇到了麻烦:我需要返回两个输入字符串的所有最长公共子序列。我查看了 Wikipedia 页面并尝试实现其中的伪代码,但我的“backtrackAll”方法有问题。我相信下面的 LCS 矩阵计算是正确的,但“backtrackAll”方法返回的是一个空集。能否提示一下我哪里做错了?

/**
 * Demo entry point: builds the LCS length table for two sample strings and
 * then backtracks from the bottom-right cell to collect the subsequences.
 */
public static void main(String[] args) {
    final String first = "AGCAT";
    final String second = "GAC";
    final int[][] lengths = computeMatrix(first, second);
    HashSet<String> set =
            backtrackAll(lengths, first, second, first.length(), second.length());
    // more stuff would go here...
}

/**
 * Builds the dynamic-programming table of LCS lengths for two strings.
 *
 * <p>Entry {@code [row][col]} holds the length of the longest common
 * subsequence of the first {@code row} characters of {@code s1} and the first
 * {@code col} characters of {@code s2}. Row 0 and column 0 are left at zero.
 *
 * @param s1 first input string
 * @param s2 second input string
 * @return a table with {@code s1.length() + 1} rows and {@code s2.length() + 1} columns
 */
private static int[][] computeMatrix(String s1, String s2) {
    final int rowCount = s1.length();
    final int colCount = s2.length();
    final int[][] lengths = new int[rowCount + 1][colCount + 1];

    for (int row = 1; row <= rowCount; row++) {
        final char rowChar = s1.charAt(row - 1);
        for (int col = 1; col <= colCount; col++) {
            // A match extends the diagonal neighbour; otherwise carry the better of left/up.
            lengths[row][col] = (rowChar == s2.charAt(col - 1))
                    ? lengths[row - 1][col - 1] + 1
                    : Math.max(lengths[row][col - 1], lengths[row - 1][col]);
        }
    }
    return lengths;
}

/**
 * Attempts to collect all longest common subsequences of the first {@code i}
 * characters of {@code s1} and the first {@code j} characters of {@code s2}
 * by walking backwards through the LCS length table {@code C}.
 *
 * <p>NOTE: as written, this version always returns an empty set. The base
 * case yields a set with no elements, and the matching branch only appends
 * a character to strings that are already in the recursive result, so there
 * is never anything to extend.
 *
 * @param C  LCS length table, indexed {@code [prefix length of s1][prefix length of s2]}
 * @param s1 first input string
 * @param s2 second input string
 * @param i  number of leading characters of {@code s1} to consider
 * @param j  number of leading characters of {@code s2} to consider
 * @return the collected subsequences (always empty in this version, see note)
 */
private static HashSet<String> backtrackAll(int[][] C, String s1, String s2, int i, int j) {
    if (i == 0 || j == 0) {
        // Base case: one of the prefixes is empty. The returned set contains
        // no strings at all (not even the empty string).
        return new HashSet<String>();
    } else if (s1.charAt(i-1) == s2.charAt(j-1)) {
        // Last characters match: extend every subsequence found for the
        // shorter prefixes with that character.
        HashSet<String> R = backtrackAll(C,s1,s2,i-1,j-1);
        HashSet<String> new_set = new HashSet<String>();
        for (String Z: R) {
            new_set.add(Z + s1.charAt(i-1));
        }
        return new_set;
    } else {
        // No match: follow whichever neighbour(s) hold the larger length;
        // on a tie both directions are explored and their results merged.
        HashSet<String> R = new HashSet<String>();
        if (C[i][j-1] >= C[i-1][j]) {
            R = backtrackAll(C, s1, s2, i, j-1);
        } 
        if (C[i-1][j] >= C[i][j-1]) {
            R.addAll(backtrackAll(C,s1,s2,i-1,j));
        }
        return R;
    }
}

【问题讨论】:

    标签: java lcs


    【解决方案1】:

    我稍微修改了一下,现在可以运行了。您还需要考虑递归返回空 HashSet 的情况:此时必须把最后一个匹配的字符加入结果集。

    /**
     * Collects every longest common subsequence (LCS) of the first {@code i}
     * characters of {@code s1} and the first {@code j} characters of {@code s2}
     * by backtracking through the LCS length table.
     *
     * <p>When the two prefixes share no character, the result is an empty set.
     *
     * @param C  LCS length table, indexed {@code [prefix length of s1][prefix length of s2]}
     * @param s1 first input string
     * @param s2 second input string
     * @param i  number of leading characters of {@code s1} to consider
     * @param j  number of leading characters of {@code s2} to consider
     * @return the set of all distinct LCS strings for the two prefixes
     */
    private static HashSet<String> backtrackAll(int[][] C, String s1, String s2, int i, int j) {
        if (i == 0 || j == 0) {
            return new HashSet<String>();
        }

        final char last = s1.charAt(i - 1);
        if (last == s2.charAt(j - 1)) {
            HashSet<String> prefixes = backtrackAll(C, s1, s2, i - 1, j - 1);
            HashSet<String> extended = new HashSet<String>();
            for (String prefix : prefixes) {
                extended.add(prefix + last);
            }
            // Seed with the bare character only when the shorter prefixes have
            // no common subsequence. Adding it unconditionally would also emit
            // subsequences that are shorter than the LCS.
            if (prefixes.isEmpty()) {
                extended.add(String.valueOf(last));
            }
            return extended;
        }

        // No match: follow the neighbour(s) holding the larger length; on a
        // tie both directions can lead to distinct LCS strings.
        final int left = C[i][j - 1];
        final int up = C[i - 1][j];
        HashSet<String> merged = new HashSet<String>();
        if (left >= up) {
            merged.addAll(backtrackAll(C, s1, s2, i, j - 1));
        }
        if (up >= left) {
            merged.addAll(backtrackAll(C, s1, s2, i - 1, j));
        }
        return merged;
    }
    

    【讨论】:

      【解决方案2】:

      由于这是“家庭作业”,这里有一些提示。

      1. 确保您了解您编写的算法。这可能是找出您的实施有什么问题的最重要的一步。

      2. 尝试使用调试器找出发生了什么。将您认为应该发生的事情与实际发生的事情进行比较。

      3. 尝试在代码中添加一些assert 语句来检查您认为应该成立的前置条件、后置条件和不变量。 (使用java -ea ... 运行)

      4. 坚持正常的 Java 命名约定。变量名以小写字母开头。变量名中没有下划线。

      【讨论】:

        【解决方案3】:

        第二个答案会输出所有公共子序列,而不仅仅是最长的那些;我的版本才是正确的。

        /**
         * Returns every longest common subsequence of the first {@code i}
         * characters of {@code s1} and the first {@code j} characters of
         * {@code s2}, reconstructed by backtracking through the length table.
         *
         * <p>When either prefix is empty the result is the set containing only
         * the empty string.
         *
         * @param C  LCS length table, indexed {@code [prefix length of s1][prefix length of s2]}
         * @param s1 first input string
         * @param s2 second input string
         * @param i  number of leading characters of {@code s1} to consider
         * @param j  number of leading characters of {@code s2} to consider
         * @return the set of distinct LCS strings for the two prefixes
         */
        private static HashSet<String> backtrackAll(int[][] C, String s1, String s2, int i, int j) {
            HashSet<String> result = new HashSet<String>();

            // An empty prefix has exactly one common subsequence: the empty string.
            if (i == 0 || j == 0) {
                result.add("");
                return result;
            }

            char tail = s1.charAt(i - 1);
            if (tail == s2.charAt(j - 1)) {
                // Matching last characters: append the character to each LCS of the shorter prefixes.
                for (String prefix : backtrackAll(C, s1, s2, i - 1, j - 1)) {
                    result.add(prefix + tail);
                }
                return result;
            }

            // No match: explore the neighbour(s) with the larger length, both on a tie.
            int left = C[i][j - 1];
            int up = C[i - 1][j];
            if (left >= up) {
                result.addAll(backtrackAll(C, s1, s2, i, j - 1));
            }
            if (up >= left) {
                result.addAll(backtrackAll(C, s1, s2, i - 1, j));
            }
            return result;
        }
        

        【讨论】:

          【解决方案4】:

          这里有两个 C# 版本用于获取所有最长公共子序列(可参考:http://codingworkout.blogspot.com/2014/07/longest-common-subsequence.html)

          1. 基于包含最长公共子序列长度的缓存表的回溯

          2. 缓存表中保存的不是长度(lengths),而是直接保存 LCS 本身。

          版本 1(基于 lcs 前缀长度的回溯):

          /// <summary>
          /// Recursively reconstructs all longest common subsequences of the first
          /// <paramref name="aIndex"/> characters of <paramref name="A"/> and the first
          /// <paramref name="bIndex"/> characters of <paramref name="B"/> from a table
          /// of LCS lengths.
          /// </summary>
          /// <param name="A">First input string.</param>
          /// <param name="B">Second input string.</param>
          /// <param name="aIndex">Prefix length of <paramref name="A"/> (row into the table).</param>
          /// <param name="bIndex">Prefix length of <paramref name="B"/> (column into the table).</param>
          /// <param name="DP_LCS_AllPrefixes_Cache">Table of LCS lengths indexed by [aIndex][bIndex].</param>
          /// <returns>The subsequences, or <c>null</c> when the table entry is 0.</returns>
          string[] GetLongestCommonSubsequences(string A, string B, int aIndex, int bIndex,
              int[][] DP_LCS_AllPrefixes_Cache)
          {
              // Nothing in common for these prefixes: signal "no subsequence" with null.
              if (DP_LCS_AllPrefixes_Cache[aIndex][bIndex] == 0)
              {
                  return null;
              }

              if (A[aIndex - 1] == B[bIndex - 1])
              {
                  string[] prefixes = this.GetLongestCommonSubsequences(
                      A, B, aIndex - 1, bIndex - 1, DP_LCS_AllPrefixes_Cache);
                  string tail = A[aIndex - 1].ToString();

                  // A null result means the matched character starts the subsequence.
                  return prefixes == null
                      ? new string[] { tail }
                      : prefixes.Select(prefix => prefix + tail).ToArray();
              }

              int above = DP_LCS_AllPrefixes_Cache[aIndex - 1][bIndex];
              int left = DP_LCS_AllPrefixes_Cache[aIndex][bIndex - 1];

              if (above > left)
              {
                  return this.GetLongestCommonSubsequences(
                      A, B, aIndex - 1, bIndex, DP_LCS_AllPrefixes_Cache);
              }
              if (above < left)
              {
                  return this.GetLongestCommonSubsequences(
                      A, B, aIndex, bIndex - 1, DP_LCS_AllPrefixes_Cache);
              }

              // Tie: both directions may contribute distinct subsequences.
              string[] fromAbove = this.GetLongestCommonSubsequences(
                  A, B, aIndex - 1, bIndex, DP_LCS_AllPrefixes_Cache);
              string[] fromLeft = this.GetLongestCommonSubsequences(
                  A, B, aIndex, bIndex - 1, DP_LCS_AllPrefixes_Cache);
              return fromAbove.Union(fromLeft).ToArray();
          }
          

          **递归函数的调用者是**

          /// <summary>
          /// Entry point for the recursive reconstruction: starts backtracking
          /// from the table cell that covers both complete strings.
          /// </summary>
          /// <param name="A">First input string.</param>
          /// <param name="B">Second input string.</param>
          /// <param name="DP_LCS_AllPrefixes_Cache">Table of LCS lengths for all prefixes.</param>
          /// <returns>Whatever the recursive overload returns for the full strings.</returns>
          string[] GetLongestCommonSubsequences(string A, string B, int[][] DP_LCS_AllPrefixes_Cache)
          {
              return this.GetLongestCommonSubsequences(
                  A, B, A.Length, B.Length, DP_LCS_AllPrefixes_Cache);
          }
          

          版本 2 - 缓存捕获所有前缀的 lcs

          /// <summary>
          /// One cell of the LCS table: the LCS length for a pair of prefixes
          /// together with the subsequences of that length.
          /// </summary>
          class LCS_Prefix
          {
              /// <summary>LCS length for this cell; starts at 0.</summary>
              public int Length;

              /// <summary>Subsequences recorded for this cell; starts as null.</summary>
              public string[] Subsequences;
          }
                  /// <summary>
                  /// Builds a table in which cell [i][j] records, for the first i characters of
                  /// <paramref name="A"/> and the first j characters of <paramref name="B"/>, both the
                  /// LCS length and the subsequences of that length. Cells whose length is 0 keep
                  /// <c>Subsequences == null</c>.
                  /// </summary>
                  /// <param name="A">First input string.</param>
                  /// <param name="B">Second input string.</param>
                  /// <returns>A table with A.Length + 1 rows and B.Length + 1 columns.</returns>
                  LCS_Prefix[][] LCS_OfAllPrefixes_Subsequences(string A, string B)
                  {
                      // NOTE(review): ThrowIfNullOrWhiteSpace is an extension helper not shown here;
                      // presumably it rejects null/blank input - confirm against its definition.
                      A.ThrowIfNullOrWhiteSpace("a");
                      B.ThrowIfNullOrWhiteSpace("b");
                      LCS_Prefix[][] LCS_DP_OfAllPrefixes_Subsequences_Cache = new LCS_Prefix[A.Length + 1][];
                      // Pre-populate every cell so row 0 / column 0 act as the "empty prefix" base case
                      // (Length 0, Subsequences null).
                      for (int i = 0; i < LCS_DP_OfAllPrefixes_Subsequences_Cache.Length; i++)
                      {
                          LCS_DP_OfAllPrefixes_Subsequences_Cache[i] = new LCS_Prefix[B.Length + 1];
                          for(int j = 0; j< LCS_DP_OfAllPrefixes_Subsequences_Cache[i].Length; j++)
                          {
                              LCS_DP_OfAllPrefixes_Subsequences_Cache[i][j] = new LCS_Prefix();
                          }
                      }
                      for (int rowIndexOfCache = 1; rowIndexOfCache <= A.Length; rowIndexOfCache++)
                      {
                          for (int columnIndexOfCache = 1; columnIndexOfCache <= B.Length; columnIndexOfCache++)
                          {
                              // Recurrence:
                              //   LCS(Ai, Bj) = 0                                  if i <= 0 or j <= 0
                              //                 LCS(Ai-1, Bj-1) + 1                if Ai == Bj
                              //                 Max(LCS(Ai-1, Bj), LCS(Ai, Bj-1))  otherwise
                              LCS_Prefix lcsPrefix = LCS_DP_OfAllPrefixes_Subsequences_Cache[rowIndexOfCache][columnIndexOfCache];
                              if (A[rowIndexOfCache - 1] == B[columnIndexOfCache - 1])
                              {
                                  // Characters match: extend the diagonal cell by one character.
                                  var lcs_Prefix_Diagnoal = LCS_DP_OfAllPrefixes_Subsequences_Cache[rowIndexOfCache - 1]
                                      [columnIndexOfCache - 1];
                                  lcsPrefix.Length = lcs_Prefix_Diagnoal.Length + 1;
                                  if (lcs_Prefix_Diagnoal.Subsequences == null)
                                  {
                                      // Nothing to extend yet: the matched character is the whole subsequence.
                                      lcsPrefix.Subsequences = new string[] { A[rowIndexOfCache - 1].ToString() };
                                  }
                                  else
                                  {
                                      lcsPrefix.Subsequences = lcs_Prefix_Diagnoal.Subsequences
                                          .Select(s => s + A[rowIndexOfCache - 1]).ToArray();
                                  }
                              }
                              else
                              {
                                  // No match: take the better of the cell above and the cell to the left.
                                  LCS_Prefix prefix1_Upward = LCS_DP_OfAllPrefixes_Subsequences_Cache[rowIndexOfCache - 1][columnIndexOfCache];
                                  var prefix2_Leftward = LCS_DP_OfAllPrefixes_Subsequences_Cache[rowIndexOfCache][columnIndexOfCache-1];
                                  if(prefix1_Upward.Length == prefix2_Leftward.Length)
                                  {
                                      // Tie: sanity-check that both neighbours are consistent, then merge them.
                                      // NOTE(review): Assert is presumably the unit-test framework's class - confirm.
                                      Assert.IsTrue(prefix1_Upward.Length == prefix2_Leftward.Length);
                                      Assert.IsTrue((prefix1_Upward.Subsequences == null &&
                                                      prefix2_Leftward.Subsequences == null)
                                                  || (prefix1_Upward.Subsequences != null
                                                      && prefix2_Leftward.Subsequences != null));
                                      if (prefix1_Upward.Subsequences != null)
                                      {
                                          Assert.IsTrue(prefix1_Upward.Subsequences.All(s1 => prefix2_Leftward.Subsequences.Any(s2 => (s2.Length == s1.Length))));
                                      }
          
                                      lcsPrefix.Length = prefix1_Upward.Length;
                                      if (prefix1_Upward.Subsequences != null)
                                      {
                                          // Union drops duplicates that appear in both neighbours.
                                          lcsPrefix.Subsequences = prefix1_Upward.Subsequences
                                              .Union(prefix2_Leftward.Subsequences).ToArray();
                                      }
                                      else
                                      {
                                          // Both neighbours are empty; this cell keeps Subsequences == null.
                                          Assert.IsNull(prefix2_Leftward.Subsequences);
                                      }
                                  }
                                  else if(prefix1_Upward.Length > prefix2_Leftward.Length)
                                  {
                                      // The cell above is strictly better: copy its length and share its array.
                                      lcsPrefix.Length = prefix1_Upward.Length;
                                      lcsPrefix.Subsequences = prefix1_Upward.Subsequences;
                                  }
                                  else
                                  {
                                      // The cell to the left is strictly better: copy its length and share its array.
                                      lcsPrefix.Length = prefix2_Leftward.Length;
                                      lcsPrefix.Subsequences = prefix2_Leftward.Subsequences;
                                  }
                              }
                          }
                      }
                      return LCS_DP_OfAllPrefixes_Subsequences_Cache;
                  }
          

          单元测试

          /// <summary>
          /// Exercises both LCS implementations (length-table backtracking and the
          /// subsequence-carrying table) against three string pairs, checking the
          /// LCS length and that each expected subsequence is present.
          /// </summary>
          [TestMethod]
          public void LCS_Tests()
          {
              var cases = new[]
              {
                  new { A = "AGCAT", B = "GAC", Length = 2, Expected = new[] { "AC", "GC", "GA" } },
                  new { A = "ABCDGH", B = "AEDFHR", Length = 3, Expected = new[] { "ADH" } },
                  new { A = "AGGTAB", B = "GXTXAYB", Length = 4, Expected = new[] { "GTAB" } },
              };

              foreach (var testCase in cases)
              {
                  string A = testCase.A, B = testCase.B;

                  // Version 1: length table + recursive backtracking.
                  var lengthCache = this.LCS_OfAllPrefixes_Length(A, B);
                  Assert.IsTrue(lengthCache[A.Length][B.Length] == testCase.Length);
                  var lcsSequences = this.GetLongestCommonSubsequences(A, B, lengthCache);
                  Assert.IsNotNull(lcsSequences);
                  foreach (string expected in testCase.Expected)
                  {
                      Assert.IsTrue(lcsSequences.Any(s => expected.Equals(s)));
                  }

                  // Version 2: table that stores the subsequences themselves.
                  var subsequenceCache = this.LCS_OfAllPrefixes_Subsequences(A, B);
                  Assert.IsTrue(subsequenceCache[A.Length][B.Length].Length == testCase.Length);
                  foreach (string expected in testCase.Expected)
                  {
                      Assert.IsTrue(subsequenceCache[A.Length][B.Length].Subsequences
                          .Any(s => expected.Equals(s)));
                  }
              }
          }
          

          【讨论】:

            猜你喜欢
            • 1970-01-01
            • 2011-03-01
            • 2021-11-04
            • 2011-02-25
            • 2013-02-13
            • 1970-01-01
            • 1970-01-01
            • 2012-08-29
            相关资源
            最近更新 更多