用字典解决完整的字谜答案

【问题标题】：Solving full anagrams with a dictionary用字典解决完整的字谜
【发布时间】：2015-03-10 22:02:52
【问题描述】：

我正在解决一个经典问题。准确地说，我正在尝试解决一个完整的字谜。

任何以另一种顺序精确再现字母的单词或短语都是字谜。

我有一个字谜、字典和哈希。我需要想出最初经过哈希处理的短语，因此程序应该使用给定的字典生成所有排列，并检查其中是否有任何一个是答案。

总而言之，有人为我隐藏了一条消息，我需要破解它！

样本输入：

scofriybaarae dict.txt FD8D80332CCA32905F11860FB866CA92

以下所有短语都是 scofriybaarae 的有效字谜，因此它们所包含的单词可能不同或顺序不同。

是一个海湾

弗里斯科湾

frisco bay area

但是只有最后一个是答案。这是因为 frisco bay area 的 MD5 与作为参数给出的 MD5 匹配。

我们可以拆分处理字典、生成组合和检查 md5 的任务。

我使用一棵字母树，树中的某些节点可以表示一个单词的结尾。一个分支的末端总是某个单词的结尾。这意味着单个分支可以表示许多单词，例如下面的例子中，粗体字母标出一个完整单词的结束位置：

airport

在上面的示例中，存储了两个单词，因此在您浏览时很容易划掉使用过的字母。

尽管我对求解器的性能不满意，但我的程序可以非常快地从字典中构建树。

我发现的问题只是我不知道如何缓解的大量组合。例如，给定 13 个字母和一些长度从 1 到 13 的字典单词。在这种情况下，有 6227020800 个单字母单词的组合，您可以想象可能还有多少组合。

我注意到我输入的词越短，它就越慢。

我想知道我是在正确的轨道上还是只是在概念上是错误的？

我应该使用数据库引擎吗？

为了您的方便，我的字典中有一大段：

bay ara area aera fbaer frisco friscob friscoba afriscoar friscobay 贝弗里斯科aabceforsy

package margana;

import java.io.*;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.*;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

/**
 * Multi-word anagram solver.
 *
 * <p>Given a set of letters, a dictionary file (one word per line) and an MD5
 * hash, the solver builds a letter tree (trie) from the dictionary words that
 * can be spelled with the given letters, enumerates every ordered sequence of
 * words that uses each letter exactly once, joins the words with single
 * spaces and compares the MD5 of that phrase with the requested hash.
 *
 * <p>Typical use: construct, call {@link #growTree()}, then either
 * {@link #solve(String)} (multi-threaded, prints the phrase and terminates the
 * JVM) or {@link #findPhrase(String)} (synchronous, returns the phrase).
 */
public class Solution {

    /** Number of worker threads used by {@link #solve(String)}. */
    private static final int WORKER_THREADS = 16;

    /** Lower-cased letters of the anagram, spaces removed, sorted by char value. */
    private final String givenLetterSet;
    private final String file;
    private final ExecutorService executorService = Executors.newFixedThreadPool(WORKER_THREADS);

    LetterNode root = new LetterNode('\u03A9', null); // omega root node
    /** How many times each letter of {@link #givenLetterSet} is available. */
    private final Map<Character, Long> countedOriginalLetters = new HashMap<Character, Long>();

    /**
     * Mixed Anatree class: one node of the letter tree. A node whose
     * {@link #isEnding()} flag is set terminates a dictionary word spelled by
     * the letters on the path from the first level down to that node.
     */
    public static class LetterNode implements Comparable<LetterNode> {
        private final char letter;// does not matter for the root node
        private boolean ending;
        private final Map<Character, LetterNode> leaves = new HashMap<Character, LetterNode>();
        private final LetterNode motherNode;
        private String wholeCachedWord;
        /** Depth below the root, i.e. the length of the word ending here (1 for the root itself). */
        private final int length;

        /**
         * @param oneLetter letter stored in this node
         * @param mom       parent node, or {@code null} for the root
         */
        public LetterNode(char oneLetter, LetterNode mom) {
            letter = oneLetter;
            motherNode = mom;
            // The root and its direct children both report 1; deeper nodes add one per level.
            length = (mom != null && mom.motherNode != null) ? mom.length + 1 : 1;
        }

        public char getLetter() {
            return letter;
        }

        public Character getCharacter() {
            return Character.valueOf(letter);
        }

        public boolean isEnding() {
            return ending;
        }

        public void setEnding(boolean ending) {
            this.ending = ending;
        }

        /** @return the live, mutable map of child nodes keyed by their letter */
        public Map<Character, LetterNode> getLeaves() {
            return leaves;
        }

        public int getLength() {
            return length;
        }

        public LetterNode getMotherNode() {
            return motherNode;
        }

        /**
         * Spells the word formed by the letters from the first tree level down
         * to this node. The result is cached after the first call.
         *
         * @return the word; the empty string when called on the root node
         */
        public String compileNodesIntoWord() {
            // Read the cache once into a local so a concurrent writer cannot
            // make us return null between the check and the return.
            String word = wholeCachedWord;
            if (word == null) {
                char[] buffer = new char[motherNode == null ? 0 : length];
                LetterNode node = this;
                for (int i = buffer.length - 1; i >= 0; i--) {
                    buffer[i] = node.letter;
                    node = node.motherNode;
                }
                word = new String(buffer);
                wholeCachedWord = word;
            }
            return word;
        }

        /** Nodes compare equal when they hold the same letter, regardless of position in the tree. */
        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (o == null || getClass() != o.getClass()) {
                return false;
            }
            return letter == ((LetterNode) o).letter;
        }

        @Override
        public int hashCode() {
            return (int) letter;
        }

        @Override
        public int compareTo(LetterNode o) {
            return Character.compare(letter, o.letter);
        }

        @Override
        public String toString() {
            if (ending) {
                return compileNodesIntoWord();
            }
            return String.valueOf(letter);
        }
    }

    /**
     * @param anagram        the scrambled letters; case and spaces are ignored
     * @param dictionaryFile path of a text file with one dictionary word per line
     */
    public Solution(String anagram, String dictionaryFile) {
        file = dictionaryFile;
        // Sort chars, not platform-encoded bytes, so the result matches what
        // isPreliminaryValid() computes for any alphabet.
        char[] letters = anagram.toLowerCase(Locale.ROOT).replace(" ", "").toCharArray();
        Arrays.sort(letters);
        givenLetterSet = new String(letters);
        // Count only real letters; spaces are separators and never consumed by a word.
        for (char oneChar : letters) {
            Long seen = countedOriginalLetters.get(oneChar);
            countedOriginalLetters.put(oneChar, seen == null ? 1L : seen + 1L);
        }
    }

    /**
     * Rule out rubbish words: a word is useless when it is longer than the
     * anagram, contains a letter the anagram lacks, or needs more copies of a
     * letter than the anagram provides.
     *
     * @param oneWord dictionary word; compared case-insensitively
     * @return {@code true} when the word can never be part of a solution
     */
    private boolean invalidAgainstGivenSentence(String oneWord) {
        if (oneWord.length() > givenLetterSet.length()) {
            return true;
        }
        // NOTE: an apostrophe is treated like any other character, so "p'op"
        // is only accepted when the anagram itself contains an apostrophe.
        Map<Character, Long> usedSoFar = new HashMap<Character, Long>();
        for (char oneChar : oneWord.toLowerCase(Locale.ROOT).toCharArray()) {
            Long available = countedOriginalLetters.get(oneChar);
            if (available == null) {
                return true;
            }
            Long previous = usedSoFar.get(oneChar);
            long used = previous == null ? 1L : previous + 1L;
            if (used > available) {
                return true;
            }
            usedSoFar.put(oneChar, used);
        }
        return false;
    }

    /**
     * Reads the dictionary file (decoded as UTF-8) and inserts every usable
     * word into the letter tree, then prints the number of distinct words
     * stored and the depth of the tree.
     *
     * <p>NOTE(review): words are validated case-insensitively but inserted
     * with their original case, while the search only has lower-case letters
     * to spend; upper-case dictionary entries therefore never match.
     *
     * @throws IOException if the dictionary cannot be opened or read
     */
    public void growTree() throws IOException {
        long depth = 0; // for fun
        long candidate = 0;
        try (BufferedReader br = new BufferedReader(new InputStreamReader(
                new FileInputStream(file), java.nio.charset.StandardCharsets.UTF_8))) {
            String oneWord;
            while ((oneWord = br.readLine()) != null) {
                // A blank line would otherwise flag the root itself as a word.
                if (oneWord.isEmpty() || invalidAgainstGivenSentence(oneWord)) {
                    continue;//is not a valid chunk of the given anagram
                }
                LetterNode previousNode = root;
                for (char one : oneWord.toCharArray()) {
                    LetterNode currentLetter = previousNode.getLeaves().get(one);
                    if (currentLetter == null) {// letter does not exists, let us add it
                        currentLetter = new LetterNode(one, previousNode);
                        previousNode.getLeaves().put(one, currentLetter);
                    }
                    previousNode = currentLetter;
                }
                // Count by the ending flag so that a prefix of an earlier word
                // ("bay" after "bays") is counted and duplicates are not.
                if (!previousNode.isEnding()) {
                    candidate += 1;
                    previousNode.setEnding(true);
                }
                depth = Math.max(depth, previousNode.getLength());
            }
        }
        System.out.println("Created an anatree comprising of " + candidate + " words, and " + depth + " levels");
    }

    /**
     * Starts the multi-threaded search: one task per first-level tree branch
     * whose letter occurs in the anagram. The task that finds a phrase whose
     * MD5 equals {@code md5} prints it and terminates the JVM with status 0.
     * The method returns immediately after submitting the tasks, and shuts the
     * pool down, so it can be called only once per instance.
     *
     * @param md5 hexadecimal MD5 of the wanted phrase, in any letter case
     */
    public void solve(String md5) throws NoSuchAlgorithmException {
        // Both lists are shared by all tasks but only ever copied, never mutated.
        List<LetterNode> foundWords = new ArrayList<LetterNode>();
        List<Character> input = anagramLetters();
        String wantedHash = md5.toLowerCase(Locale.ROOT);
        NavigableSet<LetterNode> firstLevel =
                new TreeSet<LetterNode>(root.getLeaves().values()).descendingSet();
        for (LetterNode node : firstLevel) {
            if (input.contains(node.getCharacter())) {
                executorService.execute(new SolverRunnable(foundWords, input, node, wantedHash));
            }
        }
        executorService.shutdown();
    }

    /**
     * Synchronous, single-threaded variant of {@link #solve(String)}.
     *
     * @param md5 hexadecimal MD5 of the wanted phrase, in any letter case
     * @return the phrase whose MD5 matches, or {@code null} when no sequence of
     *         dictionary words using every letter exactly once matches
     * @throws NoSuchAlgorithmException if the runtime offers no MD5 digest
     */
    public String findPhrase(String md5) throws NoSuchAlgorithmException {
        return startOver(anagramLetters(), new ArrayList<LetterNode>(), md5);
    }

    /** Returns a fresh mutable list holding every letter of the anagram. */
    private List<Character> anagramLetters() {
        List<Character> letters = new ArrayList<Character>(givenLetterSet.length());
        for (char one : givenLetterSet.toCharArray()) {
            letters.add(one);
        }
        return letters;
    }

    /** Task exploring every phrase whose first word starts in one first-level branch. */
    class SolverRunnable implements Runnable {
        private final List<LetterNode> initialWords;
        private final List<Character> spareCharacters;
        private final LetterNode initialNode;
        private final String md5Hash;

        public SolverRunnable(List<LetterNode> foundWords, List<Character> spareLetters, LetterNode route, String md5) {
            initialNode = route;
            initialWords = foundWords;
            spareCharacters = spareLetters;
            md5Hash = md5;
        }

        @Override
        public void run() {
            System.out.println("Started solving branch '" + initialNode.getCharacter() + "' from root ");
            try {
                String phrase = solve(initialWords, spareCharacters, initialNode, md5Hash);
                if (phrase != null) {
                    System.out.println(phrase);
                    executorService.shutdownNow();
                    // Sibling tasks do not poll for interruption, so exit explicitly.
                    System.exit(0);
                }
            } catch (NoSuchAlgorithmException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Depth-first search step: spends the letter of {@code route} and then
     * tries (1) closing the current word here, if the node ends a word, and
     * starting the next word at the root, and (2) extending the current word
     * through each child node.
     *
     * @param foundWords   words completed so far; never modified
     * @param spareLetters letters still unused before entering {@code route}; never modified
     * @param route        tree node to enter
     * @param md5          wanted hash
     * @return the matching phrase, or {@code null} when this subtree has none
     */
    private String solve(List<LetterNode> foundWords, List<Character> spareLetters, LetterNode route, String md5) throws NoSuchAlgorithmException {
        Character letter = route.getCharacter();
        if (!spareLetters.contains(letter)) {
            return null;
        }
        List<Character> workspace = new ArrayList<Character>(spareLetters);
        workspace.remove(letter); // remove(Object): drops exactly one occurrence

        if (route.isEnding()) {
            List<LetterNode> words = new ArrayList<LetterNode>(foundWords);
            words.add(route);
            String hit = workspace.isEmpty() ? check(words, md5) : startOver(workspace, words, md5);
            if (hit != null) {
                return hit;
            }
        }
        if (workspace.isEmpty()) {
            return null;
        }
        for (LetterNode child : route.getLeaves().values()) {
            String hit = solve(foundWords, workspace, child, md5);
            if (hit != null) {
                return hit;
            }
        }
        return null;
    }

    /**
     * Hashes a complete candidate phrase.
     *
     * @return the phrase when it uses exactly the given letters and its MD5
     *         equals {@code md5} (case-insensitively), otherwise {@code null}
     */
    private String check(List<LetterNode> localFoundWords, String md5) throws NoSuchAlgorithmException {
        if (!isPreliminaryValid(localFoundWords)) {
            return null;
        }
        String phrase = concatenateNodesWithSpaces(localFoundWords);
        return md5.equalsIgnoreCase(digest(phrase)) ? phrase : null;
    }

    /** Begins a new word: tries every first-level branch with the remaining letters. */
    private String startOver(List<Character> workspace, List<LetterNode> localFoundWords, String md5) throws NoSuchAlgorithmException {
        for (LetterNode node : root.getLeaves().values()) {
            String hit = solve(localFoundWords, workspace, node, md5);
            if (hit != null) {
                return hit;
            }
        }
        return null;
    }

    /**
     * Checks that the letters of the given words, taken together, are exactly
     * the letters of the anagram.
     *
     * @param words word-ending nodes making up a candidate phrase
     * @return {@code true} when the words form a full anagram of the input
     */
    public boolean isPreliminaryValid(List<LetterNode> words) {
        StringBuilder builder = new StringBuilder(givenLetterSet.length());
        int total = 0;
        for (LetterNode word : words) {
            builder.append(word.compileNodesIntoWord());
            total += word.length;
        }
        if (total != givenLetterSet.length()) {
            return false;
        }
        char[] letters = builder.toString().toCharArray();
        Arrays.sort(letters);
        return new String(letters).equals(givenLetterSet);
    }

    /**
     * Joins the words spelled by the given nodes with single spaces.
     *
     * @param words word-ending nodes in phrase order
     * @return the phrase; empty for an empty list
     */
    public static String concatenateNodesWithSpaces(List<LetterNode> words) {
        StringBuilder builder = new StringBuilder();
        boolean first = true;
        for (LetterNode word : words) {
            if (!first) {
                builder.append(" ");
            }
            builder.append(word.compileNodesIntoWord());
            first = false;
        }
        return builder.toString();
    }

    /**
     * Computes the MD5 of the UTF-8 encoding of a string.
     *
     * @param original text to hash
     * @return 32 lower-case hexadecimal digits
     * @throws NoSuchAlgorithmException if the runtime offers no MD5 digest
     */
    public static String digest(String original) throws NoSuchAlgorithmException {
        MessageDigest md = MessageDigest.getInstance("MD5");
        // Fixed charset: the hash must not depend on the platform default.
        md.update(original.getBytes(java.nio.charset.StandardCharsets.UTF_8));
        StringBuilder sb = new StringBuilder(32);
        for (byte b : md.digest()) {
            sb.append(String.format("%02x", b & 0xff));
        }
        return sb.toString();
    }

    /**
     * Command-line entry point.
     *
     * @param args {@code <anagram> <dictionary file> <md5 hash>}, e.g.
     *             {@code scofriybaarae dict.txt FD8D80332CCA32905F11860FB866CA92}
     */
    public static void main(String[] args) throws IOException, NoSuchAlgorithmException {
        if (args.length < 3) {
            throw new IllegalArgumentException("Usage: Solution <anagram> <dictionary file> <md5 hash>");
        }
        Solution s = new Solution(args[0], args[1]);
        s.growTree();
        // Other sample hashes used during development:
        //   BE2B1B1409746B5416F44FB6D9C16A55  cop pop
        //   493DF2D8AC7EDB14CD50CA07A539A805  cop p'op
        s.solve(args[2]); //frisco bay area
    }

}

【问题讨论】：

我不完全清楚这个问题是什么。你能介绍一些示例输入和输出吗？我不清楚哈希与此有什么关系。你只是想解决一个字谜？
完全忽略你的字谜可能会更快（长度除外），只需在字典中创建单词的组合，直到你得到匹配的哈希。
请查看我对问题的更新。
在问题中，它可以是任何字谜，还是生成的短语是否由空格分隔的proper 单词组成？您的字典似乎包含大量垃圾词，例如“aabceforsy”和afriscoar。把它们拿出来，你就减少了问题空间。您还可以权衡您的字典，使常用词排在第一位。
我们应该把相同字母数量相等或更少的所有东西都视为正确的。关键是我们不知道最初加密的是什么，所以所有单词都是合法的。

标签： java algorithm constraint-programming anagram

【解决方案1】：

可能的解决方案（nodejs）：

var
  md5 = require('MD5'),
  fs = require('fs');

/**
 * Builds a character-frequency table for a string.
 *
 * @param {string} str text to analyse
 * @returns {Object} map from each character to its number of occurrences
 */
function createIndex(str) {
  var counts = {};
  Array.prototype.forEach.call(str, function (chr) {
    var seen = counts[chr] || 0;
    counts[chr] = seen + 1;
  });
  return counts;
}

/**
 * Tests whether one frequency table fits inside another.
 *
 * @param {Object} index    available characters and their counts
 * @param {Object} subIndex required characters and their counts
 * @returns {boolean} false as soon as a required character is missing from
 *   `index` or required more often than `index` provides; true otherwise
 */
function indexContains(index, subIndex) {
  var chr;
  for (chr in subIndex) {
    if (!subIndex.hasOwnProperty(chr)) {
      continue;
    }
    if (!index.hasOwnProperty(chr)) {
      return false;
    }
    if (subIndex[chr] > index[chr]) {
      return false;
    }
  }
  return true;
}

/**
 * Subtracts one frequency table from another.
 *
 * @param {Object} index    available characters and their counts
 * @param {Object} subIndex characters to take away and their counts
 * @returns {?Object} a new table holding every character of `index` whose
 *   remaining count is non-zero, or null when nothing remains
 */
function excludeIndex(index, subIndex) {
  var remaining = {}, hasAny = false, chr, left;
  for (chr in index) {
    if (!index.hasOwnProperty(chr)) {
      continue;
    }
    left = subIndex.hasOwnProperty(chr) ? index[chr] - subIndex[chr] : index[chr];
    if (left) {
      remaining[chr] = left;
      hasAny = true;
    }
  }
  return hasAny ? remaining : null;
}

/**
 * Drops items whose `property` value strictly equals that of an earlier item,
 * keeping the first occurrence and the original order.
 *
 * @param {Array<Object>} items    objects to de-duplicate
 * @param {string}        property name of the property to compare
 * @returns {Array<Object>} a new array without the later duplicates
 */
function uniqueByProperty(items, property) {
  return items.filter(function (candidate, position) {
    var key = candidate[property], earlier = 0;
    while (earlier < position) {
      if (items[earlier][property] === key) {
        return false;
      }
      earlier += 1;
    }
    return true;
  });
}

/**
 * Recursively searches for a space-separated sequence of dictionary words
 * that uses up every character in `charsIndex` and whose MD5 equals
 * `targetHash`.
 *
 * @param {Object}        charsIndex  frequency table of the characters still unused
 * @param {Array<Object>} dict        candidate entries of the form {word, index}
 * @param {Array<string>} prevWords   words already chosen, in phrase order
 * @param {string}        targetHash  hexadecimal MD5 of the wanted phrase; letter
 *   case is ignored (the result of md5() is compared against its lower-case form)
 * @returns {?string} the matching phrase, or null when none exists
 */
function findAnagram(charsIndex, dict, prevWords, targetHash) {
  var i, item, nextCharsIndex, result, words,
    // Normalise so an upper-case hash such as "FD8D80..." can still match.
    wantedHash = typeof targetHash === 'string' ? targetHash.toLowerCase() : targetHash;
  // Only words that still fit in the remaining letters can appear deeper down,
  // so the filtered list is what gets passed to the recursive calls.
  dict = dict.filter(function (item) {
    return indexContains(charsIndex, item.index);
  });
  // Duplicates are removed once, on the outermost call only.
  if (!prevWords.length) {
    dict = uniqueByProperty(dict, 'word');
  }
  for (i = 0; i < dict.length; i++) {
    item = dict[i];
    nextCharsIndex = excludeIndex(charsIndex, item.index);
    words = prevWords.concat(item.word);
    if (nextCharsIndex) {
      // Letters remain: keep adding words.
      result = findAnagram(nextCharsIndex, dict, words, wantedHash);
      if (result) {
        return result;
      }
    } else {
      // Every letter is used: this is a complete candidate phrase.
      result = words.join(' ');
      if (md5(result) === wantedHash) {
        return result;
      }
    }
  }
  return null;
}

// Driver: load the word list, index every word, and search for the phrase
// whose MD5 equals finalMD5 among the full anagrams of initialStr.
var
  // Split on LF or CRLF so Windows word lists do not leave "\r" in each word.
  dict = fs.readFileSync('./data/wordlist.txt', 'utf8').split(/\r?\n/)
    .filter(function (str) {
      // Drop lines that are empty once ALL spaces are removed.
      return str.replace(/ /g, '');
    })
    .map(function (word) {
      return {word: word, index: createIndex(word)};
    }),
  initialStr = "poultry outwits ants",
  finalMD5 = "4624d200580677270a54ccff86b9610e",
  // The /g flag matters: without it only the first space is stripped, a space
  // stays in the letter index, and no word sequence can ever use it up.
  result = findAnagram(createIndex(initialStr.replace(/ /g, '')), dict, [], finalMD5);

console.log(result);

【讨论】：

好的，感谢发布。你已经知道撇号了。
事实上，可以使用与位置无关的散列进行操作，例如基于质数的散列。每个单词都可以表示为其字母的总和。这种方法的美妙之处在于乘积始终不是质数。至于数据结构，可以建立一棵树或求和图，然后寻找产生原始哈希和的路径。