【发布时间】:2015-03-10 22:02:52
【问题描述】:
我正在解决一个经典问题。准确地说,我正在尝试解决一个完整的字谜。
任何以另一种顺序精确再现字母的单词或短语都是字谜。
我有一个字谜、字典和哈希。我需要想出最初经过哈希处理的短语,因此程序应该使用给定的字典生成所有排列,并检查其中是否有任何一个是答案。
总而言之,有人为我隐藏了一条消息,我需要破解它!
样本输入:
scofriybaarae dict.txt FD8D80332CCA32905F11860FB866CA92
以下所有短语都是 scofriybaarae 的有效字谜,因此它们所包含的单词可能不同或顺序不同。
是一个海湾
弗里斯科湾
frisco bay area
但是只有最后一个是答案。这是因为 frisco bay area 的 MD5 与作为参数给出的 MD5 匹配。
我们可以拆分处理字典、生成组合和检查 md5 的任务。
我使用字母树,该树的某些节点可能表示单词。一个分支的结尾总是一个单词的结尾。这意味着单个分支可以表示多个单词;例如在下面的分支中,粗体字母标记一个完整单词的结尾:
airport
在上面的示例中,存储了两个单词,因此在您浏览时很容易划掉使用过的字母。
尽管我对求解器的性能不满意,但我的程序可以非常快地从字典中构建树。
我发现的问题只是我不知道如何缓解的大量组合。例如,给定 13 个字母和一些长度从 1 到 13 的字典单词。在这种情况下,有 6227020800 个单字母单词的组合,您可以想象可能还有多少组合。
我注意到我输入的词越短,它就越慢。
我想知道我是在正确的轨道上还是只是在概念上是错误的?
我应该使用数据库引擎吗?
为了您的方便,我的字典中有一大段:
bay ara area aera fbaer frisco friscob friscoba afriscoar friscobay bayfrisco aabceforsy
package margana;
import java.io.*;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.*;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
/**
 * Multi-threaded brute-force solver for a "hashed anagram" puzzle.
 *
 * <p>Given the letters of an anagram, a dictionary file (one word per line) and an MD5 hash,
 * the solver builds a letter tree from the dictionary words that can be spelled with the given
 * letters, enumerates space-separated word sequences that use every letter exactly once, and
 * prints the first sequence whose MD5 digest matches the requested hash.
 *
 * <p>Typical use: construct, call {@link #growTree()}, then call {@link #solve(String)} once.
 */
public class Solution {
    /** The anagram's letters: lower-cased, spaces removed, sorted. */
    private final String givenLetterSet;
    /** Path of the dictionary file read by {@link #growTree()}. */
    private final String file;
    private final ExecutorService executorService = Executors.newFixedThreadPool(16);
    LetterNode root = new LetterNode('\u03A9', null); // omega root node
    /** Number of occurrences of every letter in {@link #givenLetterSet}. */
    private final Map<Character, Long> countedOriginalLetters = new HashMap<Character, Long>();

    /**
     * Mixed Anatree class: one node of the letter tree. A node flagged as "ending" represents
     * the dictionary word spelled by the letters on the path from the root (exclusive) to it.
     *
     * <p>Note that {@link #equals(Object)}, {@link #hashCode()} and
     * {@link #compareTo(LetterNode)} look at the node's own letter only, so they are meaningful
     * only among siblings. Never use them to locate a specific word in a list of found words.
     */
    public static class LetterNode implements Comparable<LetterNode> {
        private final char letter; // does not matter for the root node
        private boolean ending;
        private final Map<Character, LetterNode> leaves = new HashMap<Character, LetterNode>();
        private final LetterNode motherNode;
        private String wholeCachedWord;
        private final int length;

        /**
         * @param oneLetter the letter this node stands for
         * @param mom       the parent node, or {@code null} for a root
         */
        public LetterNode(char oneLetter, LetterNode mom) {
            letter = oneLetter;
            motherNode = mom;
            // The root does not count as a letter, so a child of the root has length 1.
            length = (mom != null && mom.motherNode != null) ? mom.length + 1 : 1;
        }

        public char getLetter() {
            return letter;
        }

        public Character getCharacter() {
            return Character.valueOf(letter);
        }

        public boolean isEnding() {
            return ending;
        }

        public void setEnding(boolean ending) {
            this.ending = ending;
        }

        /** @return the live, mutable map of child nodes keyed by their letter */
        public Map<Character, LetterNode> getLeaves() {
            return leaves;
        }

        /** @return the number of letters on the path from the root (exclusive) to this node */
        public int getLength() {
            return length;
        }

        public LetterNode getMotherNode() {
            return motherNode;
        }

        /**
         * Spells the word formed by the path from the root (exclusive) down to this node.
         * The result is cached after the first call. Called on a root node it returns just
         * the root's own letter instead of failing.
         *
         * @return the letters of this node and its non-root ancestors, in top-down order
         */
        public String compileNodesIntoWord() {
            if (wholeCachedWord != null) {
                return wholeCachedWord;
            }
            char[] letters = new char[length];
            int index = length;
            letters[--index] = letter;
            // Walk upwards and fill the array back to front; stop before the root.
            for (LetterNode node = motherNode; node != null && node.motherNode != null; node = node.motherNode) {
                letters[--index] = node.letter;
            }
            wholeCachedWord = new String(letters);
            return wholeCachedWord;
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (o == null || getClass() != o.getClass()) {
                return false;
            }
            return letter == ((LetterNode) o).letter;
        }

        @Override
        public int hashCode() {
            return (int) letter;
        }

        @Override
        public int compareTo(LetterNode o) {
            return Character.compare(letter, o.letter);
        }

        /** @return the whole word for an ending node, otherwise just this node's letter */
        @Override
        public String toString() {
            if (ending) {
                return compileNodesIntoWord();
            }
            return String.valueOf(letter);
        }
    }

    /**
     * @param anagram        the scrambled letters; case and spaces are ignored
     * @param dictionaryFile path of a text file containing one candidate word per line
     */
    public Solution(String anagram, String dictionaryFile) {
        file = dictionaryFile;
        // Sort chars rather than platform-encoded bytes so that the result is directly
        // comparable with the char-sorted string built in isPreliminaryValid().
        char[] letters = anagram.toLowerCase(Locale.ROOT).replace(" ", "").toCharArray();
        Arrays.sort(letters);
        givenLetterSet = new String(letters);
        for (char oneChar : letters) {
            Character key = Character.valueOf(oneChar);
            Long seen = countedOriginalLetters.get(key);
            countedOriginalLetters.put(key, seen == null ? 1L : seen + 1L);
        }
    }

    /**
     * Rule out rubbish words: a word is rejected when it is longer than the anagram, contains
     * a letter the anagram lacks, or needs more copies of a letter than the anagram offers.
     * The comparison is done on the lower-cased word.
     *
     * @param oneWord a non-null dictionary word
     * @return {@code true} if the word cannot be part of any solution
     */
    private boolean invalidAgainstGivenSentence(String oneWord) {
        if (oneWord.length() > givenLetterSet.length()) {
            return true;
        }
        Map<Character, Long> used = new HashMap<Character, Long>();
        for (char oneChar : oneWord.toLowerCase(Locale.ROOT).toCharArray()) {
            Character key = Character.valueOf(oneChar);
            Long available = countedOriginalLetters.get(key);
            if (available == null) {
                return true;
            }
            Long soFar = used.get(key);
            long now = soFar == null ? 1L : soFar + 1L;
            if (now > available) {
                return true; // the word needs this letter more often than the anagram has it
            }
            used.put(key, now);
        }
        return false;
    }

    /**
     * Reads the dictionary file and inserts every usable word into the letter tree below
     * {@link #root}, then prints how many distinct words were stored and the tree depth.
     * Empty lines are skipped.
     *
     * @throws IOException if the dictionary cannot be opened or read
     */
    public void growTree() throws IOException {
        long depth = 0; // for fun
        long candidate = 0;
        try (BufferedReader br = new BufferedReader(new FileReader(file))) {
            String oneWord;
            while ((oneWord = br.readLine()) != null) {
                // An empty line would otherwise flag the root itself as a word ending.
                if (oneWord.isEmpty() || invalidAgainstGivenSentence(oneWord)) {
                    continue; // is not a valid chunk of the given anagram
                }
                // NOTE(review): the filter lower-cases the word but the tree stores it as
                // written, so words with upper-case letters can never match the (lower-case)
                // letter pool while solving. Confirm whether they should be lower-cased here.
                LetterNode previousNode = root;
                for (char one : oneWord.toCharArray()) {
                    Character key = Character.valueOf(one);
                    LetterNode currentLetter = previousNode.getLeaves().get(key);
                    if (currentLetter == null) { // letter does not exist yet, let us add it
                        currentLetter = new LetterNode(one, previousNode);
                        previousNode.getLeaves().put(key, currentLetter);
                    }
                    previousNode = currentLetter;
                }
                // Count every distinct word, including one that is a prefix of an earlier word.
                if (!previousNode.isEnding()) {
                    candidate += 1;
                    previousNode.setEnding(true);
                }
                depth = Math.max(depth, previousNode.getLength());
            }
        }
        System.out.println("Created an anatree comprising of " + candidate + " words, and " + depth + " levels");
    }

    /**
     * Starts the search: one task per first-level tree branch whose letter occurs in the
     * anagram is submitted to the thread pool, after which the pool is shut down for new
     * tasks. The method returns immediately; a matching phrase is printed by a worker thread,
     * which then terminates the JVM. Because the pool is shut down here, this method can be
     * called only once per instance.
     *
     * @param md5 the hexadecimal MD5 hash of the phrase being looked for (any case)
     * @throws NoSuchAlgorithmException declared for API compatibility
     */
    public void solve(String md5) throws NoSuchAlgorithmException {
        List<LetterNode> foundWords = new ArrayList<LetterNode>();
        List<Character> input = new LinkedList<Character>();
        Set<Character> inputSet = new HashSet<Character>();
        for (char one : givenLetterSet.toCharArray()) {
            Character boxed = Character.valueOf(one);
            input.add(boxed);
            inputSet.add(boxed);
        }
        String wantedHash = md5.toLowerCase(Locale.ROOT);
        NavigableSet<LetterNode> firstLevel = new TreeSet<LetterNode>(root.getLeaves().values()).descendingSet();
        for (LetterNode node : firstLevel) {
            if (inputSet.contains(node.getCharacter())) {
                // foundWords and input are shared between tasks but only ever copied, never mutated.
                executorService.execute(new SolverRunnable(foundWords, input, node, wantedHash));
            }
        }
        executorService.shutdown();
    }

    /** Task that explores every phrase whose first word starts in one first-level branch. */
    class SolverRunnable implements Runnable {
        private final List<LetterNode> initialWords;
        private final List<Character> spareCharacters;
        private final LetterNode initialNode;
        private final String md5Hash;

        public SolverRunnable(List<LetterNode> foundWords, List<Character> spareLetters, LetterNode route, String md5) {
            initialNode = route;
            initialWords = foundWords;
            spareCharacters = spareLetters;
            md5Hash = md5;
        }

        @Override
        public void run() {
            System.out.println("Started solving branch '" + initialNode.getCharacter() + "' from root ");
            try {
                solve(initialWords, spareCharacters, initialNode, md5Hash);
            } catch (NoSuchAlgorithmException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Recursive search step. Starting at {@code route}, walks down the tree consuming one spare
     * letter per node. At every word ending it also tries to start a new word from the root with
     * the letters left over; at every fork it recurses into each child whose letter is still
     * available. When all letters are used up on a word ending, the phrase is checked.
     *
     * <p>Neither {@code foundWords} nor {@code spareLetters} is modified; both are copied.
     *
     * @param foundWords   the complete words chosen so far
     * @param spareLetters the letters not yet consumed, including those of the word in progress
     * @param route        the node to continue from
     * @param md5          the wanted hash
     */
    private void solve(List<LetterNode> foundWords, List<Character> spareLetters, LetterNode route, String md5)
            throws NoSuchAlgorithmException {
        List<LetterNode> localFoundWords = new ArrayList<LetterNode>(foundWords);
        List<Character> workspace = new LinkedList<Character>(spareLetters);
        LetterNode current = route;
        while (true) {
            // Consume this node's letter exactly once; if it is not available the branch is dead.
            if (!workspace.remove(current.getCharacter())) {
                return;
            }
            Map<Character, LetterNode> leaves = current.getLeaves();
            if (leaves.isEmpty()) {
                break; // end of the branch
            }
            if (leaves.size() > 1) { // start solving recursively then quit
                for (LetterNode node : new TreeSet<LetterNode>(leaves.values())) { // checking every branch
                    if (workspace.contains(node.getCharacter())) {
                        solve(localFoundWords, workspace, node, md5);
                    }
                }
                break; // we solve routes without forks
            }
            if (workspace.isEmpty()) {
                break;
            }
            if (current.isEnding()) { // recursively solving a shorter word first then continue
                localFoundWords.add(current);
                startOver(workspace, localFoundWords, md5);
                // Remove by index: remove(Object) would use the letter-only equals() and could
                // drop an earlier word that merely ends in the same letter.
                localFoundWords.remove(localFoundWords.size() - 1);
            }
            current = leaves.values().iterator().next();
        }
        if (current.isEnding()) {
            localFoundWords.add(current);
            if (workspace.isEmpty()) {
                check(localFoundWords, md5);
                return;
            }
            startOver(workspace, localFoundWords, md5);
        }
    }

    /**
     * Tests a finished phrase. If it is a true anagram of the input and its MD5 digest matches,
     * the phrase is printed and the JVM is terminated.
     */
    private void check(List<LetterNode> localFoundWords, String md5) throws NoSuchAlgorithmException {
        if (isPreliminaryValid(localFoundWords)) {
            String phrase = concatenateNodesWithSpaces(localFoundWords);
            if (md5.equalsIgnoreCase(digest(phrase))) {
                System.out.println(phrase);
                executorService.shutdownNow();
                System.exit(0);
            }
        }
    }

    /** Begins a new word at every first-level branch whose letter is still available. */
    private void startOver(List<Character> workspace, List<LetterNode> localFoundWords, String md5)
            throws NoSuchAlgorithmException {
        for (LetterNode node : root.getLeaves().values()) {
            if (workspace.contains(node.getCharacter())) {
                solve(localFoundWords, workspace, node, md5);
            }
        }
    }

    /**
     * Cheap pre-check performed before hashing.
     *
     * @param words the candidate phrase as tree nodes
     * @return {@code true} if the words together use exactly the letters of the anagram
     */
    public boolean isPreliminaryValid(List<LetterNode> words) {
        StringBuilder builder = new StringBuilder();
        int total = 0;
        for (LetterNode word : words) {
            builder.append(word.compileNodesIntoWord());
            total += word.getLength();
        }
        if (total != givenLetterSet.length()) {
            return false;
        }
        char[] letters = builder.toString().toCharArray();
        Arrays.sort(letters);
        return new String(letters).equals(givenLetterSet);
    }

    /**
     * @param words tree nodes, each standing for one word
     * @return the words joined with single spaces, in list order
     */
    public static String concatenateNodesWithSpaces(List<LetterNode> words) {
        StringBuilder builder = new StringBuilder();
        for (LetterNode word : words) {
            if (builder.length() > 0) {
                builder.append(' ');
            }
            builder.append(word.compileNodesIntoWord());
        }
        return builder.toString();
    }

    /**
     * @param original the text to hash
     * @return the MD5 digest of the UTF-8 encoded text as 32 lower-case hexadecimal digits
     * @throws NoSuchAlgorithmException if the runtime offers no MD5 implementation
     */
    public static String digest(String original) throws NoSuchAlgorithmException {
        MessageDigest md = MessageDigest.getInstance("MD5");
        // Fixed charset so the digest does not depend on the platform default encoding.
        md.update(original.getBytes(java.nio.charset.StandardCharsets.UTF_8));
        StringBuilder sb = new StringBuilder(32);
        for (byte b : md.digest()) {
            sb.append(String.format("%02x", b & 0xff));
        }
        return sb.toString();
    }

    /**
     * Command line entry point.
     *
     * @param args {@code <anagram> <dictionary file> <md5 hash>}, for example
     *             {@code scofriybaarae dict.txt FD8D80332CCA32905F11860FB866CA92}
     */
    public static void main(String[] args) throws IOException, NoSuchAlgorithmException {
        if (args.length < 3) {
            System.err.println("Usage: Solution <anagram> <dictionary file> <md5 hash>");
            System.exit(1);
        }
        Solution s = new Solution(args[0], args[1]);
        s.growTree();
        // Other hashes used while experimenting:
        //   BE2B1B1409746B5416F44FB6D9C16A55 -> cop pop
        //   493DF2D8AC7EDB14CD50CA07A539A805 -> cop p'op
        s.solve(args[2]); // frisco bay area
    }
}
【问题讨论】:
-
我不完全清楚这个问题是什么。你能介绍一些示例输入和输出吗?我不清楚哈希与此有什么关系。你只是想解决一个字谜?
-
完全忽略你的字谜可能会更快(长度除外),只需在字典中创建单词的组合,直到你得到匹配的哈希。
-
请查看我对问题的更新。
-
在问题中,答案可以是任何字谜,还是生成的短语必须由空格分隔的正规(proper)单词组成?您的字典似乎包含大量垃圾词,例如“aabceforsy”和“afriscoar”。把它们去掉,就能缩小问题空间。您还可以给字典加权,使常用词排在前面。
-
我们应该把相同字母数量相等或更少的所有东西都视为正确的。关键是我们不知道最初加密的是什么,所以所有单词都是合法的。
标签: java algorithm constraint-programming anagram