您可以先从使用 std::unordered_map 的做法入手。
#include <iomanip>
#include <iostream>
#include <string>
#include <unordered_map>
// Counts how often every substring of length >= 2 occurs in a sample string,
// then prints the occurrence count, the length and the text of each substring
// that was seen more than once.
int main() {
    const std::string text = "0579870512057";

    // substring -> number of times it occurs in `text`
    std::unordered_map<std::string, size_t> occurrences;

    // `first` and `last` are the inclusive bounds of the current substring;
    // `last` starts one past `first`, so single characters are never counted.
    for (size_t first = 0; first + 1 < text.size(); ++first) {
        for (size_t last = first + 1; last < text.size(); ++last) {
            const std::string piece = text.substr(first, last - first + 1);
            // operator[] value-initialises a missing entry to 0 before the increment
            occurrences[piece] += 1;
        }
    }

    // Report only the substrings that occurred more than once.
    for (const auto& entry : occurrences) {
        if (entry.second <= 1) {
            continue;
        }
        std::cout << std::setw(3) << entry.second << " "
                  << std::setw(12) << entry.first.size() << " " << entry.first << "\n";
    }
}
稍微复杂一点的方法是创建一个带自定义比较器的 std::set,让重复出现的模式按从最长到最短的顺序排列。它的优点是:结果中的第一个模式一定是最长的。此外,除了找到的模式之外,它不会复制原始输入的其他部分。
#include <iostream>
#include <set>
// Ordering used by the result set: longer patterns sort first; patterns of
// equal length are ordered by ascending string value.
struct comp {
    bool operator()(const std::string& a, const std::string& b) const {
        // note b before a when comparing sizes: the longest pattern goes first
        if (a.size() != b.size()) return b.size() < a.size();
        return a < b;  // lowest "value" first when the lengths are equal
    }
};

// Set of patterns kept in the order defined by `comp` (longest first).
using string_pattern_set = std::set<std::string, comp>;

// Collects every substring of `str` that occurs at least twice.
//
// Occurrences may overlap (in "aaa" the pattern "aa" is found at offsets 0
// and 1). Each distinct pattern is stored once, as a copy of its characters.
// Because of the set ordering, the first element of the result, if any, is
// the longest recurring pattern.
//
// Returns an empty set when nothing recurs, including for an empty or
// one-character input.
string_pattern_set get_recurring_patterns(const std::string& str) {
    string_pattern_set sorted_result;

    // For an empty string, size()-1 would wrap around to a huge unsigned
    // value and the iterator arithmetic below would leave the string.
    if (str.empty()) return sorted_result;

    for (size_t len = str.size() - 1; len > 0; --len) {
        // last start position at which a window of `len` characters still fits
        const auto end = str.end() - len;
        for (auto pos = str.begin(); pos < end; ++pos) {
            // look for a second occurrence of [pos, pos+len) starting later in the string
            for (auto check = pos + 1; check <= end; ++check) {
                if (std::equal(pos, pos + len, check)) {
                    sorted_result.emplace(pos, pos + len);
                    break;  // one repeat is enough to qualify the pattern
                }
            }
        }
    }
    return sorted_result;
}
int main() {
string_pattern_set res = get_recurring_patterns("0579870512057");
if(res.size())
std::cout << "the longest recurring pattern is "
<< res.begin()->size() << " char(s)\n";
// display all patterns from longest to shortest
for(auto& str : res) std::cout << str << "\n";
}
如果您确实担心内存消耗,第三种选择是完全不保存重复出现的字符串本身,而只保存匹配子字符串的 begin 和 end 迭代器。这种方法适用于预计重复模式较长且数量较多的情况。请注意:这些迭代器指向原始字符串,因此在使用结果期间,原始字符串必须保持存活且不被修改。
#include <iostream>
#include <set>
#include <algorithm>
#include <iterator>
// A matched pattern, stored as a [first, second) iterator range into the
// searched string instead of as a copy of its characters.
using strits = std::pair<std::string::const_iterator, std::string::const_iterator>;

// Ordering used by the result set: longer ranges sort first; ranges of equal
// length are ordered by the lexicographical order of the characters they
// cover. Two ranges covering the same text therefore count as the same
// pattern, wherever they start.
struct comp {
    bool operator()(const strits& a, const strits& b) const {
        const auto a_len = a.second - a.first;
        const auto b_len = b.second - b.first;
        // note b before a when comparing sizes: the longest pattern goes first
        if (a_len != b_len) return b_len < a_len;
        // lowest "value" first when the lengths are equal
        return std::lexicographical_compare(a.first, a.second, b.first, b.second);
    }
};

// Set of iterator ranges kept in the order defined by `comp` (longest first).
using string_pattern_set = std::set<strits, comp>;

// Collects every substring of `str` that occurs at least twice, as iterator
// ranges into `str`. Occurrences may overlap.
//
// Template parameters:
//   LONGEST_ONLY - stop after the first (longest) pattern length that yields
//                  any result, so only patterns of that length are returned.
//   BIG_STRING   - before scanning for a repeat, skip any window whose text is
//                  already in the result set.
//
// Lifetime: the returned ranges point into the string object that `str`
// refers to. They are only usable while that object is alive and unmodified,
// so do not pass a temporary if the result is used after the call expression.
//
// Returns an empty set when nothing recurs, including for an empty or
// one-character input.
template<bool LONGEST_ONLY = false, bool BIG_STRING = false>
string_pattern_set get_recurring_patterns(const std::string& str) {
    string_pattern_set sorted_result;

    // For an empty string, size()-1 would wrap around to a huge unsigned
    // value and the iterator arithmetic below would leave the string.
    if (str.empty()) return sorted_result;

    for (size_t len = str.size() - 1; len > 0; --len) {
        // last start position at which a window of `len` characters still fits
        const auto end = str.end() - len;
        for (auto begin = str.begin(); begin < end; ++begin) {
            if constexpr (BIG_STRING) {
                // skip patterns already found to be recurring.
                // in C++20, use set::contains instead
                if (sorted_result.find(strits(begin, begin + len)) != sorted_result.end())
                    continue;
            }
            // look for a second occurrence of [begin, begin+len) starting later in the string
            for (auto check = begin + 1; check <= end; ++check) {
                if (std::equal(begin, begin + len, check)) {
                    sorted_result.emplace(begin, begin + len);
                    break;  // one repeat is enough to qualify the pattern
                }
            }
        }
        if constexpr (LONGEST_ONLY) {
            // lengths are visited in descending order, so the first hit is the longest
            if (!sorted_result.empty()) break;
        }
    }
    return sorted_result;
}
// Finds the recurring patterns of a sample string, reports the length of the
// longest one and then lists every pattern, longest first.
int main() {
    // The result stores iterators into the searched string, so the string must
    // be a named object that outlives `res`. Passing the literal directly would
    // create a temporary std::string that is destroyed at the end of the call
    // statement, leaving every stored iterator dangling.
    const std::string input = "0579870512057";
    string_pattern_set res = get_recurring_patterns(input);

    // The set is ordered longest-first, so its first element is the longest pattern.
    if (!res.empty()) {
        const auto& [begin, end] = *res.begin();
        std::cout << "the longest recurring pattern is "
                  << (end - begin) << " char(s)\n";
    }

    // display all patterns from longest to shortest
    for (const auto& [begin, end] : res) {
        std::copy(begin, end, std::ostream_iterator<char>(std::cout));
        std::cout << "\n";
    }
}