我不会为此使用正则表达式。
假设你要解析成这个数据结构:
/// Result of parsing a class declaration.
struct ClassDecl {
    /// Name of the declared class.
    std::string name;

    /// Names of the interfaces the class implements; may be empty.
    std::vector<std::string> interfaces;
};
使用 Boost Spirit,我们定义一个标识符的解析规则:
// Identifier: one alphabetic character followed by any number of alphanumeric
// characters or underscores. The rule's attribute type is std::string.
qi::rule<It, std::string()> ident = qi::alpha >> *(qi::alnum | qi::char_('_'));
这比寄希望于 [[:word:]]+ “碰巧能用”要准确得多:该规则要求标识符必须以字母开头,其后才允许出现字母、数字或下划线。
现在你就可以解析整个类声明了:
"class" >> ident >> -("implements" >> (ident % ','))
完整演示
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
/// Parsed form of a `class Name [implements A, B, ...]` declaration.
struct ClassDecl {
    /// Identifier that follows the `class` keyword.
    std::string name;

    /// Identifiers listed after `implements`; empty when the clause is absent.
    std::vector<std::string> interfaces;
};
/// @brief Parses a declaration of the form `class Name [implements I1, I2, ...]`.
///
/// Whitespace between tokens is skipped. An identifier is one alphabetic
/// character followed by any number of alphanumeric characters or underscores.
/// The whole input must match: anything left over after the declaration
/// (other than whitespace) is treated as a parse failure.
///
/// @param text  The text to parse.
/// @return The class name and the (possibly empty) list of interface names.
/// @throws std::runtime_error if @p text is not a complete, valid declaration.
ClassDecl parse_class_declaration(std::string const& text) {
    namespace qi = boost::spirit::qi;
    using It = std::string::const_iterator;

    // Rules declared without a skipper act as implicit lexemes: whitespace is
    // skipped before the rule starts, but never inside it.
    qi::rule<It, std::string()> ident = qi::alpha >> *(qi::alnum | qi::char_('_'));

    // Keywords must end at a word boundary so that inputs such as "classFoo"
    // or "implementsBar" are not mistaken for a keyword followed by a name.
    qi::rule<It> kw_class      = qi::lit("class")      >> !(qi::alnum | qi::char_('_'));
    qi::rule<It> kw_implements = qi::lit("implements") >> !(qi::alnum | qi::char_('_'));

    ClassDecl result;

    // qi::eoi anchors the grammar at end of input; without it a successful
    // match of only a prefix would silently ignore trailing garbage.
    bool const ok = qi::phrase_parse(
        text.begin(), text.end(),
        kw_class >> ident >> -(kw_implements >> (ident % ',')) >> qi::eoi,
        qi::space, result.name, result.interfaces);

    if (!ok) {
        throw std::runtime_error("parse_class_declaration");
    }
    return result;
}
/// Demo driver: parses a few sample declarations and prints the class name
/// followed by the interfaces found for each one.
int main() {
    // Samples cover: no `implements` clause, a single interface, and several
    // interfaces with inconsistent spacing around the commas.
    static char const* const samples[] = {
        "class AClass",
        "class BClass implements Interface1",
        "class CClass implements Interface1, Interface2, Interface3,Interface4",
    };

    for (char const* input : samples) {
        auto const parsed = parse_class_declaration(input);

        std::cout << "Parsed '" << parsed.name << "' implementing "
                  << parsed.interfaces.size() << " interfaces\n";

        for (auto const& interface_name : parsed.interfaces) {
            std::cout << " - " << interface_name << "\n";
        }
    }
}
程序输出:
Parsed 'AClass' implementing 0 interfaces
Parsed 'BClass' implementing 1 interfaces
- Interface1
Parsed 'CClass' implementing 4 interfaces
- Interface1
- Interface2
- Interface3
- Interface4