【问题标题】:How to implement #ifdef in a boost::spirit::qi grammar?如何在 boost::spirit::qi 语法中实现#ifdef?
【发布时间】:2015-08-21 16:37:18
【问题描述】:

根据某些 boost phoenix 函数的结果,有没有一种好方法可以使解析方式不同的语法非终结符?

在我的用例中,我有一个语法,其中包括 CPP 样式的 #define 指令和 #ifdef #else #endif 指令。 (它实际上不是 C 预处理器,尽管它只是别人做出的一些粗略的模仿。)当我在 qi 中解析它时,我将我的语法(在它的 ctor 中)传递给适用于融合的“预处理器数据库”对象的引用结构,并且我已经调整了允许添加 PP 定义/检查 PP 定义的 phoenix 函数。我已经做到了,以便 #define 指令具有注册新定义的语义操作。

当我尝试实现#ifdef #else 指令时,我不确定我应该做什么。我能想到的唯一方法是向我的所有语法非终结符的所有属性类型添加一个布尔标志,标记它是否在丢弃的#ifdef 分支中,并在解析我的 AST 之后爬过再次折腾标记的家伙。但这很不雅,一定有更好的方法,对吧?

如果可能,我希望能够跟踪原始行号(在解析 ifdef 之前)。

我希望问题很清楚,如果不是,我可以制作一个最小的示例来展示我正在尝试做的事情,但我的实际语法很大。

编辑:好的,我制作了一个 SSCCE:

所以这是一个解析非常简单的对语法的程序,并且有一些最小的预处理器语言,包括define和ifdef。我了解如何使用语义操作,以便匹配的东西会导致 C++ 回调被触发,并且该部分似乎正在工作。但是我不明白的是如何使用回调将信息反馈到语法中,即“如果这个 phoenix 函数返回 false 则以不同方式解析”。我想知道怎么说“如果这个 phoenix 函数返回布尔值 false 作为这个语义动作的一部分,那么任意声明非终结符不匹配并回溯就足够了。”实际上,既然我正在编写所有这些内容,我想我知道“迷你 XML”示例必须以某种方式执行此操作,因为它使用局部变量来强制开始和结束标记必须匹配?所以我想我可以对它在那里的工作方式进行逆向工程。但显然我还没有通过阅读文档/研究示例来弄清楚。

请注意,我认为这与您的第一个建议不同,只需跳过语法即可。问题是我也不知道如何使跳过语法的行为取决于 boost phoenix 函数的输出,这又是同样的问题。我现在唯一知道如何在 qi 中处理 phoenix 是,触发 void 回调,并制作分配给属性值的东西。

#define BOOST_SPIRIT_USE_PHOENIX_V3

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <boost/fusion/adapted/struct/adapt_struct.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <boost/variant/recursive_variant.hpp>

#include <cassert>
#include <cmath>
#include <memory>
#include <string>
#include <utility>
#include <vector>

namespace fusion = boost::fusion;
namespace phoenix = boost::phoenix;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;

typedef std::string pp_sym;
typedef std::set<pp_sym> pp_data;

void add(pp_data & defines, const pp_sym & s) { defines.insert(s); }
void remove(pp_data & defines, const pp_sym & s) { defines.erase(s); }
bool search(pp_data & defines, const pp_sym & s) { return defines.count(s); }

BOOST_PHOENIX_ADAPT_FUNCTION(void, pp_add_define_, add, 2);
BOOST_PHOENIX_ADAPT_FUNCTION(void, pp_remove_define_, remove, 2);
BOOST_PHOENIX_ADAPT_FUNCTION(bool, pp_search_define_, search, 2);

typedef std::string Str;
typedef std::pair<Str, Str> Pair;
typedef std::vector<Pair> PairVec;

/***
 * Grammar definitions
 */

template <typename Iterator>
struct simple_grammar : qi::grammar<Iterator, PairVec()> {
    qi::rule<Iterator, PairVec()> main;
    qi::rule<Iterator, Pair()> pair;
    qi::rule<Iterator, Str()> first;
    qi::rule<Iterator, Str()> second;

    qi::rule<Iterator, pp_sym()> pp_symbol;
    qi::rule<Iterator> pp_directive;
    qi::rule<Iterator, pp_sym()> define_directive;
    qi::rule<Iterator, pp_sym()> undef_directive;
    qi::rule<Iterator, pp_sym()> if_directive;
    qi::rule<Iterator> else_directive;
    qi::rule<Iterator> endif_directive;

    qi::rule<Iterator> ws;

    simple_grammar(pp_data & preprocessor_data)
            : simple_grammar::base_type(main)
    {
        using qi::lit;
        using qi::char_;
        using namespace qi::labels;

        ws = char_(" \t\r\n");

        first = !lit('#') >> *(char_ - '=') >> lit('=');
        second = *(char_ - '\n') >> lit('\n');
        pair = first >> second;

        pp_symbol = +char_("A-Za-z_");

        pp_directive = &lit('#')
                >> ((define_directive [ pp_add_define_(ref(preprocessor_data), _1) ] )
                | (undef_directive [ pp_remove_define_(ref(preprocessor_data), _1) ] )
                | if_directive // [ ??? ]
                | else_directive
                | endif_directive)
                >> *(char_ - '\n') >> lit('\n');

        main = (pp_directive >> -main) | (pair >> -main);

        define_directive = lit("#define ") >> pp_symbol >> &ws;
        undef_directive  = lit("#undef ") >> pp_symbol >> &ws;
        if_directive     = lit("#ifdef ") >> pp_symbol >> &ws;
        else_directive   = lit("#else");
        endif_directive  = lit("#endif");
    }
};

const char * example_1 = ""
"#define FOO\n"
"led_zeppelin=9\n"
"the_shins=9\n"
"dead_mau5=6\n"
"portishead=10\n"
"#ifdef FOO\n"
"foo_fighters=7\n"
"#else\n"
"the_who=6\n"
"#endif\n"
"kanye_west=4\n"
"#undef FOO\n"
"#define BAR\n";

int main() {
    std::string temp{example_1};

    typedef std::string::const_iterator str_it;

    typedef simple_grammar<str_it> my_grammar;
    pp_data defines;
    my_grammar gram(defines); // Our grammar
    PairVec ast; // Our tree

    str_it it = temp.begin();
    str_it end = temp.end();

    bool b = qi::parse(it, end, gram, ast);

    assert(b);
    assert(defines.count("FOO") == 0);
    assert(defines.count("BAR") == 1);

    std::cout << "Parsed a list:\n\n";

    for( const auto & p : ast) {
        std::cout << p.first << "\n\t\t\t=\t" << p.second << std::endl;
    }
    return 0;
}

对我来说,上面的输出是(如预期的那样):

$ ./main 
Parsed a list:

led_zeppelin
            =   9
the_shins
            =   9
dead_mau5
            =   6
portishead
            =   10
foo_fighters
            =   7
the_who
            =   6
kanye_west
            =   4

但是我想做的是让 ifdef 部分做你自然期望的事情,并允许嵌套 ifdef 子句。

【问题讨论】:

  • 请确实制作一个SSCCE

标签: c++ parsing c++11 boost boost-spirit-qi


【解决方案1】:

针对添加到问题中的“SSCCE”代码:

AST

正确处理嵌套定义的唯一方法(包括条件块包含 #define/#undef 指令的情况!)是使用表示块树的 AST¹:

namespace Common {
    typedef std::string pp_sym;
}

namespace Ast {
    using Common::pp_sym;

    typedef std::string Str;
    typedef std::pair<Str, Str> Pair;
    typedef std::vector<Pair> Pairs;

    struct ConditionalBlock;

    namespace tag {
        struct define;
        struct undefine;
    }

    template <typename Tag> struct Directive {
        pp_sym name;
    };

    typedef Directive<tag::define> Define; 
    typedef Directive<tag::undefine> Undef; 

    typedef boost::make_recursive_variant<
                Pairs,
                boost::recursive_wrapper<ConditionalBlock>,
                Define,
                Undef
            >::type Block;

    typedef std::vector<Block> Blocks;

    struct ConditionalBlock {
        pp_sym required;
        Blocks if_, else_;
    };
}

为了便于在不使用语义操作的情况下解析这些:

BOOST_FUSION_ADAPT_TPL_STRUCT((Tag), (Ast::Directive)(Tag), name)
BOOST_FUSION_ADAPT_STRUCT(Ast::ConditionalBlock, required, if_, else_)

完成。

解析

由于上述工作,我们现在可以完全按照我们的意愿定义解析器!

注意事项:

  • 现在使用跳过程序来避免硬编码所需的空白数量或空白不容忍
  • 现在使用seek[eol] 忽略直到行尾
  • 现在使用distinct 解析标识符(参见boost::spirit::qi keywords and identifiers
  • 现在将#else 的外观设为可选(请参阅-else
  • 删除所有语义操作
  • 无需更多工作即可启用调试信息

    start   = skip(blank) [ blocks ];
    blocks  = *block;
    block   = define | undef | conditional_block | +pair; 
    
    pair      = !char_("#") >> +~char_("=\r\n") >> '=' >> *(char_ - eol) >> *eol;
    pp_symbol = qr::distinct(char_("A-Za-z_")) [ +char_("A-Za-z_") ];
    
    define = '#' >> distinct(alnum | '_') [ "define" ] >> pp_symbol >> seek[*eol];
    undef  = '#' >> distinct(alnum | '_') [ "undef"  ] >> pp_symbol >> seek[*eol];
    
    else_  = '#' >> distinct(alnum | '_') [ "else"   ] >> seek[*eol];
    endif  = '#' >> distinct(alnum | '_') [ "endif"  ] >> seek[*eol];
    
    conditional_block = 
        ('#' >> distinct(alnum | '_') [ "ifdef" ] >> pp_symbol >> seek[*eol])
        >> *(!(else_|endif) >> block) 
        >> -else_
        >> *(!endif >> block)
        >> endif
        ;
    
    BOOST_SPIRIT_DEBUG_NODES((start)(blocks)(block)(pair)(pp_symbol)(define)(undef)(else_)(endif)(conditional_block))
    

我会说这很清晰,它会导致 ast 包含您以后可能想要使用的所有信息

处理逻辑

既然我们已经将处理与解析分开,处理就是对树的单次访问。我们使用单个函数对象Logic::Preprocessor 兼作变体访问者:

Logic::Preprocess pp({{"EXTERNAL"}} , "    ");
pp(ast);

在此示例中,我们从定义的预处理器符号 EXTERNAL 开始(就好像它是在“外部”定义的,就像在命令行上一样)。

访问者的实现非常简单,但让我展示一下操作位,即在哪里采用条件和忽略分支。为了使事情变得非常完整,我什至遍历了 满意的分支,只是为了表明完整的 AST 在那里,但是使用函数对象的 en isolated 实例,所以没有效果:

    void operator()(Ast::ConditionalBlock const& cb) const {
        bool const satisfied = ctx.defined.count(cb.required);

        auto old_indent = indent;
        indent += "\t";
        std::cout << old_indent << "#ifdef " << cb.required << " // " << std::boolalpha << satisfied << "\n";

        Preprocess isolated{ctx, indent+"// "}; // prevent changes to ctx to affect us for the non-matching branch

        (satisfied? *this : isolated)(cb.if_);
        std::cout << old_indent << "#else " << " // ifdef " << cb.required << "\n";
        (satisfied? isolated : *this)(cb.else_);

        std::cout << old_indent << "#endif " << " // ifdef " << cb.required << "\n";
        indent.resize(indent.size()-1);
    }
    void operator()(Ast::Define const& directive) const {
        ctx.defined.insert(directive.name);

        std::cout << indent << "#define\t" << directive.name;
        report();
    }
    void operator()(Ast::Undef const& directive) const {
        ctx.defined.erase(directive.name);

        std::cout << indent << "#undef\t" << directive.name;
        report();
    }

演示

观察这个文档,它甚至嵌套了条件块并在条件分支中定义符号(因此,有条件地),是如何被正确解释的:

#define FOO
led_zeppelin=9
the_shins=9
dead_mau5=6
portishead=10
#ifdef FOO
foo_fighters=7
#define ZOO
#else
the_who=6
#define QUX
#endif

#ifdef EXTERNAL

#ifdef ZOO
zoowasdefined=yes
#else
zoowasdefined=no
#endif

#ifdef QUX
quxwasdefined=yes
#else
quxwasdefined=no
#endif
#endif

kanye_west=4
#undef FOO
#define BAR

我们的演示程序打印:Live On Coliru

Preprocess results:

    #define FOO // effective: EXTERNAL FOO 
    led_zeppelin=9
    the_shins=9
    dead_mau5=6
    portishead=10
    #ifdef FOO // true
        foo_fighters=7
        #define ZOO // effective: EXTERNAL FOO ZOO 
    #else  // ifdef FOO
        // the_who=6
        // #define  QUX // effective: EXTERNAL FOO QUX 
    #endif  // ifdef FOO
    #ifdef EXTERNAL // true
        #ifdef ZOO // true
            zoowasdefined=yes
        #else  // ifdef ZOO
            // zoowasdefined=no
        #endif  // ifdef ZOO
        #ifdef QUX // false
            // quxwasdefined=yes
        #else  // ifdef QUX
            quxwasdefined=no
        #endif  // ifdef QUX
    #else  // ifdef EXTERNAL
    #endif  // ifdef EXTERNAL
    kanye_west=4
    #undef  FOO // effective: EXTERNAL ZOO 
    #define BAR // effective: BAR EXTERNAL ZOO 


Defines still in effect: BAR EXTERNAL ZOO 

完整列表

Live On Coliru

#define BOOST_SPIRIT_USE_PHOENIX_V3
//#define BOOST_SPIRIT_DEBUG

#include <boost/fusion/adapted.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi_distinct.hpp>
#include <boost/spirit/repository/include/qi_seek.hpp>
#include <boost/variant.hpp>

#include <cassert>

namespace phx = boost::phoenix;
namespace qi  = boost::spirit::qi;
namespace qr  = boost::spirit::repository::qi;

namespace Common {
    typedef std::string pp_sym;
}

namespace Ast {
    using Common::pp_sym;

    typedef std::string Str;
    typedef std::pair<Str, Str> Pair;
    typedef std::vector<Pair> Pairs;

    struct ConditionalBlock;

    namespace tag {
        struct define;
        struct undefine;
    }

    template <typename Tag> struct Directive {
        pp_sym name;
    };

    typedef Directive<tag::define> Define; 
    typedef Directive<tag::undefine> Undef; 

    typedef boost::make_recursive_variant<
                Pairs,
                boost::recursive_wrapper<ConditionalBlock>,
                Define,
                Undef
            >::type Block;

    typedef std::vector<Block> Blocks;

    struct ConditionalBlock {
        pp_sym required;
        Blocks if_, else_;
    };
}

BOOST_FUSION_ADAPT_TPL_STRUCT((Tag), (Ast::Directive)(Tag), name)
BOOST_FUSION_ADAPT_STRUCT(Ast::ConditionalBlock, required, if_, else_)

/***
 * Grammar definitions
 */

template <typename Iterator>
struct simple_grammar : qi::grammar<Iterator, Ast::Blocks()> {

    simple_grammar() : simple_grammar::base_type(start)
    {
        using namespace qi;
        using qr::distinct;
        using qr::seek;

        start   = skip(blank) [ blocks ];
        blocks  = *block;
        block   = define | undef | conditional_block | +pair; 

        pair      = +~char_("=\r\n") >> '=' >> *(char_ - eol) >> *eol;
        pp_symbol = qr::distinct(char_("A-Za-z_")) [ +char_("A-Za-z_") ];

        define = '#' >> distinct(alnum | '_') [ "define" ] >> pp_symbol >> seek[*eol];
        undef  = '#' >> distinct(alnum | '_') [ "undef"  ] >> pp_symbol >> seek[*eol];

        else_  = '#' >> distinct(alnum | '_') [ "else"   ] >> seek[*eol];
        endif  = '#' >> distinct(alnum | '_') [ "endif"  ] >> seek[*eol];

        conditional_block = 
            ('#' >> distinct(alnum | '_') [ "ifdef" ] >> pp_symbol >> seek[*eol])
            >> *(!(else_|endif) >> block) 
            >> -else_
            >> *(!endif >> block)
            >> endif
            ;

        BOOST_SPIRIT_DEBUG_NODES((start)(blocks)(block)(pair)(pp_symbol)(define)(undef)(else_)(endif)(conditional_block))
    }

private:
    using Skipper = qi::blank_type;

    qi::rule<Iterator, Ast::Blocks()> start;

    qi::rule<Iterator, Ast::Blocks(), Skipper> blocks;
    qi::rule<Iterator, Ast::Block(),  Skipper> block;

    // directive
    qi::rule<Iterator, Ast::ConditionalBlock(), Skipper> conditional_block;
    qi::rule<Iterator, Ast::Define(),           Skipper> define;
    qi::rule<Iterator, Ast::Undef(),            Skipper> undef;
    // empty directives
    qi::rule<Iterator, Skipper> else_, endif;

    // lexeme
    qi::rule<Iterator, Ast::Pair()>   pair;
    qi::rule<Iterator, Ast::pp_sym()> pp_symbol;
};

namespace Logic {
    using Common::pp_sym;

    typedef std::set<pp_sym> pp_syms;

    struct context {
        pp_syms defined;
    };

    struct Preprocess : boost::static_visitor<void> {
        context ctx;
        std::string indent;

        Preprocess(context ctx = {}, std::string indent = "") 
            : ctx(std::move(ctx)), indent(std::move(indent))
        { }

        void operator()(Ast::Blocks const& blocks) {
            for (auto& b : blocks)
                boost::apply_visitor(*this, b);
        }
        void operator()(Ast::Block const& block) {
            boost::apply_visitor(*this, block);
        }
        void operator()(Ast::Pairs const& pairs) {
            for (auto& p : pairs)
                std::cout << indent << p.first << "=" << p.second << "\n";
        }
        void operator()(Ast::ConditionalBlock const& cb) {
            bool const satisfied = ctx.defined.count(cb.required);

            auto old_indent = indent;
            indent += "\t";
            std::cout << old_indent << "#ifdef " << cb.required << " // " << std::boolalpha << satisfied << "\n";

            Preprocess isolated{ctx, indent+"// "}; // prevent changes to ctx to affect us for the non-matching branch

            (satisfied? *this : isolated)(cb.if_);
            std::cout << old_indent << "#else " << " // ifdef " << cb.required << "\n";
            (satisfied? isolated : *this)(cb.else_);

            std::cout << old_indent << "#endif " << " // ifdef " << cb.required << "\n";
            indent.resize(indent.size()-1);
        }
        void operator()(Ast::Define const& directive) {
            ctx.defined.insert(directive.name);

            std::cout << indent << "#define\t" << directive.name;
            report();
        }
        void operator()(Ast::Undef const& directive) {
            ctx.defined.erase(directive.name);

            std::cout << indent << "#undef\t" << directive.name;
            report();
        }

      private:
        void report() const {
            std::cout << "\t// effective: ";
            for (auto& sym : ctx.defined) std::cout << sym << " ";
            std::cout << "\n";
        }
    };

}

int main() {
    typedef boost::spirit::istream_iterator It;

    typedef simple_grammar<It> my_grammar;

    my_grammar gram; // Our grammar
    Ast::Blocks ast; // Our tree

    It it(std::cin >> std::noskipws), end;

    bool b = qi::parse(it, end, gram, ast);

    if (it != end)
        std::cout << "Remaining input: '" << std::string(it, end) << "'\n";

    assert(b);

    std::cout << "Preprocess results:\n\n";

    Logic::Preprocess pp({{"EXTERNAL"}} , "    ");
    pp(ast);

    std::cout << "\n\nDefines still in effect: ";
    for (auto& sym : pp.ctx.defined) std::cout << sym << " ";
}

奖励:调试信息

除了上述输出之外,启用调试信息还会产生以下详细的跟踪信息:

<start>
  <try>#define FOO\nled_zepp</try>
  <blocks>
    <try>#define FOO\nled_zepp</try>
    <block>
      <try>#define FOO\nled_zepp</try>
      <define>
        <try>#define FOO\nled_zepp</try>
        <pp_symbol>
          <try>FOO\nled_zeppelin=9\nt</try>
          <success>\nled_zeppelin=9\nthe_</success>
          <attributes>[[F, O, O]]</attributes>
        </pp_symbol>
        <success>led_zeppelin=9\nthe_s</success>
        <attributes>[[[F, O, O]]]</attributes>
      </define>
      <success>led_zeppelin=9\nthe_s</success>
      <attributes>[[[F, O, O]]]</attributes>
    </block>
    <block>
      <try>led_zeppelin=9\nthe_s</try>
      <define>
        <try>led_zeppelin=9\nthe_s</try>
        <fail/>
      </define>
      <undef>
        <try>led_zeppelin=9\nthe_s</try>
        <fail/>
      </undef>
      <conditional_block>
        <try>led_zeppelin=9\nthe_s</try>
        <fail/>
      </conditional_block>
      <pair>
        <try>led_zeppelin=9\nthe_s</try>
        <success>the_shins=9\ndead_mau</success>
        <attributes>[[[l, e, d, _, z, e, p, p, e, l, i, n], [9]]]</attributes>
      </pair>
      <pair>
        <try>the_shins=9\ndead_mau</try>
        <success>dead_mau5=6\nportishe</success>
        <attributes>[[[t, h, e, _, s, h, i, n, s], [9]]]</attributes>
      </pair>
      <pair>
        <try>dead_mau5=6\nportishe</try>
        <success>portishead=10\n#ifdef</success>
        <attributes>[[[d, e, a, d, _, m, a, u, 5], [6]]]</attributes>
      </pair>
      <pair>
        <try>portishead=10\n#ifdef</try>
        <success>#ifdef FOO\nfoo_fight</success>
        <attributes>[[[p, o, r, t, i, s, h, e, a, d], [1, 0]]]</attributes>
      </pair>
      <pair>
        <try>#ifdef FOO\nfoo_fight</try>
        <fail/>
      </pair>
      <success>#ifdef FOO\nfoo_fight</success>
      <attributes>[[[[l, e, d, _, z, e, p, p, e, l, i, n], [9]], [[t, h, e, _, s, h, i, n, s], [9]], [[d, e, a, d, _, m, a, u, 5], [6]], [[p, o, r, t, i, s, h, e, a, d], [1, 0]]]]</attributes>
    </block>
    <block>
      <try>#ifdef FOO\nfoo_fight</try>
      <define>
        <try>#ifdef FOO\nfoo_fight</try>
        <fail/>
      </define>
      <undef>
        <try>#ifdef FOO\nfoo_fight</try>
        <fail/>
      </undef>
      <conditional_block>
        <try>#ifdef FOO\nfoo_fight</try>
        <pp_symbol>
          <try>FOO\nfoo_fighters=7\n#</try>
          <success>\nfoo_fighters=7\n#def</success>
          <attributes>[[F, O, O]]</attributes>
        </pp_symbol>
        <else_>
          <try>foo_fighters=7\n#defi</try>
          <fail/>
        </else_>
        <endif>
          <try>foo_fighters=7\n#defi</try>
          <fail/>
        </endif>
        <block>
          <try>foo_fighters=7\n#defi</try>
          <define>
            <try>foo_fighters=7\n#defi</try>
            <fail/>
          </define>
          <undef>
            <try>foo_fighters=7\n#defi</try>
            <fail/>
          </undef>
          <conditional_block>
            <try>foo_fighters=7\n#defi</try>
            <fail/>
          </conditional_block>
          <pair>
            <try>foo_fighters=7\n#defi</try>
            <success>#define ZOO\n#else\nth</success>
            <attributes>[[[f, o, o, _, f, i, g, h, t, e, r, s], [7]]]</attributes>
          </pair>
          <pair>
            <try>#define ZOO\n#else\nth</try>
            <fail/>
          </pair>
          <success>#define ZOO\n#else\nth</success>
          <attributes>[[[[f, o, o, _, f, i, g, h, t, e, r, s], [7]]]]</attributes>
        </block>
        <else_>
          <try>#define ZOO\n#else\nth</try>
          <fail/>
        </else_>
        <endif>
          <try>#define ZOO\n#else\nth</try>
          <fail/>
        </endif>
        <block>
          <try>#define ZOO\n#else\nth</try>
          <define>
            <try>#define ZOO\n#else\nth</try>
            <pp_symbol>
              <try>ZOO\n#else\nthe_who=6\n</try>
              <success>\n#else\nthe_who=6\n#de</success>
              <attributes>[[Z, O, O]]</attributes>
            </pp_symbol>
            <success>#else\nthe_who=6\n#def</success>
            <attributes>[[[Z, O, O]]]</attributes>
          </define>
          <success>#else\nthe_who=6\n#def</success>
          <attributes>[[[Z, O, O]]]</attributes>
        </block>
        <else_>
          <try>#else\nthe_who=6\n#def</try>
          <success>the_who=6\n#define QU</success>
          <attributes>[]</attributes>
        </else_>
        <else_>
          <try>#else\nthe_who=6\n#def</try>
          <success>the_who=6\n#define QU</success>
          <attributes>[]</attributes>
        </else_>
        <endif>
          <try>the_who=6\n#define QU</try>
          <fail/>
        </endif>
        <block>
          <try>the_who=6\n#define QU</try>
          <define>
            <try>the_who=6\n#define QU</try>
            <fail/>
          </define>
          <undef>
            <try>the_who=6\n#define QU</try>
            <fail/>
          </undef>
          <conditional_block>
            <try>the_who=6\n#define QU</try>
            <fail/>
          </conditional_block>
          <pair>
            <try>the_who=6\n#define QU</try>
            <success>#define QUX\n#endif\n\n</success>
            <attributes>[[[t, h, e, _, w, h, o], [6]]]</attributes>
          </pair>
          <pair>
            <try>#define QUX\n#endif\n\n</try>
            <fail/>
          </pair>
          <success>#define QUX\n#endif\n\n</success>
          <attributes>[[[[t, h, e, _, w, h, o], [6]]]]</attributes>
        </block>
        <endif>
          <try>#define QUX\n#endif\n\n</try>
          <fail/>
        </endif>
        <block>
          <try>#define QUX\n#endif\n\n</try>
          <define>
            <try>#define QUX\n#endif\n\n</try>
            <pp_symbol>
              <try>QUX\n#endif\n\n#ifdef E</try>
              <success>\n#endif\n\n#ifdef EXTE</success>
              <attributes>[[Q, U, X]]</attributes>
            </pp_symbol>
            <success>#endif\n\n#ifdef EXTER</success>
            <attributes>[[[Q, U, X]]]</attributes>
          </define>
          <success>#endif\n\n#ifdef EXTER</success>
          <attributes>[[[Q, U, X]]]</attributes>
        </block>
        <endif>
          <try>#endif\n\n#ifdef EXTER</try>
          <success>#ifdef EXTERNAL\n\n#if</success>
          <attributes>[]</attributes>
        </endif>
        <endif>
          <try>#endif\n\n#ifdef EXTER</try>
          <success>#ifdef EXTERNAL\n\n#if</success>
          <attributes>[]</attributes>
        </endif>
        <success>#ifdef EXTERNAL\n\n#if</success>
        <attributes>[[[F, O, O], [[[[f, o, o, _, f, i, g, h, t, e, r, s], [7]]], [[Z, O, O]]], [[[[t, h, e, _, w, h, o], [6]]], [[Q, U, X]]]]]</attributes>
      </conditional_block>
      <success>#ifdef EXTERNAL\n\n#if</success>
      <attributes>[[[F, O, O], [[[[f, o, o, _, f, i, g, h, t, e, r, s], [7]]], [[Z, O, O]]], [[[[t, h, e, _, w, h, o], [6]]], [[Q, U, X]]]]]</attributes>
    </block>
    <block>
      <try>#ifdef EXTERNAL\n\n#if</try>
      <define>
        <try>#ifdef EXTERNAL\n\n#if</try>
        <fail/>
      </define>
      <undef>
        <try>#ifdef EXTERNAL\n\n#if</try>
        <fail/>
      </undef>
      <conditional_block>
        <try>#ifdef EXTERNAL\n\n#if</try>
        <pp_symbol>
          <try>EXTERNAL\n\n#ifdef ZOO</try>
          <success>\n\n#ifdef ZOO\nzoowasd</success>
          <attributes>[[E, X, T, E, R, N, A, L]]</attributes>
        </pp_symbol>
        <else_>
          <try>#ifdef ZOO\nzoowasdef</try>
          <fail/>
        </else_>
        <endif>
          <try>#ifdef ZOO\nzoowasdef</try>
          <fail/>
        </endif>
        <block>
          <try>#ifdef ZOO\nzoowasdef</try>
          <define>
            <try>#ifdef ZOO\nzoowasdef</try>
            <fail/>
          </define>
          <undef>
            <try>#ifdef ZOO\nzoowasdef</try>
            <fail/>
          </undef>
          <conditional_block>
            <try>#ifdef ZOO\nzoowasdef</try>
            <pp_symbol>
              <try>ZOO\nzoowasdefined=ye</try>
              <success>\nzoowasdefined=yes\n#</success>
              <attributes>[[Z, O, O]]</attributes>
            </pp_symbol>
            <else_>
              <try>zoowasdefined=yes\n#e</try>
              <fail/>
            </else_>
            <endif>
              <try>zoowasdefined=yes\n#e</try>
              <fail/>
            </endif>
            <block>
              <try>zoowasdefined=yes\n#e</try>
              <define>
                <try>zoowasdefined=yes\n#e</try>
                <fail/>
              </define>
              <undef>
                <try>zoowasdefined=yes\n#e</try>
                <fail/>
              </undef>
              <conditional_block>
                <try>zoowasdefined=yes\n#e</try>
                <fail/>
              </conditional_block>
              <pair>
                <try>zoowasdefined=yes\n#e</try>
                <success>#else\nzoowasdefined=</success>
                <attributes>[[[z, o, o, w, a, s, d, e, f, i, n, e, d], [y, e, s]]]</attributes>
              </pair>
              <pair>
                <try>#else\nzoowasdefined=</try>
                <fail/>
              </pair>
              <success>#else\nzoowasdefined=</success>
              <attributes>[[[[z, o, o, w, a, s, d, e, f, i, n, e, d], [y, e, s]]]]</attributes>
            </block>
            <else_>
              <try>#else\nzoowasdefined=</try>
              <success>zoowasdefined=no\n#en</success>
              <attributes>[]</attributes>
            </else_>
            <else_>
              <try>#else\nzoowasdefined=</try>
              <success>zoowasdefined=no\n#en</success>
              <attributes>[]</attributes>
            </else_>
            <endif>
              <try>zoowasdefined=no\n#en</try>
              <fail/>
            </endif>
            <block>
              <try>zoowasdefined=no\n#en</try>
              <define>
                <try>zoowasdefined=no\n#en</try>
                <fail/>
              </define>
              <undef>
                <try>zoowasdefined=no\n#en</try>
                <fail/>
              </undef>
              <conditional_block>
                <try>zoowasdefined=no\n#en</try>
                <fail/>
              </conditional_block>
              <pair>
                <try>zoowasdefined=no\n#en</try>
                <success>#endif\n\n#ifdef QUX\nq</success>
                <attributes>[[[z, o, o, w, a, s, d, e, f, i, n, e, d], [n, o]]]</attributes>
              </pair>
              <pair>
                <try>#endif\n\n#ifdef QUX\nq</try>
                <fail/>
              </pair>
              <success>#endif\n\n#ifdef QUX\nq</success>
              <attributes>[[[[z, o, o, w, a, s, d, e, f, i, n, e, d], [n, o]]]]</attributes>
            </block>
            <endif>
              <try>#endif\n\n#ifdef QUX\nq</try>
              <success>#ifdef QUX\nquxwasdef</success>
              <attributes>[]</attributes>
            </endif>
            <endif>
              <try>#endif\n\n#ifdef QUX\nq</try>
              <success>#ifdef QUX\nquxwasdef</success>
              <attributes>[]</attributes>
            </endif>
            <success>#ifdef QUX\nquxwasdef</success>
            <attributes>[[[Z, O, O], [[[[z, o, o, w, a, s, d, e, f, i, n, e, d], [y, e, s]]]], [[[[z, o, o, w, a, s, d, e, f, i, n, e, d], [n, o]]]]]]</attributes>
          </conditional_block>
          <success>#ifdef QUX\nquxwasdef</success>
          <attributes>[[[Z, O, O], [[[[z, o, o, w, a, s, d, e, f, i, n, e, d], [y, e, s]]]], [[[[z, o, o, w, a, s, d, e, f, i, n, e, d], [n, o]]]]]]</attributes>
        </block>

....
</start>

¹ 或者您应该在解析时匹配一个相当复杂的树。如有疑问,请将解析与处理分开。这与Boost Spirit: "Semantic actions are evil"?密切相关

【讨论】:

    【解决方案2】:

    只需定义一个语法并实现匹配的规则。

    你做什么取决于你想对结果做什么。如果目标是忽略该块,只需将语法添加到skipper(例如'#ifdef' &gt;&gt; spirit::repository::qi::seek[ qi::eol &gt;&gt; "#endif" &gt;&gt; qi::eol ]或类似)

    考虑使用 Boost Wave,它是一个成熟的预处理器,用 Spirit 编写并且已经随 Boost 一起提供。

    【讨论】:

    • 我宁愿不使用wave,它看起来很复杂,文档也很密集,看了几次我宁愿只使用qi,也许我认为是lex。也许在更复杂的不同应用程序中,当我对这些东西有更多经验时
    • 宁愿不使用 wave,它看起来很复杂,文档也很密集 - 而 Spirit 不是?
    • marko:我只能说,我觉得我已经成功地“摸索”了qi 并用它来制作许多运行良好的简单解析器,而且我遇到了一个更难的与他人共度时光。
    • 也许我只是需要更多精神方面的经验,但我确实觉得 qi 是拥有最好的文档和示例的那个。
    【解决方案3】:

    通过阅读精神文档,我认为解决基本问题的正确方法(引用自己)

    根据某些 boost phoenix 函数的结果,有没有一种好方法可以使解析方式不同的语法非终结符?

    就是使用boost::spirit::qi::eps。从文档( http://www.boost.org/doc/libs/1_41_0/libs/spirit/doc/html/spirit/qi/reference/auxiliary/eps.html):

    语义谓词

    语义谓词允许您在语法中的任何位置附加条件函数。在这个角色中,epsilon 接受一个返回 true 或 false 的惰性参数。 Lazy Argument 通常是一种用于解决语法歧义的测试。当 Lazy Argument 结果评估为 false 时,将报告解析失败。否则将报告空匹配。一般形式为:

    eps(f) &gt;&gt; rest;

    调用惰性参数 f 进行语义测试(例如,检查符号是否在符号表中)。如果 test 返回 true,则将评估 rest。否则,生产将提前返回,没有任何休息。

    打算尝试使用此技术扩展 SSCCE 并尽快编辑此答案...


    好的,这就是我的最终结果。我认为它仍然有一些缺点,比如它不能完全正确地处理嵌套的 ifdef,而且我的语法有一些代码重复。我认为简短的回答是,您不应该尝试在任何甚至中等复杂的语法中实现 ifdef,您可能应该始终进行某种两阶段处理,即使语法非常简单,否则会产生很多问题.但无论如何,我认为这是一个很好的使用精神的练习。

    #define BOOST_SPIRIT_USE_PHOENIX_V3
    
    #include <boost/config/warning_disable.hpp>
    #include <boost/spirit/include/qi.hpp>
    #include <boost/spirit/include/phoenix_core.hpp>
    #include <boost/spirit/include/phoenix_object.hpp>
    #include <boost/spirit/include/phoenix_operator.hpp>
    #include <boost/spirit/include/phoenix_fusion.hpp>
    #include <boost/spirit/include/phoenix_stl.hpp>
    #include <boost/fusion/adapted/struct/adapt_struct.hpp>
    #include <boost/fusion/include/adapt_struct.hpp>
    #include <boost/fusion/include/std_pair.hpp>
    #include <boost/variant/recursive_variant.hpp>
    
    #include <cassert>
    #include <cmath>
    #include <memory>
    #include <string>
    #include <utility>
    #include <vector>
    
    namespace fusion = boost::fusion;
    namespace phoenix = boost::phoenix;
    namespace qi = boost::spirit::qi;
    namespace ascii = boost::spirit::ascii;
    
    typedef std::string pp_sym;
    typedef std::set<pp_sym> pp_data;
    
    void add(pp_data & defines, const pp_sym & s) { /*std::cout << "Parser: #define " << s << std::endl;*/ defines.insert(s); }
    void remove(pp_data & defines, const pp_sym & s) { /*std::cout << "Parser: #undef " << s << std::endl;*/ defines.erase(s); }
    bool search(pp_data & defines, const pp_sym & s) { /*std::cout << "Parser: #ifdef " << s << std::endl;*/ return defines.count(s); }
    
    BOOST_PHOENIX_ADAPT_FUNCTION(void, pp_add_define_, add, 2);
    BOOST_PHOENIX_ADAPT_FUNCTION(void, pp_remove_define_, remove, 2);
    BOOST_PHOENIX_ADAPT_FUNCTION(bool, pp_search_define_, search, 2);
    
    typedef std::string Str;
    typedef std::pair<Str, Str> Pair;
    typedef std::vector<Pair> PairVec;
    
    /***
     * Grammar definitions
     */
    
    template <typename Iterator>
    struct simple_grammar : qi::grammar<Iterator, PairVec()> {
        qi::rule<Iterator, PairVec()> main;
        qi::rule<Iterator, PairVec(), qi::locals<std::string>> if_block;
        qi::rule<Iterator, PairVec()> if_true_block;
        qi::rule<Iterator, PairVec()> if_false_block;
        qi::rule<Iterator, Pair()> pair;
        qi::rule<Iterator, Str()> first;
        qi::rule<Iterator, Str()> second;
    
        qi::rule<Iterator, pp_sym()> pp_symbol;
        qi::rule<Iterator> pp_directive;
        qi::rule<Iterator, pp_sym()> define_directive;
        qi::rule<Iterator, pp_sym()> undef_directive;
        qi::rule<Iterator, pp_sym()> if_directive;
        qi::rule<Iterator> else_directive;
        qi::rule<Iterator> endif_directive;
    
        qi::rule<Iterator> ws;
        qi::rule<Iterator> skip_to_eol;
    
        simple_grammar(pp_data & preprocessor_data)
                : simple_grammar::base_type(main)
        {
            using qi::lit;
            using qi::char_;
            using qi::omit;
            using qi::eps;
            using namespace qi::labels;
    
            ws = char_(" \t\r\n");
    
            first = !lit('#') >> *(char_ - '=') >> lit('=');
            second = *(char_ - '\n') >> lit('\n');
            pair = first >> second;
    
            pp_symbol = +char_("A-Za-z_");
    
            skip_to_eol = *(char_ - '\n') >> lit('\n');
    
            pp_directive = &lit('#')
                    >> ((define_directive [ pp_add_define_(ref(preprocessor_data), _1) ] )
                    | (undef_directive [ pp_remove_define_(ref(preprocessor_data), _1) ] )
                    | else_directive
                    | endif_directive)
                    >> skip_to_eol;
    
            main = (if_block >> -main) | (pp_directive >> -main) | (pair >> -main);
    
            define_directive = lit("#define ") >> pp_symbol >> &ws;
            undef_directive  = lit("#undef ") >> pp_symbol >> &ws;
            if_directive     = lit("#ifdef ") >> pp_symbol >> &ws;
            else_directive   = lit("#else");
            endif_directive  = lit("#endif");
    
    
            if_block = omit[if_directive[_a = _1] ] >> skip_to_eol
                        >> ((eps( pp_search_define_(ref(preprocessor_data), _a) ) > if_true_block ) | if_false_block)
                        >> endif_directive >> skip_to_eol;
            if_false_block = omit[ *(char_ - else_directive - endif_directive) ] >> -(else_directive >> skip_to_eol >> if_true_block);
            if_true_block = !endif_directive >> 
                    ( (else_directive >> skip_to_eol >> if_false_block) 
                    | (if_block >> -if_true_block)
                    | (pp_directive >> -if_true_block)
                    | (pair >> -if_true_block)); 
        }
    };
    
    #define CHECK(C) \
    do { \
        if (!(C)) { \
            std::cout << "Check \"" << #C << "\" failed!" << std::endl; \
        } \
    } while(0)
    
    #define CHECK_ITS(STR, IT, END) \
    do { \
        if (IT != END) { \
            std::cout << "Failed to fully parse \"" << STR << "\"\n"; \
            std::cout << "Stopped at \"" << std::string(IT, END) << "\"" << std::endl; \
        } \
    } while(0)
    
    typedef std::string::const_iterator str_it;
    typedef simple_grammar<str_it> my_grammar;
    
    void unit_test() {
        std::cout << " --- unit tests ---" << std::endl;
    
        pp_data defines;
        my_grammar gram(defines); // Our grammar
    
        {
            std::cout << "test 1\n";
    
            std::string temp = "#define ZED\n";
            str_it it = temp.begin();
            str_it end = temp.end();
    
            std::string ast;
            bool check1 = qi::parse(it, end, gram.define_directive >> gram.skip_to_eol, ast);
            CHECK(check1);
            CHECK_ITS(temp, it, end);
            CHECK(ast == "ZED");
        }
    
        {
            std::cout << "test 2\n";
    
            std::string temp = "#define ZED\n";
            str_it it = temp.begin();
            str_it end = temp.end();
    
            bool check1 = qi::parse(it, end, gram.pp_directive);
            CHECK(check1);
            CHECK_ITS(temp, it, end);
            CHECK(defines.count("ZED") == 1);
        }
    
        {
            std::cout << "test 3\n";
    
            std::string temp = "#undef ZED\n";
            str_it it = temp.begin();
            str_it end = temp.end();
    
            bool check1 = qi::parse(it, end, gram.pp_directive);
            CHECK(check1);
            CHECK_ITS(temp, it, end);
            CHECK(defines.count("ZED") == 0);
        }
    
        std::cout << " --- end unit tests ---" << std::endl;
    }
    
    std::ostream & operator << (std::ostream & ss, const PairVec & pv) {
        ss << "Parsed a list:\n\n";
    
        for( const auto & p : pv) {
            ss << p.first << "\n\t\t\t=\t" << p.second << std::endl;
        }
        return ss;
    }
    
    PairVec test_case(pp_data & defines, int & result, const std::string & temp) {
        my_grammar gram(defines); // Our grammar
        PairVec ast; // Our tree
    
        str_it it = temp.begin();
        str_it end = temp.end();
    
        bool parse_successful = qi::parse(it, end, gram, ast);
        CHECK(parse_successful);
        CHECK_ITS(temp, it, end);
    
        std::cout << ast;
    
        result |= parse_successful ? 0 : 1;
        return ast;
    }
    
    bool have_name(const PairVec & pv, const Str & name) {
        return pv.end() != std::find_if(pv.begin(), pv.end(), [&](const Pair & p) { return p.first == name; });
    }
    
    int main() {
        unit_test();
    
        int result = 0;
        {
            std::cout << "Test case 1" << std::endl;
            pp_data defines;
            PairVec ast = test_case(defines, result, ""
    "#define FOO\n"
    "led_zeppelin=9\n"
    "the_shins=9\n"
    "dead_mau5=6\n"
    "portishead=10\n"
    "#ifdef FOO\n"
    "foo_fighters=7\n"
    "#else\n"
    "the_who=6\n"
    "#endif\n"
    "kanye_west=4\n"
    "#undef FOO\n"
    "#define BAR\n");
    
            CHECK(defines.count("FOO") == 0);
            CHECK(defines.count("BAR") == 1);
            if (!have_name (ast, "foo_fighters")) { std::cout << "error no foo" << std::endl;}
        }
    
        {
            std::cout << "Test case 2" << std::endl;
            pp_data defines;
            PairVec ast = test_case(defines, result, ""
    "#define WOO\n"
    "led_zeppelin=9\n"
    "the_shins=9\n"
    "dead_mau5=6\n"
    "portishead=10\n"
    "#ifdef FOO\n"
    "foo_fighters=7\n"
    "#else\n"
    "the_who=6\n"
    "#endif\n"
    "kanye_west=4\n"
    "#undef FOO\n"
    "#define BAR\n"
    "#define ZED\n");
    
            CHECK(defines.count("FOO") == 0);
            CHECK(defines.count("BAR") == 1);
            CHECK(defines.count("WOO") == 1);
            CHECK(defines.count("ZED") == 1);
            CHECK(defines.count("GOO") == 0);
            CHECK(!have_name(ast, "foo_fighters"));
            CHECK(have_name(ast, "the_who"));
        }
    
        return result;
    }
    

    【讨论】:

    • 呵呵。不知何故,我的浏览器以前没有向我显示过这个答案。所以我做了我自己的版本。它确实可以处理嵌套条件,甚至可以正确处理嵌套块中的定义/取消定义。我会更新我的答案
    • 在此处发布:stackoverflow.com/a/32162566/85371 希望这至少可以帮助您了解一种可以考虑的方法 /cc @JeffTrull
    猜你喜欢
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多