【问题标题】:How to parse rtsp url with boost qi?如何用 boost qi 解析 rtsp url?
【发布时间】:2021-06-07 14:33:27
【问题描述】:

我正在尝试像这样解析 RTSP-url:
namespace qi = boost::spirit::qi;
// Parses exactly two hexadecimal digits (radix 16, min 2, max 2 digits) into
// an unsigned char; used below to decode "%XX" URL escapes.
qi::int_parser<unsigned char, 16, 2, 2> hex_byte;

// Qi grammar that splits a URL of the form
//   [protocol://][login:password@]host[:port]path
// into a UrlStruct.
//
// Known limitation: passwordRule stops at the first '@', so a password that
// contains an unescaped '@' is cut short and the remainder ends up in the
// host. Escaping it as "%40" avoids this.
struct UrlParser : qi::grammar<stringIterator_t, UrlStruct()>
{
    UrlParser() : UrlParser::base_type(start)
    {
        using boost::spirit::int_;
        namespace ascii = boost::spirit::ascii;

        using boost::spirit::ascii::char_;
        
        // Parses http://, rtsp://, ftp://....
        // One or more characters other than ':' followed by the literal "://".
        protocol %= +(char_ - ':') >> "://"; 

        // Parsers for url decoding
        // '+' decodes to a space; "%XX" decodes to the byte with hex value XX;
        // any other character is taken literally.
        encodeChar.add("+", ' ');
        // The "- '@'" / "- ':'" applies to the whole alternative, so the
        // terminator character can never be part of the decoded string.
        passwordRule = *((encodeChar | "%" >> hex_byte | char_) - '@');
        loginRule = *((encodeChar | "%" >> hex_byte | char_) - ':');

        // Parses user:pass@, user:@, :pass@
        credentials %= loginRule >> ':' >> passwordRule >> '@';

        // Parses host name or ip address in 192.168.0.1:80 192.168.0.1/script.cgi
        // Consumes characters one at a time while the negative look-ahead holds:
        // stop at '/', or at ":<digits>" that is followed by '/' or end of input
        // (that is the port). '>>' binds tighter than '|', so the look-ahead is
        // '/' | (':' >> digits >> ('/' | eoi)).
        host %= +(!(char_("/") | (char_(":")) >> +ascii::digit >> (char_("/") | boost::spirit::eoi)) >> char_);

        // Parses port number in ":80", string
        port %= ':' >> int_;

        // Parses script path in "/video.cgi?resolution=1" string.
        // Takes everything up to (not including) an end-of-line.
        path %= *(char_ - boost::spirit::eol);

        // Protocol, credentials and port are optional; host is required
        // (at least one character); path may be empty.
        start %= -protocol
            >> -credentials
            >> host
            >> -port
            >> path
            ;
    }

    // One rule per UrlStruct field, in the order they appear in `start`.
    qi::rule<stringIterator_t, UrlStruct()>                 start;
    qi::rule<stringIterator_t, std::string()>               protocol;
    qi::rule<stringIterator_t, UrlStruct::stringPair_t()>       credentials;
    qi::rule<stringIterator_t, std::string()>               host;
    qi::rule<stringIterator_t, int()>                       port;
    qi::rule<stringIterator_t, std::string()>               path;

private:
    // Helpers for the credentials rule.
    qi::rule<stringIterator_t, std::string()>               loginRule;
    qi::rule<stringIterator_t, std::string()>               passwordRule;
    // Symbol table for single-character URL decoding ("+" -> ' ').
    qi::symbols<char const, char const>                     encodeChar;

};

UrlStruct 如下所示:

// Result of parsing a URL with UrlParser. Members are declared in the order
// the grammar produces them: protocol, credentials, host, port, path.
struct UrlStruct
{
    // Type aliases used by the grammar's rule signatures.
    using optString_t   = boost::optional<std::string>;
    using port_t        = boost::optional<int>;
    using stringPair_t  = boost::fusion::vector<std::string, std::string>; // (login, password)
    using credentials_t = boost::optional<stringPair_t>;

    optString_t   protocol;     // text before "://", absent if the URL has none
    credentials_t credentials;  // "login:password@" part, absent if not given
    std::string   host;         // host name or IP address (always present)
    port_t        port;         // number after ':', absent if not given
    std::string   path;         // remainder of the URL after host/port
};

当我有这样的网址时,这是可行的:

rtsp://admin:admin@wowzaec2demo.streamlock.net/vod/mp4:BigBuckBunny_115k.mov

但是当我在密码中输入“@”时,这不起作用。网址例如:

rtsp://admin:adm@in@wowzaec2demo.streamlock.net/vod/mp4:BigBuckBunny_115k.mov

我该如何解决这个问题?

【问题讨论】:

  • 让你的问题代码独立是一种很好的礼仪。例如,hex_byte 丢失了,很多人在弄清楚如何让你的代码编译/调用解析器时会遇到很大的麻烦。

标签: c++ boost-spirit boost-spirit-qi


【解决方案1】:

相对明显的解决方法是 URL 转义 @:

Live On Coliru

// Excerpt of a test driver: runs the grammar over three sample URLs and
// prints the parsed structure or "Failed". (The closing brace of the loop is
// not shown in this excerpt.)
UrlParser p;
for (std::string const input : {
        // plain password
        "rtsp://admin:admin@wowzaec2demo.streamlock.net/vod/mp4:BigBuckBunny_115k.mov",
        // '@' in the password, URL-escaped as %40
        "rtsp://admin:adm%40in@wowzaec2demo.streamlock.net/vod/mp4:BigBuckBunny_115k.mov",
        // '@' in the password, unescaped
        "rtsp://admin:adm@in@wowzaec2demo.streamlock.net/vod/mp4:BigBuckBunny_115k.mov",
        })
{
    // f is advanced by parse(); l marks the end of the input.
    stringIterator_t f = begin(input), l = end(input);
    Ast::UrlStruct u;
    if (parse(f, l, p, u)) {
        std::cout << "Parsed: " << u << "\n";
    } else {
        std::cout << "Failed\n";
    }

打印

Parsed: ( rtsp  (admin admin) wowzaec2demo.streamlock.net -- /vod/mp4:BigBuckBunny_115k.mov)
Parsed: ( rtsp  (admin adm@in) wowzaec2demo.streamlock.net -- /vod/mp4:BigBuckBunny_115k.mov)
Parsed: ( rtsp  (admin adm) in@wowzaec2demo.streamlock.net -- /vod/mp4:BigBuckBunny_115k.mov)

如您所见,第二个(使用%40 表示@)解析得很好。

以灵活的方式修复它

您的 host 规则已经用到了否定前瞻(negative look-ahead)断言:

host = +(
    !(char_("/") | (char_(":")) >> +ascii::digit >> (char_("/") | boost::spirit::eoi))
    >> char_);

我会将其简化为等价物:

host = +(~char_("/") - (port >> ('/' | qi::eoi)));

顺便说一句,“除非 ":nnnn" 位于主机名的末尾,否则不把它当作端口”这一逻辑,我认为可能是错误的,而且可能不安全。我猜您这样写是为了仍然能够接受 IPv6 地址?另见 What is the nicest way to parse this in C++?

同样,您的 password 规则有一个减法解析器:

password = *((encodeChar | "%" >> hex_byte | char_) - '@');

你想要的与此很接近:你并不是要禁止 '@' 本身,而是只在它后面跟着一个有效的 host 产生式(production)时才禁止它:

login    = *((encodeChar | "%" >> hex_byte | char_) - ':');
password = *((encodeChar | "%" >> hex_byte | char_) - 
        ('@' >> host >> eoh));

我为end-of-host 介绍了一个漂亮的简写:

auto eoh = qi::copy('/' | qi::eoi);

这是使用host 规则删除重复。请注意,现在我们通过确保主机部分中不出现@ 来完成这项工作(根据 RFC,可能有更多字符要排除,但这是使您的测试通过的最低要求):

host = +(~char_("@/") - (portSpec >> eoh));

演示

Live On Coliru

//#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/optional/optional_io.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
namespace qi = boost::spirit::qi;

// AST types produced by the URL grammar.
namespace Ast {
    // Parsed URL. Member order matches the sequence in the grammar's start
    // rule and the BOOST_FUSION_ADAPT_STRUCT list below.
    struct UrlStruct {
        using optString_t = boost::optional<std::string>;
        using port_t      = boost::optional<int>;
        struct Credentials { // more natural, for ease of debug output really
            std::string login, password;
        };
        using credentials_t = boost::optional<Credentials>;

        optString_t   protocol;    // text before "://", if any
        credentials_t credentials; // "login:password@" part, if any
        std::string   host;        // host name or IP address
        port_t        port;        // number after ':', if any
        std::string   path;        // remainder of the URL
    };
    // Make Fusion's stream operator visible in this namespace so the adapted
    // structs can be written with `std::cout << u`.
    using boost::fusion::operator<<;
} // namespace Ast

// Adapt both structs as Fusion sequences so Qi can fill them member by member
// from the rules' synthesized attributes.
BOOST_FUSION_ADAPT_STRUCT(Ast::UrlStruct::Credentials, login, password)
BOOST_FUSION_ADAPT_STRUCT(Ast::UrlStruct, protocol, credentials, host, port, path)
// Iterator type shared by the grammar and all of its rules.
using stringIterator_t = std::string::const_iterator;

// Qi grammar that splits a URL of the form
//   [protocol://][login:password@]host[:port]path
// into an Ast::UrlStruct. Unlike a plain "stop at '@'" password rule, this
// version accepts an unescaped '@' inside the password: an '@' only ends the
// password when what follows it parses as a host up to end-of-host.
struct UrlParser : qi::grammar<stringIterator_t, Ast::UrlStruct()>
{
    UrlParser() : UrlParser::base_type(start)
    {
        namespace ascii = boost::spirit::ascii;

        using boost::spirit::ascii::char_;

        // Parses http://, rtsp://, ftp://....
        // One or more characters other than ':' followed by the literal "://".
        protocol = +(char_ - ':') >> "://"; 

        // Parsers for url decoding
        // '+' decodes to a space; "%XX" decodes to the byte with hex value XX;
        // any other character is taken literally.
        encodeChar.add("+", ' ');
        // End-of-host: either '/' or end of input. qi::copy stores the
        // expression by value so it can safely be kept in an `auto` variable.
        auto eoh = qi::copy('/' | qi::eoi);
        // Login runs up to (not including) the first ':'.
        login    = *((encodeChar | "%" >> hex_byte | char_) - ':');
        // Password runs until an '@' that is followed by a valid host and then
        // end-of-host; any other '@' is kept as part of the password.
        password = *((encodeChar | "%" >> hex_byte | char_) - 
                ('@' >> host >> eoh));

        // Parses user:pass@, user:@, :pass@
        credentials = login >> ':' >> password >> '@';

        // Parses host name or ip address in 192.168.0.1:80 192.168.0.1/script.cgi
        // Any characters except '@' and '/', stopping before a port spec that
        // is itself followed by end-of-host. Excluding '@' here is what lets
        // the password rule's look-ahead find the last '@'.
        host = +(~char_("@/") - (portSpec >> eoh));

        // Parses port number in ":80", string
        // port_number is a 16-bit unsigned parser, so out-of-range values fail.
        portSpec = ':' >> port_number;

        // Parses script path in "/video.cgi?resolution=1" string.
        // Takes everything up to (not including) an end-of-line.
        path = *(char_ - qi::eol);

        // Protocol, credentials and port are optional; host is required
        // (at least one character); path may be empty.
        start = -protocol
            >> -credentials
            >> host
            >> -portSpec
            >> path
            ;

        // Names the rules for debug tracing; only has an effect when
        // BOOST_SPIRIT_DEBUG is defined before including Spirit.
        BOOST_SPIRIT_DEBUG_NODES((start)(protocol)(credentials)(host)(portSpec)(
            path)(login)(password))
    }

private:
  // One rule per Ast::UrlStruct field, in the order used by `start`.
  qi::rule<stringIterator_t, Ast::UrlStruct()>              start;
  qi::rule<stringIterator_t, std::string()>                 protocol;
  qi::rule<stringIterator_t, Ast::UrlStruct::Credentials()> credentials;
  qi::rule<stringIterator_t, std::string()>                 host;
  qi::rule<stringIterator_t, int()>                         portSpec;
  qi::rule<stringIterator_t, std::string()>                 path;

  // Helpers for the credentials rule.
  qi::rule<stringIterator_t, std::string()> login;
  qi::rule<stringIterator_t, std::string()> password;
  // Symbol table for single-character URL decoding ("+" -> ' ').
  qi::symbols<char const, char const>       encodeChar;

  // Exactly two hex digits (radix 16, min 2, max 2) -> one byte, for "%XX".
  qi::uint_parser<uint8_t, 16, 2, 2> hex_byte;
  // Decimal port number limited to the uint16_t range.
  qi::uint_parser<uint16_t> port_number;
};

int main() {
    UrlParser p;
    for (std::string const input : {
            "rtsp://admin:admin@wowzaec2demo.streamlock.net/vod/mp4:BigBuckBunny_115k.mov",
            "rtsp://admin:adm%40in@wowzaec2demo.streamlock.net/vod/mp4:BigBuckBunny_115k.mov",
            "rtsp://admin:adm@in@wowzaec2demo.streamlock.net/vod/mp4:BigBuckBunny_115k.mov",
            })
    {
        stringIterator_t f = begin(input), l = end(input);
        Ast::UrlStruct u;
        if (parse(f, l, p, u)) {
            std::cout << "Yay: " << u << "\n";
        } else {
            std::cout << "Nay\n";

        }

        if (f != l) {
            std::cout << "Remaining: " << std::quoted(std::string(f, l)) << "\n";
        }
    }
}

现在打印:

Yay: ( rtsp  (admin admin) wowzaec2demo.streamlock.net -- /vod/mp4:BigBuckBunny_115k.mov)
Yay: ( rtsp  (admin adm@in) wowzaec2demo.streamlock.net -- /vod/mp4:BigBuckBunny_115k.mov)
Yay: ( rtsp  (admin adm@in) wowzaec2demo.streamlock.net -- /vod/mp4:BigBuckBunny_115k.mov)

其他方面

  • 端口确实应该限制为 16 位范围内的正整数。

  • 使用 BOOST_SPIRIT_DEBUG 更轻松地调试语法

  • 不要使用实现细节(fusion::vector)。可以改用 std::pair,或者像我这样另外定义一个经过适配(adapt)的结构体。这样一来,为快速调试而实现 operator<< 也不那么痛苦了。

  • 您在输入结束检测方面略有不一致。在大多数地方你使用eoi,但是在path你接受直到eol。您可能希望使事情更加一致或决定从 eol 开始的任何输入的含义。

  • 现实生活中的 URI 有时包含登录名但不包含密码(也没有:@)。我认为您的语法可能想要支持这一点?

  • 现在解析器的一个结构性缺陷是效率。所有这些否定前瞻可能会导致比预期更多的回溯。- (':' >> port) 应该没问题,但 - ('@' >> host) 可能会向前窥探很多字符。实际上,我认为这不应该成为问题(当然,除非您经常使用包含许多 @ 字符的很长的密码)。

    但严肃地说,我认为这是解析器试图在同一抽象级别上做太多事情的标志。如果你分而治之,事情会容易得多:首先是schema://HOSTSTUFF/PATHSTUFF,然后是HOSTSTUFF,就像I linked earlier一样。路径/查询部分也是如此,因为那里有无数有趣的东西,当你第一次分离 URI 的顶级分支时,你真的不需要费心。

【讨论】:

    猜你喜欢
    • 2011-01-26
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2012-12-01
    • 1970-01-01
    • 1970-01-01
    • 2020-11-09
    相关资源
    最近更新 更多