Boost Spirit X3:将(一些)空格解析为枚举

Boost Spirit X3: Parsing (some) whitespace into an enum

我有一个解析器,我想在其中捕获某些类型的空格作为枚举值并保留 "text" 值的空格。

我的空白解析器非常基础(注意:我在这里添加管道符只是为了 test/dev 目的):

// Symbol table that maps the literals "\n", "\t" and "|" to Whitespace values.
struct whitespace_p : x3::symbols<Whitespace>
{
    whitespace_p()
    {
        // Register each literal together with the enum value it produces.
        add
        ("\n", Whitespace::NEWLINE)
        ("\t", Whitespace::TAB)
        ("|", Whitespace::PIPE)
        ;
    }
} whitespace;

我想将所有内容捕获到我的枚举或 std::strings:

// One parsed element: either a Whitespace marker or a run of text.
struct Element : x3::variant<Whitespace, std::string>
{
    // Inherit the variant's constructors and assignment operators.
    using base_type::base_type;
    using base_type::operator=;
};

为了解析我的输入,我使用了这样的东西:

// Matches a single Element: either one or more characters at which no
// `whitespace` symbol matches (parsed inside no_skip[]), or one `whitespace` symbol.
const auto contentParser
    = x3::rule<class ContentParserID, Element, true> { "contentParser" }
    = x3::no_skip[+(x3::char_ - (whitespace))]
        | whitespace
    ;

using Elements = std::vector<Element>;
// Matches one or more consecutive Elements and collects them into an Elements vector.
// The rule has its own tag type: sharing contentParser's tag would make the two
// rules indistinguishable to anything keyed on the rule ID.
const auto elementsParser
    = x3::rule<class ElementsParserID, Elements, true> { "elementsParser" }
    = contentParser >> *(contentParser);

但问题是解析器在遇到第一个制表符或换行符时停止。

代码:http://coliru.stacked-crooked.com/a/d2cda4ce721279a4

#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <iostream>

namespace x3 = boost::spirit::x3;

// Separator kinds that are captured as values instead of text.
enum Whitespace
{
    NEWLINE,
    TAB,
    PIPE
};

// Symbol table that maps the literals "\n", "\t" and "|" to Whitespace values.
struct whitespace_p : x3::symbols<Whitespace>
{
    whitespace_p()
    {
        // Register each literal together with the enum value it produces.
        add
        ("\n", Whitespace::NEWLINE)
        ("\t", Whitespace::TAB)
        ("|", Whitespace::PIPE)
        ;
    }
} whitespace;

// One parsed element: either a Whitespace marker or a run of text.
struct Element : x3::variant<Whitespace, std::string>
{
    // Inherit the variant's constructors and assignment operators.
    using base_type::base_type;
    using base_type::operator=;
};

// Matches a single Element: either one or more characters at which no
// `whitespace` symbol matches (parsed inside no_skip[]), or one `whitespace` symbol.
const auto contentParser
    = x3::rule<class ContentParserID, Element, true> { "contentParser" }
    = x3::no_skip[+(x3::char_ - (whitespace))]
        | whitespace
    ;

using Elements = std::vector<Element>;
// Matches one or more consecutive Elements and collects them into an Elements vector.
// The rule has its own tag type: sharing contentParser's tag would make the two
// rules indistinguishable to anything keyed on the rule ID.
const auto elementsParser
    = x3::rule<class ElementsParserID, Elements, true> { "elementsParser" }
    = contentParser >> *(contentParser);

// Visitor that turns the alternative held by an Element into printable text.
struct print_visitor
    : public boost::static_visitor<std::string>
{
    // Whitespace markers are rendered by name; every value other than
    // NEWLINE and PIPE is reported as "tab".
    std::string operator()(const Whitespace& marker) const
    {
        switch (marker)
        {
            case Whitespace::NEWLINE: return "newline";
            case Whitespace::PIPE:    return "pipe";
            default:                  return "tab";
        }
    }

    // Text is returned unchanged.
    std::string operator()(const std::string& text) const
    {
        return text;
    }
};

// Parses a fixed sample string into Elements and prints each one, or reports
// a parse failure / the unparsed remainder.
int main() 
{
    const std::string text = "Hello \n World";
    std::string::const_iterator start = std::begin(text);
    const std::string::const_iterator stop = std::end(text);

    Elements elements{};

    // NOTE: phrase_parse is given x3::ascii::space as the skipper, so it is
    // instructed to skip whitespace; this is the behaviour the question is about.
    bool result =
        phrase_parse(start, stop, elementsParser, x3::ascii::space, elements);

    if (!result) 
    {
        std::cout << "failed to parse!\n";
    } 
    else if (start != stop)
    {
        // The parser succeeded but stopped before the end of the input.
        std::cout << "unparsed: " << std::string{start, stop} << '\n';
    }
    else
    {
        for (const auto& e : elements)
        {
            std::cout << "element: [" << boost::apply_visitor(print_visitor{}, e) << "]\n";
        }
    }
}

如果我解析文本 Hello | World,那么我会得到我期望的结果。但是,如果我改为使用 Hello \n World,则吞下 \n 之后的空格并且永远不会解析 World。理想情况下,我希望看到这个输出:

element: [Hello ]
element: [newline]
element: [ World]

我怎样才能做到这一点?谢谢!

关于 skipper(跳过器)问题,我首选的参考资料是:Boost spirit skipper issues

在这种情况下,你让它与 no_skip[] 一起工作。没错。

no_skip 类似于 lexeme 除了它不预先跳过,来自源 (boost/spirit/home/x3/directive/no_skip.hpp):

// same as lexeme[], but does not pre-skip

备选方案

在你的情况下,我会把逻辑反过来:只需调整 skipper 本身。

此外,不要通过 phrase_parse 从外部传入 skipper,因为您的语法对 skipper 的取值高度敏感。

你的整个语法可能是:

// The skipper is part of the grammar itself: x3::space minus anything that
// the `whitespace` symbol table matches.
const auto p  = x3::skip(x3::space - whitespace) [
        *(+x3::graph | whitespace)
    ];

这里是Live Demo On Coliru

#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <iostream>
#include <iomanip>

namespace x3 = boost::spirit::x3;

// Separator kinds that are captured as values instead of text.
enum Whitespace { NEWLINE, TAB, PIPE };

struct whitespace_p : x3::symbols<Whitespace> {
    whitespace_p() {
        add
            ("\n", Whitespace::NEWLINE)
            ("\t", Whitespace::TAB)
            ("|", Whitespace::PIPE)
        ;
    }
} static const whitespace;

// One parsed element: either a Whitespace marker or a run of text.
struct Element : x3::variant<Whitespace, std::string> {
    // Inherit the variant's constructors and assignment operators.
    using base_type::base_type;
    using base_type::operator=;
};

using Elements = std::vector<Element>;

// Streams an Element: Whitespace markers as a bracketed name, text as a quoted string.
static inline std::ostream& operator<<(std::ostream& os, Element const& el) {
    struct print_visitor {
        std::ostream& os;

        // Pick the label first, then write it; "?" covers any other value.
        auto& operator()(Whitespace ws) const {
            char const* label = "?";
            if (ws == Whitespace::NEWLINE) {
                label = "[newline]";
            } else if (ws == Whitespace::PIPE) {
                label = "[pipe]";
            } else if (ws == Whitespace::TAB) {
                label = "[tab]";
            }
            return os << label;
        }

        auto& operator()(const std::string& str) const {
            return os << std::quoted(str);
        }
    };

    print_visitor vis{os};
    return boost::apply_visitor(vis, el);
}

int main() {
    std::string const text = "\tHello \n World";
    auto start = begin(text), stop = end(text);

    const auto p  = x3::skip(x3::space - whitespace) [
            *(+x3::graph | whitespace)
        ];

    Elements elements;

    if (!parse(start, stop, p, elements)) {
        std::cout << "failed to parse!\n";
    } else {
        std::copy(begin(elements), end(elements), std::ostream_iterator<Element>(std::cout, "\n"));
    }

    if (start != stop) {
        std::cout << "unparsed: " << std::quoted(std::string(start, stop)) << '\n';
    }
}

输出

[tab]
"Hello"
[newline]
"World"

更简单?

看来您在这里根本不需要任何 skipper。为什么不这样写呢:

// No skipper: text is any run of characters other than '\n', '\t' and '|'.
const auto p  = *(+~x3::char_("\n\t|") | whitespace);

既然说到这里,其实也不需要用 symbols 来映射枚举:

// Element now stores the separator character itself instead of an enum value.
struct Element : x3::variant<char, std::string> {
    // ... (members elided in this excerpt)
};
using Elements = std::vector<Element>;

然后

// One Element: a run of characters other than '\n', '\t' and '|', or else
// any single character.
const auto p
    = x3::rule<struct ID, Element> {}
    = +~x3::char_("\n\t|") | x3::char_;

Live On Coliru

#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <iostream>
#include <iomanip>

namespace x3 = boost::spirit::x3;

// One parsed element: either a single separator character or a run of text.
struct Element : x3::variant<char, std::string> {
    using variant = x3::variant<char, std::string>;
    // Inherit the variant's constructors and assignment operators.
    using variant::variant;
    using variant::operator=;

    // Streams an Element: separator characters as a bracketed name, text as a
    // quoted string.
    friend std::ostream& operator<<(std::ostream& os, Element const& el) {
        struct print_visitor {
            std::ostream& os;

            // Each label names the character it is printed for; "?" covers
            // any other character.
            auto& operator()(char ws) const {
                switch(ws) {
                    case '\n': return os << "[newline]";
                    case '\t': return os << "[tab]";
                    case '|': return os << "[pipe]";
                }
                return os << "?";
            }

            auto& operator()(const std::string& str) const { return os << std::quoted(str); }
        } vis{os};
        return boost::apply_visitor(vis, el);
    }
};
using Elements = std::vector<Element>;

int main() {
    std::string const text = "\tHello \n World";
    auto start = begin(text);
    auto const stop = end(text);

    Elements elements;
    const auto p
        = x3::rule<struct ID, Element> {}
        = +~x3::char_("\n\t|") | x3::char_;

    if (!parse(start, stop, *p, elements)) {
        std::cout << "failed to parse!\n";
    } else {
        std::copy(begin(elements), end(elements), std::ostream_iterator<Element>(std::cout, "\n"));
    }

    if (start != stop) {
        std::cout << "unparsed: " << std::quoted(std::string(start, stop)) << '\n';
    }
}

输出

[tab]
"Hello "
[newline]
" World"

问题是您在第 76 行使用了 phrase_parse 而不是 parse。请尝试改用类似下面的写法:

// Plain parse(): no skipper argument is passed.
bool result =
        parse(start, stop, elementsParser, elements);

您的 phrase_parse 调用被指示跳过空白字符,而这恰恰是您不想要的。

参见 “How to use boost::spirit to parse a sequence of words into a vector?” 的第一个回答。