Boost Spirit X3:将(一些)空格解析为枚举
Boost Spirit X3: Parsing (some) whitespace into an enum
我有一个解析器,我想在其中捕获某些类型的空格作为枚举值并保留 "text" 值的空格。
我的空白解析器非常基础(注意:我在这里添加管道符只是为了 test/dev 目的):
struct whitespace_p : x3::symbols<Whitespace>
{
whitespace_p()
{
add
("\n", Whitespace::NEWLINE)
("\t", Whitespace::TAB)
("|", Whitespace::PIPE)
;
}
} whitespace;
我想将所有内容捕获到我的枚举或 std::string
s:
struct Element : x3::variant<Whitespace, std::string>
{
using base_type::base_type;
using base_type::operator=;
};
为了解析我的输入,我使用了这样的东西:
const auto contentParser
= x3::rule<class ContentParserID, Element, true> { "contentParser" }
= x3::no_skip[+(x3::char_ - (whitespace))]
| whitespace
;
using Elements = std::vector<Element>;
const auto elementsParser
= x3::rule<class ContentParserID, Elements, true> { "elementsParser" }
= contentParser >> *(contentParser);
但问题是解析器在遇到第一个制表符或换行符时停止。
代码:http://coliru.stacked-crooked.com/a/d2cda4ce721279a4
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <iostream>
namespace x3 = boost::spirit::x3;
enum Whitespace
{
NEWLINE,
TAB,
PIPE
};
struct whitespace_p : x3::symbols<Whitespace>
{
whitespace_p()
{
add
("\n", Whitespace::NEWLINE)
("\t", Whitespace::TAB)
("|", Whitespace::PIPE)
;
}
} whitespace;
struct Element : x3::variant<Whitespace, std::string>
{
using base_type::base_type;
using base_type::operator=;
};
const auto contentParser
= x3::rule<class ContentParserID, Element, true> { "contentParser" }
= x3::no_skip[+(x3::char_ - (whitespace))]
| whitespace
;
using Elements = std::vector<Element>;
const auto elementsParser
= x3::rule<class ContentParserID, Elements, true> { "elementsParser" }
= contentParser >> *(contentParser);
struct print_visitor
: public boost::static_visitor<std::string>
{
std::string operator()(const Whitespace& ws) const
{
if (ws == Whitespace::NEWLINE)
{
return "newline";
}
else if (ws == Whitespace::PIPE)
{
return "pipe";
}
else
{
return "tab";
}
}
std::string operator()(const std::string& str) const
{
return str;
}
};
int main()
{
const std::string text = "Hello \n World";
std::string::const_iterator start = std::begin(text);
const std::string::const_iterator stop = std::end(text);
Elements elements{};
bool result =
phrase_parse(start, stop, elementsParser, x3::ascii::space, elements);
if (!result)
{
std::cout << "failed to parse!\n";
}
else if (start != stop)
{
std::cout << "unparsed: " << std::string{start, stop} << '\n';
}
else
{
for (const auto& e : elements)
{
std::cout << "element: [" << boost::apply_visitor(print_visitor{}, e) << "]\n";
}
}
}
如果我解析文本 Hello | World
,那么我会得到我期望的结果。但是,如果我改为使用 Hello \n World
,则吞下 \n
之后的空格并且永远不会解析 World
。理想情况下,我希望看到这个输出:
element: [Hello ]
element: [newline]
element: [ World]
我怎样才能做到这一点?谢谢!
我关于船长问题的 goto 参考资料:Boost spirit skipper issues
在这种情况下,你让它与 no_skip[]
一起工作。没错。
no_skip
类似于 lexeme
除了它不预先跳过,来自源 (boost/spirit/home/x3/directive/no_skip.hpp):
// same as lexeme[], but does not pre-skip
备选方案
在你的情况下,我会翻转逻辑:只需调整船长本身。
此外,不要向 skipper 提供 phrase_parse
,因为您的语法对 skipper 的正确值高度敏感。
你的整个语法可能是:
const auto p = x3::skip(x3::space - whitespace) [
*(+x3::graph | whitespace)
];
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <iostream>
#include <iomanip>
namespace x3 = boost::spirit::x3;
enum Whitespace { NEWLINE, TAB, PIPE };
struct whitespace_p : x3::symbols<Whitespace> {
whitespace_p() {
add
("\n", Whitespace::NEWLINE)
("\t", Whitespace::TAB)
("|", Whitespace::PIPE)
;
}
} static const whitespace;
struct Element : x3::variant<Whitespace, std::string> {
using base_type::base_type;
using base_type::operator=;
};
using Elements = std::vector<Element>;
static inline std::ostream& operator<<(std::ostream& os, Element const& el) {
struct print_visitor {
std::ostream& os;
auto& operator()(Whitespace ws) const {
switch(ws) {
case Whitespace::NEWLINE: return os << "[newline]";
case Whitespace::PIPE: return os << "[pipe]";
case Whitespace::TAB: return os << "[tab]";
}
return os << "?";
}
auto& operator()(const std::string& str) const { return os << std::quoted(str); }
} vis{os};
return boost::apply_visitor(vis, el);
}
int main() {
std::string const text = "\tHello \n World";
auto start = begin(text), stop = end(text);
const auto p = x3::skip(x3::space - whitespace) [
*(+x3::graph | whitespace)
];
Elements elements;
if (!parse(start, stop, p, elements)) {
std::cout << "failed to parse!\n";
} else {
std::copy(begin(elements), end(elements), std::ostream_iterator<Element>(std::cout, "\n"));
}
if (start != stop) {
std::cout << "unparsed: " << std::quoted(std::string(start, stop)) << '\n';
}
}
版画
[tab]
"Hello"
[newline]
"World"
更简单?
看来您在这里根本不需要任何船长。为什么不呢:
const auto p = *(+~x3::char_("\n\t|") | whitespace);
虽然我们在这里,但不需要符号来映射枚举:
struct Element : x3::variant<char, std::string> {
// ...
};
using Elements = std::vector<Element>;
然后
const auto p
= x3::rule<struct ID, Element> {}
= +~x3::char_("\n\t|") | x3::char_;
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <iostream>
#include <iomanip>
namespace x3 = boost::spirit::x3;
struct Element : x3::variant<char, std::string> {
using variant = x3::variant<char, std::string>;
using variant::variant;
using variant::operator=;
friend std::ostream& operator<<(std::ostream& os, Element const& el) {
struct print_visitor {
std::ostream& os;
auto& operator()(char ws) const {
switch(ws) {
case '\n': return os << "[newline]";
case '\t': return os << "[pipe]";
case '|': return os << "[tab]";
}
return os << "?";
}
auto& operator()(const std::string& str) const { return os << std::quoted(str); }
} vis{os};
return boost::apply_visitor(vis, el);
}
};
using Elements = std::vector<Element>;
int main() {
std::string const text = "\tHello \n World";
auto start = begin(text);
auto const stop = end(text);
Elements elements;
const auto p
= x3::rule<struct ID, Element> {}
= +~x3::char_("\n\t|") | x3::char_;
if (!parse(start, stop, *p, elements)) {
std::cout << "failed to parse!\n";
} else {
std::copy(begin(elements), end(elements), std::ostream_iterator<Element>(std::cout, "\n"));
}
if (start != stop) {
std::cout << "unparsed: " << std::quoted(std::string(start, stop)) << '\n';
}
}
版画
[pipe]
"Hello "
[newline]
" World"
问题是您在第 76 行使用了 phrase_parser 而不是解析器。
尝试使用
之类的东西
bool result =
parse(start, stop, elementsParser, elements);
您的 phrase_parser 被指示跳过空格,这是您真正不想要的。
看第一个回答How to use boost::spirit to parse a sequence of words into a vector?
我有一个解析器,我想在其中捕获某些类型的空格作为枚举值并保留 "text" 值的空格。
我的空白解析器非常基础(注意:我在这里添加管道符只是为了 test/dev 目的):
struct whitespace_p : x3::symbols<Whitespace>
{
whitespace_p()
{
add
("\n", Whitespace::NEWLINE)
("\t", Whitespace::TAB)
("|", Whitespace::PIPE)
;
}
} whitespace;
我想将所有内容捕获到我的枚举或 std::string
s:
struct Element : x3::variant<Whitespace, std::string>
{
using base_type::base_type;
using base_type::operator=;
};
为了解析我的输入,我使用了这样的东西:
const auto contentParser
= x3::rule<class ContentParserID, Element, true> { "contentParser" }
= x3::no_skip[+(x3::char_ - (whitespace))]
| whitespace
;
using Elements = std::vector<Element>;
const auto elementsParser
= x3::rule<class ContentParserID, Elements, true> { "elementsParser" }
= contentParser >> *(contentParser);
但问题是解析器在遇到第一个制表符或换行符时停止。
代码:http://coliru.stacked-crooked.com/a/d2cda4ce721279a4
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <iostream>
namespace x3 = boost::spirit::x3;
enum Whitespace
{
NEWLINE,
TAB,
PIPE
};
struct whitespace_p : x3::symbols<Whitespace>
{
whitespace_p()
{
add
("\n", Whitespace::NEWLINE)
("\t", Whitespace::TAB)
("|", Whitespace::PIPE)
;
}
} whitespace;
struct Element : x3::variant<Whitespace, std::string>
{
using base_type::base_type;
using base_type::operator=;
};
const auto contentParser
= x3::rule<class ContentParserID, Element, true> { "contentParser" }
= x3::no_skip[+(x3::char_ - (whitespace))]
| whitespace
;
using Elements = std::vector<Element>;
const auto elementsParser
= x3::rule<class ContentParserID, Elements, true> { "elementsParser" }
= contentParser >> *(contentParser);
struct print_visitor
: public boost::static_visitor<std::string>
{
std::string operator()(const Whitespace& ws) const
{
if (ws == Whitespace::NEWLINE)
{
return "newline";
}
else if (ws == Whitespace::PIPE)
{
return "pipe";
}
else
{
return "tab";
}
}
std::string operator()(const std::string& str) const
{
return str;
}
};
int main()
{
const std::string text = "Hello \n World";
std::string::const_iterator start = std::begin(text);
const std::string::const_iterator stop = std::end(text);
Elements elements{};
bool result =
phrase_parse(start, stop, elementsParser, x3::ascii::space, elements);
if (!result)
{
std::cout << "failed to parse!\n";
}
else if (start != stop)
{
std::cout << "unparsed: " << std::string{start, stop} << '\n';
}
else
{
for (const auto& e : elements)
{
std::cout << "element: [" << boost::apply_visitor(print_visitor{}, e) << "]\n";
}
}
}
如果我解析文本 Hello | World
,那么我会得到我期望的结果。但是,如果我改为使用 Hello \n World
,则吞下 \n
之后的空格并且永远不会解析 World
。理想情况下,我希望看到这个输出:
element: [Hello ]
element: [newline]
element: [ World]
我怎样才能做到这一点?谢谢!
我关于船长问题的 goto 参考资料:Boost spirit skipper issues
在这种情况下,你让它与 no_skip[]
一起工作。没错。
no_skip
类似于 lexeme
除了它不预先跳过,来自源 (boost/spirit/home/x3/directive/no_skip.hpp):
// same as lexeme[], but does not pre-skip
备选方案
在你的情况下,我会翻转逻辑:只需调整船长本身。
此外,不要向 skipper 提供 phrase_parse
,因为您的语法对 skipper 的正确值高度敏感。
你的整个语法可能是:
const auto p = x3::skip(x3::space - whitespace) [
*(+x3::graph | whitespace)
];
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <iostream>
#include <iomanip>
namespace x3 = boost::spirit::x3;
enum Whitespace { NEWLINE, TAB, PIPE };
struct whitespace_p : x3::symbols<Whitespace> {
whitespace_p() {
add
("\n", Whitespace::NEWLINE)
("\t", Whitespace::TAB)
("|", Whitespace::PIPE)
;
}
} static const whitespace;
struct Element : x3::variant<Whitespace, std::string> {
using base_type::base_type;
using base_type::operator=;
};
using Elements = std::vector<Element>;
static inline std::ostream& operator<<(std::ostream& os, Element const& el) {
struct print_visitor {
std::ostream& os;
auto& operator()(Whitespace ws) const {
switch(ws) {
case Whitespace::NEWLINE: return os << "[newline]";
case Whitespace::PIPE: return os << "[pipe]";
case Whitespace::TAB: return os << "[tab]";
}
return os << "?";
}
auto& operator()(const std::string& str) const { return os << std::quoted(str); }
} vis{os};
return boost::apply_visitor(vis, el);
}
int main() {
std::string const text = "\tHello \n World";
auto start = begin(text), stop = end(text);
const auto p = x3::skip(x3::space - whitespace) [
*(+x3::graph | whitespace)
];
Elements elements;
if (!parse(start, stop, p, elements)) {
std::cout << "failed to parse!\n";
} else {
std::copy(begin(elements), end(elements), std::ostream_iterator<Element>(std::cout, "\n"));
}
if (start != stop) {
std::cout << "unparsed: " << std::quoted(std::string(start, stop)) << '\n';
}
}
版画
[tab]
"Hello"
[newline]
"World"
更简单?
看来您在这里根本不需要任何船长。为什么不呢:
const auto p = *(+~x3::char_("\n\t|") | whitespace);
虽然我们在这里,但不需要符号来映射枚举:
struct Element : x3::variant<char, std::string> {
// ...
};
using Elements = std::vector<Element>;
然后
const auto p
= x3::rule<struct ID, Element> {}
= +~x3::char_("\n\t|") | x3::char_;
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <iostream>
#include <iomanip>
namespace x3 = boost::spirit::x3;
struct Element : x3::variant<char, std::string> {
using variant = x3::variant<char, std::string>;
using variant::variant;
using variant::operator=;
friend std::ostream& operator<<(std::ostream& os, Element const& el) {
struct print_visitor {
std::ostream& os;
auto& operator()(char ws) const {
switch(ws) {
case '\n': return os << "[newline]";
case '\t': return os << "[pipe]";
case '|': return os << "[tab]";
}
return os << "?";
}
auto& operator()(const std::string& str) const { return os << std::quoted(str); }
} vis{os};
return boost::apply_visitor(vis, el);
}
};
using Elements = std::vector<Element>;
int main() {
std::string const text = "\tHello \n World";
auto start = begin(text);
auto const stop = end(text);
Elements elements;
const auto p
= x3::rule<struct ID, Element> {}
= +~x3::char_("\n\t|") | x3::char_;
if (!parse(start, stop, *p, elements)) {
std::cout << "failed to parse!\n";
} else {
std::copy(begin(elements), end(elements), std::ostream_iterator<Element>(std::cout, "\n"));
}
if (start != stop) {
std::cout << "unparsed: " << std::quoted(std::string(start, stop)) << '\n';
}
}
版画
[pipe]
"Hello "
[newline]
" World"
问题是您在第 76 行使用了 phrase_parser 而不是解析器。 尝试使用
之类的东西bool result =
parse(start, stop, elementsParser, elements);
您的 phrase_parser 被指示跳过空格,这是您真正不想要的。
看第一个回答How to use boost::spirit to parse a sequence of words into a vector?