Spirit 解析器输出中多出的符号
Additional symbols in spirit parser output
我们尝试解析一种简单的 number/text 语法:文本中夹杂着数字,因此必须把输入序列拆分成一个由两种元素类型(TEXT 和 NUMBER)组成的向量。其中数字可以采用以下格式:
+10.90
10.90
10
+10
-10
所以我们写语法:
// Tag record filled by the grammar: a tag kind, the text collected for it, and child tags.
struct CMyTag
{
TagTypes tagName; // tag kind; the rules shown with this struct set it to NUMBER or TEXT
std::string tagData; // text collected for this tag
std::vector<CMyTag> tagChild; // nested tags; not referenced by any rule shown with this struct
};
// Adapts CMyTag as a Boost.Fusion sequence of its three members, in declaration order.
BOOST_FUSION_ADAPT_STRUCT(::CMyTag, (TagTypes, tagName) (std::string, tagData) (std::vector<CMyTag>, tagChild))
// Qi grammar whose start rule `line` synthesizes a std::vector<CMyTag>.
// This is the version under discussion; its parser composition is kept exactly as posted.
template <typename Iterator>
struct TextWithNumbers_grammar : qi::grammar<Iterator, std::vector<CMyTag>()>
{
TextWithNumbers_grammar() :
TextWithNumbers_grammar::base_type(line)
{
// One or more tags; at every position numbertag is attempted before texttag.
line = +(numbertag | texttag);
// Optional '+' or '-', one or more digits, then zero or more groups of '.' followed by digits.
number = qi::lexeme[-(qi::lit('+') | '-') >> +qi::digit >> *(qi::char_('.') >> +qi::digit)];
// Emits the NUMBER tag kind, then the text matched by `number`.
numbertag = qi::attr(NUMBER) >> number;
// One or more non-digit characters, stopping before a '+' or '-' that is directly followed by a digit.
text = +(~qi::digit - (qi::char_("+-") >> qi::digit));
// Emits the TEXT tag kind, then the text matched by `text`.
texttag = qi::attr(TEXT) >> text;
}
// Rules that synthesize plain strings.
qi::rule<Iterator, std::string()> number, text;
// Rules that synthesize a single tag.
qi::rule<Iterator, CMyTag()> numbertag, texttag;
// Start rule: the whole input as a sequence of tags.
qi::rule<Iterator, std::vector<CMyTag>()> line;
};
一切正常,但如果我们尝试解析这一行:
wernwl kjwnwenrlwe +10.90+ klwnfkwenwf
我们如预期得到了一个包含 3 个元素的向量,但该向量最后一个元素的文本(CMyTag.tagData)却是:
++ klwnfkwenwf
也就是多出了一个“+”符号。
我们还尝试把语法改写成以 number 规则作为跳过器(skipper)的简单形式:
// Second attempt: collect one or more non-digit characters while using the `number` rule as the skipper.
text = qi::skip(number)[+~qi::digit];
但是解析器因段错误(segmentation fault)而崩溃。
回溯时不会回滚属性值。实际上,这只有在容器属性(例如 vector<> 或 string)中才看得出来。
在这种情况下,首先尝试的是 numbertag 规则,它解析了 + 符号。随后 number 规则失败,而已经匹配的 + 留在输入中。
我不确定你具体想做什么,但看起来你只是想要:
// One or more tags; a number is attempted before text at every position.
line = +(numbertag | texttag);
// raw[] keeps the matched source characters instead of the converted double.
numbertag = attr(NUMBER) >> raw[double_];
// Text is every run of characters at which double_ does not match.
// NOTE(review): the stock double_ also accepts exponents and nan/inf spellings, so those are tagged NUMBER here.
texttag = attr(TEXT) >> raw[+(char_ - double_)];
对于输入 "wernwl kjwnwenrlwe +10.90e3++ klwnfkwenwf"
它打印
Parse success: 5 elements
TEXT 'wernwl kjwnwenrlwe '
NUMBER '+10.90'
TEXT 'e'
NUMBER '3'
TEXT '++ klwnfkwenwf'
现场演示
#include <iostream>
#include <string>
#include <vector>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;
/// Kind of fragment stored in a CMyTag.
enum TagTypes {
    NUMBER,  ///< fragment matched by the number parser
    TEXT,    ///< any other run of characters
};
/// One fragment of the parsed input: its kind plus the matched source text.
struct CMyTag {
    TagTypes    tagName;  ///< NUMBER or TEXT
    std::string tagData;  ///< characters of the fragment
};

// Expose CMyTag to Boost.Fusion as the sequence (tagName, tagData) so that a
// two-element Qi sequence can fill it member by member.
BOOST_FUSION_ADAPT_STRUCT(
    ::CMyTag,
    (TagTypes, tagName)
    (std::string, tagData))
/// Splits a character sequence into a vector of NUMBER / TEXT tags.
///
/// A NUMBER is an optionally signed decimal literal without exponent
/// (e.g. "10", "+10", "-10", "10.90"); everything else is TEXT. Both tag
/// kinds store the matched source characters (via raw[]), not a converted
/// value.
///
/// @tparam Iterator forward iterator type over the input characters.
template <typename Iterator>
struct TextWithNumbers_grammar : qi::grammar<Iterator, std::vector<CMyTag>()>
{
    TextWithNumbers_grammar() : TextWithNumbers_grammar::base_type(line)
    {
        using namespace qi;

        // At every position a number is attempted before falling back to text.
        line      = +(numbertag | texttag);
        numbertag = attr(NUMBER) >> raw[number];
        // Text is the longest run of characters at which no number starts.
        texttag   = attr(TEXT) >> raw[+(char_ - number)];
    }

  private:
    /// Real-number policies restricted to plain decimal literals.
    template <typename T>
    struct simple_real_policies : boost::spirit::qi::real_policies<T>
    {
        template <typename It> // No exponent
        static bool parse_exp(It&, It const&) { return false; }

        template <typename It, typename Attribute> // No exponent
        static bool parse_exp_n(It&, It const&, Attribute&) { return false; }

        // The inherited policies also accept "nan" and "inf"/"infinity"; left
        // enabled, words such as "banana" or "information" would be cut apart
        // and partly tagged NUMBER.
        template <typename It, typename Attribute> // No NaN
        static bool parse_nan(It&, It const&, Attribute&) { return false; }

        template <typename It, typename Attribute> // No infinity
        static bool parse_inf(It&, It const&, Attribute&) { return false; }
    };

    qi::real_parser<double, simple_real_policies<double> > number;
    qi::rule<Iterator, CMyTag()> numbertag, texttag;
    qi::rule<Iterator, std::vector<CMyTag>()> line;
};
int main() {
std::string const input = "wernwl kjwnwenrlwe +10.90e3++ klwnfkwenwf";
using It = std::string::const_iterator;
It f = input.begin(), l = input.end();
std::vector<CMyTag> data;
TextWithNumbers_grammar<It> g;
if (qi::parse(f, l, g, data)) {
std::cout << "Parse success: " << data.size() << " elements\n";
for (auto& s : data) {
std::cout << (s.tagName == NUMBER?"NUMBER":"TEXT")
<< "\t'" << s.tagData << "'\n";
}
} else {
std::cout << "Parse failed\n";
}
if (f!=l)
std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
}
我们尝试解析一种简单的 number/text 语法:文本中夹杂着数字,因此必须把输入序列拆分成一个由两种元素类型(TEXT 和 NUMBER)组成的向量。其中数字可以采用以下格式:
+10.90
10.90
10
+10
-10
所以我们写语法:
// Tag record filled by the grammar: a tag kind, the text collected for it, and child tags.
struct CMyTag
{
TagTypes tagName; // tag kind; the rules shown with this struct set it to NUMBER or TEXT
std::string tagData; // text collected for this tag
std::vector<CMyTag> tagChild; // nested tags; not referenced by any rule shown with this struct
};
// Adapts CMyTag as a Boost.Fusion sequence of its three members, in declaration order.
BOOST_FUSION_ADAPT_STRUCT(::CMyTag, (TagTypes, tagName) (std::string, tagData) (std::vector<CMyTag>, tagChild))
// Qi grammar whose start rule `line` synthesizes a std::vector<CMyTag>.
// This is the version under discussion; its parser composition is kept exactly as posted.
template <typename Iterator>
struct TextWithNumbers_grammar : qi::grammar<Iterator, std::vector<CMyTag>()>
{
TextWithNumbers_grammar() :
TextWithNumbers_grammar::base_type(line)
{
// One or more tags; at every position numbertag is attempted before texttag.
line = +(numbertag | texttag);
// Optional '+' or '-', one or more digits, then zero or more groups of '.' followed by digits.
number = qi::lexeme[-(qi::lit('+') | '-') >> +qi::digit >> *(qi::char_('.') >> +qi::digit)];
// Emits the NUMBER tag kind, then the text matched by `number`.
numbertag = qi::attr(NUMBER) >> number;
// One or more non-digit characters, stopping before a '+' or '-' that is directly followed by a digit.
text = +(~qi::digit - (qi::char_("+-") >> qi::digit));
// Emits the TEXT tag kind, then the text matched by `text`.
texttag = qi::attr(TEXT) >> text;
}
// Rules that synthesize plain strings.
qi::rule<Iterator, std::string()> number, text;
// Rules that synthesize a single tag.
qi::rule<Iterator, CMyTag()> numbertag, texttag;
// Start rule: the whole input as a sequence of tags.
qi::rule<Iterator, std::vector<CMyTag>()> line;
};
一切正常,但如果我们尝试解析这一行:
wernwl kjwnwenrlwe +10.90+ klwnfkwenwf
我们如预期得到了一个包含 3 个元素的向量,但该向量最后一个元素的文本(CMyTag.tagData)却是:
++ klwnfkwenwf
也就是多出了一个“+”符号。我们还尝试把语法改写成以 number 规则作为跳过器(skipper)的简单形式:
// Second attempt: collect one or more non-digit characters while using the `number` rule as the skipper.
text = qi::skip(number)[+~qi::digit];
但是解析器因段错误(segmentation fault)而崩溃。
回溯时不会回滚属性值。实际上,这只有在容器属性(例如 vector<> 或 string)中才看得出来。
在这种情况下,首先尝试的是 numbertag 规则,它解析了 + 符号。随后 number 规则失败,而已经匹配的 + 留在输入中。
我不确定你具体想做什么,但看起来你只是想要:
// One or more tags; a number is attempted before text at every position.
line = +(numbertag | texttag);
// raw[] keeps the matched source characters instead of the converted double.
numbertag = attr(NUMBER) >> raw[double_];
// Text is every run of characters at which double_ does not match.
// NOTE(review): the stock double_ also accepts exponents and nan/inf spellings, so those are tagged NUMBER here.
texttag = attr(TEXT) >> raw[+(char_ - double_)];
对于输入 "wernwl kjwnwenrlwe +10.90e3++ klwnfkwenwf"
它打印
Parse success: 5 elements
TEXT 'wernwl kjwnwenrlwe '
NUMBER '+10.90'
TEXT 'e'
NUMBER '3'
TEXT '++ klwnfkwenwf'
现场演示
#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;
/// Kind of fragment stored in a CMyTag.
enum TagTypes {
    NUMBER,  ///< fragment matched by the number parser
    TEXT,    ///< any other run of characters
};
/// One fragment of the parsed input: its kind plus the matched source text.
struct CMyTag {
    TagTypes    tagName;  ///< NUMBER or TEXT
    std::string tagData;  ///< characters of the fragment
};

// Expose CMyTag to Boost.Fusion as the sequence (tagName, tagData) so that a
// two-element Qi sequence can fill it member by member.
BOOST_FUSION_ADAPT_STRUCT(
    ::CMyTag,
    (TagTypes, tagName)
    (std::string, tagData))
/// Splits a character sequence into a vector of NUMBER / TEXT tags.
///
/// A NUMBER is an optionally signed decimal literal without exponent
/// (e.g. "10", "+10", "-10", "10.90"); everything else is TEXT. Both tag
/// kinds store the matched source characters (via raw[]), not a converted
/// value.
///
/// @tparam Iterator forward iterator type over the input characters.
template <typename Iterator>
struct TextWithNumbers_grammar : qi::grammar<Iterator, std::vector<CMyTag>()>
{
    TextWithNumbers_grammar() : TextWithNumbers_grammar::base_type(line)
    {
        using namespace qi;

        // At every position a number is attempted before falling back to text.
        line      = +(numbertag | texttag);
        numbertag = attr(NUMBER) >> raw[number];
        // Text is the longest run of characters at which no number starts.
        texttag   = attr(TEXT) >> raw[+(char_ - number)];
    }

  private:
    /// Real-number policies restricted to plain decimal literals.
    template <typename T>
    struct simple_real_policies : boost::spirit::qi::real_policies<T>
    {
        template <typename It> // No exponent
        static bool parse_exp(It&, It const&) { return false; }

        template <typename It, typename Attribute> // No exponent
        static bool parse_exp_n(It&, It const&, Attribute&) { return false; }

        // The inherited policies also accept "nan" and "inf"/"infinity"; left
        // enabled, words such as "banana" or "information" would be cut apart
        // and partly tagged NUMBER.
        template <typename It, typename Attribute> // No NaN
        static bool parse_nan(It&, It const&, Attribute&) { return false; }

        template <typename It, typename Attribute> // No infinity
        static bool parse_inf(It&, It const&, Attribute&) { return false; }
    };

    qi::real_parser<double, simple_real_policies<double> > number;
    qi::rule<Iterator, CMyTag()> numbertag, texttag;
    qi::rule<Iterator, std::vector<CMyTag>()> line;
};
int main() {
std::string const input = "wernwl kjwnwenrlwe +10.90e3++ klwnfkwenwf";
using It = std::string::const_iterator;
It f = input.begin(), l = input.end();
std::vector<CMyTag> data;
TextWithNumbers_grammar<It> g;
if (qi::parse(f, l, g, data)) {
std::cout << "Parse success: " << data.size() << " elements\n";
for (auto& s : data) {
std::cout << (s.tagName == NUMBER?"NUMBER":"TEXT")
<< "\t'" << s.tagData << "'\n";
}
} else {
std::cout << "Parse failed\n";
}
if (f!=l)
std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
}