Boost.Spirit 将表达式转换为 AST
Boost.Spirit transform expression to AST
使用 Boost.Spirit 将某些表达式转换为 AST 的正确方法是什么?
我尝试构建它,但我认为它很乱,可以简化很多。
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace ast {
struct unary_operator;
struct binary_operator;
struct expression {
typedef boost::variant<
double,
std::string,
boost::recursive_wrapper<unary_operator>,
boost::recursive_wrapper<binary_operator>,
boost::recursive_wrapper<expression>
> type;
expression() {
}
template<typename Expr>
expression(const Expr &expr)
: expr(expr) {
}
expression &operator+=(expression rhs);
expression &operator-=(expression rhs);
expression &operator*=(expression rhs);
expression &operator/=(expression rhs);
expression &and_(expression rhs);
expression &or_(expression rhs);
expression &equals(expression rhs);
expression ¬_equals(expression rhs);
expression &less_than(expression rhs);
expression &less_equals(expression rhs);
expression &greater_than(expression rhs);
expression &greater_equals(expression rhs);
expression &factor(expression rhs);
expression &dot(expression rhs);
type expr;
};
struct unary_operator {
std::string op;
expression rhs;
unary_operator() {}
unary_operator(std::string op, expression rhs)
: op(std::move(op)), rhs(std::move(rhs)) {
}
};
struct binary_operator {
std::string op;
expression lhs;
expression rhs;
binary_operator() {}
binary_operator(std::string op, expression lhs, expression rhs)
: op(std::move(op)), lhs(std::move(lhs)), rhs(std::move(rhs)) {
}
};
expression &expression::operator+=(expression rhs) {
expr = binary_operator("+", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::operator-=(expression rhs) {
expr = binary_operator("-", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::operator*=(expression rhs) {
expr = binary_operator("*", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::operator/=(expression rhs) {
expr = binary_operator("/", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::and_(expression rhs) {
expr = binary_operator("&&", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::or_(expression rhs) {
expr = binary_operator("||", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::equals(expression rhs) {
expr = binary_operator("==", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::not_equals(expression rhs) {
expr = binary_operator("!=", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::less_than(expression rhs) {
expr = binary_operator("<", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::less_equals(expression rhs) {
expr = binary_operator("<=", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::greater_than(expression rhs) {
expr = binary_operator(">", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::greater_equals(expression rhs) {
expr = binary_operator(">=", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::factor(expression rhs) {
expr = binary_operator("**", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::dot(expression rhs) {
expr = binary_operator(".", std::move(expr), std::move(rhs));
return *this;
}
struct printer {
void operator()(const double n) const {
std::cout << n;
}
void operator()(const std::string &s) const {
std::cout << s;
}
void operator()(const expression &ast) const {
boost::apply_visitor(*this, ast.expr);
}
void operator()(const binary_operator &expr) const {
std::cout << "op:" << expr.op << "(";
boost::apply_visitor(*this, expr.lhs.expr);
std::cout << ", ";
boost::apply_visitor(*this, expr.rhs.expr);
std::cout << ')';
}
void operator()(const unary_operator &expr) const {
std::cout << "op:" << expr.op << "(";
boost::apply_visitor(*this, expr.rhs.expr);
std::cout << ')';
}
};
struct operators {
struct and_ {
};
struct or_ {
};
struct equals {
};
struct not_equals {
};
struct less_than {
};
struct less_equals {
};
struct greater_than {
};
struct greater_equals {
};
struct factor {
};
struct dot {
};
expression &operator()(expression &lhs, expression rhs, and_) const {
return lhs.and_(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, or_) const {
return lhs.or_(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, equals) const {
return lhs.equals(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, not_equals) const {
return lhs.not_equals(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, less_than) const {
return lhs.less_than(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, less_equals) const {
return lhs.less_equals(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, greater_than) const {
return lhs.greater_than(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, greater_equals) const {
return lhs.greater_equals(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, factor) const {
return lhs.factor(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, dot) const {
return lhs.dot(std::move(rhs));
}
};
}
namespace qi = boost::spirit::qi;
struct expectation_handler {
template<typename Iterator>
void operator()(Iterator first, Iterator last, const boost::spirit::info &info) const {
std::stringstream msg;
msg << "Expected " << info << " at \"" << std::string(first, last) << "\"";
throw std::runtime_error(msg.str());
}
};
template<typename Iterator>
struct grammar : qi::grammar<Iterator, ast::expression(), qi::ascii::space_type> {
grammar()
: grammar::base_type(expression) {
variable = qi::lexeme[qi::alpha >> *(qi::alnum | '_')];
expression = logical.alias() > qi::eoi;
logical = equality[qi::_val = qi::_1]
>> *(
((qi::lit("&&") > equality[op(qi::_val, qi::_1, ast::operators::and_{})]) |
(qi::lit("||") > equality[op(qi::_val, qi::_1, ast::operators::or_{})]))
);
equality = relational[qi::_val = qi::_1]
>> *(
((qi::lit("==") > relational[op(qi::_val, qi::_1, ast::operators::equals{})]) |
(qi::lit("!=") > relational[op(qi::_val, qi::_1, ast::operators::not_equals{})]))
);
relational = additive[qi::_val = qi::_1]
>> *(
((qi::lit("<") > relational[op(qi::_val, qi::_1, ast::operators::less_than{})]) |
(qi::lit("<=") > relational[op(qi::_val, qi::_1, ast::operators::less_equals{})]) |
(qi::lit(">") > relational[op(qi::_val, qi::_1, ast::operators::greater_than{})]) |
(qi::lit(">=") > relational[op(qi::_val, qi::_1, ast::operators::greater_equals{})]))
);
additive = multiplicative[qi::_val = qi::_1]
>> *(
((qi::lit("+") > multiplicative[qi::_val += qi::_1]) |
(qi::lit("-") > multiplicative[qi::_val -= qi::_1]))
);
multiplicative = factor[qi::_val = qi::_1]
>> *(
((qi::lit("*") > factor[qi::_val *= qi::_1]) |
(qi::lit("/") > factor[qi::_val /= qi::_1]))
);
factor = primary[qi::_val = qi::_1]
>> *((qi::lit("**")) > primary[op(qi::_val, qi::_1, ast::operators::factor{})]);
primary =
qi::double_[qi::_val = qi::_1]
| ('(' > expression[qi::_val = qi::_1] > ')')
>> *(qi::char_('.') > variable[qi::_val = op(qi::_val, qi::_1, ast::operators::dot{})])
| variable[qi::_val = qi::_1]
>> *(qi::char_('.') > variable[qi::_val = op(qi::_val, qi::_1, ast::operators::dot{})]);
qi::on_error<qi::fail>(
expression,
boost::phoenix::bind(boost::phoenix::ref(err_handler), qi::_3, qi::_2, qi::_4));
}
qi::rule<Iterator, ast::expression(), qi::ascii::space_type> expression, logical, equality, relational, additive, multiplicative, factor, unary, binary, primary;
qi::rule<Iterator, std::string()> variable;
boost::phoenix::function<ast::operators> op;
expectation_handler err_handler;
};
int main(int argc, const char *argv[]) {
std::string input("2 + 5 + t.a");
auto it_begin(input.begin()), it_end(input.end());
grammar<decltype(it_begin)> parser;
ast::expression expression;
qi::phrase_parse(it_begin, it_end, parser, qi::ascii::space, expression);
ast::printer printer;
printer(expression);
return 0;
}
版画
op:+(op:+(2, 5), op:.(t, a))
我将按照“发现”您的代码的顺序进行叙述。然后我会提出一些我认为最后最重要的调整。
我很喜欢你所做的很多事情。
一些名称可以(应该?)改进。例如。 ast::operators
没有任何暗示它的目的。它是二元运算符表达式的惰性工厂。
因此,将其命名为 make_binary
或类似名称。
与包装它的 phoenix::function<>
包装器相同。 op
在语义动作中没有很好地表达那里发生的事情。
与其让 op
(别名 make_binary
)角色在 _val 参数上 side-effectful,不如考虑将其设为 return 不同的值.然后一切都可以变得不可变,语义动作更好地表达意图:
rule = expr [ _val = foo(_val, _1, _2, _3) ];
表示 _val 已更新为根据给定参数创建的内容。
在语法层面,事情看起来并不“整洁”。可以通过简单地 using namespace qi::labels
和摆脱冗余的 qi::lit()
包装器来改进其中的很多内容,例如
logical = equality[qi::_val = qi::_1]
>> *(
((qi::lit("&&") > equality[op(qi::_val, qi::_1, ast::operators::and_{})]) |
(qi::lit("||") > equality[op(qi::_val, qi::_1, ast::operators::or_{})]))
);
进入
using ast::operators;
using namespace qi::labels;
logical = equality[_val = _1]
>> *(
(("&&" > equality[op(_val, _1, operators::and_{})]) |
("||" > equality[op(_val, _1, operators::or_{})]))
);
你在语法中检查 eoi
(对你有好处!)。然而,它被放在一个递归规则中:
expression = logical.alias() > qi::eoi;
这意味着 (a+b)*3
永远不会解析,因为 )
出现在需要 eoi
的地方。通过将 eoi
放在顶层来修复它。
您在语法级别有一个 skipper,这意味着人们必须输入正确的 skipper。如果他们不这样做,他们就会破坏语法。相反,将船长设置为内部以便您控制它,并且界面更易于使用(正确):
start = qi::skip(qi::ascii::space) [ expression ];
用法:
if (qi::parse(it_begin, it_end, parser, expression)) {
或者也许:
if (qi::parse(it_begin, it_end, parser > qi::eoi, expression)) {
我知道 driver 代码 (main
) 可能超出了您的审核范围,但我会向您展示缺失的 error-handling,因为它可能非常微妙 w.r.t。部分解析:
int main() {
ast::printer printer;
grammar<std::string::const_iterator> parser;
for (std::string const input : {
"2 + 5 + t.a",
"(2 + 5) + t.a", // note the removed eoi constraint
"2 + 5 * t.a",
"2 * 5 - t.a",
"partial match",
"uhoh *",
})
try {
std::cout << "----- " << std::quoted(input) << " ---- \n";
auto it_begin(input.begin()), it_end(input.end());
ast::expression expression;
if (qi::parse(it_begin, it_end, parser, expression)) {
printer(expression);
std::cout << std::endl;
} else {
std::cout << "Not matched\n";
}
if (it_begin != it_end) {
std::string tail(it_begin, it_end);
std::cout << "Remaining unparsed input: " << std::quoted(tail) << "\n";
}
} catch(std::exception const& e) {
std::cout << "Exception: " << std::quoted(e.what()) << "\n";
}
}
请注意,除非您命名规则,否则期望不会提供有用的消息。
Exception: Expected <unnamed-rule> at ""
命名它们的惯用方法是使用 DEBUG 宏:
BOOST_SPIRIT_DEBUG_NODES(
(start)
(expression)(logical)(equality)
(relational)(additive)(multiplicative)
(factor)(unary)(binary)(primary)
(variable)
)
现在:
Exception: Expected <factor> at ""
Intermission: superficial changes to here: Live On Coliru
在打印机中有很多重复 (apply_visitor(*this
...),由于 operator()
,它的可读性略差。我的偏好是中继到 call
或 apply
函数
同样在打印机中,不要硬编码输出流。有一天(TM)你会想要格式化为一个字符串。或者std::cerr
,或者一个文件
Combining these notes on the printer: Live On Coliru
struct printer {
std::ostream& _os;
template <typename T> std::ostream& operator()(T const& v) const
{ return call(v); }
private:
std::ostream& call(expression const& ast) const {
return boost::apply_visitor(*this, ast.expr);
}
std::ostream& call(binary_operator const& expr) const {
_os << "op:" << expr.op << "(";
call(expr.lhs) << ", ";
return call(expr.rhs) << ')';
}
std::ostream& call(unary_operator const& expr) const {
_os << "op:" << expr.op << "(";
return call(expr.rhs) << ')';
}
template <typename Lit>
std::ostream& call(Lit const& v) const { return _os << v; }
};
这个的逻辑扩展是让它成为一个实际的 output manipulator:
std::cout << "Parsed: " << fmt_expr{expression} << std::endl;
Again, Live On Coliru, also simplified the printer
visitor again:
std::ostream& call(binary_operator const& expr) const {
return _os
<< "op:" << expr.op
<< "(" << fmt_expr{expr.lhs}
<< ", " << fmt_expr{expr.rhs} << ')';
}
在 AST 中,您将实际运算符动态存储为字符串。在我看来,仅对所有 ast-building 重载(ast::operator::operator()
以及 ast::expr
的所有委托成员)静态编码运算符没有太多价值。相反,每次只传递一个字符串?
现在builder命名空间可以消失,不对称的factory成员,整个phoenix函数是grammar-local:
struct make_binary_f {
ast::binary_operator operator()(ast::expression lhs, ast::expression rhs, std::string op) const {
return { op, lhs, rhs };
}
};
boost::phoenix::function<make_binary_f> make;
Another in-between station Live On Coliru
ACHIEVEMENT UNLOCKED
Code down 113 lines of code (now 218 instead of 331 lines of code)
随机现货:
variable = qi::lexeme[qi::alpha >> *(qi::alnum | '_')];
'_'
等同于 qi::lit('_')
,而不是 qi::char_('_')
,因此这将删除所有下划线。使用 char_,或使用 raw[]
直接从源迭代器构造参数。
现在我们进入细节:我们可以使用自动属性传播来代替 [_val=_1]
(参见 Boost Spirit: "Semantic actions are evil"? and operator %=
rule init)。
找出更常见的子表达式。连同上一个项目符号:
primary
= qi::double_[_val = _1]
| ('(' > expression[_val = _1] > ')')
>> *("." > variable[_val = make(_val, _1, ".")])
| variable[_val = _1]
>> *("." > variable[_val = make(_val, _1, ".")]);
变为:
primary %= qi::double_
| (('(' > expression > ')') | variable)
>> *("." > variable[_val = make(_val, _1, ".")])
;
将变体类型提升到 expression
之外,这样您就可以在 expression
之前实现递归类型。此外,考虑从变体 (LSK) 派生的 expression
。在您的情况下,没有真正需要嵌套表达式,因为 unary/binary 节点已经强加了顺序。所以你的整个 AST 可以是:
struct unary_operator;
struct binary_operator;
typedef boost::variant<
double,
std::string,
boost::recursive_wrapper<unary_operator>,
boost::recursive_wrapper<binary_operator>
> expr_variant;
struct expression : expr_variant {
using expr_variant::expr_variant;
using expr_variant::operator=;
};
struct unary_operator { expression rhs; std::string op; } ;
struct binary_operator { expression lhs; expression rhs; std::string op; } ;
在语法 class 中移动 expectation_handler
(它对其他任何东西都没有用),并可选择使用 phoenix::function 对其进行现代化改造?无论如何,由于仿函数是无状态的,因此不需要 ref
(当然也不需要 ref
而不是 cref
):
qi::on_error<qi::fail>(
expression,
boost::phoenix::bind(expectation_handler{}, _3, _2, _4));
其实,做到就可以了
auto handler = [](Iterator first, Iterator last, const boost::spirit::info &info) {
std::stringstream msg;
msg << "Expected " << info << " at \"" << std::string(first, last) << "\"";
throw std::runtime_error(msg.str());
};
qi::on_error<qi::fail>(
expression,
boost::phoenix::bind(handler, _3, _2, _4));
小问题:使用 std::quoted
而不是“假”引号 :)
迟到的脑波,你可以提取大部分语义动作:
auto make_bin =
_val = px::bind(make_<ast::binary_expr>{}, _val, _2, _1);
只要所有肢体都是stateless/by值,那不是问题(不过对比Assigning parsers to auto variables!)。现在只需让运算符公开属性:
expression %= equality
>> *(
(qi::string("&&") > equality)[make_bin] |
(qi::string("||") > equality)[make_bin]
);
equality %= relational
>> *(
(qi::string("==") > relational)[make_bin] |
(qi::string("!=") > relational)[make_bin]
);
relational %= additive
>> *(
(qi::string("<") > relational)[make_bin] |
(qi::string("<=") > relational)[make_bin] |
(qi::string(">") > relational)[make_bin] |
(qi::string(">=") > relational)[make_bin]
);
additive %= multiplicative
>> *(
(qi::string("+") > multiplicative)[make_bin] |
(qi::string("-") > multiplicative)[make_bin]
);
multiplicative %= factor
>> *(
(qi::string("*") > factor)[make_bin] |
(qi::string("/") > factor)[make_bin]
);
factor %= primary
>> *(
(qi::string("**") > primary)[make_bin]
);
primary %= qi::double_
| (('(' > expression > ')') | variable)
>> *(qi::string(".") > variable)[make_bin]
;
其实刚刚查了一下phoenix::construct
可以做聚合:
auto make_bin =
_val = boost::phoenix::construct<ast::binary_expr>(_1, _val, _2);
还删除了未使用的 unary_*
机器,将 IO 操纵器移至 io
命名空间(而不是 ast
)并重新引入 eoi
签入main
driver...
哎呀,用一些 c++17 你可以组合每个产品的分支:
auto op = [](auto... sym) { return qi::copy((qi::string(sym) | ...)); };
expression %= equality >> *(op("&&","||") > equality)[make_bin];
equality %= relational >> *(op("==","!=") > relational)[make_bin];
relational %= additive >> *(op("<","<=",">",">=") > relational)[make_bin];
additive %= multiplicative >> *(op("+","-") > multiplicative)[make_bin];
multiplicative %= factor >> *(op("*","/") > factor)[make_bin];
factor %= primary >> *(op("**") > primary)[make_bin];
完整演示,103代码片段
只是没能将它降低到 100 LoC 以下,但我在此过程中添加了更多测试用例。
-
-
Live Demo On Coliru(我发现聚合的 phoenix::construct<>
需要 GCC 或最近的提升或两者都有,所以添加了一个构造函数)
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <iostream>
#include <iomanip>
namespace qi = boost::spirit::qi;
namespace ast {
struct binary_expr;
typedef boost::variant<
double,
std::string,
boost::recursive_wrapper<binary_expr>
> expr_variant;
struct expression : expr_variant {
using expr_variant::expr_variant;
using expr_variant::operator=;
};
struct binary_expr {
binary_expr(std::string op, expression lhs, expression rhs)
: op(std::move(op)), lhs(std::move(lhs)), rhs(std::move(rhs)) {}
std::string op; expression lhs; expression rhs;
};
}
namespace io {
struct fmt_expr { // io manipulator
ast::expression const& _ref;
friend std::ostream& operator<<(std::ostream& os, fmt_expr manip);
};
struct formatter_visitor {
std::ostream& _os;
template <typename T> std::ostream& operator()(T const& v) const
{ return call(v); }
private:
std::ostream& call(ast::expression const& v) const {
return boost::apply_visitor(*this, v);
}
std::ostream& call(ast::binary_expr const& expr) const {
return _os << "op:" << expr.op
<< "(" << fmt_expr{expr.lhs} << ", " << fmt_expr{expr.rhs} << ')';
}
template <typename Lit>
std::ostream& call(Lit const& v) const { return _os << v; }
};
std::ostream& operator<<(std::ostream& os, fmt_expr manip) {
return formatter_visitor{os}(manip._ref);
}
}
template<typename Iterator>
struct grammar : qi::grammar<Iterator, ast::expression()> {
grammar() : grammar::base_type(start) {
using namespace qi::labels;
auto make_bin = _val = boost::phoenix::construct<ast::binary_expr>(_1, _val, _2);
auto op = [](auto... sym) { return qi::copy((qi::string(sym) | ...)); };
expression %= equality >> *(op("&&","||") > equality)[make_bin];
equality %= relational >> *(op("==","!=") > relational)[make_bin];
relational %= additive >> *(op("<","<=",">",">=") > relational)[make_bin];
additive %= multiplicative >> *(op("+","-") > multiplicative)[make_bin];
multiplicative %= factor >> *(op("*","/") > factor)[make_bin];
factor %= primary >> *(op("**") > primary)[make_bin];
variable = qi::lexeme[qi::alpha >> *(qi::alnum | qi::char_('_'))];
primary %= qi::double_ | (('(' > expression > ')') | variable)
>> *(op(".") > variable)[make_bin];
start = qi::skip(qi::ascii::space) [ qi::eps > expression ] > qi::eoi;
qi::on_error<qi::fail>(start, boost::phoenix::bind([](auto first, auto last, auto const& info) {
std::stringstream msg;
msg << "Expected " << info << " at " << std::quoted(std::string(first, last));
throw std::runtime_error(msg.str());
}, _3, _2, _4));
BOOST_SPIRIT_DEBUG_NODES((expression)(equality)(relational)(additive)
(multiplicative)(factor)(unary)(binary)(primary)(variable))
}
private:
qi::rule<Iterator, ast::expression()> start;
qi::rule<Iterator, ast::expression(), qi::ascii::space_type> expression, equality, relational, additive, multiplicative, factor, unary, binary, primary;
qi::rule<Iterator, std::string()> variable; // lexeme
};
int main() {
using io::fmt_expr;
grammar<std::string::const_iterator> parser;
for (std::string const s : { "2 + 5 + t.a", "(2 + 5) + t.a", "2 + 5 * t.a",
"2 * 5 - t.a", "partial match", "uhoh *", "under_scores", "" })
try {
ast::expression expression;
qi::parse(s.begin(), s.end(), parser, expression);
std::cout << std::quoted(s) << " -> " << fmt_expr{expression} << "\n";
} catch(std::exception const& e) {
std::cout << "Exception: " << e.what() << "\n";
}
}
打印
"2 + 5 + t.a" -> op:+(op:+(2, 5), op:.(t, a))
"(2 + 5) + t.a" -> op:+(op:+(2, 5), op:.(t, a))
"2 + 5 * t.a" -> op:+(2, op:*(5, op:.(t, a)))
"2 * 5 - t.a" -> op:-(op:*(2, 5), op:.(t, a))
Exception: Expected <eoi> at " match"
Exception: Expected <factor> at ""
"under_scores" -> under_scores
超出范围
我将考虑超出范围的事情与您的 grammar/ast 语义相关。
运算符优先级有点吵。您想要的是一些元数据,允许您“仅组合”二进制操作数并出现正确的优先级,如下所示:
expression %= primary
>> *(
(binop > expression) [_val = make_bin(_1, _val, _2)]
);
我已将此策略应用于 extended chat at this answer and the resulting code is on github: https://github.com/sehe/qi-extended-parser-evaluator
如果您有 C++14 支持,请考虑使用 X3。编译时间会少很多。
使用 Boost.Spirit 将某些表达式转换为 AST 的正确方法是什么?
我尝试构建它,但我认为它很乱,可以简化很多。
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace ast {
struct unary_operator;
struct binary_operator;
struct expression {
typedef boost::variant<
double,
std::string,
boost::recursive_wrapper<unary_operator>,
boost::recursive_wrapper<binary_operator>,
boost::recursive_wrapper<expression>
> type;
expression() {
}
template<typename Expr>
expression(const Expr &expr)
: expr(expr) {
}
expression &operator+=(expression rhs);
expression &operator-=(expression rhs);
expression &operator*=(expression rhs);
expression &operator/=(expression rhs);
expression &and_(expression rhs);
expression &or_(expression rhs);
expression &equals(expression rhs);
expression ¬_equals(expression rhs);
expression &less_than(expression rhs);
expression &less_equals(expression rhs);
expression &greater_than(expression rhs);
expression &greater_equals(expression rhs);
expression &factor(expression rhs);
expression &dot(expression rhs);
type expr;
};
struct unary_operator {
std::string op;
expression rhs;
unary_operator() {}
unary_operator(std::string op, expression rhs)
: op(std::move(op)), rhs(std::move(rhs)) {
}
};
struct binary_operator {
std::string op;
expression lhs;
expression rhs;
binary_operator() {}
binary_operator(std::string op, expression lhs, expression rhs)
: op(std::move(op)), lhs(std::move(lhs)), rhs(std::move(rhs)) {
}
};
expression &expression::operator+=(expression rhs) {
expr = binary_operator("+", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::operator-=(expression rhs) {
expr = binary_operator("-", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::operator*=(expression rhs) {
expr = binary_operator("*", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::operator/=(expression rhs) {
expr = binary_operator("/", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::and_(expression rhs) {
expr = binary_operator("&&", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::or_(expression rhs) {
expr = binary_operator("||", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::equals(expression rhs) {
expr = binary_operator("==", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::not_equals(expression rhs) {
expr = binary_operator("!=", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::less_than(expression rhs) {
expr = binary_operator("<", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::less_equals(expression rhs) {
expr = binary_operator("<=", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::greater_than(expression rhs) {
expr = binary_operator(">", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::greater_equals(expression rhs) {
expr = binary_operator(">=", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::factor(expression rhs) {
expr = binary_operator("**", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::dot(expression rhs) {
expr = binary_operator(".", std::move(expr), std::move(rhs));
return *this;
}
struct printer {
void operator()(const double n) const {
std::cout << n;
}
void operator()(const std::string &s) const {
std::cout << s;
}
void operator()(const expression &ast) const {
boost::apply_visitor(*this, ast.expr);
}
void operator()(const binary_operator &expr) const {
std::cout << "op:" << expr.op << "(";
boost::apply_visitor(*this, expr.lhs.expr);
std::cout << ", ";
boost::apply_visitor(*this, expr.rhs.expr);
std::cout << ')';
}
void operator()(const unary_operator &expr) const {
std::cout << "op:" << expr.op << "(";
boost::apply_visitor(*this, expr.rhs.expr);
std::cout << ')';
}
};
struct operators {
struct and_ {
};
struct or_ {
};
struct equals {
};
struct not_equals {
};
struct less_than {
};
struct less_equals {
};
struct greater_than {
};
struct greater_equals {
};
struct factor {
};
struct dot {
};
expression &operator()(expression &lhs, expression rhs, and_) const {
return lhs.and_(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, or_) const {
return lhs.or_(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, equals) const {
return lhs.equals(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, not_equals) const {
return lhs.not_equals(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, less_than) const {
return lhs.less_than(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, less_equals) const {
return lhs.less_equals(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, greater_than) const {
return lhs.greater_than(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, greater_equals) const {
return lhs.greater_equals(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, factor) const {
return lhs.factor(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, dot) const {
return lhs.dot(std::move(rhs));
}
};
}
namespace qi = boost::spirit::qi;
struct expectation_handler {
template<typename Iterator>
void operator()(Iterator first, Iterator last, const boost::spirit::info &info) const {
std::stringstream msg;
msg << "Expected " << info << " at \"" << std::string(first, last) << "\"";
throw std::runtime_error(msg.str());
}
};
template<typename Iterator>
struct grammar : qi::grammar<Iterator, ast::expression(), qi::ascii::space_type> {
grammar()
: grammar::base_type(expression) {
variable = qi::lexeme[qi::alpha >> *(qi::alnum | '_')];
expression = logical.alias() > qi::eoi;
logical = equality[qi::_val = qi::_1]
>> *(
((qi::lit("&&") > equality[op(qi::_val, qi::_1, ast::operators::and_{})]) |
(qi::lit("||") > equality[op(qi::_val, qi::_1, ast::operators::or_{})]))
);
equality = relational[qi::_val = qi::_1]
>> *(
((qi::lit("==") > relational[op(qi::_val, qi::_1, ast::operators::equals{})]) |
(qi::lit("!=") > relational[op(qi::_val, qi::_1, ast::operators::not_equals{})]))
);
relational = additive[qi::_val = qi::_1]
>> *(
((qi::lit("<") > relational[op(qi::_val, qi::_1, ast::operators::less_than{})]) |
(qi::lit("<=") > relational[op(qi::_val, qi::_1, ast::operators::less_equals{})]) |
(qi::lit(">") > relational[op(qi::_val, qi::_1, ast::operators::greater_than{})]) |
(qi::lit(">=") > relational[op(qi::_val, qi::_1, ast::operators::greater_equals{})]))
);
additive = multiplicative[qi::_val = qi::_1]
>> *(
((qi::lit("+") > multiplicative[qi::_val += qi::_1]) |
(qi::lit("-") > multiplicative[qi::_val -= qi::_1]))
);
multiplicative = factor[qi::_val = qi::_1]
>> *(
((qi::lit("*") > factor[qi::_val *= qi::_1]) |
(qi::lit("/") > factor[qi::_val /= qi::_1]))
);
factor = primary[qi::_val = qi::_1]
>> *((qi::lit("**")) > primary[op(qi::_val, qi::_1, ast::operators::factor{})]);
primary =
qi::double_[qi::_val = qi::_1]
| ('(' > expression[qi::_val = qi::_1] > ')')
>> *(qi::char_('.') > variable[qi::_val = op(qi::_val, qi::_1, ast::operators::dot{})])
| variable[qi::_val = qi::_1]
>> *(qi::char_('.') > variable[qi::_val = op(qi::_val, qi::_1, ast::operators::dot{})]);
qi::on_error<qi::fail>(
expression,
boost::phoenix::bind(boost::phoenix::ref(err_handler), qi::_3, qi::_2, qi::_4));
}
qi::rule<Iterator, ast::expression(), qi::ascii::space_type> expression, logical, equality, relational, additive, multiplicative, factor, unary, binary, primary;
qi::rule<Iterator, std::string()> variable;
boost::phoenix::function<ast::operators> op;
expectation_handler err_handler;
};
int main(int argc, const char *argv[]) {
std::string input("2 + 5 + t.a");
auto it_begin(input.begin()), it_end(input.end());
grammar<decltype(it_begin)> parser;
ast::expression expression;
qi::phrase_parse(it_begin, it_end, parser, qi::ascii::space, expression);
ast::printer printer;
printer(expression);
return 0;
}
版画
op:+(op:+(2, 5), op:.(t, a))
我将按照“发现”您的代码的顺序进行叙述。然后我会提出一些我认为最后最重要的调整。
我很喜欢你所做的很多事情。
一些名称可以(应该?)改进。例如。
ast::operators
没有任何暗示它的目的。它是二元运算符表达式的惰性工厂。因此,将其命名为
make_binary
或类似名称。与包装它的
phoenix::function<>
包装器相同。op
在语义动作中没有很好地表达那里发生的事情。与其让
op
(别名make_binary
)角色在 _val 参数上 side-effectful,不如考虑将其设为 return 不同的值.然后一切都可以变得不可变,语义动作更好地表达意图:rule = expr [ _val = foo(_val, _1, _2, _3) ];
表示 _val 已更新为根据给定参数创建的内容。
在语法层面,事情看起来并不“整洁”。可以通过简单地
using namespace qi::labels
和摆脱冗余的qi::lit()
包装器来改进其中的很多内容,例如logical = equality[qi::_val = qi::_1] >> *( ((qi::lit("&&") > equality[op(qi::_val, qi::_1, ast::operators::and_{})]) | (qi::lit("||") > equality[op(qi::_val, qi::_1, ast::operators::or_{})])) );
进入
using ast::operators; using namespace qi::labels; logical = equality[_val = _1] >> *( (("&&" > equality[op(_val, _1, operators::and_{})]) | ("||" > equality[op(_val, _1, operators::or_{})])) );
你在语法中检查
eoi
(对你有好处!)。然而,它被放在一个递归规则中:expression = logical.alias() > qi::eoi;
这意味着
(a+b)*3
永远不会解析,因为)
出现在需要eoi
的地方。通过将eoi
放在顶层来修复它。您在语法级别有一个 skipper,这意味着人们必须输入正确的 skipper。如果他们不这样做,他们就会破坏语法。相反,将船长设置为内部以便您控制它,并且界面更易于使用(正确):
start = qi::skip(qi::ascii::space) [ expression ];
用法:
if (qi::parse(it_begin, it_end, parser, expression)) {
或者也许:
if (qi::parse(it_begin, it_end, parser > qi::eoi, expression)) {
我知道 driver 代码 (
main
) 可能超出了您的审核范围,但我会向您展示缺失的 error-handling,因为它可能非常微妙 w.r.t。部分解析:int main() { ast::printer printer; grammar<std::string::const_iterator> parser; for (std::string const input : { "2 + 5 + t.a", "(2 + 5) + t.a", // note the removed eoi constraint "2 + 5 * t.a", "2 * 5 - t.a", "partial match", "uhoh *", }) try { std::cout << "----- " << std::quoted(input) << " ---- \n"; auto it_begin(input.begin()), it_end(input.end()); ast::expression expression; if (qi::parse(it_begin, it_end, parser, expression)) { printer(expression); std::cout << std::endl; } else { std::cout << "Not matched\n"; } if (it_begin != it_end) { std::string tail(it_begin, it_end); std::cout << "Remaining unparsed input: " << std::quoted(tail) << "\n"; } } catch(std::exception const& e) { std::cout << "Exception: " << std::quoted(e.what()) << "\n"; } }
请注意,除非您命名规则,否则期望不会提供有用的消息。
Exception: Expected <unnamed-rule> at ""
命名它们的惯用方法是使用 DEBUG 宏:
BOOST_SPIRIT_DEBUG_NODES( (start) (expression)(logical)(equality) (relational)(additive)(multiplicative) (factor)(unary)(binary)(primary) (variable) )
现在:
Exception: Expected <factor> at ""
Intermission: superficial changes to here: Live On Coliru
在打印机中有很多重复 (
apply_visitor(*this
...),由于operator()
,它的可读性略差。我的偏好是中继到call
或apply
函数同样在打印机中,不要硬编码输出流。有一天(TM)你会想要格式化为一个字符串。或者
std::cerr
,或者一个文件Combining these notes on the printer: Live On Coliru
struct printer { std::ostream& _os; template <typename T> std::ostream& operator()(T const& v) const { return call(v); } private: std::ostream& call(expression const& ast) const { return boost::apply_visitor(*this, ast.expr); } std::ostream& call(binary_operator const& expr) const { _os << "op:" << expr.op << "("; call(expr.lhs) << ", "; return call(expr.rhs) << ')'; } std::ostream& call(unary_operator const& expr) const { _os << "op:" << expr.op << "("; return call(expr.rhs) << ')'; } template <typename Lit> std::ostream& call(Lit const& v) const { return _os << v; } };
这个的逻辑扩展是让它成为一个实际的 output manipulator:
std::cout << "Parsed: " << fmt_expr{expression} << std::endl;
Again, Live On Coliru, also simplified the
printer
visitor again:std::ostream& call(binary_operator const& expr) const { return _os << "op:" << expr.op << "(" << fmt_expr{expr.lhs} << ", " << fmt_expr{expr.rhs} << ')'; }
在 AST 中,您将实际运算符动态存储为字符串。在我看来,仅对所有 ast-building 重载(
ast::operator::operator()
以及ast::expr
的所有委托成员)静态编码运算符没有太多价值。相反,每次只传递一个字符串?现在builder命名空间可以消失,不对称的factory成员,整个phoenix函数是grammar-local:
struct make_binary_f { ast::binary_operator operator()(ast::expression lhs, ast::expression rhs, std::string op) const { return { op, lhs, rhs }; } }; boost::phoenix::function<make_binary_f> make;
Another in-between station Live On Coliru
ACHIEVEMENT UNLOCKED
Code down 113 lines of code (now 218 instead of 331 lines of code)
随机现货:
variable = qi::lexeme[qi::alpha >> *(qi::alnum | '_')];
'_'
等同于qi::lit('_')
,而不是qi::char_('_')
,因此这将删除所有下划线。使用 char_,或使用raw[]
直接从源迭代器构造参数。现在我们进入细节:我们可以使用自动属性传播来代替
[_val=_1]
(参见 Boost Spirit: "Semantic actions are evil"? andoperator %=
rule init)。找出更常见的子表达式。连同上一个项目符号:
primary = qi::double_[_val = _1] | ('(' > expression[_val = _1] > ')') >> *("." > variable[_val = make(_val, _1, ".")]) | variable[_val = _1] >> *("." > variable[_val = make(_val, _1, ".")]);
变为:
primary %= qi::double_ | (('(' > expression > ')') | variable) >> *("." > variable[_val = make(_val, _1, ".")]) ;
将变体类型提升到
expression
之外,这样您就可以在expression
之前实现递归类型。此外,考虑从变体 (LSK) 派生的expression
。在您的情况下,没有真正需要嵌套表达式,因为 unary/binary 节点已经强加了顺序。所以你的整个 AST 可以是:struct unary_operator; struct binary_operator; typedef boost::variant< double, std::string, boost::recursive_wrapper<unary_operator>, boost::recursive_wrapper<binary_operator> > expr_variant; struct expression : expr_variant { using expr_variant::expr_variant; using expr_variant::operator=; }; struct unary_operator { expression rhs; std::string op; } ; struct binary_operator { expression lhs; expression rhs; std::string op; } ;
在语法 class 中移动
expectation_handler
(它对其他任何东西都没有用),并可选择使用 phoenix::function 对其进行现代化改造?无论如何,由于仿函数是无状态的,因此不需要ref
(当然也不需要ref
而不是cref
):qi::on_error<qi::fail>( expression, boost::phoenix::bind(expectation_handler{}, _3, _2, _4));
其实,做到就可以了
auto handler = [](Iterator first, Iterator last, const boost::spirit::info &info) { std::stringstream msg; msg << "Expected " << info << " at \"" << std::string(first, last) << "\""; throw std::runtime_error(msg.str()); }; qi::on_error<qi::fail>( expression, boost::phoenix::bind(handler, _3, _2, _4));
小问题:使用
std::quoted
而不是“假”引号 :)迟到的脑波,你可以提取大部分语义动作:
auto make_bin = _val = px::bind(make_<ast::binary_expr>{}, _val, _2, _1);
只要所有肢体都是stateless/by值,那不是问题(不过对比Assigning parsers to auto variables!)。现在只需让运算符公开属性:
expression %= equality >> *( (qi::string("&&") > equality)[make_bin] | (qi::string("||") > equality)[make_bin] ); equality %= relational >> *( (qi::string("==") > relational)[make_bin] | (qi::string("!=") > relational)[make_bin] ); relational %= additive >> *( (qi::string("<") > relational)[make_bin] | (qi::string("<=") > relational)[make_bin] | (qi::string(">") > relational)[make_bin] | (qi::string(">=") > relational)[make_bin] ); additive %= multiplicative >> *( (qi::string("+") > multiplicative)[make_bin] | (qi::string("-") > multiplicative)[make_bin] ); multiplicative %= factor >> *( (qi::string("*") > factor)[make_bin] | (qi::string("/") > factor)[make_bin] ); factor %= primary >> *( (qi::string("**") > primary)[make_bin] ); primary %= qi::double_ | (('(' > expression > ')') | variable) >> *(qi::string(".") > variable)[make_bin] ;
其实刚刚查了一下
phoenix::construct
可以做聚合:auto make_bin = _val = boost::phoenix::construct<ast::binary_expr>(_1, _val, _2);
还删除了未使用的
unary_*
机器,将 IO 操纵器移至io
命名空间(而不是ast
)并重新引入eoi
签入main
driver...哎呀,用一些 c++17 你可以组合每个产品的分支:
auto op = [](auto... sym) { return qi::copy((qi::string(sym) | ...)); }; expression %= equality >> *(op("&&","||") > equality)[make_bin]; equality %= relational >> *(op("==","!=") > relational)[make_bin]; relational %= additive >> *(op("<","<=",">",">=") > relational)[make_bin]; additive %= multiplicative >> *(op("+","-") > multiplicative)[make_bin]; multiplicative %= factor >> *(op("*","/") > factor)[make_bin]; factor %= primary >> *(op("**") > primary)[make_bin];
完整演示,103代码片段
只是没能将它降低到 100 LoC 以下,但我在此过程中添加了更多测试用例。
Live Demo On Coliru(我发现聚合的
phoenix::construct<>
需要 GCC 或最近的提升或两者都有,所以添加了一个构造函数)
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <iostream>
#include <iomanip>
namespace qi = boost::spirit::qi;
namespace ast {
struct binary_expr;
typedef boost::variant<
double,
std::string,
boost::recursive_wrapper<binary_expr>
> expr_variant;
struct expression : expr_variant {
using expr_variant::expr_variant;
using expr_variant::operator=;
};
struct binary_expr {
binary_expr(std::string op, expression lhs, expression rhs)
: op(std::move(op)), lhs(std::move(lhs)), rhs(std::move(rhs)) {}
std::string op; expression lhs; expression rhs;
};
}
namespace io {
struct fmt_expr { // io manipulator
ast::expression const& _ref;
friend std::ostream& operator<<(std::ostream& os, fmt_expr manip);
};
struct formatter_visitor {
std::ostream& _os;
template <typename T> std::ostream& operator()(T const& v) const
{ return call(v); }
private:
std::ostream& call(ast::expression const& v) const {
return boost::apply_visitor(*this, v);
}
std::ostream& call(ast::binary_expr const& expr) const {
return _os << "op:" << expr.op
<< "(" << fmt_expr{expr.lhs} << ", " << fmt_expr{expr.rhs} << ')';
}
template <typename Lit>
std::ostream& call(Lit const& v) const { return _os << v; }
};
std::ostream& operator<<(std::ostream& os, fmt_expr manip) {
return formatter_visitor{os}(manip._ref);
}
}
template<typename Iterator>
struct grammar : qi::grammar<Iterator, ast::expression()> {
grammar() : grammar::base_type(start) {
using namespace qi::labels;
auto make_bin = _val = boost::phoenix::construct<ast::binary_expr>(_1, _val, _2);
auto op = [](auto... sym) { return qi::copy((qi::string(sym) | ...)); };
expression %= equality >> *(op("&&","||") > equality)[make_bin];
equality %= relational >> *(op("==","!=") > relational)[make_bin];
relational %= additive >> *(op("<","<=",">",">=") > relational)[make_bin];
additive %= multiplicative >> *(op("+","-") > multiplicative)[make_bin];
multiplicative %= factor >> *(op("*","/") > factor)[make_bin];
factor %= primary >> *(op("**") > primary)[make_bin];
variable = qi::lexeme[qi::alpha >> *(qi::alnum | qi::char_('_'))];
primary %= qi::double_ | (('(' > expression > ')') | variable)
>> *(op(".") > variable)[make_bin];
start = qi::skip(qi::ascii::space) [ qi::eps > expression ] > qi::eoi;
qi::on_error<qi::fail>(start, boost::phoenix::bind([](auto first, auto last, auto const& info) {
std::stringstream msg;
msg << "Expected " << info << " at " << std::quoted(std::string(first, last));
throw std::runtime_error(msg.str());
}, _3, _2, _4));
BOOST_SPIRIT_DEBUG_NODES((expression)(equality)(relational)(additive)
(multiplicative)(factor)(unary)(binary)(primary)(variable))
}
private:
qi::rule<Iterator, ast::expression()> start;
qi::rule<Iterator, ast::expression(), qi::ascii::space_type> expression, equality, relational, additive, multiplicative, factor, unary, binary, primary;
qi::rule<Iterator, std::string()> variable; // lexeme
};
int main() {
using io::fmt_expr;
grammar<std::string::const_iterator> parser;
for (std::string const s : { "2 + 5 + t.a", "(2 + 5) + t.a", "2 + 5 * t.a",
"2 * 5 - t.a", "partial match", "uhoh *", "under_scores", "" })
try {
ast::expression expression;
qi::parse(s.begin(), s.end(), parser, expression);
std::cout << std::quoted(s) << " -> " << fmt_expr{expression} << "\n";
} catch(std::exception const& e) {
std::cout << "Exception: " << e.what() << "\n";
}
}
打印
"2 + 5 + t.a" -> op:+(op:+(2, 5), op:.(t, a))
"(2 + 5) + t.a" -> op:+(op:+(2, 5), op:.(t, a))
"2 + 5 * t.a" -> op:+(2, op:*(5, op:.(t, a)))
"2 * 5 - t.a" -> op:-(op:*(2, 5), op:.(t, a))
Exception: Expected <eoi> at " match"
Exception: Expected <factor> at ""
"under_scores" -> under_scores
超出范围
我将考虑超出范围的事情与您的 grammar/ast 语义相关。
运算符优先级有点吵。您想要的是一些元数据,允许您“仅组合”二进制操作数并出现正确的优先级,如下所示:
expression %= primary >> *( (binop > expression) [_val = make_bin(_1, _val, _2)] );
我已将此策略应用于 extended chat at this answer and the resulting code is on github: https://github.com/sehe/qi-extended-parser-evaluator
如果您有 C++14 支持,请考虑使用 X3。编译时间会少很多。