使用 Boost Spirit 解析 IRC 消息
Parse IRC Message with Boost Spirit
我可以使用 Boost.Spirit 对 IRC 消息进行基本解析,但无法完整解析消息标签 (IRCv3)。我希望这些标签至少能被逐个解析到 vector<> 中,但更希望能将它们解析为 map<>。
#include <string>
#include <optional>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
/// Bit flags attached to an IRC protocol message; combine them with bitwise OR.
enum MSG_FLAGS : uint32_t {
    /// The final argument is a trailing parameter.
    MSG_TRAILING_ARG = 1u << 0,

    /// If the message has to be wrapped because it has too many params, repeat
    /// the first arg in every part; e.g. for ISUPPORT this keeps the client's
    /// name (1st arg) at the front of each ISUPPORT message.
    MSG_REPEAT_1ST = 1u << 1,

    /// The message must never carry a prefix; e.g. PING and ERROR for local
    /// clients.
    MSG_NO_PREFIX = 1u << 2,
};
/// Structure describing an IRC protocol message
struct message {
    /// IRCv3 tags associated with this message, one string per tag
    std::vector<std::string> tags;
    /// Source prefix - usually blank from clients
    std::string prefix;
    /// Command that was received
    std::string command;
    /// Command arguments
    std::vector<std::string> args;
    /// Flags for internal processing (not received via IRC); a bitwise OR of
    /// MSG_FLAGS values. Defaults to 0 so that a default-constructed message
    /// never carries an indeterminate value.
    uint32_t flags = 0;
};
// Expose message's tags, prefix, command and args (in that order) as a
// Boost.Fusion sequence so parsed attributes can be stored member by member.
// The member types are deduced. `flags` is not adapted; tokenize() sets it
// itself.
BOOST_FUSION_ADAPT_STRUCT(message, tags, prefix, command, args);
std::optional<message> tokenize(std::string const& data)
{
namespace x3 = boost::spirit::x3;
namespace ascii = boost::spirit::x3::ascii;
namespace phx = boost::phoenix;
using x3::rule;
using x3::int_;
using x3::lit;
using x3::double_;
using x3::lexeme;
using x3::omit;
using ascii::char_;
message msg;
msg.flags = 0;
// parser rules
static auto on_trailing_arg = [&](auto& ctx) { msg.flags |= MSG_TRAILING_ARG; };
static auto const token = lexeme[+(char_ - ' ' - ':')];
static auto const prefix = omit[':'] >> token;
static auto const trail = (omit[':'] >> lexeme[*char_])[on_trailing_arg];
static auto const tags = omit['@'] >> token % ';';
static auto const line = -tags
>> -prefix
>> token
>> ((+token > -trail) | trail);
// run the parse
auto iter = data.begin();
auto const end = data.end();
bool r = x3::phrase_parse(iter, end, line, ascii::space, msg);
if (r && iter == end) {
return msg;
} else {
return std::nullopt;
}
}
给定以下 IRC 消息:
"@aaa=bbb;ccc;example.com/ddd=eee :nick!ident@host.com PRIVMSG me :Hello"
我希望 message
对象构造为:
tags = ["aaa=bbb", "ccc", "example.com/ddd=eee"]
prefix = "nick!ident@host.com"
command = "PRIVMSG"
args = ["me", "Hello"]
目前 tags
构造为单个值 (aaa=bbb;ccc;example.com/ddd=eee
)。
我真正想做的是为标签生成一个 map<>
:
tags = [["aaa": "bbb"], "ccc", ["example.com/ddd": "eee"]]
prefix = "nick!ident@host.com"
command = "PRIVMSG"
args = ["me", "Hello"]
第一步:促进和简化 AST:
/// A single IRCv3 tag, kept as one string
using Tag = std::string;
/// All tags of one message, in the order they were parsed
using Tags = std::vector<Tag>;

/// Structure describing an IRC protocol message
struct message {
    /// IRCv3 tags associated with this message
    Tags tags;
    /// Source prefix - usually blank from clients
    std::string prefix;
    /// Command that was received
    std::string command;
    /// Command arguments
    std::vector<std::string> args;
    /// Flags for internal processing (not received via IRC). Defaults to 0 so
    /// that a default-constructed message never carries an indeterminate value.
    uint32_t flags = 0;
};
// Expose message's tags, prefix, command and args (in that order) as a
// Boost.Fusion sequence; the member types are deduced. `flags` is not listed.
BOOST_FUSION_ADAPT_STRUCT(message, tags, prefix, command, args);
现在,稍微调整一下规则:
// A tag name is a run of characters other than ' ', ':', ';' and '='.
static auto const tagname = lexeme[+~char_(" :;=")];
static auto const tagvalue = tagname; // TODO be more specific?
// ...
// raw[] yields the matched input range, so each tag ("name" or "name=value")
// is stored verbatim as one string. Every rule gets its own ID type.
static auto const tag = rule<struct tag_, Tag> {"tag"} = x3::raw[tagname >> -('=' >> tagvalue)];
// '@' followed by one or more tags separated by ';'.
static auto const tags = rule<struct tags_, Tags> {"tags"} = omit['@'] >> tag % ';';
现在各个标签已被分开,为后续的键/值分离做好了准备:Live On Wandbox
// Demo driver: parses one sample line and prints every field of the result.
// Nothing is printed when the line does not parse.
int main() {
    auto const parsed = tokenize("@aaa=bbb;ccc;example.com/ddd=eee :nick!ident@host.com PRIVMSG me :Hello");
    if (!parsed) {
        return 0;
    }

    auto const& msg = *parsed;

    for (auto const& item : msg.tags) {
        std::cout << "tag: " << std::quoted(item) << "\n";
    }

    std::cout << "prefix: " << std::quoted(msg.prefix) << "\n";
    std::cout << "command: " << std::quoted(msg.command) << "\n";

    for (auto const& item : msg.args) {
        std::cout << "arg: " << std::quoted(item) << "\n";
    }

    std::cout << "flags: " << msg.flags << "\n";
    return 0;
}
正在打印:
tag: "aaa=bbb"
tag: "ccc"
tag: "example.com/ddd=eee"
prefix: "nick!ident@host.com"
command: "PRIVMSG"
arg: "me"
arg: "Hello"
flags: 1
附加内容:map
我不确定您是否真的需要 map,因为键可能不唯一,而且顺序可能很重要。但无论如何:
#include <boost/fusion/include/std_pair.hpp>
正是这个头文件让属性能够传播到 map 的条目中,然后:
// One parsed tag as a (name, value) pair.
using Tag = std::pair<std::string, std::string>;
// All tags of one message, keyed by tag name.
using Tags = std::map<std::string, std::string>;
只是调整那些类型定义,
// tagname is a rule with a std::string attribute; it matches a run of
// characters other than ' ', ':', ';' and '='.
static auto const tagname = rule<struct tagname_, std::string> {"tagname"} = lexeme[+~char_(" :;=")];
// ...
// name, optionally followed by "=value"; the attribute is a (name, value) pair.
// Every rule gets its own ID type.
static auto const tag = rule<struct tag_, Tag> {"tag"} = tagname >> -('=' >> tagvalue);
// '@' followed by one or more tags separated by ';'.
static auto const tags = rule<struct tags_, Tags> {"tags"} = omit['@'] >> tag % ';';
去掉 raw[] 指令后,属性就可以传播到 Tag 这个 pair 中。
查看输出:Live On Wandbox
//#define BOOST_SPIRIT_X3_DEBUG
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <optional>
#include <string>
#include <map>
#include <iostream>
#include <iomanip>
/// Bit flags attached to an IRC protocol message; combine them with bitwise OR.
enum MSG_FLAGS : uint32_t {
    /// The final argument is a trailing parameter.
    MSG_TRAILING_ARG = 1u << 0,

    /// If the message has to be wrapped because it has too many params, repeat
    /// the first arg in every part; e.g. for ISUPPORT this keeps the client's
    /// name (1st arg) at the front of each ISUPPORT message.
    MSG_REPEAT_1ST = 1u << 1,

    /// The message must never carry a prefix; e.g. PING and ERROR for local
    /// clients.
    MSG_NO_PREFIX = 1u << 2,
};
/// All IRCv3 tags of one message, keyed by tag name
using Tags = std::map<std::string, std::string>;
/// One parsed tag as a (name, value) pair
using Tag = std::pair<std::string, std::string>;

/// Structure describing an IRC protocol message
struct message {
    /// IRCv3 tags associated with this message
    Tags tags;
    /// Source prefix - usually blank from clients
    std::string prefix;
    /// Command that was received
    std::string command;
    /// Command arguments
    std::vector<std::string> args;
    /// Flags for internal processing (not received via IRC); a bitwise OR of
    /// MSG_FLAGS values. Defaults to 0 so that a default-constructed message
    /// never carries an indeterminate value.
    uint32_t flags = 0;
};
// Expose message's tags, prefix, command and args (in that order) as a
// Boost.Fusion sequence; the member types are deduced. `flags` is not listed.
BOOST_FUSION_ADAPT_STRUCT(message, tags, prefix, command, args);
std::optional<message> tokenize(std::string const &data) {
namespace x3 = boost::spirit::x3;
namespace ascii = boost::spirit::x3::ascii;
namespace phx = boost::phoenix;
using ascii::char_;
using x3::double_;
using x3::int_;
using x3::lexeme;
using x3::lit;
using x3::omit;
using x3::rule;
message msg;
msg.flags = 0;
// parser rules
static auto on_trailing_arg = [&](auto &ctx) {
msg.flags |= MSG_TRAILING_ARG;
};
static auto const token = lexeme[+(char_ - ' ' - ':')];
static auto const tagname = rule<struct tagname_, std::string> {"tagname"} = lexeme[+~char_(" :;=")];
static auto const tagvalue = tagname; // TODO be more specific?
static auto const prefix = omit[':'] >> token;
static auto const trail = (omit[':'] >> lexeme[*char_])[on_trailing_arg];
static auto const tag = rule<struct tags_, Tag> {"tag"} = tagname >> -('=' >> tagvalue);
static auto const tags = rule<struct tags_, Tags> {"tags"} = omit['@'] >> tag % ';';
static auto const line =
-tags >> -prefix >> token >> ((+token > -trail) | trail);
// run the parse
auto iter = data.begin();
auto const end = data.end();
bool r = x3::phrase_parse(iter, end, line, ascii::space, msg);
if (r && iter == end) {
return msg;
} else {
return std::nullopt;
}
}
// Demo driver: parses one sample line and prints every field of the result.
// Nothing is printed when the line does not parse.
int main() {
    auto const parsed = tokenize("@aaa=bbb;ccc;example.com/ddd=eee :nick!ident@host.com PRIVMSG me :Hello");
    if (!parsed) {
        return 0;
    }

    auto const& msg = *parsed;

    // Map entries are visited in key order.
    for (auto const& entry : msg.tags) {
        std::cout << "tag: " << std::quoted(entry.first) << "=" << std::quoted(entry.second) << "\n";
    }

    std::cout << "prefix: " << std::quoted(msg.prefix) << "\n";
    std::cout << "command: " << std::quoted(msg.command) << "\n";

    for (auto const& item : msg.args) {
        std::cout << "arg: " << std::quoted(item) << "\n";
    }

    std::cout << "flags: " << msg.flags << "\n";
    return 0;
}
正在打印:
tag: "aaa"="bbb"
tag: "ccc"=""
tag: "example.com/ddd"="eee"
prefix: "nick!ident@host.com"
command: "PRIVMSG"
arg: "me"
arg: "Hello"
flags: 1
我可以使用 Boost.Spirit 对 IRC 消息进行基本解析,但无法完整解析消息标签 (IRCv3)。我希望这些标签至少能被逐个解析到 vector<> 中,但更希望能将它们解析为 map<>。
#include <string>
#include <optional>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
/// Bit flags attached to an IRC protocol message; combine them with bitwise OR.
enum MSG_FLAGS : uint32_t {
    /// The final argument is a trailing parameter.
    MSG_TRAILING_ARG = 1u << 0,

    /// If the message has to be wrapped because it has too many params, repeat
    /// the first arg in every part; e.g. for ISUPPORT this keeps the client's
    /// name (1st arg) at the front of each ISUPPORT message.
    MSG_REPEAT_1ST = 1u << 1,

    /// The message must never carry a prefix; e.g. PING and ERROR for local
    /// clients.
    MSG_NO_PREFIX = 1u << 2,
};
/// Structure describing an IRC protocol message
struct message {
    /// IRCv3 tags associated with this message, one string per tag
    std::vector<std::string> tags;
    /// Source prefix - usually blank from clients
    std::string prefix;
    /// Command that was received
    std::string command;
    /// Command arguments
    std::vector<std::string> args;
    /// Flags for internal processing (not received via IRC); a bitwise OR of
    /// MSG_FLAGS values. Defaults to 0 so that a default-constructed message
    /// never carries an indeterminate value.
    uint32_t flags = 0;
};
// Expose message's tags, prefix, command and args (in that order) as a
// Boost.Fusion sequence so parsed attributes can be stored member by member.
// The member types are deduced. `flags` is not adapted; tokenize() sets it
// itself.
BOOST_FUSION_ADAPT_STRUCT(message, tags, prefix, command, args);
std::optional<message> tokenize(std::string const& data)
{
namespace x3 = boost::spirit::x3;
namespace ascii = boost::spirit::x3::ascii;
namespace phx = boost::phoenix;
using x3::rule;
using x3::int_;
using x3::lit;
using x3::double_;
using x3::lexeme;
using x3::omit;
using ascii::char_;
message msg;
msg.flags = 0;
// parser rules
static auto on_trailing_arg = [&](auto& ctx) { msg.flags |= MSG_TRAILING_ARG; };
static auto const token = lexeme[+(char_ - ' ' - ':')];
static auto const prefix = omit[':'] >> token;
static auto const trail = (omit[':'] >> lexeme[*char_])[on_trailing_arg];
static auto const tags = omit['@'] >> token % ';';
static auto const line = -tags
>> -prefix
>> token
>> ((+token > -trail) | trail);
// run the parse
auto iter = data.begin();
auto const end = data.end();
bool r = x3::phrase_parse(iter, end, line, ascii::space, msg);
if (r && iter == end) {
return msg;
} else {
return std::nullopt;
}
}
给定以下 IRC 消息:
"@aaa=bbb;ccc;example.com/ddd=eee :nick!ident@host.com PRIVMSG me :Hello"
我希望 message
对象构造为:
tags = ["aaa=bbb", "ccc", "example.com/ddd=eee"]
prefix = "nick!ident@host.com"
command = "PRIVMSG"
args = ["me", "Hello"]
目前 tags
构造为单个值 (aaa=bbb;ccc;example.com/ddd=eee
)。
我真正想做的是为标签生成一个 map<>
:
tags = [["aaa": "bbb"], "ccc", ["example.com/ddd": "eee"]]
prefix = "nick!ident@host.com"
command = "PRIVMSG"
args = ["me", "Hello"]
第一步:促进和简化 AST:
/// A single IRCv3 tag, kept as one string
using Tag = std::string;
/// All tags of one message, in the order they were parsed
using Tags = std::vector<Tag>;

/// Structure describing an IRC protocol message
struct message {
    /// IRCv3 tags associated with this message
    Tags tags;
    /// Source prefix - usually blank from clients
    std::string prefix;
    /// Command that was received
    std::string command;
    /// Command arguments
    std::vector<std::string> args;
    /// Flags for internal processing (not received via IRC). Defaults to 0 so
    /// that a default-constructed message never carries an indeterminate value.
    uint32_t flags = 0;
};
// Expose message's tags, prefix, command and args (in that order) as a
// Boost.Fusion sequence; the member types are deduced. `flags` is not listed.
BOOST_FUSION_ADAPT_STRUCT(message, tags, prefix, command, args);
现在,稍微调整一下规则:
// A tag name is a run of characters other than ' ', ':', ';' and '='.
static auto const tagname = lexeme[+~char_(" :;=")];
static auto const tagvalue = tagname; // TODO be more specific?
// ...
// raw[] yields the matched input range, so each tag ("name" or "name=value")
// is stored verbatim as one string. Every rule gets its own ID type.
static auto const tag = rule<struct tag_, Tag> {"tag"} = x3::raw[tagname >> -('=' >> tagvalue)];
// '@' followed by one or more tags separated by ';'.
static auto const tags = rule<struct tags_, Tags> {"tags"} = omit['@'] >> tag % ';';
现在各个标签已被分开,为后续的键/值分离做好了准备:Live On Wandbox
// Demo driver: parses one sample line and prints every field of the result.
// Nothing is printed when the line does not parse.
int main() {
    auto const parsed = tokenize("@aaa=bbb;ccc;example.com/ddd=eee :nick!ident@host.com PRIVMSG me :Hello");
    if (!parsed) {
        return 0;
    }

    auto const& msg = *parsed;

    for (auto const& item : msg.tags) {
        std::cout << "tag: " << std::quoted(item) << "\n";
    }

    std::cout << "prefix: " << std::quoted(msg.prefix) << "\n";
    std::cout << "command: " << std::quoted(msg.command) << "\n";

    for (auto const& item : msg.args) {
        std::cout << "arg: " << std::quoted(item) << "\n";
    }

    std::cout << "flags: " << msg.flags << "\n";
    return 0;
}
正在打印:
tag: "aaa=bbb"
tag: "ccc"
tag: "example.com/ddd=eee"
prefix: "nick!ident@host.com"
command: "PRIVMSG"
arg: "me"
arg: "Hello"
flags: 1
附加内容:map
我不确定您是否真的需要 map,因为键可能不唯一,而且顺序可能很重要。但无论如何:
#include <boost/fusion/include/std_pair.hpp>
正是这个头文件让属性能够传播到 map 的条目中,然后:
// One parsed tag as a (name, value) pair.
using Tag = std::pair<std::string, std::string>;
// All tags of one message, keyed by tag name.
using Tags = std::map<std::string, std::string>;
只是调整那些类型定义,
// tagname is a rule with a std::string attribute; it matches a run of
// characters other than ' ', ':', ';' and '='.
static auto const tagname = rule<struct tagname_, std::string> {"tagname"} = lexeme[+~char_(" :;=")];
// ...
// name, optionally followed by "=value"; the attribute is a (name, value) pair.
// Every rule gets its own ID type.
static auto const tag = rule<struct tag_, Tag> {"tag"} = tagname >> -('=' >> tagvalue);
// '@' followed by one or more tags separated by ';'.
static auto const tags = rule<struct tags_, Tags> {"tags"} = omit['@'] >> tag % ';';
去掉 raw[] 指令后,属性就可以传播到 Tag 这个 pair 中。
查看输出:Live On Wandbox
//#define BOOST_SPIRIT_X3_DEBUG
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <optional>
#include <string>
#include <map>
#include <iostream>
#include <iomanip>
/// Bit flags attached to an IRC protocol message; combine them with bitwise OR.
enum MSG_FLAGS : uint32_t {
    /// The final argument is a trailing parameter.
    MSG_TRAILING_ARG = 1u << 0,

    /// If the message has to be wrapped because it has too many params, repeat
    /// the first arg in every part; e.g. for ISUPPORT this keeps the client's
    /// name (1st arg) at the front of each ISUPPORT message.
    MSG_REPEAT_1ST = 1u << 1,

    /// The message must never carry a prefix; e.g. PING and ERROR for local
    /// clients.
    MSG_NO_PREFIX = 1u << 2,
};
/// All IRCv3 tags of one message, keyed by tag name
using Tags = std::map<std::string, std::string>;
/// One parsed tag as a (name, value) pair
using Tag = std::pair<std::string, std::string>;

/// Structure describing an IRC protocol message
struct message {
    /// IRCv3 tags associated with this message
    Tags tags;
    /// Source prefix - usually blank from clients
    std::string prefix;
    /// Command that was received
    std::string command;
    /// Command arguments
    std::vector<std::string> args;
    /// Flags for internal processing (not received via IRC); a bitwise OR of
    /// MSG_FLAGS values. Defaults to 0 so that a default-constructed message
    /// never carries an indeterminate value.
    uint32_t flags = 0;
};
// Expose message's tags, prefix, command and args (in that order) as a
// Boost.Fusion sequence; the member types are deduced. `flags` is not listed.
BOOST_FUSION_ADAPT_STRUCT(message, tags, prefix, command, args);
std::optional<message> tokenize(std::string const &data) {
namespace x3 = boost::spirit::x3;
namespace ascii = boost::spirit::x3::ascii;
namespace phx = boost::phoenix;
using ascii::char_;
using x3::double_;
using x3::int_;
using x3::lexeme;
using x3::lit;
using x3::omit;
using x3::rule;
message msg;
msg.flags = 0;
// parser rules
static auto on_trailing_arg = [&](auto &ctx) {
msg.flags |= MSG_TRAILING_ARG;
};
static auto const token = lexeme[+(char_ - ' ' - ':')];
static auto const tagname = rule<struct tagname_, std::string> {"tagname"} = lexeme[+~char_(" :;=")];
static auto const tagvalue = tagname; // TODO be more specific?
static auto const prefix = omit[':'] >> token;
static auto const trail = (omit[':'] >> lexeme[*char_])[on_trailing_arg];
static auto const tag = rule<struct tags_, Tag> {"tag"} = tagname >> -('=' >> tagvalue);
static auto const tags = rule<struct tags_, Tags> {"tags"} = omit['@'] >> tag % ';';
static auto const line =
-tags >> -prefix >> token >> ((+token > -trail) | trail);
// run the parse
auto iter = data.begin();
auto const end = data.end();
bool r = x3::phrase_parse(iter, end, line, ascii::space, msg);
if (r && iter == end) {
return msg;
} else {
return std::nullopt;
}
}
// Demo driver: parses one sample line and prints every field of the result.
// Nothing is printed when the line does not parse.
int main() {
    auto const parsed = tokenize("@aaa=bbb;ccc;example.com/ddd=eee :nick!ident@host.com PRIVMSG me :Hello");
    if (!parsed) {
        return 0;
    }

    auto const& msg = *parsed;

    // Map entries are visited in key order.
    for (auto const& entry : msg.tags) {
        std::cout << "tag: " << std::quoted(entry.first) << "=" << std::quoted(entry.second) << "\n";
    }

    std::cout << "prefix: " << std::quoted(msg.prefix) << "\n";
    std::cout << "command: " << std::quoted(msg.command) << "\n";

    for (auto const& item : msg.args) {
        std::cout << "arg: " << std::quoted(item) << "\n";
    }

    std::cout << "flags: " << msg.flags << "\n";
    return 0;
}
正在打印:
tag: "aaa"="bbb"
tag: "ccc"=""
tag: "example.com/ddd"="eee"
prefix: "nick!ident@host.com"
command: "PRIVMSG"
arg: "me"
arg: "Hello"
flags: 1