Mongo 聚合,按 2 个不同数组中的相同字段分组
Mongo Aggregation, group by same field in 2 different arrays
我想使用以下数据集计算每个代理人每月赚取的总佣金:
db.comm.find()
/* 0 */
{
month: 1,
deals: [
{ agent: { _id: 1, name: 'Joe' }, deal: { _id: 1, comm: 10000 } },
{ agent: { _id: 1, name: 'Joe' }, deal: { _id: 2, comm: 13000 } },
{ agent: { _id: 2, name: 'Sue' }, deal: { _id: 3, comm: 20000 } }
],
referrals: [
{ agent: { _id: 1, name: 'Joe' }, referral_comm: 3000 },
{ agent: { _id: 3, name: 'Pete' }, referral_comm: 2500, other_comm: 1000 }
]
}
/* 1 */
{
month: 2,
deals: [
{ agent: { _id: 1, name: 'Joe' }, deal: { _id: 4, comm: 11000 } },
{ agent: { _id: 3, name: 'Pete' }, deal: { _id: 5, comm: 21000 } }
],
referrals: [
{ agent: { _id: 2, name: 'Sue' }, referral_comm: 2100, other_comm: 1100 },
{ agent: { _id: 4, name: 'Judy' }, referral_comm: 1100 }
]
}
我在以下管道中得到的结果不正确:
// Attempted pipeline from the question: total commission per (month, agent).
// It yields wrong totals because every deal gets paired with every referral of
// the same month document, whichever agent the referral belongs to.
db.comm.aggregate([
{
// One output document per element of the deals array.
$unwind: "$deals"
},
{
$project: {
month: 1,
// The agent used for grouping comes from the deal only, so an agent who appears
// only in referrals can never show up in the result.
agent: "$deals.agent",
comm: "$deals.deal.comm",
referrals: 1
}
},
{
// Each deal document still carries the whole referrals array, so this produces
// one document per (deal, referral) pair.
$unwind: "$referrals"
},
{
$project: {
month: 1,
agent: 1,
comm: 1,
referral_comm: {
// $add evaluates to null when an operand is missing, e.g. for a referral that
// has no other_comm field.
$add: [
"$referrals.referral_comm",
"$referrals.other_comm"
]
}
}
},
{
$project: {
month: 1,
agent: 1,
comm: {
// Adds the paired referral's commission to the deal's commission even when the
// referral belongs to a different agent; the sum is null if referral_comm is null.
$add: [ "$comm", "$referral_comm" ]
}
}
},
{
$group: {
_id: {
month: "$month",
agent: "$agent"
},
total: {
// $sum ignores non-numeric values, so pairs whose comm became null add nothing.
$sum: "$comm"
}
}
}
])
结果是:
/* 0 */
{
"result" : [
{
"_id" : {
"month" : 1,
"agent" : {
"_id" : 2,
"name" : "Sue"
}
},
"total" : 23500 //expected 20000
},
{
"_id" : {
"month" : 1,
"agent" : {
"_id" : 1,
"name" : "Joe"
}
},
"total" : 30000 //expected 26000
}, //missing Pete in Month 1
{
"_id" : {
"month" : 2,
"agent" : {
"_id" : 3,
"name" : "Pete"
}
},
"total" : 24200 //expected 21000
},
{
"_id" : {
"month" : 2,
"agent" : {
"_id" : 1,
"name" : "Joe"
}
},
"total" : 14200 //expected 11000
}
], //missing Sue and Judy
"ok" : 1
}
我的问题是:我似乎找不到合适的管道阶段组合来合并交易(deals)和推荐(referrals),使佣金归到同一个代理人名下。Mongo 的聚合框架能做到这一点吗?还是只能使用 MapReduce?
稍微尝试了一下,似乎确实可以通过聚合管道解决这个问题,但坦率地说,您的文档结构很不适合编写按(月份 × 代理人)计算佣金的管道。您是否考虑过让每个文档代表一个 "commission event"(佣金事件),例如一笔交易或一次推荐,而不是让每个文档代表一个月?
{
"agent" : { "_id" : 1, "name" : "Joe" },
"month" : 1,
"type" : "deal",
"deal_id" : 1,
"comm" : 10000
}
这些文档的管道非常简单
// Sum comm over all commission-event documents that share an agent _id and month.
db.test.aggregate([
    {
        $group: {
            _id: { agent_id: "$agent._id", month: "$month" },
            comm: { $sum: "$comm" }
        }
    }
])
我认为这种替代文档结构很有意义,因为
- 聚合更容易编写(也更快,但仍算不上很快,因为它要处理每一个文档)
- 使用这种结构,无论是查询我认为您可能关心的信息,还是查询更具体的信息,都比按月的文档结构容易;例如,在按月的文档中,您要如何查询所有大于某个金额的佣金?
- 月文档会增长,可能需要在磁盘上移动,这会影响性能(仅适用于 mmap 存储引擎)
我想我找到了一个可行的管道,尽管开销很大:
// Total commission per (month, agent) across both the deals and referrals arrays.
// Requires MongoDB 3.2+ for the document form of $unwind.
db.comm.aggregate([
    {
        // Record each element's array position, and keep documents whose deals
        // array is empty or missing so their referrals are still counted.
        $unwind: { path: "$deals", includeArrayIndex: "dealIdx", preserveNullAndEmptyArrays: true }
    },
    {
        // Same for referrals. After this stage there is one document per
        // (deal, referral) pair, so every deal and referral is repeated.
        $unwind: { path: "$referrals", includeArrayIndex: "referralIdx", preserveNullAndEmptyArrays: true }
    },
    {
        $group: {
            _id: {
                month: "$month"
            },
            deals: {
                $push: {
                    // src/doc/idx identify the originating array element. $setUnion
                    // below then removes only the repeats created by the double
                    // $unwind, not distinct deals that share an agent and amount.
                    src: { $literal: "deal" },
                    doc: "$_id",
                    idx: "$dealIdx",
                    agent: "$deals.agent",
                    comm: { $ifNull: ["$deals.deal.comm", 0] }
                }
            },
            referrals: {
                $push: {
                    src: { $literal: "referral" },
                    doc: "$_id",
                    idx: "$referralIdx",
                    agent: "$referrals.agent",
                    comm: {
                        // A missing commission field counts as 0 instead of making
                        // the whole sum null.
                        $add: [{ $ifNull: ["$referrals.referral_comm", 0] }, { $ifNull: ["$referrals.other_comm", 0] }]
                    }
                }
            }
        }
    },
    {
        $project: {
            month: "$_id.month",
            // Merge both arrays into one list of { agent, comm } entries and drop
            // the exact repeats.
            comms: {
                $setUnion: ["$deals", "$referrals"]
            }
        }
    }, {
        $unwind: "$comms"
    }, {
        // Discard the placeholder entries produced for an empty or missing
        // deals/referrals array (they carry no agent).
        $match: { "comms.agent": { $exists: true } }
    }, {
        $group: {
            _id: {
                month: "$month",
                agent: "$comms.agent"
            },
            total: {
                $sum: "$comms.comm"
            }
        }
    }
])
这些步骤先展开($unwind)交易和推荐,再据此创建具有相同字段的新数组。Mongo 2.6 允许用 $setUnion 创建这些不同数组的并集。请注意,为了获得正确的结果,我必须使用 $ifNull 来处理缺失的字段。最后的 $unwind 和 $group 只是按月份和代理人汇总出合并后的总额。
这会产生:
/* 0 */
{
"result" : [
{
"_id" : {
"month" : 2,
"agent" : {
"_id" : 2,
"name" : "Sue"
}
},
"total" : 3200
},
{
"_id" : {
"month" : 2,
"agent" : {
"_id" : 4,
"name" : "Judy"
}
},
"total" : 1100
},
{
"_id" : {
"month" : 2,
"agent" : {
"_id" : 1,
"name" : "Joe"
}
},
"total" : 11000
},
{
"_id" : {
"month" : 1,
"agent" : {
"_id" : 2,
"name" : "Sue"
}
},
"total" : 20000
},
{
"_id" : {
"month" : 2,
"agent" : {
"_id" : 3,
"name" : "Pete"
}
},
"total" : 21000
},
{
"_id" : {
"month" : 1,
"agent" : {
"_id" : 3,
"name" : "Pete"
}
},
"total" : 3500
},
{
"_id" : {
"month" : 1,
"agent" : {
"_id" : 1,
"name" : "Joe"
}
},
"total" : 26000
}
],
"ok" : 1
}
我想使用以下数据集计算每个代理人每月赚取的总佣金:
db.comm.find()
/* 0 */
{
month: 1,
deals: [
{ agent: { _id: 1, name: 'Joe' }, deal: { _id: 1, comm: 10000 } },
{ agent: { _id: 1, name: 'Joe' }, deal: { _id: 2, comm: 13000 } },
{ agent: { _id: 2, name: 'Sue' }, deal: { _id: 3, comm: 20000 } }
],
referrals: [
{ agent: { _id: 1, name: 'Joe' }, referral_comm: 3000 },
{ agent: { _id: 3, name: 'Pete' }, referral_comm: 2500, other_comm: 1000 }
]
}
/* 1 */
{
month: 2,
deals: [
{ agent: { _id: 1, name: 'Joe' }, deal: { _id: 4, comm: 11000 } },
{ agent: { _id: 3, name: 'Pete' }, deal: { _id: 5, comm: 21000 } }
],
referrals: [
{ agent: { _id: 2, name: 'Sue' }, referral_comm: 2100, other_comm: 1100 },
{ agent: { _id: 4, name: 'Judy' }, referral_comm: 1100 }
]
}
我在以下管道中得到的结果不正确:
// Attempted pipeline from the question: total commission per (month, agent).
// It yields wrong totals because every deal gets paired with every referral of
// the same month document, whichever agent the referral belongs to.
db.comm.aggregate([
{
// One output document per element of the deals array.
$unwind: "$deals"
},
{
$project: {
month: 1,
// The agent used for grouping comes from the deal only, so an agent who appears
// only in referrals can never show up in the result.
agent: "$deals.agent",
comm: "$deals.deal.comm",
referrals: 1
}
},
{
// Each deal document still carries the whole referrals array, so this produces
// one document per (deal, referral) pair.
$unwind: "$referrals"
},
{
$project: {
month: 1,
agent: 1,
comm: 1,
referral_comm: {
// $add evaluates to null when an operand is missing, e.g. for a referral that
// has no other_comm field.
$add: [
"$referrals.referral_comm",
"$referrals.other_comm"
]
}
}
},
{
$project: {
month: 1,
agent: 1,
comm: {
// Adds the paired referral's commission to the deal's commission even when the
// referral belongs to a different agent; the sum is null if referral_comm is null.
$add: [ "$comm", "$referral_comm" ]
}
}
},
{
$group: {
_id: {
month: "$month",
agent: "$agent"
},
total: {
// $sum ignores non-numeric values, so pairs whose comm became null add nothing.
$sum: "$comm"
}
}
}
])
结果是:
/* 0 */
{
"result" : [
{
"_id" : {
"month" : 1,
"agent" : {
"_id" : 2,
"name" : "Sue"
}
},
"total" : 23500 //expected 20000
},
{
"_id" : {
"month" : 1,
"agent" : {
"_id" : 1,
"name" : "Joe"
}
},
"total" : 30000 //expected 26000
}, //missing Pete in Month 1
{
"_id" : {
"month" : 2,
"agent" : {
"_id" : 3,
"name" : "Pete"
}
},
"total" : 24200 //expected 21000
},
{
"_id" : {
"month" : 2,
"agent" : {
"_id" : 1,
"name" : "Joe"
}
},
"total" : 14200 //expected 11000
}
], //missing Sue and Judy
"ok" : 1
}
我的问题是:我似乎找不到合适的管道阶段组合来合并交易(deals)和推荐(referrals),使佣金归到同一个代理人名下。Mongo 的聚合框架能做到这一点吗?还是只能使用 MapReduce?
稍微尝试了一下,似乎确实可以通过聚合管道解决这个问题,但坦率地说,您的文档结构很不适合编写按(月份 × 代理人)计算佣金的管道。您是否考虑过让每个文档代表一个 "commission event"(佣金事件),例如一笔交易或一次推荐,而不是让每个文档代表一个月?
{
"agent" : { "_id" : 1, "name" : "Joe" },
"month" : 1,
"type" : "deal",
"deal_id" : 1,
"comm" : 10000
}
这些文档的管道非常简单
// Sum comm over all commission-event documents that share an agent _id and month.
db.test.aggregate([
    {
        $group: {
            _id: { agent_id: "$agent._id", month: "$month" },
            comm: { $sum: "$comm" }
        }
    }
])
我认为这种替代文档结构很有意义,因为
- 聚合更容易编写(也更快,但仍算不上很快,因为它要处理每一个文档)
- 使用这种结构,无论是查询我认为您可能关心的信息,还是查询更具体的信息,都比按月的文档结构容易;例如,在按月的文档中,您要如何查询所有大于某个金额的佣金?
- 月文档会增长,可能需要在磁盘上移动,这会影响性能(仅适用于 mmap 存储引擎)
我想我找到了一个可行的管道,尽管开销很大:
// Total commission per (month, agent) across both the deals and referrals arrays.
// Requires MongoDB 3.2+ for the document form of $unwind.
db.comm.aggregate([
    {
        // Record each element's array position, and keep documents whose deals
        // array is empty or missing so their referrals are still counted.
        $unwind: { path: "$deals", includeArrayIndex: "dealIdx", preserveNullAndEmptyArrays: true }
    },
    {
        // Same for referrals. After this stage there is one document per
        // (deal, referral) pair, so every deal and referral is repeated.
        $unwind: { path: "$referrals", includeArrayIndex: "referralIdx", preserveNullAndEmptyArrays: true }
    },
    {
        $group: {
            _id: {
                month: "$month"
            },
            deals: {
                $push: {
                    // src/doc/idx identify the originating array element. $setUnion
                    // below then removes only the repeats created by the double
                    // $unwind, not distinct deals that share an agent and amount.
                    src: { $literal: "deal" },
                    doc: "$_id",
                    idx: "$dealIdx",
                    agent: "$deals.agent",
                    comm: { $ifNull: ["$deals.deal.comm", 0] }
                }
            },
            referrals: {
                $push: {
                    src: { $literal: "referral" },
                    doc: "$_id",
                    idx: "$referralIdx",
                    agent: "$referrals.agent",
                    comm: {
                        // A missing commission field counts as 0 instead of making
                        // the whole sum null.
                        $add: [{ $ifNull: ["$referrals.referral_comm", 0] }, { $ifNull: ["$referrals.other_comm", 0] }]
                    }
                }
            }
        }
    },
    {
        $project: {
            month: "$_id.month",
            // Merge both arrays into one list of { agent, comm } entries and drop
            // the exact repeats.
            comms: {
                $setUnion: ["$deals", "$referrals"]
            }
        }
    }, {
        $unwind: "$comms"
    }, {
        // Discard the placeholder entries produced for an empty or missing
        // deals/referrals array (they carry no agent).
        $match: { "comms.agent": { $exists: true } }
    }, {
        $group: {
            _id: {
                month: "$month",
                agent: "$comms.agent"
            },
            total: {
                $sum: "$comms.comm"
            }
        }
    }
])
这些步骤先展开($unwind)交易和推荐,再据此创建具有相同字段的新数组。Mongo 2.6 允许用 $setUnion 创建这些不同数组的并集。请注意,为了获得正确的结果,我必须使用 $ifNull 来处理缺失的字段。最后的 $unwind 和 $group 只是按月份和代理人汇总出合并后的总额。
这会产生:
/* 0 */
{
"result" : [
{
"_id" : {
"month" : 2,
"agent" : {
"_id" : 2,
"name" : "Sue"
}
},
"total" : 3200
},
{
"_id" : {
"month" : 2,
"agent" : {
"_id" : 4,
"name" : "Judy"
}
},
"total" : 1100
},
{
"_id" : {
"month" : 2,
"agent" : {
"_id" : 1,
"name" : "Joe"
}
},
"total" : 11000
},
{
"_id" : {
"month" : 1,
"agent" : {
"_id" : 2,
"name" : "Sue"
}
},
"total" : 20000
},
{
"_id" : {
"month" : 2,
"agent" : {
"_id" : 3,
"name" : "Pete"
}
},
"total" : 21000
},
{
"_id" : {
"month" : 1,
"agent" : {
"_id" : 3,
"name" : "Pete"
}
},
"total" : 3500
},
{
"_id" : {
"month" : 1,
"agent" : {
"_id" : 1,
"name" : "Joe"
}
},
"total" : 26000
}
],
"ok" : 1
}