在 mongodb 3.2 中优化管道查询
Optimize pipeline query in mongodb 3.2
我有以下 MongoDB 3.2 的示例数据,我想优化聚合管道:
{"_id": {"$oid":"5808578b33fa6f161c9747f8"},"_class":"exceltest.TestBean","bookName":"Test6","revenue":10.0,"unitsSold":1,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"},{"categoryCode":"Cooking/Beverages/Bartending"},{"categoryCode":"Food Receipe/Taste"}]}
{"_id":{"$oid":"5808578b33fa6f161c9747f9"},"_class":"exceltest.TestBean","bookName":"Test1","revenue":11.0,"unitsSold":2,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"},{"categoryCode":"Cooking/Beverages/Bartending"},{"categoryCode":"Food Receipe/Taste"}]}
{"_id":{"$oid":"5808578b33fa6f161c9747fa"},"_class":"exceltest.TestBean","bookName":"Test2","revenue":12.0,"unitsSold":3,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"},{"categoryCode":"Cooking/Beverages/Bartending"},{"categoryCode":"Food Receipe/Taste"}]}
{"_id":{"$oid":"5808578b33fa6f161c9747fb"},"_class":"exceltest.TestBean","bookName":"Test3","revenue":13.0,"unitsSold":4,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"},{"categoryCode":"Cooking/Beverages/Bartending"},{"categoryCode":"Food Receipe/Taste"}]}
{"_id":{"$oid":"5808578b33fa6f161c9747fc"},"_class":"exceltest.TestBean","bookName":"Test4","revenue":14.0,"unitsSold":5,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"},{"categoryCode":"Cooking/Beverages/Bartending"},{"categoryCode":"Food Receipe/Taste"}]}
{"_id":{"$oid":"5808578b33fa6f161c9747fd"},"_class":"exceltest.TestBean","bookName":"Test5","revenue":15.0,"unitsSold":6,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"},{"categoryCode":"Cooking/Beverages/Bartending"},{"categoryCode":"Food Receipe/Taste"}]}
{"_id":{"$oid":"5808578b33fa6f161c9747fe"},"_class":"exceltest.TestBean","bookName":"Test10","revenue":16.0,"unitsSold":7,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"},{"categoryCode":"Cooking/Beverages/Bartending"},{"categoryCode":"Food Receipe/Taste"}]}
{"_id":{"$oid":"5808578b33fa6f161c9747ff"},"_class":"exceltest.TestBean","bookName":"Test11","revenue":100.0,"unitsSold":100,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"},{"categoryCode":"Cooking/Beverages/Bartending"},{"categoryCode":"Food Receipe/Taste"}]}
{"_id":{"$oid":"580857b833fa6f0c3499e462"},"_class":"exceltest.TestBean","bookName":"Test1","revenue":20.0,"unitsSold":10,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"}]}
{"_id":{"$oid":"580857b833fa6f0c3499e463"},"_class":"exceltest.TestBean","bookName":"Test2","revenue":19.0,"unitsSold":9,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"}]}
{"_id":{"$oid":"580857b833fa6f0c3499e464"},"_class":"exceltest.TestBean","bookName":"Test3","revenue":18.0,"unitsSold":8,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"}]}
{"_id":{"$oid":"580857b833fa6f0c3499e465"},"_class":"exceltest.TestBean","bookName":"Test4","revenue":17.0,"unitsSold":7,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"}]}
{"_id":{"$oid":"580857b833fa6f0c3499e466"},"_class":"exceltest.TestBean","bookName":"Test5","revenue":16.0,"unitsSold":6,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"}]}
{"_id":{"$oid":"580857b833fa6f0c3499e467"},"_class":"exceltest.TestBean","bookName":"Test1","revenue":15.0,"unitsSold":5,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"}]}
{"_id":{"$oid":"580857b833fa6f0c3499e468"},"_class":"exceltest.TestBean","bookName":"Test2","revenue":14.0,"unitsSold":4,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"}]}
{"_id":{"$oid":"580857b833fa6f0c3499e469"},"_class":"exceltest.TestBean","bookName":"Test3","revenue":13.0,"unitsSold":3,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"}]}
{"_id":{"$oid":"580857b833fa6f0c3499e46a"},"_class":"exceltest.TestBean","bookName":"Test4","revenue":12.0,"unitsSold":2,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"}]}
{"_id":{"$oid":"580857b833fa6f0c3499e46b"},"_class":"exceltest.TestBean","bookName":"Test5","revenue":11.0,"unitsSold":1,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"}]}
我有以下查询:
// Original (slow) query: top 3 "Cooking/" categories by revenue, each with up to 3 books.
db.books.aggregate([
// Keep only books that have at least one bookCategory element whose categoryCode is exactly 'Cooking/'.
{$match:{'bookCategory.categoryCode' : 'Cooking/'}},
// Emit one document per bookCategory element -- every category of each matched book, not only the Cooking ones.
{$unwind:'$bookCategory'},
/* This $unwind is the performance problem: with 1M records and about 100 category elements per book it produces about 100M documents, which may cause memory problems with our 4GB of memory. */
/* Pipeline 1: sum revenue and unitsSold per (categoryCode, bookName) pair. */
{ "$group": {
"_id": {
"categoryCode": "$bookCategory.categoryCode",
"book": "$bookName"
},
"revenue": { $sum:"$revenue" },
"unitsSold": { $sum:"$unitsSold" }
}
}
,
// Discard the non-Cooking categories that the $unwind above let through.
// The regex is unanchored, so any categoryCode containing 'Cooking/' is kept.
{$match:{'_id.categoryCode' : {$regex : 'Cooking/'}}},
/* Pipeline 2: regroup per categoryCode, collecting the per-book rows and the category totals. */
{ "$group": {
"_id": "$_id.categoryCode",
"books": {
"$push": {
// "category": "$_id.categoryCode",
"book":"$_id.book",
// NOTE(review): $sum here receives a single value per input document; presumably the same as "$revenue" / "$unitsSold" -- confirm.
"revenue": { $sum:"$revenue" },
"unitsSold": { $sum:"$unitsSold" }
},
},
"topRevenue": { $sum: "$revenue" },
"topUnitsSold": { $sum:"$unitsSold" }
}},
// Highest-revenue categories first, then keep the top three.
{ "$sort": { "topRevenue": -1 } },
{ "$limit": 3},
// Keep the first three entries of each books array.
// NOTE(review): nothing sorts the per-book rows before the $push, so these are not necessarily the three best-selling books -- confirm intent.
{ "$project": {
"books": { "$slice": [ "$books", 3 ] },
"topRevenue": 1,
"topUnitsSold": 1
}}
])
执行Pipeline 1时,生成48个文档,输出如下:
/* 1 */
{
"_id" : {
"categoryCode" : "Food Receipe/Taste",
"book" : "Test1"
},
"revenue" : 11,
"unitsSold" : 2
}
/* 2 */
{
"_id" : {
"categoryCode" : "Cooking/",
"book" : "Test6"
},
"revenue" : 10,
"unitsSold" : 1
}
/* 3 */
{
"_id" : {
"categoryCode" : "Food Receipe/Taste",
"book" : "Test2"
},
"revenue" : 12,
"unitsSold" : 3
}
/* 4 */
{
"_id" : {
"categoryCode" : "Food Receipe/Taste",
"book" : "Test6"
},
"revenue" : 10,
"unitsSold" : 1
}
.........等等
我只想在管道 1 阶段解决这个问题。请告诉我这在 mongodb 3.2 中是否可行,以及用 spring-data-mongodb 是否可行。
请帮忙。
此致
克里斯
保留第一阶段的 $match,然后我建议让尽可能少的数据流经管道,因此在 $match 之后添加一个 $project 阶段:
- 仅包含您需要的字段。
- 仅包含相关的数组项,这样 $unwind 生成的文档更少。
第二点可以使用数组 $filter 运算符实现,类似于:
$project: {
bookCategory: {
$filter: {
input: "$bookCategory",
as: "bookCat",
cond: { $eq: [ "$$bookCat.categoryCode", "Cooking/"] }
}
}
}
这应该会减少文档数量并提高聚合性能。
您甚至可以去掉第一个 $match 阶段,因为投影已经去除了不满足 $eq 条件的数组项;不过保留 $match 可以利用索引,并在投影之前就排除无关文档。
只需尝试一些组合,看看哪种组合在您的场景中有效(并且性能最高)
更新:
我创建了一个小聚合,使用数组过滤器来限制数组中的项目数(以及展开)。
这将书籍按类别分组。我将书籍文档放在书籍数组中(供参考),但您可以将其限制为仅标题(数据越少,聚合速度越快)。
// Groups books by every category whose code starts with "Cooking", summing
// revenue and unitsSold per category. The bookCategory array is filtered
// BEFORE $unwind so that only the relevant elements are expanded.
db.collection.aggregate(
  // Pipeline
  [
    // Stage 1: discard books that have no category starting with "Cooking"
    // before any per-document work is done. Such books would be dropped by
    // $unwind anyway (their filtered array is empty), so the result is the
    // same, but a case-sensitive prefix regex lets the server use an index
    // on bookCategory.categoryCode when one exists, instead of projecting
    // every document in the collection.
    {
      $match: { "bookCategory.categoryCode": /^Cooking/ }
    },
    // Stage 2: keep only the fields the $group needs and only the array
    // elements whose categoryCode begins with the 7 characters "Cooking".
    // Note: this prefix has no trailing slash, so a code such as
    // "Cookingware/" would also pass; to require the slash, compare against
    // 'Cooking/' with length 8 here and use /^Cooking\// in Stage 1.
    {
      $project: {
        bookName: 1,
        revenue: 1,
        unitsSold: 1,
        bookCategory: {
          $filter: {
            input: "$bookCategory",
            as: "bookCat",
            cond: { $eq: ['Cooking', { $substr: ["$$bookCat.categoryCode", 0, 7] }] }
          }
        }
      }
    },
    // Stage 3: one document per remaining category element.
    {
      $unwind: "$bookCategory"
    },
    // Stage 4: totals per category. $$ROOT pushes the whole projected book
    // document for reference; push only the title to move less data.
    {
      $group: {
        _id: {
          categoryCode: "$bookCategory.categoryCode",
        },
        books: { $push: "$$ROOT" },
        revenue: { $sum: "$revenue" },
        unitsSold: { $sum: "$unitsSold" }
      }
    }
  ]
);
我假设你在代码中创建了这个聚合,这样你就可以根据你想要的值(食物/烹饪)构建数组过滤器
我有以下 MongoDB 3.2 的示例数据,我想优化聚合管道:
{"_id": {"$oid":"5808578b33fa6f161c9747f8"},"_class":"exceltest.TestBean","bookName":"Test6","revenue":10.0,"unitsSold":1,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"},{"categoryCode":"Cooking/Beverages/Bartending"},{"categoryCode":"Food Receipe/Taste"}]}
{"_id":{"$oid":"5808578b33fa6f161c9747f9"},"_class":"exceltest.TestBean","bookName":"Test1","revenue":11.0,"unitsSold":2,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"},{"categoryCode":"Cooking/Beverages/Bartending"},{"categoryCode":"Food Receipe/Taste"}]}
{"_id":{"$oid":"5808578b33fa6f161c9747fa"},"_class":"exceltest.TestBean","bookName":"Test2","revenue":12.0,"unitsSold":3,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"},{"categoryCode":"Cooking/Beverages/Bartending"},{"categoryCode":"Food Receipe/Taste"}]}
{"_id":{"$oid":"5808578b33fa6f161c9747fb"},"_class":"exceltest.TestBean","bookName":"Test3","revenue":13.0,"unitsSold":4,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"},{"categoryCode":"Cooking/Beverages/Bartending"},{"categoryCode":"Food Receipe/Taste"}]}
{"_id":{"$oid":"5808578b33fa6f161c9747fc"},"_class":"exceltest.TestBean","bookName":"Test4","revenue":14.0,"unitsSold":5,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"},{"categoryCode":"Cooking/Beverages/Bartending"},{"categoryCode":"Food Receipe/Taste"}]}
{"_id":{"$oid":"5808578b33fa6f161c9747fd"},"_class":"exceltest.TestBean","bookName":"Test5","revenue":15.0,"unitsSold":6,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"},{"categoryCode":"Cooking/Beverages/Bartending"},{"categoryCode":"Food Receipe/Taste"}]}
{"_id":{"$oid":"5808578b33fa6f161c9747fe"},"_class":"exceltest.TestBean","bookName":"Test10","revenue":16.0,"unitsSold":7,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"},{"categoryCode":"Cooking/Beverages/Bartending"},{"categoryCode":"Food Receipe/Taste"}]}
{"_id":{"$oid":"5808578b33fa6f161c9747ff"},"_class":"exceltest.TestBean","bookName":"Test11","revenue":100.0,"unitsSold":100,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"},{"categoryCode":"Cooking/Beverages/Bartending"},{"categoryCode":"Food Receipe/Taste"}]}
{"_id":{"$oid":"580857b833fa6f0c3499e462"},"_class":"exceltest.TestBean","bookName":"Test1","revenue":20.0,"unitsSold":10,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"}]}
{"_id":{"$oid":"580857b833fa6f0c3499e463"},"_class":"exceltest.TestBean","bookName":"Test2","revenue":19.0,"unitsSold":9,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"}]}
{"_id":{"$oid":"580857b833fa6f0c3499e464"},"_class":"exceltest.TestBean","bookName":"Test3","revenue":18.0,"unitsSold":8,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"}]}
{"_id":{"$oid":"580857b833fa6f0c3499e465"},"_class":"exceltest.TestBean","bookName":"Test4","revenue":17.0,"unitsSold":7,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"}]}
{"_id":{"$oid":"580857b833fa6f0c3499e466"},"_class":"exceltest.TestBean","bookName":"Test5","revenue":16.0,"unitsSold":6,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"}]}
{"_id":{"$oid":"580857b833fa6f0c3499e467"},"_class":"exceltest.TestBean","bookName":"Test1","revenue":15.0,"unitsSold":5,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"}]}
{"_id":{"$oid":"580857b833fa6f0c3499e468"},"_class":"exceltest.TestBean","bookName":"Test2","revenue":14.0,"unitsSold":4,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"}]}
{"_id":{"$oid":"580857b833fa6f0c3499e469"},"_class":"exceltest.TestBean","bookName":"Test3","revenue":13.0,"unitsSold":3,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"}]}
{"_id":{"$oid":"580857b833fa6f0c3499e46a"},"_class":"exceltest.TestBean","bookName":"Test4","revenue":12.0,"unitsSold":2,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"}]}
{"_id":{"$oid":"580857b833fa6f0c3499e46b"},"_class":"exceltest.TestBean","bookName":"Test5","revenue":11.0,"unitsSold":1,"bookCategory":[{"categoryCode":"Cooking/"},{"categoryCode":"Cooking/Beverages"},{"categoryCode":"Food Receipe/"},{"categoryCode":"Food Receipe/Bartending"}]}
我有以下查询:
// Original (slow) query: top 3 "Cooking/" categories by revenue, each with up to 3 books.
db.books.aggregate([
// Keep only books that have at least one bookCategory element whose categoryCode is exactly 'Cooking/'.
{$match:{'bookCategory.categoryCode' : 'Cooking/'}},
// Emit one document per bookCategory element -- every category of each matched book, not only the Cooking ones.
{$unwind:'$bookCategory'},
/* This $unwind is the performance problem: with 1M records and about 100 category elements per book it produces about 100M documents, which may cause memory problems with our 4GB of memory. */
/* Pipeline 1: sum revenue and unitsSold per (categoryCode, bookName) pair. */
{ "$group": {
"_id": {
"categoryCode": "$bookCategory.categoryCode",
"book": "$bookName"
},
"revenue": { $sum:"$revenue" },
"unitsSold": { $sum:"$unitsSold" }
}
}
,
// Discard the non-Cooking categories that the $unwind above let through.
// The regex is unanchored, so any categoryCode containing 'Cooking/' is kept.
{$match:{'_id.categoryCode' : {$regex : 'Cooking/'}}},
/* Pipeline 2: regroup per categoryCode, collecting the per-book rows and the category totals. */
{ "$group": {
"_id": "$_id.categoryCode",
"books": {
"$push": {
// "category": "$_id.categoryCode",
"book":"$_id.book",
// NOTE(review): $sum here receives a single value per input document; presumably the same as "$revenue" / "$unitsSold" -- confirm.
"revenue": { $sum:"$revenue" },
"unitsSold": { $sum:"$unitsSold" }
},
},
"topRevenue": { $sum: "$revenue" },
"topUnitsSold": { $sum:"$unitsSold" }
}},
// Highest-revenue categories first, then keep the top three.
{ "$sort": { "topRevenue": -1 } },
{ "$limit": 3},
// Keep the first three entries of each books array.
// NOTE(review): nothing sorts the per-book rows before the $push, so these are not necessarily the three best-selling books -- confirm intent.
{ "$project": {
"books": { "$slice": [ "$books", 3 ] },
"topRevenue": 1,
"topUnitsSold": 1
}}
])
执行Pipeline 1时,生成48个文档,输出如下:
/* 1 */
{
"_id" : {
"categoryCode" : "Food Receipe/Taste",
"book" : "Test1"
},
"revenue" : 11,
"unitsSold" : 2
}
/* 2 */
{
"_id" : {
"categoryCode" : "Cooking/",
"book" : "Test6"
},
"revenue" : 10,
"unitsSold" : 1
}
/* 3 */
{
"_id" : {
"categoryCode" : "Food Receipe/Taste",
"book" : "Test2"
},
"revenue" : 12,
"unitsSold" : 3
}
/* 4 */
{
"_id" : {
"categoryCode" : "Food Receipe/Taste",
"book" : "Test6"
},
"revenue" : 10,
"unitsSold" : 1
} .........等等
我只想在管道 1 阶段解决这个问题。请告诉我这在 mongodb 3.2 中是否可行,以及用 spring-data-mongodb 是否可行。
请帮忙。
此致
克里斯
保留第一阶段的 $match,然后我建议让尽可能少的数据流经管道,因此在 $match 之后添加一个 $project 阶段:
- 仅包含您需要的字段。
- 仅包含相关的数组项,这样 $unwind 生成的文档更少。
第二点可以使用数组 $filter 运算符实现,类似于:
$project: {
bookCategory: {
$filter: {
input: "$bookCategory",
as: "bookCat",
cond: { $eq: [ "$$bookCat.categoryCode", "Cooking/"] }
}
}
}
这应该会减少文档数量并提高聚合性能。
您甚至可以去掉第一个 $match 阶段,因为投影已经去除了不满足 $eq 条件的数组项;不过保留 $match 可以利用索引,并在投影之前就排除无关文档。
只需尝试一些组合,看看哪种组合在您的场景中有效(并且性能最高)
更新: 我创建了一个小聚合,使用数组过滤器来限制数组中的项目数(以及展开)。 这将书籍按类别分组。我将书籍文档放在书籍数组中(供参考),但您可以将其限制为仅标题(数据越少,聚合速度越快)。
// Groups books by every category whose code starts with "Cooking", summing
// revenue and unitsSold per category. The bookCategory array is filtered
// BEFORE $unwind so that only the relevant elements are expanded.
db.collection.aggregate(
  // Pipeline
  [
    // Stage 1: discard books that have no category starting with "Cooking"
    // before any per-document work is done. Such books would be dropped by
    // $unwind anyway (their filtered array is empty), so the result is the
    // same, but a case-sensitive prefix regex lets the server use an index
    // on bookCategory.categoryCode when one exists, instead of projecting
    // every document in the collection.
    {
      $match: { "bookCategory.categoryCode": /^Cooking/ }
    },
    // Stage 2: keep only the fields the $group needs and only the array
    // elements whose categoryCode begins with the 7 characters "Cooking".
    // Note: this prefix has no trailing slash, so a code such as
    // "Cookingware/" would also pass; to require the slash, compare against
    // 'Cooking/' with length 8 here and use /^Cooking\// in Stage 1.
    {
      $project: {
        bookName: 1,
        revenue: 1,
        unitsSold: 1,
        bookCategory: {
          $filter: {
            input: "$bookCategory",
            as: "bookCat",
            cond: { $eq: ['Cooking', { $substr: ["$$bookCat.categoryCode", 0, 7] }] }
          }
        }
      }
    },
    // Stage 3: one document per remaining category element.
    {
      $unwind: "$bookCategory"
    },
    // Stage 4: totals per category. $$ROOT pushes the whole projected book
    // document for reference; push only the title to move less data.
    {
      $group: {
        _id: {
          categoryCode: "$bookCategory.categoryCode",
        },
        books: { $push: "$$ROOT" },
        revenue: { $sum: "$revenue" },
        unitsSold: { $sum: "$unitsSold" }
      }
    }
  ]
);
我假设你在代码中创建了这个聚合,这样你就可以根据你想要的值(食物/烹饪)构建数组过滤器