mongo和spring-data-mongo中的聚合查询
Aggregation query in mongo and spring-data-mongo
大家好,我在查询数据时遇到了一个大问题。我有这样的文档:
{
"_id" : NumberLong(999789748357864),
"text" : "#asd #weila #asd2 welcome in my house",
"date" : ISODate("2016-12-13T21:44:37.000Z"),
"dateString" : "2016-12-13",
"hashtags" : [
"asd",
"weila",
"asd2"
]
}
我想构建两个查询:
1) 计算每天的主题标签数量,例如这样:
{_id:"2016-12-13",
hashtags:[
{hashtag:"asd",count:20},
{hashtag:"weila",count:18},
{hashtag:"asd2",count:10},
....
]
}
{_id:"2016-12-14",
hashtags:[
{hashtag:"asd",count:18},
{hashtag:"asd2",count:14},
{hashtag:"weila",count:10},
....
]
}
2)另一个是一样的但是我想设置一个时间段从2016-12-13到2016-12-17
对于第一个,我写了下面这个查询,并得到了想要的结果,但我不知道如何用 Spring Data MongoDB 来写。
// Counts hashtag occurrences per day on the `comment` collection and returns,
// for each date, an array of {hashtag, count} entries ordered by count.
db.comment.aggregate([
// Emit one document per element of the `hashtags` array.
{$unwind:"$hashtags"},
// Count occurrences of each (dateString, hashtag) pair.
{"$group":{
"_id":{
"date" : "$dateString",
"hashtag": "$hashtags"
},
"count":{"$sum":1}
}
},
// Regroup by date: collect the {hashtag, count} pairs into an array and
// sum the per-hashtag counts into a per-date total.
{"$group":{
"_id": "$_id.date",
"hashtags": {
"$push": {
"hashtag": "$_id.hashtag",
"count": "$count"
}},
"count": { "$sum": "$count" }
}},
// Order dates by their total hashtag count, highest first.
{"$sort": { count: -1}},
// Flatten the array again so the entries can be sorted by their own count.
{"$unwind": "$hashtags"},
// Sort by per-date total, then by per-hashtag count, both descending.
{"$sort": { "count": -1, "hashtags.count": -1}},
// Rebuild the per-date array from the sorted stream; $first carries the per-date total over.
{"$group": {
"_id": "$_id",
"hashtags": { "$push": "$hashtags" },
"count": { "$first": "$count" }
}},
// Keep only the first 2 entries of each `hashtags` array.
// NOTE(review): no earlier stage produces a `name` field, so `name:1` looks like a leftover; confirm.
// NOTE(review): the $slice limit of 2 truncates the list, while the desired output above shows all hashtags; confirm intent.
{$project:{name:1,hashtags: { $slice: ["$hashtags", 2 ]}}}
]);
对于第二个查询,您仍然可以使用相同的聚合操作,只需去掉第二个 $group 阶段之后的管道步骤;而对于过滤,您必须在最开始的 $match 管道步骤中加入日期范围查询。
以下 mongo shell 示例展示了如何按特定日期范围过滤聚合:
1) 设置一个时间段从2016-12-13到2016-12-14:
// Inclusive date range for the $match stage: start of 2016-12-13 to end of 2016-12-14.
// NOTE(review): new Date("YYYY-MM-DD") is parsed as UTC midnight, while setHours()
// works in the shell's local time zone; confirm the resulting bounds fall on the
// intended day boundaries for the stored `date` values.
var startDate = new Date("2016-12-13");
startDate.setHours(0,0,0,0);
var endDate = new Date("2016-12-14");
endDate.setHours(23,59,59,999);

// Pipeline: filter by date range, then count hashtags per day.
var pipeline = [
    // Keep only documents whose `date` lies within [startDate, endDate].
    {
        "$match": {
            "date": { "$gte": startDate, "$lte": endDate }
        }
    },  // this comma was missing in the original, making the array literal a syntax error
    // Emit one document per element of the `hashtags` array.
    { "$unwind": "$hashtags" },
    // Count occurrences of each (dateString, hashtag) pair.
    {
        "$group": {
            "_id": {
                "date": "$dateString",
                "hashtag": "$hashtags"
            },
            "count": { "$sum": 1 }
        }
    },
    // Regroup by date, collecting the {hashtag, count} pairs into one array per day.
    {
        "$group": {
            "_id": "$_id.date",
            "hashtags": {
                "$push": {
                    "hashtag": "$_id.hashtag",
                    "count": "$count"
                }
            }
        }
    }
]
db.comment.aggregate(pipeline)
2) 设置一个时间段从2016-12-13到2016-12-17:
// Inclusive date range: start of 2016-12-13 to end of 2016-12-17.
// NOTE(review): new Date("YYYY-MM-DD") is parsed as UTC midnight, while setHours()
// works in the shell's local time zone; confirm the bounds are as intended.
var startDate = new Date("2016-12-13");
startDate.setHours(0,0,0,0);
var endDate = new Date("2016-12-17");
endDate.setHours(23,59,59,999);
// run the same pipeline as above but with the date range query set as required
等效的 Spring Data 写法(未测试):
import static org.springframework.data.mongodb.core.aggregation.Aggregation.*;
// Spring Data MongoDB counterpart of the shell pipeline: filter by date range,
// unwind the hashtags, count per (dateString, hashtags) pair, then collect the
// {hashtag, count} pairs per date.
// NOTE(review): the surrounding text marks this snippet as untested.
Aggregation agg = newAggregation(
// Keep only documents whose `date` lies within [startDate, endDate].
match(Criteria.where("date").gte(startDate).lte(endDate)),
// Emit one document per element of the `hashtags` array.
unwind("hashtags"),
// Group by the (dateString, hashtags) pair and count the documents in each group.
group("dateString", "hashtags").count().as("count"),
// Regroup by date and push a {hashtag, count} object per pair into `hashtags`.
// NOTE(review): verify that "_id.dateString" and "$_id.hashtags" resolve as intended
// after the previous group stage; Spring Data may expose these fields under other names.
group("_id.dateString")
.push(new BasicDBObject
("hashtag", "$_id.hashtags").append
("count", "$count")
).as("hashtags")
);
// Run the aggregation and map each result document onto Comment.
// NOTE(review): the pipeline output has `hashtags` as {hashtag, count} objects, unlike the
// string array in the source documents; confirm Comment is a suitable mapping target.
AggregationResults<Comment> results = mongoTemplate.aggregate(agg, Comment.class);
List<Comment> comments = results.getMappedResults();
大家好,我在查询数据时遇到了一个大问题。我有这样的文档:
{
"_id" : NumberLong(999789748357864),
"text" : "#asd #weila #asd2 welcome in my house",
"date" : ISODate("2016-12-13T21:44:37.000Z"),
"dateString" : "2016-12-13",
"hashtags" : [
"asd",
"weila",
"asd2"
]
}
我想构建两个查询:
1) 计算每天的主题标签数量,例如这样:
{_id:"2016-12-13",
hashtags:[
{hashtag:"asd",count:20},
{hashtag:"weila",count:18},
{hashtag:"asd2",count:10},
....
]
}
{_id:"2016-12-14",
hashtags:[
{hashtag:"asd",count:18},
{hashtag:"asd2",count:14},
{hashtag:"weila",count:10},
....
]
}
2)另一个是一样的但是我想设置一个时间段从2016-12-13到2016-12-17
对于第一个,我写了下面这个查询,并得到了想要的结果,但我不知道如何用 Spring Data MongoDB 来写。
// Counts hashtag occurrences per day on the `comment` collection and returns,
// for each date, an array of {hashtag, count} entries ordered by count.
db.comment.aggregate([
// Emit one document per element of the `hashtags` array.
{$unwind:"$hashtags"},
// Count occurrences of each (dateString, hashtag) pair.
{"$group":{
"_id":{
"date" : "$dateString",
"hashtag": "$hashtags"
},
"count":{"$sum":1}
}
},
// Regroup by date: collect the {hashtag, count} pairs into an array and
// sum the per-hashtag counts into a per-date total.
{"$group":{
"_id": "$_id.date",
"hashtags": {
"$push": {
"hashtag": "$_id.hashtag",
"count": "$count"
}},
"count": { "$sum": "$count" }
}},
// Order dates by their total hashtag count, highest first.
{"$sort": { count: -1}},
// Flatten the array again so the entries can be sorted by their own count.
{"$unwind": "$hashtags"},
// Sort by per-date total, then by per-hashtag count, both descending.
{"$sort": { "count": -1, "hashtags.count": -1}},
// Rebuild the per-date array from the sorted stream; $first carries the per-date total over.
{"$group": {
"_id": "$_id",
"hashtags": { "$push": "$hashtags" },
"count": { "$first": "$count" }
}},
// Keep only the first 2 entries of each `hashtags` array.
// NOTE(review): no earlier stage produces a `name` field, so `name:1` looks like a leftover; confirm.
// NOTE(review): the $slice limit of 2 truncates the list, while the desired output above shows all hashtags; confirm intent.
{$project:{name:1,hashtags: { $slice: ["$hashtags", 2 ]}}}
]);
对于第二个查询,您仍然可以使用相同的聚合操作,只需去掉第二个 $group 阶段之后的管道步骤;而对于过滤,您必须在最开始的 $match 管道步骤中加入日期范围查询。
以下 mongo shell 示例展示了如何按特定日期范围过滤聚合:
1) 设置一个时间段从2016-12-13到2016-12-14:
// Inclusive date range for the $match stage: start of 2016-12-13 to end of 2016-12-14.
// NOTE(review): new Date("YYYY-MM-DD") is parsed as UTC midnight, while setHours()
// works in the shell's local time zone; confirm the resulting bounds fall on the
// intended day boundaries for the stored `date` values.
var startDate = new Date("2016-12-13");
startDate.setHours(0,0,0,0);
var endDate = new Date("2016-12-14");
endDate.setHours(23,59,59,999);

// Pipeline: filter by date range, then count hashtags per day.
var pipeline = [
    // Keep only documents whose `date` lies within [startDate, endDate].
    {
        "$match": {
            "date": { "$gte": startDate, "$lte": endDate }
        }
    },  // this comma was missing in the original, making the array literal a syntax error
    // Emit one document per element of the `hashtags` array.
    { "$unwind": "$hashtags" },
    // Count occurrences of each (dateString, hashtag) pair.
    {
        "$group": {
            "_id": {
                "date": "$dateString",
                "hashtag": "$hashtags"
            },
            "count": { "$sum": 1 }
        }
    },
    // Regroup by date, collecting the {hashtag, count} pairs into one array per day.
    {
        "$group": {
            "_id": "$_id.date",
            "hashtags": {
                "$push": {
                    "hashtag": "$_id.hashtag",
                    "count": "$count"
                }
            }
        }
    }
]
db.comment.aggregate(pipeline)
2) 设置一个时间段从2016-12-13到2016-12-17:
// Inclusive date range: start of 2016-12-13 to end of 2016-12-17.
// NOTE(review): new Date("YYYY-MM-DD") is parsed as UTC midnight, while setHours()
// works in the shell's local time zone; confirm the bounds are as intended.
var startDate = new Date("2016-12-13");
startDate.setHours(0,0,0,0);
var endDate = new Date("2016-12-17");
endDate.setHours(23,59,59,999);
// run the same pipeline as above but with the date range query set as required
等效的 Spring Data 写法(未测试):
import static org.springframework.data.mongodb.core.aggregation.Aggregation.*;
// Spring Data MongoDB counterpart of the shell pipeline: filter by date range,
// unwind the hashtags, count per (dateString, hashtags) pair, then collect the
// {hashtag, count} pairs per date.
// NOTE(review): the surrounding text marks this snippet as untested.
Aggregation agg = newAggregation(
// Keep only documents whose `date` lies within [startDate, endDate].
match(Criteria.where("date").gte(startDate).lte(endDate)),
// Emit one document per element of the `hashtags` array.
unwind("hashtags"),
// Group by the (dateString, hashtags) pair and count the documents in each group.
group("dateString", "hashtags").count().as("count"),
// Regroup by date and push a {hashtag, count} object per pair into `hashtags`.
// NOTE(review): verify that "_id.dateString" and "$_id.hashtags" resolve as intended
// after the previous group stage; Spring Data may expose these fields under other names.
group("_id.dateString")
.push(new BasicDBObject
("hashtag", "$_id.hashtags").append
("count", "$count")
).as("hashtags")
);
// Run the aggregation and map each result document onto Comment.
// NOTE(review): the pipeline output has `hashtags` as {hashtag, count} objects, unlike the
// string array in the source documents; confirm Comment is a suitable mapping target.
AggregationResults<Comment> results = mongoTemplate.aggregate(agg, Comment.class);
List<Comment> comments = results.getMappedResults();