如何根据共同的数组元素对文档进行匹配和排序
How to match and sort documents based on array elements in common
// Mongoose schema describing a user document.
var UserSchema = Schema({
  // Stored as a plain string.
  android_id: String,

  // Date field; Date.now is used as the default when no value is supplied.
  created: { type: Date, default: Date.now },

  // Array of ObjectId references pointing at the 'Interests' model.
  interests: [
    { type: Schema.Types.ObjectId, ref: 'Interests' }
  ]
});
// The asker's original attempt: exclude the user identified by `userID`, keep
// users whose "interests" array contains at least one value from `ids`, then
// group by android_id and keep the five largest counts.
Users.aggregate([
{ $match: {android_id: {$ne: userID}, interests: {$elemMatch: {$in: ids}} }},
// $sum: 1 adds one per matched document in the group, not one per interest in common.
{ $group: { _id: { android_id: '$android_id'},count: {$sum: 1}}},
{ $sort: {count: -1}},
{ $limit: 5 }],
// NOTE(review): the call is left unterminated in this excerpt (no callback or closing parenthesis is shown).
我需要找出与我(ids 数组)共同兴趣最多的前 5 个用户的 android_id。只返回 interests 数组中匹配到的元素所组成的数组,我也可以接受。
您的方向似乎是对的,但确实需要注意:数组在比较时有一些特殊之处。
这里的基本开始是找到所有不是当前用户的用户,并且您至少还需要当前用户的 "interests" 数组。您似乎已经这样做了,但是在这里让我们考虑您拥有将在列表中使用的当前用户的整个 user
对象。
这使得您的 "top 5" 基本上是 "Not me, and the most interests in common"(不是我,并且共同兴趣最多)的结果,这意味着您需要统计每个用户与当前用户之间兴趣的 "overlap"(重叠)数量。
这基本上就是两个数组(或者说 "集合")的 $setIntersection,它返回两者共有的元素。要统计共有元素的个数,还可以使用 $size 运算符。所以可以这样应用:
// Rank the other users by the number of interests they share with `user`
// and keep the five best matches.

// Exclude the current user and require at least one interest in common.
var excludeSelfRequireOverlap = {
  "$match": {
    "android_id": { "$ne": user.android_id },
    "interests": { "$in": user.interests }
  }
};

// "common" is the size of the intersection between each document's
// "interests" array and the current user's interests.
var addCommonCount = {
  "$project": {
    "android_id": 1,
    "interests": 1,
    "common": {
      "$size": { "$setIntersection": [ "$interests", user.interests ] }
    }
  }
};

Users.aggregate(
  [
    excludeSelfRequireOverlap,
    addCommonCount,
    { "$sort": { "common": -1 } },  // most interests in common first
    { "$limit": 5 }
  ],
  function (err, result) {
    // Handle `err` / `result` here.
  }
);
"common" 中返回的结果是当前用户与被检查用户之间共同兴趣的个数。随后由 $sort 对该数据排序,把共同兴趣最多的排在最前面,再由 $limit 只返回前 5 个。
如果由于某种原因您的 MongoDB 版本目前低于 MongoDB 2.6,其中引入了 $setIntersection
和 $size
运算符,那么您仍然可以这样做, 但它只需要更长的时间来处理数组。
主要是您需要 $unwind
数组并单独处理每个匹配项:
// Pipeline stages for MongoDB versions without $setIntersection / $size
// (shown without the enclosing aggregate() call).
// Keep only other users that share at least one interest with `user`.
{ "$match": {
"android_id": { "$ne": user.android_id },
"interests": { "$in": user.interests }
}},
// Emit one document per element of "interests" so each can be compared individually.
{ "$unwind": "$interests" },
// Regroup per original document, rebuilding the array and counting the matches.
{ "$group": {
"_id": "$_id",
"android_id": { "$first": "$android_id" },
"interests": { "$push": "$interests" },
"common": {
"$sum": {
// Each $cond yields 1 when the unwound interest equals the given entry of user.interests, else 0.
// NOTE: hard-coded for exactly three entries (indexes 0..2) of user.interests.
"$add": [
{ "$cond": [{ "$eq": [ "$interests", user.interests[0] ] },1,0 ] },
{ "$cond": [{ "$eq": [ "$interests", user.interests[1] ] },1,0 ] },
{ "$cond": [{ "$eq": [ "$interests", user.interests[2] ] },1,0 ] }
]
}
}
}},
// Highest "common" count first, then keep five results.
{ "$sort": { "common": -1 }},
{ "$limit": 5 }
在管道中生成条件匹配的编码更实用:
// Build the $unwind/$group pipeline programmatically so that it works for any
// number of entries in user.interests instead of a hard-coded three.
var pipeline = [
  // Keep only other users that share at least one interest with `user`.
  { "$match": {
    "android_id": { "$ne": user.android_id },
    "interests": { "$in": user.interests }
  }},
  // One document per interest so each can be compared individually.
  { "$unwind": "$interests" }
];

// $group stage skeleton; the $add operand list is filled in below.
var group =
  { "$group": {
    "_id": "$_id",
    "android_id": { "$first": "$android_id" },
    "interests": { "$push": "$interests" },
    "common": {
      "$sum": {
        "$add": []
      }
    }
  }};

// One $cond per interest of the current user: 1 when the unwound interest
// equals it, 0 otherwise.
user.interests.forEach(function(interest) {
  group.$group.common.$sum.$add.push(
    { "$cond": [{ "$eq": [ "$interests", interest ] }, 1, 0 ] }
  );
});

pipeline.push(group);

// Highest "common" count first, then keep five results.
pipeline = pipeline.concat([
  { "$sort": { "common": -1 }},
  { "$limit": 5 }
]);

// `Users` is the model name used by the other snippets that query the
// android_id schema; the original `User` did not match it.
Users.aggregate(pipeline,function(err,result) {
  // Handle `err` / `result` here.
});
其中的关键在于:当前用户和被检查用户的 "interests" 都被拆分成单个元素逐一比较,看它们是否 "equal"(相等)。$cond 在相等时返回 1,否则返回 0。
这些返回值(每一对比较最多只会得到 1)再传递给 $sum 累加器,由它统计共同匹配的数量。您也可以改为在 $unwind 之后再使用一次带 $in 条件的 $match:
{ "$unwind": "$interests" },
{ "$match": { "interests": { "$in": user.interests } },
{ "$group": {
"_id": "$_id",
"android_id": { "$first": "$android_id" },
"common": { "$sum": 1 }
}}
但这自然会破坏数组内容,因为不匹配的内容会被过滤掉。所以这取决于你希望在回复中有什么。
这是获取 "common" 计数的基本过程,用于 $sort
和 $limit
等进一步处理,以便获得您的 "top 5".
只是为了好玩,下面是一个基本的 node.js 清单,用于演示共同兴趣匹配的效果:
// Self-contained demo: seeds a few interests and users, then lists the other
// users ordered by how many interests they share with "Bob".
var async = require('async'),
    mongoose = require('mongoose'),
    Schema = mongoose.Schema;

mongoose.connect('mongodb://localhost/sample');

var interestSchema = new Schema({
  name: String
});

var userSchema = new Schema({
  name: String,
  interests: [{ type: Schema.Types.ObjectId, ref: 'Interest' }]
});

var Interest = mongoose.model( 'Interest', interestSchema );
var User = mongoose.model( 'User', userSchema );

// Maps interest name -> _id of the created Interest document.
var interestHash = {};

async.series(
  [
    // Step 1: empty both collections.
    function(callback) {
      async.each([Interest,User],function(model,callback) {
        model.remove({},callback);
      },callback);
    },

    // Step 2: create the interests and remember their ids by name.
    function(callback) {
      async.each(
        [
          "Tennis",
          "Football",
          "Gaming",
          "Cooking",
          "Yoga"
        ],
        function(interest,callback) {
          Interest.create({ name: interest},function(err,obj) {
            // Return on error: otherwise `obj` is dereferenced after a
            // failure and the callback is invoked a second time.
            if (err) return callback(err);
            interestHash[obj.name] = obj._id;
            callback();
          });
        },
        callback
      );
    },

    // Step 3: create the users, translating interest names to ObjectIds.
    function(callback) {
      async.each(
        [
          { name: "Bob", interests: ["Tennis","Football","Gaming"] },
          { name: "Tom", interests: ["Football","Cooking","Yoga"] },
          { name: "Sue", interests: ["Tennis","Gaming","Yoga","Cooking"] }
        ],
        function(data,callback) {
          data.interests = data.interests.map(function(interest) {
            return interestHash[interest];
          });
          User.create(data,function(err,user) {
            callback(err);
          });
        },
        callback
      );
    },

    // Step 4: load "Bob", then rank everyone else by interests in common.
    function(callback) {
      async.waterfall(
        [
          function(callback) {
            User.findOne({ name: "Bob" },callback);
          },
          function(user,callback) {
            console.log(user);
            User.aggregate(
              [
                // Everyone except Bob who shares at least one interest.
                { "$match": {
                  "_id": { "$ne": user._id },
                  "interests": { "$in": user.interests }
                }},
                // "common" = number of interests shared with Bob.
                { "$project": {
                  "name": 1,
                  "interests": 1,
                  "common": {
                    "$size": {
                      "$setIntersection": [ "$interests", user.interests ]
                    }
                  }
                }},
                { "$sort": { "common": -1 } }
              ],
              function(err,result) {
                // Return on error so populate() is not run on a failed
                // result and the callback is not invoked twice.
                if (err) return callback(err);
                // Replace the interest ObjectIds with the Interest documents.
                Interest.populate(result,'interests',function(err,result) {
                  console.log(result);
                  callback(err);
                });
              }
            );
          }
        ],
        callback
      );
    }
  ],
  function(err) {
    if (err) throw err;
    mongoose.disconnect();
  }
);
将输出:
{ _id: 55dbd7be0e5516ac16ea62d1,
name: 'Bob',
__v: 0,
interests:
[ 55dbd7be0e5516ac16ea62cc,
55dbd7be0e5516ac16ea62cd,
55dbd7be0e5516ac16ea62ce ] }
[ { _id: 55dbd7be0e5516ac16ea62d3,
name: 'Sue',
interests:
[ { _id: 55dbd7be0e5516ac16ea62cc, name: 'Tennis', __v: 0 },
{ _id: 55dbd7be0e5516ac16ea62ce, name: 'Gaming', __v: 0 },
{ _id: 55dbd7be0e5516ac16ea62d0, name: 'Yoga', __v: 0 },
{ _id: 55dbd7be0e5516ac16ea62cf, name: 'Cooking', __v: 0 } ],
common: 2 },
{ _id: 55dbd7be0e5516ac16ea62d2,
name: 'Tom',
interests:
[ { _id: 55dbd7be0e5516ac16ea62cd, name: 'Football', __v: 0 },
{ _id: 55dbd7be0e5516ac16ea62cf, name: 'Cooking', __v: 0 },
{ _id: 55dbd7be0e5516ac16ea62d0, name: 'Yoga', __v: 0 } ],
common: 1 } ]
// Mongoose schema describing a user document.
var UserSchema = Schema({
  // Stored as a plain string.
  android_id: String,

  // Date field; Date.now is used as the default when no value is supplied.
  created: { type: Date, default: Date.now },

  // Array of ObjectId references pointing at the 'Interests' model.
  interests: [
    { type: Schema.Types.ObjectId, ref: 'Interests' }
  ]
});
// The asker's original attempt: exclude the user identified by `userID`, keep
// users whose "interests" array contains at least one value from `ids`, then
// group by android_id and keep the five largest counts.
Users.aggregate([
{ $match: {android_id: {$ne: userID}, interests: {$elemMatch: {$in: ids}} }},
// $sum: 1 adds one per matched document in the group, not one per interest in common.
{ $group: { _id: { android_id: '$android_id'},count: {$sum: 1}}},
{ $sort: {count: -1}},
{ $limit: 5 }],
// NOTE(review): the call is left unterminated in this excerpt (no callback or closing parenthesis is shown).
我需要找出与我(ids 数组)共同兴趣最多的前 5 个用户的 android_id。只返回 interests 数组中匹配到的元素所组成的数组,我也可以接受。
您的方向似乎是对的,但确实需要注意:数组在比较时有一些特殊之处。
这里的基本开始是找到所有不是当前用户的用户,并且您至少还需要当前用户的 "interests" 数组。您似乎已经这样做了,但是在这里让我们考虑您拥有将在列表中使用的当前用户的整个 user
对象。
这使得您的 "top 5" 基本上是 "Not me, and the most interests in common"(不是我,并且共同兴趣最多)的结果,这意味着您需要统计每个用户与当前用户之间兴趣的 "overlap"(重叠)数量。
这基本上就是两个数组(或者说 "集合")的 $setIntersection,它返回两者共有的元素。要统计共有元素的个数,还可以使用 $size 运算符。所以可以这样应用:
// Rank the other users by the number of interests they share with `user`
// and keep the five best matches.

// Exclude the current user and require at least one interest in common.
var excludeSelfRequireOverlap = {
  "$match": {
    "android_id": { "$ne": user.android_id },
    "interests": { "$in": user.interests }
  }
};

// "common" is the size of the intersection between each document's
// "interests" array and the current user's interests.
var addCommonCount = {
  "$project": {
    "android_id": 1,
    "interests": 1,
    "common": {
      "$size": { "$setIntersection": [ "$interests", user.interests ] }
    }
  }
};

Users.aggregate(
  [
    excludeSelfRequireOverlap,
    addCommonCount,
    { "$sort": { "common": -1 } },  // most interests in common first
    { "$limit": 5 }
  ],
  function (err, result) {
    // Handle `err` / `result` here.
  }
);
"common" 中返回的结果是当前用户与被检查用户之间共同兴趣的个数。随后由 $sort 对该数据排序,把共同兴趣最多的排在最前面,再由 $limit 只返回前 5 个。
如果由于某种原因您的 MongoDB 版本目前低于 MongoDB 2.6,其中引入了 $setIntersection
和 $size
运算符,那么您仍然可以这样做, 但它只需要更长的时间来处理数组。
主要是您需要 $unwind
数组并单独处理每个匹配项:
// Pipeline stages for MongoDB versions without $setIntersection / $size
// (shown without the enclosing aggregate() call).
// Keep only other users that share at least one interest with `user`.
{ "$match": {
"android_id": { "$ne": user.android_id },
"interests": { "$in": user.interests }
}},
// Emit one document per element of "interests" so each can be compared individually.
{ "$unwind": "$interests" },
// Regroup per original document, rebuilding the array and counting the matches.
{ "$group": {
"_id": "$_id",
"android_id": { "$first": "$android_id" },
"interests": { "$push": "$interests" },
"common": {
"$sum": {
// Each $cond yields 1 when the unwound interest equals the given entry of user.interests, else 0.
// NOTE: hard-coded for exactly three entries (indexes 0..2) of user.interests.
"$add": [
{ "$cond": [{ "$eq": [ "$interests", user.interests[0] ] },1,0 ] },
{ "$cond": [{ "$eq": [ "$interests", user.interests[1] ] },1,0 ] },
{ "$cond": [{ "$eq": [ "$interests", user.interests[2] ] },1,0 ] }
]
}
}
}},
// Highest "common" count first, then keep five results.
{ "$sort": { "common": -1 }},
{ "$limit": 5 }
在管道中生成条件匹配的编码更实用:
// Build the $unwind/$group pipeline programmatically so that it works for any
// number of entries in user.interests instead of a hard-coded three.
var pipeline = [
  // Keep only other users that share at least one interest with `user`.
  { "$match": {
    "android_id": { "$ne": user.android_id },
    "interests": { "$in": user.interests }
  }},
  // One document per interest so each can be compared individually.
  { "$unwind": "$interests" }
];

// $group stage skeleton; the $add operand list is filled in below.
var group =
  { "$group": {
    "_id": "$_id",
    "android_id": { "$first": "$android_id" },
    "interests": { "$push": "$interests" },
    "common": {
      "$sum": {
        "$add": []
      }
    }
  }};

// One $cond per interest of the current user: 1 when the unwound interest
// equals it, 0 otherwise.
user.interests.forEach(function(interest) {
  group.$group.common.$sum.$add.push(
    { "$cond": [{ "$eq": [ "$interests", interest ] }, 1, 0 ] }
  );
});

pipeline.push(group);

// Highest "common" count first, then keep five results.
pipeline = pipeline.concat([
  { "$sort": { "common": -1 }},
  { "$limit": 5 }
]);

// `Users` is the model name used by the other snippets that query the
// android_id schema; the original `User` did not match it.
Users.aggregate(pipeline,function(err,result) {
  // Handle `err` / `result` here.
});
其中的关键在于:当前用户和被检查用户的 "interests" 都被拆分成单个元素逐一比较,看它们是否 "equal"(相等)。$cond 在相等时返回 1,否则返回 0。
这些返回值(每一对比较最多只会得到 1)再传递给 $sum 累加器,由它统计共同匹配的数量。您也可以改为在 $unwind 之后再使用一次带 $in 条件的 $match:
{ "$unwind": "$interests" },
{ "$match": { "interests": { "$in": user.interests } },
{ "$group": {
"_id": "$_id",
"android_id": { "$first": "$android_id" },
"common": { "$sum": 1 }
}}
但这自然会破坏数组内容,因为不匹配的内容会被过滤掉。所以这取决于你希望在回复中有什么。
这是获取 "common" 计数的基本过程,用于 $sort
和 $limit
等进一步处理,以便获得您的 "top 5".
只是为了好玩,下面是一个基本的 node.js 清单,用于演示共同兴趣匹配的效果:
var async = require('async'),
mongoose = require('mongoose'),
Schema = mongoose.Schema;
// Demo body: seeds a few interests and users, then lists the other users
// ordered by how many interests they share with "Bob".
mongoose.connect('mongodb://localhost/sample');

var interestSchema = new Schema({
  name: String
});

var userSchema = new Schema({
  name: String,
  interests: [{ type: Schema.Types.ObjectId, ref: 'Interest' }]
});

var Interest = mongoose.model( 'Interest', interestSchema );
var User = mongoose.model( 'User', userSchema );

// Maps interest name -> _id of the created Interest document.
var interestHash = {};

async.series(
  [
    // Step 1: empty both collections.
    function(callback) {
      async.each([Interest,User],function(model,callback) {
        model.remove({},callback);
      },callback);
    },

    // Step 2: create the interests and remember their ids by name.
    function(callback) {
      async.each(
        [
          "Tennis",
          "Football",
          "Gaming",
          "Cooking",
          "Yoga"
        ],
        function(interest,callback) {
          Interest.create({ name: interest},function(err,obj) {
            // Return on error: otherwise `obj` is dereferenced after a
            // failure and the callback is invoked a second time.
            if (err) return callback(err);
            interestHash[obj.name] = obj._id;
            callback();
          });
        },
        callback
      );
    },

    // Step 3: create the users, translating interest names to ObjectIds.
    function(callback) {
      async.each(
        [
          { name: "Bob", interests: ["Tennis","Football","Gaming"] },
          { name: "Tom", interests: ["Football","Cooking","Yoga"] },
          { name: "Sue", interests: ["Tennis","Gaming","Yoga","Cooking"] }
        ],
        function(data,callback) {
          data.interests = data.interests.map(function(interest) {
            return interestHash[interest];
          });
          User.create(data,function(err,user) {
            callback(err);
          });
        },
        callback
      );
    },

    // Step 4: load "Bob", then rank everyone else by interests in common.
    function(callback) {
      async.waterfall(
        [
          function(callback) {
            User.findOne({ name: "Bob" },callback);
          },
          function(user,callback) {
            console.log(user);
            User.aggregate(
              [
                // Everyone except Bob who shares at least one interest.
                { "$match": {
                  "_id": { "$ne": user._id },
                  "interests": { "$in": user.interests }
                }},
                // "common" = number of interests shared with Bob.
                { "$project": {
                  "name": 1,
                  "interests": 1,
                  "common": {
                    "$size": {
                      "$setIntersection": [ "$interests", user.interests ]
                    }
                  }
                }},
                { "$sort": { "common": -1 } }
              ],
              function(err,result) {
                // Return on error so populate() is not run on a failed
                // result and the callback is not invoked twice.
                if (err) return callback(err);
                // Replace the interest ObjectIds with the Interest documents.
                Interest.populate(result,'interests',function(err,result) {
                  console.log(result);
                  callback(err);
                });
              }
            );
          }
        ],
        callback
      );
    }
  ],
  function(err) {
    if (err) throw err;
    mongoose.disconnect();
  }
);
将输出:
{ _id: 55dbd7be0e5516ac16ea62d1,
name: 'Bob',
__v: 0,
interests:
[ 55dbd7be0e5516ac16ea62cc,
55dbd7be0e5516ac16ea62cd,
55dbd7be0e5516ac16ea62ce ] }
[ { _id: 55dbd7be0e5516ac16ea62d3,
name: 'Sue',
interests:
[ { _id: 55dbd7be0e5516ac16ea62cc, name: 'Tennis', __v: 0 },
{ _id: 55dbd7be0e5516ac16ea62ce, name: 'Gaming', __v: 0 },
{ _id: 55dbd7be0e5516ac16ea62d0, name: 'Yoga', __v: 0 },
{ _id: 55dbd7be0e5516ac16ea62cf, name: 'Cooking', __v: 0 } ],
common: 2 },
{ _id: 55dbd7be0e5516ac16ea62d2,
name: 'Tom',
interests:
[ { _id: 55dbd7be0e5516ac16ea62cd, name: 'Football', __v: 0 },
{ _id: 55dbd7be0e5516ac16ea62cf, name: 'Cooking', __v: 0 },
{ _id: 55dbd7be0e5516ac16ea62d0, name: 'Yoga', __v: 0 } ],
common: 1 } ]