如何根据共同的数组元素对文档进行匹配和排序

How to match and sort documents based on array elements in common

// Schema from the question: each user document carries an android_id, a
// creation timestamp, and an array of references to interest documents.
var UserSchema = Schema (
{
         android_id: String,
         // Defaults to Date.now when no value is supplied.
         created: {type: Date, default:Date.now},
         // Array of ObjectId values referring to the 'Interests' model.
         interests: [{ type: Schema.Types.ObjectId, ref: 'Interests' }],

});

 // The asker's original attempt (the call is not closed in the question).
 Users.aggregate([
        // Keep users other than userID whose interests contain at least one of ids.
        { $match: {android_id: {$ne: userID}, interests: {$elemMatch: {$in: ids}} }},
        // Adds 1 per grouped document, so count is the number of documents
        // per android_id rather than the number of shared interests.
        { $group: { _id: { android_id: '$android_id'},count: {$sum: 1}}},
        // Highest count first, then keep five results.
        { $sort: {count: -1}},
        { $limit: 5 }], 

我需要找到与我(ids 数组)共同兴趣最多的前 5 个用户的 android_id。结果中只包含 interests 数组里匹配上的元素,我也可以接受。

您的思路似乎是对的,但确实需要注意:数组在比较时有一些特殊之处。

这里的基本开始是找到所有不是当前用户的用户,并且您至少还需要当前用户的 "interests" 数组。您似乎已经这样做了,但是在这里让我们考虑您拥有将在列表中使用的当前用户的整个 user 对象。

这使得您的 "top 5" 本质上就是 "不是我,并且共同兴趣最多" 的结果,这意味着您需要统计每个用户与当前用户在兴趣上的 "重叠" 数量。

这基本上就是对两个数组(或称 "集合")求 $setIntersection,它返回两者共有的元素。要统计共有元素的个数,还可以使用 $size 运算符。所以可以这样应用:

// Rank other users by how many interests they share with the current `user`.
Users.aggregate(
    [
        // Exclude the current user and keep only users whose interests
        // contain at least one of the current user's interests.
        { "$match": {
            "android_id": { "$ne": user.android_id },
            "interests": { "$in": user.interests }
        }},
        // "common" is the size of the intersection between this document's
        // interests and the current user's interests.
        { "$project": {
            "android_id": 1,
            "interests": 1,
            "common": {
                "$size": {
                    "$setIntersection": [ "$interests", user.interests ]
                }
            }
        }},
        // Largest overlap first, then keep the top five.
        { "$sort": { "common": -1 } },
        { "$limit": 5 }
    ],
    // Placeholder callback: err and result are not used in this snippet.
    function(err,result) {

    }
);

"common" 中返回的结果是当前用户与被考察用户之间共同兴趣的个数。随后由 $sort 按该值排序,把共同兴趣最多的排在最前面,再由 $limit 只返回前 5 个。

如果由于某种原因您的 MongoDB 版本低于引入了 $setIntersection 和 $size 运算符的 MongoDB 2.6,您仍然可以做到这一点,只是处理数组的写法会更冗长一些。

主要是您需要 $unwind 数组并单独处理每个匹配项:

        // Pipeline stages for servers without $setIntersection/$size.
        // Same initial filter: not the current user, at least one shared interest.
        { "$match": {
            "android_id": { "$ne": user.android_id },
            "interests": { "$in": user.interests }
        }},
        // One output document per element of "interests".
        { "$unwind": "$interests" },
        // Regroup by document _id: rebuild the interests array with $push and
        // add 1 to "common" for each element equal to one of the listed interests.
        { "$group": {
            "_id": "$_id",
            "android_id": { "$first": "$android_id" },
            "interests": { "$push": "$interests" },
            "common": {
              "$sum": {
                "$add": [
                  // Hard-coded for the first three interests of the current user only.
                  { "$cond": [{ "$eq": [ "$interests", user.interests[0] ] },1,0 ] },
                  { "$cond": [{ "$eq": [ "$interests", user.interests[1] ] },1,0 ] },
                  { "$cond": [{ "$eq": [ "$interests", user.interests[2] ] },1,0 ] }
                ]
              }
            }
        }},
        // Largest overlap first, then keep the top five.
        { "$sort": { "common": -1 }},
        { "$limit": 5 }

更实用的做法是用代码来生成管道中的这些条件匹配:

    // Build the pipeline programmatically so that it works for any number of
    // interests on the current user instead of a fixed set of array indexes.

    // Fixed leading stages: exclude the current user, require at least one
    // shared interest, then emit one document per interests element.
    var pipeline = [
        { "$match": {
            "android_id": { "$ne": user.android_id },
            "interests": { "$in": user.interests }
        }},
        { "$unwind": "$interests" }
    ];

    // $group skeleton; the "$add" list is filled in below with one $cond per
    // interest of the current user.
    var group = 
        { "$group": {
            "_id": "$_id",
            "android_id": { "$first": "$android_id" },
            "interests": { "$push": "$interests" },
            "common": {
              "$sum": {
                "$add": []
              }
            }
        }};

    // Each $cond yields 1 when the unwound element equals that interest, else 0.
    user.interests.forEach(function(interest) {
      group.$group.common.$sum.$add.push(
        { "$cond": [{ "$eq": [ "$interests", interest ] }, 1, 0 ] }
      );
    });

    pipeline.push(group);

    // Largest overlap first, then keep the top five.
    pipeline = pipeline.concat([
        { "$sort": { "common": -1 }},
        { "$limit": 5 }
    ]);

    // Same model name as the other android_id examples (Users).
    Users.aggregate(pipeline,function(err,result) {
        // Placeholder: handle err and consume result here.
    });

其中的关键在于:把当前用户和被检查用户 "双方" 的 "interests" 拆开逐个比较,看它们是否 "相等"。$cond 的结果在条件为真时取 1,为假时取 0。

所有返回值(每次比较预期最多只有一个为 1)再传递给 $sum 累加器,由它统计共同匹配的数量。您也可以改为在 $unwind 之后再次使用带 $in 条件的 $match:

        { "$unwind": "$interests" },
        { "$match": { "interests": { "$in": user.interests } },
        { "$group": {
            "_id": "$_id",
            "android_id": { "$first": "$android_id" },
            "common": { "$sum": 1 }
        }}

但这自然会破坏数组内容,因为不匹配的内容会被过滤掉。所以这取决于你希望在回复中有什么。

这就是获取 "common" 计数的基本过程,之后可以用 $sort 和 $limit 等做进一步处理,从而得到您的 "top 5"。

只是为了好玩,下面是一个基本的 node.js 清单,用于演示共同匹配的效果:

var async = require('async'),
    mongoose = require('mongoose'),
    Schema = mongoose.Schema;

// Connect to the local "sample" database used by this demo.
mongoose.connect('mongodb://localhost/sample');

// An interest is just a named document.
var interestSchema = new Schema({
  name: String
});

// A user has a name and an array of ObjectId references to Interest documents.
var userSchema = new Schema({
  name: String,
  interests: [{ type: Schema.Types.ObjectId, ref: 'Interest' }]
});

var Interest = mongoose.model( 'Interest', interestSchema );
var User = mongoose.model( 'User', userSchema );

// Filled while interests are created: interest name -> its _id.
var interestHash = {};

// Demo driver: reset the collections, seed interests and users, then rank
// the other users by the number of interests they share with "Bob".
async.series(
  [
    // Step 1: empty both collections.
    function(callback) {
      async.each([Interest,User],function(model,callback) {
        model.remove({},callback);
      },callback);
    },

    // Step 2: create the interests and record name -> _id in interestHash.
    function(callback) {
      async.each(
        [
          "Tennis",
          "Football",
          "Gaming",
          "Cooking",
          "Yoga"
        ],
        function(interest,callback) {
          Interest.create({ name: interest },function(err,obj) {
            // Return on error: falling through would read obj (which may be
            // undefined) and invoke the callback a second time.
            if (err) return callback(err);
            interestHash[obj.name] = obj._id;
            callback();
          });
        },
        callback
      );
    },

    // Step 3: create the users, replacing interest names with their _id values.
    function(callback) {
      async.each(
        [
          { name: "Bob", interests: ["Tennis","Football","Gaming"] },
          { name: "Tom", interests: ["Football","Cooking","Yoga"] },
          { name: "Sue", interests: ["Tennis","Gaming","Yoga","Cooking"] }
        ],
        function(data,callback) {
          data.interests = data.interests.map(function(interest) {
            return interestHash[interest];
          });
          User.create(data,function(err) {
            callback(err);
          });
        },
        callback
      );
    },

    // Step 4: load Bob, then aggregate the other users by shared interests.
    function(callback) {
      async.waterfall(
        [
          function(callback) {
            User.findOne({ name: "Bob" },callback);
          },
          function(user,callback) {
            console.log(user);
            User.aggregate(
              [
                // Everyone except Bob with at least one interest in common.
                { "$match": {
                  "_id": { "$ne": user._id },
                  "interests": { "$in": user.interests }
                }},
                // "common" = number of interests shared with Bob.
                { "$project": {
                  "name": 1,
                  "interests": 1,
                  "common": {
                    "$size": {
                      "$setIntersection": [ "$interests", user.interests ]
                    }
                  }
                }},
                { "$sort": { "common": -1 } }
              ],
              function(err,result) {
                // Return on error so populate is not run on a missing result
                // and the callback is not invoked twice.
                if (err) return callback(err);
                Interest.populate(result,'interests',function(err,result) {
                  console.log(result);
                  callback(err);
                });
              }
            );
          }
        ],
        callback
      );
    }

  ],
  function(err) {
    if (err) throw err;
    mongoose.disconnect();
  }
);

将输出:

{ _id: 55dbd7be0e5516ac16ea62d1,
  name: 'Bob',
  __v: 0,
  interests:
   [ 55dbd7be0e5516ac16ea62cc,
     55dbd7be0e5516ac16ea62cd,
     55dbd7be0e5516ac16ea62ce ] }
[ { _id: 55dbd7be0e5516ac16ea62d3,
    name: 'Sue',
    interests:
     [ { _id: 55dbd7be0e5516ac16ea62cc, name: 'Tennis', __v: 0 },
       { _id: 55dbd7be0e5516ac16ea62ce, name: 'Gaming', __v: 0 },
       { _id: 55dbd7be0e5516ac16ea62d0, name: 'Yoga', __v: 0 },
       { _id: 55dbd7be0e5516ac16ea62cf, name: 'Cooking', __v: 0 } ],
    common: 2 },
  { _id: 55dbd7be0e5516ac16ea62d2,
    name: 'Tom',
    interests:
     [ { _id: 55dbd7be0e5516ac16ea62cd, name: 'Football', __v: 0 },
       { _id: 55dbd7be0e5516ac16ea62cf, name: 'Cooking', __v: 0 },
       { _id: 55dbd7be0e5516ac16ea62d0, name: 'Yoga', __v: 0 } ],
    common: 1 } ]