在 MongoDB 中加入两个 collections 与 MapReduce

Join two collections with MapReduce in MongoDB

我已经知道 MongoDB 不支持连接操作,但我必须使用 mapReduce 范例模拟 $lookup(来自聚合框架)。

我的两个collections是:

// Employees sample 
{
  "_id" : "1234",
  "first_name" : "John",
  "last_name" : "Bush",
  "departments" : 
  [ 
    { "dep_id" : "d001", "hire_date" : "date001" },
    { "dep_id" : "d004", "hire_date" : "date004" }
  ]
}
{ 
  "_id" : "5678", 
  "first_name" : "Johny", 
  "last_name" : "Cash", 
  "departments" : [ { "dep_id" : "d001", "hire_date" : "date03" } ] 
}
{ 
  "_id" : "9012", 
  "first_name" : "Susan", 
  "last_name" : "Bowdy", 
  "departments" : [ { "dep_id" : "d004", "hire_date" : "date04" } ] 
}

// Departments sample 
{
  "_id" : "d001",
  "dep_name" : "Sales",
  "employees" : [ "1234", "5678" ]
},
{
  "_id" : "d004",
  "name" : "Quality M",
  "employees" : [ "1234", "9012" ]
}

实际上我想要这样的结果:

{
  "_id" : "1234",
  "value" : 
  {
    "first_name" : "John",
    "departments" :
    [
      { "dep_id" : "d001", "dep_name" : "Sales" },
      { "dep_id" : "d004", "dep_name" : "Quality M" }
    ]
  }
}
{ 
  "_id" : "5678", 
  "value" : 
  { 
    "first_name" : "Johnny", 
    "departments" : [ { "dep_id" : "d001", "dep_name" : "Sales" } ]
  } 
}
{ 
  "_id" : "9012", 
  "value" : 
  { 
    "first_name" : "Susan", 
    "departments" : [ { "dep_id" : "d004", "dep_name" : "Quality M" } ] 
  } 
}

公共字段是 dep_id(来自员工)和 _id(来自部门)。

我的代码是下一个,但它没有按我的需要工作。

var mapD = function() {
  for (var i=0; i<this.employees.length; i++) {
    emit(this.employees[i], { dep_id: 0, dep_name: this.dep_name });
  }
}

var mapE = function() {
  for (var i=0; i<this.departments.length; i++) {
    emit(this._id, { dep_id: this.departments[i].dep_id, dep_name: 0 });
  }
}

var reduceLookUp = function(key, values) {
  var result = {dep_id: 0, dep_name: 0};
  values.forEach(function(value) {
    if (value.dep_name !== null && value.dep_name !== undefined) {
      result.dep_name = values.dep_name;
    }
    if (value.dep_id !== null && value.dep_id !== undefined) {
      result.dep_id = value.dep_id;
    }
  });
  return result;
};

db.Departments.mapReduce(mapD, reduceLookUp, { out: { reduce: "joined" } });
db.Employees.mapReduce(mapE, reduceLookUp, { out: { reduce: "joined" } });

非常感谢您的帮助!提前致谢。

在你的问题中,first_name 只能从 Employees 集合中获取,dep_name 只能从 Departments 集合中获取。

您可以使用 MapReduce 和聚合框架来实现它。

1. MapReduce 解决方案

如果您按如下方式修改 map 和 reduce 函数

var mapD = function() {
  for (var i=0; i<this.employees.length; i++)
    emit(this.employees[i], { dep_id: this._id, dep_name: this.dep_name });  
}

var mapE = function() { emit(this._id, { first_name: this.first_name }); }

var reduceLookUp = function(key, values) {
  var results = {};
  var departments = [];
  values.forEach(function(value) {
    var department = {};
    if (value.dep_id !== undefined) department["dep_id"] = value.dep_id;
    if (value.dep_name !== undefined) department["dep_name"] = value.dep_name;
    if (Object.keys(department).length > 0) departments.push(department);
    if (value.first_name !== undefined) results["first_name"] = value.first_name;
    if (value.departments !== undefined) results["departments"] = value.departments;
  });
  if (Object.keys(departments).length > 0) results["departments"] = departments;
  return results;
}

然后首先调用 MapReduce

db.Departments.mapReduce(mapD, reduceLookUp, { out: { reduce: "joined" } });

将插入到 joined 集合中

{ 
  "_id" : "1234", 
  "value" : 
  {
    "departments" : 
    [ 
      { "dep_id" : "d001", "dep_name" : "Sales" }, 
      { "dep_id" : "d004", "dep_name" : "Quality M" } 
    ] 
  }
}

第二次通话时

db.Employees.mapReduce(mapE, reduceLookUp, { out: { reduce: "joined" } });

应该插入

{ "_id" : "1234", "value" : { "first_name" : "John" } }

但是,根据documentationreduce输出选项将

Merge the new result with the existing result if the output collection already exists. If an existing document has the same key as the new result, apply the reduce function to both the new and the existing documents and overwrite the existing document with the result

因此,在您的情况下,reduce 函数将使用参数再次调用

key = "1234",
values =
[
  {
    "departments" : 
    [ 
      { "dep_id" : "d001", "dep_name" : "Sales" }, 
      { "dep_id" : "d004", "dep_name" : "Quality M" } 
    ] 
  },
  { "first_name" : "John" }
]

最终结果是

{ 
  "_id" : "1234", 
  "value" : 
  { 
    "first_name" : "John", 
    "departments" : 
    [ 
      { "dep_id" : "d001", "dep_name" : "Sales" }, 
      { "dep_id" : "d004", "dep_name" : "Quality M" }
    ] 
  } 
}

2。聚合框架方案

对于您的问题更好的解决方案是使用 aggregation framework instead of Map-Reduce. Here you would use $lookup 阶段从 Employees

获取一些数据
db.Departments.aggregate([
  { $unwind: "$employees" },
  { 
    $lookup: 
      { 
        from: "Employees", 
        localField: "employees", 
        foreignField: "_id", 
        as: "employee"
      }
  },
  { $unwind: "$employee" },
  { 
    $group: 
      { 
        "_id": "$employees",
        "first_name": { $first: "$employee.first_name" }, 
        "departments": { $push: { dep_id: "$_id", dep_name: "$dep_name" } } 
      } 
  } 
]);

这将导致

{ 
  "_id" : "1234",
  "first_name" : "John",
  "departments" : 
    [ 
      { "dep_id" : "d001", "dep_name" : "Sales" }, 
      { "dep_id" : "d004", "dep_name" : "Quality M" } 
    ] 
}