Google Datastore 组合(并集)多组实体结果以实现 OR 条件

Google Datastore combine (union) multiple sets of entity results to achieve OR condition

我正在 Google App Engine 上使用 NodeJS 和 Datastore 数据库。

由于 Datastore 不支持 OR 运算符,我需要运行多次查询并合并结果。

我计划运行多个查询,然后将结果合并到一个实体对象数组中。其中一个查询我已经可以正常运行了。

问题:将 Datastore 返回的两组(或更多组)实体组合起来(包括重复数据删除)的合理有效方法是什么?我相信这将是集合论方面的 "union" 操作。

这是基本的查询框架;它将使用不同的过滤器运行多次,以实现所需的 OR 条件。

  // Username of the authenticated user; used as the filter value below.
  const requester = req.user.userName;
  // Build a query over the 'Task' kind.
  const task_history = datastore.createQuery('Task');
  // Only keep tasks whose 'requester' property equals the current user.
  task_history.filter('requester', requester);
  // Run the query and answer the HTTP request in every outcome.
  datastore.runQuery(task_history, function(err, entities) {
    if (err) {
      console.log('Task History JSON unable to return data results. Error message: ', err);
      // Respond on failure too; otherwise the client waits until it times out.
      res.status(500).json({ error: 'Unable to retrieve task history' });
      return;
    }
    // Query succeeded: send the matches, or an empty JSON array when nothing
    // matched, so the request is never left without a response.
    res.json(entities || []); //HOW TO COMBINE (UNION) MULTIPLE SETS OF ENTITIES EFFICIENTLY?
  });

这是我的原始帖子:

恕我直言,最有效的方法是在第一阶段使用仅键查询,然后将获得的键组合到一个列表中(包括重复数据删除),然后通过键查找简单地获取实体。来自 Projection queries:

Keys-only queries

A keys-only query (which is a type of projection query) returns just the keys of the result entities instead of the entities themselves, at lower latency and cost than retrieving entire entities.

It is often more economical to do a keys-only query first, and then fetch a subset of entities from the results, rather than executing a general query which may fetch more entities than you actually need.

Here's how to create a keys-only query:

// Start from a query with no kind restriction.
const baseQuery = datastore.createQuery();
// Keys-only: project onto the special '__key__' property and cap the result at one.
const query = baseQuery.select('__key__').limit(1);

此方法解决了您在尝试直接组合通过常规非键查询获得的实体列表时可能遇到的几个问题:

  • 您无法正确删除重复数据,因为您无法区分具有相同值的不同实体与出现在多重查询结果中的相同实体
  • 通过属性值比较实体可能很棘手,而且肯定比仅比较实体键更慢、计算成本更高
  • 如果您不能在单个请求中处理所有结果,那么您将在没有实际使用它们的情况下读取它们而产生不必要的数据存储成本
  • 当只处理实体键时,在多个请求中拆分实体处理(例如,通过任务队列)要简单得多

也有一些缺点:

  • 它可能会有点慢,因为您要访问数据存储两次:一次获取键,一次获取实际实体
  • 您无法利用仅通过非键投影查询获取所需属性的优势

这是我根据已接受答案中提供的建议创建的解决方案。

/*History JSON*/
/**
 * Respond with the TransferRequest entities where the logged-in user is either
 * the requester OR the data owner.
 *
 * Datastore has no OR operator, so two keys-only queries run in parallel, their
 * results are unioned by key, and the full entities are then fetched by key.
 *
 * @param {object} req - request; `req.user.userName` identifies the logged-in user
 * @param {object} res - response; every code path below sends exactly one reply
 */
module.exports.treqHistoryJSON = function(req, res) {
  if (!req.user) {
    // No authenticated user: normalise req.user and send an empty JSON reply.
    req.user = {};
    res.json();
    return;
  }

  const loggedin_username = req.user.userName;

  // Builds an async.parallel task that fetches the keys of TransferRequest
  // entities whose `property` equals the logged-in user. If getKeysOnly
  // supplies an error as a second callback argument it is forwarded;
  // otherwise the result is treated as a success.
  const keysMatching = (property) => (callback) => {
    getKeysOnly('TransferRequest', property, loggedin_username, (keys, queryErr) => {
      callback(queryErr || null, keys);
    });
  };

  async.parallel({
    requesterKeys: keysMatching('requester_username'),
    dataownerKeys: keysMatching('dataowner_username'),
  }, function(err, keySets) {
    if (err) {
      console.log('Transfer Request History JSON unable to get entity keys Transfer Request. Error message: ', err);
      // Reply on failure so the client is not left waiting.
      res.status(500).json({ error: 'Unable to retrieve transfer request history' });
      return;
    }

    // Merge both key sets into one de-duplicated list, then unwrap the key
    // stored under the datastore.KEY symbol of each keys-only result.
    const entity_keys_union = unionEntityKeys(keySets.requesterKeys, keySets.dataownerKeys);
    const entity_keys_only = entity_keys_union.map((ent) => ent[datastore.KEY]);

    if (entity_keys_only.length === 0) {
      // The Datastore client requires at least one key for a lookup, so skip
      // the call when nothing matched.
      // NOTE(review): assumes processEntitiesToDisplay accepts an empty array - confirm.
      processEntitiesToDisplay(res, []);
      return;
    }

    // Fetch the full entities for the combined key list.
    datastore.get(entity_keys_only, function(lookupErr, entities) {
      if (lookupErr) {
        console.log('Transfer Request History JSON unable to lookup multiple entities by key for Transfer Request. Error message: ', lookupErr);
        res.status(500).json({ error: 'Unable to retrieve transfer request history' });
        return;
      }
      processEntitiesToDisplay(res, entities);
    });
  });

};

/*
 * Run a keys-only query for entities of one kind whose property equals a value.
 *
 * The callback is always invoked exactly once, so callers that wait on it
 * (for example inside async.parallel) cannot hang when the query fails.
 *
 * @kind string name of kind
 * @property_type string property filtering by in query
 * @filter_value string of filter value to match in query
 * @getEntitiesCallback function(entities, err) called with the query results on
 *   success, or with an empty array plus the error as second argument on failure
 */
function getKeysOnly(kind, property_type, filter_value, getEntitiesCallback) {
  // Build the query: one kind, one equality filter, projected onto the key only.
  const keys_query = datastore.createQuery(kind);
  keys_query.filter(property_type, filter_value);
  keys_query.select('__key__');

  datastore.runQuery(keys_query, function(err, entities) {
    if (err) {
      console.log('Get Keys Only query unable to return data results. Error message: ', err);
      // Still call back: an empty result keeps one-argument callers working,
      // and the second argument lets error-aware callers detect the failure.
      getEntitiesCallback([], err);
      return;
    }
    getEntitiesCallback(entities);
  });
}

/*
 * Union two arrays of keys-only entities, de-duplicating on the full key.
 *
 * Each element is expected to carry its key under the datastore.KEY symbol.
 * Two elements count as the same entity when their keys agree on namespace,
 * kind and id-or-name at every level reached through `parent` links (if any).
 * Comparing only `id` would treat every name-based key as a duplicate, because
 * their `id` is undefined.
 *
 * Result order: elements of arr1 that do not appear in arr2, followed by all
 * of arr2 unchanged.
 *
 * @arr1 array of keys-only entities (null/undefined is treated as empty)
 * @arr2 array of keys-only entities (null/undefined is treated as empty)
 * @returns new array; the inputs are not modified
 */
function unionEntityKeys(arr1, arr2) {
  const first = arr1 || [];
  const second = arr2 || [];
  const KEY = datastore.KEY;

  // Serialise a key (and any ancestors) into a string usable as a Set member.
  // Ids are stringified so that 5 and '5' identify the same entity, and an
  // 'id'/'name' tag keeps id "1" distinct from name "1".
  const identityOf = (entity) => {
    const parts = [];
    for (let key = entity[KEY]; key; key = key.parent) {
      const hasId = key.id !== undefined && key.id !== null;
      parts.push(
        key.namespace,
        key.kind,
        hasId ? 'id' : 'name',
        String(hasId ? key.id : key.name)
      );
    }
    return JSON.stringify(parts);
  };

  // One pass over arr2 builds the lookup; one pass over arr1 filters against it.
  const identitiesInSecond = new Set(second.map(identityOf));
  const onlyInFirst = first.filter((entity) => !identitiesInSecond.has(identityOf(entity)));

  return onlyInFirst.concat(second);
}