Node.js:如何让 for 循环等当前迭代完成后再运行下一次迭代

nodejs how to let for loop wait till running next instance of loop

我将 nodejs 与 mongodb 和 bigquery 一起使用。

因为 BigQuery 似乎只允许每条命令最多插入 10k(1 万)行。

所以我先对主查询做计数,再按每页约 10k 行算出总页数,然后逐页循环。

我通过计数查询得到 500k 行,所以大约是 50 页,也就是大约 50 次循环。

如何让循环等当前页处理完之后,再运行下一页?

代码:

// Page size: kept just under the 10k rows per insert that BigQuery appears to allow.
var limit = 9999;

// Counts the matching MongoDB documents, splits them into pages of `limit`
// rows, and inserts every page into the BigQuery table 'my_table'.
mongo_client.connect(mongo_url, function(err, db) {
    if (err) {
        console.error('ERROR:');
        console.log(err);
        return;
    }

    const query = {'_id.date_visited':{'$gte':'2016-01-01','$lt':'2016-02-01'}};

    db.collection('my_table').count(query,function(err, count){
        if (err) {
            console.error('ERROR:');
            console.log(err);
            return;
        }

        const pages = Math.ceil(count/limit);

        console.log("count: "+count);
        console.log("pages: "+pages);

        for(let page=0;page<pages;page++){
            const skip = page * limit;

            console.log("page: "+page);
            console.log("skip: "+skip);

            //HOW TO MAKE THIS loop wait till running next page of the loop
            // NOTE: toArray() returns immediately and only invokes its callback
            // later, so this for loop starts the query for every page without
            // waiting for the previous page to be read or inserted.
            db.collection('my_table')
              .find(query)
              // A fixed sort order keeps the page boundaries stable between queries.
              .sort({'_id.date_visited':1,'_id.hour_visited':1})
              // skip() selects the page offset; limit() caps the page size.
              // (Calling limit() twice only replaces the page size with the offset.)
              .skip(skip)
              .limit(limit)
              .toArray(function(err, db_results) {
                if (err) {
                    console.error('ERROR:');
                    console.log(err);
                    return;
                }

                // Nothing to insert for an empty page.
                if (db_results.length === 0) {
                    return;
                }

                bigqueryClient
                  .dataset(dataset)
                  .table('my_table')
                  .insert(db_results)
                  .then((insertErrors) => {
                    console.log('Inserted');
                    console.error(insertErrors);
                    if (insertErrors && insertErrors.length > 0) {
                      console.log('Insert errors:');
                      insertErrors.forEach((err) => console.error(err));
                    }
                  })
                  .catch((err) => {
                    console.error('ERROR:');
                    console.log(err);
                  });
            });
        }
    });
});

我可能会用 async.eachSeries 替换 for 循环,这样你就可以自己决定循环的下一次迭代何时开始;而且由于 async.eachSeries 一次只运行一个操作,你就不会再遇到同样的错误。

编辑:

通读代码后,我认为 async.timesSeries(根据我的评论更正,async.timesSeries 是正确的选项)是更好的选择。这是一个例子:

// Invokes the first callback `pages` times, one iteration at a time, and the
// second callback once at the very end (or as soon as an error is reported).
async.timesSeries(
    pages,
    (page, done) => {
        const skip = page * limit;
        // ... the rest of your code here

        // Once this iteration's work has finished, signal it by calling:
        done();
        /*
        That call tells async the current iteration is complete, so it may
        begin the next one. done() accepts two arguments: the first is an
        error (when it is not null, the final callback below is invoked
        immediately); the second is an item that gets collected and handed
        to the final callback below as its second argument.
        */
    },
    (err, coll) => {
        /*
        Reached when an iteration reported an error,
        or after every iteration has completed.
        */
    }
);

以上代码将替换您的 for 循环

我认为对于这种情况,for 循环不是一个好的解决方案,您可以使用这样的递归调用来循环:

/**
 * Runs the query for one page and, from inside its callback, recurses into
 * the next page, so page N+1 only starts after page N has finished.
 *
 * Assumes it is defined where `db` and `pages` (the page count) are in scope.
 *
 * @param {number} queryIndex - Zero-based index of the page to process.
 */
function performQuery(queryIndex) {
    // The stop condition is the number of pages, not the page size `limit`.
    if( queryIndex >= pages ) return;

    db.exec('query', function(err, db_result) {
        // Stop the chain on failure instead of carrying on with later pages.
        if (err) {
            console.error(err);
            return;
        }

        // your code
        performQuery(queryIndex+1);
    })
}
// Start the chain at index 0; each callback then triggers the next index.
performQuery(0);

如果你不想使用递归式的 Promise 写法,并且事先知道项目的数量,可以这样做:

// Build the list of skip offsets, one per page: 0, limit, 2 * limit, ...
var skips = Array.from({ length: pages }, function (_, page) {
    return page * limit;
});

/**
 * Reads one page of documents from MongoDB and hands it to `callback`.
 *
 * The result is delivered through the callback because toArray() is
 * asynchronous; a synchronous `return` would run before any rows arrive.
 *
 * @param {number} skip - Number of documents to skip (the page offset).
 * @param {function(?Error, Array=)} callback - Called with (err) on failure
 *     or (null, documents) with the page's documents on success.
 */
var myMongoDBFunc = function (skip, callback) {
    db.collection('my_table')
       .find(query)
       // A fixed sort order keeps the page boundaries stable between queries.
       .sort({'_id.date_visited':1,'_id.hour_visited':1})
       // skip() selects the page offset; limit() caps the page size.
       .skip(skip)
       .limit(limit)
       .toArray(function(err, db_results) {
           if (err) {
               callback(err);
               return;
           }

           // Put your write code (e.g. the BigQuery insert) here, and call
           // callback only once it has finished.
           callback(null, db_results);
       });
}

// Call the function once per skip offset, one at a time; async.concatSeries
// concatenates the documents of every page into a single array.
async.concatSeries(skips, myMongoDBFunc, function(err, documents) {
    // Will be called at the end
    if (err) {
        console.error(err);
    }
});

如果想进一步优化、并行运行所有查询,只需将 concatSeries 替换为 concat(但不保证结果的顺序)。

如果你不关心返回的文档(显然你只是想写入数据),也许可以使用 async.series 或 async.parallel(请自行确认,我对 async 不是特别熟悉)。