填写记录中缺失的日期
Fill missing dates in records
我有一个 ProductViews 集合(collection),其中的文档如下:
{
productId: "5b8c0f3204a10228b00a1745",
createdAt: "2018-09-07T17:18:40.759Z"
}
我有一个获取特定产品每日浏览量的查询:
// Count views per calendar day for a single product.
// Assumes `createdAt` is an ISO-8601 string such as "2018-09-07T17:18:40.759Z",
// so its first 10 characters are the day ("YYYY-MM-DD").
// Days without any matching document produce no group and are therefore absent from the result.
ProductView.aggregate([
// Keep only the views of the requested product.
{ $match: { productId } },
// Reduce every document to its day string.
{ $project: { day: { $substr: ["$createdAt", 0, 10] } } },
{
// One group per day; `count` is the number of views on that day.
$group: {
_id: "$day",
count: { $sum: 1 },
// NOTE(review): `createdAt` is not kept by the $project above and `time` is not
// selected by the final $project, so this accumulator does not influence the output.
time: { $avg: "$createdAt" },
}
},
// Oldest day first.
{ $sort: { _id: 1 } },
{
// Expose the grouped values under the names `date` and `views`.
$project: {
date: '$_id',
views: '$count',
},
},
]).exec((err, result) => ...)
当前给出:
[
{ date: '2018-09-01', views: 1 },
{ date: '2018-09-02', views: 3 },
{ date: '2018-09-04', views: 2 },
{ date: '2018-09-05', views: 5 },
// ...
]
问题:
问题是,对于观看次数为 0 的日期,此聚合不会返回 { date: '2018-09-03', views: 0 } 这样的记录。这会导致数据显示不正确:[![在此处输入图像描述][1]][1]
结果应如下所示:
[
{ date: '2018-09-01', views: 1 },
{ date: '2018-09-02', views: 3 },
{ date: '2018-09-03', views: 0 }, <=
{ date: '2018-09-04', views: 2 },
{ date: '2018-09-05', views: 5 },
// ...
]
P.S.: 传入开始和结束日期,根据这个范围输出结果就完美了
[1]: https://i.stack.imgur.com/uHPBs.png
您需要一些额外的步骤来返回默认值。首先,使用 $group 并将 _id 设置为 null,把所有结果收集到一个文档中。然后以一个日期数组作为输入使用 $map。在 $map 内部,可以用 $indexOfArray 判断该日期是否存在于当前结果集中:如果存在(索引 != -1),就返回对应的值;否则返回一个 views 为 0 的默认子文档。最后用 $unwind 重新得到文档列表,并用 $replaceRoot 将嵌套的 stats 提升到顶层。
// Daily view counts for one product, with a `views: 0` entry for every listed day that has no views.
// Stages 1-5 compute the per-day counts; the remaining stages merge them with a fixed list of days.
ProductView.aggregate([
{ $match: { productId: '5b8c0f3204a10228b00a1745' } },
// Assumes `createdAt` is an ISO-8601 string, so its first 10 characters are "YYYY-MM-DD".
{ $project: { day: { $substr: ["$createdAt", 0, 10] } } },
{
$group: {
_id: "$day",
count: { $sum: 1 },
// NOTE(review): `createdAt` is not kept by the $project above and `time` is not
// selected below, so this accumulator does not influence the output.
time: { $avg: "$createdAt" },
}
},
{ $sort: { _id: 1 } },
{
$project: {
date: '$_id',
views: '$count',
},
},
{
// Collapse all per-day documents into one document holding them in the `stats` array.
$group: {
_id: null,
stats: { $push: "$$ROOT" }
}
},
{
$project: {
stats: {
// Walk the list of wanted days and emit exactly one entry per day.
$map: {
input: [ "2018-09-01", "2018-09-02", "2018-09-03", "2018-09-04", "2018-09-05" ],
as: "date",
in: {
$let: {
// Position of the current day among the computed days, or -1 when the day has no views.
vars: { dateIndex: { "$indexOfArray": [ "$stats._id", "$$date" ] } },
in: {
$cond: {
if: { $ne: [ "$$dateIndex", -1 ] },
// Day exists: reuse the computed entry.
then: { $arrayElemAt: [ "$stats", "$$dateIndex" ] },
// Day missing: synthesize an entry with zero views.
else: { _id: "$$date", date: "$$date", views: 0 }
}
}
}
}
}
}
}
},
{
// Back to one document per day ...
$unwind: "$stats"
},
{
// ... with the nested entry promoted to the top level.
$replaceRoot: {
newRoot: "$stats"
}
}
]).exec((err, result) => ...)
您可以在应用程序逻辑中用一个简单的循环生成静态日期列表。我相信在 MongoDB 中也可以做到(使用 $range),但这可能会让聚合管道变得更复杂。请告诉我这样是否可行,或者您是否想尝试在 MongoDB 中生成该日期数组。
可以结合一些 JavaScript 和聚合(aggregation)技巧来实现。
首先,您需要生成所给日期范围内的全部日期。
/**
 * Lists every day from `startDate` to `stopDate`, both inclusive.
 *
 * @param {*} startDate - first day; anything `moment()` accepts
 * @param {*} stopDate - last day; anything `moment()` accepts
 * @returns {string[]} days formatted as 'YYYY-MM-DD', in ascending order;
 *   empty when `startDate` is after `stopDate`
 */
function getDates(startDate, stopDate) {
  const lastDay = moment(stopDate);
  const days = [];
  // Each step works on a fresh moment clone, so no shared instance is mutated.
  for (
    let cursor = moment(startDate);
    cursor <= lastDay;
    cursor = moment(cursor).add(1, 'days')
  ) {
    days.push(moment(cursor).format('YYYY-MM-DD'));
  }
  return days;
}
// Every day of the requested range, inclusive, as 'YYYY-MM-DD' strings.
const dummyArray = getDates('2018-09-01', '2018-09-05');
// dummyArray => [ "2018-09-01", "2018-09-02", "2018-09-03", "2018-09-04", "2018-09-05" ]
// (shown as a comment: assigning to the `const` binding again would throw a TypeError)
现在,您可以使用以下聚合找到数据库中不可用的日期。
// Daily view counts for one product, with `views: 0` for every day of `dummyArray`
// that has no views. The result is one `{ date, views }` document per entry of
// `dummyArray`, in the same order as `dummyArray`.
//
// The previous version emitted a 1/0 "day exists" flag under `count` instead of the
// real number of views, and its final $group discarded the ordering.
//
// Limitation: when no document matches `productId`, the `_id: null` group produces
// nothing and the result is empty rather than a list of zeros.
db.collection.aggregate([
  { "$match": { productId } },
  // Assumes `createdAt` is an ISO-8601 string, so its first 10 characters are the day.
  { "$group": {
    "_id": { "$substr": ["$createdAt", 0, 10] },
    "views": { "$sum": 1 }
  }},
  // Collect all per-day counts into a single document so they can be looked up by day.
  { "$group": {
    "_id": null,
    "data": { "$push": { "date": "$_id", "views": "$views" } }
  }},
  { "$project": {
    "data": {
      "$map": {
        "input": dummyArray,
        "in": {
          "$let": {
            // Position of the current day among the counted days, -1 when it has no views.
            "vars": { "idx": { "$indexOfArray": ["$data.date", "$$this"] } },
            "in": {
              "date": "$$this",
              // Guard the lookup: $arrayElemAt with -1 would return the LAST element.
              "views": {
                "$cond": [
                  { "$eq": ["$$idx", -1] },
                  0,
                  { "$arrayElemAt": ["$data.views", "$$idx"] }
                ]
              }
            }
          }
        }
      }
    }
  }},
  // One document per day, promoted to the top level.
  { "$unwind": "$data" },
  { "$replaceRoot": { "newRoot": "$data" } }
])
输出将是
[
{ date: '2018-09-01', views: 1 },
{ date: '2018-09-02', views: 3 },
{ date: '2018-09-03', views: 0 },
{ date: '2018-09-04', views: 2 },
{ date: '2018-09-05', views: 5 }
]
您的问题与 2014 年的一个帖子(post)类似。
post 中提供的所有答案都是有效的,如果您可以在应用程序代码中生成缺少的天数,事情会简单得多。
由于您要求 mongodb 解决方案并且自 2014 年以来发生了很多变化,我创建了一个新的聚合管道,您可以将其与 3.6 版本一起使用。
// Daily view counts between the first and the last day present in the collection,
// with `views: 0` for the days in between that have no documents (MongoDB 3.6).
// Changes from the previous version: the stage notes use `//` (the `--` markers
// are not valid JavaScript), and the epoch is written as `new Date(0)` instead of
// `new Date("1-1-1970")`, which is a non-ISO string parsed in local time and so
// shifted every computed day by the machine's timezone offset.
ProductView.aggregate([
  // Convert the string date into a date type for date calculations.
  // This stage can be dropped if `createdAt` is stored as a date in the collection.
  { "$addFields": { "createdAt": { "$dateFromString": { "dateString": "$createdAt" } } } },
  // Strip the time part so that whole days can be added to reach the next day.
  { "$project": {
    "day": { "$dateFromParts": {
      "year": { "$year": "$createdAt" },
      "month": { "$month": "$createdAt" },
      "day": { "$dayOfMonth": "$createdAt" }
    }}
  }},
  // Two result sets: the count per day, and the set of distinct days together with
  // the first and last day expressed as seconds since the Unix epoch.
  { "$facet": {
    "daycounts": [{ "$group": { "_id": "$day", "count": { "$sum": 1 } } }],
    "maxmindays": [
      { "$group": {
        "_id": null,
        "days": { "$addToSet": "$day" },
        "minday": { "$min": { "$divide": [{ "$subtract": ["$day", new Date(0)] }, 1000] } },
        "maxday": { "$max": { "$divide": [{ "$subtract": ["$day", new Date(0)] }, 1000] } }
      }}
    ]
  }},
  { "$project": {
    "data": {
      "$let": {
        "vars": { "maxminday": { "$arrayElemAt": ["$maxmindays", 0] } },
        "in": {
          // $range: iterate from the first to the last day, one day (in seconds) at a time.
          // One extra day is added to the end because the end of $range is exclusive.
          "$map": {
            "input": { "$range": ["$$maxminday.minday", { "$add": ["$$maxminday.maxday", 60 * 60 * 24] }, 60 * 60 * 24] },
            "as": "r",
            "in": {
              // Convert the seconds back to milliseconds to rebuild the day as a date.
              "$let": {
                "vars": { "current": { "$add": [new Date(0), { "$multiply": ["$$r", 1000] }] } },
                "in": {
                  // If the day exists in the collection, look up its count in the
                  // `daycounts` facet; otherwise report zero views.
                  "$cond": [
                    { "$in": ["$$current", "$$maxminday.days"] },
                    {
                      "date": { "$substr": ["$$current", 0, 10] },
                      "views": { "$let": {
                        "vars": { "daycount": { "$arrayElemAt": ["$daycounts", { "$indexOfArray": ["$daycounts._id", "$$current"] }] } },
                        "in": "$$daycount.count"
                      }}
                    },
                    { "date": { "$substr": ["$$current", 0, 10] }, "views": 0 }
                  ]
                }
              }
            }
          }
        }
      }
    }
  }},
  // Flatten the array of per-day entries.
  { "$unwind": "$data" },
  // Promote each entry to the top level.
  { "$replaceRoot": { newRoot: "$data" } }
])
如果缺失的日期只有一两个,并且要处理的文档数量很少,我建议您在客户端补上缺失的日期。
也就是说,以下管道仅适用于 MongoDB 4.0+,但稍加努力,我们可以让它在 3.6 中运行。
// Views per day (newest first) between the first and the last day present in the
// input, including days without any document (MongoDB 4.0+).
// Output documents have the shape { _id: <Date at midnight>, views: <number> }.
//
// The previous version appended each missing day once and then counted every entry
// with `$sum: 1`, so days without views were reported with `views: 1`. Entries are
// now tagged with the amount they contribute (1 for a real view, 0 for a filler).
[
  {
    // Collect the day of every document as milliseconds since the epoch.
    $group: {
      _id: null,
      dates: {
        $push: {
          $let: {
            vars: {
              date: { $dateToParts: { date: { $toDate: "$createdAt" } } }
            },
            in: {
              // Rebuilding the date from year/month/day drops the time of day.
              $toDouble: {
                $dateFromParts: {
                  year: "$$date.year",
                  month: "$$date.month",
                  day: "$$date.day"
                }
              }
            }
          }
        }
      }
    }
  },
  {
    // First and last day in seconds, which is the unit the $range below steps in.
    $addFields: {
      startDate: { $divide: [{ $min: "$dates" }, 1000] },
      endDate: { $divide: [{ $max: "$dates" }, 1000] }
    }
  },
  {
    $addFields: {
      dates: {
        $concatArrays: [
          // Every real view contributes 1 to its day.
          {
            $map: {
              input: "$dates",
              in: { date: { $toDate: "$$this" }, views: 1 }
            }
          },
          // Every day of the range that has no view contributes a 0 filler.
          // The exclusive end of $range is harmless: the last day always has a view.
          {
            $map: {
              input: {
                $setDifference: [
                  {
                    $map: {
                      input: {
                        $range: [
                          { $toDouble: "$startDate" },
                          { $toDouble: "$endDate" },
                          24 * 60 * 60
                        ]
                      },
                      in: { $multiply: ["$$this", 1000] }
                    }
                  },
                  "$dates"
                ]
              },
              in: { date: { $toDate: "$$this" }, views: 0 }
            }
          }
        ]
      }
    }
  },
  {
    "$unwind": "$dates"
  },
  {
    // Sum the contributions per day.
    "$group": {
      _id: "$dates.date",
      views: { $sum: "$dates.views" }
    }
  },
  {
    "$sort": {
      _id: -1
    }
  }
]
从 Mongo 5.1
开始,这是新 $densify
聚合运算符的完美用例:
// { date: ISODate("2018-09-01"), views: 1 }
// { date: ISODate("2018-09-02"), views: 3 }
// { date: ISODate("2018-09-04"), views: 2 }
// { date: ISODate("2018-09-05"), views: 5 }
// Add one document per missing day, then default `views` to 0 on the added documents.
db.collection.aggregate([
// Create a document for every 1-day step of `date` that has no document yet.
{ $densify: {
field: "date",
// bounds "full": cover the range spanned by the `date` values of the existing documents.
range: { step: 1, unit: "day", bounds: "full" }
}},
// Documents without a truthy `views` (the ones just added) get 0; the others keep their value.
{ $set: { views: { $cond: [ { $not: ["$views"] }, 0, "$views" ] } } }
])
// { date: ISODate("2018-09-01"), views: 1 }
// { date: ISODate("2018-09-02"), views: 3 }
// { date: ISODate("2018-09-03"), views: 0 } <=
// { date: ISODate("2018-09-04"), views: 2 }
// { date: ISODate("2018-09-05"), views: 5 }
这段管道的作用:
- 使用 $densify 在文档序列中补出新文档,填上 field(本例中为 field: "date")缺失的取值,从而让文档序列变得"稠密":
  - 补全的步长为 1 天:range: { step: 1, unit: "day", ... }
  - 并且在现有文档所覆盖的整个日期范围内进行补全:bounds: "full"
- 最后用 $set 将 views 设置为 0,且只作用于补全阶段新增的文档:{ views: { $cond: [ { $not: ["$views"] }, 0, "$views" ] } }
当然,要让它适用于您的具体示例,您可以使用 $dateToString($dateToString: { format: "%Y-%m-%d", date: "$date" })把日期转换为字符串,并使用 $dateFromString($dateFromString: { dateString: "$date" })把字符串转换回日期。
关于你的 P.S。 (传入开始和结束日期以根据此范围输出结果),您可以将 bounds: "full"
替换为 bounds: [ISODate("2018-08-25"), ISODate("2018-09-07")]
我有一个 ProductViews 集合(collection),其中的文档如下:
{
productId: "5b8c0f3204a10228b00a1745",
createdAt: "2018-09-07T17:18:40.759Z"
}
我有一个获取特定产品每日浏览量的查询:
// Count views per calendar day for a single product.
// Assumes `createdAt` is an ISO-8601 string such as "2018-09-07T17:18:40.759Z",
// so its first 10 characters are the day ("YYYY-MM-DD").
// Days without any matching document produce no group and are therefore absent from the result.
ProductView.aggregate([
// Keep only the views of the requested product.
{ $match: { productId } },
// Reduce every document to its day string.
{ $project: { day: { $substr: ["$createdAt", 0, 10] } } },
{
// One group per day; `count` is the number of views on that day.
$group: {
_id: "$day",
count: { $sum: 1 },
// NOTE(review): `createdAt` is not kept by the $project above and `time` is not
// selected by the final $project, so this accumulator does not influence the output.
time: { $avg: "$createdAt" },
}
},
// Oldest day first.
{ $sort: { _id: 1 } },
{
// Expose the grouped values under the names `date` and `views`.
$project: {
date: '$_id',
views: '$count',
},
},
]).exec((err, result) => ...)
当前给出:
[
{ date: '2018-09-01', views: 1 },
{ date: '2018-09-02', views: 3 },
{ date: '2018-09-04', views: 2 },
{ date: '2018-09-05', views: 5 },
// ...
]
问题:
问题是,对于观看次数为 0 的日期,此聚合不会返回 { date: '2018-09-03', views: 0 } 这样的记录。这会导致数据显示不正确:[![在此处输入图像描述][1]][1]
结果应如下所示:
[
{ date: '2018-09-01', views: 1 },
{ date: '2018-09-02', views: 3 },
{ date: '2018-09-03', views: 0 }, <=
{ date: '2018-09-04', views: 2 },
{ date: '2018-09-05', views: 5 },
// ...
]
P.S.: 如果还能传入开始和结束日期,并根据这个范围输出结果,那就完美了。
[1]: https://i.stack.imgur.com/uHPBs.png
您需要一些额外的步骤来返回默认值。首先,使用 $group 并将 _id 设置为 null,把所有结果收集到一个文档中。然后以一个日期数组作为输入使用 $map。在 $map 内部,可以用 $indexOfArray 判断该日期是否存在于当前结果集中:如果存在(索引 != -1),就返回对应的值;否则返回一个 views 为 0 的默认子文档。最后用 $unwind 重新得到文档列表,并用 $replaceRoot 将嵌套的 stats 提升到顶层。
// Daily view counts for one product, with a `views: 0` entry for every listed day that has no views.
// Stages 1-5 compute the per-day counts; the remaining stages merge them with a fixed list of days.
ProductView.aggregate([
{ $match: { productId: '5b8c0f3204a10228b00a1745' } },
// Assumes `createdAt` is an ISO-8601 string, so its first 10 characters are "YYYY-MM-DD".
{ $project: { day: { $substr: ["$createdAt", 0, 10] } } },
{
$group: {
_id: "$day",
count: { $sum: 1 },
// NOTE(review): `createdAt` is not kept by the $project above and `time` is not
// selected below, so this accumulator does not influence the output.
time: { $avg: "$createdAt" },
}
},
{ $sort: { _id: 1 } },
{
$project: {
date: '$_id',
views: '$count',
},
},
{
// Collapse all per-day documents into one document holding them in the `stats` array.
$group: {
_id: null,
stats: { $push: "$$ROOT" }
}
},
{
$project: {
stats: {
// Walk the list of wanted days and emit exactly one entry per day.
$map: {
input: [ "2018-09-01", "2018-09-02", "2018-09-03", "2018-09-04", "2018-09-05" ],
as: "date",
in: {
$let: {
// Position of the current day among the computed days, or -1 when the day has no views.
vars: { dateIndex: { "$indexOfArray": [ "$stats._id", "$$date" ] } },
in: {
$cond: {
if: { $ne: [ "$$dateIndex", -1 ] },
// Day exists: reuse the computed entry.
then: { $arrayElemAt: [ "$stats", "$$dateIndex" ] },
// Day missing: synthesize an entry with zero views.
else: { _id: "$$date", date: "$$date", views: 0 }
}
}
}
}
}
}
}
},
{
// Back to one document per day ...
$unwind: "$stats"
},
{
// ... with the nested entry promoted to the top level.
$replaceRoot: {
newRoot: "$stats"
}
}
]).exec((err, result) => ...)
您可以在应用程序逻辑中用一个简单的循环生成静态日期列表。我相信在 MongoDB 中也可以做到(使用 $range),但这可能会让聚合管道变得更复杂。请告诉我这样是否可行,或者您是否想尝试在 MongoDB 中生成该日期数组。
可以结合一些 JavaScript 和聚合(aggregation)技巧来实现。
首先,您需要生成所给日期范围内的全部日期。
/**
 * Lists every day from `startDate` to `stopDate`, both inclusive.
 *
 * @param {*} startDate - first day; anything `moment()` accepts
 * @param {*} stopDate - last day; anything `moment()` accepts
 * @returns {string[]} days formatted as 'YYYY-MM-DD', in ascending order;
 *   empty when `startDate` is after `stopDate`
 */
function getDates(startDate, stopDate) {
  const lastDay = moment(stopDate);
  const days = [];
  // Each step works on a fresh moment clone, so no shared instance is mutated.
  for (
    let cursor = moment(startDate);
    cursor <= lastDay;
    cursor = moment(cursor).add(1, 'days')
  ) {
    days.push(moment(cursor).format('YYYY-MM-DD'));
  }
  return days;
}
// Every day of the requested range, inclusive, as 'YYYY-MM-DD' strings.
const dummyArray = getDates('2018-09-01', '2018-09-05');
// dummyArray => [ "2018-09-01", "2018-09-02", "2018-09-03", "2018-09-04", "2018-09-05" ]
// (shown as a comment: assigning to the `const` binding again would throw a TypeError)
现在,您可以使用以下聚合找到数据库中不可用的日期。
// Daily view counts for one product, with `views: 0` for every day of `dummyArray`
// that has no views. The result is one `{ date, views }` document per entry of
// `dummyArray`, in the same order as `dummyArray`.
//
// The previous version emitted a 1/0 "day exists" flag under `count` instead of the
// real number of views, and its final $group discarded the ordering.
//
// Limitation: when no document matches `productId`, the `_id: null` group produces
// nothing and the result is empty rather than a list of zeros.
db.collection.aggregate([
  { "$match": { productId } },
  // Assumes `createdAt` is an ISO-8601 string, so its first 10 characters are the day.
  { "$group": {
    "_id": { "$substr": ["$createdAt", 0, 10] },
    "views": { "$sum": 1 }
  }},
  // Collect all per-day counts into a single document so they can be looked up by day.
  { "$group": {
    "_id": null,
    "data": { "$push": { "date": "$_id", "views": "$views" } }
  }},
  { "$project": {
    "data": {
      "$map": {
        "input": dummyArray,
        "in": {
          "$let": {
            // Position of the current day among the counted days, -1 when it has no views.
            "vars": { "idx": { "$indexOfArray": ["$data.date", "$$this"] } },
            "in": {
              "date": "$$this",
              // Guard the lookup: $arrayElemAt with -1 would return the LAST element.
              "views": {
                "$cond": [
                  { "$eq": ["$$idx", -1] },
                  0,
                  { "$arrayElemAt": ["$data.views", "$$idx"] }
                ]
              }
            }
          }
        }
      }
    }
  }},
  // One document per day, promoted to the top level.
  { "$unwind": "$data" },
  { "$replaceRoot": { "newRoot": "$data" } }
])
输出将是
[
{ date: '2018-09-01', views: 1 },
{ date: '2018-09-02', views: 3 },
{ date: '2018-09-03', views: 0 },
{ date: '2018-09-04', views: 2 },
{ date: '2018-09-05', views: 5 }
]
您的问题与 2014 年的一个帖子(post)类似。
post 中提供的所有答案都是有效的,如果您可以在应用程序代码中生成缺少的天数,事情会简单得多。
由于您要求 mongodb 解决方案并且自 2014 年以来发生了很多变化,我创建了一个新的聚合管道,您可以将其与 3.6 版本一起使用。
// Daily view counts between the first and the last day present in the collection,
// with `views: 0` for the days in between that have no documents (MongoDB 3.6).
// Changes from the previous version: the stage notes use `//` (the `--` markers
// are not valid JavaScript), and the epoch is written as `new Date(0)` instead of
// `new Date("1-1-1970")`, which is a non-ISO string parsed in local time and so
// shifted every computed day by the machine's timezone offset.
ProductView.aggregate([
  // Convert the string date into a date type for date calculations.
  // This stage can be dropped if `createdAt` is stored as a date in the collection.
  { "$addFields": { "createdAt": { "$dateFromString": { "dateString": "$createdAt" } } } },
  // Strip the time part so that whole days can be added to reach the next day.
  { "$project": {
    "day": { "$dateFromParts": {
      "year": { "$year": "$createdAt" },
      "month": { "$month": "$createdAt" },
      "day": { "$dayOfMonth": "$createdAt" }
    }}
  }},
  // Two result sets: the count per day, and the set of distinct days together with
  // the first and last day expressed as seconds since the Unix epoch.
  { "$facet": {
    "daycounts": [{ "$group": { "_id": "$day", "count": { "$sum": 1 } } }],
    "maxmindays": [
      { "$group": {
        "_id": null,
        "days": { "$addToSet": "$day" },
        "minday": { "$min": { "$divide": [{ "$subtract": ["$day", new Date(0)] }, 1000] } },
        "maxday": { "$max": { "$divide": [{ "$subtract": ["$day", new Date(0)] }, 1000] } }
      }}
    ]
  }},
  { "$project": {
    "data": {
      "$let": {
        "vars": { "maxminday": { "$arrayElemAt": ["$maxmindays", 0] } },
        "in": {
          // $range: iterate from the first to the last day, one day (in seconds) at a time.
          // One extra day is added to the end because the end of $range is exclusive.
          "$map": {
            "input": { "$range": ["$$maxminday.minday", { "$add": ["$$maxminday.maxday", 60 * 60 * 24] }, 60 * 60 * 24] },
            "as": "r",
            "in": {
              // Convert the seconds back to milliseconds to rebuild the day as a date.
              "$let": {
                "vars": { "current": { "$add": [new Date(0), { "$multiply": ["$$r", 1000] }] } },
                "in": {
                  // If the day exists in the collection, look up its count in the
                  // `daycounts` facet; otherwise report zero views.
                  "$cond": [
                    { "$in": ["$$current", "$$maxminday.days"] },
                    {
                      "date": { "$substr": ["$$current", 0, 10] },
                      "views": { "$let": {
                        "vars": { "daycount": { "$arrayElemAt": ["$daycounts", { "$indexOfArray": ["$daycounts._id", "$$current"] }] } },
                        "in": "$$daycount.count"
                      }}
                    },
                    { "date": { "$substr": ["$$current", 0, 10] }, "views": 0 }
                  ]
                }
              }
            }
          }
        }
      }
    }
  }},
  // Flatten the array of per-day entries.
  { "$unwind": "$data" },
  // Promote each entry to the top level.
  { "$replaceRoot": { newRoot: "$data" } }
])
如果缺失的日期只有一两个,并且要处理的文档数量很少,我建议您在客户端补上缺失的日期。
也就是说,以下管道仅适用于 MongoDB 4.0+,但稍加努力,我们可以让它在 3.6 中运行。
// Views per day (newest first) between the first and the last day present in the
// input, including days without any document (MongoDB 4.0+).
// Output documents have the shape { _id: <Date at midnight>, views: <number> }.
//
// The previous version appended each missing day once and then counted every entry
// with `$sum: 1`, so days without views were reported with `views: 1`. Entries are
// now tagged with the amount they contribute (1 for a real view, 0 for a filler).
[
  {
    // Collect the day of every document as milliseconds since the epoch.
    $group: {
      _id: null,
      dates: {
        $push: {
          $let: {
            vars: {
              date: { $dateToParts: { date: { $toDate: "$createdAt" } } }
            },
            in: {
              // Rebuilding the date from year/month/day drops the time of day.
              $toDouble: {
                $dateFromParts: {
                  year: "$$date.year",
                  month: "$$date.month",
                  day: "$$date.day"
                }
              }
            }
          }
        }
      }
    }
  },
  {
    // First and last day in seconds, which is the unit the $range below steps in.
    $addFields: {
      startDate: { $divide: [{ $min: "$dates" }, 1000] },
      endDate: { $divide: [{ $max: "$dates" }, 1000] }
    }
  },
  {
    $addFields: {
      dates: {
        $concatArrays: [
          // Every real view contributes 1 to its day.
          {
            $map: {
              input: "$dates",
              in: { date: { $toDate: "$$this" }, views: 1 }
            }
          },
          // Every day of the range that has no view contributes a 0 filler.
          // The exclusive end of $range is harmless: the last day always has a view.
          {
            $map: {
              input: {
                $setDifference: [
                  {
                    $map: {
                      input: {
                        $range: [
                          { $toDouble: "$startDate" },
                          { $toDouble: "$endDate" },
                          24 * 60 * 60
                        ]
                      },
                      in: { $multiply: ["$$this", 1000] }
                    }
                  },
                  "$dates"
                ]
              },
              in: { date: { $toDate: "$$this" }, views: 0 }
            }
          }
        ]
      }
    }
  },
  {
    "$unwind": "$dates"
  },
  {
    // Sum the contributions per day.
    "$group": {
      _id: "$dates.date",
      views: { $sum: "$dates.views" }
    }
  },
  {
    "$sort": {
      _id: -1
    }
  }
]
从 Mongo 5.1
开始,这是新 $densify
聚合运算符的完美用例:
// { date: ISODate("2018-09-01"), views: 1 }
// { date: ISODate("2018-09-02"), views: 3 }
// { date: ISODate("2018-09-04"), views: 2 }
// { date: ISODate("2018-09-05"), views: 5 }
// Add one document per missing day, then default `views` to 0 on the added documents.
db.collection.aggregate([
// Create a document for every 1-day step of `date` that has no document yet.
{ $densify: {
field: "date",
// bounds "full": cover the range spanned by the `date` values of the existing documents.
range: { step: 1, unit: "day", bounds: "full" }
}},
// Documents without a truthy `views` (the ones just added) get 0; the others keep their value.
{ $set: { views: { $cond: [ { $not: ["$views"] }, 0, "$views" ] } } }
])
// { date: ISODate("2018-09-01"), views: 1 }
// { date: ISODate("2018-09-02"), views: 3 }
// { date: ISODate("2018-09-03"), views: 0 } <=
// { date: ISODate("2018-09-04"), views: 2 }
// { date: ISODate("2018-09-05"), views: 5 }
这段管道的作用:
- 使用 $densify 在文档序列中补出新文档,填上 field(本例中为 field: "date")缺失的取值,从而让文档序列变得"稠密":
  - 补全的步长为 1 天:range: { step: 1, unit: "day", ... }
  - 并且在现有文档所覆盖的整个日期范围内进行补全:bounds: "full"
- 最后用 $set 将 views 设置为 0,且只作用于补全阶段新增的文档:{ views: { $cond: [ { $not: ["$views"] }, 0, "$views" ] } }
当然,要让它适用于您的具体示例,您可以使用 $dateToString($dateToString: { format: "%Y-%m-%d", date: "$date" })把日期转换为字符串,并使用 $dateFromString($dateFromString: { dateString: "$date" })把字符串转换回日期。
关于你的 P.S。 (传入开始和结束日期以根据此范围输出结果),您可以将 bounds: "full"
替换为 bounds: [ISODate("2018-08-25"), ISODate("2018-09-07")]