MongoDB 按多个字段聚合分组并取平均值
MongoDB Aggregate group by multiple fields and get average
如何先按"patient_group"分组,再按"neuro-time-point"分组,求"hricph"的平均值?
{ "cases" : { "neuro" : { "neuro-time-point" : "0-12", "hricph" : 1 }, "patient_group" : "HSD" } }
{ "cases" : { "neuro" : { "neuro-time-point" : "12-24", "hricph" : 2 }, "patient_group" : "HSD" } }
{ "cases" : { "neuro" : { "neuro-time-point" : "24-36", "hricph" : 3 }, "patient_group" : "HSD" } }
{ "cases" : { "neuro" : { "neuro-time-point" : "0-12", "hricph" : 1 }, "patient_group" : "HSD" } }
{ "cases" : { "neuro" : { "neuro-time-point" : "24-36", "hricph" : 5 }, "patient_group" : "HSD" } }
{ "cases" : { "neuro" : { "neuro-time-point" : "36-48", "hricph" : 5 }, "patient_group" : "HSD" } }
期望的结果(或与之相近的结果,其格式应便于为不同患者组绘制多折线图):
{ "patient_group" : "HSD",
"avg_hricph_val" : [
{
"neuro_time_point" : "0-12",
"hricph" : 1
},
{
"neuro_time_point" : "12-24",
"hricph" : 2
},
{
"neuro_time_point" : "24-36",
"hricph" : 4
},
{
"neuro_time_point" : "36-48",
"hricph" : 5
}
]
}
这是我尝试过的方法:
// Attempted pipeline: a single $group stage whose key combines the patient
// group, the time point, and the hricph value itself.
db.test_collection.aggregate([ { "$group": {
"_id": {
"patient_group": "$cases.patient_group",
"neuro_time_point" : "$cases.neuro.neuro-time-point",
// NOTE(review): hricph is part of the group key here, so documents with
// different hricph values fall into separate groups instead of being averaged.
"hricph" : "$cases.neuro.hricph"
},
// NOTE(review): $avg is given the literal 1, not a field path such as
// "$cases.neuro.hricph", so it does not read the hricph field at all.
"avg_hricph_val": { "$avg": 1 }
}}
])
这个怎么样:
// Average "hricph" per (patient group, time point), then fold the per-time-point
// averages into one output document per patient group.
db.test_collection.aggregate(
  [
    {
      // Stage 1: one bucket per (patient group, time point) pair holding the
      // mean of cases.neuro.hricph for that pair.
      "$group": {
        "_id": { "id": "$cases.patient_group", "neuro_time_point": "$cases.neuro.neuro-time-point" },
        "hricph": { "$avg": "$cases.neuro.hricph" }
      }
    },
    {
      // Stage 2: $group emits buckets in no particular order, so sort them before
      // collecting. The time point is a string, so this is a lexicographic sort
      // (fine for "0-12" .. "36-48"; a label like "100-112" would sort before "12-24").
      "$sort": { "_id.id": 1, "_id.neuro_time_point": 1 }
    },
    {
      // Stage 3: regroup by patient group only. Grouping on "$_id.id" (rather than
      // null) keeps each patient group in its own document instead of merging all
      // groups into one. $push appends in the sorted input order; the
      // (group, time point) pairs are already unique after stage 1.
      "$group": {
        "_id": "$_id.id",
        "avg_hricph_val": { "$push": { "neuro_time_point": "$_id.neuro_time_point", "hricph": "$hricph" } }
      }
    },
    {
      // Stage 4: expose the group key as "patient_group" and drop _id.
      "$project": { "_id": 0, "patient_group": "$_id", "avg_hricph_val": 1 }
    }
  ]
)
结果
{
"patient_group" : "HSD",
"avg_hricph_val" : [
{
"neuro_time_point" : "0-12",
"hricph" : 1
},
{
"neuro_time_point" : "12-24",
"hricph" : 2
},
{
"neuro_time_point" : "24-36",
"hricph" : 4
},
{
"neuro_time_point" : "36-48",
"hricph" : 5
}
]
}
如何先按"patient_group"分组,再按"neuro-time-point"分组,求"hricph"的平均值?
{ "cases" : { "neuro" : { "neuro-time-point" : "0-12", "hricph" : 1 }, "patient_group" : "HSD" } }
{ "cases" : { "neuro" : { "neuro-time-point" : "12-24", "hricph" : 2 }, "patient_group" : "HSD" } }
{ "cases" : { "neuro" : { "neuro-time-point" : "24-36", "hricph" : 3 }, "patient_group" : "HSD" } }
{ "cases" : { "neuro" : { "neuro-time-point" : "0-12", "hricph" : 1 }, "patient_group" : "HSD" } }
{ "cases" : { "neuro" : { "neuro-time-point" : "24-36", "hricph" : 5 }, "patient_group" : "HSD" } }
{ "cases" : { "neuro" : { "neuro-time-point" : "36-48", "hricph" : 5 }, "patient_group" : "HSD" } }
期望的结果(或与之相近的结果,其格式应便于为不同患者组绘制多折线图):
{ "patient_group" : "HSD",
"avg_hricph_val" : [
{
"neuro_time_point" : "0-12",
"hricph" : 1
},
{
"neuro_time_point" : "12-24",
"hricph" : 2
},
{
"neuro_time_point" : "24-36",
"hricph" : 4
},
{
"neuro_time_point" : "36-48",
"hricph" : 5
}
]
}
这是我尝试过的方法:
// Attempted pipeline: a single $group stage whose key combines the patient
// group, the time point, and the hricph value itself.
db.test_collection.aggregate([ { "$group": {
"_id": {
"patient_group": "$cases.patient_group",
"neuro_time_point" : "$cases.neuro.neuro-time-point",
// NOTE(review): hricph is part of the group key here, so documents with
// different hricph values fall into separate groups instead of being averaged.
"hricph" : "$cases.neuro.hricph"
},
// NOTE(review): $avg is given the literal 1, not a field path such as
// "$cases.neuro.hricph", so it does not read the hricph field at all.
"avg_hricph_val": { "$avg": 1 }
}}
])
这个怎么样:
// Average "hricph" per (patient group, time point), then fold the per-time-point
// averages into one output document per patient group.
db.test_collection.aggregate(
  [
    {
      // Stage 1: one bucket per (patient group, time point) pair holding the
      // mean of cases.neuro.hricph for that pair.
      "$group": {
        "_id": { "id": "$cases.patient_group", "neuro_time_point": "$cases.neuro.neuro-time-point" },
        "hricph": { "$avg": "$cases.neuro.hricph" }
      }
    },
    {
      // Stage 2: $group emits buckets in no particular order, so sort them before
      // collecting. The time point is a string, so this is a lexicographic sort
      // (fine for "0-12" .. "36-48"; a label like "100-112" would sort before "12-24").
      "$sort": { "_id.id": 1, "_id.neuro_time_point": 1 }
    },
    {
      // Stage 3: regroup by patient group only. Grouping on "$_id.id" (rather than
      // null) keeps each patient group in its own document instead of merging all
      // groups into one. $push appends in the sorted input order; the
      // (group, time point) pairs are already unique after stage 1.
      "$group": {
        "_id": "$_id.id",
        "avg_hricph_val": { "$push": { "neuro_time_point": "$_id.neuro_time_point", "hricph": "$hricph" } }
      }
    },
    {
      // Stage 4: expose the group key as "patient_group" and drop _id.
      "$project": { "_id": 0, "patient_group": "$_id", "avg_hricph_val": 1 }
    }
  ]
)
结果
{
"patient_group" : "HSD",
"avg_hricph_val" : [
{
"neuro_time_point" : "0-12",
"hricph" : 1
},
{
"neuro_time_point" : "12-24",
"hricph" : 2
},
{
"neuro_time_point" : "24-36",
"hricph" : 4
},
{
"neuro_time_point" : "36-48",
"hricph" : 5
}
]
}