pymongo 按多个键分组并基于另一个键连接值数组
pymongo group by multiple keys and concat values array based on another key
我有一些结构如下的 MongoDB 文档。
{
"_id": {
"$oid": "615eb369514212cb0a27ba74"
},
"FragCount": 0,
"ValueMapping": 3,
"DataType": 19,
"BurstId": 55,
"SensorNodeId": "29a24a99",
"Values": [5, 0, -5, 8, -2, -6, 2, -2, 3, -3, 0, 1, -1, 0, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 3, 1, -4, 1, 0, -1, 5, -5, 2, -1, -1, 0, 0, 4, -3, -1, 2, 13, -15, 13, -13, 0, 0, 0, 1, -1, 1, 7, -8, 9, -5, -4, 4, -4, 1, 3, -4, 4, -4, 3, -2, -1, 5, -5, 10, -5, -5, 0, 1, -1, 1, -1],
"GatewayId": "7d62eb89",
"BurstDataOffset": 0,
"DataSize": 1002,
"Type": "burst",
"MeasurementId": 110,
"MeasurementTimeInterval": 150
}
{
"_id": {
"$oid": "615eb369514212cb0a27ba75"
},
"FragCount": 1,
"ValueMapping": 3,
"DataType": 19,
"BurstId": 55,
"SensorNodeId": "29a24a99",
"Values": [1, 0, -1, 0, 0, 0, 3, -3, 3, 0, -3, 3, -3, 5, 1, -6, 5, -5, 5, -2, -3, 3, 0, -3, 4, -4, 3, -2, -1, 3, -3, 4, -3, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
"GatewayId": "7d62eb89",
"BurstDataOffset": 99,
"DataSize": 1002,
"Type": "burst",
"MeasurementId": 110,
"MeasurementTimeInterval": 150
}
{
"_id": {
"$oid": "615eb369514212cb0a27ba76"
},
"FragCount": 2,
"ValueMapping": 3,
"DataType": 19,
"BurstId": 55,
"SensorNodeId": "29a24a99",
"Values": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
"GatewayId": "7d62eb89",
"BurstDataOffset": 198,
"DataSize": 1002,
"Type": "burst",
"MeasurementId": 110,
"MeasurementTimeInterval": 150
}
我需要按 MeasurementId 和 SensorNodeId 查询数据,这两者的组合总是各不相同的。每个组合都有 3 个不同的 BurstId,并且对于每个 BurstId,ValueMapping 是固定不变的。我希望把每个 BurstId 下的 Values 数组连接起来,而且连接的顺序很重要:必须按照 FragCount(始终为 1-n)的顺序进行连接。
最终结构需要类似于:
[
{
"MeasurementID": xxx,
"SensorNodeID": 'YYYYY',
"GatewayID": 'YYYYY',
"{ValueMapping key 1}" : [Concatenated values array],
"{ValueMapping key 2}" : [Concatenated values array],
"{ValueMapping key 3}" : [Concatenated values array],
},
{
"MeasurementID": xxx,
"SensorNodeID": 'YYYYY',
"GatewayID": 'YYYYY',
"{ValueMapping key 1}" : [Concatenated values array],
"{ValueMapping key 2}" : [Concatenated values array],
"{ValueMapping key 3}" : [Concatenated values array],
},
]
# Aggregation pipeline: one output document per
# (SensorNodeId, MeasurementId, GatewayId), carrying one field per
# ValueMapping whose value is the concatenation of that mapping's
# `Values` arrays.

# Stage 1: order the input by BurstId, then FragCount. This is intended to
# make the `$push` in stage 2 collect the `Values` arrays in that order.
sort_fragments = {"$sort": {"BurstId": 1, "FragCount": 1}}

# Stage 2: one group per sensor / measurement / gateway / ValueMapping.
# `Values` becomes an array of arrays (one inner array per input document);
# `BurstId` keeps the first BurstId seen in the group.
group_by_mapping = {
    "$group": {
        "_id": {
            "SensorNodeId": "$SensorNodeId",
            "MeasurementId": "$MeasurementId",
            "GatewayId": "$GatewayId",
            "ValueMapping": "$ValueMapping",
        },
        "BurstId": {"$first": "$BurstId"},
        "Values": {"$push": "$Values"},
    }
}

# Stage 3: re-sort the intermediate groups by their BurstId before
# regrouping them.
sort_bursts = {"$sort": {"BurstId": 1}}

# Flattens the array-of-arrays built in stage 2 into a single array by
# concatenating the inner arrays left to right.
flatten_values = {
    "$reduce": {
        "input": "$Values",
        "initialValue": [],
        "in": {"$concatArrays": ["$$value", "$$this"]},
    }
}

# Stage 4: collapse to one group per sensor / measurement / gateway and
# collect a {k, v} pair per ValueMapping: `k` is the ValueMapping rendered
# as a string, `v` is the flattened values array.
group_by_node = {
    "$group": {
        "_id": {
            "SensorNodeId": "$_id.SensorNodeId",
            "MeasurementId": "$_id.MeasurementId",
            "GatewayId": "$_id.GatewayId",
        },
        "BurstId": {"$push": "$BurstId"},
        "ValueMapping": {
            "$push": {
                "k": {"$concat": [{"$toString": "$_id.ValueMapping"}]},
                "v": flatten_values,
            }
        },
    }
}

# Stage 5: replace each document with the id fields merged with the
# {k, v} pairs converted into ordinary fields by `$arrayToObject`.
flatten_result = {
    "$replaceRoot": {
        "newRoot": {
            "$mergeObjects": [
                {
                    "SensorNodeId": "$_id.SensorNodeId",
                    "MeasurementId": "$_id.MeasurementId",
                    "GatewayId": "$_id.GatewayId",
                },
                {"$arrayToObject": "$ValueMapping"},
            ]
        }
    }
}

pipeline = [
    sort_fragments,
    group_by_mapping,
    sort_bursts,
    group_by_node,
    flatten_result,
]

collection.aggregate(pipeline)
我有一些结构如下的 MongoDB 文档。
{
"_id": {
"$oid": "615eb369514212cb0a27ba74"
},
"FragCount": 0,
"ValueMapping": 3,
"DataType": 19,
"BurstId": 55,
"SensorNodeId": "29a24a99",
"Values": [5, 0, -5, 8, -2, -6, 2, -2, 3, -3, 0, 1, -1, 0, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 3, 1, -4, 1, 0, -1, 5, -5, 2, -1, -1, 0, 0, 4, -3, -1, 2, 13, -15, 13, -13, 0, 0, 0, 1, -1, 1, 7, -8, 9, -5, -4, 4, -4, 1, 3, -4, 4, -4, 3, -2, -1, 5, -5, 10, -5, -5, 0, 1, -1, 1, -1],
"GatewayId": "7d62eb89",
"BurstDataOffset": 0,
"DataSize": 1002,
"Type": "burst",
"MeasurementId": 110,
"MeasurementTimeInterval": 150
}
{
"_id": {
"$oid": "615eb369514212cb0a27ba75"
},
"FragCount": 1,
"ValueMapping": 3,
"DataType": 19,
"BurstId": 55,
"SensorNodeId": "29a24a99",
"Values": [1, 0, -1, 0, 0, 0, 3, -3, 3, 0, -3, 3, -3, 5, 1, -6, 5, -5, 5, -2, -3, 3, 0, -3, 4, -4, 3, -2, -1, 3, -3, 4, -3, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
"GatewayId": "7d62eb89",
"BurstDataOffset": 99,
"DataSize": 1002,
"Type": "burst",
"MeasurementId": 110,
"MeasurementTimeInterval": 150
}
{
"_id": {
"$oid": "615eb369514212cb0a27ba76"
},
"FragCount": 2,
"ValueMapping": 3,
"DataType": 19,
"BurstId": 55,
"SensorNodeId": "29a24a99",
"Values": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
"GatewayId": "7d62eb89",
"BurstDataOffset": 198,
"DataSize": 1002,
"Type": "burst",
"MeasurementId": 110,
"MeasurementTimeInterval": 150
}
我需要按 MeasurementId 和 SensorNodeId 查询数据,这两者的组合总是各不相同的。每个组合都有 3 个不同的 BurstId,并且对于每个 BurstId,ValueMapping 是固定不变的。我希望把每个 BurstId 下的 Values 数组连接起来,而且连接的顺序很重要:必须按照 FragCount(始终为 1-n)的顺序进行连接。
最终结构需要类似于:
[
{
"MeasurementID": xxx,
"SensorNodeID": 'YYYYY',
"GatewayID": 'YYYYY',
"{ValueMapping key 1}" : [Concatenated values array],
"{ValueMapping key 2}" : [Concatenated values array],
"{ValueMapping key 3}" : [Concatenated values array],
},
{
"MeasurementID": xxx,
"SensorNodeID": 'YYYYY',
"GatewayID": 'YYYYY',
"{ValueMapping key 1}" : [Concatenated values array],
"{ValueMapping key 2}" : [Concatenated values array],
"{ValueMapping key 3}" : [Concatenated values array],
},
]
# Aggregation pipeline: one output document per
# (SensorNodeId, MeasurementId, GatewayId), carrying one field per
# ValueMapping whose value is the concatenation of that mapping's
# `Values` arrays.

# Stage 1: order the input by BurstId, then FragCount. This is intended to
# make the `$push` in stage 2 collect the `Values` arrays in that order.
sort_fragments = {"$sort": {"BurstId": 1, "FragCount": 1}}

# Stage 2: one group per sensor / measurement / gateway / ValueMapping.
# `Values` becomes an array of arrays (one inner array per input document);
# `BurstId` keeps the first BurstId seen in the group.
group_by_mapping = {
    "$group": {
        "_id": {
            "SensorNodeId": "$SensorNodeId",
            "MeasurementId": "$MeasurementId",
            "GatewayId": "$GatewayId",
            "ValueMapping": "$ValueMapping",
        },
        "BurstId": {"$first": "$BurstId"},
        "Values": {"$push": "$Values"},
    }
}

# Stage 3: re-sort the intermediate groups by their BurstId before
# regrouping them.
sort_bursts = {"$sort": {"BurstId": 1}}

# Flattens the array-of-arrays built in stage 2 into a single array by
# concatenating the inner arrays left to right.
flatten_values = {
    "$reduce": {
        "input": "$Values",
        "initialValue": [],
        "in": {"$concatArrays": ["$$value", "$$this"]},
    }
}

# Stage 4: collapse to one group per sensor / measurement / gateway and
# collect a {k, v} pair per ValueMapping: `k` is the ValueMapping rendered
# as a string, `v` is the flattened values array.
group_by_node = {
    "$group": {
        "_id": {
            "SensorNodeId": "$_id.SensorNodeId",
            "MeasurementId": "$_id.MeasurementId",
            "GatewayId": "$_id.GatewayId",
        },
        "BurstId": {"$push": "$BurstId"},
        "ValueMapping": {
            "$push": {
                "k": {"$concat": [{"$toString": "$_id.ValueMapping"}]},
                "v": flatten_values,
            }
        },
    }
}

# Stage 5: replace each document with the id fields merged with the
# {k, v} pairs converted into ordinary fields by `$arrayToObject`.
flatten_result = {
    "$replaceRoot": {
        "newRoot": {
            "$mergeObjects": [
                {
                    "SensorNodeId": "$_id.SensorNodeId",
                    "MeasurementId": "$_id.MeasurementId",
                    "GatewayId": "$_id.GatewayId",
                },
                {"$arrayToObject": "$ValueMapping"},
            ]
        }
    }
}

pipeline = [
    sort_fragments,
    group_by_mapping,
    sort_bursts,
    group_by_node,
    flatten_result,
]

collection.aggregate(pipeline)