检查深层嵌套列表中的条件
Checking for criteria within a deep nested list
我正在使用 MongoDB 和 PyMongo 并具有以下数据结构。
[
{
"position": 367,
"entropy": 0.1327801096975522,
"variants_flattened": [
"GFRHQNSEG",
"GFRHQNSEG",
"GFRHQNSEG",
"GFRHQNAEG"
],
"supports": 51,
"sequences": [
{
"position": 367,
"sequence": "GFRHQNSEG",
"count": 50,
"conservation": 98.03921568627452,
"motif_short": "I",
"motif_long": "Index",
"id": [
"APQ31289.1",
"ASU55526.1",
"ASU55528.1",
"APQ31291.1"
],
"strain": [
"Influenza A virus A/Xiamen/s200/2016",
"Influenza A virus A/Shandong-Zhifu/164/2016",
"Influenza A virus A/Shandong-Zhifu/1185/2016",
"Influenza A virus A/Xiamen/s228/2016"
],
"country": [
"HA Hemagglutinin",
"HA Hemagglutinin",
"HA Hemagglutinin",
"HA Hemagglutinin"
],
"host": [
"Influenza A virus A/Xiamen/s200/2016",
"Influenza A virus A/Shandong-Zhifu/164/2016",
"Influenza A virus A/Shandong-Zhifu/1185/2016",
"Influenza A virus A/Xiamen/s228/2016"
]
},
{
"position": 367,
"sequence": "GFRHQNAEG",
"count": 1,
"conservation": 1.9607843137254902,
"motif_short": "Ma",
"motif_long": "Major",
"id": [
"QBM69728.1"
],
"strain": [
"Influenza A virus A/China/70793/2016"
],
"country": [
"HA Hemagglutinin"
],
"host": [
"Influenza A virus A/China/70793/2016"
]
}
],
"variants": 2
}
]
根级别列表包含多个结构相似的对象。
我需要获取其中“motif_short”等于“I”的实例(只需要“sequences”列表中符合条件的对象)。
预期的输出是(在这个特定的例子中,只有一个输出对象,但在一个实例中可以有多个对象匹配这个条件):
{
"position": 367,
"sequence": "GFRHQNSEG",
"count": 50,
"conservation": 98.03921568627452,
"motif_short": "I",
"motif_long": "Index",
"id": [
"APQ31289.1",
"ASU55526.1",
"ASU55528.1",
"APQ31291.1"
],
"strain": [
"Influenza A virus A/Xiamen/s200/2016",
"Influenza A virus A/Shandong-Zhifu/164/2016",
"Influenza A virus A/Shandong-Zhifu/1185/2016",
"Influenza A virus A/Xiamen/s228/2016"
],
"country": [
"HA Hemagglutinin",
"HA Hemagglutinin",
"HA Hemagglutinin",
"HA Hemagglutinin"
],
"host": [
"Influenza A virus A/Xiamen/s200/2016",
"Influenza A virus A/Shandong-Zhifu/164/2016",
"Influenza A virus A/Shandong-Zhifu/1185/2016",
"Influenza A virus A/Xiamen/s228/2016"
]
}
我是 MongoDB 的新手,尝试过聚合等一些方法,但始终没能找到正确的切入点。请帮帮我。
提前致谢!
您可以使用聚合管道中的 $project 和 $filter 来解决这个问题。请针对此问题尝试以下脚本:
# Assumes `col` is the PyMongo collection object that holds the documents shown above.
# Pipeline stages:
#   1. $match   - keep only documents in which at least one element of `sequences`
#                 has motif_short == 'I'. Without this stage, $project would also
#                 return every non-matching document with an empty `sequences` array.
#   2. $project - rewrite `sequences` so it contains only the elements whose
#                 motif_short equals 'I' (`_id` is still included by default).
result = col.aggregate([
    {'$match': {'sequences.motif_short': 'I'}},
    {'$project': {
        'sequences': {
            '$filter': {
                'input': '$sequences',
                'as': 's',
                'cond': {'$eq': ['$$s.motif_short', 'I']},
            }
        }
    }},
])
此查询对 sequences 字段做投影,并通过 $filter 只保留其中 motif_short 等于“I”的元素。你会得到这样的结果:
{
"_id":"xyz",
"sequences":[
{
"position":367,
"sequence":"GFRHQNSEG",
"count":50,
"conservation":98.03921568627452,
"motif_short":"I",
"motif_long":"Index",
"id":[
"APQ31289.1",
"ASU55526.1",
"ASU55528.1",
"APQ31291.1"
],
"strain":[
"Influenza A virus A/Xiamen/s200/2016",
"Influenza A virus A/Shandong-Zhifu/164/2016",
"Influenza A virus A/Shandong-Zhifu/1185/2016",
"Influenza A virus A/Xiamen/s228/2016"
],
"country":[
"HA Hemagglutinin",
"HA Hemagglutinin",
"HA Hemagglutinin",
"HA Hemagglutinin"
],
"host":[
"Influenza A virus A/Xiamen/s200/2016",
"Influenza A virus A/Shandong-Zhifu/164/2016",
"Influenza A virus A/Shandong-Zhifu/1185/2016",
"Influenza A virus A/Xiamen/s228/2016"
]
}
]
}
我正在使用 MongoDB 和 PyMongo 并具有以下数据结构。
[
{
"position": 367,
"entropy": 0.1327801096975522,
"variants_flattened": [
"GFRHQNSEG",
"GFRHQNSEG",
"GFRHQNSEG",
"GFRHQNAEG"
],
"supports": 51,
"sequences": [
{
"position": 367,
"sequence": "GFRHQNSEG",
"count": 50,
"conservation": 98.03921568627452,
"motif_short": "I",
"motif_long": "Index",
"id": [
"APQ31289.1",
"ASU55526.1",
"ASU55528.1",
"APQ31291.1"
],
"strain": [
"Influenza A virus A/Xiamen/s200/2016",
"Influenza A virus A/Shandong-Zhifu/164/2016",
"Influenza A virus A/Shandong-Zhifu/1185/2016",
"Influenza A virus A/Xiamen/s228/2016"
],
"country": [
"HA Hemagglutinin",
"HA Hemagglutinin",
"HA Hemagglutinin",
"HA Hemagglutinin"
],
"host": [
"Influenza A virus A/Xiamen/s200/2016",
"Influenza A virus A/Shandong-Zhifu/164/2016",
"Influenza A virus A/Shandong-Zhifu/1185/2016",
"Influenza A virus A/Xiamen/s228/2016"
]
},
{
"position": 367,
"sequence": "GFRHQNAEG",
"count": 1,
"conservation": 1.9607843137254902,
"motif_short": "Ma",
"motif_long": "Major",
"id": [
"QBM69728.1"
],
"strain": [
"Influenza A virus A/China/70793/2016"
],
"country": [
"HA Hemagglutinin"
],
"host": [
"Influenza A virus A/China/70793/2016"
]
}
],
"variants": 2
}
]
根级别列表包含多个结构相似的对象。
我需要获取其中“motif_short”等于“I”的实例(只需要“sequences”列表中符合条件的对象)。
预期的输出是(在这个特定的例子中,只有一个输出对象,但在一个实例中可以有多个对象匹配这个条件):
{
"position": 367,
"sequence": "GFRHQNSEG",
"count": 50,
"conservation": 98.03921568627452,
"motif_short": "I",
"motif_long": "Index",
"id": [
"APQ31289.1",
"ASU55526.1",
"ASU55528.1",
"APQ31291.1"
],
"strain": [
"Influenza A virus A/Xiamen/s200/2016",
"Influenza A virus A/Shandong-Zhifu/164/2016",
"Influenza A virus A/Shandong-Zhifu/1185/2016",
"Influenza A virus A/Xiamen/s228/2016"
],
"country": [
"HA Hemagglutinin",
"HA Hemagglutinin",
"HA Hemagglutinin",
"HA Hemagglutinin"
],
"host": [
"Influenza A virus A/Xiamen/s200/2016",
"Influenza A virus A/Shandong-Zhifu/164/2016",
"Influenza A virus A/Shandong-Zhifu/1185/2016",
"Influenza A virus A/Xiamen/s228/2016"
]
}
我是 MongoDB 的新手,尝试过聚合等一些方法,但始终没能找到正确的切入点。请帮帮我。
提前致谢!
您可以使用聚合管道中的 $project 和 $filter 来解决这个问题。请针对此问题尝试以下脚本:
# Assumes `col` is the PyMongo collection object that holds the documents shown above.
# Pipeline stages:
#   1. $match   - keep only documents in which at least one element of `sequences`
#                 has motif_short == 'I'. Without this stage, $project would also
#                 return every non-matching document with an empty `sequences` array.
#   2. $project - rewrite `sequences` so it contains only the elements whose
#                 motif_short equals 'I' (`_id` is still included by default).
result = col.aggregate([
    {'$match': {'sequences.motif_short': 'I'}},
    {'$project': {
        'sequences': {
            '$filter': {
                'input': '$sequences',
                'as': 's',
                'cond': {'$eq': ['$$s.motif_short', 'I']},
            }
        }
    }},
])
此查询对 sequences 字段做投影,并通过 $filter 只保留其中 motif_short 等于“I”的元素。你会得到这样的结果:
{
"_id":"xyz",
"sequences":[
{
"position":367,
"sequence":"GFRHQNSEG",
"count":50,
"conservation":98.03921568627452,
"motif_short":"I",
"motif_long":"Index",
"id":[
"APQ31289.1",
"ASU55526.1",
"ASU55528.1",
"APQ31291.1"
],
"strain":[
"Influenza A virus A/Xiamen/s200/2016",
"Influenza A virus A/Shandong-Zhifu/164/2016",
"Influenza A virus A/Shandong-Zhifu/1185/2016",
"Influenza A virus A/Xiamen/s228/2016"
],
"country":[
"HA Hemagglutinin",
"HA Hemagglutinin",
"HA Hemagglutinin",
"HA Hemagglutinin"
],
"host":[
"Influenza A virus A/Xiamen/s200/2016",
"Influenza A virus A/Shandong-Zhifu/164/2016",
"Influenza A virus A/Shandong-Zhifu/1185/2016",
"Influenza A virus A/Xiamen/s228/2016"
]
}
]
}