使用 Python 将嵌套 JSON 递归转换为 CSV
Recursive json to csv with python
我有一个 json 文件,需要把它转换为 csv,但这里的难点在于它是嵌套的 json 结构:
[ {
"node":[
{
"node":[
{
"node":[
{
"node":[
{
"valBool":false,
"valStr1":[
"true"
],
"valStr2":[
"Industry is in bank exclusion list"
]
},
{
"valBool":false,
"valStr1":[
"true"
],
"valStr2":[
"Borrower is currently under bankruptcy law"
]
},
{
"valBool":false,
"valStr1":[
"true"
],
"valStr2":[
"Borrower is flagged as Unwilling"
]
},
{
"valBool":false,
"valStr1":[
"true"
],
"valStr2":[
"Borrower is flagged as non-viable"
]
},
{
"valBool":false,
"valStr1":[
"true"
],
"valStr2":[
"Borrower has blocked access of bank to Tiresias"
]
},
{
"valBool":false,
"valStr1":[
"true"
],
"valStr2":[
"Borrower is default (NPE/NPF eba status) "
]
},
{
"valBool":true,
"valStr1":[
"false"
],
"valStr2":[
"Default value"
]
}
]
}
]
},
{
"node":[
{
"node":[
{
"node":[
{
"node":[
{
"node":[
{
"valBool":false,
"valStr1":[
"1"
],
"valStr2":[
"There are less \nthan 10 employees"
]
},
{
"valBool":false,
"valStr1":[
"1"
],
"valStr2":[
"Annual turnover is \nlower than annual \nturnover threshold"
]
},
{
"valBool":false,
"valStr1":[
"1"
],
"valStr2":[
"Total assets are \nlower than total \nassets threshold"
]
},
{
"valBool":true,
"valStr1":[
"0"
],
"valStr2":[
"Default"
]
}
]
}
]
}
]
}
]
}
]
}
]
}
] } ]
如您所见,'node' 可以在任何级别找到。我尝试了一些递归方法,但输出不是我们想要的。
我们需要将每个具有三个值的节点取出来,并将它们写入 csv 上的一行。
预期的输出应该是:
valBool,valStr1,valStr2
false,"true","Industry is in bank exclusion list"
false,"true","Borrower is currently under bankruptcy law"
我试过这种方法,但输出只是把每个值追加到新的一行,并且把完整路径写成了键名。
有什么想法吗?
谢谢!
您正在处理的数据要么是列表,要么是字典。如果它是一个列表,只需对其中的每一项递归调用您的函数。
如果它是一个字典,则尝试打印与 'valBool'、'valStr1' 和 'valStr2' 关联的值(如果它们存在),
并且在字典含有 'node' 键时,对 'node' 关联的值递归调用该函数。
# Sample input: nested lists/dicts where each "node" key holds the next level and the innermost dicts carry "valBool", "valStr1" and "valStr2".
data = [ { "node":[ { "node":[ { "node":[ { "node":[ { "valBool":False, "valStr1":[ "true" ], "valStr2":[ "Industry is in bank exclusion list" ] }, { "valBool":False, "valStr1":[ "true" ], "valStr2":[ "Borrower is currently under bankruptcy law" ] }, { "valBool": False, "valStr1":[ "true" ], "valStr2":[ "Borrower is flagged as Unwilling" ] }, { "valBool": False, "valStr1":[ "true" ], "valStr2":[ "Borrower is flagged as non-viable" ] }, { "valBool": False, "valStr1":[ "true" ], "valStr2":[ "Borrower has blocked access of bank to Tiresias" ] }, { "valBool":False, "valStr1":[ "true" ], "valStr2":[ "Borrower is default (NPE/NPF eba status) " ] }, { "valBool":True, "valStr1":[ "false" ], "valStr2":[ "Default value" ] } ] } ] }, { "node":[ { "node":[ { "node":[ { "node":[ { "node":[ { "valBool":False, "valStr1":[ "1" ], "valStr2":[ "There are less \nthan 10 employees" ] }, { "valBool":False, "valStr1":[ "1" ], "valStr2":[ "Annual turnover is \nlower than annual \nturnover threshold" ] }, { "valBool":False, "valStr1":[ "1" ], "valStr2":[ "Total assets are \nlower than total \nassets threshold" ] }, { "valBool":True, "valStr1":[ "0" ], "valStr2":[ "Default" ] } ] } ] } ] } ] } ] } ] } ] } ]
# Rows collected by loop(), in traversal order.
result = []


def loop(data):
    """Recursively collect ``valBool;valStr1;valStr2`` rows from nested data.

    Lists are walked item by item. For a dictionary, a row is built from
    ``valBool`` and the first elements of ``valStr1`` and ``valStr2``; the
    row is printed and appended to the module-level ``result`` list. If the
    dictionary has a ``node`` key, its value is processed recursively.
    Values that are neither lists nor dictionaries are ignored.

    Args:
        data: A list, a dictionary, or any other value found while walking
            the structure.

    Returns:
        None. Rows are reported via ``print`` and accumulated in ``result``.
    """
    if isinstance(data, list):
        for item in data:
            loop(item)
    elif isinstance(data, dict):
        try:
            row = f"{data['valBool']};{data['valStr1'][0]};{data['valStr2'][0]}"
        except (KeyError, IndexError):
            # Not a complete value node: a valXXX key is missing or one of
            # the string lists is empty. Skip it instead of aborting the walk.
            pass
        else:
            print(row)
            result.append(row)
        # Value nodes and container nodes may both carry children.
        if 'node' in data:
            loop(data['node'])
# Print the header line first; loop() then prints one ';'-separated row per matching node.
print('valBool;valStr1;valStr2')
loop(data)
这并不完全是您期望的输出,但您应该能看出如何修改。
[编辑] 修改代码以将行放入列表 result
我有一个 json 文件,需要把它转换为 csv,但这里的难点在于它是嵌套的 json 结构:
[ { "node":[ { "node":[ { "node":[ { "node":[ { "valBool":false, "valStr1":[ "true" ], "valStr2":[ "Industry is in bank exclusion list" ] }, { "valBool":false, "valStr1":[ "true" ], "valStr2":[ "Borrower is currently under bankruptcy law" ] }, { "valBool":false, "valStr1":[ "true" ], "valStr2":[ "Borrower is flagged as Unwilling" ] }, { "valBool":false, "valStr1":[ "true" ], "valStr2":[ "Borrower is flagged as non-viable" ] }, { "valBool":false, "valStr1":[ "true" ], "valStr2":[ "Borrower has blocked access of bank to Tiresias" ] }, { "valBool":false, "valStr1":[ "true" ], "valStr2":[ "Borrower is default (NPE/NPF eba status) " ] }, { "valBool":true, "valStr1":[ "false" ], "valStr2":[ "Default value" ] } ] } ] }, { "node":[ { "node":[ { "node":[ { "node":[ { "node":[ { "valBool":false, "valStr1":[ "1" ], "valStr2":[ "There are less \nthan 10 employees" ] }, { "valBool":false, "valStr1":[ "1" ], "valStr2":[ "Annual turnover is \nlower than annual \nturnover threshold" ] }, { "valBool":false, "valStr1":[ "1" ], "valStr2":[ "Total assets are \nlower than total \nassets threshold" ] }, { "valBool":true, "valStr1":[ "0" ], "valStr2":[ "Default" ] } ] } ] } ] } ] } ] } ] } ] } ]
如您所见,'node' 可以在任何级别找到。我尝试了一些递归方法,但输出不是我们想要的。 我们需要将每个具有三个值的节点取出来,并将它们写入 csv 上的一行。
预期的输出应该是:
valBool,valStr1,valStr2
false,"true","Industry is in bank exclusion list"
false,"true","Borrower is currently under bankruptcy law"
我试过这种方法,但输出只是把每个值追加到新的一行,并且把完整路径写成了键名。
有什么想法吗?
谢谢!
您正在处理的数据要么是列表,要么是字典。如果它是一个列表,只需对其中的每一项递归调用您的函数。
如果它是一个字典,则尝试打印与 'valBool'、'valStr1' 和 'valStr2' 关联的值(如果它们存在),
并且在字典含有 'node' 键时,对 'node' 关联的值递归调用该函数。
# Sample input: nested lists/dicts where each "node" key holds the next level and the innermost dicts carry "valBool", "valStr1" and "valStr2".
data = [ { "node":[ { "node":[ { "node":[ { "node":[ { "valBool":False, "valStr1":[ "true" ], "valStr2":[ "Industry is in bank exclusion list" ] }, { "valBool":False, "valStr1":[ "true" ], "valStr2":[ "Borrower is currently under bankruptcy law" ] }, { "valBool": False, "valStr1":[ "true" ], "valStr2":[ "Borrower is flagged as Unwilling" ] }, { "valBool": False, "valStr1":[ "true" ], "valStr2":[ "Borrower is flagged as non-viable" ] }, { "valBool": False, "valStr1":[ "true" ], "valStr2":[ "Borrower has blocked access of bank to Tiresias" ] }, { "valBool":False, "valStr1":[ "true" ], "valStr2":[ "Borrower is default (NPE/NPF eba status) " ] }, { "valBool":True, "valStr1":[ "false" ], "valStr2":[ "Default value" ] } ] } ] }, { "node":[ { "node":[ { "node":[ { "node":[ { "node":[ { "valBool":False, "valStr1":[ "1" ], "valStr2":[ "There are less \nthan 10 employees" ] }, { "valBool":False, "valStr1":[ "1" ], "valStr2":[ "Annual turnover is \nlower than annual \nturnover threshold" ] }, { "valBool":False, "valStr1":[ "1" ], "valStr2":[ "Total assets are \nlower than total \nassets threshold" ] }, { "valBool":True, "valStr1":[ "0" ], "valStr2":[ "Default" ] } ] } ] } ] } ] } ] } ] } ] } ]
# Rows collected by loop(), in traversal order.
result = []


def loop(data):
    """Recursively collect ``valBool;valStr1;valStr2`` rows from nested data.

    Lists are walked item by item. For a dictionary, a row is built from
    ``valBool`` and the first elements of ``valStr1`` and ``valStr2``; the
    row is printed and appended to the module-level ``result`` list. If the
    dictionary has a ``node`` key, its value is processed recursively.
    Values that are neither lists nor dictionaries are ignored.

    Args:
        data: A list, a dictionary, or any other value found while walking
            the structure.

    Returns:
        None. Rows are reported via ``print`` and accumulated in ``result``.
    """
    if isinstance(data, list):
        for item in data:
            loop(item)
    elif isinstance(data, dict):
        try:
            row = f"{data['valBool']};{data['valStr1'][0]};{data['valStr2'][0]}"
        except (KeyError, IndexError):
            # Not a complete value node: a valXXX key is missing or one of
            # the string lists is empty. Skip it instead of aborting the walk.
            pass
        else:
            print(row)
            result.append(row)
        # Value nodes and container nodes may both carry children.
        if 'node' in data:
            loop(data['node'])
# Print the header line first; loop() then prints one ';'-separated row per matching node.
print('valBool;valStr1;valStr2')
loop(data)
这并不完全是您期望的输出,但您应该能看出如何修改。
[编辑] 修改代码以将行放入列表 result