解析嵌套 JSON 并将其写入 CSV(重新访问)
Parsing nested JSON and writing it to CSV (Revisited)
我是 Python 初学者,参考的是下面这个帖子中给出的答案:
Parsing nested JSON and writing it to CSV
应该如何指定输入文件,才能让这段代码正常工作?我知道必须把 "outputfile" 定义为要写入的路径/文件名,但就是不知道输入文件应该在哪里指定。
编辑:为清楚起见,我有一个 JSON 文件作为输入,并希望将其转换为 CSV 文件作为输出。我只想知道如何改写上面那个示例中的代码,让它把一个特定的 JSON 文件作为输入。另外说明一下,JSON 文件的名称保持不变,但内容每天都会更改,所以我只需要知道把 open() 放在哪里,以及如何在脚本中调用它。
EDIT_2:
# Flatten a JSON object of records into CSV rows.
#
# Each top-level value is expected to be a dict. Its non-dict fields are
# copied into the row as-is; at most one dict-valued field (a dict of dicts)
# is expanded into "<name>__<key>" columns, producing one CSV row per nested
# item.
import csv
import json
from operator import itemgetter

inputfile = "/some/file.json"
outputfile = "/some/file.csv"

# The input is read through the handle opened here (no second, never-closed
# open() of the same file). newline='' lets the csv module control line
# endings itself.
with open(inputfile, 'r') as inf, open(outputfile, 'w', newline='') as outf:
    writer = None  # will be set to a csv.DictWriter later
    data = json.load(inf)

    for _key, item in sorted(data.items(), key=itemgetter(0)):
        row = {}
        nested_name, nested_items = '', {}
        # NOTE: item must be a dict; a top-level value that is a list or a
        # scalar raises AttributeError on .items() here.
        for k, v in item.items():
            if not isinstance(v, dict):
                row[k] = v
            else:
                assert not nested_items, 'Only one nested structure is supported'
                nested_name, nested_items = k, v

        if writer is None:
            # build fields for each first key of each nested item first
            fields = sorted(row)

            # sorted keys of first item in key sorted order
            nested_keys = sorted(sorted(nested_items.items(), key=itemgetter(0))[0][1])
            fields.extend('__'.join((nested_name, k)) for k in nested_keys)

            writer = csv.DictWriter(outf, fields)
            writer.writeheader()

        # One output row per nested item; the scalar fields repeat on each row.
        for _nkey, nitem in sorted(nested_items.items(), key=itemgetter(0)):
            row.update(('__'.join((nested_name, k)), v) for k, v in nitem.items())
            writer.writerow(row)
我得到的错误是...
for k, v in item.items():
AttributeError: 'list' object has no attribute 'items'
我想我可能没有正确读取 JSON 文件……作为 Python 新手,压力很大。
EDIT_3(更新了 JSON 结构):
这是我正在使用的 JSON 文件中的一个 'entry'(NIST/NVD JSON 文件):
{
"CVE_data_type" : "CVE",
"CVE_data_format" : "MITRE",
"CVE_data_version" : "4.0",
"CVE_data_numberOfCVEs" : "6208",
"CVE_data_timestamp" : "2017-08-14T18:06Z",
"CVE_Items" : [ {
"cve" : {
"CVE_data_meta" : {
"ID" : "CVE-2003-1547"
},
"affects" : {
"vendor" : {
"vendor_data" : [ {
"vendor_name" : "francisco_burzi",
"product" : {
"product_data" : [ {
"product_name" : "php-nuke",
"version" : {
"version_data" : [ {
"version_value" : "6.5"
}, {
"version_value" : "6.5_beta1"
}, {
"version_value" : "6.5_rc3"
}, {
"version_value" : "6.5_rc2"
}, {
"version_value" : "6.5_rc1"
} ]
}
} ]
}
} ]
}
},
"problemtype" : {
"problemtype_data" : [ {
"description" : [ {
"lang" : "en",
"value" : "CWE-79"
} ]
} ]
},
"references" : {
"reference_data" : [ {
"url" : "http://secunia.com/advisories/8478"
}, {
"url" : "http://securityreason.com/securityalert/3718"
}, {
"url" : "http://www.securityfocus.com/archive/1/archive/1/316925/30/25250/threaded"
}, {
"url" : "http://www.securityfocus.com/archive/1/archive/1/317230/30/25220/threaded"
}, {
"url" : "http://www.securityfocus.com/bid/7248"
}, {
"url" : "https://exchange.xforce.ibmcloud.com/vulnerabilities/11675"
} ]
},
"description" : {
"description_data" : [ {
"lang" : "en",
"value" : "Cross-site scripting (XSS) vulnerability in block-Forums.php in the Splatt Forum module for PHP-Nuke 6.x allows remote attackers to inject arbitrary web script or HTML via the subject parameter."
} ]
}
},
"configurations" : {
"CVE_data_version" : "4.0",
"nodes" : [ {
"operator" : "OR",
"cpe" : [ {
"vulnerable" : true,
"cpeMatchString" : "cpe:/a:francisco_burzi:php-nuke:6.5",
"cpe23Uri" : "cpe:2.3:a:francisco_burzi:php-nuke:6.5:*:*:*:*:*:*:*"
}, {
"vulnerable" : true,
"cpeMatchString" : "cpe:/a:francisco_burzi:php-nuke:6.5_beta1",
"cpe23Uri" : "cpe:2.3:a:francisco_burzi:php-nuke:6.5_beta1:*:*:*:*:*:*:*"
}, {
"vulnerable" : true,
"cpeMatchString" : "cpe:/a:francisco_burzi:php-nuke:6.5_rc1",
"cpe23Uri" : "cpe:2.3:a:francisco_burzi:php-nuke:6.5_rc1:*:*:*:*:*:*:*"
}, {
"vulnerable" : true,
"cpeMatchString" : "cpe:/a:francisco_burzi:php-nuke:6.5_rc2",
"cpe23Uri" : "cpe:2.3:a:francisco_burzi:php-nuke:6.5_rc2:*:*:*:*:*:*:*"
}, {
"vulnerable" : true,
"cpeMatchString" : "cpe:/a:francisco_burzi:php-nuke:6.5_rc3",
"cpe23Uri" : "cpe:2.3:a:francisco_burzi:php-nuke:6.5_rc3:*:*:*:*:*:*:*"
} ]
} ]
},
"impact" : {
"baseMetricV2" : {
"cvssV2" : {
"vectorString" : "(AV:N/AC:M/Au:N/C:N/I:P/A:N)",
"accessVector" : "NETWORK",
"accessComplexity" : "MEDIUM",
"authentication" : "NONE",
"confidentialityImpact" : "NONE",
"integrityImpact" : "PARTIAL",
"availabilityImpact" : "NONE",
"baseScore" : 4.3
},
"severity" : "MEDIUM",
"exploitabilityScore" : 8.6,
"impactScore" : 2.9,
"obtainAllPrivilege" : false,
"obtainUserPrivilege" : false,
"obtainOtherPrivilege" : false,
"userInteractionRequired" : true
}
},
"publishedDate" : "2003-12-31T05:00Z",
"lastModifiedDate" : "2017-08-08T01:29Z"
}]
}
我希望用键作为 CSV 文件的表头(headers,如 lastModifiedDate、cpe23Uri 等)。一旦 CSV 文件中有了表头和数据,我就可以过滤掉空白(whitespace)并选择我想要的列。
幸运的是,您的 JSON 数据格式足够规范,json.load() 可以读取并解析它……但仅仅说希望用键作为表头还不够具体——每个 'entry' 的不同层级上都有很多键(如下所示)。请注意,链接问题的提问者不仅定义了输入,还具体说明了如何把其中的数据映射到 CSV 文件的各个值列,并且展示了 CSV 的格式——而不只是含糊地说一句“把键映射为文件表头”。
无论如何,下面这段代码可以帮助您着手解决这个问题。它会读取 JSON 对象中顶层 "CVE_Items" 键所对应列表里的每个 'entry',并以易读的格式打印出来。根据输出,您应该能够选出要提取的列,然后自行补全把它们作为行写入 CSV 文件的代码。
import json
inputfile = "some_file.json"
outputfile = "some_file.csv"

with open(outputfile, 'w', newline='') as outf:
    # Parse the complete JSON document; the input handle is closed as soon
    # as parsing finishes.
    with open(inputfile, 'r') as json_file:
        data = json.load(json_file)

    # Placeholder: this is the spot where each entry should be converted into
    # a row of CSV data. For now every entry of the "CVE_Items" list is only
    # pretty-printed so its structure can be inspected.
    cve_items = data["CVE_Items"]
    for entry in cve_items:
        pretty = json.dumps(entry, indent=4)
        print(pretty)
对于您添加到问题中的示例数据,单个条目的 JSON 输出如下:
{
"cve": {
"CVE_data_meta": {
"ID": "CVE-2003-1547"
},
"affects": {
"vendor": {
"vendor_data": [
{
"vendor_name": "francisco_burzi",
"product": {
"product_data": [
{
"product_name": "php-nuke",
"version": {
"version_data": [
{
"version_value": "6.5"
},
{
"version_value": "6.5_beta1"
},
{
"version_value": "6.5_rc3"
},
{
"version_value": "6.5_rc2"
},
{
"version_value": "6.5_rc1"
}
]
}
}
]
}
}
]
}
},
"problemtype": {
"problemtype_data": [
{
"description": [
{
"lang": "en",
"value": "CWE-79"
}
]
}
]
},
"references": {
"reference_data": [
{
"url": "http://secunia.com/advisories/8478"
},
{
"url": "http://securityreason.com/securityalert/3718"
},
{
"url": "http://www.securityfocus.com/archive/1/archive/1/316925/30/25250/threaded"
},
{
"url": "http://www.securityfocus.com/archive/1/archive/1/317230/30/25220/threaded"
},
{
"url": "http://www.securityfocus.com/bid/7248"
},
{
"url": "https://exchange.xforce.ibmcloud.com/vulnerabilities/11675"
}
]
},
"description": {
"description_data": [
{
"lang": "en",
"value": "Cross-site scripting (XSS) vulnerability in block-Forums.php in the Splatt Forum module for PHP-Nuke 6.x allows remote attackers to inject arbitrary web script or HTML via the subject parameter."
}
]
}
},
"configurations": {
"CVE_data_version": "4.0",
"nodes": [
{
"operator": "OR",
"cpe": [
{
"vulnerable": true,
"cpeMatchString": "cpe:/a:francisco_burzi:php-nuke:6.5",
"cpe23Uri": "cpe:2.3:a:francisco_burzi:php-nuke:6.5:*:*:*:*:*:*:*"
},
{
"vulnerable": true,
"cpeMatchString": "cpe:/a:francisco_burzi:php-nuke:6.5_beta1",
"cpe23Uri": "cpe:2.3:a:francisco_burzi:php-nuke:6.5_beta1:*:*:*:*:*:*:*"
},
{
"vulnerable": true,
"cpeMatchString": "cpe:/a:francisco_burzi:php-nuke:6.5_rc1",
"cpe23Uri": "cpe:2.3:a:francisco_burzi:php-nuke:6.5_rc1:*:*:*:*:*:*:*"
},
{
"vulnerable": true,
"cpeMatchString": "cpe:/a:francisco_burzi:php-nuke:6.5_rc2",
"cpe23Uri": "cpe:2.3:a:francisco_burzi:php-nuke:6.5_rc2:*:*:*:*:*:*:*"
},
{
"vulnerable": true,
"cpeMatchString": "cpe:/a:francisco_burzi:php-nuke:6.5_rc3",
"cpe23Uri": "cpe:2.3:a:francisco_burzi:php-nuke:6.5_rc3:*:*:*:*:*:*:*"
}
]
}
]
},
"impact": {
"baseMetricV2": {
"cvssV2": {
"vectorString": "(AV:N/AC:M/Au:N/C:N/I:P/A:N)",
"accessVector": "NETWORK",
"accessComplexity": "MEDIUM",
"authentication": "NONE",
"confidentialityImpact": "NONE",
"integrityImpact": "PARTIAL",
"availabilityImpact": "NONE",
"baseScore": 4.3
},
"severity": "MEDIUM",
"exploitabilityScore": 8.6,
"impactScore": 2.9,
"obtainAllPrivilege": false,
"obtainUserPrivilege": false,
"obtainOtherPrivilege": false,
"userInteractionRequired": true
}
},
"publishedDate": "2003-12-31T05:00Z",
"lastModifiedDate": "2017-08-08T01:29Z"
}
我是 Python 初学者,参考的是下面这个帖子中给出的答案:
Parsing nested JSON and writing it to CSV
应该如何指定输入文件,才能让这段代码正常工作?我知道必须把 "outputfile" 定义为要写入的路径/文件名,但就是不知道输入文件应该在哪里指定。
编辑:为清楚起见,我有一个 JSON 文件作为输入,并希望将其转换为 CSV 文件作为输出。我只想知道如何改写上面那个示例中的代码,让它把一个特定的 JSON 文件作为输入。另外说明一下,JSON 文件的名称保持不变,但内容每天都会更改,所以我只需要知道把 open() 放在哪里,以及如何在脚本中调用它。
EDIT_2:
# Flatten a JSON object of records into CSV rows.
#
# Each top-level value is expected to be a dict. Its non-dict fields are
# copied into the row as-is; at most one dict-valued field (a dict of dicts)
# is expanded into "<name>__<key>" columns, producing one CSV row per nested
# item.
import csv
import json
from operator import itemgetter

inputfile = "/some/file.json"
outputfile = "/some/file.csv"

# The input is read through the handle opened here (no second, never-closed
# open() of the same file). newline='' lets the csv module control line
# endings itself.
with open(inputfile, 'r') as inf, open(outputfile, 'w', newline='') as outf:
    writer = None  # will be set to a csv.DictWriter later
    data = json.load(inf)

    for _key, item in sorted(data.items(), key=itemgetter(0)):
        row = {}
        nested_name, nested_items = '', {}
        # NOTE: item must be a dict; a top-level value that is a list or a
        # scalar raises AttributeError on .items() here.
        for k, v in item.items():
            if not isinstance(v, dict):
                row[k] = v
            else:
                assert not nested_items, 'Only one nested structure is supported'
                nested_name, nested_items = k, v

        if writer is None:
            # build fields for each first key of each nested item first
            fields = sorted(row)

            # sorted keys of first item in key sorted order
            nested_keys = sorted(sorted(nested_items.items(), key=itemgetter(0))[0][1])
            fields.extend('__'.join((nested_name, k)) for k in nested_keys)

            writer = csv.DictWriter(outf, fields)
            writer.writeheader()

        # One output row per nested item; the scalar fields repeat on each row.
        for _nkey, nitem in sorted(nested_items.items(), key=itemgetter(0)):
            row.update(('__'.join((nested_name, k)), v) for k, v in nitem.items())
            writer.writerow(row)
我得到的错误是...
for k, v in item.items():
AttributeError: 'list' object has no attribute 'items'
我想我可能没有正确读取 JSON 文件……作为 Python 新手,压力很大。
EDIT_3(更新了 JSON 结构): 这是我正在使用的 JSON 文件中的一个 'entry'(NIST/NVD JSON 文件):
{
"CVE_data_type" : "CVE",
"CVE_data_format" : "MITRE",
"CVE_data_version" : "4.0",
"CVE_data_numberOfCVEs" : "6208",
"CVE_data_timestamp" : "2017-08-14T18:06Z",
"CVE_Items" : [ {
"cve" : {
"CVE_data_meta" : {
"ID" : "CVE-2003-1547"
},
"affects" : {
"vendor" : {
"vendor_data" : [ {
"vendor_name" : "francisco_burzi",
"product" : {
"product_data" : [ {
"product_name" : "php-nuke",
"version" : {
"version_data" : [ {
"version_value" : "6.5"
}, {
"version_value" : "6.5_beta1"
}, {
"version_value" : "6.5_rc3"
}, {
"version_value" : "6.5_rc2"
}, {
"version_value" : "6.5_rc1"
} ]
}
} ]
}
} ]
}
},
"problemtype" : {
"problemtype_data" : [ {
"description" : [ {
"lang" : "en",
"value" : "CWE-79"
} ]
} ]
},
"references" : {
"reference_data" : [ {
"url" : "http://secunia.com/advisories/8478"
}, {
"url" : "http://securityreason.com/securityalert/3718"
}, {
"url" : "http://www.securityfocus.com/archive/1/archive/1/316925/30/25250/threaded"
}, {
"url" : "http://www.securityfocus.com/archive/1/archive/1/317230/30/25220/threaded"
}, {
"url" : "http://www.securityfocus.com/bid/7248"
}, {
"url" : "https://exchange.xforce.ibmcloud.com/vulnerabilities/11675"
} ]
},
"description" : {
"description_data" : [ {
"lang" : "en",
"value" : "Cross-site scripting (XSS) vulnerability in block-Forums.php in the Splatt Forum module for PHP-Nuke 6.x allows remote attackers to inject arbitrary web script or HTML via the subject parameter."
} ]
}
},
"configurations" : {
"CVE_data_version" : "4.0",
"nodes" : [ {
"operator" : "OR",
"cpe" : [ {
"vulnerable" : true,
"cpeMatchString" : "cpe:/a:francisco_burzi:php-nuke:6.5",
"cpe23Uri" : "cpe:2.3:a:francisco_burzi:php-nuke:6.5:*:*:*:*:*:*:*"
}, {
"vulnerable" : true,
"cpeMatchString" : "cpe:/a:francisco_burzi:php-nuke:6.5_beta1",
"cpe23Uri" : "cpe:2.3:a:francisco_burzi:php-nuke:6.5_beta1:*:*:*:*:*:*:*"
}, {
"vulnerable" : true,
"cpeMatchString" : "cpe:/a:francisco_burzi:php-nuke:6.5_rc1",
"cpe23Uri" : "cpe:2.3:a:francisco_burzi:php-nuke:6.5_rc1:*:*:*:*:*:*:*"
}, {
"vulnerable" : true,
"cpeMatchString" : "cpe:/a:francisco_burzi:php-nuke:6.5_rc2",
"cpe23Uri" : "cpe:2.3:a:francisco_burzi:php-nuke:6.5_rc2:*:*:*:*:*:*:*"
}, {
"vulnerable" : true,
"cpeMatchString" : "cpe:/a:francisco_burzi:php-nuke:6.5_rc3",
"cpe23Uri" : "cpe:2.3:a:francisco_burzi:php-nuke:6.5_rc3:*:*:*:*:*:*:*"
} ]
} ]
},
"impact" : {
"baseMetricV2" : {
"cvssV2" : {
"vectorString" : "(AV:N/AC:M/Au:N/C:N/I:P/A:N)",
"accessVector" : "NETWORK",
"accessComplexity" : "MEDIUM",
"authentication" : "NONE",
"confidentialityImpact" : "NONE",
"integrityImpact" : "PARTIAL",
"availabilityImpact" : "NONE",
"baseScore" : 4.3
},
"severity" : "MEDIUM",
"exploitabilityScore" : 8.6,
"impactScore" : 2.9,
"obtainAllPrivilege" : false,
"obtainUserPrivilege" : false,
"obtainOtherPrivilege" : false,
"userInteractionRequired" : true
}
},
"publishedDate" : "2003-12-31T05:00Z",
"lastModifiedDate" : "2017-08-08T01:29Z"
}]
}
我希望用键作为 CSV 文件的表头(headers,如 lastModifiedDate、cpe23Uri 等)。一旦 CSV 文件中有了表头和数据,我就可以过滤掉空白(whitespace)并选择我想要的列。
幸运的是,您的 JSON 数据格式足够规范,json.load() 可以读取并解析它……但仅仅说希望用键作为表头还不够具体——每个 'entry' 的不同层级上都有很多键(如下所示)。请注意,链接问题的提问者不仅定义了输入,还具体说明了如何把其中的数据映射到 CSV 文件的各个值列,并且展示了 CSV 的格式——而不只是含糊地说一句“把键映射为文件表头”。
无论如何,下面这段代码可以帮助您着手解决这个问题。它会读取 JSON 对象中顶层 "CVE_Items" 键所对应列表里的每个 'entry',并以易读的格式打印出来。根据输出,您应该能够选出要提取的列,然后自行补全把它们作为行写入 CSV 文件的代码。
import json
inputfile = "some_file.json"
outputfile = "some_file.csv"

with open(outputfile, 'w', newline='') as outf:
    # Parse the complete JSON document; the input handle is closed as soon
    # as parsing finishes.
    with open(inputfile, 'r') as json_file:
        data = json.load(json_file)

    # Placeholder: this is the spot where each entry should be converted into
    # a row of CSV data. For now every entry of the "CVE_Items" list is only
    # pretty-printed so its structure can be inspected.
    cve_items = data["CVE_Items"]
    for entry in cve_items:
        pretty = json.dumps(entry, indent=4)
        print(pretty)
对于您添加到问题中的示例数据,单个条目的 JSON 输出如下:
{
"cve": {
"CVE_data_meta": {
"ID": "CVE-2003-1547"
},
"affects": {
"vendor": {
"vendor_data": [
{
"vendor_name": "francisco_burzi",
"product": {
"product_data": [
{
"product_name": "php-nuke",
"version": {
"version_data": [
{
"version_value": "6.5"
},
{
"version_value": "6.5_beta1"
},
{
"version_value": "6.5_rc3"
},
{
"version_value": "6.5_rc2"
},
{
"version_value": "6.5_rc1"
}
]
}
}
]
}
}
]
}
},
"problemtype": {
"problemtype_data": [
{
"description": [
{
"lang": "en",
"value": "CWE-79"
}
]
}
]
},
"references": {
"reference_data": [
{
"url": "http://secunia.com/advisories/8478"
},
{
"url": "http://securityreason.com/securityalert/3718"
},
{
"url": "http://www.securityfocus.com/archive/1/archive/1/316925/30/25250/threaded"
},
{
"url": "http://www.securityfocus.com/archive/1/archive/1/317230/30/25220/threaded"
},
{
"url": "http://www.securityfocus.com/bid/7248"
},
{
"url": "https://exchange.xforce.ibmcloud.com/vulnerabilities/11675"
}
]
},
"description": {
"description_data": [
{
"lang": "en",
"value": "Cross-site scripting (XSS) vulnerability in block-Forums.php in the Splatt Forum module for PHP-Nuke 6.x allows remote attackers to inject arbitrary web script or HTML via the subject parameter."
}
]
}
},
"configurations": {
"CVE_data_version": "4.0",
"nodes": [
{
"operator": "OR",
"cpe": [
{
"vulnerable": true,
"cpeMatchString": "cpe:/a:francisco_burzi:php-nuke:6.5",
"cpe23Uri": "cpe:2.3:a:francisco_burzi:php-nuke:6.5:*:*:*:*:*:*:*"
},
{
"vulnerable": true,
"cpeMatchString": "cpe:/a:francisco_burzi:php-nuke:6.5_beta1",
"cpe23Uri": "cpe:2.3:a:francisco_burzi:php-nuke:6.5_beta1:*:*:*:*:*:*:*"
},
{
"vulnerable": true,
"cpeMatchString": "cpe:/a:francisco_burzi:php-nuke:6.5_rc1",
"cpe23Uri": "cpe:2.3:a:francisco_burzi:php-nuke:6.5_rc1:*:*:*:*:*:*:*"
},
{
"vulnerable": true,
"cpeMatchString": "cpe:/a:francisco_burzi:php-nuke:6.5_rc2",
"cpe23Uri": "cpe:2.3:a:francisco_burzi:php-nuke:6.5_rc2:*:*:*:*:*:*:*"
},
{
"vulnerable": true,
"cpeMatchString": "cpe:/a:francisco_burzi:php-nuke:6.5_rc3",
"cpe23Uri": "cpe:2.3:a:francisco_burzi:php-nuke:6.5_rc3:*:*:*:*:*:*:*"
}
]
}
]
},
"impact": {
"baseMetricV2": {
"cvssV2": {
"vectorString": "(AV:N/AC:M/Au:N/C:N/I:P/A:N)",
"accessVector": "NETWORK",
"accessComplexity": "MEDIUM",
"authentication": "NONE",
"confidentialityImpact": "NONE",
"integrityImpact": "PARTIAL",
"availabilityImpact": "NONE",
"baseScore": 4.3
},
"severity": "MEDIUM",
"exploitabilityScore": 8.6,
"impactScore": 2.9,
"obtainAllPrivilege": false,
"obtainUserPrivilege": false,
"obtainOtherPrivilege": false,
"userInteractionRequired": true
}
},
"publishedDate": "2003-12-31T05:00Z",
"lastModifiedDate": "2017-08-08T01:29Z"
}