解析嵌套 JSON 并将其写入 CSV(再探)

Parsing nested JSON and writing it to CSV (Revisited)

我是 Python 新手,正在参考以下帖子中给出的答案:

Parsing nested JSON and writing it to CSV

如何定义输入文件才能使此代码正常工作?我知道必须将 "outputfile" 定义为要写入的路径/文件名,但我不知道输入文件应该在哪里指定。

编辑:为清楚起见,我有一个用作输入的 JSON 文件,并希望将其转换为 CSV 文件作为输出。我只想知道如何编写代码,使上面链接中的示例以某个特定的 JSON 文件作为输入。另外说明一下,JSON 文件的名称保持不变,但内容每天都会更改,所以我只需要知道应该把 open() 放在哪里,以及如何在脚本中调用它。

EDIT_2:

# Flatten a JSON document whose top-level values are dicts into CSV rows.
# Each top-level value may hold scalar fields plus at most one nested dict of
# dicts; one CSV row is written per entry of that nested dict, with the nested
# columns named "<nested_name>__<key>".
import csv
import json
from operator import itemgetter

inputfile = "/some/file.json"
outputfile = "/some/file.csv"

# Parse the input through the handle managed by the `with` statement, so the
# file is opened exactly once and is closed even if parsing fails.
with open(inputfile, 'r') as inf:
    data = json.load(inf)

# newline='' lets the csv module control line endings itself (Python 3).
with open(outputfile, 'w', newline='') as outf:
    writer = None  # will be set to a csv.DictWriter once the header is known

    # NOTE(review): every top-level value is expected to be a dict; a value of
    # another type (such as a list) raises AttributeError on .items() below.
    for key, item in sorted(data.items(), key=itemgetter(0)):
        row = {}
        nested_name, nested_items = '', {}
        for k, v in item.items():
            if not isinstance(v, dict):
                row[k] = v
            else:
                assert not nested_items, 'Only one nested structure is supported'
                nested_name, nested_items = k, v

        if writer is None:
            # The header is built from the first item only: its scalar keys...
            fields = sorted(row)

            # ...followed by the sorted keys of its first nested entry
            # (nested entries taken in key-sorted order).
            nested_keys = sorted(sorted(nested_items.items(), key=itemgetter(0))[0][1])
            fields.extend('__'.join((nested_name, k)) for k in nested_keys)

            writer = csv.DictWriter(outf, fields)
            writer.writeheader()

        # One output row per nested entry; the scalar fields repeat on each row.
        for nkey, nitem in sorted(nested_items.items(), key=itemgetter(0)):
            row.update(('__'.join((nested_name, k)), v) for k, v in nitem.items())
            writer.writerow(row)

我得到的错误是...

for k, v in item.items():

AttributeError: 'list' object has no attribute 'items'

我想我可能没有正确读取 JSON 文件……作为 Python 新手,压力很大。

EDIT_3(更新了 JSON 结构): 这是我正在使用的 JSON 文件中的一个 'entry'(NIST/NVD JSON 文件):

    {
      "CVE_data_type" : "CVE",
      "CVE_data_format" : "MITRE",
      "CVE_data_version" : "4.0",
      "CVE_data_numberOfCVEs" : "6208",
      "CVE_data_timestamp" : "2017-08-14T18:06Z",
      "CVE_Items" : [ {
        "cve" : {
          "CVE_data_meta" : {
            "ID" : "CVE-2003-1547"
          },
          "affects" : {
            "vendor" : {
              "vendor_data" : [ {
                "vendor_name" : "francisco_burzi",
                "product" : {
                  "product_data" : [ {
                    "product_name" : "php-nuke",
                    "version" : {
                      "version_data" : [ {
                        "version_value" : "6.5"
                      }, {
                        "version_value" : "6.5_beta1"
                      }, {
                        "version_value" : "6.5_rc3"
                      }, {
                        "version_value" : "6.5_rc2"
                      }, {
                        "version_value" : "6.5_rc1"
                      } ]
                    }
                  } ]
                }
              } ]
            }
          },
          "problemtype" : {
            "problemtype_data" : [ {
              "description" : [ {
                "lang" : "en",
                "value" : "CWE-79"
              } ]
            } ]
          },
          "references" : {
            "reference_data" : [ {
              "url" : "http://secunia.com/advisories/8478"
            }, {
              "url" : "http://securityreason.com/securityalert/3718"
            }, {
              "url" : "http://www.securityfocus.com/archive/1/archive/1/316925/30/25250/threaded"
            }, {
              "url" : "http://www.securityfocus.com/archive/1/archive/1/317230/30/25220/threaded"
            }, {
              "url" : "http://www.securityfocus.com/bid/7248"
            }, {
              "url" : "https://exchange.xforce.ibmcloud.com/vulnerabilities/11675"
            } ]
          },
          "description" : {
            "description_data" : [ {
              "lang" : "en",
              "value" : "Cross-site scripting (XSS) vulnerability in block-Forums.php in the Splatt Forum module for PHP-Nuke 6.x allows remote attackers to inject arbitrary web script or HTML via the subject parameter."
            } ]
          }
        },
        "configurations" : {
          "CVE_data_version" : "4.0",
          "nodes" : [ {
            "operator" : "OR",
            "cpe" : [ {
              "vulnerable" : true,
              "cpeMatchString" : "cpe:/a:francisco_burzi:php-nuke:6.5",
              "cpe23Uri" : "cpe:2.3:a:francisco_burzi:php-nuke:6.5:*:*:*:*:*:*:*"
            }, {
              "vulnerable" : true,
              "cpeMatchString" : "cpe:/a:francisco_burzi:php-nuke:6.5_beta1",
              "cpe23Uri" : "cpe:2.3:a:francisco_burzi:php-nuke:6.5_beta1:*:*:*:*:*:*:*"
            }, {
              "vulnerable" : true,
              "cpeMatchString" : "cpe:/a:francisco_burzi:php-nuke:6.5_rc1",
              "cpe23Uri" : "cpe:2.3:a:francisco_burzi:php-nuke:6.5_rc1:*:*:*:*:*:*:*"
            }, {
              "vulnerable" : true,
              "cpeMatchString" : "cpe:/a:francisco_burzi:php-nuke:6.5_rc2",
              "cpe23Uri" : "cpe:2.3:a:francisco_burzi:php-nuke:6.5_rc2:*:*:*:*:*:*:*"
            }, {
              "vulnerable" : true,
              "cpeMatchString" : "cpe:/a:francisco_burzi:php-nuke:6.5_rc3",
              "cpe23Uri" : "cpe:2.3:a:francisco_burzi:php-nuke:6.5_rc3:*:*:*:*:*:*:*"
            } ]
          } ]
        },
        "impact" : {
          "baseMetricV2" : {
            "cvssV2" : {
              "vectorString" : "(AV:N/AC:M/Au:N/C:N/I:P/A:N)",
              "accessVector" : "NETWORK",
              "accessComplexity" : "MEDIUM",
              "authentication" : "NONE",
              "confidentialityImpact" : "NONE",
              "integrityImpact" : "PARTIAL",
              "availabilityImpact" : "NONE",
              "baseScore" : 4.3
            },
            "severity" : "MEDIUM",
            "exploitabilityScore" : 8.6,
            "impactScore" : 2.9,
            "obtainAllPrivilege" : false,
            "obtainUserPrivilege" : false,
            "obtainOtherPrivilege" : false,
            "userInteractionRequired" : true
          }
        },
        "publishedDate" : "2003-12-31T05:00Z",
        "lastModifiedDate" : "2017-08-08T01:29Z"
      }]
}

我希望这些键成为 CSV 文件的表头(如 lastModifiedDate、cpe23Uri 等)。一旦 CSV 文件中有了表头和数据,我就可以过滤掉空白并选择我想要的列。

幸运的是,您的 JSON 数据格式良好,足以让 json.load() 读取和解析……但仅仅说希望键成为表头还不够具体——每个 'entry' 在不同层级上都有很多键(如下所示)。请注意,所链接问题的提问者不仅定义了输入,还具体说明了如何将其中的数据映射到 CSV 文件的各列,并展示了 CSV 的格式——而不只是含糊地说要把键映射为文件表头。

无论如何,下面的代码可以帮助您着手解决这个问题。它会读取 JSON 对象中顶层 "CVE_Items" 键所对应列表里的每个 'entry',并以易读的格式打印出来。根据输出,您应该能够选出要提取的列,并自行补充把它们作为行写入 CSV 文件的代码。

import json

# Paths of the JSON document to read and of the CSV file to produce.
inputfile, outputfile = "some_file.json", "some_file.csv"

with open(outputfile, mode='w', newline='') as outf:
    # Parse the whole JSON document; the input handle is closed right after.
    with open(inputfile, mode='r') as json_file:
        data = json.loads(json_file.read())

    # Placeholder for the real conversion: each entry should become a row of
    # CSV data written to `outf`. For now every entry of the "CVE_Items" list
    # is only pretty-printed so its structure can be inspected.
    cve_items = data["CVE_Items"]
    for entry in cve_items:
        pretty = json.dumps(entry, indent=4)
        print(pretty)

对于您添加到问题中的示例 JSON 数据,其中单个条目的输出如下:

{
    "cve": {
        "CVE_data_meta": {
            "ID": "CVE-2003-1547"
        },
        "affects": {
            "vendor": {
                "vendor_data": [
                    {
                        "vendor_name": "francisco_burzi",
                        "product": {
                            "product_data": [
                                {
                                    "product_name": "php-nuke",
                                    "version": {
                                        "version_data": [
                                            {
                                                "version_value": "6.5"
                                            },
                                            {
                                                "version_value": "6.5_beta1"
                                            },
                                            {
                                                "version_value": "6.5_rc3"
                                            },
                                            {
                                                "version_value": "6.5_rc2"
                                            },
                                            {
                                                "version_value": "6.5_rc1"
                                            }
                                        ]
                                    }
                                }
                            ]
                        }
                    }
                ]
            }
        },
        "problemtype": {
            "problemtype_data": [
                {
                    "description": [
                        {
                            "lang": "en",
                            "value": "CWE-79"
                        }
                    ]
                }
            ]
        },
        "references": {
            "reference_data": [
                {
                    "url": "http://secunia.com/advisories/8478"
                },
                {
                    "url": "http://securityreason.com/securityalert/3718"
                },
                {
                    "url": "http://www.securityfocus.com/archive/1/archive/1/316925/30/25250/threaded"
                },
                {
                    "url": "http://www.securityfocus.com/archive/1/archive/1/317230/30/25220/threaded"
                },
                {
                    "url": "http://www.securityfocus.com/bid/7248"
                },
                {
                    "url": "https://exchange.xforce.ibmcloud.com/vulnerabilities/11675"
                }
            ]
        },
        "description": {
            "description_data": [
                {
                    "lang": "en",
                    "value": "Cross-site scripting (XSS) vulnerability in block-Forums.php in the Splatt Forum module for PHP-Nuke 6.x allows remote attackers to inject arbitrary web script or HTML via the subject parameter."
                }
            ]
        }
    },
    "configurations": {
        "CVE_data_version": "4.0",
        "nodes": [
            {
                "operator": "OR",
                "cpe": [
                    {
                        "vulnerable": true,
                        "cpeMatchString": "cpe:/a:francisco_burzi:php-nuke:6.5",
                        "cpe23Uri": "cpe:2.3:a:francisco_burzi:php-nuke:6.5:*:*:*:*:*:*:*"
                    },
                    {
                        "vulnerable": true,
                        "cpeMatchString": "cpe:/a:francisco_burzi:php-nuke:6.5_beta1",
                        "cpe23Uri": "cpe:2.3:a:francisco_burzi:php-nuke:6.5_beta1:*:*:*:*:*:*:*"
                    },
                    {
                        "vulnerable": true,
                        "cpeMatchString": "cpe:/a:francisco_burzi:php-nuke:6.5_rc1",
                        "cpe23Uri": "cpe:2.3:a:francisco_burzi:php-nuke:6.5_rc1:*:*:*:*:*:*:*"
                    },
                    {
                        "vulnerable": true,
                        "cpeMatchString": "cpe:/a:francisco_burzi:php-nuke:6.5_rc2",
                        "cpe23Uri": "cpe:2.3:a:francisco_burzi:php-nuke:6.5_rc2:*:*:*:*:*:*:*"
                    },
                    {
                        "vulnerable": true,
                        "cpeMatchString": "cpe:/a:francisco_burzi:php-nuke:6.5_rc3",
                        "cpe23Uri": "cpe:2.3:a:francisco_burzi:php-nuke:6.5_rc3:*:*:*:*:*:*:*"
                    }
                ]
            }
        ]
    },
    "impact": {
        "baseMetricV2": {
            "cvssV2": {
                "vectorString": "(AV:N/AC:M/Au:N/C:N/I:P/A:N)",
                "accessVector": "NETWORK",
                "accessComplexity": "MEDIUM",
                "authentication": "NONE",
                "confidentialityImpact": "NONE",
                "integrityImpact": "PARTIAL",
                "availabilityImpact": "NONE",
                "baseScore": 4.3
            },
            "severity": "MEDIUM",
            "exploitabilityScore": 8.6,
            "impactScore": 2.9,
            "obtainAllPrivilege": false,
            "obtainUserPrivilege": false,
            "obtainOtherPrivilege": false,
            "userInteractionRequired": true
        }
    },
    "publishedDate": "2003-12-31T05:00Z",
    "lastModifiedDate": "2017-08-08T01:29Z"
}