如何提取特定的嵌套 JSON 值做循环? (Python)
How to extract specific nested JSON value doing loop? (Python)
{
"127.0.0.1":{
"addresses":{
"ipv4":"127.0.0.1"
},
"hostnames":[
{
"name":"localhost",
"type":"PTR"
}
],
"status":{
"reason":"conn-refused",
"state":"up"
},
"tcp":{
"5000":{
"conf":"10",
"cpe":"cpe:/a:python:python:3.9.2",
"extrainfo":"Python 3.9.2",
"name":"http",
"product":"Werkzeug httpd",
"reason":"syn-ack",
"script":{
"vulners":"\n cpe:/a:python:python:3.9.2: \n \tCVE-2021-29921\t7.5\thttps://vulners.com/cve/CVE-2021-29921\n \tCVE-2021-23336\t4.0\thttps://vulners.com/cve/CVE-2021-23336\n \tMSF:ILITIES/DEBIAN-CVE-2021-3426/\t2.7\thttps://vulners.com/metasploit/MSF:ILITIES/DEBIAN-CVE-2021-3426/\t*EXPLOIT*\n \tCVE-2021-3426\t2.7\thttps://vulners.com/cve/CVE-2021-3426"
},
"state":"open",
"version":"1.0.1"
},
"6000":{
"conf":"10",
"cpe":"cpe:/a:python:python:3.9.2",
"extrainfo":"Python 3.9.2",
"name":"http",
"product":"Werkzeug httpd",
"reason":"syn-ack",
"script":{
"vulners":"\n cpe:/a:python:python:3.9.2: \n \tCVE-2021-29921\t7.5\thttps://vulners.com/cve/CVE-2021-29921\n \tCVE-2021-23336\t4.0\thttps://vulners.com/cve/CVE-2021-23336\n \tMSF:ILITIES/DEBIAN-CVE-2021-3426/\t2.7\thttps://vulners.com/metasploit/MSF:ILITIES/DEBIAN-CVE-2021-3426/\t*EXPLOIT*\n \tCVE-2021-3426\t2.7\thttps://vulners.com/cve/CVE-2021-3426"
},
"state":"open",
"version":"1.0.1"
}
},
"vendor":{
}
}
}
我想在这里提取 “vulners” 的值。我尝试过以下代码:
# Asker's attempt: read every document's "scan" field, gather per-port data
# and store one summary record through domaindata.
# The json_util.dumps / json.loads round trip turns each document into plain
# JSON-compatible Python values.
results = []
for x in collection.find({},{"scan": 1, "_id": 0 }):
results.append(json.loads(json_util.dumps(x)))
# Accumulators for port numbers, per-port 'script' entries and product names.
# NOTE(review): they are created once and never cleared in this snippet, so
# they keep growing across every ip / document that is processed.
portnumber = []
datay = []
datapro = []
for result in results:
ips = result['scan']
for ip in ips:
ports = result['scan'][ip]['tcp']
ipdomain = result['scan'][ip]['hostnames']
# ip3 is overwritten on each pass, so it ends up holding the last host name.
for ip2 in ipdomain:
ip3 = ip2['name']
for port in ports:
portnumber.append(port)
# NOTE(review): this keeps the whole 'script' mapping; the vulners text itself
# lives one level deeper, at ports[port]['script']['vulners'].
datax = ports[port]['script']
datay.append(datax)
datapro2 = ports[port]['product']
datapro.append(datapro2)
# Timestamp formatted with the locale's date (%x) and time (%X) representation.
date = datetime.datetime.now()
date_now = date.strftime("%x, %X")
pass_json_var = {'domain': ip3, 'ports': portnumber, 'product': datapro, 'vulnerabilities': datay, "date": date_now}
# pass_json_var was just assigned a dict literal, so the insert_many branch
# below is never taken here; only insert_one runs.
if isinstance(pass_json_var, list):
domaindata.insert_many(pass_json_var)
else:
domaindata.insert_one(pass_json_var)
好的,如果 results 中只有一个 “vulners” 值,这段代码可以正常工作;但是当有多个端口都带有 “vulners” 值时,它就不起作用了!
如何访问 'vulners' 值?希望有人也能指导我,请尝试给出一个动态的解决方案
非常感谢!
基于模型的方法
此方法基于您要解析的数据模型。从我的角度来看,这在开始时需要做更多的工作。这样做的好处是,您将获得清晰的错误消息,并且可以通过调整数据模型来控制行为。
- 为要解析的数据建立模型
from typing import Any, Optional
from pydantic import BaseModel, Field
class ExScript(BaseModel):
    """The "script" entry of one port; only the "vulners" text is modelled."""

    # Raw "vulners" text; falls back to an empty string when the key is absent.
    vulners: str = ""


class Ex30000(BaseModel):
    """The data stored for the TCP port keyed "30000"."""

    # Fall back to a real, empty ExScript instance so that ``.script.vulners``
    # still works (and yields "") when the port has no "script" entry.
    script: ExScript = Field(default_factory=ExScript)


class ExTcp(BaseModel):
    """The "tcp" mapping; only the port keyed "30000" is modelled."""

    # The JSON key "30000" is not a valid attribute name, hence the alias.
    root: Ex30000 = Field(default_factory=Ex30000, alias="30000")


class ExRoot(BaseModel):
    """Everything stored under one host key; "tcp" must be present."""

    tcp: ExTcp = Field(...)  # Required


class Base(BaseModel):
    """Top-level document, keyed by the host address "35.0.0.0.0"."""

    # Required: ExRoot cannot be built without "tcp", so no usable default
    # exists. A missing host key now fails validation with a clear message
    # instead of silently storing the ``typing.Any`` object as the value.
    root: ExRoot = Field(..., alias="35.0.0.0.0")
- 将您的输入数据更改为原始字符串,否则您将不得不转义 \n 和 \t
input_will_work = r"""{
"35.0.0.0.0": {
"hostnames": [
{
"name": "domain.com",
"type": "PTR"
}
],
"addresses": {
"ipv4": "35.0.0.0"
},
"vendor": {},
"status": {
"state": "up",
"reason": "syn-ack"
},
"tcp": {
"30000": {
"state": "open",
"reason": "syn-ack",
"name": "http",
"product": "nginx",
"version": "1.20.0",
"extrainfo": "",
"conf": "10",
"cpe": "cpe:/a:igor_sysoev:nginx:1.20.0",
"script": {
"http-server-header": "nginx/1.20.0",
"vulners": "\n cpe:/a:igor_sysoev:nginx:1.20.0: \n \tNGINX:CVE-2021-23017\t6.8\thttps://vulners.com/nginx/NGINX:CVE-2021-23017\n \t9A14990B-D52A-56B6-966C-6F35C8B8EB9D\t6.8\thttps://vulners.com/githubexploit/9A14990B-D52A-56B6-966C-6F35C8B8EB9D\t*EXPLOIT*\n \t1337DAY-ID-36300\t6.8\thttps://vulners.com/zdt/1337DAY-ID-36300\t*EXPLOIT*\n \tPACKETSTORM:162830\t0.0\thttps://vulners.com/packetstorm/PACKETSTORM:162830\t*EXPLOIT*"
}
}
}
}
}
"""
input_will_fail = r"""{
"35.0.0.0.0": {}
}
"""
3.1 这应该会给你预期的结果
# Validate the well-formed sample against the Base model, then follow the
# modelled attributes down to the vulners text and print it.
obj1 = Base.parse_raw(input_will_work)
print(obj1.root.tcp.root.script.vulners)
3.2 这应该抛出异常
# The "35.0.0.0.0" entry of this sample is empty, so the field marked as
# required ("tcp") is missing.
obj2 = Base.parse_raw(input_will_fail)
用jsonpath搜索数据
应该返回所有名称为 vulners 的对象
from jsonpath_ng import jsonpath, parse
import json
# Decode the sample JSON text into Python objects.
obj = json.loads(input_will_work)
# '$..vulners' uses the recursive-descent operator, so it matches "vulners"
# keys at any depth of the document.
p = parse('$..vulners')
# Print the value of every match.
for match in p.find(obj):
print(match.value)
更新:
def extract_data(ip_address_data):
    """Collect the host names and per-port findings of one scanned host.

    Args:
        ip_address_data: Mapping stored for a single IP address. The keys
            read are "hostnames" and "tcp"; every "tcp" entry is keyed by
            port and may carry "product" and "script" -> "vulners".

    Returns:
        A dict with two keys: "domains" (the "hostnames" value, or [] when
        it is absent) and "ports_data", a list holding one
        {"port", "product", "vulners"} dict per TCP port, in the order the
        ports appear in the input.
    """
    ports_data = []
    # Each port can have different products and vulners,
    # so that data is grouped together in one dictionary per port.
    for port, port_data in (ip_address_data.get("tcp") or {}).items():
        # Not every port carries script output; fall back to "" rather than
        # raising KeyError so the remaining ports are still reported.
        script = port_data.get("script") or {}
        ports_data.append({
            "port": port,
            "product": port_data.get("product", ""),
            "vulners": script.get("vulners", ""),
        })
    return {
        "domains": ip_address_data.get("hostnames", []),
        "ports_data": ports_data,
    }
# Result is the data from mongo db
# result = collection.find({})["scan"]
result = {
"127.0.0.1": {
"addresses": {
"ipv4": "127.0.0.1"
},
"hostnames": [
{
"name": "localhost",
"type": "PTR"
}
],
"status": {
"reason": "conn-refused",
"state": "up"
},
"tcp": {
"5000": {
"conf": "10",
"cpe": "cpe:/a:python:python:3.9.2",
"extrainfo": "Python 3.9.2",
"name": "http",
"product": "Werkzeug httpd",
"reason": "syn-ack",
"script": {
"vulners": "\n cpe:/a:python:python:3.9.2: \n \tCVE-2021-29921\t7.5\thttps://vulners.com/cve/CVE-2021-29921\n \tCVE-2021-23336\t4.0\thttps://vulners.com/cve/CVE-2021-23336\n \tMSF:ILITIES/DEBIAN-CVE-2021-3426/\t2.7\thttps://vulners.com/metasploit/MSF:ILITIES/DEBIAN-CVE-2021-3426/\t*EXPLOIT*\n \tCVE-2021-3426\t2.7\thttps://vulners.com/cve/CVE-2021-3426"
},
"state": "open",
"version": "1.0.1"
},
"6000": {
"conf": "10",
"cpe": "cpe:/a:python:python:3.9.2",
"extrainfo": "Python 3.9.2",
"name": "http",
"product": "Werkzeug httpd",
"reason": "syn-ack",
"script": {
"vulners": "\n cpe:/a:python:python:3.9.2: \n \tCVE-2021-29921\t7.5\thttps://vulners.com/cve/CVE-2021-29921\n \tCVE-2021-23336\t4.0\thttps://vulners.com/cve/CVE-2021-23336\n \tMSF:ILITIES/DEBIAN-CVE-2021-3426/\t2.7\thttps://vulners.com/metasploit/MSF:ILITIES/DEBIAN-CVE-2021-3426/\t*EXPLOIT*\n \tCVE-2021-3426\t2.7\thttps://vulners.com/cve/CVE-2021-3426"
},
"state": "open",
"version": "1.0.1"
}
},
"vendor": {
}
}
}
def scandata():
    """Print every IP address of the module-level ``result`` with its extracted data."""
    for ip_address, host_entry in result.items():
        print(ip_address, extract_data(host_entry))
scandata()
def extract_data(ip_address_data):
    """Collect the host names and per-port findings of one scanned host.

    Args:
        ip_address_data: Mapping stored for a single IP address. The keys
            read are "hostnames" and "tcp"; every "tcp" entry is keyed by
            port and may carry "product" and "script" -> "vulners".

    Returns:
        A dict with two keys: "domains" (the "hostnames" value, or [] when
        it is absent) and "ports_data", a list holding one
        {"port", "product", "vulners"} dict per TCP port, in the order the
        ports appear in the input.
    """
    ports_data = []
    # Each port can have different products and vulners,
    # so that data is grouped together in one dictionary per port.
    for port, port_data in (ip_address_data.get("tcp") or {}).items():
        # Not every port carries script output; fall back to "" rather than
        # raising KeyError so the remaining ports are still reported.
        script = port_data.get("script") or {}
        ports_data.append({
            "port": port,
            "product": port_data.get("product", ""),
            "vulners": script.get("vulners", ""),
        })
    return {
        "domains": ip_address_data.get("hostnames", []),
        "ports_data": ports_data,
    }
@app.route('/api/vulnerableports', methods=['GET'])
def show_vulnerableports():
    """Return, as JSON, one ``{ip_address: extracted_data}`` entry per scanned host."""
    # Round-trip each document through json_util so that only plain
    # JSON-compatible values remain before the data is walked.
    documents = [
        json.loads(json_util.dumps(doc))
        for doc in collection.find({}, {"scan": 1, "_id": 0})
    ]
    payload = []
    for document in documents:
        for ip_address, host_entry in document['scan'].items():
            payload.append({ip_address: extract_data(host_entry)})
    return jsonify(payload)
这就是解决方案!我必须先遍历 script,然后才能访问 vulners!
{
"127.0.0.1":{
"addresses":{
"ipv4":"127.0.0.1"
},
"hostnames":[
{
"name":"localhost",
"type":"PTR"
}
],
"status":{
"reason":"conn-refused",
"state":"up"
},
"tcp":{
"5000":{
"conf":"10",
"cpe":"cpe:/a:python:python:3.9.2",
"extrainfo":"Python 3.9.2",
"name":"http",
"product":"Werkzeug httpd",
"reason":"syn-ack",
"script":{
"vulners":"\n cpe:/a:python:python:3.9.2: \n \tCVE-2021-29921\t7.5\thttps://vulners.com/cve/CVE-2021-29921\n \tCVE-2021-23336\t4.0\thttps://vulners.com/cve/CVE-2021-23336\n \tMSF:ILITIES/DEBIAN-CVE-2021-3426/\t2.7\thttps://vulners.com/metasploit/MSF:ILITIES/DEBIAN-CVE-2021-3426/\t*EXPLOIT*\n \tCVE-2021-3426\t2.7\thttps://vulners.com/cve/CVE-2021-3426"
},
"state":"open",
"version":"1.0.1"
},
"6000":{
"conf":"10",
"cpe":"cpe:/a:python:python:3.9.2",
"extrainfo":"Python 3.9.2",
"name":"http",
"product":"Werkzeug httpd",
"reason":"syn-ack",
"script":{
"vulners":"\n cpe:/a:python:python:3.9.2: \n \tCVE-2021-29921\t7.5\thttps://vulners.com/cve/CVE-2021-29921\n \tCVE-2021-23336\t4.0\thttps://vulners.com/cve/CVE-2021-23336\n \tMSF:ILITIES/DEBIAN-CVE-2021-3426/\t2.7\thttps://vulners.com/metasploit/MSF:ILITIES/DEBIAN-CVE-2021-3426/\t*EXPLOIT*\n \tCVE-2021-3426\t2.7\thttps://vulners.com/cve/CVE-2021-3426"
},
"state":"open",
"version":"1.0.1"
}
},
"vendor":{
}
}
}
我想在这里提取 “vulners” 的值。我尝试过以下代码:
# Asker's attempt: read every document's "scan" field, gather per-port data
# and store one summary record through domaindata.
# The json_util.dumps / json.loads round trip turns each document into plain
# JSON-compatible Python values.
results = []
for x in collection.find({},{"scan": 1, "_id": 0 }):
results.append(json.loads(json_util.dumps(x)))
# Accumulators for port numbers, per-port 'script' entries and product names.
# NOTE(review): they are created once and never cleared in this snippet, so
# they keep growing across every ip / document that is processed.
portnumber = []
datay = []
datapro = []
for result in results:
ips = result['scan']
for ip in ips:
ports = result['scan'][ip]['tcp']
ipdomain = result['scan'][ip]['hostnames']
# ip3 is overwritten on each pass, so it ends up holding the last host name.
for ip2 in ipdomain:
ip3 = ip2['name']
for port in ports:
portnumber.append(port)
# NOTE(review): this keeps the whole 'script' mapping; the vulners text itself
# lives one level deeper, at ports[port]['script']['vulners'].
datax = ports[port]['script']
datay.append(datax)
datapro2 = ports[port]['product']
datapro.append(datapro2)
# Timestamp formatted with the locale's date (%x) and time (%X) representation.
date = datetime.datetime.now()
date_now = date.strftime("%x, %X")
pass_json_var = {'domain': ip3, 'ports': portnumber, 'product': datapro, 'vulnerabilities': datay, "date": date_now}
# pass_json_var was just assigned a dict literal, so the insert_many branch
# below is never taken here; only insert_one runs.
if isinstance(pass_json_var, list):
domaindata.insert_many(pass_json_var)
else:
domaindata.insert_one(pass_json_var)
好的,如果 results 中只有一个 “vulners” 值,这段代码可以正常工作;但是当有多个端口都带有 “vulners” 值时,它就不起作用了!
如何访问 'vulners' 值?希望有人也能指导我,请尝试给出一个动态的解决方案
非常感谢!
基于模型的方法
此方法基于您要解析的数据模型。从我的角度来看,这在开始时需要做更多的工作。这样做的好处是,您将获得清晰的错误消息,并且可以通过调整数据模型来控制行为。
- 为要解析的数据建立模型
from typing import Any, Optional
from pydantic import BaseModel, Field
class ExScript(BaseModel):
    """The "script" entry of one port; only the "vulners" text is modelled."""

    # Raw "vulners" text; falls back to an empty string when the key is absent.
    vulners: str = ""


class Ex30000(BaseModel):
    """The data stored for the TCP port keyed "30000"."""

    # Fall back to a real, empty ExScript instance so that ``.script.vulners``
    # still works (and yields "") when the port has no "script" entry.
    script: ExScript = Field(default_factory=ExScript)


class ExTcp(BaseModel):
    """The "tcp" mapping; only the port keyed "30000" is modelled."""

    # The JSON key "30000" is not a valid attribute name, hence the alias.
    root: Ex30000 = Field(default_factory=Ex30000, alias="30000")


class ExRoot(BaseModel):
    """Everything stored under one host key; "tcp" must be present."""

    tcp: ExTcp = Field(...)  # Required


class Base(BaseModel):
    """Top-level document, keyed by the host address "35.0.0.0.0"."""

    # Required: ExRoot cannot be built without "tcp", so no usable default
    # exists. A missing host key now fails validation with a clear message
    # instead of silently storing the ``typing.Any`` object as the value.
    root: ExRoot = Field(..., alias="35.0.0.0.0")
- 将您的输入数据更改为原始字符串,否则您将不得不转义 \n 和 \t
input_will_work = r"""{
"35.0.0.0.0": {
"hostnames": [
{
"name": "domain.com",
"type": "PTR"
}
],
"addresses": {
"ipv4": "35.0.0.0"
},
"vendor": {},
"status": {
"state": "up",
"reason": "syn-ack"
},
"tcp": {
"30000": {
"state": "open",
"reason": "syn-ack",
"name": "http",
"product": "nginx",
"version": "1.20.0",
"extrainfo": "",
"conf": "10",
"cpe": "cpe:/a:igor_sysoev:nginx:1.20.0",
"script": {
"http-server-header": "nginx/1.20.0",
"vulners": "\n cpe:/a:igor_sysoev:nginx:1.20.0: \n \tNGINX:CVE-2021-23017\t6.8\thttps://vulners.com/nginx/NGINX:CVE-2021-23017\n \t9A14990B-D52A-56B6-966C-6F35C8B8EB9D\t6.8\thttps://vulners.com/githubexploit/9A14990B-D52A-56B6-966C-6F35C8B8EB9D\t*EXPLOIT*\n \t1337DAY-ID-36300\t6.8\thttps://vulners.com/zdt/1337DAY-ID-36300\t*EXPLOIT*\n \tPACKETSTORM:162830\t0.0\thttps://vulners.com/packetstorm/PACKETSTORM:162830\t*EXPLOIT*"
}
}
}
}
}
"""
input_will_fail = r"""{
"35.0.0.0.0": {}
}
"""
3.1 这应该会给你预期的结果
# Validate the well-formed sample against the Base model, then follow the
# modelled attributes down to the vulners text and print it.
obj1 = Base.parse_raw(input_will_work)
print(obj1.root.tcp.root.script.vulners)
3.2 这应该抛出异常
# The "35.0.0.0.0" entry of this sample is empty, so the field marked as
# required ("tcp") is missing.
obj2 = Base.parse_raw(input_will_fail)
用jsonpath搜索数据
应该返回所有名称为 vulners 的对象
from jsonpath_ng import jsonpath, parse
import json
# Decode the sample JSON text into Python objects.
obj = json.loads(input_will_work)
# '$..vulners' uses the recursive-descent operator, so it matches "vulners"
# keys at any depth of the document.
p = parse('$..vulners')
# Print the value of every match.
for match in p.find(obj):
print(match.value)
更新:
def extract_data(ip_address_data):
    """Collect the host names and per-port findings of one scanned host.

    Args:
        ip_address_data: Mapping stored for a single IP address. The keys
            read are "hostnames" and "tcp"; every "tcp" entry is keyed by
            port and may carry "product" and "script" -> "vulners".

    Returns:
        A dict with two keys: "domains" (the "hostnames" value, or [] when
        it is absent) and "ports_data", a list holding one
        {"port", "product", "vulners"} dict per TCP port, in the order the
        ports appear in the input.
    """
    ports_data = []
    # Each port can have different products and vulners,
    # so that data is grouped together in one dictionary per port.
    for port, port_data in (ip_address_data.get("tcp") or {}).items():
        # Not every port carries script output; fall back to "" rather than
        # raising KeyError so the remaining ports are still reported.
        script = port_data.get("script") or {}
        ports_data.append({
            "port": port,
            "product": port_data.get("product", ""),
            "vulners": script.get("vulners", ""),
        })
    return {
        "domains": ip_address_data.get("hostnames", []),
        "ports_data": ports_data,
    }
# Result is the data from mongo db
# result = collection.find({})["scan"]
result = {
"127.0.0.1": {
"addresses": {
"ipv4": "127.0.0.1"
},
"hostnames": [
{
"name": "localhost",
"type": "PTR"
}
],
"status": {
"reason": "conn-refused",
"state": "up"
},
"tcp": {
"5000": {
"conf": "10",
"cpe": "cpe:/a:python:python:3.9.2",
"extrainfo": "Python 3.9.2",
"name": "http",
"product": "Werkzeug httpd",
"reason": "syn-ack",
"script": {
"vulners": "\n cpe:/a:python:python:3.9.2: \n \tCVE-2021-29921\t7.5\thttps://vulners.com/cve/CVE-2021-29921\n \tCVE-2021-23336\t4.0\thttps://vulners.com/cve/CVE-2021-23336\n \tMSF:ILITIES/DEBIAN-CVE-2021-3426/\t2.7\thttps://vulners.com/metasploit/MSF:ILITIES/DEBIAN-CVE-2021-3426/\t*EXPLOIT*\n \tCVE-2021-3426\t2.7\thttps://vulners.com/cve/CVE-2021-3426"
},
"state": "open",
"version": "1.0.1"
},
"6000": {
"conf": "10",
"cpe": "cpe:/a:python:python:3.9.2",
"extrainfo": "Python 3.9.2",
"name": "http",
"product": "Werkzeug httpd",
"reason": "syn-ack",
"script": {
"vulners": "\n cpe:/a:python:python:3.9.2: \n \tCVE-2021-29921\t7.5\thttps://vulners.com/cve/CVE-2021-29921\n \tCVE-2021-23336\t4.0\thttps://vulners.com/cve/CVE-2021-23336\n \tMSF:ILITIES/DEBIAN-CVE-2021-3426/\t2.7\thttps://vulners.com/metasploit/MSF:ILITIES/DEBIAN-CVE-2021-3426/\t*EXPLOIT*\n \tCVE-2021-3426\t2.7\thttps://vulners.com/cve/CVE-2021-3426"
},
"state": "open",
"version": "1.0.1"
}
},
"vendor": {
}
}
}
def scandata():
    """Print every IP address of the module-level ``result`` with its extracted data."""
    for ip_address, host_entry in result.items():
        print(ip_address, extract_data(host_entry))
scandata()
def extract_data(ip_address_data):
    """Collect the host names and per-port findings of one scanned host.

    Args:
        ip_address_data: Mapping stored for a single IP address. The keys
            read are "hostnames" and "tcp"; every "tcp" entry is keyed by
            port and may carry "product" and "script" -> "vulners".

    Returns:
        A dict with two keys: "domains" (the "hostnames" value, or [] when
        it is absent) and "ports_data", a list holding one
        {"port", "product", "vulners"} dict per TCP port, in the order the
        ports appear in the input.
    """
    ports_data = []
    # Each port can have different products and vulners,
    # so that data is grouped together in one dictionary per port.
    for port, port_data in (ip_address_data.get("tcp") or {}).items():
        # Not every port carries script output; fall back to "" rather than
        # raising KeyError so the remaining ports are still reported.
        script = port_data.get("script") or {}
        ports_data.append({
            "port": port,
            "product": port_data.get("product", ""),
            "vulners": script.get("vulners", ""),
        })
    return {
        "domains": ip_address_data.get("hostnames", []),
        "ports_data": ports_data,
    }
@app.route('/api/vulnerableports', methods=['GET'])
def show_vulnerableports():
    """Return, as JSON, one ``{ip_address: extracted_data}`` entry per scanned host."""
    # Round-trip each document through json_util so that only plain
    # JSON-compatible values remain before the data is walked.
    documents = [
        json.loads(json_util.dumps(doc))
        for doc in collection.find({}, {"scan": 1, "_id": 0})
    ]
    payload = []
    for document in documents:
        for ip_address, host_entry in document['scan'].items():
            payload.append({ip_address: extract_data(host_entry)})
    return jsonify(payload)
这就是解决方案!我必须先遍历 script,然后才能访问 vulners!