Python3 - for 循环仅 returns 一次迭代

Python3 - for loop only returns one iteration

我有一个 python 脚本来检查我的漏洞扫描器正在扫描哪些实例。 API 响应在 XML 中,我使用带有 etreelxml 库来遍历响应并解析每个 <EC2_INSTANCE_ID> 标记。但是,我的循环只返回一个实例 ID。下面是我的代码、XML 和我的脚本的输出。

CODE:

import os
import requests
import boto3
import lxml
from lxml import etree

s3 = boto3.resource('s3')

def apiLogin():
    global s 
    s = requests.Session()

    qualys_username = "xxxxxx"
    qualys_password = "xxxxxx"

    payload = {'action':'login', 'username':qualys_username, 
'password':qualys_password} 
    s.headers.update({'X-Requested-With':qualys_username})
    r = s.post('https://qualysapi.qualys.com/api/2.0/fo/session/', 
data=payload)

def launchReport():
    payload = {'action':'list', 'use_tags':'1', 'tag_set_include':'xxxxxx', 'host_metadata':'ec2', 'host_metadata_fields':'instanceId'}
r = s.post('https://qualysapi.qualys.com/api/2.0/fo/asset/host/', data=payload)

    os.chdir('/tmp')
    f = open('qualys_instances.xml','w')
    print(r.text, file=f)

    file = open('qualys_instances.xml','rb')
    s3.Bucket('xxxxxx').put_object(Key='qualys_instances.xml', Body=file)

def formatReport():
    bucket = 'xxxxxx'
    key = 'xxxxxx'

    os.chdir('/tmp')
    obj = s3.Object(bucket, key)
    body = obj.get()['Body'].read()

    doc = etree.fromstring(body)
    host_list = doc.xpath('//EC2_INSTANCE_ID')

    for i in host_list:
        print(i)
        k = open('qualys_instances.txt','w')
        print(i.text, file=k)

    file = open('qualys_instances.txt','rb')
    s3.Bucket('nwm-all-instances').put_object(Key='qualys_instances.txt', Body=file)

XML:

<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE HOST_LIST_OUTPUT SYSTEM "https://qualysapi.qualys.com/api/2.0/fo/asset/host/host_list_output.dtd">
<HOST_LIST_OUTPUT>
  <RESPONSE>
    <DATETIME>2018-05-21T21:38:06Z</DATETIME>
    <HOST_LIST>
      <HOST>
        <ID>xxxxxx</ID>
        <IP>xxxxxx</IP>
        <TRACKING_METHOD>EC2</TRACKING_METHOD>
        <EC2_INSTANCE_ID><![CDATA[i-1111111]]></EC2_INSTANCE_ID>
        <METADATA>
          <EC2>
            <ATTRIBUTE>
              <NAME><![CDATA[latest/dynamic/instance-identity/document/instanceId]]></NAME>
              <LAST_STATUS>Fail</LAST_STATUS>
              <VALUE><![CDATA[]]></VALUE>
              <LAST_SUCCESS_DATE></LAST_SUCCESS_DATE>
              <LAST_ERROR_DATE>2018-05-16T03:41:14Z</LAST_ERROR_DATE>
              <LAST_ERROR><![CDATA[QualysShell not available]]> . 
</LAST_ERROR>
            </ATTRIBUTE>
          </EC2>
        </METADATA>
      </HOST>
      <HOST>
        <ID>xxxxxx</ID>
        <IP>xxxxxx</IP>
        <TRACKING_METHOD>EC2</TRACKING_METHOD>
        <EC2_INSTANCE_ID><![CDATA[i-222222]]></EC2_INSTANCE_ID>
        <METADATA>
          <EC2>
            <ATTRIBUTE>
              <NAME><![CDATA[latest/dynamic/instance-identity/document/instanceId]]></NAME>
              <LAST_STATUS>Fail</LAST_STATUS>
              <VALUE><![CDATA[]]></VALUE>
              <LAST_SUCCESS_DATE></LAST_SUCCESS_DATE>
              <LAST_ERROR_DATE>2018-05-16T03:27:35Z</LAST_ERROR_DATE>
              <LAST_ERROR><![CDATA[QualysShell not available]]> . 
</LAST_ERROR>
            </ATTRIBUTE>
          </EC2>
        </METADATA>
      </HOST>
    </HOST_LIST>
  </RESPONSE>
</HOST_LIST_OUTPUT>

SCRIPT OUTPUT:

i-111111

尝试:

from lxml import etree

xml_data = """<?xml version="1.0" encoding="UTF-8" ?><!DOCTYPE HOST_LIST_OUTPUT SYSTEM "https://qualysapi.qualys.com/api/2.0/fo/asset/host/host_list_output.dtd"><HOST_LIST_OUTPUT><RESPONSE><DATETIME>2018-05-21T21:38:06Z</DATETIME><HOST_LIST><HOST><ID>xxxxxx</ID><IP>xxxxxx</IP><TRACKING_METHOD>EC2</TRACKING_METHOD><EC2_INSTANCE_ID><![CDATA[i-1111111]]></EC2_INSTANCE_ID><METADATA><EC2><ATTRIBUTE><NAME><![CDATA[latest/dynamic/instance-identity/document/instanceId]]></NAME><LAST_STATUS>Fail</LAST_STATUS><VALUE><![CDATA[]]></VALUE><LAST_SUCCESS_DATE></LAST_SUCCESS_DATE><LAST_ERROR_DATE>2018-05-16T03:41:14Z</LAST_ERROR_DATE><LAST_ERROR><![CDATA[QualysShell not available]]>.</LAST_ERROR></ATTRIBUTE></EC2></METADATA></HOST><HOST><ID>xxxxxx</ID><IP>xxxxxx</IP><TRACKING_METHOD>EC2</TRACKING_METHOD><EC2_INSTANCE_ID><![CDATA[i-222222]]></EC2_INSTANCE_ID><METADATA><EC2><ATTRIBUTE><NAME><![CDATA[latest/dynamic/instance-identity/document/instanceId]]></NAME><LAST_STATUS>Fail</LAST_STATUS><VALUE><![CDATA[]]></VALUE><LAST_SUCCESS_DATE></LAST_SUCCESS_DATE><LAST_ERROR_DATE>2018-05-16T03:27:35Z</LAST_ERROR_DATE><LAST_ERROR><![CDATA[QualysShell not available]]>.</LAST_ERROR></ATTRIBUTE></EC2></METADATA></HOST></HOST_LIST></RESPONSE></HOST_LIST_OUTPUT>"""

# strip_cdata=False must be passed so CDATA sections are preserved
parser = etree.XMLParser(strip_cdata=False)
tree = etree.fromstring(xml_data, parser)

# we will use the full XPath since it could just be '//EC2_INSTANCE_ID/text()'
for ec2_instance_id in tree.xpath('//HOST_LIST_OUTPUT/RESPONSE/HOST_LIST/HOST/EC2_INSTANCE_ID/text()'):
    print(ec2_instance_id)

输出:

i-1111111
i-222222