For 循环在返回第一个元素的结果后停止
For loop stops after returning results of 1st element
我有以下抓取脚本。我需要遍历许多链接,这些链接之间的区别仅在于数据字典中包含的 T_ID。该脚本只打印了第一个 T_ID 的结果。请问如何改进这个循环,使它打印所有 T_ID 的结果?
import csv
import json
import sys

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Fetch the paginated results for each T_ID over HTTP basic auth, print every
# returned item, and collect the items into `df`.
data = {'T_ID': [3396750, 3396753, 3396755, 3396757, 3396759]}
base_url = "XXXX"
username = "XXXX"
password = "XXXX"
toget = data
allowed_results = 50  # page size, sent to the server as maxResults
max_results = "maxResults=" + str(allowed_results)
tc = "/tcyc?"

# NOTE: the paging state is initialised once, before the loop over the T_IDs,
# and is never reset. When the first ID has been paged through, result_count
# is 0, so `while result_count != 0` is skipped for every later ID. This is
# the behaviour the question is about and it is intentionally left as posted.
result_count = -1  # any non-zero value, so the while loop is entered
start_index = 0

df = pd.DataFrame(
    columns=['id', 'name', 'gId', 'dKey', 'tPlan'])

# Rows are gathered in a list and turned into a DataFrame once at the end:
# DataFrame.append no longer exists in pandas 2.x, and appending row by row
# copied the whole frame on every call.
rows = []

for eachId in toget['T_ID']:
    while result_count != 0:
        start_at = "startAt=" + str(start_index)
        url = f'{base_url}{eachId}{tc}&{start_at}&{max_results}'
        # A timeout keeps a stalled connection from hanging the script.
        response = requests.get(url, auth=(username, password), timeout=30)
        # Fail with the HTTP error instead of a KeyError on an error payload.
        response.raise_for_status()
        json_response = response.json()
        print(json_response)

        page_info = json_response["meta"]["pageInfo"]
        start_index = page_info["startIndex"] + allowed_results
        result_count = page_info["resultCount"]

        items2 = json_response["data"]
        print(items2)
        for item in items2:
            rows.append({'id': item['id'], **item['fields']})
            print(item["id"])
            print(item["project"])
            print(item["fields"]["name"])
            print(item["fields"]["gId"])
            print(item["fields"]["dKey"])
            print(item["fields"]["tPlan"])

if rows:
    fetched = pd.DataFrame(rows)
    # Declared columns first, then any other keys found in item['fields'],
    # which is the column layout the repeated df.append calls produced.
    extra_columns = [c for c in fetched.columns if c not in df.columns]
    df = fetched.reindex(columns=[*df.columns, *extra_columns])
它并没有停下来,实际上循环一直运行到了最后。问题在于,在遍历完第一个 `eachId` 之后,`start_index` 不再是 0,所以到下一个 id 时,请求的地址会类似于:
`'XXXX.com/3396753/tcyc?&startAt=123&maxResults=50'`
这样很可能返回为 0 的 `result_count`,也就意味着 while 循环不会继续运行。(更直接的原因是:第一个 id 的 while 循环结束时 `result_count` 已经是 0,而它同样没有被重置,因此后面每个 id 的 `while result_count != 0` 一开始就不成立。)然后轮到下一个 id,同样的事情再次发生。
把初始的 `result_count = -1` 和 `start_index = 0` 移到 for 循环内部、`while` 之前,因为您希望它们在每个 'T_ID' 上都被“重置”:
import pandas as pd
import requests
import json
import csv
import sys
from bs4 import BeautifulSoup
# Fetch the paginated results for each T_ID over HTTP basic auth, print every
# returned item, and collect the items into `df`.
data = {'T_ID': [3396750, 3396753, 3396755, 3396757, 3396759]}
base_url = "XXXX"
username = "XXXX"
password = "XXXX"
toget = data
allowed_results = 50  # page size, sent to the server as maxResults
max_results = "maxResults=" + str(allowed_results)
tc = "/tcyc?"

df = pd.DataFrame(
    columns=['id', 'name', 'gId', 'dKey', 'tPlan'])

# Rows are gathered in a list and turned into a DataFrame once at the end:
# DataFrame.append no longer exists in pandas 2.x, and appending row by row
# copied the whole frame on every call.
rows = []

for eachId in toget['T_ID']:
    # Reset the paging state for every T_ID. Without this, result_count is
    # still 0 from the previous ID and the while loop below never runs again.
    start_index = 0
    result_count = -1  # any non-zero value, so the while loop is entered
    while result_count != 0:
        start_at = "startAt=" + str(start_index)
        url = f'{base_url}{eachId}{tc}&{start_at}&{max_results}'
        # A timeout keeps a stalled connection from hanging the script.
        response = requests.get(url, auth=(username, password), timeout=30)
        # Fail with the HTTP error instead of a KeyError on an error payload.
        response.raise_for_status()
        json_response = response.json()
        print(json_response)

        page_info = json_response["meta"]["pageInfo"]
        start_index = page_info["startIndex"] + allowed_results
        result_count = page_info["resultCount"]

        items2 = json_response["data"]
        print(items2)
        for item in items2:
            rows.append({'id': item['id'], **item['fields']})
            print(item["id"])
            print(item["project"])
            print(item["fields"]["name"])
            print(item["fields"]["gId"])
            print(item["fields"]["dKey"])
            print(item["fields"]["tPlan"])

if rows:
    fetched = pd.DataFrame(rows)
    # Declared columns first, then any other keys found in item['fields'],
    # which is the column layout the repeated df.append calls produced.
    extra_columns = [c for c in fetched.columns if c not in df.columns]
    df = fetched.reindex(columns=[*df.columns, *extra_columns])
我有以下抓取脚本。我需要遍历许多链接,这些链接之间的区别仅在于数据字典中包含的 T_ID。该脚本只打印了第一个 T_ID 的结果。请问如何改进这个循环,使它打印所有 T_ID 的结果?
import requests
import json
import csv
import sys
from bs4 import BeautifulSoup
# pandas is used below but is missing from this script's import lines, so it
# is imported here to keep the script runnable.
import pandas as pd

# Fetch the paginated results for each T_ID over HTTP basic auth, print every
# returned item, and collect the items into `df`.
data = {'T_ID': [3396750, 3396753, 3396755, 3396757, 3396759]}
base_url = "XXXX"
username = "XXXX"
password = "XXXX"
toget = data
allowed_results = 50  # page size, sent to the server as maxResults
max_results = "maxResults=" + str(allowed_results)
tc = "/tcyc?"

# NOTE: the paging state is initialised once, before the loop over the T_IDs,
# and is never reset. When the first ID has been paged through, result_count
# is 0, so `while result_count != 0` is skipped for every later ID. This is
# the behaviour the question is about and it is intentionally left as posted.
result_count = -1  # any non-zero value, so the while loop is entered
start_index = 0

df = pd.DataFrame(
    columns=['id', 'name', 'gId', 'dKey', 'tPlan'])

# Rows are gathered in a list and turned into a DataFrame once at the end:
# DataFrame.append no longer exists in pandas 2.x, and appending row by row
# copied the whole frame on every call.
rows = []

for eachId in toget['T_ID']:
    while result_count != 0:
        start_at = "startAt=" + str(start_index)
        url = f'{base_url}{eachId}{tc}&{start_at}&{max_results}'
        # A timeout keeps a stalled connection from hanging the script.
        response = requests.get(url, auth=(username, password), timeout=30)
        # Fail with the HTTP error instead of a KeyError on an error payload.
        response.raise_for_status()
        json_response = response.json()
        print(json_response)

        page_info = json_response["meta"]["pageInfo"]
        start_index = page_info["startIndex"] + allowed_results
        result_count = page_info["resultCount"]

        items2 = json_response["data"]
        print(items2)
        for item in items2:
            rows.append({'id': item['id'], **item['fields']})
            print(item["id"])
            print(item["project"])
            print(item["fields"]["name"])
            print(item["fields"]["gId"])
            print(item["fields"]["dKey"])
            print(item["fields"]["tPlan"])

if rows:
    fetched = pd.DataFrame(rows)
    # Declared columns first, then any other keys found in item['fields'],
    # which is the column layout the repeated df.append calls produced.
    extra_columns = [c for c in fetched.columns if c not in df.columns]
    df = fetched.reindex(columns=[*df.columns, *extra_columns])
它并没有停下来,实际上循环一直运行到了最后。问题在于,在遍历完第一个 `eachId` 之后,`start_index` 不再是 0,所以到下一个 id 时,请求的地址会类似于:
`'XXXX.com/3396753/tcyc?&startAt=123&maxResults=50'`
这样很可能返回为 0 的 `result_count`,也就意味着 while 循环不会继续运行。(更直接的原因是:第一个 id 的 while 循环结束时 `result_count` 已经是 0,而它同样没有被重置,因此后面每个 id 的 `while result_count != 0` 一开始就不成立。)然后轮到下一个 id,同样的事情再次发生。
把初始的 `result_count = -1` 和 `start_index = 0` 移到 for 循环内部、`while` 之前,因为您希望它们在每个 'T_ID' 上都被“重置”:
import pandas as pd
import requests
import json
import csv
import sys
from bs4 import BeautifulSoup
# Fetch the paginated results for each T_ID over HTTP basic auth, print every
# returned item, and collect the items into `df`.
data = {'T_ID': [3396750, 3396753, 3396755, 3396757, 3396759]}
base_url = "XXXX"
username = "XXXX"
password = "XXXX"
toget = data
allowed_results = 50  # page size, sent to the server as maxResults
max_results = "maxResults=" + str(allowed_results)
tc = "/tcyc?"

df = pd.DataFrame(
    columns=['id', 'name', 'gId', 'dKey', 'tPlan'])

# Rows are gathered in a list and turned into a DataFrame once at the end:
# DataFrame.append no longer exists in pandas 2.x, and appending row by row
# copied the whole frame on every call.
rows = []

for eachId in toget['T_ID']:
    # Reset the paging state for every T_ID. Without this, result_count is
    # still 0 from the previous ID and the while loop below never runs again.
    start_index = 0
    result_count = -1  # any non-zero value, so the while loop is entered
    while result_count != 0:
        start_at = "startAt=" + str(start_index)
        url = f'{base_url}{eachId}{tc}&{start_at}&{max_results}'
        # A timeout keeps a stalled connection from hanging the script.
        response = requests.get(url, auth=(username, password), timeout=30)
        # Fail with the HTTP error instead of a KeyError on an error payload.
        response.raise_for_status()
        json_response = response.json()
        print(json_response)

        page_info = json_response["meta"]["pageInfo"]
        start_index = page_info["startIndex"] + allowed_results
        result_count = page_info["resultCount"]

        items2 = json_response["data"]
        print(items2)
        for item in items2:
            rows.append({'id': item['id'], **item['fields']})
            print(item["id"])
            print(item["project"])
            print(item["fields"]["name"])
            print(item["fields"]["gId"])
            print(item["fields"]["dKey"])
            print(item["fields"]["tPlan"])

if rows:
    fetched = pd.DataFrame(rows)
    # Declared columns first, then any other keys found in item['fields'],
    # which is the column layout the repeated df.append calls produced.
    extra_columns = [c for c in fetched.columns if c not in df.columns]
    df = fetched.reindex(columns=[*df.columns, *extra_columns])