在 Python 中使用 concurrent.futures 时,如何避免向列表追加(append)数据时出错?
How can I avoid append errors using concurrent.futures in Python?
我正在尝试用某个 API 返回的信息创建一张表格(table)。逐场分析比赛时一切正常,但当我为了加快速度而使用 "concurrent.futures" 并发分析大量比赛时,表格里会被追加一些错误的信息。每次运行脚本,错误都出现在不同的行,位置是随机的。
此外,从打印出的 game_id 可以看出,脚本并没有按顺序处理这些比赛。也许问题就出在这里。
我该如何解决这个问题?谢谢!
这是我正在使用的代码。
import requests as r
import pandas as pd
import concurrent.futures
# Example ids only; the real run uses a much larger list of games.
game_id = [100, 101, 102]

# Column names, in the order they are laid out in the accumulator below.
_COLUMNS = (
    'game_id',
    'atbat_num',
    'play_index',
    'batter_id',
    'batter_name',
    'pitcher_id',
    'pitcher_name',
    'runner_id',
    'runner_name',
    'event',
    'start',
    'end',
    'movementReason',
)

# Column-oriented accumulator: one independent, initially empty list per
# column.  (The original also built an empty DataFrame here and threw it
# away; that statement had no effect and has been dropped.)
d = {column: [] for column in _COLUMNS}
import threading

# Guards the shared column lists in ``d``: a row is spread over thirteen
# lists, so all thirteen appends must happen as one unit or rows written by
# different threads end up misaligned.
_D_LOCK = threading.Lock()


def _runner_rows(gids, plays):
    """Return one row dict per runner entry found in *plays*.

    Any field that is absent from the payload is reported as ``None``.
    """
    rows = []
    for play in plays:
        matchup = play.get('matchup', {})
        batter = matchup.get('batter', {})
        pitcher = matchup.get('pitcher', {})
        for entry in play.get('runners', []):
            details = entry.get('details', {})
            runner = details.get('runner', {})
            movement = entry.get('movement', {})
            rows.append({
                'game_id': gids,
                'atbat_num': play.get('atBatIndex'),
                'play_index': details.get('playIndex'),
                'batter_id': batter.get('id'),
                'batter_name': batter.get('fullName'),
                'pitcher_id': pitcher.get('id'),
                'pitcher_name': pitcher.get('fullName'),
                'runner_id': runner.get('id'),
                'runner_name': runner.get('fullName'),
                'event': details.get('event'),
                'start': movement.get('start'),
                'end': movement.get('end'),
                'movementReason': details.get('movementReason'),
            })
    return rows


def get_url(gids):
    """Fetch one game's plays and append its runner rows to the global ``d``.

    The JSON document for game *gids* is downloaded, every entry under each
    play's ``'runners'`` list is turned into a row, and the rows are appended
    to the column lists in ``d``.  Returns ``None``.

    The ``'allPlays'`` lookup is unguarded, so a payload without that key
    raises ``KeyError``.
    """
    # The original f-string had an unmatched '}' and did not compile.
    url = f'http://examplelink.com/{gids}/'
    payload = r.get(url).json()

    # Build the rows privately first so the lock is held only for the
    # appends, not for the network round trip.
    rows = _runner_rows(gids, payload['allPlays'])

    with _D_LOCK:
        for row in rows:
            for column, value in row.items():
                d[column].append(value)
    print(f'Game {gids} analyzed')
with concurrent.futures.ThreadPoolExecutor() as executor:
    # Executor.map only re-raises a worker's exception when its result is
    # retrieved from the returned iterator; draining it here keeps a failed
    # download from being dropped silently.
    list(executor.map(get_url, game_id))

# Turn the column lists collected by get_url into a table and export it
# without the index column.
table = pd.DataFrame(d)
export_csv = table.to_csv('runner.csv', index=False, header=True)
Executor.map 会并发地调用 func,因此各次调用的执行顺序(以及它们向共享数据追加内容的先后顺序)无法保证与可迭代对象中的顺序一致。
我假设数据是以这种方式构建的,以便稍后可以使用 pandas 库将其呈现为表格数据。
我建议您改用一种不依赖追加顺序的数据结构,例如由字典组成的列表。pandas.DataFrame 的 data 参数既可以是“列表的字典”,也可以是“字典的列表”。
# Example ids only; the real run uses a much larger list of games.
game_id = [100, 101, 102]

# Row-oriented accumulator, initially empty.
d = []
def get_url(gid):
    """Fetch one game's plays and append one row dict per runner to ``d``.

    Each row is added with a single ``d.append`` call, so the fields of a
    row always stay together.  Fields missing from the payload are stored
    as ``None``.  Returns ``None``.

    The ``"allPlays"`` lookup is unguarded, so a payload without that key
    raises ``KeyError``.
    """
    url = f"http://examplelink.com/{gid}/"
    payload = r.get(url).json()
    for play in payload["allPlays"]:
        # The matchup belongs to the play, not to the runner, so it is
        # looked up once per play.
        matchup = play.get("matchup", {})
        batter = matchup.get("batter", {})
        pitcher = matchup.get("pitcher", {})
        for entry in play.get("runners", []):
            details = entry.get("details", {})
            runner = details.get("runner", {})
            movement = entry.get("movement", {})
            d.append(
                {
                    "game_id": gid,
                    "atbat_num": play.get("atBatIndex"),
                    "play_index": details.get("playIndex"),
                    "batter_id": batter.get("id"),
                    "batter_name": batter.get("fullName"),
                    # Was keyed "pitcher"; renamed to match the
                    # "<role>_id" / "<role>_name" pattern of its siblings.
                    "pitcher_id": pitcher.get("id"),
                    "pitcher_name": pitcher.get("fullName"),
                    "runner_id": runner.get("id"),
                    "runner_name": runner.get("fullName"),
                    "event": details.get("event"),
                    "start": movement.get("start"),
                    "end": movement.get("end"),
                    "movementReason": details.get("movementReason"),
                }
            )
    print(f"Game {gid} analyzed")
with concurrent.futures.ThreadPoolExecutor() as executor:
    # Executor.map only re-raises a worker's exception when its result is
    # retrieved from the returned iterator; draining it here keeps a failed
    # download from being dropped silently.
    list(executor.map(get_url, game_id))
我正在尝试用某个 API 返回的信息创建一张表格(table)。逐场分析比赛时一切正常,但当我为了加快速度而使用 "concurrent.futures" 并发分析大量比赛时,表格里会被追加一些错误的信息。每次运行脚本,错误都出现在不同的行,位置是随机的。
此外,从打印出的 game_id 可以看出,脚本并没有按顺序处理这些比赛。也许问题就出在这里。
我该如何解决这个问题?谢谢!
这是我正在使用的代码。
import requests as r
import pandas as pd
import concurrent.futures
# Example ids only; the real run uses a much larger list of games.
game_id = [100, 101, 102]

# Column names, in the order they are laid out in the accumulator below.
_COLUMNS = (
    'game_id',
    'atbat_num',
    'play_index',
    'batter_id',
    'batter_name',
    'pitcher_id',
    'pitcher_name',
    'runner_id',
    'runner_name',
    'event',
    'start',
    'end',
    'movementReason',
)

# Column-oriented accumulator: one independent, initially empty list per
# column.  (The original also built an empty DataFrame here and threw it
# away; that statement had no effect and has been dropped.)
d = {column: [] for column in _COLUMNS}
import threading

# Guards the shared column lists in ``d``: a row is spread over thirteen
# lists, so all thirteen appends must happen as one unit or rows written by
# different threads end up misaligned.
_D_LOCK = threading.Lock()


def _runner_rows(gids, plays):
    """Return one row dict per runner entry found in *plays*.

    Any field that is absent from the payload is reported as ``None``.
    """
    rows = []
    for play in plays:
        matchup = play.get('matchup', {})
        batter = matchup.get('batter', {})
        pitcher = matchup.get('pitcher', {})
        for entry in play.get('runners', []):
            details = entry.get('details', {})
            runner = details.get('runner', {})
            movement = entry.get('movement', {})
            rows.append({
                'game_id': gids,
                'atbat_num': play.get('atBatIndex'),
                'play_index': details.get('playIndex'),
                'batter_id': batter.get('id'),
                'batter_name': batter.get('fullName'),
                'pitcher_id': pitcher.get('id'),
                'pitcher_name': pitcher.get('fullName'),
                'runner_id': runner.get('id'),
                'runner_name': runner.get('fullName'),
                'event': details.get('event'),
                'start': movement.get('start'),
                'end': movement.get('end'),
                'movementReason': details.get('movementReason'),
            })
    return rows


def get_url(gids):
    """Fetch one game's plays and append its runner rows to the global ``d``.

    The JSON document for game *gids* is downloaded, every entry under each
    play's ``'runners'`` list is turned into a row, and the rows are appended
    to the column lists in ``d``.  Returns ``None``.

    The ``'allPlays'`` lookup is unguarded, so a payload without that key
    raises ``KeyError``.
    """
    # The original f-string had an unmatched '}' and did not compile.
    url = f'http://examplelink.com/{gids}/'
    payload = r.get(url).json()

    # Build the rows privately first so the lock is held only for the
    # appends, not for the network round trip.
    rows = _runner_rows(gids, payload['allPlays'])

    with _D_LOCK:
        for row in rows:
            for column, value in row.items():
                d[column].append(value)
    print(f'Game {gids} analyzed')
with concurrent.futures.ThreadPoolExecutor() as executor:
    # Executor.map only re-raises a worker's exception when its result is
    # retrieved from the returned iterator; draining it here keeps a failed
    # download from being dropped silently.
    list(executor.map(get_url, game_id))

# Turn the column lists collected by get_url into a table and export it
# without the index column.
table = pd.DataFrame(d)
export_csv = table.to_csv('runner.csv', index=False, header=True)
Executor.map 会并发地调用 func,因此各次调用的执行顺序(以及它们向共享数据追加内容的先后顺序)无法保证与可迭代对象中的顺序一致。
我假设数据是以这种方式构建的,以便稍后可以使用 pandas 库将其呈现为表格数据。
我建议您改用一种不依赖追加顺序的数据结构,例如由字典组成的列表。pandas.DataFrame 的 data 参数既可以是“列表的字典”,也可以是“字典的列表”。
# Example ids only; the real run uses a much larger list of games.
game_id = [100, 101, 102]

# Row-oriented accumulator, initially empty.
d = []
def get_url(gid):
    """Fetch one game's plays and append one row dict per runner to ``d``.

    Each row is added with a single ``d.append`` call, so the fields of a
    row always stay together.  Fields missing from the payload are stored
    as ``None``.  Returns ``None``.

    The ``"allPlays"`` lookup is unguarded, so a payload without that key
    raises ``KeyError``.
    """
    url = f"http://examplelink.com/{gid}/"
    payload = r.get(url).json()
    for play in payload["allPlays"]:
        # The matchup belongs to the play, not to the runner, so it is
        # looked up once per play.
        matchup = play.get("matchup", {})
        batter = matchup.get("batter", {})
        pitcher = matchup.get("pitcher", {})
        for entry in play.get("runners", []):
            details = entry.get("details", {})
            runner = details.get("runner", {})
            movement = entry.get("movement", {})
            d.append(
                {
                    "game_id": gid,
                    "atbat_num": play.get("atBatIndex"),
                    "play_index": details.get("playIndex"),
                    "batter_id": batter.get("id"),
                    "batter_name": batter.get("fullName"),
                    # Was keyed "pitcher"; renamed to match the
                    # "<role>_id" / "<role>_name" pattern of its siblings.
                    "pitcher_id": pitcher.get("id"),
                    "pitcher_name": pitcher.get("fullName"),
                    "runner_id": runner.get("id"),
                    "runner_name": runner.get("fullName"),
                    "event": details.get("event"),
                    "start": movement.get("start"),
                    "end": movement.get("end"),
                    "movementReason": details.get("movementReason"),
                }
            )
    print(f"Game {gid} analyzed")
with concurrent.futures.ThreadPoolExecutor() as executor:
    # Executor.map only re-raises a worker's exception when its result is
    # retrieved from the returned iterator; draining it here keeps a failed
    # download from being dropped silently.
    list(executor.map(get_url, game_id))