通过 python 中的 namedtuple csv 循环跟踪进度
Track progress through namedtuple csv loop in python
以下 Python 代码使用 collections.namedtuple 逐行处理一个标识符 csv 文件(名为 ContentItemId 的列中的整数),每个标识符对应数据库中的一条记录。示例记录是 https://api.aucklandmuseum.com/id/library/ephemera/21291 。
其目的是检查每个 id 对应的 HTTP 状态码并将其写入磁盘:
import requests
from collections import namedtuple
import csv

# Load every data row into a namedtuple whose field names come from the
# header row, so the id column is reachable as `.ContentItemId`.
# newline='' is what the csv module expects for files it reads or writes.
with open('in.csv', mode='r', newline='') as f:
    reader = csv.reader(f)
    all_records = namedtuple('rec', next(reader))
    records = [all_records._make(row) for row in reader]

# Create output file. Without newline='' the csv writer emits an extra
# blank line after every row on Windows.
with open('out.csv', mode='w+', newline='') as o:
    w = csv.writer(o)
    w.writerow(["ContentItemId", "code"])
    for r in records:
        id = r.ContentItemId
        url = "https://api.aucklandmuseum.com/id/library/ephemera/" + id
        # Redirects are not followed, so a 3xx response is recorded as-is.
        req = requests.get(url, allow_redirects=False)
        code = req.status_code
        w.writerow([id, code])
如何将后一个循环的代码进度(最好是在 25%、50% 和 75% 的接合点)打印到控制台?另外,如果我在底部添加一个未缩进的 print("Complete")
,是否会到达该行?
提前致谢。
编辑:感谢所有帮助。我的(工作!)代码现在看起来像这样:
import csv
import requests
import pandas
import time
from collections import namedtuple
from tqdm import tqdm

# Load every data row into a namedtuple keyed by the header row.
with open('active_true_pub_no.csv', mode='r', newline='') as f:
    reader = csv.reader(f)
    all_records = namedtuple('rec', next(reader))
    records = [all_records._make(row) for row in reader]

# newline='' stops the csv writer from producing blank rows on Windows.
with open('out.csv', mode='w+', newline='') as o:
    w = csv.writer(o)
    w.writerow(["ContentItemId", "code"])
    num = len(records)
    print("Checking {} records...\n".format(num))
    with tqdm(total=num, bar_format="{percentage:3.0f}% {bar} [{n_fmt}/{total_fmt}] ", ncols=64) as pbar:
        for r in records:
            id = r.ContentItemId
            url = "https://api.aucklandmuseum.com/id/library/ephemera/" + id
            req = requests.get(url, allow_redirects=False)
            code = req.status_code
            w.writerow([id, code])
            # Advance only after the row is written, so the bar counts
            # finished records rather than started ones.
            pbar.update(1)
            # time.sleep(.25)  # optional: uncomment to throttle requests

# The summary runs after the `with` block so out.csv is flushed and closed
# before pandas reads it back.
print('\nSummary: ')
df = pandas.read_csv("out.csv")
print(df['code'].value_counts())
最后,我用 pandas 的 value_counts 对结果做了汇总。
只需使用 enumerate 来跟踪您的进度,例如:
l = len(records)
reported = 0  # number of quarters (0..4) already announced
# start=1 so that i is the number of records processed so far; a 0-based
# index would never reach 100%.
for i, r in enumerate(records, start=1):
    # other stuff...
    w.writerow([id, code])
    # print progress
    # Integer arithmetic instead of comparing floats for equality: this
    # fires the first time each quarter is reached, even when the number of
    # records is not divisible by 4.
    quarter = i * 4 // l
    if quarter > reported:
        reported = quarter
        # '{:.0%}' already multiplies by 100 and appends the percent sign.
        print('Writing to disk... {:.0%}'.format(quarter / 4))
我假设您指的是已处理记录的百分比。您也可以在循环中执行 print("Complete")
。
count = 0
total = len(records)
# Report roughly every quarter. The round is needed in case the number of
# records isn't divisible by 4; max(1, ...) keeps the modulo below from
# dividing by zero when there are fewer than 3 records.
step = max(1, round(total / 4))
for r in records:
    id = r.ContentItemId
    url = "https://api.aucklandmuseum.com/id/library/ephemera/" + id
    req = requests.get(url, allow_redirects=False)
    code = req.status_code
    w.writerow([id, code])
    count += 1
    if count == total:
        print("Complete")
    elif count % step == 0:
        # Scale to a percentage before rounding; rounding the fraction first
        # and then truncating with int() can come out one too low
        # (e.g. 0.29 * 100 == 28.999...).
        progress = round(100 * count / total)
        print("{}%".format(progress))
[纯 Python 解决方案] 要在同一行上打印百分比进度(即不占满整个屏幕),您可以执行以下操作:
[...]
total = len(records)
# start=1 so the last iteration reports 100% instead of stopping short.
for i, r in enumerate(records, start=1):
    id = r.ContentItemId
    url = "https://api.aucklandmuseum.com/id/library/ephemera/" + id
    req = requests.get(url, allow_redirects=False)
    code = req.status_code
    w.writerow([id, code])
    # '\r' returns the cursor to the start of the line so the next print
    # overwrites this one. flush=True is needed because no newline is
    # written, so a line-buffered stdout would otherwise not show it.
    print("%.2f%% \t\r" % (100 * i / total), end='', flush=True)
# Move off the progress line once the loop is done.
print()
要获得进度条,请使用 TQDM:
数据(来自in.csv
):
ContentItemId
21200
21201
21202
21203
21204
21205
21206
...
21296
21297
21298
21299
21300
代码:
from collections import namedtuple
import csv
import requests
from tqdm import tqdm

# Load every data row into a namedtuple keyed by the header row.
with open('in.csv', mode='r', newline='') as f:
    reader = csv.reader(f)
    all_records = namedtuple('rec', next(reader))
    records = [all_records._make(row) for row in reader]

# Create output file. newline='' stops the csv writer from producing blank
# rows on Windows.
with open('out.csv', mode='w+', newline='') as o:
    w = csv.writer(o)
    w.writerow(["ContentItemId", "code"])
    with tqdm(total=len(records)) as pbar:
        for r in records:
            id = r.ContentItemId
            url = "https://api.aucklandmuseum.com/id/library/ephemera/" + id
            req = requests.get(url, allow_redirects=False)
            code = req.status_code
            w.writerow([id, code])
            # Advance after the row is written so the bar counts finished
            # records.
            pbar.update(1)

# Unindented, so it runs once after the loop and both `with` blocks finish.
print('Complete!')
- 注意在 for-loop 之前添加 with tqdm(total=len(records)) as pbar:
- 当从控制台运行时,会出现一个进度条,显示完成百分比。
- 注意图像左侧的 21/101,这是相对于 records 列表长度的计数。
- tqdm 提供百分比进度条和 complete/total 的计数。
# sudo pip3 install tqdm
import time
import tqdm

records = ['a', 'b', 'c', 'd', 'e']

# One bar tick per record; the sleep stands in for real per-record work.
with tqdm.tqdm(smoothing=0.1, total=len(records)) as pbar:
    for _ in records:
        time.sleep(1)
        pbar.update()
它都是相对的,所以让我们做一些通用的数学运算。 :)
# sudo pip3 install tqdm
import time
import tqdm

total = 5000
_number_left = 5000

with tqdm.tqdm(smoothing=0.1, total=total) as pbar:
    # Running sum of every increment already handed to pbar.update().
    reported_so_far = 0
    for _ in range(0, 5000, 2):  # 0, 2, 4, ... 4998
        time.sleep(0.0005)
        _number_left -= 2  # input from some worker process for example
        done_in_total = total - _number_left
        # pbar.update() is given the amount done since the previous call,
        # so turn the absolute figure into a delta first.
        increment = done_in_total - reported_so_far
        reported_so_far = done_in_total
        pbar.update(increment)
以下 Python 代码使用 collections.namedtuple 逐行处理一个标识符 csv 文件(名为 ContentItemId 的列中的整数),每个标识符对应数据库中的一条记录。示例记录是 https://api.aucklandmuseum.com/id/library/ephemera/21291 。
其目的是检查每个 id 对应的 HTTP 状态码并将其写入磁盘:
import requests
from collections import namedtuple
import csv

# Load every data row into a namedtuple whose field names come from the
# header row, so the id column is reachable as `.ContentItemId`.
# newline='' is what the csv module expects for files it reads or writes.
with open('in.csv', mode='r', newline='') as f:
    reader = csv.reader(f)
    all_records = namedtuple('rec', next(reader))
    records = [all_records._make(row) for row in reader]

# Create output file. Without newline='' the csv writer emits an extra
# blank line after every row on Windows.
with open('out.csv', mode='w+', newline='') as o:
    w = csv.writer(o)
    w.writerow(["ContentItemId", "code"])
    for r in records:
        id = r.ContentItemId
        url = "https://api.aucklandmuseum.com/id/library/ephemera/" + id
        # Redirects are not followed, so a 3xx response is recorded as-is.
        req = requests.get(url, allow_redirects=False)
        code = req.status_code
        w.writerow([id, code])
如何将后一个循环的代码进度(最好是在 25%、50% 和 75% 的接合点)打印到控制台?另外,如果我在底部添加一个未缩进的 print("Complete")
,是否会到达该行?
提前致谢。
编辑:感谢所有帮助。我的(工作!)代码现在看起来像这样:
import csv
import requests
import pandas
import time
from collections import namedtuple
from tqdm import tqdm

# Load every data row into a namedtuple keyed by the header row.
with open('active_true_pub_no.csv', mode='r', newline='') as f:
    reader = csv.reader(f)
    all_records = namedtuple('rec', next(reader))
    records = [all_records._make(row) for row in reader]

# newline='' stops the csv writer from producing blank rows on Windows.
with open('out.csv', mode='w+', newline='') as o:
    w = csv.writer(o)
    w.writerow(["ContentItemId", "code"])
    num = len(records)
    print("Checking {} records...\n".format(num))
    with tqdm(total=num, bar_format="{percentage:3.0f}% {bar} [{n_fmt}/{total_fmt}] ", ncols=64) as pbar:
        for r in records:
            id = r.ContentItemId
            url = "https://api.aucklandmuseum.com/id/library/ephemera/" + id
            req = requests.get(url, allow_redirects=False)
            code = req.status_code
            w.writerow([id, code])
            # Advance only after the row is written, so the bar counts
            # finished records rather than started ones.
            pbar.update(1)
            # time.sleep(.25)  # optional: uncomment to throttle requests

# The summary runs after the `with` block so out.csv is flushed and closed
# before pandas reads it back.
print('\nSummary: ')
df = pandas.read_csv("out.csv")
print(df['code'].value_counts())
最后,我用 pandas 的 value_counts 对结果做了汇总。
只需使用 enumerate 来跟踪您的进度,例如:
l = len(records)
reported = 0  # number of quarters (0..4) already announced
# start=1 so that i is the number of records processed so far; a 0-based
# index would never reach 100%.
for i, r in enumerate(records, start=1):
    # other stuff...
    w.writerow([id, code])
    # print progress
    # Integer arithmetic instead of comparing floats for equality: this
    # fires the first time each quarter is reached, even when the number of
    # records is not divisible by 4.
    quarter = i * 4 // l
    if quarter > reported:
        reported = quarter
        # '{:.0%}' already multiplies by 100 and appends the percent sign.
        print('Writing to disk... {:.0%}'.format(quarter / 4))
我假设您指的是已处理记录的百分比。您也可以在循环中执行 print("Complete")
。
count = 0
total = len(records)
# Report roughly every quarter. The round is needed in case the number of
# records isn't divisible by 4; max(1, ...) keeps the modulo below from
# dividing by zero when there are fewer than 3 records.
step = max(1, round(total / 4))
for r in records:
    id = r.ContentItemId
    url = "https://api.aucklandmuseum.com/id/library/ephemera/" + id
    req = requests.get(url, allow_redirects=False)
    code = req.status_code
    w.writerow([id, code])
    count += 1
    if count == total:
        print("Complete")
    elif count % step == 0:
        # Scale to a percentage before rounding; rounding the fraction first
        # and then truncating with int() can come out one too low
        # (e.g. 0.29 * 100 == 28.999...).
        progress = round(100 * count / total)
        print("{}%".format(progress))
[纯 Python 解决方案] 要在同一行上打印百分比进度(即不占满整个屏幕),您可以执行以下操作:
[...]
total = len(records)
# start=1 so the last iteration reports 100% instead of stopping short.
for i, r in enumerate(records, start=1):
    id = r.ContentItemId
    url = "https://api.aucklandmuseum.com/id/library/ephemera/" + id
    req = requests.get(url, allow_redirects=False)
    code = req.status_code
    w.writerow([id, code])
    # '\r' returns the cursor to the start of the line so the next print
    # overwrites this one. flush=True is needed because no newline is
    # written, so a line-buffered stdout would otherwise not show it.
    print("%.2f%% \t\r" % (100 * i / total), end='', flush=True)
# Move off the progress line once the loop is done.
print()
要获得进度条,请使用 TQDM:
数据(来自in.csv
):
ContentItemId
21200
21201
21202
21203
21204
21205
21206
...
21296
21297
21298
21299
21300
代码:
from collections import namedtuple
import csv
import requests
from tqdm import tqdm

# Load every data row into a namedtuple keyed by the header row.
with open('in.csv', mode='r', newline='') as f:
    reader = csv.reader(f)
    all_records = namedtuple('rec', next(reader))
    records = [all_records._make(row) for row in reader]

# Create output file. newline='' stops the csv writer from producing blank
# rows on Windows.
with open('out.csv', mode='w+', newline='') as o:
    w = csv.writer(o)
    w.writerow(["ContentItemId", "code"])
    with tqdm(total=len(records)) as pbar:
        for r in records:
            id = r.ContentItemId
            url = "https://api.aucklandmuseum.com/id/library/ephemera/" + id
            req = requests.get(url, allow_redirects=False)
            code = req.status_code
            w.writerow([id, code])
            # Advance after the row is written so the bar counts finished
            # records.
            pbar.update(1)

# Unindented, so it runs once after the loop and both `with` blocks finish.
print('Complete!')
- 注意在 for-loop 之前添加 with tqdm(total=len(records)) as pbar:
- 当从控制台运行时,会出现一个进度条,显示完成百分比。
- 注意图像左侧的 21/101,这是相对于 records 列表长度的计数。
- tqdm 提供百分比进度条和 complete/total 的计数。
# sudo pip3 install tqdm
import time
import tqdm

records = ['a', 'b', 'c', 'd', 'e']

# One bar tick per record; the sleep stands in for real per-record work.
with tqdm.tqdm(smoothing=0.1, total=len(records)) as pbar:
    for _ in records:
        time.sleep(1)
        pbar.update()
它都是相对的,所以让我们做一些通用的数学运算。 :)
# sudo pip3 install tqdm
import time
import tqdm

total = 5000
_number_left = 5000

with tqdm.tqdm(smoothing=0.1, total=total) as pbar:
    # Running sum of every increment already handed to pbar.update().
    reported_so_far = 0
    for _ in range(0, 5000, 2):  # 0, 2, 4, ... 4998
        time.sleep(0.0005)
        _number_left -= 2  # input from some worker process for example
        done_in_total = total - _number_left
        # pbar.update() is given the amount done since the previous call,
        # so turn the absolute figure into a delta first.
        increment = done_in_total - reported_so_far
        reported_so_far = done_in_total
        pbar.update(increment)