如何减少代码的运行时间?

How can I decrease the runtime of my code?

是否可以对这段代码进行任何修改以使其运行得更快?我的代码目前可以正常运行,但需要 10 个小时以上才能抓取全部 50,000 个配置文件。请告诉我如何减少运行时间。谢谢!

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from time import sleep
from selenium.common.exceptions import NoSuchElementException


# Scrape certification profiles from the BCSP directory and save them to a
# text file as comma-separated rows, one profile per line.

OUTPUT_PATH = "Spredsheet.txt"
ISSUED_BY = "Board of Certified Safety Professionals"
LINK_XPATH = "//div[@align='right']/a"

# All scraped profile fields are addressed relative to this same table row,
# so the shared prefix is spelled out once.
PROFILE_ROW = '/html/body/table/tbody/tr/td/table/tbody/tr/td[5]/div/table[1]/tbody/tr'
NAME_XPATH = PROFILE_ROW + '/td[1]/div[2]/div'
CERT_NUMBER_XPATH = PROFILE_ROW + '/td[3]/table/tbody/tr[1]/td[3]/div[2]'
CERT_SINCE_XPATH = PROFILE_ROW + '/td[3]/table/tbody/tr[3]/td[1]/div[2]'
ACCREDITED_BY_XPATH = PROFILE_ROW + '/td[3]/table/tbody/tr[5]/td[3]/div[2]/a'
EXPIRES_XPATH = PROFILE_ROW + '/td[3]/table/tbody/tr[5]/td[1]/div[2]'


def _text_or_default(driver, xpath, default="N/A"):
    """Return the text of the element at ``xpath``, or ``default`` if absent."""
    try:
        return driver.find_element_by_xpath(xpath).text
    except NoSuchElementException:
        return default


def _scrape_profile(driver, url):
    """Load ``url`` in the current window and return its fields as a tuple.

    The name, certification number and certified-since fields are required:
    a missing element raises ``NoSuchElementException``. The accredited-by
    and expires fields fall back to "N/A" when the element is not found.
    """
    driver.get(url)
    return (
        driver.find_element_by_xpath(NAME_XPATH).text,
        ISSUED_BY,
        driver.find_element_by_xpath(CERT_NUMBER_XPATH).text,
        driver.find_element_by_xpath(CERT_SINCE_XPATH).text,
        _text_or_default(driver, ACCREDITED_BY_XPATH),
        _text_or_default(driver, EXPIRES_XPATH),
    )


driver = webdriver.Chrome("/Users/nzalle/Downloads/chromedriver")
try:
    driver.get("https://directory.bcsp.org/")
    count = int(input("Number of Pages to Scrape: "))

    body = driver.find_element_by_xpath("//body")
    links = driver.find_elements_by_xpath(LINK_XPATH)

    # Keep scrolling to the bottom until at least ``count`` links are present.
    # NOTE: this never terminates if the page has fewer than ``count`` links.
    while len(links) < count:
        body.send_keys(Keys.END)
        sleep(1)
        links = driver.find_elements_by_xpath(LINK_XPATH)

    # Read every href up front: once the window navigates to a profile the
    # link elements of the listing page can no longer be used. Having plain
    # URLs also removes the need to open and close a new tab per profile.
    profile_urls = [link.get_attribute('href') for link in links]

    # Write each row as soon as it is scraped so a failure part-way through a
    # long run does not discard the profiles already collected.
    with open(OUTPUT_PATH, "w", encoding="utf-8") as output:
        for url in profile_urls:
            row = _scrape_profile(driver, url)
            # One line per profile; the newline is a row terminator, not a
            # field, so rows no longer begin with a stray comma.
            output.write(','.join(row) + "\n")
finally:
    # quit() ends the whole browser session, not just the current window.
    driver.quit()

除了可以把 IssuedBy 的赋值移到 for 循环外面(它不需要在每次迭代中重复赋值,不过这只会带来微乎其微的差异)之外,您似乎没有什么可以改动的地方。由于这是从互联网上抓取数据,最大的限制因素是您的宽带速度。总的来说,您的程序的时间复杂度为 O(n),这意味着处理时间会随着数据量的增加而线性增长。

此脚本的运行时间主要受限于宽带速度,而宽带速度不在您的控制范围之内,因此,您能做的改动并不多。