Python 中未定义名字 'conte'（NameError）

NameError: name 'conte' is not defined in Python

import time
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

from selenium import webdriver

driver = webdriver.Chrome('D:/chromedriver.exe')

url = "https://www.nike.com/gb/w/womens-lifestyle-shoes-13jrmz5e1x6zy7ok"
driver.get(url)
SCROLL_PAUSE_TIME = 1
time.sleep(4)

# Get scroll height.
# NOTE: "return document.body.scrollHeight" doesn't work here because of
# floating web elements, so document.documentElement is used instead.
last_height = driver.execute_script("return document.documentElement.scrollHeight")

# BUG FIX: 'conte' was only assigned inside the loop, so if the scroll
# height was already stable on the first pass the loop broke before the
# page was ever parsed -> NameError: name 'conte' is not defined.
conte = None

while True:
    # Scroll down to the bottom of the page.
    driver.execute_script("window.scrollTo(0,document.documentElement.scrollHeight);")

    # Wait for the page to load.
    time.sleep(SCROLL_PAUSE_TIME)

    # Calculate new scroll height and compare with the last scroll height.
    new_height = driver.execute_script("return document.documentElement.scrollHeight")

    # Only stop once the height is stable AND the page has been parsed
    # at least once ('is not None', so an empty result list still exits).
    if new_height == last_height and conte is not None:
        print("break")
        break
    last_height = new_height
    time.sleep(5)
    pageSource = driver.page_source
    soup = BeautifulSoup(pageSource, 'html.parser')
    conte = soup.find_all('div', class_='product-card__body')

# Release the browser; previously the driver process was leaked.
driver.quit()

wshoes = []
for items in conte:
    # find(...) returns None when an element is missing, so .text or
    # ['href'] raise AttributeError/TypeError -- catch only those instead
    # of a bare 'except:' that would also hide real bugs.
    try:
        title = items.find('div', class_='product-card__title').text
    except (AttributeError, TypeError):
        title = ''
    try:
        sub_title = items.find('div', class_='product-card__subtitle').text
    except (AttributeError, TypeError):
        sub_title = ''
    try:
        color = items.find('div', {'class': 'product-card__product-count'}).text
    except (AttributeError, TypeError):
        color = ''
    try:
        link = items.find('a', {'class': 'product-card__link-overlay'})['href']
    except (AttributeError, TypeError, KeyError):
        link = ''
    try:
        price = items.select_one('div[data-test="product-price"]').text.strip()
    except (AttributeError, TypeError):
        price = '-'
    try:
        reduce_price = items.select_one('div[data-test="product-price-reduced"]').text.strip()
    except (AttributeError, TypeError):
        reduce_price = '-'
    print(title, sub_title, color, price, reduce_price, link)

    shoes = {
        'title': title,
        'Description': sub_title,
        'QuatityColor': color,
        'Price': price,
        'Reducedprice': reduce_price,
        'Url': link
    }
    wshoes.append(shoes)

df = pd.DataFrame(wshoes)
print(df)
df.to_csv('Nike.csv')
print('Saved to csv file')
DevTools listening on ws://127.0.0.1:58524/devtools/browser/e07b59df-6056-4144-9203-2feb91b19647
[21028:20948:0301/203812.684:ERROR:device_event_log_impl.cc(211)] [20:38:12.685] USB: usb_device_handle_win.cc:1049 Failed to read descriptor from node connection: A device attached to the system is not functioning. (0x1F)
[21028:20948:0301/203812.686:ERROR:device_event_log_impl.cc(211)] [20:38:12.687] USB: usb_device_handle_win.cc:1049 Failed to read descriptor from node connection: A device attached to the system is not functioning. (0x1F)
[18068:9236:0301/203825.199:ERROR:ssl_client_socket_impl.cc(962)] handshake failed; returned -1, SSL error code 1, net_error -101

您似乎遇到的问题是,在某些情况下,您对 new_height == last_height 的测试可能在第一次尝试时就成功了。因此,未分配变量 conte

要解决此问题,请将其初始化为 None,除非已分配,否则不要中断。

例如:

import time
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

from selenium import webdriver

driver = webdriver.Chrome('D:/chromedriver.exe')

url = "https://www.nike.com/gb/w/womens-lifestyle-shoes-13jrmz5e1x6zy7ok"
driver.get(url)
SCROLL_PAUSE_TIME = 1
time.sleep(4)

# Get scroll height.
# NOTE: "return document.body.scrollHeight" doesn't work here because of
# floating web elements, so document.documentElement is used instead.
last_height = driver.execute_script("return document.documentElement.scrollHeight")

# 'conte' must exist before the loop may break, otherwise the first pass
# could exit without ever parsing the page (NameError).
conte = None

while True:
    # Scroll down to the bottom of the page.
    driver.execute_script("window.scrollTo(0,document.documentElement.scrollHeight);")

    # Wait for the page to load.
    time.sleep(SCROLL_PAUSE_TIME)

    # Calculate new scroll height and compare with the last scroll height.
    new_height = driver.execute_script("return document.documentElement.scrollHeight")

    # BUG FIX: testing truthiness ('and conte') loops forever when the
    # page yields zero product cards (empty list is falsy); test for
    # "parsed at least once" explicitly instead.
    if new_height == last_height and conte is not None:
        print("break")
        break

    last_height = new_height
    time.sleep(5)
    pageSource = driver.page_source
    soup = BeautifulSoup(pageSource, 'html.parser')
    conte = soup.find_all('div', class_='product-card__body')

# Release the browser; previously the driver process was leaked.
driver.quit()


def _find_text(card, name, cls, default=''):
    """Text of the first <name class=cls> inside card, or default if absent."""
    node = card.find(name, class_=cls)
    return node.text if node is not None else default


def _select_text(card, css, default='-'):
    """Stripped text of the first CSS-selector match inside card, or default."""
    node = card.select_one(css)
    return node.text.strip() if node is not None else default


wshoes = []

for card in conte:
    title = _find_text(card, 'div', 'product-card__title')
    sub_title = _find_text(card, 'div', 'product-card__subtitle')
    color = _find_text(card, 'div', 'product-card__product-count')
    anchor = card.find('a', class_='product-card__link-overlay')
    link = anchor.get('href', '') if anchor is not None else ''
    price = _select_text(card, 'div[data-test="product-price"]')
    reduce_price = _select_text(card, 'div[data-test="product-price-reduced"]')
    print(title, sub_title, color, price, reduce_price, link)

    wshoes.append({
        'title': title,
        'Description': sub_title,
        'QuatityColor': color,
        'Price': price,
        'Reducedprice': reduce_price,
        'Url': link
    })

df = pd.DataFrame(wshoes)
print(df)
df.to_csv('Nike.csv')
print('Saved to csv file')

另一种可能的解决方案是把 `pageSource = ...`、`soup = BeautifulSoup(...)` 和 `conte = soup.find_all(...)` 这几行移到循环之外，在循环结束后再解析一次页面。