Python 中未定义名字 'conte'(NameError)
NameError: name 'conte' is not defined in Python
import time
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
from selenium import webdriver

# Scrape every product card from Nike's women's lifestyle-shoes listing:
# keep scrolling until the infinite-scroll page stops growing, parse the
# final HTML once, and save the results to Nike.csv.
driver = webdriver.Chrome('D:/chromedriver.exe')
url = "https://www.nike.com/gb/w/womens-lifestyle-shoes-13jrmz5e1x6zy7ok"
driver.get(url)
SCROLL_PAUSE_TIME = 1
time.sleep(4)  # let the initial page render before measuring its height

# Get scroll height.
# NOTE: "return document.body.scrollHeight" doesn't work here because of
# floating web elements, so document.documentElement is measured instead.
last_height = driver.execute_script("return document.documentElement.scrollHeight")
while True:
    # Scroll down to the bottom to trigger lazy loading of more products.
    driver.execute_script("window.scrollTo(0,document.documentElement.scrollHeight);")
    # Wait for the next batch of products to load.
    time.sleep(SCROLL_PAUSE_TIME)
    # Calculate new scroll height and compare with the last scroll height.
    new_height = driver.execute_script("return document.documentElement.scrollHeight")
    if new_height == last_height:
        # Height stopped growing: all products are loaded.
        print("break")
        break
    last_height = new_height

time.sleep(5)
# Parse the page AFTER the scroll loop. Assigning `conte` inside the loop
# caused "NameError: name 'conte' is not defined" whenever the loop broke
# on its very first pass, before `conte` had ever been bound.
pageSource = driver.page_source
soup = BeautifulSoup(pageSource, 'html.parser')
conte = soup.find_all('div', class_='product-card__body')
wshoes = []
for items in conte:
    # Each field is fetched independently so a card missing one element
    # still contributes its remaining fields. AttributeError covers
    # find()/select_one() returning None; TypeError/KeyError cover
    # subscripting a missing <a> tag or a missing 'href' attribute.
    try:
        title = items.find('div', class_='product-card__title').text
    except AttributeError:
        title = ''
    try:
        sub_title = items.find('div', class_='product-card__subtitle').text
    except AttributeError:
        sub_title = ''
    try:
        color = items.find('div', {'class': 'product-card__product-count'}).text
    except AttributeError:
        color = ''
    try:
        link = items.find('a', {'class': 'product-card__link-overlay'})['href']
    except (AttributeError, TypeError, KeyError):
        link = ''
    try:
        price = items.select_one('div[data-test="product-price"]').text.strip()
    except AttributeError:
        price = '-'
    try:
        reduce_price = items.select_one('div[data-test="product-price-reduced"]').text.strip()
    except AttributeError:
        reduce_price = '-'
    print(title, sub_title, color, price, reduce_price, link)
    shoes = {
        'title': title,
        'Description': sub_title,
        'QuatityColor': color,
        'Price': price,
        'Reducedprice': reduce_price,
        'Url': link
    }
    wshoes.append(shoes)

df = pd.DataFrame(wshoes)
print(df)
df.to_csv('Nike.csv')
print('Saved to csv file')
DevTools listening on ws://127.0.0.1:58524/devtools/browser/e07b59df-6056-4144-9203-2feb91b19647
[21028:20948:0301/203812.684:ERROR:device_event_log_impl.cc(211)] [20:38:12.685] USB: usb_device_handle_win.cc:1049 Failed to read descriptor from node connection: A device attached to the system is not functioning. (0x1F)
[21028:20948:0301/203812.686:ERROR:device_event_log_impl.cc(211)] [20:38:12.687] USB: usb_device_handle_win.cc:1049 Failed to read descriptor from node connection: A device attached to the system is not functioning. (0x1F)
[18068:9236:0301/203825.199:ERROR:ssl_client_socket_impl.cc(962)] handshake failed; returned -1, SSL error code 1, net_error -101
您似乎遇到的问题是:在某些情况下,对 new_height == last_height 的测试可能在第一次循环时就成立,循环随即 break,因此变量 conte 从未被赋值,后续引用它时就会抛出 NameError。
要解决此问题,请先把 conte 初始化为 None,并且在 conte 尚未被赋值(仍为 None)时不要 break。
例如:
import time
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
from selenium import webdriver

# Scrape Nike's women's lifestyle-shoes listing (infinite scroll).
# This variant re-parses the page on every scroll pass and guards the
# loop exit with `and conte`, so the loop cannot break before `conte`
# has been assigned at least once (the cause of the original NameError).
driver = webdriver.Chrome('D:/chromedriver.exe')
url = "https://www.nike.com/gb/w/womens-lifestyle-shoes-13jrmz5e1x6zy7ok"
driver.get(url)
SCROLL_PAUSE_TIME = 1
time.sleep(4)  # let the initial page render before measuring its height

# Get scroll height.
# NOTE: "return document.body.scrollHeight" doesn't work here because of
# floating web elements, so document.documentElement is measured instead.
last_height = driver.execute_script("return document.documentElement.scrollHeight")
conte = None  # sentinel: stays None until the page has been parsed once
while True:
    # Scroll down to the bottom to trigger lazy loading of more products.
    driver.execute_script("window.scrollTo(0,document.documentElement.scrollHeight);")
    # Wait for the next batch of products to load.
    time.sleep(SCROLL_PAUSE_TIME)
    # Calculate new scroll height and compare with the last scroll height;
    # only break once `conte` holds a non-empty parse result.
    new_height = driver.execute_script("return document.documentElement.scrollHeight")
    if new_height == last_height and conte:
        print("break")
        break
    last_height = new_height
    time.sleep(5)
    pageSource = driver.page_source
    soup = BeautifulSoup(pageSource, 'html.parser')
    conte = soup.find_all('div', class_='product-card__body')

wshoes = []
for items in conte:
    # Each field is fetched independently so a card missing one element
    # still contributes its remaining fields. AttributeError covers
    # find()/select_one() returning None; TypeError/KeyError cover
    # subscripting a missing <a> tag or a missing 'href' attribute.
    try:
        title = items.find('div', class_='product-card__title').text
    except AttributeError:
        title = ''
    try:
        sub_title = items.find('div', class_='product-card__subtitle').text
    except AttributeError:
        sub_title = ''
    try:
        color = items.find('div', {'class': 'product-card__product-count'}).text
    except AttributeError:
        color = ''
    try:
        link = items.find('a', {'class': 'product-card__link-overlay'})['href']
    except (AttributeError, TypeError, KeyError):
        link = ''
    try:
        price = items.select_one('div[data-test="product-price"]').text.strip()
    except AttributeError:
        price = '-'
    try:
        reduce_price = items.select_one('div[data-test="product-price-reduced"]').text.strip()
    except AttributeError:
        reduce_price = '-'
    print(title, sub_title, color, price, reduce_price, link)
    shoes = {
        'title': title,
        'Description': sub_title,
        'QuatityColor': color,
        'Price': price,
        'Reducedprice': reduce_price,
        'Url': link
    }
    wshoes.append(shoes)

df = pd.DataFrame(wshoes)
print(df)
df.to_csv('Nike.csv')
print('Saved to csv file')
另一种可能的解决方案是把 soup = ... 和 conte = ... 两行移到 while 循环之外,在滚动结束后再解析页面:
import time
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
from selenium import webdriver

# Scrape Nike's women's lifestyle-shoes listing (infinite scroll).
# This variant moves the `soup = ...` and `conte = ...` lines OUT of the
# scroll loop: the page is parsed exactly once, after scrolling finishes,
# so `conte` is always assigned regardless of when the loop breaks.
driver = webdriver.Chrome('D:/chromedriver.exe')
url = "https://www.nike.com/gb/w/womens-lifestyle-shoes-13jrmz5e1x6zy7ok"
driver.get(url)
SCROLL_PAUSE_TIME = 1
time.sleep(4)  # let the initial page render before measuring its height

# Get scroll height.
# NOTE: "return document.body.scrollHeight" doesn't work here because of
# floating web elements, so document.documentElement is measured instead.
last_height = driver.execute_script("return document.documentElement.scrollHeight")
while True:
    # Scroll down to the bottom to trigger lazy loading of more products.
    driver.execute_script("window.scrollTo(0,document.documentElement.scrollHeight);")
    # Wait for the next batch of products to load.
    time.sleep(SCROLL_PAUSE_TIME)
    # Calculate new scroll height and compare with the last scroll height.
    new_height = driver.execute_script("return document.documentElement.scrollHeight")
    if new_height == last_height:
        # Height stopped growing: all products are loaded.
        print("break")
        break
    last_height = new_height

time.sleep(5)
# Parse once, outside the loop — `conte` is guaranteed to be bound here.
pageSource = driver.page_source
soup = BeautifulSoup(pageSource, 'html.parser')
conte = soup.find_all('div', class_='product-card__body')
wshoes = []
for items in conte:
    # Each field is fetched independently so a card missing one element
    # still contributes its remaining fields. AttributeError covers
    # find()/select_one() returning None; TypeError/KeyError cover
    # subscripting a missing <a> tag or a missing 'href' attribute.
    try:
        title = items.find('div', class_='product-card__title').text
    except AttributeError:
        title = ''
    try:
        sub_title = items.find('div', class_='product-card__subtitle').text
    except AttributeError:
        sub_title = ''
    try:
        color = items.find('div', {'class': 'product-card__product-count'}).text
    except AttributeError:
        color = ''
    try:
        link = items.find('a', {'class': 'product-card__link-overlay'})['href']
    except (AttributeError, TypeError, KeyError):
        link = ''
    try:
        price = items.select_one('div[data-test="product-price"]').text.strip()
    except AttributeError:
        price = '-'
    try:
        reduce_price = items.select_one('div[data-test="product-price-reduced"]').text.strip()
    except AttributeError:
        reduce_price = '-'
    print(title, sub_title, color, price, reduce_price, link)
    shoes = {
        'title': title,
        'Description': sub_title,
        'QuatityColor': color,
        'Price': price,
        'Reducedprice': reduce_price,
        'Url': link
    }
    wshoes.append(shoes)

df = pd.DataFrame(wshoes)
print(df)
df.to_csv('Nike.csv')
print('Saved to csv file')
DevTools listening on ws://127.0.0.1:58524/devtools/browser/e07b59df-6056-4144-9203-2feb91b19647
[21028:20948:0301/203812.684:ERROR:device_event_log_impl.cc(211)] [20:38:12.685] USB: usb_device_handle_win.cc:1049 Failed to read descriptor from node connection: A device attached to the system is not functioning. (0x1F)
[21028:20948:0301/203812.686:ERROR:device_event_log_impl.cc(211)] [20:38:12.687] USB: usb_device_handle_win.cc:1049 Failed to read descriptor from node connection: A device attached to the system is not functioning. (0x1F)
[18068:9236:0301/203825.199:ERROR:ssl_client_socket_impl.cc(962)] handshake failed; returned -1, SSL error code 1, net_error -101
您似乎遇到的问题是:在某些情况下,对 new_height == last_height 的测试可能在第一次循环时就成立,循环随即 break,因此变量 conte 从未被赋值,后续引用它时就会抛出 NameError。
要解决此问题,请先把 conte 初始化为 None,并且在 conte 尚未被赋值(仍为 None)时不要 break。
例如:
import time
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
from selenium import webdriver

# Scrape Nike's women's lifestyle-shoes listing (infinite scroll).
# This variant re-parses the page on every scroll pass and guards the
# loop exit with `and conte`, so the loop cannot break before `conte`
# has been assigned at least once (the cause of the original NameError).
driver = webdriver.Chrome('D:/chromedriver.exe')
url = "https://www.nike.com/gb/w/womens-lifestyle-shoes-13jrmz5e1x6zy7ok"
driver.get(url)
SCROLL_PAUSE_TIME = 1
time.sleep(4)  # let the initial page render before measuring its height

# Get scroll height.
# NOTE: "return document.body.scrollHeight" doesn't work here because of
# floating web elements, so document.documentElement is measured instead.
last_height = driver.execute_script("return document.documentElement.scrollHeight")
conte = None  # sentinel: stays None until the page has been parsed once
while True:
    # Scroll down to the bottom to trigger lazy loading of more products.
    driver.execute_script("window.scrollTo(0,document.documentElement.scrollHeight);")
    # Wait for the next batch of products to load.
    time.sleep(SCROLL_PAUSE_TIME)
    # Calculate new scroll height and compare with the last scroll height;
    # only break once `conte` holds a non-empty parse result.
    new_height = driver.execute_script("return document.documentElement.scrollHeight")
    if new_height == last_height and conte:
        print("break")
        break
    last_height = new_height
    time.sleep(5)
    pageSource = driver.page_source
    soup = BeautifulSoup(pageSource, 'html.parser')
    conte = soup.find_all('div', class_='product-card__body')

wshoes = []
for items in conte:
    # Each field is fetched independently so a card missing one element
    # still contributes its remaining fields. AttributeError covers
    # find()/select_one() returning None; TypeError/KeyError cover
    # subscripting a missing <a> tag or a missing 'href' attribute.
    try:
        title = items.find('div', class_='product-card__title').text
    except AttributeError:
        title = ''
    try:
        sub_title = items.find('div', class_='product-card__subtitle').text
    except AttributeError:
        sub_title = ''
    try:
        color = items.find('div', {'class': 'product-card__product-count'}).text
    except AttributeError:
        color = ''
    try:
        link = items.find('a', {'class': 'product-card__link-overlay'})['href']
    except (AttributeError, TypeError, KeyError):
        link = ''
    try:
        price = items.select_one('div[data-test="product-price"]').text.strip()
    except AttributeError:
        price = '-'
    try:
        reduce_price = items.select_one('div[data-test="product-price-reduced"]').text.strip()
    except AttributeError:
        reduce_price = '-'
    print(title, sub_title, color, price, reduce_price, link)
    shoes = {
        'title': title,
        'Description': sub_title,
        'QuatityColor': color,
        'Price': price,
        'Reducedprice': reduce_price,
        'Url': link
    }
    wshoes.append(shoes)

df = pd.DataFrame(wshoes)
print(df)
df.to_csv('Nike.csv')
print('Saved to csv file')
另一种可能的解决方案是把 soup = ... 和 conte = ... 两行移到 while 循环之外,在滚动结束后再解析页面。