使用 class 和 itemprop 从 div 获取文本
get text from a div with class and itemprop
我正在尝试从以下内容中提取文本:
[<div class="menu__vendor-name" itemprop="name">Beno's Flowers & Gifts</div>, <div
class="menu__vendor-name" itemprop="name">Bluebird Diner</div>, <div
class="menu__vendor-name" itemprop="name">Bread Garden Market</div>]
这是我的代码:
import requests
from bs4 import BeautifulSoup
# Listing page to fetch.
url = 'https://www.chomp.delivery/restaurants'
# Browser-like User-Agent header sent with the request.
headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) '\
'AppleWebKit/537.36 (KHTML, like Gecko) '\
'Chrome/75.0.3770.80 Safari/537.36'}
response = requests.get(url,headers=headers)
soup = BeautifulSoup(response.text, "html.parser")
# First element with class "dd_rest_list"; find() returns None when nothing
# matches, in which case the find_all() call below would fail.
restaurant_wrapper = soup.find(class_ = "dd_rest_list")
# All descendants that have both class "menu__vendor-name" and itemprop="name".
restaurants = restaurant_wrapper.find_all(class_="menu__vendor-name",
itemprop="name")
# NOTE(review): the original indentation of this snippet was lost; the lines
# below presumably form the body of the function -- confirm against the source.
# NOTE(review): the `restaurant` parameter is never used, and
# `restaurant_details` is not defined anywhere in the visible code.
def extract_restaurant_data(restaurant):
# print() returns None, so every "title" value stored here is None; the text
# is only written to stdout.
results = [
{
"title": print(title.text.strip())
}
for title in restaurant_details
]
print(results)
# There is no return statement above, so this list can only collect None
# values, one per element in `restaurants`.
results = [extract_restaurant_data(restaurant) for restaurant in restaurants]
输出:
AttributeError: 'tuple' object has no attribute 'text'
我认为问题是每个 div 都有一个 itemprop,也许这就是问题所在。
假设您的目标是从每家餐厅抓取一些详细信息,而不仅仅是其名称。请改变策略:在读取数据的同时对其进行处理,并以更有条理的方式存储在字典列表(list of dicts)中:
# Build one dict per restaurant link inside the ".dd_rest_list" container.
# Expects `soup` to be the already-parsed BeautifulSoup document.
results = []
for restaurant in soup.select('.dd_rest_list a'):
    # Look each element up once; find()/get() return None when nothing
    # matches, so check before calling a method on the result.
    name = restaurant.find('div', {'itemprop': 'name'})
    address = restaurant.find('div', {'itemprop': 'address'})
    href = restaurant.get('href')
    results.append({
        'title': name.text if name is not None else None,
        'url': 'https://www.chomp.delivery' + href if href is not None else None,
        'address': address.get_text(',', strip=True) if address is not None else None,
        'and': 'so on',
    })
在调用方法之前,务必先检查要选取的元素是否存在:
'address':restaurant.find('div',{'itemprop':'address'}).get_text(',',strip=True) if restaurant.find('div',{'itemprop':'address'}) else None,
例子
import requests
from bs4 import BeautifulSoup
# Listing page to scrape.
url = 'https://www.chomp.delivery/restaurants'
# Browser-like User-Agent header sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/75.0.3770.80 Safari/537.36'
}
response = requests.get(url, headers=headers)
# Name the parser explicitly so the parse result does not depend on which
# optional parsers happen to be installed (and no parser warning is raised).
soup = BeautifulSoup(response.text, 'html.parser')

# Build one dict per restaurant link inside the ".dd_rest_list" container.
results = []
for restaurant in soup.select('.dd_rest_list a'):
    # Look each element up once; find()/get() return None when nothing
    # matches, so check before calling a method on the result.
    name = restaurant.find('div', {'itemprop': 'name'})
    address = restaurant.find('div', {'itemprop': 'address'})
    href = restaurant.get('href')
    results.append({
        'title': name.text if name is not None else None,
        'url': 'https://www.chomp.delivery' + href if href is not None else None,
        'address': address.get_text(',', strip=True) if address is not None else None,
        'and': 'so on',
    })

# Bare expression: displays the list when run in a REPL or notebook cell.
results
输出
[{'title': '2 Dogs Pub',
'url': 'https://www.chomp.delivery/r/21/restaurants/delivery/Burgers/2-Dogs-Pub-Iowa-City',
'address': '1705 S 1st Ave Ste Q,Iowa City,IA,52240',
'and': 'so on'},
{'title': 'Alebrije Mexican Restaurant',
'url': 'https://www.chomp.delivery/r/3316/restaurants/delivery/Mexican/Alebrije-Mexican-Restaurant-Iowa-City',
'address': '401 S Linn st,Iowa City,IA,52240',
'and': 'so on'},
{'title': 'Ascended Electronics',
'url': 'https://www.chomp.delivery/r/2521/restaurants/delivery/Retail/Ascended-Electronics-Iowa-City',
'address': '208 Stevens Dr,Iowa City,IA,52240',
'and': 'so on'},
{'title': 'Aspen Leaf Frozen Yogurt',
'url': 'https://www.chomp.delivery/r/522/restaurants/delivery/Ice-Cream-Sweets-Snacks/Aspen-Leaf-Frozen-Yogurt-Iowa-City',
'address': '125 S Dubuque St,Iowa City,IA,52240',
'and': 'so on'},...]
我正在尝试从以下内容中提取文本:
[<div class="menu__vendor-name" itemprop="name">Beno's Flowers & Gifts</div>, <div
class="menu__vendor-name" itemprop="name">Bluebird Diner</div>, <div
class="menu__vendor-name" itemprop="name">Bread Garden Market</div>]
这是我的代码:
import requests
from bs4 import BeautifulSoup
# Listing page to fetch.
url = 'https://www.chomp.delivery/restaurants'
# Browser-like User-Agent header sent with the request.
headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) '\
'AppleWebKit/537.36 (KHTML, like Gecko) '\
'Chrome/75.0.3770.80 Safari/537.36'}
response = requests.get(url,headers=headers)
soup = BeautifulSoup(response.text, "html.parser")
# First element with class "dd_rest_list"; find() returns None when nothing
# matches, in which case the find_all() call below would fail.
restaurant_wrapper = soup.find(class_ = "dd_rest_list")
# All descendants that have both class "menu__vendor-name" and itemprop="name".
restaurants = restaurant_wrapper.find_all(class_="menu__vendor-name",
itemprop="name")
# NOTE(review): the original indentation of this snippet was lost; the lines
# below presumably form the body of the function -- confirm against the source.
# NOTE(review): the `restaurant` parameter is never used, and
# `restaurant_details` is not defined anywhere in the visible code.
def extract_restaurant_data(restaurant):
# print() returns None, so every "title" value stored here is None; the text
# is only written to stdout.
results = [
{
"title": print(title.text.strip())
}
for title in restaurant_details
]
print(results)
# There is no return statement above, so this list can only collect None
# values, one per element in `restaurants`.
results = [extract_restaurant_data(restaurant) for restaurant in restaurants]
输出:
AttributeError: 'tuple' object has no attribute 'text'
我认为问题是每个 div 都有一个 itemprop,也许这就是问题所在。
假设您的目标是从每家餐厅抓取一些详细信息,而不仅仅是其名称。请改变策略:在读取数据的同时对其进行处理,并以更有条理的方式存储在字典列表(list of dicts)中:
# Build one dict per restaurant link inside the ".dd_rest_list" container.
# Expects `soup` to be the already-parsed BeautifulSoup document.
results = []
for restaurant in soup.select('.dd_rest_list a'):
    # Look each element up once; find()/get() return None when nothing
    # matches, so check before calling a method on the result.
    name = restaurant.find('div', {'itemprop': 'name'})
    address = restaurant.find('div', {'itemprop': 'address'})
    href = restaurant.get('href')
    results.append({
        'title': name.text if name is not None else None,
        'url': 'https://www.chomp.delivery' + href if href is not None else None,
        'address': address.get_text(',', strip=True) if address is not None else None,
        'and': 'so on',
    })
在调用方法之前,务必先检查要选取的元素是否存在:
'address':restaurant.find('div',{'itemprop':'address'}).get_text(',',strip=True) if restaurant.find('div',{'itemprop':'address'}) else None,
例子
import requests
from bs4 import BeautifulSoup
# Listing page to scrape.
url = 'https://www.chomp.delivery/restaurants'
# Browser-like User-Agent header sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/75.0.3770.80 Safari/537.36'
}
response = requests.get(url, headers=headers)
# Name the parser explicitly so the parse result does not depend on which
# optional parsers happen to be installed (and no parser warning is raised).
soup = BeautifulSoup(response.text, 'html.parser')

# Build one dict per restaurant link inside the ".dd_rest_list" container.
results = []
for restaurant in soup.select('.dd_rest_list a'):
    # Look each element up once; find()/get() return None when nothing
    # matches, so check before calling a method on the result.
    name = restaurant.find('div', {'itemprop': 'name'})
    address = restaurant.find('div', {'itemprop': 'address'})
    href = restaurant.get('href')
    results.append({
        'title': name.text if name is not None else None,
        'url': 'https://www.chomp.delivery' + href if href is not None else None,
        'address': address.get_text(',', strip=True) if address is not None else None,
        'and': 'so on',
    })

# Bare expression: displays the list when run in a REPL or notebook cell.
results
输出
[{'title': '2 Dogs Pub',
'url': 'https://www.chomp.delivery/r/21/restaurants/delivery/Burgers/2-Dogs-Pub-Iowa-City',
'address': '1705 S 1st Ave Ste Q,Iowa City,IA,52240',
'and': 'so on'},
{'title': 'Alebrije Mexican Restaurant',
'url': 'https://www.chomp.delivery/r/3316/restaurants/delivery/Mexican/Alebrije-Mexican-Restaurant-Iowa-City',
'address': '401 S Linn st,Iowa City,IA,52240',
'and': 'so on'},
{'title': 'Ascended Electronics',
'url': 'https://www.chomp.delivery/r/2521/restaurants/delivery/Retail/Ascended-Electronics-Iowa-City',
'address': '208 Stevens Dr,Iowa City,IA,52240',
'and': 'so on'},
{'title': 'Aspen Leaf Frozen Yogurt',
'url': 'https://www.chomp.delivery/r/522/restaurants/delivery/Ice-Cream-Sweets-Snacks/Aspen-Leaf-Frozen-Yogurt-Iowa-City',
'address': '125 S Dubuque St,Iowa City,IA,52240',
'and': 'so on'},...]