使用 BeautifulSoup 解析第二页/第三页并添加到列表

Question

我正在尝试从网站上抓取食谱，然后随机选择一个食谱并显示出来。为此，我编写了一段代码，当我只抓取第一页（35 个食谱）时，它可以完美运行。但是：我也想从第 2 页和第 3 页获取食谱。我想我应该为此写一个循环，但我似乎总是做不对。我在这段代码中做错了什么？

from django.shortcuts import render
import requests
import re
from bs4 import BeautifulSoup
import random

# Create your views here.
def recipe(request):
    """Scrape recipe cards from ah.nl and render one randomly chosen recipe.

    Requests the Allerhande recipe search pages, collects each card's link,
    ``aria-label`` (used as title), image ``data-srcset`` and the text of the
    first ``<li>`` of its properties list (used as preparation time), then
    renders ``randomRecipe/recipe.html`` with the data of one random index.

    NOTE(review): as written, only the data of the last fetched page is
    available after the loop, because ``links``/``titles``/``pictures`` are
    re-created and ``recipe_prep_time`` is reassigned on every iteration.
    """

#Create soup
    page = 0
    # NOTE(review): this loop runs for page = 0 and page = 1 only, so
    # "?page=2" and "?page=3" are never requested.
    while page != 2:
        webpage_response = requests.get("https://www.ah.nl/allerhande/recepten-zoeken?page=" + str(page))
        webpage = webpage_response.content
        soup = BeautifulSoup(webpage, "html.parser")  
        # The class names are matched by prefix only; presumably the suffix
        # after "__" is a generated hash -- TODO confirm against the site.
        recipe_links = soup.find_all('a', attrs={'class' : re.compile('^display-card_root__.*')})
        recipe_pictures = soup.find_all('img', attrs={'class' : re.compile('^card-image-set_imageSet__.*')})
        # Reassigned (not extended) on each iteration: holds the current page only.
        recipe_prep_time = [ul.find('li').text 
                   for ul in soup.find_all('ul',
                        attrs={'class': re.compile('^recipe-card-properties_root')})]


#Set up lists
        # NOTE(review): these lists are created inside the loop, so the
        # entries appended for the previous page are discarded here.
        links = []
        titles = []
        pictures = []

#create prefix for link
        prefix = "https://ah.nl"

#scrape page for recipe
        for link in recipe_links:
            links.append(prefix + link.get('href'))

        # The title is read from the same <a> elements as the link.
        for title in recipe_links:
            titles.append(title.get('aria-label'))

        for img in recipe_pictures:
            pictures.append(img.get('data-srcset'))

        page = page +1

#create random int to select a recipe
    # random.randint includes both endpoints, so any index 0..105 can be drawn.
    # NOTE(review): presumably sized for 3 pages x 35 recipes; indexing below
    # raises IndexError whenever the lists hold fewer entries than the index.
    nummer = random.randint(0,105)
    print(nummer)

#select correct link for image
    pic_url = pictures[nummer].split(' ')

#create context
    context = {
        "titles" : titles[nummer],
        # 17th space-separated token of data-srcset; presumably the URL of one
        # specific image size -- TODO confirm against the site's markup.
        "pictures" : pic_url[16],
        "preptime" : recipe_prep_time[nummer],
        "link" : links[nummer]
    }

#render page
    return render(request, "randomRecipe/recipe.html", context)

Answer 1

您可以使用 for 循环进行分页：

from django.shortcuts import render
import requests
import re
from bs4 import BeautifulSoup
import random

# Create your views here.
def recipe(request):
    """Render one randomly chosen recipe scraped from result pages 1-3.

    Each of the three Allerhande search pages is fetched and parsed, and the
    link, title, image ``data-srcset`` and preparation time of every recipe
    card are accumulated across all pages. One index is then drawn at random
    from the collected entries and passed to the template.

    Args:
        request: The Django request object, forwarded to ``render``.

    Returns:
        The response produced by rendering ``randomRecipe/recipe.html`` with
        the keys ``titles``, ``pictures``, ``preptime`` and ``link``.

    Raises:
        RuntimeError: If no recipe could be collected from any page.
    """
    prefix = "https://ah.nl"

    # Compile the class-name patterns once instead of once per page.
    link_class = re.compile('^display-card_root__.*')
    picture_class = re.compile('^card-image-set_imageSet__.*')
    properties_class = re.compile('^recipe-card-properties_root')

    # The result lists live OUTSIDE the page loop so that every page adds to
    # them; creating them inside the loop would keep only the last page.
    links = []
    titles = []
    pictures = []
    prep_times = []

    for page in range(1, 4):
        # A timeout keeps the view from hanging forever on a stalled request.
        webpage_response = requests.get(
            f"https://www.ah.nl/allerhande/recepten-zoeken?page={page}",
            timeout=10,
        )
        soup = BeautifulSoup(webpage_response.content, "html.parser")

        # Link and title come from the same <a> element of a recipe card.
        for card in soup.find_all('a', attrs={'class': link_class}):
            links.append(prefix + card.get('href'))
            titles.append(card.get('aria-label'))

        pictures.extend(
            img.get('data-srcset')
            for img in soup.find_all('img', attrs={'class': picture_class})
        )

        prep_times.extend(
            ul.find('li').text
            for ul in soup.find_all('ul', attrs={'class': properties_class})
        )

    # The lists are indexed in parallel, so only indices valid in all of them
    # may be drawn. NOTE(review): this assumes every card yields exactly one
    # link, one image and one properties list, in the same order -- confirm.
    available = min(len(links), len(titles), len(pictures), len(prep_times))
    if available == 0:
        raise RuntimeError("No recipes could be scraped from ah.nl")

    # Derive the index from the data actually collected rather than from a
    # hard-coded upper bound.
    nummer = random.randrange(available)
    print(nummer)

    # 17th space-separated token of data-srcset; presumably the URL of one
    # specific image size -- TODO confirm against the site's markup.
    pic_url = pictures[nummer].split(' ')

    context = {
        "titles": titles[nummer],
        "pictures": pic_url[16],
        "preptime": prep_times[nummer],
        "link": links[nummer],
    }

    return render(request, "randomRecipe/recipe.html", context)

使用 BeautifulSoup 解析第二页/第三页并添加到列表

Parse second/third page and add to list with BeautifulSoup

python

django

beautifulsoup