Python: scrape all links containing a specific keyword
I am new to Python and I need help scraping all the links that contain a specific keyword. The problem is that I get the following error:
if "air-max" in link["href"]:
^
IndentationError: expected an indented block.
Here is my code:
import requests
import time
from bs4 import BeautifulSoup
headers = {"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
           "X-Requested-With": "XMLHttpRequest",
           "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"}

for i in range(0,4):
    url = "https://www.aw-lab.com/shop/uomo/scarpe?p={}".format(i)
    r = requests.get(url)
    soup = BeautifulSoup(r.content, "html.parser")
    all_links = soup.find_all("a")
    for link in all_links:
        if link.has_key('href'):
        if "air-max" in link["href"]:
            print(link["href"])
You need another level of indentation after link.has_key('href'):. Also, be consistent: always use spaces (preferred) or always use tabs. It is not always true, but as a general rule, if a line ends with a colon (:), the next line should be indented one level further.
for i in range(0,4):
    url = "https://www.aw-lab.com/shop/uomo/scarpe?p={}".format(i)
    r = requests.get(url)
    soup = BeautifulSoup(r.content, "html.parser")
    all_links = soup.find_all("a")
    for link in all_links:
        if link.has_key('href'):
            if "air-max" in link["href"]:
                print(link["href"])
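Note that has_key() comes from BeautifulSoup 3; in BeautifulSoup 4 it is deprecated in favour of has_attr(), and link.get('href') sidesteps the attribute check entirely. A minimal sketch of the same loop written that way, assuming requests and BeautifulSoup 4 are installed; passing headers=headers to requests.get is my addition (the question defines the headers but never sends them), and the Content-Type entry is dropped because a plain GET does not need it:

import requests
from bs4 import BeautifulSoup

headers = {"X-Requested-With": "XMLHttpRequest",
           "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"}

for i in range(0, 4):
    url = "https://www.aw-lab.com/shop/uomo/scarpe?p={}".format(i)
    r = requests.get(url, headers=headers)  # send the browser-like headers with the request
    soup = BeautifulSoup(r.content, "html.parser")
    for link in soup.find_all("a"):
        href = link.get("href")  # None when the <a> tag has no href attribute
        if href and "air-max" in href:
            print(href)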
Please use an IDE such as Spyder or a Jupyter notebook for development.
import requests
import time
from bs4 import BeautifulSoup
headers = {"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
           "X-Requested-With": "XMLHttpRequest",
           "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"}

for i in range(0,4):
    url = "https://www.aw-lab.com/shop/uomo/scarpe?p={}".format(i)
    r = requests.get(url)
    soup = BeautifulSoup(r.content, "html.parser")
    all_links = soup.find_all("a")
    for link in all_links:
        if link.has_key('href'):
            if "air-max" in link["href"]:
                print(link["href"])
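If you prefer to let BeautifulSoup do the filtering, a CSS attribute selector finds the matching anchors in one call. This is a sketch of the same scrape, assuming BeautifulSoup 4, whose select() supports substring selectors like [href*="air-max"]:

import requests
from bs4 import BeautifulSoup

for i in range(0, 4):
    url = "https://www.aw-lab.com/shop/uomo/scarpe?p={}".format(i)
    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    # a[href*="air-max"] matches only <a> tags whose href contains "air-max"
    for link in soup.select('a[href*="air-max"]'):
        print(link["href"])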