使用 beautifulsoup 从网站抓取图片
Scrape pics from a website with beautifulsoup
我想从这个链接下载个人资料图片。
<div class="l-main">
<script id="js-initial-data" data-json="{&quot;openFanClub&quot;:1, &quot;roomId&quot;:1700}"> </script>
<p class="room-profile-head-bg l-room-profile-head-bg">
<img src="https://image..." alt>
</p>
</div>
这是我的代码,但它不起作用:
import requests
from bs4 import BeautifulSoup
# The asker's non-working attempt, kept verbatim; the notes below explain why it fails.
# NOTE(review): the loop body has lost its indentation in this copy, so the
# statements after the `for` line must be indented before the snippet parses.
# Fetch the page and parse it with the built-in HTML parser.
url = "https://..."
req = requests.get(url)
soup = BeautifulSoup(req.text,'html.parser')
# NOTE(review): the attribute value contains unescaped double quotes inside a
# double-quoted CSS string, so this is not a valid attribute selector.
# Selecting by id (script#js-initial-data) is presumably what was intended - confirm.
images = soup.select('script[data-json="{"openFanClub":1, "roomId":1700}"]')
for image in images:
# The <script> tag shown in the question carries only `id` and `data-json`,
# so looking up 'room_id' in its attributes raises KeyError.
i = image.attrs['room_id']
# In the HTML shown, <img> sits inside the sibling <p>, not inside <script>;
# select_one('img') therefore finds nothing and `.attrs` fails on None.
link = image.select_one('img').attrs['src']
# Plain string, not an f-string: the "{i}" placeholder is never substituted.
filename = 'images/{i}.jpg'
print(filename, link)
您可以使用任何工具或库(Selenium、BeautifulSoup 等)使其工作。
提前致谢!
您可以使用以下方法从站点获取图像:
from bs4 import BeautifulSoup
from urllib.request import urlopen, urlretrieve
# Collect the src of every <img> on the profile page, in document order.
# The HTTP response is closed as soon as the page has been parsed.
with urlopen('https://www.showroom-live.com/room/profile?room_id=170038') as html_page:
    soup = BeautifulSoup(html_page, "html.parser")
# find_all is the current name of the legacy findAll alias.
images = [img.get('src') for img in soup.find_all('img')]

# The profile picture is identified purely by position: the sixth <img> on
# the page. Stop with a clear message if the page does not have that shape.
PROFILE_IMAGE_INDEX = 5
if len(images) <= PROFILE_IMAGE_INDEX or not images[PROFILE_IMAGE_INDEX]:
    raise SystemExit(
        f"Expected an <img> with a src at position {PROFILE_IMAGE_INDEX}, "
        f"but the page only yielded {len(images)} image tag(s)."
    )
image = images[PROFILE_IMAGE_INDEX]

# Download the image to a local file.
path = r"C:\Users\username\Downloads\room_id.jpg"
urlretrieve(image, path)
如果我没理解错的话,这正是您要查找的内容。
import requests
from bs4 import BeautifulSoup
# Room ids whose profile pictures should be downloaded.
photos = [
    170038,
    197343,
    61730,
]

# HTTP headers passed to the requests made in main().
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) '
        'Gecko/20100101 Firefox/75.0'
    ),
}
def main(url):
    """Download the profile picture of every room id listed in ``photos``.

    For each id, ``url`` is requested with a ``room_id`` query parameter, the
    ``<img>`` tag whose ``width`` attribute is ``"640"`` is looked up, and the
    file behind its ``src`` is saved as ``<room_id>.jpg`` in the current
    working directory. Rooms whose page has no such image are reported and
    skipped.

    Args:
        url: Profile-page URL without the query string.

    Raises:
        requests.HTTPError: If the page or the image request returns an
            HTTP error status.
    """
    with requests.Session() as session:
        # Put the headers on the session so the image download sends the same
        # User-Agent as the page request.
        session.headers.update(headers)
        for photo in photos:
            print(f"Downloading {photo}.jpg")
            page = session.get(url, params={'room_id': photo})
            # Do not parse an error page as if it were the profile.
            page.raise_for_status()
            soup = BeautifulSoup(page.content, 'html.parser')
            img = soup.find("img", width="640")
            if img is None or not img.get("src"):
                # find() returns None when nothing matches; skip this room
                # instead of failing on None.
                print(f"No profile image found for room {photo}, skipping")
                continue
            picture = session.get(img["src"])
            # Do not save an error response body as a .jpg file.
            picture.raise_for_status()
            with open(f"{photo}.jpg", 'wb') as f:
                f.write(picture.content)
main("https://www.showroom-live.com/room/profile")
我想从这个链接下载个人资料图片。
<div class="l-main">
<script id="js-initial-data" data-json="{&quot;openFanClub&quot;:1, &quot;roomId&quot;:1700}"> </script>
<p class="room-profile-head-bg l-room-profile-head-bg">
<img src="https://image..." alt>
</p>
</div>
这是我的代码,但它不起作用:
import requests
from bs4 import BeautifulSoup
# The asker's non-working attempt, kept verbatim; the notes below explain why it fails.
# NOTE(review): the loop body has lost its indentation in this copy, so the
# statements after the `for` line must be indented before the snippet parses.
# Fetch the page and parse it with the built-in HTML parser.
url = "https://..."
req = requests.get(url)
soup = BeautifulSoup(req.text,'html.parser')
# NOTE(review): the attribute value contains unescaped double quotes inside a
# double-quoted CSS string, so this is not a valid attribute selector.
# Selecting by id (script#js-initial-data) is presumably what was intended - confirm.
images = soup.select('script[data-json="{"openFanClub":1, "roomId":1700}"]')
for image in images:
# The <script> tag shown in the question carries only `id` and `data-json`,
# so looking up 'room_id' in its attributes raises KeyError.
i = image.attrs['room_id']
# In the HTML shown, <img> sits inside the sibling <p>, not inside <script>;
# select_one('img') therefore finds nothing and `.attrs` fails on None.
link = image.select_one('img').attrs['src']
# Plain string, not an f-string: the "{i}" placeholder is never substituted.
filename = 'images/{i}.jpg'
print(filename, link)
您可以使用任何工具或库(Selenium、BeautifulSoup 等)使其工作。 提前致谢!
您可以使用以下方法从站点获取图像:
from bs4 import BeautifulSoup
from urllib.request import urlopen, urlretrieve
# Collect the src of every <img> on the profile page, in document order.
# The HTTP response is closed as soon as the page has been parsed.
with urlopen('https://www.showroom-live.com/room/profile?room_id=170038') as html_page:
    soup = BeautifulSoup(html_page, "html.parser")
# find_all is the current name of the legacy findAll alias.
images = [img.get('src') for img in soup.find_all('img')]

# The profile picture is identified purely by position: the sixth <img> on
# the page. Stop with a clear message if the page does not have that shape.
PROFILE_IMAGE_INDEX = 5
if len(images) <= PROFILE_IMAGE_INDEX or not images[PROFILE_IMAGE_INDEX]:
    raise SystemExit(
        f"Expected an <img> with a src at position {PROFILE_IMAGE_INDEX}, "
        f"but the page only yielded {len(images)} image tag(s)."
    )
image = images[PROFILE_IMAGE_INDEX]

# Download the image to a local file.
path = r"C:\Users\username\Downloads\room_id.jpg"
urlretrieve(image, path)
如果我没理解错的话,这正是您要查找的内容。
import requests
from bs4 import BeautifulSoup
# Room ids whose profile pictures should be downloaded.
photos = [
    170038,
    197343,
    61730,
]

# HTTP headers passed to the requests made in main().
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) '
        'Gecko/20100101 Firefox/75.0'
    ),
}
def main(url):
    """Download the profile picture of every room id listed in ``photos``.

    For each id, ``url`` is requested with a ``room_id`` query parameter, the
    ``<img>`` tag whose ``width`` attribute is ``"640"`` is looked up, and the
    file behind its ``src`` is saved as ``<room_id>.jpg`` in the current
    working directory. Rooms whose page has no such image are reported and
    skipped.

    Args:
        url: Profile-page URL without the query string.

    Raises:
        requests.HTTPError: If the page or the image request returns an
            HTTP error status.
    """
    with requests.Session() as session:
        # Put the headers on the session so the image download sends the same
        # User-Agent as the page request.
        session.headers.update(headers)
        for photo in photos:
            print(f"Downloading {photo}.jpg")
            page = session.get(url, params={'room_id': photo})
            # Do not parse an error page as if it were the profile.
            page.raise_for_status()
            soup = BeautifulSoup(page.content, 'html.parser')
            img = soup.find("img", width="640")
            if img is None or not img.get("src"):
                # find() returns None when nothing matches; skip this room
                # instead of failing on None.
                print(f"No profile image found for room {photo}, skipping")
                continue
            picture = session.get(img["src"])
            # Do not save an error response body as a .jpg file.
            picture.raise_for_status()
            with open(f"{photo}.jpg", 'wb') as f:
                f.write(picture.content)
main("https://www.showroom-live.com/room/profile")