Bs4 Python 如何下载.css 文件
Bs4 Python How to download .css files
你好,我正在尝试制作一个抓取工具,将网页中的所有 .css 文件保存在一个文件夹中,但是当 运行 我的脚本出现此错误时:
with open(shit, 'wb') as f: FileNotFoundError: [Errno 2] No such file or directory: 'https://url.com/cache/themes/theme1/index.min.css'
这是我的代码:
from bs4 import BeautifulSoup
import requests
import os
proxies = {
'http': 'socks5h://127.0.0.1:9050',
'https': 'socks5h://127.0.0.1:9050'
}
url = "https://url.com"
folder = "Files"
resp = requests.get(url, proxies=proxies)
soup = BeautifulSoup(resp.text, features='lxml')
def Downloader(url, folder):
os.mkdir(os.path.join(os.getcwd(), folder))
os.chdir(os.path.join(os.getcwd(), folder))
css = soup.find_all('link', rel="stylesheet")
for cum in css:
shit = cum['href']
if "http://" in shit:
with open(shit, 'wb') as f:
piss = requests.get(shit, proxies=proxies)
f.write(piss.content)
Downloader(url=url, folder=folder)
有人知道问题出在哪里吗?
谢谢 <3
您正在尝试写入名称中包含 /
的文件。这将其标识为 a directory/folder 部分。因此,要么 remove/replace 那些,要么构建创建这些文件夹结构的逻辑,它将写入文件。
import requests
from bs4 import BeautifulSoup
import os
proxies = {
'http': 'socks5h://127.0.0.1:9050',
'https': 'socks5h://127.0.0.1:9050'
}
url = "https://url.com"
folder = "Files"
resp = requests.get(url, proxies=proxies)
soup = BeautifulSoup(resp.text, features='lxml')
def Downloader(url, folder):
try:
os.mkdir(os.path.join(os.getcwd(), folder))
except Exception as e:
print(e)
os.chdir(os.path.join(os.getcwd(), folder))
css = soup.find_all('link', rel="stylesheet")
for each in css:
href = each['href']
if "http://" in href or "https://" in href:
filename = href.split('//')[-1].replace('/','_').replace('.','_')
with open(filename, 'wb') as f:
response = requests.get(href, proxies=proxies)
f.write(response.content)
Downloader(url=url, folder=folder)
你好,我正在尝试制作一个抓取工具,将网页中的所有 .css 文件保存在一个文件夹中,但是当 运行 我的脚本出现此错误时:
with open(shit, 'wb') as f: FileNotFoundError: [Errno 2] No such file or directory: 'https://url.com/cache/themes/theme1/index.min.css'
这是我的代码:
from bs4 import BeautifulSoup
import requests
import os
proxies = {
'http': 'socks5h://127.0.0.1:9050',
'https': 'socks5h://127.0.0.1:9050'
}
url = "https://url.com"
folder = "Files"
resp = requests.get(url, proxies=proxies)
soup = BeautifulSoup(resp.text, features='lxml')
def Downloader(url, folder):
os.mkdir(os.path.join(os.getcwd(), folder))
os.chdir(os.path.join(os.getcwd(), folder))
css = soup.find_all('link', rel="stylesheet")
for cum in css:
shit = cum['href']
if "http://" in shit:
with open(shit, 'wb') as f:
piss = requests.get(shit, proxies=proxies)
f.write(piss.content)
Downloader(url=url, folder=folder)
有人知道问题出在哪里吗? 谢谢 <3
您正在尝试写入名称中包含 /
的文件。这将其标识为 a directory/folder 部分。因此,要么 remove/replace 那些,要么构建创建这些文件夹结构的逻辑,它将写入文件。
import requests
from bs4 import BeautifulSoup
import os
proxies = {
'http': 'socks5h://127.0.0.1:9050',
'https': 'socks5h://127.0.0.1:9050'
}
url = "https://url.com"
folder = "Files"
resp = requests.get(url, proxies=proxies)
soup = BeautifulSoup(resp.text, features='lxml')
def Downloader(url, folder):
try:
os.mkdir(os.path.join(os.getcwd(), folder))
except Exception as e:
print(e)
os.chdir(os.path.join(os.getcwd(), folder))
css = soup.find_all('link', rel="stylesheet")
for each in css:
href = each['href']
if "http://" in href or "https://" in href:
filename = href.split('//')[-1].replace('/','_').replace('.','_')
with open(filename, 'wb') as f:
response = requests.get(href, proxies=proxies)
f.write(response.content)
Downloader(url=url, folder=folder)