Python Youtube 网络抓取工具无法正常工作
Python Youtube Web Scraper not working properly
所以我构建了这个小脚本,可以返回 URL 任何在 YouTube 上搜索到的视频。但是在再次打开它之后发现使用 youtube 进行的网络抓取无法正常工作。当打印 soup
时,它 returns 与在 Youtube 上使用 inspect element 看到的完全不同。有人可以帮我解决这个问题吗...
这是我的代码:
import requests
from lxml import html
import webbrowser
from bs4 import BeautifulSoup
import time
import tkinter
from pytube import YouTube
headers= {"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.162 Safari/537.36"}
def video_finder():
word = input("Enter video title: ")
if ' ' in word:
new = word.replace(' ', '+')
print(new)
else:
pass
vid = requests.get('https://www.youtube.com/results?search_query={}'.format(new))
soup = BeautifulSoup(vid.text, features='lxml')
all_vids = soup.find_all('div', id_='contents')
print(all_vids)
video1st = all_vids[0]
a_Tag = video1st.find('a', class_="yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-sessionlink spf-link", href=True)
Video_name = a_Tag.text
Video_id = a_Tag['href']
video_link = 'https://www.youtube.com' + Video_id
print(Video_name)
print(video_link)
它不是最好的但是你...谢谢
要从 Youtube 页面获得正确的结果,请将 User-Agent
HTTP header 设置为 Googlebot,并在 BeautifulSoup 中使用 html.parser
。
例如:
import requests
from bs4 import BeautifulSoup
headers= {"User-Agent": "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"}
def video_finder():
word = input("Enter video title: ")
params = {
'search_query': word
}
vid = requests.get('https://www.youtube.com/results', params=params, headers=headers)
soup = BeautifulSoup(vid.content, features='html.parser')
a_Tag = soup.find('a', class_="yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-sessionlink spf-link", href=lambda h: h.startswith('/watch?'))
Video_name = a_Tag.text
Video_id = a_Tag['href']
video_link = 'https://www.youtube.com' + Video_id
print(Video_name)
print(video_link)
video_finder()
打印:
Enter video title: sailor moon
Sailor Moon Opening (English) *HD*
https://www.youtube.com/watch?v=5txHGxJRwtQ
所以我构建了这个小脚本,可以返回 URL 任何在 YouTube 上搜索到的视频。但是在再次打开它之后发现使用 youtube 进行的网络抓取无法正常工作。当打印 soup
时,它 returns 与在 Youtube 上使用 inspect element 看到的完全不同。有人可以帮我解决这个问题吗...
这是我的代码:
import requests
from lxml import html
import webbrowser
from bs4 import BeautifulSoup
import time
import tkinter
from pytube import YouTube
headers= {"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.162 Safari/537.36"}
def video_finder():
word = input("Enter video title: ")
if ' ' in word:
new = word.replace(' ', '+')
print(new)
else:
pass
vid = requests.get('https://www.youtube.com/results?search_query={}'.format(new))
soup = BeautifulSoup(vid.text, features='lxml')
all_vids = soup.find_all('div', id_='contents')
print(all_vids)
video1st = all_vids[0]
a_Tag = video1st.find('a', class_="yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-sessionlink spf-link", href=True)
Video_name = a_Tag.text
Video_id = a_Tag['href']
video_link = 'https://www.youtube.com' + Video_id
print(Video_name)
print(video_link)
它不是最好的但是你...谢谢
要从 Youtube 页面获得正确的结果,请将 User-Agent
HTTP header 设置为 Googlebot,并在 BeautifulSoup 中使用 html.parser
。
例如:
import requests
from bs4 import BeautifulSoup
headers= {"User-Agent": "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"}
def video_finder():
word = input("Enter video title: ")
params = {
'search_query': word
}
vid = requests.get('https://www.youtube.com/results', params=params, headers=headers)
soup = BeautifulSoup(vid.content, features='html.parser')
a_Tag = soup.find('a', class_="yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-sessionlink spf-link", href=lambda h: h.startswith('/watch?'))
Video_name = a_Tag.text
Video_id = a_Tag['href']
video_link = 'https://www.youtube.com' + Video_id
print(Video_name)
print(video_link)
video_finder()
打印:
Enter video title: sailor moon
Sailor Moon Opening (English) *HD*
https://www.youtube.com/watch?v=5txHGxJRwtQ