Selenium 以 headless 模式运行时,Facebook 会检测到浏览器自动化
Facebook detects browser automation when Selenium is running headless
这个用于 Facebook 聊天机器人的 Selenium 脚本在有窗口的 Chrome 中运行良好;但在无头(headless)模式下,即使使用 Python 虚拟显示(pyvirtualdisplay),它也会被检测到并被阻止。
我想使用 pyvirtualdisplay 在无头模式下运行该脚本,同时避免被检测,以便把它作为每日计划任务远程执行。
运行环境是 DigitalOcean 上的 Ubuntu 18.04 Droplet。
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC # available since 2.26.0
from pyvirtualdisplay import Display
# Start the pyvirtualdisplay display (not visible, 800x800) before launching
# Chrome.
display = Display(visible=0, size=(800, 800))
display.start()

chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument("--auto-open-devtools-for-tabs")
# NOTE(review): `me` is neither defined nor imported in this snippet --
# presumably a local settings module; confirm.
chrome_options.add_argument("--load-extension=" + me.favoriteExtension)
chrome_options.add_argument("--start-maximized")
# Notification content setting; the value 2 presumably means "block" -- confirm.
chrome_options.add_experimental_option("prefs", {
    "profile.default_content_setting_values.notifications": 2
})
# NOTE(review): Chrome's own headless mode is redundant when a pyvirtualdisplay
# Display is already running; consider dropping this flag.
chrome_options.add_argument('headless')
chrome_options.add_argument('--no-sandbox')
# `options=` is the supported keyword; `chrome_options=` is deprecated and is
# rejected by newer Selenium releases.
browser = webdriver.Chrome(options=chrome_options)
def login(submitBtn):
    """Fill in and submit the login form on the page loaded in `browser`.

    Args:
        submitBtn: XPath expression that locates the submit button.

    Credentials are read from `me.fbUser` and `me.fbPassword`.
    """
    # find_element(By.XPATH, ...) works on every Selenium version, whereas the
    # find_element_by_* helpers were removed in Selenium 4.3.
    email = browser.find_element(By.XPATH, '//*[@id="email"]')
    password = browser.find_element(By.XPATH, '//*[@id="pass"]')
    btn = browser.find_element(By.XPATH, submitBtn)
    email.send_keys(me.fbUser)
    password.send_keys(me.fbPassword)
    btn.click()
def respondToNewMessages():
    """Reply with the contents of ``awaymsg.txt`` to each new conversation.

    Opens the messages page, walks the conversation list, and treats a
    conversation as new when the font-weight of its first message span is
    greater than 400. For each new conversation the span is clicked and the
    file is typed into the page line by line.

    Returns:
        True once every listed conversation has been examined.
    """
    # NOTE(review): the original snippet had lost its indentation; the nesting
    # below is the most plausible reconstruction -- confirm against the
    # author's intent.
    browser.get("http://facebook.com/messages/t/")
    print('responding to new msgs')
    # find_elements(By.XPATH, ...) replaces the find_elements_by_xpath helper,
    # which was removed in Selenium 4.3.
    conversations = browser.find_elements(
        By.XPATH, '*//ul[@aria-label="Conversation List"]/li')
    print('found ' + str(len(conversations)) + ' conversations')
    for index, conversation in enumerate(conversations, start=1):
        firstMsg = conversation.find_element(
            By.XPATH, './div/a/div/div//span/span')
        print('Found first message ' + firstMsg.text
              + ' conversation #' + str(index))
        fontWeight = firstMsg.value_of_css_property('font-weight')
        if int(fontWeight) <= 400:
            print('conversation #' + str(index) + ' is an old convo')
            continue
        firstMsg.click()
        # The `with` block closes the file; no explicit close() is needed.
        with open('awaymsg.txt') as f:
            for line in f:
                ActionChains(browser).send_keys(line).perform()
        # NOTE(review): ENTER is sent once after all lines are typed --
        # confirm this matches the original nesting.
        ActionChains(browser).send_keys(Keys.ENTER).perform()
    return True
# XPath matching either variant of the login submit button.
LOGIN_BUTTON_XPATH = (
    "//*[@id='u_0_b' and not(@type='hidden')]"
    "|//button[@id='loginbutton' and @type='submit' and @name='login' "
    "and not(@type='hidden')]"
)

try:
    browser.get("http://facebook.com")  # load the web page
    login(LOGIN_BUTTON_XPATH)  # attempt login
    # If login fails try again and/or dismiss the checkpoint.
    # NOTE(review): there is no retry limit, so this loops forever if the URL
    # keeps containing 'login'. The nesting of the checkpoint check was
    # reconstructed from an unindented snippet -- confirm.
    while 'login' in browser.current_url:
        login(LOGIN_BUTTON_XPATH)
        if 'checkpoint' in browser.current_url:
            browser.find_element(By.XPATH, "//button[@value='Yes']").click()
    browser.get("http://facebook.com/messages/t/")  # exit when messages are loaded
    browser.implicitly_wait(5)
    # Count conversations. The result is unused, but with the implicit wait
    # set this lookup presumably also waits for the list to load -- confirm.
    conversations = browser.find_elements(
        By.XPATH, '*//ul[@aria-label="Conversation List"]/li')
    respondToNewMessages()
finally:
    # quit() ends the whole driver session (close() only closes the window),
    # and the virtual display is stopped so neither outlives the script.
    browser.quit()
    display.stop()
预期:在虚拟显示下以无头方式运行时,行为应与在有窗口的 Chrome 中运行一致。
实际:有窗口模式运行良好,无头模式会被 Facebook 检测并阻止。
pyvirtualdisplay 与浏览器自带的 headless 模式作用相同,所以如果你使用 pyvirtualdisplay,就不应该再设置 chrome_options.add_argument('headless')。
安装 xvfb(sudo apt-get install xvfb)并将其设置为后端:
# Select Xvfb explicitly as the pyvirtualdisplay backend (needs the xvfb package).
display = Display(backend="xvfb", visible=0, size=(800, 800))
这个用于 Facebook 聊天机器人的 Selenium 脚本在有窗口的 Chrome 中运行良好;但在无头(headless)模式下,即使使用 Python 虚拟显示(pyvirtualdisplay),它也会被检测到并被阻止。
我想使用 pyvirtualdisplay 在无头模式下运行该脚本,同时避免被检测,以便把它作为每日计划任务远程执行。
运行环境是 DigitalOcean 上的 Ubuntu 18.04 Droplet。
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC # available since 2.26.0
from pyvirtualdisplay import Display
# Start the pyvirtualdisplay display (not visible, 800x800) before launching
# Chrome.
display = Display(visible=0, size=(800, 800))
display.start()

chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument("--auto-open-devtools-for-tabs")
# NOTE(review): `me` is neither defined nor imported in this snippet --
# presumably a local settings module; confirm.
chrome_options.add_argument("--load-extension=" + me.favoriteExtension)
chrome_options.add_argument("--start-maximized")
# Notification content setting; the value 2 presumably means "block" -- confirm.
chrome_options.add_experimental_option("prefs", {
    "profile.default_content_setting_values.notifications": 2
})
# NOTE(review): Chrome's own headless mode is redundant when a pyvirtualdisplay
# Display is already running; consider dropping this flag.
chrome_options.add_argument('headless')
chrome_options.add_argument('--no-sandbox')
# `options=` is the supported keyword; `chrome_options=` is deprecated and is
# rejected by newer Selenium releases.
browser = webdriver.Chrome(options=chrome_options)
def login(submitBtn):
    """Fill in and submit the login form on the page loaded in `browser`.

    Args:
        submitBtn: XPath expression that locates the submit button.

    Credentials are read from `me.fbUser` and `me.fbPassword`.
    """
    # find_element(By.XPATH, ...) works on every Selenium version, whereas the
    # find_element_by_* helpers were removed in Selenium 4.3.
    email = browser.find_element(By.XPATH, '//*[@id="email"]')
    password = browser.find_element(By.XPATH, '//*[@id="pass"]')
    btn = browser.find_element(By.XPATH, submitBtn)
    email.send_keys(me.fbUser)
    password.send_keys(me.fbPassword)
    btn.click()
def respondToNewMessages():
    """Reply with the contents of ``awaymsg.txt`` to each new conversation.

    Opens the messages page, walks the conversation list, and treats a
    conversation as new when the font-weight of its first message span is
    greater than 400. For each new conversation the span is clicked and the
    file is typed into the page line by line.

    Returns:
        True once every listed conversation has been examined.
    """
    # NOTE(review): the original snippet had lost its indentation; the nesting
    # below is the most plausible reconstruction -- confirm against the
    # author's intent.
    browser.get("http://facebook.com/messages/t/")
    print('responding to new msgs')
    # find_elements(By.XPATH, ...) replaces the find_elements_by_xpath helper,
    # which was removed in Selenium 4.3.
    conversations = browser.find_elements(
        By.XPATH, '*//ul[@aria-label="Conversation List"]/li')
    print('found ' + str(len(conversations)) + ' conversations')
    for index, conversation in enumerate(conversations, start=1):
        firstMsg = conversation.find_element(
            By.XPATH, './div/a/div/div//span/span')
        print('Found first message ' + firstMsg.text
              + ' conversation #' + str(index))
        fontWeight = firstMsg.value_of_css_property('font-weight')
        if int(fontWeight) <= 400:
            print('conversation #' + str(index) + ' is an old convo')
            continue
        firstMsg.click()
        # The `with` block closes the file; no explicit close() is needed.
        with open('awaymsg.txt') as f:
            for line in f:
                ActionChains(browser).send_keys(line).perform()
        # NOTE(review): ENTER is sent once after all lines are typed --
        # confirm this matches the original nesting.
        ActionChains(browser).send_keys(Keys.ENTER).perform()
    return True
# XPath matching either variant of the login submit button.
LOGIN_BUTTON_XPATH = (
    "//*[@id='u_0_b' and not(@type='hidden')]"
    "|//button[@id='loginbutton' and @type='submit' and @name='login' "
    "and not(@type='hidden')]"
)

try:
    browser.get("http://facebook.com")  # load the web page
    login(LOGIN_BUTTON_XPATH)  # attempt login
    # If login fails try again and/or dismiss the checkpoint.
    # NOTE(review): there is no retry limit, so this loops forever if the URL
    # keeps containing 'login'. The nesting of the checkpoint check was
    # reconstructed from an unindented snippet -- confirm.
    while 'login' in browser.current_url:
        login(LOGIN_BUTTON_XPATH)
        if 'checkpoint' in browser.current_url:
            browser.find_element(By.XPATH, "//button[@value='Yes']").click()
    browser.get("http://facebook.com/messages/t/")  # exit when messages are loaded
    browser.implicitly_wait(5)
    # Count conversations. The result is unused, but with the implicit wait
    # set this lookup presumably also waits for the list to load -- confirm.
    conversations = browser.find_elements(
        By.XPATH, '*//ul[@aria-label="Conversation List"]/li')
    respondToNewMessages()
finally:
    # quit() ends the whole driver session (close() only closes the window),
    # and the virtual display is stopped so neither outlives the script.
    browser.quit()
    display.stop()
预期:在虚拟显示下以无头方式运行时,行为应与在有窗口的 Chrome 中运行一致。
实际:有窗口模式运行良好,无头模式会被 Facebook 检测并阻止。
pyvirtualdisplay 与浏览器自带的 headless 模式作用相同,所以如果你使用 pyvirtualdisplay,就不应该再设置 chrome_options.add_argument('headless')。
安装 xvfb(sudo apt-get install xvfb)并将其设置为后端:
# Select Xvfb explicitly as the pyvirtualdisplay backend (needs the xvfb package).
display = Display(backend="xvfb", visible=0, size=(800, 800))