无法在 Python 的 Selenium 中设置代理
Not able to set proxy in selenium Python
我有以下代码:
from selenium import webdriver
from selenium.webdriver import Firefox
from selenium.webdriver.common.keys import Keys
from selenium.webdriver import ActionChains
from selenium.webdriver.common.proxy import Proxy, ProxyType
# Manual proxy configuration: the same host:port is used for HTTP, FTP and SSL.
proxy = Proxy({
    'proxyType': ProxyType.MANUAL,
    'httpProxy': '192.156.1.1:33',
    'ftpProxy': '192.156.1.1:33',
    'sslProxy': '192.156.1.1:33',
    'noProxy': '',  # set this value as desired
})

# Page used to check which IP address the browser's requests come from.
url = 'http://www.expressvpn.com/what-is-my-ip'
# Raw string: in a normal literal "\U" starts a unicode escape, which is a
# SyntaxError on Python 3 for a Windows path like this one.
driver_path = r'C:\Users\user\geckodriver.exe'
browser = Firefox(executable_path=driver_path, proxy=proxy)
browser.get(url)
出于某种原因,每次我检查 IP 时,它显示的都是我的真实 IP,而不是代理 IP。为什么会这样?您能否建议如何实现?是代码有问题吗?
我研究了这个问题,注意到代理是通过 geckodriver 中的 WebDriver capabilities 和代理配置(proxy configurations)来设置的。
我在测试中使用了以下来源的代理信息。
免费代理列表:
请允许我指出,使用免费代理 IP 地址可能会出现很大问题。这些类型的代理因存在连接问题而臭名昭著,例如与延迟相关的超时。此外,这些站点也可能是间歇性的,这意味着它们随时可能出现故障。有时这些网站会被滥用,因此可能会被屏蔽。
下面的代码使用 DesiredCapabilities 和 selenium。
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.options import FirefoxProfile
from selenium.webdriver.firefox.options import DesiredCapabilities
# Browser options passed to the Firefox driver.
firefox_options = Options()
firefox_options.add_argument("--disable-infobars")
firefox_options.add_argument("--disable-extensions")
firefox_options.add_argument("--disable-popup-blocking")

profile_options = FirefoxProfile()  # NOTE(review): not referenced again in this snippet
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:90.0) Gecko/20100101 Firefox/90.0'
# 'general.useragent.override' is the Firefox preference that replaces the
# User-Agent string; the previous key was pasted code, not a preference name.
firefox_options.set_preference('general.useragent.override', user_agent)

# Attach a manual proxy definition to the Firefox capabilities.
firefox_capabilities = DesiredCapabilities().FIREFOX
firefox_capabilities['proxy'] = {
    "proxyType": "MANUAL",
    "sslProxy": '34.95.40.165:3128',
}

driver = webdriver.Firefox(
    executable_path='/usr/local/bin/geckodriver',
    options=firefox_options,
    desired_capabilities=firefox_capabilities,
)

# Page used to check which IP address the browser's requests come from.
URL = 'http://www.expressvpn.com/what-is-my-ip'
driver.get(URL)
你也可以这样做:
from selenium import webdriver
from selenium.webdriver.common.proxy import Proxy
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.options import FirefoxProfile
from selenium.webdriver.firefox.options import DesiredCapabilities
# Browser options passed to the Firefox driver.
firefox_options = Options()
firefox_options.add_argument("--disable-infobars")
firefox_options.add_argument("--disable-extensions")
firefox_options.add_argument("--disable-popup-blocking")

profile_options = FirefoxProfile()  # NOTE(review): not referenced again in this snippet
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:90.0) Gecko/20100101 Firefox/90.0'
# 'general.useragent.override' is the Firefox preference that replaces the
# User-Agent string; the previous key was pasted code, not a preference name.
firefox_options.set_preference('general.useragent.override', user_agent)

# Build the proxy with the Proxy helper and merge it into the capabilities.
firefox_capabilities = DesiredCapabilities().FIREFOX
firefox_proxies = Proxy()
firefox_proxies.ssl_proxy = '143.110.148.15:8080'
firefox_proxies.add_to_capabilities(firefox_capabilities)

driver = webdriver.Firefox(
    executable_path='/usr/local/bin/geckodriver',
    options=firefox_options,
    desired_capabilities=firefox_capabilities,
)

# Page used to check which IP address the browser's requests come from.
URL = 'http://www.expressvpn.com/what-is-my-ip'
driver.get(URL)
你也可以使用 Python 包 http_request_randomizer 获取代理 IP 地址,然后将其传递给 geckodriver。
import random
import logging
from selenium import webdriver
from selenium.webdriver.common.proxy import Proxy
from selenium.webdriver.firefox.options import Options
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.firefox.options import FirefoxProfile
from selenium.webdriver.firefox.options import DesiredCapabilities
from http_request_randomizer.requests.proxy.ProxyObject import Protocol
from http_request_randomizer.requests.proxy.requestProxy import RequestProxy
# Obtain a list of HTTPS proxies
# Suppress the console debugging output by setting the log level
req_proxy = RequestProxy(log_level=logging.ERROR, protocol=Protocol.HTTPS)

# Obtain a random single proxy from the list of proxy addresses
random_proxy = random.sample(req_proxy.get_proxy_list(), 1)

# Browser options passed to the Firefox driver.
firefox_options = Options()
firefox_options.add_argument("--disable-infobars")
firefox_options.add_argument("--disable-extensions")
firefox_options.add_argument("--disable-popup-blocking")

profile_options = FirefoxProfile()  # NOTE(review): not referenced again in this snippet
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:90.0) Gecko/20100101 Firefox/90.0'
# 'general.useragent.override' is the Firefox preference that replaces the
# User-Agent string; the previous key was pasted code, not a preference name.
firefox_options.set_preference('general.useragent.override', user_agent)

firefox_capabilities = DesiredCapabilities().FIREFOX

# add the random proxy to firefox_capabilities
firefox_proxies = Proxy()
firefox_proxies.ssl_proxy = random_proxy[0].get_address()
firefox_proxies.add_to_capabilities(firefox_capabilities)

driver = webdriver.Firefox(
    executable_path='/usr/local/bin/geckodriver',
    options=firefox_options,
    desired_capabilities=firefox_capabilities,
)

try:
    # print proxy IP for testing
    print(random_proxy[0].get_address())
    # sample output:
    # 93.183.250.200:53281

    URL = 'http://www.expressvpn.com/what-is-my-ip'
    driver.get(URL)
except TimeoutException:
    print("A Page load Timeout Occurred.")
    # Close the browser when the page load timed out.
    driver.quit()
如前所述,免费代理可能存在多种问题。下面的代码展示了如何使用 proxy judge 检查单个代理的状态。
import random
import logging
from time import sleep
from random import randint
from proxy_checking import ProxyChecker
from http_request_randomizer.requests.proxy.ProxyObject import Protocol
from http_request_randomizer.requests.proxy.requestProxy import RequestProxy
def random_ssl_proxy_address():
    """Pick one HTTPS proxy at random and return its address.

    Builds a RequestProxy limited to Protocol.HTTPS (log level ERROR keeps the
    console debugging output quiet), samples a single entry from its proxy
    list, and returns that entry's ``get_address()`` value.
    """
    https_proxies = RequestProxy(
        log_level=logging.ERROR,
        protocol=Protocol.HTTPS,
    ).get_proxy_list()
    # Sample exactly one proxy and unpack it from the one-element result.
    (chosen,) = random.sample(https_proxies, 1)
    return chosen.get_address()
def get_proxy_address():
    """Return a random HTTPS proxy address that passes the proxy-judge check.

    Draws an address from random_ssl_proxy_address(), checks it with
    ProxyChecker, and repeats until the check result's 'status' entry is
    truthy. The earlier recursive version dropped the result of the retry
    call, so any retry made the function return None.

    Returns:
        The first drawn address whose check result has a truthy 'status'.
    """
    while True:
        proxy_address = random_ssl_proxy_address()
        checker = ProxyChecker()
        proxy_judge = checker.check_proxy(proxy_address)
        # A missing 'status' entry is treated like a failed check.
        if proxy_judge.get('status'):
            return proxy_address
        print('Looking for a valid proxy address.')
        # this sleep timer is helping with some timeout issues
        # that were happening when querying
        sleep(randint(5, 10))
# Fetch a checked proxy address and show it.
random_ssl_proxy = get_proxy_address()
print(f'Valid proxy address: {random_ssl_proxy}')
# sample output:
# Valid proxy address: 98.116.152.143:3128
请注意,我使用的 proxy_checking 包没有内置任何错误处理,因此您需要自行添加代码来捕获可能出现的错误。
我有以下代码:
from selenium import webdriver
from selenium.webdriver import Firefox
from selenium.webdriver.common.keys import Keys
from selenium.webdriver import ActionChains
from selenium.webdriver.common.proxy import Proxy, ProxyType
# Manual proxy configuration: the same host:port is used for HTTP, FTP and SSL.
proxy = Proxy({
    'proxyType': ProxyType.MANUAL,
    'httpProxy': '192.156.1.1:33',
    'ftpProxy': '192.156.1.1:33',
    'sslProxy': '192.156.1.1:33',
    'noProxy': '',  # set this value as desired
})

# Page used to check which IP address the browser's requests come from.
url = 'http://www.expressvpn.com/what-is-my-ip'
# Raw string: in a normal literal "\U" starts a unicode escape, which is a
# SyntaxError on Python 3 for a Windows path like this one.
driver_path = r'C:\Users\user\geckodriver.exe'
browser = Firefox(executable_path=driver_path, proxy=proxy)
browser.get(url)
出于某种原因,每次我检查 IP 时,它显示的都是我的真实 IP,而不是代理 IP。为什么会这样?您能否建议如何实现?是代码有问题吗?
我研究了这个问题,注意到代理是通过 geckodriver 中的 WebDriver capabilities 和代理配置(proxy configurations)来设置的。
我在测试中使用了以下来源的代理信息。
免费代理列表:
请允许我指出,使用免费代理 IP 地址可能会出现很大问题。这些类型的代理因存在连接问题而臭名昭著,例如与延迟相关的超时。此外,这些站点也可能是间歇性的,这意味着它们随时可能出现故障。有时这些网站会被滥用,因此可能会被屏蔽。
下面的代码使用 DesiredCapabilities 和 selenium。
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.options import FirefoxProfile
from selenium.webdriver.firefox.options import DesiredCapabilities
# Browser options passed to the Firefox driver.
firefox_options = Options()
firefox_options.add_argument("--disable-infobars")
firefox_options.add_argument("--disable-extensions")
firefox_options.add_argument("--disable-popup-blocking")

profile_options = FirefoxProfile()  # NOTE(review): not referenced again in this snippet
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:90.0) Gecko/20100101 Firefox/90.0'
# 'general.useragent.override' is the Firefox preference that replaces the
# User-Agent string; the previous key was pasted code, not a preference name.
firefox_options.set_preference('general.useragent.override', user_agent)

# Attach a manual proxy definition to the Firefox capabilities.
firefox_capabilities = DesiredCapabilities().FIREFOX
firefox_capabilities['proxy'] = {
    "proxyType": "MANUAL",
    "sslProxy": '34.95.40.165:3128',
}

driver = webdriver.Firefox(
    executable_path='/usr/local/bin/geckodriver',
    options=firefox_options,
    desired_capabilities=firefox_capabilities,
)

# Page used to check which IP address the browser's requests come from.
URL = 'http://www.expressvpn.com/what-is-my-ip'
driver.get(URL)
你也可以这样做:
from selenium import webdriver
from selenium.webdriver.common.proxy import Proxy
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.options import FirefoxProfile
from selenium.webdriver.firefox.options import DesiredCapabilities
# Browser options passed to the Firefox driver.
firefox_options = Options()
firefox_options.add_argument("--disable-infobars")
firefox_options.add_argument("--disable-extensions")
firefox_options.add_argument("--disable-popup-blocking")

profile_options = FirefoxProfile()  # NOTE(review): not referenced again in this snippet
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:90.0) Gecko/20100101 Firefox/90.0'
# 'general.useragent.override' is the Firefox preference that replaces the
# User-Agent string; the previous key was pasted code, not a preference name.
firefox_options.set_preference('general.useragent.override', user_agent)

# Build the proxy with the Proxy helper and merge it into the capabilities.
firefox_capabilities = DesiredCapabilities().FIREFOX
firefox_proxies = Proxy()
firefox_proxies.ssl_proxy = '143.110.148.15:8080'
firefox_proxies.add_to_capabilities(firefox_capabilities)

driver = webdriver.Firefox(
    executable_path='/usr/local/bin/geckodriver',
    options=firefox_options,
    desired_capabilities=firefox_capabilities,
)

# Page used to check which IP address the browser's requests come from.
URL = 'http://www.expressvpn.com/what-is-my-ip'
driver.get(URL)
你也可以使用 Python 包 http_request_randomizer 获取代理 IP 地址,然后将其传递给 geckodriver。
import random
import logging
from selenium import webdriver
from selenium.webdriver.common.proxy import Proxy
from selenium.webdriver.firefox.options import Options
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.firefox.options import FirefoxProfile
from selenium.webdriver.firefox.options import DesiredCapabilities
from http_request_randomizer.requests.proxy.ProxyObject import Protocol
from http_request_randomizer.requests.proxy.requestProxy import RequestProxy
# Obtain a list of HTTPS proxies
# Suppress the console debugging output by setting the log level
req_proxy = RequestProxy(log_level=logging.ERROR, protocol=Protocol.HTTPS)

# Obtain a random single proxy from the list of proxy addresses
random_proxy = random.sample(req_proxy.get_proxy_list(), 1)

# Browser options passed to the Firefox driver.
firefox_options = Options()
firefox_options.add_argument("--disable-infobars")
firefox_options.add_argument("--disable-extensions")
firefox_options.add_argument("--disable-popup-blocking")

profile_options = FirefoxProfile()  # NOTE(review): not referenced again in this snippet
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:90.0) Gecko/20100101 Firefox/90.0'
# 'general.useragent.override' is the Firefox preference that replaces the
# User-Agent string; the previous key was pasted code, not a preference name.
firefox_options.set_preference('general.useragent.override', user_agent)

firefox_capabilities = DesiredCapabilities().FIREFOX

# add the random proxy to firefox_capabilities
firefox_proxies = Proxy()
firefox_proxies.ssl_proxy = random_proxy[0].get_address()
firefox_proxies.add_to_capabilities(firefox_capabilities)

driver = webdriver.Firefox(
    executable_path='/usr/local/bin/geckodriver',
    options=firefox_options,
    desired_capabilities=firefox_capabilities,
)

try:
    # print proxy IP for testing
    print(random_proxy[0].get_address())
    # sample output:
    # 93.183.250.200:53281

    URL = 'http://www.expressvpn.com/what-is-my-ip'
    driver.get(URL)
except TimeoutException:
    print("A Page load Timeout Occurred.")
    # Close the browser when the page load timed out.
    driver.quit()
如前所述,免费代理可能存在多种问题。下面的代码展示了如何使用 proxy judge 检查单个代理的状态。
import random
import logging
from time import sleep
from random import randint
from proxy_checking import ProxyChecker
from http_request_randomizer.requests.proxy.ProxyObject import Protocol
from http_request_randomizer.requests.proxy.requestProxy import RequestProxy
def random_ssl_proxy_address():
    """Pick one HTTPS proxy at random and return its address.

    Builds a RequestProxy limited to Protocol.HTTPS (log level ERROR keeps the
    console debugging output quiet), samples a single entry from its proxy
    list, and returns that entry's ``get_address()`` value.
    """
    https_proxies = RequestProxy(
        log_level=logging.ERROR,
        protocol=Protocol.HTTPS,
    ).get_proxy_list()
    # Sample exactly one proxy and unpack it from the one-element result.
    (chosen,) = random.sample(https_proxies, 1)
    return chosen.get_address()
def get_proxy_address():
    """Return a random HTTPS proxy address that passes the proxy-judge check.

    Draws an address from random_ssl_proxy_address(), checks it with
    ProxyChecker, and repeats until the check result's 'status' entry is
    truthy. The earlier recursive version dropped the result of the retry
    call, so any retry made the function return None.

    Returns:
        The first drawn address whose check result has a truthy 'status'.
    """
    while True:
        proxy_address = random_ssl_proxy_address()
        checker = ProxyChecker()
        proxy_judge = checker.check_proxy(proxy_address)
        # A missing 'status' entry is treated like a failed check.
        if proxy_judge.get('status'):
            return proxy_address
        print('Looking for a valid proxy address.')
        # this sleep timer is helping with some timeout issues
        # that were happening when querying
        sleep(randint(5, 10))
# Fetch a checked proxy address and show it.
random_ssl_proxy = get_proxy_address()
print(f'Valid proxy address: {random_ssl_proxy}')
# sample output:
# Valid proxy address: 98.116.152.143:3128
请注意,我使用的 proxy_checking 包没有内置任何错误处理,因此您需要自行添加代码来捕获可能出现的错误。