无法在 Python 的 Selenium 中设置代理

Not able to set proxy in selenium Python

我有以下代码:

from selenium import webdriver
from selenium.webdriver import Firefox
from selenium.webdriver.common.keys import Keys
from selenium.webdriver import ActionChains

from selenium.webdriver.common.proxy import Proxy, ProxyType

# Manual proxy configuration: HTTP, FTP and SSL traffic all point at the
# same host:port.
proxy = Proxy({
    'proxyType': ProxyType.MANUAL,
    'httpProxy': '192.156.1.1:33',
    'ftpProxy': '192.156.1.1:33',
    'sslProxy': '192.156.1.1:33',
    'noProxy': '' # set this value as desired
    })
url = 'http://www.expressvpn.com/what-is-my-ip'
# Raw string: in a regular literal '\U' begins a \UXXXXXXXX escape, so
# 'C:\Users\...' is a SyntaxError in Python 3.
driver_path = r'C:\Users\user\geckodriver.exe'

# Start Firefox through geckodriver with the proxy settings and open the
# IP-check page.
browser = Firefox(executable_path = driver_path, proxy = proxy)
browser.get(url)

出于某种原因,每次我检查 IP 时,显示的都是我的真实 IP 而不是代理 IP。为什么会这样?您能否建议如何实现?代码有问题吗?

我开始研究这个并注意到代理是使用 geckodriver 中的 WebDriver capabilities and proxy configurations 设置的。

我使用了以下来源的代理信息进行测试。

免费代理列表:

请允许我指出,使用免费代理 IP 地址可能会出现很大问题。这些类型的代理因存在连接问题而臭名昭著,例如与延迟相关的超时。此外,这些站点也可能是间歇性的,这意味着它们随时可能出现故障。有时这些网站会被滥用,因此可能会被屏蔽。

下面的代码使用 selenium 的 DesiredCapabilities。

from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.options import FirefoxProfile
from selenium.webdriver.firefox.options import DesiredCapabilities

# Browser start-up options.
# NOTE(review): these three switches look like Chrome-style flags; confirm
# that Firefox actually honours them.
firefox_options = Options()
firefox_options.add_argument("--disable-infobars")
firefox_options.add_argument("--disable-extensions")
firefox_options.add_argument("--disable-popup-blocking")

# Override the User-Agent header. The preference name must be
# 'general.useragent.override'; the previous name was a pasted code fragment,
# so the override was never applied.
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:90.0) Gecko/20100101 Firefox/90.0'
firefox_options.set_preference('general.useragent.override', user_agent)

firefox_capabilities = DesiredCapabilities().FIREFOX

# Proxy settings are passed to geckodriver as a plain capabilities dict.
# NOTE(review): only 'sslProxy' is set, so presumably only HTTPS traffic is
# proxied -- add 'httpProxy' if plain-HTTP requests must be proxied too.
firefox_capabilities['proxy'] = {
    "proxyType": "MANUAL",
    "sslProxy": '34.95.40.165:3128',
}

driver = webdriver.Firefox(executable_path='/usr/local/bin/geckodriver', options=firefox_options, desired_capabilities=firefox_capabilities)

URL = 'http://www.expressvpn.com/what-is-my-ip'

driver.get(URL)

你也可以这样做:

from selenium import webdriver
from selenium.webdriver.common.proxy import Proxy
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.options import FirefoxProfile
from selenium.webdriver.firefox.options import DesiredCapabilities

# Browser start-up options.
# NOTE(review): these three switches look like Chrome-style flags; confirm
# that Firefox actually honours them.
firefox_options = Options()
firefox_options.add_argument("--disable-infobars")
firefox_options.add_argument("--disable-extensions")
firefox_options.add_argument("--disable-popup-blocking")

# Override the User-Agent header. The preference name must be
# 'general.useragent.override'; the previous name was a pasted code fragment,
# so the override was never applied.
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:90.0) Gecko/20100101 Firefox/90.0'
firefox_options.set_preference('general.useragent.override', user_agent)

firefox_capabilities = DesiredCapabilities().FIREFOX

# Build the proxy settings with selenium's Proxy helper and copy them into
# the capabilities dict handed to the driver.
firefox_proxies = Proxy()
firefox_proxies.ssl_proxy = '143.110.148.15:8080'
firefox_proxies.add_to_capabilities(firefox_capabilities)

driver = webdriver.Firefox(executable_path='/usr/local/bin/geckodriver', options=firefox_options,
                           desired_capabilities=firefox_capabilities)

URL = 'http://www.expressvpn.com/what-is-my-ip'

driver.get(URL)

你也可以使用 Python 包 http_request_randomizer 获取代理 IP 地址,然后将其传递给 geckodriver。

import random
import logging
from selenium import webdriver
from selenium.webdriver.common.proxy import Proxy
from selenium.webdriver.firefox.options import Options
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.firefox.options import FirefoxProfile
from selenium.webdriver.firefox.options import DesiredCapabilities
from http_request_randomizer.requests.proxy.ProxyObject import Protocol
from http_request_randomizer.requests.proxy.requestProxy import RequestProxy

# Obtain a list of HTTPS proxies
# Suppress the console debugging output by setting the log level
req_proxy = RequestProxy(log_level=logging.ERROR, protocol=Protocol.HTTPS)

# Obtain a random single proxy from the list of proxy addresses
random_proxy = random.sample(req_proxy.get_proxy_list(), 1)

# Browser start-up options.
# NOTE(review): these three switches look like Chrome-style flags; confirm
# that Firefox actually honours them.
firefox_options = Options()
firefox_options.add_argument("--disable-infobars")
firefox_options.add_argument("--disable-extensions")
firefox_options.add_argument("--disable-popup-blocking")

# Override the User-Agent header. The preference name must be
# 'general.useragent.override'; the previous name was a pasted code fragment,
# so the override was never applied.
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:90.0) Gecko/20100101 Firefox/90.0'
firefox_options.set_preference('general.useragent.override', user_agent)

firefox_capabilities = DesiredCapabilities().FIREFOX

# add the random proxy to firefox_capabilities
firefox_proxies = Proxy()
firefox_proxies.ssl_proxy = random_proxy[0].get_address()
firefox_proxies.add_to_capabilities(firefox_capabilities)

driver = webdriver.Firefox(executable_path='/usr/local/bin/geckodriver', options=firefox_options,
                           desired_capabilities=firefox_capabilities)

try:
    # print proxy IP for testing
    print(random_proxy[0].get_address())
    # example output (kept as a comment; as bare text it is a SyntaxError):
    # 93.183.250.200:53281

    URL = 'http://www.expressvpn.com/what-is-my-ip'
    driver.get(URL)

except TimeoutException:
    # The page did not load in time: report it and close the browser.
    print("A Page load Timeout Occurred.")
    driver.quit()

如前所述,免费代理可能存在多种问题。下面的代码展示了如何使用 proxy judge 检查单个代理的状态。

import random
import logging
from time import sleep
from random import randint
from proxy_checking import ProxyChecker
from http_request_randomizer.requests.proxy.ProxyObject import Protocol
from http_request_randomizer.requests.proxy.requestProxy import RequestProxy


def random_ssl_proxy_address():
    """Return the address of one randomly selected HTTPS proxy.

    A ``RequestProxy`` limited to ``Protocol.HTTPS`` is created with its log
    level set to ``logging.ERROR``, one entry is sampled from its proxy list,
    and that entry's ``get_address()`` value is returned.
    """
    https_proxies = RequestProxy(
        log_level=logging.ERROR,  # silence the library's debug console output
        protocol=Protocol.HTTPS,
    ).get_proxy_list()

    # Sample exactly one proxy; unpacking the one-element result yields the
    # proxy object itself.
    (chosen,) = random.sample(https_proxies, 1)
    return chosen.get_address()


def get_proxy_address():
    """Return a random HTTPS proxy address that passes the proxy check.

    Repeatedly draws an address with ``random_ssl_proxy_address()`` and
    checks it with ``ProxyChecker.check_proxy``. The first address whose
    result has a truthy ``'status'`` entry is returned.

    The previous version retried by calling itself without ``return``, so it
    returned ``None`` whenever the first candidate failed. A loop always
    returns the validated address and does not grow the call stack.

    Returns:
        The proxy address that passed the check.
    """
    while True:
        proxy_address = random_ssl_proxy_address()
        checker = ProxyChecker()
        proxy_judge = checker.check_proxy(proxy_address)

        # A missing 'status' key is treated like a failed check instead of
        # raising IndexError.
        if proxy_judge.get('status'):
            return proxy_address

        print('Looking for a valid proxy address.')

        # this sleep timer is helping with some timeout issues
        # that were happening when querying
        sleep(randint(5, 10))


# Fetch a proxy address that passed the check and display it.
random_ssl_proxy = get_proxy_address()
print(f'Valid proxy address: {random_ssl_proxy}')
# example output (kept as a comment; as bare text it is a SyntaxError):
# Valid proxy address: 98.116.152.143:3128

请注意,我使用的 proxy_checking 包没有任何内置的错误处理,因此您必须自行添加错误处理来捕获可能出现的错误。