如何在scrapy中提交表单?
How to submit a form in scrapy?
我尝试使用 scrapy 完成登录并收集我的项目提交计数。这是代码。
from scrapy.item import Item, Field
from scrapy.http import FormRequest
from scrapy.spider import Spider
from scrapy.utils.response import open_in_browser
class GitSpider(Spider):
    """Spider that logs in to GitHub by submitting the login form.

    The GitHub login page contains more than one <form> element (a
    search form appears first in the markup), so
    ``FormRequest.from_response`` must be told which form to fill in;
    otherwise it submits the search form and the spider lands on the
    search results page instead of the login result.
    """

    name = "github"
    allowed_domains = ["github.com"]
    start_urls = ["https://www.github.com/login"]

    def parse(self, response):
        """Fill in and submit the login form found on the login page."""
        formdata = {'login': 'username',
                    'password': 'password'}
        # formnumber=1 selects the login form (form 0 is the search
        # form); formname='Login' would work equally well.
        yield FormRequest.from_response(response,
                                        formnumber=1,
                                        formdata=formdata,
                                        clickdata={'name': 'commit'},
                                        callback=self.parse1)

    def parse1(self, response):
        """Open the login response in a local browser for inspection."""
        open_in_browser(response)
运行代码后
scrapy runspider github.py
它应该显示表单提交后的结果页面,也就是登录失败后返回的同一页面(因为用户名和密码都是假的),但它实际显示的却是搜索页面(search page)。日志文件已上传到 pastebin。
代码应该如何修复?提前致谢。
使用 webdriver 的解决方案。
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
import time
from scrapy.contrib.spiders import CrawlSpider
class GitSpider(CrawlSpider):
    """Spider that logs in to GitHub by driving a real Firefox browser
    through Selenium instead of submitting the form with Scrapy."""

    name = "gitscrape"
    allowed_domains = ["github.com"]
    start_urls = ["https://www.github.com/login"]

    def __init__(self, *args, **kwargs):
        # The base spider's initializer must run, otherwise CrawlSpider
        # internals (name handling, rule compilation) are never set up.
        super(GitSpider, self).__init__(*args, **kwargs)
        self.driver = webdriver.Firefox()

    def parse(self, response):
        """Open the login page in Firefox, type the credentials, and
        click the submit ("commit") button."""
        self.driver.get(response.url)
        login_form = self.driver.find_element_by_name('login')
        password_form = self.driver.find_element_by_name('password')
        commit = self.driver.find_element_by_name('commit')
        login_form.send_keys("yourlogin")
        password_form.send_keys("yourpassword")
        actions = ActionChains(self.driver)
        actions.click(commit)
        actions.perform()
        # By this point the browser session is logged in to GitHub and
        # has access to all data behind the main menu.
        time.sleep(3)
        self.driver.close()
您的问题是 FormRequest.from_response()
使用了不同的表单——即 "search form"(搜索表单)。但是,您希望它改用 "log in form"(登录表单)。提供一个 formnumber
参数:
yield FormRequest.from_response(response,
formnumber=1,
formdata=formdata,
clickdata={'name': 'commit'},
callback=self.parse1)
这是应用更改后我在浏览器中打开的内容(使用 "fake" 用户):
使用 "formname" 参数也有效:
yield FormRequest.from_response(response,
formname='Login',
formdata=formdata,
clickdata={'name': 'commit'},
callback=self.parse1)
我尝试使用 scrapy 完成登录并收集我的项目提交计数。这是代码。
from scrapy.item import Item, Field
from scrapy.http import FormRequest
from scrapy.spider import Spider
from scrapy.utils.response import open_in_browser
class GitSpider(Spider):
    """Spider that logs in to GitHub by submitting the login form.

    The GitHub login page contains more than one <form> element (a
    search form appears first in the markup), so
    ``FormRequest.from_response`` must be told which form to fill in;
    otherwise it submits the search form and the spider lands on the
    search results page instead of the login result.
    """

    name = "github"
    allowed_domains = ["github.com"]
    start_urls = ["https://www.github.com/login"]

    def parse(self, response):
        """Fill in and submit the login form found on the login page."""
        formdata = {'login': 'username',
                    'password': 'password'}
        # formnumber=1 selects the login form (form 0 is the search
        # form); formname='Login' would work equally well.
        yield FormRequest.from_response(response,
                                        formnumber=1,
                                        formdata=formdata,
                                        clickdata={'name': 'commit'},
                                        callback=self.parse1)

    def parse1(self, response):
        """Open the login response in a local browser for inspection."""
        open_in_browser(response)
运行代码后
scrapy runspider github.py
它应该显示表单提交后的结果页面,也就是登录失败后返回的同一页面(因为用户名和密码都是假的),但它实际显示的却是搜索页面(search page)。日志文件已上传到 pastebin。
代码应该如何修复?提前致谢。
使用 webdriver 的解决方案。
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
import time
from scrapy.contrib.spiders import CrawlSpider
class GitSpider(CrawlSpider):
    """Spider that logs in to GitHub by driving a real Firefox browser
    through Selenium instead of submitting the form with Scrapy."""

    name = "gitscrape"
    allowed_domains = ["github.com"]
    start_urls = ["https://www.github.com/login"]

    def __init__(self, *args, **kwargs):
        # The base spider's initializer must run, otherwise CrawlSpider
        # internals (name handling, rule compilation) are never set up.
        super(GitSpider, self).__init__(*args, **kwargs)
        self.driver = webdriver.Firefox()

    def parse(self, response):
        """Open the login page in Firefox, type the credentials, and
        click the submit ("commit") button."""
        self.driver.get(response.url)
        login_form = self.driver.find_element_by_name('login')
        password_form = self.driver.find_element_by_name('password')
        commit = self.driver.find_element_by_name('commit')
        login_form.send_keys("yourlogin")
        password_form.send_keys("yourpassword")
        actions = ActionChains(self.driver)
        actions.click(commit)
        actions.perform()
        # By this point the browser session is logged in to GitHub and
        # has access to all data behind the main menu.
        time.sleep(3)
        self.driver.close()
您的问题是 FormRequest.from_response()
使用了不同的表单——即 "search form"(搜索表单)。但是,您希望它改用 "log in form"(登录表单)。提供一个 formnumber
参数:
yield FormRequest.from_response(response,
formnumber=1,
formdata=formdata,
clickdata={'name': 'commit'},
callback=self.parse1)
这是应用更改后我在浏览器中打开的内容(使用 "fake" 用户):
使用 "formname" 参数也有效:
yield FormRequest.from_response(response,
formname='Login',
formdata=formdata,
clickdata={'name': 'commit'},
callback=self.parse1)