发出 post 请求时无法从 jsoup 获得结果
unable to get results from jsoup while giving post request
这是代码片段,它总是返回错误页面:
try {
    final String searchUrl = "http://kepler.sos.ca.gov/";

    // Step 1: GET the search form and read its hidden __EVENTVALIDATION / __VIEWSTATE inputs.
    Connection.Response formResponse = Jsoup.connect(searchUrl)
            .method(Connection.Method.GET)
            .execute();
    Document formPage = formResponse.parse();
    Element validationInput = formPage.select("input[name=__EVENTVALIDATION]").first();
    Element viewStateInput = formPage.select("input[name=__VIEWSTATE]").first();

    // Step 2: build the POST one field at a time, echoing the hidden values back.
    // NOTE: no cookies from the GET response are forwarded, and the control
    // names below are written with '_' separators.
    Connection searchRequest = Jsoup.connect(searchUrl);
    searchRequest.data("__VIEWSTATE", viewStateInput.attr("value"));
    searchRequest.data("__EVENTVALIDATION", validationInput.attr("value"));
    searchRequest.data("ctl00_content_placeholder_body_BusinessSearch1_TextBox_NameSearch", "escrow"); // <- search term
    searchRequest.data("ctl00_content_placeholder_body_BusinessSearch1_RadioButtonList_SearchType", "Corporation Name");
    searchRequest.data("ctl00_content_placeholder_body_BusinessSearch1_Button_Search", "Search");
    searchRequest.method(Connection.Method.POST);
    searchRequest.followRedirects(true);
    Connection.Response searchResponse = searchRequest.execute();

    // Step 3: parse and dump whatever page the server answered with.
    Document resultsPage = searchResponse.parse(); // search results
    System.out.println(resultsPage);
} catch (IOException ioFailure) {
    ioFailure.printStackTrace();
}
我在 Firebug 的网络面板中查看了浏览器发出的请求和收到的响应,并发送了相同的请求。
我错过了什么吗?
根据您的 Android 版本,如果您直接在单击按钮或类似操作的回调中(即在主线程上)运行该代码,它会抛出 "NetworkOnMainThreadException"。在 Honeycomb 或更高版本上,您必须从单独的显式线程或 AsyncTask 进行网络访问。
根据我的调试,您需要添加一些 cookie,下面的代码已包含这一点。此外,您的几个表单字段名称缺少美元符号(应使用 "$" 而不是 "_" 作为分隔符);浏览器还会提交一些值为空的表单字段,服务器可能需要它们,所以我也把这些字段包括进来了。
为了将来参考,如果您还没有使用它,我推荐工具 Fiddler 来调试此类问题。
class DownloadFilesTask extends AsyncTask<Void, Integer, Long> {
protected Long doInBackground(Void... params) {
long totalSize = 0;
try {
String url = "http://kepler.sos.ca.gov/";
Connection.Response response = Jsoup.connect(url)
.method(Connection.Method.GET)
.execute();
Document responseDocument = response.parse();
Map<String, String> loginCookies = response.cookies();
Element eventValidation = responseDocument.select("input[name=__EVENTVALIDATION]").first();
String validationKey = eventValidation.attr("value");
Element viewState = responseDocument.select("input[name=__VIEWSTATE]").first();
String viewStateKey = viewState.attr("value");
response = Jsoup.connect(url)
.cookies(loginCookies)
.data("__EVENTTARGET", "")
.data("__EVENTARGUMENT", "")
.data("__LASTFOCUS", "")
.data("__VIEWSTATE", viewStateKey)
.data("__VIEWSTATEENCRYPTED", "")
.data("__EVENTVALIDATION", validationKey)
.data("ctl00$content_placeholder_body$BusinessSearch1$TextBox_NameSearch", "aaa") // <- search
.data("ctl00$content_placeholder_body$BusinessSearch1$RadioButtonList_SearchType", "Corporation Name")
.data("ctl00$content_placeholder_body$BusinessSearch1$Button_Search", "Search")
.method(Connection.Method.POST)
.followRedirects(true)
.execute();
Document document = response.parse(); //search results
System.out.println(document);
} catch (IOException e) {
e.printStackTrace();
}
return totalSize;
}
protected void onProgressUpdate(Integer... progress) {
}
protected void onPostExecute(Long result) {
}
}
您实际上会使用类似以下内容执行该代码:
// Instantiate the DownloadFilesTask class shown above (not "TestAsyncTask",
// which is not defined here) and start it.
DownloadFilesTask task = new DownloadFilesTask();
task.execute();
要获取第 2 页,您必须在 POST 请求中包含以下表单字段。这是伪代码,显然,您必须将其转换为 .data 调用:
__EVENTTARGET = ctl00$content_placeholder_body$SearchResults1$GridView_SearchResults_Corp
__EVENTARGUMENT = Page$2
并且您还需要其他表单字段(__VIEWSTATEENCRYPTED 留空,__VIEWSTATE 如上所述)以及上述 cookie。
这是代码片段,它总是返回错误页面:
try {
    final String searchUrl = "http://kepler.sos.ca.gov/";

    // Step 1: GET the search form and read its hidden __EVENTVALIDATION / __VIEWSTATE inputs.
    Connection.Response formResponse = Jsoup.connect(searchUrl)
            .method(Connection.Method.GET)
            .execute();
    Document formPage = formResponse.parse();
    Element validationInput = formPage.select("input[name=__EVENTVALIDATION]").first();
    Element viewStateInput = formPage.select("input[name=__VIEWSTATE]").first();

    // Step 2: build the POST one field at a time, echoing the hidden values back.
    // NOTE: no cookies from the GET response are forwarded, and the control
    // names below are written with '_' separators.
    Connection searchRequest = Jsoup.connect(searchUrl);
    searchRequest.data("__VIEWSTATE", viewStateInput.attr("value"));
    searchRequest.data("__EVENTVALIDATION", validationInput.attr("value"));
    searchRequest.data("ctl00_content_placeholder_body_BusinessSearch1_TextBox_NameSearch", "escrow"); // <- search term
    searchRequest.data("ctl00_content_placeholder_body_BusinessSearch1_RadioButtonList_SearchType", "Corporation Name");
    searchRequest.data("ctl00_content_placeholder_body_BusinessSearch1_Button_Search", "Search");
    searchRequest.method(Connection.Method.POST);
    searchRequest.followRedirects(true);
    Connection.Response searchResponse = searchRequest.execute();

    // Step 3: parse and dump whatever page the server answered with.
    Document resultsPage = searchResponse.parse(); // search results
    System.out.println(resultsPage);
} catch (IOException ioFailure) {
    ioFailure.printStackTrace();
}
我在 Firebug 的网络面板中查看了浏览器发出的请求和收到的响应,并发送了相同的请求。我错过了什么吗?
根据您的 Android 版本,如果您直接在单击按钮或类似操作的回调中(即在主线程上)运行该代码,它会抛出 "NetworkOnMainThreadException"。在 Honeycomb 或更高版本上,您必须从单独的显式线程或 AsyncTask 进行网络访问。
根据我的调试,您需要添加一些 cookie,下面的代码已包含这一点。此外,您的几个表单字段名称缺少美元符号(应使用 "$" 而不是 "_" 作为分隔符);浏览器还会提交一些值为空的表单字段,服务器可能需要它们,所以我也把这些字段包括进来了。
为了将来参考,如果您还没有使用它,我推荐工具 Fiddler 来调试此类问题。
class DownloadFilesTask extends AsyncTask<Void, Integer, Long> {
protected Long doInBackground(Void... params) {
long totalSize = 0;
try {
String url = "http://kepler.sos.ca.gov/";
Connection.Response response = Jsoup.connect(url)
.method(Connection.Method.GET)
.execute();
Document responseDocument = response.parse();
Map<String, String> loginCookies = response.cookies();
Element eventValidation = responseDocument.select("input[name=__EVENTVALIDATION]").first();
String validationKey = eventValidation.attr("value");
Element viewState = responseDocument.select("input[name=__VIEWSTATE]").first();
String viewStateKey = viewState.attr("value");
response = Jsoup.connect(url)
.cookies(loginCookies)
.data("__EVENTTARGET", "")
.data("__EVENTARGUMENT", "")
.data("__LASTFOCUS", "")
.data("__VIEWSTATE", viewStateKey)
.data("__VIEWSTATEENCRYPTED", "")
.data("__EVENTVALIDATION", validationKey)
.data("ctl00$content_placeholder_body$BusinessSearch1$TextBox_NameSearch", "aaa") // <- search
.data("ctl00$content_placeholder_body$BusinessSearch1$RadioButtonList_SearchType", "Corporation Name")
.data("ctl00$content_placeholder_body$BusinessSearch1$Button_Search", "Search")
.method(Connection.Method.POST)
.followRedirects(true)
.execute();
Document document = response.parse(); //search results
System.out.println(document);
} catch (IOException e) {
e.printStackTrace();
}
return totalSize;
}
protected void onProgressUpdate(Integer... progress) {
}
protected void onPostExecute(Long result) {
}
}
您实际上会使用类似以下内容执行该代码:
// Instantiate the DownloadFilesTask class shown above (not "TestAsyncTask",
// which is not defined here) and start it.
DownloadFilesTask task = new DownloadFilesTask();
task.execute();
要获取第 2 页,您必须在 POST 请求中包含以下表单字段。这是伪代码,显然,您必须将其转换为 .data 调用:
__EVENTTARGET = ctl00$content_placeholder_body$SearchResults1$GridView_SearchResults_Corp
__EVENTARGUMENT = Page$2
并且您还需要其他表单字段(__VIEWSTATEENCRYPTED 留空,__VIEWSTATE 如上所述)以及上述 cookie。