如何在 jsoup 中处理 JavaScript 重定向
How to handle JavaScript redirects in jsoup
我有这个 url http://www.zara.com/qr/1260020210042
并且我正在尝试获得重定向的最终结果 URL:
// Resolve the short QR link, letting jsoup follow HTTP-level redirects.
final String startUrl = "http://www.zara.com/qr/1260020210042";
final Response firstHop = Jsoup.connect(startUrl).followRedirects(true).execute();
// Request the URL we landed on once more and print wherever that second request ends up.
final Response secondHop = Jsoup.connect(firstHop.url().toString()).followRedirects(true).execute();
System.out.println(secondHop.url());
但它没有打印出最终重定向到的 URL,我应该改什么?
谢谢,
编辑:
我也尝试使用 Htmlunit,但它没有给我我需要的最终 link:
// Try the same short link with HtmlUnit, with JavaScript and redirect handling switched on.
final String qrUrl = "http://www.zara.com/qr/1260020210042";
WebClient browser = new WebClient(BrowserVersion.FIREFOX_45);
browser.getOptions().setCssEnabled(true);
browser.getOptions().setThrowExceptionOnScriptError(false);
browser.getOptions().setRedirectEnabled(true);
browser.getOptions().setJavaScriptEnabled(true);
HtmlPage landingPage = (HtmlPage) browser.getPage(qrUrl);
// The response body is still fetched here, although only the page URL is printed.
WebResponse pageResponse = landingPage.getWebResponse();
String body = pageResponse.getContentAsString();
System.out.println(landingPage.getUrl());
Frederic Klein 建议的 HtmlUnit 解决方案实际上工作得很好,但有一个与 cookie 相关的注意事项,请参阅下面的 "更新" 部分。
首先将此依赖项添加到您的 Maven 配置中:
<dependency>
<groupId>net.sourceforge.htmlunit</groupId>
<artifactId>htmlunit</artifactId>
<version>2.25</version>
</dependency>
然后像这样使用它:
package de.scrum_master.Whosebug;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.WebClientOptions;
import org.jsoup.Connection.Response;
import org.jsoup.Jsoup;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import static com.gargoylesoftware.htmlunit.BrowserVersion.CHROME;
import static java.util.logging.Level.OFF;
import static java.util.logging.Logger.getLogger;
/**
 * Resolves the final product URL behind a zara.com QR short link.
 *
 * <p>HtmlUnit loads the short link with JavaScript and redirects enabled; the URL of the
 * page it ends up on is then requested once more with jsoup and printed.
 */
public class Application {
    /**
     * Strong reference to the HtmlUnit logger. java.util.logging only holds loggers weakly,
     * so without this field the OFF level set below could be lost after a garbage collection.
     */
    private static final java.util.logging.Logger HTMLUNIT_LOGGER = getLogger("com.gargoylesoftware");

    /**
     * Prints the URL the QR short link finally resolves to.
     *
     * @param args unused
     * @throws IOException if loading the page with HtmlUnit or jsoup fails
     */
    public static void main(String[] args) throws IOException {
        String originalURL = "http://www.zara.com/qr/1260020210042";
        String redirectedURL;
        // Close the WebClient as soon as the redirect target is known so its resources are released.
        try (WebClient webClient = createWebClient()) {
            redirectedURL = webClient.getPage(originalURL).getUrl().toString();
        }
        Response response = Jsoup.connect(redirectedURL).execute();
        System.out.println(response.url());
    }

    /**
     * Creates a Chrome-flavoured HtmlUnit client with JavaScript and redirects enabled and
     * the country/language cookie preset. The caller is responsible for closing it.
     *
     * @return the configured client
     * @throws MalformedURLException if the cookie's page URL cannot be parsed
     */
    private static WebClient createWebClient() throws MalformedURLException {
        // Silence HtmlUnit's logging output.
        HTMLUNIT_LOGGER.setLevel(OFF);
        WebClient webClient = new WebClient(CHROME);
        WebClientOptions options = webClient.getOptions();
        options.setJavaScriptEnabled(true);
        options.setRedirectEnabled(true);
        // IMPORTANT: Without the country/language selection cookie the redirection does not work!
        webClient.addCookie("storepath=us/en", new URL("http://www.zara.com/"), null);
        return webClient;
    }
}
控制台日志显示:
http://www.zara.com/us/en/man/shoes/leather/brown-braided-leather-ankle-boots-c0p4065286.html
更新: 好的,我找到了问题的根本原因。问题不在 HtmlUnit,而在于 zara.com 的重定向只有在用户首次访问时(无论使用哪种浏览器)手动选择了国家/地区和语言之后才会生效。该信息存储在一个名为 storepath 的 cookie 中;
没有它,每个浏览器会话都会重新回到首页,并再次弹出国家/地区选择对话框。我已经更新了示例代码,把该 cookie 设置为美国 + 英语,这样就可以正常工作了。
尽情享受吧!
我有这个 url http://www.zara.com/qr/1260020210042
并且我正在尝试获得重定向的最终结果 URL:
// Resolve the short QR link, letting jsoup follow HTTP-level redirects.
final String startUrl = "http://www.zara.com/qr/1260020210042";
final Response firstHop = Jsoup.connect(startUrl).followRedirects(true).execute();
// Request the URL we landed on once more and print wherever that second request ends up.
final Response secondHop = Jsoup.connect(firstHop.url().toString()).followRedirects(true).execute();
System.out.println(secondHop.url());
但它没有打印出最终重定向到的 URL,我应该改什么?谢谢,
编辑:
我也尝试使用 Htmlunit,但它没有给我我需要的最终 link:
// Try the same short link with HtmlUnit, with JavaScript and redirect handling switched on.
final String qrUrl = "http://www.zara.com/qr/1260020210042";
WebClient browser = new WebClient(BrowserVersion.FIREFOX_45);
browser.getOptions().setCssEnabled(true);
browser.getOptions().setThrowExceptionOnScriptError(false);
browser.getOptions().setRedirectEnabled(true);
browser.getOptions().setJavaScriptEnabled(true);
HtmlPage landingPage = (HtmlPage) browser.getPage(qrUrl);
// The response body is still fetched here, although only the page URL is printed.
WebResponse pageResponse = landingPage.getWebResponse();
String body = pageResponse.getContentAsString();
System.out.println(landingPage.getUrl());
Frederic Klein 建议的 HtmlUnit 解决方案实际上工作得很好,但有一个与 cookie 相关的注意事项,请参阅下面的 "更新" 部分。
首先将此依赖项添加到您的 Maven 配置中:
<dependency>
<groupId>net.sourceforge.htmlunit</groupId>
<artifactId>htmlunit</artifactId>
<version>2.25</version>
</dependency>
然后像这样使用它:
package de.scrum_master.Whosebug;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.WebClientOptions;
import org.jsoup.Connection.Response;
import org.jsoup.Jsoup;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import static com.gargoylesoftware.htmlunit.BrowserVersion.CHROME;
import static java.util.logging.Level.OFF;
import static java.util.logging.Logger.getLogger;
/**
 * Resolves the final product URL behind a zara.com QR short link.
 *
 * <p>HtmlUnit loads the short link with JavaScript and redirects enabled; the URL of the
 * page it ends up on is then requested once more with jsoup and printed.
 */
public class Application {
    /**
     * Strong reference to the HtmlUnit logger. java.util.logging only holds loggers weakly,
     * so without this field the OFF level set below could be lost after a garbage collection.
     */
    private static final java.util.logging.Logger HTMLUNIT_LOGGER = getLogger("com.gargoylesoftware");

    /**
     * Prints the URL the QR short link finally resolves to.
     *
     * @param args unused
     * @throws IOException if loading the page with HtmlUnit or jsoup fails
     */
    public static void main(String[] args) throws IOException {
        String originalURL = "http://www.zara.com/qr/1260020210042";
        String redirectedURL;
        // Close the WebClient as soon as the redirect target is known so its resources are released.
        try (WebClient webClient = createWebClient()) {
            redirectedURL = webClient.getPage(originalURL).getUrl().toString();
        }
        Response response = Jsoup.connect(redirectedURL).execute();
        System.out.println(response.url());
    }

    /**
     * Creates a Chrome-flavoured HtmlUnit client with JavaScript and redirects enabled and
     * the country/language cookie preset. The caller is responsible for closing it.
     *
     * @return the configured client
     * @throws MalformedURLException if the cookie's page URL cannot be parsed
     */
    private static WebClient createWebClient() throws MalformedURLException {
        // Silence HtmlUnit's logging output.
        HTMLUNIT_LOGGER.setLevel(OFF);
        WebClient webClient = new WebClient(CHROME);
        WebClientOptions options = webClient.getOptions();
        options.setJavaScriptEnabled(true);
        options.setRedirectEnabled(true);
        // IMPORTANT: Without the country/language selection cookie the redirection does not work!
        webClient.addCookie("storepath=us/en", new URL("http://www.zara.com/"), null);
        return webClient;
    }
}
控制台日志显示:
http://www.zara.com/us/en/man/shoes/leather/brown-braided-leather-ankle-boots-c0p4065286.html
更新: 好的,我找到了问题的根本原因。问题不在 HtmlUnit,而在于 zara.com 的重定向只有在用户首次访问时(无论使用哪种浏览器)手动选择了国家/地区和语言之后才会生效。该信息存储在一个名为 storepath 的 cookie 中;
没有它,每个浏览器会话都会重新回到首页,并再次弹出国家/地区选择对话框。我已经更新了示例代码,把该 cookie 设置为美国 + 英语,这样就可以正常工作了。
尽情享受吧!