如何使用 Jsoup 获取脚本的一部分
How to get part of a script using Jsoup
我正在尝试从 HTML 源码中提取坐标。我用下面的代码获取了页面中的第一个 script 元素:
Document doc = Jsoup.connect("https://www.google.pl/maps?source=tldsi&hl=pl").get();
Element scriptElement = doc.getElementsByTag("script").first();
输出如下:
<script>
mapslite = {
START_TIME: new Date()
};
mapslite.getBasePageResponse = function(cacheResponse) {
delete mapslite.getBasePageResponse;
cacheResponse([[[19763.02300843847,19.416411,51.7444121],[0,0,0],[1024,768],13.10000038146973],"/maps-lite/js/2/maps_lite_20151130_RC02/intl/pl_ALL",null,null,null,["pl-PL","pl"],["/maps/lite/ApplicationService.GetEntityDetails","/maps/lite/ApplicationService.UpdateStarring","/maps/lite/ApplicationService.Search",null,"/maps/lite/suggest","/maps/lite/directions","/maps/lite/MapsLiteService.GetHotelAvailability"],null,null,null,null,null,null,null,null,null,"/maps/api/js?client=google-maps-lite\u0026libraries=common,geometry,map,search\u0026v=3.23.1\u0026language=pl-PL\u0026region=pl\u0026callback=v3loaded\u0026inline_main_and_libraries=1","/maps-lite/js/2/maps_lite_20151130_RC02/intl/pl_ALL/main.js",0,"Java/1.8.0_45,gzip(gfe)",null,null,0,0,null,"https://www.google.pl/maps/@?source=tldsi\u0026hl=pl\u0026dg=dbrw\u0026newdg=1",0,null,0,null,null,"HL9lVvOvF5WpNKH-s9gM",null,null,["dbrw",1],null,null,null,null,0,null,null,null,[0,0,null,0,1,null,null,null,0,0,1,1,0,"map,common",1,0,0,0,0],null,null,null,null,null,null,null,"//consent.google.com","2.maps_lite_20151130_RC02"]);
};
</script>
cacheResponse 调用中的 19.416411 和 51.7444121 就是我需要获取的坐标,请问该如何提取?
Jsoup 不会解释或解析 script 标签的内容。您唯一能做的是把脚本内容作为字符串取出(例如 scriptElement.data()),然后用更合适的工具来解析它。正则表达式可以完成这项工作;某些情况下,您可能还需要用 JSON 解析器来解析其中的部分字符串。对于您的问题,我会这样写:
Pattern p = Pattern.compile("cacheResponse\\(\\[+([0-9.]+),([0-9.]+),([0-9.]+)");
用该模式在脚本字符串上调用 find() 匹配成功后,第 2 组和第 3 组(即 Matcher 的 group(2) 和 group(3))就是您想要的坐标:19.416411 和 51.7444121。
我正在尝试从 HTML 源码中提取坐标。我用下面的代码获取了页面中的第一个 script 元素:
Document doc = Jsoup.connect("https://www.google.pl/maps?source=tldsi&hl=pl").get();
Element scriptElement = doc.getElementsByTag("script").first();
输出如下:
<script>
mapslite = {
START_TIME: new Date()
};
mapslite.getBasePageResponse = function(cacheResponse) {
delete mapslite.getBasePageResponse;
cacheResponse([[[19763.02300843847,19.416411,51.7444121],[0,0,0],[1024,768],13.10000038146973],"/maps-lite/js/2/maps_lite_20151130_RC02/intl/pl_ALL",null,null,null,["pl-PL","pl"],["/maps/lite/ApplicationService.GetEntityDetails","/maps/lite/ApplicationService.UpdateStarring","/maps/lite/ApplicationService.Search",null,"/maps/lite/suggest","/maps/lite/directions","/maps/lite/MapsLiteService.GetHotelAvailability"],null,null,null,null,null,null,null,null,null,"/maps/api/js?client=google-maps-lite\u0026libraries=common,geometry,map,search\u0026v=3.23.1\u0026language=pl-PL\u0026region=pl\u0026callback=v3loaded\u0026inline_main_and_libraries=1","/maps-lite/js/2/maps_lite_20151130_RC02/intl/pl_ALL/main.js",0,"Java/1.8.0_45,gzip(gfe)",null,null,0,0,null,"https://www.google.pl/maps/@?source=tldsi\u0026hl=pl\u0026dg=dbrw\u0026newdg=1",0,null,0,null,null,"HL9lVvOvF5WpNKH-s9gM",null,null,["dbrw",1],null,null,null,null,0,null,null,null,[0,0,null,0,1,null,null,null,0,0,1,1,0,"map,common",1,0,0,0,0],null,null,null,null,null,null,null,"//consent.google.com","2.maps_lite_20151130_RC02"]);
};
</script>
cacheResponse 调用中的 19.416411 和 51.7444121 就是我需要获取的坐标,请问该如何提取?
Jsoup 不会解释或解析 script 标签的内容。您唯一能做的是把脚本内容作为字符串取出(例如 scriptElement.data()),然后用更合适的工具来解析它。正则表达式可以完成这项工作;某些情况下,您可能还需要用 JSON 解析器来解析其中的部分字符串。对于您的问题,我会这样写:
Pattern p = Pattern.compile("cacheResponse\\(\\[+([0-9.]+),([0-9.]+),([0-9.]+)");
用该模式在脚本字符串上调用 find() 匹配成功后,第 2 组和第 3 组(即 Matcher 的 group(2) 和 group(3))就是您想要的坐标:19.416411 和 51.7444121。