使用 Selenium WebDriver 查找网页失效链接的代码总是抛出异常

Getting Exception with code for finding broken links in webpage with Selenium Webdriver

我编写这段代码是为了获取每个链接的 HTTP 响应码,并根据响应码打印该链接是否有效。但是每次执行代码时,它都会进入 catch 块。能告诉我哪里出错了吗?

import java.net.HttpURLConnection;
import java.net.URL;
import java.util.List;
import javax.net.ssl.HttpsURLConnection;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.firefox.FirefoxDriver;

public class BrokenLinks {

    public static void main(String[] args) {

        // Telling Selenium to find Chrome Driver
        System.setProperty("webdriver.chrome.driver", "C:\selenium\chromedriver.exe");

        // Initialize browser
        ChromeDriver driver = new ChromeDriver();

        // Maximize Window
        driver.manage().window().maximize();

        // Launch Google
        driver.get("https://www.google.co.in/");

        List<WebElement> links = driver.findElements(By.tagName("a"));

        System.out.println("Total links are " + links.size());

        for (int i = 0; i <= links.size(); i++) {
            try {
                String nextHref = links.get(i).getAttribute("href");
                // System.out.println(nextHref);
                URL url = new URL("nextHref");
                HttpURLConnection connection = (HttpURLConnection) url.openConnection();
                connection.setRequestMethod("GET");
                connection.connect();
                int code = connection.getResponseCode();
                System.out.println("code: " + code + "Url" + url);
                /*
                 * if (code == 200) { System.out.println("Valid Link:" +
                 * nextHref);} else { System.out.println("INVALID Link:" +
                 * nextHref);}
                 */
            } catch (Exception e) {
                System.out.println("In Exception");
            }
        }
        // Close the browser
        driver.quit();
    }
}

删除下面这一行中 nextHref 两侧的引号:带引号时传入的是字面字符串 "nextHref" 而不是变量的值,new URL 会抛出 MalformedURLException。

URL url = new URL("nextHref");

应该是,

URL url = new URL(nextHref);

把这一行改成:

URL url = new URL(nextHref); //changed

试试下面的代码:

  // Launch Google
    driver.get("https://www.google.co.in/");

    List<WebElement> links = driver.findElements(By.tagName("a"));

    System.out.println("Total links are " + links.size());

    // Iterate the elements directly; an index loop bounded by
    // "i <= links.size()" reads one element past the end of the list.
    for (WebElement link : links) {
        String nextHref = null;
        HttpURLConnection connection = null;
        try {
            nextHref = link.getAttribute("href");
            if (nextHref == null || nextHref.isEmpty()) {
                // Anchors without a target cannot be requested.
                System.out.println("Skipping link without href");
                continue;
            }
            URL url = new URL(nextHref); //changed
            connection = (HttpURLConnection) url.openConnection();
            connection.setRequestMethod("GET");
            connection.connect();
            int code = connection.getResponseCode();
            System.out.println("code: " + code + "Url" + url);
        } catch (Exception e) {
            // Report which link failed and why instead of hiding the cause.
            System.out.println("In Exception for " + nextHref + ": " + e);
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
    }
    // Close the browser
    driver.quit();