Wednesday, 17 August 2016

Different ways to get all "404 page not found" links using Selenium WebDriver

In this post, I will explain a convenient way to use Selenium WebDriver to collect all the links on a web page and then verify, in several different ways, whether each linked page is a "404" / "Page not found" page.

The following ways can be used to identify 404 links:
By using page title
By using page source
By using response code of page URL     
  
Please find the sample code below.
Sample Code:
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.firefox.FirefoxDriver;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

public class GetAllLinks {
public WebDriver driver;
ArrayList<String> al ;

@BeforeClass
public void setup(){
driver = new FirefoxDriver();
driver.get("http://www.way2selenium.com/");
driver.manage().window().maximize();

}
@Test
public void identifyBrokenAnd404Links() throws MalformedURLException, IOException{

   al = new ArrayList<String>();

 //identifying total number of URls in a page
 List<WebElement> links = driver.findElements(By.tagName("a"));
 System.out.println(links.size());

        //for Getting all links from page
       for (int i = 0;i<links.size(); i++) {
 
    //get one by one URL href value
    String URL=links.get(i).getAttribute("href");
   
    //Removing unwanted URLS based on http or https
    if(links.get(i).getAttribute("href").contains("https")||links.get(i).getAttribute("href").contains("http"))
    {
    System.out.println(URL);
   
     //storing all in URL's in array list
     al.add(URL);
   
    }
       }
 
       //Identifying  broken and 404 links
     
       for(int i=0;i<al.size();i++){
       
        //Navigating each URL
        driver.get(al.get(i));
       
        //getting response Code for the link
       int statusCode= ResponseCode(al.get(i));

        //verifying 404 links using page title
       
        if(driver.getTitle().contains("404")){
       
        System.out.println("404 link is  "+al.get(i));
       
        }
       
        //verifying 404 links using page source
        else if(driver.getPageSource().contains("404 page not found")){
       
        System.out.println("404 link is  "+al.get(i));
       
        }
        //verifying 404 links using status code
        else if(statusCode==404){
       
        System.out.println("404 link is  "+al.get(i));
        }
       
       
       }
}

//method for generating response code for URL
 public static int ResponseCode(String URL) throws MalformedURLException, IOException {    
    URL url = new URL(URL);
    HttpURLConnection huc = (HttpURLConnection) url.openConnection();
    huc.setRequestMethod("GET");
    huc.connect();
    return huc.getResponseCode();
 }

}

10 comments: