Search code examples
java htmlunit

How to scrape the images from web pages?


I am using HtmlUnit to scrape the images from web pages. I am a beginner with HtmlUnit. I have written the code below, which loads a page and prints its text, but I don't know how to get the images.

import java.io.*;
import java.net.URL;
import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlPage;

/**
 * Loads a web page with HtmlUnit and prints its text content to standard output.
 */
public class urlscrap {

    /**
     * Fetches http://www.google.com and prints the text of the returned page.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the URL is malformed or HtmlUnit fails to load the page
     */
    public static void main(String[] args) throws Exception
    {
        WebClient webClient = new WebClient();
        try {
            HtmlPage currentPage = (HtmlPage) webClient.getPage(new URL("http://www.google.com"));

            System.out.println(currentPage.asText());
        } finally {
            // Always release the client, even when loading the page throws.
            // NOTE(review): newer HtmlUnit releases appear to replace
            // closeAllWindows() with close() -- confirm against the version in use.
            webClient.closeAllWindows();
        }
    }
}

Solution

  • Does this work for you? It selects every <img> element on the page with an XPath query and prints each one's src attribute.

    import java.net.URL;
    import java.util.List;
    
    import com.gargoylesoftware.htmlunit.WebClient;
    import com.gargoylesoftware.htmlunit.html.HtmlImage;
    import com.gargoylesoftware.htmlunit.html.HtmlPage;
    
    /**
     * Loads a web page with HtmlUnit and prints the {@code src} attribute of
     * every {@code <img>} element found on it.
     */
    public class urlscrap {

        /**
         * Fetches http://www.google.com, selects all image elements with the
         * XPath expression {@code //img}, and prints each image's {@code src}
         * attribute on its own line.
         *
         * @param args command-line arguments (unused)
         * @throws Exception if the URL is malformed or HtmlUnit fails to load the page
         */
        public static void main(String[] args) throws Exception
        {
            WebClient webClient = new WebClient();
            try {
                HtmlPage currentPage = (HtmlPage) webClient.getPage(new URL("http://www.google.com"));

                // Get the list of all <img> elements on the page.
                final List<?> images = currentPage.getByXPath("//img");
                for (Object imageObject : images) {
                    // getByXPath returns an untyped list, so each hit is cast to HtmlImage.
                    HtmlImage image = (HtmlImage) imageObject;
                    // Prints the attribute value as written in the markup.
                    System.out.println(image.getSrcAttribute());
                }
            } finally {
                // Always release the client, even when loading the page throws.
                // NOTE(review): newer HtmlUnit releases appear to replace
                // closeAllWindows() with close() -- confirm against the version in use.
                webClient.closeAllWindows();
            }
        }
    }