Search code examples
javahtmlunit

Access html table with htmlunit


I would like to access the table contained in an HTML file. Here is my code:

  import java.io.*; 
  import com.gargoylesoftware.htmlunit.html.HtmlPage;
  import com.gargoylesoftware.htmlunit.html.HtmlTable;
  import com.gargoylesoftware.htmlunit.html.*;
  import com.gargoylesoftware.htmlunit.WebClient;


  // Loads a page with HtmlUnit, tries to grab the <div id="table-matches-time">
  // element, and dumps its string form to index.txt.
  public class test {

  // Entry point; any failure is propagated to the JVM via "throws Exception".
  public static void main(String[] args) throws Exception {

    WebClient client = new WebClient();
    HtmlPage currentPage = client.getPage("http://www.mysite.com");
    // Give background JavaScript a chance to finish before querying the DOM
    // (10000 is presumably milliseconds -- confirm against the HtmlUnit docs).
    client.waitForBackgroundJavaScript(10000);



// NOTE(review): this cast is the source of the reported ClassCastException --
// per the error message, getByXPath hands back a java.util.ArrayList of
// matches, not a single HtmlDivision.
final HtmlDivision div = (HtmlDivision) currentPage.getByXPath("//div[@id='table-matches-time']");

   String textSource = div.toString();
    //String textSource = currentPage.asXml();

// Write the captured text to index.txt in the working directory.
FileWriter fstream = new FileWriter("index.txt");
BufferedWriter out = new BufferedWriter(fstream);
out.write(textSource);

out.close();

    client.closeAllWindows();

  }

 }

The table is in this form:

   <div id="table-matches-time" class="">
                    <table class=" table-main">

But I get this error:

 Exception in thread "main" java.lang.ClassCastException: java.util.ArrayList cannot be cast to com.gargoylesoftware.htmlunit.html.HtmlDivision
at test.main(test.java:20)

How can I read this table?


Solution

  • This works (and gives me a CSV file ;)):

        import java.io.*; 
        import com.gargoylesoftware.htmlunit.html.HtmlPage;
        import com.gargoylesoftware.htmlunit.html.HtmlTable;
        import com.gargoylesoftware.htmlunit.html.HtmlTableRow;
        import com.gargoylesoftware.htmlunit.html.*;
        import com.gargoylesoftware.htmlunit.WebClient;
    
    
        /**
         * Scrapes the {@code table-main} tables from a page with HtmlUnit and
         * writes their cells to {@code index.txt} as comma-separated rows.
         */
        public class test {

            /** Upper bound on how many matching tables are exported. */
            private static final int MAX_TABLES = 2;

            /**
             * Loads the page, waits for background JavaScript, then writes every
             * cell of the first {@link #MAX_TABLES} matching tables to
             * {@code index.txt}: one line per table row, each cell followed by a
             * comma.
             *
             * <p>If fewer than {@link #MAX_TABLES} tables match, only those
             * present are written (no {@code IndexOutOfBoundsException}).
             *
             * @param args unused
             * @throws Exception if the page cannot be loaded or the file cannot
             *                   be written
             */
            public static void main(String[] args) throws Exception {
                final WebClient client = new WebClient();
                try {
                    final HtmlPage currentPage = client.getPage("http://www.mysite.com");
                    client.waitForBackgroundJavaScript(10000);

                    final BufferedWriter out = new BufferedWriter(new FileWriter("index.txt"));
                    try {
                        int written = 0;
                        // Evaluate the XPath once and walk the matches, instead of
                        // re-running the query for every index.
                        for (final Object node : currentPage.getByXPath("//table[@class=' table-main']")) {
                            if (written == MAX_TABLES) {
                                break;
                            }
                            final HtmlTable table = (HtmlTable) node;
                            for (final HtmlTableRow row : table.getRows()) {
                                for (final HtmlTableCell cell : row.getCells()) {
                                    out.write(cell.asText() + ',');
                                }
                                out.write('\n');
                            }
                            written++;
                        }
                    } finally {
                        // Flush and release the file even if scraping fails midway.
                        out.close();
                    }
                } finally {
                    // Always release the browser windows, including on failure.
                    client.closeAllWindows();
                }
            }

        }