Search code examples
java arraylist jsoup store

How can I store data, using Jsoup


My problem is the following: I fetched some data from a website using Jsoup (the Jsoup code below also comes from this site):

    /**
     * Fetches the page at a hard-coded URL and dumps the text of its table
     * cells to standard output.
     */
    public class Kereso {

        /**
         * Downloads the document, selects the table rows and prints the text of
         * every {@code td} cell on its own line, followed by one empty line
         * after each row.
         *
         * @param args command-line arguments (not used)
         * @throws IOException if the page cannot be fetched
         */
        public static void main(String[] args) throws IOException {
            String url = "http://www.szerencsejatek.hu/xls/otos.html";
            Document page = Jsoup.connect(url).get();

            // Two-step selection: first every table, then the rows matched
            // by the row selector inside those tables.
            Elements tableRows = page.select("table").select(":not(thead) tr");

            for (int r = 0; r < tableRows.size(); r++) {
                Elements cells = tableRows.get(r).select("td");
                for (int c = 0; c < cells.size(); c++) {
                    System.out.println(cells.get(c).text());
                }
                // Blank separator line after each row, even one without td cells.
                System.out.println();
            }
        }
    }

Every row that I get from the website should become an object, and I want to store all of these objects in an ArrayList.

This is the class for the objects, and the data they need:

/**
 * Immutable value holder for one row of the scraped results table.
 *
 * <p>Field names are Hungarian. Literal translations: {@code ev} = year,
 * {@code het} = week, {@code huzasdatum} = draw date, {@code otos} /
 * {@code negyes} / {@code harmas} / {@code kettes} = five / four / three / two,
 * the {@code Db} suffix = count, {@code szam} = number.
 * NOTE(review): presumably the {@code *Db} fields are winner counts and the
 * matching fields without the suffix are prize amounts; confirm against the
 * source table's header row.
 */
public class Huzas {

    // Textual columns are kept exactly as scraped (no parsing or normalisation).
    private final String ev;
    private final String het;
    private final String huzasdatum;
    private final String otosDb;
    private final String otos;
    private final String negyesDb;
    private final String negyes;
    private final String harmasDb;
    private final String harmas;
    private final String kettesDb;
    private final String kettes;

    // The five numeric columns of the row.
    private final int szam1;
    private final int szam2;
    private final int szam3;
    private final int szam4;
    private final int szam5;

    /**
     * Creates a row object; every argument is stored unchanged in the field of
     * the same name.
     *
     * @param ev         year column
     * @param het        week column
     * @param huzasdatum draw-date column
     * @param otosDb     "five" count column
     * @param otos       "five" column
     * @param negyesDb   "four" count column
     * @param negyes     "four" column
     * @param harmasDb   "three" count column
     * @param harmas     "three" column
     * @param kettesDb   "two" count column
     * @param kettes     "two" column
     * @param szam1      first number
     * @param szam2      second number
     * @param szam3      third number
     * @param szam4      fourth number
     * @param szam5      fifth number
     */
    public Huzas(String ev, String het, String huzasdatum, String otosDb, String otos, String negyesDb, String negyes, String harmasDb, String harmas, String kettesDb, String kettes, int szam1, int szam2, int szam3, int szam4, int szam5) {
        this.ev = ev;
        this.het = het;
        this.huzasdatum = huzasdatum;
        this.otosDb = otosDb;
        this.otos = otos;
        this.negyesDb = negyesDb;
        this.negyes = negyes;
        this.harmasDb = harmasDb;
        this.harmas = harmas;
        this.kettesDb = kettesDb;
        this.kettes = kettes;
        this.szam1 = szam1;
        this.szam2 = szam2;
        this.szam3 = szam3;
        this.szam4 = szam4;
        this.szam5 = szam5;
    }

    /**
     * Returns all fields in declaration order as {@code Huzas{name=value, ...}},
     * so that printing a collection of rows shows their data rather than
     * identity hashes.
     *
     * @return a human-readable representation of this row
     */
    @Override
    public String toString() {
        return "Huzas{ev=" + ev
                + ", het=" + het
                + ", huzasdatum=" + huzasdatum
                + ", otosDb=" + otosDb
                + ", otos=" + otos
                + ", negyesDb=" + negyesDb
                + ", negyes=" + negyes
                + ", harmasDb=" + harmasDb
                + ", harmas=" + harmas
                + ", kettesDb=" + kettesDb
                + ", kettes=" + kettes
                + ", szam1=" + szam1
                + ", szam2=" + szam2
                + ", szam3=" + szam3
                + ", szam4=" + szam4
                + ", szam5=" + szam5
                + "}";
    }
}

Is it possible to store them that way? And if so, how?


Solution

  • Since that site has simple, well-structured HTML, you could simply do it like this:

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
    
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;
    import org.jsoup.select.Elements;
    
    /**
     * Scrapes the table at a hard-coded URL into a list of {@link Huzas}
     * objects and prints that list.
     */
    public class Kereso {

        /** Minimum number of cells a row needs to be mapped onto a Huzas. */
        private static final int COLUMN_COUNT = 16;

        /**
         * Fetches the page, converts every table row after the first (head) row
         * into a {@link Huzas} and prints the resulting list.
         *
         * <p>Rows that have fewer than {@value #COLUMN_COUNT} cells, or whose
         * number cells are not valid integers, are reported on standard error
         * and skipped instead of aborting the whole run.
         *
         * @param args command-line arguments (not used)
         * @throws IOException if the page cannot be fetched
         */
        public static void main(String[] args) throws IOException {
            String html = "http://www.szerencsejatek.hu/xls/otos.html";
            List<Huzas> listOfHuzas = new ArrayList<Huzas>();

            Document doc = Jsoup.connect(html).get();
            Elements rows = doc.select("tr");
            // Start at index 1 to skip the head row; unlike rows.remove(0) this
            // does not throw when the page contains no rows at all.
            for (int i = 1; i < rows.size(); i++) {
                Elements children = rows.get(i).children();
                if (children.size() < COLUMN_COUNT) {
                    System.err.println("Skipping row " + i + ": expected at least "
                            + COLUMN_COUNT + " cells but found " + children.size());
                    continue;
                }
                try {
                    listOfHuzas.add(toHuzas(children));
                } catch (NumberFormatException e) {
                    System.err.println("Skipping row " + i
                            + ": number cell is not an integer (" + e.getMessage() + ")");
                }
            }
            System.out.println(listOfHuzas);
        }

        /**
         * Maps the cells of one row onto a Huzas: cells 0-10 are passed as
         * text, cells 11-15 are parsed as integers.
         *
         * @param children the row's cells; the caller guarantees at least
         *                 {@value #COLUMN_COUNT} of them
         * @return the row as a Huzas
         * @throws NumberFormatException if any of cells 11-15 is not an integer
         */
        private static Huzas toHuzas(Elements children) {
            return new Huzas(children.get(0).text(), // ev
                    children.get(1).text(), // het
                    children.get(2).text(), // huzasdatum
                    children.get(3).text(), // otosDb
                    children.get(4).text(), // otos
                    children.get(5).text(), // negyesDb
                    children.get(6).text(), // negyes
                    children.get(7).text(), // harmasDb
                    children.get(8).text(), // harmas
                    children.get(9).text(), // kettesDb
                    children.get(10).text(), // kettes
                    Integer.parseInt(children.get(11).text()), // szam1
                    Integer.parseInt(children.get(12).text()), // szam2
                    Integer.parseInt(children.get(13).text()), // szam3
                    Integer.parseInt(children.get(14).text()), // szam4
                    Integer.parseInt(children.get(15).text())); // szam5
        }
    }
    

    Since every row had exactly 16 columns and all the int fields had values, I just indexed the child elements directly for simplicity. You may want to add more length checks or error handling here.