Search code examples
javaandroidxpatharraylistvtd-xml

How to put nested XML file elements to list of objects with VTD-XML parser?


I have a big, deeply nested XML file. All of its elements and attributes are going to become fields of my objects, and I'm building a list of such objects. I know how to do this with DOM, SAX and XmlPullParser, and it works fine, but I have a problem with the VTD parser: the ListView is empty after parsing. Below are part of the XML file and my code. Maybe someone knows what I am doing wrong.

<MedlineCitationSet>
<MedlineCitation Owner="NLM" Status="MEDLINE">
    <PMID Version="1">10540283</PMID>
    <DateCreated>
        <Year>1999</Year>
        <Month>12</Month>
        <Day>17</Day>
    </DateCreated>
    <Article PubModel="Print">
        <Journal>
            <ISSN IssnType="Print">0950-382X</ISSN>
            <JournalIssue CitedMedium="Print">
                <Volume>34</Volume>
                <Issue>1</Issue>
            </JournalIssue>...

My android code:

try {
        articlesList = new ArrayList<>();

        VTDGen vtdGen = new VTDGen();
        vtdGen.setDoc(bytes);
        vtdGen.parse(false);

        AutoPilot ap = new AutoPilot();
        VTDNav vtdNav = vtdGen.getNav();

        int i = -1;

        ap.bind(vtdNav);
        ap.selectXPath("/MedlineCitationSet/MedlineCitation");

        while ((ap.evalXPath()) != -1) {

            articlesList.add(new Article());
            String year = null, day = null, month = null;
            i++;

            if (vtdNav.hasAttr("Owner"))
                articlesList.get(i).setOwner(vtdNav.toNormalizedString(vtdNav.getAttrVal("Owner")));
            if (vtdNav.hasAttr("Status"))
                articlesList.get(i).setStatus(vtdNav.toNormalizedString(vtdNav.getAttrVal("Status")));

            vtdNav.push();

            AutoPilot ap1 = new AutoPilot();
            ap1.selectXPath("/MedlineCitationSet/MedlineCitation/PMID");
            ap1.bind(vtdNav);

            while ((ap1.evalXPath()) != -1) {
                articlesList.get(i).setPMID(vtdNav.toNormalizedString(vtdNav.getText()));
                articlesList.get(i).setVersion(vtdNav.toNormalizedString(vtdNav.getAttrVal("Version")));
            }

            ap1.resetXPath();
            ap1.selectXPath("/MedlineCitationSet/MedlineCitation/DateCreated");
            ap1.bind(vtdNav);

            while ((ap1.evalXPath() != -1)) {

                vtdNav.push();

                AutoPilot ap1x = new AutoPilot();
                ap1x.selectXPath("/MedlineCitationSet/MedlineCitation/DateCreated/Year");
                ap1x.bind(vtdNav);

                while ((ap1x.evalXPath()) != -1) {
                    year = vtdNav.toNormalizedString(vtdNav.getText());
                }

                ap1x.resetXPath();
                ap1x.selectXPath("/MedlineCitationSet/MedlineCitation/DateCreated/Month");
                ap1x.bind(vtdNav);

                while ((ap1x.evalXPath()) != -1) {
                    month = vtdNav.toNormalizedString(vtdNav.getText());
                }

                ap1x.resetXPath();
                ap1x.selectXPath("/MedlineCitationSet/MedlineCitation/DateCreated/Day");
                ap1x.bind(vtdNav);

                while ((ap1x.evalXPath()) != -1) {
                    day = vtdNav.toNormalizedString(vtdNav.getText());
                }

                articlesList.get(i).setDateCreated(day + "-" + month + "-" + year);

                vtdNav.pop();
            }

            ap1.resetXPath();
            ap1.selectXPath("/MedlineCitationSet/MedlineCitation/Article");
            ap1.bind(vtdNav);

            while ((ap1.evalXPath()) != -1) {

                if (vtdNav.hasAttr("Print"))
                    articlesList.get(i).setPubModel(vtdNav.toNormalizedString(vtdNav.getAttrVal("Print")));

                vtdNav.push();

                AutoPilot ap2 = new AutoPilot();
                ap2.selectXPath("/MedlineCitationSet/MedlineCitation/Article/Journal");
                ap2.bind(vtdNav);

                {
                    vtdNav.push();

                    AutoPilot ap2x = new AutoPilot();
                    ap2x.selectXPath("/MedlineCitationSet/MedlineCitation/Article/Journal/ISSN");
                    ap2x.bind(vtdNav);

                    while ((ap2x.evalXPath()) != -1) {
                        articlesList.get(i).setISSN(vtdNav.toNormalizedString(vtdNav.getText()));
                        articlesList.get(i).setIssnType(vtdNav.toNormalizedString(vtdNav.getAttrVal("IssnType")));
                    }

                    ap2x.resetXPath();
                    ap2x.selectXPath("/MedlineCitationSet/MedlineCitation/Article/Journal/JournalIssue");
                    ap2x.bind(vtdNav);

                    while ((ap2x.evalXPath()) != -1) {

                        articlesList.get(i).setCitedMedium(vtdNav.toNormalizedString(vtdNav.getAttrVal("CitedMedium")));

                        vtdNav.push();

                        AutoPilot ap3 = new AutoPilot();
                        ap3.selectXPath("/MedlineCitationSet/MedlineCitation/Article/Journal/JournalIssue/Volume");
                        ap3.bind(vtdNav);

                        while ((ap3.evalXPath()) != -1) {
                            articlesList.get(i).setVolume(vtdNav.toNormalizedString(vtdNav.getText()));
                        }

                        ap3.resetXPath();
                        ap3.selectXPath("/MedlineCitationSet/MedlineCitation/Article/Journal/JournalIssue/Issue");
                        ap3.bind(vtdNav);

                        while ((ap3.evalXPath()) != -1) {
                            articlesList.get(i).setIssue(vtdNav.toNormalizedString(vtdNav.getText()));
                        }

                        ap3.resetXPath();
                        vtdNav.pop();
                    }...

Thank you for any help!


Solution

  • Below is a code snippet helping you extract the relevant fields in the document. There are issues with your use of AutoPilot.

    For example, I suggest you move all the selectXPath calls out of the while loop, because compiling an XPath expression is a relatively slow operation. Also, if the XML is deeply nested, you should consider calling VTDGen's selectLcDepth and setting it to 5; this helps improve navigation/XPath performance. Below is just a sample of what can be done. For simple XPaths, you can also use VTDNav's native cursor API, which is handier...

    Let me know if you run into any issues...

        // Sample: walk every MedlineCitation element and print selected fields.
        // Each XPath is compiled once, outside the loops, and re-used per citation
        // via resetXPath(). The nested expressions are relative ("PMID", not
        // "/MedlineCitationSet/..."), so they are evaluated from the citation the
        // cursor is currently on.
        VTDGen vtdGen = new VTDGen();
        // Suggested for deeply nested documents; improves navigation/XPath performance.
        vtdGen.selectLcDepth(5);
        if (!vtdGen.parseFile("c:\\xml\\agata.xml", false)) {
            throw new IllegalStateException("Could not parse c:\\xml\\agata.xml");
        }
        VTDNav vn = vtdGen.getNav();

        AutoPilot ap = new AutoPilot();
        AutoPilot ap1 = new AutoPilot();
        AutoPilot ap2 = new AutoPilot();
        AutoPilot ap3 = new AutoPilot();
        ap.bind(vn);
        ap1.bind(vn);
        ap2.bind(vn);
        ap3.bind(vn);
        ap.selectXPath("/MedlineCitationSet/MedlineCitation");
        ap1.selectXPath("PMID");
        ap2.selectXPath("DateCreated");
        ap3.selectXPath("Article");

        // Index of the current citation; in the app this is the position of the
        // Article object being filled, e.g. articlesList.get(i).setOwner(...).
        int i = -1;
        while (ap.evalXPath() != -1) {
            i++;

            // Attributes of <MedlineCitation> itself.
            if (vn.hasAttr("Owner")) {
                System.out.println("Owner==>" + vn.toNormalizedString(vn.getAttrVal("Owner")));
            }
            if (vn.hasAttr("Status")) {
                System.out.println("Status==>" + vn.toNormalizedString(vn.getAttrVal("Status")));
            }

            // <PMID Version="...">text</PMID>
            // push()/pop() save and restore the cursor so the next relative XPath
            // starts from the same MedlineCitation element again.
            vn.push();
            while (ap1.evalXPath() != -1) {
                if (vn.hasAttr("Version")) {
                    System.out.println("Version==>" + vn.toNormalizedString(vn.getAttrVal("Version")));
                }
                System.out.println("PMID==>" + vn.toNormalizedString(vn.getText()));
            }
            ap1.resetXPath();
            vn.pop();

            // <DateCreated><Year/><Month/><Day/></DateCreated>
            // Uses the cursor API instead of XPath. Only step back to the parent
            // when the move to the child succeeded; otherwise the cursor would
            // climb out of DateCreated.
            vn.push();
            while (ap2.evalXPath() != -1) {
                if (vn.toElement(VTDNav.FIRST_CHILD, "Year")) {
                    System.out.println("Year==>" + vn.toNormalizedString(vn.getText()));
                    vn.toElement(VTDNav.PARENT);
                }
                if (vn.toElement(VTDNav.FIRST_CHILD, "Month")) {
                    System.out.println("Month==>" + vn.toNormalizedString(vn.getText()));
                    vn.toElement(VTDNav.PARENT);
                }
                if (vn.toElement(VTDNav.FIRST_CHILD, "Day")) {
                    System.out.println("Day==>" + vn.toNormalizedString(vn.getText()));
                    vn.toElement(VTDNav.PARENT);
                }
            }
            ap2.resetXPath();
            vn.pop();

            // <Article PubModel="...">
            vn.push();
            while (ap3.evalXPath() != -1) {
                if (vn.hasAttr("PubModel")) {
                    System.out.println("PubModel==>" + vn.toNormalizedString(vn.getAttrVal("PubModel")));
                }
            }
            // Reset so the expression is evaluated afresh for the next citation.
            ap3.resetXPath();
            vn.pop();
        }