Search code examples
javaweb-servicesjaxbw3cxmlnodelist

Not able to retrieve Node List from Document when my XML tag contains "_" in it


I have an XML document like this:

<PARENT>
<TAG_1>
<ROLE>Architect </ROLE>
</TAG_1>
<TAG2>
<ROLE>Architect </ROLE>
</TAG2>
</PARENT>

I am using the JAXB framework for marshalling and unmarshalling.

The issue is that when I retrieve an org.w3c.dom.NodeList, it works for TAG2, whose name doesn't contain an underscore, but it does not work for TAG_1, whose name contains an underscore (_).

org.w3c.dom.NodeList nodeList = org.w3c.dom.Document.getElementsByTagName("TAG2")
 returns a list of length 1, which is correct.

org.w3c.dom.NodeList nodeList = org.w3c.dom.Document.getElementsByTagName("TAG_1")
 returns a list of length 0, but it should be 1.

Can anyone suggest what the issue with the underscore might be and how to deal with it? I can't change the XML, because it is supplied by the client.

Thanks


Solution

  • Well, it works for me:

    package com.stackof.helps.nonspring;
    
    import java.io.StringReader;
    
    import javax.xml.parsers.DocumentBuilder;
    import javax.xml.parsers.DocumentBuilderFactory;
    
    import org.junit.Assert;
    import org.junit.Test;
    import org.w3c.dom.Document;
    import org.w3c.dom.Element;
    import org.w3c.dom.NodeList;
    import org.xml.sax.InputSource;
    
    /**
     * Checks that {@code getElementsByTagName} locates an element whose name
     * contains an underscore ({@code TAG_1}) just as it locates one whose name
     * does not ({@code TAG1}).
     *
     * @author samuele m.
     */
    public class MixedTest
    {
        /**
         * Parses a small in-memory document and verifies that both {@code TAG1}
         * and {@code TAG_1} are found exactly once, that each holds exactly one
         * {@code ROLE} child, and that the trimmed role texts match.
         *
         * @throws Exception if the parser cannot be created or parsing fails
         */
        @Test
        public void testPathItem() throws Exception {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            factory.setIgnoringElementContentWhitespace(true);
            factory.setIgnoringComments(true);
            DocumentBuilder builder = factory.newDocumentBuilder();

            String source = "<PARENT><TAG_1><ROLE>Architect </ROLE></TAG_1><TAG1><ROLE>Engineer </ROLE></TAG1></PARENT>";
            Document document = builder.parse(new InputSource(new StringReader(source)));

            // Look up both tag names before asserting anything about either.
            NodeList plainTags = document.getElementsByTagName("TAG1");
            NodeList underscoreTags = document.getElementsByTagName("TAG_1");
            Assert.assertEquals(1, plainTags.getLength());
            Assert.assertEquals(1, underscoreTags.getLength());

            NodeList engineerRoles = rolesOfFirst(plainTags);
            NodeList architectRoles = rolesOfFirst(underscoreTags);
            Assert.assertEquals(1, engineerRoles.getLength());
            Assert.assertEquals(1, architectRoles.getLength());

            // The source text carries a trailing space, hence the trimming.
            String engineer = trimmedFirstText(engineerRoles);
            String architect = trimmedFirstText(architectRoles);
            Assert.assertEquals("Architect", architect);
            Assert.assertEquals("Engineer", engineer);
        }

        /** Returns the {@code ROLE} descendants of the first node in {@code tags}. */
        private static NodeList rolesOfFirst(NodeList tags) {
            Element first = (Element) tags.item(0);
            return first.getElementsByTagName("ROLE");
        }

        /** Returns the text content of the first node in {@code nodes}, trimmed. */
        private static String trimmedFirstText(NodeList nodes) {
            return nodes.item(0).getTextContent().trim();
        }
    }
    

    Please check your own code: since the lookup itself works, something else must be wrong (perhaps the file encoding?).