Search code examples
java xml sax saxparser

Parsing a value containing the special character "/" gives wrong output with the SAX parser


I have the XML structure below:

  <fs:AsReportedItem>
     <fs:BookMark>/BODY[1]/DIV[3135]/DIV[0]/TABLE[0]/TBODY[0]/TR[32]/TD[5]/DIV[0]/FONT[0]/substr(1,2)
     </fs:BookMark>
  </fs:AsReportedItem>

I am parsing it with SAX and reading the element's text value in the endElement() method.

Here is my sample code

/**
 * Parses the input identified by {@code FileName}, passing this object to the
 * parser as the handler that receives the callbacks.
 *
 * <p>Failures are reported on standard output and not propagated. Each report
 * includes the exception itself, so the underlying cause is visible instead of
 * only a fixed label.
 */
private void parseDocument() {
    SAXParserFactory factory = SAXParserFactory.newInstance();
    try {
        SAXParser parser = factory.newSAXParser();
        parser.parse(FileName, this);
    } catch (ParserConfigurationException e) {
        System.out.println("ParserConfig error: " + e);
    } catch (SAXException e) {
        // The handler callbacks are declared to throw SAXException as well, so this
        // is not necessarily a well-formedness problem; print the exception so the
        // actual reason is visible.
        System.out.println("SAXException : xml not well formed: " + e);
    } catch (IOException e) {
        System.out.println("IO error: " + e);
    }
}

/**
 * Handler callback for the start of an element.
 *
 * <p>Forwards the element name and attributes to
 * {@code FinancialStatementLineItemParser.startFinancialStatementLineItemParser}
 * only while the current partition object exists and its type is
 * {@code fs:FinancialStatementLineItemDataItem}; otherwise does nothing.
 *
 * @param s           first callback argument, unused here
 * @param s1          second callback argument, unused here
 * @param elementName name of the element being opened, passed on to the line-item parser
 * @param attributes  attributes of the element, passed on to the line-item parser
 * @throws SAXException declared by the callback signature; not thrown directly here
 */
public void startElement(String s, String s1, String elementName, Attributes attributes) throws SAXException {
    // No partition object: nothing to dispatch.
    if (OrgDataPartitonObj == null) {
        return;
    }

    // Only line-item partitions are handled by this dispatcher.
    boolean isLineItemPartition =
            "fs:FinancialStatementLineItemDataItem".equals(OrgDataPartitonObj.getType());
    if (!isLineItemPartition) {
        return;
    }

    FinancialStatementLineItemParser.startFinancialStatementLineItemParser(
            OrgDataPartitonObj, financialStatementLineItemObj, elementName, attributes);
}


/**
 * Handler callback for the end of an element.
 *
 * <p>While the current partition object exists and its type is
 * {@code fs:FinancialStatementLineItemDataItem}, hands the closing element's name
 * and the current {@code tmpValue} to
 * {@code FinancialStatementLineItemParser.getEndElementFinancialStatementLineItemParser}.
 * {@code tmpValue} is left untouched by this method.
 *
 * @param s       first callback argument, unused here
 * @param s1      second callback argument, unused here
 * @param element name of the element being closed
 * @throws SAXException declared by the callback signature; not thrown directly here
 */
public void endElement(String s, String s1, String element) throws SAXException {
    // No partition object: nothing to dispatch.
    if (OrgDataPartitonObj == null) {
        return;
    }

    // Only line-item partitions are handled by this dispatcher.
    boolean isLineItemPartition =
            "fs:FinancialStatementLineItemDataItem".equals(OrgDataPartitonObj.getType());
    if (!isLineItemPartition) {
        return;
    }

    FinancialStatementLineItemParser.getEndElementFinancialStatementLineItemParser(
            financialStatementLineItemObj, element, tmpValue);
}


/**
 * Stores the collected text on the line item when the element that just closed
 * is {@code fs:BookMark}; any other element name is ignored.
 *
 * @param financialStatementLineItemObj line item that receives the bookmark value
 * @param element                       name of the element that just closed; may be
 *                                      {@code null}, in which case nothing happens
 * @param tmpValue                      text collected for the element, stored as-is
 */
public static void getEndElementFinancialStatementLineItemParser(FinancialStatementLineItem financialStatementLineItemObj, String element, String tmpValue) {
    // Literal-first comparison: a null element name simply fails the match instead
    // of raising a NullPointerException.
    if ("fs:BookMark".equals(element)) {
        financialStatementLineItemObj.setBookMark(tmpValue);
    }
}
   /**
    * Handler callback for character data.
    *
    * <p>Replaces {@code tmpValue} with the text delivered in this call. Anything
    * {@code tmpValue} held from an earlier call is discarded, so after several
    * calls only the most recent chunk remains.
    *
    * @param buffer character array holding the text
    * @param start  index of the first character of this chunk within {@code buffer}
    * @param length number of characters in this chunk
    */
   @Override
   public void characters(char[] buffer, int start, int length) {
       // Plain assignment: the previous content of tmpValue is overwritten.
       tmpValue = String.valueOf(buffer, start, length);
   }

When I debug, I can see only the value /substr(1,2); all the preceding parts containing "/" seem to have been escaped (dropped).

I don't know why I am not getting the full value /BODY[1]/DIV[3135]/DIV[0]/TABLE[0]/TBODY[0]/TR[32]/TD[5]/DIV[0]/FONT[0]/substr(1,2)

If an escape character is needed, where do I have to use it?


Solution

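  • Just change the characters() method so that it appends instead of overwriting. A SAX parser may deliver the text of one element in several characters() calls, so each chunk has to be added to what was already collected: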

    /**
     * Handler callback for character data.
     *
     * <p>Appends the text delivered in this call to {@code tmpValue}, so text that
     * arrives over several calls accumulates rather than being overwritten.
     *
     * @param buffer character array holding the text
     * @param start  index of the first character of this chunk within {@code buffer}
     * @param length number of characters in this chunk
     */
    @Override
    public void characters(char[] buffer, int start, int length) {
        String chunk = String.valueOf(buffer, start, length);
        // Accumulate: keep what was collected so far and add the new chunk at the end.
        tmpValue = tmpValue + chunk;
    }
    

    And reset tmpValue on the last line of the endElement method, so that text from one element does not leak into the next:

    /**
     * Handler callback for the end of an element.
     *
     * <p>While the current partition object exists and its type is
     * {@code fs:FinancialStatementLineItemDataItem}, hands the closing element's name
     * and the accumulated {@code tmpValue} to
     * {@code FinancialStatementLineItemParser.getEndElementFinancialStatementLineItemParser}.
     * Afterwards {@code tmpValue} is reset to the empty string, whether or not the
     * element was dispatched.
     *
     * @param s       first callback argument, unused here
     * @param s1      second callback argument, unused here
     * @param element name of the element being closed
     * @throws SAXException declared by the callback signature; not thrown directly here
     */
    public void endElement(String s, String s1, String element) throws SAXException {
        boolean inLineItemPartition = OrgDataPartitonObj != null
                && "fs:FinancialStatementLineItemDataItem".equals(OrgDataPartitonObj.getType());

        if (inLineItemPartition) {
            FinancialStatementLineItemParser.getEndElementFinancialStatementLineItemParser(
                    financialStatementLineItemObj, element, tmpValue);
        }

        // Start the next element with an empty accumulator.
        tmpValue = "";
    }