I'm working on an XML message process program using dom4j, and encounter a problem with XML node attribute parsing. The business requirement is: parse and validate input XML, if a field or an attribute is invalid, return an XML which structure denotes the error. And I'v snipped a test program below:
import org.dom4j.Document;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.Node;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class XPathStudy {
private final static Logger log = LoggerFactory.getLogger(XPathStudy.class);
private static String xmlInput;
private static void initXmlInput() {
xmlInput = "<RootTag> <BizRec FLAG=\"5\">";
xmlInput += " <FieldOne>11111</FieldOne>";
xmlInput += " <FieldTwo></FieldTwo>";
xmlInput += " <FieldThree>33333</FieldThree>";
xmlInput += " </BizRec> </RootTag>";
}
private static Document makeErrRspsDoc(String xpath, String errCode, String errDesc) {
Document errDoc = DocumentHelper.createDocument();
Element errElem = DocumentHelper.makeElement(errDoc, xpath);
errElem.addElement("ErrCode").addText(errCode);
errElem.addElement("ErrDesc").addText(errDesc);
return errDoc;
}
public static void main(String[] args) throws Exception {
initXmlInput();
log.info("xmlInput = " + xmlInput);
Document doc = DocumentHelper.parseText(xmlInput);
log.info("xmlInput parsed done");
String xpath = "*//FieldTwo";
Node node = doc.getRootElement().selectSingleNode(xpath);
if (node == null) {
log.warn("node [" + xpath + "] not found");
System.exit(1);
}
log.info("node [" + node.getPath() + "] located");
if (node.getText().trim().isEmpty()) {
Document errDoc = makeErrRspsDoc(node.getPath(), "1201", "FieldTwo can not be empty");
log.warn("errDoc: " + errDoc.asXML());
} else {
log.info("FieldTwo validation ok");
}
xpath = "*//@FLAG";
node = doc.getRootElement().selectSingleNode(xpath);
if (node == null) {
log.warn("node [" + xpath + "] not found");
System.exit(1);
}
log.info("node [" + node.getPath() + "] located");
int flagVal = Integer.parseInt(node.getText().trim());
if (flagVal == 1 || flagVal == 2) {
log.info("FLAG " + flagVal + " is ok");
} else {
Document errDoc = makeErrRspsDoc(node.getPath(), "1001", "FLAG attr should be 1 or 2");
log.warn("errDoc: " + errDoc.asXML());
}
}
}
And run it, the log info below:
15:01:08.143 [main] INFO XPathStudy - xmlInput = <RootTag> <BizRec FLAG="5"> <FieldOne>11111</FieldOne> <FieldTwo></FieldTwo> <FieldThree>33333</FieldThree> </BizRec> </RootTag>
15:01:08.203 [main] INFO XPathStudy - xmlInput parsed done
15:01:08.255 [main] INFO XPathStudy - node [/RootTag/BizRec/FieldTwo] located
15:01:08.259 [main] WARN XPathStudy - errDoc: <?xml version="1.0" encoding="UTF-8"?>
<RootTag><BizRec><FieldTwo><ErrCode>1201</ErrCode><ErrDesc>FieldTwo can not be empty</ErrDesc></FieldTwo></BizRec></RootTag>
15:01:08.260 [main] INFO XPathStudy - node [/RootTag/BizRec/@FLAG] located
15:01:08.260 [main] WARN XPathStudy - errDoc: <?xml version="1.0" encoding="UTF-8"?>
<RootTag><BizRec><@FLAG><ErrCode>1001</ErrCode><ErrDesc>FLAG attr should be 1 or 2</ErrDesc></@FLAG></BizRec></RootTag>
Everything seems ok, and the errDoc will be logged to an Oracle10g table (a VARCHAR2(1000) field), the following SQL is ok:
select extractvalue(xmltype('<RootTag><BizRec><FieldTwo><ErrCode>1201</ErrCode><ErrDesc>FieldTwo can not be empty</ErrDesc></FieldTwo></BizRec></RootTag>'),
'*//ErrCode') as err_code from dual;
1201
But this SQL will fail:
select extractvalue(xmltype('<RootTag><BizRec><@FLAG><ErrCode>1001</ErrCode><ErrDesc>FLAG attribute should be 1 or 2</ErrDesc></@FLAG></BizRec></RootTag>'),
'*//ErrCode') as err_code from dual;
Error starting at line 1 in command:
select extractvalue(xmltype('<RootTag><BizRec><@FLAG><ErrCode>1001</ErrCode><ErrDesc>FLAG attribute should be 1 or 2</ErrDesc></@FLAG></BizRec></RootTag>'),
'*//ErrCode') as err_code from dual
Error report:
SQL Error: ORA-31011: XML parsing failed
ORA-19202: Error occurred in XML processing
LPX-00231: invalid character 64 ('@') found in a Name or Nmtoken
Error at line 1
ORA-06512: at "SYS.XMLTYPE", line 301
ORA-06512: at line 1
31011. 00000 - "XML parsing failed"
*Cause: XML parser returned an error while trying to parse the document.
*Action: Check if the document to be parsed is valid.
And Oracle's doc explains that XML's node can not contain special chars. So, my question is: howto change my Java code to solve the problem of the 'FLAG' attribute's error response?
<@FLAG>
is not a legal XML element name, since the @
character cannot be the start of a name. DOM4J is not known for being best in class when it comes to enforcing well formed and valid XML documents.
To communicate the failing node as an XPath expression, consider storing it in an attribute or as a text node.