#include <iostream>
#include <string>

#include <libxml++/libxml++.h>
/**
 * Parses `xml_string` as an XML document, evaluates `xpath` against its root
 * element and returns the matching nodes.
 *
 * As a debugging aid the function prints the number of matches, the source
 * line of the first match and the non-whitespace text of the first match to
 * std::cout.
 *
 * Namespaces: XPath 1.0 has no default namespace, so an unprefixed step such
 * as `/html/body` never matches elements that live in a namespace (for
 * example XHTML). When the root element has a namespace URI it is registered
 * under the prefix `x`, so callers can write `/x:html/x:body/...`.
 *
 * Lifetime: the returned nodes are owned by the parsed document. The parser
 * is kept alive between calls, so the pointers stay usable until the next
 * call to xmlP() on the same thread, which replaces the document.
 *
 * @param xml_string The XML/XHTML text to parse.
 * @param xpath      The XPath expression, evaluated relative to the root element.
 * @return The matching nodes; empty when the document has no root element or
 *         nothing matches.
 */
xmlpp::NodeSet xmlP(std::string xml_string, std::string xpath) {
    // A function-local parser would destroy the document (and every node in
    // the result) on return, so the parser has to outlive the call.
    static thread_local xmlpp::DomParser parser;
    parser.parse_memory(xml_string);

    xmlpp::Document* document = parser.get_document();
    xmlpp::Element* root = document ? document->get_root_node() : nullptr;
    if (!root) {
        return xmlpp::NodeSet();
    }

    // Bind the root's namespace (if any) to the prefix "x" for the XPath query.
    xmlpp::Node::PrefixNsMap namespaces;
    const auto root_namespace = root->get_namespace_uri();
    if (!root_namespace.empty()) {
        namespaces["x"] = root_namespace;
    }

    xmlpp::NodeSet matches = root->find(xpath, namespaces);
    std::cout << matches.size() << std::endl;
    if (matches.empty()) {
        // Nothing to inspect; indexing matches[0] here would be undefined behaviour.
        return matches;
    }

    const xmlpp::Node* first = matches[0];
    std::cout << first->get_line() << std::endl;

    if (const auto* element = dynamic_cast<const xmlpp::Element*>(first)) {
        // An element (e.g. <script>) carries its text in child nodes, not in
        // the element itself, so walk the children and print their text.
        for (const xmlpp::Node* child : element->get_children()) {
            const xmlpp::ContentNode* text = dynamic_cast<const xmlpp::TextNode*>(child);
            if (!text) {
                text = dynamic_cast<const xmlpp::CdataNode*>(child);
            }
            // Skip indentation-only text nodes.
            if (text && !text->is_white_space()) {
                std::cout << text->get_content() << std::endl;
            }
        }
    } else if (const auto* content = dynamic_cast<const xmlpp::ContentNode*>(first)) {
        // The XPath selected a text-like node directly (e.g. ".../text()").
        if (!content->is_white_space()) {
            std::cout << content->get_content() << std::endl;
        }
    }

    return matches;
}
The xml_string is something like this:
// Sample XHTML page used as input. It is written as a raw string literal
// because an ordinary string literal cannot span several source lines; the
// raw form also lets the markup keep its double quotes unescaped.
std::string xml_strings(R"xml(<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html lang="en" xml:lang="en" xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Demo page</title></head>
<body>
<div class="item">
<div class="row">
<div class="col-xs-8">Item</div>
<div class="col-xs-4 value">
<script type="text/javascript">fruit('orange');</script>
</div></div></div>
</body></html>)xml");
The function is called with the page and the XPath expression like this: xmlpp::NodeSet xmlNodes = xmlP(xml_strings, "/html/body/div/div/div[2]/script");
The problem is that I couldn't get the text inside the <script> tag. I tried dynamic_cast'ing to ContentNode, but nothing helped.
Is libxml++ worth it, or do I need to solve my problem with another XML library?
I would appreciate any suggestions that can get me the text value from the <script> tag.
I tried reproducing your issue locally and could not get root->find(xpath) to produce any nodes.
According to this issue, you need to tell XPath which namespace your nodes are under, even if it is the default namespace.
I changed the XPath string and the find invocation as follows:
// XPath 1.0 has no default namespace, so every step needs an explicit prefix;
// "x" is bound here to the namespace URI of the root element.
std::string xpath("/x:html/x:body/x:div/x:div/x:div[2]/x:script");
xmlpp::Node::PrefixNsMap nsMap = {{"x", root->get_namespace_uri()}};
xmlpp::Node::NodeSet elemns = root->find(xpath, nsMap);
// The result is empty when nothing matches, so do not index it blindly.
xmlpp::Node* element = elemns.empty() ? nullptr : elemns[0];
const auto nodeText = dynamic_cast<const xmlpp::Element*>(element);
// get_first_child_text() yields a null pointer when the element has no text child.
const xmlpp::TextNode* text = nodeText ? nodeText->get_first_child_text() : nullptr;
if (text) {
    std::cout << text->get_content() << std::endl;
}