Search code examples
c parsing xhtml libxml2

XML parsing with libxml in C is not showing the attribute value


I am trying to parse an XHTML file and get its attributes and their values, using libxml.

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <libxml/HTMLparser.h>
#include <libxml/xmlmemory.h> 
#include <libxml/tree.h>
#include <libxml/parser.h>

/*
 * Walk a_node and all of its following siblings, recursing into each node's
 * children, and print every node name together with the entries of its
 * attribute list.
 */
void walkTree(xmlNode * a_node)
{
  xmlNode *cur_node = NULL;
  xmlAttr *cur_attr = NULL;
  for (cur_node = a_node; cur_node; cur_node = cur_node->next) {
     // Print the node's name, then every attribute attached to it.
     printf("Got tag : %s\n", cur_node->name);
     for (cur_attr = cur_node->properties; cur_attr; cur_attr = cur_attr->next) {
     printf("  -> with attribute : %s\n", cur_attr->name);
     // NOTE: this prints the *name* of the attribute's first child node, not
     // the attribute's value; for the sample input it prints "text".
     printf("     -> with Value: %s\n", (cur_attr->children)->name);
}
// Depth-first: descend into this node's children before the next sibling.
walkTree(cur_node->children);
}
}
int main(void)
{  
// Load XHTML
char *data;
data = "<html><body class=\"123\" damn=\"123\"></html>";

int len = strlen(data) + 1;
htmlParserCtxtPtr parser = htmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL, 0);
htmlCtxtUseOptions(parser, HTML_PARSE_NOBLANKS | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET);
htmlParseChunk(parser, data, len, 0);
htmlParseChunk(parser, NULL, len, 1);
walkTree(xmlDocGetRootElement(parser->myDoc));

 }

I was expecting this output

 Got tag: html
Got tag: body
-> with attribute: class
  -> with value: 123
-> with attribute: damn
  -> with value: 123

But unfortunately I got this output:

 Got tag: html
Got tag: body
-> with attribute: class
  -> with value: text
-> with attribute: damn
  -> with value: text

I have tried other HTML snippets too; no matter what the attribute value is, it always shows "text" instead of the value.

Why? How can I fix it and get the real attribute value?


Solution

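  • This can be fixed very easily by modifying one line of code. `cur_attr->children->name` is the name of the attribute's child text node, which is always "text"; the value you want is that node's content. Use xmlNodeGetContent() to read it: it takes a single node and also works on an attribute node (an xmlAttr passed as an xmlNode), unlike xmlGetProp(), which needs the owning element (xmlNodePtr) plus the attribute name. So xmlNodeGetContent() is the most convenient way to get an attribute's value when you already hold the xmlNode or xmlAttr. The returned string is allocated, so release it with xmlFree() when you are done. Hope that helps :)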

    Here is the solution:

     #include <stdio.h>
     #include <string.h>
     #include <stdlib.h>
     #include <libxml/HTMLparser.h>
     #include <libxml/xmlmemory.h>
     #include <libxml/tree.h>
     #include <libxml/parser.h>
     /*
      * Walk a_node and all of its following siblings, recursing into each
      * node's children, and print every node name together with the name and
      * value of each of its attributes.
      *
      * a_node may be NULL, in which case nothing is printed.
      */
     void walkTree(xmlNode * a_node)
     {
        for (xmlNode *cur_node = a_node; cur_node; cur_node = cur_node->next) {
           printf("Got tag : %s\n", cur_node->name);

           /* Only element nodes carry an attribute list; other node kinds
            * must not have their properties field interpreted. */
           if (cur_node->type == XML_ELEMENT_NODE) {
              for (xmlAttr *cur_attr = cur_node->properties; cur_attr; cur_attr = cur_attr->next) {
                 printf("  -> with attribute : %s\n", cur_attr->name);

                 /* This is the fix: ask libxml for the attribute's content
                  * instead of printing the name of its child text node.
                  * The result is a freshly allocated copy owned by us. */
                 xmlChar *value = xmlNodeGetContent((xmlNode *)cur_attr);

                 /* Passing NULL to %s is undefined, so fall back to "". */
                 printf("     -> with Value: %s\n", value ? (const char *)value : "");
                 xmlFree(value);
              }
           }

           /* Depth-first: descend before moving on to the next sibling. */
           walkTree(cur_node->children);
        }
     }
    int main(void)
    {
    // Load XHTML
    char *data;
    data = "<html><body class=\"123\" damn=\"123\"></html>";
    
        int len = strlen(data) + 1;
        htmlParserCtxtPtr parser = htmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL, 0);
         htmlCtxtUseOptions(parser, HTML_PARSE_NOBLANKS | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET);
         htmlParseChunk(parser, data, len, 0);
         htmlParseChunk(parser, NULL, len, 1);
         walkTree(xmlDocGetRootElement(parser->myDoc));
    
    }