Search code examples
c  xml-parsing  libxml2

How to find the number of occurrences of a specific tag in an XML file using C with the libxml2 library?


<?xml version='1.0' encoding='utf-8'?>
<UnlimitRectCubes>
    <RectCubes>
        <depth>0</depth>
        <rectangle>
            <height>0</height>
            <width>0</width>
        </rectangle>
    </RectCubes>
    <RectCubes>
        <depth>1</depth>
        <rectangle>
            <height>11</height>
            <width>101</width>
        </rectangle>
    </RectCubes>
    <RectCubes>
        <depth>2</depth>
        <rectangle>
            <height>22</height>
            <width>202</width>
        </rectangle>
    </RectCubes>
</UnlimitRectCubes>

Note: I've also included the .xml file above.

/*
 * Question's original attempt at counting the nodes named *findStr.
 *
 * For the sibling list that starts at a_node, it counts the nodes whose
 * name equals the key and prints that count, then calls itself on the
 * children of every sibling. Each call starts its own `len` at 0 and the
 * function returns nothing, so every visited sibling list prints its own
 * separate count instead of contributing to one total.
 *
 * a_node:  first node of a sibling list; may be NULL (both loops are then
 *          skipped and 0 is printed).
 * findStr: pointer to the tag name to look for; dereferenced unconditionally.
 */
void print_element_names(xmlNode *a_node, char **findStr)
{
    xmlNode *cur_node = NULL;
    char *key = *findStr;
    cur_node = a_node;
    int len = 0;

/* Pass 1: count the nodes on this level (a_node and its following
 * siblings) whose name matches the key. Node type is not checked here. */
while (cur_node)
{
 if ((!xmlStrcmp(cur_node->name, (const xmlChar *)key)))
    {
        len++; //len is int type
    }
  cur_node = cur_node->next;
}
  /* Printed once per call, i.e. once per sibling list visited, including
   * the empty (NULL) child lists, which print 0. No separator is written. */
  printf("%d", len);
   //int i = 0;
//xmlChar *array = (xmlChar *)calloc(1, sizeof(xmlChar));
/* Pass 2: walk the same sibling list again and descend into each node. */
for (cur_node = a_node; cur_node; cur_node = cur_node->next)
{
    if ((cur_node->type == XML_ELEMENT_NODE))
    {
        /* The match below currently has no effect: its only statement is
         * commented out. */
        if ((!xmlStrcmp(cur_node->name, (const xmlChar *)key)))
        {
            //printf("%s", cur_node->children->content);
        }
    }
    /* Recurses for every sibling regardless of node type; the callee's
     * count is not handed back, so `len` here never includes it. */
    print_element_names(cur_node->children, findStr);
}
}

/*
 * Parse the XML file named by argv[1] and run print_element_names() on its
 * root element, looking for the tag "depth".
 *
 * Returns 0 on success and 1 when the argument count is wrong or the file
 * cannot be parsed. Diagnostics go to stderr so they do not mix with the
 * counts printed on stdout.
 */
int main(int argc, char **argv)
{
    xmlDoc *doc = NULL;           /* an xml document */
    xmlNode *root_element = NULL; /* a node in a xml tree */

    if (argc != 2)
    {
        /* Say why we are exiting instead of failing silently. */
        fprintf(stderr, "usage: %s <file.xml>\n", argc > 0 ? argv[0] : "prog");
        return 1;
    }

    LIBXML_TEST_VERSION

    /* parse the file and get the DOM */
    doc = xmlReadFile(argv[1], NULL, 0);
    if (doc == NULL)
    {
        fprintf(stderr, "error could not parse %s\n", argv[1]);
        /* main returns int: a bare `return;` here is invalid. */
        return 1;
    }

    /* Get the root element */
    root_element = xmlDocGetRootElement(doc);
    char *myStr = "depth";
    print_element_names(root_element, &myStr);

    xmlFreeDoc(doc);

    /*
     * free the global variables that may have been allocated by the parser
     */
    xmlCleanupParser();

    return 0;
}

Output: 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0

I need to find how many times a particular tag occurs in an .xml file. I pass the tag name as a key (char *) to a function and use xmlStrcmp() to check each node's name against it.

len is supposed to be printed as a single integer, but the program prints the output shown above instead. There are actually 3 occurrences of the tag, and the output contains three 1's. I believe I am not traversing the XML tree properly. What is the right way to do it? Or is there a libxml2 function available to achieve this?

Input: an .xml file and a tag of interest passed as a key. Expected output: the number of occurrences of the tag; in this case I should get 3.

/* Updated excerpt from the body of print_element_names(); the declarations
 * of cur_node, a_node, key, len and findStr are outside this fragment. */
for (cur_node = a_node; cur_node; cur_node = cur_node->next)
{
    if ((cur_node->type == XML_ELEMENT_NODE))
    {
        /* NOTE(review): this compares the name of the FIRST CHILD, not of
         * cur_node itself, and dereferences cur_node->children without a
         * NULL check -- an element with no children would crash here.
         * The match has no effect: its only statement is commented out. */
        if ((!xmlStrcmp(cur_node->children->name, (const xmlChar *)key)))
        {
            //printf("%s\n", cur_node->children->content);
        }
         printf("%s", cur_node->name);   
 }
    
    /* Adds the value returned by the recursive call; presumably the function
     * was changed to return int -- the signature is not shown, confirm. */
    len += print_element_names(cur_node->children, findStr);
}
/* Prints this call's running total followed by a space. */
printf("%d ", len);

Updated Output: UnlimitRectCubes0 RectCubes0 depth0 2 0 rectangle0 height0 2 0 width0 2 0 10 0 11 0 RectCubes0 depth0 2 0 rectangle0 height0 2 0 width0 2 0 10 0 11 0 RectCubes0 depth0 2 0 rectangle0 height0 2 0 width0 2 0 10 0 11 0 17 3


Solution

  • You do not propagate the number of tags found on one level up to the calling level.

    I tried this code and got the correct result:

    /*
     * Compile with:
     * > gcc `xml2-config --cflags` -std=c99 -o test  test.c `xml2-config --libs`
     * */
     
    #include <stdio.h>
    #include <stdlib.h>
    #include <libxml/parser.h>
    #include <libxml/tree.h>
    
    int print_element_names(xmlNode *a_node, char **findStr)
    {
        xmlNode *cur_node = a_node;
        char *key = *findStr;
        int len = 0;
    
        if (a_node == NULL)
            return 0;
        
        printf("a_node->name=%s\n", a_node->name);
        while (cur_node)
        {
            printf("cur_node->name=%s, type=%d\n", cur_node->name, cur_node->type);
            if ((!xmlStrcmp(cur_node->name, (const xmlChar *)key)))
            {
                len++; //len is int type
            }
            cur_node = cur_node->next;
        }
        printf("found=%d\n", len);
    
        for (cur_node = a_node; cur_node; cur_node = cur_node->next)
        {
            if ((cur_node->type == XML_ELEMENT_NODE))
            {
        
                printf("search children for cur_node->name=%s, type=%d\n", cur_node->name, cur_node->type);
                len += print_element_names(cur_node->children, findStr);
            }
        }
        printf("found total for a_node->name %s: %d\n", a_node->name, len);
        return len;
    }
    
    /*
     * Parse an XML file and report how many "depth" elements it contains.
     *
     * The file is argv[1] when given, otherwise "test.xml" (the original
     * hard-coded name, kept as the default). Returns 0 on success and 1 if
     * the file cannot be parsed.
     */
    int main(int argc, char **argv)
    {
        xmlDoc *doc = NULL;           /* an xml document */
        xmlNode *root_element = NULL; /* a node in a xml tree */
        const char *path = (argc > 1) ? argv[1] : "test.xml";

        LIBXML_TEST_VERSION
        /*  parse the file and get the DOM */
        doc = xmlReadFile(path, NULL, 0);
        if (doc == NULL)
        {
            /* Diagnostics go to stderr so they do not mix with the result. */
            fprintf(stderr, "error could not parse %s\n", path);
            return 1;
        }
        /*Get the root element*/
        root_element = xmlDocGetRootElement(doc);
        char *myStr = "depth";
        /* Keep the returned total: it is the answer the program is for. */
        int total = print_element_names(root_element, &myStr);
        printf("occurrences of <%s>: %d\n", myStr, total);

        xmlFreeDoc(doc);
        xmlCleanupParser();

        return 0;
    }