Search code examples
c xml-parsing expat-parser

Getting XML data using the expat XML parser


I have managed to parse the file OK, but now I am having trouble getting the values that I need. I can get the elements and their attributes, but I cannot get the element values (the text between the tags). For example, I would like to get the value of the item named frame, which in this XML is 20.

/* Current nesting level in the XML tree: incremented by start_element(),
 * decremented by end_element(), and used by start_element() to indent the
 * element names it prints. */
static int depth = 0;
/*
 * Start-tag callback: prints the element name indented by one space per
 * nesting level, followed by every attribute as " name= 'value'", then a
 * newline, and finally moves one level deeper in the tree.
 *
 * data      - user data pointer (not used here)
 * element   - name of the element being opened
 * attribute - array of alternating attribute names and values, ended by a
 *             NULL name
 */
void start_element(void *data, const char *element, const char **attribute)
{
    const char **pair = attribute;

    /* indentation: one space for every currently open ancestor */
    for (int pad = depth; pad > 0; pad--) {
        printf(" ");
    }

    printf("%s", element);

    /* pair[0] is the attribute name, pair[1] its value */
    while (*pair != NULL) {
        printf(" %s= '%s'", pair[0], pair[1]);
        pair += 2;
    }

    printf("\n");
    depth += 1;
}

/*
 * End-tag callback: an element has just been closed, so step back up one
 * level in the tree. Neither argument is used.
 */
void end_element(void *data, const char *el)
{
    depth -= 1;
}
/*
 * Reads "start_indication.xml" into the caller-supplied buffer and runs it
 * through expat in a single XML_Parse() call, with start_element() and
 * end_element() installed as the element handlers.
 *
 * buff      - scratch buffer that receives the raw file contents
 * buff_size - capacity of buff in bytes; a longer file is truncated to this
 *             size and will normally be reported as a parse error
 *
 * Returns 0 on success, 1 if the file cannot be opened or read, the parser
 * cannot be created, or the document fails to parse.
 */
int parse_xml(char *buff, size_t buff_size)
{
    int status = 0;
    size_t file_size;
    XML_Parser parser;
    FILE *fp;

    fp = fopen("start_indication.xml", "r");
    if(fp == NULL)
    {
        printf("Failed to open file\n");
        return 1;
    }

    parser = XML_ParserCreate(NULL);
    if(parser == NULL)
    {
        printf("Failed to create parser\n");
        fclose(fp);
        return 1;
    }
    XML_SetElementHandler(parser, start_element, end_element);

    memset(buff, 0, buff_size);
    /* strlen() returns size_t, which must be printed with %zu */
    printf("strlen(buff) before parsing: %zu\n", strlen(buff));

    file_size = fread(buff, sizeof(char), buff_size, fp);

    if(ferror(fp))
    {
        printf("Failed to read file\n");
        status = 1;
    }
    /*
     * Pass the byte count fread() reported rather than strlen(buff): when the
     * file fills the whole buffer there is no terminating NUL, so strlen()
     * would read past the end. XML_Parse() takes the length as an int.
     */
    else if(XML_Parse(parser, buff, (int)file_size, XML_TRUE) == XML_STATUS_ERROR)
    {
        printf("Error: %s\n", XML_ErrorString(XML_GetErrorCode(parser)));
        status = 1;
    }

    fclose(fp);
    XML_ParserFree(parser);

    return status;
}



<data>
    <header length="4">
            <item name="time" type="time">16</item>
            <item name="ref" type="string">3843747</item>
            <item name="port" type="int16">0</item>
            <item name="frame" type="int16">20</item>
    </header>
</data>

Output from parsing


Element: data
Element: header length= '4'
Element: item name= 'time' type= 'time'
Element: item name= 'ref' type= 'string'
Element: item name= 'port' type= 'int16'
Element: item name= 'frame' type= 'int16'

Solution

  • It is quite difficult with expat. expat is better when you are only interested in the structure, not the content of the elements. Why not use libxml instead? What are your reasons for using an event-based parser like expat, rather than a tree-based one?

    Anyway, the way to do it is to set a character data handler. Here is an example, based on your code:

    #include <expat.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    /* size in bytes of the buffer that main() hands to parse_xml() */
    #define BUFFER_SIZE 100000
    
    /* track the current level in the xml tree */
    static int      depth = 0;
    
    /* most recent text chunk stored by handle_data(); printed by end_element() */
    static char    *last_content;
    
    /*
     * Start-tag callback: prints the element name indented by one space per
     * nesting level, then every attribute as " name= 'value'", then a newline,
     * and finally moves one level deeper in the tree.
     *
     * data      - user data pointer (not used here)
     * element   - name of the element being opened
     * attribute - array of alternating attribute names and values, ended by a
     *             NULL name
     */
    void
    start_element(void *data, const char *element, const char **attribute)
    {
        const char    **pair;
        int             pad = depth;
    
        /* indentation: one space for every currently open ancestor */
        while (pad > 0) {
            printf(" ");
            pad--;
        }
    
        printf("%s", element);
    
        /* pair[0] is the attribute name, pair[1] its value */
        for (pair = attribute; *pair != NULL; pair += 2) {
            printf(" %s= '%s'", pair[0], pair[1]);
        }
    
        printf("\n");
        depth += 1;
    }
    
    /*
     * End-tag callback: prints the most recently captured text chunk next to
     * the name of the element that is closing, indented by the current depth,
     * then steps back up one level in the tree.
     *
     * data - user data pointer (not used here)
     * el   - name of the element being closed
     *
     * Note: last_content is simply whatever the character-data handler stored
     * last, so for an element that has child elements the printed text is
     * usually trailing whitespace or a child's text, not text owned by `el`.
     */
    void
    end_element(void *data, const char *el)
    {
        /*
         * No character data may have been seen yet (e.g. the document is just
         * "<a/>"); passing NULL to %s is undefined, so print an empty string.
         */
        const char     *text = (last_content != NULL) ? last_content : "";
        int             i;
    
        for (i = 0; i < depth; i++) {
            printf(" ");
        }
        printf("Content of element %s was \"%s\"\n", el, text);
        depth--;
    }
    
    /*
     * Character-data callback: keeps a NUL-terminated copy of the text chunk
     * in last_content so that end_element() can print it.
     *
     * data    - user data pointer (not used here)
     * content - text bytes; expat does not NUL-terminate them, only the first
     *           `length` bytes are valid
     * length  - number of bytes in content
     *
     * expat may deliver the text of a single element in several calls; only
     * the most recent chunk is kept (see the TODO below).
     */
    void
    handle_data(void *data, const char *content, int length)
    {
        char           *tmp;
    
        (void) data;
        if (length < 0) {
            return;
        }
    
        /*
         * One extra byte for the terminator. realloc() reuses the buffer from
         * the previous call instead of leaking it; on the first call
         * last_content is NULL and this behaves like malloc().
         */
        tmp = realloc(last_content, (size_t) length + 1);
        if (tmp == NULL) {
            return;                 /* out of memory: keep the previous chunk */
        }
    
        memcpy(tmp, content, (size_t) length);
        tmp[length] = '\0';
        last_content = tmp;         /* TODO: concatenate the text nodes? */
    }
    
    /*
     * Reads "start_indication.xml" into the caller-supplied buffer and runs it
     * through expat in a single XML_Parse() call, with start_element(),
     * end_element() and handle_data() installed as handlers.
     *
     * buff      - scratch buffer that receives the raw file contents
     * buff_size - capacity of buff in bytes; a longer file is truncated to
     *             this size and will normally be reported as a parse error
     *
     * Returns 0 on success, 1 if the file cannot be opened or read, the parser
     * cannot be created, or the document fails to parse.
     */
    int
    parse_xml(char *buff, size_t buff_size)
    {
        int             status = 0;
        size_t          file_size;
        XML_Parser      parser;
        FILE           *fp;
    
        fp = fopen("start_indication.xml", "r");
        if (fp == NULL) {
            printf("Failed to open file\n");
            return 1;
        }
    
        parser = XML_ParserCreate(NULL);
        if (parser == NULL) {
            printf("Failed to create parser\n");
            fclose(fp);
            return 1;
        }
        XML_SetElementHandler(parser, start_element, end_element);
        XML_SetCharacterDataHandler(parser, handle_data);
    
        memset(buff, 0, buff_size);
        /* strlen() returns size_t, which must be printed with %zu */
        printf("strlen(buff) before parsing: %zu\n", strlen(buff));
    
        file_size = fread(buff, sizeof(char), buff_size, fp);
    
        if (ferror(fp)) {
            printf("Failed to read file\n");
            status = 1;
        }
        /*
         * Pass the byte count fread() reported rather than strlen(buff): when
         * the file fills the whole buffer there is no terminating NUL, so
         * strlen() would read past the end. XML_Parse() takes the length as
         * an int.
         */
        else if (XML_Parse(parser, buff, (int) file_size, XML_TRUE) == XML_STATUS_ERROR) {
            printf("Error: %s\n", XML_ErrorString(XML_GetErrorCode(parser)));
            status = 1;
        }
    
        fclose(fp);
        XML_ParserFree(parser);
    
        return status;
    }
    
    /*
     * Program entry point: hands a BUFFER_SIZE-byte stack buffer to
     * parse_xml() and prints the value it returns. The command-line arguments
     * are ignored and the exit status is always 0.
     */
    int
    main(int argc, char **argv)
    {
        char            buffer[BUFFER_SIZE];
        int             result = parse_xml(buffer, sizeof buffer);
    
        printf("Result is %i\n", result);
        return 0;
    }