Search code examples
cxpathsetlibxml2

libxml2 + pblSet (or any other set) - Can't compare results


I'm using C libxml2. My aim is to gather strings from tags from different files and compare in between them.

I'm using xpath to get all the nodes and pulling the text from the nodes successfully. The problem comes when I'm trying to compare them;

What I tried: Own implementations of set, 3 different kinds. -> Was thinking the problem in them so I went for PblSet as it's the only lib I found that can make difference between 2 sets.

And here the fun begins; I am able to pull the items, I am able to store them in PblSet and later to get them from it, but when I'm doing comparison between two of them it returns as both sets are different. A stripped down test case of the parser:

#include <stdio.h>
#include "pbl_1_04_04/src/pbl.h"
#include <libxml/tree.h>
#include <libxml/parser.h>
#include <libxml/xpath.h>
#include <string.h>
#include <ctype.h>

int checkPtr(void *ptr) {
    if (ptr == NULL) {
        printf("Cannot allocate memory");
        return 1;
    }
    return 0;
}

const char *strstrip(char *s) {
    size_t size;
    char *end;

    size = strlen(s);

    if (!size)
        return s;

    end = s + size - 1;
    while (end >= s && isspace(*end))
        end--;
    *(end + 1) = '\0';

    while (*s && isspace(*s))
        s++;

    return s;
}

xmlDocPtr loadFile(const char *filename) {
    xmlDocPtr doc;
    doc = xmlParseFile(filename);
    if (doc == NULL ) {
        printf("%s cannot be loaded\n", filename);
        xmlFreeDoc(doc);
        return NULL;
    }
    return doc;
}

PblSet* getDocIDs(xmlDocPtr doc, const char *xpath) {
    xmlXPathContextPtr context;
    xmlXPathObjectPtr result;
    PblSet *set = pblSetNewTreeSet();
    xmlNodeSetPtr nodeset;
    xmlNodePtr cur;
    size_t size;
    size_t i;
    xmlChar *key;

    context = xmlXPathNewContext(doc);
    if (checkPtr(context) > 0) {
        xmlXPathFreeContext(context);
        return NULL;
    }
    result = xmlXPathEvalExpression((xmlChar *)xpath, context);
    if (checkPtr(result) > 0) {
        xmlXPathFreeObject(result);
        return NULL;
    }
    if (xmlXPathNodeSetIsEmpty(result->nodesetval)) {
        xmlXPathFreeObject(result);
        printf("No results for %s\n", xpath);
        return NULL;
    }

    printf("%d objects found with path \"%s\"\n", result->nodesetval->nodeNr, \
            xpath);

    nodeset = result->nodesetval;
    if (checkPtr(nodeset) > 0) {
        xmlXPathFreeNodeSet(nodeset);
        return NULL;
    }
    size = nodeset->nodeNr;
    for (i=0; i<size; ++i) {
        cur = nodeset->nodeTab[i];
        key = xmlNodeGetContent(cur);
        pblSetAdd(set, key);
    }

    /*xmlXPathFreeNodeSet(nodeset);*/
    xmlFreeNode(cur);
    xmlXPathFreeContext(context);
    xmlXPathFreeObject(result);

    return set;
}


int main(void) {
    // Arrays holding new and old items in sets
    xmlDocPtr doc1, doc2;

    const char *filename1 = "js_new.xml";
    const char *filename2 = "js_old.xml";
    const char *xpath = "//id";

    doc1 = loadFile(filename1);
    if (checkPtr(doc1) > 0) return EXIT_FAILURE;
    doc2 = loadFile(filename2);
    if (checkPtr(doc2) > 0) return EXIT_FAILURE;


    PblSet *set1 = getDocIDs(doc1, xpath);
    if (checkPtr(set1) > 0) return EXIT_FAILURE;
    PblSet *set2 = getDocIDs(doc2, xpath);
    if (checkPtr(set2) > 0) return EXIT_FAILURE;
    PblSet *new;
    PblSet *old;

    printf("arr1 size: %d\n", pblSetSize(set1));
    printf("arr2 size: %d\n", pblSetSize(set2));

    new = pblSetDifference(set1, set2);
    old = pblSetDifference(set2, set1);

    printf("new size: %d\n", pblSetSize(new));
    printf("old size: %d\n", pblSetSize(old));

    printf("First element of set1: %s\n", pblSetGetFirst(new));
    printf("First element of set2: %s\n", pblSetGetFirst(old));

    pblSetFree(set1);
    pblSetFree(set2);
    pblSetFree(new);
    pblSetFree(old);

    xmlFreeDoc(doc1);
    xmlFreeDoc(doc2);
}

The test xml files are here:

https://gist.github.com/osleg/176f3a921ed91b3c9e6f

https://gist.github.com/osleg/dbc85f8642bfa255f9ba


Solution

  • Answer found. There was a problem with comparison function in pblSet. had to set pblSetCompareFunction to pblCollectionStringCompareFunction as follows:

    PblSet *set1 = pblSetNewHashSet();
    PblSet *set2 = pblSetNewHashSet();
    pblSetCompareFunction(set1, pblCollectionStringCompareFunction);
    pblSetCompareFunction(set2, pblCollectionStringCompareFunction);
    

    to make it work as expected.

    Though still wondering why did it work when I fed to sets strings by myself instead of parsed strings from the xmlParser.