I'm using libxml2 from C. My aim is to gather the text of certain tags from different files and compare the results with each other.
I'm using XPath to get all the nodes, and pulling the text out of the nodes works. The problem comes when I try to compare them.
What I tried: three different set implementations of my own. I suspected the problem was in them, so I switched to PblSet, as it is the only library I found that can compute the difference between two sets.
And here the fun begins: I am able to pull the items, store them in a PblSet and later read them back from it, but when I compare two such sets the result says that the sets are completely different. A stripped-down test case of the parser:
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <libxml/parser.h>
#include <libxml/tree.h>
#include <libxml/xpath.h>

#include "pbl_1_04_04/src/pbl.h"
/*
 * Tell the caller whether a pointer is unusable.
 *
 * Returns 0 when ptr is non-NULL. When ptr is NULL, prints
 * "Cannot allocate memory" to stdout and returns 1.
 */
int checkPtr(void *ptr) {
    /* Guard clause: the common, successful case leaves immediately. */
    if (ptr != NULL) {
        return 0;
    }

    printf("Cannot allocate memory");
    return 1;
}
/*
 * Strip leading and trailing whitespace from a string, in place.
 *
 * s: writable, NUL-terminated buffer (may be NULL).
 *
 * Returns a pointer to the first non-whitespace character inside s, or to
 * the terminating NUL if s holds nothing but whitespace. Returns NULL when
 * s is NULL. The returned pointer aliases s; it is not a new allocation.
 */
const char *strstrip(char *s) {
    size_t len;
    size_t kept;

    if (s == NULL) {
        return NULL;
    }

    len = strlen(s);
    kept = len;

    /*
     * Trim the tail by shrinking an index rather than walking a pointer
     * backwards, so nothing ever points before the start of the buffer.
     * <ctype.h> functions require a value representable as unsigned char.
     */
    while (kept > 0 && isspace((unsigned char)s[kept - 1])) {
        kept--;
    }

    /* Only write when something was actually trimmed. */
    if (kept < len) {
        s[kept] = '\0';
    }

    /* isspace('\0') is false, so this loop stops at the terminator. */
    while (isspace((unsigned char)*s)) {
        s++;
    }
    return s;
}
/*
 * Parse an XML file into a libxml2 document.
 *
 * filename: path of the file handed to xmlParseFile().
 *
 * Returns the parsed document, which the caller releases with xmlFreeDoc().
 * When parsing fails, prints "<filename> cannot be loaded" to stdout and
 * returns NULL.
 */
xmlDocPtr loadFile(const char *filename) {
    xmlDocPtr doc = xmlParseFile(filename);

    if (doc == NULL) {
        /* No document was created, so there is nothing to free here. */
        printf("%s cannot be loaded\n", filename);
    }
    return doc;
}
/*
 * Collect the text content of every node matched by an XPath expression.
 *
 * doc:   parsed document to search.
 * xpath: XPath expression selecting the nodes of interest.
 *
 * Returns a new tree set holding one string per distinct text content, or
 * NULL when the XPath context or set cannot be created, the expression
 * cannot be evaluated, or it matches nothing.
 *
 * Ownership: the strings in the set were allocated by xmlNodeGetContent();
 * the caller must xmlFree() each of them and then pblSetFree() the set.
 */
PblSet* getDocIDs(xmlDocPtr doc, const char *xpath) {
    xmlXPathContextPtr context = NULL;
    xmlXPathObjectPtr result = NULL;
    xmlNodeSetPtr nodeset;
    PblSet *set = NULL;
    int i;

    context = xmlXPathNewContext(doc);
    if (checkPtr(context) > 0) {
        goto done;
    }

    result = xmlXPathEvalExpression((const xmlChar *)xpath, context);
    if (checkPtr(result) > 0) {
        goto done;
    }

    /* The macro also covers a NULL node set, so no separate check is needed. */
    nodeset = result->nodesetval;
    if (xmlXPathNodeSetIsEmpty(nodeset)) {
        printf("No results for %s\n", xpath);
        goto done;
    }
    printf("%d objects found with path \"%s\"\n", nodeset->nodeNr, xpath);

    /* Create the set only once there is something to store in it. */
    set = pblSetNewTreeSet();
    if (checkPtr(set) > 0) {
        goto done;
    }

    /*
     * Compare elements as strings. Without this the set uses its default
     * comparison, and two equal strings stored at different addresses are
     * treated as different elements.
     */
    pblSetSetCompareFunction(set, pblCollectionStringCompareFunction);

    for (i = 0; i < nodeset->nodeNr; ++i) {
        xmlChar *key = xmlNodeGetContent(nodeset->nodeTab[i]);
        int rc;

        if (key == NULL) {
            continue;
        }

        rc = pblSetAdd(set, key);
        if (rc <= 0) {
            /* Duplicate (0) or failure (<0): the set did not keep the key. */
            if (rc < 0) {
                printf("Cannot add \"%s\" to set\n", (const char *)key);
            }
            xmlFree(key);
        }
    }

done:
    /*
     * The matched nodes belong to doc and are released by xmlFreeDoc(), so
     * they must not be freed here. Freeing the XPath object also releases
     * its node set.
     */
    if (result != NULL) {
        xmlXPathFreeObject(result);
    }
    if (context != NULL) {
        xmlXPathFreeContext(context);
    }
    return set;
}
/*
 * Release a set returned by getDocIDs() together with the strings it holds.
 * Accepts NULL. Sets that merely share elements with another set (such as
 * the result of pblSetDifference()) must be released with pblSetFree() only.
 */
static void freeIdSet(PblSet *set) {
    int count;
    int i;

    if (set == NULL) {
        return;
    }

    count = pblSetSize(set);
    for (i = 0; i < count; ++i) {
        void *item = pblSetGet(set, i);

        /* NOTE(review): pblSetGet is expected to report errors as (void *)-1; confirm. */
        if (item != NULL && item != (void *)-1) {
            xmlFree(item);
        }
    }
    pblSetFree(set);
}

/*
 * Load two XML files, collect the text of all <id> elements from each, and
 * report which ids appear only in the new file and which only in the old.
 *
 * Returns EXIT_SUCCESS when both files were processed, EXIT_FAILURE
 * otherwise. Everything acquired along the way is released on every path.
 */
int main(void) {
    const char *filename1 = "js_new.xml";
    const char *filename2 = "js_old.xml";
    const char *xpath = "//id";
    xmlDocPtr doc1 = NULL;
    xmlDocPtr doc2 = NULL;
    PblSet *set1 = NULL;
    PblSet *set2 = NULL;
    PblSet *added = NULL;   /* ids present in set1 but not in set2 */
    PblSet *removed = NULL; /* ids present in set2 but not in set1 */
    int status = EXIT_FAILURE;

    doc1 = loadFile(filename1);
    if (checkPtr(doc1) > 0) {
        goto cleanup;
    }
    doc2 = loadFile(filename2);
    if (checkPtr(doc2) > 0) {
        goto cleanup;
    }

    set1 = getDocIDs(doc1, xpath);
    if (checkPtr(set1) > 0) {
        goto cleanup;
    }
    set2 = getDocIDs(doc2, xpath);
    if (checkPtr(set2) > 0) {
        goto cleanup;
    }

    printf("arr1 size: %d\n", pblSetSize(set1));
    printf("arr2 size: %d\n", pblSetSize(set2));

    added = pblSetDifference(set1, set2);
    if (checkPtr(added) > 0) {
        goto cleanup;
    }
    removed = pblSetDifference(set2, set1);
    if (checkPtr(removed) > 0) {
        goto cleanup;
    }

    printf("new size: %d\n", pblSetSize(added));
    printf("old size: %d\n", pblSetSize(removed));

    /* An empty set has no first element, so only print one when it exists. */
    if (pblSetSize(added) > 0) {
        printf("First element of new: %s\n",
               (const char *)pblSetGetFirst(added));
    }
    if (pblSetSize(removed) > 0) {
        printf("First element of old: %s\n",
               (const char *)pblSetGetFirst(removed));
    }

    status = EXIT_SUCCESS;

cleanup:
    /*
     * The difference sets share their element pointers with set1/set2, so
     * only the containers are freed here; the strings themselves are freed
     * once, through set1 and set2.
     */
    if (added != NULL) {
        pblSetFree(added);
    }
    if (removed != NULL) {
        pblSetFree(removed);
    }
    freeIdSet(set1);
    freeIdSet(set2);
    if (doc1 != NULL) {
        xmlFreeDoc(doc1);
    }
    if (doc2 != NULL) {
        xmlFreeDoc(doc2);
    }
    return status;
}
The test XML files are here:
Answer found.
There was a problem with the comparison function used by PblSet.
I had to set each set's compare function (pblSetCompareFunction)
to pblCollectionStringCompareFunction,
as follows:
/*
 * Make both sets treat their elements as NUL-terminated strings.
 * The compare function decides whether two elements are equal; a hash set
 * additionally needs a matching hash function so that equal strings stored
 * at different addresses end up in the same bucket.
 */
PblSet *set1 = pblSetNewHashSet();
PblSet *set2 = pblSetNewHashSet();
pblSetSetCompareFunction(set1, pblCollectionStringCompareFunction);
pblSetSetCompareFunction(set2, pblCollectionStringCompareFunction);
pblSetSetHashValueFunction(set1, pblSetStringHashValue);
pblSetSetHashValueFunction(set2, pblSetStringHashValue);
With that change it works as expected.
I'm still wondering, though, why it worked when I fed the sets strings I had written myself instead of strings parsed by the XML parser.