Search code examples
clinuxbinary-search-tree

Creating a line counter using a Binary Search Tree **Specific Issues**


I want to create a C file which, via a binary-search-tree, reads from a text file, takes its identifiers (eg.include, stdio, hello, etc.), sorts them alphabetically, and returns what line number they appear on.

Desired terminal output:

Identifier: Hello

Counter: 2

Lines Appeared: [4, 6]

Issues:

  1. q1.c:28:21: error: expected expression before '{' token 28 | n -> lineList = {};
  2. When calling insert(), for the current Node, I want to add the current lineNumber to the tree->lineList[tree->counter-1].

test.txt

#include <stdio.h>

main() {
    printf("Hello world!\n");
    printf("Lochlann\n");
    printf("Hello world!\n");
}

quetion.c

#include <stdio.h>

#include <ctype.h>

struct Node {
    char * data;
    int lineList[100];
    int counter;
    struct Word * word;
    struct Node * ltree;
    struct Node * rtree;
};

struct Node * head;

struct Node * newTree(char * identifier) {
    struct Node * n = malloc(sizeof(struct Node));
    n -> data = malloc(strlen(identifier) + 1);
    n -> lineList = {};
    n -> counter = 1;
    strcpy(n -> data, identifier);
    n -> ltree = n -> rtree = NULL;
    return n;
}

struct Node * insert(struct Node * tree, char * identifier, int lineNumber) {
    if (tree == NULL)
        return newTree(identifier);
    int cmp = strcmp(identifier, tree -> data);
    if (cmp == 0) {
        tree -> counter++;
        tree -> lineList[tree -> counter - 1] = lineNumber;
        return tree;
    }

    if (cmp < 0)
        tree -> ltree = insert(tree -> ltree, identifier, lineNumber);
    else
        tree -> rtree = insert(tree -> rtree, identifier, lineNumber);
    return tree;
}

void inorder(struct Node * tree) {
    if (tree == NULL)
        return;
    inorder(tree -> ltree);

    printf("Identifier: %s\nCounter: %i\nLines Appeared: ", tree -> data, tree -> counter);
    for (int i = 0; i < tree -> counter; i++) {
        printf("%d", tree -> lineList[i]);
        //tree -> lineList[i] = lineNumber;
    }
    printf("\n");

    inorder(tree -> rtree);
}

main() {
    FILE * fp;
    fp = fopen("test.txt", "r");

    char buf[200];
    char id[100];
    int lineNumber = 1; //the tree->lineList should be [1,1,1,1,1,1]
    int j;
    while (fgets(buf, 100, fp)) {
        int i = 0;
        int len = strlen(buf);
        for (j = 0, i = 0; i < len; i++) {
            if (isalpha(buf[i]) || buf[i] == '_') {
                while (buf[i] && (isalnum(buf[i]) || buf[i] == '_'))
                    id[j++] = buf[i++];

                // have id
                id[j] = 0;

                //third argument adding line to linelist
                head = insert(head, id, lineNumber);

                j = 0;
            }

        }

    }
    inorder(head);
}

Solution

  • With little else on the go, here's a "mark-up" of your code. Please consider some / most / all of these comments for this program and for others.

    #include <stdio.h>
    #include <ctype.h>
    
    struct Node {
        char * data; // "generic" name. "token" might be better.
        int lineList[100]; // limited, but okay for now
        int counter;
    //  struct Word * word; // unused. keep things clean.
        struct Node * ltree;
        struct Node * rtree;
    };
    
    struct Node * head; // global variables are not recommended
    
    struct Node * newTree(char * identifier, int lnNum ) {
    //  struct Node * n = malloc(sizeof(struct Node));
        struct Node * n = calloc( 1, sizeof *n ); // use calloc
        /* omitting test for NULL */
    
        n -> data = malloc(strlen(identifier) + 1);
        /* omitting test for NULL */
        strcpy(n -> data, identifier); // do this here
    
    //  n -> lineList = {}; // unnecessary with calloc
    
        n->lineList[ n->counter++ ] = lnNum; // missing!!!
        // with calloc(), counter starts at zero
    
    //  strcpy(n -> data, identifier); // shifted to where used
    //  n -> ltree = n -> rtree = NULL; // unnecessary with calloc
        return n;
    }
    
    struct Node * insert(struct Node * tree, char * identifier, int lineNumber) {
        if (tree == NULL)
            return newTree(identifier, lineNumber ); // missing param linenumber!
        int cmp = strcmp(identifier, tree -> data);
        if (cmp == 0) {
    //      tree -> counter++;
    //      tree -> lineList[tree -> counter - 1] = lineNumber;
    
            /* omitting test for overrun */
            tree -> lineList[ tree->counter ] = lineNumber;
            tree->counter++;
            return tree;
        }
    
        if (cmp < 0)
            tree -> ltree = insert(tree -> ltree, identifier, lineNumber);
        else
            tree -> rtree = insert(tree -> rtree, identifier, lineNumber);
        return tree;
    }
    
    void inorder(struct Node * tree) {
        if (tree == NULL)
            return;
        inorder(tree -> ltree);
    
        printf("Identifier: %s\nCounter: %i\nLines Appeared: ", tree -> data, tree -> counter);
        for (int i = 0; i < tree -> counter; i++) {
            printf("%d", tree -> lineList[i]);
            //tree -> lineList[i] = lineNumber;
        }
        printf("\n");
    
        inorder(tree -> rtree);
    }
    
    int main() { // proper declaration of 'main()'
        FILE *fp = fopen("test.txt", "r"); // fewer lines
        /* omitting test for success */
    
        char buf[200];
    //  char id[100]; // not needed yet
        int lineNumber = 0; // got zero lines so far
    //  int j; // willy-nilly variables
        while (fgets(buf, sizeof buf, fp)) { // compiler knows dimensions
            lineNumber++; // missing!
    
            int len = strlen(buf);
            for (int i = 0; i < len; /* i++ */) { // let body decide
                if (isalpha(buf[i]) || buf[i] == '_') {
                    int j = 0; // NOW we need 'j' and..
                    char id[100];
    
                    while (buf[i] && (isalnum(buf[i]) || buf[i] == '_'))
                        id[j++] = buf[i++]; // here, 'i' is left on next character
    
                    // have id
                    id[j] = '\0'; // same, but conventional
    
                    //third argument adding line to linelist
                    head = insert(head, id, lineNumber);
    
                    // j = 0; // don't care about j anymore
                }
                else i++;
            }
        }
        inorder(head);
    
        return 0; // optional with newer compilers
    }
    

    You could even consider 'ditching' the copying of tokens from one buffer to another. Here's a slightly tighter for() loop for main():

    for( int i = 0; buf[ i ]; i++ )
        if (isalpha(buf[i]) || buf[i] == '_') {
            int tokStart = i;
            while (buf[i] && (isalnum(buf[i]) || buf[i] == '_'))
                i++; // find end of token (or line)
    
            char c = buf[i]; // remember this character
            buf[i] = '\0'; // clobber it
            head = insert(head, buf + tokStart, lineNumber);
            buf[i--] = c; // restore character AND decrement
        }