Search code examples
carraysstringsegmentation-faultabort

Problems Manipulating strings stored from File I/O in C


My method was reading in each char from the file and keeping a count so when we hit an illegal character I keep track of the string length and the count for how many strings of this length are encountered. Now I'm trying to build strings with the chars I read in and store them in an array. It is almost working but I can get around aborts and seg faults when ever I try to add 2 strings together in the case that 2 of the strings read in are the same length. I marked where I'm having trouble on line 129 of my code if you don't mind giving me some feedback.... I am hoping to print the strings of every length when I'm done

this is the text file I'm using to test:

Tomorrow, and tomorrow, and tomorrow,
To the last syllable of recorded time;

Source code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 *this program reads in a text file from the command line
 *then counts and stores the number of words of all lengths
 */
#define LENGTH 34
#define WORD_BUFFER 750

int strLengths[LENGTH],lengthsCopy[LENGTH];
char *array[WORD_BUFFER][LENGTH];
char strings[LENGTH];
int counter = 0;
int ch,tester;

 //sorts the output of string lengths printing the largest amounts first
 void sort()
 {
    int max_val =0;
    int i,j,temp,val;
    //create copy
    for (i=0; i < LENGTH; i++)
    {
        lengthsCopy[i] = strLengths[i];
    }
    //for loop finds the max value in the array elements
    for(i=0; i<LENGTH; i++)
    {
        if(lengthsCopy[i] > max_val)
        max_val = lengthsCopy[i];
    }

    printf("max val in the array is %d\n",max_val);

    //prints the max value,decrements,checks,prints, rinse repeat...
    //iterates until the max is 0
    while(max_val!=0)
    {
        //checks all elements
        for(i=LENGTH-1; i > 0; i--)
        {
            //print when max val is found
            if(lengthsCopy[i] == max_val)
            {
                temp = i;
                printf("Count[%02d]=%02d;\n",i,max_val);
                //check for doubles
                for(j=LENGTH-1; j > 0; j--)
                {
                    //if double is found that is not the original, print
                    if(lengthsCopy[j] == max_val && temp != j)
                    {
                        printf("Count[%02d]=%02d;\n",j,max_val);
                        //erase value
                    lengthsCopy[j] = 0;
                    }
                }
            }
        }
        max_val--;
    }
}

//print all array that are not null, represent count of word lenghts
void printList()
{
    int i,val;
    for(i=1; i<=LENGTH;i++)
    {
        if(strLengths[i] > 0)
        {
        val = strLengths[i];
        printf("Count[%02d]=%02d;\n",i,val);
        }
    }
}

int main (int argc, char *argv[])
{
    //error message if input file is not passed
    if(argc < 2)
    {
        printf("You have to give me a file!\n");
        exit(1);
    }
    FILE *text = fopen(argv[1], "r");
    //errror message if no contents in the file
    if(text == NULL)
    {
        printf("No content to read in %s. \n", argv[1]);
        exit(1);
    }
    //iterate through text until end of file
    ch = fgetc(text);
    int strPoint =0;
    while(ch != EOF)
    {
        //if illegal char is met, add a count to the array value of current counter
        //set counter back to 0
        //scan next char
        if(ch==' '||ch==','||ch=='('||ch==')'||ch==';'||ch=='\n')
        {

            if(array[counter][0] == NULL)//if length not defined yet
            {
                array[counter][0] = strings;//add current string build to the array
                printf("%s\n",array[counter][0] );
            }
            else if(array[counter][0] != NULL && strings[0] != '\0')
            {//else length is defined add to text bank
                printf("else if reached\n");
                printf("%s\n",strings );
                printf("%lu\n",strlen(array[counter][0]) );
                int arrayptr = strlen(*array[counter]);
                printf("ptr %d",arrayptr);
                /* next line aborts / seg_faults */
                strncat(*array[counter],strings,strlen(strings)); 
            }

            strLengths[counter]++;
            counter = 0;
            ch = fgetc(text);
            memset(strings, 0, sizeof(strings));//clear stringBuild
            strPoint =0;
        }
        //else a legal character, increase counter, scan next char
        else
        {
            strings[strPoint] = ch;
            printf("string build %c\n",strings[strPoint]);
            counter++;
            strPoint++;
            ch = fgetc(text);
        }
    }
    fclose(text);
    printf("stored string %s\n",array[3][0] );

    printList();
    //call sort
    sort();

    exit(0);
}

Solution

  • From what I can tell from your code, your primary problem is your misunderstanding of what is happening with:

    array[counter][0] = strings;//add current string build to the array
    

    You are setting the pointer array[counter][0] to the address of strings. You only have one strings variable, so every array[counter][0] points to the same thing (so every row in your array will point to the last string contained in strings)

    Your strncat as a strcpy but nul-termianting by virtue of the behavior of strncat, is not wrong, but be aware that it can be a performance penalty with long buffers. You may have other logic issues, but they are obfuscated by the awkward layout of your code and rather non-standard use of a pointer to an array of array of char.

    Feedback

    Try and simplify your implementation. If you are primarily concerned about storing the words you read from a file, and the length of each for sorting purposes, then you can either simply store the words in a 2D array of char and call strlen every time you need a length, or for the size of an int, you can associate the length of each word with the word itself using a simple struct, e.g.

    typedef struct {
        char word[LENGTH];
        int len;
    } wordinfo;
    

    You then simply create an array or struct (e.g. wordinfo words[WORD_BUFFER];) and store your words in words[x].word and the length in word[x].len. If you want to forgo using a struct, then simply declare a 2D array (e.g. char words[LENGTH][WORD_BUFFER]; and store the words there. (for the cost of 4-bytes per-word, if storage isn't an issue, you will save the overhead of repeated function calls to strlen by storing the length you already have available from your read of chars)

    You can also declare a pointer to an array of char LENGTH (e.g. char (*array)[LENGTH]; and dynamically allocate storage for WORD_BUFFER of them with array = malloc (sizeof *array * WORD_BUFFER); (you can use calloc instead to initialize all bytes allocated to zero). That is a good option, but it does not appear dynamic allocation was your goal.

    Further, avoid the use of Global Variables. They are almost never needed and increase the risk of name collisions and value overwrites. Declare your variables local to main() and pass them as parameters as required. For example, using the struct implementation, you could write both your sort by length and print as follows taking a pointer to your array of struct and the number filled as parameters:

    /* simple insertion sort on len (descending) */
    void sort (wordinfo *a, int n)
    {
        int i, j;
        wordinfo v;
        for (i = 1; i < n; i++) {
            v = a[i];
            j = i;
            while (j > 0 && a[j - 1].len < v.len ) {
                a[j] = a[j - 1];
                j -= 1;
            }
            a[j] = v;
        }
    }
    
    /* tabular print of words read */
    void printlist (wordinfo *a, int n)
    {
        int i;
        for (i = 0; i < n; i++)
            printf ("  %-34s  (%d-chars)\n", a[i].word, a[i].len);
    }
    

    (note: don't write or use you own sort unless it is required for homework. C provides qsort that is infinitely more efficient and well tested, just write a compare function to compare two elements of whatever you need to sort and let qsort do the work)

    Lastly, the logic for reading each character from the file need not be complicated at all. Simply read the char, check it, and take whatever action is appropriate. The only added complexity comes from testing to make sure you stay within LENGTH chars and WORD_BUFFER words to prevent overwriting the bounds of your storage. Even using the struct implementation, declared and initialized as:

        int c, len = 0, maxndx = 0, ndx = 0;
        wordinfo words[WORD_BUFFER] = {{ .word = "", .len = 0 }};
    

    you can simplify your read logic in main to simply:

        while (ndx < WORD_BUFFER && (c = fgetc (fp)) != EOF) {
            if (len + 1 == LENGTH ||        /* check if full or c matches */
                c==' ' || c==',' || c=='(' || c==')' || c==';' || c=='\n') {
                if (len) {                          /* if we started a word */
                    if (len > words[maxndx].len)    /* check if longest  */
                        maxndx = ndx;               /* update max index  */
                    words[ndx].len = len;           /* set words[x].len  */
                    words[ndx++].word[len] = 0;     /* nul-terminat word */
                    len = 0;                        /* reset length */
                }
            }
            else
                words[ndx].word[len++] = c; /* assign c to words[x].word[len] */
        }
    

    (note: maxndx just saves the index (ndx) for the struct holding the longest word, or one of the longest is you have more than one of the same maximum length)

    Putting that altogether, you could boil your code down to:

    #include <stdio.h>
    
    #define LENGTH 34
    #define WORD_BUFFER 750
    
    typedef struct {
        char word[LENGTH];
        int len;
    } wordinfo;
    
    /* simple insertion sort on len (descending) */
    void sort (wordinfo *a, int n)
    {
        int i, j;
        wordinfo v;
        for (i = 1; i < n; i++) {
            v = a[i];
            j = i;
            while (j > 0 && a[j - 1].len < v.len ) {
                a[j] = a[j - 1];
                j -= 1;
            }
            a[j] = v;
        }
    }
    
    /* tabular print of words read */
    void printlist (wordinfo *a, int n)
    {
        int i;
        for (i = 0; i < n; i++)
            printf ("  %-34s  (%d-chars)\n", a[i].word, a[i].len);
    }
    
    int main (int argc, char **argv) {
    
        int c, len = 0, maxndx = 0, ndx = 0;
        wordinfo words[WORD_BUFFER] = {{ .word = "", .len = 0 }};
        FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
    
        if (!fp) {  /* validate file open for reading */
            fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
            return 1;
        }
    
        /* read each char and store in words[x].word up to 'ndx' words.
         * save the length of each word in words[x].len.
         */
        while (ndx < WORD_BUFFER && (c = fgetc (fp)) != EOF) {
            if (len + 1 == LENGTH ||        /* check if full or c matches */
                c==' ' || c==',' || c=='(' || c==')' || c==';' || c=='\n') {
                if (len) {                          /* if we started a word */
                    if (len > words[maxndx].len)    /* check if longest  */
                        maxndx = ndx;               /* update max index  */
                    words[ndx].len = len;           /* set words[x].len  */
                    words[ndx++].word[len] = 0;     /* nul-terminat word */
                    len = 0;                        /* reset length */
                }
            }
            else
                words[ndx].word[len++] = c; /* assign c to words[x].word[len] */
        }
        if (fp != stdin) fclose (fp);       /* close file if not stdin */
    
        printf ("\nlongest word: '%s'  (%d-chars)\n\n", 
                words[maxndx].word, words[maxndx].len);
    
        printf ("words read from file:\n\n");
        printlist (words, ndx);     /* print words in order read */
    
        sort (words, ndx);
    
        printf ("\nwords sorted by length:\n\n");
        printlist (words, ndx);     /* print words sorted by length */
    
        return 0;
    }
    

    (note: the program expects the filename to read as the first argument, or it will read from stdin (by default) if no argument is given)

    Example Use/Output

    $ ./bin/rdstrings3 <dat/tomorrow.txt
    
    longest word: 'Tomorrow'  (8-chars)
    
    words read from file:
    
      Tomorrow                            (8-chars)
      and                                 (3-chars)
      tomorrow                            (8-chars)
      and                                 (3-chars)
      tomorrow                            (8-chars)
      To                                  (2-chars)
      the                                 (3-chars)
      last                                (4-chars)
      syllable                            (8-chars)
      of                                  (2-chars)
      recorded                            (8-chars)
      time                                (4-chars)
    
    words sorted by length:
    
      Tomorrow                            (8-chars)
      tomorrow                            (8-chars)
      tomorrow                            (8-chars)
      syllable                            (8-chars)
      recorded                            (8-chars)
      last                                (4-chars)
      time                                (4-chars)
      and                                 (3-chars)
      and                                 (3-chars)
      the                                 (3-chars)
      To                                  (2-chars)
      of                                  (2-chars)
    

    Look things over and let me know if you have any questions. The choice of using a struct and storing the len or just calling strlen wherever needed is completely up to you.