Search code examples
cmemorytruncation

Unknown C String Truncation/Overwrite


I am having an interesting memory problem with a simple string manipulation. The problem itself isn't actually in the reading of the string but right before it when I am trying to call the string.

char *removeInvalid(char *token){
    fprintf(stderr," Before: %s \n", token);
    char *newToken = malloc(sizeof(100) + 1);
    fprintf(stderr," After: %s \n", token);
}

Whenever I run this, the string if truncated right after the char *newToken is malloc'd. So the printout of this results in

Before: Willy Wanka's Chochlate Factory
After: Will Wanka's Chochlate F!

Anyone have any clue what this is? I looked at other examples of malloc, but can't figure out how it is going wrong here.

EDIT: FULL CODE BELOW. Take note I am a college student who just began C, so it isn't perfect by anymeans. But it works up until this error.

Function calls goes as follows. Main->initialReadAVL (This part works perfectly) Then after commandReadAVL is called which goes commandReadAVL->ReadHelper (Again works fine here. Then CleanUpString->removeSpaces(works fine) Then CleanUpString->removeInvalid(THIS IS WHERE IT ERRORS)

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <ctype.h>
#include "node.h"
#include "avl.h"
#include "scanner.h"
#include "bst.h"

/* Options */
int avlSwitch = 0;
int bstSwitch = 0;
int insertSwitch = 0;
int deleteSwitch = 0;
int frequencySwitch = 0;
int displaySwitch = 0;
int statisticSwitch = 0;

int ProcessOptions(int argc, char **argv);
char *cleanUpString(char *token);
char *turnToLowerCase(char *token);
char *removeSpaces(char *token);
char *removeInvalid(char *token);
char *readHelper(FILE *in);
void Fatal(char *fmt, ...);
void preOrder(struct node *root);
void initialReadAVL(avl *mainAVL, FILE *in);
void initialReadBST(bst *mainBST, FILE *in);
void commandReadBST(bst *mainBST, FILE *commandList);
void commandReadAVL(avl *mainAVL, FILE *commandList);

int main(int argc, char **argv) {
    struct avl *mainAVL;
    struct bst *mainBST;
    FILE *text;
    FILE *commandList;


    if(argc != 4){
        Fatal("There must be 4 arguments of form 'trees -b corpus commands' \n");
    }

    int argIndex = ProcessOptions(argc,argv);

    text = fopen(argv[2], "r");
    commandList = fopen(argv[3], "r");

    //Protect against an empty file.
    if (text == NULL){
        fprintf(stderr,"file %s could not be opened for reading\n", argv[2]);
        exit(1);
    }

    if (commandList == NULL){
        fprintf(stderr,"file %s could not be opened for reading\n", argv[3]);
        exit(1);
    }


    if (avlSwitch){
        mainAVL = newAVL();
        initialReadAVL(mainAVL, text);
        preOrder(mainAVL->root);
        fprintf(stderr,"\n");
        commandReadAVL(mainAVL, commandList);
        preOrder(mainAVL->root);
        fprintf(stderr,"\n");
    }
    else if (bstSwitch){
        mainBST = newBST();
        initialReadBST(mainBST, text);
        preOrder(mainBST->root);
        commandReadBST(mainBST, commandList);
        preOrder(mainBST->root);
    }


    return 0;
}


void commandReadAVL(avl *mainAVL, FILE *commandList){
    char *command;
    char *textSnip;
    while(!feof(commandList)){
        command = readHelper(commandList);
        textSnip = readHelper(commandList);
        textSnip = cleanUpString(textSnip);

        if(command != NULL){
            switch (command[0]) {
            case 'i':
                fprintf(stderr,"%s \n", textSnip);
                insertAVL(mainAVL, textSnip);
                break;
            case 'd':
                deleteAVL(mainAVL, textSnip);
                break;
            case 'f':
                break;
            case 's':
                break;
            case 'r':
                break;
            default:
                Fatal("option %s not understood\n",command);
            } 
        }

    }
}

void commandReadBST(bst *mainBST, FILE *commandList){
    char *command;
    char *textSnip;
    while(!feof(commandList)){
        command = readHelper(commandList);
        textSnip = readHelper(commandList);
        textSnip = cleanUpString(textSnip);
        if(command != NULL){
            switch (command[0]) {
                case 'i':
                    insertBST(mainBST, textSnip);
                    break;
                case 'd':
                    deleteBST(mainBST, textSnip);
                    break;
                case 'f':
                    break;
                case 's':
                    break;
                case 'r':
                    break;
                default:
                    Fatal("option %s not understood\n",command);
                } 
        }
    }
}


char *readHelper(FILE *in){
    char *token;
    if (stringPending(in)){
        token = readString(in);
    }
    else {
        token = readToken(in);
    }
    return token;
}

void initialReadBST(bst *mainBST, FILE *in){
    char *token;
    while(!feof(in)){

        token = readHelper(in);
        token = cleanUpString(token);
        if (token != NULL){
            insertBST(mainBST, token);
        }
    }
}

void initialReadAVL(avl *mainAVL, FILE *in){
    char *token;
    while(!feof(in)){

        token = readHelper(in);
        token = cleanUpString(token);
        if (token != NULL){
            insertAVL(mainAVL, token);
        }
    }
}

//Helper Function to clean up a string using all the prerequisites. 
char *cleanUpString(char *token){
    char *output = malloc(sizeof(*token)+ 1);
    if (token != NULL){
        output = removeSpaces(token);
         fprintf(stderr,"before : %s \n", output);
        output = removeInvalid(output);
         fprintf(stderr,"%s \n", output);
        output = turnToLowerCase(output);
        return output;
    }
    return NULL;

}

//Helper function to turn the given string into lower case letters
char *turnToLowerCase(char *token){
    char *output = malloc(sizeof(*token) + 1);
    for (int x = 0; x < strlen(token); x++){
            output[x] = tolower(token[x]);
        }
    return output;
}

//Helper function to remove redundent spaces in a string.
char *removeSpaces(char *token){
    char *output;
    int x = 0;
    int y = 0;

    while (x < strlen(token)){
        if (token[x]== ' ' && x < strlen(token)){
            while(token[x] == ' '){
                x++;
            }
            output[y] = ' ';
            y++;
            output[y] = token[x];
            y++;
            x++;
        }
        else {
            output[y] = token[x];
            y++;
            x++;
        }

    }
    return output;

}

char *removeInvalid(char *token){
    fprintf(stderr," Before: %s \n", token);
    char *newToken = malloc(sizeof(* token)+ 1);
    fprintf(stderr," After: %s \n", token);


    int x = 0;
    int y = 0;
    while (x < strlen(token)){
        if (!isalpha(token[x]) && token[x] != ' '){
            x++;
        }
        else {
            newToken[y] = token[x];
            y++;
            x++;
        }
    }
    return newToken;
}


//Processes a system ending error. 
void Fatal(char *fmt, ...) {
    va_list ap;

    fprintf(stderr,"An error occured: ");
    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);

    exit(-1);
    }


//Processes the options needed to be executed from the command line
int ProcessOptions(int argc, char **argv) {
    int argIndex;
    int argUsed;
    int separateArg;

    argIndex = 1;

    while (argIndex < argc && *argv[argIndex] == '-')
        {
        /* check if stdin, represented by "-" is an argument */
        /* if so, the end of options has been reached */
        if (argv[argIndex][1] == '\0') return argIndex;

        separateArg = 0;
        argUsed = 0;

        if (argv[argIndex][2] == '\0')
            {
            separateArg = 1;
            }

        switch (argv[argIndex][1])
            {
            case 'b':
                bstSwitch = 1;
                break;
            case 'a':
                avlSwitch = 1;
                break;
            default:
                Fatal("option %s not understood\n",argv[argIndex]);
            }

        if (separateArg && argUsed)
            ++argIndex;

        ++argIndex;
        }

    return argIndex;
}


void preOrder(struct node *root) {
    if(root != NULL)
    {
        fprintf(stderr,"%s ", root->key);
        preOrder(root->lChild);
        preOrder(root->rChild);
    }

}

ReadString()

char *
readString(FILE *fp)
    {
    int ch,index;
    char *buffer;
    int size = 512;

    /* advance to the double quote */

    skipWhiteSpace(fp);
    if (feof(fp)) return 0;

    ch = fgetc(fp);
    if (ch == EOF) return 0;

    /* allocate the buffer */

    buffer = allocateMsg(size,"readString");

    if (ch != '\"')
        {
        fprintf(stderr,"SCAN ERROR: attempt to read a string failed\n");
        fprintf(stderr,"first character was <%c>\n",ch);
        exit(4);
        }

    /* toss the double quote, skip to the next character */

    ch = fgetc(fp);

    /* initialize the buffer index */

    index = 0;

    /* collect characters until the closing double quote */

    while (ch != '\"')
        {
        if (ch == EOF)
            {
            fprintf(stderr,"SCAN ERROR: attempt to read a string failed\n");
            fprintf(stderr,"no closing double quote\n");
            exit(6);
            }
        if (index > size - 2) 
            {
            ++size;
            buffer = reallocateMsg(buffer,size,"readString");
            }

        if (ch == '\\')
            {
            ch = fgetc(fp);
            if (ch == EOF)
                {
                fprintf(stderr,"SCAN ERROR: attempt to read a string failed\n");
                fprintf(stderr,"escaped character missing\n");
                exit(6);
                }
            buffer[index] = convertEscapedChar(ch);
            }
        else
            buffer[index] = ch;
        ++index;
        ch = fgetc(fp);
        }

    buffer[index] = '\0';

    return buffer;
    }

INPUT: Commands.txt

i "Willy Wonka's Chochlate Factory"

INPUT testFile.txt

a b c d e f g h i j k l m n o p q r s t u v w x y z

Thanks!


Solution

  • char *turnToLowerCase(char *token){
        char *output = malloc(sizeof(*token) + 1);
        for (int x = 0; x < strlen(token); x++){
                output[x] = tolower(token[x]);
            }
        return output;
    }
    

    This is probably your main issue. You allocate enough space for two characters and then proceed to store lots more than that. You probably wanted:

        char *output = malloc(strlen(token) + 1);
    

    Since token is a char*, *token is a char. So sizeof(*token) is sizeof(char) -- definitely not what you want.