Search code examples
cforkstdinstdiocorruption

C stdin corrupted from file redirection?


I am doing an assignment which consists of writing a mini shell that executes other programs using forks and execvp. When everything seemed to work, I tried to directly pass in a list of commands from a file to my shell using ./myshell < file.txt on bash, and got an infinite* stream of commands that would never finish executing.

I am not sure what caused it so I went to debug it using gdb. To my surprise, when reaching the supposed last line of the file, an additional character was added to the line read from stdin. Furthermore, more lines, which were previously executed would return to the feed and be re-executed. Here is the file that was fed to the shell:

-trimmed for compactness-
chdir file
chdir ../
touch hello.txt index.html app.py
ls
rm hello.txt
ls -1
wc app.py
cat index.html
pwd
history
touch .hidden.txt
ls -a
history
echo preparing to exit
cd ../../
ip route show
ls -R
rm -rf path
invalid command
history

enter image description here

notice after invalid command is history with a / then the next line is touch hello.txt index.html app.py and so forth. I have tried multiple way to debug this problem, by taking my readLine funtion into a separate file and testing it alone, but the program terminated correctly after reading the last line. I also compiled my shell on MacOS and to my surprise, the issue did not occur neither. For reference, the bug occurs on a system running Ubuntu 18.04.3 LTS.

I am completely bamboozled as to why it happens. I hypothesize that it might have something to do with my stdin being written by a forked copy of itself but I am really not sure. Some insights would be appreciated.

*not sure if it is actually infinite

Edit 1: Here is part of my code (sorry, I was not able to reduce its size more without removing the issue, since I have no idea what might have caused it)

#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/wait.h>
#include <sys/queue.h>
#include <sys/stat.h>
#include <sys/resource.h>
#include <signal.h>
#include <fcntl.h>


char* get_a_line();
char * extract_arguments(char** line, char delimiter);
int my_system(char* line);

// buffer size to read stdin
size_t BUFFER_SIZE = 256;

// string node for tailq
struct string_node {
    char* value;
    TAILQ_ENTRY(string_node) entries; 
};

// macro to init node struct for string
TAILQ_HEAD(str_list, string_node);
struct str_list history;


int main(int argc, char** argv) {

    int user_input = isatty(0);

    while (1) {

        if (user_input) {
            printf("$ ");
        }
        char* line = get_a_line();
        if (feof(stdin)) {
            exit(0);
        }
        if (strlen(line) > 0) {
            my_system(line);
        } 
        // won't free since line is used in the queue to display `history`
        // free(line);
    }

    return 0;

}

char* get_a_line() {

    char * buffer = (char*)malloc(BUFFER_SIZE * sizeof(char));
    size_t len = getline(&buffer, &BUFFER_SIZE, stdin);
    // transform `\n' to '\0' to terminate string
    if (len != -1 && buffer[len-1] == '\n') {
        buffer[len-1] = '\0';
    }

    return buffer;
}

int parse(char** line, char*** parsed) {

    // init string list to contain arguments
    struct str_list strings_list;
    TAILQ_INIT(&strings_list);

    struct string_node *tmp_node;

    // number of argument parts
    int count = 0 ;
    char * s;
    while((s = extract_arguments(line, ' ')) != NULL) {

        tmp_node = malloc(sizeof(struct string_node));
        tmp_node->value = s;
        TAILQ_INSERT_TAIL(&strings_list, tmp_node, entries);

        count++;
    }

    // save arguments into array of strings( array of char array)
    char ** arguments = malloc (sizeof(char**) * (count+1));

    int i=0;
    while ((tmp_node = TAILQ_FIRST(&strings_list))) {
            arguments[i++] = tmp_node->value;
            TAILQ_REMOVE(&strings_list, tmp_node, entries);
            free(tmp_node);
    }

    // terminate array
    arguments[count] = NULL;

    // check the type of termination
    *parsed =  arguments;
    if (**line == '|') {
        (*line) += 1;
        return 1;
    }

    return 0;

}


// extract string from the start of the *line until length by allocating through malloc
char * extract_string(char ** line, int length) {
    char * str = NULL;
    if (length > 0) {
        str = malloc((length+1) * sizeof(char));
        strncpy(str, *line, length);
        str[length] = '\0';
        *line += (length);
    }
    return str;
}

/*
    Merges two string str1 and str2 by calloc on str1 and freeing str2
    (prob should not free str2 in this but w/e)
*/
char * strcat_alloc(char * str1, char * str2) {

    if (str1 == NULL) {
        return str2;
    } 

    if (str2 == NULL) {
        return str1;
    }
    size_t length1 = strlen(str1) ;
    size_t length2 =  strlen(str2);
    str1 = realloc(str1, length1 + length2+1);
    strcpy(str1+length1, str2);
    str1[length1+length2] = '\0';
    free(str2);
    return str1;
}


/*
    Extract a single argument of the line, terminated by the delimiter
    Basic quotes and escaping implemented in order to support multiword arguments
*/
char * extract_arguments(char** line, char delimiter) {
    // remove trailing spaces
    while (**line == ' ') {
        (*line)+=1;
    }
    int right = 0;
    char * str_p = NULL;
    while ((*line)[right] != delimiter && 
            (*line)[right] != EOF && 
            (*line)[right] != '\0' &&
            (*line)[right] != '|')
    {
        if ((*line)[right] == '\\'){
            str_p = extract_string(line, right);
            // the escaped character is one after '\'
            *line+=1;
            char *c = malloc(sizeof(char));
            *c = **line;
            *line +=1;
            return strcat_alloc(strcat_alloc(str_p, c), extract_arguments(line, delimiter));
        }


        if ((*line)[right] == '\''){
            str_p = extract_string(line, right);
            *line+=1;
            char * str_p2 =  extract_arguments(line, '\'');
            return strcat_alloc(strcat_alloc(str_p, str_p2), extract_arguments(line, ' '));

        } else if ((*line)[right] == '\"') {
            str_p = extract_string(line, right);
            *line+=1;
            char * str_p2 = extract_arguments(line, '\"');
            return strcat_alloc(strcat_alloc(str_p, str_p2), extract_arguments(line, ' '));
        }
        right++;
    }

    str_p = extract_string(line, right);

    if (**line == delimiter) {
        *line+=1;
    }

    return str_p;

}


/*
    Execute command defined by **args dending on the flag (pipe or normal execution)
*/
int execute(char **args, int flag, int * wait_count) {

        pid_t pid = fork();
        if (pid == 0) {
            // exit to prevent multiple instance of shell
            exit(0);

        } else {
            // PARENT
            wait(NULL);

        }
        return 0;
}


int my_system(char* line) {

    char** args;

    int flag = 0;

    // wait count keeps tracks of the amount of fork to wait for
    // max 2 in this case 
    int wait_count= 0;
    while(*line != '\0') {
        flag = parse(&line, &args);

        if (*args == NULL) {
            return 0;
        }
        // exit can't be in fork nor chdir
        if (strcasecmp(args[0], "exit") == 0) {
            exit(0);
        } else if (strcasecmp(args[0], "chdir") == 0 || strcasecmp(args[0], "cd") == 0) {
            if(chdir(args[1]) < 0) {
                printf("chdir: change directory failed\n");
            }
            return 0;
        } 
        execute(args, flag, &wait_count);
    }

    return 0;
}

Solution

  • A friend figured it out for me. The issue arose from the use of exit in the child fork. Apparently, the stdin was somehow flushed twice and corrupted when read later by the parent. To fix it, simply change it to _exit instead.