Search code examples
Tags: c, split, substring, c-strings, function-definition

string splitter in C - how is it working?


I have inherited a large code base that contains a utility function to split strings on the ':' character. I understand about 80% of how it works, but I do not understand the *token = '\0'; line.

Any pointers are highly appreciated.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_TOKEN_SIZE 200

// Copies the leading field of str into token and returns where the next
// field starts.
//
// str:       NUL-terminated input; it is only read, never modified.
// delimiter: character that ends a field.
// token:     destination buffer. No size is passed in, so nothing here limits
//            how many bytes are written; the caller must supply a buffer large
//            enough for the longest field plus the terminator.
//
// Returns a pointer just past the delimiter that ended the field, or a pointer
// to the terminating NUL of str when no delimiter was found.
// NOTE(review): when delimiter is '\0' the check after the loop also matches
// the end of the string, so the returned pointer is one past the terminator.
const char *splitter(const char *str, char delimiter, char *token) {

    // Copy characters until the end of the input or the delimiter is reached.
    // token is a local copy of the caller's pointer: advancing it here does
    // not move the caller's array, it only tracks the next free slot.
    while (*str && (delimiter != *str)) {
        *token++ = *str;
        str++;            
    }
    // Step over the delimiter so the next call starts on the following field.
    if (delimiter == *str)
        str++;

    // token now points one past the last copied character, so this store
    // appends the terminator after the copied text instead of overwriting it.
    // For an empty field nothing was copied and token becomes "".
    *token = '\0';    // what is this line doing?

    //how could the token be correct in the main() after setting it to null terminator 
    //here?

    return str;
} 

// Demo driver: splits a PATH-like string on ':' and prints one field per
// line, printing "./" in place of every empty field.
int main() {
    // One extra byte leaves room for the terminator of a MAX_TOKEN_SIZE-long field.
    char token[MAX_TOKEN_SIZE + 1];  
    const char *env = "/bin:/sbin:::/usr/bin";
    // splitter returns the start of the next field; the loop ends once that
    // pointer reaches the terminating NUL of the input.
    while (*env) {
        env = splitter(env, ':', token);  

        //if token is empty, set it to "./"
        // token is an array, so the token != NULL half of this test is always
        // true; only the token[0] check decides.
        if ((token != NULL) && (token[0] == '\0')) {
            // The explicit \0 inside the literal is redundant: strcpy stops at
            // the first NUL it encounters.
            strcpy(token, "./\0");            
        }

        printf("%s\n", token)  ;
    }
    return 0;
}

The output is correct:

/bin
/sbin
./
./
/usr/bin

Solution

  • There are subtle problems in the posted code:

    • the token != NULL part of the test if ((token != NULL) && (token[0] == '\0')) is redundant: token is an array, hence token != NULL is always true.

    • splitter does not receive the length of the destination array: if the str argument contains a token longer than MAX_TOKEN_SIZE bytes, it will cause undefined behavior because of a buffer overflow.

    • if the delimiter passed to splitter is the null byte, the return value will point beyond the end of the string, potentially causing undefined behavior.

    • the line *token = '\0'; just sets the null terminator at the end of the token copied from str, if any.

    Here is a modified version:

    #include <stdio.h>
    #include <string.h>
    
    #define MAX_TOKEN_SIZE 200
    
    /*
     * Extract the leading field of str into token.
     *
     * Characters are consumed up to the first occurrence of delimiter or the
     * end of the string. At most size - 1 of them are stored in token; any
     * excess is consumed but dropped, so an overlong field is truncated
     * rather than overflowing the buffer. token is NUL-terminated whenever
     * size is non-zero and is left untouched when size is zero.
     *
     * Returns a pointer just past the delimiter that ended the field, or a
     * pointer to the terminating NUL of str if no delimiter was found.
     */
    const char *splitter(const char *str, char delimiter, char *token, size_t size) {
        const char *cursor = str;
        size_t used = 0;

        for (; *cursor != '\0'; cursor++) {
            if (*cursor == delimiter) {
                /* break skips the loop increment, so step over the delimiter here */
                cursor++;
                break;
            }
            /* keep one slot in reserve for the terminator */
            if (used + 1 < size) {
                token[used] = *cursor;
                used++;
            }
        }

        /* used never reaches size, so only a zero-sized buffer skips this store */
        if (size > 0) {
            token[used] = '\0';
        }
        return cursor;
    }
    
    /*
     * Demo driver: walks a PATH-like list field by field and prints each
     * field on its own line, printing "./" in place of every empty field.
     */
    int main() {
        const char *env = "/bin:/sbin:::/usr/bin";
        char token[MAX_TOKEN_SIZE + 1];

        for (;;) {
            /* stop once the previous call consumed the whole input */
            if (*env == '\0') {
                break;
            }

            /* sizeof token is the full array size, MAX_TOKEN_SIZE + 1 */
            env = splitter(env, ':', token, sizeof token);

            /* an empty field is printed as "./" */
            if (token[0] == '\0') {
                strcpy(token, "./");
            }
            printf("%s\n", token);
        }
        return 0;
    }