Search code examples
csplitdynamic-memory-allocationc-stringsstrncpy

Storing tokens from 1D char array to char** array


I am trying to write a program that will dynamically allocate enough space to store all the words in a 1D char array separated by a space. ex:

char *literal = "The quick brown fox";
char **words = { "The", "quick", "brown", "fox" };

The program I wrote keeps segfaulting when trying to strncpy(str[buff_ptr],tok,strlen(tok));

I will post my code bellow:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

char *mutableString(char *lit) {
    int size = strlen(lit);
    char *str = (char *)malloc(sizeof(char) * size);
    strncpy(str, lit, size + 1);
    return str;
}

int numTokens(char *str, const char *DELIM) {
    char* clone = (char*)malloc(sizeof(char*));
    strncpy(clone, str, strlen(str) + 1);
    int count = 0;
    for (char *tok = strtok(clone, " "); tok != NULL; tok = strtok(NULL, " "))
        count++;
    free(clone);
    return count;
}

char **tokenize(char *str, const char *DELIM) {
    printf("tokenize-------------------------\n");
    int size = numTokens(str, DELIM);
    //allocate space on heap for buffer
    char **buff = (char **)malloc(size * sizeof(char *));
    //get first word
    char *tok = strtok(str, DELIM);
    int buff_ptr = 0;
    while (tok != NULL) {
        strncpy(buff[buff_ptr], tok, strlen(tok) + 1);
        printf("buff[%d]%s\n", buff_ptr, buff[buff_ptr]);
        //increment to next word for storage
        buff_ptr++;
        //find next word in string
        tok = strtok(NULL, DELIM);
    }
    for (int i = 0; i < size; i++) {
        printf("%s\n", buff[i]);
    }
    //return 2D pointer
    return buff;
}

int main() {
    char *literal = "some literal string.";
    //convert string to mutable string for strtok
    char *str = mutableString(literal);
    //set 2D pointer equal to the pointer address returned
    char **no_spaces_str = tokenize(str, " ");
    printf("%s\n", str);
    for (int i = 0; i < numTokens(str, " "); i++) {
        printf("%s\n", no_spaces_str[i]);
    }
    //free heap allocated memory
    free(str);
    free(no_spaces_str);
    return 0;
}

Please see attachment of lldb stack variables:

lldb output


Solution

  • Within the function mutableString there is dynamically allocated the character array str that does not contain a string

    char* mutableString(char* lit){
      int size = strlen(lit);
      char* str = (char*)malloc(sizeof(char)*size);
      strncpy(str,lit,size);
      return str;
    }
    

    So other functions invoke undefined behavior as for example in this for loop

    int numTokens(char* str, const char* DELIM){
      int count = 0;
      for(; *str != '\0'; str++)
      //...
    

    Moreover if the array contained a string nevertheless the function numTokens is incorrect because for example it returns 0 when a passed string contains only one word.

    Also in the function tokenize

    strncpy(buff[buff_ptr],tok,strlen(tok));
    

    there are used uninitialized pointers buff[buff_ptr] allocated like.

    char **buff = (char**)malloc(size*sizeof(char*));
    

    And again you are trying to copy strings without including the terminating zero character '\0; using eth functions strncpy.

    So this call in main

    printf("%s\n",no_spaces_str[i]);
    

    also will invoke undefined behavior.