I have a program, that splits strings based on the delimiter. I have also, 2 other functions, one that prints the returned array and another that frees the array.
My program prints the array and returns an error when the free array method is called. Below is the full code.
#include "stringsplit.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
/* Split string by another string, return split parts + NULL in array.
*
* Parameters:
* str: the string to split
* split: the string to split str with
*
* Returns:
* A dynamically reserved array of dynamically reserved string parts.
*
* For example called with "Test string split" and " ",
* returns ["Test", "string", "split", NULL].
* Or called with "Another - test" and " - ",
* returns ["Another", "test", NULL].
*/
unsigned long int getNofTokens(const char *string) {
char *stringCopy;
unsigned long int stringLength;
unsigned long int count = 0;
stringLength = (unsigned)strlen(string);
stringCopy = malloc((stringLength + 1) * sizeof(char));
strcpy(stringCopy, string);
if (strtok(stringCopy, " \t") != NULL) {
count++;
while (strtok(NULL, " \t") != NULL)
count++;
}
free(stringCopy);
return count;
}
char **split_string(const char *str, const char *split) {
unsigned long int count = getNofTokens(str);
char **result;
result = malloc(sizeof(char *) * count + 1);
char *tmp = malloc(sizeof(char) * strlen(str));
strcpy(tmp, str);
char *token = strtok(tmp, split);
int idx = 0;
while (token != NULL) {
result[idx++] = token;
token = strtok(NULL, split);
}
return result;
}
void print_split_string(char **split_string) {
for (int i = 0; split_string[i] != NULL; i++) {
printf("%s\n", split_string[i]);
}
}
void free_split_string(char **split_string) {
for (int i = 0; split_string[i] != NULL; i++) {
char *currentPointer = split_string[i];
free(currentPointer);
}
free(split_string);
}
Also, do I need to explicitly add \0
at the end of the array or does strtok
add it automatically?
There are some problems in your code:
[Major] the function getNofTokens()
does not take the separator string as an argument, it counts the number of words separated by blanks, potentially returning an inconsistent count to its caller.
[Major] the size allocated in result = malloc(sizeof(char *) * count + 1);
is incorrect: it should be:
result = malloc(sizeof(char *) * (count + 1));
Storing the trailing NULL
pointer will write beyond the end of the allocated space.
[Major] storing the said NULL
terminator at the end of the array is indeed necessary, as the block of memory returned by malloc()
is uninitialized.
[Major] the copy of the string allocated and parsed by split_string
cannot be safely freed because the pointer tmp
is not saved anywhere. The pointer to the first token will be different from tmp
in 2 cases: if the string contains only delimiters (no token found) or if the string starts with a delimiter (the initial delimiters will be skipped). In order to simplify the code and make it reliable, each token could be duplicated and tmp
should be freed. In fact your free_split_string()
function relies on this behavior. With the current implementation, the behavior is undefined.
[Minor] you use unsigned long
and int
inconsistently for strings lengths and array index variables. For consistency, you should use size_t
for both.
[Remark] you should allocate string copies with strdup()
. If this POSIX standard function is not available on your system, write a simple implementation.
[Major] you never test for memory allocation failure. This is OK for testing purposes and throw away code, but such potential failures should always be accounted for in production code.
[Remark] strtok()
is a tricky function to use: it modifies the source string and keeps a hidden static state that makes it non-reentrant. You should avoid using this function although in this particular case it performs correctly, but if the caller of split_string
or getNofTokens
relied on this hidden state being preserved, it would get unexpected behavior.
Here is a modified version:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "stringsplit.h"
/* Split string by another string, return split parts + NULL in array.
*
* Parameters:
* str: the string to split
* split: the string to split str with
*
* Returns:
* A dynamically reserved array of dynamically reserved string parts.
*
* For example called with "Test string split" and " ",
* returns ["Test", "string", "split", NULL].
* Or called with "Another - test" and " - ",
* returns ["Another", "test", NULL].
*/
size_t getNofTokens(const char *string, const char *split) {
char *tmp = strdup(string);
size_t count = 0;
if (strtok(tmp, split) != NULL) {
count++;
while (strtok(NULL, split) != NULL)
count++;
}
free(tmp);
return count;
}
char **split_string(const char *str, const char *split) {
size_t count = getNofTokens(str, split);
char **result = malloc(sizeof(*result) * (count + 1));
char *tmp = strdup(str);
char *token = strtok(tmp, split);
size_t idx = 0;
while (token != NULL && idx < count) {
result[idx++] = strdup(token);
token = strtok(NULL, split);
}
result[idx] = NULL;
free(tmp);
return result;
}
void print_split_string(char **split_string) {
for (size_t i = 0; split_string[i] != NULL; i++) {
printf("%s\n", split_string[i]);
}
}
void free_split_string(char **split_string) {
for (size_t i = 0; split_string[i] != NULL; i++) {
free(split_string[i]);
}
free(split_string);
}
Here is an alternative without strtok()
and without intermediary allocations:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "stringsplit.h"
size_t getNofTokens(const char *str, const char *split) {
size_t count = 0;
size_t pos = 0, len;
for (pos = 0;; pos += len) {
pos += strspn(str + pos, split); // skip delimiters
len = strcspn(str + pos, split); // parse token
if (len == '\0')
break;
count++;
}
return count;
}
char **split_string(const char *str, const char *split) {
size_t count = getNofTokens(str, split);
char **result = malloc(sizeof(*result) * (count + 1));
size_t pos, len, idx;
for (pos = 0, idx = 0; idx < count; pos += len, idx++) {
pos += strspn(str + pos, split); // skip delimiters
len = strcspn(str + pos, split); // parse token
if (len == '\0')
break;
result[idx] = strndup(str + pos, len);
}
result[idx] = NULL;
return result;
}
void print_split_string(char **split_string) {
for (size_t i = 0; split_string[i] != NULL; i++) {
printf("%s\n", split_string[i]);
}
}
void free_split_string(char **split_string) {
for (size_t i = 0; split_string[i] != NULL; i++) {
free(split_string[i]);
}
free(split_string);
}
EDIT After re-reading the specification in your comment, there seems to be some potential confusion as to the semantics of the split
argument:
split
is a set of delimiters, the above code does the job. And the examples will be split as expected.split
is an actual string to match explicitly, the above code only works by coincidence on the examples given in the comment.To implement the latter semantics, you should use strstr()
to search for the split
substring in both getNofTokens
and split_string
.
Here is an example:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "stringsplit.h"
/* Split string by another string, return split parts + NULL in array.
*
* Parameters:
* str: the string to split
* split: the string to split str with
*
* Returns:
* A dynamically reserved array of dynamically reserved string parts.
*
* For example called with "Test string split" and " ",
* returns ["Test", "string", "split", NULL].
* Or called with "Another - test" and " - ",
* returns ["Another", "test", NULL].
*/
size_t getNofTokens(const char *str, const char *split) {
const char *p;
size_t count = 1;
size_t len = strlen(split);
if (len == 0)
return strlen(str);
for (p = str; (p = strstr(p, split)) != NULL; p += len)
count++;
return count;
}
char **split_string(const char *str, const char *split) {
size_t count = getNofTokens(str, split);
char **result = malloc(sizeof(*result) * (count + 1));
size_t len = strlen(split);
size_t idx;
const char *p = str;
for (idx = 0; idx < count; idx++) {
const char *q = strstr(p, split);
if (q == NULL) {
q = p + strlen(p);
} else
if (q == p && *q != '\0') {
q++;
}
result[idx] = strndup(p, q - p);
p = q + len;
}
result[idx] = NULL;
return result;
}
void print_split_string(char **split_string) {
for (size_t i = 0; split_string[i] != NULL; i++) {
printf("%s\n", split_string[i]);
}
}
void free_split_string(char **split_string) {
for (size_t i = 0; split_string[i] != NULL; i++) {
free(split_string[i]);
}
free(split_string);
}