Search code examples
csplitchardynamic-memory-allocationc-strings

First char* filled with junk characters when I put many words in my string split function


I'm trying to create my own string_split function in C called ft_split. The delimiter is based on a single char.

Here is my code :

#include <stdio.h>
#include <stdlib.h>

int is_sep(const char s, char c) //Return 1 if the current letter is a separator
{
    if (s == c)
        return (1);
    return (0);
}

size_t  wrd_len(const char *s, char c) //Return the length of the word
{
    int i;

    i = 0;
    while (s[i] != '\0' && is_sep(s[i], c) == 0)
        i++;
    return (i);
}

size_t  strcnt(const char *s, char c) //Return the length of the sentence without the separators
{
    int i;

    i = 0; 
    while (s[i])
    {
        if (is_sep(*s, c) == 0)
            i++;
    }
    return (i);
}

char    *word(const char *s, char c) //Return the word without the separator
{
    int     wdlen;
    char    *wd;
    int     i;

    i = 0;
    wdlen = wrd_len (s, c);
    wd = malloc (wdlen + 1);
    if (!wd)
        return (NULL);
    while (i < wdlen)
    {
        wd[i] = s[i];
        i++;
    }
    wd[i] = '\0';
    return (wd);
}

char    **ft_split(const char *s, char c)
{
    char    **strs;
    int     i;

    strs = malloc (strcnt (s, c) + 1);
    if (!strs)
        return (NULL);
    i = 0;
    while (*s != '\0')
    {
        while (is_sep(*s, c) == 1 && *s)
            s++;
        if (*s)
        {
            strs[i] = word(s, c);
            i++;
        }
        while (is_sep(*s, c) == 0 && *s)
            s++;
    }
    strs[i] = NULL;
    return (strs);
}

int main (void)
{
        int i = 0;
        const char test[] = "How are you ? I'm fine !";
        char    sep = ' ';
        char    **split = ft_split(test, sep);
        while (split[i])
        {
            printf("split %d : %s\n",i,split[i]);
            i++;
        }
}

And this is the output :

split 0 : ��^T�U
split 1 : are
split 2 : you
split 3 : ?
split 4 : I'm
split 5 : fine
split 6 : !

The first split is filled with junk characters instead of "How"

I have tried to put only 3 words and it works perfectly, for example "How are you" and this is the output :

split 0 : How
split 1 : are
split 2 : you

And I also tested with a lot of words to split and more char** gets filled with junk characters like so :

split 0 : �d���U
split 1 : Pe���U
split 2 : you
split 3 : today
split 4 : ?
split 5 : I
split 6 : don't
split 7 : know
split 8 : why
split 9 : my
split 10 : split
split 11 : is
split 12 : filling
split 13 : the
split 14 : first
split 15 : char*
split 16 : with
split 17 : junk
split 18 : charracters

NOTE : This is my first post on StackOverflow, I don't know if there is a universal format to ask questions, sorry if I did not respected that.

I will appreciate any help !


Solution

  • You code is wrong at least because the function strcnt

    size_t  strcnt(const char *s, char c) //Return the length of the sentence without the separators
    {
        int i;
    
        i = 0; 
        while (s[i])
        {
            if (is_sep(*s, c) == 0)
                i++;
        }
        return (i);
    }
    

    can invoke an infinite loop because the variable i is incremented only in the case when the first character of the passed string *s is not a separator. Otherwise the returned value of i will be equal to the length of the string.

    Or for example within the function ft_split you allocating an extent of memory of an invalid size

    strs = malloc (strcnt (s, c) + 1);
    

    You need to write at least

    strs = malloc ( ( strcnt (s, c) + 1 ) * sizeof( char * ) );
    

    There are other errors as for example the function word can return a null pointer but within the function ft_split there is no check whether a null pointer is returned.

    And so on.

    I can suggest the following solution shown in the demonstration program below.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    size_t  word_count( const char *s, const char *delim )
    {
        size_t count = 0;
    
        while (*s)
        {
            s += strspn( s, delim );
    
            if (*s)
            {
                ++count;
                s += strcspn( s, delim );
            }
        }
    
        return count;
    }
    
    char ** ft_split( const char *s, char c )
    {
        char delim[2] = { c };
    
        size_t count = word_count( s, delim );
    
        char **strs = malloc( ( count + 1 ) * sizeof( char * ) );
    
        int success = strs != NULL;
    
        size_t i = 0;
    
        while (success && *s)
        {
            s += strspn( s, delim );
    
            if (*s)
            {
                size_t n = strcspn( s, delim );
    
                strs[i] = malloc( n + 1 );
    
                success = strs[i] != NULL;
    
                if (success)
                {
                    memcpy( strs[i], s, n );
                    strs[i][n] = '\0';
                    ++i;
                    s += n;
                }
                else
                {
                    while (i--)
                    {
                        free( strs[i] );
                    }
                    free( strs );
                }
            }
        }
    
        if (success) strs[i] = NULL;
    
        return strs;
    }
    
    int main( void )
    {
        const char test[] = "How are you ? I'm fine !";
    
        size_t count = word_count( test, " " );
    
        char **split = ft_split( test, ' ' );
    
        if (split != NULL)
        {
            printf( "There are %zu words. They are:\n", count );
    
            for (char **p = split; *p; ++p)
            {
                puts( *p );
            }
    
            for (char **p = split; *p; ++p)
            {
                free( *p );
            }
            free( split );
        }
    }
    

    The program output is

    There are 7 words. They are:
    How
    are
    you
    ?
    I'm
    fine
    !