Search code examples
c, string, memcpy

memcpy() copies some garbage characters in the destination string


I am trying to create a random string generator with the following code.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Random string generator.
//
// Stores `length` characters drawn with rand() from the digits and the
// lower- and upper-case ASCII letters into dest, then appends a '\0'.
// The caller therefore has to supply a buffer of at least length + 1 bytes.
void rand_str(char *dest, size_t length) {
    static const char alphabet[] =
        "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
    // Number of usable characters; sizeof also counts the trailing '\0'.
    const int alphabet_size = (int)(sizeof alphabet - 1);
    char *const end = dest + length;

    for (char *out = dest; out != end; ++out) {
        *out = alphabet[rand() % alphabet_size];
    }
    *end = '\0';
}

/*
 * Tries to fill data[] with num_data distinct random strings, printing each
 * generated string followed by the copy stored in data[].
 *
 * NOTE(review): this listing is the code under discussion and is kept
 * unchanged; the comments below mark the spots that lead to the garbage and
 * leftover characters shown in the output.
 */
int main ()
{
    int num_data = 5;
    int string_length; // not initialized before its first use below
    int max_string_length = 10;

    // string_length is still indeterminate here, so the row size of this
    // variable-length array is unpredictable (undefined behaviour).
    char data[num_data][string_length];
    int i = 0;
    while (i < num_data)
    {
        // rand() is never seeded with srand(), so every run yields the same sequence.
        string_length = 3 + (rand() % max_string_length); // 3..12 characters
        // str has room for string_length chars only, but rand_str() stores
        // string_length characters plus a '\0', i.e. one byte past the end.
        char str[string_length];
        rand_str(str, string_length);
        short increment_avoider_flag = 0; // non-zero means "duplicate found, retry"
        for (int j = 0; j < i; j++)
        {
            if (!strcmp(data[j], str))
            {
                string_length = 3 + (rand() % max_string_length);
                // This inner str shadows the outer one and vanishes at the closing
                // brace, so the regenerated string is never used; the flag below
                // simply makes the outer loop try again with the same i.
                char str[string_length];
                rand_str(str, string_length);
                increment_avoider_flag = -1;
                break;
            }
        }
        if (!increment_avoider_flag)
        {
            // sizeof(str) equals string_length, so the terminating '\0' is not
            // copied; data[i] keeps whatever bytes were already there after the
            // copied characters.
            memcpy(data[i], str, sizeof(str));
            printf("%s\n", str);
            printf("%s\n\n\n", data[i]);
            i++;
        }
    }
    
}

The output of the above code is:

pn2QMwQbLq
pn2QMwQbLq~??


WqJ99NSq
WqJ99NSqLq~??


LDvi5z
LDvi5zSqLq~??


gxBewrk5rHr
gxBewrk5rHr??


DcDg
DcDgwrk5rHr??


There are two problems with the output here.

  1. If the first string created has a length x that is greater than the lengths of later strings, memcpy also copies the residue of the previous string. E.g. the first string is pn2QMwQbLq and the second string is WqJ99NSq, but the copied string is WqJ99NSqLq~??, which has an additional Lq from the first string.
  2. The copied strings have some garbage characters in them. E.g. the first original string is pn2QMwQbLq, but the copied string pn2QMwQbLq~?? has an additional ~??.

I am not sure what is happening here, but it seems like I am declaring or copying the character arrays incorrectly. Please help me figure out this problem.


Solution

  • If you want to generate random strings without any memory leaks, consider the following approach.

    The following code implements something of a map structure with a time complexity of O(n), where n is the length of the string. This is not a good implementation of a map; however, creating a proper map in C would be a heavy task.

    Pros:

    • no heap memory allocation
    • no repeated strings among the generated random strings (each new string is compared against all earlier ones with strcmp)

    Cons:

    • not a good implementation of map, as time complexity should be O(log(n))

    Here is the code (try it online):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <time.h>
    
    /* Characters a generated string may contain: digits, lower case, upper case. */
    const char *charset = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";

    void random_str(char *str, size_t len);

    /*
     * Writes len characters picked with rand() from charset into str and
     * terminates the result with a NUL byte, so str must provide room for
     * len + 1 bytes.
     *
     * A len of 0 is rejected: a message is printed to stderr and the
     * process exits with status 1.
     */
    void random_str(char *str, size_t len)
    {
        enum { CHARSET_LEN = 62 }; /* number of characters in charset */
        char *out = str;
        size_t remaining = len;

        if (remaining == 0)
        {
            fprintf(stderr, "could not generate 0 length string\n");
            exit(1);
        }
        while (remaining-- > 0)
        {
            *out++ = charset[rand() % CHARSET_LEN];
        }
        *out = 0;
    }
    
    #define NUM_DATA 5  /* how many distinct strings to produce */
    #define MAX_LEN 10  /* characters per string, excluding the terminator */

    /*
     * Generates NUM_DATA random strings of MAX_LEN characters each, with no
     * two strings equal, and prints every accepted string on its own line.
     */
    int main(void)
    {
        /* One extra byte per row for the terminating NUL. */
        char rdata[NUM_DATA][MAX_LEN + 1] = {0};
        size_t filled = 0;

        srand(time(NULL));
        while (filled < NUM_DATA)
        {
            int is_unique = 1;

            /* Generate the candidate directly into the next free row. */
            random_str(rdata[filled], MAX_LEN);

            /* Compare the candidate against every row accepted so far. */
            for (size_t prev = 0; prev < filled; prev++)
            {
                if (strcmp(rdata[filled], rdata[prev]) == 0)
                {
                    is_unique = 0;
                }
            }

            /* A duplicate is overwritten on the next pass; the row index stays put. */
            if (!is_unique)
            {
                continue;
            }
            printf("%s\n", rdata[filled]);
            filled++;
        }
        return 0;
    }