I am making a program for the school that must generate 1 million strings all different from each other and whose length varies from 3 to 20 characters, I immediately want to say that I have already finished the program and I have already generated the strings, but I wanted to, as a pastime, optimize it.
I initially conceived it by making a 2d array (array containing char arrays) and it worked but I wanted to make the string checking faster by saving each string in the column equal to its length, for example the string "Ci_3sa" which contains 6 characters is stored in arr[6][0][nOfChars]
and instead a 4 characters long word in arr[4][0][nOfChars]
, I think I explained what I wanted to do .
I do not think I have made reference errors of the type of variables, because when I compile the code I do not get errors or warnings (I have active warnings), but when I execute the code it gives me an error in the dump core, I also tried to run the code in a debugger for C but however I don't understand what the problem is because it seems to work some times and in others it doesn't (but it never finished executing the program).
I would really like to understand what kind of mistake I made.
I assume the error is in the "checkStrings()" function
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#define N_STRINGS 1000000
#define MIN_CHARS 3
#define MAX_CHARS 20
#define ASCII_START 32 // 32 è lo spazio
#define ASCII_END 126
#define randnum(min, max) ((rand() % (int)(((max) + 1) - (min))) + (min))
char *generateRandomString(int size)
{
int i;
// char *charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789,.-#'?!";
// printf("Dimensione del charset: %ld\n", strlen(charset));
char *res = malloc(size + 1);
for (i = 0; i < size; i++)
{
/*
int key = randnum(0, 69);
res[i] = charset[key];
*/
int key = (rand() % (ASCII_END - ASCII_START)) + ASCII_START;
res[i] = (char)key;
if (key == 92 || key == 32 || key == 34)
i--;
}
res[i] = '\0';
return res;
}
int checkStrings(char *strs[18][N_STRINGS], char *str, int size)
{
for (int i = 0; i < size; i++)
{
/*
printf("Lunghezza stringa\n: %ld", strlen(str));
printf("Vediamo : %s\n", str);
printf("Parola Array in posizione %d: %s\n", i, strs[i]);
printf("Parola Generata: %s\n", str);
*/
if (strcmp(strs[strlen(str) - 3][i], str) == 0)
return 1;
}
return 0;
}
void wirteOutput(char *outputFile, char *strs[18][N_STRINGS], int *ss, int isJson)
{
FILE *output;
if ((output = fopen(outputFile, "w+")) == NULL)
{
printf("errore nell'apertura del file output");
exit(2);
}
if (isJson == 1)
fprintf(output, "[\n"); // inizio array in json
for (int j = 0; j < 18; j++)
{
for (int i = 0; i < ss[j]; i++)
{
if (isJson == 1)
if (i < ss[j] - 1)
fprintf(output, "\"%s\",\n", strs[j][i]);
else
fprintf(output, "\"%s\"\n", strs[j][i]);
else
fprintf(output, "%s\n", strs[j][i]);
// printf("i: %d\n", i);
}
}
if (isJson == 1)
fprintf(output, "]\n"); // fine array in json
fclose(output);
}
int main()
{
// printf("Questo è il char 32: '%c'\n\n", 32);
srand(time(NULL));
// printf("%d\n", randnum(1, 70));
int percentage = 0;
int sizes[18] = {0};
char *strings[18][N_STRINGS];
printf("Starting to generate random strings\n\n");
for (int i = 0; i < N_STRINGS; i++)
{
int n = randnum(MIN_CHARS, MAX_CHARS);
// printf("Numero di lettere estratto: %d\n", n);
char *str = generateRandomString(n);
// printf("Parola generata: %s\n", str);
// sleep(1);
if (checkStrings(strings, str, sizes[strlen(str) - 3]) != 0)
{
// printf("find same word\n");
i--;
}
else
{
strings[strlen(str) - 3][i] = str;
sizes[strlen(str) - 3]++;
}
// if (i >= 10000 && N_STRINGS % i == 0 && ((100 * i) / N_STRINGS) % 10 == 0)
if (i >= (N_STRINGS / 100) && percentage != (100 * i) / N_STRINGS)
{
percentage = (100 * i) / N_STRINGS;
fputs("\033[A\033[2K", stdout);
rewind(stdout);
printf("%d%%\n", percentage); // 1000 : 100 = i : x
}
// printf("Parola assegnata: %s\n", strings[i]);
}
wirteOutput("output2.json", strings, sizes, 1);
wirteOutput("output2.txt", strings, sizes, 0);
printf("End\n");
return 0;
}
EDIT:
This is my test code and @AndreaWenzel i tried your solution but after some attempts it works, but I don't understand how this 3d array should work because according to the logic of this code it should only write the strings that have been generated and instead it writes me in the file 1 or 2 strings and in the other cases (null) even if the code reaches the end by writing "End".
This is the file
(null)
Ji+]
(null)
(null)
(null)
(null)
(null)
(null)
(null)
(null)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
//#define N_STRINGS 1000000
#define N_STRINGS 100
#define MIN_CHARS 3
#define MAX_CHARS 20
#define ASCII_START 32 // 32 è lo spazio
#define ASCII_END 126
#define randnum(min, max) ((rand() % (int)(((max) + 1) - (min))) + (min))
char *generateRandomString(int size)
{
int i;
// char *charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789,.-#'?!";
// printf("Dimensione del charset: %ld\n", strlen(charset));
char *res = malloc(size + 1);
for (i = 0; i < size; i++)
{
/*
int key = randnum(0, 69);
res[i] = charset[key];
*/
int key = (rand() % (ASCII_END - ASCII_START)) + ASCII_START;
res[i] = (char)key;
if (key == 92 || key == 32 || key == 34)
i--;
}
res[i] = '\0';
return res;
}
int checkStrings(char *strs[18][N_STRINGS], char *str, int size)
{
for (int i = 0; i < size; i++)
{
printf("Lunghezza stringa: %ld\n", strlen(str));
// sleep(1);
printf("Parola Array in posizione %d: %s\n", i, strs[strlen(str) - 3][i]);
printf("Parola Generata: %s\n", str);
if (strs[strlen(str) - 3][i] != NULL)
if (strcmp(strs[strlen(str) - 3][i], str) == 0)
return 1;
}
return 0;
}
void wirteOutput(char *outputFile, char *strs[18][N_STRINGS], int *ss, int isJson)
{
FILE *output;
if ((output = fopen(outputFile, "w+")) == NULL)
{
printf("errore nell'apertura del file output");
exit(2);
}
if (isJson == 1)
fprintf(output, "[\n"); // inizio array in json
for (int j = 0; j < 18; j++)
{
for (int i = 0; i < ss[j]; i++)
{
if (isJson == 1)
if (i < ss[j] - 1)
fprintf(output, "\"%s\",\n", strs[j][i]);
else
fprintf(output, "\"%s\"\n", strs[j][i]);
else
fprintf(output, "%s\n", strs[j][i]);
// printf("i: %d\n", i);
}
}
if (isJson == 1)
fprintf(output, "]\n"); // fine array in json
fclose(output);
}
int main()
{
// printf("Questo è il char 32: '%c'\n\n", 32);
srand(time(NULL));
// printf("%d\n", randnum(1, 70));
int percentage = 0;
int sizes[18] = {0};
// char *strings[18][N_STRINGS];
char *(*strings)[N_STRINGS];
strings = malloc(18 * sizeof *strings);
if (strings == NULL)
{
fprintf(stderr, "Memory allocation error!\n");
exit(EXIT_FAILURE);
}
printf("Starting to generate random strings\n\n");
for (int i = 0; i < N_STRINGS; i++)
{
int n = randnum(MIN_CHARS, MAX_CHARS);
// printf("Numero di lettere estratto: %d\n", n);
char *str = generateRandomString(n);
// printf("Parola generata: %s\n", str);
// sleep(1);
if (checkStrings(strings, str, sizes[strlen(str) - 3]) != 0)
{
// printf("find same word\n");
i--;
}
else
{
strings[strlen(str) - 3][i] = str;
sizes[strlen(str) - 3]++;
}
// if (i >= 10000 && N_STRINGS % i == 0 && ((100 * i) / N_STRINGS) % 10 == 0)
if (i >= (N_STRINGS / 100) && percentage != (100 * i) / N_STRINGS)
{
percentage = (100 * i) / N_STRINGS;
fputs("\033[A\033[2K", stdout);
rewind(stdout);
printf("%d%%\n", percentage); // 1000 : 100 = i : x
}
// printf("Parola assegnata: %s\n", strings[i]);
}
printf("Ecco quante parole sono state scritte\n");
for (int i = 0; i < 18; i++)
printf("%d\n", sizes[i]);
// wirteOutput("output2.json", strings, sizes, 1);
wirteOutput("output2.txt", strings, sizes, 0);
printf("End\n");
free(strings);
return 0;
}
I fixed the core dump problem, apparently, but the thing that turns my nose up in any case is that, in the test code, cmq when the words are generated and there are no similar ones, it does not assign them because so many times it has happened that was going to check for more than one null value within the same word length section.
This is another test after the changes: (N_STRINGS = 50)
(null)
(null)
(null)
(null)
(null)
(null)
(null)
(null)
(null)
(null)
(null)
(null)
(null)
(null)
(null)
(null)
(null)
(null)
(null)
1/E1-&E/3@K
(null)
(null)
(null)
(null)
(null)
(null)
(null)
(null)
(null)
(null)
(null)
(null)
(null)
(null)
(null)
(null)
(null)
@wLo!>p%QR|+bNUgs
(null)
(null)
(null)
(null)
(null)
(null)
(null)
x>KEeQ&tG[jgT9roWdt
(null)
(null)
(null)
(null)
The line
char *strings[18][N_STRINGS];
in the function main
will allocate 18 * 1000000 * sizeof(char*)
bytes on the stack, which is 144 MB on most platforms. This will likely cause a stack overflow. For example, on Linux, a typical maximum stack size is 8 MB.
You should generally not allocate more than a few kilobytes on the stack. I suggest that you allocate the memory on the heap instead, using malloc
:
char *(*strings)[N_STRINGS];
strings = malloc( 18 * sizeof *strings );
if ( strings == NULL )
{
fprintf( stderr, "Memory allocation error!\n" );
exit( EXIT_FAILURE );
}
The pointer strings
should behave like a decayed 2D/3D array (depending on how you see it). This is because I selected the type of the pointer in such a way that it points to a sub-array as a whole, instead of a single element of the sub-array.
Don't forget to call free
when you no longer need the memory, otherwise you will have a memory leak.
Also, the line
strings[strlen(str) - 3][i] = str;
does not do what you want. This line should be the following:
strings[strlen(str)-3][sizes[strlen(str)-3]] = str;
After fixing these bugs, your program should look like this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
//#define N_STRINGS 1000000
#define N_STRINGS 100
#define MIN_CHARS 3
#define MAX_CHARS 20
#define ASCII_START 32 // 32 è lo spazio
#define ASCII_END 126
#define randnum(min, max) ((rand() % (int)(((max) + 1) - (min))) + (min))
char *generateRandomString(int size)
{
int i;
// char *charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789,.-#'?!";
// printf("Dimensione del charset: %ld\n", strlen(charset));
char *res = malloc(size + 1);
for (i = 0; i < size; i++)
{
/*
int key = randnum(0, 69);
res[i] = charset[key];
*/
int key = (rand() % (ASCII_END - ASCII_START)) + ASCII_START;
res[i] = (char)key;
if (key == 92 || key == 32 || key == 34)
i--;
}
res[i] = '\0';
return res;
}
int checkStrings(char *strs[18][N_STRINGS], char *str, int size)
{
for (int i = 0; i < size; i++)
{
printf("Lunghezza stringa: %ld\n", strlen(str));
// sleep(1);
printf("Parola Array in posizione %d: %s\n", i, strs[strlen(str) - 3][i]);
printf("Parola Generata: %s\n", str);
if (strs[strlen(str) - 3][i] != NULL)
if (strcmp(strs[strlen(str) - 3][i], str) == 0)
return 1;
}
return 0;
}
void wirteOutput(char *outputFile, char *strs[18][N_STRINGS], int *ss, int isJson)
{
FILE *output;
if ((output = fopen(outputFile, "w+")) == NULL)
{
printf("errore nell'apertura del file output");
exit(2);
}
if (isJson == 1)
fprintf(output, "[\n"); // inizio array in json
for (int j = 0; j < 18; j++)
{
for (int i = 0; i < ss[j]; i++)
{
if (isJson == 1)
if (i < ss[j] - 1)
fprintf(output, "\"%s\",\n", strs[j][i]);
else
fprintf(output, "\"%s\"\n", strs[j][i]);
else
fprintf(output, "%s\n", strs[j][i]);
// printf("i: %d\n", i);
}
}
if (isJson == 1)
fprintf(output, "]\n"); // fine array in json
fclose(output);
}
int main( void )
{
// printf("Questo è il char 32: '%c'\n\n", 32);
srand(time(NULL));
// printf("%d\n", randnum(1, 70));
int percentage = 0;
int sizes[18] = {0};
// char *strings[18][N_STRINGS];
char *(*strings)[N_STRINGS];
strings = malloc(18 * sizeof *strings);
if (strings == NULL)
{
fprintf(stderr, "Memory allocation error!\n");
exit(EXIT_FAILURE);
}
printf("Starting to generate random strings\n\n");
for (int i = 0; i < N_STRINGS; i++)
{
int n = randnum(MIN_CHARS, MAX_CHARS);
// printf("Numero di lettere estratto: %d\n", n);
char *str = generateRandomString(n);
// printf("Parola generata: %s\n", str);
// sleep(1);
if (checkStrings(strings, str, sizes[strlen(str) - 3]) != 0)
{
// printf("find same word\n");
i--;
}
else
{
strings[strlen(str)-3][sizes[strlen(str)-3]] = str;
sizes[strlen(str) - 3]++;
}
// if (i >= 10000 && N_STRINGS % i == 0 && ((100 * i) / N_STRINGS) % 10 == 0)
if (i >= (N_STRINGS / 100) && percentage != (100 * i) / N_STRINGS)
{
percentage = (100 * i) / N_STRINGS;
fputs("\033[A\033[2K", stdout);
rewind(stdout);
printf("%d%%\n", percentage); // 1000 : 100 = i : x
}
// printf("Parola assegnata: %s\n", strings[i]);
}
printf("Ecco quante parole sono state scritte\n");
for (int i = 0; i < 18; i++)
printf("%d\n", sizes[i]);
// wirteOutput("output2.json", strings, sizes, 1);
wirteOutput("output2.txt", strings, sizes, 0);
printf("End\n");
free(strings);
return 0;
}
This program writes the following to the file output2.txt
:
ux*
&{;
)}I
XGo
LOh
V3)
8>q
GNX
@J7
^%|
-keU
m44T
_a;x
F*|+
w0nTc
21gMx
N>z/J
bWM|Y
@/=kr
ill&A
}MWQk
#6Ouab
ado8mR
mYtPO:
Ct?LJ,
QpC1=g
U+nF&*
oSQE+p
]N[TH(
'L_hwty
y{AGIR0
k>b5_Me4
(n2oQD$f
3>SKZ.?y
gb;gO,aq`
;mGV_joDp
pWF7A!3jK
$%ZD#l!4&
J?VFUI2gZ
X6{Z!Qs<5
`VMNvn[+B
+61rN2b0*S
w-4@8HV;!M
)dkU^(SD@:
@w;C%Xt-<N
Qz5+LLeN}u
Gzm1qCsy-=
Yyc3v}RW{=
B9-,q.A<ou
uv>m*`BQm5
Qsel*;+x'Y7
#0r;E7VMhKO
,G7pD^Z{OP%
ZiT2x:bI|d!
*koo)ltKdwO
%KI.9Zs}mDX
5Q}n77CU8$k
H@=KJQp=7v>
6*DvlN]NB6W
J^rM*$i:[9>(
7]jH@iP9|wg?
oS/-=,5->@^b
V8J@Y;Ks+qy6
jyt9c()ZNe1?
{&)tTLGH/%An
`eg}|ihyr_64
jS&>?p.X];y9=
{]KT&b1Au|(lk
]?d89=!^zZee^
Y&g[#,nX@]Rq#
'zh>xIXN?mMRf
Ycx2!-n@zVzDu
>cr@&g%:5@a%`
w-!#.ed4)[{Mk<
Vj{j-p#`8SqF#?+
5OHpfux>,1#9R6Z
EH*e`XWm%,&ak_5
*sm)N'IW{:?DbOd
;u-J;aDeR4J{GM=U
TE$i0Lv]V7%#T?pj
]Ed?tb#7!ham[:@m
Yi@m!Xr%Tfjzht+X
d7!#3SW@yt`&D,RsD
3R]/g$v5&`N{f}Ne)
@(UkF#8@9J*RvZ]1x
Afz(3;4S]PDJ3G|Sz
w#@--N.^i1q>h3:y/Z
&<-o46#<Z?&Vsd6Obf
l#&^KCK?*(*!cb<3(u
PuuU(]M:V?Q[C'zO-2
'cT-0aK5u!0&vimv(DY
gr9`t*6?QSK{%B[#gXg
UEsgt5IDL1^62g8K6+}
BEYRZfo?gn4+pJi!j>P
PN#26}M{dT`Txv[N|y)!
46p#AnR<>TP&OMGv35BE
ANZZ>;4D?S!}V):j/Cl<
7bqAwAu6rhY&'}:&:=4;
O:9HKA))>^=g+]?kNh9Q
!cX'f6Dr@9Ok#?|NV-1R
Note that this program has memory leaks, because you are not freeing any of the strings allocated in the function generateRandomString
.
Also, in the function main
, the expression strlen(str)-3
is being used 4 times in total. Especially the call to the function strlen
is rather expensive. Therefore, it would make sense to only call this function once and to save the result to a variable, so that you remember the result and don't have to call the function again.