If my program is going to have large lists of numbers passed in through stdin
, what would be the most efficient way of reading this in?
The input I'm going to be passing into the program is going to be of the following format:
3,5;6,7;8,9;11,4;;
I need to process the input so that I can use the numbers between the colons (i.e I want to be able to use 3 and 5, 6 and 7 etc etc). The ;;
indicates that it is the end of the line.
I was thinking of using a buffered reader to read entire lines and then using parseInt.
Would this be the most efficient way of doing it?
One other fairly elegant way to handle this is to allow strtol
to parse the input by advancing the string to be read to endptr
as returned by strtol
. Combined with an array allocated/reallocated as needed, you should be able to handle lines of any length (up to memory exhaustion). The example below uses a single array for the data. If you want to store multiple lines, each as a separate array, you can use the same approach, but start with a pointer to array of pointers to int. (i.e. int **numbers
and allocate the pointers and then each array). Let me know if you have questions:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#define NMAX 256
int main () {
char *ln = NULL; /* NULL forces getline to allocate */
size_t n = 0; /* max chars to read (0 - no limit) */
ssize_t nchr = 0; /* number of chars actually read */
int *numbers = NULL; /* array to hold numbers */
size_t nmax = NMAX; /* check for reallocation */
size_t idx = 0; /* numbers array index */
if (!(numbers = calloc (NMAX, sizeof *numbers))) {
fprintf (stderr, "error: memory allocation failed.");
return 1;
}
/* read each line from stdin - dynamicallly allocated */
while ((nchr = getline (&ln, &n, stdin)) != -1)
{
char *p = ln; /* pointer for use with strtol */
char *ep = NULL;
errno = 0;
while (errno == 0)
{
/* parse/convert each number on stdin */
numbers[idx] = strtol (p, &ep, 10);
/* note: overflow/underflow checks omitted */
/* if valid conversion to number */
if (errno == 0 && p != ep)
{
idx++; /* increment index */
if (!ep) break; /* check for end of str */
}
/* skip delimiters/move pointer to next digit */
while (*ep && (*ep <= '0' || *ep >= '9')) ep++;
if (*ep)
p = ep;
else
break;
/* reallocate numbers if idx = nmax */
if (idx == nmax)
{
int *tmp = realloc (numbers, 2 * nmax * sizeof *numbers);
if (!tmp) {
fprintf (stderr, "Error: struct reallocation failure.\n");
exit (EXIT_FAILURE);
}
numbers = tmp;
memset (numbers + nmax, 0, nmax * sizeof *numbers);
nmax *= 2;
}
}
}
/* free mem allocated by getline */
if (ln) free (ln);
/* show values stored in array */
size_t i = 0;
for (i = 0; i < idx; i++)
printf (" numbers[%2zu] %d\n", i, numbers[i]);
/* free mem allocate to numbers */
if (numbers) free (numbers);
return 0;
}
Output
$ echo "3,5;6,7;8,9;11,4;;" | ./bin/prsistdin
numbers[ 0] 3
numbers[ 1] 5
numbers[ 2] 6
numbers[ 3] 7
numbers[ 4] 8
numbers[ 5] 11
numbers[ 6] 4
Also works where the string is stored in a file as:
$ cat dat/numsemic.csv | ./bin/prsistdin
or
$ ./bin/prsistdin < dat/numsemic.csv
fgets
and without size_t
It took a little reworking to come up with a revision I was happy with that eliminated getline
and substituted fgets
. getline
is far more flexible, handling the allocation of space for you, with fgets
it is up to you. (not to mention getline
returning the actual number of chars read without having to call strlen
).
My goal here was to preserve the ability to read any length line to meet your requirement. That either meant initially allocating some huge line buffer (wasteful) or coming up with a scheme that would reallocate the input line buffer as needed in the event it was longer than the space initially allocate to ln
. (this is what getline
does so well). I'm reasonably happy with the results. Note: I put the reallocation code in functions to keep main
reasonably clean. footnote 2
Take a look at the following code. Note, I have left the DEBUG
preprocessor directives in the code allowing you to compile with the -DDEBUG
flag if you want to have it spit out each time it allocates. [footnote 1] You can compile the code with:
gcc -Wall -Wextra -o yourexename yourfilename.c
or if you want the debugging output (e.g. set LMAX to 2
or something less than the line length), use the following:
gcc -Wall -Wextra -o yourexename yourfilename.c -DDEBUG
Let me know if you have questions:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#define NMAX 256
#define LMAX 1024
char *realloc_char (char *sp, unsigned int *n); /* reallocate char array */
int *realloc_int (int *sp, unsigned int *n); /* reallocate int array */
char *fixshortread (FILE *fp, char **s, unsigned int *n); /* read all stdin */
int main () {
char *ln = NULL; /* dynamically allocated for fgets */
int *numbers = NULL; /* array to hold numbers */
unsigned int nmax = NMAX; /* numbers check for reallocation */
unsigned int lmax = LMAX; /* ln check for reallocation */
unsigned int idx = 0; /* numbers array index */
unsigned int i = 0; /* simple counter variable */
char *nl = NULL;
/* initial allocation for numbers */
if (!(numbers = calloc (NMAX, sizeof *numbers))) {
fprintf (stderr, "error: memory allocation failed (numbers).");
return 1;
}
/* initial allocation for ln */
if (!(ln = calloc (LMAX, sizeof *ln))) {
fprintf (stderr, "error: memory allocation failed (ln).");
return 1;
}
/* read each line from stdin - dynamicallly allocated */
while (fgets (ln, lmax, stdin) != NULL)
{
/* provide a fallback to read remainder of line
if the line length exceeds lmax */
if (!(nl = strchr (ln, '\n')))
fixshortread (stdin, &ln, &lmax);
else
*nl = 0;
char *p = ln; /* pointer for use with strtol */
char *ep = NULL;
errno = 0;
while (errno == 0)
{
/* parse/convert each number on stdin */
numbers[idx] = strtol (p, &ep, 10);
/* note: overflow/underflow checks omitted */
/* if valid conversion to number */
if (errno == 0 && p != ep)
{
idx++; /* increment index */
if (!ep) break; /* check for end of str */
}
/* skip delimiters/move pointer to next digit */
while (*ep && (*ep <= '0' || *ep >= '9')) ep++;
if (*ep)
p = ep;
else
break;
/* reallocate numbers if idx = nmax */
if (idx == nmax)
realloc_int (numbers, &nmax);
}
}
/* free mem allocated by getline */
if (ln) free (ln);
/* show values stored in array */
for (i = 0; i < idx; i++)
printf (" numbers[%2u] %d\n", (unsigned int)i, numbers[i]);
/* free mem allocate to numbers */
if (numbers) free (numbers);
return 0;
}
/* reallocate character pointer memory */
char *realloc_char (char *sp, unsigned int *n)
{
char *tmp = realloc (sp, 2 * *n * sizeof *sp);
#ifdef DEBUG
printf ("\n reallocating %u to %u\n", *n, *n * 2);
#endif
if (!tmp) {
fprintf (stderr, "Error: char pointer reallocation failure.\n");
exit (EXIT_FAILURE);
}
sp = tmp;
memset (sp + *n, 0, *n * sizeof *sp); /* memset new ptrs 0 */
*n *= 2;
return sp;
}
/* reallocate integer pointer memory */
int *realloc_int (int *sp, unsigned int *n)
{
int *tmp = realloc (sp, 2 * *n * sizeof *sp);
#ifdef DEBUG
printf ("\n reallocating %u to %u\n", *n, *n * 2);
#endif
if (!tmp) {
fprintf (stderr, "Error: int pointer reallocation failure.\n");
exit (EXIT_FAILURE);
}
sp = tmp;
memset (sp + *n, 0, *n * sizeof *sp); /* memset new ptrs 0 */
*n *= 2;
return sp;
}
/* if fgets fails to read entire line, fix short read */
char *fixshortread (FILE *fp, char **s, unsigned int *n)
{
unsigned int i = 0;
int c = 0;
i = *n - 1;
realloc_char (*s, n);
do
{
c = fgetc (fp);
(*s)[i] = c;
i++;
if (i == *n)
realloc_char (*s, n);
} while (c != '\n' && c != EOF);
(*s)[i-1] = 0;
return *s;
}
footnote 1
nothing special about the choice of the word DEBUG
(it could have been DOG
, etc..), the point to take away is if you want to conditionally include/exclude code, you can simply use preprocessor flags to do that. You just add -Dflagname
to pass flagname
to the compiler.
footnote 2
you can combine the reallocation functions into a single void*
function that accepts a void pointer as its argument along with the size
of the type to be reallocated and returns a void pointer to the reallocated space -- but we will leave that for a later date.