Search code examples
crecursionmallocdynamic-arrays

Realloc chews up string array during recursive storage of dir/filenames


I am using opendir/readdir in a C program (GNU on cygwin) to collect the filenames from some nested directories into a string array (the program uses largely C89 and earlier conventions). Since I don't know the number of files in advance I decided to use malloc/realloc to perform dynamic memory allocation. A pointer array is passed through recursive calls to collect filenames. The problem is that filenames stored during early calls to getlist() are being corrupted during later storage steps. After going into a subdirectory, performing a second call to realloc and emerging from the subdirectory, strings at the edge of the storage at the point after the realloc was executed are progressively corrupted as additional filenames are collected in the parent directory.

If instead I allocate memory using a single malloc assignment to create a large initial pointer array I avoid the problem, but I'd like to be able to use realloc. Can anyone point out what I am doing wrong, specifically I guess it's how I am using realloc in this case?

#include <string.h>
#include <stdlib.h>
#include <signal.h>

#include <stdio.h>
#include <stddef.h>
#include <sys/types.h>
#include <dirent.h>
#include <sys/stat.h>

// function prototypes
char ** getlist(char ** filelist,long int *numfiles,char *dirname);

extern int stat (const char *filename, struct stat *buf);   
extern DIR * opendir (const char *_dirname);
extern  struct dirent * readdir (DIR *dirstream);

int main(int narg, char* argv[])
{ 

    // vars
    char * dirname;
    char **filelist, **traverse; 
    long int numfiles =0;
    FILE* pfile;

    //....................................
    // and go ...

    if ( (filelist = (char **)malloc(sizeof(char *)))  == NULL )
    { 
            printf("Fatal malloc error!\n"); 
            exit(3);
    }
    filelist = getlist(filelist,&numfiles,argv[1]); // argv[1] is dir name 

    // list the stored filenames and write to file

    pfile = fopen("listoutput.txt","w");
    printf("Stored filenames (N=%i):\n",numfiles);
    traverse = filelist;    
    while(*traverse)
    {
        printf("%s\n",*traverse);
        fprintf(pfile,"%s\n",*traverse);
        traverse++;
    }
    fclose(pfile);

    // free etc should go here...

    return 0 ;
}

//-------------------------------------------------------------------------------

char ** getlist(char** filelist, long int *numfiles, char* dirname)
{

    // variables
    char filename[200];
    char dirname_[200];
    DIR * directory;
    struct dirent * file;

    strcpy(dirname_,dirname);

    // for checking file type
    // macro:   int S_ISREG (mode_t m)  
    struct stat* filestat = malloc(sizeof(struct stat));
    int sizeofchar = sizeof(char); // fields: mode_t st_mode, unsigned char d_namlen

    char **traverse, **ptemp; 

    //aux
    long int ii, icheck;    

    // check number of valid files in dirname and allocate memory in char pointer array
    ii=0; 
    directory = opendir (dirname_); 
    while(file = readdir(directory))
    {
        sprintf(filename,"%s/%s",_dirname_,file->d_name);
        icheck = stat(filename,filestat);
        if (icheck==0)
        {
            if (S_ISREG(filestat->st_mode)) ii++;
        }
        else
        {
            printf("Couldn't check file type of file \"%s\"  (icheck = %i)\n", filename, icheck);
        }
    }

    // generate enough room for all the filename strings

    if ( (filelist=(char **)realloc(filelist,sizeof(char *)*(*numfiles+ii+1)))  ==  NULL )
    {
        printf("Fatal realloc error!\n");
        exit(3);
    }

    traverse = filelist + *numfiles; 

    // now store the filenames in filelist ...

    (void) rewinddir (directory);   
    while(file = readdir(directory))
    {
        sprintf(filename,"%s/%s",dirname_,file->d_name);
        icheck = stat(filename,filestat);
        if (icheck==0 && S_ISREG(filestat->st_mode))
        {
            *traverse = (char *)malloc(sizeofchar*(strlen(filename)+1));
            strcpy(*traverse,filename);
            traverse++;
            (*numfiles)++;

            // spit out what we have so far
            printf("\nCurrent list (looping):\n-----------\n");
            ptemp = filelist;   
            ii=*numfiles;
            while(ii--)
            {
                printf("%s\n",*ptemp);
                ptemp++;
            }
            printf("\n-----------\n");          

        //sleep(1); 
        }
        else if (icheck==0 && S_ISDIR(filestat->st_mode))
        {
            if (strcmp(file->d_name,".")!=0 && strcmp(file->d_name,"..")!=0 )
            {
                printf("Processing folder %s\n", filename);
                filelist = getlist(filelist,numfiles,filename);
                traverse = filelist + *numfiles; 

                // spit out what we have so far
                printf("\nCurrent list (returned from getlist):\n-----------\n");
                ptemp = filelist;   
                while(*ptemp)
                {
                    printf("%s\n",*ptemp);
                    ptemp++;
                }
                printf("\n-----------\n");

            }       
        }
    }
    (void) closedir (directory);

    *traverse = NULL;

    return filelist;    
}

Solution

  • Here is AFAIK a working solution. It turns out I wasn't keeping proper track of the number of pointer array elements already allocated, and so during sequential calls to realloc I was effectively overwriting old memory locations with new data. It is surprising that the result of the bug was not more dramatic - accidental I guess.

    My solution is to pass around a second int that keeps track of the number of strings (filenames) to store. I could alternately scan the pointer array at each level of recursion to determine this number on the fly.

    Finally I wonder if it would not have been better to implement this as a linked list. Leave that for next time.

    #include <string.h>
    #include <stdlib.h>
    #include <signal.h>
    
    #include <stdio.h>
    #include <stddef.h>
    #include <sys/types.h>
    #include <dirent.h>
    #include <sys/stat.h>
    
    typedef struct stat STAT;
    
    
    // prototypes
    extern int stat (const char *filename, STAT *buf);  
    extern DIR * opendir (const char *dirname);
    extern struct dirent * readdir (DIR *dirstream);
    
    
    char ** getlist(char ** filelist,long int *pnumfiles, long int *pnallocated,char *dirname);
    
    
    int main(int narg, char* argv[])
    { 
        // vars
    
        FILE* pfile;
        char **filelist, **traverse; 
        long int numfiles, nallocated;
    
        //....................................
        // and go ...
    
        if ( (filelist = (char **)malloc(sizeof(char *)))  == NULL )
            { printf("Fatal malloc error!\n"); exit(1);}
        nallocated = 1;
        numfiles = 0;
    
        filelist = getlist(filelist,&numfiles,&nallocated,argv[1]);         // argv[1] should be a directory name
    
        // list the stored filenames and store to file
    
        pfile = fopen("listoutput.txt","w");
        if ( pfile  == NULL )
            { fprintf(stderr, "Could not open file for output!"); exit(4);};
    
        printf("Stored filenames (N=%i):\n",numfiles);
        traverse = filelist;    
        while(*traverse)
        {
            printf("%s\n",*traverse);
            fprintf(pfile,"%s\n",*traverse);
            traverse++;
        }
        printf("\nDONE!\n");
        fclose(pfile);
    
        return 0;
    }
    
    //-------------------------------------------------------------------------------
    
    char ** getlist(char** filelist, long int *pnumfiles, long int *pnallocated, char* dirname)
    {
        // variables
        char filename[200];
        char dirname_[200];
        DIR * directory;
        struct dirent * file;
    
        // for checking file type
        // macro:   int S_ISREG (mode_t m)  
    
        STAT* filestat; // fields: mode_t st_mode, unsigned char d_namlen
    
        long int ifile = 0; // number of files to store from current dir
        int sizeofchar = sizeof(char); // = 1
    
        //aux
        long int icheck;    
        char **traverse, **ptemp; 
    
        // and go ....
    
        // copy string to const char array because opendir demands one
    
        strcpy(dirname_,dirname);
    
        if ( (filestat = malloc(sizeof(STAT)) ) == NULL) 
            {
                printf("Error allocating memory for filestat!"); 
                exit(2);
            }
    
        // check number of valid files in dirname and allocate memory in char pointer array
    
        directory = opendir (dirname_); 
        while(file = readdir(directory))
        {
            sprintf(filename,"%s/%s",dirname_,file->d_name);
            icheck = stat(filename,filestat);
            if (icheck==0)
            {
                if (S_ISREG(filestat->st_mode)) ifile++;
            }
            else
            {
                printf("Couldn't check file type of file \"%s\"  (icheck = %i)\n", filename, icheck);
            }
        }
    
        // generate enough room for all the filename strings
    
        *pnallocated = *pnallocated + ifile;
        if ( (filelist=(char **)realloc(filelist,sizeof(char *)*(*pnallocated)))  ==  NULL )
            {
                printf("Fatal realloc error!\n");
                exit(2);
            }
        traverse = filelist + *pnumfiles; 
    
        // now store the filenames in filelist ...
    
        (void) rewinddir (directory);   
        while(file = readdir(directory))
        {
            sprintf(filename,"%s/%s",dirname_,file->d_name);
            icheck = stat(filename,filestat);
            if (icheck==0 && S_ISREG(filestat->st_mode))
            {
                size_t size = strlen(filename) + 1;  // Add 1!
                if ( (*traverse = malloc(size)) == NULL ) 
                {
                    printf("Fatal malloc error!\n");
                    exit(3);
                }
                memcpy(*traverse, filename, size);  
                traverse++;
                (*pnumfiles)++;
            }
            else if (icheck==0 && S_ISDIR(filestat->st_mode))
            {
                if (strcmp(file->d_name,".")!=0 && strcmp(file->d_name,"..")!=0 )
                {
                    printf("Processing folder %s\n", filename);
                    filelist = getlist(filelist,pnumfiles,pnallocated,filename);
                    traverse = filelist + *pnumfiles; 
                }       
            }
        }
        *traverse = NULL;
        (void) closedir (directory);    
    
        return filelist;    
    }