Search code examples
cfilerecursionstructurels

Searching directories recursively for largest files in C


The goal of this program is to take a command line argument of a directory, and print the top 10 largest files under that directory (including subdirectories). So far, I am just trying to get it to print the files and their sizes recursively. I am using a structure to contain the file name along with its size, and storing these structures in an array. For some reason though, the program does not seem to be adding the information for the subdirectory files to the array. The sorting also seems to be off, but I think this is part of the same problem as its returning the wrong value for num_entries since its not statting all the files correctly. I'm sure it's some sort of flaw in the recursion logic, but I just can't seem to find it. If anyone could offer me any pointers in the right direction (ha) I would appreciate it. Thanks!

(This is also my first post here so if there's anything else I've done wrong as far as formatting my post let me know too.)

#include <unistd.h>
#include <sys/types.h>
#include <dirent.h>
#include <stdio.h>
#include <sys/stat.h>
#include <string.h>
#include <stdlib.h>

#define K   1024
#define CHUNK   16

typedef struct mls { 
    char *ml_name;
    long ml_size;
} ML_ENTRY;

ML_ENTRY **getentries (int *num_entries, const char *name, int level);
void swap(ML_ENTRY *list[], int pos1, int pos2);
void quicksort(ML_ENTRY *list[], int n);

ML_ENTRY **getentries (int *num_entries, const char *name, int level)
{
  DIR *dir;
  struct dirent *entry;
  struct stat st;
  int rv;
  int n;
  int size;
  char path[K];
  ML_ENTRY **li;

  if (!(dir = opendir (name)))
    return;
  if (!(entry = readdir (dir)))
    return;

  size = CHUNK;
  n = 0;
  li = malloc(size * sizeof(ML_ENTRY *));

  while (entry = readdir (dir)){
          if(n >= size){
          size <<= 1;
          li = realloc(li, size*sizeof(ML_ENTRY));
      }
      int len =
        snprintf (path, sizeof (path) - 1, "%s/%s", name, entry->d_name);
      rv = lstat(path, &st);
      if (entry->d_type == DT_DIR){
      path[len] = 0;
      if (strcmp (entry->d_name, ".") == 0
          || strcmp (entry->d_name, "..") == 0)
        continue;
      getentries(num_entries, path, level + 1);
    }
    if (rv < 0)
        continue;
    li[n] = malloc(sizeof(ML_ENTRY));
    li[n]->ml_name = strdup(entry->d_name);
    li[n++]->ml_size = st.st_size;
    }
  closedir (dir);
  *num_entries = n;
  return li;
}


int main (int argc, char *argv[])
{
  int nentries;
  ML_ENTRY **list;
  char dir[K];
  int i;

  if (argc > 1)
    strcpy(dir, argv[1]);
  else  
      strcpy(dir, ".");

  list = malloc(sizeof(ML_ENTRY*));

  list = getentries(&nentries, dir, 0);
  quicksort(list, nentries);

  for(i = 0; i < nentries; i++)
      printf("%5d %s\n", list[i]->ml_size, list[i]->ml_name);

  return 0;
}

void swap(ML_ENTRY *list[], int pos1, int pos2)
{
  ML_ENTRY *tmp;

  tmp = list[pos1];
  list[pos1] = list[pos2];
  list[pos2] = tmp;
}
void quicksort(ML_ENTRY *list[], int n)
{
  int last;
  int i;

  if(n < 2)
    return;
  last = 0;
  for(i=1;i<n;i++)
    if(list[i]->ml_size > list[0]->ml_size)
      swap(list, ++last, i);
  swap(list, 0, last);
  quicksort(list, last - 1);
  quicksort(list + last + 1, n - last - 1);
}



void swap(ML_ENTRY *list[], int pos1, int pos2)
{
  ML_ENTRY *tmp;

  tmp = list[pos1];
  list[pos1] = list[pos2];
  list[pos2] = tmp;
}
void quicksort(ML_ENTRY *list[], int n)
{
  int last;
  int i;

  if(n < 2)
    return;
  last = 0;
  for(i=1;i<n;i++)
    if(list[i]->ml_size > list[0]->ml_size)
      swap(list, ++last, i);
  swap(list, 0, last);
  quicksort(list, last - 1);
  quicksort(list + last + 1, n - last - 1);
}

Solution

  • You said,

    For some reason though, the program does not seem to be adding the information for the subdirectory files to the array.

    In your function getentries, you have a recursive call for dealing with sub-directories:

         getentries(num_entries, path, level + 1);
    

    The returned value from that recursive call is being ignored. That's why you don't see any entries corresponding to the sub-directories in your output.

    Update

    Here's the updated function that deals with the entries of the sub-directories.

    ML_ENTRY **getentries (int *num_entries, const char *name, int level)
    {
       DIR *dir;
       struct dirent *entry;
       struct stat st;
       int rv;
       int n;
       int size;
       char path[K];
       ML_ENTRY **li;
    
       // Add variables to deal with the entries from sub-directories.
       ML_ENTRY **subDirectoryLi;
       int subDirctoryNumEntries;
       int i;
    
       if (!(dir = opendir (name)))
          return;
    
       size = CHUNK;
       n = 0;
       li = malloc(size * sizeof(ML_ENTRY *));
    
       while (entry = readdir (dir)){
          if(n >= size){
             size <<= 1;
             li = realloc(li, size*sizeof(ML_ENTRY*));
          }
          int len =
             snprintf (path, sizeof (path) - 1, "%s/%s", name, entry->d_name);
          rv = lstat(path, &st);
          if (entry->d_type == DT_DIR){
             path[len] = 0;
             if (strcmp (entry->d_name, ".") == 0
                 || strcmp (entry->d_name, "..") == 0)
                continue;
    
             // Process the entries from the sub-directory.
             subDirectoryLi = getentries(&subDirctoryNumEntries, path, level + 1);
             for ( i = 0; i < subDirctoryNumEntries; ++i, ++n )
             {
                if(n >= size){
                   size <<= 1;
                   li = realloc(li, size*sizeof(ML_ENTRY*));
                }
                li[n] = subDirectoryLi[i];
             }
          }
          if (rv < 0)
             continue;
          li[n] = malloc(sizeof(ML_ENTRY));
          li[n]->ml_name = strdup(entry->d_name);
          li[n++]->ml_size = st.st_size;
       }
       closedir (dir);
       *num_entries = n;
       return li;
    }