Search code examples
cmallocvalgrind

malloc() within if statement, assigned only if (if) is entered but valgrind says that bytes are lost


In the context of an assignment using C, where I have to create a li clone, I am trying to access all of the names of the child files in my directory.

By popular demand, here is the minimum code to reproduce this problem (please consider removing your downvotes...) (I wasn't kidding when I said it was a lot).

To start, have this file structure somewhere (.txt files have keyboard juggling) :

var
├── file1.txt
└── var2
    ├── test -> ../../test
    ├── var3
    │   ├── file2.txt
    │   └── file3.txt
    └── varfind -> ../

4 directories, 3 files

Code to reproduce main.c

#include <stdio.h>
#include <stdlib.h>

#include "helpers.h"

int main(int argc, char **argv) {
    char *destination = malloc(sizeof(char[THEORETICAL_MAX_PATH_LENGTH])); 
    switch (argc) {
        case 1: // (== Error case)
            fprintf(stderr, "Usage: %s file\n", argv[0]);
            exit(EXIT_FAILURE);
            break;
        case 2: // (== List)
            stripSlash(argv[argc - 1], destination);
            Object* obj =  createNode(destination, NULL);
            freeNode(obj);
            break;
        default:
            exit(0);
            break;
    }
    free(destination);
}

helpers.h

#ifndef HEADER_UTILITIES
#define HEADER_UTILITIES
#define THEORETICAL_MAX_PATH_LENGTH 4096

#include <sys/stat.h>

typedef struct {
    int numberOfChildren;
    char path[2][THEORETICAL_MAX_PATH_LENGTH];
    struct stat info;
    struct Object **children; // child[0] is parent.
} Object;

void stripSlash(char arg[], char *filename);

void freeNode(Object *node);
Object * createNode(const char *filename, Object *parent);

#endif

helpers.c

#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <errno.h>
#include <dirent.h>
#include "helpers.h"

void stripSlash(char arg[], char *filename) {
    strncpy(filename, arg, THEORETICAL_MAX_PATH_LENGTH);
    if (filename[strlen(filename) - 1] == '/')
        filename[strlen(filename) - 1] = '\0';
}

void getChildren(const char *path, struct stat* statBuffer, char *files[THEORETICAL_MAX_PATH_LENGTH], int *numberOfFiles) {
    int count = 0;
    struct dirent *currentDir;
    if (lstat(path, statBuffer) == 0 && S_ISDIR(statBuffer->st_mode)) {
        DIR *folder = opendir(path);
        if (access(path, F_OK) != -1) {
            if (folder)
                while ((currentDir = readdir(folder))) {
                    if (strcmp(currentDir->d_name, ".") && strcmp(currentDir->d_name, "..")) {
                        files[count] = (char*) malloc(THEORETICAL_MAX_PATH_LENGTH);
                        snprintf(files[count], THEORETICAL_MAX_PATH_LENGTH, "%s", currentDir->d_name);
                        count++;
                    }
                }
            free(currentDir);
        }
        closedir(folder);
    } else if (errno < 0)
        printf("ERROR %d\n", errno);

    *numberOfFiles = count;
}

int rippleCycleCheckUpwards(Object *parent, __ino_t inode) {
    if (parent)
        if (parent->info.st_ino == inode)
            return 1;
        else 
            return rippleCycleCheckUpwards((Object*) parent->children[0], inode);
    else 
        return 0;
}

void freeNode(Object *node) {
    node->children[0] = NULL;
    for (int i = 1; i < node->numberOfChildren + 1; i++)
        freeNode((Object*) node->children[i]);
    free(node->children[0]);
    free(node->children);
    free(node);
    node = NULL;
}

Object * createNode(const char *filename, Object *parent) {
    Object *node = malloc(sizeof(Object));

    char *files[THEORETICAL_MAX_PATH_LENGTH];
    char *realDestination = malloc(THEORETICAL_MAX_PATH_LENGTH);
    char *res = realpath(filename, realDestination);

    strncpy(node->path[0], filename, THEORETICAL_MAX_PATH_LENGTH - 1);
    if (res) {
        strncpy(node->path[1], realDestination, THEORETICAL_MAX_PATH_LENGTH - 1);
        strncat(node->path[1], "\0", 1);
    }
    free(realDestination);
    
    struct stat *info = malloc(sizeof(struct stat));
    lstat(filename, info);
    if (S_ISLNK(info->st_mode) == 1) {
        getChildren(node->path[1], &node->info, files, &node->numberOfChildren);
    } else {
        getChildren(node->path[0], &node->info, files, &node->numberOfChildren);
    }
    free(info);
    
    node->children = malloc((1 + node->numberOfChildren) * sizeof(Object*));
    node->children[0] = (struct Object*) parent;

    if (rippleCycleCheckUpwards((Object*) parent, node->info.st_ino) == 1) {
        node->numberOfChildren = 0;
    }

    for (int i = 0; i < node->numberOfChildren; i++) {
        char nextfile[THEORETICAL_MAX_PATH_LENGTH];
        snprintf(nextfile, THEORETICAL_MAX_PATH_LENGTH - 1, "%s/%s", filename, files[i]);
        
        if (S_ISDIR(node->info.st_mode) || S_ISLNK(node->info.st_mode)) {
            node->children[i + 1] = (struct Object*) createNode(nextfile, node);
        }
        free(files[i]);
    }
    return node;
}

You can build with this Makefile or gcc:

CC=gcc
CCFLAGS=-Wall -g
LDFLAGS=
SOURCES=$(wildcard *.c)
OBJECTS=$(SOURCES:.c=.o)
TARGET=ultra_cp

all: $(TARGET)

$(TARGET): $(OBJECTS)
        $(CC) -o $@ $^ $(LDFLAGS) 

%.o: %.c %.h
        $(CC) $(CCFLAGS) -c $<

%.o: %.c
        $(CC) $(CCFLAGS) -c $<

clean:
        rm -f *.o $(TARGET)

Then test with make then ./ultra_cp var for standard use or

valgrind --tool=memcheck --leak-check=yes --track-origins=yes --read-var-info=yes ./ultra_cp var

My issue is that valgrind prints out:

HEAP SUMMARY:
==3221==     in use at exit: 8,192 bytes in 2 blocks
==3221==   total heap usage: 112 allocs, 110 frees, 670,440 bytes allocated
==3221==
==3221== 8,192 bytes in 2 blocks are definitely lost in loss record 1 of 1
==3221==    at 0x4C2FB0F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==3221==    by 0x108CA1: getChildren (helpers.c:29)
==3221==    by 0x108F86: createNode (helpers.c:80)
==3221==    by 0x1090F8: createNode (helpers.c:98)
==3221==    by 0x1090F8: createNode (helpers.c:98)
==3221==    by 0x1091E3: main (main.c:15)
==3221==
==3221== LEAK SUMMARY:
==3221==    definitely lost: 8,192 bytes in 2 blocks
==3221==    indirectly lost: 0 bytes in 0 blocks
==3221==      possibly lost: 0 bytes in 0 blocks
==3221==    still reachable: 0 bytes in 0 blocks
==3221==         suppressed: 0 bytes in 0 blocks

helpers.c:29 is the line where files[count] = (char*) malloc(THEORETICAL_MAX_PATH_LENGTH); is called and the number of cases where it does not enter within the if statement.

It does not hinder the functioning of my code, but I'd like to avoid leaking memory if at all possible and would subsequently thank you for any help on this matter.

EDIT

Thanks to user dbush who provided the source of the problem, being that in the very specific case where rippleCycleCheckUpwards(...) is called, I do not empty the children of files.

In helpers.c one can replace

    if (rippleCycleCheckUpwards((Object*) parent, node->info.st_ino) == 1) {
        node->numberOfChildren = 0;
    }

with

    if (rippleCycleCheckUpwards((Object*) parent, node->info.st_ino) == 1) {
        for (int i = 0; i < node->numberOfChildren; i++) {
            free(files[i]);
        }
        node->numberOfChildren = 0;
    }

Solution

  • The problem is here in createNode:

    struct stat *info = malloc(sizeof(struct stat));
    lstat(filename, info);
    if (S_ISLNK(info->st_mode) == 1) {
        getChildren(node->path[1], &node->info, files, &node->numberOfChildren);
    } else {
        getChildren(node->path[0], &node->info, files, &node->numberOfChildren);
    }
    free(info);
    
    node->children = malloc((1 + node->numberOfChildren) * sizeof(Object*));
    node->children[0] = (struct Object*) parent;
    
    if (rippleCycleCheckUpwards((Object*) parent, node->info.st_ino) == 1) {
        node->numberOfChildren = 0;
    }
    

    You first get the list of children which are stored in files. You then do the cycle check, and if you find a cycle you set node->numberOfChildren to 0. This prevents you from entering the loop that follows where the entries of files are freed. So if you have a cycle you have a memory leak.

    You can fix this by doing the cycle check first, then decending into the children if you don't find a cycle:

    if (rippleCycleCheckUpwards((Object*) parent, node->info.st_ino) == 1) {
        node->numberOfChildren = 0;
    } else {
        struct stat *info = malloc(sizeof(struct stat));
        lstat(filename, info);
        if (S_ISLNK(info->st_mode) == 1) {
            getChildren(node->path[1], &node->info, files, &node->numberOfChildren);
        } else {
            getChildren(node->path[0], &node->info, files, &node->numberOfChildren);
        }
        free(info);
    }
    
    node->children = malloc((1 + node->numberOfChildren) * sizeof(Object*));
    node->children[0] = (struct Object*) parent;
    

    Besides this, valgrind also shows several errors on reading uninitialized values, but I'll leave fixing those as an exercise to the reader.