Search code examples
c linux directory posix system-calls

Create a directory and return a dirfd with `open`


I want to create a file tree in C and avoid possible race conditions. My intent was to use open(3) to create the root directory and open would return a directory file descriptor (dirfd) that I would give to subsequent openat(3)/mkdirat(3) calls to create the tree.

/* Intended one-step "create the directory and open it"; path and mode come from the caller. */
int dirfd = open(path, O_DIRECTORY | O_CREAT | O_RDONLY, mode);

A usual way of doing this would have been to replace the first open call with mkdir(3), but that doesn't open the directory and is therefore racy.

/* Two-step alternative: create the directory, then open it again by path. */
mkdir(path, mode);
DIR *dirp = opendir(path); /* second, separate lookup of path after the mkdir() above */

Is this doable? All my tests either return EISDIR or ENOTDIR. Also, the man page of open(2) states:

When both O_CREAT and O_DIRECTORY are specified in flags and the file specified by pathname does not exist, open() will create a regular file (i.e., O_DIRECTORY is ignored).

This seems to still be the case as of Linux 5.9. I wonder whether this can be fixed, or whether it is part of the interface forever now.

Here is a sample program to try creating and opening a directory with open:

#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <stdlib.h>
#include <unistd.h>

/*
 * Try to create "/tmp/test" as a directory and open it in a single open()
 * call, then report whether that worked.
 *
 * Returns EXIT_SUCCESS if open() handed back a descriptor, EXIT_FAILURE
 * (after printing the errno text to stderr) otherwise.
 */
int main(void) {
    /* const char *path = "directory"; */
    /* int dirfd = openat(AT_FDCWD, path, O_DIRECTORY | O_CREAT | O_RDONLY, 0755); */
    const char *path = "/tmp/test";
    int dirfd = open(path, O_DIRECTORY | O_CREAT | O_RDONLY, 0755);
    if(dirfd < 0) {
        /* Name the call that was actually made and the path that was actually used. */
        fprintf(stderr, "open(%s): %s\n", path, strerror(errno));
        return EXIT_FAILURE;
    }
    close(dirfd);
    return EXIT_SUCCESS;
}

Also, these lines from the man pages seem contradictory:

  • open(3):

    If O_CREAT and O_DIRECTORY are set and the requested access mode is neither O_WRONLY nor O_RDWR, the result is unspecified.

  • open(2):

    EISDIR pathname refers to a directory and the access requested involved writing (that is, O_WRONLY or O_RDWR is set).


Solution

  • The man 2 open man page (link to most up to date Linux manpages at man7.org) explicitly states in the Bugs section that using O_CREAT | O_DIRECTORY will create a regular file. There is also this discussion.

    More importantly, even if it did succeed, some other process could still access the directory immediately after the creation succeeded, even before the call returns to your program. Therefore, the race window you worry about would exist anyway.

    The common pattern is to create a temporary directory in the same directory with a sufficiently random name (beginning with . to omit it from typical file and directory listings) accessible only to the current user; then populate it; then adjust its access mode; and then rename it to the final name.

    This does not make it impossible for some other process to access the directory, but this pattern is considered safe enough.

    Here is an example program doing this:

    #define  _POSIX_C_SOURCE  200809L
    #define  _ATFILE_SOURCE
    #define  _GNU_SOURCE
    #include <stdlib.h>
    #include <inttypes.h>
    #include <unistd.h>
    #include <sys/stat.h>
    #include <sys/random.h>
    #include <sys/syscall.h>
    #include <fcntl.h>
    #include <signal.h>
    #include <time.h>
    #include <string.h>
    #include <stdio.h>
    #include <errno.h>
    
    #ifndef  RENAME_NOREPLACE
    #define  RENAME_NOREPLACE  (1 << 0)
    /* Fallback renameat2() for C libraries that do not provide one.
     *
     * Renames oldpath (relative to olddirfd) to newpath (relative to
     * newdirfd), honouring flags such as RENAME_NOREPLACE.
     *
     * Returns 0 on success, or -1 with errno set on failure.
     */
    static inline int renameat2(int olddirfd, const char *oldpath,
                                int newdirfd, const char *newpath, unsigned int flags)
    {
        /* syscall() already follows the library convention: on failure it
         * returns -1 and stores the error code in errno. Its result must be
         * passed through untouched; negating it would overwrite errno with 1
         * (EPERM) regardless of the real cause. */
        return (int)syscall(SYS_renameat2, olddirfd, oldpath, newdirfd, newpath, flags);
    }
    #endif
    
    /* Xorshift64* pseudo-random number generator.
       The state is zero until the generator has been seeded; a zero state
       stays zero and produces only zeros.
    */
    static uint64_t  prng_state = 0;

    /* Advance the generator by one step and return the next 64-bit value. */
    static uint64_t  prng_u64(void)
    {
        uint64_t  x = prng_state;

        x = x ^ (x >> 12);
        x = x ^ (x << 25);
        x = x ^ (x >> 27);

        /* Remember the scrambled state; the output is that state times a fixed odd multiplier. */
        prng_state = x;
        return UINT64_C(2685821657736338717) * x;
    }
    
    /* Seed the generator and return the new (nonzero) state.
     *
     * getrandom() is tried first. If it does not deliver a full, nonzero
     * 64-bit value, the seed is built from the process ID and whichever
     * clocks can be read, then scrambled.
     */
    static uint64_t  prng_randomize(void)
    {
        uint64_t  state;

        /* Use Linux-specific getrandom() call. */
        {
            ssize_t   n;
            do {
                n = getrandom(&state, sizeof state, 0);
            } while (n == -1 && errno == EINTR);
            if (n == (ssize_t)sizeof state && state != 0) {
                prng_state = state;
                return state;
            }
        }

        /* Fall back to using time as a seed. */
        {
            struct timespec  now;
            size_t           rounds = 250;

            state = (uint64_t)getpid() * UINT64_C(4758041);

            /* Each clock is mixed in only if it could actually be read, so
               that 'now' is never used uninitialized. */
            if (clock_gettime(CLOCK_REALTIME, &now) == 0) {
                state ^= (uint64_t)now.tv_sec * UINT64_C(270547637)
                       ^ (uint64_t)now.tv_nsec * UINT64_C(90640031);
            }

            if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &now) == 0) {
                state ^= (uint64_t)now.tv_sec * UINT64_C(3266177)
                       ^ (uint64_t)now.tv_nsec * UINT64_C(900904331);
            }

            if (clock_gettime(CLOCK_MONOTONIC, &now) == 0) {
                state ^= (uint64_t)now.tv_sec * UINT64_C(24400169)
                       ^ (uint64_t)now.tv_nsec * UINT64_C(1926466307);
            }

            /* Make sure state is nonzero */
            state += (!state);

            /* Mix it a bit, to make it less predictable. */
            while (rounds-->0) {
                state ^= state >> 12;
                state ^= state << 25;
                state ^= state >> 27;
            }

            prng_state = state;
            return state;
        }
    }
    
    /* The 64 characters used for random names: digits, upper case, lower
       case, then '-' and '_'. The array holds exactly 64 characters, so it
       is NOT NUL-terminated; index it, do not treat it as a string. */
    static const char base64[64] =
        "0123456789"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "abcdefghijklmnopqrstuvwxyz"
        "-_";
    
    /* Create directory 'name' inside 'dirpath' (resolved relative to 'atfd')
       and return an open descriptor to it.

       The directory is first created under a random dot-prefixed name with
       mode 0700, opened, given its final mode (mode with the current umask
       applied), and only then renamed to 'name' without replacing anything
       that already exists there.

       name must be non-NULL, non-empty, and not contain a slash (else EINVAL).
       A NULL or empty dirpath means ".".

       Returns a descriptor opened with O_PATH | O_DIRECTORY | O_CLOEXEC, or
       -1 with errno set. On failure the temporary directory is removed again.
    */
    int mkdiratfd(const int atfd, const char *dirpath, const char *name, const mode_t mode)
    {
        char    tmpname[32];
        mode_t  mask;
        int     parentfd;
        int     newfd;
        int     err = 0;

        /* Reject unusable names before touching the file system. */
        if (name == NULL || name[0] == '\0' || strchr(name, '/') != NULL) {
            errno = EINVAL;
            return -1;
        }

        if (dirpath == NULL || dirpath[0] == '\0')
            dirpath = ".";

        /* Pin the parent directory, retrying if interrupted. */
        while ((parentfd = openat(atfd, dirpath, O_PATH | O_DIRECTORY | O_CLOEXEC)) == -1
               && errno == EINTR) {
            /* retry */
        }
        if (parentfd == -1)
            return -1;

        /* umask() can only be read by setting it: set it to 0, then put the
           old value straight back. */
        mask = umask(0);
        umask(mask);

        /* Seed the PRNG on first use. */
        if (prng_state == 0)
            prng_randomize();

        /* Keep generating names until one can be created. */
        for (;;) {
            size_t  len = 0;

            /* Leading dot keeps the name "hidden". */
            tmpname[len++] = '.';

            /* Two PRNG words, ten 6-bit characters from each: 20 characters. */
            for (int word = 0; word < 2; word++) {
                uint64_t  bits = prng_u64();
                for (int digit = 0; digit < 10; digit++) {
                    tmpname[len++] = base64[bits & 63];
                    bits >>= 6;
                }
            }
            tmpname[len] = '\0';

            /* Only the current user may access the directory for now. */
            if (mkdirat(parentfd, tmpname, 0700) != -1)
                break;

            /* Interrupted or name taken: try again with a new name. */
            if (errno == EINTR || errno == EEXIST)
                continue;

            err = errno;
            goto fail_close_parent;
        }

        /* Open the freshly created directory, retrying if interrupted. */
        while ((newfd = openat(parentfd, tmpname, O_PATH | O_DIRECTORY | O_CLOEXEC)) == -1
               && errno == EINTR) {
            /* retry */
        }
        if (newfd == -1) {
            err = errno;
            goto fail_remove_tmp;
        }

        /*
         * Note: Other actions, like file creation, etc.
         *       should be done at this stage.
        */

        /* Update directory owner group here, if necessary. */

        /* Switch from the private 0700 to the requested mode, minus umask bits. */
        if (fchmodat(parentfd, tmpname, mode & (~mask), 0) == -1) {
            err = errno;
            goto fail_close_new;
        }

        /* Publish under the final name, refusing to replace an existing entry. */
        if (renameat2(parentfd, tmpname, parentfd, name, RENAME_NOREPLACE) == -1) {
            /* NOTE(review): EPERM is reported to the caller as EEXIST;
               presumably this compensates for a renameat2() that cannot
               report the precise error -- confirm before relying on it. */
            err = (errno == EPERM) ? EEXIST : errno;
            goto fail_close_new;
        }

        /* Success: only the new directory's descriptor is handed back. */
        close(parentfd);
        return newfd;

    fail_close_new:
        close(newfd);
    fail_remove_tmp:
        unlinkat(parentfd, tmpname, AT_REMOVEDIR);
    fail_close_parent:
        close(parentfd);
        errno = err;
        return -1;
    }
    
    /* Command-line driver: create directory NAME in the current directory.
     *
     * With anything other than exactly one argument, or with -h / --help,
     * prints usage to stderr and returns EXIT_FAILURE. Otherwise returns
     * EXIT_SUCCESS if the directory was created, or prints the errno text
     * and returns EXIT_FAILURE.
     */
    int main(int argc, char *argv[])
    {
        int fd;

        if (argc != 2 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
            const char *argv0 = (argc > 0 && argv && argv[0] && argv[0][0]) ? argv[0] : "(this)";
            fprintf(stderr, "\n");
            fprintf(stderr, "Usage: %s [ -h | --help ]\n", argv0);
            fprintf(stderr, "       %s NAME\n", argv0);
            fprintf(stderr, "\n");
            fprintf(stderr, "This program creates directory NAME in the current directory.\n");
            fprintf(stderr, "\n");
            return EXIT_FAILURE;
        }

        fd = mkdiratfd(AT_FDCWD, NULL, argv[1], 0755);
        if (fd == -1) {
            fprintf(stderr, "%s: %s.\n", argv[1], strerror(errno));
            return EXIT_FAILURE;
        }

        /* Nothing further is done with the directory here, so release the
           descriptor instead of leaving it open until exit. */
        close(fd);

        return EXIT_SUCCESS;
    }
    

    Note that this uses renameat2() via a raw syscall if the C library does not expose it. (It was added to glibc in 2.28, but is supported by Linux kernels since 3.15).

    If you are still worried, a paranoid pattern is to create a temporary directory to hold the temporary directory. After opening the inner directory that will become the final directory, change the mode of the outer temporary directory to zero, to stop traversal into the inner tree. The creator can still access the inner tree via the open directory descriptor. The directory can still be renamed, because both reside on the same file system.

    I personally would not bother, because using a temporary name, and only renaming the directory when completed – which is what many applications in Linux do – is safe enough.