I want to create a file tree in C and avoid possible race conditions. My intent was to use open(3) to create the root directory, so that open would return a directory file descriptor (dirfd) that I could pass to subsequent openat(3)/mkdirat(3) calls to create the tree.
int dirfd = open(path, O_DIRECTORY | O_CREAT | O_RDONLY, mode);
A usual way of doing this would have been to replace the first open call with mkdir(3), but that doesn't open the directory and is thus racy.
mkdir(path, mode);
DIR *dirp = opendir(path);
Is this doable? All my tests return either EISDIR or ENOTDIR. Also, the man page of open(2) states:
When both O_CREAT and O_DIRECTORY are specified in flags and the file specified by pathname does not exist, open() will create a regular file (i.e., O_DIRECTORY is ignored).
This seems to still be the case as of Linux 5.09. I wonder if this can be fixed, or if it's part of the interface for ever now.
Here is a sample program to try creating and opening a directory with open
:
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <stdlib.h>
#include <unistd.h>
/*
 * Try to create and open a directory with a single open() call using
 * O_DIRECTORY | O_CREAT, and report the resulting error, if any.
 *
 * Returns EXIT_SUCCESS if open() yields a descriptor, EXIT_FAILURE otherwise.
 */
int main(void) {
    /* Alternative variant using openat() relative to the current directory: */
    /* const char *path = "directory"; */
    /* int dirfd = openat(AT_FDCWD, path, O_DIRECTORY | O_CREAT | O_RDONLY, 0755); */
    const char *path = "/tmp/test";
    int dirfd = open(path, O_DIRECTORY | O_CREAT | O_RDONLY, 0755);
    if (dirfd < 0) {
        /* Report the path that was actually passed to open(). */
        fprintf(stderr, "open(%s): %s\n", path, strerror(errno));
        return EXIT_FAILURE;
    }
    close(dirfd);
    return EXIT_SUCCESS;
}
Also, these lines from the man pages seem contradictory:
open(3)
:
If O_CREAT and O_DIRECTORY are set and the requested access mode is neither O_WRONLY nor O_RDWR, the result is unspecified.
open(2)
:
EISDIR
pathname refers to a directory and the access requested involved writing (that is, O_WRONLY or O_RDWR is set).
The man 2 open man page (link to most up to date Linux manpages at man7.org) explicitly states in the Bugs section that using O_CREAT | O_DIRECTORY
will create a regular file. There is also this discussion.
More importantly, even if it did succeed, some other process could still access the directory immediately after the creation succeeded, even before the call returns to your program. Therefore, the race window you worry about would exist anyway.
The common pattern is to create a temporary directory in the same directory with a sufficiently random name (beginning with "." to omit it from typical file and directory listings) accessible only to the current user; then populate it; then adjust its access mode; and then rename it to the final name.
This does not make it impossible for some other process to access the directory, but this pattern is considered safe enough.
Here is an example program doing this:
#define _POSIX_C_SOURCE 200809L
#define _ATFILE_SOURCE
#define _GNU_SOURCE
#include <stdlib.h>
#include <inttypes.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/random.h>
#include <sys/syscall.h>
#include <fcntl.h>
#include <signal.h>
#include <time.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
/* Fallback used only when the C library headers do not provide
   RENAME_NOREPLACE (and hence renameat2()): invoke the system call directly.
*/
#ifndef RENAME_NOREPLACE
#define RENAME_NOREPLACE (1 << 0)
/* Rename oldpath (relative to olddirfd) to newpath (relative to newdirfd),
   honouring flags such as RENAME_NOREPLACE.
   Returns 0 on success, or -1 with errno set by the system call on failure.
*/
static inline int renameat2(int olddirfd, const char *oldpath,
                            int newdirfd, const char *newpath, unsigned int flags)
{
    /* syscall() already returns -1 and stores the error number in errno on
       failure, so its result is passed through unchanged. Deriving errno
       from the return value would replace every error code with 1 (EPERM). */
    return (int)syscall(SYS_renameat2, olddirfd, oldpath, newdirfd, newpath, flags);
}
#endif
/* Xorshift64* pseudo-random number generator.
   prng_state holds the generator state; zero means "not yet seeded".
*/
static uint64_t prng_state = 0; /* unseeded */

/* Advance the generator by one step and return the next 64-bit output.
   The stored state is updated with the three xorshift steps; the returned
   value is that new state multiplied by a fixed odd constant.
*/
static uint64_t prng_u64(void)
{
    uint64_t s = prng_state;

    s = s ^ (s >> 12);
    s = s ^ (s << 25);
    s = s ^ (s >> 27);

    prng_state = s;
    return UINT64_C(2685821657736338717) * s;
}
/* Seed the generator and return the seed that was stored in prng_state.
   The Linux-specific getrandom() call is tried first; if it does not deliver
   a full, nonzero 64-bit value, a seed is derived from the clocks and the
   process ID instead. The stored seed is never zero.
*/
static uint64_t prng_randomize(void)
{
    uint64_t seed;
    ssize_t got;
    struct timespec ts;

    /* Ask the kernel for random bytes, retrying if interrupted by a signal. */
    for (;;) {
        got = getrandom(&seed, sizeof seed, 0);
        if (got != -1 || errno != EINTR)
            break;
    }
    if (got == (ssize_t)sizeof seed && seed != 0) {
        prng_state = seed;
        return seed;
    }

    /* Fall back to mixing wall-clock time, process ID, thread CPU time and
       monotonic time, each scaled by a different constant. */
    clock_gettime(CLOCK_REALTIME, &ts);
    seed  = (uint64_t)ts.tv_sec  * UINT64_C(270547637);
    seed ^= (uint64_t)ts.tv_nsec * UINT64_C(90640031);
    seed ^= (uint64_t)getpid()   * UINT64_C(4758041);

    clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts);
    seed ^= (uint64_t)ts.tv_sec  * UINT64_C(3266177);
    seed ^= (uint64_t)ts.tv_nsec * UINT64_C(900904331);

    clock_gettime(CLOCK_MONOTONIC, &ts);
    seed ^= (uint64_t)ts.tv_sec  * UINT64_C(24400169);
    seed ^= (uint64_t)ts.tv_nsec * UINT64_C(1926466307);

    /* A zero state would make the xorshift generator emit only zeros. */
    if (seed == 0)
        seed = 1;

    /* Run 250 xorshift rounds to make the seed less predictable. */
    for (size_t round = 0; round < 250; round++) {
        seed ^= seed >> 12;
        seed ^= seed << 25;
        seed ^= seed >> 27;
    }

    prng_state = seed;
    return seed;
}
/* 64-character alphabet used to turn 6-bit values into file name characters:
   digits, upper-case letters, lower-case letters, then '-' and '_'.
   The array holds exactly 64 characters and is NOT NUL-terminated.
*/
static const char base64[64] =
    "0123456789"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "-_";
/* Create directory `name` inside `dirpath` (resolved relative to `atfd`) and
   return an open O_PATH descriptor to it.

   The directory is first created under a random hidden name with mode 0700,
   opened, given its final mode (`mode` with the process umask applied), and
   only then renamed to `name` using RENAME_NOREPLACE.

   name must be non-empty and must not contain a slash; otherwise the call
   fails with EINVAL. A NULL or empty dirpath means ".".

   Returns the descriptor on success, or -1 with errno set on failure. On
   failure after the temporary directory was created, its removal is attempted.
*/
int mkdiratfd(const int atfd, const char *dirpath, const char *name, const mode_t mode)
{
    char tmpname[32];   /* '.' + 20 random characters + terminating NUL */
    mode_t mask;
    int parentfd, newfd, err;

    /* Reject a NULL or empty name, and any name containing a slash. */
    if (name == NULL || name[0] == '\0' || strchr(name, '/') != NULL) {
        errno = EINVAL;
        return -1;
    }

    /* Default to the directory referred to by atfd itself. */
    if (dirpath == NULL || dirpath[0] == '\0')
        dirpath = ".";

    /* Open a handle to the parent directory, retrying on EINTR. */
    for (;;) {
        parentfd = openat(atfd, dirpath, O_PATH | O_DIRECTORY | O_CLOEXEC);
        if (parentfd != -1)
            break;
        if (errno != EINTR)
            return -1;
    }

    /* Read the umask: it can only be queried by setting it, so it is set
       to zero and immediately restored. */
    mask = umask(0); umask(mask);

    /* Seed the PRNG on first use. */
    if (!prng_state)
        prng_randomize();

    /* Pick random hidden names until one can be created. */
    for (;;) {
        char *out = tmpname;

        /* Leading dot makes the temporary directory "hidden". */
        *out++ = '.';

        /* Two PRNG outputs, ten 6-bit characters each: 20 characters. */
        for (int word = 0; word < 2; word++) {
            uint64_t bits = prng_u64();
            for (int i = 0; i < 10; i++) {
                *out++ = base64[bits & 63];
                bits >>= 6;
            }
        }
        *out = '\0';

        /* Accessible only to the current user until it is finished. */
        if (mkdirat(parentfd, tmpname, 0700) != -1)
            break;

        /* Interrupted or name collision: try again with a new name. */
        if (errno != EINTR && errno != EEXIST) {
            err = errno;
            goto fail_parent;
        }
    }

    /* Open the temporary directory, retrying on EINTR. */
    for (;;) {
        newfd = openat(parentfd, tmpname, O_PATH | O_DIRECTORY | O_CLOEXEC);
        if (newfd != -1)
            break;
        if (errno != EINTR) {
            err = errno;
            goto fail_tmpdir;
        }
    }

    /*
     * Note: Other actions, like file creation, etc.
     * should be done at this stage.
     */

    /* Update directory owner group here, if necessary. */

    /* Apply the requested access mode, filtered through the umask. */
    if (fchmodat(parentfd, tmpname, mode & (~mask), 0) == -1) {
        err = errno;
        goto fail_newfd;
    }

    /* Move the directory to its final name without replacing anything. */
    if (renameat2(parentfd, tmpname, parentfd, name, RENAME_NOREPLACE) == -1) {
        /* EPERM is reported to the caller as EEXIST. */
        err = (errno == EPERM) ? EEXIST : errno;
        goto fail_newfd;
    }

    /* Success: only the new directory's descriptor stays open. */
    close(parentfd);
    return newfd;

    /* Error unwinding, in reverse order of acquisition. */
fail_newfd:
    close(newfd);
fail_tmpdir:
    unlinkat(parentfd, tmpname, AT_REMOVEDIR);
fail_parent:
    close(parentfd);
    errno = err;
    return -1;
}
/* Command-line driver: create directory NAME in the current directory.
   With no argument, more than one argument, -h or --help, prints usage to
   standard error and exits with EXIT_FAILURE.
   Returns EXIT_SUCCESS if the directory was created, EXIT_FAILURE otherwise.
*/
int main(int argc, char *argv[])
{
    int fd;

    if (argc != 2 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
        /* Fall back to a placeholder if the program name is unavailable. */
        const char *argv0 = (argc > 0 && argv && argv[0] && argv[0][0]) ? argv[0] : "(this)";
        fprintf(stderr, "\n");
        fprintf(stderr, "Usage: %s [ -h | --help ]\n", argv0);
        fprintf(stderr, "       %s NAME\n", argv0);
        fprintf(stderr, "\n");
        fprintf(stderr, "This program creates directory NAME in the current directory.\n");
        fprintf(stderr, "\n");
        return EXIT_FAILURE;
    }

    fd = mkdiratfd(AT_FDCWD, NULL, argv[1], 0755);
    if (fd == -1) {
        fprintf(stderr, "%s: %s.\n", argv[1], strerror(errno));
        return EXIT_FAILURE;
    }

    /* The descriptor is not used further here; release it explicitly. */
    close(fd);

    return EXIT_SUCCESS;
}
Note that this uses renameat2()
via a raw syscall if the C library does not expose it. (It was added to glibc in 2.28, but is supported by Linux kernels since 3.15).
If you are still worried, a paranoid pattern is to create a temporary directory to hold the temporary directory. After opening the inner directory that will become the final directory, change the mode on the outer temporary directory to zero, to stop traversal to the inner tree. The creator can still access the inner tree via the open directory descriptor. The directory can still be renamed, because both reside on the same file system.
I personally would not bother, because using a temporary name, and only renaming the directory when completed – which is what many applications in Linux do – is safe enough.