Search code examples
javascript node.js file server timer

How to delete uploaded files automatically after 2 days


I have a chat application where users send files to each other. Each file must be deleted after 2 days.

For this purpose, I recursively check all the files every 4 hours and delete those that were created more than 2 days ago.

But this consumes a lot of my server's CPU and disrupts the application, because the number of files to be checked is very large and their total size is about 10 GB.

Would it be better to start a timer as soon as each file is uploaded, so that the file is deleted after 2 days? Keep in mind that a file is uploaded almost every second, so the number of timers may be very high.

My recursive solution:

const fs = require('fs')
const path = require('path')
const rootDir = require('../config').rootDir
const log = require('../da/log');

// Directories swept by the clean-up job, both located under the configured root.
const DIRECTORY_MESSAGES = `${rootDir}/file/messages`;
const DIRECTORY_AVATARS = `${rootDir}/file/avatars`;

// How often each directory is swept, in minutes.
const INTERVAL_MINUTES_MESSAGES = 240;
const INTERVAL_MINUTES_AVATARS = 265;

// How long a file is kept after its last access time, in days.
const ACCESS_DAYS_MESSAGES = 2;
const ACCESS_DAYS_AVATARS = 10;
/*
* Remove files that access times is older than {fileAccessHours}.
* Runs every {removeEveryMinutes}
* */
module.exports = {
    start: function () {
        setInterval(deleteFiles.bind(this, DIRECTORY_MESSAGES, ACCESS_DAYS_MESSAGES), INTERVAL_MINUTES_MESSAGES * 60_000)
        setInterval(deleteFiles.bind(this, DIRECTORY_AVATARS, ACCESS_DAYS_AVATARS), INTERVAL_MINUTES_AVATARS * 60_000)
    }
}

/**
 * Sweeps `dir`: deletes every file whose last access time is older than
 * `accessDays` days and removes empty sub-directories.
 *
 * The tree is enumerated with walkDir(); the stat/unlink/rmdir calls are
 * asynchronous and fire-and-forget, so this function returns before the
 * deletions have completed. Failures are logged and never thrown.
 *
 * NOTE(review): expiry is based on `atime` (last access), not on creation
 * time. On file systems mounted with `noatime`/`relatime` the access time may
 * not reflect real reads. Confirm this is the intended retention rule.
 *
 * @param {string} dir - Root directory to sweep. The root itself is never removed.
 * @param {number} accessDays - Retention period in days, measured from the file's atime.
 */
function deleteFiles(dir, accessDays) {
    const retentionMs = accessDays * 86_400_000;

    walkDir(dir, function (entryPath, isDirectory) {
        try {
            if (isDirectory) {
                // walkDir only reports directories that are empty; keep the root even when empty.
                if (entryPath === dir) {
                    return;
                }
                return fs.rmdir(entryPath, function (err) {
                    if (err) {
                        return log.error(__filename + ' :deleteFiles, fs.rmdir, ' + err);
                    }
                });
            }

            fs.stat(entryPath, function (err, stat) {
                try {
                    // Check the error before touching `stat`: it is undefined when fs.stat fails
                    // (e.g. the file was removed after the directory listing was taken).
                    if (err) {
                        return log.error(__filename + ' :deleteFiles, fs.stat, ' + err);
                    }

                    const expiresAt = new Date(stat.atime).getTime() + retentionMs;
                    if (Date.now() > expiresAt) {
                        return fs.unlink(entryPath, function (err) {
                            if (err) {
                                return log.error(__filename + ' :deleteFiles, fs.unlink, ' + err);
                            }
                        });
                    }
                } catch (err) {
                    console.log('File Deleter: walkDir: callback: fs.stat: ' + err);
                }
            });
        } catch (err) {
            console.log('File Deleter: walkDir: callback' + err);
        }
    });
}

/**
 * Synchronously walks the tree rooted at `dir` and reports leaves to `callback`.
 *
 * - For every non-directory entry, `callback(entryPath, false)` is invoked.
 * - For every directory that contains no entries (including `dir` itself),
 *   `callback(directoryPath, true)` is invoked. Non-empty directories are
 *   descended into but not reported.
 *
 * Entry types come from the directory listing itself, so no extra stat call is
 * needed per entry. Symbolic links are the exception: they are resolved with
 * statSync so that a link to a directory is still descended into.
 *
 * Errors never propagate. A failure on one entry (dangling symlink, file that
 * vanished after the listing, throwing callback) is logged and the walk
 * continues with the remaining entries.
 *
 * @param {string} dir - Directory to walk.
 * @param {(entryPath: string, isDirectory: boolean) => void} callback - Receives each file and each empty directory.
 */
function walkDir(dir, callback) {
    let entries;
    try {
        entries = fs.readdirSync(dir, { withFileTypes: true });
        if (entries.length === 0) {
            callback(dir, /* isDirectory */ true);
            return;
        }
    } catch (err) {
        console.log('File Deleter: walkDir: ' + err);
        return;
    }

    for (const entry of entries) {
        const entryPath = path.join(dir, entry.name);
        // Isolate each entry so that one bad entry does not abort its siblings.
        try {
            const isDirectory = entry.isSymbolicLink()
                ? fs.statSync(entryPath).isDirectory()
                : entry.isDirectory();

            if (isDirectory) {
                walkDir(entryPath, callback);
            } else {
                callback(entryPath, /* isDirectory */ false);
            }
        } catch (err) {
            console.log('File Deleter: walkDir: ' + err);
        }
    }
}

Solution

  • Do the directory walk, but don't reinvent the wheel, e.g. use GNU find:

     find /path/to/directory -type f -ctime +2 -delete
    

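    Run that from cron. Note that find discards fractional days when comparing, so `-ctime +2` only matches files whose status last changed at least three full days ago; use `-ctime +1` if files should go as soon as they are two days old.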