Search code examples
javascript node.js amazon-s3 upload multipart

AWS S3 multipart upload javascript sdk


I'm looking for an example implementation of the S3 multipart upload for very large files. I found a variety of halfway implementations online, but none that met my requirements.


Solution

  • The simplest and most memory-efficient implementation I could come up with. I wish I could use Promise.all() to speed up the upload, but I found it difficult to maintain the part order with a heavily asynchronous implementation, so the parts are uploaded one at a time. I tested this with 6 GB files and directories.

    const fs = require('fs')
    const _ = require('underscore')
    const mime = require('mime-types')
    let AWS = require('aws-sdk/index');
    // Set the region on the SDK's global configuration before creating the
    // client, so the S3 client below is constructed with it.
    AWS.config.update({ region: 'us-west-2' });
    // Shared S3 client used by the upload helper in this file.
    let s3 = new AWS.S3();
    
    /**
     * Uploads a local file to S3 with the multipart upload API.
     *
     * The file is streamed from disk in fixed-size chunks and each chunk is
     * uploaded as one part, strictly one after another, so only a single chunk
     * is held in memory at a time and part numbers follow file order.
     *
     * @param {{Bucket: string, Key: string}} params - Destination bucket and object key.
     * @param {string} filePath - Path of the local file to upload.
     * @returns {Promise<object>} Resolves with the `completeMultipartUpload`
     *     response once every part has been uploaded and the upload is completed.
     * @throws Rethrows the original error if reading the file, uploading a part,
     *     or completing the upload fails; the multipart upload is aborted first.
     */
    async function multipartUploadFile(params, filePath) {
        console.log(`S3Helper: Beginning multipart upload of file ${params.Key} to ${params.Bucket}`)
        const { Bucket, Key } = params;

        // First create the multipart upload ID.
        const multipartCreateResult = await s3.createMultipartUpload({
            Bucket,
            Key,
            // mime.lookup() returns false for unknown extensions; fall back to a
            // generic binary type instead of sending a non-string ContentType.
            ContentType: mime.lookup(filePath) || 'application/octet-stream',
            StorageClass: 'STANDARD'
        }).promise();
        console.log("S3Helper: multipartUploadFile createResult - ", multipartCreateResult)

        const { UploadId } = multipartCreateResult;
        // Size of each read from disk, and therefore of each uploaded part.
        const CHUNK_SIZE = 10 * 1024 * 1024;
        const uploadedParts = [];

        try {
            // highWaterMark makes the stream hand out CHUNK_SIZE-sized buffers.
            const stream = fs.createReadStream(filePath, { highWaterMark: CHUNK_SIZE });
            let partNumber = 1;
            for await (const data of stream) {
                const { ETag } = await s3.uploadPart({
                    Body: data,
                    Bucket,
                    Key,
                    PartNumber: partNumber,
                    UploadId,
                }).promise();
                uploadedParts.push({
                    ETag: ETag.toString(),
                    PartNumber: partNumber
                });
                partNumber += 1;
            }

            // Parts are pushed sequentially, so the list is already ordered by
            // PartNumber and needs no extra sort.
            console.log("Sorted uploadedParts: ", uploadedParts)
            // Await here so the caller only resumes once the object is assembled
            // and so a failure is caught by the abort handler below.
            return await s3.completeMultipartUpload({
                Bucket,
                Key,
                MultipartUpload: {
                    Parts: uploadedParts
                },
                UploadId
            }).promise();
        } catch (err) {
            // Abort so already-uploaded parts are not left behind on a failed upload.
            try {
                await s3.abortMultipartUpload({ Bucket, Key, UploadId }).promise();
            } catch (abortErr) {
                console.error("S3Helper: failed to abort multipart upload - ", abortErr)
            }
            throw err;
        }
    }
    
    // Example invocation: destination bucket/key and the local file to upload.
    const params = {
        Bucket: "someBucket",
        Key:  "someKey"
    };

    const filePath = "./someFilePath"

    // Top-level `await` is a syntax error in a CommonJS script (this file loads
    // its dependencies with `require`), so handle the promise explicitly and
    // signal failure through the process exit code.
    multipartUploadFile(params, filePath).catch((err) => {
        console.error("S3Helper: multipart upload failed - ", err);
        process.exitCode = 1;
    });