I'm looking for an example implementation of the S3 multipart upload for very large files. I found a variety of halfway implementations online, but none that met my requirements.
This is the simplest and most memory-efficient implementation I could come up with. I wish I could somehow use Promise.all() to speed up the uploading process, but I found it difficult to maintain the part order using a heavily asynchronous implementation. I tested this with 6GB files and directories.
const fs = require('fs')
const _ = require('underscore')
const mime = require('mime-types')
let AWS = require('aws-sdk/index');
// Set the default region on the SDK's global configuration; this runs before the client below is constructed.
AWS.config.region = 'us-west-2';
// Module-level S3 client used by multipartUploadFile.
let s3 = new AWS.S3();
/**
 * Uploads a local file to S3 with the multipart upload API, streaming the
 * file from disk one part at a time so only a single part is held in memory.
 *
 * Parts are uploaded sequentially, so they are recorded in ascending
 * PartNumber order without any extra sorting.
 *
 * @param {{Bucket: string, Key: string}} params - Destination bucket and object key.
 * @param {string} filePath - Path of the local file to upload.
 * @returns {Promise<object>} Resolves with the completeMultipartUpload response
 *   once the whole object has been assembled.
 * @throws Rethrows the first error from reading the file or from any S3 call.
 *   If the failure happens after the upload was created, an abort is attempted
 *   first so the already-uploaded parts are not left behind.
 */
async function multipartUploadFile(params, filePath) {
  console.log(`S3Helper: Beginning multipart upload of file ${params.Key} to ${params.Bucket}`);

  // Size of each part read from disk. S3 documents a 5 MiB minimum for every
  // part except the last and a 10,000-part maximum per upload.
  const CHUNK_SIZE = 10 * 1024 * 1024;

  // First create the multipart upload ID
  const multipartCreateResult = await s3.createMultipartUpload({
    Bucket: params.Bucket,
    Key: params.Key,
    // mime.lookup() returns false for unknown extensions; fall back to a generic type.
    ContentType: mime.lookup(filePath) || 'application/octet-stream',
    StorageClass: 'STANDARD',
  }).promise();
  console.log("S3Helper: multipartUploadFile createResult - ", multipartCreateResult);

  // Fields shared by every follow-up request for this upload.
  const uploadTarget = {
    Bucket: params.Bucket,
    Key: params.Key,
    UploadId: multipartCreateResult.UploadId,
  };

  try {
    const uploadedParts = [];
    let partNumber = 1;

    // Read through the file one chunk at a time and upload each chunk as a part.
    const stream = fs.createReadStream(filePath, { highWaterMark: CHUNK_SIZE });
    for await (const data of stream) {
      const { ETag } = await s3.uploadPart({
        ...uploadTarget,
        Body: data,
        PartNumber: partNumber,
      }).promise();
      uploadedParts.push({ ETag, PartNumber: partNumber });
      partNumber += 1;
    }

    console.log("Sorted uploadedParts: ", uploadedParts);

    // Awaited (not left floating) so callers only continue once the object exists.
    return await s3.completeMultipartUpload({
      ...uploadTarget,
      MultipartUpload: {
        Parts: uploadedParts,
      },
    }).promise();
  } catch (err) {
    // Best-effort cleanup: abort so the parts uploaded so far are discarded.
    try {
      await s3.abortMultipartUpload(uploadTarget).promise();
    } catch (abortErr) {
      console.error("S3Helper: failed to abort multipart upload - ", abortErr);
    }
    throw err;
  }
}
// Example invocation: upload a local file to the given bucket and key.
const params = {
  Bucket: "someBucket",
  Key: "someKey",
};
const filePath = "./someFilePath";

// Top-level `await` is not available in a CommonJS script (this file uses
// require), so handle the returned promise explicitly and report failures.
multipartUploadFile(params, filePath).catch((err) => {
  console.error("S3Helper: multipart upload failed - ", err);
  process.exitCode = 1;
});