Search code examples
Tags: node.js, pdf, amazon-s3, parse-server, node-pdfkit

Using PDFKit to store a PDF in S3 on the fly


I'm trying to create a PDF from some images (in data-URI format) in Node.js and store the PDF in my S3 bucket. The function is expected to return the S3 URL of the file.

I'm using parse-server for the server, node-canvas to draw the images onto canvases, and PDFKit to create the PDF from the canvas elements (jsPDF didn't work out). Now I want this PDF to be sent to my S3 bucket using the AWS SDK, and finally to return the URL of the file. Below is my code, which works up to the canvas generation. I don't know whether the PDF is even created in the first place, before being sent to S3. And oh! The entire thing is running on Heroku.

// Cloud function 'getBulkMeta' (first attempt): draws the base64 PNG labels in
// req.params.labels onto canvases (one canvas per PDF page), adds each canvas as an image
// to a PDFKit document and calls s3.putObject with the result.
// NOTE(review): the async handler itself has no return statement; the only `return` is
// inside the putObject callback, so that value is not the handler's return value.
Parse.Cloud.define('getBulkMeta',async (req)=>{
    const PDFDocument = require('pdfkit'),
        {Canvas,loadImage} = require('canvas');

        try {       
            // The first label is loaded only to read the natural size used for every cell.
            let baseImg = await loadImage('data:image/png;base64,'+req.params.labels[0]);
            // req.params.size is indexed as [0] = page size (compared to 'A4', passed to
            // PDFKit) and [1] = number of labels per page.
            // NOTE(review): the `;` after `g = 10` ends the `let` list, so `size` on the
            // following line is assigned without a declaration.
            let labels = req.params.labels,
                allCanvas = [],
                rowH = baseImg.naturalHeight,
                rowW = baseImg.naturalWidth,
                perpage = req.params.size[1],
                pages = Math.ceil(labels.length/perpage),
                imgInd = 0,
                g = 10;
                size = req.params.size[0];

            // Build one canvas per page.
            for(var p=0;p<pages;p++){
                // Canvas is twice the label size in both directions when size is 'A4'.
                let canvas = new Canvas(rowW*((size=='A4')?2:1),rowH*((size=='A4')?2:1)),
                    ctx = canvas.getContext("2d");

                // Fill the whole canvas with white before drawing labels.
                ctx.beginPath();
                ctx.rect(0,0,canvas.width,canvas.height)
                ctx.fillStyle = "#fff";
                ctx.fill();

                if(perpage == 1){
                    // Single label per page, inset by g on every side.
                    let img = await loadImage('data:image/png;base64,'+labels[imgInd++]);
                    ctx.drawImage(img,g,g,rowW-(2*g),rowH-(2*g));
                } else {
                    // Up to a 2x2 grid of labels; thisImgInd counts labels drawn on this page.
                    var thisImgInd = 0;
                    for (var r=0;r<2;r++){
                        for(var c=0;c<2;c++){
                            let img = await loadImage('data:image/png;base64,'+labels[imgInd++]);
                            ctx.drawImage(img,g+(c*(rowW-g/2)),g+(r*(rowH-g/2)),rowW-(1.5*g),rowH-(1.5*g));
                            thisImgInd++
                            // Stop when the page is full or the labels are exhausted.
                            if(thisImgInd>=perpage||imgInd>=labels.length){break;}
                        }
                        // Same condition again to leave the outer (row) loop as well.
                        if(thisImgInd>=perpage||imgInd>=labels.length){break;}
                    }
                }
                allCanvas.push(canvas)
            }
        var thisPDF = new PDFDocument({layout: 'landscape',size:size});
        var bcoded;
        // NOTE(review): pipe() is handed an arrow function here rather than a writable
        // stream; bcoded is only assigned inside that function — confirm it is ever called.
        thisPDF.pipe(()=>{bcoded = new Buffer.from(thisPDF).toString('base64')});
        // One PDF page per canvas; the first canvas uses the page the document starts with.
        allCanvas.forEach((c,i)=>{
            if(i){thisPDF.addPage();}
            // NOTE(review): both trailing arguments are page.width (no page.height).
            thisPDF.image(c.toDataURL(),0,0,thisPDF.page.width,thisPDF.page.width);
        })
        thisPDF.end();
        const S3_BUCKET = process.env.S3_BUCKET;
        // NOTE(review): `aws` is not required inside this function — presumably defined
        // elsewhere in the file; confirm.
        aws.config.region = process.env.AWS_REGION;
        aws.config.signatureVersion  = 'v4';

        let s3 = new aws.S3();
        let fileName = req.params.name;
        let s3Params = {
            Bucket: S3_BUCKET,
            Body: bcoded,
            Key: fileName,
            ContentType : 'application/pdf',
            ACL: 'public-read'
        };
        s3.putObject(s3Params, (err, data) => {
            if(err){
                console.log('\n\n\n\n\n\n\n'+err+'\n\n\n\n\n\n\n');
                // NOTE(review): thrown from inside the callback — presumably not caught by
                // the surrounding try/catch; confirm.
                throw 'Error: '+ (err);
            }
            let returnData = {
                signedRequest: data,
                url: `https://${S3_BUCKET}.s3.amazonaws.com/${fileName}`
            };
            // Returns from the callback only, not from the cloud function.
            return (returnData);
        })
    // The catch rethrows the error unchanged.
    } catch (e) {throw e;}
})

Update. I have got it to save the pdf file in s3 with the below code:

    // Cloud function 'getBulkMeta' (updated attempt): same canvas generation as before, but
    // the PDF bytes are collected from the document's 'data' events and uploaded to S3 from
    // the 'end' handler.
    // NOTE(review): `return returnData` runs right after thisPDF.end(), while returnData is
    // only reassigned inside the putObject callback — the function does not wait for it.
    Parse.Cloud.define('getBulkMeta',async (req)=>{
    const PDFDocument = require('pdfkit'),
        {Canvas,loadImage} = require('canvas');

        try {       
            // The first label is loaded only to read the natural size used for every cell.
            let baseImg = await loadImage('data:image/png;base64,'+req.params.labels[0]);
            // req.params.size is indexed as [0] = page size (compared to 'A4', passed to
            // PDFKit) and [1] = number of labels per page.
            // NOTE(review): the `;` after `g = 10` ends the `let` list, so `size` on the
            // following line is assigned without a declaration.
            let labels = req.params.labels,
                allCanvas = [],
                rowH = baseImg.naturalHeight,
                rowW = baseImg.naturalWidth,
                perpage = req.params.size[1],
                pages = Math.ceil(labels.length/perpage),
                imgInd = 0,
                g = 10;
                size = req.params.size[0];

            // Build one canvas per page.
            for(var p=0;p<pages;p++){
                // Canvas is created without dimensions and sized through its properties.
                let canvas = new Canvas(),
                    ctx = canvas.getContext("2d");

                // Twice the label size in both directions when size is 'A4'.
                canvas.height = rowH*((size=='A4')?2:1);
                canvas.width = rowW*((size=='A4')?2:1);

                // Fill the whole canvas with white before drawing labels.
                ctx.beginPath();
                ctx.rect(0,0,canvas.width,canvas.height)
                ctx.fillStyle = "#fff";
                ctx.fill();

                if(perpage == 1){
                    // Single label per page, inset by g on every side.
                    let img = await loadImage('data:image/png;base64,'+labels[imgInd++]);
                    ctx.drawImage(img,g,g,rowW-(2*g),rowH-(2*g));
                } else {
                    // Up to a 2x2 grid of labels; thisImgInd counts labels drawn on this page.
                    var thisImgInd = 0;
                    for (var r=0;r<2;r++){
                        for(var c=0;c<2;c++){
                            let img = await loadImage('data:image/png;base64,'+labels[imgInd++]);
                            ctx.drawImage(img,g+(c*(rowW-g/2)),g+(r*(rowH-g/2)),rowW-(1.5*g),rowH-(1.5*g));
                            thisImgInd++
                            // Stop when the page is full or the labels are exhausted.
                            if(thisImgInd>=perpage||imgInd>=labels.length){break;}
                        }
                        // Same condition again to leave the outer (row) loop as well.
                        if(thisImgInd>=perpage||imgInd>=labels.length){break;}
                    }
                }
                allCanvas.push(canvas)
            }
        var thisPDF = new PDFDocument({layout: 'landscape',size:size});
        // returnData starts as the placeholder 'Hi' and is overwritten in the putObject callback.
        let buffers = [],pdfData,returnData='Hi';
        // Collect every chunk the document emits.
        thisPDF.on('data', buffers.push.bind(buffers));
        // When the document has finished emitting, join the chunks and upload them.
        thisPDF.on('end',() => {
            pdfData = Buffer.concat(buffers);
            const S3_BUCKET = process.env.S3_BUCKET;
            // NOTE(review): `aws` is not required inside this function — presumably defined
            // elsewhere in the file; confirm.
            aws.config.region = process.env.AWS_REGION;
            aws.config.signatureVersion  = 'v4';

            let s3 = new aws.S3();
            let fileName = req.params.name;
            // The object key is prefixed with the current timestamp in milliseconds.
            let s3Params = {
                Bucket: S3_BUCKET,
                Body: pdfData,
                Key: (+new Date())+'-'+fileName,
                ContentType : 'application/pdf',
                ACL: 'public-read'
            };
            s3.putObject(s3Params,(err, data) => {
                // NOTE(review): `delete` applied to plain variables presumably has no
                // effect; the references are cleared by the assignments on the next line.
                delete pdfData,thisPDF;
                pdfData = null;thisPDF = null;
                // NOTE(review): thrown from inside the callback — presumably not caught by
                // the surrounding try/catch; confirm.
                if(err){ throw 'Error: '+ (err); }
                // NOTE(review): the URL uses fileName without the timestamp prefix that was
                // added to Key above.
                returnData = { signedRequest: data, url: `https://${S3_BUCKET}.s3.amazonaws.com/${fileName}` };
            })
        })
        // One PDF page per canvas; the first canvas uses the page the document starts with.
        allCanvas.forEach((c,i)=>{
            if(i){thisPDF.addPage();}
            thisPDF.image(c.toDataURL(),0,0,{fit:[thisPDF.page.width,thisPDF.page.height]});
        })
        thisPDF.end();
        return returnData;
    // The catch rethrows the error unchanged.
    } catch (e) {throw e;}
})

However, returnData always comes back as "Hi", and it also appears that the function never finishes - Heroku throws a memory-exceeded error every time.


Solution

  • Creating the PDF and sending it to S3 are both asynchronous, callback-based operations, so your cloud function returns before they have actually completed. That's why returnData still holds "Hi" when it is returned. You need to wrap the two operations in a promise that resolves only once the upload has finished, and await that promise before returning. It should be something like this:

    // PDFKit's stream events and s3.putObject are both callback-based, so wrap them in one
    // promise and await it; the cloud function then returns only after the upload finished.
    // Relies on names from the enclosing cloud function: PDFDocument, aws, req, size,
    // allCanvas, and returnData (the variable the function returns afterwards).
    await new Promise((resolve, reject) => {
      const thisPDF = new PDFDocument({layout: 'landscape', size});
      const buffers = [];
      thisPDF.on('data', (chunk) => buffers.push(chunk));
      // Reject on stream failure instead of leaving the promise pending forever.
      thisPDF.on('error', reject);
      thisPDF.on('end', () => {
        const pdfData = Buffer.concat(buffers);
        const S3_BUCKET = process.env.S3_BUCKET;
        aws.config.region = process.env.AWS_REGION;
        aws.config.signatureVersion = 'v4';

        const s3 = new aws.S3();
        // Build the key once so the returned URL points at the object that was stored.
        const key = `${Date.now()}-${req.params.name}`;
        const s3Params = {
          Bucket: S3_BUCKET,
          Body: pdfData,
          Key: key,
          ContentType: 'application/pdf',
          ACL: 'public-read'
        };
        s3.putObject(s3Params, (err, data) => {
          if (err) {
            // Return here so a failed upload is never followed by resolve().
            reject(err);
            return;
          }
          returnData = { signedRequest: data, url: `https://${S3_BUCKET}.s3.amazonaws.com/${key}` };
          resolve(returnData);
        });
      });

      // Draw one canvas per page, then end the document. Without end() the 'end' event
      // never fires and the promise would never settle.
      allCanvas.forEach((c, i) => {
        if (i) { thisPDF.addPage(); }
        thisPDF.image(c.toDataURL(), 0, 0, {fit: [thisPDF.page.width, thisPDF.page.height]});
      });
      thisPDF.end();
    });
    

    BTW, instead of using the AWS SDK directly, you could use the Parse S3 adapter and save the PDF as a regular Parse file.