javascript html5-canvas getimagedata pdf.js putimagedata

Render .pdf to single Canvas using pdf.js and ImageData

I am trying to read an entire .pdf Document using PDF.js and then render all the pages on a single canvas.

My idea: render each page onto a canvas and get the ImageData (context.getImageData()), clear the canvas do the next page. I store all the ImageDatas in an array and once all pages are in there I want to put all the ImageDatas from the array onto a single canvas.

var pdf = null;
PDFJS.disableWorker = true;
var pages = new Array();
    //Prepare some things
    var canvas = document.getElementById('cv');
    var context = canvas.getContext('2d');
    var scale = 1.5;
    PDFJS.getDocument(url).then(function getPdfHelloWorld(_pdf) {
        pdf = _pdf;
        //Render all the pages on a single canvas
        for(var i = 1; i <= pdf.numPages; i ++){
            pdf.getPage(i).then(function getPage(page){
                var viewport = page.getViewport(scale);
                canvas.width = viewport.width;
                canvas.height = viewport.height;
                page.render({canvasContext: context, viewport: viewport});
                pages[i-1] = context.getImageData(0, 0, canvas.width, canvas.height);
                context.clearRect(0, 0, canvas.width, canvas.height);
                p.Out("pre-rendered page " + i);
            });
        }

    //Now we have all 'dem Pages in "pages" and need to render 'em out
    canvas.height = 0;
    var start = 0;
    for(var i = 0; i < pages.length; i++){
        if(canvas.width < pages[i].width) canvas.width = pages[i].width;
        canvas.height = canvas.height + pages[i].height;
        context.putImageData(pages[i], 0, start);
        start += pages[i].height;
    }
    });

So from the way I understnad thing this should work, right? When I run this I end up with the canvas that is big enought to contain all the pages of the pdf but doesn't show the pdf...

Thank you for helping.

Solution

I can’t speak to the part of your code that renders the pdf into a canvas, but I do see some problems.

Every resetting canvas.width or canvas.height automatically clears the canvas contents. So in the top section, your clearRect is not needed because the canvas is cleared by canvas.width prior to your every page.render.
More importantly, in the bottom section, all your previous pdf drawings are cleared by every canvas resizing (oops!).
getImageData() gets an array where each pixel is represented by 4 consecutive elements of that array (red then green then blue then alpha). Since getImageData() is an array, so it doesn’t have a pages[i].width or pages[i].height—it only has a pages[i].length. That array length cannot be used to determine widths or heights.

So to get you started, I would start by changing your code to this (very, very untested!):

var pdf = null;
PDFJS.disableWorker = true;
var pages = new Array();
//Prepare some things
var canvas = document.getElementById('cv');
var context = canvas.getContext('2d');
var scale = 1.5;
var canvasWidth=0;
var canvasHeight=0;
var pageStarts=new Array();
pageStarts[0]=0;

PDFJS.getDocument(url).then(function getPdfHelloWorld(_pdf) {
    pdf = _pdf;
    //Render all the pages on a single canvas
    for(var i = 1; i <= pdf.numPages; i ++){
        pdf.getPage(i).then(function getPage(page){
            var viewport = page.getViewport(scale);
            // changing canvas.width and/or canvas.height auto-clears the canvas
            canvas.width = viewport.width;
            canvas.height = viewport.height;
            page.render({canvasContext: context, viewport: viewport});
            pages[i-1] = context.getImageData(0, 0, canvas.width, canvas.height);
            // calculate the width of the final display canvas
            if(canvas.width>maxCanvasWidth){
              maxCanvasWidth=canvas.width;
            }
            // calculate the accumulated with of the final display canvas
            canvasHeight+=canvas.height;
            // save the "Y" starting position of this pages[i]
            pageStarts[i]=pageStarts[i-1]+canvas.height;
            p.Out("pre-rendered page " + i);
        });
    }


    canvas.width=canvasWidth; 
    canvas.height = canvasHeight;  // this auto-clears all canvas contents
    for(var i = 0; i < pages.length; i++){
        context.putImageData(pages[i], 0, pageStarts[i]);
    }

});

Alternatively, here’s a more traditional way of accomplishing your task:

Use a single “display” canvas and allow the user to “page through” each desired page.

Since you already start by drawing each page into a canvas, why not keep a separate, hidden canvas for each page. Then when the user wants to see page#6, you just copy the hidden canvas#6 onto your display canvas.

The Mozilla devs use this approach in their pdfJS demo here: http://mozilla.github.com/pdf.js/web/viewer.html

You can check out the code for the viewer here: http://mozilla.github.com/pdf.js/web/viewer.js