Search code examples
google-apps-scriptpdfpdf-generationgoogle-docslibreoffice-draw

How can I remove the white background layers of a pdf export in google docs / google apps script?


I've got a problem with Google Apps Script / Google Docs. I wanted to make an add-on that adds a watermark to Google Docs documents via an API. That works fine, but only if I place the watermark in front of the document. If I put it behind the document, the watermark is not visible. I then inspected the layers in LibreOffice Draw and saw that Google Docs puts some white layers behind the text, which cover the watermark.

So does anybody know how I can export the PDF file from Google Docs without the white layers?

I also tried to set the background of the page to null or rgba(0,0,0,0.0) through google apps script, like this:

// Attribute map that tries to give the text a fully transparent background.
// NOTE(review): Docs colour attributes are documented with hex notation such as
// "#ffffff"; an rgba() string is probably not accepted here — confirm.
var highlightStyle = {};
highlightStyle[DocumentApp.Attribute.BACKGROUND_COLOR] = "rgba(0,0,0,0.0)";
// Apply the attribute map to the entire body text of the active document.
let editedText = DocumentApp.getActiveDocument().getBody().editAsText().setAttributes(highlightStyle);

But it didn't work.

Here is a test document where you can see the white layers in the background, as shown in LibreOffice Draw.

Thanks in advance.

Here is the code of the google apps script plugin:

    /**
     * Encodes a value as URL-safe Base64 without trailing padding.
     *
     * @param {string|number[]} str - Text or byte array; handed to
     *     Utilities.newBlob unchanged.
     * @returns {string} Web-safe Base64 text with any trailing '=' removed.
     */
    function base64Encode(str) {
      const rawBytes = Utilities.newBlob(str).getBytes();
      const webSafe = Utilities.base64EncodeWebSafe(rawBytes);
      // Drop the '=' padding at the end of the encoded text.
      return webSafe.replace(/=+$/, '');
    }

/**
 * Builds a compact JSON Web Token signed with HMAC-SHA256.
 *
 * The token is `base64(header).base64(payload).base64(signature)`, each part
 * encoded by base64Encode (URL-safe, unpadded).
 *
 * The first parameter was previously named `public`, which is a reserved word
 * in strict-mode/module code and made the function a SyntaxError there. It is
 * positional, so callers are unaffected by the rename.
 *
 * @param {string} publicKey - Project public key; written to the `jti` header
 *     field and to the `jti` and `public_key` claims.
 * @param {string} secret - Secret key used for the HMAC-SHA256 signature.
 * @returns {string} The signed token.
 */
function encodeJWT(publicKey, secret) {
  const header = JSON.stringify({
    typ: 'JWT',
    alg: 'HS256',
    jti: publicKey,
  });
  const encodedHeader = base64Encode(header);

  // Issued-at is back-dated by 60 seconds (presumably to tolerate clock skew)
  // and floored so the claim is a whole number of seconds.
  const issuedAt = Math.floor(Date.now() / 1000) - 60;
  const payload = JSON.stringify({
    iat: issuedAt,
    iss: 'name',
    jti: publicKey,
    public_key: publicKey,
  });
  const encodedPayload = base64Encode(payload);

  const toSign = [encodedHeader, encodedPayload].join('.');
  const signature = Utilities.computeHmacSha256Signature(toSign, secret);
  const encodedSignature = base64Encode(signature);
  return [toSign, encodedSignature].join('.');
}


/**
 * Runs when the add-on is installed; delegates to onOpen so the menu is
 * available without reopening the document.
 */
function onInstall() {
  onOpen();
}

/**
 * Adds the add-on menu to the document UI, with a single "Merge PDFs" entry
 * that invokes showSidebar.
 */
function onOpen() {
  const addonMenu = DocumentApp.getUi().createAddonMenu();
  const menuWithItem = addonMenu.addItem("Merge PDFs", "showSidebar");
  menuWithItem.addToUi();
}

// Name of the Drive folder that holds the exported PDF and the uploaded watermark image.
let folderName = "watermark data";
// Base name (without extension) of the watermark image file that is looked up in Drive.
let fileName = "watermark template";

/**
 * Renders the "index" HTML template and shows it as a sidebar titled
 * "PDF Creator" in the document UI.
 */
function showSidebar() {
  const template = HtmlService.createTemplateFromFile("index");
  const sidebarContent = template.evaluate().setTitle("PDF Creator");
  DocumentApp.getUi().showSidebar(sidebarContent);
}

/**
 * Exports the active document to PDF, has the iLovePDF watermark tool place
 * the stored watermark image below the page content, and saves the resulting
 * PDF in the root folder of the user's Drive.
 *
 * The function stops with an alert (before any API request is made) when no
 * watermark image exists, and also alerts when the watermark task does not
 * finish with status "TaskSuccess" or the download yields nothing.
 *
 * @param {number|string} transparency - Forwarded unchanged as the
 *     "transparency" option of the watermark task.
 */
function mergePdf(transparency) {
  const doc = DocumentApp.getActiveDocument();
  const ui = DocumentApp.getUi();

  // Give the whole body text an explicit white background before exporting.
  const highlightStyle = {};
  highlightStyle[DocumentApp.Attribute.BACKGROUND_COLOR] = "#ffffff";
  doc.getBody().editAsText().setAttributes(highlightStyle);

  // Make sure the working folder exists.
  if (!DriveApp.getFoldersByName(folderName).hasNext()) {
    DriveApp.getRootFolder().createFolder(folderName);
  }

  // When a file with the bare template name exists, make sure the document
  // has a header section.
  // NOTE(review): the header is never used afterwards — confirm whether this
  // step is still needed.
  if (DriveApp.getFilesByName(fileName).hasNext() && !doc.getHeader()) {
    doc.addHeader();
  }

  // Resolve the watermark first: without it the upload below would be handed
  // an invalid file id and throw after the PDF had already been sent.
  const watermarkId = findWatermarkFileId_();
  if (watermarkId === null) {
    ui.alert("Error, watermark not found!\n Please upload a watermark!");
    return;
  }

  fetchAPIData("https://api.ilovepdf.com/v1/auth", "post", {
    "public_key": "api_key"
  });

  // Read the response body explicitly instead of relying on the response
  // object's implicit string conversion.
  const startData = JSON.parse(
    fetchAPIData("https://api.ilovepdf.com/v1/start/watermark", "get").getContentText()
  );
  const serverBase = "https://" + startData.server + "/v1";

  const pdfUpload = JSON.parse(
    requestAPI(convertPDF(), serverBase + "/upload", { "task": startData.task })
  );
  const watermarkUpload = JSON.parse(
    requestAPI(watermarkId, serverBase + "/upload", { "task": startData.task })
  );

  const processResponse = JSON.parse(
    fetchAPIData(serverBase + "/process", "post", {
      "task": startData.task,
      "tool": "watermark",
      "files": [
        {
          "server_filename": pdfUpload.server_filename,
          "filename": "Test"
        }
      ],
      "mode": "image",
      "layer": "below",
      "image": watermarkUpload.server_filename,
      "transparency": transparency
    }).getContentText()
  );

  if (processResponse.status !== "TaskSuccess") {
    ui.alert("Error, the API Request wasn't successful!");
    return;
  }

  const responseData = fetchAPIData(serverBase + "/download/" + startData.task, "get");
  if (!responseData) {
    ui.alert("Error, the API Request wasn't successful!");
    return;
  }

  const pdfFile = DriveApp.getRootFolder().createFile(responseData.getAs('application/pdf'));
  pdfFile.setName(doc.getName());
  ui.alert('You can download your pdf file here: ' + pdfFile.getDownloadUrl());
}

/**
 * Looks up the watermark image in Drive, trying the .png, .jpg and .jpeg
 * variants of the template name in that order.
 * (The trailing underscore marks the helper as private in Apps Script.)
 *
 * @returns {?string} Id of the first matching file, or null when none exists.
 */
function findWatermarkFileId_() {
  for (const extension of [".png", ".jpg", ".jpeg"]) {
    const matches = DriveApp.getFilesByName(fileName + extension);
    if (matches.hasNext()) {
      return matches.next().getId();
    }
  }
  return null;
}



/**
 * Sends an authenticated request and returns the raw response.
 *
 * Every request carries a Bearer token produced by encodeJWT. When `data` is
 * truthy it is sent as a JSON body.
 *
 * @param {string} route - Full URL to request.
 * @param {string} method - HTTP method, e.g. "get" or "post".
 * @param {Object} [data] - Optional object serialised as the JSON payload.
 * @returns {*} Whatever UrlFetchApp.fetch returns for the request.
 */
function fetchAPIData(route, method, data) {
  const requestOptions = {
    'method': method,
    'headers': {
      'Authorization': 'Bearer ' + encodeJWT("api_key", "api_key")
    }
  };

  if (data) {
    requestOptions.contentType = 'application/json';
    // The body is the JSON text of the given object.
    requestOptions.payload = JSON.stringify(data);
  }

  return UrlFetchApp.fetch(route, requestOptions);
}


/**
 * Uploads a Drive file to `url` as a multipart/form-data POST and returns the
 * response body.
 *
 * Each entry of `metadata` becomes a plain form field; the file itself is
 * sent as the "file" part. The request is authenticated with a Bearer token
 * from encodeJWT, and HTTP error responses are returned rather than thrown
 * (muteHttpExceptions). The response text is also written to the log.
 *
 * @param {string} fileId - Id of the Drive file to upload.
 * @param {string} url - Upload endpoint.
 * @param {Object} metadata - Extra form fields (name → value).
 * @returns {string} Response body text.
 */
function requestAPI(fileId, url, metadata) {
  const driveFile = DriveApp.getFileById(fileId);
  const boundary = "name";
  const delimiter = "--" + boundary + "\r\n";

  // Text portion of the body: one part per metadata field, then the headers
  // of the file part.
  let head = "";
  for (const field in metadata) {
    head += delimiter;
    head += "Content-Disposition: form-data; name=\"" + field + "\"; \r\n\r\n" + metadata[field] + "\r\n";
  }
  head += delimiter;
  head += "Content-Disposition: form-data; name=\"file\"; filename=\"" + driveFile.getName() + "\"\r\n";
  head += "Content-Type:" + driveFile.getMimeType() + "\r\n\r\n";

  // Body bytes = text head + raw file content + closing boundary.
  const headBytes = Utilities.newBlob(head).getBytes();
  const fileBytes = driveFile.getBlob().getBytes();
  const tailBytes = Utilities.newBlob("\r\n--" + boundary + "--").getBytes();
  const payload = headBytes.concat(fileBytes).concat(tailBytes);

  const options = {
    method: "post",
    contentType: "multipart/form-data; boundary=" + boundary,
    payload: payload,
    muteHttpExceptions: true,
    'headers': {
      'Authorization': 'Bearer ' + encodeJWT("api_key", "api_key")
    }
  };

  const responseText = UrlFetchApp.fetch(url, options).getContentText();
  Logger.log(responseText);
  return responseText;
}

/**
 * Exports the active document as a PDF into the working folder and returns
 * the id of the new file.
 *
 * The working folder is created in the Drive root when it does not exist.
 * Earlier exports with the same name are trashed first, but only inside the
 * working folder, so unrelated files elsewhere in Drive that happen to share
 * the name are left alone.
 *
 * @returns {string} Drive id of the newly created PDF file.
 */
function convertPDF() {
  const doc = DocumentApp.getActiveDocument();
  const pdfName = doc.getName() + ".pdf";

  const docBlob = doc.getAs('application/pdf');
  // Add the PDF extension.
  docBlob.setName(pdfName);

  const folders = DriveApp.getFoldersByName(folderName);
  const folder = folders.hasNext()
    ? folders.next()
    : DriveApp.getRootFolder().createFolder(folderName);

  // Remove previous exports of this document from the working folder.
  const previous = folder.getFilesByName(pdfName);
  while (previous.hasNext()) {
    previous.next().setTrashed(true);
  }

  return folder.createFile(docBlob).getId();
}

/**
 * Stores an uploaded watermark image in the working folder, replacing any
 * previously stored watermark.
 *
 * Old watermark files with the .jpg, .png and .jpeg extensions are trashed
 * first. ".jpeg" matters because an "image/jpeg" upload is saved with exactly
 * that extension; without trashing it, stale copies would accumulate and
 * could be picked up instead of the new upload. The working folder is created
 * when it does not exist yet, and the success alert is shown only after the
 * file has actually been created.
 *
 * @param {{data: string, mimeType: string}} obj - Upload: Base64-encoded
 *     content and its MIME type (the part after "/" becomes the extension).
 * @returns {string} Drive id of the stored watermark file.
 */
function saveFile(obj) {
  for (const extension of [".jpg", ".png", ".jpeg"]) {
    const stale = DriveApp.getFilesByName(fileName + extension);
    while (stale.hasNext()) {
      stale.next().setTrashed(true);
    }
  }

  const blob = Utilities.newBlob(
    Utilities.base64Decode(obj.data),
    obj.mimeType,
    fileName + "." + obj.mimeType.split("/")[1]
  );

  const folders = DriveApp.getFoldersByName(folderName);
  const folder = folders.hasNext()
    ? folders.next()
    : DriveApp.getRootFolder().createFolder(folderName);

  const id = folder.createFile(blob).getId();
  DocumentApp.getUi().alert("Watermark is uploaded successfully!");
  return id;
}

The watermark API I use is the iLovePDF API.


Solution

  • I ended up removing the background by hand:

    1. decompress the PDF using any suitable tool (I used pdftk)

      pdftk file.pdf output output.pdf uncompress
      
    2. edit the decompressed PDF with any manual or automated text editor (I used Vim)

      you're looking for blocks of code that look like this:

      1 1 1 RG 1 1 1 rg
      /G3 gs
      0 1123 794 1123 re
      f
      0 1123 794 1123 re
      f
      0 0 794 6738 re
      f
      

      that is, setting the fill color to white (1 1 1 rg) and then drawing one or more big rectangles (re), each filled with that color (f)

      you want to remove or comment (%) all of those lines

    3. check the file using a PDF viewer and optionally re-compress it:

      pdftk edited.pdf output final.pdf compress
      

    Here's a link to the PDF language reference. Just use the alphabetical index to look up operators, like re and f, to get an idea of what they do.