I have a PDF file saved in Google Drive. I want to find a piece of text in that file (e.g. USD), pick the value next to the found text (e.g. 167.1764), and insert it into my Google spreadsheet.
Below is the preview of my PDF File.
Link to my PDF File.
Here is the code I tried. It fails to find the text and never reaches the value next to it.
My code is below.
/**
 * Converts a PDF stored in Google Drive to a Google Doc (with OCR requested),
 * reads the resulting text, and extracts the first number that follows "USD".
 *
 * Requires the Advanced Drive Service (the `Drive` global) to be enabled for
 * the script project.
 *
 * @returns {?string} The value found after "USD" (e.g. "167.1764"), or null
 *     when the converted text contains no "USD" followed by a number.
 * @throws {Error} If the PDF is not present in the folder.
 */
function extractTextFromPDF() {
  const fileName = '08-Sep-2021.pdf';
  const folder = DriveApp.getFolderById('folderid');
  const candidates = folder.getFilesByName(fileName);

  // Fail with a clear message instead of a TypeError on an undefined blob.
  if (!candidates.hasNext()) {
    throw new Error('File not found in folder: ' + fileName);
  }
  const blob = candidates.next().getBlob();

  const resource = {
    title: blob.getName(),
    mimeType: blob.getContentType()
  };

  // Enable the Advanced Drive API Service
  const file = Drive.Files.insert(resource, blob, {ocr: true, ocrLanguage: "en"});

  // Extract Text from PDF file
  const doc = DocumentApp.openById(file.id);
  const text = doc.getBody().getText();
  Logger.log(text);

  // NOTE: the converted Doc is left in Drive (handy while debugging). Once the
  // extraction works, trash it with DriveApp.getFileById(file.id).setTrashed(true).

  // The previous findText() loop never advanced to the next match, so it spun
  // forever as soon as "USD" was found. The value sits after "USD" in the
  // extracted text (possibly on the following line), so match it directly:
  // "USD", any whitespace/newlines, then a number with optional thousands
  // separators and decimals.
  const match = /\bUSD\b\s*(\d[\d,]*(?:\.\d+)?)/.exec(text);
  const value = match === null ? null : match[1];

  // Log the USD value, e.g. 167.1764 (null when nothing matched).
  Logger.log(value);
  return value;
}
You can extract this with a regular expression. I'm not the best with those patterns, so maybe somebody else can optimize them so that the split is not necessary (here is a link).
The code:
/**
 * Converts a PDF stored in Google Drive to a temporary Google Doc, reads its
 * text, and extracts the two values on the lines following "USD" (named
 * `buying` and `selling` here). The temporary Doc is always moved to the trash,
 * even when extraction fails.
 *
 * Requires the Advanced Drive Service (the `Drive` global) to be enabled for
 * the script project.
 *
 * @returns {{buying: string, selling: string}} The trimmed text of the first
 *     and second lines after "USD".
 * @throws {Error} If the PDF is not in the folder, or if the converted text
 *     does not contain "USD" followed by two value lines.
 */
function extractTextFromPDF() {
  const fileName = '08-Sep-2021.pdf';
  const folder = DriveApp.getFolderById('1QVo_pxxx387WPH9Yx');
  const candidates = folder.getFilesByName(fileName);

  // Fail with a clear message instead of a TypeError on an undefined blob.
  if (!candidates.hasNext()) {
    throw new Error(`File not found in folder: ${fileName}`);
  }
  const blob = candidates.next().getBlob();

  const resource = {
    title: blob.getName(),
    mimeType: blob.getContentType()
  };

  // Enable the Advanced Drive API Service
  const file = Drive.Files.insert(resource, blob, {convert: true});

  try {
    // Extract Text from PDF file
    const text = DocumentApp.openById(file.id).getBody().getText();
    Logger.log(text);

    // First line after "USD".
    const buyingMatch = /USD\n(.*?)$/m.exec(text);
    // Second line after "USD": skip one whitespace-free token and its newline.
    const sellingMatch = /USD\n\s*\S*\n(.*?)$/m.exec(text);
    if (buyingMatch === null || sellingMatch === null) {
      throw new Error(`Could not find USD values in the text extracted from ${fileName}`);
    }

    const buying = buyingMatch[1].trim();
    const selling = sellingMatch[1].trim();
    console.log(buying);
    console.log(selling);
    return { buying, selling };
  } finally {
    // Remove the converted file, whether or not extraction succeeded.
    DriveApp.getFileById(file.id).setTrashed(true);
  }
}