I have a large PDF that has been combined from multiple documents.
How can I split the PDF back into multiple documents with a keyword delimiter?
Adobe Reader alone is not enough: you will also need Adobe Acrobat.
Add the following script using the Action Wizard:
Paste in the following script and modify for your needs. See //comments for help on customisation.
/* Extract Pages into Documents by Keyword */
// Splits the current document into several new PDF files. Every page that
// contains the keyword starts a new sub-document; that sub-document runs up
// to the page before the next keyword page (or to the last page of the
// document for the final one). Output files are saved next to the source
// file as <source name>0.pdf, <source name>1.pdf, ...
// NOTE: pages located before the first keyword page are not written to any
// output file.
var pageArray = [];    // first page (0-based) of each sub-document
var pageArrayEnd = []; // last page (0-based) of each sub-document
var stringToSearchFor = app.response("This Action Script splits the document by a keyword on each X number of pages, please enter the keyword:");

// app.response() yields null when the dialog is cancelled; in that case
// there is nothing to search for, so skip the scan entirely.
if (stringToSearchFor !== null) {
    for (var p = 0; p < this.numPages; p++) {
        // The word count of a page does not change while scanning it, so
        // query it once per page instead of once per word.
        var wordCount = this.getPageNumWords(p);

        // iterate over all words on the page
        for (var w = 0; w < wordCount; w++) {
            // CUSTOMISATION HELP: to match several words in a given order,
            // prompt for a second keyword (e.g. stringToSearchForTWO), stop
            // the loop one word earlier (w < wordCount - 1) and test
            //   (this.getPageNthWord(p, w) === stringToSearchFor) &&
            //   (this.getPageNthWord(p, w + 1) === stringToSearchForTWO)
            // DEBUGGING HELP: uncomment the next line to see every word.
            //app.alert("Word is " + this.getPageNthWord(p, w));
            if (this.getPageNthWord(p, w) === stringToSearchFor) {
                //app.alert("Found word on page " + p + " word number " + w, 3);
                // A new keyword page closes the previous sub-document on
                // the page just before it.
                if (pageArray.length > 0) {
                    pageArrayEnd.push(p - 1);
                }
                pageArray.push(p);
                break; // one hit per page is enough; go to the next page
            }
        }
    }

    // The last sub-document always runs to the end of the source document.
    pageArrayEnd.push(this.numPages - 1);
    //app.alert("Number of sub documents " + pageArray.length, 3);

    // Strip only a trailing ".pdf" (any letter case) so that a ".pdf"
    // occurring earlier in the path, e.g. in a folder name, is left alone.
    var basePath = this.path.replace(/\.pdf$/i, "");

    // write each page range to its own document
    for (var i = 0; i < pageArray.length; i++) {
        var d = app.newDoc(); // this will add a blank page - we need to remove that once we are done
        //app.alert("New Doc using pages " + pageArray[i] + " to " + pageArrayEnd[i], 3);
        d.insertPages({
            nPage: d.numPages - 1, // insert after the blank page
            cPath: this.path,
            nStart: pageArray[i],
            nEnd: pageArrayEnd[i]
        });
        // remove the blank first page created by app.newDoc()
        d.deletePages(0);
        d.saveAs({ cPath: basePath + i + ".pdf" });
        d.closeDoc(true);
    }
}