Search code examples
pdf, split, adobe, keyword, acrobat

How to split a PDF into multiple documents


I have a large PDF that has been combined from multiple documents.

How can I split the PDF back into multiple documents with a keyword delimiter?


Solution

  • As well as Adobe Reader you will need Adobe Acrobat.

    Add the following script using the Action Wizard:

    enter image description here

    enter image description here

    Paste in the following script and modify for your needs. See //comments for help on customisation.

    /* Extract Pages into Documents by Keyword */
    // Splits the current document into several PDFs. Every page on which the
    // keyword appears as a word starts a new sub-document; that sub-document
    // runs up to the page before the next keyword page (or to the last page of
    // the file). Pages before the first keyword page are not extracted.
    // Output files are saved next to the source as <name>0.pdf, <name>1.pdf, ...
    //
    // CUSTOMISATION: to match a sequence of words instead of a single word,
    // prompt for a second search word, also compare
    // this.getPageNthWord(p, w + 1) against it, and stop the word loop one
    // word earlier (w < wordCount - 1).

    // app.response returns null when the user cancels the dialog.
    var stringToSearchFor = app.response("This Action Script splits the document by a keyword on each X number of pages, please enter the keyword:");

    // getPageNthWord strips white space and punctuation from the words it
    // returns (bStrip defaults to true), so white space typed around the
    // keyword could never match; remove it up front.
    if (stringToSearchFor !== null) {
        stringToSearchFor = String(stringToSearchFor).replace(/^\s+|\s+$/g, "");
    }

    // Do nothing when the prompt was cancelled or left empty.
    if (stringToSearchFor) {
        var pageArray = [];      // first page (0-based) of each sub-document
        var pageArrayEnd = [];   // last page (0-based) of each sub-document

        for (var p = 0; p < this.numPages; p++) {
            // Count the words once per page rather than once per word.
            var wordCount = this.getPageNumWords(p);
            for (var w = 0; w < wordCount; w++) {
                // DEBUGGING HELP, UNCOMMENT NEXT LINE
                //app.alert("Word is " + this.getPageNthWord(p, w));
                if (this.getPageNthWord(p, w) === stringToSearchFor) {
                    //app.alert("Found word on page " + p + " word number " + w, 3);
                    // A new keyword page closes the previous sub-document.
                    if (pageArray.length > 0) {
                        pageArrayEnd.push(p - 1);
                    }
                    pageArray.push(p);
                    // One hit is enough; move on to the next page.
                    break;
                }
            }
        }

        // The last sub-document runs to the end of the file.
        pageArrayEnd.push(this.numPages - 1);
        //app.alert("Number of sub documents " + pageArray.length, 3);

        var sourcePath = this.path;
        // Strip only a trailing ".pdf" extension (any letter case), not the
        // first ".pdf" that happens to occur somewhere in the path.
        var basePath = sourcePath.replace(/\.pdf$/i, "");

        // Extract each page range into its own new document.
        for (var i = 0; i < pageArray.length; i++) {
            // app.newDoc() creates a document with one blank page; it is
            // removed again once the real pages have been inserted after it.
            var d = app.newDoc();
            //app.alert("New Doc using pages " + pageArray[i] + " to " + pageArrayEnd[i], 3);
            d.insertPages({
                nPage: d.numPages - 1,
                cPath: sourcePath,
                nStart: pageArray[i],
                nEnd: pageArrayEnd[i]
            });
            // remove the blank first page
            d.deletePages(0);
            d.saveAs({ cPath: basePath + i + ".pdf" });
            d.closeDoc(true);
        }
    }