Search code examples
c#itext7

Split a PDF into per-page byte arrays with iText 7


I need to split a PDF file into per-page byte arrays without using the file system. I found the following code from @AlexeySubach, which seems to work, but I am having trouble getting the resulting contents out of the DocumentReadyListender:

/// <summary>
/// A <see cref="PdfSplitter"/> whose writers target in-memory streams rather
/// than files. The stream behind the most recently created writer is exposed
/// through <see cref="CurrentMemoryStream"/>.
/// </summary>
class ByteArrayPdfSplitter : PdfSplitter {

    // Stream backing the writer most recently handed out by GetNextPdfWriter.
    private MemoryStream _latestStream;

    /// <summary>Creates a splitter over the given source document.</summary>
    /// <param name="pdfDocument">Document passed on to the base splitter.</param>
    public ByteArrayPdfSplitter(PdfDocument pdfDocument)
        : base(pdfDocument) {
    }

    /// <summary>
    /// Stream created for the most recent writer; null until a writer has
    /// been requested.
    /// </summary>
    public MemoryStream CurrentMemoryStream => _latestStream;

    /// <summary>
    /// Allocates a fresh <see cref="MemoryStream"/>, remembers it, and returns
    /// a writer on top of it.
    /// </summary>
    /// <param name="documentPageRange">Page range of the upcoming document (not used here).</param>
    /// <returns>A writer targeting the newly created stream.</returns>
    protected override PdfWriter GetNextPdfWriter(PageRange documentPageRange) {
        var target = new MemoryStream();
        _latestStream = target;
        return new PdfWriter(target);
    }

    /// <summary>
    /// Callback that closes each ready document and reads its bytes from the
    /// owning splitter's current stream. The values are only computed here;
    /// nothing is stored or returned yet.
    /// </summary>
    public class DocumentReadyListender : IDocumentReadyListener {

        private readonly ByteArrayPdfSplitter _owner;

        /// <param name="splitter">Splitter whose current stream is read in the callback.</param>
        public DocumentReadyListender(ByteArrayPdfSplitter splitter) {
            _owner = splitter;
        }

        /// <summary>Closes the ready document and snapshots its bytes and page range text.</summary>
        /// <param name="pdfDocument">The document that has just been populated.</param>
        /// <param name="pageRange">Page range that document covers.</param>
        public void DocumentReady(PdfDocument pdfDocument, PageRange pageRange) {
            // Close before reading the stream; presumably this is what finalizes
            // the PDF output into it (confirm against the iText documentation).
            pdfDocument.Close();

            MemoryStream finished = _owner.CurrentMemoryStream;
            byte[] contents = finished.ToArray();
            string pageNumber = pageRange.ToString();
            // Placeholder: 'contents' and 'pageNumber' are not used any further.
        }
    }
}

Usage:

    /// <summary>
    /// Intended to split the PDF contained in <paramref name="bytes"/> and return
    /// the resulting documents as byte arrays. This version is incomplete: the
    /// listener passed to the splitter does not hand its results back, so there
    /// is nothing to return yet.
    /// </summary>
    /// <param name="bytes">The complete source PDF.</param>
    public static List<Byte[]> SplitOnPages(Byte[] bytes)
    {
        using (MemoryStream memoryStream = new MemoryStream(bytes))
        {
            using (PdfReader reader = new PdfReader(memoryStream))
            {
                // NOTE(review): docToSplit is never explicitly closed or disposed in this method.
                PdfDocument docToSplit = new PdfDocument(reader);
                ByteArrayPdfSplitter splitter = new ByteArrayPdfSplitter(docToSplit);
                // Split with a page count of 1; the listener is invoked for each resulting document.
                splitter.SplitByPageCount(1, new ByteArrayPdfSplitter.DocumentReadyListender(splitter));
            }
        }

        // Open question: how can the list of per-page byte arrays be obtained here?
        return ...
    }

Solution

  • The code from Alexey Subach that you found expects you to add some meaningful operation to the DocumentReady method of DocumentReadyListender. Since you ultimately want a list of the resulting PDFs as byte arrays, you should add the bytes of each ready document to such a list, e.g. by improving DocumentReadyListender like this:

    /// <summary>
    /// Listener that closes each document produced by the splitter and appends
    /// its bytes to a caller-supplied list.
    /// </summary>
    public class DocumentReadyListender : IDocumentReadyListener
    {
        /// <summary>
        /// List that receives one byte array per ready document, in the order
        /// the callbacks arrive. Kept as a public field so existing callers
        /// that access it directly keep working.
        /// </summary>
        public List<byte[]> splitPdfs;

        private readonly ByteArrayPdfSplitter splitter;

        /// <summary>Creates a listener bound to a splitter and a result list.</summary>
        /// <param name="splitter">Splitter whose current memory stream is read in the callback.</param>
        /// <param name="results">List the document bytes are appended to.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="splitter"/> or <paramref name="results"/> is null.
        /// </exception>
        public DocumentReadyListender(ByteArrayPdfSplitter splitter, List<byte[]> results)
        {
            // Fail fast here instead of with a NullReferenceException in the
            // middle of a split operation.
            if (splitter == null)
            {
                throw new ArgumentNullException(nameof(splitter));
            }
            if (results == null)
            {
                throw new ArgumentNullException(nameof(results));
            }

            this.splitter = splitter;
            this.splitPdfs = results;
        }

        /// <summary>Closes the ready document and stores its bytes in <see cref="splitPdfs"/>.</summary>
        /// <param name="pdfDocument">The document that has just been populated.</param>
        /// <param name="pageRange">Page range that document covers (not used here).</param>
        public void DocumentReady(PdfDocument pdfDocument, PageRange pageRange)
        {
            // The document is closed before the stream is read, so the bytes
            // are taken only after the document has been finished.
            pdfDocument.Close();
            byte[] contents = splitter.CurrentMemoryStream.ToArray();
            splitPdfs.Add(contents);
        }
    }
    

    (ByteArrayPdfSplitter, improved helper class DocumentReadyListender)

    With that change you can make your SplitOnPages operational:

    /// <summary>
    /// Splits the PDF contained in <paramref name="bytes"/> and returns the
    /// resulting documents as byte arrays, entirely in memory.
    /// </summary>
    /// <param name="bytes">The complete source PDF.</param>
    /// <returns>One byte array per document produced by the splitter, in split order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
    public static List<Byte[]> SplitOnPages(Byte[] bytes)
    {
        if (bytes == null)
        {
            throw new ArgumentNullException(nameof(bytes));
        }

        List<byte[]> result = new List<byte[]>();
        using (MemoryStream memoryStream = new MemoryStream(bytes))
        using (PdfReader reader = new PdfReader(memoryStream))
        // The source document is now disposed as well; previously it was
        // created but never closed.
        using (PdfDocument docToSplit = new PdfDocument(reader))
        {
            ByteArrayPdfSplitter splitter = new ByteArrayPdfSplitter(docToSplit);
            // Page count of 1; the listener appends each finished document's bytes to 'result'.
            splitter.SplitByPageCount(1, new DocumentReadyListender(splitter, result));
        }

        return result;
    }
    

    (SplitInMemory test, improved method SplitOnPages)