Search code examples
pdf c#-4.0 itext attachment 3d-secure

Reading attachment from a secured PDF


I am working with a secured (password-protected) PDF file that has an Excel file attached to it.

The following is the code I tried.

    // Console entry point: builds the sample PDF with its attachment and the
    // encrypted copy. The extraction call is left disabled, as in the original.
    static void Main(string[] args)
    {
        var program = new Program();

        program.EmbedAttachments();
        // program.ExtractAttachments(program.pdfFile);
    }

    /// <summary>
    /// Extracts the document-level attachments (catalog /Names -> /EmbeddedFiles)
    /// of a password-protected PDF and writes each one into <c>attExtPath</c>.
    /// </summary>
    /// <remarks>
    /// Only a name tree whose root carries a direct /Names array is read; /Kids
    /// nodes are not traversed, and attachments stored as file-attachment
    /// annotations on pages are not found by this method.
    /// Any failure is reported on the console instead of being propagated.
    /// </remarks>
    /// <param name="_pdfFile">Path of the encrypted PDF to read.</param>
    private void ExtractAttachments(string _pdfFile)
    {
        try
        {
            if (!Directory.Exists(attExtPath))
                Directory.CreateDirectory(attExtPath);

            // NOTE(review): the password is hard-coded; move it to configuration.
            byte[] password = System.Text.ASCIIEncoding.ASCII.GetBytes("TFAER13052016");

            PdfReader reader = new PdfReader(_pdfFile, password);
            try
            {
                PdfDictionary documentNames = reader.Catalog.GetAsDict(PdfName.NAMES);
                if (documentNames == null)
                    throw new Exception("Unable to Read the document");

                PdfDictionary embeddedFiles = documentNames.GetAsDict(PdfName.EMBEDDEDFILES);
                if (embeddedFiles == null)
                    throw new Exception("Unable to Read the attachment or There may be no Attachment");

                PdfArray filespecs = embeddedFiles.GetAsArray(PdfName.NAMES);
                if (filespecs == null)
                    throw new Exception("Unable to Read the attachment or There may be no Attachment");

                // The array holds (name, file specification) pairs, so the file
                // specifications sit at the odd indices. Stepping by two from 1
                // also stays in bounds when the array has a dangling last entry.
                for (int i = 1; i < filespecs.Size; i += 2)
                {
                    PdfDictionary fileSpec = filespecs.GetAsDict(i);
                    if (fileSpec == null)
                        continue;

                    PdfDictionary embedded = fileSpec.GetAsDict(PdfName.EF);
                    if (embedded == null)
                        continue;

                    foreach (PdfName key in embedded.Keys)
                    {
                        PRStream stream = PdfReader.GetPdfObject(embedded.GetAsIndirectObject(key)) as PRStream;
                        if (stream == null)
                            continue;

                        // The file name is normally stored under the same key as
                        // the stream; fall back to /F when that entry is absent.
                        PdfString nameEntry = fileSpec.GetAsString(key) ?? fileSpec.GetAsString(PdfName.F);
                        if (nameEntry == null)
                            continue;

                        // Keep only the last path component so a name taken from
                        // the PDF cannot escape the extraction folder.
                        string attachedFileName = nameEntry.ToString();
                        attachedFileName = attachedFileName.Substring(
                            attachedFileName.LastIndexOfAny(new[] { '\\', '/' }) + 1);
                        if (attachedFileName.Length == 0)
                            continue;

                        byte[] attachedFileBytes = PdfReader.GetStreamBytes(stream);
                        System.IO.File.WriteAllBytes(
                            System.IO.Path.Combine(attExtPath, attachedFileName), attachedFileBytes);
                    }
                }
            }
            finally
            {
                // Release the handle on the source file even when extraction fails.
                reader.Close();
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.ToString());
            Console.ReadKey();
        }
    }

    /// <summary>
    /// Creates a one-page PDF at <c>pdfFile</c> with <c>C:\PDFReader\1.xls</c>
    /// embedded as the document-level attachment "11.xls", then writes an
    /// encrypted copy of it to <c>epdfFile</c>.
    /// </summary>
    /// <remarks>
    /// Any failure is reported on the console instead of being propagated.
    /// </remarks>
    private void EmbedAttachments()
    {
        try
        {
            if (File.Exists(pdfFile))
                File.Delete(pdfFile);

            // The using block guarantees the output file is released even if
            // building the document throws before Close() is reached.
            using (FileStream pdfStream = new FileStream(pdfFile, FileMode.Create))
            {
                Document document = new Document(PageSize.LETTER);
                PdfWriter writer = PdfWriter.GetInstance(document, pdfStream);

                document.Open();
                document.NewPage();
                document.Add(new Paragraph("This is test"));

                PdfFileSpecification pfs = PdfFileSpecification.FileEmbedded(writer, @"C:\PDFReader\1.xls", "11.xls", null);
                writer.AddFileAttachment(pfs);

                document.Close();
            }

            // Read the fields of this instance directly; no second Program is needed.
            using (Stream input = new FileStream(pdfFile, FileMode.Open, FileAccess.Read, FileShare.Read))
            using (Stream output = new FileStream(epdfFile, FileMode.Create, FileAccess.Write, FileShare.None))
            {
                PdfReader reader = new PdfReader(input);
                try
                {
                    // NOTE(review): user/owner passwords are hard-coded sample values.
                    PdfEncryptor.Encrypt(reader, output, true, "Password", "secret", PdfWriter.ALLOW_SCREENREADERS);
                }
                finally
                {
                    reader.Close();
                }
            }
        }
        catch (Exception ex)
        {
            // Print type, message and stack trace, matching ExtractAttachments.
            Console.WriteLine(ex.ToString());
            Console.ReadKey();
        }
    }
}

The above code creates an encrypted PDF with an Excel attachment and also extracts that attachment.

Now the real problem is with a file I already have as a requirement document (I cannot share the file), which also has an Excel attachment like my example.

The above code works for the secured PDF which I created, but not for the actual secured PDF.

While debugging, I found that the issue is with the following code:

documentNames = (PdfDictionary)PdfReader.GetPdfObject(catalog.Get(PdfName.NAMES));

In which,

catalog.Get(PdfName.NAMES)

returns NULL, whereas for the file created by me it provides the expected output.

Please guide me on the above.

TIA.


Solution

  • As mkl suggested, the file has been attached as a file attachment annotation rather than as a document-level attachment. However, the example referenced there relies on a ZipFile method that is no longer supported, so I found the alternative code attached below.

    /// <summary>
    /// Extracts every attachment stored as a file-attachment annotation on the
    /// pages of a password-protected PDF and writes each one into
    /// <c>C:\PDFReader\Extract\</c>.
    /// </summary>
    /// <remarks>
    /// Annotations whose file specification, embedded-file dictionary, stream
    /// or file name is missing are skipped rather than causing a failure.
    /// </remarks>
    /// <param name="src">Raw bytes of the encrypted PDF.</param>
    public void ExtractAttachments(byte[] src)
    {
        string attExtPath = @"C:\PDFReader\Extract\";

        if (!Directory.Exists(attExtPath))
            Directory.CreateDirectory(attExtPath);

        // NOTE(review): the password is hard-coded; move it to configuration.
        byte[] password = System.Text.ASCIIEncoding.ASCII.GetBytes("TFAER13052016");
        PdfReader reader = new PdfReader(src, password);
        try
        {
            for (int i = 1; i <= reader.NumberOfPages; i++)
            {
                PdfArray array = reader.GetPageN(i).GetAsArray(PdfName.ANNOTS);
                if (array == null) continue;

                for (int j = 0; j < array.Size; j++)
                {
                    PdfDictionary annot = array.GetAsDict(j);
                    if (annot == null ||
                        !PdfName.FILEATTACHMENT.Equals(annot.GetAsName(PdfName.SUBTYPE)))
                        continue;

                    // GetAsDict returns null when /FS or /EF is absent or is not
                    // a dictionary; such annotations carry nothing to extract.
                    PdfDictionary fs = annot.GetAsDict(PdfName.FS);
                    if (fs == null) continue;
                    PdfDictionary refs = fs.GetAsDict(PdfName.EF);
                    if (refs == null) continue;

                    foreach (PdfName name in refs.Keys)
                    {
                        PRStream stream = PdfReader.GetPdfObject(refs.GetAsIndirectObject(name)) as PRStream;
                        PdfString nameEntry = fs.GetAsString(name);
                        if (stream == null || nameEntry == null) continue;

                        // The stored name may be a full path; keep only the last
                        // component so the file lands inside the extraction folder.
                        string attachedFileName = nameEntry.ToString();
                        attachedFileName = attachedFileName.Substring(
                            attachedFileName.LastIndexOfAny(new[] { '\\', '/' }) + 1);
                        if (attachedFileName.Length == 0) continue;

                        byte[] attachedFileBytes = PdfReader.GetStreamBytes(stream);
                        System.IO.File.WriteAllBytes(
                            System.IO.Path.Combine(attExtPath, attachedFileName), attachedFileBytes);
                    }
                }
            }
        }
        finally
        {
            // Release the reader's resources even when extraction fails.
            reader.Close();
        }
    }
    

    Please let me know if it can be achieved in any other way.

    Thanks!!!