Search code examples
c# itext html-to-pdf pdfa

iTextSharp PDF/A-3 from HTML in C#


I've been trying to create a PDF file from an HTML string using the iTextSharp library. It works when creating a normal PDF, but when I try to create a PDF/A-3 from the same HTML it always returns an error.

Message: The document has no pages.

Stack_Trace: at iTextSharp.text.pdf.PdfPages.WritePageTree() at iTextSharp.text.pdf.PdfWriter.Close() at iTextSharp.text.pdf.PdfAWriter.Close() at iTextSharp.text.pdf.PdfDocument.Close() at iTextSharp.text.Document.Close() at Common.PDFHelper.GeneratePDF(String html, String fileName, Boolean isNormalPDF, String detailsJSON)

This is my code:

using iTextSharp.text.pdf;
using iTextSharp.tool.xml;
using System;
using System.IO;
using System.Text;
using System.Threading;
using System.Web;

namespace Common
{
    /// <summary>
    /// Renders an HTML string to a PDF (plain or PDF/A-3A) with iTextSharp's XMLWorker
    /// and writes the result to the current HTTP response as a file download.
    /// </summary>
    public class PDFHelper
    {
        /// <summary>
        /// Converts <paramref name="html"/> to a PDF and sends it to the client as an attachment.
        /// </summary>
        /// <param name="html">The (X)HTML markup to render.</param>
        /// <param name="fileName">
        /// Base name of the download; a random 8-character name is generated when blank.
        /// A GUID is always appended, and ".pdf" is added in the response header.
        /// </param>
        /// <param name="isNormalPDF">
        /// <c>true</c> to produce a plain PDF; <c>false</c> (the default) to produce a PDF/A-3A document.
        /// </param>
        /// <param name="detailsJSON">Currently unused; kept so existing callers keep compiling.</param>
        /// <remarks>
        /// Failures are not propagated to the caller: they are written to the trace listeners and the
        /// method returns without sending a document.
        /// </remarks>
        public void GeneratePDF(string html, string fileName = "", bool isNormalPDF = false, string detailsJSON = "")
        {
            try
            {
                if (string.IsNullOrWhiteSpace(fileName))
                {
                    fileName = Helpers.GenerateRandomString(8);
                }

                fileName += Guid.NewGuid().ToString();

                byte[] bytes;
                using (MemoryStream memoryStream = new MemoryStream())
                {
                    XMLWorkerFontProvider fontProvider = new XMLWorkerFontProvider(Environment.GetFolderPath(Environment.SpecialFolder.Fonts));

                    // Document and PageSize live in iTextSharp.text; they are fully qualified here so the
                    // block does not depend on a using directive for that namespace.
                    iTextSharp.text.Document pdfDoc = new iTextSharp.text.Document(iTextSharp.text.PageSize.A4);

                    if (isNormalPDF) // Create normal PDF
                    {
                        PdfWriter writer = PdfWriter.GetInstance(pdfDoc, memoryStream);

                        pdfDoc.Open();
                        ParseHtml(writer, pdfDoc, html, fontProvider);
                    }
                    else // Create PDF/A-3
                    {
                        PdfAWriter writer = PdfAWriter.GetInstance(pdfDoc, memoryStream, PdfAConformanceLevel.PDF_A_3A);

                        pdfDoc.Open();
                        writer.CreateXmpMetadata();

                        // File.ReadAllBytes reads the whole profile and closes the file handle; a single
                        // Stream.Read call is allowed to return fewer bytes than requested.
                        string sRGBCSprofile = HttpContext.Current.Server.MapPath("~/Resources/Color/sRGB_CS_profile.icm");
                        byte[] sRGBCSproFileByte = File.ReadAllBytes(sRGBCSprofile);

                        ICC_Profile iccProfile = ICC_Profile.GetInstance(sRGBCSproFileByte);
                        writer.SetOutputIntents("Custom", "", "http://www.color.org", "sRGB IEC61966-2.1", iccProfile);

                        // NOTE(review): conformance level "A" requires a tagged PDF. This code only adds a
                        // MarkInfo entry to the catalog by hand; confirm whether writer.SetTagged() (called
                        // before Open()) is what iTextSharp expects here.
                        PdfDictionary markInfo = new PdfDictionary(PdfName.MARKINFO);
                        markInfo.Put(PdfName.MARKED, new PdfBoolean("true"));
                        writer.ExtraCatalog.Put(PdfName.MARKINFO, markInfo);

                        ParseHtml(writer, pdfDoc, html, fontProvider);
                    }

                    // Close explicitly (rather than through a using block) so that an exception thrown
                    // while parsing surfaces as-is instead of being followed by a second failure in Close().
                    pdfDoc.Close();

                    bytes = memoryStream.ToArray();
                }

                // Clears all content output from the buffer stream
                HttpResponse response = HttpContext.Current.Response;
                response.Clear();
                response.ContentType = "application/pdf";
                response.AddHeader("content-disposition", "attachment;filename=" + fileName + ".pdf");
                response.Cache.SetCacheability(HttpCacheability.NoCache);
                response.BinaryWrite(bytes);

                response.End();
            }
            catch (ThreadAbortException)
            {
                // Expected: Response.End() stops the request by aborting the current thread.
            }
            catch (Exception ex)
            {
                // Keep the original non-throwing contract, but leave a trace instead of hiding the failure.
                System.Diagnostics.Trace.TraceError("PDFHelper.GeneratePDF failed: " + ex);
            }
        }

        /// <summary>
        /// Feeds the UTF-8 encoded HTML through XMLWorker into the already opened document.
        /// </summary>
        private static void ParseHtml(PdfWriter writer, iTextSharp.text.Document pdfDoc, string html, XMLWorkerFontProvider fontProvider)
        {
            using (MemoryStream htmlContent = new MemoryStream(Encoding.UTF8.GetBytes(html)))
            {
                XMLWorkerHelper.GetInstance().ParseXHtml(writer, pdfDoc, htmlContent, null, Encoding.UTF8, fontProvider);
            }
        }
    }
}

Solution

  • After reading the iText 7 documentation:

    https://kb.itextpdf.com/home/it7kb/ebooks/itext-7-converting-html-to-pdf-with-pdfhtml/chapter-4-creating-reports-using-pdfhtml

    I finally figured out how to do it. Here is my code, for anyone who needs help with the same problem:

    using iText.Kernel.Pdf;
    using iText.Kernel.Pdf.Filespec;
    using System;
    using System.IO;
    using System.Threading;
    using System.Web;
    
    namespace Common
    {
        /// <summary>
        /// Renders an HTML string to a PDF (plain or PDF/A-3A) with iText 7 pdfHTML and writes the
        /// result to the current HTTP response as a file download.
        /// </summary>
        public class PDFHelper
        {
            /// <summary>
            /// Converts <paramref name="html"/> to a PDF and sends it to the client as an attachment.
            /// </summary>
            /// <param name="html">
            /// The HTML markup to render. The first inline <c>data:image/jpeg;base64</c> image, if any,
            /// is written to a temporary file and referenced by path for the duration of the conversion.
            /// </param>
            /// <param name="fileName">Base name of the download; ".pdf" is appended in the response header.</param>
            /// <param name="isNormalPDF">
            /// <c>true</c> to produce a plain PDF; <c>false</c> (the default) to produce a PDF/A-3A document.
            /// </param>
            /// <remarks>
            /// Failures are not propagated to the caller: they are written to the trace listeners and the
            /// method returns without sending a document.
            /// </remarks>
            public void GeneratePDF(string html, string fileName = "", bool isNormalPDF = false)
            {
                string tempImagePath = "";
                try
                {
                    string resourceFolderPath = HttpContext.Current.Server.MapPath("~/Resources");

                    // Check if there is an image in the HTML to re-create it as a file for the PDF;
                    // the file is removed again in the finally block.
                    tempImagePath = MoveInlineJpegToTempFile(ref html);

                    // Register every font shipped in ~/Resources/Font; the three 'false' flags are passed
                    // exactly as before.
                    iText.Layout.Font.FontProvider fontProvider = new iText.Html2pdf.Resolver.Font.DefaultFontProvider(false, false, false);
                    foreach (string font in Directory.GetFiles(Path.Combine(resourceFolderPath, "Font")))
                    {
                        iText.IO.Font.FontProgram fontProgram = iText.IO.Font.FontProgramFactory.CreateFont(font);
                        fontProvider.AddFont(fontProgram);
                    }

                    iText.Html2pdf.ConverterProperties properties = new iText.Html2pdf.ConverterProperties();
                    properties.SetCreateAcroForm(true);
                    properties.SetFontProvider(fontProvider);

                    byte[] bytes;
                    using (MemoryStream outputStream = new MemoryStream())
                    {
                        if (isNormalPDF)
                        {
                            iText.Html2pdf.HtmlConverter.ConvertToPdf(html, outputStream, properties);
                        }
                        else
                        {
                            ConvertToPdfA3(html, outputStream, properties, resourceFolderPath);
                        }

                        bytes = outputStream.ToArray();
                    }

                    // Clears all content output from the buffer stream
                    HttpResponse response = HttpContext.Current.Response;
                    response.Clear();
                    response.ContentType = "application/pdf";
                    response.AddHeader("content-disposition", "attachment;filename=" + fileName + ".pdf");
                    response.Cache.SetCacheability(HttpCacheability.NoCache);
                    response.BinaryWrite(bytes);

                    response.End();
                }
                catch (ThreadAbortException)
                {
                    // Expected: Response.End() stops the request by aborting the current thread.
                }
                catch (Exception ex)
                {
                    // Keep the original non-throwing contract, but leave a trace instead of hiding the failure.
                    System.Diagnostics.Trace.TraceError("PDFHelper.GeneratePDF failed: " + ex);
                }
                finally
                {
                    // Delete the temp image even when conversion or the response write failed.
                    TryDeleteFile(tempImagePath);
                }
            }

            /// <summary>
            /// Replaces the first inline base64 JPEG in <paramref name="html"/> with the path of a temporary
            /// file holding the decoded image.
            /// </summary>
            /// <returns>The temporary file path, or an empty string when nothing was replaced.</returns>
            private static string MoveInlineJpegToTempFile(ref string html)
            {
                int imageIndex = html.IndexOf("data:image/jpeg;base64", StringComparison.Ordinal);
                if (imageIndex < 0)
                {
                    return "";
                }

                // The data URI is assumed to sit inside a double-quoted attribute; without a closing
                // quote there is no reliable end position, so the HTML is left untouched.
                int imageSourceEndIndex = html.IndexOf("\"", imageIndex, StringComparison.Ordinal);
                if (imageSourceEndIndex < 0)
                {
                    return "";
                }

                // A unique name in the temp folder keeps concurrent requests from overwriting each
                // other's image and keeps the file out of the web-served directory.
                string tempImagePath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString("N") + ".jpg");
                try
                {
                    int dataUriLength = imageSourceEndIndex - imageIndex;
                    string dataUri = html.Substring(imageIndex, dataUriLength);
                    string imageBase64 = dataUri.Substring(dataUri.IndexOf(",", StringComparison.Ordinal) + 1);
                    File.WriteAllBytes(tempImagePath, Convert.FromBase64String(imageBase64));

                    html = html.Remove(imageIndex, dataUriLength).Insert(imageIndex, tempImagePath);
                    return tempImagePath;
                }
                catch (Exception ex)
                {
                    // Best effort, as before: on failure the HTML keeps its inline image and conversion continues.
                    System.Diagnostics.Trace.TraceWarning("PDFHelper: could not extract inline image: " + ex);
                    TryDeleteFile(tempImagePath);
                    return "";
                }
            }

            /// <summary>
            /// Converts <paramref name="html"/> into a tagged PDF/A-3A document written to <paramref name="outputStream"/>.
            /// </summary>
            private static void ConvertToPdfA3(string html, Stream outputStream, iText.Html2pdf.ConverterProperties properties, string resourceFolderPath)
            {
                string iccProfilePath = Path.Combine(resourceFolderPath, "Color", "sRGB_CS_profile.icm");

                // The profile stream stays open until conversion has finished and is then released.
                using (FileStream iccProfile = new FileStream(iccProfilePath, FileMode.Open, FileAccess.Read))
                {
                    PdfWriter writer = new PdfWriter(outputStream);
                    iText.Pdfa.PdfADocument pdf = new iText.Pdfa.PdfADocument(writer, PdfAConformanceLevel.PDF_A_3A, new PdfOutputIntent("Custom", "", "https://www.color.org",
                        "sRGB IEC61966-2.1", iccProfile));
                    pdf.SetTagged();

                    // Embed the XML file
                    string xmlString = "";
                    //Fill xml embedded file content, just the XML

                    if (!string.IsNullOrWhiteSpace(xmlString))
                    {
                        PdfDictionary parameters = new PdfDictionary();
                        parameters.Put(PdfName.ModDate, new PdfDate().GetPdfObject());

                        // Encode the XML in memory and attach it to the PDF.
                        byte[] xmlBytes = System.Text.Encoding.UTF8.GetBytes(xmlString);
                        PdfFileSpec fileSpec = PdfFileSpec.CreateEmbeddedFileSpec(pdf, xmlBytes, "invoice.xml", "invoice.xml", PdfName.ApplicationXml, parameters, PdfName.Data);
                        fileSpec.Put(new PdfName("AFRelationship"), new PdfName("Data"));
                        pdf.AddFileAttachment("Invoice Xml", fileSpec);
                    }

                    iText.Html2pdf.HtmlConverter.ConvertToPdf(html, pdf, properties);
                }
            }

            /// <summary>
            /// Deletes <paramref name="path"/> if it is non-blank; a failed delete is traced, not thrown.
            /// </summary>
            private static void TryDeleteFile(string path)
            {
                if (string.IsNullOrWhiteSpace(path))
                {
                    return;
                }

                try
                {
                    File.Delete(path);
                }
                catch (IOException ex)
                {
                    System.Diagnostics.Trace.TraceWarning("PDFHelper: could not delete temp file '" + path + "': " + ex.Message);
                }
                catch (UnauthorizedAccessException ex)
                {
                    System.Diagnostics.Trace.TraceWarning("PDFHelper: could not delete temp file '" + path + "': " + ex.Message);
                }
            }
        }
    }