I am trying to extract the fonts used in a PDF document via PDFSharp (http://www.pdfsharp.net/).
Unfortunately I have not been able to do so: I keep getting strange results that I don't know how to interpret. In the assembly I have seen classes like `PdfFontTable`, but they are all internal.
When decompiling the `PdfResources` class, everything related to fonts is likewise internal.
I have tried to access the Resources to get the fonts as:
// Open the PDF from the stream, then for every page look up the "/Font"
// entry among the elements of that page's Resources.
var document = Reader.Open(stream, PdfDocumentOpenMode.InformationOnly);
foreach (var pdfPage in document.Pages)
{
    var fontEntry = pdfPage.Resources.Elements.GetValue("/Font");
}
But that gives me an incomprehensible response:
Is there a way to extract the list of fonts used just like I see them in Adobe Acrobat Reader?
Eventually I managed to get it to work (using iText 7 rather than PDFsharp) by implementing an `IEventListener` and handling the `TextRenderInfo` events it receives.
/// <summary>
/// Collects the fonts referenced by the text-rendering operations of a PDF document.
/// </summary>
/// <remarks>
/// Results are accumulated in an instance field that <see cref="GetFonts"/> replaces on
/// every call, so a single instance should not run overlapping <see cref="GetFonts"/> calls.
/// </remarks>
public class FontReader : IEventListener
{
    private ICollection<FontData> _fonts;

    /// <summary>
    /// Gets all fonts used in a PdfDocument.
    /// </summary>
    /// <param name="document">The document whose pages are processed, first to last.</param>
    /// <returns>The fonts recorded while processing the content of every page.</returns>
    public ICollection<FontData> GetFonts(PdfDocument document)
    {
        _fonts = new List<FontData>();
        var processor = new PdfCanvasProcessor(this);

        // Page numbers passed to GetPage start at 1, hence the inclusive upper bound.
        for (var i = 1; i <= document.GetNumberOfPages(); i++)
        {
            var page = document.GetPage(i);
            processor.ProcessPageContent(page);
        }

        return _fonts;
    }

    /// <summary>
    /// Records the font of each text-rendering event; every other kind of event data is ignored.
    /// </summary>
    /// <param name="data">The event payload supplied by the canvas processor.</param>
    /// <param name="type">The kind of event that occurred.</param>
    public void EventOccurred(IEventData data, EventType type)
    {
        var renderInfo = data as TextRenderInfo;
        if (renderInfo == null)
        {
            return;
        }

        var font = renderInfo.GetFont();
        var name = font.GetFontProgram().GetFontNames().GetFontName();

        // Named "subtype" rather than "type": a local called "type" would clash with the
        // "type" parameter of this method and fail to compile (CS0136).
        // The /Subtype entry may be absent from the font dictionary, so guard the dereference.
        var subtypeName = font.GetPdfObject().GetAsName(PdfName.Subtype);
        var subtype = subtypeName == null ? null : subtypeName.GetValue();

        // Elided in the original snippet: build the FontData from "name" and "subtype" here.
...
_fonts.Add(...);
    }

    /// <summary>
    /// Declares the event types this listener wants to receive. Required by
    /// <see cref="IEventListener"/>; only text-rendering events are needed here.
    /// </summary>
    /// <returns>A set containing only <see cref="EventType.RENDER_TEXT"/>.</returns>
    public ICollection<EventType> GetSupportedEvents()
    {
        return new HashSet<EventType> { EventType.RENDER_TEXT };
    }
}