Search code examples
c# pdf pdfsharp

Extract pdf vector objects


Is there any way to extract the coordinates and dimensions of vector objects that use a specific color with C#? A "dieline" or a "cut line", for example? I tried the PDFSharp library, but it doesn't seem to have such a function. I can extract the swatches, but not the coordinates or dimensions of the objects that use the respective color.


Solution

  • You can list the vector graphics from a PDF page with PDF4NET as follows (assuming your dieline and cut line are separation colors):

    // Lists the geometry of every path on one page that is stroked and/or filled
    // with the separation color whose colorant name equals targetColorant.
    // Expects pdfFile and pageNumber to be defined by the surrounding code.
    const string targetColorant = "dieline";

    PDFFixedDocument document = new PDFFixedDocument(pdfFile);

    PDFContentExtractor ce = new PDFContentExtractor(document.Pages[pageNumber]);
    // NOTE(review): the meaning of the 'false' argument is not visible here - confirm against the PDF4NET docs.
    PDFVisualObjectCollection pageVisualObjects = ce.ExtractVisualObjects(false);

    for (int i = 0; i < pageVisualObjects.Count; i++)
    {
        // Only path objects are reported; every other visual object type is skipped.
        if (pageVisualObjects[i].Type != PDFVisualObjectType.Path)
        {
            continue;
        }

        // Guard the 'as' cast instead of dereferencing a possible null.
        PDFPathVisualObject pathVisualObject = pageVisualObjects[i] as PDFPathVisualObject;
        if (pathVisualObject == null)
        {
            continue;
        }

        // Separation color space of the stroke (pen), if the path has one.
        PDFSeparationColorSpace strokeSpace = null;
        if ((pathVisualObject.Pen != null) &&
            (pathVisualObject.Pen.Color.ColorSpace.Type == PDFColorSpaceType.Separation))
        {
            strokeSpace = pathVisualObject.Pen.Color.ColorSpace as PDFSeparationColorSpace;
        }

        // Separation color space of the fill (brush), if the path has one.
        PDFSeparationColorSpace fillSpace = null;
        if ((pathVisualObject.Brush != null) &&
            (pathVisualObject.Brush.Color.ColorSpace.Type == PDFColorSpaceType.Separation))
        {
            fillSpace = pathVisualObject.Brush.Color.ColorSpace as PDFSeparationColorSpace;
        }

        bool strokeMatches = (strokeSpace != null) && (strokeSpace.Colorant == targetColorant);
        bool fillMatches = (fillSpace != null) && (fillSpace.Colorant == targetColorant);

        // The same path geometry is printed once per matching usage, stroke first,
        // then fill; a null entry means "this usage did not match".
        string[] headings =
        {
            strokeMatches ? targetColorant + " stroke" : null,
            fillMatches ? targetColorant + " fill" : null
        };

        foreach (string heading in headings)
        {
            if (heading == null)
            {
                continue;
            }

            Console.WriteLine(heading);
            for (int j = 0; j < pathVisualObject.PathItems.Count; j++)
            {
                var pathItem = pathVisualObject.PathItems[j];
                Console.Write("{0}: ", pathItem.Type);

                // Some items have no points (the original code checks for null as well).
                if (pathItem.Points != null)
                {
                    for (int k = 0; k < pathItem.Points.Length; k++)
                    {
                        // Invariant culture keeps '.' as the decimal separator so a
                        // coordinate pair stays unambiguous on comma-decimal locales.
                        Console.Write(string.Format(
                            System.Globalization.CultureInfo.InvariantCulture,
                            "[{0:0.######}, {1:0.######}] ",
                            pathItem.Points[k].X,
                            pathItem.Points[k].Y));
                    }
                }
                Console.WriteLine("");
            }
        }
    }
    

    This is the output from a sample PDF file that contains a filled and stroked rectangle:

    dieline stroke
    MoveTo: [20, 605]
    LineTo: [270, 605]
    LineTo: [270, 705]
    LineTo: [20, 705]
    CloseSubpath:
    dieline fill
    MoveTo: [20, 605]
    LineTo: [270, 605]
    LineTo: [270, 705]
    LineTo: [20, 705]
    CloseSubpath:
    

    Disclaimer: I work for the company that develops the PDF4NET library.