Search code examples
c#ms-wordopenxmlopenxml-sdk

OpenXml Force image fit in parent container


I'm parsing a piece of HTML into a Word document using the following code:

//Need the following packages
//<package id="DocumentFormat.OpenXml" version="2.7.2" targetFramework="net471" />
//<package id = "HtmlToOpenXml.dll" version="2.0.1" targetFramework="net471" />

using System.Linq;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;

using DocumentFormat.OpenXml.Wordprocessing;
using HtmlToOpenXml;

namespace ConsoleAppHtmlParse
{
    /// <summary>
    /// Converts a small HTML fragment into a new .docx file and sets a frame
    /// width on the first generated paragraph.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Creates the document, converts <see cref="Html"/> and appends the
        /// resulting elements to the document body.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string fileName = @"C:\temp\myDoc.docx";

            using (WordprocessingDocument document = WordprocessingDocument.Create(fileName, WordprocessingDocumentType.Document))
            {
                document.AddMainDocumentPart();
                document.MainDocumentPart.Document = new Document(new Body());
                HtmlConverter converter = new HtmlConverter(document.MainDocumentPart);

                var compositeElements = converter.Parse(Html);

                // The converter may return no elements, or a first element that is not a
                // paragraph; only touch the frame properties when a paragraph is present
                // instead of failing on the indexer or on a null reference.
                Paragraph p = compositeElements.FirstOrDefault() as Paragraph;
                if (p != null)
                {
                    p.ParagraphProperties = new ParagraphProperties();
                    p.ParagraphProperties.FrameProperties = new FrameProperties();
                    p.ParagraphProperties.FrameProperties.Width = new StringValue("3200");
                }

                document.MainDocumentPart.Document.Body.Append(compositeElements);
            }
        }

        // HTML fragment that is converted: text, an inline image, more text.
        const string Html = "<p>SomeText<img  src=\"\" alt=\"Screenshot_3\" />moretext</p>";
    }
}

The above code sample produces something like this:
enter image description here
I'd like to "shrink" the image to something like this:
enter image description here

Can I do this if I know the "parent" container size?

Thanks


Solution

  • Understanding the modifications for OpenXml:

    The w:drawing element has two parts to manage the image size

    1. The wp:extent node, a child of wp:inline, determines the extent of the area of the document that will contain the image. It does not help us here.

    Note: It is essential for Libre Office

    extent

    2. The spPr node defines the shape properties. It contains the xfrm node, which applies a transform to an object and defines its offset and extent.

    Note: It is essential for OpenXml - Google Documents, etc.

    inline extent

    I have written a program below with two modifications to your code.

    1. Define Page Width: Setting the PageSize and PageMargin: defines the available area that we need to fill with the image. In our case, the available area can be defined as below:

      Available Page Width = PageSize.Width - PageMargin.Left - PageMargin.Right

      // Page geometry: overall page size followed by the four margins.
      private const int PageWidth = 17000;
      private const int PageHeight = 10000;
      private const int PageMarginLeft = 1000;
      private const int PageMarginRight = 1000;
      private const int PageMarginTop = 1000;
      private const int PageMarginBottom = 1000;
      // Conversion factors: page-width units per pixel, and EMUs per pixel.
      private const double DocumentSizePerPixel = 15;
      private const double EmuPerPixel = 9525;

      // Describe the page so the room available to the image is known:
      // Available Width = PageWidth - PageMarginLeft - PageMarginRight (17000 - 1000 - 1000 = 15000 with the values above).
      // NOTE(review): the WordprocessingML schema expects sectPr to be the last child of body,
      // so make sure the document content has been appended before this runs - confirm against the caller.
      var sectionProperties = new SectionProperties(
          new PageSize { Width = PageWidth, Height = PageHeight },
          new PageMargin { Left = PageMarginLeft, Bottom = PageMarginBottom, Top = PageMarginTop, Right = PageMarginRight });
      document.MainDocumentPart.Document.Body.AppendChild(sectionProperties);
      
    2. Update Image Width: Update the Extents with the updated value of width and height for the image

      1. Calculate the aspect ratio, useful in updating height for the image.

      2. Calculate the new width in Emu which is understood by word by using below

        15 page-width units (twips, i.e. 1/1440 inch) = 1 image pixel (at 96 DPI) = 9525 EMUs

        Available page width = 15000 page-width units = 15000 / 15 pixels = 1000 pixels = 1000 * 9525 EMU = 9525000 EMU

      3. Update the width and height of the image by using the extents in two separate places, so that the new size is picked up by Google Documents, Libre Office, etc.

                 // Step 1: collect Drawing > Inline for every run that is a direct child of the paragraph
                var inlineChildren = p.ChildElements
                    .OfType<DocumentFormat.OpenXml.Wordprocessing.Run>()
                    .Select(run => run.GetFirstChild<Drawing>())
                    .Where(drawing => drawing != null)
                    .Select(drawing => drawing.GetFirstChild<Inline>())
                    .Where(inline => inline != null)
                    .ToArray();

                // Step 2: resize the extent stored directly on each inline (Drawing > Inline > Extent)
                foreach (var inline in inlineChildren)
                {
                    UpdateExtent(inline.Extent);
                }

                // Step 3: walk Inline > Graphic > GraphicData > Picture > ShapeProperties > Transform2D > Extents,
                // dropping any inline where one of the levels is missing
                var pictureExtents = inlineChildren
                    .Select(inline => inline.GetFirstChild<Graphic>())
                    .Where(graphic => graphic != null)
                    .Select(graphic => graphic.GetFirstChild<GraphicData>())
                    .Where(data => data != null)
                    .Select(data => data.GetFirstChild<DocumentFormat.OpenXml.Drawing.Pictures.Picture>())
                    .Where(picture => picture != null)
                    .Select(picture => picture.GetFirstChild<DocumentFormat.OpenXml.Drawing.Pictures.ShapeProperties>())
                    .Where(shape => shape != null)
                    .Select(shape => shape.GetFirstChild<Transform2D>())
                    .Where(transform => transform != null)
                    .Select(transform => transform.GetFirstChild<Extents>())
                    .Where(found => found != null);

                // Step 4: apply the same resize to the picture's own extents; the width is
                // stretched to the available page width and the height follows the aspect ratio
                foreach (var extents in pictureExtents)
                {
                    UpdateExtent(extents);
                }
        

    Update Extent method to update the extent values:

        /// <summary>
        /// Stretches an extent element to the available page width and scales its
        /// height so the original aspect ratio is kept.
        /// </summary>
        /// <param name="inlineElement">
        /// An element exposing readable and writable <c>Cx</c> and <c>Cy</c> values in EMU.
        /// When it is null, has no <c>Cx</c>/<c>Cy</c>, or has a zero <c>Cx</c>, it is left untouched.
        /// </param>
        private static void UpdateExtent(dynamic inlineElement)
        {
            // Go through object so the null checks are plain reference checks
            // rather than dynamically bound operators.
            object element = inlineElement;
            if (element == null)
            {
                return;
            }

            object rawCx = inlineElement.Cx;
            object rawCy = inlineElement.Cy;
            if (rawCx == null || rawCy == null)
            {
                return;
            }

            // Read Default Cx and Cy Values provided in Emu
            long extentCx = inlineElement.Cx;
            long extentCy = inlineElement.Cy;

            // A zero width has no aspect ratio; dividing by it would yield NaN/Infinity,
            // and casting that to long would write a meaningless size into the document.
            if (extentCx == 0)
            {
                return;
            }

            // Aspect ratio used to set image height after calculation of width
            double aspectRatioOfImage = (double)extentCy / extentCx;

            // Convert the available page width to pixels (DocumentSizePerPixel units per pixel) and then to EMU.
            // With the default values: 15000 / 15 = 1000 pixels = 1000 * 9525 = 9525000 EMU
            double newExtentCx = EmuPerPixel * ((PageWidth - PageMarginLeft - PageMarginRight) / DocumentSizePerPixel);
            // Maintain the Aspect Ratio for height
            double newExtentCy = aspectRatioOfImage * newExtentCx;

            // Update the values
            inlineElement.Cx = (long)Math.Round(newExtentCx);
            inlineElement.Cy = (long)Math.Round(newExtentCy);
        }
    

    Full Program:

    namespace Solutions
    {
        using System;
        using System.Linq;
        using DocumentFormat.OpenXml;
        using DocumentFormat.OpenXml.Drawing;
        using DocumentFormat.OpenXml.Drawing.Wordprocessing;
        using DocumentFormat.OpenXml.Packaging;
        using DocumentFormat.OpenXml.Wordprocessing;
        using HtmlToOpenXml;

        using Paragraph = DocumentFormat.OpenXml.Wordprocessing.Paragraph;

        /// <summary>
        /// Converts an HTML fragment into a .docx file and stretches the inline images
        /// of the first generated paragraph to the available page width.
        /// </summary>
        public class WordProcessorClass
        {
            const string Html = "<p>SomeText<img src=\"\" alt=\"Screenshot_3\" />moretext</p>";

            // Define Constants for Page Width and Page Margin
            private const int PageWidth = 17000;
            private const int PageHeight = 10000;
            private const int PageMarginLeft = 1000;
            private const int PageMarginRight = 1000;
            private const int PageMarginTop = 1000;
            private const int PageMarginBottom = 1000;
            private const double DocumentSizePerPixel = 15;
            private const double EmuPerPixel = 9525;

            /// <summary>
            /// Creates the document, converts <see cref="Html"/>, resizes the images of the
            /// first paragraph and writes the page size and margins.
            /// </summary>
            public static void Main1()
            {
                string fileName = @"f:\myDoc.docx";

                using (WordprocessingDocument document = WordprocessingDocument.Create(fileName, WordprocessingDocumentType.Document))
                {
                    document.AddMainDocumentPart();
                    document.MainDocumentPart.Document = new Document(new Body());

                    HtmlConverter converter = new HtmlConverter(document.MainDocumentPart);
                    var compositeElements = converter.Parse(Html);

                    // FirstOrDefault avoids an out-of-range failure when the converter yields nothing.
                    var p = compositeElements.FirstOrDefault() as Paragraph;
                    if (p != null)
                    {
                        ResizeInlineImages(p);
                    }

                    var body = document.MainDocumentPart.Document.Body;
                    body.Append(compositeElements);

                    // Set Page Size and Page Margin so that we can place the image as desired.
                    // Available Width = PageWidth - PageMarginLeft - PageMarginRight (= 17000 - 1000 - 1000 = 15000 for default values)
                    // The section properties are appended after the content because sectPr has to be
                    // the last child of the body element.
                    body.AppendChild(new SectionProperties(
                        new PageSize { Width = PageWidth, Height = PageHeight },
                        new PageMargin { Left = PageMarginLeft, Bottom = PageMarginBottom, Top = PageMarginTop, Right = PageMarginRight }));
                }
            }

            /// <summary>
            /// Resizes every inline image found in the runs that are direct children of
            /// <paramref name="paragraph"/>. Both places that store the size are updated:
            /// Drawing &gt; Inline &gt; Extent and the Extents below the picture's shape properties.
            /// </summary>
            /// <param name="paragraph">Paragraph whose runs are inspected.</param>
            private static void ResizeInlineImages(Paragraph paragraph)
            {
                // Drawing > Inline of each run; runs without a drawing or inline are skipped.
                var inlines = paragraph.Elements<DocumentFormat.OpenXml.Wordprocessing.Run>()
                    .Select(run => run.GetFirstChild<Drawing>())
                    .Where(drawing => drawing != null)
                    .Select(drawing => drawing.GetFirstChild<Inline>())
                    .Where(inline => inline != null)
                    .ToList();

                foreach (var inline in inlines)
                {
                    UpdateExtent(inline.Extent);
                    UpdateExtent(FindPictureExtents(inline));
                }
            }

            /// <summary>
            /// Follows Inline &gt; Graphic &gt; GraphicData &gt; Picture &gt; ShapeProperties &gt; Transform2D &gt; Extents.
            /// </summary>
            /// <param name="inline">Inline element to search.</param>
            /// <returns>The picture's extents, or null when any level of the path is missing.</returns>
            private static Extents FindPictureExtents(Inline inline)
            {
                var graphic = inline.GetFirstChild<Graphic>();
                var graphicData = graphic == null ? null : graphic.GetFirstChild<GraphicData>();
                var picture = graphicData == null
                    ? null
                    : graphicData.GetFirstChild<DocumentFormat.OpenXml.Drawing.Pictures.Picture>();
                var shapeProperties = picture == null
                    ? null
                    : picture.GetFirstChild<DocumentFormat.OpenXml.Drawing.Pictures.ShapeProperties>();
                var transform = shapeProperties == null ? null : shapeProperties.GetFirstChild<Transform2D>();

                return transform == null ? null : transform.GetFirstChild<Extents>();
            }

            /// <summary>
            /// Stretches the inline extent to the available page width, keeping the aspect ratio.
            /// A null extent or one without a usable size is left untouched.
            /// </summary>
            /// <param name="extent">Extent stored directly on the inline element; may be null.</param>
            private static void UpdateExtent(Extent extent)
            {
                long newCx;
                long newCy;
                if (extent == null || !TryScaleToAvailableWidth(extent.Cx, extent.Cy, out newCx, out newCy))
                {
                    return;
                }

                extent.Cx = newCx;
                extent.Cy = newCy;
            }

            /// <summary>
            /// Stretches the picture extents to the available page width, keeping the aspect ratio.
            /// Null extents or extents without a usable size are left untouched.
            /// </summary>
            /// <param name="extents">Extents below the picture's transform; may be null.</param>
            private static void UpdateExtent(Extents extents)
            {
                long newCx;
                long newCy;
                if (extents == null || !TryScaleToAvailableWidth(extents.Cx, extents.Cy, out newCx, out newCy))
                {
                    return;
                }

                extents.Cx = newCx;
                extents.Cy = newCy;
            }

            /// <summary>
            /// Computes the size an image gets when its width is set to the available page width.
            /// </summary>
            /// <param name="cx">Current width in EMU.</param>
            /// <param name="cy">Current height in EMU.</param>
            /// <param name="newCx">Receives the new width in EMU.</param>
            /// <param name="newCy">Receives the new height in EMU.</param>
            /// <returns>
            /// False when either value is missing or the current width is zero (no aspect ratio
            /// can be derived); the out values are then zero and must not be used.
            /// </returns>
            private static bool TryScaleToAvailableWidth(Int64Value cx, Int64Value cy, out long newCx, out long newCy)
            {
                newCx = 0;
                newCy = 0;

                // ReferenceEquals keeps these plain reference checks (Int64Value has implicit numeric conversions).
                if (ReferenceEquals(cx, null) || ReferenceEquals(cy, null) || !cx.HasValue || !cy.HasValue || cx.Value == 0)
                {
                    return false;
                }

                // Aspect ratio used to set image height after calculation of width
                double aspectRatioOfImage = (double)cy.Value / cx.Value;

                // Convert the available page width to pixels (DocumentSizePerPixel units per pixel) and then to EMU.
                // For default values: 15000 / 15 = 1000 pixels = 1000 * 9525 = 9525000 EMU
                double availableWidth = EmuPerPixel * ((PageWidth - PageMarginLeft - PageMarginRight) / DocumentSizePerPixel);

                newCx = (long)Math.Round(availableWidth);
                // Maintain the Aspect Ratio for height
                newCy = (long)Math.Round(aspectRatioOfImage * availableWidth);
                return true;
            }
        }
    }
    

    Output:

    Libre Office

    Libre Office Document

    Google Document

    Google Document

    Reference: Output Document