Search code examples
rofficer

Avoid field updates when merging and printing word docx with {officer} package


I am stacking two docx, both of which have distinct dates in the title property, linked to fields in the respective documents. I would like the date in docx1 to remain unchanged in the new combined docx ("current.docx") as it is when printed in "docx1.docx", but the natural behavior is for docx2 to also update the field in docx1 to its title property once it has been added with body_add_docx() and printed.

library(officer)

# Set dates for docx1 and docx2
date1 <- format(Sys.Date(), "%A, %B %d, %Y")
date2 <- format(Sys.Date() +1, "%A, %B %d, %Y")

# Create docx1 with date1
docx1 <- read_docx()
docx1 <- set_doc_properties(docx1, title = date1)
fp1 <- fpar(
  ftext("Hello"), 
  run_linebreak(),
  run_word_field(field = "DOCPROPERTY \"title\""),
  run_pagebreak()
)
docx1 <- body_add_fpar(docx1, value = fp1)
print(docx1, "doc1.docx")

# Create docx2 with date2, and add docx1 as "current.docx" to show how date1 updates to date2
docx2 <- read_docx()
docx2 <- set_doc_properties(docx2, title = date2)
fp2 <- fpar(
  ftext("World"),
  run_linebreak(),
  run_word_field(field = "DOCPROPERTY \"title\"")
)
docx2 <- body_add_fpar(docx2, value = fp2)
docx2 <- body_add_docx(docx2, "doc1.docx", pos = "before")
print(docx2, "current.docx")

# Create example "desired.docx" retaining date1 and date2 after stacking
docx3 <- read_docx()
fp <- fpar(
  ftext("Hello"), 
  run_linebreak(),
  ftext(date1),
  run_pagebreak(),
  ftext("World"),
  run_linebreak(),
  ftext(date2)
)
docx3 <- body_add_fpar(docx3, value=fp) |>
  print("desired.docx")

Created on 2023-07-21 with reprex v2.0.2

Once I have printed docx1, I no longer need the title field, so it seems like converting that field specifically (or even all docx1 fields) to text before printing would make sense as a way of preventing the update. I have tried body_replace_all_text(), but that doesn't work because there is no text actual text to find in the field. I have also considered that fields can be converted to text in a word document using Ctrl+Shift+F9 to run the "Selection.Range.Field.Unlink" macro, but am not sure how to do this programatically on many documents using R.

Thank you so much for the package and any guidance you can give.


Solution

  • officer::run_word_field(field = "DOCPROPERTY \"title\"") appears to be outputting objects of type Field with a Code property of DOCPROPERTY "title".

    In the example file you uploaded here, the field appears to be of type ContentControl with a Title property of "Title" and an XMLMapping property having an XPath property of /ns1:coreProperties[1]/ns0:title[1] (i.e., an xml mapping back to the document title).

    Here is an updated solution (for Windows) based on the google docs file.

    First Define a Custom Function

    This function:

    1. Writes your rdox object to a temp docx file
    2. Writes a temp VBScript (.vbs) file that:
      1. opens your temp docx file
      2. finds your title content control by title (i.e., name)
      3. deletes the control but leaves its contents
      4. saves and closes the temp docx
    3. Runs the VBScript
    4. Reads the temp docx file back in as an rdox object
    5. Cleans up the temp files
    6. Returns your rdox object
    delete_title_content_control <- function(x) {
      
      if(Sys.info()['sysname'] != "Windows"){
        stop('The this function requires Windows operating system.')
      }
    
      tryCatch(
        { print(x, target = file.path(paste(getwd(), "/temp.docx", sep=""))) },
        error = function(cond) { stop("x is not a Word document.") }
      )
    
      print(x, target = file.path(paste(getwd(), "/temp.docx", sep="")))
    
      writeLines(
        c(
          'Set objWord = CreateObject("Word.Application")',
          'objWord.Visible = False',
          'objWord.DisplayAlerts = False',
          paste('Set doc = objWord.Documents.Open("', normalizePath(paste(getwd(), "/temp.docx", sep="")), '")', sep=""),
          'For Each objCC In doc.ContentControls',
          '    If objCC.Title = "Title" Then objCC.Delete',
          'Next',
          'doc.Saved = False',
          'doc.Save',
          'doc.Close (TRUE)',
          'objWord.Quit'
        ),
        con = file.path(paste(getwd(), "/temp.vbs", sep="")),
        sep = "\n",
        useBytes = FALSE
      )
    
      shell(shQuote(normalizePath(file.path(paste(getwd(), "/temp.vbs", sep="")))), "cscript", flag = "//nologo")
    
      x <- officer::read_docx(path = file.path(paste(getwd(), "/temp.docx", sep="")))
    
      invisible(file.remove(file.path(paste(getwd(), "/temp.docx", sep=""))))
    
      invisible(file.remove(file.path(paste(getwd(), "/temp.vbs", sep=""))))
    
      invisible(x)
    }
    

    Then Do About As Before

    library(officer)
    
    # example file from google link
    sample_doc1 <- read_docx("sample_doc1.docx")
    
    # add a linebreak to match your question example
    sample_doc1 <- body_add_break(sample_doc1)
    
    # manage cursor position so body_add_docx() works as expected
    sample_doc1 <- cursor_begin(sample_doc1)
    
    # convert title content control to text
    sample_doc1_title_as_text <- delete_title_content_control(sample_doc1)
    
    #  write out to use in body_add_docx()
    print(sample_doc1_title_as_text, "sample_doc1_title_as_text.docx")
    
    # example file from google link
    # - edited by hand to set document title property to tomorrow
    # - and change text to world
    sample_doc2 <- read_docx("sample_doc2.docx")
    
    # manage cursor position so body_add_docx() works as expected
    sample_doc2 <- cursor_begin(sample_doc2)
    
    # stack as in question example
    as_desired <- body_add_docx(
      sample_doc2, # note, resulting doc is retaining this title property
      src = "sample_doc1_title_as_text.docx",
      pos = "before"
    )
    
    # save desired document
    print(as_desired, "as_desired.docx")