Search code examples
pythonmailmerge

Fill Word Merge Fields from dataset using Python


I am trying to fill a Word template that contains 10 Merge Fields on one single page ("guest_1", ..., "guest_10") with a column from a dataset. The template can be found here: https://file.io/GVO4hyOwBCiv

I was able to fill the Word template with constant data using the docx-mailmerge package (see the "## This works" section of the code below), but I have been unable to replicate the result by looping over a dataset (see the "## This does not work" section).

I first need to create objects of type dict (in my code below, I call them guests_page_1 and guests_page_2). Each of these dict objects should have up to 10 items (e.g. {'guest_1': 'Name 1', 'guest_2': 'Name 1'}). So the dataset (in my example below, guests) should be split into multiple objects (each one containing up to 10 items) and then, using the merge_templates function, the template filling should be replicated onto multiple pages (depending on the size of the dataset).

In summary, for the example below: I need to split the guests dataset (which contains 18 names) into 2 objects of type dict (guests_page_1 and guests_page_2). guests_page_1 should contain the first 10 names and guests_page_2 the remaining 8 names. Then, I need to pass guests_page_1 and guests_page_2 to the merge_templates function.

# Requirements: python -m pip install docx-mailmerge natsort pandas

# Import packages
from natsort import natsorted
import os

from mailmerge import MailMerge
import pandas as pd


# Work from the current user's 'Downloads' folder
downloads_dir = os.path.join(os.path.expanduser('~'), 'Downloads')
os.chdir(downloads_dir)

# Open the Word template
document = MailMerge('Template.docx')

# Collect the template's Merge Fields and put them in natural order
document_guest_fields = natsorted(document.get_merge_fields())



## This works

# One dict per output page; every Merge Field on a page gets the same constant
guests_page_1 = {field: 'Name 1' for field in document_guest_fields}
guests_page_2 = {field: 'Name 2' for field in document_guest_fields}

# Each dict in the list produces one filled copy of the template
document.merge_templates([guests_page_1, guests_page_2], separator='continuous_section')
document.write('Template-Output.docx')

document.close()



## This does not work

# Create dummy dataset with 18 names
guests = [
    ['Tom'],
    ['Jones'],
    ['Krystal'],
    ['Albert'],
    ['Paloma'],
    ['Shania'],
    ['Max'],
    ['Steve'],
    ['Paul'],
    ['Patrick'],
    ['Lucia'],
    ['Rachel'],
    ['Ray'],
    ['Jessica'],
    ['Julianna'],
    ['Lucille'],
    ['Leandro'],
    ['Vincent'],
    ]

# Single-column DataFrame: one guest name per row
guests = pd.DataFrame(guests, columns = ['name'])


guests_page_1 = {}

# Why this fails: for EACH row, the inner loop assigns that row's name to
# EVERY merge field, so each row overwrites what the previous row wrote.
# After the loops finish, all fields hold the last name in the dataset
# ('Vincent') instead of one distinct name per field.
for index, row in guests.iterrows():
    for i in document_guest_fields:
        guests_page_1[i] = row['name']

# Only one dict is passed here, so only one copy of the template is filled;
# the 18 names are never split across a second page.
# NOTE(review): `document` was already closed at the end of the
# "## This works" section above; presumably the template has to be re-opened
# with MailMerge('Template.docx') before merging again -- verify.
document.merge_templates([guests_page_1], separator='continuous_section')
document.write('Template-Output.docx')

document.close()

Does anyone know how to fix this issue? Thanks in advance.


Solution

  • I found the solution:

    # Import packages
    from natsort import natsorted
    import os
    
    from mailmerge import MailMerge
    import pandas as pd
    
    # Split dataframe into chunks of up to `chunk_size` rows (10 per page in this example) - adapted from: https://stackoverflow.com/a/28882020/9195104
    def split_dataframe(df, chunk_size=10000):
        """Split ``df`` into consecutive chunks and label each row with a merge-field name.

        Each chunk holds up to ``chunk_size`` rows of ``df`` in their original
        order. A ``merge_field`` column is inserted as the first column; it
        numbers the rows of every chunk starting from 1 (``'guest_1'``,
        ``'guest_2'``, ...), so the numbering restarts on each chunk.

        Args:
            df: DataFrame to split. It is not modified.
            chunk_size: Maximum number of rows per chunk; must be at least 1.

        Returns:
            A list of new DataFrames, each with a fresh 0-based index. The
            list is empty when ``df`` has no rows, and no empty trailing chunk
            is produced when ``len(df)`` is an exact multiple of
            ``chunk_size``.

        Raises:
            ValueError: If ``chunk_size`` is smaller than 1.
        """
        if chunk_size < 1:
            raise ValueError('chunk_size must be at least 1')

        chunks = []

        # Stepping by chunk_size visits only start offsets that hold data, so
        # an exact multiple of chunk_size never yields an extra empty chunk
        # (which would otherwise become a blank page in the merged document).
        for start in range(0, len(df), chunk_size):
            # reset_index returns a new frame, so nothing is written back
            # through a slice of the caller's DataFrame.
            chunk = df.iloc[start:start + chunk_size].reset_index(drop=True)
            merge_fields = ['guest_{}'.format(position)
                            for position in range(1, len(chunk) + 1)]
            chunk.insert(0, 'merge_field', merge_fields)
            chunks.append(chunk)

        return chunks
    
    
    
    # Work from the current user's 'Downloads' folder
    downloads_dir = os.path.join(os.path.expanduser('~'), 'Downloads')
    os.chdir(downloads_dir)

    # Open the Word template
    document = MailMerge('Template.docx')

    # Collect the template's Merge Fields and put them in natural order
    document_guest_fields = natsorted(document.get_merge_fields())

    # Dummy dataset with 18 names, one per row in a single 'name' column
    guests = pd.DataFrame({
        'name': [
            'Tom',
            'Jones',
            'Krystal',
            'Albert',
            'Paloma',
            'Shania',
            'Max',
            'Steve',
            'Paul',
            'Patrick',
            'Lucia',
            'Rachel',
            'Ray',
            'Jessica',
            'Julianna',
            'Lucille',
            'Leandro',
            'Vincent',
        ],
    })

    # Break the dataset into page-sized chunks of up to 10 guests each
    guests = split_dataframe(guests, chunk_size=10)

    # Map each page number to its {merge field name: guest name} dict
    guests_list = {
        page: dict(zip(chunk['merge_field'], chunk['name']))
        for page, chunk in enumerate(guests)
    }

    # Fill the Word template: one copy of the template per page dict
    document.merge_templates(list(guests_list.values()), separator='continuous_section')
    document.write('Template-Output.docx')
    document.close()