I am trying to fill a Word template that contains 10 Merge Fields on one single page ("guest_1", ..., "guest_10") with a column from a dataset. The template can be found here: https://file.io/GVO4hyOwBCiv
I was able to fill the Word template with constant data using the docx-mailmerge package (see the "## This works" section in the code below), but I was unsuccessful when trying to replicate the result by looping over a dataset (see the "## This does not work" section).
I first need to create objects of type dict
(in my code below, I call them guests_page_1
and guests_page_2
). Each of these dict
objects should have up to 10 items (e.g. {'guest_1': 'Name 1', 'guest_2': 'Name 1'}
). So the dataset (in my example below, guests
) should be split into multiple objects (each one containing up to 10 items) and then, using the merge_templates
function, replicate the template filling onto multiple pages (depending on the size of the dataset).
In summary for the example below: I need to split the guests
(which contains 18 names) dataset into 2 objects of type dict
(guests_page_1
and guests_page_2
). guests_page_1
should contain the first 10 names and guests_page_2
the remaining 8 names. Then, I need to add guests_page_1
and guests_page_2
into the merge_templates
function.
# Requirements: python -m pip install docx-mailmerge natsort pandas
# Import packages
from natsort import natsorted
import os
from mailmerge import MailMerge
import pandas as pd
# Work from the 'Downloads' folder of the current user
os.chdir(
    os.path.join(os.path.expanduser('~'), 'Downloads')
)
# Open the Word template
document = MailMerge('Template.docx')
# Collect the template's Merge Fields, sorted with natsorted
document_guest_fields = natsorted(document.get_merge_fields())
## This works
# Every Merge Field of a page receives the same constant value
guests_page_1 = {field: 'Name 1' for field in document_guest_fields}
guests_page_2 = {field: 'Name 2' for field in document_guest_fields}
# One dict per copy of the template
document.merge_templates(
    [guests_page_1, guests_page_2],
    separator='continuous_section',
)
document.write('Template-Output.docx')
document.close()
## This does not work
# Goal (per the question text): fill the template from a dataset, 10 names per page.
# Create dummy dataset with 18 names
guests = [
['Tom'],
['Jones'],
['Krystal'],
['Albert'],
['Paloma'],
['Shania'],
['Max'],
['Steve'],
['Paul'],
['Patrick'],
['Lucia'],
['Rachel'],
['Ray'],
['Jessica'],
['Julianna'],
['Lucille'],
['Leandro'],
['Vincent'],
]
# Wrap the names in a single-column ('name') DataFrame
guests = pd.DataFrame(guests, columns = ['name'])
# Only one dict is built here, so at most one page can be filled
guests_page_1 = {}
# NOTE(review): indentation was lost in this paste; presumably the loop over
# document_guest_fields is nested inside the iterrows() loop. If so, every row
# overwrites all Merge Fields with its own name, so after the loops every field
# holds the last name in the dataset instead of one distinct name per field.
for index, row in guests.iterrows():
for i in document_guest_fields:
guests_page_1[i] = row['name']
# NOTE(review): document.close() was already called on `document` earlier in
# this script; presumably the template has to be re-opened before merging
# again - confirm against the docx-mailmerge documentation.
document.merge_templates([guests_page_1], separator='continuous_section')
document.write('Template-Output.docx')
document.close()
Does anyone know how to fix this issue? Thanks in advance.
I found the solution:
# Import packages
from natsort import natsorted
import os
from mailmerge import MailMerge
import pandas as pd
# Split dataframe into chunks of up to 10 rows - adapted from: https://stackoverflow.com/a/28882020/9195104
def split_dataframe(df, chunk_size=10000):
    """Split *df* into consecutive chunks labelled with per-chunk merge fields.

    Each chunk holds at most ``chunk_size`` rows and gets a leading
    ``merge_field`` column whose values restart at ``'guest_1'`` in every
    chunk, so one chunk maps directly onto one copy of the template.

    Args:
        df: DataFrame to split; it is not modified.
        chunk_size: Maximum number of rows per chunk (must be >= 1).

    Returns:
        A list of new DataFrames, each with a fresh 0-based index and the
        ``merge_field`` column first. An empty *df* yields one empty chunk.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError('chunk_size must be a positive integer')
    chunks = []
    # Stepping by chunk_size (rather than computing len(df) // chunk_size + 1)
    # avoids a trailing empty chunk when len(df) is an exact multiple of
    # chunk_size; max(..., 1) still yields one (empty) chunk for an empty frame.
    for start in range(0, max(len(df), 1), chunk_size):
        # reset_index returns a new frame, so the caller's df is never touched
        chunk = df.iloc[start:start + chunk_size].reset_index(drop=True)
        # Number the rows 1..len(chunk) within this chunk only
        labels = [f'guest_{n}' for n in range(1, len(chunk) + 1)]
        chunk.insert(0, 'merge_field', labels)
        chunks.append(chunk)
    return chunks
# Work from the 'Downloads' folder of the current user
os.chdir(
    os.path.join(os.path.expanduser('~'), 'Downloads')
)
# Open the Word template
document = MailMerge('Template.docx')
# Collect the template's Merge Fields, sorted with natsorted
document_guest_fields = natsorted(document.get_merge_fields())
# Build a dummy dataset holding 18 guest names in a single 'name' column
guests = pd.DataFrame(
    {
        'name': [
            'Tom',
            'Jones',
            'Krystal',
            'Albert',
            'Paloma',
            'Shania',
            'Max',
            'Steve',
            'Paul',
            'Patrick',
            'Lucia',
            'Rachel',
            'Ray',
            'Jessica',
            'Julianna',
            'Lucille',
            'Leandro',
            'Vincent',
        ],
    }
)
# Break the guests dataframe into chunks of at most 10 rows
guests = split_dataframe(guests, chunk_size=10)
# Map each chunk number to its {merge field: guest name} dictionary
guests_list = {
    page_no: dict(zip(page['merge_field'], page['name']))
    for page_no, page in enumerate(guests)
}
# Fill the Word template, one dictionary per copy, and save the result
document.merge_templates(
    [*guests_list.values()],
    separator='continuous_section',
)
document.write('Template-Output.docx')
document.close()