I have following .YAML file:
Name: Tom
Surname: Smith
Status: Married
Childs:
- ChildName: Willy
Age: 30
Grandchild:
- GrandchildName: John
Age: 4
- GrandchildName: Maria
Age: 3
- ChildName: Arthur
Age: 40
Grandchild: N/A
What I've done:
import openpyxl
import yaml
with open('family.yaml') as file:
family_list = yaml.load(file, Loader=yaml.FullLoader)
def create_workbook():
wb = openpyxl.Workbook('family.xlsx')
wb.save('family.xlsx')
def update_workbook():
wb = openpyxl.load_workbook('family.xlsx')
sh1 = wb.active
for item in family_list.items():
sh1.append(item)
wb.save('family.xlsx')
create_workbook()
update_workbook()
This code allows me add data (without "pause" part, so only Name, Surname, Status) to excel but "Name", "Surname" etc. are added in one column A1 (not in rows as I want to)
Final excel workbook I want to get
I'm not sure how to append and format (each pause sign should be new row) mentioned data. I will be grateful for any tips!
I have decided to split the yaml load into three dictionaries;
Parent
Child
Grandchild
so they can be adjusted to suit the excel layout. Then use Pandas to export these to Excel at the appropriate row/column locations.
The code will process YAML files in 'yaml_files'. On the first run for
the set 'save_file' the Headers are written to Row 1 and each YAML file data is then
placed under the previous entry down the spreadsheet across columns A - G. Any subsequent
run on the same 'save_file' will place the new data below the last data written.
Each file entry is also highlighted by alternating background colour.
You can change the background colors used or disable if you wish.
import glob
import os
import pandas as pd
import yaml
from openpyxl import load_workbook
from yaml.loader import SafeLoader
from openpyxl.utils import get_column_letter
from openpyxl.styles import PatternFill
def write_next_segments(dataf, header, start_row, start_col):
dataframe = ''
if start_col == 0:
dataframe = pd.DataFrame([parent_dictionary])
dataframe.to_excel(writer, sheet_name=save_sheet,
index=False,
header=header,
startrow=start_row,
startcol=start_col)
else:
for ele in dataf:
dataframe = pd.DataFrame([dataf[ele]])
dataframe.to_excel(writer, sheet_name=save_sheet,
index=False,
header=header,
startrow=start_row,
startcol=start_col)
if header:
header = False
start_row += 2
else:
start_row += 1
adjust_colmns(dataframe, start_col)
def adjust_colmns(dataframe, offset):
for column in dataframe:
if column != '':
column_width = max(dataframe[column].astype(str).map(len).max(), len(column)) + 2
col_letter = get_column_letter(dataframe.columns.get_loc(column) + offset + 1)
writer.sheets[save_sheet].column_dimensions[col_letter].width = column_width
def build_dictionary(yf):
# Open the file and load the file
print("Formatting YAML file: " + yf)
yaml_sub_name = '' # Name of 'child' sub element
with open(yf) as file:
yaml_data = yaml.load(file, Loader=SafeLoader)
for k, v in yaml_data.items():
if type(v) == list:
yaml_sub_name = k # Name of 'child' sub element
# Make copy of the original dictionary for modification
print("------ Copy the Parent dictionary -------------")
parent_dict = yaml_data.copy()
print("------ Create the Child & GrandChild dictionaries -------------")
child_dict = {}
grandchild_dict = {}
sub_list = ''
for x in parent_dict[yaml_sub_name][0]:
if len(child_dict) == 0:
child_dict['Childs0'] = {x: ''}
else:
if type(yaml_data[yaml_sub_name][0][x]) != list:
child_dict['Childs0'].update({x: ''})
else:
sub_list = x
for x in parent_dict[yaml_sub_name][0][sub_list][0]:
if len(grandchild_dict) == 0:
grandchild_dict['Grandchilds0'] = {x: ''}
else:
grandchild_dict['Grandchilds0'].update({x: ''})
# Remove the Child elements from the Parent dictionary
child_list = parent_dict.pop(yaml_sub_name)
# Create the rest of the Child and Grandchild dictionaries
row = 1
for enum, ele in enumerate(child_list):
if row != 1: row += 1
child_dict[yaml_sub_name + str(row)] = ele
pop_num = row
if type(child_list[enum][sub_list]) == list:
for i in range(len(child_list[enum][sub_list])):
if i == 0:
grandchild_dict['Grandchilds' + str(row)] = {'': '', '': ''}
grandchild_dict['Grandchilds' + str(row + 1)] = ele[sub_list][i]
row += 1
child_dict[yaml_sub_name + str(row)] = {'': '', '': ''}
child_dict[yaml_sub_name + str(pop_num)].pop(sub_list)
return parent_dict, child_dict, grandchild_dict
def set_background_color(bg_color, bg_start_row):
coords = writer.sheets[save_sheet].dimensions
start_coord, end_coord = coords.split(':')
bg_color_dims = 'A' + str(bg_start_row) + ':' + end_coord[:1] + str(row_offset)
sheet = writer.sheets[save_sheet]
for cells in sheet[bg_color_dims]:
for cell in cells:
cell.fill = PatternFill("solid", start_color=bg_color)
bg_start_row = row_offset + 1
return bg_start_row
if __name__ == "__main__":
# Change these variables as necessary
yaml_dir = './yaml_files' # Set name of the input yaml files directory
save_file = 'family.xlsx' # Set name of Excel workbook for output
save_sheet = 'Sheet1' # Set name of Excel sheet
# These variables should not be changed
row_offset = 0 # Row number for next Excel write
header = True # Write Header to Excel
bg_color_start_row = '2' # First row to start fill background color
# Set start bg color
bg_color = "DDD9C4" # First segment background color
# If save file already exists then load it and the save sheet
# Get the last row and last background colour used. Header is set to False
if os.path.isfile(save_file):
book = load_workbook(save_file)
writer = pd.ExcelWriter(save_file, engine='openpyxl')
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
row_offset = writer.sheets[save_sheet].max_row + 1
bg_color_start_row = row_offset
bg_color = writer.sheets[save_sheet][writer.sheets[save_sheet].dimensions[3:]].fill.fgColor.value[2:]
header = False
else:
# If no save file then create a new Excel file
writer = pd.ExcelWriter(save_file, engine='openpyxl')
# Get a list of the YAML files in yaml directory
yaml_files = glob.glob(yaml_dir + "/[!~]*.yaml")
# Loop through the YAML files
for enum, y_file in enumerate(yaml_files):
if enum > 0:
header = False
print("------ Export dictionaries to Excel using Pandas --------------")
parent_dictionary, child_dictionary, grandchild_dictionary = build_dictionary(y_file)
parent_col_offset = len(parent_dictionary)
child_col_offset = parent_col_offset + len(child_dictionary['Childs0'])
write_next_segments(parent_dictionary, header, row_offset, 0)
write_next_segments(child_dictionary, header, row_offset, parent_col_offset)
write_next_segments(grandchild_dictionary, header, row_offset, child_col_offset)
row_offset = writer.sheets[save_sheet].max_row + 1
# Insert background fill using alternate fill colours
print("------ Set Background Colors ---------------------------------")
if bg_color == "DAEEF3":
bg_color = "DDD9C4"
else:
bg_color = "DAEEF3"
bg_color_start_row = set_background_color(bg_color, bg_color_start_row)
print("\n")
print("------ Save Excel file to '" + save_file + "' ---------------------")
writer.save()