I'm painfully close to automating the dump of a yml file that's created from a data frame for an automation task.
I have a function that is structured like the following:
def get_all_values(nested_dictionary):
    """Yield one dump-ready object per model and one per column.

    For every top-level key a ``{"models": [...]}`` mapping is yielded whose
    single entry has ``columns`` set to ``None``. It is followed by one
    single-element list for each key of the nested mapping.

    NOTE: the column lists are yielded as separate objects; they are never
    attached to the model's ``columns`` key, so they are not nested under it.

    Args:
        nested_dictionary: Mapping of model name to a mapping keyed by
            column name. Only the keys of the inner mapping are used.

    Yields:
        A ``dict`` describing the model, then a one-element ``list`` per
        column carrying the ``not_null`` and ``unique`` tests.
    """
    for model_name, columns in nested_dictionary.items():
        yield {"models": [{"name": model_name, "columns": None}]}
        # Distinct loop names: the inner loop no longer rebinds the outer
        # key/value pair, and the unused inner values are not unpacked.
        for column_name in columns:
            yield [{"name": column_name, "tests": ["not_null", "unique"]}]
nested_dictionary = d1
# Create the generator once; a second, discarded call would do nothing.
data = get_all_values(nested_dictionary)
with open('data.yml', 'w') as outfile:
    # The stream is passed to the dumper explicitly, so stdout does not need
    # to be redirected. Every yielded object is dumped on its own.
    for item in data:
        # NOTE: round_trip_dump is deprecated in ruamel.yaml; new code should
        # use a ruamel.yaml.YAML() instance instead.
        ruamel.yaml.round_trip_dump(item, outfile, indent=5, block_seq_indent=2)
The function is a generator; the objects it yields have the following structure:
{'models': [{'name': 'budgets_sales', 'columns': None}]}
[{'name': 'budget_amt', 'tests': ['not_null', 'unique']}]
[{'name': 'budget_group', 'tests': ['not_null', 'unique']}]
[{'name': 'budget_name', 'tests': ['not_null', 'unique']}]
[{'name': 'budget_pk', 'tests': ['not_null', 'unique']}]
This works "well"... but the output is as follows:
models:
- name: budgets_sales
columns:
- name: budget_amt
tests:
- not_null
- unique
- name: budget_group
tests:
- not_null
- unique
- name: budget_name
tests:
- not_null
- unique
I require all values to keys in the dict to have an additional indentation. I cannot figure out how to make the values indented against the keys.
It would look like this if correct:
- name: budgets_sales
columns:
- name: budget_amt
tests:
- not_null
- unique
- name: budget_group
tests:
- not_null
- unique
- name: budget_name
tests:
- not_null
- unique
- name: budget_pk
tests:
- not_null
- unique
- name: entry_type_code
tests:
- not_null
- unique
- name: institution_fk
tests:
- not_null
- unique
Could anyone provide an approach?
Thanks to Anthon, this is what I ended up using:
def get_all_values(nested_dictionary):
    """Build the list of objects that are dumped one after another to YAML.

    The list starts with ``{"version": 2}`` and ``{"models": None}``. For
    every top-level key it then holds a one-element list containing a mapping
    with the model ``name`` and its ``columns``.

    Args:
        nested_dictionary: Mapping of model name to a mapping keyed by
            column name. Only the keys of the inner mapping are used.

    Returns:
        A list of dump-ready objects in the order described above.
    """
    res = [{"version": 2}, {"models": None}]
    for model_name, columns in nested_dictionary.items():
        # One entry per column name; the inner values are not needed.
        column_entries = [
            {"name": column_name, "tests": ["not_null", "unique"]}
            for column_name in columns
        ]
        res.append([{"name": model_name, "columns": column_entries}])
    return res
nested_dictionary = d1
data = get_all_values(nested_dictionary)
# Configure the dumper once instead of rebuilding it for every item.
yaml = ruamel.yaml.YAML()
yaml.indent(mapping=5, sequence=5, offset=4)
with open('data.yml', 'w') as outfile:
    # The stream is passed to dump() explicitly, so stdout does not need to
    # be redirected. Each element of data is dumped on its own.
    for item in data:
        # Was `yml.dump(...)`, which raised NameError: the instance is `yaml`.
        yaml.dump(item, outfile)
In your required output the value associated with the key `columns` is a sequence.
You only get that if your Python data structure is a list, so make sure you
append your individual `table` entries to some variable.
I am guessing `d1` based on your "incorrect" output:
import sys
import ruamel.yaml
# Sample input: one model name mapped to its column names (values unused).
d1 = {
    "budgets_sales": {
        "budget_amt": None,
        "budget_group": None,
        "budget_name": None,
        "budget_pk": None,
    },
}
def get_all_values(nested_dictionary):
    """Return one mapping per model with its columns nested as a list.

    Args:
        nested_dictionary: Mapping of model name to a mapping keyed by
            column name. Only the keys of the inner mapping are used.

    Returns:
        A list of ``{"name": <model>, "columns": [...]}`` mappings, where
        each column entry is ``{"name": <column>, "tests": [...]}``.
    """
    res = []
    for model_name, columns in nested_dictionary.items():
        res.append({
            "name": model_name,
            # Name each entry after the column (the inner key). Using the
            # outer key here would repeat the model name for every column.
            "columns": [
                {"name": column_name, "tests": ["not_null", "unique"]}
                for column_name in columns
            ],
        })
    return res
# Set up the dumper with the indentation used for the example output.
yaml = ruamel.yaml.YAML()
yaml.indent(mapping=5, sequence=5, offset=3)

# Build the whole structure first, then emit it with a single dump call.
data = get_all_values(d1)
yaml.dump(data, sys.stdout)
which gives:
- name: budgets_sales
columns:
- name: budgets_sales
tests:
- not_null
- unique
- name: budgets_sales
tests:
- not_null
- unique
- name: budgets_sales
tests:
- not_null
- unique
- name: budgets_sales
tests:
- not_null
- unique
There are a few things you should take into account (apart from better formatting your code and data here on SO):
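- The `round_trip_dump` function has been deprecated; don't use it in new code.
- The recommended extension for files containing YAML documents has been `.yaml`, since at least Sep 2007.
- If you need to write several documents to one stream, use the `dump_all()` method.

If all else fails and you have valid handcrafted YAML that you want to generate as output, load that YAML (with `YAML(typ='safe').load()`) and inspect the data structure in Python that you get.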