Search code examples
pythonjsonpandasansibleinventory

dynamic ansible inventory JSON normalized with pandas


I'm having trouble normalizing ansible-inventory JSON using pandas data frame.

After running

ansible-inventory -i dynamic_inventory.yaml --playbook-dir ./ --list > list.json

I get

{
    "_meta": {
        "hostvars": {
            "myhostdcdb01": {
                "ansible_host": "10.10.252.66",
                "portal_cpu": 2,
                "portal_domain": "prod.local",
                "portal_group": "DBA",
                "portal_hostname": "myhostdcdb01",
                "portal_ip_address": "10.10.252.66",
                "portal_memory": 8
            },
            "myhostdcdb02": {
                "ansible_host": "10.10.252.67",
                "portal_cpu": 2,
                "portal_domain": "prod.local",
                "portal_group": "DBA",
                "portal_hostname": "myhostdcdb02",
                "portal_ip_address": "10.10.252.67",
                "portal_memory": 8
            },
            "myhostdcdb03": {
                "ansible_host": "10.10.252.68",
                "portal_cpu": 2,
                "portal_domain": "prod.local",
                "portal_group": "DBA",
                "portal_hostname": "myhostdcdb03",
                "portal_ip_address": "10.10.252.68",
                "portal_memory": 8
            },
            "myhostscdb01": {
                "ansible_host": "10.10.252.76",
                "portal_cpu": 2,
                "portal_domain": "prod.local",
                "portal_group": "DBA",
                "portal_hostname": "myhostscdb01",
                "portal_ip_address": "10.10.252.76",
                "portal_memory": 8
            },
            "myhostscdb02": {
                "ansible_host": "10.10.252.78",
                "portal_cpu": 2,
                "portal_domain": "prod.local",
                "portal_group": "DBA",
                "portal_hostname": "myhostscdb02",
                "portal_ip_address": "10.10.252.78",
                "portal_memory": 8
            },
            "myhostscdb03": {
                "ansible_host": "10.10.252.80",
                "portal_cpu": 2,
                "portal_domain": "prod.local",
                "portal_group": "DBA",
                "portal_hostname": "myhostscdb03",
                "portal_ip_address": "10.10.252.80",
                "portal_memory": 8
            }
        }
    },
    "all": {
        "children": [
            "ungrouped"
        ]
    },
    "ungrouped": {
        "hosts": [
            "myhostdcdb01",
            "myhostdcdb02",
            "myhostdcdb03",
            "myhostscdb01",
            "myhostscdb02",
            "myhostscdb03"
        ]
    }
}

And now I have no idea how to normalize it into a sensible format where each host and their details are in a separate row i.e.:

ansible_host portal_cpu portal_domain portal_group portal_hostname portal_ip_address portal_memory
10.10.252.78 2 prod.local DBA myhostscdb02 10.10.252.78 8

Solution

  • This was my very first hand on with pandas and was quite easy to achieve. You may have to adapt a bit more and change the way your read/store data to fit your exact requirement, but that makes most of the job.

    Example as a sample python interactive session

    $ python
    Python 3.8.10 (default, Jun  2 2021, 10:49:15) 
    [GCC 9.4.0] on linux
    Type "help", "copyright", "credits" or "license" for more information.
    >>>
    >>> import pandas
    >>>
    >>> # Load original data. Read that from your command or a file
    >>> inventory_data = {
    ...     "_meta": {
    ...         "hostvars": {
    ...             "myhostdcdb01": {
    ...                 "ansible_host": "10.10.252.66",
    ...                 "portal_cpu": 2,
    ...                 "portal_domain": "prod.local",
    ...                 "portal_group": "DBA",
    ...                 "portal_hostname": "myhostdcdb01",
    ...                 "portal_ip_address": "10.10.252.66",
    ...                 "portal_memory": 8
    ...             },
    ...             "myhostdcdb02": {
    ...                 "ansible_host": "10.10.252.67",
    ...                 "portal_cpu": 2,
    ...                 "portal_domain": "prod.local",
    ...                 "portal_group": "DBA",
    ...                 "portal_hostname": "myhostdcdb02",
    ...                 "portal_ip_address": "10.10.252.67",
    ...                 "portal_memory": 8
    ...             },
    ...             "myhostdcdb03": {
    ...                 "ansible_host": "10.10.252.68",
    ...                 "portal_cpu": 2,
    ...                 "portal_domain": "prod.local",
    ...                 "portal_group": "DBA",
    ...                 "portal_hostname": "myhostdcdb03",
    ...                 "portal_ip_address": "10.10.252.68",
    ...                 "portal_memory": 8
    ...             },
    ...             "myhostscdb01": {
    ...                 "ansible_host": "10.10.252.76",
    ...                 "portal_cpu": 2,
    ...                 "portal_domain": "prod.local",
    ...                 "portal_group": "DBA",
    ...                 "portal_hostname": "myhostscdb01",
    ...                 "portal_ip_address": "10.10.252.76",
    ...                 "portal_memory": 8
    ...             },
    ...             "myhostscdb02": {
    ...                 "ansible_host": "10.10.252.78",
    ...                 "portal_cpu": 2,
    ...                 "portal_domain": "prod.local",
    ...                 "portal_group": "DBA",
    ...                 "portal_hostname": "myhostscdb02",
    ...                 "portal_ip_address": "10.10.252.78",
    ...                 "portal_memory": 8
    ...             },
    ...             "myhostscdb03": {
    ...                 "ansible_host": "10.10.252.80",
    ...                 "portal_cpu": 2,
    ...                 "portal_domain": "prod.local",
    ...                 "portal_group": "DBA",
    ...                 "portal_hostname": "myhostscdb03",
    ...                 "portal_ip_address": "10.10.252.80",
    ...                 "portal_memory": 8
    ...             }
    ...         }
    ...     },
    ...     "all": {
    ...         "children": [
    ...             "ungrouped"
    ...         ]
    ...     },
    ...     "ungrouped": {
    ...         "hosts": [
    ...             "myhostdcdb01",
    ...             "myhostdcdb02",
    ...             "myhostdcdb03",
    ...             "myhostscdb01",
    ...             "myhostscdb02",
    ...             "myhostscdb03"
    ...         ]
    ...     }
    ... }
    >>>
    >>> # Load hostvars in a dataframe + switch lines/columns
    >>> df = pandas.DataFrame.from_dict(inventory_data['_meta']['hostvars']).transpose()
    >>>
    >>> # Have a look at result
    >>> print(df)
                  ansible_host portal_cpu portal_domain portal_group portal_hostname portal_ip_address portal_memory
    myhostdcdb01  10.10.252.66          2    prod.local          DBA    myhostdcdb01      10.10.252.66             8
    myhostdcdb02  10.10.252.67          2    prod.local          DBA    myhostdcdb02      10.10.252.67             8
    myhostdcdb03  10.10.252.68          2    prod.local          DBA    myhostdcdb03      10.10.252.68             8
    myhostscdb01  10.10.252.76          2    prod.local          DBA    myhostscdb01      10.10.252.76             8
    myhostscdb02  10.10.252.78          2    prod.local          DBA    myhostscdb02      10.10.252.78             8
    myhostscdb03  10.10.252.80          2    prod.local          DBA    myhostscdb03      10.10.252.80             8
    >>> exit()