Search code examples
Tags: python, json, strip

How to remove whitespaces and newlines from every value in a JSON file?


I have a JSON file that has the following structure:

{
    "name":[
        {
            "someKey": "\n\n   some Value   "
        },
        {
            "someKey": "another value    "
        }
    ],
    "anotherName":[
        {
            "anArray": [
                {
                    "key": "    value\n\n",
                    "anotherKey": "  value"
                },
                {
                    "key": "    value\n",
                    "anotherKey": "value"
                }
            ]
        }
    ]
}

Now I want to strip all the leading and trailing whitespace and newlines from every value in the JSON file. Is there some way to iterate over each element of the dictionary, including the nested dictionaries and lists?


Solution

  • Now I want to strip off all the whitespaces and newlines for every value in the JSON file

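    You can use pkgutil.simplegeneric() to create a helper function, get_items(), that dispatches on the type of its argument and returns the (key, value) pairs of a JSON object or the (index, element) pairs of a JSON array: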

    import json
    import sys
    from pkgutil import simplegeneric
    
    @simplegeneric
    def get_items(obj):
        """Iterate over the ``(key, value)`` pairs held by a decoded JSON value.

        This is the fallback used for every type without a registered handler
        (JSON scalars such as strings, numbers, booleans and ``None``): it
        produces no pairs at all, so callers can loop over the result without
        checking the type first.
        """
        # Delegating to an empty tuple keeps this a generator that ends
        # immediately without yielding anything.
        yield from ()
    
    @get_items.register(dict)
    def _object_items(obj):
        """JSON object: pair each key with its value."""
        return obj.items()
    
    @get_items.register(list)
    def _array_items(obj):
        """JSON array: pair each index with its element."""
        return enumerate(obj)
    
    def strip_whitespace(json_data):
        """Strip leading/trailing whitespace from every string value, in place.

        Walks ``json_data`` through ``get_items()``. Any value that exposes a
        ``strip`` attribute is treated as a JSON string and replaced by the
        result of calling ``strip()`` on it; every other value is descended
        into recursively. Dictionary keys are left untouched and nothing is
        returned: the container passed in is modified directly.
        """
        for slot, item in get_items(json_data):
            if not hasattr(item, 'strip'):
                # Not a string: a nested container is cleaned recursively,
                # a scalar simply yields no items and is left alone.
                strip_whitespace(item)
                continue
            # Overwrite the existing slot (dict key or list index) with the
            # trimmed text; the container's size never changes.
            json_data[slot] = item.strip()
    
    
    # Act as a filter: parse the JSON document arriving on standard input,
    # clean its string values in place, then write it back out to standard
    # output pretty-printed with a 2-space indent.
    data = json.loads(sys.stdin.read())
    strip_whitespace(data)
    sys.stdout.write(json.dumps(data, indent=2))
    

    Note: the functools.singledispatch() function (Python 3.4+) would allow registering the abstract base classes collections.abc.MutableMapping/MutableSequence instead of dict/list here.

    Output

    {
      "anotherName": [
        {
          "anArray": [
            {
              "anotherKey": "value", 
              "key": "value"
            }, 
            {
              "anotherKey": "value", 
              "key": "value"
            }
          ]
        }
      ], 
      "name": [
        {
          "someKey": "some Value"
        }, 
        {
          "someKey": "another value"
        }
      ]
    }