Search code examples
python json stringio

Unable to read StringIO object created by json.dump()


I am trying to write a list of dictionaries to a StringIO object with json.dump() in Python 3. The objective is to create a pandas dataframe from a JSON "file" without actually saving it to disk. I can't seem to read anything from the StringIO object. My code is as follows:

import json
from io import StringIO

errors = [{'dateTime': '05/06/2021, 05:00PM', 'cs-username': '-', 'sc-substatus': '0', 'sc-win32-status': '0', 'sc-bytes': '34255', 'time-taken': '218', 'serverIP': '209.213.12.24', 'method': 'GET', 'file': '/ads.txt', 'errorMessage': "404;https://www.fudge.com:443/ads.txt|49|800a01a8|Object_required:_'xmlDocTemp.documentElement'", 'port': '443', 'clientIP': '66.69.69.145', 'userAgent': 'Mozilla/5.0+(compatible;+Googlebot/2.1;++http://www.google.com/bot.html)', 'cookie': '-', 'referer': '-', 'statusCode': '500', 'errorHash': 'd5228f90df950647d0acdaa03ce5dc60b970a1ab0d8287ed611d6e3214d91667', 'absoluteHash': '538d59226642d818723f0f9cf1e465b9c01061daafdd79c7cbd897bceefcac1c', 'errorLogLine': 1, 'queryParams': '-'}, {'dateTime': '05/06/2021, 05:00PM', 'cs-username': '-', 'sc-substatus': '0', 'sc-win32-status': '0', 'sc-bytes': '34255', 'time-taken': '156', 'serverIP': '209.213.12.24', 'method': 'GET', 'file': '/ads.txt', 'errorMessage': "404;https://www.crap.com:443/ads.txt|49|800a01a8|Object_required:_'xmlDocTemp.documentElement'", 'port': '443', 'clientIP': '69.69.69.69', 'userAgent': 'Mozilla/5.0+(compatible;+Googlebot/2.1;++http://www.google.com/bot.html)', 'cookie': '-', 'referer': '-', 'statusCode': '500', 'errorHash': 'd5228f90df950647d0acdaa03ce5dc60b970a1ab0d8287ed611d6e3214d91667', 'absoluteHash': '6763ca746c468193c53a84a4c95039e672b5592d8808b702ec3b8552b72c54eb', 'errorLogLine': 2, 'queryParams': '-'}, {'dateTime': '05/06/2021, 05:02PM', 'cs-username': '-', 'sc-substatus': '0', 'sc-win32-status': '0', 'sc-bytes': '597', 'time-taken': '187', 'serverIP': '209.212.247.227', 'method': 'HEAD', 'file': '/https:/www.shuckydarn.com/', 'errorMessage': "404;https://www.fatfool.com:443/https:/www.goshdarnit.com/|49|800a01a8|Object_required:_'xmlDocTemp.documentElement'", 'port': '443', 'clientIP': '72.54.211.17', 'userAgent': 'Test+Certificate+Info', 'cookie': '-', 'referer': '-', 'statusCode': '500', 'errorHash': 
'2c0b5cac5730c90c5fd73f401f9196061deb59319782bf67c0f7605408fb66e0', 'absoluteHash': 'e7ea762b3a23ea79ce84c2f1c6fdb5ae4248fc7090a783a02735e42b70084689', 'errorLogLine': 3, 'queryParams': '-'}, {'dateTime': '05/06/2021, 05:03PM', 'cs-username': '-', 'sc-substatus': '0', 'sc-win32-status': '0', 'sc-bytes': '34303', 'time-taken': '187', 'serverIP': '209.213.12.24', 'method': 'GET', 'file': '/ads.txt', 'errorMessage': "404;https://www.supbro.com:443/ads.txt|49|800a01a8|Object_required:_'xmlDocTemp.documentElement'", 'port': '443', 'clientIP': '54.197.11.69', 'userAgent': 'Mozilla/5.0+(compatible;+proximic;++http://www.proximic.com/info/spider.php)', 'cookie': '-', 'referer': '-', 'statusCode': '500', 'errorHash': 'd5228f90df950647d0acdaa03ce5dc60b970a1ab0d8287ed611d6e3214d91667', 'absoluteHash': '495a2444a27222883e868f1c9d410b6197f04ce4be43af85f632c35113f8b351', 'errorLogLine': 4, 'queryParams': '-'}]
fp = StringIO()
json.dump(errors, fp, indent=2)

print(fp.read())

I expect it to print something like this:

[
  {
    "dateTime": "05/06/2021, 05:00PM",
    "cs-username": "-",
    "sc-substatus": "0",
    "sc-win32-status": "0",
    "sc-bytes": "34255",
    "time-taken": "218",
    "serverIP": "209.213.12.24",
    "method": "GET",
    "file": "/ads.txt",
    "errorMessage": "404;https://www.fudge.com:443/ads.txt|49|800a01a8|Object_required:_'xmlDocTemp.documentElement'",
    "port": "443",
    "clientIP": "66.69.69.145",
    "userAgent": "Mozilla/5.0+(compatible;+Googlebot/2.1;++http://www.google.com/bot.html)",
    "cookie": "-",
    "referer": "-",
    "statusCode": "500",
    "errorHash": "d5228f90df950647d0acdaa03ce5dc60b970a1ab0d8287ed611d6e3214d91667",
    "absoluteHash": "538d59226642d818723f0f9cf1e465b9c01061daafdd79c7cbd897bceefcac1c",
    "errorLogLine": 1,
    "queryParams": "-"
  },
  {
    "dateTime": "05/06/2021, 05:00PM",
    "cs-username": "-",
    "sc-substatus": "0",
    "sc-win32-status": "0",
    "sc-bytes": "34255",
    "time-taken": "156",
    "serverIP": "209.213.12.24",
    "method": "GET",
    "file": "/ads.txt",
    "errorMessage": "404;https://www.crap.com:443/ads.txt|49|800a01a8|Object_required:_'xmlDocTemp.documentElement'",
    "port": "443",
    "clientIP": "69.69.69.69",
    "userAgent": "Mozilla/5.0+(compatible;+Googlebot/2.1;++http://www.google.com/bot.html)",
    "cookie": "-",
    "referer": "-",
    "statusCode": "500",
    "errorHash": "d5228f90df950647d0acdaa03ce5dc60b970a1ab0d8287ed611d6e3214d91667",
    "absoluteHash": "6763ca746c468193c53a84a4c95039e672b5592d8808b702ec3b8552b72c54eb",
    "errorLogLine": 2,
    "queryParams": "-"
  },
  {
    "dateTime": "05/06/2021, 05:02PM",
    "cs-username": "-",
    "sc-substatus": "0",
    "sc-win32-status": "0",
    "sc-bytes": "597",
    "time-taken": "187",
    "serverIP": "209.212.247.227",
    "method": "HEAD",
    "file": "/https:/www.shuckydarn.com/",
    "errorMessage": "404;https://www.fatfool.com:443/https:/www.goshdarnit.com/|49|800a01a8|Object_required:_'xmlDocTemp.documentElement'",
    "port": "443",
    "clientIP": "72.54.211.17",
    "userAgent": "Test+Certificate+Info",
    "cookie": "-",
    "referer": "-",
    "statusCode": "500",
    "errorHash": "2c0b5cac5730c90c5fd73f401f9196061deb59319782bf67c0f7605408fb66e0",
    "absoluteHash": "e7ea762b3a23ea79ce84c2f1c6fdb5ae4248fc7090a783a02735e42b70084689",
    "errorLogLine": 3,
    "queryParams": "-"
  },
  {
    "dateTime": "05/06/2021, 05:03PM",
    "cs-username": "-",
    "sc-substatus": "0",
    "sc-win32-status": "0",
    "sc-bytes": "34303",
    "time-taken": "187",
    "serverIP": "209.213.12.24",
    "method": "GET",
    "file": "/ads.txt",
    "errorMessage": "404;https://www.supbro.com:443/ads.txt|49|800a01a8|Object_required:_'xmlDocTemp.documentElement'",
    "port": "443",
    "clientIP": "54.197.11.69",
    "userAgent": "Mozilla/5.0+(compatible;+proximic;++http://www.proximic.com/info/spider.php)",
    "cookie": "-",
    "referer": "-",
    "statusCode": "500",
    "errorHash": "d5228f90df950647d0acdaa03ce5dc60b970a1ab0d8287ed611d6e3214d91667",
    "absoluteHash": "495a2444a27222883e868f1c9d410b6197f04ce4be43af85f632c35113f8b351",
    "errorLogLine": 4,
    "queryParams": "-"
  }
]

But instead it prints nothing. Why can't I read the data from json.dump() with fp.read()? It's probably something simple that I'm missing. Any help is appreciated!


Solution

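  • After json.dump() writes to the StringIO object, the stream position is left at the end of the buffer, so fp.read() has nothing left to read and returns an empty string. You need to use fp.getvalue() instead of read(): it returns the entire contents of the buffer regardless of the current position. (Alternatively, call fp.seek(0) to rewind before calling fp.read().) Note that your errors object is already JSON-like (a list of plain dictionaries), so you do not actually need to pass it through json.dump() to load it into pandas.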

    Try:

    import json
    from io import StringIO
    
    errors = [{'dateTime': '05/06/2021, 05:00PM', 'cs-username': '-', 'sc-substatus': '0', 'sc-win32-status': '0', 'sc-bytes': '34255', 'time-taken': '218', 'serverIP': '209.213.12.24', 'method': 'GET', 'file': '/ads.txt', 'errorMessage': "404;https://www.fudge.com:443/ads.txt|49|800a01a8|Object_required:_'xmlDocTemp.documentElement'", 'port': '443', 'clientIP': '66.69.69.145', 'userAgent': 'Mozilla/5.0+(compatible;+Googlebot/2.1;++http://www.google.com/bot.html)', 'cookie': '-', 'referer': '-', 'statusCode': '500', 'errorHash': 'd5228f90df950647d0acdaa03ce5dc60b970a1ab0d8287ed611d6e3214d91667', 'absoluteHash': '538d59226642d818723f0f9cf1e465b9c01061daafdd79c7cbd897bceefcac1c', 'errorLogLine': 1, 'queryParams': '-'}, {'dateTime': '05/06/2021, 05:00PM', 'cs-username': '-', 'sc-substatus': '0', 'sc-win32-status': '0', 'sc-bytes': '34255', 'time-taken': '156', 'serverIP': '209.213.12.24', 'method': 'GET', 'file': '/ads.txt', 'errorMessage': "404;https://www.crap.com:443/ads.txt|49|800a01a8|Object_required:_'xmlDocTemp.documentElement'", 'port': '443', 'clientIP': '69.69.69.69', 'userAgent': 'Mozilla/5.0+(compatible;+Googlebot/2.1;++http://www.google.com/bot.html)', 'cookie': '-', 'referer': '-', 'statusCode': '500', 'errorHash': 'd5228f90df950647d0acdaa03ce5dc60b970a1ab0d8287ed611d6e3214d91667', 'absoluteHash': '6763ca746c468193c53a84a4c95039e672b5592d8808b702ec3b8552b72c54eb', 'errorLogLine': 2, 'queryParams': '-'}, {'dateTime': '05/06/2021, 05:02PM', 'cs-username': '-', 'sc-substatus': '0', 'sc-win32-status': '0', 'sc-bytes': '597', 'time-taken': '187', 'serverIP': '209.212.247.227', 'method': 'HEAD', 'file': '/https:/www.shuckydarn.com/', 'errorMessage': "404;https://www.fatfool.com:443/https:/www.goshdarnit.com/|49|800a01a8|Object_required:_'xmlDocTemp.documentElement'", 'port': '443', 'clientIP': '72.54.211.17', 'userAgent': 'Test+Certificate+Info', 'cookie': '-', 'referer': '-', 'statusCode': '500', 'errorHash': 
'2c0b5cac5730c90c5fd73f401f9196061deb59319782bf67c0f7605408fb66e0', 'absoluteHash': 'e7ea762b3a23ea79ce84c2f1c6fdb5ae4248fc7090a783a02735e42b70084689', 'errorLogLine': 3, 'queryParams': '-'}, {'dateTime': '05/06/2021, 05:03PM', 'cs-username': '-', 'sc-substatus': '0', 'sc-win32-status': '0', 'sc-bytes': '34303', 'time-taken': '187', 'serverIP': '209.213.12.24', 'method': 'GET', 'file': '/ads.txt', 'errorMessage': "404;https://www.supbro.com:443/ads.txt|49|800a01a8|Object_required:_'xmlDocTemp.documentElement'", 'port': '443', 'clientIP': '54.197.11.69', 'userAgent': 'Mozilla/5.0+(compatible;+proximic;++http://www.proximic.com/info/spider.php)', 'cookie': '-', 'referer': '-', 'statusCode': '500', 'errorHash': 'd5228f90df950647d0acdaa03ce5dc60b970a1ab0d8287ed611d6e3214d91667', 'absoluteHash': '495a2444a27222883e868f1c9d410b6197f04ce4be43af85f632c35113f8b351', 'errorLogLine': 4, 'queryParams': '-'}]
    # In-memory text buffer that json.dump() writes to as if it were a file.
    fp = StringIO()
    json.dump(errors, fp, indent=2)
    
    # After the write, the stream position sits at the end of the buffer, so
    # read() would return ''. getvalue() returns the whole buffer regardless
    # of the current position.
    print(fp.getvalue())
    

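    Outputs (shown below as the repr of the string returned by fp.getvalue(); print() renders the \n escapes as real line breaks, producing exactly the indented JSON you expected):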

    '[\n  {\n    "dateTime": "05/06/2021, 05:00PM",\n    "cs-username": "-",\n    "sc-substatus": "0",\n    "sc-win32-status": "0",\n    "sc-bytes": "34255",\n    "time-taken": "218",\n    "serverIP": "209.213.12.24",\n    "method": "GET",\n    "file": "/ads.txt",\n    "errorMessage": "404;https://www.fudge.com:443/ads.txt|49|800a01a8|Object_required:_\'xmlDocTemp.documentElement\'",\n    "port": "443",\n    "clientIP": "66.69.69.145",\n    "userAgent": "Mozilla/5.0+(compatible;+Googlebot/2.1;++http://www.google.com/bot.html)",\n    "cookie": "-",\n    "referer": "-",\n    "statusCode": "500",\n    "errorHash": "d5228f90df950647d0acdaa03ce5dc60b970a1ab0d8287ed611d6e3214d91667",\n    "absoluteHash": "538d59226642d818723f0f9cf1e465b9c01061daafdd79c7cbd897bceefcac1c",\n    "errorLogLine": 1,\n    "queryParams": "-"\n  },\n  {\n    "dateTime": "05/06/2021, 05:00PM",\n    "cs-username": "-",\n    "sc-substatus": "0",\n    "sc-win32-status": "0",\n    "sc-bytes": "34255",\n    "time-taken": "156",\n    "serverIP": "209.213.12.24",\n    "method": "GET",\n    "file": "/ads.txt",\n    "errorMessage": "404;https://www.crap.com:443/ads.txt|49|800a01a8|Object_required:_\'xmlDocTemp.documentElement\'",\n    "port": "443",\n    "clientIP": "69.69.69.69",\n    "userAgent": "Mozilla/5.0+(compatible;+Googlebot/2.1;++http://www.google.com/bot.html)",\n    "cookie": "-",\n    "referer": "-",\n    "statusCode": "500",\n    "errorHash": "d5228f90df950647d0acdaa03ce5dc60b970a1ab0d8287ed611d6e3214d91667",\n    "absoluteHash": "6763ca746c468193c53a84a4c95039e672b5592d8808b702ec3b8552b72c54eb",\n    "errorLogLine": 2,\n    "queryParams": "-"\n  },\n  {\n    "dateTime": "05/06/2021, 05:02PM",\n    "cs-username": "-",\n    "sc-substatus": "0",\n    "sc-win32-status": "0",\n    "sc-bytes": "597",\n    "time-taken": "187",\n    "serverIP": "209.212.247.227",\n    "method": "HEAD",\n    "file": "/https:/www.shuckydarn.com/",\n    "errorMessage": 
"404;https://www.fatfool.com:443/https:/www.goshdarnit.com/|49|800a01a8|Object_required:_\'xmlDocTemp.documentElement\'",\n    "port": "443",\n    "clientIP": "72.54.211.17",\n    "userAgent": "Test+Certificate+Info",\n    "cookie": "-",\n    "referer": "-",\n    "statusCode": "500",\n    "errorHash": "2c0b5cac5730c90c5fd73f401f9196061deb59319782bf67c0f7605408fb66e0",\n    "absoluteHash": "e7ea762b3a23ea79ce84c2f1c6fdb5ae4248fc7090a783a02735e42b70084689",\n    "errorLogLine": 3,\n    "queryParams": "-"\n  },\n  {\n    "dateTime": "05/06/2021, 05:03PM",\n    "cs-username": "-",\n    "sc-substatus": "0",\n    "sc-win32-status": "0",\n    "sc-bytes": "34303",\n    "time-taken": "187",\n    "serverIP": "209.213.12.24",\n    "method": "GET",\n    "file": "/ads.txt",\n    "errorMessage": "404;https://www.supbro.com:443/ads.txt|49|800a01a8|Object_required:_\'xmlDocTemp.documentElement\'",\n    "port": "443",\n    "clientIP": "54.197.11.69",\n    "userAgent": "Mozilla/5.0+(compatible;+proximic;++http://www.proximic.com/info/spider.php)",\n    "cookie": "-",\n    "referer": "-",\n    "statusCode": "500",\n    "errorHash": "d5228f90df950647d0acdaa03ce5dc60b970a1ab0d8287ed611d6e3214d91667",\n    "absoluteHash": "495a2444a27222883e868f1c9d410b6197f04ce4be43af85f632c35113f8b351",\n    "errorLogLine": 4,\n    "queryParams": "-"\n  }\n]'
    

    UPDATE:

    If your goal is a DataFrame built from errors, skip the JSON round trip entirely and construct it directly (this requires import pandas as pd):

    df = pd.DataFrame(errors)