Search code examples
pythonjsonffprobe

Python accessing data in JSON object


So I do this in my script:

import json
info = json.loads(get_info())
print info

Which outputs:

richard@richard-desktop:~/projects/hello-python$ python main.py 
{
    "streams": [
        {
            "index": 0,
            "codec_name": "mpeg2video",
            "codec_long_name": "MPEG-2 video",
            "codec_type": "video",
            "codec_time_base": "1001/48000",
            "codec_tag_string": "[0][0][0][0]",
            "codec_tag": "0x0000",
            "width": 1920,
            "height": 1080,
            "has_b_frames": 1,
            "sample_aspect_ratio": "1:1",
            "display_aspect_ratio": "16:9",
            "pix_fmt": "yuv422p",
            "level": 2,
            "timecode": "00:59:59:00",
            "id": "0x1e0",
            "r_frame_rate": "24000/1001",
            "avg_frame_rate": "10000/417",
            "time_base": "1/90000",
            "start_time": "0.945411"
        },
        {
            "index": 1,
            "codec_name": "pcm_dvd",
            "codec_long_name": "PCM signed 20|24-bit big-endian",
            "codec_type": "audio",
            "codec_time_base": "1/48000",
            "codec_tag_string": "[0][0][0][0]",
            "codec_tag": "0x0000",
            "sample_fmt": "s32",
            "sample_rate": "48000",
            "channels": 2,
            "bits_per_sample": 0,
            "id": "0xa0",
            "r_frame_rate": "0/0",
            "avg_frame_rate": "0/0",
            "time_base": "1/90000",
            "start_time": "0.945411",
            "duration": "600.595000"
        }
    ],
    "format": {
        "filename": "/home/richard/projects/hello-python/tests/test_1.mpg",
        "nb_streams": 2,
        "format_name": "mpeg",
        "format_long_name": "MPEG-PS format",
        "start_time": "0.945411",
        "duration": "600.595000",
        "size": "4033241092",
        "bit_rate": "53723272"
    }
}

How can I access streams or format, and then the properties inside them? For example, how do I get codec_long_name from the second stream, or duration from format?

I tried:

print info[0]

Which outputs:

richard@richard-desktop:~/projects/hello-python$ python main.py 
{

If I do print repr(info), I get:

richard@richard-desktop:~/projects/hello-python$ python main.py 
'{\n    "streams": [\n        {\n            "index": 0,\n            "codec_name": "mpeg2video",\n            "codec_long_name": "MPEG-2 video",\n            "codec_type": "video",\n            "codec_time_base": "1001/48000",\n            "codec_tag_string": "[0][0][0][0]",\n            "codec_tag": "0x0000",\n            "width": 1920,\n            "height": 1080,\n            "has_b_frames": 1,\n            "sample_aspect_ratio": "1:1",\n            "display_aspect_ratio": "16:9",\n            "pix_fmt": "yuv422p",\n            "level": 2,\n            "timecode": "00:59:59:00",\n            "id": "0x1e0",\n            "r_frame_rate": "24000/1001",\n            "avg_frame_rate": "10000/417",\n            "time_base": "1/90000",\n            "start_time": "0.945411"\n        },\n        {\n            "index": 1,\n            "codec_name": "pcm_dvd",\n            "codec_long_name": "PCM signed 20|24-bit big-endian",\n            "codec_type": "audio",\n            "codec_time_base": "1/48000",\n            "codec_tag_string": "[0][0][0][0]",\n            "codec_tag": "0x0000",\n            "sample_fmt": "s32",\n            "sample_rate": "48000",\n            "channels": 2,\n            "bits_per_sample": 0,\n            "id": "0xa0",\n            "r_frame_rate": "0/0",\n            "avg_frame_rate": "0/0",\n            "time_base": "1/90000",\n            "start_time": "0.945411",\n            "duration": "600.595000"\n        }\n    ],\n    "format": {\n        "filename": "/home/richard/projects/hello-python/tests/test_1.mpg",\n        "nb_streams": 2,\n        "format_name": "mpeg",\n        "format_long_name": "MPEG-PS format",\n        "start_time": "0.945411",\n        "duration": "600.595000",\n        "size": "4033241092",\n        "bit_rate": "53723272"\n    }\n}\n'

The way I am getting the JSON string is by running this command:

ffmpeg_command = "ffprobe -v quiet -print_format json -show_format -show_streams %s" % self.absolute_path
return subprocess.check_output(ffmpeg_command, shell=True)

Solution

  • The JSON was encoded twice, so the result of a single json.loads call is still a string rather than a dictionary. Strings in Python are sequences, so info[0] is simply the first character of that string, which is {.

    Decode the item again:

    info = json.loads(json.loads(get_info()))
    

    Now your main.py output should look like:

    >>> result = json.loads(output)
    >>> print result
    {u'streams': [{u'pix_fmt': u'yuv422p', u'index': 0, u'start_time': u'0.945411', u'codec_tag': u'0x0000', u'sample_aspect_ratio': u'1:1', u'level': 2, u'r_frame_rate': u'24000/1001', u'id': u'0x1e0', u'time_base': u'1/90000', u'codec_tag_string': u'[0][0][0][0]', u'codec_type': u'video', u'has_b_frames': 1, u'width': 1920, u'codec_long_name': u'MPEG-2 video', u'display_aspect_ratio': u'16:9', u'codec_name': u'mpeg2video', u'timecode': u'00:59:59:00', u'height': 1080, u'codec_time_base': u'1001/48000', u'avg_frame_rate': u'10000/417'}, {u'index': 1, u'sample_fmt': u's32', u'codec_tag': u'0x0000', u'bits_per_sample': 0, u'r_frame_rate': u'0/0', u'start_time': u'0.945411', u'time_base': u'1/90000', u'codec_tag_string': u'[0][0][0][0]', u'codec_type': u'audio', u'channels': 2, u'duration': u'600.595000', u'codec_long_name': u'PCM signed 20|24-bit big-endian', u'codec_name': u'pcm_dvd', u'id': u'0xa0', u'sample_rate': u'48000', u'codec_time_base': u'1/48000', u'avg_frame_rate': u'0/0'}], u'format': {u'nb_streams': 2, u'start_time': u'0.945411', u'format_long_name': u'MPEG-PS format', u'format_name': u'mpeg', u'filename': u'/home/richard/projects/hello-python/tests/test_1.mpg', u'bit_rate': u'53723272', u'duration': u'600.595000', u'size': u'4033241092'}}
    

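    and you can access the streams and format parameters by name (for example, result['streams'][1]['codec_long_name'] for the second stream's codec name, or result['format']['duration'] for the duration):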

    >>> result['streams']
    [{u'pix_fmt': u'yuv422p', u'index': 0, u'start_time': u'0.945411', u'codec_tag': u'0x0000', u'sample_aspect_ratio': u'1:1', u'level': 2, u'r_frame_rate': u'24000/1001', u'id': u'0x1e0', u'time_base': u'1/90000', u'codec_tag_string': u'[0][0][0][0]', u'codec_type': u'video', u'has_b_frames': 1, u'width': 1920, u'codec_long_name': u'MPEG-2 video', u'display_aspect_ratio': u'16:9', u'codec_name': u'mpeg2video', u'timecode': u'00:59:59:00', u'height': 1080, u'codec_time_base': u'1001/48000', u'avg_frame_rate': u'10000/417'}, {u'index': 1, u'sample_fmt': u's32', u'codec_tag': u'0x0000', u'bits_per_sample': 0, u'r_frame_rate': u'0/0', u'start_time': u'0.945411', u'time_base': u'1/90000', u'codec_tag_string': u'[0][0][0][0]', u'codec_type': u'audio', u'channels': 2, u'duration': u'600.595000', u'codec_long_name': u'PCM signed 20|24-bit big-endian', u'codec_name': u'pcm_dvd', u'id': u'0xa0', u'sample_rate': u'48000', u'codec_time_base': u'1/48000', u'avg_frame_rate': u'0/0'}]
    

    Pro tip: use the pprint module to format Python structures nicely:

    >>> from pprint import pprint
    >>> pprint(result)
    {u'format': {u'bit_rate': u'53723272',
                 u'duration': u'600.595000',
                 u'filename': u'/home/richard/projects/hello-python/tests/test_1.mpg',
                 u'format_long_name': u'MPEG-PS format',
                 u'format_name': u'mpeg',
                 u'nb_streams': 2,
                 u'size': u'4033241092',
                 u'start_time': u'0.945411'},
     u'streams': [{u'avg_frame_rate': u'10000/417',
                   u'codec_long_name': u'MPEG-2 video',
                   u'codec_name': u'mpeg2video',
                   u'codec_tag': u'0x0000',
                   u'codec_tag_string': u'[0][0][0][0]',
                   u'codec_time_base': u'1001/48000',
                   u'codec_type': u'video',
                   u'display_aspect_ratio': u'16:9',
                   u'has_b_frames': 1,
                   u'height': 1080,
                   u'id': u'0x1e0',
                   u'index': 0,
                   u'level': 2,
                   u'pix_fmt': u'yuv422p',
                   u'r_frame_rate': u'24000/1001',
                   u'sample_aspect_ratio': u'1:1',
                   u'start_time': u'0.945411',
                   u'time_base': u'1/90000',
                   u'timecode': u'00:59:59:00',
                   u'width': 1920},
                  {u'avg_frame_rate': u'0/0',
                   u'bits_per_sample': 0,
                   u'channels': 2,
                   u'codec_long_name': u'PCM signed 20|24-bit big-endian',
                   u'codec_name': u'pcm_dvd',
                   u'codec_tag': u'0x0000',
                   u'codec_tag_string': u'[0][0][0][0]',
                   u'codec_time_base': u'1/48000',
                   u'codec_type': u'audio',
                   u'duration': u'600.595000',
                   u'id': u'0xa0',
                   u'index': 1,
                   u'r_frame_rate': u'0/0',
                   u'sample_fmt': u's32',
                   u'sample_rate': u'48000',
                   u'start_time': u'0.945411',
                   u'time_base': u'1/90000'}]}