Search code examples
pythonpython-3.xreverse-proxyhttp-proxy

How to catch response of server in mitmproxy container and past it into file in json format?


I need to catch and fill in separated file every response,request which is going through my mitmproxy container.

Dockerfile

FROM mitmproxy/mitmproxy:latest

RUN mkdir url_catching
WORKDIR /home/$USER/url_catching
COPY ./url.py .



EXPOSE 8080:8080

ENTRYPOINT ["/usr/bin/mitmdump","-s","./url.py"]

Docker run

sudo docker run --rm -it -p 8080:8080 mitmdump_url:latest

My pyhton script(sry, i am novice in python)

from mitmproxy import http

def response(flow):
    url_request: str = str(flow.request.pretty_url)
    url_request = url_request.replace("/", "_")
    with open(url_request, "ab") as ofile:
        ofile.write(flow.request.pretty_url.encode())
        ofile.write(flow.request.content)
        ofile.write(flow.response.content)

  • in the result i get separated files for each request and name of it is equal to url of request, but only url is in human readable format everything else is encoded or just missed. So i need to past response which is going through my proxy in json format to files.

Solution

  • request/responde has headers + empty line + body/content

    I show two methods to convert headers to string/bytes.

    As normal lines of text

    for key, value in flow.response.headers.items():
        ofile.write('{}: {}\n'.format(key, value).encode())
    

    Result

    Access-Control-Allow-Credentials: true
    Access-Control-Allow-Origin: *
    Content-Encoding: gzip
    Content-Type: application/json
    Date: Tue, 14 Jan 2020 11:51:49 GMT
    Referrer-Policy: no-referrer-when-downgrade
    Server: nginx
    X-Content-Type-Options: nosniff
    X-Frame-Options: DENY
    X-XSS-Protection: 1; mode=block
    Content-Length: 181
    Connection: keep-alive
    

    As JSON. I convert to dict() because headers can't be directly converted to JSON

    d = dict(flow.request.headers.items())
    d = json.dumps(d, indents=2)
    ofile.write(d.encode() + b'\n')
    

    Result

    {
      "Host": "httpbin.org",
      "User-Agent": "python-requests/2.22.0",
      "Accept-Encoding": "gzip, deflate",
      "Accept": "*/*",
      "Connection": "keep-alive"
    }
    

    I also skip urls with '/static/'

    from mitmproxy import http
    import json
    
    def response(flow):
        url_request: str = str(flow.request.pretty_url)
    
        if '/static/' not in url_request:
            url_request = url_request.replace("/", "_")
            with open(url_request + '.txt', "ab") as ofile:
    
                ofile.write(b'--- url ---\n')
                ofile.write(flow.request.pretty_url.encode() + b'\n')
    
                ofile.write(b'--- request ---\n')
    
                ofile.write(b'--- headers ---\n')
                #for key, value in flow.request.headers.items():
                #    ofile.write('{}: {}\n'.format(key, value).encode())
                d = dict(flow.request.headers.items())
                d = json.dumps(d, indents=2)
                ofile.write(d.encode() + b'\n')
    
    
                ofile.write(b'--- content ---\n')
                ofile.write(flow.request.content + b'\n')
    
                ofile.write(b'--- response ---\n')
    
                ofile.write(b'--- headers ---\n')
                for key, value in flow.response.headers.items():
                    ofile.write('{}: {}\n'.format(key, value).encode())
    
                ofile.write(b'--- content ---\n')
                ofile.write(flow.response.content + b'\n')
    

    To put all as one JSON you would have to first create dictionary with all elements (headers, body, etc.) and next use json.dumps(all_elements)


    Testing code

    import requests
    
    proxy = {
        'http': 'http://localhost:8080',
        'https': 'http://localhost:8080',
    }
    
    urls = [
        'https://httpbin.org/get',
        'https://httpbin.org/gzip',
        'https://httpbin.org/brotli',
        'https://httpbin.org/deflate',
        'https://httpbin.org/encoding/utf8',
    ]
    
    for url in urls:
        print(url)
        r = requests.get(url, proxies=proxy, verify=False)
        print(r.text)
    

    One of file with results

    --- url ---
    https://httpbin.org/get
    --- request ---
    --- headers ---
    {
      "Host": "httpbin.org",
      "User-Agent": "python-requests/2.22.0",
      "Accept-Encoding": "gzip, deflate",
      "Accept": "*/*",
      "Connection": "keep-alive"
    }
    --- content ---
    
    --- response ---
    --- headers ---
    Access-Control-Allow-Credentials: true
    Access-Control-Allow-Origin: *
    Content-Encoding: gzip
    Content-Type: application/json
    Date: Tue, 14 Jan 2020 12:06:04 GMT
    Referrer-Policy: no-referrer-when-downgrade
    Server: nginx
    X-Content-Type-Options: nosniff
    X-Frame-Options: DENY
    X-XSS-Protection: 1; mode=block
    Content-Length: 181
    Connection: keep-alive
    --- content ---
    {
      "args": {}, 
      "headers": {
        "Accept": "*/*", 
        "Accept-Encoding": "gzip, deflate", 
        "Host": "httpbin.org", 
        "User-Agent": "python-requests/2.22.0"
      }, 
      "origin": "83.23.66.224, 83.23.66.224", 
      "url": "https://httpbin.org/get"
    }