Search code examples
python proxy http-headers python-requests httplib

Reading CONNECT headers


I'm using a proxy service (proxymesh) that puts useful information into the headers sent in response to a CONNECT request. For whatever reason, Python's httplib doesn't parse them:

> CONNECT example.com:443 HTTP/1.1
> Host: example.com:443
>
< HTTP/1.1 200 Connection established
< X-Useful-Header: value  # completely ignored
<

The requests module uses httplib internally, so it ignores them as well. How do I extract the headers from the response to a CONNECT request?


Solution

  • Python's httplib actually ignores these headers when creating the tunnel. It's hacky, but you can intercept them and merge the "header" lines with the actual HTTP response's headers:

    import socket
    import httplib
    import requests
    
    from requests.packages.urllib3.connection import HTTPSConnection
    from requests.packages.urllib3.connectionpool import HTTPSConnectionPool
    from requests.packages.urllib3.poolmanager import ProxyManager
    
    from requests.adapters import HTTPAdapter
    
    
    class ProxyHeaderHTTPSConnection(HTTPSConnection):
        """HTTPS connection that keeps the headers a proxy sends in reply to CONNECT.

        The header lines that follow the proxy's status line are saved by
        ``_tunnel`` and appended by ``getresponse`` to the raw header lines of
        every response read from this connection.
        """

        def __init__(self, *args, **kwargs):
            super(ProxyHeaderHTTPSConnection, self).__init__(*args, **kwargs)
            # Raw header lines (exactly as read from the socket) from the most
            # recent CONNECT reply.
            self._proxy_headers = []

        def _tunnel(self):
            """Send the CONNECT request and record the proxy's reply headers.

            Raises:
                socket.error: if the proxy answers with HTTP/0.9 or with a
                    status code other than 200.
                httplib.LineTooLong: if a reply header line is longer than
                    ``httplib._MAXLINE``.
            """
            self.send("CONNECT %s:%d HTTP/1.0\r\n" % (self._tunnel_host, self._tunnel_port))

            for header, value in self._tunnel_headers.iteritems():
                self.send("%s: %s\r\n" % (header, value))

            self.send("\r\n")

            response = self.response_class(self.sock, strict=self.strict, method=self._method)
            version, code, message = response._read_status()

            if version == "HTTP/0.9":
                # HTTP/0.9 doesn't support the CONNECT verb, so if httplib has
                # concluded HTTP/0.9 is being used something has gone wrong.
                self.close()
                raise socket.error("Invalid response from tunnel request")

            if code != 200:
                self.close()
                raise socket.error("Tunnel connection failed: %d %s" % (code, message.strip()))

            # Start from a clean slate so headers from an earlier tunnel on
            # this object are not mixed with the new ones.
            self._proxy_headers = []

            while True:
                line = response.fp.readline(httplib._MAXLINE + 1)

                if len(line) > httplib._MAXLINE:
                    # Must be qualified: LineTooLong is not imported into this
                    # module, so the bare name would raise NameError instead.
                    raise httplib.LineTooLong("header line")

                # Stop at EOF (proxy closed without a terminator) or at the
                # blank line ending the header block; a bare "\n" terminator
                # is accepted as well as "\r\n".
                if line in ('\r\n', '\n', ''):
                    break

                # The line is a header, save it
                if ':' in line:
                    self._proxy_headers.append(line)

        def getresponse(self, buffering=False):
            """Return the next response with the saved proxy headers appended.

            The proxy's raw header lines are added to ``response.msg.headers``
            so they show up next to the headers sent by the origin server.
            """
            response = super(ProxyHeaderHTTPSConnection, self).getresponse(buffering)
            response.msg.headers.extend(self._proxy_headers)

            return response
    
    
    class ProxyHeaderHTTPSConnectionPool(HTTPSConnectionPool):
        # Override the pool's ConnectionCls attribute with the connection
        # class that preserves the proxy's CONNECT reply headers.
        ConnectionCls = ProxyHeaderHTTPSConnection
    
    
    class ProxyHeaderProxyManager(ProxyManager):
        """Proxy manager whose pools are ProxyHeaderHTTPSConnectionPool instances."""

        def _new_pool(self, scheme, host, port):
            """Build a header-preserving HTTPS pool for ``host``:``port``.

            Only the ``https`` scheme is handled; any other scheme trips the
            assertion below.
            """
            assert scheme == 'https'

            pool_options = self.connection_pool_kw
            new_pool = ProxyHeaderHTTPSConnectionPool(host, port, **pool_options)
            return new_pool
    
    
    class ProxyHeaderHTTPAdapter(HTTPAdapter):
        """Transport adapter that hands out ProxyHeaderProxyManager instances."""

        def proxy_manager_for(self, proxy, **proxy_kwargs):
            """Return the manager cached for ``proxy``, creating it on first use.

            A newly created manager is stored in ``self.proxy_manager`` under
            the proxy URL so later calls with the same URL reuse it.
            """
            # Fast path: a manager for this proxy URL already exists.
            if proxy in self.proxy_manager:
                return self.proxy_manager[proxy]

            headers_for_proxy = self.proxy_headers(proxy)
            created = ProxyHeaderProxyManager(
                proxy_url=proxy,
                proxy_headers=headers_for_proxy,
                num_pools=self._pool_connections,
                maxsize=self._pool_maxsize,
                block=self._pool_block,
                **proxy_kwargs)
            self.proxy_manager[proxy] = created

            return created
    

    You can then install the adapter onto a session:

    # Route every https:// request made through this session via the
    # header-preserving adapter.
    session = requests.Session()
    session.mount('https://', ProxyHeaderHTTPAdapter())
    
    # `{...}` is a placeholder for the caller's own proxies mapping.
    response = session.get('https://example.com', proxies={...})
    

    The proxy's headers will be merged in with the response headers, so it should behave as if the proxy modified the response headers directly.