Search code examples
Tags: python, http, twisted, twisted.client

How do I make a GET request and then print out the response body with Python (Twisted)


On Twisted's website, they have an example called "Receiving Responses" which shows you how to get the response headers and response code among other things, but not the response body (the actual HTML body returned by the request to the website).

In `def cbRequest(response)`, how would I print out the HTML text returned by the GET request to example.com? They show attributes like `response.headers` for getting the headers, but I haven't seen a method that returns the response body.

I tried printing `response.deliverBody(BeginningPrinter(finished))` to get the response text, but to no avail.

http://twistedmatrix.com/documents/12.1.0/web/howto/client.html#auto9

from pprint import pformat

from twisted.internet import reactor
from twisted.internet.defer import Deferred
from twisted.internet.protocol import Protocol
from twisted.web.client import Agent
from twisted.web.http_headers import Headers

class BeginningPrinter(Protocol):
    """Body-consumer protocol that prints the beginning of a response body.

    An instance is passed to ``response.deliverBody()``. It prints at most
    the first 10 KiB of the body as it arrives and fires ``finished`` with
    ``None`` once delivery has ended.
    """

    def __init__(self, finished):
        """Store the completion Deferred and set the print budget.

        finished -- Deferred fired (with None) from connectionLost.
        """
        self.finished = finished
        # Number of body bytes that may still be printed.
        self.remaining = 1024 * 10

    def dataReceived(self, bytes):
        """Print the next chunk of the body, cut to the remaining budget."""
        # Everything that touches `display` stays inside the guard: once the
        # budget is used up, later chunks are skipped instead of reaching an
        # unbound `display`.
        if self.remaining:
            display = bytes[:self.remaining]
            # Single-argument print(...) produces the same output whether it
            # is parsed as a Python 2 statement or a Python 3 function call.
            print('Some data received:')
            print(display)
            self.remaining -= len(display)

    def connectionLost(self, reason):
        """Report why delivery ended and fire the ``finished`` Deferred."""
        print('Finished receiving body: %s' % (reason.getErrorMessage(),))
        self.finished.callback(None)

# Create the HTTP client on the global reactor and issue the GET request.
# `d` is the value returned by agent.request(); the callbacks defined further
# down the script are attached to it.
agent = Agent(reactor)
d = agent.request(
    'GET',
    'http://example.com/',
    headers=Headers({'User-Agent': ['Twisted Web Client Example']}),
    bodyProducer=None,  # no request body is sent
)

def cbRequest(response):
    """Print the response metadata, then start delivery of the body.

    response -- the response object passed to this callback by `d`.

    Returns the Deferred handed to BeginningPrinter, which fires it from
    connectionLost; returning it makes later callbacks on `d` wait until
    the body has been delivered.
    """
    # (label, attribute) pairs, printed in this order as "label value".
    for label, attr in (
        ('Response version:', 'version'),
        ('Response code:', 'code'),
        ('Response phrase:', 'phrase'),
    ):
        print('%s %s' % (label, getattr(response, attr)))

    print('Response headers:')
    raw_headers = response.headers.getAllRawHeaders()
    print(pformat(list(raw_headers)))

    body_done = Deferred()
    response.deliverBody(BeginningPrinter(body_done))
    return body_done


d.addCallback(cbRequest)

def cbShutdown(ignored):
    """Stop the reactor; the incoming result or failure is discarded."""
    reactor.stop()


# addBoth registers the shutdown for the success and the failure path alike.
d.addBoth(cbShutdown)

# Start the event loop; cbShutdown above is what stops it again.
reactor.run()

Solution

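  • `deliverBody()` does not return the body; it streams it to the protocol you pass in. So you'll have to keep the data as it arrives in `dataReceived` and return it afterwards, by passing it to the `finished` Deferred's callback in `connectionLost`: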

    class BeginningPrinter(Protocol):
        """Body-consumer protocol that buffers the body and prints its start.

        Every chunk is kept; at most the first 10 KiB are also printed as
        they arrive. When delivery ends, ``finished`` is fired with the whole
        body as one byte string (empty if no data was received).
        """

        def __init__(self, finished):
            """Store the completion Deferred; set up the budget and buffer.

            finished -- Deferred fired with the complete body.
            """
            self.finished = finished
            # Number of body bytes that may still be printed.
            self.remaining = 1024 * 10
            # Chunks in arrival order; joined once in connectionLost instead
            # of re-copying a growing string on every chunk.
            self.buff = []

        def dataReceived(self, bytes):
            """Buffer the chunk and print whatever part still fits the budget."""
            self.buff.append(bytes)
            # Everything that touches `display` stays inside the guard, so
            # chunks arriving after the budget is spent are buffered without
            # reaching an unbound `display`.
            if self.remaining:
                display = bytes[:self.remaining]
                # Single-argument print(...) gives the same output on
                # Python 2 and Python 3.
                print('Some data received:')
                print(display)
                self.remaining -= len(display)

        def connectionLost(self, reason):
            """Report why delivery ended and fire ``finished`` with the body."""
            print('Finished receiving body: %s' % (reason.getErrorMessage(),))
            self.finished.callback(b''.join(self.buff))
    

    Note that you should probably remove the variable `remaining`: it only limits how much of the body gets printed (the first 1024 * 10 bytes) and is not needed once the whole body is being buffered and returned.

    class BeginningPrinter(Protocol):
        """Body-consumer protocol that collects the complete response body.

        Chunks are stored as they arrive. When delivery ends, ``finished`` is
        fired with the whole body as one byte string (empty if no data was
        received).
        """

        def __init__(self, finished):
            """Store the completion Deferred and create the chunk buffer.

            finished -- Deferred fired with the complete body.
            """
            self.finished = finished
            # Chunks in arrival order; joined once in connectionLost instead
            # of re-copying a growing string on every chunk.
            self.buff = []

        def dataReceived(self, bytes):
            """Remember the next chunk of the body."""
            self.buff.append(bytes)

        def connectionLost(self, reason):
            """Report why delivery ended and fire ``finished`` with the body."""
            # Single-argument print(...) gives the same output on Python 2
            # and Python 3.
            print('Finished receiving body: %s' % (reason.getErrorMessage(),))
            self.finished.callback(b''.join(self.buff))