Search code examples
pythonpython-2.7gtfs

Parse GTFS realtime without protobuf with python


I am trying to parse GTFS-realtime trip_update data that is served as plain text rather than in the binary pb (protobuf) format.

(here is the feed url)

https://extranet.trainose.gr/epivatikos/transit/trip_updates

However, the only examples I can find deal with pb files.

from google.transit import gtfs_realtime_pb2
....
response = requests.get(url, allow_redirects=True)
feed.ParseFromString(response.content)
for entity in feed.entity:

So how could I parse the feed that is not pb? Thanks.


Solution

  • It turns out that a plain-text feed can be processed with protobuf's text_format module, like this:

       response = requests.get(url, allow_redirects=True)
        ...
        try:
            from google.protobuf import text_format
            text_format.Parse(response.content.decode('UTF-8'), feed, allow_unknown_extension=True)
            print("Parse with text format successfully.")
            printResults(feed)
        except text_format.ParseError as e:
                raise IOError("Cannot parse text %s." % (str(e)))
    

    Actually, here is my whole script:

    from google.transit import gtfs_realtime_pb2
    import os
    import requests
    
    
    def main():
        """Create an empty FeedMessage and populate it from the feed URL."""
        feed_url = 'https://feed.utl.com/feed'
        message = gtfs_realtime_pb2.FeedMessage()
        get_feed(message, feed_url)
    
    def printResults(feed):
        """Print the feed timestamp and trip ids, and save the trip ids to output.txt.

        The header timestamp is printed as a local date/time, followed by one
        ``<trip_id>;`` line per entity. The trip ids of the entities that carry
        a ``trip_update`` are then written, each followed by ``;``, to
        ``output.txt`` in the current working directory.

        Args:
            feed: a parsed feed message exposing ``header.timestamp`` and an
                iterable ``entity`` whose items support ``HasField`` and
                ``trip_update.trip.trip_id``.
        """
        from datetime import datetime

        ts = int(feed.header.timestamp)
        print("Last update: " + datetime.fromtimestamp(ts).strftime('%d-%m-%Y %H:%M:%S'))

        for entity in feed.entity:
            print(str(entity.trip_update.trip.trip_id) + ';')

        # Collect the file content in a single pass and write it once. The
        # previous version reopened (and truncated) the file and re-walked the
        # whole feed for every entity, which was quadratic and shadowed the
        # loop variable.
        trip_ids = [
            str(entity.trip_update.trip.trip_id) + ';'
            for entity in feed.entity
            if entity.HasField('trip_update')
        ]
        # Written even when the feed is empty, so a stale file from an earlier
        # run is not mistaken for the current result.
        with open('output.txt', mode='w') as f:
            f.write(''.join(trip_ids))
    def get_feed(feed, url):
        """Download a GTFS-realtime feed and parse it into ``feed``.

        The response body is first parsed as binary protobuf. If that fails
        with a protobuf ``DecodeError``, the body is decoded as UTF-8 and
        parsed as protobuf text format instead. On success the results are
        passed to ``printResults``.

        Args:
            feed: the protobuf message to fill (a ``FeedMessage`` in this script).
            url: address of the feed to download.

        Raises:
            IOError: if the body is neither valid binary protobuf nor valid
                UTF-8 protobuf text.
        """
        # Imported up front so the ``except`` clauses below can always resolve
        # these names, even when the failure happens early.
        from google.protobuf import text_format
        from google.protobuf.message import DecodeError

        proxies = {'http': '127.0.0.1:5555', 'https': '127.0.0.1:5555'}
        # A timeout keeps the script from hanging forever on a dead proxy/server.
        response = requests.get(url, allow_redirects=True, proxies=proxies, timeout=30)

        try:
            feed.ParseFromString(response.content)
        except DecodeError:
            # Only a binary decoding failure triggers the text fallback; errors
            # raised while printing results are no longer misread as bad data.
            try:
                text = response.content.decode('UTF-8')
            except UnicodeDecodeError as e:
                raise IOError("Cannot parse file %s." % (str(e)))
            # Concatenate with the decoded text: ``response.content`` is bytes,
            # and str + bytes raises TypeError on Python 3.
            print("Oops!  That was no valid data. Try again...\n\n" + text)
            # text_format.Parse merges into the message, so drop anything the
            # failed binary parse may have left behind.
            feed.Clear()
            try:
                text_format.Parse(text, feed, allow_unknown_extension=True)
            except text_format.ParseError as e:
                raise IOError("Cannot parse file %s." % (str(e)))
            print("Parse with text format successfully.")

        printResults(feed)
    # Run the script only when this file is executed directly, not when it is imported.
    if __name__ == "__main__":
        main()