Search code examples
pythondjangoprofilinglive

Live Profiling of Python Server


I want to know where the python interpreter spends the most time. I use it on a live django application, but it should work for all long running python processes.

I answer my own question.


Solution

  • import os, re, sys, time, datetime, collections, thread, threading, atexit, traceback 
    
    u'''
    
    debug_live.start(seconds_float) starts a monitor thread which print 
    the stacktrace of all threads into a logfile.
    You can report which lines are executed the most with this script:
    
    app_foo_d@server:~$ python djangotools/utils/debug_live.py -h
    usage: debug_live.py [-h] [--most-common N] {sum-all-frames,sum-last-frame}
    
    Read stacktrace log
    
    positional arguments:
      {sum-all-frames,sum-last-frame}
    
    optional arguments:
      -h, --help            show this help message and exit
      --most-common N       Display the N most common lines in the stacktraces
    
    ---------------------------------
    
    You can start the watching thread your django middleware like this:
    
    class FOOMiddleware:
        def __init__(self):
            u'This code gets executed once after the start of the wsgi worker process. Not for every request!'
            seconds=getattr(settings, 'debug_live_interval', None)
            if seconds:
                seconds=float(seconds)
                from djangotools.utils import debug_live
                debug_live.start(seconds)
    
    # settings.py
    debug_live_interval=0.3 # ever 0.3 second
    
    # Inspired by http://code.google.com/p/modwsgi/wiki/DebuggingTechniques
    
    You can get a simple report of the log file of stacktraces like below. The lines
    which are not from django are marked with "<====". That's most likely your code
    and this could be a bottle neck.
    
    python ..../debug_live.py read
     1971 File: "/home/foo_bar_p/django/core/handlers/wsgi.py", line 272, in __call__
          response = self.get_response(request)
     1812 File: "/home/foo_bar_p/django/core/handlers/base.py", line 111, in get_response
          response = callback(request, *callback_args, **callback_kwargs)
     1725 File: "/home/foo_bar_p/django/db/backends/postgresql_psycopg2/base.py", line 44, in execute
          return self.cursor.execute(query, args)
     1724 File: "/home/foo_bar_p/django/db/models/sql/compiler.py", line 735, in execute_sql
          cursor.execute(sql, params)
     1007 File: "/home/foo_bar_p/django/db/models/sql/compiler.py", line 680, in results_iter
          for rows in self.execute_sql(MULTI):
      796 File: "/home/foo_bar_p/django/db/models/query.py", line 273, in iterator
          for row in compiler.results_iter():
      763 File: "/home/foo_bar_p/foo/utils/ticketutils.py", line 135, in __init__      <====
          filter=type_filter(root_node=self.root_node)
      684 File: "/home/foo_bar_p/django/db/models/query.py", line 334, in count
          return self.query.get_count(using=self.db)
      679 File: "/home/foo_bar_p/django/db/models/sql/query.py", line 367, in get_aggregation
          result = query.get_compiler(using).execute_sql(SINGLE)
      677 File: "/home/foo_bar_p/django/db/models/sql/query.py", line 401, in get_count
          number = obj.get_aggregation(using=using)[None]
    
    
    '''
    
    
    from django.conf import settings
    
    outfile = os.path.expanduser('~/tmp/debug_live.log')
    
    other_code=re.compile(r'/(django|python...)/')
    
    
    def stacktraces():
        code=[]
        now=datetime.datetime.now()
        pid=os.getpid()
        my_thread_id=thread.get_ident()
        for thread_id, stack in sys._current_frames().items(): 
            if thread_id==my_thread_id:
                continue # Don't print this monitor thread
            code.append("\n\n#START date: %s\n# ProcessId: %s\n# ThreadID: %s" % (now, pid, thread_id))
            for filename, lineno, name, line in traceback.extract_stack(stack): 
                code.append('File: "%s", line %d, in %s' % (filename, lineno, name)) 
                if line: 
                    code.append("  %s" % (line.strip()))
            code.append('#END')
        if not code:
            return
        fd=open(outfile, 'at')
        fd.write('\n'.join(code))
        fd.close()
    
    def monitor(interval):
        while monitor_thread:
            stacktraces()
            time.sleep(interval)
    
    monitor_thread=None
    
    def exiting():
        global monitor_thread
        monitor_thread=None
    
    
    def start(interval):
        global monitor_thread
        if monitor_thread:
            return
        assert not os.path.islink(outfile), outfile # well known temporary name.... symlink attack...
        monitor_thread = threading.Thread(target=monitor, args=[interval])
        monitor_thread.setDaemon(True)
        atexit.register(exiting)
        monitor_thread.start()
    
    def read_logs(args):
        # The outfile can be huge, don't read the whole file into memory.
        counter=collections.Counter()
        cur_stack=[]
        py_line=''
        code_line=''
        if args.action=='sum-all-frames':
            sum_all_frames=True
        else:
            sum_all_frames=False
        for line in open(outfile):
            if line.startswith('#END'):
                if sum_all_frames:
                    frames=cur_stack
                else:
                    frames=cur_stack[-1:]
                counter.update(frames)
                cur_stack=[]
                continue
            if line[0] in '\n#':
                continue
            if line.startswith('File:'):
                py_line=line.rstrip()
                continue
            if line.startswith(' '):
                code_line=line.rstrip()
                if not (py_line, code_line) in cur_stack:
                    # If there is a recursion, count the line only once per stacktrace
                    cur_stack.append((py_line, code_line))
                continue
            print 'ERROR unparsed', line
        for (py, code), c in counter.most_common(args.most_common):
            if not other_code.search(py):
                py='%s      <====' % py
            print '% 5d %s\n    %s' % (c, py, code)
    
    def main():
        import argparse
        parser=argparse.ArgumentParser(description='Read stacktrace log')
        parser.add_argument('action', choices=['sum-all-frames', 'sum-last-frame'])
        parser.add_argument('--most-common', metavar='N', default=30, type=int, help='Display the N most common lines in the stacktraces')
        args=parser.parse_args()
        return read_logs(args)
    
    if __name__=='__main__':
        main()