Search code examples
pythonvariablesif-statementglobalscoping

Global scope inside if/else for a running median


I'm implementing a running median algorithm in Python using two heaps. However, the heaps don't grow in size even if I push to them...

I suspect it's something to do with the scoping inside if/else statements. I don't really understand how to fix this.

import os
import numpy
import functools
import heapq
from heapq import heapify, heappush, heappop 


@functools.total_ordering
class ReverseCompare(object):
    def __init__(self, obj):
        self.obj = obj
    def __eq__(self, other):
        return isinstance(other, ReverseCompare) and self.obj == other.obj
    def __le__(self, other):
        return isinstance(other, ReverseCompare) and self.obj >= other.obj
    def __str__(self):
        return str(self.obj)
    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, self.obj)


curMedian = 0
leftHeap = map(ReverseCompare, [])
rightHeap = []
heapq.heapify(rightHeap)
heapq.heapify(leftHeap)

def runningMed(n):
    #importing the global variables
    global curMedian 
    global leftHeap
    global rightHeap
    #The first time
    if (curMedian == 0):
        curMedian = n
        return curMedian

    #the second +... time
    # print "debug"
    # print rightHeap
    # print leftHeap
    # heapq.heappush(leftHeap, 3)
    # heapq.heappush(rightHeap, 3)
    # print rightHeap

    print "length of heaps"
    print len(rightHeap)
    print len(leftHeap)

    if (len(rightHeap) > len(leftHeap) + 2):
        print "Debug long right heap"

        if(n >= curMedian):
            heapq.heappush(leftHeap, curMedian)
            curMedian = heapq.heappop(rightHeap)
            heappop.heappush(rightHeap, n)
        else:
            heapq.heappush(leftHeap, n)

    elif (len(leftHeap) > len(rightHeap) + 2):
        print "Debug long"
        if(n <= curMedian):
            heapq.heappush(rightHeap, curMedian)
            curMedian = heapq.heappop(leftHeap)
            heappop.heappush(leftHeap, n)
        else:
            heapq.heappush(rightHeap,n)

    else:
        print "Debug curMedian"
        print n
        print curMedian
        if (n > curMedian):
            heapq.heappush(rightHeap, n)
        else:
            heapq.heappush(leftHeap,n)

    #TReturn the median:

    if (len(leftHeap) == len(rightHeap)):
        return curMedian
    elif (len(leftHeap) > len(rightHeap)):
        return (heapq.heappop(leftHeap) + curMedian)/2
    else:
        return (heapq.heappop(rightHeap) + curMedian)/2


if __name__ == "__main__":
    #TODO: Input/output names must be changed
    inputFile = open('numbers.txt', 'r')
    outputFile = open('output.txt', 'w')

    for line in inputFile:
        num = int(line.rstrip('\n'))
        med = runningMed(num)
        outputFile.write(str(med) + '\n')

    inputFile.close()
    outputFile.close()

Solution

  • Has nothing to do with scope. The heaps don't grow because you pop newly added elements right off at the end:

        return (heapq.heappop(leftHeap) + curMedian)/2
    else:
        return (heapq.heappop(rightHeap) + curMedian)/2
    

    Just look at the max/min element without popping it off:

        return (leftHeap[0] + curMedian)/2
    else:
        return (rightHeap[0] + curMedian)/2
    

    My own version I mentioned in the comments:

    from heapq import heappush, heappop
    
    left, right = [], []
    def runmed(n):
        global left, right
        if len(left) <= len(right):
            heappush(left, -n)
        else:
            heappush(right, n)
        if right and -left[0] > right[0]:
            heappush(left, -heappop(right))
            heappush(right, -heappop(left))
        if len(left) > len(right):
            return -left[0]
        return (right[0] - left[0]) / 2.0
    
    • left is a max-heap of the smaller half of the numbers and contains those numbers negated to get the max-heap functionality.
    • right is a min-heap of the larger half of the numbers.
    • left is always the same size as right, or one larger.

    Test code:

    import random
    numbers = []
    for _ in range(15):
        n = random.randrange(100)
        numbers.append(n)
        print '{:<4} is median of {}'.format(runmed(n), sorted(numbers))
    

    Output:

    38   is median of [38]
    27.5 is median of [17, 38]
    38   is median of [17, 38, 79]
    27.5 is median of [4, 17, 38, 79]
    17   is median of [4, 12, 17, 38, 79]
    27.5 is median of [4, 12, 17, 38, 63, 79]
    38   is median of [4, 12, 17, 38, 63, 69, 79]
    35.0 is median of [4, 12, 17, 32, 38, 63, 69, 79]
    38   is median of [4, 12, 17, 32, 38, 39, 63, 69, 79]
    38.5 is median of [4, 12, 17, 32, 38, 39, 63, 69, 79, 82]
    39   is median of [4, 12, 17, 32, 38, 39, 47, 63, 69, 79, 82]
    38.5 is median of [4, 12, 17, 21, 32, 38, 39, 47, 63, 69, 79, 82]
    38   is median of [4, 12, 17, 21, 25, 32, 38, 39, 47, 63, 69, 79, 82]
    35.0 is median of [4, 12, 14, 17, 21, 25, 32, 38, 39, 47, 63, 69, 79, 82]
    38   is median of [4, 12, 14, 17, 21, 25, 32, 38, 39, 47, 62, 63, 69, 79, 82]