The following Python Red Black tree implementation takes ~2s to execute the test function, which doesn't make much sense since the tree height is 14 in the case as the test printed out. So.. I assume I did something stupid in the code. Could you please offer some tips to speed up the code?
import math
BLACK = 0
RED = 1
class RBTreeNode:
def __init__(self, val):
self.val = val
self.left = None
self.right = None
self.color = RED
def setColor(n, c):
n.color = c
return n
def color(n):
if not n:
return BLACK
return n.color
def isRed(n):
return color(n) == RED
def rotateLeft(h):
x = h.right
h.right = x.left
x.left = h
x.color = h.color
h.color = RED
return x
def rotateRight(h):
x = h.left
h.left = x.right
x.right = h
x.color = h.color
h.color = RED
return x
def flipColor(n):
n.left.color = BLACK
n.right.color = BLACK
n.color = RED
return n
def insert(n, v):
if not n:
return RBTreeNode(v)
if v >= n.val:
n.right = insert(n.right, v)
else:
n.left = insert(n.left, v)
if isRed(n.right) and not isRed(n.left):
n = rotateLeft(n)
if isRed(n.left) and isRed(n.left.left):
n = rotateRight(n)
if isRed(n.left) and isRed(n.right):
n = flipColor(n)
return n
def height(n):
if not n:
return 0
h1 = height(n.left)
h2 = height(n.right)
return 1 + max(h1, h2)
def printNode(n, indent):
if not n:
return
print " " * indent,
(v, c) = n.val
print v, "(BLACK)" if c == 0 else "(RED)"
printNode(n.left, indent + 4)
printNode(n.right, indent + 4)
def test():
c = xrange(0, 32768)
tree = None
for i in c:
tree = insert(tree, i)
print 2 * math.log(len(c)) / math.log(2), height(tree)
# printNode(tree, 0)
if __name__ == "__main__":
test()
I did quick profiling of your program with cProfile
. It seems, that most amount of time is spent in color
function.
vagrant@vagrant-ubuntu-utopic-64:/vagrant$ python -m cProfile tree.py
4227046 function calls (3801045 primitive calls) in 3.498 seconds
Ordered by: standard name
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.006 0.006 3.498 3.498 tree.py:1(<module>)
1835000 1.762 0.000 1.762 0.000 tree.py:17(color)
1835000 0.602 0.000 2.364 0.000 tree.py:22(isRed)
32753 0.016 0.000 0.016 0.000 tree.py:25(rotateLeft)
32752 0.014 0.000 0.014 0.000 tree.py:41(flipColor)
458769/32768 1.071 0.000 3.478 0.000 tree.py:47(insert)
1 0.000 0.000 0.000 0.000 tree.py:6(RBTreeNode)
32768 0.013 0.000 0.013 0.000 tree.py:7(__init__)
1 0.014 0.014 3.492 3.492 tree.py:80(test)
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
If you replace if not n:
with if n is None:
you should get a decent speedup.
not n
essentially tests if __nonzero__
is implemented and then calls it. If it is not implemented then __len__
is called if it was defined.
Also note, PEP 8 says, that comparisons to None
should always be done with is
.
color
is only used by isRed
function, so you might want to remove color
function entirely or reimplement isRed
without using `color to get additional speedup.
def isRed(n):
if n is None:
return False
else:
return n.color == RED
With this implementation I got following results
vagrant@vagrant-ubuntu-utopic-64:/vagrant$ python -m cProfile tree_none_no_color.py
2392046 function calls (1966045 primitive calls) in 0.985 seconds
Ordered by: standard name
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.006 0.006 0.985 0.985 tree_none_no_color.py:1(<module>)
1835000 0.286 0.000 0.286 0.000 tree_none_no_color.py:22(isRed)
32753 0.014 0.000 0.014 0.000 tree_none_no_color.py:28(rotateLeft)
32752 0.013 0.000 0.013 0.000 tree_none_no_color.py:44(flipColor)
458769/32768 0.646 0.000 0.969 0.000 tree_none_no_color.py:50(insert)
1 0.000 0.000 0.000 0.000 tree_none_no_color.py:6(RBTreeNode)
32768 0.011 0.000 0.011 0.000 tree_none_no_color.py:7(__init__)
1 0.011 0.011 0.980 0.980 tree_none_no_color.py:83(test)
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}