How to plot cumulative distribution function in python for six sided dice simulation?

I am trying to

Plot histogram with frequencies of dice sum outcomes from the simulation.
Calculate and plot cumulative distribution function.
find and plot median.

So Far this is what I have:

import pylab
import random

sampleSize = 100


## Let's simulate the repeated throwing of a single six-sided die
singleDie = []
for i in range(sampleSize):
    newValue = random.randint(1,6)
    singleDie.append(newValue)

print "Results for throwing a single die", sampleSize, "times."
print "Mean of the sample =", pylab.mean(singleDie)
print "Median of the sample =", pylab.median(singleDie)
#print "Standard deviation of the sample =", pylab.std(singleDie)
print
print

pylab.hist(singleDie, bins=[0.5,1.5,2.5,3.5,4.5,5.5,6.5] )
pylab.xlabel('Value')
pylab.ylabel('Count')
pylab.savefig('singleDie.png')
pylab.show()




## What about repeatedly throwing two dice and summing them?
twoDice = []
for i in range(sampleSize):
    newValue = random.randint(1,6) + random.randint(1,6)
    twoDice.append(newValue)



print "Results for throwing two dices", sampleSize, "times."
print "Mean of the sample =", pylab.mean(twoDice)
print "Median of the sample =", pylab.median(twoDice)
#print "Standard deviation of the sample =", pylab.std(twoDice)

pylab.hist(twoDice, bins= pylab.arange(1.5,12.6,1.0))
pylab.xlabel('Value')
pylab.ylabel('Count')
pylab.savefig('twoDice.png')
pylab.show()

Could anyone help me how to plot cdf?

Solution

You could directly use the histogram plotting function to achieve that, e.g.

import pylab
import random
import numpy as np

sampleSize = 100


## Let's simulate the repeated throwing of a single six-sided die
singleDie = []
for i in range(sampleSize):
    newValue = random.randint(1,6)
    singleDie.append(newValue)

print "Results for throwing a single die", sampleSize, "times."
print "Mean of the sample =", pylab.mean(singleDie)
print "Median of the sample =", pylab.median(singleDie)
print "Standard deviation of the sample =", pylab.std(singleDie)


pylab.hist(singleDie, bins=[0.5,1.5,2.5,3.5,4.5,5.5,6.5] )
pylab.xlabel('Value')
pylab.ylabel('Count')
pylab.savefig('singleDie.png')
pylab.show()

## What about repeatedly throwing two dice and summing them?
twoDice = []
for i in range(sampleSize):
    newValue = random.randint(1,6) + random.randint(1,6)
    twoDice.append(newValue)

print "Results for throwing two dices", sampleSize, "times."
print "Mean of the sample =", pylab.mean(twoDice)
print "Median of the sample =", pylab.median(twoDice)
#print "Standard deviation of the sample =", pylab.std(twoDice)

pylab.hist(twoDice, bins= pylab.arange(1.5,12.6,1.0))
pylab.xlabel('Value')
pylab.ylabel('Count')
pylab.savefig('twoDice.png')
pylab.show()

pylab.hist(twoDice, bins=pylab.arange(1.5,12.6,1.0), normed=1, histtype='step', cumulative=True)
pylab.xlabel('Value')
pylab.ylabel('Fraction')
pylab.show()

Notice the new line:

pylab.hist(twoDice, bins=pylab.arange(1.5,12.6,1.0), normed=1, histtype='step', cumulative=True)

which should give directly the cdf plot. If you don't specify normed=1 you will see the scale (0-100) which will represent percentages instead of the usual probability scale (0-1).

There are also other ways to do it. For example:

import pylab
import random
import numpy as np

sampleSize = 100


## Let's simulate the repeated throwing of a single six-sided die
singleDie = []
for i in range(sampleSize):
    newValue = random.randint(1,6)
    singleDie.append(newValue)

print "Results for throwing a single die", sampleSize, "times."
print "Mean of the sample =", pylab.mean(singleDie)
print "Median of the sample =", pylab.median(singleDie)
print "Standard deviation of the sample =", pylab.std(singleDie)


pylab.hist(singleDie, bins=[0.5,1.5,2.5,3.5,4.5,5.5,6.5] )
pylab.xlabel('Value')
pylab.ylabel('Count')
pylab.savefig('singleDie.png')
pylab.show()

## What about repeatedly throwing two dice and summing them?
twoDice = []
for i in range(sampleSize):
    newValue = random.randint(1,6) + random.randint(1,6)
    twoDice.append(newValue)

print "Results for throwing two dices", sampleSize, "times."
print "Mean of the sample =", pylab.mean(twoDice)
print "Median of the sample =", pylab.median(twoDice)
#print "Standard deviation of the sample =", pylab.std(twoDice)

pylab.hist(twoDice, bins= pylab.arange(1.5,12.6,1.0))
pylab.xlabel('Value')
pylab.ylabel('Count')
pylab.savefig('twoDice.png')
pylab.show()

twod_cdf = np.array(twoDice)

X_values = np.sort(twod_cdf)
F_values = np.array(range(sampleSize))/float(sampleSize)
pylab.plot(X_values, F_values)
pylab.xlabel('Value')
pylab.ylabel('Fraction')
pylab.show()

Notice now that we sort the array, build the function and plot it.