I'm new to python (PYTHON 2.7) and I'm trying to run a program that calculates Pearson correlations. The code is from the " collective intelligence" When I import the functions and run the Pearson's correlation
I receive this error:
>>> sim_pearson(critics,
... 'Lisa Rose','Gene Seymour')
Traceback (most recent call last):
File "<stdin>", line 2, in <module>
File "recommendations.py", line 49, in sim_pearson
sum1=sum([prefs[p1][it]] for it in si)
TypeError: unsupported operand type(s) for +: 'int' and 'list'
>>>
the code is here
#a dictionary of movie critics and their ratings of a small set of movies
critics={'Lisa Rose': {'Lady in the Water': 2.5, 'Snakes on the Plane':3.5, 'Just My Luck': 1.5,
'superman returns': 5.0, 'You, Me and Dupree': 3.5}, 'Gene Seymour':{
'Lady in the water':1.0,'Snakes on the Plane':3.5,
'superman returns':5.0, 'You, Me and Dupree':3.5}, 'Michale Philllips':{
'Lady in the Water': 2.5, 'Snakes on the Plane':3.0, 'superman returns': 3.5,
'The Night Listenr': 4.0}, 'Cludia Puig':{'Snakes on the Plane':3.5, 'Just My Luck': 3.0,
'The Night Listenr': 4.5, 'superman returns': 4.0, 'You, Me and Dupree': 2.5},
'Mick LaSalle':{'Lady in the Water': 3.0, 'Snakes on the Plane':4.0, 'Just My Luck': 2.0,
'The Night Listenr': 3.0, 'superman returns': 3.0, 'You, Me and Dupree': 2.0},
'Jack Matthews': {'Lady in the Water': 3.0, 'Snakes on the Plane':4.0,
'The Night Listenr': 3.0, 'superman returns': 5.0, 'You, Me and Dupree': 3.5},
'Toby':{'Snakes on a Plane': 4.5, 'You, Me Dupree':1.0,'superman returns':4.0}}
#Returns a distance-based similarity score for person1 and p
def sim_distance(prefs,person1,person2):
#get the list of shared_items
si={}
for item in prefs[person1]:
if item in prefs[person2]:
si[item]=1
#if they have no rating in common returns zero
if len(si)==0:
return 0
#Add up the squares of all the differences
sum_of_squares=sum(pow(prefs[person1][item]-prefs[person2][item],2)
for item in prefs[person1] if item in prefs[person2])
return 1/(1+sum_of_squares)
#returns the pearson correlation coefficient for p1 and p2
def sim_pearson(prefs,p1,p2):
#get list of mutually rated items
si={}
for item in prefs[p1]:
if item in prefs[p2]: si[item]=1
#find the number of elements
n=len(si)
#if they are no ratings in common, return 0
if n==0: return 0
#add up all the preferences
sum1=sum([prefs[p1][it]] for it in si)#reported line 49
#^
sum2=sum(prefs[p2][it] for it in si)
#sum up the squares
sum1sq=sum([pow(prefs[p1][it] for it in si)])
sum2sq=sum([pow(prefs[p2][it] for it in si)])
#sum up the products
pSum=sum([prefs[p1][it]*prefs[p2][it] for it in si])
#calculate peason score
num=pSum-(sum1*sum2/n)
den=sqrt((sum1Sq-pow(sum1,2)/n)*(sum2Sq-pow(sum2,2)/n))
r=num/den
return r
I think you wanted to write
sum1=sum([prefs[p1][it] for it in si])
instead of
sum1=sum([prefs[p1][it]] for it in si)
(see the brackets).
The error means that you are trying to sum
integers with lists.