Search code examples
python pandas neo4j cypher neo4j-driver

Python loading script for neo4j returns ValueError


I'm relatively new to programming in general (business analytics student turned data analyst), and I'm testing a Python script that iterates over CSV rows and constructs a Cypher query for each row to load into Neo4j:

import pandas as pd
from neo4j import GraphDatabase

# Widen the column display limit for printed DataFrames.
# NOTE(review): -1 is presumably meant as "no limit"; newer pandas releases
# expect None here instead -- confirm against the installed version.
pd.set_option('display.max_colwidth', -1)

# neo4j credentials
uri= "bolt://localhost:7687"
userName= "neo4j"
password= "password"


# Load the first six columns of the CSV, taking column names from the first row.
df = pd.read_csv('C://Users/ABC/Documents/Test/Test/lineage_stored_procedure_dedup.csv', 
                 sep=',', index_col=None, header=0,usecols=[0,1,2,3,4,5])

# NOTE: the result of str.replace is not assigned back, so df.columns is
# left unchanged by this line.
df.columns.str.replace(' ', '')

graphDB_Driver  = GraphDatabase.driver(uri, auth=(userName, password))

with graphDB_Driver.session() as graphDB_Session:
    # NOTE: iterrows() yields (index, row) tuples, but the query below indexes
    # df rather than row. df['Parent_Procedure'] is the whole column, so cq is
    # a pandas Series (one query string per row) instead of a single str.
    for row in df.iterrows():
        cq = 'merge (p:Program{programName:"'+df['Parent_Procedure']+'"}) set p.type = "'+df['Parent_Object_Type']+'"'
        # The Series is handed to run(); this is the call shown in the traceback.
        res = graphDB_Session.run(cq)

graphDB_Driver.close()

I get the following error -

Traceback (most recent call last):

  File "<ipython-input-91-01ba397763e3>", line 1, in <module>
    runfile('C:/Users/ABC/Documents/Test/Test/StoredProcLoadScript.py', wdir='C:/Users/ABC/Documents/Test/Test')

  File "C:\Users\ABC\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 827, in runfile
    execfile(filename, namespace)

  File "C:\Users\ABC\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile
    exec(compile(f.read(), filename, 'exec'), namespace)

  File "C:/Users/ABC/Documents/Test/Test/StoredProcLoadScript.py", line 35, in <module>
    res = graphDB_Session.run(cq)

  File "C:\Users\ABC\Anaconda3\lib\site-packages\neo4j\__init__.py", line 429, in run
    if not statement:

  File "C:\Users\ABC\Anaconda3\lib\site-packages\pandas\core\generic.py", line 1555, in __nonzero__
    self.__class__.__name__

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

I understand that this error usually means I did not use the correct bitwise operators, as per the pandas documentation. But I don't understand where I would even have needed to use them in this code to begin with. I'd appreciate any and all help. Thank you.


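Solution

The error has nothing to do with bitwise operators. Inside the loop the query is built from `df['Parent_Procedure']`, which is the entire column (a pandas Series), so `cq` is itself a Series rather than a string. When the driver evaluates `if not statement:` on it, pandas raises the "truth value of a Series is ambiguous" ValueError. Unpack each item from `df.iterrows()` as `index, row` and read the values from `row` instead of `df`: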

  • #Connect to the neo4j database server
    graphDB_Driver = GraphDatabase.driver(uri, auth=(userName, password))

    # Values from the CSV are passed as query parameters ($name, $type, ...)
    # instead of being concatenated into the Cypher text, so a quote or
    # backslash in a procedure name cannot break or alter the query.
    merge_program = 'merge (p:Program{programName:$name}) set p.type = $type'
    merge_call = (
        'match (p1:Program{programName:$parent}) '
        'match (p2:Program{programName:$called}) '
        'merge (p1)-[:CALLS_TO]->(p2)'
    )

    try:
        with graphDB_Driver.session() as graphDB_Session:
            # Nodes are written in two separate passes before any relationship
            # is created, so every match in the last pass finds both endpoints.

            # CREATE NODES (:Program{Parent_Procedure}) set property 'type' = Parent_Object_Type
            for name, object_type in zip(df["Parent_Procedure"], df["Parent_Object_Type"]):
                res1 = graphDB_Session.run(merge_program, {"name": name, "type": object_type})
                print(res1)

            # CREATE NODES (:Program{Called_Procedure}) set property 'type' = Called_Object_Type
            for name, object_type in zip(df["Called_Procedure"], df["Called_Object_Type"]):
                res2 = graphDB_Session.run(merge_program, {"name": name, "type": object_type})
                print(res2)

            # Create relationship - (Parent_Procedure)-[:CALLS_TO]->(Called_Procedure)
            for parent, called in zip(df["Parent_Procedure"], df["Called_Procedure"]):
                res3 = graphDB_Session.run(merge_call, {"parent": parent, "called": called})
                print(res3)
    finally:
        # Release the driver's connections even if one of the queries raised.
        graphDB_Driver.close()