Search code examples
csv, graph, import, etl, orientdb

orientdb load graph csv of nodes and edges


I'm a newbie in OrientDB. I have a CSV file which contains both the nodes and the edges, and I need to create a graph out of that CSV file. The CSV file:

"p1","p2","score"
"LGG_00001","LGG_01682",282
"LGG_00001","LGG_01831",183
"LGG_00001","LGG_01491",238

The edge is IsActingWith, which has the score attribute.

{
"source": {
    "file": {
        "path": "C:/Users/sams/Desktop/OrientDB2/lac2.csv"
    }
},
"extractor": {
    "csv": {}
},
"transformers": [
    {
        "vertex": {
            "class": "lac2"
        }
    },
    {
        "vertex": {
            "class": "lac2"
        }
    },

    {
        "edge":
        {
                "class": "IsActingWith",
                "joinFieldName": "score_p",
                "lookup": "acore",
                "direction": "out"
        }
    }

],
"loader": {
    "orientdb": {
        "dbURL": "plocal:C:/Users/sams/Desktop/OrientDB2/database/proj",
        "dbType": "graph",
        "dbAutoCreate": true,
        "classes": [
            {
                "name": "lac2",
                "extends": "V"
            },
            {
                "name": "lac2",
                "extends": "V"
            },
            {
                "name": "IsActingWith",
                "extends": "E"
            },

        ]

    }
}
}

That is what I tried, but it does not seem logical to me. The final result I'm looking for is a graph made of p1 -> IsActingWith -> p2, where each IsActingWith edge has a score property taken from the score column.


Solution

  • Maybe there's a better solution, but this works. My plan is to use three different ETL scripts: the first and second insert the vertices, and the third creates the edges. Of course, you'll need to execute them in order.

    vertex_import_p1.json

    {
        "source": { "file": { "path": "/home/ivan/Cose/OrientDB/issues/stack/44641116/file.csv" } },
        "extractor": { "csv": {
            "separator": ",",
        "columns": ["p1:String","p2:String","s:Integer"] } },
        "transformers": [
            { "command": { "command": "UPDATE lac2 set p='${input.p1}' UPSERT WHERE p='${input.p1}'"} }      
        ],
        "loader": {
            "orientdb": {
                "dbURL": "plocal:/home/ivan/Cose/OrientDB/issues/stack/44641116/db",
                "dbUser": "admin",
                "dbPassword": "admin",
                "dbType": "graph",
                "classes": [
                    {"name": "lac2", "extends": "V"},
                    {"name": "isActingWith", "extends": "E"}
                ]
            }
        }
    }
    

    vertex_import_p2.json

    {
        "source": { "file": { "path": "/home/ivan/Cose/OrientDB/issues/stack/44641116/file.csv" } },
        "extractor": { "csv": {
            "separator": ",",
        "columns": ["p1:String","p2:String","s:Integer"] } },
        "transformers": [
            { "command": { "command": "UPDATE lac2 set p='${input.p2}' UPSERT WHERE p='${input.p2}'"} }      
        ],
        "loader": {
            "orientdb": {
                "dbURL": "plocal:/home/ivan/Cose/OrientDB/issues/stack/44641116/db",
                "dbUser": "admin",
                "dbPassword": "admin",
                "dbType": "graph",
                "classes": [
                    {"name": "lac2", "extends": "V"},
                    {"name": "isActingWith", "extends": "E"}
                ]
            }
        }
    }
    

    edge_import_s.json

    {
        "source": { "file": { "path": "/home/ivan/Cose/OrientDB/issues/stack/44641116/file.csv" } },
        "extractor": { "csv": {
            "separator": ",",
        "columns": ["p1:String","p2:String","s:Integer"] } },
        "transformers": [
            { "command": { "command": "CREATE EDGE isActingWith FROM (SELECT FROM lac2 WHERE p='${input.p1}') TO (SELECT FROM lac2 WHERE p='${input.p2}') set score=${input.s}"} }
        ],
        "loader": {
            "orientdb": {
                "dbURL": "plocal:/home/ivan/Cose/OrientDB/issues/stack/44641116/db",
                "dbUser": "admin",
                "dbPassword": "admin",
                "dbType": "graph",
                "classes": [
                    {"name": "lac2", "extends": "V"},
                    {"name": "isActingWith", "extends": "E"}
                ]
            }
        }
    }
    

    And here is the situation after the executions:

    orientdb {db=db}> select from lac2
    
    +----+-----+------+---------+-------------------+---------------+
    |#   |@RID |@CLASS|p        |out_isActingWith   |in_isActingWith|
    +----+-----+------+---------+-------------------+---------------+
    |0   |#21:6|lac2  |LGG_00001|[#25:5,#26:1,#27:1]|               |
    |1   |#21:7|lac2  |LGG_01682|                   |[#25:5]        |
    |2   |#22:3|lac2  |LGG_01831|                   |[#26:1]        |
    |3   |#23:1|lac2  |LGG_01491|                   |[#27:1]        |
    +----+-----+------+---------+-------------------+---------------+
    
    4 item(s) found. Query executed in 0.003 sec(s).
    orientdb {db=db}> select from isActingWith
    
    +----+-----+------------+-----+-----+-----+
    |#   |@RID |@CLASS      |score|out  |in   |
    +----+-----+------------+-----+-----+-----+
    |0   |#25:5|isActingWith|282  |#21:6|#21:7|
    |1   |#26:1|isActingWith|183  |#21:6|#22:3|
    |2   |#27:1|isActingWith|238  |#21:6|#23:1|
    +----+-----+------------+-----+-----+-----+
    
    3 item(s) found. Query executed in 0.004 sec(s).