Search code examples
json import etl orientdb

Import JSON to OrientDB type document using ETL


How can I import some JSON files into OrientDB so that they are stored as documents (document type, not graph)?

My data is something like this:

    {
    "p_partkey": 1,
    "p_name": "lace spring",
    "lineorder": [{
        "customer": [{
            "c_name": "Customer#000014704"
        }],
        "lo_quantity": 49,
        "lo_orderpriority": "1-URGENT",
        "lo_discount": 3,
        "lo_shipmode": "RAIL|",
        "lo_tax": 0
    }, {
        "customer": [{
            "c_name": "Customer#000026548"
        }],
        "lo_quantity": 15,
        "lo_orderpriority": "3-MEDIUM",
        "lo_discount": 10,
        "lo_shipmode": "SHIP|",
        "lo_tax": 0
    }]
}

I created a configfile.json like the one below to import it, but it doesn't work:

{
  "config": {
    "log": "debug"
  },
  "source" : {
    "file": { "path": "/home/raphael/Documents/data/part/part1.json", "lock" : true }
  },
  "extractor" : {
    "json": {}
  },
  "transformers" : [
   { "merge": { "joinFieldName":"p_partkey"} },
   { "vertex": { "class": "part"} }
  ],
  "loader" : {
    "orientdb": {
      "dbURL": "plocal:/opt/orientdb/databases/part",
      "dbUser": "root",
      "dbPassword": "rasns1901",
      "dbAutoCreate": true,
      "tx": false,
      "batchCommit": 1000,
      "dbType": "document",
      "classes": [
        {"name": "part", "extends": "V"}
      ],      
      "indexes": [
        {"class":"part", "fields":["p_partkey:integer"], "type":"UNIQUE_HASH_INDEX" }
      ]
    }
  }
}

Is there something wrong with my config file? There is no example of this in the OrientDB documentation.


Solution

  • I gave up on using the ETL and did it with Python instead; it was easier.

    Here goes my code:

    from __future__ import division
    import csv
    import sys
    import collections
    import pyorient
    
    def Inicio():
        """Insert part1.json and part2.json as documents of class ``part``.

        Connects to the OrientDB server at 127.0.0.1:2424, opens the database
        ``db`` and, for each input file, sends the first line of the file as
        the body of an ``INSERT INTO part CONTENT`` command.

        Only the first line of each file is read, so every file must hold its
        whole JSON document on a single line.

        The database is closed even if reading a file or running a command
        raises; the exception itself is propagated to the caller.
        """
        db_name = "db"
        client = pyorient.OrientDB("127.0.0.1", 2424)
        client.connect("admin", "admin")
        client.db_open(db_name, "admin", "admin")
        try:
            for i in range(1, 3):
                # NOTE(review): this path has no leading slash, so it is
                # resolved relative to the current working directory --
                # confirm that is intended.
                path = 'home/Desktop/part' + str(i) + '.json'
                # Plain 'r' instead of the original 'rd': Python 3 rejects
                # 'rd' with ValueError("invalid mode").
                with open(path, 'r') as json_file:
                    texto = json_file.readline()
                co = 'INSERT INTO part CONTENT ' + texto
                client.command(co)
                print("Inserted:" + str(i))
        finally:
            # Always release the database, including on failure.
            client.db_close()
    
    # Run the import as soon as the script is executed.
    Inicio()
    

    The only thing you have to pay attention to is that my JSON files don't contain any line breaks (each document is on a single line), so the readline() function reads the whole document.