Search code examples
mongodbmongodb-queryaggregation-frameworkapache-drill

JSON Structure Data Manipulation in MongoDB


I am new to mongodb and facing a problem with the data structure. The hierarchy of the data is not visible. For instance, I have the data in format of

{
        "FCILTY_ID" : 154,
        "ACCT_NO" : 2.14782e+008,
        "STRING_DC_CD" : 8,
        "STRING_DTS" : "25-JAN-14",
        "STRING_ID_NO" : 1,
        "STRING_ITEM_NO" : 0,
        "CHILD_OF_CD" : "",
        "BINTYPE_NO" : 244,
        "PTXT_CODE_STR" : "8.1.71.4.0.0.0.13",
        "PTXT_DESC_TXT" : "DC DATE =",
        "VALUE_NO" : 2.37024e+007,
        "VALUE_FREETEXT_TXT" : "",
        "VALUE_DTS" : "25-JAN-14" 
}
 {
     "FCILTY_ID" : 154,
    "ACCT_NO" : 2.14782e+008,
    "STRING_DC_CD" : 8,
    "STRING_DTS" : "25-JAN-14",
    "STRING_ID_NO" : 1,
    "STRING_ITEM_NO" : 2,
    "CHILD_OF_CD" : "",
    "BINTYPE_NO" : 244,
    "PTXT_CODE_STR" : "8.1.71.4.0.0.0.167",
    "PTXT_DESC_TXT" : "START TIME",
    "VALUE_NO" : 2.37024e+007,
    "VALUE_FREETEXT_TXT" : "",
    "VALUE_DTS" : "25-JAN-14"
}
 {
    "FCILTY_ID" : 154,
    "ACCT_NO" : 2.14782e+008,
    "STRING_DC_CD" : 8,
    "STRING_DTS" : "25-JAN-14",
    "STRING_ID_NO" : 1,
    "STRING_ITEM_NO" : 3,
    "CHILD_OF_CD" : "",
    "BINTYPE_NO" : 241,
    "PTXT_CODE_STR" : "8.1.71.4.0.0.0.153",
    "PTXT_DESC_TXT" : "ORDER TYPE",
    "VALUE_NO" : 0,
    "VALUE_FREETEXT_TXT" : "",
    "VALUE_DTS" : ""
}

I am trying hard but not able to make the data in ideal structure using aggregate and update. I would like to have the data structure like

    {
     "FCILTY_ID" : 154,
        "ACCT_NO" : 2.14782e+008,
        "STRING_DC_CD" : 8,
        "STRING_DTS" : "25-JAN-14",
        "STRING_ID_NO" : 1,

        "ITEM":
[ {

    {"STRING_ITEM_NO" : 0,
        "CHILD_OF_CD" : "",
        "BINTYPE_NO" : 244,
        "PTXT_CODE_STR" : "8.1.71.4.0.0.0.13",
        "PTXT_DESC_TXT" : "DC DATE =",
        "VALUE_NO" : 2.37024e+007,
        "VALUE_FREETEXT_TXT" : "",
        "VALUE_DTS" : "25-JAN-14"}

    {
    "STRING_ITEM_NO" : 2,
        "CHILD_OF_CD" : "",
        "BINTYPE_NO" : 244,
        "PTXT_CODE_STR" : "8.1.71.4.0.0.0.167",
        "PTXT_DESC_TXT" : "START TIME",
        "VALUE_NO" : 2.37024e+007,
        "VALUE_FREETEXT_TXT" : "",
        "VALUE_DTS" : "25-JAN-14"}

    {
    "STRING_ITEM_NO" : 3,
        "CHILD_OF_CD" : "",
        "BINTYPE_NO" : 241,
        "PTXT_CODE_STR" : "8.1.71.4.0.0.0.153",
        "PTXT_DESC_TXT" : "ORDER TYPE",
        "VALUE_NO" : 0,
        "VALUE_FREETEXT_TXT" : "",
        "VALUE_DTS" : ""}
    }

]    }

Is this feasible to do in mongodb? If not is there any other tool which I can use to achieve this. I want to insert the data in apache drill but before that I want the data structure in proper hierarchy. Thanks in advance!


Solution

  • Use mongo aggregation framework to get the results.

    1> First group by FCILTY_ID,ACCT_NO,STRING_DC_CD,STRING_DTS,STRING_ID_NO

    2> Push all remaining data into ITEM

    3> Last used project.

    So mongo aggregation query looks like :

    db.collectionName.aggregate([
        {
            "$group": {
                "_id": {
                    "FCILTY_ID": "$FCILTY_ID",
                    "ACCT_NO": "$ACCT_NO",
                    "STRING_DC_CD": "$STRING_DC_CD",
                    "STRING_DTS": "$STRING_DTS",
                    "STRING_ID_NO": "$STRING_ID_NO"
                },
                "ITEM": {
                    "$push": {
                        "STRING_ITEM_NO": "$STRING_ITEM_NO",
                        "CHILD_OF_CD": "$CHILD_OF_CD",
                        "BINTYPE_NO": "$BINTYPE_NO",
                        "PTXT_CODE_STR": "$PTXT_CODE_STR",
                        "PTXT_DESC_TXT": "$PTXT_DESC_TXT",
                        "VALUE_NO": "$VALUE_NO",
                        "VALUE_FREETEXT_TXT": "$VALUE_FREETEXT_TXT",
                        "VALUE_DTS": "$VALUE_DTS"
                    }
                }
            }
        },
        {
            "$project": {
                "_id": 0,
                "FCILTY_ID": "$_id.FCILTY_ID",
                "ACCT_NO": "$_id.ACCT_NO",
                "STRING_DC_CD": "$_id.STRING_DC_CD",
                "STRING_DTS": "$_id.STRING_DTS",
                "STRING_ID_NO": "$_id.sid",
                "ITEM": 1
            }
        }
    ]).pretty()
    

    If you want to add the above result in new collection then add $out in aggregation.