Search code examples
jsonmongodbsabre

Mongodb aggregate, match, group and sort on complex JSON


I receive a large amount of JSON back (flight data) which I've been thinking about using mongo as the tool to query/filter it before returning back to the UI.

Here's the json response from Sabre (flight search)

{
  "OTA_AirLowFareSearchRS": {
    "PricedItineraries": {
      "PricedItinerary": [
        {
          "SequenceNumber": 1,
          "AirItinerary": {
            "OriginDestinationOptions": {
              "OriginDestinationOption": [
                {
                  "FlightSegment": [
                    {
                      "DepartureDateTime": "2015-11-12T11:40:00",
                      "ArrivalDateTime": "2015-11-12T17:35:00",
                      "FlightNumber": "1980"
                    },
                    {
                      "DepartureDateTime": "2015-11-12T19:35:00",
                      "ArrivalDateTime": "2015-11-13T02:00:00",
                      "FlightNumber": "760"
                    }
                  ]
                },
                {
                  "FlightSegment": [
                    {
                      "DepartureDateTime": "2015-11-19T08:25:00",
                      "ArrivalDateTime": "2015-11-19T11:40:00",
                      "FlightNumber": "763"
                    },
                    {
                      "DepartureDateTime": "2015-11-19T12:55:00",
                      "ArrivalDateTime": "2015-11-19T15:05:00",
                      "FlightNumber": "1985"
                    }
                  ]
                }
              ]
            }
          },
          "AirItineraryPricingInfo": [
            {
              "ItinTotalFare": {
                "TotalFare": {
                  "Amount": 269.56,
                  "CurrencyCode": "GBP"
                }
              }
            }
          ]
        },
        {
          "SequenceNumber": 2,
          "AirItinerary": {
            "OriginDestinationOptions": {
              "OriginDestinationOption": [
                {
                  "FlightSegment": [
                    {
                      "DepartureDateTime": "2015-11-12T16:45:00",
                      "ArrivalDateTime": "2015-11-12T22:40:00",
                      "FlightNumber": "1986"
                    },
                    {
                      "DepartureDateTime": "2015-11-13T00:40:00",
                      "ArrivalDateTime": "2015-11-13T07:10:00",
                      "FlightNumber": "762"
                    }
                  ]
                },
                {
                  "ElapsedTime": 640,
                  "FlightSegment": [
                    {
                      "DepartureDateTime": "2015-11-19T08:25:00",
                      "ArrivalDateTime": "2015-11-19T11:40:00",
                      "FlightNumber": "763"
                    },
                    {
                      "DepartureDateTime": "2015-11-19T12:55:00",
                      "ArrivalDateTime": "2015-11-19T15:05:00",
                      "FlightNumber": "1985"
                    }
                  ]
                }
              ]
            }
          },
          "AirItineraryPricingInfo": [
            {
              "ItinTotalFare": {
                "TotalFare": {
                  "Amount": 269.56,
                  "CurrencyCode": "GBP"
                }
              }
            }
          ]
        },
        {
          "SequenceNumber": 6,
          "AirItinerary": {
            "OriginDestinationOptions": {
              "OriginDestinationOption": [
                {
                  "FlightSegment": [
                    {
                      "DepartureDateTime": "2015-11-12T11:40:00",
                      "ArrivalDateTime": "2015-11-12T17:35:00",
                      "FlightNumber": "1980"
                    },
                    {
                      "DepartureDateTime": "2015-11-12T19:35:00",
                      "ArrivalDateTime": "2015-11-13T02:00:00",
                      "FlightNumber": "760"
                    }
                  ]
                },
                {
                  "FlightSegment": [
                    {
                      "DepartureDateTime": "2015-11-19T03:15:00",
                      "ArrivalDateTime": "2015-11-19T06:30:00",
                      "FlightNumber": "761"
                    },
                    {
                      "DepartureDateTime": "2015-11-19T12:55:00",
                      "ArrivalDateTime": "2015-11-19T15:05:00",
                      "FlightNumber": "1985"
                    }
                  ]
                }
              ]
            }
          },
          "AirItineraryPricingInfo": [
            {
              "ItinTotalFare": {
                "TotalFare": {
                  "Amount": 269.56
                }
              }
            }
          ]
        }
      ]
    }
  }
}

I've been trying to get this into a view that I want but struggling with the complexity of all the nested arrays. My question is how can I achieve a desired result like this:

{
    'Price': 269.56, <-- //Group on price (TotalFare)
    'Outbound': [{
        <Outbound Flights> <--- //$push? flights at array position [0] of OriginDestinationOption 
    }],
    'Inbound': [{
        <Inbound Flights> <-- // flights at array position [1] of OriginDestinationOption
    }]
},
...

The locations of the data for these in the JSON is:

Price: OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary[x].AirItineraryPricingInfo[0].ItinTotalFare.TotalFare.Amount;
Inbound: OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary[x].AirItinerary.OriginDestinationOptions.OriginDestinationOption[0]
Outbound: OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary[x].AirItinerary.OriginDestinationOptions.OriginDestinationOption[1]

Solution

  • With the current MongoDB release, the only way you can get results which are closer to what you want is by using the aggregation framework, and working on the premise that the OriginDestinationOption array will have two elememts, you'd need the $first and $last operators to select the first and last elements in the array after the $unwind operator. For now (based on the above assumptions) you may have to do with running this pipeline:

    db.flights.aggregate([
        { "$unwind": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary" },
        { "$unwind": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary.AirItineraryPricingInfo" },
        { "$unwind": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary.AirItinerary.OriginDestinationOptions.OriginDestinationOption" },    
        {
            "$project": {
                "Price": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary.AirItineraryPricingInfo.ItinTotalFare.TotalFare.Amount",
                "DestinationOptions": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary.AirItinerary.OriginDestinationOptions.OriginDestinationOption"
            }
        },    
        {
            "$group": {
                "_id": "$Price",
                "Outbound" : { "$first": "$DestinationOptions" },
                "Inbound" : { "$last": "$DestinationOptions" }
            }
        }
    ])
    

    which will yield the result (from the sample data):

    /* 1 */
    {
        "result" : [ 
            {
                "_id" : 269.56,
                "Outbound" : {
                    "ElapsedTime" : 620,
                    "FlightSegment" : [ 
                        {
                            "DepartureDateTime" : "2015-11-12T11:40:00",
                            "ArrivalDateTime" : "2015-11-12T17:35:00",
                            "StopQuantity" : 0,
                            "FlightNumber" : "1980",
                            "ElapsedTime" : 235,
                            "DepartureAirport" : {
                                "LocationCode" : "LHR",
                                "TerminalID" : "2",
                                "content" : ""
                            },
                            "ArrivalAirport" : {
                                "LocationCode" : "IST",
                                "TerminalID" : "I",
                                "content" : ""
                            },
                            "OperatingAirline" : {
                                "Code" : "TK",
                                "FlightNumber" : "1980",
                                "content" : ""
                            }
                        }, 
                        {
                            "DepartureDateTime" : "2015-11-12T19:35:00",
                            "ArrivalDateTime" : "2015-11-13T02:00:00",
                            "StopQuantity" : 0,
                            "FlightNumber" : "760",
                            "ResBookDesigCode" : "W",
                            "ElapsedTime" : 265,
                            "DepartureAirport" : {
                                "LocationCode" : "IST",
                                "TerminalID" : "I",
                                "content" : ""
                            },
                            "ArrivalAirport" : {
                                "LocationCode" : "DXB",
                                "TerminalID" : "1",
                                "content" : ""
                            },
                            "OperatingAirline" : {
                                "Code" : "TK",
                                "FlightNumber" : "760",
                                "content" : ""
                            },
                            "Equipment" : [ 
                                {
                                    "AirEquipType" : "343",
                                    "content" : ""
                                }
                            ],
                            "MarketingAirline" : {
                                "Code" : "TK",
                                "content" : ""
                            },
                            "MarriageGrp" : "I",
                            "DepartureTimeZone" : {
                                "GMTOffset" : 2
                            },
                            "ArrivalTimeZone" : {
                                "GMTOffset" : 4
                            },
                            "TPA_Extensions" : {
                                "eTicket" : {
                                    "Ind" : true
                                }
                            }
                        }
                    ]
                },
                "Inbound" : {
                    "ElapsedTime" : 730,
                    "FlightSegment" : [ 
                        {
                            "DepartureDateTime" : "2015-11-19T08:25:00",
                            "ArrivalDateTime" : "2015-11-19T11:40:00",
                            "StopQuantity" : 0,
                            "FlightNumber" : "763",
                            "ResBookDesigCode" : "W",
                            "ElapsedTime" : 315,
                            "DepartureAirport" : {
                                "LocationCode" : "DXB",
                                "TerminalID" : "1",
                                "content" : ""
                            },
                            "ArrivalAirport" : {
                                "LocationCode" : "IST",
                                "TerminalID" : "I",
                                "content" : ""
                            },
                            "OperatingAirline" : {
                                "Code" : "TK",
                                "FlightNumber" : "763",
                                "content" : ""
                            },
                            "Equipment" : [ 
                                {
                                    "AirEquipType" : "330",
                                    "content" : ""
                                }
                            ],
                            "MarketingAirline" : {
                                "Code" : "TK",
                                "content" : ""
                            },
                            "MarriageGrp" : "O",
                            "DepartureTimeZone" : {
                                "GMTOffset" : 4
                            },
                            "ArrivalTimeZone" : {
                                "GMTOffset" : 2
                            },
                            "TPA_Extensions" : {
                                "eTicket" : {
                                    "Ind" : true
                                }
                            }
                        }, 
                        {
                            "DepartureDateTime" : "2015-11-19T14:25:00",
                            "ArrivalDateTime" : "2015-11-19T16:35:00",
                            "StopQuantity" : 0,
                            "FlightNumber" : "1971",
                            "ResBookDesigCode" : "W",
                            "ElapsedTime" : 250,
                            "DepartureAirport" : {
                                "LocationCode" : "IST",
                                "TerminalID" : "I",
                                "content" : ""
                            },
                            "ArrivalAirport" : {
                                "LocationCode" : "LHR",
                                "TerminalID" : "2",
                                "content" : ""
                            },
                            "OperatingAirline" : {
                                "Code" : "TK",
                                "FlightNumber" : "1971",
                                "content" : ""
                            },
                            "Equipment" : [ 
                                {
                                    "AirEquipType" : "32B",
                                    "content" : ""
                                }
                            ],
                            "MarketingAirline" : {
                                "Code" : "TK",
                                "content" : ""
                            },
                            "MarriageGrp" : "I",
                            "DepartureTimeZone" : {
                                "GMTOffset" : 2
                            },
                            "ArrivalTimeZone" : {
                                "GMTOffset" : 0
                            },
                            "TPA_Extensions" : {
                                "eTicket" : {
                                    "Ind" : true
                                }
                            }
                        }
                    ]
                }
            }
        ],
        "ok" : 1
    }
    

    However, with the future releases (MongoDB 3.2 and newer), there are two operators $slice and $arrayElemAt which will work for you to produce the desired result. The $slice operator returns a subset of an array and the $arrayElemAt returns the element at the specified array index.

    Thus you will implement the pipeline as follows -

    Case 1. Using the $slice operator:

    db.flights.aggregate([
        { "$unwind": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary" },
        { "$unwind": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary.AirItineraryPricingInfo" },
        { "$unwind": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary.AirItinerary.OriginDestinationOptions.OriginDestinationOption" },    
        {
            "$project": {
                "Price": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary.AirItineraryPricingInfo.ItinTotalFare.TotalFare.Amount",
                "DestinationOptions": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary.AirItinerary.OriginDestinationOptions.OriginDestinationOption"
            }
        },    
        {
            "$group": {
                "_id": "$Price",
                "DestinationOptions" : { "$push": "$DestinationOptions" }
            }
        },
        { 
            "$project": { 
                "Inbound": { "$slice": [ "$DestinationOptions", 0, 1] },
                "Outbound": { "$slice": [ "$DestinationOptions", 1, 1 ] },
                "Price": "$_id",
                "_id": 0
            }
        }
    ])
    

    Case 2. Using the $arrayElemAt operator:

    db.flights.aggregate([
        { "$unwind": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary" },
        { "$unwind": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary.AirItineraryPricingInfo" },
        { "$unwind": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary.AirItinerary.OriginDestinationOptions.OriginDestinationOption" },    
        {
            "$project": {
                "Price": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary.AirItineraryPricingInfo.ItinTotalFare.TotalFare.Amount",
                "DestinationOptions": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary.AirItinerary.OriginDestinationOptions.OriginDestinationOption"
            }
        },    
        {
            "$group": {
                "_id": "$Price",
                "DestinationOptions" : { "$push": "$DestinationOptions" }
            }
        },
        { 
            "$project": { 
                "Inbound": { "$arrayElemAt": [ "$DestinationOptions", 0] },
                "Outbound": { "$arrayElemAt": [ "$DestinationOptions", 1 ] },
                "Price": "$_id",
                "_id": 0
            }
        }
    ])