Tags: mongodb, query-optimization, sharding

Optimize Mongoid find query performance


I have a MongoDB collection that stores user_id (foreign key) and address_id (foreign key). The collection currently holds more than 5 million records. I have 3 shards, and the collection is sharded as follows:

db.adminCommand({shardCollection: "my_db.user_addresses", key: { user_id: 1, address_id: 1}})

Running explain on the query gives:

pp UserAddress.where(id: '5ace54343b816c0cdf4b2aa9').explain
{"queryPlanner"=>
  {"mongosPlannerVersion"=>1,
   "winningPlan"=>
    {"stage"=>"SHARD_MERGE",
     "shards"=>
      [{"shardName"=>"ShardOne",
        "connectionString"=>
         "ShardOne/ip-xxx-xx-0-111:17018,ip-xxx-xx-9-99:17017",
        "serverInfo"=>
         {"host"=>"ip-xxx-xx-9-99",
          "port"=>17017,
          "version"=>"3.6.3",
          "gitVersion"=>"9586e557d54ef70f9ca4b43c26892cd55257e1a5"},
        "plannerVersion"=>1,
        "namespace"=>"my_db.user_addresses",
        "indexFilterSet"=>false,
        "parsedQuery"=>
         {"_id"=>{"$eq"=>BSON::ObjectId('5ace54343b816c0cdf4b2aa9')}},
        "winningPlan"=>
         {"stage"=>"SHARDING_FILTER", "inputStage"=>{"stage"=>"IDHACK"}},
        "rejectedPlans"=>[]},
       {"shardName"=>"ShardTwo",
        "connectionString"=>
         "ShardTwo/ip-xxx-xx-9-222:11018,ip-xxx-xx-9-66:11017",
        "serverInfo"=>
         {"host"=>"ip-xxx-xx-9-66",
          "port"=>11017,
          "version"=>"3.6.3",
          "gitVersion"=>"9586e557d54ef70f9ca4b43c26892cd55257e1a5"},
        "plannerVersion"=>1,
        "namespace"=>"my_db.user_addresses",
        "indexFilterSet"=>false,
        "parsedQuery"=>
         {"_id"=>{"$eq"=>BSON::ObjectId('5ace54343b816c0cdf4b2aa9')}},
        "winningPlan"=>
         {"stage"=>"SHARDING_FILTER", "inputStage"=>{"stage"=>"IDHACK"}},
        "rejectedPlans"=>[]},
       {"shardName"=>"ShardThree",
        "connectionString"=>
         "ShardThree/ip-xxx-xx-9-143:88888,ip-xxx-xx-0-87:88887",
        "serverInfo"=>
         {"host"=>"ip-xxx-xx-0-87",
          "port"=>88887,
          "version"=>"3.6.3",
          "gitVersion"=>"9586e557d54ef70f9ca4b43c26892cd55257e1a5"},
        "plannerVersion"=>1,
        "namespace"=>"my_db.user_addresses",
        "indexFilterSet"=>false,
        "parsedQuery"=>
         {"_id"=>{"$eq"=>BSON::ObjectId('5ace54343b816c0cdf4b2aa9')}},
        "winningPlan"=>
         {"stage"=>"SHARDING_FILTER", "inputStage"=>{"stage"=>"IDHACK"}},
        "rejectedPlans"=>[]}]}},
 "executionStats"=>
  {"nReturned"=>1,
   "executionTimeMillis"=>1,
   "totalKeysExamined"=>1,
   "totalDocsExamined"=>1,
   "executionStages"=>
    {"stage"=>"SHARD_MERGE",
     "nReturned"=>1,
     "executionTimeMillis"=>1,
     "totalKeysExamined"=>1,
     "totalDocsExamined"=>1,
     "totalChildMillis"=>0,
     "shards"=>
      [{"shardName"=>"ShardOne",
        "executionSuccess"=>true,
        "executionStages"=>
         {"stage"=>"SHARDING_FILTER",
          "nReturned"=>0,
          "executionTimeMillisEstimate"=>0,
          "works"=>1,
          "advanced"=>0,
          "needTime"=>0,
          "needYield"=>0,
          "saveState"=>0,
          "restoreState"=>0,
          "isEOF"=>1,
          "invalidates"=>0,
          "chunkSkips"=>0,
          "inputStage"=>
           {"stage"=>"IDHACK",
            "nReturned"=>0,
            "executionTimeMillisEstimate"=>0,
            "works"=>1,
            "advanced"=>0,
            "needTime"=>0,
            "needYield"=>0,
            "saveState"=>0,
            "restoreState"=>0,
            "isEOF"=>1,
            "invalidates"=>0,
            "keysExamined"=>0,
            "docsExamined"=>0}}},
       {"shardName"=>"ShardTwo",
        "executionSuccess"=>true,
        "executionStages"=>
         {"stage"=>"SHARDING_FILTER",
          "nReturned"=>0,
          "executionTimeMillisEstimate"=>0,
          "works"=>1,
          "advanced"=>0,
          "needTime"=>0,
          "needYield"=>0,
          "saveState"=>0,
          "restoreState"=>0,
          "isEOF"=>1,
          "invalidates"=>0,
          "chunkSkips"=>0,
          "inputStage"=>
           {"stage"=>"IDHACK",
            "nReturned"=>0,
            "executionTimeMillisEstimate"=>0,
            "works"=>1,
            "advanced"=>0,
            "needTime"=>0,
            "needYield"=>0,
            "saveState"=>0,
            "restoreState"=>0,
            "isEOF"=>1,
            "invalidates"=>0,
            "keysExamined"=>0,
            "docsExamined"=>0}}},
       {"shardName"=>"ShardThree",
        "executionSuccess"=>true,
        "executionStages"=>
         {"stage"=>"SHARDING_FILTER",
          "nReturned"=>1,
          "executionTimeMillisEstimate"=>0,
          "works"=>2,
          "advanced"=>1,
          "needTime"=>0,
          "needYield"=>0,
          "saveState"=>0,
          "restoreState"=>0,
          "isEOF"=>1,
          "invalidates"=>0,
          "chunkSkips"=>0,
          "inputStage"=>
           {"stage"=>"IDHACK",
            "nReturned"=>1,
            "executionTimeMillisEstimate"=>0,
            "works"=>1,
            "advanced"=>1,
            "needTime"=>0,
            "needYield"=>0,
            "saveState"=>0,
            "restoreState"=>0,
            "isEOF"=>1,
            "invalidates"=>0,
            "keysExamined"=>1,
            "docsExamined"=>1}}}]},
   "allPlansExecution"=>
    [{"shardName"=>"ShardOne", "allPlans"=>[]},
     {"shardName"=>"ShardTwo", "allPlans"=>[]},
     {"shardName"=>"ShardThree", "allPlans"=>[]}]},
 "ok"=>1.0,
 "$clusterTime"=>
  {"clusterTime"=>
    #<BSON::Timestamp:0x31dbca5d @increment=475, @seconds=1523618199>,
   "signature"=>
    {"hash"=><BSON::Binary:0x2786 type=generic data=0x57ecb8e45eee5178...>,
     "keyId"=>6537488309583609875}},
 "operationTime"=>
  #<BSON::Timestamp:0x21ebf9be @increment=474, @seconds=1523618199>}

The problem is that New Relic reports this as the most time-consuming of all our database queries:

Avg response time: 50,100 ms
Min: 1.37 ms
Max: 62,400 ms
Throughput: 104 cpm

How do we optimize the find query?


Solution

  • The gap between Min (1.37 ms) and Max (62,400 ms) suggests the problem is not with the query itself but with the cluster, for example one of the shards hanging for a minute at a time. The "stage"=>"IDHACK" in every shard's plan basically says the query itself would be hard to optimize any further, although the _id-only filter is still broadcast to all three shards, as the SHARD_MERGE stage shows (see the first sketch below).

    I fail to see why you sharded it in the first place. 5 million documents × 50 bytes each take less than half a GB; the whole collection easily fits in the memory of a single shard, where a covered index would make the query blazing fast (see the second sketch below).
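
First sketch. The fan-out is visible in the plan: the shard key is { user_id: 1, address_id: 1 } but the filter contains only _id, so mongos has to broadcast the find to all three shards and merge the results. As a purely complementary illustration, and assuming the calling code already knows the user_id (not shown in the question), including the shard-key prefix in the filter would let mongos route the lookup to a single shard:

pp UserAddress.where(
  id: '5ace54343b816c0cdf4b2aa9',  # _id lookup as before
  user_id: known_user_id           # hypothetical: shard-key prefix supplied by the caller
).explain

With the shard key present, the explain should report a SINGLE_SHARD stage instead of SHARD_MERGE.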
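
Second sketch. To illustrate the covered-index suggestion, here is a minimal sketch, assuming the collection is moved to a single unsharded replica set and the hot lookups are actually driven by user_id. It declares the compound index on the Mongoid model and drops to the Ruby driver for the read, because a query is only covered when the filter and projection use indexed fields exclusively and _id is excluded, which Mongoid's only cannot do (it always keeps _id):

class UserAddress
  include Mongoid::Document

  field :user_id,    type: BSON::ObjectId
  field :address_id, type: BSON::ObjectId

  # Same compound key the collection is currently sharded on.
  index({ user_id: 1, address_id: 1 })
end

# Covered read: filter and projection touch only indexed fields and
# _id is excluded, so the server answers from the index without
# fetching any documents.
def address_ids_for(user_id)
  UserAddress.collection
             .find({ user_id: user_id },
                   projection: { _id: 0, user_id: 1, address_id: 1 })
             .map { |doc| doc['address_id'] }
end

The index has to be created once (e.g. via rake db:mongoid:create_indexes); explain on the driver query should then show an IXSCAN with totalDocsExamined: 0.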