Search code examples
node.jsmongodbmongodb-queryaggregation-frameworkmongodb-indexes

compound index and sort query involving a regex


please read comment and first answer. question is solved.

https://mongoplayground.net/p/J1Hs5RKu39G

I need help interpreting the explain output. I can see that the index is used. However, I am still not sure whether the $sort, $skip, and $limit stages will be done in memory, ie be blocking and subjected to the 100MB limit. Note that the query contains 2 $or in the $match stage. And of course, there is the $regex. So when all of that is combined in one query, I am sure what will happen.

How can I tell from the explain whether or not the $sort, $skip, and $limit stages will be done in memory, ie be blocking and subjected to the 100MB limit?

schema and index:

[
  {
    "collection": "collection",
    "count": 100,
    "content": {
      "name": {
        "type": "enum",
        "values": [
          "a",
          "b",
          "c",
          "d",
          "e"
        ]
      },
      "name_1": {
        "type": "enum",
        "values": [
          "a",
          "b",
          "c",
          "d",
          "e"
        ]
      },
      "utc": {
        "type": "enum",
        "values": [
          1,
          2,
          3,
          4,
          5,
          6,
          7,
          8,
          9
        ]
      },
      "description": {
        "type": "enum",
        "values": [
          "Coffee and cakes",
          "Gourmet hamburgers",
          "Just coffee",
          "Discount clothing",
          "Indonesian goods"
        ]
      },
      "description_2": {
        "type": "enum",
        "values": [
          "Coffee and cakes",
          "Gourmet hamburgers",
          "Just coffee",
          "Discount clothing",
          "Indonesian goods"
        ]
      }
    },
    "indexes": [
      {
        "name": "description_text_idx",
        "key": {
          "name": 1,
          "utc": 1,
          "description": 1
        }
      },
      {
        "name": "description_text_idx_2",
        "key": {
          "name": 1,
          "utc": 1,
          "description_2": 1
        }
      },
      {
        "name": "description_text_idx_3",
        "key": {
          "name_1": 1,
          "utc": 1,
          "description": 1
        }
      },
      {
        "name": "description_text_idx_4",
        "key": {
          "name_1": 1,
          "utc": 1,
          "description_2": 1
        }
      }
    ]
  }
]

query:

db.collection.aggregate([
  {
    $match: {
      $and: [
        {
          $or: [
            {
              name: "a"
            },
            {
              name_1: "b"
            }
          ]
        },
        {
          $or: [
            {
              description: {
                $regex: "and"
              }
            },
            {
              description_2: {
                $regex: "goo"
              }
            }
          ]
        }
      ]
    }
  },
  {
    $sort: {
      timestamp_utc: -1
    }
  },
  {
    $skip: 10
  },
  {
    $limit: 100
  }
]).explain()

explain:

{
  "$clusterTime": {
    "clusterTime": Timestamp(1696816196, 105),
    "signature": {
      "hash": BinData(0, "tmzq2+QI7fODnvWyR/eC3yfzclA="),
      "keyId": NumberLong(7230504570105888773)
    }
  },
  "command": {
    "$db": "49da1669e4de3fc843aebffc2dc28e0e",
    "aggregate": "collection",
    "cursor": {
      "batchSize": 1000
    },
    "maxTimeMS": NumberLong(20000),
    "pipeline": [
      {
        "$match": {
          "$and": [
            {
              "$or": [
                {
                  "name": "a"
                },
                {
                  "name_1": "b"
                }
              ]
            },
            {
              "$or": [
                {
                  "description": {
                    "$regex": "and"
                  }
                },
                {
                  "description_2": {
                    "$regex": "goo"
                  }
                }
              ]
            }
          ]
        }
      },
      {
        "$sort": {
          "timestamp_utc": -1
        }
      },
      {
        "$skip": 10
      },
      {
        "$limit": 100
      }
    ]
  },
  "explainVersion": "1",
  "operationTime": Timestamp(1696816196, 105),
  "queryPlanner": {
    "indexFilterSet": false,
    "maxIndexedAndSolutionsReached": false,
    "maxIndexedOrSolutionsReached": false,
    "maxScansToExplodeReached": false,
    "namespace": "49da1669e4de3fc843aebffc2dc28e0e.collection",
    "optimizedPipeline": true,
    "parsedQuery": {
      "$and": [
        {
          "$or": [
            {
              "name": {
                "$eq": "a"
              }
            },
            {
              "name_1": {
                "$eq": "b"
              }
            }
          ]
        },
        {
          "$or": [
            {
              "description": {
                "$regex": "and"
              }
            },
            {
              "description_2": {
                "$regex": "goo"
              }
            }
          ]
        }
      ]
    },
    "planCacheKey": "CB15319B",
    "queryHash": "30C8890A",
    "rejectedPlans": [
      {
        "inputStage": {
          "inputStage": {
            "filter": {
              "$or": [
                {
                  "description": {
                    "$regex": "and"
                  }
                },
                {
                  "description_2": {
                    "$regex": "goo"
                  }
                }
              ]
            },
            "inputStage": {
              "inputStages": [
                {
                  "direction": "forward",
                  "indexBounds": {
                    "description_2": [
                      "[MinKey, MaxKey]"
                    ],
                    "name": [
                      "[\"a\", \"a\"]"
                    ],
                    "utc": [
                      "[MinKey, MaxKey]"
                    ]
                  },
                  "indexName": "description_text_idx_2",
                  "indexVersion": 2,
                  "isMultiKey": false,
                  "isPartial": false,
                  "isSparse": false,
                  "isUnique": false,
                  "keyPattern": {
                    "description_2": 1,
                    "name": 1,
                    "utc": 1
                  },
                  "multiKeyPaths": {
                    "description_2": [],
                    "name": [],
                    "utc": []
                  },
                  "stage": "IXSCAN"
                },
                {
                  "direction": "forward",
                  "indexBounds": {
                    "description": [
                      "[MinKey, MaxKey]"
                    ],
                    "name_1": [
                      "[\"b\", \"b\"]"
                    ],
                    "utc": [
                      "[MinKey, MaxKey]"
                    ]
                  },
                  "indexName": "description_text_idx_3",
                  "indexVersion": 2,
                  "isMultiKey": false,
                  "isPartial": false,
                  "isSparse": false,
                  "isUnique": false,
                  "keyPattern": {
                    "description": 1,
                    "name_1": 1,
                    "utc": 1
                  },
                  "multiKeyPaths": {
                    "description": [],
                    "name_1": [],
                    "utc": []
                  },
                  "stage": "IXSCAN"
                }
              ],
              "stage": "OR"
            },
            "stage": "FETCH"
          },
          "limitAmount": 110,
          "memLimit": 104857600,
          "sortPattern": {
            "timestamp_utc": -1
          },
          "stage": "SORT",
          "type": "simple"
        },
        "skipAmount": 0,
        "stage": "SKIP"
      },
      {
        "inputStage": {
          "inputStage": {
            "filter": {
              "$or": [
                {
                  "description": {
                    "$regex": "and"
                  }
                },
                {
                  "description_2": {
                    "$regex": "goo"
                  }
                }
              ]
            },
            "inputStage": {
              "inputStages": [
                {
                  "direction": "forward",
                  "indexBounds": {
                    "description": [
                      "[MinKey, MaxKey]"
                    ],
                    "name": [
                      "[\"a\", \"a\"]"
                    ],
                    "utc": [
                      "[MinKey, MaxKey]"
                    ]
                  },
                  "indexName": "description_text_idx",
                  "indexVersion": 2,
                  "isMultiKey": false,
                  "isPartial": false,
                  "isSparse": false,
                  "isUnique": false,
                  "keyPattern": {
                    "description": 1,
                    "name": 1,
                    "utc": 1
                  },
                  "multiKeyPaths": {
                    "description": [],
                    "name": [],
                    "utc": []
                  },
                  "stage": "IXSCAN"
                },
                {
                  "direction": "forward",
                  "indexBounds": {
                    "description_2": [
                      "[MinKey, MaxKey]"
                    ],
                    "name_1": [
                      "[\"b\", \"b\"]"
                    ],
                    "utc": [
                      "[MinKey, MaxKey]"
                    ]
                  },
                  "indexName": "description_text_idx_4",
                  "indexVersion": 2,
                  "isMultiKey": false,
                  "isPartial": false,
                  "isSparse": false,
                  "isUnique": false,
                  "keyPattern": {
                    "description_2": 1,
                    "name_1": 1,
                    "utc": 1
                  },
                  "multiKeyPaths": {
                    "description_2": [],
                    "name_1": [],
                    "utc": []
                  },
                  "stage": "IXSCAN"
                }
              ],
              "stage": "OR"
            },
            "stage": "FETCH"
          },
          "limitAmount": 110,
          "memLimit": 104857600,
          "sortPattern": {
            "timestamp_utc": -1
          },
          "stage": "SORT",
          "type": "simple"
        },
        "skipAmount": 0,
        "stage": "SKIP"
      },
      {
        "inputStage": {
          "inputStage": {
            "filter": {
              "$or": [
                {
                  "description": {
                    "$regex": "and"
                  }
                },
                {
                  "description_2": {
                    "$regex": "goo"
                  }
                }
              ]
            },
            "inputStage": {
              "inputStages": [
                {
                  "direction": "forward",
                  "indexBounds": {
                    "description_2": [
                      "[MinKey, MaxKey]"
                    ],
                    "name": [
                      "[\"a\", \"a\"]"
                    ],
                    "utc": [
                      "[MinKey, MaxKey]"
                    ]
                  },
                  "indexName": "description_text_idx_2",
                  "indexVersion": 2,
                  "isMultiKey": false,
                  "isPartial": false,
                  "isSparse": false,
                  "isUnique": false,
                  "keyPattern": {
                    "description_2": 1,
                    "name": 1,
                    "utc": 1
                  },
                  "multiKeyPaths": {
                    "description_2": [],
                    "name": [],
                    "utc": []
                  },
                  "stage": "IXSCAN"
                },
                {
                  "direction": "forward",
                  "indexBounds": {
                    "description_2": [
                      "[MinKey, MaxKey]"
                    ],
                    "name_1": [
                      "[\"b\", \"b\"]"
                    ],
                    "utc": [
                      "[MinKey, MaxKey]"
                    ]
                  },
                  "indexName": "description_text_idx_4",
                  "indexVersion": 2,
                  "isMultiKey": false,
                  "isPartial": false,
                  "isSparse": false,
                  "isUnique": false,
                  "keyPattern": {
                    "description_2": 1,
                    "name_1": 1,
                    "utc": 1
                  },
                  "multiKeyPaths": {
                    "description_2": [],
                    "name_1": [],
                    "utc": []
                  },
                  "stage": "IXSCAN"
                }
              ],
              "stage": "OR"
            },
            "stage": "FETCH"
          },
          "limitAmount": 110,
          "memLimit": 104857600,
          "sortPattern": {
            "timestamp_utc": -1
          },
          "stage": "SORT",
          "type": "simple"
        },
        "skipAmount": 0,
        "stage": "SKIP"
      }
    ],
    "winningPlan": {
      "inputStage": {
        "inputStage": {
          "filter": {
            "$or": [
              {
                "description": {
                  "$regex": "and"
                }
              },
              {
                "description_2": {
                  "$regex": "goo"
                }
              }
            ]
          },
          "inputStage": {
            "inputStages": [
              {
                "direction": "forward",
                "indexBounds": {
                  "description": [
                    "[MinKey, MaxKey]"
                  ],
                  "name": [
                    "[\"a\", \"a\"]"
                  ],
                  "utc": [
                    "[MinKey, MaxKey]"
                  ]
                },
                "indexName": "description_text_idx",
                "indexVersion": 2,
                "isMultiKey": false,
                "isPartial": false,
                "isSparse": false,
                "isUnique": false,
                "keyPattern": {
                  "description": 1,
                  "name": 1,
                  "utc": 1
                },
                "multiKeyPaths": {
                  "description": [],
                  "name": [],
                  "utc": []
                },
                "stage": "IXSCAN"
              },
              {
                "direction": "forward",
                "indexBounds": {
                  "description": [
                    "[MinKey, MaxKey]"
                  ],
                  "name_1": [
                    "[\"b\", \"b\"]"
                  ],
                  "utc": [
                    "[MinKey, MaxKey]"
                  ]
                },
                "indexName": "description_text_idx_3",
                "indexVersion": 2,
                "isMultiKey": false,
                "isPartial": false,
                "isSparse": false,
                "isUnique": false,
                "keyPattern": {
                  "description": 1,
                  "name_1": 1,
                  "utc": 1
                },
                "multiKeyPaths": {
                  "description": [],
                  "name_1": [],
                  "utc": []
                },
                "stage": "IXSCAN"
              }
            ],
            "stage": "OR"
          },
          "stage": "FETCH"
        },
        "limitAmount": 110,
        "memLimit": 104857600,
        "sortPattern": {
          "timestamp_utc": -1
        },
        "stage": "SORT",
        "type": "simple"
      },
      "skipAmount": 0,
      "stage": "SKIP"
    }
  },
  "serverParameters": {
    "internalDocumentSourceGroupMaxMemoryBytes": 104857600,
    "internalDocumentSourceSetWindowFieldsMaxMemoryBytes": 104857600,
    "internalLookupStageIntermediateDocumentMaxSizeBytes": 104857600,
    "internalQueryFacetBufferSizeBytes": 104857600,
    "internalQueryFacetMaxOutputDocSizeBytes": 104857600,
    "internalQueryMaxAddToSetBytes": 104857600,
    "internalQueryMaxBlockingSortMemoryUsageBytes": 104857600,
    "internalQueryProhibitBlockingMergeOnMongoS": 0
  }
}

Solution

  • You can tell it from this part:

          "stage": "FETCH"
        },
        "limitAmount": 110,
        "memLimit": 104857600,
        "sortPattern": {
          "timestamp_utc": -1
        },
        "stage": "SORT",
        "type": "simple"
      },
      "skipAmount": 0,
      "stage": "SKIP"
    }
    

    SORT indicates in-memory blocking sort.

    memLimit on FETCH stage is the 100MB limit.