# Resolve the target collection one step at a time.
client = MongoClient(uri)
coll = client[database][collection]
# I want the query to start executing here!
# It doesn't matter to my application if we're stuck here for a couple of seconds.
cursor = coll.find(query, sort=sort)
# ...
# This must not be slow!
doc = next(cursor)
Note: I don't want to fetch all documents right away, only the first document (or first batch of documents), so that the lazy loading does not happen later.
The cursor has an internal `_refresh` method that fetches the first batch of documents into the cursor's internal cache.
Here is a test script:
from pymongo import MongoClient
import ssl
from datetime import datetime
import time
# Demonstration script: force the cursor to send its query and cache the
# first batch up front, then use timestamps to show when each step happens.
connection_string = "mongodb://localhost:27017"
print("start", datetime.now())
# `ssl_cert_reqs=ssl.CERT_NONE` was removed in PyMongo 4 and raises a
# ConfigurationError there; `tlsAllowInvalidCertificates=True` is the
# supported way to skip certificate verification (PyMongo 3.9+).
client = MongoClient(connection_string, tlsAllowInvalidCertificates=True)
print("after connect", datetime.now())
db = client['test']
collection = db['companies']
# find() only builds the cursor; the query has not been sent yet.
documents = collection.find({}).limit(3)
print("after find", datetime.now())
# Internal PyMongo API: sends the query now and caches the first batch.
documents._refresh()  # <== THIS
print("after refresh", datetime.now())
# Simulate the application doing other work before it reads the results.
time.sleep(2)
for document in documents:
    time.sleep(1)
    print("next document", datetime.now())
    # Print the fetched document's _id
    print(document['_id'])
print("end", datetime.now())
The screenshot below shows the MongoDB profiler, with the timestamp at which the query reaches the database when `_refresh` is called:
Disclaimer: use internal methods with care, and always cover them with tests. They may change without notice.