I want to get the posts related to a user. Which one is faster:
to find the User and populate posts
User.findOne({ _id: id}).populate("posts")
Or to search the Post model directly?
Post.find({ owner: user_id })
This benchmark code suggests that Post.find({ owner: user_id })
is somewhat faster.
const mongoose = require('mongoose');
const { Schema } = mongoose;

// Benchmark size: NUM_USERS users, each owning NUM_POSTS_PER_USER posts.
const NUM_USERS = 100;
const NUM_POSTS_PER_USER = 10;

mongoose.connect('mongodb://localhost:27017/testdb', { useNewUrlParser: true });

// A user keeps an array of references to its posts; this is the path that
// populate("posts") resolves.
const userSchema = new Schema({
  posts: [{ type: Schema.Types.ObjectId, ref: 'Post' }],
});

// A post points back at its owning user; Post.find({ owner }) filters on it.
// NOTE(review): no index is declared on `owner` here, so that filter
// presumably scans the collection; confirm before trusting the timings.
const postSchema = new Schema({
  owner: { type: Schema.Types.ObjectId, ref: 'User' },
  title: String,
  content: String,
});

const User = mongoose.model('User', userSchema);
const Post = mongoose.model('Post', postSchema);

// Ids of the seeded users: filled in by seed(), iterated by both benchmarks.
const userIds = [];
/**
 * Empty the User and Post collections, then insert NUM_USERS users that each
 * own NUM_POSTS_PER_USER posts, recording every user's _id in `userIds`.
 *
 * Documents are saved one at a time, in order.
 *
 * @returns {Promise<void>} Resolves once every user and post has been saved.
 */
async function seed() {
  await User.deleteMany({});
  await Post.deleteMany({});
  // The collections were just emptied, so ids recorded by an earlier call no
  // longer refer to existing users; drop them before re-populating.
  userIds.length = 0;

  // Every post carries the same payload, so build the strings once.
  const title = 'a'.repeat(50);
  const content = 'b'.repeat(1000);

  for (let userIndex = 0; userIndex < NUM_USERS; ++userIndex) {
    const user = new User();
    await user.save();

    for (let postIndex = 0; postIndex < NUM_POSTS_PER_USER; ++postIndex) {
      const post = new Post({ owner: user, title, content });
      await post.save();
      user.posts.push(post);
    }

    // Second save persists the post references pushed above.
    await user.save();
    userIds.push(user._id);
  }
}
/**
 * Time loading every seeded user together with its populated `posts`.
 *
 * Users are fetched one after another (each query is awaited before the next
 * starts) and the elapsed time is reported under the 'populate' console timer.
 *
 * @returns {Promise<void>}
 */
async function benchmarkPopulate() {
  const label = 'populate';
  console.time(label);
  for (let idx = 0; idx < userIds.length; idx += 1) {
    const query = User.findOne({ _id: userIds[idx] });
    await query.populate("posts");
  }
  console.timeEnd(label);
}
/**
 * Time fetching each seeded user's posts directly from the Post model by
 * filtering on `owner`.
 *
 * Queries run one after another (each is awaited before the next starts) and
 * the elapsed time is reported under the 'find' console timer.
 *
 * @returns {Promise<void>}
 */
async function benchmarkFind() {
  const label = 'find';
  console.time(label);
  for (let idx = 0; idx < userIds.length; idx += 1) {
    const ownerId = userIds[idx];
    await Post.find({ owner: ownerId });
  }
  console.timeEnd(label);
}
/**
 * Seed the database, then run the two benchmarks twice each, alternating
 * between them, and finally close the mongoose connection.
 *
 * @returns {Promise<void>} Rejects with the first error raised by a step.
 */
async function main() {
  try {
    await seed();
    await benchmarkPopulate();
    await benchmarkFind();
    await benchmarkPopulate();
    await benchmarkFind();
  } finally {
    // Disconnect even when a step throws, so a failed run does not leave the
    // connection open.
    await mongoose.disconnect();
  }
}
// Report a failed run and exit non-zero instead of leaving the promise
// returned by main() without a rejection handler.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Output:
populate: 217.534ms
find: 121.905ms
populate: 169.181ms
find: 120.171ms
This is not surprising since Post.find({ owner: user_id })
only needs to query one collection.
These results are fairly consistent across runs (and even if you reverse the order of the benchmarks).
Your mileage may vary and this difference doesn't really matter, especially if you're querying the database over a network.