Search code examples
javascript node.js mongodb mongoose relevance

Sorting by relevance performance issues


Is there a function or method in mongoose (and/or mongodb) that can sort a query's results by relevance, i.e. by the highest number of matching query parameters?

The example below is what I am using at the moment (the real query uses $in: [] for each field, but is otherwise the same). My collection is quite small, so performance is fine, but on larger collections this approach slows things down dramatically.

Alternatively, if there is a better performing method (outside of mongoose / mongodb) I'd be happy to know about it.

Example:

var docs = [
    {
        fruits: ['apple', 'orange', 'tomato'],
        colors: ['blue', 'green'],
        // relevance: 3
    },
    {
        fruits: ['apple', 'carrot'],
        colors: ['red', 'green'],
        // relevance: 2
    }
];

var query = {fruits: ['apple', 'orange'], colors: ['green']};

/**
 * Count how many values of a document also appear in the query.
 *
 * For every field named in `criteria`, each element of `doc[field]` that is
 * also present in `criteria[field]` adds one to the score.
 *
 * @param {Object} doc - Document whose fields hold arrays of values.
 * @param {Object} criteria - Map of field name to the array of wanted values.
 * @returns {number} Total number of matching values across all queried fields.
 */
function computeRelevance(doc, criteria) {
    return Object.keys(criteria).reduce(function (total, field) {
        var wanted = criteria[field];
        var present = doc[field];

        // A field that is absent (or not an array) on either side cannot
        // contribute matches; skip it instead of throwing a TypeError.
        if (!Array.isArray(wanted) || !Array.isArray(present)) {
            return total;
        }

        var matches = present.filter(function (value) {
            return wanted.indexOf(value) !== -1;
        });
        return total + matches.length;
    }, 0);
}

// Annotate every document in place with its relevance score.
docs.forEach(function (doc) {
    doc.relevance = computeRelevance(doc, query);
});

Result:

// State of `docs` once each document has been scored: `relevance` holds the
// number of the document's values that also appear in the query.
var docs = [
    {
        fruits: ['apple', 'orange', 'tomato'],
        colors: ['blue', 'green'],
        relevance: 3
    },
    {
        fruits: ['apple', 'carrot'],
        colors: ['red', 'green'],
        relevance: 2
    }
]

Solution

  • You can do it with aggregation:

    // Score every matching document by how many of the requested values it
    // contains, then return the most relevant documents first.
    db.getCollection('docs').aggregate([
    // Keep only documents sharing at least one value with each queried field.
    {$match: {fruits: {$in: ['apple', 'orange']}, colors: {$in: ['green']}}},
    {$project: {
        // relevance = number of requested values present, summed over fields.
        // One set intersection per field replaces one $cond per requested value.
        relevance: {
            $add: [
              {$size: {$setIntersection: ['$fruits', ['apple', 'orange']]}},
              {$size: {$setIntersection: ['$colors', ['green']]}}]
        },
        doc: '$$ROOT'}},
    // The pipeline must sort explicitly; $project alone leaves the order undefined.
    {$sort: {relevance: -1}}
    ])

    result:

    /* 1 */
    {
        "_id" : ObjectId("57be8a9b65d2835e960df543"),
        "relevance" : 3,
        "doc" : {
            "_id" : ObjectId("57be8a9b65d2835e960df543"),
            "fruits" : [ 
                "apple", 
                "orange", 
                "tomato"
            ],
            "colors" : [ 
                "blue", 
                "green"
            ]
        }
    }
    
    /* 2 */
    {
        "_id" : ObjectId("57be8aa865d2835e960df544"),
        "relevance" : 2,
        "doc" : {
            "_id" : ObjectId("57be8aa865d2835e960df544"),
            "fruits" : [ 
                "apple", 
                "carrot"
            ],
            "colors" : [ 
                "red", 
                "green"
            ]
        }
    }