Search code examples
phpmongodbmapreduceaggregation-frameworkdoctrine-odm

Mongo mapreduce - use $dateToString on mapping stage


I have a simple collection of elements with the following structure:

{
   _id: MongoId
   pvs: int
   day: IsoDate()
   uid: int
}

I'd like to use MapReduce to count pageviews for a given user, grouped by a certain date range (day/week/month, date format compatible).

What I'm stuck on is how to reformat the IsoDate in the map function using $dateToString before emitting, so that it emits the format I want, like %Y-%m-%d or %Y-%m or %Y-%m-%U. When I call it, I don't get the reformatted date, but an object with format and date fields.

Example:

function(){
    emit(
        {'$dateToString': {'format': "%Y-%m-%d", 'date': this.day}}, 
        this.pvs
    )}

will return

{
    "pvs" : 5
    "$dateToString" : {
        "format" : "%Y-%m-%d",
        "date" : ISODate("2016-07-13T08:27:29.000Z")
    }
}

I want to have this returned instead:

{
    "pvs": 5,
    "day": "2016-07-13"
}

Solution

  • If using mapReduce then you would have to create your own custom function that formats the date and call that in your map function:

    dateToString = function(date){
        return date.getFullYear() + '-' (date.getMonth() + 1) + '-' + date.getDate();
    }
    
    // Map step: emit each document's pageview count keyed by its formatted day.
    map = function() {
        var dayKey = dateToString(this.day);
        var pageviews = this.pvs;
        emit(dayKey, pageviews);
    }
    

    A better option is the aggregation framework, which runs "within" MongoDB in its native C++ code and is therefore more efficient than mapReduce, which runs in a bundled JavaScript environment (V8 or SpiderMonkey, depending on your version):

    // Stage 1: keep only the documents of the requested user.
    var matchUser = { "$match": { "uid": userId } };

    // Stage 2: replace the raw date with its "%Y-%m-%d" string and carry pvs through.
    var formatDay = {
        "$project": {
            "formattedDate": {
                "$dateToString": { "format": "%Y-%m-%d", "date": "$day" }
            },
            "pvs": 1
        }
    };

    // Stage 3: sum the pageviews for each formatted day.
    var sumPerDay = {
        "$group": {
            "_id": "$formattedDate",
            "pvs": { "$sum": "$pvs" }
        }
    };

    db.collectionName.aggregate([matchUser, formatDay, sumPerDay])
    

    In Doctrine MongoDB ODM you can run this pipeline using the command function:

    // Obtain the underlying Mongo client from Doctrine, connecting first if
    // the connection has not been opened yet.
    $connection = $this->get('doctrine_mongodb')->getConnection();
    $mongo = $connection->getMongo();
    if (!$mongo) {
        $connection->connect();
        $mongo = $connection->getMongo();
    }
    $db = $mongo->selectDB('test_database');

    // Operator names and field paths are single-quoted on purpose: inside
    // double quotes PHP would interpolate "$match", "$day", "$sum", ... as
    // (undefined) variables and send empty strings to the server.
    // $userId must hold the uid whose pageviews are being counted.
    // NOTE(review): MongoDB 3.6+ requires a 'cursor' option on the aggregate
    // command - confirm against the server version in use.
    $aggregate_results = $db->command(array(
        'aggregate' => 'collectionName',
        'pipeline' => array(
            // Stage 1: keep only the requested user's documents.
            array('$match' => array('uid' => $userId)),
            // Stage 2: turn the date into a "%Y-%m-%d" string, keep pvs.
            array(
                '$project' => array(
                    'formattedDate' => array(
                        '$dateToString' => array('format' => '%Y-%m-%d', 'date' => '$day')
                    ),
                    'pvs' => 1
                )
            ),
            // Stage 3: sum pageviews per formatted day.
            array(
                '$group' => array(
                    '_id' => '$formattedDate',
                    'pvs' => array('$sum' => '$pvs')
                )
            )
        )
    ));