Search code examples
pythonjsonflaskpymongo

Searching for objects within nested array: how to return only found objects and parent elements?


Here's an excerpt from pictures MongoDB collection:

[
  {
    "_id": "57582b6b",
    "source": "integration",
    "url": "https://example.com/images/51/landscapes-polar.xml",
    "pictures": [
      {
        "name": "pines",
        "version": "2"
      },
      {
        "name": "penguins",
        "version": "1"
      },
      {
        "name": "pineapple",
        "version": "7"
      }
    ]
  },
  {
    "_id": "57582b6d",
    "source": "customer",
    "url": "https://example.com/images/15/nature.xml",
    "pictures": [
      {
        "name": "mountains",
        "version": "2"
      },
      {
        "name": "pines",
        "version": "1"
      }
    ]
  },
  {
    "_id": "57582b6c",
    "source": "qa",
    "url": "https://example.com/image/32/landscapes.xml",
    "pictures": [
      {
        "name": "alps",
        "version": "1"
      },
      {
        "name": "pineapple",
        "version": "7"
      },
      {
        "name": "pines",
        "version": "3"
      }
    ]
  }
]

My main concern is to find specific names from inside of nested pictures array. When names matching partial query string are found, they should be preserved in pictures array and displayed along with pictures's array parent. Using PyMongo library, I was able to retrieve queried data using this function:

import re
from flask import Flask, jsonify

from controller.database import client, database_name, temp_collection


app = Flask(__name__)
db = client[database_name]
collection = db[temp_collection]


@app.route('/component/find/<picture_name>', methods=['GET'])
def get_component(picture_name):
    pattern = re.compile(picture_name, re.IGNORECASE)

    pipeline = [
        {"$unwind": "$pictures"},
        {"$match": {"pictures.name": {"$regex": pattern}}},
        {"$group": {
            "_id": "$_id",
            "url": {"$first": "$url"},
            "source": {"$first": "$source"},
            "pictures": {"$addToSet": "$pictures"},
            "root": {"$first": "$$ROOT"}
        }},
        {"$replaceRoot": {
            "newRoot": {
                "$mergeObjects": ["$root", {"pictures": "$pictures"}]
            }
        }},
        {"$project": {
            "_id": {"$toString": "$_id"},
            "url": 1,
            "source": 1,
            "pictures": 1
        }}
    ]

    result = list(collection.aggregate(pipeline))

    if result:
        return jsonify(result)
    else:
        return jsonify({"message": "Component with picture '{}' not found.".format(picture_name)}), 404


if __name__ == "__main__":
    app.run(debug=True)

However, retrieved data only contains one-element pictures arrays, instead of putting there all matching objects.

In other words, this is what I'd like to get:

[
  {
    "_id": "57582b6b",
    "source": "integration",
    "url": "https://example.com/51/landscapes-polar.xml",
    "pictures": [
      {
        "name": "pines",
        "version": "2"
      },
      {
        "name": "pineapple",
        "version": "7"
      }
    ]
  },
  {
    "_id": "57582b6d",
    "source": "customer",
    "url": "https://example.com/15/nature.xml",
    "pictures": [
      {
        "name": "pines",
        "version": "1"
      }
    ]
  },
  {
    "_id": "57582b6c",
    "source": "qa",
    "url": "https://example.com/image/32/landscapes.xml",
    "pictures": [
      {
        "name": "pineapple",
        "version": "7"
      },
      {
        "name": "pines",
        "version": "3"
      }
    ]
  }
]

and this is what I get now:

[
  {
    "_id": "57582b6b",
    "source": "integration",
    "url": "https://example.com/51/landscapes-polar.xml",
    "pictures": [
      {
        "name": "pines",
        "version": "2"
      }
    ]
  },
  {
    "_id": "57582b6d",
    "source": "customer",
    "url": "https://example.com/15/nature.xml",
    "pictures": [
      {
        "name": "pines",
        "version": "1"
      }
    ]
  },
  {
    "_id": "57582b6c",
    "source": "qa",
    "url": "https://example.com/image/32/landscapes.xml",
    "pictures": [
      {
        "name": "pineapple",
        "version": "7"
      }
    ]
  }
]

How to make sure all matching pictures objects get pushed to proper arrays? (Using $push instead of $addToSet returns the same results.)


Solution

  • Using Mongo Playground, I was able to build up the query step by step and found out that removing some markup results in responding with the contents I wanted. The Flask app should be set up this way:

    import re
    from flask import Flask, jsonify
    
    from controller.database import client, database_name, temp_collection
    
    
    app = Flask(__name__)
    db = client[database_name]
    collection = db[temp_collection]
    
    
    @app.route('/component/find/<picture_name>', methods=['GET'])
    def get_component(picture_name):
        pattern = re.compile(picture_name, re.IGNORECASE)
    
        pipeline = [
            {"$unwind": "$pictures"},
            {"$match": {"pictures.name": {"$regex": pattern}}},
            {"$group": {
                "_id": {"$toString": "$_id"}, # convert _id to str
                "url": {"$first": "$url"},
                "source": {"$first": "$source"},
                "pictures": {"$addToSet": "$pictures"}
            }}
        ]
    
        result = list(collection.aggregate(pipeline))
    
        if result:
            return jsonify(result)
        else:
            return jsonify({"message": "Component with picture '{}' not found.".format(picture_name)}), 404
    
    
    if __name__ == "__main__":
        app.run(debug=True)