Search code examples
mongodbmongodb-querypymongo

pymongo - $ifNull replacing the entire object instead of the particular field


I have the following documents:

[
  {
    "_id": "ObjectId(6448cae9aa015ee5fc8892a8)",
    "job_id": "ObjectId(6448cac2aa015ee5fc8892a0)",
    "annotations": {
      "ner_labels": {
        "value": [],
        "tag": "ACCOUNT_NO"
      },
      "doc_type_class_labels": ""
    },
    "category": "TEXT"
  },
  {
    "_id": "ObjectId(6448cae9aa015ee5fc8892ab)",
    "job_id": "ObjectId(6448cac2aa015ee5fc8892a0)",
    "annotations": {
      "ner_labels": {
        "value": [
          {
            "start": 1019,
            "end": 1033,
            "text": "F10-749-113245",
            "tag": "ACCOUNT_NO",
            "color": "rgb(244, 10, 10, 0.527)"
          }
        ],
        "tag": "ACCOUNT_NO"
      },
      "doc_type_class_labels": "BL"
    },
    "category": "TEXT"
  },
  {
    "_id": "ObjectId(6448cae9aa015ee5fc8892ae)",
    "job_id": "ObjectId(6448cac2aa015ee5fc8892a0)",
    "annotations": {
      "ner_labels": {
        "value": []
      },
      "doc_type_class_labels": ""
    },
    "category": "TEXT"
  },
  {
    "_id": "ObjectId(6448cae9aa015ee5fc8892b1)",
    "job_id": "ObjectId(6448cac2aa015ee5fc8892a0)",
    "annotations": {
      "ner_labels": {
        "value": [
          {
            "start": 65,
            "end": 81,
            "text": "nowhere BANK LTD ",
            "tag": "DRAWER",
            "color": "rgb(85, 239, 239, 0.527)"
          }
        ],
        "tag": "ACCOUNT_NO"
      },
      "doc_type_class_labels": "APPLICATION"
    },
    "category": "TEXT"
  },
  {
    "_id": "ObjectId(6448cae9aa015ee5fc8892b4)",
    "job_id": "ObjectId(6448cac2aa015ee5fc8892a0)",
    "annotations": {
      "ner_labels": {
        "value": [],
        "tag": "ACCOUNT_NO"
      },
      "doc_type_class_labels": "APPLICATION"
    },
    "category": "TEXT"
  },
  {
    "_id": "ObjectId(6448cae9aa015ee5fc8892b7)",
    "job_id": "ObjectId(6448cac2aa015ee5fc8892a0)",
    "annotations": {
      "ner_labels": {
        "value": []
      },
      "doc_type_class_labels": ""
    },
    "category": "TEXT"
  },
  {
    "_id": "ObjectId(6448cbfdaa015ee5fc8892bf)",
    "job_id": "ObjectId(6448cac2aa015ee5fc8892a0)",
    "annotations": {
      "ner_labels": {
        "value": []
      },
      "doc_type_class_labels": ""
    },
    "category": "TEXT"
  },
  {
    "_id": "ObjectId(6448cbfdaa015ee5fc8892c2)",
    "job_id": "ObjectId(6448cac2aa015ee5fc8892a0)",
    "annotations": {
      "ner_labels": {
        "value": []
      },
      "doc_type_class_labels": ""
    },
    "category": "TEXT"
  },
  {
    "_id": "ObjectId(6448cbfdaa015ee5fc8892c5)",
    "job_id": "ObjectId(6448cac2aa015ee5fc8892a0)",
    "annotations": {
      "ner_labels": {
        "value": []
      },
      "doc_type_class_labels": ""
    },
    "category": "TEXT"
  },
  {
    "_id": "ObjectId(6448cbfdaa015ee5fc8892c8)",
    "job_id": "ObjectId(6448cac2aa015ee5fc8892a0)",
    "annotations": {
      "ner_labels": {
        "value": []
      },
      "doc_type_class_labels": ""
    },
    "category": "TEXT"
  }
]

I ran the following query to get my required data:

{
    '$unwind': {
        'path': '$annotations.ner_labels.value',
        "preserveNullAndEmptyArrays": True
    }
},

I get the following output:

[
  {
    "_id": "ObjectId(6448cae9aa015ee5fc8892a8)",
    "job_id": "ObjectId(6448cac2aa015ee5fc8892a0)",
    "annotations": {
      "ner_labels": {
        "tag": "ACCOUNT_NO"
      },
      "doc_type_class_labels": ""
    },
    "category": "TEXT"
  },
  {
    "_id": "ObjectId(6448cae9aa015ee5fc8892ab)",
    "job_id": "ObjectId(6448cac2aa015ee5fc8892a0)",
    "annotations": {
      "ner_labels": {
        "value": {
          "start": 1019,
          "end": 1033,
          "text": "F10-749-113245",
          "tag": "ACCOUNT_NO",
          "color": "rgb(244, 10, 10, 0.527)"
        },
        "tag": "ACCOUNT_NO"
      },
      "doc_type_class_labels": "BL"
    },
    "category": "TEXT"
  },
  {
    "_id": "ObjectId(6448cae9aa015ee5fc8892ae)",
    "job_id": "ObjectId(6448cac2aa015ee5fc8892a0)",
    "annotations": {
      "ner_labels": {},
      "doc_type_class_labels": ""
    },
    "category": "TEXT"
  },
  {
    "_id": "ObjectId(6448cae9aa015ee5fc8892b1)",
    "job_id": "ObjectId(6448cac2aa015ee5fc8892a0)",
    "annotations": {
      "ner_labels": {
        "value": {
          "start": 65,
          "end": 81,
          "text": "nowhere BANK LTD ",
          "tag": "DRAWER",
          "color": "rgb(85, 239, 239, 0.527)"
        },
        "tag": "ACCOUNT_NO"
      },
      "doc_type_class_labels": "APPLICATION"
    },
    "category": "TEXT"
  },
  {
    "_id": "ObjectId(6448cae9aa015ee5fc8892b4)",
    "job_id": "ObjectId(6448cac2aa015ee5fc8892a0)",
    "annotations": {
      "ner_labels": {
        "tag": "ACCOUNT_NO"
      },
      "doc_type_class_labels": "APPLICATION"
    },
    "category": "TEXT"
  },
  {
    "_id": "ObjectId(6448cae9aa015ee5fc8892b7)",
    "job_id": "ObjectId(6448cac2aa015ee5fc8892a0)",
    "annotations": {
      "ner_labels": {},
      "doc_type_class_labels": ""
    },
    "category": "TEXT"
  },
  {
    "_id": "ObjectId(6448cbfdaa015ee5fc8892bf)",
    "job_id": "ObjectId(6448cac2aa015ee5fc8892a0)",
    "annotations": {
      "ner_labels": {},
      "doc_type_class_labels": ""
    },
    "category": "TEXT"
  },
  {
    "_id": "ObjectId(6448cbfdaa015ee5fc8892c2)",
    "job_id": "ObjectId(6448cac2aa015ee5fc8892a0)",
    "annotations": {
      "ner_labels": {},
      "doc_type_class_labels": ""
    },
    "category": "TEXT"
  },
  {
    "_id": "ObjectId(6448cbfdaa015ee5fc8892c5)",
    "job_id": "ObjectId(6448cac2aa015ee5fc8892a0)",
    "annotations": {
      "ner_labels": {},
      "doc_type_class_labels": ""
    },
    "category": "TEXT"
  },
  {
    "_id": "ObjectId(6448cbfdaa015ee5fc8892c8)",
    "job_id": "ObjectId(6448cac2aa015ee5fc8892a0)",
    "annotations": {
      "ner_labels": {},
      "doc_type_class_labels": ""
    },
    "category": "TEXT"
  }
]

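With preserveNullAndEmptyArrays, $unwind keeps the documents whose value array is empty but drops the value field itself, which leaves an empty ner_labels object when no tag is present. Now I need to add a placeholder value inside that empty object, so I used the following $project stage: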

{
    '$project': {
        'annotations.ner_labels.value': {
            '$ifNull': [
                '$annotations.ner_labels.value', [],
            ]
        },
        'category': 1,
        'job_id': 1
    }
},

After this, the entire object is modified instead of only the particular field. The following is the output:

[
  {
    "_id": "ObjectId(6448cae9aa015ee5fc8892a8)",
    "job_id": "ObjectId(6448cac2aa015ee5fc8892a0)",
    "annotations": {
      "ner_labels": {
        "value": []
      }
    },
    "category": "TEXT"
  },
  {
    "_id": "ObjectId(6448cae9aa015ee5fc8892ab)",
    "job_id": "ObjectId(6448cac2aa015ee5fc8892a0)",
    "annotations": {
      "ner_labels": {
        "value": {
          "start": 1019,
          "end": 1033,
          "text": "F10-749-113245",
          "tag": "ACCOUNT_NO",
          "color": "rgb(244, 10, 10, 0.527)"
        }
      }
    },
    "category": "TEXT"
  },
  {
    "_id": "ObjectId(6448cae9aa015ee5fc8892ae)",
    "job_id": "ObjectId(6448cac2aa015ee5fc8892a0)",
    "annotations": {
      "ner_labels": {
        "value": []
      }
    },
    "category": "TEXT"
  },
  {
    "_id": "ObjectId(6448cae9aa015ee5fc8892b1)",
    "job_id": "ObjectId(6448cac2aa015ee5fc8892a0)",
    "annotations": {
      "ner_labels": {
        "value": {
          "start": 65,
          "end": 81,
          "text": "nowhere BANK LTD ",
          "tag": "DRAWER",
          "color": "rgb(85, 239, 239, 0.527)"
        }
      }
    },
    "category": "TEXT"
  },
  {
    "_id": "ObjectId(6448cae9aa015ee5fc8892b4)",
    "job_id": "ObjectId(6448cac2aa015ee5fc8892a0)",
    "annotations": {
      "ner_labels": {
        "value": []
      }
    },
    "category": "TEXT"
  },
  {
    "_id": "ObjectId(6448cae9aa015ee5fc8892b7)",
    "job_id": "ObjectId(6448cac2aa015ee5fc8892a0)",
    "annotations": {
      "ner_labels": {
        "value": []
      }
    },
    "category": "TEXT"
  },
  {
    "_id": "ObjectId(6448cbfdaa015ee5fc8892bf)",
    "job_id": "ObjectId(6448cac2aa015ee5fc8892a0)",
    "annotations": {
      "ner_labels": {
        "value": []
      }
    },
    "category": "TEXT"
  },
  {
    "_id": "ObjectId(6448cbfdaa015ee5fc8892c2)",
    "job_id": "ObjectId(6448cac2aa015ee5fc8892a0)",
    "annotations": {
      "ner_labels": {
        "value": []
      }
    },
    "category": "TEXT"
  },
  {
    "_id": "ObjectId(6448cbfdaa015ee5fc8892c5)",
    "job_id": "ObjectId(6448cac2aa015ee5fc8892a0)",
    "annotations": {
      "ner_labels": {
        "value": []
      }
    },
    "category": "TEXT"
  },
  {
    "_id": "ObjectId(6448cbfdaa015ee5fc8892c8)",
    "job_id": "ObjectId(6448cac2aa015ee5fc8892a0)",
    "annotations": {
      "ner_labels": {
        "value": []
      }
    },
    "category": "TEXT"
  }
]

The following

{
    "_id": "ObjectId(6448cbfdaa015ee5fc8892c2)",
    "job_id": "ObjectId(6448cac2aa015ee5fc8892a0)",
    "annotations": {
      "ner_labels": {},
      "doc_type_class_labels": ""
    },
    "category": "TEXT"
}

is changed to the following after $ifNull:

{
    "_id": "ObjectId(6448cbfdaa015ee5fc8892c2)",
    "job_id": "ObjectId(6448cac2aa015ee5fc8892a0)",
    "annotations": {
      "ner_labels": {
        "value": []
      }
    },
    "category": "TEXT"
}

The field doc_type_class_labels is missing. How can I change only the missing field instead of the entire object? Kindly advise. Thank you.


Solution

  • You can use $addFields instead of $project, since $project outputs only the fields you explicitly include (plus _id by default).
    The $addFields stage is equivalent to a $project stage that explicitly specifies all existing fields in the input documents and adds the new fields.

    playground

    // Unwind the NER values, then restore a default `value` where $unwind left it absent,
    // without dropping any of the other fields in the document.
    db.collection.aggregate([
      {
        // Emit one output document per element of annotations.ner_labels.value.
        $unwind: {
          path: "$annotations.ner_labels.value",
          // Keep documents whose array is empty; in those documents the `value` field is absent afterwards.
          preserveNullAndEmptyArrays: true
        }
      },
      {
        // $addFields keeps every existing field (e.g. doc_type_class_labels) and only sets the listed path,
        // whereas an inclusion-style $project would output only the fields it names.
        $addFields: {
          "annotations.ner_labels.value": {
            // Use the existing value when present; fall back to [] when it is missing or null.
            $ifNull: [
              "$annotations.ner_labels.value",
              []
            ]
          }
        }
      }
    ])