How can I find the number of duplicates in each document in Java-MongoDB I have collection like this. Collection example:
{
"_id": {
"$oid": "5fc8eb07d473e148192fbecd"
},
"ip_address": "192.168.0.1",
"mac_address": "00:A0:C9:14:C8:29",
"url": "https://people.richland.edu/dkirby/141macaddress.htm",
"datetimes": {
"$date": "2021-02-13T02:02:00.000Z"
}
{
"_id": {
"$oid": "5ff539269a10d529d88d19f4"
},
"ip_address": "192.168.0.7",
"mac_address": "00:A0:C9:14:C8:30",
"url": "https://people.richland.edu/dkirby/141macaddress.htm",
"datetimes": {
"$date": "2021-02-12T19:00:00.000Z"
}
}
{
"_id": {
"$oid": "60083d9a1cad2b613cd0c0a2"
},
"ip_address": "192.168.1.5",
"mac_address": "00:0A:05:C7:C8:31",
"url": "www.facebook.com",
"datetimes": {
"$date": "2021-01-24T17:00:00.000Z"
}
}
example query:
BasicDBObject whereQuery = new BasicDBObject();
DBCursor cursor = table1.find(whereQuery);
while (cursor.hasNext()) {
DBObject obj = cursor.next();
String ip_address = (String) obj.get("ip_address");
String mac_address = (String) obj.get("mac_address");
Date datetimes = (Date) obj.get("datetimes");
String url = (String) obj.get("url");
System.out.println(ip_address, mac_address, datetimes, url);
}
in Java, How I can know count duplicated data of "url". And how many of duplicated.
in mongodb you can solve this problem with "Aggregation Pipelines". You need to implement this pipeline in "Mongodb Java Driver". It gives only duplicated results with their duplicates count.
db.getCollection('table1').aggregate([
{
"$group": {
// group by url and calculate count of duplicates by url
"_id": "$url",
"url": {
"$first": "$url"
},
"duplicates_count": {
"$sum": 1
},
"duplicates": {
"$push": {
"_id": "$_id",
"ip_address": "$ip_address",
"mac_address": "$mac_address",
"url": "$url",
"datetimes": "$datetimes"
}
}
}
},
{ // select documents that only duplicates count higher than 1
"$match": {
"duplicates_count": {
"$gt": 1
}
}
},
{
"$project": {
"_id": 0
}
}
]);
Output Result:
{
"url" : "https://people.richland.edu/dkirby/141macaddress.htm",
"duplicates_count" : 2.0,
"duplicates" : [
{
"_id" : ObjectId("5fc8eb07d473e148192fbecd"),
"ip_address" : "192.168.0.1",
"mac_address" : "00:A0:C9:14:C8:29",
"url" : "https://people.richland.edu/dkirby/141macaddress.htm",
"datetimes" : {
"$date" : "2021-02-13T02:02:00.000Z"
}
},
{
"_id" : ObjectId("5ff539269a10d529d88d19f4"),
"ip_address" : "192.168.0.7",
"mac_address" : "00:A0:C9:14:C8:30",
"url" : "https://people.richland.edu/dkirby/141macaddress.htm",
"datetimes" : {
"$date" : "2021-02-12T19:00:00.000Z"
}
}
]
}