Search code examples
jquery arrays duplicates splice

Remove Duplicates from array, but annotate remaining row that there were others


I have a list of newspaper articles that come in daily. Because many newspapers are part of larger chains, I don't want to see every single version of the same article; I do, however, want to see in how many other outlets it was carried.

So, this is what I want to see:

Article 1 Source - National Post, Also in Seattle Blaze, New York Times

Article 2 Source - Washington Post

I was doing this successfully using this code, but it seemed clunky.

Sample JSON

    // Sample payload: four articles from different publications. Articles 1, 3
    // and 4 share the title "xxxx'", so they represent the same story carried
    // by several outlets; article 2 ("yyyy'") appears only once.
    var data = {
        "articles": [
            {
                "id": "1",
                "title": "xxxx'",
                "body": "<p>Body goes here",
                "publication": { "id": 1, "name": "National Post" },
                "articleUrl": "http://www.foo.com/1"
            },
            {
                "id": "2",
                "title": "yyyy'",
                "body": "<p>Body goes here",
                "publication": { "id": 1, "name": "Washington Post" },
                "articleUrl": "http://www.foo.com/2"
            },
            {
                "id": "3",
                "title": "xxxx'",
                "body": "<p>Body goes here",
                "publication": { "id": 1, "name": "Seattle Blaze" },
                "articleUrl": "http://www.foo.com/3"
            },
            {
                "id": "4",
                "title": "xxxx'",
                "body": "<p>Body goes here",
                "publication": { "id": 1, "name": "New York Times" },
                "articleUrl": "http://www.foo.com/4"
            }
        ]
    };


/**
 * Collapses articles that share a title into the first one encountered.
 *
 * Every later article with a matching title is left out of the result; instead
 * a link to it (its articleUrl and publication name) is appended to the
 * publication.name of the article that was kept. The kept article objects are
 * the same objects as in the input, so the input entries are modified in place.
 *
 * NOTE: articleUrl and publication.name are concatenated into the markup
 * without any escaping.
 *
 * @param {Array} json - list of article objects (indexed access and .length are used).
 * @returns {Array} a new array holding the first article for each title.
 */
js.utils.RemoveDups = function RemoveDups(json) {
    var kept = [];
    var idx = 0;
    var candidate;
    var isRepeat;

    while (idx < json.length) {
        candidate = json[idx];
        isRepeat = false;

        // Null/undefined entries are never compared; they fall through to the push below.
        if (candidate != null) {
            kept.forEach(function (existing) {
                if (existing == null || existing.title != candidate.title) {
                    return;
                }
                isRepeat = true;
                // Annotate the article we kept with a link to this other copy.
                existing.publication.name = existing.publication.name + ", <a href='" + candidate.articleUrl + "' target='_blank'>" + candidate.publication.name + '</a>';
            });
        }

        if (!isRepeat) {
            kept.push(candidate);
        }
        idx += 1;
    }

    return kept;
};

I'm now experimenting with this code, which is more compact and likely faster, but because I don't have the full object from

dataArr = data.map(function (item) { return item.title });

I can't return the publication name of the article I'm removing.

// Clean the data: only runs when a payload was actually supplied
// (`!= null` rejects both null and undefined).
if (json != null) {

    // `data` is the article list; `dataArr` holds just the titles, in the same order.
    var data = json.articles,
    dataArr = data.map(function (item) { return item.title });

    // Remove duplicates. `some` is used here only to iterate: the callback never
    // returns a value, so every title is visited.
    dataArr.some(function (item, index) {
        // True when the same title occurs again later in the title list.
        var isDuplicate = dataArr.indexOf(item, index + 1) !== -1;
        if (isDuplicate) {
            // NOTE(review): `item` is an entry of `dataArr` (a title), not an article object,
            // so `item[index].publication` cannot reach the other article's publication.
            data[index].publication.name = data[index].publication.name + ',' + item[index].publication.name //<- dont have full object
            // removeDuplicate splices the first entry with this title out of `data`;
            // `dataArr` is not updated, so later `index` values may no longer line up with `data`.
            data = removeDuplicate(data, item);
        }
    });
 /**
  * Removes the first entry of `data` whose title loosely equals `title`.
  * The array is modified in place and also returned.
  *
  * @param {Array} data - article list to remove from.
  * @param {*} title - title to look for (compared with `==`).
  * @returns {Array} the same `data` array, minus the first match (if any).
  */
 function removeDuplicate(data, title) {
   $.each(data, function (position) {
     // No match: return nothing so $.each moves on to the next entry.
     if (!(this.title == title)) {
       return;
     }
     data.splice(position, 1);
     // Returning false stops $.each, so only the first match is removed.
     return false;
   });
   return data;
 }

Bonus question: I'm not entirely sure what criterion the code uses to determine which copy to keep and which to remove. Ideally, I'd want to keep the version in which the item object's wordCount (item.wordCount) is the highest.


Solution

  • Don't use an array in the first place; use an object whose keys are the article titles.

    /**
     * Groups articles by title, keeping the first article seen for each title
     * and appending the publication names of later copies to it.
     *
     * The kept article objects are the same objects as in the input, so their
     * publication.name is modified in place.
     *
     * @param {Object} json - payload with an `articles` array; a missing payload
     *     or a missing `articles` list yields an empty result.
     * @returns {Object} plain object mapping each title to its (annotated) article.
     */
    js.utils.RemoveDups = function RemoveDups(json) {
        var hasOwn = Object.prototype.hasOwnProperty;
        var articles = {};
        var list = (json && json.articles) || [];

        list.forEach(function(a) {
            // Own-property check: `in` would also match keys inherited from
            // Object.prototype ("constructor", "toString", ...), and the merge
            // branch would then throw on `.publication.name`.
            if (hasOwn.call(articles, a.title)) {
                articles[a.title].publication.name += ', ' + a.publication.name;
            } else {
                // defineProperty rather than plain assignment, so that a title of
                // "__proto__" is stored as an ordinary key instead of replacing
                // the prototype of `articles`.
                Object.defineProperty(articles, a.title, {
                    value: a,
                    writable: true,
                    enumerable: true,
                    configurable: true
                });
            }
        });
        return articles;
    };
    

    If you need the result turned back into an array, replace `return articles;` with:

        return Object.keys(articles).map(function(title) {
            return articles[title];
        });