I have a list of newspaper articles that come in daily. Because many newspapers are part of larger chains, I don't want to see every single version of the same article; I do, however, want to see how many other outlets carried it.
So this is what I want to see:
Article 1 Source - National Post, Also in Seattle Blaze, New York Times
Article 2 Source - Washington Post
I was doing this successfully using the code below, but it seemed clunky.
Sample JSON
// Sample feed used in the examples below: four articles, each from a
// different outlet. Entries "1", "3" and "4" share the title "xxxx'" (the
// same story carried by several papers); entry "2" has its own title.
var data = {
    "articles": [
        {
            "id": "1",
            "title": "xxxx'",
            "body": "<p>Body goes here",
            "publication": { "id": 1, "name": "National Post" },
            "articleUrl": "http://www.foo.com/1"
        },
        {
            "id": "2",
            "title": "yyyy'",
            "body": "<p>Body goes here",
            "publication": { "id": 1, "name": "Washington Post" },
            "articleUrl": "http://www.foo.com/2"
        },
        {
            "id": "3",
            "title": "xxxx'",
            "body": "<p>Body goes here",
            "publication": { "id": 1, "name": "Seattle Blaze" },
            "articleUrl": "http://www.foo.com/3"
        },
        {
            "id": "4",
            "title": "xxxx'",
            "body": "<p>Body goes here",
            "publication": { "id": 1, "name": "New York Times" },
            "articleUrl": "http://www.foo.com/4"
        }
    ]
};
/**
 * Collapses articles that share a title into the first article carrying that
 * title, and records every other outlet on the kept article.
 *
 * For each later article whose title was already seen, the text
 * ", <a href='URL' target='_blank'>NAME</a>" is appended to the kept
 * article's publication.name. The kept articles are the caller's own objects
 * and are modified in place.
 *
 * @param {Array<Object>} json - Articles; each non-null entry is read for
 *     title, articleUrl and publication.name.
 * @returns {Array<Object>} The first article seen for each title, in input
 *     order. Null/undefined entries are passed through unchanged.
 */
js.utils.RemoveDups = function RemoveDups(json) {
    // The URL and outlet name come from the feed, so escape them before
    // embedding them in markup; the href is single-quoted, so quotes count too.
    function escapeHtml(value) {
        return String(value)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;')
            .replace(/'/g, '&#39;');
    }

    var articles = [];
    // title -> kept article. A prototype-less object gives a constant-time
    // lookup (instead of rescanning `articles` for every input) and keeps
    // titles such as "constructor" from colliding with inherited properties.
    var keptByTitle = Object.create(null);

    for (var i = 0; i < json.length; i++) {
        var article = json[i];

        if (article == null) {
            // Nothing to compare; keep the slot exactly as it was.
            articles.push(article);
            continue;
        }

        var kept = keptByTitle[article.title];
        if (kept === undefined) {
            keptByTitle[article.title] = article;
            articles.push(article);
        } else {
            kept.publication.name = kept.publication.name +
                ", <a href='" + escapeHtml(article.articleUrl) + "' target='_blank'>" +
                escapeHtml(article.publication.name) + '</a>';
        }
    }

    return articles;
};
I'm now experimenting with the code below, which is more compact and likely faster, but because I don't have the full object from
dataArr = data.map(function (item) { return item.title });
I can't get at the publication name of the article I'm removing.
//Clean the Data
if (json != null) {
    var data = mergeDuplicateTitles(json.articles);
}

/**
 * Returns a new array holding the first article seen for each title.
 *
 * Whenever a later article repeats an already-seen title, its
 * publication.name is appended (comma-separated) to the kept article's
 * publication.name and the later article is left out of the result. Working
 * on the article objects themselves, rather than on an array of bare titles,
 * is what makes the duplicate's publication name available.
 *
 * The input array is not modified, but the kept article objects are updated
 * in place.
 *
 * @param {Array<Object>} articles - Articles with title and publication.name.
 * @returns {Array<Object>} One article per distinct title, in input order.
 */
function mergeDuplicateTitles(articles) {
    // title -> kept article; prototype-less so any title is a safe key.
    var keptByTitle = Object.create(null);

    return articles.filter(function (article) {
        var kept = keptByTitle[article.title];
        if (kept === undefined) {
            keptByTitle[article.title] = article;
            return true;
        }
        kept.publication.name = kept.publication.name + ',' + article.publication.name;
        return false;
    });
}

/**
 * Removes the first entry whose title equals `title` from `data`, in place.
 * Not needed by mergeDuplicateTitles; kept so existing callers still work.
 *
 * @param {Array<Object>} data - Articles to search; modified in place.
 * @param {string} title - Title to look for.
 * @returns {Array<Object>} The same `data` array.
 */
function removeDuplicate(data, title) {
    for (var i = 0; i < data.length; i++) {
        if (data[i] != null && data[i].title === title) {
            data.splice(i, 1);
            // Stop after the first hit: only one copy is removed per call.
            break;
        }
    }
    return data;
}
Bonus question: I'm not entirely sure what determines which copy is kept and which is removed. Ideally, I'd want to keep the version whose word count (item.wordCount) is the highest.
Don't use an array in the first place; use an object whose keys are the article titles.
/**
 * Groups articles by title, keeping the first article seen for each title.
 *
 * When a later article repeats a title, its publication.name is appended
 * (separated by ", ") to the kept article's publication.name. The kept
 * articles are the caller's own objects and are modified in place.
 *
 * @param {Object} json - Object whose `articles` array holds the articles;
 *     each is read for title and publication.name.
 * @returns {Object} Map of title -> kept article.
 */
js.utils.RemoveDups = function RemoveDups(json) {
    var hasOwn = Object.prototype.hasOwnProperty;
    var articles = {};

    json.articles.forEach(function (article) {
        // Own-property check: `in` would also match inherited keys, so a
        // title such as "constructor" or "toString" would be treated as
        // already seen and then fail on `.publication`.
        if (hasOwn.call(articles, article.title)) {
            articles[article.title].publication.name += ', ' + article.publication.name;
        } else {
            articles[article.title] = article;
        }
    });

    return articles;
};
If you need the result turned back into an array, replace return articles;
with:
return Object.keys(articles).map(function(title) {
return articles[title];
});