I've created an algorithm which weighs the relevance of a list of articles against two lists of keywords that correlate to attributes of the article.
It works great and is super efficient... but it's a mess. It's not terribly readable, so it's difficult to discern what's going.
The operation in pseudo code goes something like this:
(List<Article>)
(List<string>)
(Article.Roles = List<string>)
What would be a better way to write the following code? I can't use foreach
except in one or two places, because I need to match indexes to know what value to add on a match.
private static List<Article> WeighArticles(List<Article> articles, List<string> roles, List<string> topics, List<string> industries)
{
var returnList = new List<Article>();
for (int currentArticle = 0; currentArticle < articles.Count; currentArticle++)
{
for (int currentRole = 0; currentRole < roles.Count; currentRole++)
{
if (articles[currentArticle].Roles != null && articles[currentArticle].Roles.Count > 0)
{
for (int currentArticleRole = 0; currentArticleRole < articles[currentArticle].Roles.Count; currentArticleRole++)
{
if (articles[currentArticle].Roles[currentArticleRole].ToLower() == roles[currentRole].ToLower())
{
if (currentArticleRole == 0 && currentRole == 0)
articles[currentArticle].Weight += 3;
else
articles[currentArticle].Weight += 1;
}
}
}
}
for (int currentTopic = 0; currentTopic < topics.Count; currentTopic++)
{
if (articles[currentArticle].Topics != null && articles[currentArticle].Topics.Count > 0)
{
for (int currentArticleTopic = 0; currentArticleTopic < articles[currentArticle].Topics.Count; currentArticleTopic++)
{
if (articles[currentArticle].Topics[currentArticleTopic].ToLower() == topics[currentTopic].ToLower())
{
articles[currentArticle].Weight += 0.8;
}
}
}
}
returnList.Add(articles[currentArticle]);
}
return returnList;
}
//Article Class stub (unused properties left out)
public class Article
{
public List<string> Roles { get; set; }
public List<string> Topics { get; set; }
public double Weight { get; set; }
}
If you'll examine your code, you'll find that you are asking Article class to many times for data. Use Tell, Don't Ask principle and move weight adding logic to Article class, where it should belong. That will increase cohesion of Article, and make your original code much more readable. Here is how your original code will look like:
foreach(var article in articles)
{
article.AddWeights(roles);
article.AddWeights(topics);
}
And Article will look like:
public double Weight { get; private set; } // probably you don't need setter
public void AddWeights(IEnumerable<Role> roles)
{
const double RoleWeight = 1;
const double PrimaryRoleWeight = 3;
if (!roles.Any())
return;
if (Roles == null || !Roles.Any())
return;
var pirmaryRole = roles.First();
var comparison = StringComparison.CurrentCultureIgnoreCase;
if (String.Equals(Roles[0], primaryRole, comparison))
{
Weight += PrimaryRoleWeight;
return;
}
foreach(var role in roles)
if (Roles.Contains(role, StringComparer.CurrentCultureIgnoreCase))
Weight += RoleWeight;
}
Adding topics weights:
public void AddWeights(IEnumerable<Topic> topics)
{
const double TopicWeight = 0.8;
if (Topics == null || !Topics.Any() || !topics.Any())
return;
foreach(var topic in topics)
if (Topics.Contains(topic, StringComparer.CurrentCultureIgnoreCase))
Weight += TopicWeight;
}