Search code examples
c#linqlistiequalitycomparer

Compare two lists, and get all differences?


I have to compare two lists of type Slide which contain another List of Items, called Charts. I have to find all differences between the Slide-lists, whereby a difference could be:

a Slide in List A , but not in B

a Slide in List A , but not in B

a Slide which is in both Lists, but their Charts are different

I tried to use except, but it returns Lists with different Charts as "the same". Example List A: A B C D List B: A B C D* (d contains different charts) Should return D*, but this does not work.

I am little bit confused as to why this happens - to comparer looks ok for me.

My code:

class PPDetectDifferences
{
    private PPPresentation laterVersion;
    private string Path { get; set; }

    private PPPresentation OriginalPresentation { get; set; }

    private PPPresentation GetLaterPresentation()
    {
        var ppDal = new PPDAL(Path);
        Task<PPPresentation> task = Task.Run(() => ppDal.GetPresentation());

        var presentation = task.Result;
        return presentation;
    }

    public PPDetectDifferences(string path, PPPresentation ppPresentation)
    {
        if (path != null)
        {
            this.Path = path;
        }
        else
        {
            throw new ArgumentNullException("path");
        }

        if (ppPresentation != null)
        {
            this.OriginalPresentation = ppPresentation;
        }
        else
        {
            throw new ArgumentNullException("ppPresentation");
        }
    }

    public bool IsDifferent()
    {
        //// getting the new List of Slides
        laterVersion = GetLaterPresentation();

        //// Compare the newer version with the older version
        var result = laterVersion.Slides.Except(OriginalPresentation.Slides, new PPSlideComparer()).ToList();

        //// If there are no differences, result.count should be 0, otherwise some other value.
        return result.Count != 0;
    }
}

/// <summary>
/// Compares two Slides with each other
/// </summary>
public class PPSlideComparer : IEqualityComparer<PPSlide>
{
    public int GetHashCode(PPSlide slide)
    {
        if (slide == null)
        {
            return 0;
        }
        //// ID is an INT, which is unique to this Slide
        return slide.ID.GetHashCode();
    }

    public bool Equals(PPSlide s1, PPSlide s2)
    {
        var s1Charts = (from x in s1.Charts select x).ToList();
        var s2Charts = (from x in s2.Charts select x).ToList();

        var result = s1Charts.Except(s2Charts, new PPChartComparer()).ToList();

        return result.Count == 0;
    }
}

/// <summary>
/// Compares two Charts with each other
/// </summary>
public class PPChartComparer : IEqualityComparer<PPChart>
{
    public int GetHashCode(PPChart chart)
    {
        //// UID is an INT, which is unique to this chart
        return chart == null ? 0 : chart.UID.GetHashCode();
    }

    public bool Equals(PPChart c1, PPChart c2)
    {
        var rvalue = c1.UID == c2.UID;

        if (c1.ChartType != c2.ChartType)
        {
            rvalue = false;
        }
        return rvalue;
    }
}

Solution

  • To compare two sequences using except, you'll need to check both directions. For example:

    List<T> a, b;
    IEqualityComparer<T> cmp = ...
    var areEqual = !a.Except(b, cmp).Concat(b.Except(c, cmp)).Any();
    

    This issue comes up twice in your code: first when you are comparing 2 lists of slides and again when you are comparing 2 lists of charts.

    Another thing to be aware of when using Except() for collection comparison is that it acts as a set operation. Thus, { A, A, A }.Except({ A }) will return empty.

    Thus, I'd recommend something more like the following:

    public static bool CollectionEquals<T>(this ICollection<T> @this, ICollection<T> that,  IEqualityComparer<T> cmp = null)
    {
        // to be equal, the 2 collections must not be null unless they're both null or have the same count
        if (ReferenceEquals(@this, that)) { return true; }
        if (@this == null || that == null) { return false; }
        if (@this.Count != that.Count) { return false; }
    
        // use the default comparer if one wasn't passed in
        var comparer = cmp ?? EqualityComparer<T>.Default;
    
        // to handle duplicates, we convert @this into a "bag" (a mapping 
        // from value -> # occurrences of that value in the collection
        var thisDict = @this.GroupBy(t => t, comparer)
            .ToDictionary(g => g.Key, g => g.Count(), comparer);
        // do the same for that
        var thatDict = @this.GroupBy(t => t, comparer)
            .ToDictionary(g => g.Key, g => g.Count(), comparer);
    
        // the two collections are equal if they have the same number of distinct values
        return thisDict.Count == thatDict.Count
            // and if, for each distinct value in @this, that value is also in that
            // and has the same number of occurrences in @this and that
            && thisDict.All(kvp => thatDict.ContainsKey(kvp.Key) 
                            && thatDict[kvp.Key] == kvp.Value);
    }