Search code examples
c#jsonlinqienumerable

Recursively Iterate over JObject and find duplicates for attribute title's value


This is the sample JSON I'm working on:

// Sample document: a tree of nodes in which each node has a "title" and may
// nest further nodes under "children". Two of the innermost nodes share the
// title "Editorial Notes23".
string json = @"{
            ""children"": [
                {
                    ""children"": [
                        {
                            ""uri"": ""https://uscode.house.gov/view.xhtml?req=granuleid:USC-prelim-title11-front&num=0&edition=prelim#125Editof%2B85a751ad"",
                            ""cachedUri"": null,
                            ""transformedUri"": ""509d92cc0883efca255e8c98d6dab41defa3f3ec_transform"",
                            ""title"": ""Editorial Notes23"",
                            ""titleHtml"": ""<html><head></head><body><h4 class=\""note-head\"">\n<strong>Editorial Notes</strong>\n</h4></body></html>"",
                            ""cachedDateTimeOffset"": ""0001-01-01T00:00:00+00:00"",
                            ""hash"": ""e3d9ca5d1d44c6e1288fc505b27194258e952757""
                        },
                        {
                            ""uri"": ""https://uscode.house.gov/view.xhtml?req=granuleid:USC-prelim-title11-front&num=0&edition=prelim#125Amenda%2B427578a3"",
                            ""cachedUri"": null,
                            ""transformedUri"": ""23cf8e2fbf0815462a33e5d4bc712903401521d4_transform"",
                            ""title"": ""Editorial Notes23"",
                            ""titleHtml"": ""<html><head></head><body><h4 class=\""note-head\"">Amendments</h4></body></html>"",
                            ""cachedDateTimeOffset"": ""0001-01-01T00:00:00+00:00"",
                            ""hash"": ""5185314ca13a0fcc5675662655ad0f396837330d""
                        }
                    ],
                    ""uri"": ""https://uscode.house.gov/view.xhtml?path=/prelim@title11/title11a&edition=prelim"",
                    ""cachedUri"": ""22668d04d819885f938f6186800d8b7878393424"",
                    ""title"": ""Title 11—Appendix"",
                    ""titleHtml"": ""<html><head></head><body><a name=\""USC-prelim-title11a\"" target=\""_top\"" href=\""/browse/prelim@title11/title11a&amp;edition=prelim\"" style=\""text-decoration: none;\"">Title 11—Appendix</a></body></html>"",
                    ""cachedDateTimeOffset"": ""2023-07-04T05:12:25.176829+00:00"",
                    ""contentType"": ""text/html; charset=UTF-8"",
                    ""hash"": ""76d340f8b80dbd5cbbc427f4051257aaf2192b64""
                }
            ],
            ""uri"": ""https://uscode.house.gov/browse/prelim@title11&edition=prelim"",
            ""cachedUri"": ""e5685f01f5449d5c26a01aa7506df7ae9a264278"",
            ""title"": ""Editorial Notes232"",
            ""titleHtml"": ""<html><head></head><body>TITLE 11—BANKRUPTCY</body></html>"",
            ""cachedDateTimeOffset"": ""2023-07-04T04:25:34.4418013+00:00"",
            ""contentType"": ""text/html;"",
            ""issueDate"": ""2023-07-05T00:00:00"",
            ""hash"": ""a4eb38c88c720a81e8b79a9fa216dc9339325a87""
        }";

My Program.cs :

using System;
using System.Collections.Generic;
using Newtonsoft.Json.Linq;

class Program
{
    /// <summary>
    /// Parses the sample document and prints whether any string "title" value
    /// occurs more than once anywhere in the tree.
    /// </summary>
    static void Main()
    {
        string json = @"{
            ""children"": [
                {
                    ""children"": [
                        {
                            ""uri"": ""https://uscode.house.gov/view.xhtml?req=granuleid:USC-prelim-title11-front&num=0&edition=prelim#125Editof%2B85a751ad"",
                            ""cachedUri"": null,
                            ""transformedUri"": ""509d92cc0883efca255e8c98d6dab41defa3f3ec_transform"",
                            ""title"": ""Editorial Notes23"",
                            ""titleHtml"": ""<html><head></head><body><h4 class=\""note-head\"">\n<strong>Editorial Notes</strong>\n</h4></body></html>"",
                            ""cachedDateTimeOffset"": ""0001-01-01T00:00:00+00:00"",
                            ""hash"": ""e3d9ca5d1d44c6e1288fc505b27194258e952757""
                        },
                        {
                            ""uri"": ""https://uscode.house.gov/view.xhtml?req=granuleid:USC-prelim-title11-front&num=0&edition=prelim#125Amenda%2B427578a3"",
                            ""cachedUri"": null,
                            ""transformedUri"": ""23cf8e2fbf0815462a33e5d4bc712903401521d4_transform"",
                            ""title"": ""Editorial Notes23"",
                            ""titleHtml"": ""<html><head></head><body><h4 class=\""note-head\"">Amendments</h4></body></html>"",
                            ""cachedDateTimeOffset"": ""0001-01-01T00:00:00+00:00"",
                            ""hash"": ""5185314ca13a0fcc5675662655ad0f396837330d""
                        }
                    ],
                    ""uri"": ""https://uscode.house.gov/view.xhtml?path=/prelim@title11/title11a&edition=prelim"",
                    ""cachedUri"": ""22668d04d819885f938f6186800d8b7878393424"",
                    ""title"": ""Title 11—Appendix"",
                    ""titleHtml"": ""<html><head></head><body><a name=\""USC-prelim-title11a\"" target=\""_top\"" href=\""/browse/prelim@title11/title11a&amp;edition=prelim\"" style=\""text-decoration: none;\"">Title 11—Appendix</a></body></html>"",
                    ""cachedDateTimeOffset"": ""2023-07-04T05:12:25.176829+00:00"",
                    ""contentType"": ""text/html; charset=UTF-8"",
                    ""hash"": ""76d340f8b80dbd5cbbc427f4051257aaf2192b64""
                }
            ],
            ""uri"": ""https://uscode.house.gov/browse/prelim@title11&edition=prelim"",
            ""cachedUri"": ""e5685f01f5449d5c26a01aa7506df7ae9a264278"",
            ""title"": ""Editorial Notes232"",
            ""titleHtml"": ""<html><head></head><body>TITLE 11—BANKRUPTCY</body></html>"",
            ""cachedDateTimeOffset"": ""2023-07-04T04:25:34.4418013+00:00"",
            ""contentType"": ""text/html;"",
            ""issueDate"": ""2023-07-05T00:00:00"",
            ""hash"": ""a4eb38c88c720a81e8b79a9fa216dc9339325a87""
        }";

        JObject obj = JObject.Parse(json);

        // One set is shared by every node, so a title is compared against the
        // titles of all nodes visited so far, regardless of their depth.
        HashSet<string> titles = new HashSet<string>();

        bool hasDuplicates = CheckTitleDuplicates(obj, titles);

        Console.WriteLine(hasDuplicates
            ? "Duplicate titles found."
            : "No duplicate titles found.");
    }

    /// <summary>
    /// Walks <paramref name="obj"/> and its "children" arrays depth-first and
    /// reports whether a string "title" value has already been seen.
    /// </summary>
    /// <param name="obj">Node to inspect.</param>
    /// <param name="titles">Titles collected so far; updated as nodes are visited.</param>
    /// <returns>
    /// <c>true</c> as soon as a repeated title is met; <c>false</c> when the
    /// whole subtree has been visited without finding one.
    /// </returns>
    static bool CheckTitleDuplicates(JObject obj, HashSet<string> titles)
    {
        // HashSet.Add returns false when the value is already present, so a
        // single call both records the title and detects the repeat.
        if (obj.TryGetValue("title", out JToken titleToken)
            && titleToken.Type == JTokenType.String
            && !titles.Add(titleToken.Value<string>()))
        {
            return true; // Duplicate title found
        }

        if (obj.TryGetValue("children", out JToken childrenToken) && childrenToken.Type == JTokenType.Array)
        {
            // Children<JObject>() yields only the object entries of the array.
            // Values<JObject>() would instead try to convert every entry, so a
            // null or non-object entry would break the recursion.
            foreach (JObject childObj in childrenToken.Children<JObject>())
            {
                if (CheckTitleDuplicates(childObj, titles))
                {
                    return true; // Duplicate title found in child
                }
            }
        }

        return false; // No duplicate titles found
    }
}

This program works fine; the only issue is that it compares each title value against all other titles at every level. What I want is to find duplicate title values only among titles that are at the same level. For example, "title": "Editorial Notes23" compared with "title": "Editorial Notes232".


Solution

  • The obvious solution, requiring only a small modification, is to add a level parameter to the method, increment it on each recursive call, and keep one hash set per level in a dictionary keyed by the level:

    /// <summary>
    /// Recursively checks whether two nodes at the same depth carry the same
    /// string "title". Titles are tracked per depth only, so nodes at the same
    /// depth are compared with each other even when they have different parents.
    /// </summary>
    /// <param name="obj">Node to inspect; its "children" array, if present, is visited recursively.</param>
    /// <param name="titleLevels">Titles seen so far, keyed by depth. Pass an empty dictionary on the first call.</param>
    /// <param name="level">Depth of <paramref name="obj"/>; the default 0 is the root.</param>
    /// <returns>
    /// <c>true</c> as soon as a title repeats at its depth; <c>false</c> when
    /// the whole subtree has been visited without finding one.
    /// </returns>
    bool CheckTitleDuplicates(JObject obj, Dictionary<int, HashSet<string>> titleLevels, int level = 0)
    {
        // Single dictionary lookup instead of ContainsKey followed by the indexer.
        if (!titleLevels.TryGetValue(level, out HashSet<string> titles))
        {
            titles = new HashSet<string>();
            titleLevels[level] = titles;
        }

        // HashSet.Add returns false when the value is already present, so one
        // call both records the title and detects the repeat.
        if (obj.TryGetValue("title", out JToken titleToken)
            && titleToken.Type == JTokenType.String
            && !titles.Add(titleToken.Value<string>()))
        {
            return true; // Duplicate title found
        }

        if (obj.TryGetValue("children", out JToken childrenToken) && childrenToken.Type == JTokenType.Array)
        {
            // Children<JObject>() yields only the object entries of the array.
            // Values<JObject>() would instead try to convert every entry, so a
            // null or non-object entry would break the recursion.
            foreach (JObject childObj in childrenToken.Children<JObject>())
            {
                if (CheckTitleDuplicates(childObj, titleLevels, level + 1))
                {
                    return true; // Duplicate title found in child
                }
            }
        }

        return false; // No duplicate titles found
    }
    

    And usage:

    // Parse the document held in the json string.
    JObject obj = JObject.Parse(json);

    // Starts empty; CheckTitleDuplicates adds one title set per depth as it descends.
    var titles = new Dictionary<int, HashSet<string>>();

    // The level argument is omitted, so the root is treated as depth 0.
    bool hasDuplicates = CheckTitleDuplicates(obj, titles);
    

    Another approach would be to scan the "same" level using some LINQ-to-JSON:

    /// <summary>
    /// Level-by-level variant: gathers all nodes of one depth, checks their
    /// string "title" values for a repeat, then moves on to those nodes'
    /// "children" arrays.
    /// </summary>
    /// <param name="root">
    /// Root node. Its own title is not examined, because the root is the only
    /// node at its depth; scanning starts with its "children" array.
    /// </param>
    /// <returns>
    /// <c>true</c> if two nodes at the same depth share a string title;
    /// otherwise <c>false</c>.
    /// </returns>
    bool CheckTitleDuplicates2(JObject root)
    {
        // "as JArray" gives null when "children" is missing or is not an array;
        // the loop condition below then ends the scan immediately.
        var current = new List<JArray> { root["children"] as JArray };
        while (current.Any(arr => arr is not null))
        {
            // Materialise the level once: it feeds both the duplicate check and
            // the computation of the next level.
            var nodes = current
                .SelectMany(arr => arr.Children<JObject>())
                .ToList();

            // Only string titles are compared, matching the recursive version.
            // HashSet.Add returns false for a value that is already present.
            var seen = new HashSet<string>();
            var duplicates = nodes
                .Select(o => o["title"])
                .Where(t => t is not null && t.Type == JTokenType.String)
                .Any(t => !seen.Add(t.Value<string>()));
            if (duplicates)
            {
                return true;
            }

            // Descend: the next level consists of the "children" arrays of this level's nodes.
            current = nodes
                .Select(o => o["children"] as JArray)
                .Where(arr => arr is not null)
                .ToList();
        }

        return false;
    }