Search code examples
c#linqrecursionhierarchical

Creating a hierarchical structure from a list of sequence strings


Given a list of sequence strings, what's the best way to create a hierarchical structure from it?

A sample of the type of list of sequence strings is below:

List<string> sequenceList = new List<string>() { "1", "1.1", "1.2", "2", "2.1", "2.1.1", "2.1.2" };

I've included an initial pass of the code below too:

public class Hierarchy
{
    public int ID { get; set; }
    public int ParentID { get; set; }
}

public IList<Hierarchy> GetHierarchy(IList<string> sequenceList)
{
    int iD = 0;
    List<Hierarchy> hierarchy = new List<Hierarchy>();

    foreach (string sequence in sequenceList)
    {
        iD++;
        List<string> childSequence = new List<string>();

        string[] sequenceParts = sequence.Split(new char[] { '.' }, StringSplitOptions.RemoveEmptyEntries);
        // If the sequence contains sub-sequence, i.e. "2.1" is a sub-sequence of "2".
        if (sequenceParts.Count() > 1)
        {
            // Struggling with this part, how to obtain the child sequence...
            childSequence = sequenceList.Where(s => s.Substring(0, (s.Length - 2)) == sequence.Substring(0, (s.Length - 2))).ToList();

            if (childSequence.Count() > 0)
            {
                int parentID = iD;
                foreach (string subSequence in childSequence)
                    hierarchy.Add(new Hierarchy() { ID = iD, ParentID = parentID });
            }
        }
        else
            // Add top level.
            hierarchy.Add(new Hierarchy() { ID = iD, ParentID = 0 });
    }

    return (IList<Hierarchy>)hierarchy;
}

I suspect I'm missing a trick with recursion on this so any help would be greatly appreciated.


Solution

  • It would help a bit to add Sequence property to the class and use it to find the parent:

    public class Hierarchy
    {
        public int ID { get; set; }
        public int ParentID { get; set; }
        public string Sequence { get; set; }
    }
    
    public static IList<Hierarchy> GetHierarchy(IList<string> sequenceList)
    {
        int iD = 0;
        List<Hierarchy> hierarchy = new List<Hierarchy>();
    
        foreach (string sequence in sequenceList)
        {
            iD++;
            List<string> childSequence = new List<string>();
    
            string[] sequenceParts = sequence.Split(new char[] { '.' }, StringSplitOptions.RemoveEmptyEntries);
            // If the sequence contains sub-sequence, i.e. "2.1" is a sub-sequence of "2".
            if (sequenceParts.Count() > 1)
            {
                var parentSequence = sequence.Substring(0, sequence.LastIndexOf("."));
                var parent = hierarchy.Single(x => x.Sequence == parentSequence);
    
                hierarchy.Add(new Hierarchy() { ID = iD, ParentID = parent.ID, Sequence = sequence });
            }
            else
                // Add top level.
                hierarchy.Add(new Hierarchy() { ID = iD, ParentID = 0, Sequence = sequence });
        }
    
        return (IList<Hierarchy>)hierarchy;
    }
    

    EDITED: this version uses recursion

    public class Hierarchy
    {
        public int ID { get; set; }
        public int ParentID { get; set; }
    }
    
    public static IList<Hierarchy> GetHierarchy(IList<string> sequenceList)
    {
        int iD = 0;
        List<Hierarchy> hierarchy = new List<Hierarchy>();
    
        foreach (string sequence in sequenceList.Where(x => x.Count(f => f == '.') == 0)) // get the root nodes
        {
            iD++;
    
            var item = new Hierarchy() { ID = iD, ParentID = 0 };
            hierarchy.Add(item);
            hierarchy.AddRange(GetChildsRecursive(sequence, item.ID, sequenceList, () => ++iD));
        }
    
        return (IList<Hierarchy>)hierarchy;
    }
    
    private static IList<Hierarchy> GetChildsRecursive(string parentSequence, int parentId, IList<string> sequences, Func<int> idGenerator)
    {
        var parentDots = parentSequence.Count(f => f == '.');
        var childSequences = sequences.Where(x => x.StartsWith(parentSequence) && x.Count(f => f == '.') == parentDots + 1);
    
        var list = new List<Hierarchy>();
        foreach (var childSequence in childSequences)
        {
            var item = new Hierarchy() { ID = idGenerator(), ParentID = parentId };
            list.Add(item);
            list.AddRange(GetChildsRecursive(childSequence, item.ID, sequences, idGenerator));
        }
        return list;
    }