Search code examples
c#xmllinq-to-xml

How to make a nested list from same-level elements, based on their start text


I'm trying to make a nested list from list-items that are all on the same level but have different label text at the start of each list-item (for example `1.`, `1.1`, `1.1.1`). I have done this with some regex matches and string replacements to produce the nesting, but I assume my code is not up to the mark for this kind of grouping/nesting.

IN.xml:

<?xml version="1.0" encoding="UTF-8"?>
<article>
   <p>The Simple list sample</p>
   <list-item>1. First</list-item>
   <list-item>2. Second</list-item>
   <list-item>3. Third</list-item>
   <p>The Nested list sample</p>
   <list-item>1. FirstLevel First Text</list-item>
   <list-item>1.1 SecondLevel First Text</list-item>
   <list-item>1.1.1 ThirdLevel First Text</list-item>
   <list-item>1.1.2 ThirdLevel Second Text</list-item>
   <list-item>1.2 SecondLevel Second Text</list-item>
   <list-item>2. FirstLevel Second Text</list-item>
   <list-item>2.1 SecondLevel First Text</list-item>
   <list-item>2.2 SecondLevel Second Text</list-item>
   <list-item>3. FirstLevel Third Text</list-item>
   <list-item>4. FirstLevel Fourth Text</list-item>
</article>

C# (tried code):

using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Text.RegularExpressions;
using System.Xml;
using System.Xml.Linq;
using System.Xml.XPath;
using System.Linq;
using System.Linq.Expressions;

namespace ListNesting1
{
    /// <summary>
    /// Command-line tool that reads an XML file, groups flat, sibling
    /// <c>list-item</c> elements into nested <c>List1</c>..<c>List4</c>
    /// elements according to the numeric label at the start of each item's
    /// text, and writes the result to a second file.
    /// </summary>
    class Program
    {
        // Label patterns, tried in this order. Each numeric component may have
        // several digits so that labels such as "10. " or "1.12 " are recognised.
        private static readonly Regex Level1Label = new Regex(@"^[0-9]+\. ");
        private static readonly Regex Level2Label = new Regex(@"^[0-9]+\.[0-9]+ ");
        private static readonly Regex Level3Label = new Regex(@"^[0-9]+\.[0-9]+\.[0-9]+ ");

        /// <summary>
        /// Entry point.
        /// </summary>
        /// <param name="args">
        /// <c>args[0]</c> is the path of the input XML file,
        /// <c>args[1]</c> is the path of the output file.
        /// </param>
        static void Main(string[] args)
        {
            // Check both paths up front instead of failing on args[1] after
            // all the work has already been done.
            if (args.Length < 2)
            {
                Console.WriteLine("Usage: ListNesting1 <input.xml> <output.xml>");
                return;
            }

            string xmlText;
            try
            {
                // 'using' closes the reader even when ReadToEnd throws.
                using (StreamReader reader = new StreamReader(args[0]))
                {
                    xmlText = reader.ReadToEnd();
                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e.Message);
                return;
            }

            XmlDocument document = new XmlDocument();
            document.LoadXml(xmlText);

            NestListItems(document);

            // Same serialization as before: one tag per line, no indentation.
            string output = document.OuterXml.Replace("><", ">\n<");

            using (StreamWriter writer = new StreamWriter(args[1]))
            {
                writer.Write(output);
            }
        }

        /// <summary>
        /// Wraps every run of directly adjacent <c>list-item</c> siblings in
        /// <c>ListN</c> elements and nests deeper levels inside the preceding
        /// shallower item.
        /// </summary>
        /// <remarks>
        /// The nesting is built in the DOM rather than by string replacement on
        /// the serialized XML, so the result stays well-formed when the level
        /// drops by more than one step (for example "1.1.1" followed by "2.")
        /// or when a nested run is followed by a non-list element.
        /// An item that is deeper than the current list by more than one level
        /// is still nested under the preceding item.
        /// </remarks>
        /// <param name="document">Document that is modified in place.</param>
        private static void NestListItems(XmlDocument document)
        {
            // Snapshot the items first; the tree is restructured below.
            List<XmlNode> items = new List<XmlNode>();
            foreach (XmlNode node in document.SelectNodes("//list-item"))
            {
                items.Add(node);
            }

            // Record adjacency before any node is moved: an item continues the
            // current run only if it directly follows the previous item.
            bool[] continuesRun = new bool[items.Count];
            for (int i = 1; i < items.Count; i++)
            {
                continuesRun[i] = object.ReferenceEquals(items[i].PreviousSibling, items[i - 1]);
            }

            // Lists that are currently open, outermost at the bottom, together
            // with the level of each.
            Stack<XmlElement> openLists = new Stack<XmlElement>();
            Stack<int> openLevels = new Stack<int>();

            for (int i = 0; i < items.Count; i++)
            {
                XmlNode item = items[i];
                int level = GetLevel(item.InnerText);

                if (!continuesRun[i])
                {
                    // Something other than a list-item sits in between
                    // (or this is the first item): start afresh.
                    openLists.Clear();
                    openLevels.Clear();
                }

                // Close every list that is deeper than this item.
                while (openLevels.Count > 0 && openLevels.Peek() > level)
                {
                    openLists.Pop();
                    openLevels.Pop();
                }

                if (openLevels.Count == 0 || openLevels.Peek() < level)
                {
                    XmlElement list = document.CreateElement("List" + level);
                    if (openLists.Count == 0)
                    {
                        // Top of a run: the list takes the item's place.
                        item.ParentNode.InsertBefore(list, item);
                    }
                    else
                    {
                        // Deeper level: nest inside the last item of the
                        // enclosing list.
                        openLists.Peek().LastChild.AppendChild(list);
                    }

                    openLists.Push(list);
                    openLevels.Push(level);
                }

                // AppendChild moves the item out of its current parent.
                openLists.Peek().AppendChild(item);
            }
        }

        /// <summary>
        /// Determines the list level from the label at the start of the text.
        /// </summary>
        /// <param name="text">Text content of a list-item.</param>
        /// <returns>
        /// 1 for "N. ", 2 for "N.N ", 3 for "N.N.N ", and 4 for anything else.
        /// </returns>
        private static int GetLevel(string text)
        {
            if (Level1Label.IsMatch(text))
            {
                return 1;
            }

            if (Level2Label.IsMatch(text))
            {
                return 2;
            }

            if (Level3Label.IsMatch(text))
            {
                return 3;
            }

            return 4;
        }
    }
}

Required XML:

<?xml version="1.0" encoding="UTF-8"?>
<article>
   <p>The Simple list sample</p>
   <List1>
      <list-item>1. First</list-item>
      <list-item>2. Second</list-item>
      <list-item>3. Third</list-item>
   </List1>
   <p>The Nested list sample</p>
   <List1>
      <list-item>1. FirstLevel First Text
         <List2>
            <list-item>1.1 SecondLevel First Text
               <List3>
                  <list-item>1.1.1 ThirdLevel First Text</list-item>
                  <list-item>1.1.2 ThirdLevel Second Text</list-item>
               </List3>
            </list-item>
            <list-item>1.2 SecondLevel Second Text</list-item>
         </List2>
      </list-item>
      <list-item>2. FirstLevel Second Text
         <List2>
            <list-item>2.1 SecondLevel First Text</list-item>
            <list-item>2.2 SecondLevel Second Text</list-item>
         </List2>
      </list-item>
      <list-item>3. FirstLevel Third Text</list-item>
      <list-item>4. FirstLevel Fourth Text</list-item>
   </List1>
</article>

Solution

  • C# code

    using System;
    using System.Text.RegularExpressions;
    
    /// <summary>
    /// Demonstrates wrapping runs of numbered <c>list-item</c> elements in
    /// <c>list1</c>/<c>list2</c>/<c>list3</c> elements using regular
    /// expressions, and prints the transformed XML to standard output.
    /// </summary>
    public class Example
    {
        /// <summary>
        /// Applies the three level patterns in order (outermost first) to the
        /// sample document and writes the result to the console.
        /// </summary>
        public static void Main()
        {
            // Each pattern starts at the first item of a run ("1. ", "N.1 ",
            // "N.N.1 ") and lazily extends until a closing </list-item> that is
            // NOT followed by another item of the same or a deeper level.
            // Dots are escaped so they match a literal '.', \d+ allows
            // multi-digit numbers, and \s* also covers items with no
            // whitespace between them.
            string pattern1 = @"(<list-item>1\. [\s\S]*?</list-item>(?!\s*<list-item>\d))";
            string substitution1 = @"<list1>$1</list1>";

            string pattern2 = @"(<list-item>\d+\.1 [\s\S]*?</list-item>(?!\s*<list-item>\d+\.\d))";
            string substitution2 = @"<list2>$1</list2>";

            string pattern3 = @"(<list-item>\d+\.\d+\.1 [\s\S]*?</list-item>(?!\s*<list-item>\d+\.\d+\.\d))";
            string substitution3 = @"<list3>$1</list3>";

            string input = @"<?xml version=""1.0"" encoding=""UTF-8""?>
    <article>
       <p>The Simple list sample</p>
       <list-item>1. First</list-item>
       <list-item>2. Second</list-item>
       <list-item>3. Third</list-item>
       <p>The Nested list sample</p>
       <list-item>1. FirstLevel First Text</list-item>
       <list-item>1.1 SecondLevel First Text</list-item>
       <list-item>1.1.1 ThirdLevel First Text</list-item>
       <list-item>1.1.2 ThirdLevel Second Text</list-item>
       <list-item>1.2 SecondLevel Second Text</list-item>
       <list-item>2. FirstLevel Second Text</list-item>
       <list-item>2.1 SecondLevel First Text</list-item>
       <list-item>2.2 SecondLevel Second Text</list-item>
       <list-item>3. FirstLevel Third Text</list-item>
       <list-item>4. FirstLevel Fourth Text</list-item>
    </article>";

            input = Regex.Replace(input, pattern1, substitution1);
            input = Regex.Replace(input, pattern2, substitution2);
            input = Regex.Replace(input, pattern3, substitution3);

            // The transformed document was previously computed and discarded.
            Console.WriteLine(input);
        }
    }
    
    

    output (re-indented for readability)

    <?xml version="1.0" encoding="UTF-8"?>
    <article>
        <p>The Simple list sample</p>
        <list1>
            <list-item>1. First</list-item>
            <list-item>2. Second</list-item>
            <list-item>3. Third</list-item>
        </list1>
        <p>The Nested list sample</p>
        <list1>
            <list-item>1. FirstLevel First Text</list-item>
            <list2>
                <list-item>1.1 SecondLevel First Text</list-item>
                <list3>
                    <list-item>1.1.1 ThirdLevel First Text</list-item>
                    <list-item>1.1.2 ThirdLevel Second Text</list-item>
                </list3>
                <list-item>1.2 SecondLevel Second Text</list-item>
            </list2>
            <list-item>2. FirstLevel Second Text</list-item>
            <list2>
                <list-item>2.1 SecondLevel First Text</list-item>
                <list-item>2.2 SecondLevel Second Text</list-item>
            </list2>
            <list-item>3. FirstLevel Third Text</list-item>
            <list-item>4. FirstLevel Fourth Text</list-item>
        </list1>
    </article>