I'm trying to a make nested list based on list-item's (same level), but having different label text, which will start-text of each list-item. I have done with some regex replaces to meet the nesting. I assume my code is not up to mark to meet the grouping|nesting.
IN.xml:
<?xml version="1.0" encoding="UTF-8"?>
<article>
<p>The Simple list sample</p>
<list-item>1. First</list-item>
<list-item>2. Second</list-item>
<list-item>3. Third</list-item>
<p>The Nested list sample</p>
<list-item>1. FirstLevel First Text</list-item>
<list-item>1.1 SecondLevel First Text</list-item>
<list-item>1.1.1 ThirdLevel First Text</list-item>
<list-item>1.1.2 ThirdLevel Second Text</list-item>
<list-item>1.2 SecondLevel Second Text</list-item>
<list-item>2. FirstLevel Second Text</list-item>
<list-item>2.1 SecondLevel First Text</list-item>
<list-item>2.2 SecondLevel Second Text</list-item>
<list-item>3. FirstLevel Third Text</list-item>
<list-item>4. FirstLevel Fourth Text</list-item>
</article>
C# (tried code):
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Text.RegularExpressions;
using System.Xml;
using System.Xml.Linq;
using System.Xml.XPath;
using System.Linq;
using System.Linq.Expressions;
namespace ListNesting1
{
class Program
{
static void Main(string[] args)
{
XmlDocument XMLDoc1 = new XmlDocument();
XmlNodeList NDL1;
XmlElement XEle1;
String S1, S2, StrFinal, StrEle1;
StreamReader SR1;
StreamWriter SW1;
try
{
SR1 = new StreamReader(args[0]);
S1 = SR1.ReadToEnd();
SR1.Close();
}
catch (Exception e)
{
Console.WriteLine(e.Message);
return;
}
XMLDoc1.LoadXml(S1);
NDL1 = XMLDoc1.SelectNodes("//list-item");
for(int i=0; i<NDL1.Count; i++)
{
if (Regex.IsMatch(NDL1[i].InnerText, @"^[0-9]\. "))
{
StrEle1 = "List1";
}
else if (Regex.IsMatch(NDL1[i].InnerText, @"^[0-9]\.[0-9] "))
{
StrEle1 = "List2";
}
else if (Regex.IsMatch(NDL1[i].InnerText, @"^[0-9]\.[0-9]\.[0-9] "))
{
StrEle1 = "List3";
}
else
{
StrEle1 = "List4";
}
XEle1 = XMLDoc1.CreateElement(StrEle1);
S2 = NDL1[i].OuterXml;
XEle1.InnerXml = S2;
NDL1[i].ParentNode.InsertAfter(XEle1, NDL1[i]);
NDL1[i].ParentNode.RemoveChild(NDL1[i]);
}
StrFinal = XMLDoc1.OuterXml;
StrFinal = StrFinal.Replace("</List1><List1>", "");
StrFinal = StrFinal.Replace("</List2><List2>", "");
StrFinal = StrFinal.Replace("</List3><List3>", "");
StrFinal = StrFinal.Replace("</List4><List4>", "");
StrFinal = StrFinal.Replace("</list-item></List1><List2>", "<List2>");
StrFinal = StrFinal.Replace("</list-item></List2><List3>", "<List3>");
StrFinal = StrFinal.Replace("</list-item></List3><List4>", "<List4>");
StrFinal = StrFinal.Replace("</List2><List1>", "</List2></list-item>");
StrFinal = StrFinal.Replace("</List3><List2>", "</List3></list-item>");
StrFinal = StrFinal.Replace("</List4><List3>", "</List4></list-item>");
StrFinal = StrFinal.Replace("><", ">\n<");
SW1 = new StreamWriter(args[1]);
SW1.Write(StrFinal);
SW1.Close();
}
}
}
Required XML:
<?xml version="1.0" encoding="UTF-8"?>
<article>
<p>The Simple list sample</p>
<List1>
<list-item>1. First</list-item>
<list-item>2. Second</list-item>
<list-item>3. Third</list-item>
</List1>
<p>The Nested list sample</p>
<List1>
<list-item>1. FirstLevel First Text
<List2>
<list-item>1.1 SecondLevel First Text
<List3>
<list-item>1.1.1 ThirdLevel First Text</list-item>
<list-item>1.1.2 ThirdLevel Second Text</list-item>
</List3>
</list-item>
<list-item>1.2 SecondLevel Second Text</list-item>
</List2>
</list-item>
<list-item>2. FirstLevel Second Text
<List2>
<list-item>2.1 SecondLevel First Text</list-item>
<list-item>2.2 SecondLevel Second Text</list-item>
</List2>
</list-item>
<list-item>3. FirstLevel Third Text</list-item>
<list-item>4. FirstLevel Fourth Text</list-item>
</List1>
</article>
C# code
using System;
using System.Text.RegularExpressions;
public class Example
{
public static void Main()
{
string pattern1 = @"(<list-item>1\. [\s\S]*?</list-item>(?!\s+<list-item>\d))";
string substitution1 = @"<list1>$1</list1>";
string pattern2 = @"(<list-item>\d\.1 [\s\S]*?</list-item>(?!\s+<list-item>\d.\d))";
string substitution2 = @"<list2>$1</list2>";
string pattern3 = @"(<list-item>\d.\d\.1 [\s\S]*?</list-item>(?!\s+<list-item>\d.\d.\d))";
string substitution3 = @"<list3>$1</list3>";
string input = @"<?xml version=""1.0"" encoding=""UTF-8""?>
<article>
<p>The Simple list sample</p>
<list-item>1. First</list-item>
<list-item>2. Second</list-item>
<list-item>3. Third</list-item>
<p>The Nested list sample</p>
<list-item>1. FirstLevel First Text</list-item>
<list-item>1.1 SecondLevel First Text</list-item>
<list-item>1.1.1 ThirdLevel First Text</list-item>
<list-item>1.1.2 ThirdLevel Second Text</list-item>
<list-item>1.2 SecondLevel Second Text</list-item>
<list-item>2. FirstLevel Second Text</list-item>
<list-item>2.1 SecondLevel First Text</list-item>
<list-item>2.2 SecondLevel Second Text</list-item>
<list-item>3. FirstLevel Third Text</list-item>
<list-item>4. FirstLevel Fourth Text</list-item>
</article>";
Regex regex = new Regex(pattern1);
input = regex.Replace(input, substitution1);
Regex regex2 = new Regex(pattern2);
input = regex2.Replace(input, substitution2);
Regex regex3 = new Regex(pattern3);
input = regex3.Replace(input, substitution3);
}
}
output
<?xml version="1.0" encoding="UTF-8"?>
<article>
<p>The Simple list sample</p>
<list1>
<list-item>1. First</list-item>
<list-item>2. Second</list-item>
<list-item>3. Third</list-item>
</list1>
<p>The Nested list sample</p>
<list1>
<list-item>1. FirstLevel First Text</list-item>
<list2>
<list-item>1.1 SecondLevel First Text</list-item>
<list3>
<list-item>1.1.1 ThirdLevel First Text</list-item>
<list-item>1.1.2 ThirdLevel Second Text</list-item>
</list3>
<list-item>1.2 SecondLevel Second Text</list-item>
</list2>
<list-item>2. FirstLevel Second Text</list-item>
<list2>
<list-item>2.1 SecondLevel First Text</list-item>
<list-item>2.2 SecondLevel Second Text</list-item>
</list2>
<list-item>3. FirstLevel Third Text</list-item>
<list-item>4. FirstLevel Fourth Text</list-item>
</list1>
</article>