I have a string I will present in a text box that will vary somewhat but has some format to it. The string is coming from a .pdf file. It will be formatted as below:
1
EA
2.00 2814-212-D003 0.00 0.00
LONG JACK PAD
Drawing: OPT
Due: 05/19/2023 Requester: NMB
Order: 2843HR-213-703 Seq No: 9002
2
EA
2.00 2814-212-D003 0.00 0.00
LONG JACK PAD
Drawing: OPT
Due: 05/19/2023 Requester: NMB
Order: 2843HR-214-703 Seq No: 9002
3
EA
2.00 2814-212-D004 0.00 0.00
SHORT JACK PAD
Drawing: OPT
Due: 05/19/2023 Requester: NMB
Order: 2843HR-213-703 Seq No: 9003
4
EA
2.00 2814-212-D004 0.00 0.00
SHORT JACK PAD
Drawing: OPT
Due: 05/19/2023 Requester: NMB
Order: 2843HR-214-703 Seq No: 9003
I want to pull several items from this text in a loop. To put in perspective, this text is a purchase order from a customer.
I want to pull the line item, the qty, the part number, description, who drew it, and the due date for the line.
My problem is that the method I am using to get the info doesn't seem like the best option out there. How would one go about this in a more efficient manner?
I plan to loop through each line item (I figure there's a way to loop every 7 lines), place each piece of data into a property of an object I will create, and add those objects to a list.
So far I am getting some of the information with little effort, but I feel I am doing it in a rather messy way. Here is what I came up with this morning:
// Raw purchase-order text taken from the rich text box.
string startString = richTextBox1.Text;
// NOTE(review): Substring(startIndex, length) takes a LENGTH as its second argument,
// but the position of the first space is passed here — confirm this is intended.
string qty = startString.Substring(6, startString.IndexOf(' '));
// Position of the first space in the text.
int index = startString.IndexOf(' ');
// Move on to the next space after the first one.
index = startString.IndexOf(' ', index + 1);
// Takes 14 characters starting AT that space, so the space itself is included in the result.
string partNumber = startString.Substring(index, 14);
// NOTE(review): the fixed +25 offset presumably skips the part number and prices — verify against
// real data; the length argument is again an index (position of the first ':'), not a length.
string description = startString.Substring(index+ 25, startString.IndexOf(":"));
Once I build this list, I will place the data into an Excel sheet, which I will figure out later (one problem at a time!).
Use code like this:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.IO;
namespace ConsoleApplication52
{
/// <summary>
/// Console demo: builds line items from a hard-coded sample purchase order
/// and asks the parsed order to print itself to a text file.
/// </summary>
class Program
{
    /// <summary>Program entry point; <paramref name="args"/> is not used.</summary>
    static void Main(string[] args)
    {
        // File that receives the printed line items.
        const string reportPath = @"c:\temp\test.txt";

        // Sample text; the verbatim literal begins and ends with a line break.
        string sampleText = @"
1
EA
2.00 2814-212-D003 0.00 0.00
LONG JACK PAD
Drawing: OPT
Due: 05/19/2023 Requester: NMB
Order: 2843HR-213-703 Seq No: 9002
2
EA
2.00 2814-212-D003 0.00 0.00
LONG JACK PAD
Drawing: OPT
Due: 05/19/2023 Requester: NMB
Order: 2843HR-214-703 Seq No: 9002
3
EA
2.00 2814-212-D004 0.00 0.00
SHORT JACK PAD
Drawing: OPT
Due: 05/19/2023 Requester: NMB
Order: 2843HR-213-703 Seq No: 9003
4
EA
2.00 2814-212-D004 0.00 0.00
SHORT JACK PAD
Drawing: OPT
Due: 05/19/2023 Requester: NMB
Order: 2843HR-214-703 Seq No: 9003
";

        Purchas_Order parsedOrder = new Purchas_Order(sampleText);
        parsedOrder.Print(reportPath);
    }
}
/// <summary>
/// One line item of a customer purchase order. The parsing constructor reads a whole
/// purchase-order text (seven non-blank lines per item) and stores the resulting items
/// in the static <see cref="purchase_orders"/> list.
/// </summary>
public class Purchas_Order
{
    /// <summary>Number of non-blank text lines that describe one line item.</summary>
    private const int LinesPerItem = 7;

    /// <summary>Culture used for every parse so results do not depend on the machine's regional settings.</summary>
    private static readonly IFormatProvider Invariant = System.Globalization.CultureInfo.InvariantCulture;

    /// <summary>Matches "key: value" pairs; a value ends at the first whitespace character.</summary>
    private static readonly Regex KeyValuePattern = new Regex(@"\s*(?'key'[^:]+):\s*(?'value'[^\s]+)");

    /// <summary>Items produced by the most recent call to the parsing constructor.</summary>
    public static List<Purchas_Order> purchase_orders { get; set; }

    /// <summary>Line item number (first line of each record).</summary>
    public int lineItem { get; set; }

    /// <summary>Ordered quantity (first token of the third line); a fractional part is truncated.</summary>
    public int quantity { get; set; }

    /// <summary>Second line of each record (e.g. "EA").</summary>
    public string size { get; set; }

    /// <summary>Part number (second token of the third line).</summary>
    public string partNumber { get; set; }

    /// <summary>Fourth line of each record.</summary>
    public string description { get; set; }

    /// <summary>Text following "Drawing:".</summary>
    public string drawing { get; set; }

    /// <summary>Date following "Due:".</summary>
    public DateTime due { get; set; }

    /// <summary>Value following "Requester:".</summary>
    public string requester { get; set; }

    /// <summary>Value following "Order:".</summary>
    public string order { get; set; }

    /// <summary>Number following "Seq No:".</summary>
    public int seq_num { get; set; }

    /// <summary>Creates an empty line item; does not touch <see cref="purchase_orders"/>.</summary>
    public Purchas_Order() { }

    /// <summary>
    /// Parses <paramref name="orders"/> and replaces <see cref="purchase_orders"/> with the items found.
    /// Blank and whitespace-only lines are skipped. A line that cannot be parsed is reported on the
    /// console and parsing continues with the next line.
    /// </summary>
    /// <param name="orders">Purchase-order text, seven non-blank lines per line item.</param>
    /// <exception cref="ArgumentNullException"><paramref name="orders"/> is null.</exception>
    public Purchas_Order(string orders)
    {
        purchase_orders = new List<Purchas_Order>();
        Purchas_Order newOrder = null;
        int index = 0;       // counts non-blank lines only; index % LinesPerItem is the position inside a record
        int lineNumber = 0;  // counts every line, for diagnostics
        string line;

        using (StringReader reader = new StringReader(orders))
        {
            while ((line = reader.ReadLine()) != null)
            {
                lineNumber++;

                // Whitespace-only lines must not advance the record position.
                if (string.IsNullOrWhiteSpace(line))
                {
                    continue;
                }

                try
                {
                    if (index % LinesPerItem == 0)
                    {
                        newOrder = new Purchas_Order();
                        purchase_orders.Add(newOrder);
                    }
                    ParseLine(newOrder, index % LinesPerItem, line.Trim());
                }
                catch (Exception ex)
                {
                    // Best effort: report the bad line together with the reason and keep going.
                    Console.WriteLine("Line Number = {0}, Index = {1}, Case = {2}, Line = {3}, Error = {4}", lineNumber, index, index % LinesPerItem, line, ex.Message);
                }
                index++;
            }
        }
    }

    /// <summary>Stores the data of one trimmed record line in <paramref name="target"/>.</summary>
    /// <param name="target">Line item being filled.</param>
    /// <param name="position">Zero-based position of the line inside its seven-line record.</param>
    /// <param name="line">Trimmed, non-blank line text.</param>
    private static void ParseLine(Purchas_Order target, int position, string line)
    {
        switch (position)
        {
            case 0:
                target.lineItem = int.Parse(line, Invariant);
                break;
            case 1:
                target.size = line;
                break;
            case 2:
                {
                    // Layout: "<qty> <part number> <price> <price>".
                    string[] tokens = line.Split(new char[] { ' ', '\t' }, StringSplitOptions.RemoveEmptyEntries);
                    target.partNumber = tokens[1];
                    // The text carries decimals ("2.00"); the property is an int, so truncate.
                    target.quantity = (int)decimal.Parse(tokens[0], Invariant);
                    break;
                }
            case 3:
                target.description = line;
                break;
            case 4:
                {
                    // Split only at the first colon so a value containing ':' stays intact.
                    string[] parts = line.Split(new char[] { ':' }, 2);
                    target.drawing = parts[1].Trim();
                    break;
                }
            case 5:
                {
                    MatchCollection matches = KeyValuePattern.Matches(line);
                    target.due = DateTime.Parse(matches[0].Groups["value"].Value, Invariant);
                    target.requester = matches[1].Groups["value"].Value.Trim();
                    break;
                }
            case 6:
                {
                    MatchCollection matches = KeyValuePattern.Matches(line);
                    target.order = matches[0].Groups["value"].Value.Trim();
                    target.seq_num = int.Parse(matches[1].Groups["value"].Value, Invariant);
                    break;
                }
        }
    }

    /// <summary>
    /// Writes every item in <see cref="purchase_orders"/> to <paramref name="filename"/>,
    /// overwriting the file. If nothing has been parsed yet, an empty file is written.
    /// </summary>
    /// <param name="filename">Path of the file to create.</param>
    public void Print(string filename)
    {
        // 'using' flushes and closes the writer even when a write throws.
        using (StreamWriter writer = new StreamWriter(filename))
        {
            if (purchase_orders == null)
            {
                return;
            }

            foreach (Purchas_Order item in purchase_orders)
            {
                writer.WriteLine("Line Item : {0}", item.lineItem);
                writer.WriteLine("Quantity : {0}", item.quantity);
                writer.WriteLine("Size : {0}", item.size);
                writer.WriteLine("Part Number : {0}", item.partNumber);
                writer.WriteLine("Description : {0}", item.description);
                writer.WriteLine("Drawing : {0}", item.drawing);
                writer.WriteLine("Due : {0}", item.due);
                writer.WriteLine("Requestor : {0}", item.requester);
                writer.WriteLine("Order : {0}", item.order);
                writer.WriteLine("Sequence Number : {0}", item.seq_num);
                writer.WriteLine("");
            }
        }
    }
}
}
Here is the Regex pattern
Regex pattern
\s* - Zero or more whitespace characters
Define Group Key
(?'key'[^:]+)
Matches any characters up to the colon
^ - NOT
: - Match a colon
\s* - Zero or more whitespace characters
Define Group Value
(?'value'[^\s]+)
Matches any characters up to a space
^ - NOT