I need to parse a FIX message as fast as possible.
My approach is below. I keep a ref to the FixString in FP_FixString, then I work through it sequencially getting Tag values as I need them.
The fix message is large (approximately 4,000 chars and with about 100 repeating group of 3 tags that I need to extract).
Am I doing this as efficiently as possible? From a string building and parsing perspective?
public FixParser
{
string FP_FixString;
int FP_m;
int FP_Lastm;
public void Go()
{
using (StreamReader reader = new StreamReader(new GZipStream(File.OpenRead(@"L:\Logs\FIX.4.2-D.messages.current.log.20140512T.gz"), CompressionMode.Decompress)))
{
string line = "";
while ( (line = reader.ReadLine()) != null)
{
InitiateFixParse(ref line);
string Symbol;
if (!GetTagString(55, out Symbol))
return;
//DO ALL OTHER PROCESSING OF TAGS HERE
}
}
public void InitiateFixParse(ref string fixString)
{
FP_Lastm = fixString.Length - 1;
FP_FixString = fixString;
FP_m = 0;
}
public bool IsEndMark()
{
if (FP_m>=FP_Lastm || FP_FixString[FP_m].Equals('\x01'))
return true;
return false;
}
public bool NextTag(out int Tag, out string ValueString)
{
Tag = 0;
ValueString = "";
if(FP_m>=FP_Lastm)
return false;
string TagString = "";
bool GettingTag=true;
while (!IsEndMark())
{
if (FP_FixString[FP_m].Equals('='))
{
GettingTag = false;
FP_m++;
}
if(GettingTag)
TagString = TagString + FP_FixString[FP_m];
else
ValueString = ValueString + FP_FixString[FP_m];
FP_m++;
}
Tag=int.Parse(TagString);
FP_m++; //Start of next Tag
return true;
}
public bool GetTagString(int Tag, out string ValueString)
{
//bool FountIt = false;
int aTag;
string aValueString;
while (NextTag(out aTag, out aValueString))
{
if (aTag == Tag)
{
ValueString = aValueString;
return true;
}
}
ValueString = "";
return false;
}
}
If you are dealing with very large files you want to avoid GC pressure as much as possible as already mentioned. StringBuilder helps but you are still creating a StringBuilder instance. Some quick wins:
if (GettingTag)
{
//TagString = TagString + FP_FixString[FP_m];
Tag = Tag * 10 + ((byte)FP_FixString[FP_m] -48);
}// This reduces time by about 40%
//Tag = int.Parse(TagString);
The code below reduces the time even further (reduces to 50% of original):
int valueStart = 0;
while (!IsEndMark())
{
if (FP_FixString[FP_m].Equals('='))
{
GettingTag = false;
FP_m++;
valueStart = FP_m;
}
if (GettingTag)
{
//TagString = TagString + FP_FixString[FP_m];
Tag = Tag * 10 + ((byte)FP_FixString[FP_m] -48);
}
//else
//ValueString = ValueString + FP_FixString[FP_m];
FP_m++;
}
ValueString = FP_FixString.Substring(valueStart, FP_m - valueStart);
//Tag = int.Parse(TagString);
FP_m++; //Start of next Tag
I havent looked at the code further.