I'm working on a small local C# app that processes emails. I'm using S22.Imap, which downloads the messages and separates out the headers. When I try to get the Date of an email, it gives me the raw string, exactly as it appears in the MIME headers.
For most of them, DateTime.TryParse works fine, but for some dates it fails. Here is an example of each of them; I can't find how to process them in the RFC:
Could anyone tell me the meaning of the first five, or point me to documentation that would explain the other format?
Thank you in advance for any help.
I had to write my own class to parse these (if you're interested, I have a competing library to S22.Imap called MailKit, which uses my MimeKit library for parsing messages, dates, etc.).
I've taken the liberty of splitting my date parser out of MimeKit in order to paste it here as a stand-alone class:
using System;
using System.Text;
using System.Collections.Generic;
namespace DateParserUtils {
/// <summary>
/// Classification bits that are accumulated over the bytes of a date token.
/// </summary>
/// <remarks>
/// The "Non..." members are negative markers: a set bit means that at least one
/// byte ruled that category out. <c>HasColon</c> and <c>HasSign</c> are positive
/// markers for the ':' and '+'/'-' characters.
/// </remarks>
[Flags]
enum DateTokenFlags : byte
{
    None           = 0x00,
    NonNumeric     = 0x01,
    NonWeekday     = 0x02,
    NonMonth       = 0x04,
    NonTime        = 0x08,
    NonAlphaZone   = 0x10,
    NonNumericZone = 0x20,
    HasColon       = 0x40,
    HasSign        = 0x80,
}
/// <summary>
/// A slice of the input buffer together with the classification flags of that slice.
/// </summary>
/// <remarks>
/// Every <c>Is...</c> property is derived purely from <see cref="Flags"/>: a category
/// applies when its "Non..." bit is clear (and, for time-of-day and numeric zones,
/// when the matching "Has..." bit is set as well).
/// </remarks>
class DateToken
{
    /// <summary>
    /// Creates a token covering <paramref name="length"/> bytes starting at <paramref name="startIndex"/>.
    /// </summary>
    /// <param name="flags">The accumulated classification flags.</param>
    /// <param name="startIndex">The offset of the first byte of the token.</param>
    /// <param name="length">The number of bytes in the token.</param>
    public DateToken (DateTokenFlags flags, int startIndex, int length)
    {
        Flags = flags;
        StartIndex = startIndex;
        Length = length;
    }

    /// <summary>The accumulated classification flags.</summary>
    public DateTokenFlags Flags { get; private set; }

    /// <summary>The offset of the first byte of the token.</summary>
    public int StartIndex { get; private set; }

    /// <summary>The number of bytes in the token.</summary>
    public int Length { get; private set; }

    /// <summary><c>true</c> when the NonNumeric bit is clear.</summary>
    public bool IsNumeric {
        get { return !Has (DateTokenFlags.NonNumeric); }
    }

    /// <summary><c>true</c> when the NonWeekday bit is clear.</summary>
    public bool IsWeekday {
        get { return !Has (DateTokenFlags.NonWeekday); }
    }

    /// <summary><c>true</c> when the NonMonth bit is clear.</summary>
    public bool IsMonth {
        get { return !Has (DateTokenFlags.NonMonth); }
    }

    /// <summary><c>true</c> when the NonTime bit is clear and a colon was seen.</summary>
    public bool IsTimeOfDay {
        get { return !Has (DateTokenFlags.NonTime) && Has (DateTokenFlags.HasColon); }
    }

    /// <summary><c>true</c> when the NonNumericZone bit is clear and a sign was seen.</summary>
    public bool IsNumericZone {
        get { return !Has (DateTokenFlags.NonNumericZone) && Has (DateTokenFlags.HasSign); }
    }

    /// <summary><c>true</c> when the NonAlphaZone bit is clear.</summary>
    public bool IsAlphaZone {
        get { return !Has (DateTokenFlags.NonAlphaZone); }
    }

    /// <summary><c>true</c> when the token is either a numeric or an alphabetic zone.</summary>
    public bool IsTimeZone {
        get { return IsNumericZone || IsAlphaZone; }
    }

    // Tests whether any of the given bits is set in Flags.
    bool Has (DateTokenFlags bits)
    {
        return (Flags & bits) != 0;
    }
}
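/// <summary>
/// Utility methods to parse rfc822 date strings.
/// </summary>
/// <remarks>
/// Parsing first tries the standard rfc822 token order and then falls back to a
/// more lenient heuristic; results are returned as <see cref="System.DateTimeOffset"/> values.
/// </remarks>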
public static class DateUtils
{
// Midnight on January 1st, 1970.
internal static readonly DateTime UnixEpoch = new DateTime (1970, 1, 1, 0, 0, 0, 0);
// The character sets below feed the static constructor, which classifies every
// byte value by testing whether it occurs in each of these strings.
// Month and weekday names are probed with both letter cases of a byte; the
// remaining sets are probed with the byte exactly as it is.
const string MonthCharacters = "JanuaryFebruaryMarchAprilMayJuneJulyAugustSeptemberOctoberNovemberDecember";
const string WeekdayCharacters = "SundayMondayTuesdayWednesdayThursdayFridaySaturday";
// Upper-case letters only: lower-case bytes are flagged NonAlphaZone.
const string AlphaZoneCharacters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
const string NumericZoneCharacters = "+-0123456789";
const string NumericCharacters = "0123456789";
const string TimeCharacters = "0123456789:";
// Three-letter month abbreviations; TryGetMonth reports (array index + 1).
static readonly string[] Months = {
"Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
// Three-letter weekday abbreviations; TryGetWeekday casts the array index to DayOfWeek.
static readonly string[] WeekDays = {
"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
};
// Alphabetic zone name -> offset written as a signed HHMM integer (e.g. -500);
// filled in by the static constructor.
static readonly Dictionary<string, int> timezones;
// Classification flags for each of the 256 possible byte values, indexed by the
// byte itself; filled in by the static constructor.
static readonly DateTokenFlags[] datetok;
/// <summary>
/// Builds the alphabetic time zone table and the per-byte classification table.
/// </summary>
/// <remarks>
/// For every byte value, a "Non..." flag is set for each category whose character
/// set does not contain that byte. Month and weekday names are matched without
/// regard to case, so letters are probed in both cases.
/// </remarks>
static DateUtils ()
{
    // Offsets are written as signed HHMM integers.
    timezones = new Dictionary<string, int> {
        { "UT", 0 }, { "UTC", 0 }, { "GMT", 0 },
        { "EDT", -400 }, { "EST", -500 },
        { "CDT", -500 }, { "CST", -600 },
        { "MDT", -600 }, { "MST", -700 },
        { "PDT", -700 }, { "PST", -800 },
        // Note: rfc822 got the signs backwards for the military
        // timezones so some sending clients may mistakenly use the
        // wrong values.
        { "A", 100 }, { "B", 200 }, { "C", 300 },
        { "D", 400 }, { "E", 500 }, { "F", 600 },
        { "G", 700 }, { "H", 800 }, { "I", 900 },
        { "K", 1000 }, { "L", 1100 }, { "M", 1200 },
        { "N", -100 }, { "O", -200 }, { "P", -300 },
        { "Q", -400 }, { "R", -500 }, { "S", -600 },
        { "T", -700 }, { "U", -800 }, { "V", -900 },
        { "W", -1000 }, { "X", -1100 }, { "Y", -1200 },
        { "Z", 0 },
    };

    datetok = new DateTokenFlags[256];
    var any = new char[2];

    for (int c = 0; c < 256; c++) {
        if (c >= 0x41 && c <= 0x5a) {
            // upper-case letter: probe for it and its lower-case twin
            any[0] = (char) c;
            any[1] = (char) (c + 0x20);
        } else if (c >= 0x61 && c <= 0x7a) {
            // lower-case letter: probe for it and its upper-case twin
            any[0] = (char) (c - 0x20);
            any[1] = (char) c;
        } else {
            // Not a letter: probe for the byte itself. Without this reset the
            // pair would still hold the previous letter, which only gave the
            // right answer because no month or weekday name contains a 'z'.
            any[0] = any[1] = (char) c;
        }

        if (NumericZoneCharacters.IndexOf ((char) c) == -1)
            datetok[c] |= DateTokenFlags.NonNumericZone;
        if (AlphaZoneCharacters.IndexOf ((char) c) == -1)
            datetok[c] |= DateTokenFlags.NonAlphaZone;
        if (WeekdayCharacters.IndexOfAny (any) == -1)
            datetok[c] |= DateTokenFlags.NonWeekday;
        if (NumericCharacters.IndexOf ((char) c) == -1)
            datetok[c] |= DateTokenFlags.NonNumeric;
        if (MonthCharacters.IndexOfAny (any) == -1)
            datetok[c] |= DateTokenFlags.NonMonth;
        if (TimeCharacters.IndexOf ((char) c) == -1)
            datetok[c] |= DateTokenFlags.NonTime;
    }

    // positive markers consulted by DateToken.IsTimeOfDay / IsNumericZone
    datetok[':'] |= DateTokenFlags.HasColon;
    datetok['+'] |= DateTokenFlags.HasSign;
    datetok['-'] |= DateTokenFlags.HasSign;
}
/// <summary>
/// Tries to interpret a token as a weekday name.
/// </summary>
/// <remarks>
/// Only the first three characters of the token are compared (ignoring case)
/// against the abbreviations in <c>WeekDays</c>.
/// </remarks>
/// <returns><c>true</c> if the token names a weekday, <c>false</c> otherwise.</returns>
/// <param name="token">The token to interpret.</param>
/// <param name="text">The buffer that the token refers into.</param>
/// <param name="weekday">The matched weekday, or Sunday when nothing matched.</param>
static bool TryGetWeekday (DateToken token, byte[] text, out DayOfWeek weekday)
{
    weekday = DayOfWeek.Sunday;

    if (token.Length < 3 || !token.IsWeekday)
        return false;

    // ASCII decoding yields one char per byte, so decoding just the first
    // three bytes gives the same prefix as decoding the whole token.
    var prefix = Encoding.ASCII.GetString (text, token.StartIndex, 3);
    int candidate = 0;

    while (candidate < WeekDays.Length) {
        if (WeekDays[candidate].Equals (prefix, StringComparison.OrdinalIgnoreCase)) {
            weekday = (DayOfWeek) candidate;
            return true;
        }

        candidate++;
    }

    return false;
}
/// <summary>
/// Parses a run of ASCII digits into a non-negative 32-bit integer.
/// </summary>
/// <remarks>
/// Parsing stops at the first non-digit byte or at <paramref name="endIndex"/>.
/// When the next digit would overflow, parsing stops in front of that digit and
/// the method fails.
/// </remarks>
/// <returns><c>true</c> if at least one digit was consumed without overflow, <c>false</c> otherwise.</returns>
/// <param name="text">The buffer to read from.</param>
/// <param name="index">The position to start at; advanced past the consumed digits.</param>
/// <param name="endIndex">The exclusive upper bound for <paramref name="index"/>.</param>
/// <param name="value">The value of the digits consumed so far.</param>
static bool TryParseInt32 (byte[] text, ref int index, int endIndex, out int value)
{
    const int MaxBeforeLastDigit = int.MaxValue / 10;
    const int MaxLastDigit = int.MaxValue % 10;
    int first = index;

    value = 0;

    for (; index < endIndex; index++) {
        byte c = text[index];

        if (c < (byte) '0' || c > (byte) '9')
            break;

        int digit = c - (byte) '0';

        // appending this digit would exceed int.MaxValue
        if (value > MaxBeforeLastDigit || (value == MaxBeforeLastDigit && digit > MaxLastDigit))
            return false;

        value = (value * 10) + digit;
    }

    return index > first;
}
/// <summary>
/// Tries to interpret a token as a day of the month (1 through 31).
/// </summary>
/// <returns><c>true</c> if the token is numeric and within range, <c>false</c> otherwise.</returns>
/// <param name="token">The token to interpret.</param>
/// <param name="text">The buffer that the token refers into.</param>
/// <param name="day">The parsed number (which may be out of range when the method fails).</param>
static bool TryGetDayOfMonth (DateToken token, byte[] text, out int day)
{
    day = 0;

    if (!token.IsNumeric)
        return false;

    int position = token.StartIndex;
    int limit = position + token.Length;

    return TryParseInt32 (text, ref position, limit, out day) && day >= 1 && day <= 31;
}
/// <summary>
/// Tries to interpret a token as a month name.
/// </summary>
/// <remarks>
/// Only the first three characters of the token are compared (ignoring case)
/// against the abbreviations in <c>Months</c>.
/// </remarks>
/// <returns><c>true</c> if the token names a month, <c>false</c> otherwise.</returns>
/// <param name="token">The token to interpret.</param>
/// <param name="text">The buffer that the token refers into.</param>
/// <param name="month">The month number (1 through 12), or 0 when nothing matched.</param>
static bool TryGetMonth (DateToken token, byte[] text, out int month)
{
    month = 0;

    if (token.Length < 3 || !token.IsMonth)
        return false;

    // ASCII decoding yields one char per byte, so decoding just the first
    // three bytes gives the same prefix as decoding the whole token.
    var prefix = Encoding.ASCII.GetString (text, token.StartIndex, 3);
    int number = 0;

    foreach (var abbreviation in Months) {
        number++;

        if (abbreviation.Equals (prefix, StringComparison.OrdinalIgnoreCase)) {
            month = number;
            return true;
        }
    }

    return false;
}
/// <summary>
/// Tries to interpret a token as a year.
/// </summary>
/// <remarks>
/// Values below 70 are moved into the 2000s and values from 70 through 99 into
/// the 1900s. The resulting year must be 1969 or later.
/// </remarks>
/// <returns><c>true</c> if the token is numeric and yields a year of at least 1969, <c>false</c> otherwise.</returns>
/// <param name="token">The token to interpret.</param>
/// <param name="text">The buffer that the token refers into.</param>
/// <param name="year">The (possibly adjusted) year.</param>
static bool TryGetYear (DateToken token, byte[] text, out int year)
{
    year = 0;

    if (!token.IsNumeric)
        return false;

    int position = token.StartIndex;
    int limit = position + token.Length;

    if (!TryParseInt32 (text, ref position, limit, out year))
        return false;

    // expand abbreviated years
    if (year < 70)
        year += 2000;
    else if (year < 100)
        year += 1900;

    return year >= 1969;
}
/// <summary>
/// Tries to interpret a token as a time of day in <c>hh:mm</c> or <c>hh:mm:ss</c> form.
/// </summary>
/// <returns><c>true</c> if the token is a valid time of day, <c>false</c> otherwise.</returns>
/// <param name="token">The token to interpret.</param>
/// <param name="text">The buffer that the token refers into.</param>
/// <param name="hour">The hour (0 through 23 on success).</param>
/// <param name="minute">The minute (0 through 59 on success).</param>
/// <param name="second">The second (0 through 59 on success; 0 when omitted).</param>
static bool TryGetTimeOfDay (DateToken token, byte[] text, out int hour, out int minute, out int second)
{
    hour = 0;
    minute = 0;
    second = 0;

    if (!token.IsTimeOfDay)
        return false;

    int position = token.StartIndex;
    int limit = position + token.Length;

    // hh
    if (!TryParseInt32 (text, ref position, limit, out hour) || hour > 23)
        return false;

    // the separator between hours and minutes is mandatory
    if (position >= limit || text[position] != (byte) ':')
        return false;

    position++;

    // mm
    if (!TryParseInt32 (text, ref position, limit, out minute) || minute > 59)
        return false;

    // the seconds are optional: accept a bare hh:mm
    if (position >= limit)
        return true;

    if (text[position++] != (byte) ':')
        return true;

    // ss
    if (!TryParseInt32 (text, ref position, limit, out second) || second > 59)
        return false;

    // nothing may follow the seconds
    return position == limit;
}
/// <summary>
/// Tries to interpret a token as a time zone.
/// </summary>
/// <remarks>
/// Three shapes are understood: a signed numeric zone such as <c>-0800</c>, an
/// alphabetic zone of at most three letters that is listed in <c>timezones</c>,
/// and a bare run of digits. A token that has none of these shapes leaves
/// <paramref name="tzone"/> at zero and is still reported as a success.
/// </remarks>
/// <returns><c>false</c> if the token has one of the known shapes but cannot be parsed, <c>true</c> otherwise.</returns>
/// <param name="token">The token to interpret.</param>
/// <param name="text">The buffer that the token refers into.</param>
/// <param name="tzone">The zone written as a signed HHMM integer (e.g. -800).</param>
static bool TryGetTimeZone (DateToken token, byte[] text, out int tzone)
{
    tzone = 0;

    if (token.IsNumericZone) {
        int endIndex = token.StartIndex + token.Length;
        int index = token.StartIndex;
        int sign;

        // the sign has to be the very first character
        if (text[index] == (byte) '-')
            sign = -1;
        else if (text[index] == (byte) '+')
            sign = 1;
        else
            return false;

        index++;

        // the rest of the token must consist of digits only
        if (!TryParseInt32 (text, ref index, endIndex, out tzone) || index != endIndex)
            return false;

        tzone *= sign;
    } else if (token.IsAlphaZone) {
        if (token.Length > 3)
            return false;

        var name = Encoding.ASCII.GetString (text, token.StartIndex, token.Length);

        if (!timezones.TryGetValue (name, out tzone))
            return false;
    } else if (token.IsNumeric) {
        int endIndex = token.StartIndex + token.Length;
        int index = token.StartIndex;

        // Use the parser declared in this class; the stand-alone file has no
        // ParseUtils type to call into.
        if (!TryParseInt32 (text, ref index, endIndex, out tzone) || index != endIndex)
            return false;
    }

    return true;
}
/// <summary>
/// Checks whether a byte is a space or a horizontal tab.
/// </summary>
/// <returns><c>true</c> for ' ' and '\t', <c>false</c> for every other byte.</returns>
/// <param name="c">The byte to test.</param>
static bool IsWhiteSpace (byte c)
{
    switch (c) {
    case (byte) ' ':
    case (byte) '\t':
        return true;
    default:
        return false;
    }
}
/// <summary>
/// Checks whether a byte ends the current token.
/// </summary>
/// <returns><c>true</c> for '-', '/', ',' and for white space, <c>false</c> otherwise.</returns>
/// <param name="c">The byte to test.</param>
static bool IsTokenDelimeter (byte c)
{
    switch (c) {
    case (byte) '-':
    case (byte) '/':
    case (byte) ',':
        return true;
    default:
        return IsWhiteSpace (c);
    }
}
/// <summary>
/// Advances <paramref name="index"/> past any run of white space.
/// </summary>
/// <returns><c>true</c> if at least one byte was skipped, <c>false</c> otherwise.</returns>
/// <param name="text">The buffer to read from.</param>
/// <param name="index">The position to start at; left on the first non-white-space byte.</param>
/// <param name="endIndex">The exclusive upper bound for <paramref name="index"/>.</param>
static bool SkipWhiteSpace (byte[] text, ref int index, int endIndex)
{
    int first = index;

    for (; index < endIndex; index++) {
        if (!IsWhiteSpace (text[index]))
            break;
    }

    return index > first;
}
/// <summary>
/// Advances <paramref name="index"/> past a parenthesized comment.
/// </summary>
/// <remarks>
/// The byte at <paramref name="index"/> is taken to be the opening parenthesis and
/// is stepped over without being examined. Nested parentheses are tracked, and a
/// parenthesis that directly follows an unpaired backslash is not counted.
/// </remarks>
/// <returns><c>true</c> if the comment was closed before <paramref name="endIndex"/>, <c>false</c> otherwise.</returns>
/// <param name="text">The buffer to read from.</param>
/// <param name="index">The position of the opening parenthesis; left just past the closing one.</param>
/// <param name="endIndex">The exclusive upper bound for <paramref name="index"/>.</param>
static bool SkipComment (byte[] text, ref int index, int endIndex)
{
    bool afterBackslash = false;
    int nesting = 1;

    // the initializer steps over the opening parenthesis
    for (index++; index < endIndex && nesting > 0; index++) {
        byte current = text[index];

        if (current == (byte) '\\') {
            // a second backslash in a row cancels the first one
            afterBackslash = !afterBackslash;
            continue;
        }

        if (!afterBackslash) {
            if (current == (byte) '(')
                nesting++;
            else if (current == (byte) ')')
                nesting--;
        }

        afterBackslash = false;
    }

    return nesting == 0;
}
/// <summary>
/// Advances <paramref name="index"/> past any mix of white space and parenthesized comments.
/// </summary>
/// <returns><c>false</c> if a comment was still open at <paramref name="endIndex"/>, <c>true</c> otherwise.</returns>
/// <param name="text">The buffer to read from.</param>
/// <param name="index">The position to start at; left on the first byte that is neither white space nor part of a comment.</param>
/// <param name="endIndex">The exclusive upper bound for <paramref name="index"/>.</param>
static bool SkipCommentsAndWhiteSpace (byte[] text, ref int index, int endIndex)
{
    SkipWhiteSpace (text, ref index, endIndex);

    while (index < endIndex && text[index] == (byte) '(') {
        if (!SkipComment (text, ref index, endIndex))
            return false;

        // comments may be separated from each other by white space
        SkipWhiteSpace (text, ref index, endIndex);
    }

    return true;
}
/// <summary>
/// Splits a range of the buffer into date tokens.
/// </summary>
/// <remarks>
/// White space and parenthesized comments in front of a token are skipped, and
/// tokenizing stops at an unterminated comment. The first byte of a token is always
/// taken as-is, even when it is a delimiter such as '-'; further bytes are added
/// until a delimiter is found. The delimiter itself is dropped.
/// </remarks>
/// <returns>The tokens in the order in which they appear.</returns>
/// <param name="text">The buffer to read from.</param>
/// <param name="startIndex">The offset of the first byte to examine.</param>
/// <param name="length">The number of bytes to examine.</param>
static IEnumerable<DateToken> TokenizeDate (byte[] text, int startIndex, int length)
{
    int limit = startIndex + length;
    int position = startIndex;

    for (;;) {
        if (position >= limit)
            yield break;

        if (!SkipCommentsAndWhiteSpace (text, ref position, limit))
            yield break;

        if (position >= limit)
            yield break;

        // start with the flags of the first byte...
        DateTokenFlags flags = datetok[text[position]];

        if (flags != DateTokenFlags.None) {
            int tokenStart = position;

            position++;

            // ...and merge in the flags of every byte up to the next delimiter
            while (position < limit && !IsTokenDelimeter (text[position])) {
                flags |= datetok[text[position]];
                position++;
            }

            yield return new DateToken (flags, tokenStart, position - tokenStart);
        }

        // step over the delimiter that ended the token
        position++;
    }
}
/// <summary>
/// Tries to parse tokens that follow the standard order:
/// [weekday] day month year time zone.
/// </summary>
/// <remarks>
/// The weekday is optional. A zone token that cannot be parsed is treated as a zero
/// offset. Zones beyond +/-1400 are shifted by multiples of 2400 until they are within
/// that range.
/// </remarks>
/// <returns><c>true</c> if the tokens form a valid date, <c>false</c> otherwise.</returns>
/// <param name="tokens">The tokens of the date string.</param>
/// <param name="text">The buffer that the tokens refer into.</param>
/// <param name="date">The parsed date, or the default value on failure.</param>
static bool TryParseStandardDateFormat (IList<DateToken> tokens, byte[] text, out DateTimeOffset date)
{
    date = new DateTimeOffset ();

    // day, month, year, time and zone are all required
    if (tokens.Count < 5)
        return false;

    int next = 0;
    DayOfWeek ignoredWeekday;

    // a leading weekday raises the required number of tokens to six
    if (TryGetWeekday (tokens[0], text, out ignoredWeekday)) {
        if (tokens.Count < 6)
            return false;

        next = 1;
    }

    int day, month, year, hour, minute, second;

    if (!TryGetDayOfMonth (tokens[next], text, out day)
        || !TryGetMonth (tokens[next + 1], text, out month)
        || !TryGetYear (tokens[next + 2], text, out year)
        || !TryGetTimeOfDay (tokens[next + 3], text, out hour, out minute, out second))
        return false;

    int zone;

    if (!TryGetTimeZone (tokens[next + 4], text, out zone))
        zone = 0;

    while (zone < -1400)
        zone += 2400;

    while (zone > 1400)
        zone -= 2400;

    // split the HHMM integer into its two halves
    var offset = new TimeSpan (zone / 100, zone % 100, 0);

    try {
        date = new DateTimeOffset (year, month, day, hour, minute, second, offset);
        return true;
    } catch (ArgumentOutOfRangeException) {
        return false;
    }
}
/// <summary>
/// Best-effort parser for tokens that do not follow the standard order.
/// </summary>
/// <remarks>
/// Every token is visited once and handed to the first still-unfilled field that
/// accepts it: weekday, month name, time of day, time zone and finally plain numbers.
/// A year, a month and a day are required; a missing time becomes 00:00:00 and a
/// missing zone becomes a zero offset.
/// </remarks>
/// <returns><c>true</c> if a valid date could be assembled, <c>false</c> otherwise.</returns>
/// <param name="tokens">The tokens of the date string.</param>
/// <param name="text">The buffer that the tokens refer into.</param>
/// <param name="date">The parsed date, or the default value on failure.</param>
static bool TryParseUnknownDateFormat (IList<DateToken> tokens, byte[] text, out DateTimeOffset date)
{
int? day = null, month = null, year = null, tzone = null;
int hour = 0, minute = 0, second = 0;
// true while 'month' holds a number (1-12) that was only assumed to be the month
bool numericMonth = false;
bool haveWeekday = false;
bool haveTime = false;
DayOfWeek weekday;
TimeSpan offset;
for (int i = 0; i < tokens.Count; i++) {
int value;
// only the first weekday is consumed; its value is not used afterwards
if (!haveWeekday && tokens[i].IsWeekday) {
if (TryGetWeekday (tokens[i], text, out weekday)) {
haveWeekday = true;
continue;
}
}
// a month name is accepted when there is no month yet or the month is only a numeric guess
if ((month == null || numericMonth) && tokens[i].IsMonth) {
if (TryGetMonth (tokens[i], text, out value)) {
if (numericMonth) {
// the earlier number was not the month after all: it becomes the day
// (replacing any day that was found in the meantime)
numericMonth = false;
day = month;
}
month = value;
continue;
}
}
if (!haveTime && tokens[i].IsTimeOfDay) {
// note: a failed attempt may leave partial values in hour/minute/second;
// they are reset below when no time was found at all
if (TryGetTimeOfDay (tokens[i], text, out hour, out minute, out second)) {
haveTime = true;
continue;
}
}
if (tzone == null && tokens[i].IsTimeZone) {
if (TryGetTimeZone (tokens[i], text, out value)) {
tzone = value;
continue;
}
}
if (tokens[i].IsNumeric) {
if (tokens[i].Length == 4) {
// four digits: a year if none was seen yet, otherwise an unsigned zone;
// the token is consumed whether or not it could be used
if (year == null) {
if (TryGetYear (tokens[i], text, out value))
year = value;
} else if (tzone == null) {
if (TryGetTimeZone (tokens[i], text, out value))
tzone = value;
}
continue;
}
// numbers with three digits or with more than four digits are ignored
if (tokens[i].Length > 2)
continue;
// Note: we likely have either YYYY[-/]MM[-/]DD or MM[-/]DD[-/]YY
int endIndex = tokens[i].StartIndex + tokens[i].Length;
int index = tokens[i].StartIndex;
// the result is not checked: the token consists of one or two digits
TryParseInt32 (text, ref index, endIndex, out value);
// the first number from 1 to 12 is tentatively taken as the month
if (month == null && value > 0 && value <= 12) {
numericMonth = true;
month = value;
continue;
}
if (day == null && value > 0 && value <= 31) {
day = value;
continue;
}
// two-digit years are only accepted from 69 upwards and always land in the 1900s
if (year == null && value >= 69) {
year = 1900 + value;
continue;
}
}
// anything that reaches this point was not recognized and is ignored
}
if (year == null || month == null || day == null) {
date = new DateTimeOffset ();
return false;
}
if (!haveTime)
hour = minute = second = 0;
if (tzone != null) {
// split the HHMM integer into its two halves
int minutes = tzone.Value % 100;
int hours = tzone.Value / 100;
offset = new TimeSpan (hours, minutes, 0);
} else {
offset = new TimeSpan (0);
}
try {
date = new DateTimeOffset (year.Value, month.Value, day.Value, hour, minute, second, offset);
} catch (ArgumentOutOfRangeException) {
// e.g. a day that does not exist in the month, or an offset that is out of range
date = new DateTimeOffset ();
return false;
}
return true;
}
/// <summary>
/// Tries to parse a range of the input buffer into a <see cref="System.DateTimeOffset"/>.
/// </summary>
/// <remarks>
/// The range is tokenized once; the standard rfc822 token order is tried first and
/// the lenient parser is used as a fallback.
/// </remarks>
/// <returns><c>true</c>, if the date was successfully parsed, <c>false</c> otherwise.</returns>
/// <param name="buffer">The input buffer.</param>
/// <param name="startIndex">The starting index of the input buffer.</param>
/// <param name="length">The number of bytes in the input buffer to parse.</param>
/// <param name="date">The parsed date, or the default value on failure.</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="buffer"/> is <c>null</c>.
/// </exception>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="startIndex"/> and <paramref name="length"/> do not specify
/// a valid range in the byte array.
/// </exception>
public static bool TryParse (byte[] buffer, int startIndex, int length, out DateTimeOffset date)
{
    if (buffer == null)
        throw new ArgumentNullException ("buffer");

    if (startIndex < 0 || startIndex > buffer.Length)
        throw new ArgumentOutOfRangeException ("startIndex");

    if (length < 0 || length > (buffer.Length - startIndex))
        throw new ArgumentOutOfRangeException ("length");

    var tokens = new List<DateToken> (TokenizeDate (buffer, startIndex, length));

    // the fallback leaves the default value in 'date' when it fails as well
    return TryParseStandardDateFormat (tokens, buffer, out date)
        || TryParseUnknownDateFormat (tokens, buffer, out date);
}
/// <summary>
/// Tries to parse the input buffer, from the given index to its end, into a
/// <see cref="System.DateTimeOffset"/>.
/// </summary>
/// <remarks>
/// The range is tokenized once; the standard rfc822 token order is tried first and
/// the lenient parser is used as a fallback.
/// </remarks>
/// <returns><c>true</c>, if the date was successfully parsed, <c>false</c> otherwise.</returns>
/// <param name="buffer">The input buffer.</param>
/// <param name="startIndex">The starting index of the input buffer.</param>
/// <param name="date">The parsed date, or the default value on failure.</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="buffer"/> is <c>null</c>.
/// </exception>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="startIndex"/> is not within the range of the byte array.
/// </exception>
public static bool TryParse (byte[] buffer, int startIndex, out DateTimeOffset date)
{
    if (buffer == null)
        throw new ArgumentNullException ("buffer");

    if (startIndex < 0 || startIndex > buffer.Length)
        throw new ArgumentOutOfRangeException ("startIndex");

    // everything from startIndex to the end of the buffer is parsed
    var tokens = new List<DateToken> (TokenizeDate (buffer, startIndex, buffer.Length - startIndex));

    // the fallback leaves the default value in 'date' when it fails as well
    return TryParseStandardDateFormat (tokens, buffer, out date)
        || TryParseUnknownDateFormat (tokens, buffer, out date);
}
/// <summary>
/// Tries to parse the whole input buffer into a <see cref="System.DateTimeOffset"/>.
/// </summary>
/// <remarks>
/// The buffer is tokenized once; the standard rfc822 token order is tried first and
/// the lenient parser is used as a fallback.
/// </remarks>
/// <returns><c>true</c>, if the date was successfully parsed, <c>false</c> otherwise.</returns>
/// <param name="buffer">The input buffer.</param>
/// <param name="date">The parsed date, or the default value on failure.</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="buffer"/> is <c>null</c>.
/// </exception>
public static bool TryParse (byte[] buffer, out DateTimeOffset date)
{
    if (buffer == null)
        throw new ArgumentNullException ("buffer");

    var tokens = new List<DateToken> (TokenizeDate (buffer, 0, buffer.Length));

    // the fallback leaves the default value in 'date' when it fails as well
    return TryParseStandardDateFormat (tokens, buffer, out date)
        || TryParseUnknownDateFormat (tokens, buffer, out date);
}
/// <summary>
/// Tries to parse the given text into a <see cref="System.DateTimeOffset"/>.
/// </summary>
/// <remarks>
/// The text is converted to UTF-8 bytes and tokenized once; the standard rfc822
/// token order is tried first and the lenient parser is used as a fallback.
/// </remarks>
/// <returns><c>true</c>, if the date was successfully parsed, <c>false</c> otherwise.</returns>
/// <param name="text">The input text.</param>
/// <param name="date">The parsed date, or the default value on failure.</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="text"/> is <c>null</c>.
/// </exception>
public static bool TryParse (string text, out DateTimeOffset date)
{
    if (text == null)
        throw new ArgumentNullException ("text");

    byte[] bytes = Encoding.UTF8.GetBytes (text);
    var tokens = new List<DateToken> (TokenizeDate (bytes, 0, bytes.Length));

    // the fallback leaves the default value in 'date' when it fails as well
    return TryParseStandardDateFormat (tokens, bytes, out date)
        || TryParseUnknownDateFormat (tokens, bytes, out date);
}
}
}