Is there an efficient quoted-printable decoder written in C that can handle text files larger than 5 MB? I need such a decoder in an iOS project.
In the meantime I am using a high-level decoder, which is far too slow. It takes up to 10 minutes on a device to decode a 5 MB file:
/// Decodes a quoted-printable string using high-level NSMutableString edits.
///
/// Soft line breaks ("=" followed by CRLF) are removed first; every "=XX"
/// escape with two hexadecimal digits is then replaced, in place, by the
/// character produced by formatting that byte value with "%c".
///
/// Each escape is replaced on its own, so multi-byte UTF-8 sequences such as
/// "=C3=A9" are NOT reassembled into a single character by this method.
/// Every replacement shifts the tail of the mutable string, which is why the
/// method is slow on large inputs.
///
/// @param string The quoted-printable encoded text. May be nil.
/// @return The decoded text, or nil when `string` is nil.
- (NSString *)decodedQuotedPrintable:(NSString *)string
{
    NSMutableString *decodedString = [string mutableCopy];

    // Remove soft line breaks before decoding escapes.
    [decodedString replaceOccurrencesOfString:@"=\r\n"
                                   withString:@""
                                      options:NSLiteralSearch
                                        range:NSMakeRange(0, decodedString.length)];

    // Start at index 0 so that an escape at the very beginning is decoded too.
    NSUInteger searchStart = 0;
    while (searchStart < decodedString.length)
    {
        NSRange remaining = NSMakeRange(searchStart, decodedString.length - searchStart);
        NSUInteger idx = [decodedString rangeOfString:@"="
                                              options:NSLiteralSearch
                                                range:remaining].location;
        if (idx == NSNotFound)
        {
            break;
        }
        // An escape needs "=" plus two digits. A later "=" would sit even
        // closer to the end, so nothing decodable can follow.
        if (idx + 3 > decodedString.length)
        {
            break;
        }

        // Parse the two hex digits by hand so that malformed escapes are
        // detected instead of silently becoming a 0 byte.
        unsigned int byteValue = 0;
        BOOL isValidEscape = YES;
        for (NSUInteger offset = 1; offset <= 2; offset++)
        {
            unichar digit = [decodedString characterAtIndex:idx + offset];
            unsigned int nibble = 0;
            if (digit >= '0' && digit <= '9')
            {
                nibble = (unsigned int)(digit - '0');
            }
            else if (digit >= 'A' && digit <= 'F')
            {
                nibble = (unsigned int)(digit - 'A') + 10;
            }
            else if (digit >= 'a' && digit <= 'f')
            {
                nibble = (unsigned int)(digit - 'a') + 10;
            }
            else
            {
                isValidEscape = NO;
                break;
            }
            byteValue = (byteValue << 4) | nibble;
        }

        if (isValidEscape)
        {
            [decodedString replaceCharactersInRange:NSMakeRange(idx, 3)
                                         withString:[NSString stringWithFormat:@"%c", byteValue]];
        }
        // Continue after the current position: a decoded "=" (from "=3D") must
        // not be treated as the start of a new escape, and a malformed escape
        // is left in the text untouched.
        searchStart = idx + 1;
    }
    return decodedString;
}
I have solved it using C-level string manipulation, and it works like a charm: the processing time for a test file dropped from 6 minutes to 3 seconds on an iPad 2:
/// Returns a copy of a C string with quoted-printable soft line breaks removed.
///
/// A soft line break is "=" immediately followed by a line ending. Both the
/// CRLF form ("=\r\n") and the bare LF form ("=\n", as found in text whose line
/// endings were normalised) are dropped. All other bytes are copied unchanged.
///
/// @param original NUL-terminated input. May be NULL.
/// @return A newly malloc'd, NUL-terminated string that the caller must
///         free(), or NULL when `original` is NULL or allocation fails.
- (char *)replace1:(char const * const)original
{
    if (original == NULL)
    {
        return NULL;
    }

    // Removing bytes can only shrink the text, so the input length is a safe
    // upper bound for the output and a separate counting pass is unnecessary.
    size_t const originalLength = strlen(original);
    char * const returned = (char *) malloc(originalLength + 1);
    if (returned == NULL)
    {
        return NULL;
    }

    char const *source = original;
    char *destination = returned;
    while (*source != '\0')
    {
        // Short-circuit evaluation keeps every look-ahead inside the buffer:
        // source[1] is read only when source[0] is '=', and source[2] only
        // when source[1] is '\r', so the terminator is never passed.
        if (source[0] == '=' && source[1] == '\r' && source[2] == '\n')
        {
            source += 3;
        }
        else if (source[0] == '=' && source[1] == '\n')
        {
            source += 2;
        }
        else
        {
            *destination++ = *source++;
        }
    }
    *destination = '\0';
    return returned;
}
/// Returns a copy of a C string with quoted-printable "=XX" escapes decoded.
///
/// Every "=" followed by exactly two hexadecimal digits is replaced by the
/// single byte with that value. A "=" that is not followed by two hex digits
/// (including one at the very end of the input) is copied through literally.
///
/// Note: "=00" decodes to a 0 byte, so anything that reads the result as a C
/// string will stop at that position.
///
/// @param original NUL-terminated input, normally with soft line breaks
///                 already removed. May be NULL.
/// @return A newly malloc'd, NUL-terminated string that the caller must
///         free(), or NULL when `original` is NULL or allocation fails.
- (char *)replace2:(char const * const)original
{
    if (original == NULL)
    {
        return NULL;
    }

    // Each escape turns three bytes into one, so the output never exceeds the
    // input length; no counting pass or wrap-around size arithmetic is needed.
    size_t const originalLength = strlen(original);
    char * const returned = (char *) malloc(originalLength + 1);
    if (returned == NULL)
    {
        return NULL;
    }

    char const *source = original;
    char *destination = returned;
    while (*source != '\0')
    {
        // isxdigit() is false for the terminator, and && short-circuits, so
        // the look-ahead never reads past the end of the input.
        if (source[0] == '='
            && isxdigit((unsigned char) source[1])
            && isxdigit((unsigned char) source[2]))
        {
            char const digits[3] = { source[1], source[2], '\0' };
            *destination++ = (char) strtol(digits, NULL, 16);
            // Skip the whole escape so a decoded '=' is not rescanned.
            source += 3;
        }
        else
        {
            *destination++ = *source++;
        }
    }
    *destination = '\0';
    return returned;
}
/// Decodes a quoted-printable string using the C-level helpers.
///
/// The text is converted to UTF-8 bytes, soft line breaks are removed with
/// -replace1:, "=XX" escapes are decoded with -replace2:, and the resulting
/// bytes are read back as UTF-8.
///
/// @param string The quoted-printable encoded text. May be nil.
/// @return The decoded text, or nil when `string` is nil, when one of the
///         helpers returns NULL, or when +stringWithCString:encoding: returns
///         nil for the decoded bytes.
- (NSString *)decodedQuotedPrintable:(NSString *)string
{
    char const *encoded = [string UTF8String];
    if (encoded == NULL)
    {
        return nil;
    }

    char *withoutSoftBreaks = [self replace1:encoded];
    if (withoutSoftBreaks == NULL)
    {
        return nil;
    }

    char *decoded = [self replace2:withoutSoftBreaks];
    // The intermediate buffer is malloc'd by -replace1: and is no longer
    // needed once -replace2: has produced its own copy.
    free(withoutSoftBreaks);
    if (decoded == NULL)
    {
        return nil;
    }

    // Build the NSString before releasing the malloc'd buffer that backs it.
    NSString *result = [NSString stringWithCString:decoded
                                          encoding:NSUTF8StringEncoding];
    free(decoded);
    return result;
}