I'm using this little piece of code to go through some input text and extract the sentences, which are separated by these markers:
// Characters that mark the end of a sentence: period, exclamation mark,
// question mark, and newline.
NSCharacterSet *sentenceMarkers = [NSCharacterSet characterSetWithCharactersInString:@".!?\n"];

// Split the input at every marker. The markers act purely as separators here,
// so they do not appear in any of the resulting components.
NSArray *parts = [data componentsSeparatedByCharactersInSet:sentenceMarkers];
The issue is that the punctuation is stripped from the resulting array. How do I go about storing the data with the appropriate punctuation? If possible, I'd like to keep sentences marked with a newline (\n) as they are.
For example, if I enter this:
This is a sentence. It is marked by a period. This sentence is not marked by one How do you do? I'm doing very good!
I'd like to get something like this:
This is a sentence.
It is marked by a period.
This sentence is not marked by one
How do you do?
I'm doing very good!
Hope this helps:
// Splits `string` into sentences while keeping each sentence's terminating
// punctuation attached, which -componentsSeparatedByCharactersInSet: discards.
NSString *string = @"This is a sentence. It is marked by a period. This sentence is not marked by one How do you do? I'm doing very good!";

// Capture group 1 is the terminator itself: a run of '.', '!' or '?' (so that
// "..." and "?!" stay attached to a single sentence) or one newline. The
// trailing \s* swallows the whitespace between sentences so that it is not
// carried over into the start of the next one. A comma is intentionally NOT a
// terminator: it separates clauses, not sentences.
NSString *pattern = @"([.!?]+|\\n)\\s*";

NSError *error = nil;
NSRegularExpression *expression = [NSRegularExpression regularExpressionWithPattern:pattern
                                                                            options:0
                                                                              error:&error];

// Collected sentences, in order of appearance. Stays empty if the pattern
// fails to compile or the input is empty.
NSMutableArray *sentences = [NSMutableArray array];

if (expression)
{
    NSArray *matches = [expression matchesInString:string
                                           options:0
                                             range:NSMakeRange(0, [string length])];

    NSUInteger sentenceStart = 0;
    for (NSTextCheckingResult *result in matches)
    {
        // The sentence runs up to and including the terminator (group 1) ...
        NSUInteger sentenceEnd = NSMaxRange([result rangeAtIndex:1]);
        [sentences addObject:[string substringWithRange:NSMakeRange(sentenceStart, sentenceEnd - sentenceStart)]];

        // ... and the next one begins after the whitespace the match consumed.
        sentenceStart = NSMaxRange(result.range);
    }

    // Text after the last terminator (or the whole input, when it contains no
    // terminator at all) is still a sentence; do not drop it.
    if (sentenceStart < [string length])
    {
        [sentences addObject:[string substringFromIndex:sentenceStart]];
    }

    NSLog(@"%@", sentences);
}
else
{
    // A nil expression means the pattern failed to compile; `error` says why.
    NSLog(@"ERROR: %@", error);
}