Search code examples
c, parsing, scanf, tokenize

Is there a variant of sscanf with pointer to input string instead of buffers?


sscanf works like this:

/*
 * Demonstrates sscanf splitting "abc, 123; xyz" into three tokens, copying
 * each token into its own buffer.
 *
 * Returns 0 on success, 1 if fewer than three tokens were matched.
 */
int main(const int argc, const char *argv[]) {
    char buf1[1024] = {0};
    char buf2[1024] = {0};
    char buf3[1024] = {0};
    const char *str = "abc, 123; xyz";
    // The field width 1023 (sizeof buf - 1) leaves room for the terminating
    // '\0', so an over-long token cannot overflow a buffer.
    if (sscanf(str, "%1023[^,], %1023[^;]; %1023s", buf1, buf2, buf3) != 3) {
        return 1; // not all three tokens were matched and stored
    }
    printf("'%s' '%s' '%s'", buf1, buf2, buf3); // Prints: "'abc' '123' 'xyz'"
    return 0;
}

I'm wondering if there is a function which does not require copying the contents of str into the buffers (buf1, buf2, buf3), nor allocating any new memory. Instead it would just set the pointers (ptr1, ptr2, ptr3) to point at the matching parts in str and null terminate whatever comes after the match.

/*
 * Sketch of the desired API: _sscanf is a hypothetical function (not part of
 * the C library) that would store pointers into the input string instead of
 * copying tokens out, and overwrite the character following each match with
 * '\0'.
 */
int main(const int argc, const char *argv[]) {
    char *ptr1 = NULL;
    char *ptr2 = NULL;
    char *ptr3 = NULL;
    // Must be a writable array, not a pointer to a string literal: the
    // function writes '\0' bytes into the input, and modifying a string
    // literal is undefined behavior.
    char str[] = "abc, 123; xyz";
    //
    // str = "abc, 123; xyz\0"
    //
    _sscanf(str, "%[^,], %[^;]; %s", &ptr1, &ptr2, &ptr3);
    //
    // str = "abc\0 123\0 xyz\0"
    //        ^     ^     ^
    //       ptr1  ptr2  ptr3
    //
    printf("'%s' '%s' '%s'", ptr1, ptr2, ptr3); // Prints: "'abc' '123' 'xyz'"

    return 0;
}

I know there are functions such as strtok_r and the regex.h library which could be used, but I think this would be more convenient in cases where the input string can be modified.


Solution

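  • It isn't pretty, but the %n specifier can be used to capture the offsets of the start and end of each token. For error checking, verify after the call that the index and end values are no longer -1; the return value of sscanf is not useful here, because %n and assignment-suppressed (%*) conversions do not count toward it.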

    #include <stdio.h>
    
    /*
     * Locates three tokens in a string without copying them: each %n records
     * the offset reached so far, and the %* conversions match a token without
     * storing it. The tokens are then printed as (length, start) slices of
     * the original string.
     *
     * Returns 0 on success, 1 if the input did not match the format.
     */
    int main(int argc, char *argv[]) {
        int index1 = -1;
        int end1 = -1;
        int index2 = -1;
        int end2 = -1;
        int index3 = -1;
        int end3 = -1;
        const char *str = "abc, 123; xyz";
        sscanf(str, " %n%*[^,]%n, %n%*[^;]%n; %n%*s%n", &index1, &end1, &index2, &end2, &index3, &end3);
        // Directives run in order, so the last %n is only reached when every
        // earlier one was; end3 still being -1 means the match failed.
        if (end3 < 0) {
            fprintf(stderr, "input does not match the expected format\n");
            return 1;
        }
        // Each length is end - index; the first token may be preceded by
        // skipped whitespace, so index1 is not necessarily 0.
        printf("'%.*s' '%.*s' '%.*s'", end1 - index1, str + index1, end2 - index2, str + index2, end3 - index3, str + index3); // Prints: "'abc' '123' 'xyz'"
        return 0;
    }