Search code examples
calgorithmbinary-search

Binary search function that returns key's successor or predecessor if not found


I have this binary search function written in C that returns the first occurrance of a given key. But I wanted to add the functionality described in the title, using the not_found parameter.

void* binary_search(const void *key, const void *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *), int not_found) {
    const void *p;
    int lo = 0;
    int hi = nmemb - 1;
    int mid, c;
    int found = -1;

    while (lo <= hi) {
        mid = lo + (hi - lo + 1)/2;
        p = base + mid * size;
        c = compar(key, p);
        if (c == 0) {
            found = mid;
            hi = mid - 1;
        }
        else if (c > 0)
            lo = mid + 1;
        else
            hi = mid - 1;
    }
    
    if (found != -1) {
        p = base + found * size;
        return (void*)p;
    }
    else if (not_found == 0) {
        return NULL;
    }
    else if (not_found == 1) {
        /*
        p = base + mid * size;
        return (void*)p;
        */
    }
    else {
        /*
        p = base + (mid-1) * size;
        return (void*)p;
        */
    }
}

How can I make that if the key is not found and not_found == 1, then return its successor or if not_found == -1, return its predecessor

That commented part of code worked in some cases, but it really depends on the case. From this list of strings, "A", "C", "E", "G", "I", "K", "M", "O", searching for "D" with not_found = -1 returns "A" but searching for "F" it returns correctly "E". The same goes for successor cases, that part of the code doesn't always get it right.


Solution

  • You said you wanted the "first" match which would be the leftmost binary search algorithm. Then I suggest you wrap the standard algorithm for handling not_found post-processing of the result:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    enum not_found {
        PREDECESSOR = -1,
        CURRENT,
        SUCCESSOR
    };
    
    const void *binary_search(const void *key, const void *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *)) {
        size_t l = 0;
        size_t r = nmemb;
        while(l < r) {
            size_t m = (l + r) / 2;
            if(compar((const char *) base + m * size, key) < 0)
                l = m + 1;
            else
                r = m;
        }
        return (const char *) base + l * size;
    }
    
    const void *binary_search2(const void *key, const void *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *), enum not_found not_found) {
        const void *p = binary_search(key, base, nmemb, size, compar);
        if(!compar(p, key))
            return p;
        switch(not_found) {
            case PREDECESSOR:
                return (const char *) p - size;
            case CURRENT:
                return NULL;
            case SUCCESSOR:
                return p;
        }
    }
    
    int charcmp(const void *a, const void *b) {
        const char *a2 = a;
        const char *b2 = b;
        if(*a2 < *b2) return -1;
        if(*a2 > *b2) return 1;
        return 0;
    }
    
    int main(void) {
        char *s = "ACEGIKMO";
        enum not_found tests[] = {
            PREDECESSOR,
            CURRENT,
            SUCCESSOR
        };
        for(size_t i = 0; i < sizeof tests / sizeof *tests; i++) {
            const void *p = binary_search2("D", s,  strlen(s), 1, charcmp, tests[i]);
            if(!p)
                printf("NULL\n");
            else
                printf("%.1s\n", (const char *) p);
        }
    }
    

    and example run:

    C
    NULL
    E
    

    A pointer to the element before the first in base is undefined behavior. If you need that then you would need to change the interface. It's ok to point to the element after the last one you just can't dereference it in general like I do in the test. For strings it is ok as that would be the terminating '\0'.