Search code examples
cdata-structurestimetime-complexityskip-lists

Is my skip-list really searching in N rather than log(N)?


So I finally thought I finished creating a working skip-list and figured I should check my work and look into timing the search function. I found a tutorial for how to use clock() and implemented it like this:

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "skiplist.h"

int main() {
    // initialize
    SkipList *list = initSkipList();
    // numbers to search for, array to store time
    int n[] = {1, 10, 50, 100, 1000, 5000, 10000, 25000, 50000, 100000, 200000};
    double time[11];

    // insert
    for (int i = 1; i <= 200000; i++)
        insertElement(list, i);

    // search, time, print
    for (int i = 0; i < 11; i++) {
        clock_t t;
        t = clock();
        findElement(list, n[i]);
        t = clock() - t;
        double time_taken = ((double)t)/CLOCKS_PER_SEC; // in seconds
        ti[i] = time_taken;
        printf("fun() took %f seconds to execute \n", time_taken);
    }
    return 0;
}

I thought by doing this and plotting N vs. time I would get a graph that looks logarithmic, but instead my graph looks linear:

enter image description here

Do I have a misunderstanding about how I should go about timing my functions to try and test time-complexity or is my skip-list just not searching as it should be? Here's my entire skip-list implementation just in case, but the search function is called findElement():

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>

#ifndef SKIPLIST_H_
#define SKIPLIST_H_


// number of lists
#define MAXLEVEL 5 

// node with pointers
typedef struct Node {
    int data;
    struct Node *next[1]; // or C99 using flexible array members: *next[]
} Node;


// skiplist
typedef struct SkipList {
    Node *header;
    int level;
    int count;
} SkipList;


// initialize skip list
SkipList* initSkipList() {  
    int i;
    SkipList *list = calloc(1, sizeof(SkipList));
    if (!list) {
        printf("Memory Error\n");
        exit(1);
    }
    //list->level = 0;  
    if ((list->header = calloc(1, sizeof(Node) + MAXLEVEL*sizeof(Node*))) == 0) {
        printf("Memory Error\n");
        exit(1);
    }   
    for (i = 0; i <= MAXLEVEL; i++)
        list->header->next[i] = list->header;   // or = list->header?
    printf("Skip list initialized.\n");
    //srand(time(NULL));
    return list;
}

// insert into skip list, return pointer to node
Node* insertElement(SkipList *list,int data) {
    int i, newLevel;
    Node* temp = list->header;
    Node *update[MAXLEVEL+1];

    // find where data belongs
    for (i = list->level; i >= 0; i--) {
        while(temp->next[i] != list->header && temp->next[i]->data < data)
            temp = temp->next[i];
        update[i] = temp;
    }
    temp = temp->next[0];
    // if element already exists, just return it (no duplicates)
    if (temp != list->header && temp->data == data)
        return temp;

    // determine level (coin flips till failure or max level reached)
    for (newLevel = 0; (rand() < RAND_MAX/2) && (newLevel < MAXLEVEL); newLevel++); // Pr(4) == Pr(5)??
    if (newLevel > list->level) {
        for (i = list->level + 1; i <= newLevel; i++)
            update[i] =  list->header;
        list->level = newLevel;
    }

    // make new  node
    if ((temp = calloc(1, sizeof(Node) + newLevel*sizeof(Node*))) == 0) {
        printf("Memory Error\n");
        exit(1);
    }
    temp->data = data;
    list->count++;
    // update next links
    for (i = 0; i <= newLevel; i++) {
        temp->next[i] = update[i]->next[i];
        update[i]->next[i] = temp;
    }

    //printf("Element %d inserted into list. (level %d)\n", data, newLevel);
    return temp;
}


// delete node containing data
void deleteElement(SkipList *list, int data) {
    int i;
    Node *update[MAXLEVEL+1], *temp;
    temp = list->header;
    for (i = list->level; i >= 0; i--) {
        while (temp->next[i] != list->header && temp->next[i]->data < data)
            temp = temp->next[i];
        update[i] = temp;
    }
    // move to (possible) node to delete
    temp = temp->next[0];
    // if doesn't exist
    if (temp == list->header || temp->data != data) {
        printf("Element %d doesn't exist.\n", data);    
        return;
    }
    // adjust next pointers
    for (i = 0; i <= list->level; i++) {
        if (update[i]->next[i] != temp) break;
        update[i]->next[i] = temp->next[i];
    }
    free (temp);
    printf("Element %d deleted from list.\n", data);
    list->count--;
    // adjust header level
    while ((list->level > 0) && (list->header->next[list->level] == list->header)) // double check
        list->level--;
}


// find node containing data
Node* findElement(SkipList *list, int data){
    int i;
    Node *temp = list->header;
    for (i = list->level; i >= 0; i--) {
        while (temp->next[i] != list->header && temp->next[i]->data < data)
            temp = temp->next[i];
    }
    temp = temp->next[0];
    if (temp != list->header && temp->data == data) {
        printf("Element %d found and returned.\n", data);
        return (temp);
    }
    printf("Element %d not found.\n", data);
    return 0;
}


/* Dynamic array for use in printSkipList() function */
typedef struct {
    int *array;
    size_t used;
    size_t size;
} Array;
void initArray(Array *a, size_t initialSize) {
    a->array = malloc(initialSize * sizeof(int));
    a->used = 0;
    a->size = initialSize;
}
void insertArray(Array *a, int element) {
    if (a->used == a->size) {
        a->size *= 2;
        a->array = realloc(a->array, a->size * sizeof(int));
    }
    a->array[a->used++] = element;
}
void freeArray(Array *a) {
    free(a->array);
    a->array = NULL;
    a->used = a->size = 0;
}


// print skip-list info and representational figure
void printSkipList(SkipList *list) {
    int i, j, k, pos = 0, prevPos = 0, diff, numDigits;
    Node* temp = list->header;
    Array a;

    // fill dynamic array with level 0 elements
    initArray(&a, 10);
    while (temp->next[0] != list->header) {
        temp = temp->next[0];
        insertArray(&a, temp->data);
    }
    temp = list->header;
    // print number of levels
    printf("\nNumber of levels in skip-list: %d\n", list->level + 1);
    printf("Number of elements in skip-list: %d\n", list->count);
    printf("Skip-list figure: \n");
    // print data
    for (i = list->level; i >= 0; i--) {
        pos = 0, prevPos = 0;
        while (temp->next[i] != list->header) {
        numDigits = 0;      
            temp = temp->next[i];
            while (temp->data != a.array[pos]) {
                numDigits += floor (log10 (abs (a.array[pos]))) + 1;
                pos++;
            }
            pos++;
            diff = (pos - prevPos) - 1; 
            if (diff >= 1) {
                for (j = 0; j < (4*diff)+numDigits; j++) 
                        printf(" ");    
            }           
            printf("%d -> ", temp->data);
            prevPos = pos;
        }
        temp = list->header;
        printf("\n");       
    }
    printf("\n\n");
}

#endif // SKIPLIST_H_

Any advice is greatly appreciated, thanks.


Solution

  • Your MAXLEVEL is way too small. I think original paper had the level at up to lg(n), that is log base 2 of the list size.

    From Puch's original 1990 skiplist paper:

    Determining MaxLevel

    Since we can safely cap levels at L(n), we should choose MaxLevel = L(N) (where N is an upper bound on the number of elements in a skip list). If p = l/2, using MaxLevel = 16 is appropriate for data structures containing up to 2^16 elements

    In general, if p is 1/X, then use log base X of the list size.

    MAXLEVEL=5, and I get about the same results you see.

    evaitl@bb ~/se $ ./foo 
    Skip list initialized.
    Element 1 found and returned.
    fun() took 0.000014 seconds to execute 
    Element 10 found and returned.
    fun() took 0.000002 seconds to execute 
    Element 50 found and returned.
    fun() took 0.000002 seconds to execute 
    Element 100 found and returned.
    fun() took 0.000002 seconds to execute 
    Element 1000 found and returned.
    fun() took 0.000003 seconds to execute 
    Element 5000 found and returned.
    fun() took 0.000004 seconds to execute 
    Element 10000 found and returned.
    fun() took 0.000006 seconds to execute 
    Element 25000 found and returned.
    fun() took 0.000011 seconds to execute 
    Element 50000 found and returned.
    fun() took 0.000021 seconds to execute 
    Element 100000 found and returned.
    fun() took 0.000044 seconds to execute 
    Element 200000 found and returned.
    fun() took 0.000087 seconds to execute 
    

    Raised the MAXLEVEL to 20, and I get:

    evaitl@bb ~/se $ ./foo 
    Skip list initialized.
    Element 1 found and returned.
    fun() took 0.000016 seconds to execute 
    Element 10 found and returned.
    fun() took 0.000003 seconds to execute 
    Element 50 found and returned.
    fun() took 0.000003 seconds to execute 
    Element 100 found and returned.
    fun() took 0.000002 seconds to execute 
    Element 1000 found and returned.
    fun() took 0.000002 seconds to execute 
    Element 5000 found and returned.
    fun() took 0.000003 seconds to execute 
    Element 10000 found and returned.
    fun() took 0.000004 seconds to execute 
    Element 25000 found and returned.
    fun() took 0.000003 seconds to execute 
    Element 50000 found and returned.
    fun() took 0.000004 seconds to execute 
    Element 100000 found and returned.
    fun() took 0.000003 seconds to execute 
    Element 200000 found and returned.
    fun() took 0.000004 seconds to execute 
    

    Added 400000 and 800000 to your n[]:

    evaitl@bb ~/se $ ./foo 
    Skip list initialized.
    Element 1 found and returned.
    fun() took 0.000016 seconds to execute 
    Element 10 found and returned.
    fun() took 0.000001 seconds to execute 
    Element 50 found and returned.
    fun() took 0.000001 seconds to execute 
    Element 100 found and returned.
    fun() took 0.000002 seconds to execute 
    Element 1000 found and returned.
    fun() took 0.000002 seconds to execute 
    Element 5000 found and returned.
    fun() took 0.000002 seconds to execute 
    Element 10000 found and returned.
    fun() took 0.000002 seconds to execute 
    Element 25000 found and returned.
    fun() took 0.000004 seconds to execute 
    Element 50000 found and returned.
    fun() took 0.000003 seconds to execute 
    Element 100000 found and returned.
    fun() took 0.000003 seconds to execute 
    Element 200000 found and returned.
    fun() took 0.000003 seconds to execute 
    Element 400000 found and returned.
    fun() took 0.000004 seconds to execute 
    Element 800000 found and returned.
    fun() took 0.000003 seconds to execute