I want to make a quick, relatively small lookup table, but with a large input range: -Input: 32bit Value max. (a 32bit color value) -Output: 8bit Index max. (index to a table)
Something like the code below. (If more than 256 values are indexed, the index is just going to be 0)
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
static uint8_t getIndx(uint32_t value);
static uint8_t **indx;
static uint8_t count = 0;
int main(void) {
// set up the indx
const uint32_t size = 0xFFFF; // for demonstrative purposes not even nearly as large as wished to be (0xFFFFFFFF) plus my for loop below would get in trouble, I think
indx = (uint8_t**)malloc(sizeof(uint8_t*) * size);
if(indx == NULL) {
printf("could not allocate memory\n");
return 0;
}
for(int i = 0; i < size + 1; i++) {
indx[i] = NULL;
}
printf("%d\n", getIndx(111));
printf("%d\n", getIndx(222));
printf("%d\n", getIndx(333));
printf("%d\n", getIndx(111));
printf("%d\n", getIndx(222));
printf("%d\n", getIndx(333));
return 0;
}
static uint8_t getIndx(uint32_t value) {
if(indx[value] == NULL) {
if(count > 255) return 0;
indx[value] = (uint8_t*)malloc(sizeof(uint8_t));
*indx[value] = count;
count++;
}
return *(indx[value]);
}
The output is:
0
1
2
0
1
2
No matter what I think of, I always end up with something like that. With an input range of 32bit (4294967296 states) I would need to allocate way too much memory to only get 256 possible outputs. And forming 256 if else
, inside a for
loop or not, is also not what I want.
Is there some method that is quick, either a table or not which in the end has the same function, that I have not yet heard about?
Many thanks in advance!
You can use a hash table to map the 32-bit values to the look-up table indices. The length of the hash table should be significantly longer than the look-up table to reduce the chance of hash collisions.
The following example uses a linear search of the hash table starting at the hash value derived from the input value until a matching entry is found or an empty hash table is found. If the input value is not found, and there is room in the look-up table and room in the hash table (which there should be since it is longer than the look-up table), the input value is added to the look-up table and the hash table is updated. It uses a hash table four times the size of the look-up table.
The example does two passes with the same pseudo-random sequence in each pass. The first pass should mostly fill up the look-up table and update the hash table. The second pass shouldn't make any more changes to the look-up table or hash table because it is just repeating the first sequence.
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <string.h>
struct hash_index {
uint8_t index;
char used;
};
#define HASHBITS 10
#define HASHSIZE (1U << HASHBITS)
#define HASHMASK (HASHSIZE - 1)
#define LOOKUPSIZE 256U
static uint8_t getIndx(uint32_t value);
static uint32_t lookup[LOOKUPSIZE];
static struct hash_index hashtab[HASHSIZE];
static unsigned int count = 0;
static unsigned int tot_collisions = 0;
static unsigned int max_collisions = 0;
static unsigned int hash_used = 0;
int main(void) {
int pass;
unsigned int i;
uint32_t value;
uint8_t index;
unsigned int successes;
unsigned int failures;
int ok;
printf("Lookup size: %u, Hash size: %u\n", LOOKUPSIZE, HASHSIZE);
for (pass = 1; pass <= 2; pass++) {
successes = 0;
failures = 0;
tot_collisions = 0;
max_collisions = 0;
/* Not resetting hash_used because it shouldn't change after first pass. */
printf("\nPass %d, currently used hashes: %u\n\n", pass, hash_used);
srand(1);
for (i = 0; i < 260; i++) {
static const char * const outcomes[2] = {"FAIL", "OK"};
value = rand();
index = getIndx(value);
ok = lookup[index] == value;
printf("%" PRIu32 " -> %" PRIu8 " (%s)\n", value, index, outcomes[ok]);
if (ok) {
successes++;
} else {
failures++;
}
}
printf("\nSuccesses: %u, Failures: %u\n", successes, failures);
printf("Used hashes: %u, Total collisions: %u, Max collisions: %u\n\n",
hash_used, tot_collisions, max_collisions);
}
return 0;
}
static uint8_t getIndx(uint32_t value) {
unsigned int initial_hash;
unsigned int hash;
unsigned int collisions = 0;
uint8_t index;
/*
* Search for value using hash table, starting at position hashed from value.
*
* The hash table is longer than the maximum number of used entries,
* so we should always be able to find an unused entry in the hash table.
*/
initial_hash = ((value * UINT32_C(0x61c88647)) >> (32 - HASHBITS)) & HASHMASK;
for (hash = initial_hash;
collisions < HASHSIZE && hashtab[hash].used;
hash = (hash + 1) & HASHMASK) {
/*
* This hash table entry is used. Get the corresponding index in the
* main lookup table to check if the value matches.
*/
index = hashtab[hash].index;
if (lookup[index] == value) {
/* Matching value found. Return its index in the main table. */
return index;
}
/* Count hash collisions and total hash collisions. */
collisions++;
tot_collisions++;
if (max_collisions < collisions) {
/* Update max hash collisions for diagnostics. */
max_collisions = collisions;
}
}
/* Value not found. */
if (count < LOOKUPSIZE && collisions < HASHSIZE) {
/*
* There is room in the main lookup table for the new value
* and room in the hash table. The index of the new value in the
* main lookup table will be the current count, which will be incremented.
*/
index = count++;
/*
* Add value to main lookup table,
* add index in main lookup table to hash table,
* and return the index in the main lookup table.
*/
lookup[index] = value;
hashtab[hash].index = index;
hashtab[hash].used = 1;
hash_used++; /* Count of used hash table entries for diagnostics. */
return index;
}
/*
* Value not found and main lookup table is full or hash table is full.
* Give up.
*/
return 0;
}
Another approach for dealing with collisions is to make each hash table entry point to a linked list of matching values, but that is more complicated.