Search code examples
javascripthashhash-function

Which hash function is better fitted to represent 128bit random id in a small hash-table


In my classes I've got the following exercise:

I've got GUIDs (Globally Unique Identifier) with 128bit .

Which hash function is better to represent the values in buckets with hashID 000 to 899, each bucket has 100 free places to store the hash collisions?

I want to compare the following hash-functions:

a) h(a) = a mod 900
b) h(a) = a mod 887
c) h(a) = a^2 mod 887
d) there are not enough information to answer this question

What I've got:

I think it's not better to use a^2, cause it would only give us a benefit in the first few thousand ids, they should be better distributed but afterward, I would probably have to make more collision-probing to store those values in other buckets.

I've tried to accomplish a behavior described above: In the snippet below I generate 90000 'randomly' unique numbers which are stored inside a map, with hash function following the mod 900. I know that for some reasons prime numbers are preferred to use for hash-functions.

Randomness only implemented to 32bit max. But i think this shouldn't be too important that I didn't use the 128bit max.

m = null;
uniqueMap = new Map();
hash = (z, p) => z % p ;

function getRandomInt(max) {
  guid = Math.floor(Math.random() * Math.floor(max));
  if (uniqueMap.has(guid)) return getRandomInt(max);
  return guid;
}


map = new Map();
for (var i = 1; i <= 90000; i++) {
  h = hash(getRandomInt(2147483647), 900);
  map.has(h) ? map.set(h, map.get(h) + 1) : map.set(h, 1);
}

map.forEach((a) => m = Math.max(a, m))

console.log(m);

Next snippet with the same functions but with mod 887:

m = null;
uniqueMap = new Map();
hash = (z, p) => z % p ;

function getRandomInt(max) {
  guid = Math.floor(Math.random() * Math.floor(max));
  if (uniqueMap.has(guid)) return getRandomInt(max);
  return guid;
}


map = new Map();
for (var i = 1; i <= 90000; i++) {
  h = hash(getRandomInt(2147483647), 887);
  map.has(h) ? map.set(h, map.get(h) + 1) : map.set(h, 1);
}

map.forEach((a) => m = Math.max(a, m))

console.log(m);

and with a^2:

m = null;
uniqueMap = new Map();
hash = (z, p) => z % p ;

function getRandomInt(max) {
  guid = Math.floor(Math.random() * Math.floor(max));
  if (uniqueMap.has(guid)) return getRandomInt(max);
  return guid;
}


map = new Map();
for (var i = 1; i <= 90000; i++) {
  h = hash(Math.pow(getRandomInt(2147483647),2), 887);
  map.has(h) ? map.set(h, map.get(h) + 1) : map.set(h, 1);
}

map.forEach((a) => m = Math.max(a, m))

console.log(m);

all inside one:

m = null;
uniqueMap = new Map();
hash = (z, p) => z % p ;

function getRandomInt(max) {
  guid = Math.floor(Math.random() * Math.floor(max));
  if (uniqueMap.has(guid)) return getRandomInt(max);
  return guid;
}


map = new Map();
for (var i = 1; i <= 90000; i++) {
  h = hash(getRandomInt(2147483647), 900);
  map.has(h) ? map.set(h, map.get(h) + 1) : map.set(h, 1);
}

map.forEach((a) => m = Math.max(a, m))

console.log(m);

m = null;
uniqueMap = new Map();
map = new Map();
for (var i = 1; i <= 90000; i++) {
  h = hash(getRandomInt(2147483647), 887);
  map.has(h) ? map.set(h, map.get(h) + 1) : map.set(h, 1);
}

map.forEach((a) => m = Math.max(a, m))

console.log(m);

m = null;
uniqueMap = new Map();
map = new Map();
for (var i = 1; i <= 90000; i++) {
  h = hash(Math.pow(getRandomInt(2147483647),2), 887);
  map.has(h) ? map.set(h, map.get(h) + 1) : map.set(h, 1);
}

map.forEach((a) => m = Math.max(a, m))

console.log(m);

If I'm comparing those 3 Methods they show me that the highest collision count with mod a^2 is higher than both 887 and 900 without powering the guid. So I assume this can't be the right answer.

But how should I compare the other two? they show me similar peaks with only a small difference.


Solution

  • You can compare the other two by simply checking which has lesser number of factors, since prime number has lesser factors they are used for hashing.

    The reason why the difference between the two are negligible is mainly due to the hash function you are using. Your hashing function already gives well distributed values. But since the question is about direct compare. The best way to do this is choose the one that has prime number a mod 887

    There is a very good explanation on this in cs.stackexchange

    Please visit this link for more info https://cs.stackexchange.com/questions/11029/why-is-it-best-to-use-a-prime-number-as-a-mod-in-a-hashing-function

    and this for more details on modular hashing https://algs4.cs.princeton.edu/34hash/