Search code examples
c++bitset

std::bitset hash function algorithm


Does anybody know what algorithm thw hash function for bitset is using,

this is from website : http://en.cppreference.com/w/cpp/utility/bitset/hash

#include <iostream>
#include <bitset>
#include <functional>

int main()
{
    std::bitset<4> b1(1);
    std::bitset<4> b2(2);
    std::bitset<4> b3(b2);
    std::bitset<4> b4(8);
    std::cout<<b4<<'\n';
    std::hash<std::bitset<4>> hash_fn;

    size_t h1 = hash_fn(b1);
    size_t h2 = hash_fn(b2);
    size_t h3 = hash_fn(b4);

    std::cout << h1 << '\n';
    std::cout << h2 << '\n';
    std::cout << h3 << '\n';
}

and the output is

1000
4334672815104069193
16667047557902998627
2258353126044249582

http://en.cppreference.com/w/cpp/utility/bitset/hash

Also why doesn't it convert the bits to unsigend long and generate a hash value?


Solution

  • As noted by Igor, C++ standard does not specify algorithm, it only requires that hash value depends only on object and will be same for the duration of the program: http://eel.is/c++draft/hash.requirements

    20.5.3.4 Hash requirements [hash.requirements] 1 A type H meets the Hash requirements if:

    • (1.1) it is a function object type,
    • (1.2) it satisfies the requirements of CopyConstructible and Destructible, and
    • (1.3) the expressions shown in Table 29 are valid and have the indicated semantics.

    2 Given Key is an argument type for function objects of type H, in Table 29 h is a value of type (possibly const) H, u is an lvalue of type Key, and k is a value of a type convertible to (possibly const) Key.

    Table 29 — Hash requirements

    • Expression Return type Requirement
    • h(k) size_­t The value returned shall depend only on the argument k for the duration of the program. [ Note: Thus all evaluations of the expression h(k) with the same value for k yield the same result for a given execution of the program. — end note ] [ Note: For two different values t1 and t2, the probability that h(t1) and h(t2) compare equal should be very small, approaching 1.0 / numeric_­limits​::​max(). — end note ]
    • h(u) size_­t Shall not modify u.

    Gcc's libstdc++ implementation of bitset uses std::hash: https://github.com/gcc-mirror/gcc/blob/master/libstdc%2B%2B-v3/include/debug/bitset

    #if __cplusplus >= 201103L
      // DR 1182.
      /// std::hash specialization for bitset.
      template<size_t _Nb>
        struct hash<__debug::bitset<_Nb>>
        : public __hash_base<size_t, __debug::bitset<_Nb>>
        {
          size_t
          operator()(const __debug::bitset<_Nb>& __b) const noexcept
          { return std::hash<_GLIBCXX_STD_C::bitset<_Nb>>()(__b._M_base()); }
        };
    #endif
    

    https://github.com/gcc-mirror/gcc/blob/1cb6c2eb3b8361d850be8e8270c597270a1a7967/libstdc%2B%2B-v3/include/std/bitset#L1561

      // DR 1182.
      /// std::hash specialization for bitset.
      template<size_t _Nb>
        struct hash<_GLIBCXX_STD_C::bitset<_Nb>>
        : public __hash_base<size_t, _GLIBCXX_STD_C::bitset<_Nb>>
        {
          size_t
          operator()(const _GLIBCXX_STD_C::bitset<_Nb>& __b) const noexcept
          {
            const size_t __clength = (_Nb + __CHAR_BIT__ - 1) / __CHAR_BIT__;
            return std::_Hash_impl::hash(__b._M_getdata(), __clength);
          }
        };
    

    LLVM's libcxx uses own implementation for bitset, xoring all words: https://github.com/llvm-mirror/libcxx/blob/2c4b8af9aada61d83610330416eb8a39a8aa5494/include/bitset#L417

    template <size_t _Size>
    struct _LIBCPP_TEMPLATE_VIS hash<bitset<_Size> >
        : public unary_function<bitset<_Size>, size_t>
    {
        _LIBCPP_INLINE_VISIBILITY
        size_t operator()(const bitset<_Size>& __bs) const _NOEXCEPT
            {return __bs.__hash_code();}
    };
    
    template <size_t _N_words, size_t _Size>
    inline
    size_t
    __bitset<_N_words, _Size>::__hash_code() const _NOEXCEPT
    {
        size_t __h = 0;
        for (size_type __i = 0; __i < _N_words; ++__i)
            __h ^= __first_[__i];
        return __h;
    }
    

    and simpler variant for 1 word bitset:

    inline
    size_t
    __bitset<1, _Size>::__hash_code() const _NOEXCEPT
    {
        return __first_;
    }