Search code examples
encryptionpasswordsnim-lang

How do I re-build django's password hashing in nim?


I'm rewriting my web application from Python (Django) to Nim (Prologue). I want to keep using the database that Django has provided me with thus far, but I'm struggling with how to rebuild the way Django hashes passwords. According to Django's own documentation:

By default, Django uses the PBKDF2 algorithm with a SHA256 hash

I found the nimcrypto library as a starting point, but given my lack of cryptographic knowledge a lot of the terminology in there and how to use the given procs goes completely over my head. How can I rebuild the Django encryption using it?


Solution

  • Warning: As has been pointed out to me, it might be a better idea to use a nim-wrapper of libsodium for this, as it is the more established library. How you would do that is beyond me though, as libsodium has no obvious "pbkdf2" functions. However, using openssl directly as in the latter 2 approaches should also be perfectly valid.

    1) In pure nim with nimcrypto

    After researching this topic for a fair bit and going over the code in the django repo and the nimcrypto lib, I was able to reproduce the hashes I have in my database. The key here is that nimcrypto has a pbkdf2 proc in a module of the same name, to which you need to pass an HMAC type that contains the sha256 algorithm as well as your server's secret key. You also need to be aware that the strings stored in your database have the algorithm name, the number of iterations and the salt prepended to the hash, separated by `$` (`<algorithm>$<iterations>$<salt>$<hash>`). The hash part itself is the base64-encoded version of the raw hash.

    import nimcrypto
    import nimcrypto/pbkdf2
    import base64
    
    # Inputs: the plain-text password, plus the salt and the iteration count
    # that have to be read from the password's entry in your database.
    let
      password = "thePasswordToHash"
      salt = "asalt"
      iterations = 180000
    
    # HMAC-SHA256 context that is handed to pbkdf2, initialised with the
    # server's secret key.
    # NOTE(review): Django's PBKDF2 hasher does not appear to use SECRET_KEY;
    # confirm whether this initialisation has any effect on the result.
    let serverSecretKey = "YourServersSecretKeyInDjangoFromSettings"
    var hmacCtx: HMAC[sha256]
    hmacCtx.init(serverSecretKey)
    
    # 32 bytes = 256 bits. pbkdf2 writes the digest into this buffer; its
    # return value is not needed and is therefore discarded.
    var derivedKey: array[32, byte]
    discard hmacCtx.pbkdf2(password, salt, iterations, derivedKey)
    
    # Base64 encode the raw bytes. The resulting string should be identical to
    # the hash-string you have in your database.
    let newHash = derivedKey.encode
    

    Edit:

    2) In nim and python with openssl

    I have since learned of the existence of nimpy, an incredible project for importing nim into python and vice versa. Using it, we can recreate the method django uses for hashing.

    import nimpy
    
    proc python_calculate_SHA256_pbkdf2_hash(password: string, salt: string, iterations: int, secretKey: string): string {.gcsafe.} =
        ## Hashes `password` with `salt` for `iterations` rounds by calling
        ## Python's `hashlib.pbkdf2_hmac("sha256", ...)` through nimpy and
        ## returns the digest as a base64 encoded string.
        ##
        ## `secretKey` is accepted but never read by this proc.
        let
            pyHashlib = pyImport("hashlib")
            pyB64 = pyImport("base64")
            encodingUtils = pyImport("django.utils.encoding")
    
        # Django's helper turns both text inputs into Python bytes objects.
        let passwordBytes = encodingUtils.force_bytes(password)
        let saltBytes = encodingUtils.force_bytes(salt)
    
        let rawDigest = pyHashlib.pbkdf2_hmac("sha256", passwordBytes, saltBytes, iterations)
        let encodedDigest = pyB64.b64encode(rawDigest).decode("ascii").strip()
    
        # `$` converts the Python string object into a Nim string.
        result = $encodedDigest
    

    This will be significantly (at least 2x) faster than the nimcrypto version, mostly because Python's "hashlib" wraps an incredibly efficient C library.

    3) In pure nim with a direct import of openssl

    2nd Edit:

    Thanks to hotdog and xflywind, there is now a refactored version of the python code that calls openssl directly. According to haphazard benchmarks that pitted the python version against this pure nim version, it is slightly faster than the python version:

    from std/openssl import EVP_MD, DLLUtilName, DLLSSLName
    import std/[strformat, base64]
    
    # Binding for OpenSSL's PKCS5_PBKDF2_HMAC key derivation function.
    #
    # `pass`/`passLen` and `salt`/`saltLen` are the input buffers with their
    # lengths in bytes (OpenSSL falls back to strlen when `passLen` is -1),
    # `iter` is the iteration count, `digest` selects the hash used by the
    # HMAC, and `keylen` bytes of derived key are written to `output`.
    # Returns 1 on success and 0 on error.
    #
    # The function is part of libcrypto, so it is loaded from DLLUtilName.
    # Going through libssl (DLLSSLName) relies on the dynamic loader also
    # searching libssl's dependencies, which not every platform does.
    proc PKCS5_PBKDF2_HMAC(
      pass: cstring,
      passLen: cint,
      salt: cstring,
      saltLen: cint,
      iter: cint,
      digest: EVP_MD,
      keylen: cint,
      output: cstring
    ): cint {.cdecl, dynlib: DLLUtilName, importc: "PKCS5_PBKDF2_HMAC".}
    
    # Bound to OpenSSL's `EVP_MD_get_size`; the result is used as the hash
    # length in bytes for the digest `md`.
    proc EVP_MD_size_fixed*(md: EVP_MD): cint {.
      importc: "EVP_MD_get_size", dynlib: DLLUtilName, cdecl.}
    # Bound to OpenSSL's `EVP_sha256`; yields the SHA-256 digest handle.
    proc EVP_sha256_fixed*(): EVP_MD {.
      importc: "EVP_sha256", dynlib: DLLUtilName, cdecl.}
    
    proc opensslCalculateSHA256Pbkdf2Hash(password: string, salt: string, iterations: int): string {.gcsafe.} =
      ## Derives a PBKDF2-HMAC-SHA256 key from `password` and `salt` with
      ## `iterations` rounds via OpenSSL and returns it base64 encoded.
      ##
      ## Raises `ValueError` when `iterations` is smaller than 1 or does not
      ## fit into a C int. A failure reported by OpenSSL itself trips a
      ## `doAssert`.
      if iterations < 1:
        raise newException(ValueError, fmt"You need at least 1 iteration, but {iterations} were requested.")
    
      let hasTooManyIterations = iterations > cint.high
      if hasTooManyIterations: 
        raise newException(ValueError, fmt"You can not have more iterations than a c integer can carry. Choose a number below {cint.high}")
    
      let digest: EVP_MD = EVP_sha256_fixed()
      let hashLength: cint = EVP_MD_size_fixed(digest)
      # A non-positive size means the digest lookup failed; stop before
      # allocating and indexing the output buffer.
      doAssert hashLength > 0, "OpenSSL did not report a valid SHA256 digest size"
    
      # OpenSSL writes into this buffer, so it has to be a mutable `var`.
      var output = newString(hashLength)
      let outputStartingpoint: cstring = cast[cstring](addr output[0])
    
      # Pass the real password length instead of -1: with -1 OpenSSL uses
      # strlen and would silently cut the password off at the first NUL byte.
      let hashOperationReturnCode = PKCS5_PBKDF2_HMAC(
          password.cstring,
          len(password).cint,
          salt.cstring,
          len(salt).cint,
          iterations.cint,
          digest,
          hashLength,
          outputStartingpoint
        )
    
      # PKCS5_PBKDF2_HMAC returns 1 on success.
      let wasHashSuccessful = hashOperationReturnCode == 1
      doAssert wasHashSuccessful, "PKCS5_PBKDF2_HMAC reported a failure"
    
      result = encode(output)
    

    3rd edit:

    4) Variation of 3) compatible with openssl 3

    With openssl 3 the EVP_MD_size function got renamed to EVP_MD_get_size. That and a general minor refactor made me want to post this updated version of 3).

    from std/openssl import EVP_MD, DLLUtilName, DLLSSLName
    import std/[strformat, base64]
    
    # Binding for OpenSSL's PKCS5_PBKDF2_HMAC key derivation function.
    #
    # `pass`/`passLen` and `salt`/`saltLen` are the input buffers with their
    # lengths in bytes (OpenSSL falls back to strlen when `passLen` is -1),
    # `iter` is the iteration count, `digest` selects the hash used by the
    # HMAC, and `keylen` bytes of derived key are written to `output`.
    # Returns 1 on success and 0 on error.
    #
    # The function is part of libcrypto, so it is loaded from DLLUtilName.
    # Going through libssl (DLLSSLName) relies on the dynamic loader also
    # searching libssl's dependencies, which not every platform does.
    proc PKCS5_PBKDF2_HMAC(
      pass: cstring,
      passLen: cint,
      salt: cstring,
      saltLen: cint,
      iter: cint,
      digest: EVP_MD,
      keylen: cint,
      output: cstring
    ): cint {.cdecl, dynlib: DLLUtilName, importc: "PKCS5_PBKDF2_HMAC".}
    
    # Bound to OpenSSL's `EVP_MD_get_size`; the result is used as the hash
    # length in bytes for the digest `md`.
    proc EVP_MD_size_fixed*(md: EVP_MD): cint {.
      importc: "EVP_MD_get_size", dynlib: DLLUtilName, cdecl.}
    # Bound to OpenSSL's `EVP_sha256`; yields the SHA-256 digest handle.
    proc EVP_sha256_fixed*(): EVP_MD {.
      importc: "EVP_sha256", dynlib: DLLUtilName, cdecl.}
    
    proc opensslCalculateSHA256Pbkdf2Hash(password: string, salt: string, iterations: int): string {.gcsafe.} =
      ## Derives a PBKDF2-HMAC-SHA256 key from `password` and `salt` with
      ## `iterations` rounds via OpenSSL and returns it base64 encoded.
      ##
      ## Raises `ValueError` when `iterations` is smaller than 1 or does not
      ## fit into a C int. A failure reported by OpenSSL itself trips a
      ## `doAssert`.
      if iterations < 1:
        raise newException(ValueError, fmt"You need at least 1 iteration, but {iterations} were requested.")
    
      let hasTooManyIterations = iterations > cint.high
      if hasTooManyIterations: 
        raise newException(ValueError, fmt"You can not have more iterations than a c integer can carry. Choose a number below {cint.high}")
    
      let digest: EVP_MD = EVP_sha256_fixed()
      let hashLength: cint = EVP_MD_size_fixed(digest)
      # A non-positive size means the digest lookup failed; stop before
      # allocating and indexing the output buffer.
      doAssert hashLength > 0, "OpenSSL did not report a valid SHA256 digest size"
    
      # OpenSSL writes into this buffer, so it has to be a mutable `var`.
      var output = newString(hashLength)
      let outputStartingpoint: cstring = cast[cstring](addr output[0])
    
      # Pass the real password length instead of -1: with -1 OpenSSL uses
      # strlen and would silently cut the password off at the first NUL byte.
      let hashOperationReturnCode = PKCS5_PBKDF2_HMAC(
          password.cstring,
          len(password).cint,
          salt.cstring,
          len(salt).cint,
          iterations.cint,
          digest,
          hashLength,
          outputStartingpoint
        )
    
      # PKCS5_PBKDF2_HMAC returns 1 on success.
      let wasHashSuccessful = hashOperationReturnCode == 1
      doAssert wasHashSuccessful, "PKCS5_PBKDF2_HMAC reported a failure"
    
      result = encode(output)
    
    name ............................... min time      avg time    std dv   runs
    openssl ............................ 0.226 ms      0.230 ms    ±0.009  x1000
    python ............................. 0.233 ms      0.244 ms    ±0.031  x1000