Search code examples
calgorithmsqrtquake

fast inversion algorithm slower than math.h 1/sqrt function


i just want to learn why fast inversion algorithm is slower than math.h sqrt function. Here is my code sample

Code tries to demonstrate compare slow inversion and fast inversion. While debugging i see 1 seconds for slow inversion and 4 second for fast inversion. Where is the problem?

    #include<stdio.h>
    #include<time.h>
    #include<math.h>
    #include"inverse.h"

    #define SIZE 256

    int main()
    {
       char buffer[SIZE];
       time_t curtime;
       time_t curtime2;
       struct tm *loctime;
       int i = 0;
       float x = 0;

       curtime = time(NULL);
       loctime = localtime (&curtime);
       fputs (asctime (loctime), stdout);

       while(i < 100000000)
       {
          i++;
          //x = 1/sqrt(465464.015465);
          x = inverse_square_root(465464.015465);
       }

       curtime = time(NULL);
       loctime = localtime (&curtime);
       fputs (asctime (loctime), stdout);

       getchar();
       return 0;
    }

    float inverse_square_root(float number)
    {
       long i;
       float x2, y;
       const float threehalfs = 1.5F;

       x2 = number * 0.5F;
       y  = number;
       i  = * ( long * ) &y;             // evil floating point bit level hacking
       i  = 0x5f3759df - ( i >> 1 );     // what the heck?
       y  = * ( float * ) &i;
       y  = y * ( threehalfs - ( x2 * y * y ) );   // 1st iteration
    // y  = y * ( threehalfs - ( x2 * y * y ) );   // 2nd iteration, this can be removed
       return y;
    }

Solution

  • "The problem" might be that you have hardware that implements sqrt() now, making it faster than a software approach. It's hard to tell without much more detail about your system and perhaps some profiling and disassembly data.

    See this answer for details about the number of cycles for the x86 fsqrt instruction, for instance.