Search code examples
cassemblygccx86-64

Smallest 32-bit Bit Reversal in C using inline __asm()


Trying to create the smallest 32-bit Bit Reversal in C using inline __asm().

So far, I've managed to get the __asm() code size to 23 bytes.

I'm curious if there are ways to further decrease the code size by using

  • compiler instrinsics
  • specialized assembly instructions
  • vanilla C code

Example

godbolt

#include <stdio.h>

unsigned int CodeSize;


void print_binary(unsigned int number) { if (number >> 1) print_binary(number >> 1); putc((number & 1) ? '1' : '0', stdout); }


void reverseBits(unsigned int OriginalValue)
{
    unsigned int ReversedValue = 0;

    start_asm:
    __asm__ (
        "movl  %1, %%eax\n"       // load the value
        "xorl  %%ebx, %%ebx\n"    // clear EBX (optimized)
        "bsrl  %%eax, %%ecx\n"    // find highest order bit set to 1 in EAX
        "incl  %%ecx\n"           // increment to get the correct number of iterations
    "reverse:\n\t"                // local label
        "shrl  $1, %%eax\n"       // shift the LSB of EAX to the carry flag CF
        "rcll  $1, %%ebx\n"       // the MSB of EBX goes into CF and CF's previous value goes into the LSB of EBX
        "loop  reverse\n"         // loop back to the local label
        "movl  %%ebx, %0"         // move the result to the output
        : "=r" (ReversedValue)    // output
        : "r" (OriginalValue)     // input
        : "eax", "ebx", "ecx"     // clobbered registers
    );
    end_asm:

    CodeSize = (char *)&&end_asm - (char *)&&start_asm;

    printf("\nOriginal: 0x%X ", OriginalValue); print_binary(OriginalValue);
    printf("\nReversed: 0x%X ", ReversedValue); print_binary(ReversedValue);
    printf("\n");
}


int main()
{
    reverseBits(0xfeedface);  
    reverseBits(0xfeed);  
    reverseBits(0xfe);               

    printf("\nCodeSize: %u bytes\n", CodeSize); 
    return 0;
}

Output

Original: 0xFEEDFACE 11111110111011011111101011001110
Reversed: 0x735FB77F 1110011010111111011011101111111

Original: 0xFEEDFA 111111101110110111111010
Reversed: 0x5FB77F 10111111011011101111111

Original: 0xFEED 1111111011101101
Reversed: 0xB77F 1011011101111111

Original: 0xFE 11111110
Reversed: 0x7F 1111111

CodeSize: 23 bytes

Update

From the helpful comments below, the code size is now 13 bytes

uint32_t reverseBits(uint32_t OriginalValue) {
    uint32_t ReversedValue = 0;

    __asm__ (
        "start_asm:\n"             // start label
        "xorl  %%ebx, %%ebx\n"     // clear EBX
        "bsrl  %1, %%ecx\n"        // find highest order bit set to 1 in EAX (which is %1)
    "reverse:\n"                   // local label for looping
        "shrl  $1, %1\n"           // shift the LSB of EAX (which is %1) to the carry flag CF
        "rcll  $1, %%ebx\n"        // the MSB of EBX goes into CF and CF's previous value goes into the LSB of EBX
        "decl  %%ecx\n"            // manually decrement ECX
        "jns   reverse\n"          // jump to reverse if sign flag is set (i.e., if ecx is negative)
        "end_asm:\n"               // end label
        : "=b" (ReversedValue)     // output directly to EBX
        : "a" (OriginalValue)      // input directly to EAX
        : "ecx"                    // clobbered register
    );

    return ReversedValue;
}

Solution

  • 10 bytes:

    __asm__ (
        "start_asm:\n"
        "xorl  %%ebx, %%ebx\n"    //Clear destination and CF
     "repeat:\n\t"
        "rcll  $1, %%ebx\n"       // Shift CF into destination LSB
        "shrl  $1, %%eax\n"       // Shift LSB from source into CF
        "jnz  repeat\n"           // If source is not zero - repeat
                                  // else (source is zero, CF is always 1)
        "rcll  $1, %%ebx\n"       // Shift the last 1 into destination
        "end_asm:\n"
        : "=b" (ReversedValue)    // output
        : "a" (OriginalValue)     // input
        :                         // no clobbered registers
    );