Search code examples
cgccstructmingw-w64bit-fields

MinGW64 bit field access across word-boundaries is wrong


I'm investigation how different compilers handle unaligned access of structure bitfields members as well as members that cross the primitive types' boundaries, and I think MinGW64 is bugged. My test program is:

#include <stdint.h>
#include <stdio.h>

/* Structure for testing element access

The crux is the ISO C99 6.7.2.1p10 item:

An implementation may allocate any addressable storage unit large enough to hold a bitfield.
If enough space remains, a bit-field that immediately follows another bit-field in a
structure shall be packed into adjacent bits of the same unit. If insufficient space remains,
whether a bit-field that does not fit is put into the next unit or overlaps adjacent units is
implementation-defined. The order of allocation of bit-fields within a unit (high-order to
low-order or low-order to high-order) is implementation-defined. The alignment of the
addressable storage unit is unspecified.
*/

typedef struct _my_struct
{
    /* word 0 */
    uint32_t    first           :32;    /**< A whole word element   */
    /* word 1 */
    uint32_t    second          :8;     /**< bits 7-0               */
    uint32_t    third           :8;     /**< bits 15-8              */
    uint32_t    fourth          :8;     /**< bits 23-16             */
    uint32_t    fifth           :8;     /**< bits 31-24             */
    /* word 2 */
    uint32_t    sixth           :16;    /**< bits 15-0              */
    uint32_t    seventh         :16;    /**< bits 31-16             */
    /* word 3 */
    uint32_t    eigth           :24;    /**< bits 23-0              */
    uint32_t    ninth           :8;     /**< bits 31-24             */
    /* word 4 */
    uint32_t    tenth           :8;     /**< bits 7-0               */
    uint32_t    eleventh        :24;    /**< bits 31-8              */
    /* word 5 */
    uint32_t    twelfth         :8;     /**< bits 7-0               */
    uint32_t    thirteeneth     :16;    /**< bits 23-8              */
    uint32_t    fourteenth      :8;     /**< bits 31-24             */
    /* words 6 & 7 */
    uint32_t    fifteenth       :16;    /**< bits 15-0              */
    uint32_t    sixteenth       :8;     /**< bits 23-16             */
    uint32_t    seventeenth     :16;    /**< bits 31-24 & 7-0       */
    /* word 7 */
    uint32_t    eighteenth      :24;    /**< bits 31-8              */
    /* word 8 */
    uint32_t    nineteenth      :32;    /**< bits 31-0              */
    /* words 9 & 10 */
    uint32_t    twentieth       :16;    /**< bits 15-0              */
    uint32_t    twenty_first    :32;    /**< bits 31-16 & 15-0      */
    uint32_t    twenty_second   :16;    /**< bits 31-16             */
    /* word 11 */
    uint32_t    twenty_third    :32;    /**< bits 31-0              */
} __attribute__((packed)) my_struct;


uint32_t buf[] = {
        0x11223344, 0x55667788, 0x99AABBCC, 0x01020304, /* words 0  - 3     */
        0x05060708, 0x090A0B0C, 0x0D0E0F10, 0x12131415, /* words 4  - 7     */
        0x16171819, 0x20212324, 0x25262728, 0x29303132, /* words 8  - 11    */
        0x34353637, 0x35363738, 0x39404142, 0x43454647  /* words 12 - 15    */
};

uint32_t data[64];

int main(void)
{
    my_struct *p;

    p = (my_struct*) buf;

    data[0] = 0;
    data[1] = p->first;
    data[2] = p->second;
    data[3] = p->third;
    data[4] = p->fourth;
    data[5] = p->fifth;
    data[6] = p->sixth;
    data[7] = p->seventh;
    data[8] = p->eigth;
    data[9] = p->ninth;
    data[10] = p->tenth;
    data[11] = p->eleventh;
    data[12] = p->twelfth;
    data[13] = p->thirteeneth;
    data[14] = p->fourteenth;
    data[15] = p->fifteenth;
    data[16] = p->sixteenth;
    data[17] = p->seventeenth;
    data[18] = p->eighteenth;
    data[19] = p->nineteenth;
    data[20] = p->twentieth;
    data[21] = p->twenty_first;
    data[22] = p->twenty_second;
    data[23] = p->twenty_third;

    if( p->fifth == 0x55 )
    {
        data[0] = 0xCAFECAFE;
    }
    else
    {
        data[0] = 0xDEADBEEF;
    }

    int i;
    for (i = 0; i < 24; ++i) {
        printf("data[%d] = 0x%0x\n", i, data[i]);
    }
    return data[0];
}

And the results I found are:

| Data Member | Type    | GCC Cortex M3  | GCC mingw64   | GCC Linux     | GCC Cygwin    |
|:------------|:-------:|:---------------|:--------------|:--------------|:--------------|
| data[0]     | uint32_t| 0x0            | 0xcafecafe    | 0xcafecafe    | 0xcafecafe    |
| data[1]     | uint32_t| 0x11223344     | 0x11223344    | 0x11223344    | 0x11223344    |
| data[2]     | uint32_t| 0x88           | 0x88          | 0x88          | 0x88          |
| data[3]     | uint32_t| 0x77           | 0x77          | 0x77          | 0x77          |
| data[4]     | uint32_t| 0x66           | 0x66          | 0x66          | 0x66          |
| data[5]     | uint32_t| 0x55           | 0x55          | 0x55          | 0x55          |
| data[6]     | uint32_t| 0xbbcc         | 0xbbcc        | 0xbbcc        | 0xbbcc        |
| data[7]     | uint32_t| 0x99aa         | 0x99aa        | 0x99aa        | 0x99aa        |
| data[8]     | uint32_t| 0x20304        | 0x20304       | 0x20304       | 0x20304       |
| data[9]     | uint32_t| 0x1            | 0x1           | 0x1           | 0x1           |
| data[10]    | uint32_t| 0x8            | 0x8           | 0x8           | 0x8           |
| data[11]    | uint32_t| 0x50607        | 0x50607       | 0x50607       | 0x50607       |
| data[12]    | uint32_t| 0xc            | 0xc           | 0xc           | 0xc           |
| data[13]    | uint32_t| 0xa0b          | 0xa0b         | 0xa0b         | 0xa0b         |
| data[14]    | uint32_t| 0x9            | 0x9           | 0x9           | 0x9           |
| data[15]    | uint32_t| 0xf10          | 0xf10         | 0xf10         | 0xf10         |
| data[16]    | uint32_t| 0xe            | 0xe           | 0xe           | 0xe           |
| data[17]    | uint32_t| 0x150d         | 0x1415        | 0x150d        | 0x150d        |
| data[18]    | uint32_t| 0x121314       | 0x171819      | 0x121314      | 0x121314      |
| data[19]    | uint32_t| 0x16171819     | 0x20212324    | 0x16171819    | 0x16171819    |
| data[20]    | uint32_t| 0x2324         | 0x2728        | 0x2324        | 0x2324        |
| data[21]    | uint32_t| 0x27282021     | 0x29303132    | 0x27282021    | 0x27282021    |
| data[22]    | uint32_t| 0x2526         | 0x3637        | 0x2526        | 0x2526        |
| data[23]    | uint32_t| 0x29303132     | 0x35363738    | 0x29303132    | 0x29303132    |

GCC Cortex M3 is
arm-none-eabi-gcc (GNU MCU Eclipse ARM Embedded GCC, 32-bit) 8.2.1 20181213 (release) [gcc-8-branch revision 267074]

GCC Mingw is
gcc.exe (i686-posix-dwarf-rev0, Built by MinGW-W64 project) 8.1.0

GCC Linux is
gcc (GCC) 4.4.7 20120313 (Red Hat 4.4.7-23)

GCC Cygwin is
gcc (GCC) 7.4.0

All GCC versions seem to correctly handle unaligned access (like my_struct.thirteeneth).

The problem is not that members who cross the word boundary (my_struct.seventeenth) are different, as the C99 standard quoted above clearly states that the behaviour is implementation-defined. The problem is that all subsequent accesses are clearly incorrect (data[17] and on) even for aligned members (my_struct.nineteenth & my_struct.twenty_third). What's going on here, is this a bug or are these valid values?


Solution

  • It is not bugged, it lays the bitfields according to windows ABI.

    According to gcc docs:

    If packed is used on a structure, or if bit-fields are used, it may be that the Microsoft ABI lays out the structure differently than the way GCC normally does.

    Compile mingw64 version with -mno-ms-bitfields to fix the difference. Or compile all other versions with -mms-bitfields to lay out the structure the same as mingw.