Search code examples
c avr-gcc atmega

Efficiently accessing the individual bytes in a long in C (On an 8-bit platform)


I have an application where I am receiving a binary serial data stream, and I need to split this data stream up into separate variables of various lengths (uint16_t and uint32_t).

Right now, I'm doing the ultra-simple:

/* Byte offset, within the receive buffer, of the field being decoded. */
#define OFFSET_iTOW 0

/* Raw receive buffer; declared volatile so the reads below are not optimized away. */
volatile uint8_t    temp[128];
/* Destination for the reassembled 32-bit value; volatile, so every access to it is kept. */
volatile uint32_t   recBytes;

/*
 * Demonstration loop: rebuilds one 32-bit field from four consecutive bytes
 * of temp. The bytes arrive little-endian, so the byte at offset +3 is the
 * most significant one; it is loaded first and the accumulator is shifted
 * left by 8 before each following byte is OR-ed in.
 */
void main()
{
    while (1)
    {
        /* recBytes is volatile, so each statement below performs its own
         * complete 32-bit read and/or write of recBytes in memory. */
        recBytes    = temp[OFFSET_iTOW+3];   /* most significant byte */
        recBytes    <<= 8;
        recBytes    |= temp[OFFSET_iTOW+2];
        recBytes    <<= 8;
        recBytes    |= temp[OFFSET_iTOW+1];
        recBytes    <<= 8;
        recBytes    |= temp[OFFSET_iTOW+0];  /* least significant byte */

    }
}

(Data is sent little-endian. The OFFSET_iTOW is one of about three dozen offsets (defined in a header file normally) for the various sections of a data packet)

However, this results in a rather enormous assembly output (slightly truncated to section of interest):

void main()
{

    recBytes = 0;
 12e:   10 92 04 02     sts 0x0204, r1
 132:   10 92 05 02     sts 0x0205, r1
 136:   10 92 06 02     sts 0x0206, r1
 13a:   10 92 07 02     sts 0x0207, r1
    while (1)
    {



        recBytes    = temp[OFFSET_iTOW+3];
 13e:   80 91 03 02     lds r24, 0x0203
 142:   90 e0           ldi r25, 0x00   ; 0
 144:   a0 e0           ldi r26, 0x00   ; 0
 146:   b0 e0           ldi r27, 0x00   ; 0
 148:   80 93 04 02     sts 0x0204, r24
 14c:   90 93 05 02     sts 0x0205, r25
 150:   a0 93 06 02     sts 0x0206, r26
 154:   b0 93 07 02     sts 0x0207, r27
        recBytes    <<= 8;
 158:   80 91 04 02     lds r24, 0x0204
 15c:   90 91 05 02     lds r25, 0x0205
 160:   a0 91 06 02     lds r26, 0x0206
 164:   b0 91 07 02     lds r27, 0x0207
 168:   ba 2f           mov r27, r26
 16a:   a9 2f           mov r26, r25
 16c:   98 2f           mov r25, r24
 16e:   88 27           eor r24, r24
 170:   80 93 04 02     sts 0x0204, r24
 174:   90 93 05 02     sts 0x0205, r25
 178:   a0 93 06 02     sts 0x0206, r26
 17c:   b0 93 07 02     sts 0x0207, r27
        recBytes    |= temp[OFFSET_iTOW+2];
 180:   20 91 04 02     lds r18, 0x0204
 184:   30 91 05 02     lds r19, 0x0205
 188:   40 91 06 02     lds r20, 0x0206
 18c:   50 91 07 02     lds r21, 0x0207
 190:   80 91 02 02     lds r24, 0x0202
 194:   90 e0           ldi r25, 0x00   ; 0
 196:   a0 e0           ldi r26, 0x00   ; 0
 198:   b0 e0           ldi r27, 0x00   ; 0
 19a:   82 2b           or  r24, r18
 19c:   93 2b           or  r25, r19
 19e:   a4 2b           or  r26, r20
 1a0:   b5 2b           or  r27, r21
 1a2:   80 93 04 02     sts 0x0204, r24
 1a6:   90 93 05 02     sts 0x0205, r25
 1aa:   a0 93 06 02     sts 0x0206, r26
 1ae:   b0 93 07 02     sts 0x0207, r27
        recBytes    <<= 8;
 1b2:   80 91 04 02     lds r24, 0x0204
 1b6:   90 91 05 02     lds r25, 0x0205
 1ba:   a0 91 06 02     lds r26, 0x0206
 1be:   b0 91 07 02     lds r27, 0x0207
 1c2:   ba 2f           mov r27, r26
 1c4:   a9 2f           mov r26, r25
 1c6:   98 2f           mov r25, r24
 1c8:   88 27           eor r24, r24
 1ca:   80 93 04 02     sts 0x0204, r24
 1ce:   90 93 05 02     sts 0x0205, r25
 1d2:   a0 93 06 02     sts 0x0206, r26
 1d6:   b0 93 07 02     sts 0x0207, r27
        recBytes    |= temp[OFFSET_iTOW+1];
 1da:   20 91 04 02     lds r18, 0x0204
 1de:   30 91 05 02     lds r19, 0x0205
 1e2:   40 91 06 02     lds r20, 0x0206
 1e6:   50 91 07 02     lds r21, 0x0207
 1ea:   80 91 01 02     lds r24, 0x0201
 1ee:   90 e0           ldi r25, 0x00   ; 0
 1f0:   a0 e0           ldi r26, 0x00   ; 0
 1f2:   b0 e0           ldi r27, 0x00   ; 0
 1f4:   82 2b           or  r24, r18
 1f6:   93 2b           or  r25, r19
 1f8:   a4 2b           or  r26, r20
 1fa:   b5 2b           or  r27, r21
 1fc:   80 93 04 02     sts 0x0204, r24
 200:   90 93 05 02     sts 0x0205, r25
 204:   a0 93 06 02     sts 0x0206, r26
 208:   b0 93 07 02     sts 0x0207, r27
        recBytes    <<= 8;
 20c:   80 91 04 02     lds r24, 0x0204
 210:   90 91 05 02     lds r25, 0x0205
 214:   a0 91 06 02     lds r26, 0x0206
 218:   b0 91 07 02     lds r27, 0x0207
 21c:   ba 2f           mov r27, r26
 21e:   a9 2f           mov r26, r25
 220:   98 2f           mov r25, r24
 222:   88 27           eor r24, r24
 224:   80 93 04 02     sts 0x0204, r24
 228:   90 93 05 02     sts 0x0205, r25
 22c:   a0 93 06 02     sts 0x0206, r26
 230:   b0 93 07 02     sts 0x0207, r27
        recBytes    |= temp[OFFSET_iTOW+0];
 234:   20 91 04 02     lds r18, 0x0204
 238:   30 91 05 02     lds r19, 0x0205
 23c:   40 91 06 02     lds r20, 0x0206
 240:   50 91 07 02     lds r21, 0x0207
 244:   80 91 00 02     lds r24, 0x0200
 248:   90 e0           ldi r25, 0x00   ; 0
 24a:   a0 e0           ldi r26, 0x00   ; 0
 24c:   b0 e0           ldi r27, 0x00   ; 0
 24e:   82 2b           or  r24, r18
 250:   93 2b           or  r25, r19
 252:   a4 2b           or  r26, r20
 254:   b5 2b           or  r27, r21
 256:   80 93 04 02     sts 0x0204, r24
 25a:   90 93 05 02     sts 0x0205, r25
 25e:   a0 93 06 02     sts 0x0206, r26
 262:   b0 93 07 02     sts 0x0207, r27
 266:   6b cf           rjmp    .-298       ; 0x13e <loop+0x10>

This is part of a GPS data parser in the interrupt-service routine living on an 8-bit microprocessor running at 16 MHz, and I need to do a lot of these conversions, so the result above is a bit excessive.

Since this is in an ISR, I can be confident that the various data will not change during the interrupt. Basically, I'd like to be able to address the individual bytes in the long. Since this is an 8-bit architecture, it seems like the compiler should be able to optimize down to just a few operations (maybe 3-4 per line of C, as the bytes in the long are directly addressable from an assembly perspective).

The variables are declared volatile so they're not optimized away to a loop that does nothing. In the actual application, they're externed structs that are written to from the ISR, but read from the idle loop (with the appropriate ISR guarding to prevent reads being interrupted). I'm not sure how to produce a compact snippet that demonstrates that exact behavior, though.


Solution

  • if you use a union you can get to the byte parts of the long.

    /*
     * Overlay that exposes a 32-bit value one byte at a time: `bytes` and
     * `value` occupy the same storage, so a write through one member is
     * visible through the other. Which index of `bytes` holds the least
     * significant byte depends on the target's endianness.
     */
    union Data {
        uint8_t  bytes[sizeof(uint32_t)];  /* the four individual bytes */
        uint32_t value;                    /* the same storage as one 32-bit integer */
    } recBytes;
    

    then

    /*
     * Copy the four wire bytes straight across, index for index. The packet
     * is little-endian and avr-gcc also stores uint32_t little-endian, so
     * bytes[0] (lowest address) must receive the least significant byte,
     * temp[OFFSET_iTOW+0]. On a big-endian target the indices on one side
     * would have to be reversed instead.
     */
    recBytes.bytes[0] = temp[OFFSET_iTOW+0];
    recBytes.bytes[1] = temp[OFFSET_iTOW+1];
    recBytes.bytes[2] = temp[OFFSET_iTOW+2];
    recBytes.bytes[3] = temp[OFFSET_iTOW+3];
    

    then recBytes.value will be what you want (though the byte ordering depends on the target's endianness: bytes[0] is the least significant byte on a little-endian target, so you may have to reverse the indices)