Search code examples
assemblyavratmega

My Quick ADC-Interrupt filling array - can it be any faster? (Assembly)


I wrote an interrupt routine in assembly that is already faster than the C version.

Now I wonder whether it could be made even faster with a few small tweaks. Any suggestions would be much appreciated.

Times (Atmega328):

  • My asm: 65 clocks
  • Atmel Studio: 108 clocks
  • the code in the answer: 48 clocks

Measured from the first instruction of the interrupt routine to reti.

;-----------------------------------------------------------------------
; ADC_vect - ADC interrupt handler, index-based ring buffer
;
; Pseudo-C of what the instructions below do:
;       if (srcPos >= SRC_ARR_SIZE) srcPos = 0;
;       srcArray[srcPos] = ADCL | (ADCH << 8);
;       srcPos++;
;
; Reads:   srcPos (16-bit index), ADCL, ADCH
; Writes:  srcArray[srcPos] (2 bytes), srcPos
; Saved and restored: SREG (held in r18), r18, r24, r25, Y, Z
;
; NOTE(review): the wrap check runs before the store, so srcPos can be
; equal to SRC_ARR_SIZE between two interrupts.
;-----------------------------------------------------------------------
ADC_vect:
        push    r18
        in      r18, SREG-0x20          ; r18 = SREG; -0x20 presumably maps to the I/O address
        push    r24
        push    r25
        push    YL
        push    YH
        push    ZL
        push    ZH

        ; r25:r24 = srcPos; Y is left pointing at the high byte of srcPos
        ldi     YL, lo8(srcPos)
        ldi     YH, hi8(srcPos)
        ld      r24, Y+
        ld      r25, Y

        ; index -> byte offset (one element is 2 bytes)
        lsl     r24
        rol     r25

        ; Z is still free here, so it holds the array size in bytes
        ldi     ZL, lo8(SRC_ARR_SIZE*2)
        ldi     ZH, hi8(SRC_ARR_SIZE*2)
        cp      r24, ZL
        cpc     r25, ZH
        brlo    1f                      ; unsigned: offset < size, keep it
        ldi     r24, 0x0                ; otherwise start again at offset 0
        ldi     r25, 0x0
1:
        ; Z = address of srcArray + byte offset
        ldi     ZL, lo8(srcArray)
        ldi     ZH, hi8(srcArray)
        add     ZL, r24
        adc     ZH, r25

        ; byte offset -> index again, then index + 1
        clc                             ; ror shifts the carry in, so clear it first
        ror     r25
        ror     r24
        adiw    r24, 0x01

        ; write the new index back: high byte at Y, low byte via pre-decrement
        st      Y, r25
        st      -Y, r24

        ; r24/r25 are free again: fetch the ADC result, low byte first
        lds     r24, ADCL
        lds     r25, ADCH
        st      Z+, r24
        st      Z+, r25

        ; restore in reverse order of the saves
        pop     ZH
        pop     ZL
        pop     YH
        pop     YL
        pop     r25
        pop     r24
        out     SREG-0x20, r18
        pop     r18
        reti

The C equivalent:

ISR(ADC_vect){
    srcArray[srcPos] = ADCL | (ADCH << 8);
    srcPos++;
    if(srcPos >= SRC_ARR_SIZE)
        srcPos = 0;
}  

Based on the answer below I have now created this version (only 42 clocks). It only works for an array size below 256, which is acceptable for me: with anything larger, the code outside the interrupt would not get enough execution time anyway (it would mean filling more than 256 values in a fraction of a millisecond):

; Data layout plus ADC interrupt handler, pointer-based ring buffer.
;
; Pseudo-C:  *srcArrPtr++ = ADCL | (ADCH << 8);
;            if (low byte of srcArrPtr >= low byte of the end address)
;                low byte of srcArrPtr = low byte of srcArray;
;
; Only the LOW byte of the pointer is compared and written back, so the
; code relies on srcArray and srcArrPtr sharing the same address high byte.
; NOTE(review): srcArrPtr is only reserved here, no initial value is
; visible; presumably it is set to srcArray elsewhere - confirm.
.org 0x00                   ; NOTE(review): presumably intended to keep both objects in one 256-byte page - confirm the linked addresses

srcArray:   .space (SRC_ARR_SIZE*2)     ; sample buffer, 2 bytes per entry
srcArrPtr:  .space 2                    ; write pointer, placed directly behind the buffer so its address is also the end of the buffer

ADC_vect:
push r18
in r18, SREG-0x20           ; keep SREG in r18 until the end of the handler
push YL
push YH
push ZL
push ZH

ldi YL, lo8(srcArrPtr)      ; Y = address of the write pointer
ldi YH, hi8(srcArrPtr)      ; YH is not modified again below

ld ZL, Y+                   ; Z = current write pointer (low byte, then ...
ld ZH, Y                    ; ... high byte)

lds YL, ADCL                ; YL doubles as scratch for the ADC value (low byte read first)
st Z+, YL                   ; store to *ptr++
lds YL, ADCH
st Z+, YL

ldi YL, lo8(srcArrPtr)      ; restore YL; using it as scratch avoided saving one more register

cp ZL, YL                   ; low byte of next write address vs. low byte of end address

BRLO noReset
ldi ZL, lo8(srcArray)       ; end reached: next write goes to the start of the buffer

noReset:
st Y, ZL                    ; write back only the pointer low byte (the high byte stays constant)

pop ZH
pop ZL
pop YH
pop YL
out SREG-0x20, r18          ; restore SREG
pop r18
reti

Solution

  • Use the c-equivalent code of

    // Sketch only: lo and hi presumably stand for the two ADC result bytes.
    ISR(){
      *ptr++=lo + hi*256;        // store the 16-bit sample and advance the write pointer
      if (ptr==end) ptr=begin;   // wrap to the start once the end is reached
    }
    

    This should convert to perhaps half of your current assembly. Additional optimization is possible through careful placement of the variables -- e.g. placing ptr directly behind the array means the address of ptr is also the end-of-array address, which reduces the number of constants/addresses that have to be loaded.

    ;-------------------------------------------------------------------
    ; ADC_vect - ADC interrupt handler, pointer-based ring buffer
    ;
    ; Pseudo-C:  *ptr++ = ADCL | (ADCH << 8);
    ;            if (ptr >= end) ptr = srcArray;
    ;
    ; The address of ptr itself is used as the end address (Y after the
    ; two pre-decrement loads), so ptr is expected to be placed directly
    ; behind srcArray.
    ;
    ; Saved and restored: SREG (held in r18), r18, r19, Y, Z
    ;-------------------------------------------------------------------
    ADC_vect:
            push    r18
            push    r19
            in      r18, SREG-0x20      ; r18 = SREG for the whole handler

            push    YL
            push    YH
            push    ZL
            push    ZH

            ; Start one past ptr so both bytes can be read with pre-decrement
            ldi     YL, lo8(ptr + 2)
            ldi     YH, hi8(ptr + 2)
            ld      ZH, -Y              ; Z = current write pointer
            ld      ZL, -Y              ; Y is now the address of ptr (= end)

            ; *ptr++ = ADC result, low byte read first, r19 as scratch
            lds     r19, ADCL
            st      Z+, r19
            lds     r19, ADCH
            st      Z+, r19

            ; Wrap to the start unless the new pointer is still below end
            cp      ZL, YL
            cpc     ZH, YH
            brlo    1f                  ; unsigned: Z < end, keep it
            ldi     ZL, lo8(srcArray)
            ldi     ZH, hi8(srcArray)
    1:
            ; Write the updated pointer back to ptr (low byte, high byte)
            st      Y+, ZL
            st      Y+, ZH

            ; Restore in reverse order of the saves
            pop     ZH
            pop     ZL
            pop     YH
            pop     YL

            out     SREG-0x20, r18
            pop     r19
            pop     r18
            reti