Electronic – avr-gcc timer overflow interrupt handler in inline assembly

assemblyavravr-gccinterrupts

My current project involves producing 200KHz PWM waveforms using timer 1 in fast PWM mode. I would like to increment a 16-bit counter every time the timer overflows (every 5μS)

volatile uint16_t count;
ISR(TIMER1_OVF_vect)
{
    ++count;
}

The microcontroller is an ATmega8 running at 16MHz, leaving only 80 cycles for servicing the interrupt and incrementing the variable before the next interrupt fires. Looking at the compiled code…

00000890 <__vector_8>:
 890:   1f 92           push    r1
 892:   0f 92           push    r0
 894:   0f b6           in  r0, 0x3f    ; 63
 896:   0f 92           push    r0
 898:   11 24           eor r1, r1
 89a:   8f 93           push    r24
 89c:   9f 93           push    r25
 89e:   80 91 c9 00     lds r24, 0x00C9
 8a2:   90 91 ca 00     lds r25, 0x00CA
 8a6:   01 96           adiw    r24, 0x01   ; 1
 8a8:   90 93 ca 00     sts 0x00CA, r25
 8ac:   80 93 c9 00     sts 0x00C9, r24
 8b0:   9f 91           pop r25
 8b2:   8f 91           pop r24
 8b4:   0f 90           pop r0
 8b6:   0f be           out 0x3f, r0    ; 63
 8b8:   0f 90           pop r0
 8ba:   1f 90           pop r1
 8bc:   18 95           reti

…I found out that the generated interrupt service routine could easily be optimized further. This is the first time that I have tried to include inline assembly to a C program, and I have found out that learning to do so is unnecessarily frustating and requires understanding some fairly esoteric syntax. I would like to know how to access uint8_t count within the inline assembly (as the variable is allocated statically unlike in every answer that I have seen on the web). Is the code otherwise OK, or have I missed something else?

ISR(TIMER1_OVF_vect, ISR_NAKED)
{
    asm volatile("push    r24"                                      "\n\t"
                 "in      r24, __SREG__"                            "\n\t"
                 "push    r24"                                      "\n\t"
                 "push    r25"                                      "\n\t"
                 "lds     r24, %A0"                                 "\n\t"
                 "lds     r25, %B0"                                 "\n\t"
                 "adiw    r24, 1"                                   "\n\t"
                 "sts     %B0, r25"                                 "\n\t"
                 "sts     %A0, r24"                                 "\n\t"
                 "pop     r25"                                      "\n\t"
                 "pop     r24"                                      "\n\t"
                 "out     __SREG__, r24"                            "\n\t"
                 "pop     r24"                                      "\n\t"
                 "reti"                                             "\n\t"
                 : "=r" (count)   /*this does*/
                 : "0" (count));  /*not work*/
}

As a side note, is there a way to make the compiler reserve a register pair specifically for uint8_t count, as that would allow reducing the ISR lenght by at least 6 instructions (by eliminating both lds and sts instructions, a push to stack, and a pop from stack)?

Best Answer

I found you post when I was looking for optimization of ISR routine. Finally, I've got a solution which you (and I) wanted to get.

I use Atmel Studio 6.1 (GCC 3.4.2.1002)

ISR(TIM0_OVF_vect,ISR_NAKED)
{
    asm volatile(
        "push   r24"            "\n"
        "in     r24, __SREG__"  "\n"
        "push   r24"            "\n"
        "push   r25"            "\n"

        "lds    r24, %A[_ts]"   "\n"
        "lds    r25, %B[_ts]"   "\n"
        "adiw   r24,1"          "\n"
        "sts    %B[_ts], r25"   "\n"
        "sts    %A[_ts], r24"   "\n"

        "pop    r25"            "\n"
        "pop    r24"            "\n"
        "out    __SREG__,r24"   "\n"
        "pop    r24"            "\n"
        "reti"                  "\n"
        : 
        : [_ts] "m" (ts)
        : "r24", "r25"
    );
}

Here ts is declared as volatile unsigned int ts = 0;
I use named operands ([_ts] "m" (ts)) instead of defailt %0

The result is:


    0000005a <__vector_11>:
    5a: 8f 93           push    r24
    5c: 8f b7           in  r24, 0x3f   ; 63
    5e: 8f 93           push    r24
    60: 9f 93           push    r25
    62: 80 91 60 00     lds r24, 0x0060
    66: 90 91 61 00     lds r25, 0x0061
    6a: 01 96           adiw    r24, 0x01   ; 1
    6c: 90 93 61 00     sts 0x0061, r25
    70: 80 93 60 00     sts 0x0060, r24
    74: 9f 91           pop r25
    76: 8f 91           pop r24
    78: 8f bf           out 0x3f, r24   ; 63
    7a: 8f 91           pop r24
    7c: 18 95           reti