Skip to content

Convert pulseIn function to (inline) assembly #30

@MCUdude

Description

@MCUdude

So I've been working with optimizing the functions used in MicroCore. One of the goals was to redo the pulseIn function, as it relies on micros to work. I think I've created a good replacement for the pulseIn function, but LTO has to be enabled in order for the timing to be right. LTO also reduces the size of the function dramatically.

This core has the option to disable LTO (which is enabled by default). I'd like to take the output from the compiler (with LTO enabled) and make an inline assembly function out of it, so that the timing can be guarateed no matter what compiler flag is used. We're also working multiple clock frequencies, so this also have to be taken into account.

I have very little (no) experience with assembly, so any help would be appreciated. @nerdralph is it possible to use the output below to re-create the pulseIn function in assembly that isn't hard coded to a particular F_CPU?

int main(void)
{
  pulseInNew(1, HIGH, UINT32_MAX);
}

uint32_t pulseInNew(uint8_t pin, uint8_t state, uint32_t timeout) 
{  
  uint32_t width = 0; // Keep initialization out of time critical area
  
  // Convert the timeout from microseconds to a number of times through
  // the initial loop; it takes 16 clock cycles per iteration.
  uint32_t numloops = 0;
  uint32_t maxloops = microsecondsToClockCycles(timeout) >> 4; // Divide by 16

  // Wait for any previous pulse to end
  while (!!(PINB & _BV(pin)) == state)
  {
    if(numloops++ == maxloops) {return 0;}
    asm("nop \n");  
    asm("nop \n");
  }
  // Wait for the pulse to start
  while (!!(PINB & _BV(pin)) != state)
  {
    if(numloops++ == maxloops) {return 0;}
    asm("nop \n");  
    asm("nop \n");
    asm("nop \n");
  }
  // Wait for the pulse to stop This loop is 16 instructions long
  while (!!(PINB & _BV(pin)) == state)
  {
    if(numloops++ == maxloops) {return 0;}           
    width++;
    asm("nop \n");  
    asm("nop \n");  
    asm("nop \n"); 
  }

  // Convert the reading to microseconds. 
  return clockCyclesToMicroseconds(width << 4); // Same as multiplying by 16
}
$ avr-objdump -S pulseIn_t13_test.ino.elf

pulseIn_t13_test.ino.elf:     file format elf32-avr


Disassembly of section .text:

00000000 <__vectors>:
   0:	09 c0       	rjmp	.+18     	; 0x14 <__ctors_end>
   2:	0e c0       	rjmp	.+28     	; 0x20 <__bad_interrupt>
   4:	0d c0       	rjmp	.+26     	; 0x20 <__bad_interrupt>
   6:	0c c0       	rjmp	.+24     	; 0x20 <__bad_interrupt>
   8:	0b c0       	rjmp	.+22     	; 0x20 <__bad_interrupt>
   a:	0a c0       	rjmp	.+20     	; 0x20 <__bad_interrupt>
   c:	09 c0       	rjmp	.+18     	; 0x20 <__bad_interrupt>
   e:	08 c0       	rjmp	.+16     	; 0x20 <__bad_interrupt>
  10:	07 c0       	rjmp	.+14     	; 0x20 <__bad_interrupt>
  12:	06 c0       	rjmp	.+12     	; 0x20 <__bad_interrupt>

00000014 <__ctors_end>:
  14:	11 24       	eor	r1, r1
  16:	1f be       	out	0x3f, r1	; 63
  18:	cf e9       	ldi	r28, 0x9F	; 159
  1a:	cd bf       	out	0x3d, r28	; 61
  1c:	02 d0       	rcall	.+4      	; 0x22 <main>
  1e:	35 c0       	rjmp	.+106    	; 0x8a <_exit>

00000020 <__bad_interrupt>:
  20:	ef cf       	rjmp	.-34     	; 0x0 <__vectors>

00000022 <main>:
  22:	80 e0       	ldi	r24, 0x00	; 0
  24:	90 e0       	ldi	r25, 0x00	; 0
  26:	dc 01       	movw	r26, r24
  28:	b1 9b       	sbis	0x16, 1	; 22
  2a:	1a c0       	rjmp	.+52     	; 0x60 <__SREG__+0x21>
  2c:	01 96       	adiw	r24, 0x01	; 1
  2e:	a1 1d       	adc	r26, r1
  30:	b1 1d       	adc	r27, r1
  32:	83 39       	cpi	r24, 0x93	; 147
  34:	28 e1       	ldi	r18, 0x18	; 24
  36:	92 07       	cpc	r25, r18
  38:	24 e0       	ldi	r18, 0x04	; 4
  3a:	a2 07       	cpc	r26, r18
  3c:	b1 05       	cpc	r27, r1
  3e:	11 f1       	breq	.+68     	; 0x84 <__SREG__+0x45>
  40:	00 00       	nop
  42:	00 00       	nop
  44:	f1 cf       	rjmp	.-30     	; 0x28 <main+0x6>
  46:	01 96       	adiw	r24, 0x01	; 1
  48:	a1 1d       	adc	r26, r1
  4a:	b1 1d       	adc	r27, r1
  4c:	83 39       	cpi	r24, 0x93	; 147
  4e:	28 e1       	ldi	r18, 0x18	; 24
  50:	92 07       	cpc	r25, r18
  52:	24 e0       	ldi	r18, 0x04	; 4
  54:	a2 07       	cpc	r26, r18
  56:	b1 05       	cpc	r27, r1
  58:	a9 f0       	breq	.+42     	; 0x84 <__SREG__+0x45>
  5a:	00 00       	nop
  5c:	00 00       	nop
  5e:	00 00       	nop
  60:	b1 9b       	sbis	0x16, 1	; 22
  62:	f1 cf       	rjmp	.-30     	; 0x46 <__SREG__+0x7>
  64:	0d c0       	rjmp	.+26     	; 0x80 <__SREG__+0x41>
  66:	01 96       	adiw	r24, 0x01	; 1
  68:	a1 1d       	adc	r26, r1
  6a:	b1 1d       	adc	r27, r1
  6c:	83 39       	cpi	r24, 0x93	; 147
  6e:	28 e1       	ldi	r18, 0x18	; 24
  70:	92 07       	cpc	r25, r18
  72:	24 e0       	ldi	r18, 0x04	; 4
  74:	a2 07       	cpc	r26, r18
  76:	b1 05       	cpc	r27, r1
  78:	29 f0       	breq	.+10     	; 0x84 <__SREG__+0x45>
  7a:	00 00       	nop
  7c:	00 00       	nop
  7e:	00 00       	nop
  80:	b1 99       	sbic	0x16, 1	; 22
  82:	f1 cf       	rjmp	.-30     	; 0x66 <__SREG__+0x27>
  84:	80 e0       	ldi	r24, 0x00	; 0
  86:	90 e0       	ldi	r25, 0x00	; 0
  88:	08 95       	ret

0000008a <_exit>:
  8a:	f8 94       	cli

0000008c <__stop_program>:
  8c:	ff cf       	rjmp	.-2      	; 0x8c <__stop_program>

Metadata

Metadata

Assignees

No one assigned

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions