/*
Animated LED Duck, 34 LEDs + speaker
Puya PY32F002A with at least 16 pins @ 8 MHz (internal oscillator)

PORTS
  PA0  = LEDs 1 (anodes  1~ 6, cathodes     7, 13, 19, 25, 31)
  PA1  = LEDs 2 (anodes  7~12, cathodes 1,     14, 20, 26, 32)
  PA2  = LEDs 3 (anodes 13~18, cathodes 2,  8,     21, 27, 33)
  PA3  = LEDs 4 (anodes 19~24, cathodes 3,  9, 15,     28, 34)
  PA6  = LEDs 5 (anodes 25~30, cathodes 4, 10, 16, 22,     35)
  PA7  = LEDs 6 (anodes 31~34, cathodes 5, 11, 17, 23, 29    )
  PA9  = LEDs 7 (              cathodes 6, 12, 18, 24, 30, 36)
  PA13 = [unused] / [SWD]
  PA14 = [unused] / [SWC]
  PB0  = Speaker PWM signal 1 [TIM1_CH2N]
  PB1  = Speaker PWM signal 2 [TIM1_CH3N]
  PF0  = Button (to ground, internal pull-up)
  PF1  = [unused] / Debug output
  PF2  = [unused] / [NRST]

BUTTON FUNCTION:
  1. Press button for >1s to turn power on or off.
  2. Press button for <1s to play sound.
  3. Hold button while connecting battery to enable always-on mode (button
     will ignore long presses). Animations will start immediately when button
     is released. Remove battery to disable always-on mode.
  4. Hold button while connecting battery and keep holding to enter test mode
     (for hardware debug: turn on each LED in sequence, repeat indefinitely).
     Button can be released once test mode starts. Remove battery to exit.

LED ARRANGEMENT
  See animations/ani_list.cm0.s

VERSIONS
  2025-06-30  Arne Rossius
   * first version
  2025-07-31  Arne Rossius
   * added option for high-strength anode output (no external transistors)
  2025-08-04  Arne Rossius
   * fixed test mode
*/
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

// SETTINGS
// Build-time configuration. Everything here is an assembler constant
// (.equiv), so a change only takes effect after re-assembling.

// Number of LEDs (max. 36)
// Used as the wrap-around length by the FILL/ROTATE/FADE commands. The frame
// buffers themselves are always 36 bytes long, independent of this value.
.equiv N_LED, 34

// Use high-strength anode output (no external transistors)
// When non-zero, the Tim16 interrupt additionally writes GPIOA OSPEEDR so
// that the currently selected anode pin is driven at maximum strength.
.equiv NO_EXT_TR, 0 // set to 1 for DUCK Nano

// button hold-down delay
// BUTTON_*_TIME are compared against BtnCnt, which the Tim16 interrupt
// increments once per complete multiplex cycle (~100 Hz) while the button is
// held. POWER_ON_DELAY is the number of consecutive 4 ms polls for which the
// button must read pressed after wake-up; it is loaded with an 8-bit
// immediate (movs), so it must not exceed 255.
.equiv BUTTON_SHORT_TIME,   2 // short press (play sound) - 10 ms units
.equiv BUTTON_LONG_TIME,  100 // long press (power off) - 10 ms units, max. 127
.equiv POWER_ON_DELAY,    250 // long press (power on) - 4 ms units

// LED PWM thresholds in Tim16 ticks (8 MHz / 64). Step n of the PWM sequence
// lasts PWM_(n+1) - PWM_n ticks (see tim16_pwmval). An LED with brightness v
// is lit during steps 0 ~ v-1, i.e. for PWM_v out of PWM_7 ticks.
//distance between PWM_6 and PWM_7:    >= 4
//distance between any other 2 values: >= 3
// (presumably because the Tim16 interrupt needs up to ~3.4 ticks to run, see
// the cycle table after tim16_ovf - TODO confirm)

/*
//Gamma 2.2
PWM_0 = 0 // don't change
PWM_1 = 3
PWM_2 = 13
PWM_3 = 32
PWM_4 = 61
PWM_5 = 99
PWM_6 = 148
PWM_7 = 208 // 8 MHz / 64 / 6 / 208 = ~100.16 Hz
*/

// Active table: each threshold is roughly double the previous one.
.equiv PWM_0,   0 // don't change
.equiv PWM_1,   3
.equiv PWM_2,   6
.equiv PWM_3,  12
.equiv PWM_4,  25
.equiv PWM_5,  51
.equiv PWM_6, 103
.equiv PWM_7, 208 // 8 MHz / 64 / 6 / 208 = ~100.16 Hz

// pins
// The button is read by shifting IDR right by BUTTON_PIN+1 so that the pin
// state ends up in the carry flag (C=1: released, C=0: pressed).
.equiv BUTTON_GPIO, GPIOF_BASE
.equiv BUTTON_PIN, 0

@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

.include "py32f002ax5.inc"

@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

// RAM variables. Nothing here has an initial value; the reset handler zeroes
// everything between DataStart and DataEnd in 4-byte steps, which is why both
// labels are word-aligned.
.section .data
.align 2
DataStart:

// Frame buffers, one brightness byte (0~7) per LED:
//   FrameLED    - frame currently displayed; read by the Tim16 interrupt
//   FrameShadow - next frame; written by the animation commands and copied
//                 to FrameLED by the Tim16 interrupt when fUpdateLEDs is set
//                 and Delay has expired
//   FrameBackup - scratch buffer for ROTATE and saved frame during sound
.align 2
FrameLED:    .space 36
FrameShadow: .space 36
FrameBackup: .space 36

NewDelay: .space 1 // value loaded into Delay when the shadow frame is shown
Repeat:   .space 1 // remaining iterations of the innermost REPEAT block

.align 2
DataEnd:

// Global state kept permanently in registers (shared between the main
// program and the interrupt handlers).

// Flags lives in a low register so it can be used with orrs/bics/tst/lsrs.
Flags    .req R7
.equiv fButtonShort, 0 // set by Tim16 interrupt on release after a short press
.equiv fButtonLong,  1 // set by Tim16 interrupt when BtnCnt hits BUTTON_LONG_TIME
.equiv fUpdateLEDs,  2 // Show next frame (from FrameShadow) when Delay expires
.equiv fAlwaysOn,    3 // always-on mode (can't turn off with button)

LEDs_Mux .req R8  // Mux in Byte 3 [23:16], LEDs in Byte 1 [7:0]
PwmVal   .req R9  // current PWM step, 0~6
BtnCnt   .req R10 // consecutive ~100 Hz samples with the button pressed
Delay    .req R11 // frame delay countdown, decremented at ~100 Hz
AniPtr   .req R12 // address of the next animation command (16 bit each)

@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

.section .text
.syntax unified

// Vector table
// Each entry is placed with .org at the offset given by the corresponding
// symbol (these offsets are not defined in this file; presumably they come
// from py32f002ax5.inc). Only the button EXTI line, the Tim1 update and the
// Tim16 interrupts have real handlers; every listed core exception ends up
// in the 'error' loop.
.org SP_Init
	.word SRAM_END+1 // initial stack pointer

.org Reset_vect
	.word reset

.org NMI_vect
	.word error

.org HardFault_vect
	.word error

.org SVCall_vect
	.word error

.org PendSV_vect
	.word error

.org SysTick_vect
	.word error

.org EXTI0_1_vect
	.word exti0_falling // button wake-up

.org TIM1_BRK_UP_TRG_COM_vect
	.word tim1_ovf // audio sample update
	
.org TIM16_vect
	.word tim16_ovf // LED PWM & multiplexing

@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

.thumb_func
error:
	// Catch-all handler for exceptions without a dedicated handler.
	// Never returns: toggles pin 1 of GPIOF (the debug output) as fast as
	// the loop allows so the fault is visible on a scope.
	movs	R1, 1<<1 // bit mask for pin 1
	ldr	R0, =GPIOF_BASE
1:
	str	R1, [R0, GPIO_BSRR_offset] // pin high
	nop
	nop
	str	R1, [R0, GPIO_BRR_offset] // pin low
	b	1b

@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

.thumb_func
reset:
	// Reset handler: one-time hardware and RAM setup.
	// Order of operations: enable peripheral clocks, configure GPIOA/B/F,
	// zero the variables between DataStart and DataEnd, initialise the
	// global registers, sample the button to select always-on or test
	// mode, then set up Tim1 (audio PWM), Tim16 (LED multiplexing), EXTI
	// line 0 and the NVIC. Execution continues at power_on in always-on
	// mode and falls through into power_off otherwise.

	// enable peripherals
	ldr	R0, =RCC_BASE
	ldr	R1, =RCC_IOPENR_GPIOAEN | RCC_IOPENR_GPIOBEN | RCC_IOPENR_GPIOFEN
	str	R1, [R0, RCC_IOPENR_offset] // enable GPIO
	ldr	R1, =RCC_APBENR1_PWREN
	str	R1, [R0, RCC_APBENR1_offset] // enable low-power control
	ldr	R1, =RCC_APBENR2_TIM1EN | RCC_APBENR2_TIM16EN
	str	R1, [R0, RCC_APBENR2_offset] // enable timers Tim1 & Tim16
	
	// init ports: GPIOA
	// 0x02CF = bits 0,1,2,3,6,7,9 = the seven LED pins listed in the header
	ldr	R0, =GPIOA_BASE
	//ldr	R1, =0x0C000000 // LED pin speed = very low (weak drive)
	//str	R1, [R0, GPIO_OSPEEDR_offset]
	//movs	R1, 0x0000 // LED pin state = low
	//str	R1, [R0, GPIO_ODR_offset]
	ldr	R1, =0x02CF // LED pin output mode = open drain
	str	R1, [R0, GPIO_OTYPER_offset]
	ldr	R1, =0xEBF75F55 // LED pins = output, SWD=alt, others=analog
	str	R1, [R0, GPIO_MODER_offset]
	// init ports: GPIOB
	ldr	R0, =GPIOB_BASE
	movs	R1, 0x0000000F // PWM pin speed = very high (strong drive)
	str	R1, [R0, GPIO_OSPEEDR_offset]
	movs	R1, 0x00000022 // PWM pin alt. function = 2 (TIM1_CH2N/CH3N)
	str	R1, [R0, GPIO_AFRL_offset]
	ldr	R1, =0xFFFFFFFA // PWM pins = alt. function, others = analog
	str	R1, [R0, GPIO_MODER_offset]
	// init ports: GPIOF
	// NOTE(review): 0x201 sets bits 0 and 9. With the 2-bits-per-pin PUPDR
	// layout used for GPIOA elsewhere in this file, that is 01 (pull-up)
	// for pin 0 and 10 in the pin 4 field; the pin 2 field (NRST according
	// to the header) stays 00. Confirm against the reference manual.
	ldr	R0, =GPIOF_BASE
	ldr	R1, =0x00000201 // pull-up for button pin (and NRST pin)
	str	R1, [R0, GPIO_PUPDR_offset]
	ldr	R1, =0xFFFFFFF4 // Button=input, Debug=output, others=analog
	str	R1, [R0, GPIO_MODER_offset]
	
	// clear RAM
	// R6 counts down from the block size in steps of 4 and doubles as the
	// store offset; 'str' leaves the flags alone, so 'bne' still sees the
	// result of 'subs' and the loop ends after offset 0 was written.
	ldr	R0, =DataStart
	movs	R1, 0
	movs	R6, DataEnd-DataStart
init_ram:
	subs	R6, 4
	str	R1, [R0, R6]
	bne	init_ram
	
	// init registers
	movs	Flags, 0
	mov	BtnCnt, Flags
	mov	Delay, Flags
	mov	LEDs_Mux, Flags
	movs	R0, 6 // Ensure PWM & Mux overflow in first interrupt
	mov	PwmVal, R0
	ldr	R0, =animations
	mov	AniPtr, R0

	// read button (determine always-on or test mode)
	// Button released at reset: normal mode, loop exits immediately.
	// Button held for at least one 25 ms poll: fAlwaysOn is set.
	// Button held for all 200 polls: branch to test_mode.
	movs	R6, #200 // 200 * 25 ms = 5 s
	ldr	R0, =BUTTON_GPIO
init_mode:
	// read button
	ldr	R1, [R0, GPIO_IDR_offset]
	lsrs	R1, BUTTON_PIN+1 // button pin -> C
	bcs	init_mode_end // button not pressed
	ldr	R1, =200000/3 // delay ~25 ms (200k cycles @ 8 MHz)
init_mode_delay:
	subs	R1, 1
	bne	init_mode_delay
	movs	R1, 1<<fAlwaysOn // button held down at power-on: always-on mode
	orrs	Flags, R1
	subs	R6, 1
	bne	init_mode
	// button held down for 5 s: enter test mode
	b	test_mode
init_mode_end:
	
	// set Tim1 for audio PWM (but don't start counting yet)
	// Counting is started and stopped by the 'sound' routine.
	ldr	R0, =TIM1_BASE
	//movs	R1, 0
	//str	R1, [R0, TIM_PSC_offset] // Prescaler: Clk/1
	movs	R1, 0xFF
	str	R1, [R0, TIM_ARR_offset] // 8-bit PWM
	ldr	R1, =TIM_CCMR1_OC2PE | 6<<TIM_CCMR1_OC2M_Pos
	str	R1, [R0, TIM_CCMR1_offset] // CCR2 buffered, PWM mode 1
	movs	R1, TIM_CCMR2_OC3PE | 6<<TIM_CCMR2_OC3M_Pos
	str	R1, [R0, TIM_CCMR2_offset] // CCR3 buffered, PWM mode 1
	ldr	R1, =TIM_CCER_CC2NE | TIM_CCER_CC3NE
	str	R1, [R0, TIM_CCER_offset] // Enable OC2N & OC3N
	ldr	R1, =TIM_BDTR_OSSI
	str	R1, [R0, TIM_BDTR_offset] // drive pins when inactive
	
	// set Tim16 for LED PWM & multiplexing (but don't start counting yet)
	// Counting is started in power_on. The initial TOP value only applies
	// to the first period; the interrupt reloads ARR on every overflow.
	ldr	R0, =TIM16_BASE
	movs	R1, 64-1
	str	R1, [R0, TIM_PSC_offset] // Prescaler: Clk/64
	movs	R1, 50
	str	R1, [R0, TIM_ARR_offset] // TOP value
	movs	R1, TIM_DIER_UIE
	str	R1, [R0, TIM_DIER_offset] // Enable overflow interrupt
	movs	R1, TIM_CR1_ARPE
	str	R1, [R0, TIM_CR1_offset] // Preload register is buffered
	
	// set external interrupt on button pin (for wake-up)
	// The falling-edge trigger itself is only enabled in power_off.
	ldr	R0, =EXTI_BASE
	movs	R1, EXTI_EXTICR1_EXTI0_PF0
	str	R1, [R0, EXTI_EXTICR1_offset] // select PF0 pin for EXTI line 0
	ldr	R0, =EXTI_IMR
	movs	R1, EXTI_IMR_IM0
	str	R1, [R0, 0] // allow wake-up from line 0
	
	// enable interrupts in NVIC (Tim1 update, Tim16, external interrupt)
	ldr	R0, =NVIC_BASE
	ldr	R1, =1<<TIM1_BRK_UP_TRG_COM_IRQn | 1<<TIM16_IRQn | 1<<EXTI0_1_IRQn
	str	R1, [R0, NVIC_ISER_offset]
	
	// go to standby (if not in always-on mode)
	lsrs	R0, Flags, fAlwaysOn+1 // fAlwaysOn -> C
	bcs	power_on

power_off:
	// Standby / wake-up handling.
	// Entered by falling through from reset or by a long button press
	// (from ani_delay_wait). Stops the LED multiplexing, parks the LED
	// pins, waits for the button to be released and then sleeps in STOP
	// mode until the button is pressed again. A press only powers the
	// device on if the button still reads pressed on POWER_ON_DELAY
	// consecutive 4 ms polls; otherwise the device goes back to sleep.
	// power_on is also entered directly from reset in always-on mode.

	// stop multiplexing
	ldr	R0, =TIM16_BASE
	movs	R1, TIM_CR1_ARPE // stop Tim16
	str	R1, [R0, TIM_CR1_offset]
	
	// LEDs off
	ldr	R0, =GPIOA_BASE
	ldr	R1, =0x2CF
	str	R1, [R0, GPIO_OTYPER_offset] // all LED pins = open drain
	str	R1, [R0, GPIO_BSRR_offset] // all LED pins = high (high-Z)
	ldr	R1, =0x24045055       // (to avoid leakage through transistors)
	str	R1, [R0, GPIO_PUPDR_offset] // enable pull-ups for LED pins
	
	// select STOP mode (SLEEPDEEP=1)
	ldr	R0, =SCB_BASE
	movs	R1, SCB_SCR_SLEEPDEEP_Msk
	str	R1, [R0, SCB_SCR_offset]
	
	// wait until button released
	// (otherwise the press that requested power-off could not produce a
	// new falling edge for wake-up)
	ldr	R0, =BUTTON_GPIO
power_off_wait:
	ldr	R1, [R0, GPIO_IDR_offset]
	lsrs	R1, BUTTON_PIN+1 // button pin -> C
	bcc	power_off_wait // C=0: still pressed
	
	// enable button interrupt (for wake-up) // TODO use event instead?
	ldr	R0, =EXTI_BASE
	movs	R1, EXTI_FTSR_FT0 // enable falling trigger for EXTI line 0
	str	R1, [R0, EXTI_FTSR_offset]
	
power_off_sleep:
	// sleep (wake up on button pin falling edge)
	
	// select low-power regulator during STOP mode
	// (before every sleep, as wake-up seems to reset some of the bits)
	ldr	R0, =PWR_BASE
	ldr	R1, =PWR_SRAM_RETV_1V0_1V2 | PWR_CR1_LPR | PWR_CR1_VOS | PWR_CR1_MRRDY_TIME_5US
	str	R1, [R0, PWR_CR1_offset] // Use LPR with VDD=1.0V in STOP mode
	
	// TODO remove
	// (debug: GPIOF pin 1 is high while the CPU is asleep)
	ldr	R3, =GPIOF_BASE
	movs	R2, 0x02
	str	R2, [R3, GPIO_BSRR_offset]
	
	wfi
	
	// TODO remove
	ldr	R3, =GPIOF_BASE
	movs	R2, 0x02
	str	R2, [R3, GPIO_BRR_offset]
	
	
	// check if button pressed for a certain time (filter ESD and spikes)
	movs	R6, POWER_ON_DELAY
power_on_loop:
	// delay 4 ms
	ldr	R1, =32000/3 // 4 ms * 8 MHz = 32000 cycles
power_on_delay_loop:
	subs	R1, 1
	bne	power_on_delay_loop
	// poll button
	ldr	R0, =BUTTON_GPIO
	ldr	R1, [R0, GPIO_IDR_offset]
	lsrs	R1, BUTTON_PIN+1 // button pin -> C
	bcs	power_off_sleep // released too early: back to sleep
	// button pressed
	subs	R6, 1
	bne	power_on_loop
	
power_on:
	// disable button interrupt
	ldr	R0, =EXTI_BASE
	movs	R1, 0  // disable falling trigger for EXTI line 0
	str	R1, [R0, EXTI_FTSR_offset]
	
	// avoid button press detection
	// Starting BtnCnt above BUTTON_LONG_TIME means the press that turned
	// the device on can neither reach the "== BUTTON_LONG_TIME" test nor
	// count as a short press when it is released.
	movs	R1, BUTTON_LONG_TIME + 1
	mov	BtnCnt, R1
	movs	R1, 1<<fButtonShort | 1<<fButtonLong
	bics	Flags, R1
	
	// disable pull-ups for LED pins
	ldr	R0, =GPIOA_BASE
	ldr	R1, =0x24000000
	str	R1, [R0, GPIO_PUPDR_offset]
	
	// continue multiplexing
	ldr	R0, =TIM16_BASE
	movs	R1, TIM_CR1_ARPE | TIM_CR1_CEN // resume Tim16 (start counting)
	str	R1, [R0, TIM_CR1_offset]
	
	// resume the animation where it stopped (AniPtr is left untouched)
	b	ani_continue
	
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

ani_loop:
	// Animation interpreter: fetch the 16-bit command at AniPtr into R1,
	// advance AniPtr by one command and branch to the matching handler.
	// Every handler returns here with 'b ani_loop' (ani_delay does so
	// after the frame has been shown).
	// The command type is decoded by shifting one bit at a time into the
	// carry flag, so the tests below are prioritised: bit 15 clear wins,
	// then bit 8, 9, 10, 11. R2 only serves as a throw-away destination.

	// read next command (zero-extended) into R1
	mov	R0, AniPtr
	ldrh	R1, [R0]
	adds	R0, 2
	mov	AniPtr, R0
	// execute command
	lsrs	R2, R1, 16 // bit 15 -> C
	bcc	ani_fill   // 0xxx.xxxx : xxxx.xxxx - fill LEDs with value
	lsrs	R2, R1, 9 // bit 8 -> C
	bcs	ani_rotate // 1000.0001 : xxxx.xxxx - rotate
	lsrs	R2, R1, 10 // bit 9 -> C
	bcs	ani_fade   // 1000.0010 : xxxx.xxxx - fade
	lsrs	R2, R1, 11 // bit 10 -> C
	bcs	ani_delay  // 1000.0100 : xxxx.xxxx - delay
	lsrs	R2, R1, 12 // bit 11 -> C
	bcs	ani_repeat // 1000.1000 : xxxx.xxxx - repeat, repeat end
	// (fall through)     1000.0000 : xxxx.xxxx - end of animations
	// restart animation
	ldr	R0, =animations
	mov	AniPtr, R0
	b	ani_loop
	
@--------------------

ani_fill:
	// FILL command, bit layout 0lll.llla : aaaa.avvv
	//   l = number of LEDs to write
	//   a = zero-based index of the first LED
	//   v = brightness value
	// Writes v into l consecutive FrameShadow entries, continuing at the
	// first LED after the last one (index N_LED-1) has been written.
	// In: R1 = command. Uses R0~R3.
	lsrs	R2, R1, 9  // R2 = l (bit 15 is 0 for this command)
	lsls	R3, R1, 23 // move bits 8:0 to the top, dropping l ...
	lsrs	R3, R3, 26 // ... R3 = a (6 bits)
	lsls	R1, R1, 29 // keep only bits 2:0 ...
	lsrs	R1, R1, 29 // ... R1 = v
	ldr	R0, =FrameShadow
	adds	R0, R0, R3 // R0 = address of first LED to write
	ldr	R3, =FrameShadow+N_LED // R3 = address just past the last LED
1:
	strb	R1, [R0]
	adds	R0, 1
	cmp	R0, R3
	bne	2f
	subs	R0, N_LED // ran off the end: continue at LED index 0
2:
	subs	R2, 1
	bne	1b
	b	ani_loop
	
@--------------------

ani_rotate:
	// ROTATE command, 1000.0001 : 00nn.nnnn
	// Moves every LED value n places onwards, wrapping at N_LED:
	//   new[(i + n) mod N_LED] = old[i]
	// In: R1 = command. Uses R0~R6 and FrameBackup as scratch space.
	
	// pass 1: scatter FrameShadow into FrameBackup at the shifted position
	sxtb	R2, R1 // R2 = n (low byte of the command)
	ldr	R0, =FrameShadow
	ldr	R1, =FrameBackup
	adds	R1, R1, R2 // R1 = destination of LED index 0
	ldr	R3, =FrameBackup+N_LED // R3 = address just past the last LED
	movs	R6, 0 // R6 = source index
1:
	ldrb	R2, [R0, R6]
	strb	R2, [R1]
	adds	R1, 1
	cmp	R1, R3
	bne	2f
	subs	R1, N_LED // destination wraps to LED index 0
2:
	adds	R6, 1
	cmp	R6, N_LED
	bne	1b
	
	// pass 2: copy the whole 36-byte buffer back as 4 + 5 words
	ldr	R1, =FrameBackup
	ldmia	R1!, {R2-R5} // 4 words = 16 bytes
	stmia	R0!, {R2-R5}
	ldmia	R1!, {R2-R6} // 5 words = 20 bytes
	stmia	R0!, {R2-R6}
	b	ani_loop
	
@--------------------

ani_delay:
	// DELAY command: hand the finished shadow frame over to the Tim16
	// interrupt and wait until it has been shown.
	// The low byte of the command becomes NewDelay. Setting fUpdateLEDs
	// asks the interrupt to copy FrameShadow to FrameLED and to load
	// Delay from NewDelay as soon as the current Delay has expired; the
	// interrupt clears the flag when it has done so.
	// While waiting, the button flags are polled: a short press branches
	// to 'sound', a long press to power_off (ignored in always-on mode).
	// ani_continue is the re-entry point used by power_on, and
	// ani_delay_wait_end the one used by 'sound'.

	// delay: new_delay * 10 ms
	ldr	R0, =NewDelay
	strb	R1, [R0]
	movs	R2, 1<<fUpdateLEDs
	orrs	Flags, R2
ani_delay_wait:
	lsrs	R1, Flags, fButtonShort+1 // fButtonShort -> C
	bcs	sound // short press: quack
	lsrs	R1, Flags, fAlwaysOn+1 // fAlwaysOn -> C
	bcs	ani_delay_wait_end
	lsrs	R1, Flags, fButtonLong+1 // fButtonLong -> C
	bcs	power_off // long press: power off
ani_delay_wait_end:
ani_continue:
	lsrs	R1, Flags, fUpdateLEDs+1 // fUpdateLEDs -> C
	bcs	ani_delay_wait // still waiting to update LEDs
	// prepare next frame
	b	ani_loop
	
@--------------------

ani_fade:
	// FADE command, 1000.0010 : nnnn.nnnn
	// Adds the signed 8-bit value n to each of the N_LED brightness bytes
	// in FrameShadow and limits every result to the range 0~7.
	// In: R1 = command. Uses R0, R1, R2, R6.
	sxtb	R1, R1 // R1 = n, sign-extended
	ldr	R0, =FrameShadow
	movs	R6, N_LED // R6 = index, walks from N_LED-1 down to 0
1:
	subs	R6, 1
	ldrb	R2, [R0, R6]
	adds	R2, R2, R1
	bpl	2f
	movs	R2, 0 // result below 0: use 0
2:
	cmp	R2, 7
	bls	3f
	movs	R2, 7 // result above 7: use 7
3:
	strb	R2, [R0, R6]
	cmp	R6, 0
	bne	1b
	b	ani_loop
	
@--------------------

ani_repeat:
	// REPEAT / REPEAT_END command, 1000.1000 : nnnn.nnnn
	//   n != 0: open a repeat block that is executed n times in total
	//   n == 0: close the innermost repeat block
	// The counter of the enclosing block and the address of the first
	// command inside the block are kept on the stack (counter at [SP],
	// address at [SP, 4]), so blocks can be nested.
	// In: R1 = command. Uses R0, R2, R3.
	ldr	R0, =Repeat
	uxtb	R3, R1 // R3 = n
	cmp	R3, 0
	bne	ani_repeat_begin

	// n == 0: end of block reached
	ldrb	R2, [R0]
	subs	R2, 1 // one iteration done
	bne	ani_repeat_again
	// all iterations done: drop the stack entry, restore outer counter
	pop	{R2, R3}
	strb	R2, [R0]
	b	ani_loop

ani_repeat_again:
	// iterations left: store counter and jump back to the block start
	strb	R2, [R0]
	ldr	R3, [SP, 4]
	mov	AniPtr, R3
	b	ani_loop

ani_repeat_begin:
	// n != 0: save outer counter and block start, then install n
	ldrb	R2, [R0]
	mov	R3, AniPtr // AniPtr already points behind the REPEAT command
	push	{R2, R3}
	strb	R1, [R0]
	b	ani_loop
	
@--------------------

sound:
	// Short button press: play the audio sample with all LEDs lit.
	// Entered from ani_delay_wait; returns to ani_delay_wait_end.
	// Steps:
	//   1. cancel the pending frame update and save NewDelay on the stack
	//   2. start Tim1 with both PWM channels idling at 50% duty cycle
	//   3. save FrameShadow to FrameBackup
	//   4. fade all LEDs up to full brightness in 8 steps of 20 ms
	//   5. enable the Tim1 update interrupt, which streams the samples
	//      (pointer in R4) and disables itself at the end marker
	//   6. fade the LEDs back down to the saved frame
	//   7. stop Tim1, restore NewDelay and request a frame update
	// R4 is reserved for tim1_ovf while the interrupt is enabled.

	// stop further display updates
	movs	R1, 1<<fUpdateLEDs
	bics	Flags, R1
	ldr	R0, =NewDelay // backup NewDelay
	ldrb	R1, [R0]
	push	{R1}
	movs	R1, 0 // set Delay = 0
	mov	Delay, R1
	
	// start PWM generator
	ldr	R0, =TIM1_BASE
	movs	R1, 0x80
	str	R1, [R0, TIM_CCR2_offset] // initial duty cycle = 0x80 (50%)
	str	R1, [R0, TIM_CCR3_offset]
	movs	R1, TIM_CR1_CEN
	str	R1, [R0, TIM_CR1_offset] // start counting
	ldr	R1, =TIM_BDTR_OSSI | TIM_BDTR_AOE
	str	R1, [R0, TIM_BDTR_offset] // enable outputs at next overflow
	
	// backup framebuffer
	ldr	R0, =FrameShadow
	ldr	R1, =FrameBackup
	ldmia	R0!, {R2-R6} // 5 words = 20 bytes
	stmia	R1!, {R2-R6}
	ldmia	R0!, {R2-R5} // 4 words = 16 bytes
	stmia	R1!, {R2-R5}

	// fade all LEDs on
	// For each level R5 = 0~7, every LED darker than R5 is raised to R5.
fade_on:
	movs	R5, 0 // fade value 0..7
fade_on_loop1:
	ldr	R0, =FrameShadow
	movs	R6, 36 // whole buffer, not just N_LED entries
fade_on_loop2:
	ldrb	R2, [R0]
	cmp	R2, R5
	bhs	fade_on_skip // already at least this bright
	mov	R2, R5
	strb	R2, [R0]
fade_on_skip:
	adds	R0, 1
	subs	R6, 1
	bne	fade_on_loop2
	// 20 ms delay
	ldr	R0, =NewDelay
	movs	R1, 2
	strb	R1, [R0]
	movs	R1, 1<<fUpdateLEDs
	orrs	Flags, R1
fade_on_wait:
	tst	Flags, R1 // flag is cleared by the Tim16 interrupt
	bne	fade_on_wait
	// next brightness step
	adds	R5, 1
	cmp	R5, 7
	bls	fade_on_loop1
	
	// delay until LEDs updated
	// (wait for the delay of the last fade step to run out)
fade_on_finish:
	mov	R1, Delay
	cmp	R1, 0
	bne	fade_on_finish

	// start playback
	ldr	R4, =audio // sample pointer consumed by tim1_ovf
	ldr	R0, =TIM1_BASE
	movs	R1, 0
	str	R1, [R0, TIM_SR_offset] // clear interrupt flag
	movs	R1, TIM_DIER_UIE
	str	R1, [R0, TIM_DIER_offset] // enable overflow interrupt

	// wait until playback finished
	// (tim1_ovf writes 0 to DIER when it reads the end marker)
sound_wait:
	ldr	R1, [R0, TIM_DIER_offset]
	lsrs	R1, TIM_DIER_UIE_Pos+1 // UIE -> C
	bcs	sound_wait
	
	// fade all LEDs back to original values
	// For each level R5 = 7~0, every LED shows the larger of its saved
	// value and R5, so the last pass (R5 = 0) restores the saved frame.
fade_off:
	movs	R5, 7 // fade value 7..0
fade_off_loop1:
	ldr	R0, =FrameShadow
	ldr	R1, =FrameBackup
	movs	R6, 36
fade_off_loop2:
	ldrb	R2, [R1]
	cmp	R2, R5
	bhi	fade_off_low_skip // Orig. LED value > fade value
	mov	R2, R5
fade_off_low_skip:
	strb	R2, [R0]
	adds	R0, 1
	adds	R1, 1
	subs	R6, 1
	bne	fade_off_loop2
	// 20 ms delay
	ldr	R0, =NewDelay
	movs	R1, 2
	strb	R1, [R0]
	movs	R1, 1<<fUpdateLEDs
	orrs	Flags, R1
fade_off_wait:
	tst	Flags, R1 // flag is cleared by the Tim16 interrupt
	bne	fade_off_wait
	// next brightness step
	subs	R5, 1
	bpl	fade_off_loop1
	
	// disable PWM
	ldr	R0, =TIM1_BASE
	movs	R1, 0
	str	R1, [R0, TIM_CCR2_offset] // PWM values = 0 (output low)
	str	R1, [R0, TIM_CCR3_offset]
	str	R1, [R0, TIM_SR_offset] // clear interrupt flag
sound_finish:
	ldr	R2, [R0, TIM_SR_offset] // wait until prev. PWM cycle complete
	lsrs	R2, TIM_SR_UIF_Pos+1 // UIF -> C
	bcc	sound_finish
	ldr	R2, =TIM_BDTR_OSSI
	str	R2, [R0, TIM_BDTR_offset] // disable PWM outputs
	str	R1, [R0, TIM_CR1_offset] // stop counting
	
	// re-enable display updates
	ldr	R0, =NewDelay
	pop	{R1} // NewDelay saved at the start of this routine
	strb	R1, [R0]
	movs	R1, 1<<fUpdateLEDs
	orrs	Flags, R1

	// return
	movs	R1, 1<<fButtonShort
	bics	Flags, R1
	b	ani_delay_wait_end
	
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

test_mode:
	// test mode for hardware debugging:
	// turn on LEDs one at a time in sequence
	// Entered from reset when the button is held for 5 s; never returns.
	// Interrupts are not used here (the NVIC is only set up later in
	// reset), the LED pins are driven directly.
	// R1 = one-hot anode pin mask, R2 = one-hot cathode pin mask,
	// R5 = mask of all LED pins. Both masks walk through bits 0~31; bits
	// that are not LED pins (and cathode == anode) are skipped. Each
	// anode/cathode pair is lit for 250 periods of ~2 ms at 1/6 duty.
	ldr	R0, =GPIOA_BASE
	ldr	R5, =0x2CF // bit set = position used
	movs	R1, 0x01 // select first anode

test_anode_loop:
	// skip unavailable pins
	tst	R1, R5
	bne	test_skip_anode_end
test_next_anode:
	lsls	R1, 1
	beq	test_mode // no further pins => start over
	b	test_anode_loop
test_skip_anode_end:
	movs	R2, 0x01 // select first cathode
	
test_cathode_loop:
	// skip cathode == anode and unavailable pins
	cmp	R1, R2
	beq	test_skip_cathode
	tst	R2, R5
	bne	test_skip_cathode_end
test_skip_cathode:
	lsls	R2, 1
	beq	test_next_anode // no further pins => next anode
	b	test_cathode_loop
test_skip_cathode_end:

	movs	R6, 250 // 250 * 2 ms = 500 ms per LED
test_loop:

	// LEDs off
	str	R5, [R0, GPIO_BRR_offset] // LED pins = low
	str	R5, [R0, GPIO_OTYPER_offset] // LED pins = open drain
	
	// delay 5/6 * ~2 ms = 13333 cycles
	ldr	R4, =13333/3 // 3 cycles per loop iteration
test_delay_off:
	subs	R4, 1
	bne	test_delay_off
	
	// LED on: cathode = low, anode = push-pull
	str	R5, [R0, GPIO_BSRR_offset] // LED pins = high (not driven)
	str	R2, [R0, GPIO_BRR_offset] // active LED cathode low
	mov	R3, R5
	eors	R3, R1 // clear the anode bit in the open-drain mask
	str	R3, [R0, GPIO_OTYPER_offset] // anode pin = push-pull (drive high)
	
	// delay 1/6 * ~2 ms = 2667 cycles
	ldr	R4, =2667/3
test_delay_on:
	subs	R4, 1
	bne	test_delay_on
	
	subs	R6, 1
	bne	test_loop
	
	// next cathode
	b	test_skip_cathode
	
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

// dump literal pool for main program
.pool

@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

.thumb_func
tim16_ovf:
	// Tim16 overflow interrupt: LED PWM & multiplexing
	// automatically saved: xPSR, R0~R3, R12, LR, PC
	// NOTE: execution time for 'str' to GPIO: 1 cycle, to TIM16: 5 cycles
	// NOTE: execution time for 'ldrb' from RAM: 4 cycles
	//
	// Each invocation
	//   1. blanks the LEDs and outputs the pin pattern that was prepared
	//      in LEDs_Mux by the previous invocation,
	//   2. advances PwmVal (0~6); when it wraps, selects the next anode
	//      (Mux, one-hot, 6 positions); when that wraps too (~100 Hz),
	//      samples the button, counts down Delay and, if requested via
	//      fUpdateLEDs, copies FrameShadow to FrameLED,
	//   3. prepares the pin pattern for the next invocation,
	//   4. writes the length of the next PWM step to the buffered ARR.
	// Only R0~R3 are used as scratch registers. The global registers
	// Flags, LEDs_Mux, PwmVal, BtnCnt and Delay are modified on purpose.
	
	//LEDs off - 6 cycles
	ldr	R0, =GPIOA_BASE
	ldr	R1, =0x02CF
	str	R1, [R0, GPIO_BRR_offset] // force all LED pins low
	str	R1, [R0, GPIO_OTYPER_offset] // all LED pins = open drain
	
	// LEDs on with new pattern - TODO:16 cycles
	// LEDs_Mux holds the logical bits 0~6 in both halfwords; the pins
	// are bits 0~3, 6, 7 and 9. Adding the masked bits to themselves
	// moves them up: x + 3*(x & 0x70) shifts bits 6:4 left by two, and
	// x + (x & 0x100) shifts bit 8 left by one.
	mov	R2, LEDs_Mux
	ldr	R3, =0x00700070 // shift bits 6:4 to 8:6 (both 16-bit halves)
	ands	R3, R2
	add	R2, R3
	add	R2, R3
	add	R2, R3
	ldr	R3, =0x01000100 // shift bit 8 to 9 (both 16-bit halves)
	ands	R3, R2
	add	R2, R3
	lsrs	R3, R2, 16 // R3 = Mux
	mvns	R3, R3
	ands	R1, R3 // R1 = ~Mux
	mvns	R3, R3 // R3 = Mux again (needed below if NO_EXT_TR)
	str	R1, [R0, GPIO_OTYPER_offset] // anode pin = push-pull
.if (NO_EXT_TR)
	//Bit spreading Mux[15:0] => Mux[31:0]
	// (every bit n moves to bit 2n; multiplying by 3 afterwards fills
	// the 2-bit OSPEEDR field of the anode pin with 11)
	lsls	R1, R3, 8
	orrs	R3, R1
	ldr	R1, =0x00FF00FF
	ands	R3, R1
	lsls	R1, R3, 4
	orrs	R3, R1
	ldr	R1, =0x0F0F0F0F
	ands	R3, R1
	lsls	R1, R3, 2
	orrs	R3, R1
	ldr	R1, =0x33333333
	ands	R3, R1
	lsls	R1, R3, 1
	orrs	R3, R1
	ldr	R1, =0x55555555
	ands	R3, R1
	movs	R1, R3
	add	R1, R3
	add	R1, R3
	str	R1, [R0, GPIO_OSPEEDR_offset] // anode = max. strength
.endif
	str	R2, [R0, GPIO_ODR_offset] // anode = high, LED off = high-Z
	
	// LED PWM - 6 cycles
	mov	R1, PwmVal // valid PWM values: 0~6
	adds	R1, 1
	mov	PwmVal, R1
	subs	R1, 7 // R1 = 0 exactly when the sequence is complete
	bne	tim16_calc // 6 cycles if taken
	// restart PWM sequence
	mov	PwmVal, R1
	
	// LED Multiplexing - 9 cycles
	mov	R1, LEDs_Mux
	lsrs	R1, 16 // clear LEDs
	lsls	R1, 16+1 // count Mux: 0x01, 0x02, 0x04, 0x08, 0x10, 0x20
	mov	LEDs_Mux, R1
	lsls	R2, R1, 16-6 // check if any bit in Mux[5:0] is set
	bne	tim16_calc // 7 cycles if taken
	// restart multiplexing sequence
	ldr	R1, =0x01<<16
	mov	LEDs_Mux, R1
	
	// read button (~100 Hz) - max. 15 cycles (12 when idle)
	// BtnCnt counts samples with the button pressed. fButtonLong is set
	// once, when the count reaches BUTTON_LONG_TIME. fButtonShort is set
	// on release if BUTTON_SHORT_TIME <= count < BUTTON_LONG_TIME.
	mov	R2, BtnCnt
	ldr	R0, =BUTTON_GPIO
	ldr	R1, [R0, GPIO_IDR_offset]
	lsrs	R1, BUTTON_PIN+1 // Button state -> C
	bcs	tim16_button_released
	// button pressed
	adds	R2, 1
	bcs	tim16_button_end // prevent overflow
	mov	BtnCnt, R2
	cmp	R2, BUTTON_LONG_TIME
	bne	tim16_button_end
	movs	R1, 1<<fButtonLong
	orrs	Flags, R1
	b	tim16_button_end
tim16_button_released:
	// button not pressed
	movs	R1, 0
	mov	BtnCnt, R1
	cmp	R2, BUTTON_SHORT_TIME
	blo	tim16_button_end
	cmp	R2, BUTTON_LONG_TIME
	bhs	tim16_button_end
	movs	R1, 1<<fButtonShort
	orrs	Flags, R1
tim16_button_end:
	
	// decrement delay & update LEDs (~100 Hz) - 80 cycles (6 if Delay>1)
	mov	R1, Delay
	subs	R1, 1
	bmi	tim16_delay_zero // overflow => Delay was 0 before decrement
	mov	Delay, R1 // ('mov' to a high register keeps the flags)
	bne	tim16_update_end // Delay not 0 after decrement
tim16_delay_zero:
	lsrs	R1, Flags, fUpdateLEDs+1 // fUpdateLEDs -> C
	bcc	tim16_update_end // no new frame data available (7 cycles)
	subs	Flags, 1<<fUpdateLEDs // clear flag
	ldr	R0, =NewDelay
	ldrb	R1, [R0]
	mov	Delay, R1
	// copy data from FrameShadow to FrameLED
	ldr	R0, =FrameShadow
	ldr	R1, =FrameLED
	movs	R3, 36
tim16_update_loop:
	ldmia	R0!, {R2}
	stmia	R1!, {R2}
	subs	R3, 4
	bne	tim16_update_loop
tim16_update_end:

tim16_calc:
	// calculate new values for LEDs (output in next interrupt)
	
	// LED_PWM shifts one output bit into R1 (from the right, via C).
	// Expects R0 = address of the LED byte read last, R1 = pattern under
	// construction with the Mux bit of this pin at bit 15, R2 = PwmVal.
	// For the anode pin the carry from 'lsls' (1) is shifted in directly
	// and no LED byte is consumed. The invocation for bit 6 never tests
	// for the anode and reads [R0] without decrementing first.
.macro LED_PWM bit // cycles: anode = 4, bit6 = 4, others = 7
	// LEDs: 0 = on (pull low), 1 = off (high-Z) or Anode (pull high)
  .if (\bit < 6)
	lsls	R3, R1, 17 // Get Mux[bit]
	bcs	led_pwm_end\@ // Anode pin (Mux[bit] == 1)
	subs	R0, 1 // decrement RAM address
  .endif
	ldrb	R3, [R0] // takes 4 cycles!
	subs	R3, R2, R3 // C=0 (!!) if PwmVal (R2) < LED value (R3)
led_pwm_end\@:
	adcs	R1, R1 // R1 = R1<<1 + C
.endm
	
	// find RAM offset for selected anode - 18 cycles
	// Mux is one-hot, so at most one of the additions below is executed.
	// R0 ends up at the last of the 6 LED bytes of the selected anode.
	mov	R1, LEDs_Mux
	ldr	R0, =FrameLED+6-1 // Mux[0] == 1
	lsrs	R1, 16+2 // Mux[1] -> C
	bcc	tim16_col2_skip
	adds	R0, 6 // Mux[1] == 1
tim16_col2_skip:
	lsrs	R1, 1
	bcc	tim16_col3_skip
	adds	R0, 12 // Mux[2] == 1
tim16_col3_skip:
	lsrs	R1, 1
	bcc	tim16_col4_skip
	adds	R0, 18 // Mux[3] == 1
tim16_col4_skip:
	lsrs	R1, 1
	bcc	tim16_col5_skip
	adds	r0, 24 // Mux[4] == 1
tim16_col5_skip:
	lsrs	R1, 1
	bcc	tim16_col6_skip
	adds	R0, 30 // Mux[5] == 1
tim16_col6_skip:
	
	// calculate LED output bits - 59 cycles
	// After the 7 shifts, Mux is back in bits 21:16 and bits 6:0 hold
	// the new LED pattern.
	mov	R1, LEDs_Mux
	lsrs	R1, 7 // remove LED bits
	mov	R2, PwmVal
	LED_PWM 6 // Cathode 7
	LED_PWM 5 // Cathode 6
	LED_PWM 4 // Cathode 5
	LED_PWM 3 // Cathode 4
	LED_PWM 2 // Cathode 3
	LED_PWM 1 // Cathode 2
	LED_PWM 0 // Cathode 1
	mov	LEDs_Mux, R1
	
	// get timer value for next PWM interval (used in next IRQ) - 18 cycles
	ldr	R0, =tim16_pwmval
	add	R0, PwmVal // range 0 to 6
	ldrb	R1, [R0]
	// set timer reload value for next overflow
	ldr	R0, =TIM16_BASE
	str	R1, [R0, TIM_ARR_offset] // auto-reload register (buffered)
	movs	R1, 0
	str	R1, [R0, TIM_SR_offset] // clear interrupt flag
	
	// return from interrupt - 2 cycles
	bx	LR
	
// TOTAL IRQ TIME IN CYCLES [TICKS], 1 timer tick = 64 cycles:
//   Last PWM, last Mux, new frame:  6+16+6+9+12+80+18+59+12+2 = 218 [3.41]
//   Last PWM, last Mux, same frame: 6+16+6+9+12+ 6+18+59+12+2 = 144 [2.25]
//   Last PWM, other Mux:            6+16+6+7      +18+59+12+2 = 126 [1.97]
//   Other PWM:                      6+16+6        +18+59+12+2 = 119 [1.86]
//   Other PWM, measured:                                        124 [1.94]
// Time between interrupts: 16-17 [0.27] (instruction + 15 for IRQ entry)

@--------------------

// Tim16 auto-reload values, indexed by PwmVal (read with ldrb in tim16_ovf).
// Entry n is the length of PWM step n in timer ticks minus 1.
tim16_pwmval:
	.byte PWM_1 - PWM_0 - 1 // PwmVal = 0
	.byte PWM_2 - PWM_1 - 1 // PwmVal = 1
	.byte PWM_3 - PWM_2 - 1 // PwmVal = 2
	.byte PWM_4 - PWM_3 - 1 // PwmVal = 3
	.byte PWM_5 - PWM_4 - 1 // PwmVal = 4
	.byte PWM_6 - PWM_5 - 1 // PwmVal = 5
	.byte PWM_7 - PWM_6 - 1 // PwmVal = 6
// The table has an odd number of bytes; re-align to 2 bytes for the code
// that follows.
.align 1 // assembler seems to get confused without this ...

@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

.thumb_func
tim1_ovf:
	// TODO: don't use interrupt for smoother muxing
	// Tim1 overflow: load PWM value for following cycle
	// Reads one unsigned 8-bit sample through R4 and advances R4. R4 is
	// not saved on exception entry: it is the playback pointer that
	// 'sound' loads with the address of 'audio' before enabling this
	// interrupt. The sample s goes to CCR2 and (256 - s) & 0xFF to CCR3.
	// A sample value of 0 ends playback: both channels return to 0x80 and
	// the interrupt disables itself, which 'sound' detects by polling DIER.
	ldr	R0, =TIM1_BASE
	movs	R1, 0
	str	R1, [R0, TIM_SR_offset] // clear interrupt flag
	
	// next audio sample
	ldrb	R1, [R4]
	adds	R4, 1
	cmp	R1, 0
	beq	tim1_finished // 0x00 marks end of audio
	negs	R2, R1
	uxtb	R2, R2 // R2 = (256 - sample) & 0xFF
	str	R1, [R0, TIM_CCR2_offset]
	str	R2, [R0, TIM_CCR3_offset]
	bx	LR
	
tim1_finished:
	// sound output complete
	movs	R1, 0x80
	str	R1, [R0, TIM_CCR2_offset] // PWM values = 0x80 (idle, 50%)
	str	R1, [R0, TIM_CCR3_offset]
	movs	R1, 0
	str	R1, [R0, TIM_DIER_offset] // disable interrupt
	bx	LR
	
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

.thumb_func
exti0_falling:
	// EXTI line 0 handler (falling edge on the button pin).
	// Its only job is to acknowledge the interrupt; the wake-up itself is
	// the desired effect and is handled by the code following 'wfi'.
	movs	R1, EXTI_PR_PR0
	ldr	R0, =EXTI_BASE
	str	R1, [R0, EXTI_PR_offset] // write PR0 to the pending register
	bx	LR

@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

// dump literal pool for interrupts
.pool

@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

// animation command macros

// FILL led1, nn, brt
//   Emits one 16-bit command that sets nn consecutive LEDs, starting at LED
//   number led1 (1-based), to brightness brt. The LED index is stored
//   zero-based. Arguments outside the ranges below are rejected at assembly
//   time. Decoded by ani_fill.
.macro FILL led1, nn, brt // 0nnn.nnnl : llll.lbbb
	.if ((\led1 < 1) || (\led1 > N_LED))
	  .error "FILL: LED number out of range (1~N_LED)."
	.endif
	.if ((\nn < 1) || (\nn > N_LED))
	  .error "FILL: stretch length out of range (1~N_LED)."
	.endif
	.if ((\brt < 0) || (\brt > 7))
	  .error "FILL: Brightness out of range (0~7)."
	.endif
	.short \nn << 9 | (\led1-1) << 3 | \brt
	.endm

// LED led1, brt
//   Sets the single LED number led1 (1-based) to brightness brt; shorthand
//   for a FILL of length 1.
.macro LED led1, brt
	FILL \led1, 1, \brt
	.endm

// ALLOFF
//   Sets all N_LED LEDs to brightness 0.
.macro ALLOFF
	FILL 1, N_LED, 0
	.endm

// ROTATE nn
//   Emits a rotate command. A negative nn is encoded as the equivalent
//   positive rotation N_LED + nn, so the command byte always holds a value
//   in the range 1 ~ N_LED-1. A rotation by 0 is rejected. Decoded by
//   ani_rotate.
.macro ROTATE nn // 1000.0001 : 00nn.nnnn
	.if ((\nn >= 1) && (\nn <= N_LED-1))
	  .short 0x8100 | \nn
	.elseif ((\nn >= -(N_LED-1)) && (\nn <= -1))
	  .short 0x8100 | (N_LED + \nn)
	.else
	  .error "ROTATE: value out of range (-[N_LED-1]~-1 or 1~[N_LED-1])."
	.endif
	.endm

// FADE nn
//   Emits a fade command that adds nn to the brightness of every LED
//   (result limited to 0~7 by ani_fade). nn is stored in the low byte as a
//   signed 8-bit value, so -1 becomes 0xFF. A fade by 0 is rejected.
//   The positive branch must reference the macro argument with a backslash;
//   without it the assembler emits a reference to a symbol literally named
//   "nn" instead of the argument value.
.macro FADE nn // 1000.0010 : nnnn.nnnn
	.if ((\nn >= 1) && (\nn <= 7))
	  .short 0x8200 | \nn
	.elseif ((\nn >= -7) && (\nn <= -1))
	  .short 0x8200 | (0x100 + \nn)
	.else
	  .error "FADE: value out of range (-7~-1 or 1~7)."
	.endif
	.endm

// DELAY nn
//   Emits a delay command. nn is given in milliseconds and is stored in the
//   low byte in units of 10 ms, rounded to the nearest unit. The accepted
//   range keeps the stored value within 1~255.
.macro DELAY nn // 1000.0100 : nnnn.nnnn
	.if ((\nn >= 10) && (\nn <= 255*10))
	  .short ((\nn + 5) / 10) | 0x8400
	.else
	  .error "DELAY: value out of range (10~2550)."
	.endif
	.endm

// REPEAT nn
//   Opens a block whose commands are executed nn times in total. Every
//   REPEAT needs a matching REPEAT_END. A count of 0 is not allowed because
//   a zero low byte is the encoding of REPEAT_END.
.macro REPEAT nn // 1000.1000 : nnnn.nnnn (n != 0)
	.if ((\nn >= 1) && (\nn <= 255))
	  .short (\nn) | 0x8800
	.else
	  .error "REPEAT: value out of range (1~255)."
	.endif
	.endm

// REPEAT_END
//   Closes the innermost open REPEAT block.
.macro REPEAT_END // 1000.1000 : 0000.0000
	.short 0x8800
	.endm

// ANI_END
//   Marks the end of the command list; ani_loop restarts at 'animations'
//   when it reads this command.
.macro ANI_END // 1000.0000 : xxxx.xxxx
	.short 0x8000
	.endm

@--------------------

// Animation command list: a sequence of 16-bit commands that ani_loop
// fetches with 'ldrh', so the list has to start at an even address.
// The alignment directive is placed BEFORE the label: a label in front of
// '.align' would keep the unaligned address (pointing at the padding byte)
// if the location counter were ever odd at this point.
	.align 1
animations:
	.include "animations/ani_list.cm0.s"
animations_end:
	
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

// Audio sample data, read byte by byte by tim1_ovf (no alignment needed).
// A byte of 0x00 marks the end of the data.
audio:
	.include "audio/quack31250_u8.s"
audio_end:
