Skip to content

Commit

Permalink
cpu/esp8266: add LoadStoreError exception handler
Browse files Browse the repository at this point in the history
Usually, the access to the IROM (flash) memory requires 32-bit word aligned reads. Attempts to access data in the IROM (flash) memory less than 32 bits in size triggers a LoadStoreError exception. With the exception handler from esp-open-rtos it becomes possible to access data in IROM (flash) with a size of less than 32 bits and thus to place .rodata sections in the IROM (flash).
  • Loading branch information
gschorcht committed Apr 15, 2019
1 parent 7bbe94c commit 633887e
Showing 1 changed file with 312 additions and 0 deletions.
312 changes: 312 additions & 0 deletions cpu/esp_common/vendor/xtensa/xtensa_vectors.S
Original file line number Diff line number Diff line change
Expand Up @@ -485,12 +485,324 @@ User Exception (including Level 1 Interrupt from user mode).

_UserExceptionVector:

#ifdef MCU_ESP8266
wsr a0, EXCSAVE_1 /* preserve a0 */
j _UserExceptionTrampoline /* jump to handler trampoline */
#else
wsr a0, EXCSAVE_1 /* preserve a0 */
call0 _xt_user_exc /* user exception handler */
/* never returns here - call0 is used as a jump (see note at top) */
#endif

.end literal_prefix

#ifdef MCU_ESP8266
/*************************** LoadStoreError Handler BEGIN ********************/
/*
* PLEASE NOTE: The code between "LoadStoreError Handler BEGIN" and
* "LoadStoreError Handler END" markers was extracted from esp-open-rtos. It is
* under the following copyright:
*
* Original vector contents Copyright (C) 2014-2015 Espressif Systems
* Additions Copyright (C) Superhouse Automation Pty Ltd and Angus Gratton
* BSD Licensed as described in the file LICENSE
*
* Usually, the access to the IROM (flash) memory requires 32-bit word aligned
* reads. Attempts to access data in the IROM (flash) memory less than 32 bits
* in size triggers a LoadStoreError exception. Therefore, it is not possible to
* place .rodata sections in IROM (flash). Rather, .rodata sections have to
* be placed in RAM. With the exception handler from esp-open-rtos it becomes
* possible to access data in IROM (flash) with a size of less than 32 bits
* and thus to place .rodata sections in the IROM (flash).
*/

#define CAUSE_LOADSTORE 3
#define fatal_exception_handler _xt_user_exc

/* LoadStoreError handler stack */

.section .bss
.balign 16

_LoadStoreErrorHandlerStack:
.word 0 # a0
.word 0 # (unused)
.word 0 # a2
.word 0 # a3
.word 0 # a4

/* LoadStoreError Trampoline */

.section .UserExceptionTrampoline.text, "x"
.literal_position
.balign 4

_UserExceptionTrampoline:

wsr a1, EXCSAVE_2 /* preserve a1 */
#ifdef MCU_ESP8266
rsr a1, exccause
beqi a1, CAUSE_LOADSTORE, _LoadStoreErrorHandler
#endif
rsr a1, EXCSAVE_2 /* restore a1 */
call0 _xt_user_exc /* user exception handler */
/* never returns here - call0 is used as a jump (see note at top) */

/*
* Xtensa "Load/Store Exception" handler:
* Completes L8/L16 load instructions from Instruction address space,
* for which the architecture only supports 32-bit reads.
*
* Called from UserExceptionVector if EXCCAUSE is LoadStoreErrorCause
*
* (Fast path (no branches) is for L8UI)
*/
.literal_position
.balign 4
.type LoadStoreErrorHandler, @function

_LoadStoreErrorHandler:

rsr a1, EXCSAVE_2 /* restore a1 */
wsr a1, EXCSAVE_1 /* save it to excsave1 */
/* Registers are saved in the address corresponding to their register
* number times 4. This allows a quick and easy mapping later on when
* needing to store the value to a particular register number. */
movi sp, _LoadStoreErrorHandlerStack
s32i a0, sp, 0
s32i a2, sp, 0x08
s32i a3, sp, 0x0c
s32i a4, sp, 0x10
rsr a0, sar # Save SAR in a0 to restore later

/* Examine the opcode which generated the exception */
/* Note: Instructions are in this order to avoid pipeline stalls. */
rsr a2, epc1
movi a3, ~3
ssa8l a2 # sar is now correct shift for aligned read
and a2, a2, a3 # a2 now 4-byte aligned address of instruction
l32i a4, a2, 0
l32i a2, a2, 4
movi a3, 0x00700F # opcode mask for l8ui/l16si/l16ui
src a2, a2, a4 # a2 now instruction that failed
and a3, a2, a3 # a3 is masked instruction
bnei a3, 0x000002, .LSE_check_l16

/* Note: At this point, opcode could technically be one of two things:
* xx0xx2 (L8UI)
* xx8xx2 (Reserved (invalid) opcode)
* It is assumed that we'll never get to this point from an illegal
* opcode, so we don't bother to check for that case and presume this
* is always an L8UI. */

movi a4, ~3
rsr a3, excvaddr # read faulting address
and a4, a3, a4 # a4 now word aligned read address

l32i a4, a4, 0 # perform the actual read
ssa8l a3 # sar is now shift to extract a3's byte
srl a3, a4 # shift right correct distance
extui a4, a3, 0, 8 # mask off bits we need for an l8

.LSE_post_fetch:
/* We jump back here after either the L8UI or the L16*I routines do the
* necessary work to read the value from memory.
* At this point, a2 holds the faulting instruction and a4 holds the
* correctly read value.

* Restore original SAR value (saved in a0) and update EPC so we'll
* return back to the instruction following the one we just emulated */

/* Note: Instructions are in this order to avoid pipeline stalls */
rsr a3, epc1
wsr a0, sar
addi a3, a3, 0x3
wsr a3, epc1

/* Stupid opcode tricks: The jumptable we use later on needs 16 bytes
* per entry (so we can avoid a second jump by just doing a RFE inside
* each entry). Unfortunately, however, Xtensa doesn't have an addx16
* operation to make that easy for us. Luckily, all of the faulting
* opcodes we're processing are guaranteed to have bit 3 be zero, which
* means if we just shift the register bits of the opcode down by 3
* instead of 4, we will get the register number multiplied by 2. This
* combined with an addx8 will give us an effective addx16 without
* needing any extra shift operations. */
extui a2, a2, 3, 5 # a2 is now destination register 0-15 times 2

bgei a2, 10, .LSE_assign_reg # a5..a15 use jumptable
beqi a2, 2, .LSE_assign_a1 # a1 uses a special routine

/* We're storing into a0 or a2..a4, which are all saved in our "stack"
* area. Calculate the correct address and stick the value in there,
* then just do our normal restore and RFE (no jumps required, which
* actually makes a0..a4 substantially faster). */
addx2 a2, a2, sp
s32i a4, a2, 0

/* Restore all regs and return */
l32i a0, sp, 0
l32i a2, sp, 0x08
l32i a3, sp, 0x0c
l32i a4, sp, 0x10
rsr a1, excsave1 # restore a1 saved by UserExceptionVector
rfe

.LSE_assign_reg:
/* At this point, a2 contains the register number times 2, a4 is the
* read value. */

/* Calculate the jumptable address, and restore all regs except a2 and
* a4 so we have less to do after jumping. */
/* Note: Instructions are in this order to avoid pipeline stalls. */
movi a3, .LSE_jumptable_base
l32i a0, sp, 0
addx8 a2, a2, a3 # a2 is now the address to jump to
l32i a3, sp, 0x0c

jx a2

.balign 4
.LSE_check_l16:
/* At this point, a2 contains the opcode, a3 is masked opcode */
movi a4, 0x001002 # l16si or l16ui opcode after masking
bne a3, a4, .LSE_wrong_opcode

/* Note: At this point, the opcode could be one of two things:
* xx1xx2 (L16UI)
* xx9xx2 (L16SI)
* Both of these we can handle. */

movi a4, ~3
rsr a3, excvaddr # read faulting address
and a4, a3, a4 # a4 now word aligned read address

l32i a4, a4, 0 # perform the actual read
ssa8l a3 # sar is now shift to extract a3's bytes
srl a3, a4 # shift right correct distance
extui a4, a3, 0, 16 # mask off bits we need for an l16

bbci a2, 15, .LSE_post_fetch # Not a signed op
bbci a4, 15, .LSE_post_fetch # Value does not need sign-extension

movi a3, 0xFFFF0000
or a4, a3, a4 # set 32-bit sign bits
j .LSE_post_fetch

.LSE_wrong_opcode:
/* If we got here it's not an opcode we can try to fix, so bomb out.
* Restore registers so any dump the fatal exception routine produces
* will have correct values */
wsr a0, sar
l32i a0, sp, 0
/*l32i a2, sp, 0x08*/
l32i a3, sp, 0x0c
l32i a4, sp, 0x10
rsr a1, excsave1
mov a2, a1
movi a3, 0
call0 fatal_exception_handler

.balign 4
.LSE_assign_a1:
/* a1 is saved in excsave1, so just update that with the value, */
wsr a4, excsave1
/* Then restore all regs and return */
l32i a0, sp, 0
l32i a2, sp, 0x08
l32i a3, sp, 0x0c
l32i a4, sp, 0x10
rsr a1, excsave1
rfe

.balign 4
.LSE_jumptable:
/* The first 5 entries (80 bytes) of this table are unused (registers
* a0..a4 are handled separately above). Rather than have a whole bunch
* of wasted space, we just pretend that the table starts 80 bytes
* earlier in memory. */
.set .LSE_jumptable_base, .LSE_jumptable - (16 * 5)

.org .LSE_jumptable_base + (16 * 5)
mov a5, a4
l32i a2, sp, 0x08
l32i a4, sp, 0x10
rsr a1, excsave1
rfe

.org .LSE_jumptable_base + (16 * 6)
mov a6, a4
l32i a2, sp, 0x08
l32i a4, sp, 0x10
rsr a1, excsave1
rfe

.org .LSE_jumptable_base + (16 * 7)
mov a7, a4
l32i a2, sp, 0x08
l32i a4, sp, 0x10
rsr a1, excsave1
rfe

.org .LSE_jumptable_base + (16 * 8)
mov a8, a4
l32i a2, sp, 0x08
l32i a4, sp, 0x10
rsr a1, excsave1
rfe

.org .LSE_jumptable_base + (16 * 9)
mov a9, a4
l32i a2, sp, 0x08
l32i a4, sp, 0x10
rsr a1, excsave1
rfe

.org .LSE_jumptable_base + (16 * 10)
mov a10, a4
l32i a2, sp, 0x08
l32i a4, sp, 0x10
rsr a1, excsave1
rfe

.org .LSE_jumptable_base + (16 * 11)
mov a11, a4
l32i a2, sp, 0x08
l32i a4, sp, 0x10
rsr a1, excsave1
rfe

.org .LSE_jumptable_base + (16 * 12)
mov a12, a4
l32i a2, sp, 0x08
l32i a4, sp, 0x10
rsr a1, excsave1
rfe

.org .LSE_jumptable_base + (16 * 13)
mov a13, a4
l32i a2, sp, 0x08
l32i a4, sp, 0x10
rsr a1, excsave1
rfe

.org .LSE_jumptable_base + (16 * 14)
mov a14, a4
l32i a2, sp, 0x08
l32i a4, sp, 0x10
rsr a1, excsave1
rfe

.org .LSE_jumptable_base + (16 * 15)
mov a15, a4
l32i a2, sp, 0x08
l32i a4, sp, 0x10
rsr a1, excsave1
rfe

/*************************** LoadStoreError Handler END **********************/
#endif

/*
--------------------------------------------------------------------------------
Insert some waypoints for jumping beyond the signed 8-bit range of
Expand Down

0 comments on commit 633887e

Please sign in to comment.