diff --git a/src/thread/ContextSwitch_fpu.s b/src/thread/ContextSwitch_fpu.s
index 74995e2..a0f7a4c 100644
--- a/src/thread/ContextSwitch_fpu.s
+++ b/src/thread/ContextSwitch_fpu.s
@@ -27,28 +27,30 @@ PendSV_Handler:
 	@ preempt ISRs, we always work with the PSP.
 	.thumb_func
 
-	@ Call Thread_Schedule()
+	PUSH {LR}
+
+	@ Call Thread_Schedule(EXC_RETURN)
 	@ We can delay the context push because the ABI enforces
 	@ routines to save and restore R4-R11 and S16-S31.
 	@ Return: R0 = new ctx (or NULL), R1 = old ctx (or NULL)
-	LDR R0, =Thread_Schedule
-	BLX R0
+	MOV R0, LR
+	LDR R1, =Thread_Schedule
+	BLX R1
+
+	POP {LR}
 
 	@ If needed, save old thread context
 	TEQ R1, #0
 	ITT NE
 	MRSNE R2, PSP
-	STMNE R1!, {R2, R4-R11}
+	STMNE R1, {R2, R4-R11, LR}
 
 	@ If needed, switch context
 	@ Also clear any exclusive lock held by the old thread
 	TEQ R0, #0
 	ITTT NE
-	LDMNE R0!, {R2, R4-R11}
+	LDMNE R0, {R2, R4-R11, LR}
 	MSRNE PSP, R2
 	CLREXNE
 
-	@ Return to thread mode, use PSP, no FP state
-	@ TODO: not sure about EXC_RETURN[4] = 1
-	LDR LR, =0xFFFFFFFD
 	BX LR
diff --git a/src/thread/ContextSwitch_nofpu.s b/src/thread/ContextSwitch_nofpu.s
index 662a754..fb3be16 100644
--- a/src/thread/ContextSwitch_nofpu.s
+++ b/src/thread/ContextSwitch_nofpu.s
@@ -27,6 +27,8 @@ PendSV_Handler:
 	@ preempt ISRs, we always work with the PSP.
 	.thumb_func
 
+	PUSH {LR}
+
 	@ Call Thread_Schedule()
 	@ We can delay the context push because the ABI enforces
 	@ routines to save and restore R4-R11.
@@ -34,20 +36,20 @@ PendSV_Handler:
 	LDR R0, =Thread_Schedule
 	BLX R0
 
+	POP {LR}
+
 	@ If needed, save old thread context
 	TEQ R1, #0
 	ITT NE
 	MRSNE R2, PSP
-	STMNE R1!, {R2, R4-R11}
+	STMNE R1, {R2, R4-R11, LR}
 
 	@ If needed, switch context
 	@ Also clear any exclusive lock held by the old thread
 	TEQ R0, #0
 	ITTT NE
-	LDMNE R0!, {R2, R4-R11}
+	LDMNE R0, {R2, R4-R11, LR}
 	MSRNE PSP, R2
 	CLREXNE
 
-	@ Return to thread mode, use PSP, no FP state
-	LDR LR, =0xFFFFFFFD
 	BX LR
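Note: on exception entry LR holds EXC_RETURN, and on FPU-capable cores bit 4 of that value is 0 when the stacked frame contains FP state, i.e. when the interrupted thread has touched the FPU. That is why both handlers now PUSH/POP LR around the call (BLX clobbers LR) and store it with STM R1, {R2, R4-R11, LR} instead of hardcoding 0xFFFFFFFD on exit. STM writes its register list lowest-numbered first to ascending addresses, so the software-saved context must lay its fields out in exactly that order. A minimal compile-time sketch of that contract, assuming the Thread_SoftwareContext_t fields from the Thread.c hunks below (the struct and assert names here are illustrative only, not part of the patch):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Mirror of the FPU-build Thread_SoftwareContext_t, see Thread.c below. */
    typedef struct {
        uint32_t sp;    /* filled from R2 (the saved PSP)  */
        uint32_t r[8];  /* filled from R4-R11              */
        uint32_t er;    /* filled from LR (EXC_RETURN)     */
        uint32_t s[32]; /* S0-S31, managed by UsageFault   */
    } ContextSketch_t;

    /* STM R1, {R2, R4-R11, LR} stores R2 first and LR last: */
    static_assert(offsetof(ContextSketch_t, r) == 4, "r[] right after sp");
    static_assert(offsetof(ContextSketch_t, er) == 36, "er right after r[7]");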
diff --git a/src/thread/Thread.c b/src/thread/Thread.c
index d8e9bd4..2668e83 100644
--- a/src/thread/Thread.c
+++ b/src/thread/Thread.c
@@ -73,9 +73,17 @@
 /* Default PSR for new threads, only thumb flag set. */
 #define THREAD_DEFAULT_PSR 0x01000000
 
+/* Default EXC_RETURN for new threads: thread mode, PSP, no FP state. */
+#define THREAD_DEFAULT_ER 0xFFFFFFFD
+
+#ifdef EVICSDK_FPU_SUPPORT
+/* NOT set in EXC_RETURN if the thread used FPU at least once. */
+#define THREAD_ER_MSK_FPCTX (1 << 4)
+#endif
 /* Creates the return value for Thread_Schedule(). */
-#define THREAD_MAKE_SCHEDRET(newCtx, oldCtx) ((((uint64_t)(uint32_t) (oldCtx)) << 32) | ((uint32_t) (newCtx)))
+#define THREAD_MAKE_SCHEDRET(newCtx, oldCtx) \
+	((((uint64_t)(uint32_t) (oldCtx)) << 32) | ((uint32_t) (newCtx)))
 
 /* Marks the scheduler as pending by flagging PendSV. */
 #define THREAD_PEND_SCHED() do { SCB->ICSR |= SCB_ICSR_PENDSVSET_Msk; } while(0)
 
@@ -121,12 +129,36 @@ typedef struct {
 	uint32_t sp;
 	/**< Software-saved registers: R4-R11. */
 	uint32_t r[8];
+	/**< EXC_RETURN to be used when resuming. */
+	uint32_t er;
 #ifdef EVICSDK_FPU_SUPPORT
-	/**< Software-saved FPU registers: S16-S31. */
-	uint32_t s[16];
+	/**< Software-saved FPU registers: S0-S31. */
+	uint32_t s[32];
 #endif
 } Thread_SoftwareContext_t;
 
+#ifdef EVICSDK_FPU_SUPPORT
+/**
+ * FPU context state.
+ * Keep in sync with UsageFault handler.
+ */
+typedef struct {
+	/**
+	 * Pointer to software-saved FPU state (Thread_SoftwareContext_t.s)
+	 * of the thread that held FPU last. Will be saved when another
+	 * thread uses the FPU. NULL if no thread holds the FPU state.
+	 */
+	uint32_t *holderCtx;
+	/**
+	 * Pointer to software-saved FPU state (Thread_SoftwareContext_t.s)
+	 * for the current thread. Will be restored when the current thread
+	 * uses the FPU. NULL if the current thread has no FPU state.
+	 * Shared with the UsageFault handler.
+	 */
+	uint32_t *curCtx;
+} Thread_FpuState_t;
+#endif
+
 /**
  * Thread control block.
  * Field order is arranged first to minimize
@@ -217,20 +249,12 @@ volatile uint32_t Thread_sysTick;
 
 #ifdef EVICSDK_FPU_SUPPORT
 /**
- * Pointer to software-saved FPU state (Thread_SoftwareContex_t.s)
- * of the thread that held FPU last. Will be saved when another
- * thread uses the FPU. NULL if no thread holds the FPU state.
- * Shared with the UsageFault handler.
- */
-uint32_t *Thread_fpuHolderCtx = NULL;
-
-/**
- * Pointer to software-saved FPU state (Thread_SoftwareContext_t.s)
- * for the current thread. Will be restored when the current thread
- * uses the FPU. NULL if the current thread has no FPU state.
- * Shared with the UsageFault handler.
+ * FPU context state.
+ * Shared with UsageFault handler. Non-atomic operations
+ * must be carried out with IRQs masked, to protect from
+ * faults generated by higher priority FPU-using ISRs.
  */
-uint32_t *Thread_fpuCurCtx = NULL;
+Thread_FpuState_t Thread_fpuState;
 #endif
 
 /**
@@ -336,17 +360,42 @@ static void Thread_SetupStackGuard(void *guardPtr) {
 	// thread gets control back (this must be called from ISR).
 }
 
+#ifdef EVICSDK_FPU_SUPPORT
+/**
+ * Enables/disables the FPU.
+ *
+ * @param enable True to enable, false to disable.
+ */
+static void Thread_FpuControl(uint8_t enable) {
+	if(enable) {
+		// Enable CP10/CP11, i.e. CPACR[23:20] = 1111
+		SCB->CPACR |= (0xFUL << 20);
+	}
+	else {
+		// Disable CP10/CP11, i.e. CPACR[23:20] = 0000
+		SCB->CPACR &= ~(0xFUL << 20);
+	}
+
+	// Ensure write completed, flush pipeline
+	__DMB();
+	__ISB();
+}
+#endif
+
 /**
  * Schedules the next thread. Called from PendSV.
  * This is an internal function.
  *
+ * @param er EXC_RETURN from the PendSV handler.
+ *           Ignored if FPU support is disabled.
+ *
 * @return The lower 32 bits are the address of the software-saved
 *         context for the new thread. If NULL, no context switch
 *         is performed. The higher 32 bits are the address of the
 *         software-saved context for the old thread. If NULL, the
 *         old context isn't saved.
 */
-uint64_t Thread_Schedule() {
+uint64_t Thread_Schedule(uint32_t er) {
 	Thread_TCB_t *nextTcb;
 	Thread_SoftwareContext_t *newCtx = NULL, *oldCtx = NULL;
 	uint8_t isCurReady;
@@ -420,26 +469,40 @@
 		}
 	}
 
+#ifdef EVICSDK_FPU_SUPPORT
+	uint32_t *prevFpuCtx = (Thread_curTcb != NULL ? Thread_curTcb->ctx.s : NULL);
+#endif
+
 	// Switch to next thread
 	Thread_curTcb = nextTcb;
 	newCtx = &Thread_curTcb->ctx;
+
+	primask = Thread_IrqDisable();
+
 #ifdef EVICSDK_FPU_SUPPORT
-	Thread_fpuCurCtx = Thread_curTcb->ctx.s;
+	if(Thread_fpuState.curCtx == NULL && !(er & THREAD_ER_MSK_FPCTX)) {
+		// The previous thread used FPU for the first time
+		// The previous holder already had its context saved
+		Thread_fpuState.holderCtx = prevFpuCtx;
+	}
+	// Switch current FPU context. If a thread has never used FPU
+	// NULL its context to avoid useless saves. If it ends up using
+	// it, the holder will be updated (see above).
+	Thread_fpuState.curCtx = (Thread_curTcb->ctx.er & THREAD_ER_MSK_FPCTX ?
+		NULL : Thread_curTcb->ctx.s);
+	// If we're resuming the holder thread, enable FPU since
+	// registers are good. Otherwise, disable FPU and let lazy
+	// stacking do its job. Also disable FPU when curCtx is NULL,
+	// since we don't have FP context for that thread yet.
+	Thread_FpuControl(Thread_fpuState.curCtx != NULL &&
+		Thread_fpuState.curCtx == Thread_fpuState.holderCtx);
 #endif
 
 	// Configure stack guard: stack is at the beginning
 	// of the allocated block.
-	primask = Thread_IrqDisable();
 	Thread_SetupStackGuard(Thread_curTcb->blockPtr);
-	Thread_IrqRestore(primask);
 
-#ifdef EVICSDK_FPU_SUPPORT
-	// Disable FPU (CP10/CP11), i.e. CPACR[23:20] = 0000
-	SCB->CPACR &= ~(0xFUL << 20);
-	// Ensure write completed, flush pipeline
-	__DMB();
-	__ISB();
-#endif
+	Thread_IrqRestore(primask);
 	}
 
 	// Reset quantum
@@ -471,6 +534,16 @@ void SysTick_Handler() {
 static void Thread_ExitProc(void *ret) {
 	Thread_CriticalEnter();
 
+#ifdef EVICSDK_FPU_SUPPORT
+	// We don't hold FPU anymore
+	uint32_t primask = Thread_IrqDisable();
+	if(Thread_fpuState.holderCtx == Thread_fpuState.curCtx) {
+		Thread_fpuState.holderCtx = NULL;
+	}
+	Thread_fpuState.curCtx = NULL;
+	Thread_IrqRestore(primask);
+#endif
+
 	// If a thread has joined us, wake him up
 	if(Thread_curTcb->join.tcb != NULL) {
 		*Thread_curTcb->join.retPtr = ret;
@@ -500,6 +573,10 @@ void Thread_Init() {
 	Queue_Init(&Thread_readyQueue);
 	Queue_Init(&Thread_chronoQueue);
 	Thread_curTcb = NULL;
+#ifdef EVICSDK_FPU_SUPPORT
+	Thread_fpuState.holderCtx = NULL;
+	Thread_fpuState.curCtx = NULL;
+#endif
 	Thread_criticalCount = 0;
 	Thread_sysTick = 0;
@@ -562,6 +639,7 @@ Thread_Error_t Thread_Create(Thread_t *thread, Thread_EntryPtr_t entry, void *ar
 	ctx->lr = (uint32_t) Thread_ExitProc;
 	ctx->pc = (uint32_t) entry;
 	ctx->psr = THREAD_DEFAULT_PSR;
+	tcb->ctx.er = THREAD_DEFAULT_ER;
 	tcb->ctx.sp = (uint32_t) ctx;
 
 	// Push new thread to back of ready queue
@@ -690,6 +768,7 @@ void Thread_CriticalExit() {
 
 /**
  * Initializes a semaphore.
+ * This is an internal function.
 *
 * @param sema Semaphore.
 * @param count Initial semaphore count.
@@ -702,6 +781,7 @@ static void Thread_SemaphoreInit(Thread_SemaphoreInternal_t *sema, int32_t count
 
 /**
  * Deletes and deallocates a semaphore.
+ * This is an internal function.
 *
 * @param sema Semaphore.
 * @param doFree True to free the memory pointed by sema.
@@ -734,7 +814,7 @@ Thread_Error_t Thread_SemaphoreCreate(Thread_Semaphore_t *sema, int32_t count) {
 		return INVALID_VALUE;
 	}
 
-	// Allocate and initialize semaphore
+	// Allocate semaphore
 	sm = malloc(sizeof(Thread_SemaphoreInternal_t));
 	if(sm == NULL) {
 		return NO_MEMORY;
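Note: the Thread_Schedule() hunk above is the heart of the lazy scheme. holderCtx tracks whose values are physically in the FPU registers; curCtx tracks where the running thread's FP state lives (NULL if it has none yet). Here is a C restatement of the per-switch decision; the helper fpu_on_switch() and its parameter names are hypothetical, written only to make the branch structure easy to follow:

    #include <stdbool.h>
    #include <stdint.h>

    #define ER_MSK_FPCTX (1u << 4) /* EXC_RETURN[4]: 0 = frame has FP state */

    typedef struct {
        uint32_t *holderCtx; /* thread whose values sit in S0-S31 */
        uint32_t *curCtx;    /* save area of the running thread   */
    } FpuStateSketch_t;

    /* Returns true if the FPU should be left enabled for the new thread. */
    static bool fpu_on_switch(FpuStateSketch_t *st, uint32_t oldEr,
                              uint32_t *oldS, uint32_t newEr, uint32_t *newS) {
        if(st->curCtx == NULL && !(oldEr & ER_MSK_FPCTX)) {
            /* The old thread used the FPU for the first time this
             * quantum: UsageFault already parked the previous holder's
             * registers, but could not name a new holder because the
             * old thread had no save area registered (curCtx was NULL). */
            st->holderCtx = oldS;
        }
        /* Threads that never used the FPU keep a NULL curCtx so the
         * UsageFault handler skips a useless restore. */
        st->curCtx = (newEr & ER_MSK_FPCTX) ? NULL : newS;
        /* Enable the FPU only when the registers already belong to the
         * new thread; otherwise the first FP instruction must trap. */
        return st->curCtx != NULL && st->curCtx == st->holderCtx;
    }

Thread_FpuControl() then applies that decision through CPACR, with the DMB/ISB pair making the change take effect before exception return.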
diff --git a/src/thread/UsageFault_fpu.s b/src/thread/UsageFault_fpu.s
index 62472c8..c76210f 100644
--- a/src/thread/UsageFault_fpu.s
+++ b/src/thread/UsageFault_fpu.s
@@ -23,9 +23,9 @@ UsageFault_Handler:
 
 	@ Check if NOCP (CFSR[16+3]) is set
 	@ If not, this is not a CP denial
-	LDR R0, =0xE000ED28
-	LDR R1, [R0]
-	TST R1, #(1 << 19)
+	LDR R2, =0xE000ED28
+	LDR R3, [R2]
+	TST R3, #(1 << 19)
 	BEQ UsageFault_Handler_escalate
 
 	@ Check if FPU is enabled (i.e. CPACR[23:20] != 0000)
@@ -35,6 +35,10 @@
 	TST R1, #(0xF << 20)
 	BNE UsageFault_Handler_escalate
 
+	@ Clear NOCP flag
+	BIC R3, #(1 << 19)
+	STR R3, [R2]
+
 	@ Enable FPU (enable CP10/CP11, i.e. CPACR[23:20] = 1111)
 	ORR R1, #(0xF << 20)
 	STR R1, [R0]
@@ -43,27 +47,52 @@
 	DMB
 	ISB
 
 	@ If the current thread is the FPU holder, we are done
-	LDR R0, =Thread_fpuHolderCtx
-	LDR R2, [R0]
-	LDR R1, =Thread_fpuCurCtx
-	LDR R3, [R1]
-	TEQ R2, R3
+	LDR R0, =Thread_fpuState
+	LDM R0, {R1-R2}
+	TEQ R1, R2
 	IT EQ
 	BXEQ LR
 
+	PUSH {R4, R5}
+
+	@ Save FPCCR, clear LSPACT (FPCCR[0] = 0)
+	LDR R4, =0xE000EF34
+	LDR R5, [R4]
+	BIC R12, R5, #(1 << 0)
+	STR R12, [R4]
+
+	@ Note that we never check whether we are in thread mode
+	@ or in an interrupt. This means that an FPU-using ISR will
+	@ behave like the thread it preempted used FPU.
+	@ While this is unnecessary, it solves the race where an
+	@ ISR uses the FPU before the thread does in this quantum:
+	@ if we only enable FPU without switching context, there
+	@ would be no UsageFault when the thread uses it, resulting
+	@ in corrupted context.
+	@ This can be avoided by tailchaining PendSV and disabling
+	@ FPU again there, but the performance has to be evaluated.
+
 	@ If needed, save FPU context for FPU holder
-	TEQ R2, #0
+	TEQ R1, #0
 	IT NE
-	VSTMNE R2, {S16-S31}
+	VSTMNE R1, {S0-S31}
 
 	@ If needed, restore FPU context for current
 	@ thread and set it as FPU holder
-	TEQ R3, #0
+	TEQ R2, #0
 	ITT NE
-	VLDMNE R3, {S16-S31}
-	STRNE R3, [R0]
+	VLDMNE R2, {S0-S31}
+	STRNE R2, [R0]
 
-	BX LR
+	@ Restore FPCCR
+	STR R5, [R4]
+
+	POP {R4, R5}
+
+	@ If other fault flags were set, escalate
+	TEQ R3, #0
+	IT EQ
+	BXEQ LR
 
 UsageFault_Handler_escalate:
 	@ If this is not an FPU CP denial, we want to escalate
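Note: the UsageFault path is easier to read in C. The sketch below mirrors the assembly flow under stated assumptions (0xE000ED28 = CFSR, 0xE000ED88 = CPACR, 0xE000EF34 = FPCCR, as used in the handler). It is a reading aid, not a replacement: the real handler must stay in assembly, since touching S0-S31 behind the compiler's back has no portable C spelling. escalate() and the vstm/vldm helper names are made up:

    #include <stdint.h>

    #define CFSR  (*(volatile uint32_t *)0xE000ED28)
    #define CPACR (*(volatile uint32_t *)0xE000ED88)
    #define FPCCR (*(volatile uint32_t *)0xE000EF34)

    extern uint32_t *holderCtx, *curCtx; /* Thread_fpuState fields */
    extern void escalate(void);          /* UsageFault_Handler_escalate */

    static inline void vstm_s0_s31(uint32_t *p)
    { __asm volatile("vstm %0, {s0-s31}" :: "r"(p) : "memory"); }
    static inline void vldm_s0_s31(uint32_t *p)
    { __asm volatile("vldm %0, {s0-s31}" :: "r"(p) : "memory"); }

    void usagefault_sketch(void) {
        uint32_t cfsr = CFSR;
        if(!(cfsr & (1u << 19)) ||      /* not a NOCP denial, or...      */
           (CPACR & (0xFu << 20))) {    /* ...FPU already on: not ours   */
            escalate();
            return;
        }
        cfsr &= ~(1u << 19);            /* clear NOCP, as the asm does   */
        CFSR = cfsr;
        CPACR |= 0xFu << 20;            /* enable CP10/CP11 (+ DMB/ISB)  */
        if(holderCtx == curCtx)
            return;                     /* registers are already ours    */
        uint32_t fpccr = FPCCR;
        FPCCR = fpccr & ~1u;            /* drop LSPACT: cancel pending   */
                                        /* lazy stacking during the swap */
        if(holderCtx != NULL)
            vstm_s0_s31(holderCtx);     /* park the holder's S0-S31      */
        if(curCtx != NULL) {
            vldm_s0_s31(curCtx);        /* load ours, become the holder  */
            holderCtx = curCtx;
        }
        FPCCR = fpccr;                  /* put FPCCR back                */
        if(cfsr != 0)                   /* other fault bits? escalate    */
            escalate();
    }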