From 9ba880ab483b8c851b99ca2af792d05f5710ab7f Mon Sep 17 00:00:00 2001 From: Huaqi Fang <578567190@qq.com> Date: Fri, 24 Jul 2020 18:25:59 +0800 Subject: [PATCH] DSP: Update DSP support source code Signed-off-by: Huaqi Fang <578567190@qq.com> --- .../riscv_conv_fast_opt_q15.c | 2 +- .../FilteringFunctions/riscv_conv_fast_q15.c | 4 +++- .../FilteringFunctions/riscv_conv_opt_q15.c | 6 ++++-- .../riscv_conv_partial_fast_opt_q15.c | 2 +- .../riscv_conv_partial_fast_q15.c | 4 +++- .../riscv_conv_partial_opt_q15.c | 6 ++++-- .../riscv_conv_partial_opt_q7.c | 4 ---- .../FilteringFunctions/riscv_conv_partial_q15.c | 6 +++--- .../FilteringFunctions/riscv_conv_partial_q31.c | 16 ++++++++++++---- .../FilteringFunctions/riscv_conv_partial_q7.c | 4 +++- .../Source/FilteringFunctions/riscv_conv_q31.c | 12 +++++++++--- .../riscv_fir_decimate_fast_q15.c | 17 ++++++++++++++--- .../sincospart/riscv_controller.c | 2 -- 13 files changed, 57 insertions(+), 28 deletions(-) diff --git a/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_fast_opt_q15.c b/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_fast_opt_q15.c index 8b8cf0787..a7a7817ff 100644 --- a/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_fast_opt_q15.c +++ b/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_fast_opt_q15.c @@ -116,7 +116,7 @@ void riscv_conv_fast_opt_q15( } #if __RISCV_XLEN == 64 /* Pointer to take end of scratch2 buffer */ - pScr2 = pScratch2 + srcBLen - 4; + pScr2 = pScratch2 + srcBLen - 3; #else /* Pointer to take end of scratch2 buffer */ pScr2 = pScratch2 + srcBLen - 1; diff --git a/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_fast_q15.c b/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_fast_q15.c index aab341347..2fe4c6588 100644 --- a/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_fast_q15.c +++ b/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_fast_q15.c @@ -215,7 +215,9 @@ void riscv_conv_fast_q15( /* Decrement loop counter */ k--; } - +#if defined RISCV_MATH_DSP && __RISCV_XLEN == 64 + py += 2; +#endif /* 
defined RISCV_MATH_DSP && __RISCV_XLEN == 64 */ /* For the next MAC operations, the pointer py is used without SIMD * So, py is incremented by 1 */ py = py + 1U; diff --git a/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_opt_q15.c b/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_opt_q15.c index dd912cdf9..7fea6569f 100644 --- a/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_opt_q15.c +++ b/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_opt_q15.c @@ -122,7 +122,7 @@ void riscv_conv_opt_q15( /* Loop unrolling: Compute 4 outputs at a time */ k = srcBLen >> 2U; #if __RISCV_XLEN == 64 - pScr2 -= 4; + pScr2 -= 3; #endif /* __RISCV_XLEN == 64 */ /* Copy smaller length input sequence in reverse order into second scratch buffer */ while (k > 0U) @@ -140,7 +140,9 @@ void riscv_conv_opt_q15( /* Decrement loop counter */ k--; } - +#if __RISCV_XLEN == 64 + pScr2 += 3; +#endif /* __RISCV_XLEN == 64 */ /* Loop unrolling: Compute remaining outputs */ k = srcBLen % 0x4U; diff --git a/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_partial_fast_opt_q15.c b/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_partial_fast_opt_q15.c index 95d202815..2fb3fa768 100644 --- a/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_partial_fast_opt_q15.c +++ b/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_partial_fast_opt_q15.c @@ -126,7 +126,7 @@ riscv_status riscv_conv_partial_fast_opt_q15( #if __RISCV_XLEN == 64 /* Pointer to take end of scratch2 buffer */ - pScr2 = pScratch2 + srcBLen - 5; + pScr2 = pScratch2 + srcBLen - 4; #else /* Pointer to take end of scratch2 buffer */ pScr2 = pScratch2 + srcBLen - 1; diff --git a/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_partial_fast_q15.c b/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_partial_fast_q15.c index ad81450c3..cd418ef26 100644 --- a/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_partial_fast_q15.c +++ b/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_partial_fast_q15.c @@ -227,7 +227,9 @@ riscv_status riscv_conv_partial_fast_q15( /* Decrement 
loop counter */ k--; } - +#if defined RISCV_MATH_DSP && __RISCV_XLEN == 64 + py += 2; +#endif /* RISCV_MATH_DSP && __RISCV_XLEN == 64 */ /* For the next MAC operations, the pointer py is used without SIMD So, py is incremented by 1 */ py = py + 1U; diff --git a/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_partial_opt_q15.c b/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_partial_opt_q15.c index d200339e8..b9a91d3c5 100644 --- a/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_partial_opt_q15.c +++ b/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_partial_opt_q15.c @@ -133,7 +133,7 @@ riscv_status riscv_conv_partial_opt_q15( #if defined (RISCV_MATH_LOOPUNROLL) #if __RISCV_XLEN == 64 - pScr2 -= 4; + pScr2 -= 3; #endif /* __RISCV_XLEN == 64 */ /* Loop unrolling: Compute 4 outputs at a time */ k = srcBLen >> 2U; @@ -153,7 +153,9 @@ riscv_status riscv_conv_partial_opt_q15( /* Decrement loop counter */ k--; } - +#if __RISCV_XLEN == 64 + pScr2 += 3; +#endif /* __RISCV_XLEN == 64 */ /* Loop unrolling: Compute remaining outputs */ k = srcBLen % 0x4U; diff --git a/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_partial_opt_q7.c b/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_partial_opt_q7.c index cd55a572c..f6360c673 100644 --- a/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_partial_opt_q7.c +++ b/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_partial_opt_q7.c @@ -168,9 +168,6 @@ riscv_status riscv_conv_partial_opt_q7( ** a second loop below copies for the remaining 1 to 3 samples. 
*/ while (k > 0U) { -#if __RISCV_XLEN == 64 - write_q15x4_ia(&pScr2,read_q15x4_da((q15_t **)&px)); -#else /* copy second buffer in reversal manner */ x4 = (q15_t) *pIn1++; *pScr1++ = x4; @@ -180,7 +177,6 @@ riscv_status riscv_conv_partial_opt_q7( *pScr1++ = x4; x4 = (q15_t) *pIn1++; *pScr1++ = x4; -#endif /* __RISCV_XLEN == 64 */ /* Decrement loop counter */ k--; } diff --git a/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_partial_q15.c b/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_partial_q15.c index 38732b58d..5605eb302 100644 --- a/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_partial_q15.c +++ b/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_partial_q15.c @@ -260,9 +260,9 @@ riscv_status riscv_conv_partial_q15( /* Update the inputA and inputB pointers for next MAC calculation */ py = ++pSrc2 - 1U; px = pIn1; -#if __RISCV_XLEN == 64 - py = py - 2; -#endif /* __RISCV_XLEN == 64 */ +// #if __RISCV_XLEN == 64 +// py = py - 2; +// #endif /* __RISCV_XLEN == 64 */ /* Increment MAC count */ count++; diff --git a/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_partial_q31.c b/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_partial_q31.c index 11de12ca4..f83c7aaed 100644 --- a/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_partial_q31.c +++ b/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_partial_q31.c @@ -209,7 +209,9 @@ riscv_status riscv_conv_partial_q31( /* Decrement loop counter */ k--; } - +#if __RISCV_XLEN == 64 + py++; +#endif /* __RISCV_XLEN == 64 */ /* Loop unrolling: Compute remaining outputs */ k = count % 0x4U; @@ -414,7 +416,9 @@ riscv_status riscv_conv_partial_q31( /* Loop unrolling: Compute 4 outputs at a time */ k = srcBLen >> 2U; - +#if __RISCV_XLEN == 64 + py--; +#endif /* __RISCV_XLEN == 64 */ while (k > 0U) { #if __RISCV_XLEN == 64 @@ -435,7 +439,9 @@ riscv_status riscv_conv_partial_q31( /* Decrement loop counter */ k--; } - +#if __RISCV_XLEN == 64 + py++; +#endif /* __RISCV_XLEN == 64 */ /* Loop unrolling: Compute remaining outputs */ k = srcBLen 
% 0x4U; @@ -572,7 +578,9 @@ riscv_status riscv_conv_partial_q31( /* Decrement loop counter */ k--; } - +#if __RISCV_XLEN == 64 + py++; +#endif /* __RISCV_XLEN == 64 */ /* Loop unrolling: Compute remaining outputs */ k = count % 0x4U; diff --git a/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_partial_q7.c b/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_partial_q7.c index 1315c0541..cc5e72790 100644 --- a/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_partial_q7.c +++ b/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_partial_q7.c @@ -224,7 +224,9 @@ riscv_status riscv_conv_partial_q7( /* Decrement loop counter */ k--; } - +#if __RISCV_XLEN == 64 + py += 7; +#endif /* __RISCV_XLEN == 64 */ /* Loop unrolling: Compute remaining outputs */ k = count % 0x4U; diff --git a/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_q31.c b/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_q31.c index b332a00cc..1b81c2912 100644 --- a/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_q31.c +++ b/NMSIS/DSP/Source/FilteringFunctions/riscv_conv_q31.c @@ -194,7 +194,9 @@ void riscv_conv_q31( /* Decrement loop counter */ k--; } - +#if __RISCV_XLEN == 64 + py++; +#endif /* __RISCV_XLEN == 64 */ /* Loop unrolling: Compute remaining outputs */ k = count % 0x4U; @@ -410,7 +412,9 @@ void riscv_conv_q31( /* Decrement loop counter */ k--; } - +#if __RISCV_XLEN == 64 + py++; +#endif /* __RISCV_XLEN == 64 */ /* Loop unrolling: Compute remaining outputs */ k = srcBLen % 0x4U; @@ -548,7 +552,9 @@ void riscv_conv_q31( /* Decrement loop counter */ k--; } - +#if __RISCV_XLEN == 64 + py++; +#endif /* __RISCV_XLEN == 64 */ /* Loop unrolling: Compute remaining outputs */ k = blockSize3 % 0x4U; diff --git a/NMSIS/DSP/Source/FilteringFunctions/riscv_fir_decimate_fast_q15.c b/NMSIS/DSP/Source/FilteringFunctions/riscv_fir_decimate_fast_q15.c index f0d2e83ca..81598ca7e 100644 --- a/NMSIS/DSP/Source/FilteringFunctions/riscv_fir_decimate_fast_q15.c +++ b/NMSIS/DSP/Source/FilteringFunctions/riscv_fir_decimate_fast_q15.c 
@@ -82,7 +82,7 @@ void riscv_fir_decimate_fast_q15( #if defined (RISCV_MATH_LOOPUNROLL) q31_t c1; /* Temporary variables to hold state and coefficient values */ #if __RISCV_XLEN == 64 - q63_t x064, x164, c064; + q63_t x064, x164, c064, sum064 = 0; #endif /* __RISCV_XLEN == 64 */ #endif @@ -228,6 +228,15 @@ void riscv_fir_decimate_fast_q15( while (tapCnt > 0U) { +#if __RISCV_XLEN == 64 + /* Read four packed q15 coefficients, b[numTaps-1] .. b[numTaps-4] */ + c064 = read_q15x4_ia ((q15_t **) &pb); + + /* Read four packed q15 state samples, x[n-numTaps-1] .. x[n-numTaps-4] */ + x064 = read_q15x4_ia ((q15_t **) &px); + + sum064 = __SMLAD(x064, c064, sum064); +#else /* Read the b[numTaps-1] and b[numTaps-2] coefficients */ c0 = read_q15x2_ia ((q15_t **) &pb); @@ -245,11 +254,13 @@ void riscv_fir_decimate_fast_q15( /* Perform the multiply-accumulate */ sum0 = __SMLAD(x0, c1, sum0); - +#endif /* __RISCV_XLEN == 64 */ /* Decrement loop counter */ tapCnt--; } - +#if __RISCV_XLEN == 64 + sum0 =(q31_t)((sum064 + (sum064<<32u))>>32u); /* adds the low 32 bits of sum064 into its high 32 bits and keeps the high half; NOTE(review): '<<' on a negative value is undefined if q63_t is signed - confirm */ +#endif /* __RISCV_XLEN == 64 */ /* Loop unrolling: Compute remaining taps */ tapCnt = numTaps % 0x4U; diff --git a/NMSIS/DSP/Test/ControllerFunctions/sincospart/riscv_controller.c b/NMSIS/DSP/Test/ControllerFunctions/sincospart/riscv_controller.c index ead782c2a..9daa75e8b 100644 --- a/NMSIS/DSP/Test/ControllerFunctions/sincospart/riscv_controller.c +++ b/NMSIS/DSP/Test/ControllerFunctions/sincospart/riscv_controller.c @@ -29,7 +29,6 @@ static int DSP_SIN_COS_F32(void) riscv_sin_cos_f32(0, &pSinVal, &pCosVal); BENCH_END(riscv_sin_cos_f32); ref_sin_cos_f32(0, &pSinVal_ref, &pCosVal_ref); - printf("%-20s : cycle: %d\n", "riscv_sin_cos_f32", cycle); if ((fabs(pSinVal - pSinVal_ref) > DELTAF32) || (fabs(pCosVal - pCosVal_ref) > DELTAF32)) { BENCH_ERROR(riscv_sin_cos_f32); @@ -50,7 +49,6 @@ static int DSP_SIN_COS_Q31(void) riscv_sin_cos_q31(0, &pSinVal, &pCosVal); BENCH_END(riscv_sin_cos_q31); ref_sin_cos_q31(0, &pSinVal_ref, &pCosVal_ref); - printf("%-20s : cycle: %d\n", "riscv_sin_cos_q31", cycle); if ((labs(pSinVal - pSinVal_ref) 
> DELTAQ31) || (labs(pCosVal - pCosVal_ref) > DELTAQ31)) { BENCH_ERROR(riscv_sin_cos_q31);