-
Notifications
You must be signed in to change notification settings - Fork 258
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Optimize values known at compile time. #547
Conversation
Sure. However, may I suggest keeping the original formulas as comments just above the new, optimized formulas? Also, please add a comment explaining the reason for the new table.
Sorry, but... Current master:
; 778 : gi_tmp = gi_min + roomsize * (gi_max - gi_min);
subsd xmm0, xmm6
mulsd xmm0, xmm7
addsd xmm0, xmm6
; 779 : /* Computes T60DC from gi using inverse of relation E2.*/
; 780 : dc_rev_time = -3 * FLUID_M_LN10 * delay_length[NBR_DELAYS - 1] * sample_period / FLUID_LOGF(gi_tmp);
call log ; gi_tmp is in xmm0
; 781 : }
; 782 : #endif /* ROOMSIZE_RESPONSE_LINEAR */
; 783 : /*--------------------------------------------
; 784 : Computes alpha
; 785 : ----------------------------------------------*/
; 786 : /* Computes alpha from damp,ai_tmp,gi_tmp using relation R */
; 787 : /* - damp (0 to 1) controls concave reverb time for fs/2 frequency (T60DC to 0) */
; 788 : ai_tmp = 1.0f * damp;
; 789 :
; 790 : /* Preserve the square of R */
; 791 : alpha2 = 1.f / (1.f - ai_tmp / ((20.f / 80.f) * FLUID_LOGF(gi_tmp)));
movsd xmm10, QWORD PTR __real@3fd0000000000000
movaps xmm9, xmm11
; magic const folded number (-3 * FLUID_M_LN10 * delay_length[NBR_DELAYS - 1]) goes below
mulsd xmm9, QWORD PTR __real@c0be76db0fcf0397
movaps xmm6, xmm14
divsd xmm9, xmm0 ;
mulsd xmm0, xmm10
divsd xmm8, xmm0
movaps xmm0, xmm14
subsd xmm0, xmm8
divsd xmm6, xmm0
; 792 :
; 793 : alpha = FLUID_SQRT(alpha2); /* R */
movaps xmm0, xmm6
call sqrt
Your modification is the same; only the register allocation differs:
; 792 : gi_tmp = gi_min + roomsize * (gi_max - gi_min);
subsd xmm0, xmm6
mulsd xmm0, xmm7
addsd xmm0, xmm6
; 793 : /* Computes T60DC from gi using inverse of relation E2.*/
; 794 : dc_rev_time = (-3 * FLUID_M_LN10 * DELAY_LEN_MAX) * sample_period / FLUID_LOGF(gi_tmp);
call log
; 795 : }
; 796 : #endif /* ROOMSIZE_RESPONSE_LINEAR */
; 797 : /*--------------------------------------------
; 798 : Computes alpha
; 799 : ----------------------------------------------*/
; 800 : /* Computes alpha from damp,ai_tmp,gi_tmp using relation R */
; 801 : /* - damp (0 to 1) controls concave reverb time for fs/2 frequency (T60DC to 0) */
; 802 : ai_tmp = 1.0f * damp;
; 803 :
; 804 : /* Preserve the square of R */
; 805 : alpha2 = 1.f / (1.f - ai_tmp / ((20.f / 80.f) * FLUID_LOGF(gi_tmp)));
movsd xmm10, QWORD PTR __real@3fd0000000000000
movaps xmm9, xmm12
mulsd xmm9, QWORD PTR __real@c0be76db0fcf0397
movaps xmm6, xmm13
divsd xmm9, xmm0
mulsd xmm0, xmm10
divsd xmm8, xmm0
movaps xmm0, xmm13
subsd xmm0, xmm8
divsd xmm6, xmm0
; 806 :
; 807 : alpha = FLUID_SQRT(alpha2); /* R */
movaps xmm0, xmm6
call sqrt
Below is what it would look like if `delay_length[]` were read at run time instead of being folded into a constant:
; 778 : gi_tmp = gi_min + roomsize * (gi_max - gi_min);
subsd xmm0, xmm6
mulsd xmm0, xmm8
addsd xmm0, xmm6
; 779 : /* Computes T60DC from gi using inverse of relation E2.*/
; 780 : dc_rev_time = -3 * FLUID_M_LN10 * delay_length[NBR_DELAYS - 1] * sample_period / FLUID_LOGF(gi_tmp);
call log
; 781 : }
; 782 : #endif /* ROOMSIZE_RESPONSE_LINEAR */
; 783 : /*--------------------------------------------
; 784 : Computes alpha
; 785 : ----------------------------------------------*/
; 786 : /* Computes alpha from damp,ai_tmp,gi_tmp using relation R */
; 787 : /* - damp (0 to 1) controls concave reverb time for fs/2 frequency (T60DC to 0) */
; 788 : ai_tmp = 1.0f * damp;
; 789 :
; 790 : /* Preserve the square of R */
; 791 : alpha2 = 1.f / (1.f - ai_tmp / ((20.f / 80.f) * FLUID_LOGF(gi_tmp)));
movsd xmm11, QWORD PTR __real@3fd0000000000000
xorps xmm10, xmm10
cvtsi2sd xmm10, DWORD PTR delay_length+28
movaps xmm6, xmm14
mulsd xmm10, QWORD PTR __real@c01ba18a998fffa1
mulsd xmm10, xmm13
divsd xmm10, xmm0
mulsd xmm0, xmm11
divsd xmm9, xmm0
movaps xmm0, xmm14
subsd xmm0, xmm9
divsd xmm6, xmm0
; 792 :
; 793 : alpha = FLUID_SQRT(alpha2); /* R */
movaps xmm0, xmm6
call sqrt
Same is true for
The only optimization you've made is rearranging the constants when calculating
fluid_rev_carlo.txt
Thank you, I did not know that. |
While working on the float-math PR, I noticed that `fluid_rev.c` contains something that could be optimized with little effort. Several lines use `delay_length[NBR_DELAYS - 1]`, but this value is known at compile time: it is equal to `DELAY_L7` or `DELAY_L11`, depending on the value of the macro `NBR_DELAYS`. So I introduced a macro called `DELAY_LEN_MAX`, which expands to that value. This way, some code is simplified to a constant value instead of a series of load-and-multiply calculations.
I also introduced a small array called `delay_length_x3[]`, which simply stores the values of `delay_length[]` multiplied by -3. This table is very small, and its size is comparable to that of the code generated previously, so in my opinion it is a reasonable way to reduce the overhead a little.