From 23965f164c3801863ccf4cd5f8b8985b59ad7a39 Mon Sep 17 00:00:00 2001 From: wangqian Date: Wed, 29 May 2013 19:48:31 +0800 Subject: [PATCH] Fixed overflow internal buffer bug of (s/d/c/z)gemv on x86_64. --- kernel/x86_64/cgemv_n.S | 57 ++++++++++++++++++++++++++++++++++++--- kernel/x86_64/cgemv_t.S | 48 +++++++++++++++++++++++++++++++-- kernel/x86_64/dgemv_n.S | 48 ++++++++++++++++++++++++++++----- kernel/x86_64/sgemv_n.S | 56 ++++++++++++++++++++++++++++++++------ kernel/x86_64/sgemv_t.S | 10 +++---- kernel/x86_64/zgemv_n.S | 59 ++++++++++++++++++++++++++++++++++++++--- kernel/x86_64/zgemv_t.S | 50 ++++++++++++++++++++++++++++++++-- 7 files changed, 297 insertions(+), 31 deletions(-) diff --git a/kernel/x86_64/cgemv_n.S b/kernel/x86_64/cgemv_n.S index 77e9b3d966..64967d4bf0 100644 --- a/kernel/x86_64/cgemv_n.S +++ b/kernel/x86_64/cgemv_n.S @@ -47,14 +47,22 @@ #ifndef WINDOWS_ABI -#define STACKSIZE 64 +#define STACKSIZE 128 #define OLD_INCX 8 + STACKSIZE(%rsp) #define OLD_Y 16 + STACKSIZE(%rsp) #define OLD_INCY 24 + STACKSIZE(%rsp) #define OLD_BUFFER 32 + STACKSIZE(%rsp) #define ALPHA 48 (%rsp) - + +#define MMM 64(%rsp) +#define NN 72(%rsp) +#define AA 80(%rsp) +#define XX 88(%rsp) +#define LDAX 96(%rsp) +#define ALPHAR 104(%rsp) +#define ALPHAI 112(%rsp) + #define M %rdi #define N %rsi #define A %rcx @@ -66,7 +74,7 @@ #else -#define STACKSIZE 256 +#define STACKSIZE 288 #define OLD_ALPHA_I 40 + STACKSIZE(%rsp) #define OLD_A 48 + STACKSIZE(%rsp) @@ -78,6 +86,14 @@ #define OLD_BUFFER 96 + STACKSIZE(%rsp) #define ALPHA 224 (%rsp) +#define MMM 232(%rsp) +#define NN 240(%rsp) +#define AA 248(%rsp) +#define XX 256(%rsp) +#define LDAX 264(%rsp) +#define ALPHAR 272(%rsp) +#define ALPHAI 280(%rsp) + #define M %rcx #define N %rdx #define A %r8 @@ -142,9 +158,37 @@ movaps %xmm3, %xmm0 movss OLD_ALPHA_I, %xmm1 #endif + movq A, AA + movq N, NN + movq M, MMM + movq LDA, LDAX + movq X, XX + movq OLD_Y, Y + movss %xmm0,ALPHAR + movss %xmm1,ALPHAI + +.L0t: + xorq I,I + addq $1,I + 
salq $20,I + subq I,MMM + movq I,M + movss ALPHAR,%xmm0 + movss ALPHAI,%xmm1 + jge .L00t + + movq MMM,M + addq I,M + jle .L999x + +.L00t: + movq AA, A + movq NN, N + movq LDAX, LDA + movq XX, X movq OLD_INCX, INCX - movq OLD_Y, Y +# movq OLD_Y, Y movq OLD_INCY, INCY movq OLD_BUFFER, BUFFER @@ -4274,6 +4318,11 @@ ALIGN_3 .L999: + movq M, I + salq $ZBASE_SHIFT,I + addq I,AA + jmp .L0t +.L999x: movq 0(%rsp), %rbx movq 8(%rsp), %rbp movq 16(%rsp), %r12 diff --git a/kernel/x86_64/cgemv_t.S b/kernel/x86_64/cgemv_t.S index c268e4f594..49fc0eb361 100644 --- a/kernel/x86_64/cgemv_t.S +++ b/kernel/x86_64/cgemv_t.S @@ -47,13 +47,19 @@ #ifndef WINDOWS_ABI -#define STACKSIZE 64 +#define STACKSIZE 128 #define OLD_INCX 8 + STACKSIZE(%rsp) #define OLD_Y 16 + STACKSIZE(%rsp) #define OLD_INCY 24 + STACKSIZE(%rsp) #define OLD_BUFFER 32 + STACKSIZE(%rsp) #define ALPHA 48 (%rsp) +#define MMM 64(%rsp) +#define NN 72(%rsp) +#define AA 80(%rsp) +#define LDAX 88(%rsp) +#define ALPHAR 96(%rsp) +#define ALPHAI 104(%rsp) #define M %rdi #define N %rsi @@ -66,7 +72,7 @@ #else -#define STACKSIZE 256 +#define STACKSIZE 288 #define OLD_ALPHA_I 40 + STACKSIZE(%rsp) #define OLD_A 48 + STACKSIZE(%rsp) @@ -78,6 +84,13 @@ #define OLD_BUFFER 96 + STACKSIZE(%rsp) #define ALPHA 224 (%rsp) +#define MMM 232(%rsp) +#define NN 240(%rsp) +#define AA 248(%rsp) +#define LDAX 256(%rsp) +#define ALPHAR 264(%rsp) +#define ALPHAI 272(%rsp) + #define M %rcx #define N %rdx #define A %r8 @@ -144,6 +157,32 @@ movss OLD_ALPHA_I, %xmm1 #endif + movq A, AA + movq N, NN + movq M, MMM + movq LDA, LDAX + movss %xmm0,ALPHAR + movss %xmm1,ALPHAI + +.L0t: + xorq I,I + addq $1,I + salq $20,I + subq I,MMM + movq I,M + movss ALPHAR,%xmm0 + movss ALPHAI,%xmm1 + jge .L00t + + movq MMM,M + addq I,M + jle .L999x + +.L00t: + movq AA, A + movq NN, N + movq LDAX, LDA + movq OLD_INCX, INCX movq OLD_Y, Y movq OLD_INCY, INCY @@ -4350,6 +4389,11 @@ ALIGN_3 .L999: + movq M, I + salq $ZBASE_SHIFT,I + addq I,AA + jmp .L0t +.L999x: movq 0(%rsp), 
%rbx movq 8(%rsp), %rbp movq 16(%rsp), %r12 diff --git a/kernel/x86_64/dgemv_n.S b/kernel/x86_64/dgemv_n.S index 3c3cdfb07e..dd4b7d8a20 100644 --- a/kernel/x86_64/dgemv_n.S +++ b/kernel/x86_64/dgemv_n.S @@ -47,7 +47,7 @@ #ifndef WINDOWS_ABI -#define STACKSIZE 64 +#define STACKSIZE 128 #define OLD_M %rdi #define OLD_N %rsi @@ -59,6 +59,11 @@ #define STACK_BUFFER 32 + STACKSIZE(%rsp) #define ALPHA 48 (%rsp) +#define MMM 56(%rsp) +#define NN 64(%rsp) +#define AA 72(%rsp) +#define LDAX 80(%rsp) +#define XX 88(%rsp) #else #define STACKSIZE 256 @@ -137,17 +142,42 @@ movq OLD_LDA, LDA #endif - movq STACK_INCX, INCX - movq STACK_Y, Y - movq STACK_INCY, INCY - movq STACK_BUFFER, BUFFER - #ifndef WINDOWS_ABI movsd %xmm0, ALPHA #else movsd %xmm3, ALPHA #endif + movq STACK_Y, Y + movq A,AA + movq N,NN + movq M,MMM + movq LDA,LDAX + movq X,XX + +.L0t: + xorq I,I + addq $1,I + salq $21,I + subq I,MMM + movq I,M + jge .L00t + + movq MMM,M + addq I,M + jle .L999x + +.L00t: + movq XX,X + movq AA,A + movq NN,N + movq LDAX,LDA + + movq STACK_INCX, INCX + movq STACK_INCY, INCY + movq STACK_BUFFER, BUFFER + + leaq -1(INCY), %rax leaq (,INCX, SIZE), INCX @@ -2815,6 +2845,12 @@ ALIGN_3 .L999: + leaq (, M, SIZE), %rax + addq %rax,AA + jmp .L0t + ALIGN_4 + +.L999x: movq 0(%rsp), %rbx movq 8(%rsp), %rbp movq 16(%rsp), %r12 diff --git a/kernel/x86_64/sgemv_n.S b/kernel/x86_64/sgemv_n.S index ead2420c4a..263c1e1e1f 100644 --- a/kernel/x86_64/sgemv_n.S +++ b/kernel/x86_64/sgemv_n.S @@ -47,7 +47,7 @@ #ifndef WINDOWS_ABI -#define STACKSIZE 64 +#define STACKSIZE 128 #define OLD_M %rdi #define OLD_N %rsi @@ -58,10 +58,14 @@ #define STACK_INCY 24 + STACKSIZE(%rsp) #define STACK_BUFFER 32 + STACKSIZE(%rsp) #define ALPHA 48 (%rsp) - +#define MMM 56(%rsp) +#define NN 64(%rsp) +#define AA 72(%rsp) +#define LDAX 80(%rsp) +#define XX 96(%rsp) #else -#define STACKSIZE 256 +#define STACKSIZE 288 #define OLD_M %rcx #define OLD_N %rdx @@ -74,6 +78,12 @@ #define STACK_BUFFER 88 + STACKSIZE(%rsp) #define ALPHA 
224 (%rsp) +#define MMM 232(%rsp) +#define NN 240(%rsp) +#define AA 248(%rsp) +#define LDAX 256(%rsp) +#define XX 264(%rsp) + #endif #define LDA %r8 @@ -137,17 +147,41 @@ movq OLD_LDA, LDA #endif - movq STACK_INCX, INCX - movq STACK_Y, Y - movq STACK_INCY, INCY - movq STACK_BUFFER, BUFFER - #ifndef WINDOWS_ABI movss %xmm0, ALPHA #else movss %xmm3, ALPHA #endif + + movq M,MMM + movq A,AA + movq N,NN + movq LDA,LDAX + movq X,XX + movq STACK_Y, Y +.L0t: + xorq I,I + addq $1,I + salq $22,I + subq I,MMM + movq I,M + jge .L00t + + movq MMM,M + addq I,M + jle .L999x + +.L00t: + movq AA,A + movq NN,N + movq LDAX,LDA + movq XX,X + + movq STACK_INCX, INCX + movq STACK_INCY, INCY + movq STACK_BUFFER, BUFFER + leaq (,INCX, SIZE), INCX leaq (,INCY, SIZE), INCY leaq (,LDA, SIZE), LDA @@ -5990,6 +6024,12 @@ ALIGN_3 .L999: + leaq (,M,SIZE),%rax + addq %rax,AA + jmp .L0t + ALIGN_4 + +.L999x: movq 0(%rsp), %rbx movq 8(%rsp), %rbp movq 16(%rsp), %r12 diff --git a/kernel/x86_64/sgemv_t.S b/kernel/x86_64/sgemv_t.S index 854e0f295c..c2cb6b9441 100644 --- a/kernel/x86_64/sgemv_t.S +++ b/kernel/x86_64/sgemv_t.S @@ -63,7 +63,7 @@ #else -#define STACKSIZE 256 +#define STACKSIZE 288 #define OLD_M %rcx #define OLD_N %rdx @@ -74,10 +74,10 @@ #define STACK_Y 72 + STACKSIZE(%rsp) #define STACK_INCY 80 + STACKSIZE(%rsp) #define STACK_BUFFER 88 + STACKSIZE(%rsp) -#define MMM 216(%rsp) -#define NN 224(%rsp) -#define AA 232(%rsp) -#define LDAX 240(%rsp) +#define MMM 232(%rsp) +#define NN 240(%rsp) +#define AA 248(%rsp) +#define LDAX 256(%rsp) #endif diff --git a/kernel/x86_64/zgemv_n.S b/kernel/x86_64/zgemv_n.S index b584a53708..efcf161eb9 100644 --- a/kernel/x86_64/zgemv_n.S +++ b/kernel/x86_64/zgemv_n.S @@ -42,7 +42,7 @@ #ifndef WINDOWS_ABI -#define STACKSIZE 64 +#define STACKSIZE 128 #define OLD_INCX 8 + STACKSIZE(%rsp) #define OLD_Y 16 + STACKSIZE(%rsp) @@ -50,7 +50,15 @@ #define OLD_BUFFER 32 + STACKSIZE(%rsp) #define ALPHA_R 48 (%rsp) #define ALPHA_I 56 (%rsp) - + +#define MMM 64(%rsp)
+#define NN 72(%rsp) +#define AA 80(%rsp) +#define XX 88(%rsp) +#define LDAX 96(%rsp) +#define ALPHAR 104(%rsp) +#define ALPHAI 112(%rsp) + #define M %rdi #define N %rsi #define A %rcx @@ -62,7 +70,7 @@ #else -#define STACKSIZE 256 +#define STACKSIZE 304 #define OLD_ALPHA_I 40 + STACKSIZE(%rsp) #define OLD_A 48 + STACKSIZE(%rsp) @@ -75,6 +83,14 @@ #define ALPHA_R 224 (%rsp) #define ALPHA_I 232 (%rsp) +#define MMM 240(%rsp) +#define NN 248(%rsp) +#define AA 256(%rsp) +#define XX 264(%rsp) +#define LDAX 272(%rsp) +#define ALPHAR 280(%rsp) +#define ALPHAI 288(%rsp) + #define M %rcx #define N %rdx #define A %r8 @@ -136,8 +152,37 @@ movsd OLD_ALPHA_I, %xmm1 #endif - movq OLD_INCX, INCX + movq A, AA + movq N, NN + movq M, MMM + movq LDA, LDAX + movq X, XX movq OLD_Y, Y + movsd %xmm0,ALPHAR + movsd %xmm1,ALPHAI + +.L0t: + xorq I,I + addq $1,I + salq $18,I + subq I,MMM + movq I,M + movsd ALPHAR,%xmm0 + movsd ALPHAI,%xmm1 + jge .L00t + + movq MMM,M + addq I,M + jle .L999x + +.L00t: + movq AA, A + movq NN, N + movq LDAX, LDA + movq XX, X + + movq OLD_INCX, INCX +# movq OLD_Y, Y movq OLD_INCY, INCY movq OLD_BUFFER, BUFFER @@ -2673,6 +2718,12 @@ ALIGN_3 .L999: + movq M, I + salq $ZBASE_SHIFT,I + addq I,AA + jmp .L0t +.L999x: + movq 0(%rsp), %rbx movq 8(%rsp), %rbp movq 16(%rsp), %r12 diff --git a/kernel/x86_64/zgemv_t.S b/kernel/x86_64/zgemv_t.S index 14abc8bfa2..30f76dcbe8 100644 --- a/kernel/x86_64/zgemv_t.S +++ b/kernel/x86_64/zgemv_t.S @@ -42,13 +42,20 @@ #ifndef WINDOWS_ABI -#define STACKSIZE 64 +#define STACKSIZE 128 #define OLD_INCX 8 + STACKSIZE(%rsp) #define OLD_Y 16 + STACKSIZE(%rsp) #define OLD_INCY 24 + STACKSIZE(%rsp) #define OLD_BUFFER 32 + STACKSIZE(%rsp) +#define MMM 64(%rsp) +#define NN 72(%rsp) +#define AA 80(%rsp) +#define LDAX 88(%rsp) +#define ALPHAR 96(%rsp) +#define ALPHAI 104(%rsp) + #define M %rdi #define N %rsi #define A %rcx @@ -60,7 +67,7 @@ #else -#define STACKSIZE 256 +#define STACKSIZE 288 #define OLD_ALPHA_I 40 + STACKSIZE(%rsp) #define OLD_A 48
+ STACKSIZE(%rsp) @@ -71,6 +78,13 @@ #define OLD_INCY 88 + STACKSIZE(%rsp) #define OLD_BUFFER 96 + STACKSIZE(%rsp) +#define MMM 232(%rsp) +#define NN 240(%rsp) +#define AA 248(%rsp) +#define LDAX 256(%rsp) +#define ALPHAR 264(%rsp) +#define ALPHAI 272(%rsp) + #define M %rcx #define N %rdx #define A %r8 @@ -135,6 +149,32 @@ movsd OLD_ALPHA_I, %xmm1 #endif + movq A, AA + movq N, NN + movq M, MMM + movq LDA, LDAX + movsd %xmm0,ALPHAR + movsd %xmm1,ALPHAI + +.L0t: + xorq I,I + addq $1,I + salq $19,I + subq I,MMM + movq I,M + movsd ALPHAR,%xmm0 + movsd ALPHAI,%xmm1 + jge .L00t + + movq MMM,M + addq I,M + jle .L999x + +.L00t: + movq AA, A + movq NN, N + movq LDAX, LDA + movq OLD_INCX, INCX movq OLD_Y, Y movq OLD_INCY, INCY @@ -2405,6 +2445,12 @@ ALIGN_3 .L999: + movq M, I + salq $ZBASE_SHIFT,I + addq I,AA + jmp .L0t +.L999x: + movq 0(%rsp), %rbx movq 8(%rsp), %rbp movq 16(%rsp), %r12