Skip to content

Commit

Permalink
Fixed an internal buffer overflow bug in (s/d/c/z)gemv on x86_64.
Browse files Browse the repository at this point in the history
  • Loading branch information
wangqian committed May 29, 2013
1 parent 6a72840 commit 23965f1
Show file tree
Hide file tree
Showing 7 changed files with 297 additions and 31 deletions.
57 changes: 53 additions & 4 deletions kernel/x86_64/cgemv_n.S
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,22 @@

#ifndef WINDOWS_ABI

#define STACKSIZE 64
#define STACKSIZE 128

#define OLD_INCX 8 + STACKSIZE(%rsp)
#define OLD_Y 16 + STACKSIZE(%rsp)
#define OLD_INCY 24 + STACKSIZE(%rsp)
#define OLD_BUFFER 32 + STACKSIZE(%rsp)
#define ALPHA 48 (%rsp)


/* Stack slots (offsets from %rsp) used by the .L0t loop: the entry code
   stores M, N, A, X and LDA here, and .L00t reloads them on every pass.
   ALPHAR/ALPHAI receive %xmm0/%xmm1 (presumably the real and imaginary
   parts of alpha - confirm against the caller). */
#define MMM 64(%rsp)
#define NN 72(%rsp)
#define AA 80(%rsp)
#define XX 88(%rsp)
#define LDAX 96(%rsp)
#define ALPHAR 104(%rsp)
#define ALPHAI 112(%rsp)

#define M %rdi
#define N %rsi
#define A %rcx
Expand All @@ -66,7 +74,7 @@

#else

#define STACKSIZE 256
#define STACKSIZE 288

#define OLD_ALPHA_I 40 + STACKSIZE(%rsp)
#define OLD_A 48 + STACKSIZE(%rsp)
Expand All @@ -78,6 +86,14 @@
#define OLD_BUFFER 96 + STACKSIZE(%rsp)
#define ALPHA 224 (%rsp)

/* WINDOWS_ABI variant of the stack slots (offsets from %rsp) used by the
   .L0t loop: saved M, N, A, X, LDA and the %xmm0/%xmm1 values stored as
   ALPHAR/ALPHAI. */
#define MMM 232(%rsp)
#define NN 240(%rsp)
#define AA 248(%rsp)
#define XX 256(%rsp)
#define LDAX 264(%rsp)
#define ALPHAR 272(%rsp)
#define ALPHAI 280(%rsp)

#define M %rcx
#define N %rdx
#define A %r8
Expand Down Expand Up @@ -142,9 +158,37 @@
movaps %xmm3, %xmm0
movss OLD_ALPHA_I, %xmm1
#endif
/* Stash the current A, N, M, LDA, X and the %xmm0/%xmm1 values in stack
   slots so that each pass through .L0t/.L00t can reload them.
   Y is loaded once here, before the loop label. */
movq A, AA
movq N, NN
movq M, MMM
movq LDA, LDAX
movq X, XX
movq OLD_Y, Y
movss %xmm0,ALPHAR
movss %xmm1,ALPHAI

/* .L0t: choose M for the next pass, capped at 1 << 20. */
.L0t:
xorq I,I
addq $1,I
salq $20,I /* I = 1 << 20 */
subq I,MMM /* MMM -= I; the flags set here are consumed by the jge below */
movq I,M /* M = I; mov/movss do not modify the flags */
movss ALPHAR,%xmm0
movss ALPHAI,%xmm1
jge .L00t /* remaining count was >= I (signed): run a pass with M = I */

movq MMM,M
addq I,M /* M = MMM + I, i.e. the count that was left before the subq */
jle .L999x /* M <= 0: nothing left, leave the loop */

.L00t:
/* Reload the saved values for this pass. */
movq AA, A
movq NN, N
movq LDAX, LDA
movq XX, X

movq OLD_INCX, INCX
movq OLD_Y, Y
# movq OLD_Y, Y
movq OLD_INCY, INCY
movq OLD_BUFFER, BUFFER

Expand Down Expand Up @@ -4274,6 +4318,11 @@
ALIGN_3

.L999:
/* Add M << ZBASE_SHIFT to the saved A value (AA) and jump back to .L0t
   for the next pass.
   NOTE(review): relies on M still holding this pass's count here - confirm. */
movq M, I
salq $ZBASE_SHIFT,I
addq I,AA
jmp .L0t
/* .L999x: target of the "jle .L999x" in .L0t, taken when nothing is left. */
.L999x:
movq 0(%rsp), %rbx
movq 8(%rsp), %rbp
movq 16(%rsp), %r12
Expand Down
48 changes: 46 additions & 2 deletions kernel/x86_64/cgemv_t.S
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,19 @@

#ifndef WINDOWS_ABI

#define STACKSIZE 64
#define STACKSIZE 128

#define OLD_INCX 8 + STACKSIZE(%rsp)
#define OLD_Y 16 + STACKSIZE(%rsp)
#define OLD_INCY 24 + STACKSIZE(%rsp)
#define OLD_BUFFER 32 + STACKSIZE(%rsp)
#define ALPHA 48 (%rsp)
/* Stack slots (offsets from %rsp) used by the .L0t loop: the entry code
   stores M, N, A and LDA here, and .L00t reloads them on every pass.
   ALPHAR/ALPHAI receive %xmm0/%xmm1 (presumably the real and imaginary
   parts of alpha - confirm against the caller). */
#define MMM 64(%rsp)
#define NN 72(%rsp)
#define AA 80(%rsp)
#define LDAX 88(%rsp)
#define ALPHAR 96(%rsp)
#define ALPHAI 104(%rsp)

#define M %rdi
#define N %rsi
Expand All @@ -66,7 +72,7 @@

#else

#define STACKSIZE 256
#define STACKSIZE 288

#define OLD_ALPHA_I 40 + STACKSIZE(%rsp)
#define OLD_A 48 + STACKSIZE(%rsp)
Expand All @@ -78,6 +84,13 @@
#define OLD_BUFFER 96 + STACKSIZE(%rsp)
#define ALPHA 224 (%rsp)

/* WINDOWS_ABI variant of the stack slots (offsets from %rsp) used by the
   .L0t loop: saved M, N, A, LDA and the %xmm0/%xmm1 values stored as
   ALPHAR/ALPHAI. */
#define MMM 232(%rsp)
#define NN 240(%rsp)
#define AA 248(%rsp)
#define LDAX 256(%rsp)
#define ALPHAR 264(%rsp)
#define ALPHAI 272(%rsp)

#define M %rcx
#define N %rdx
#define A %r8
Expand Down Expand Up @@ -144,6 +157,32 @@
movss OLD_ALPHA_I, %xmm1
#endif

/* Stash the current A, N, M, LDA and the %xmm0/%xmm1 values in stack
   slots so that each pass through .L0t/.L00t can reload them. */
movq A, AA
movq N, NN
movq M, MMM
movq LDA, LDAX
movss %xmm0,ALPHAR
movss %xmm1,ALPHAI

/* .L0t: choose M for the next pass, capped at 1 << 20. */
.L0t:
xorq I,I
addq $1,I
salq $20,I /* I = 1 << 20 */
subq I,MMM /* MMM -= I; the flags set here are consumed by the jge below */
movq I,M /* M = I; mov/movss do not modify the flags */
movss ALPHAR,%xmm0
movss ALPHAI,%xmm1
jge .L00t /* remaining count was >= I (signed): run a pass with M = I */

movq MMM,M
addq I,M /* M = MMM + I, i.e. the count that was left before the subq */
jle .L999x /* M <= 0: nothing left, leave the loop */

.L00t:
/* Reload the saved values for this pass. */
movq AA, A
movq NN, N
movq LDAX, LDA

movq OLD_INCX, INCX
movq OLD_Y, Y
movq OLD_INCY, INCY
Expand Down Expand Up @@ -4350,6 +4389,11 @@
ALIGN_3

.L999:
/* Add M << ZBASE_SHIFT to the saved A value (AA) and jump back to .L0t
   for the next pass.
   NOTE(review): relies on M still holding this pass's count here - confirm. */
movq M, I
salq $ZBASE_SHIFT,I
addq I,AA
jmp .L0t
/* .L999x: target of the "jle .L999x" in .L0t, taken when nothing is left. */
.L999x:
movq 0(%rsp), %rbx
movq 8(%rsp), %rbp
movq 16(%rsp), %r12
Expand Down
48 changes: 42 additions & 6 deletions kernel/x86_64/dgemv_n.S
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@

#ifndef WINDOWS_ABI

#define STACKSIZE 64
#define STACKSIZE 128

#define OLD_M %rdi
#define OLD_N %rsi
Expand All @@ -59,6 +59,11 @@
#define STACK_BUFFER 32 + STACKSIZE(%rsp)
#define ALPHA 48 (%rsp)

/* Stack slots (offsets from %rsp) used by the .L0t loop: the entry code
   stores M, N, A, LDA and X here, and .L00t reloads them on every pass. */
#define MMM 56(%rsp)
#define NN 64(%rsp)
#define AA 72(%rsp)
#define LDAX 80(%rsp)
#define XX 88(%rsp)
#else

#define STACKSIZE 256
Expand Down Expand Up @@ -137,17 +142,42 @@
movq OLD_LDA, LDA
#endif

movq STACK_INCX, INCX
movq STACK_Y, Y
movq STACK_INCY, INCY
movq STACK_BUFFER, BUFFER

#ifndef WINDOWS_ABI
movsd %xmm0, ALPHA
#else
movsd %xmm3, ALPHA
#endif

/* Y is loaded once, before the loop label; A, N, M, LDA and X are stashed
   in stack slots so that each pass through .L0t/.L00t can reload them. */
movq STACK_Y, Y
movq A,AA
movq N,NN
movq M,MMM
movq LDA,LDAX
movq X,XX

/* .L0t: choose M for the next pass, capped at 1 << 21. */
.L0t:
xorq I,I
addq $1,I
salq $21,I /* I = 1 << 21 */
subq I,MMM /* MMM -= I; the flags set here are consumed by the jge below */
movq I,M /* M = I; mov does not modify the flags */
jge .L00t /* remaining count was >= I (signed): run a pass with M = I */

movq MMM,M
addq I,M /* M = MMM + I, i.e. the count that was left before the subq */
jle .L999x /* M <= 0: nothing left, leave the loop */

.L00t:
/* Reload the saved values for this pass. */
movq XX,X
movq AA,A
movq NN,N
movq LDAX,LDA

movq STACK_INCX, INCX
movq STACK_INCY, INCY
movq STACK_BUFFER, BUFFER


leaq -1(INCY), %rax

leaq (,INCX, SIZE), INCX
Expand Down Expand Up @@ -2815,6 +2845,12 @@
ALIGN_3

.L999:
/* Add M * SIZE to the saved A value (AA) and jump back to .L0t for the
   next pass.
   NOTE(review): relies on M still holding this pass's count here - confirm. */
leaq (, M, SIZE), %rax
addq %rax,AA
jmp .L0t
ALIGN_4

/* .L999x: target of the "jle .L999x" in .L0t, taken when nothing is left. */
.L999x:
movq 0(%rsp), %rbx
movq 8(%rsp), %rbp
movq 16(%rsp), %r12
Expand Down
56 changes: 48 additions & 8 deletions kernel/x86_64/sgemv_n.S
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@

#ifndef WINDOWS_ABI

#define STACKSIZE 64
#define STACKSIZE 128

#define OLD_M %rdi
#define OLD_N %rsi
Expand All @@ -58,10 +58,14 @@
#define STACK_INCY 24 + STACKSIZE(%rsp)
#define STACK_BUFFER 32 + STACKSIZE(%rsp)
#define ALPHA 48 (%rsp)

/* Stack slots (offsets from %rsp) used by the .L0t loop: the entry code
   stores M, N, A, LDA and X here, and .L00t reloads them on every pass.
   Note that XX sits at 96, so the 8 bytes at 88(%rsp) are not named here. */
#define MMM 56(%rsp)
#define NN 64(%rsp)
#define AA 72(%rsp)
#define LDAX 80(%rsp)
#define XX 96(%rsp)
#else

#define STACKSIZE 256
#define STACKSIZE 288

#define OLD_M %rcx
#define OLD_N %rdx
Expand All @@ -74,6 +78,12 @@
#define STACK_BUFFER 88 + STACKSIZE(%rsp)
#define ALPHA 224 (%rsp)

/* WINDOWS_ABI variant of the stack slots (offsets from %rsp) used by the
   .L0t loop: the entry code stores M, N, A, LDA and X here, and .L00t
   reloads them on every pass. */
#define MMM 232(%rsp)
#define NN 240(%rsp)
#define AA 248(%rsp)
#define LDAX 256(%rsp)
#define XX 264(%rsp)
#endif

#define LDA %r8
Expand Down Expand Up @@ -137,17 +147,41 @@
movq OLD_LDA, LDA
#endif

movq STACK_INCX, INCX
movq STACK_Y, Y
movq STACK_INCY, INCY
movq STACK_BUFFER, BUFFER

#ifndef WINDOWS_ABI
movss %xmm0, ALPHA
#else
movss %xmm3, ALPHA
#endif


/* Stash M, A, N, LDA and X in stack slots so that each pass through
   .L0t/.L00t can reload them. Y is loaded once, before the loop label. */
movq M,MMM
movq A,AA
movq N,NN
movq LDA,LDAX
movq X,XX
movq STACK_Y, Y
/* .L0t: choose M for the next pass, capped at 1 << 22. */
.L0t:
xorq I,I
addq $1,I
salq $22,I /* I = 1 << 22 */
subq I,MMM /* MMM -= I; the flags set here are consumed by the jge below */
movq I,M /* M = I; mov does not modify the flags */
jge .L00t /* remaining count was >= I (signed): run a pass with M = I */

movq MMM,M
addq I,M /* M = MMM + I, i.e. the count that was left before the subq */
jle .L999x /* M <= 0: nothing left, leave the loop */

.L00t:
/* Reload the saved values for this pass. */
movq AA,A
movq NN,N
movq LDAX,LDA
movq XX,X

movq STACK_INCX, INCX
movq STACK_INCY, INCY
movq STACK_BUFFER, BUFFER

leaq (,INCX, SIZE), INCX
leaq (,INCY, SIZE), INCY
leaq (,LDA, SIZE), LDA
Expand Down Expand Up @@ -5990,6 +6024,12 @@
ALIGN_3

.L999:
/* Add M * SIZE to the saved A value (AA) and jump back to .L0t for the
   next pass.
   NOTE(review): relies on M still holding this pass's count here - confirm. */
leaq (,M,SIZE),%rax
addq %rax,AA
jmp .L0t
ALIGN_4

/* .L999x: target of the "jle .L999x" in .L0t, taken when nothing is left. */
.L999x:
movq 0(%rsp), %rbx
movq 8(%rsp), %rbp
movq 16(%rsp), %r12
Expand Down
10 changes: 5 additions & 5 deletions kernel/x86_64/sgemv_t.S
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@

#else

#define STACKSIZE 256
#define STACKSIZE 288

#define OLD_M %rcx
#define OLD_N %rdx
Expand All @@ -74,10 +74,10 @@
#define STACK_Y 72 + STACKSIZE(%rsp)
#define STACK_INCY 80 + STACKSIZE(%rsp)
#define STACK_BUFFER 88 + STACKSIZE(%rsp)
#define MMM 216(%rsp)
#define NN 224(%rsp)
#define AA 232(%rsp)
#define LDAX 240(%rsp)
#define MMM 232(%rsp)
#define NN 240(%rsp)
#define AA 248(%rsp)
#define LDAX 256(%rsp)

#endif

Expand Down
Loading

0 comments on commit 23965f1

Please sign in to comment.