From 0cd286de061d32f38c3d833dcfb583bf176bb426 Mon Sep 17 00:00:00 2001 From: Shigeki Ohtsu Date: Wed, 4 May 2016 03:06:14 +0900 Subject: [PATCH] deps: update openssl asm files Regenerate asm files with Makefile without CC and ASM envs. Fixes: https://github.com/nodejs/node/issues/6458 PR-URL: https://github.com/nodejs/node/pull/6553 Reviewed-By: Ben Noordhuis --- deps/openssl/asm/x64-elf-gas/bn/x86_64-mont.s | 43 ++++++++++++- .../openssl/asm/x64-elf-gas/bn/x86_64-mont5.s | 24 ++++++++ .../asm/x64-macosx-gas/bn/x86_64-mont.s | 43 ++++++++++++- .../asm/x64-macosx-gas/bn/x86_64-mont5.s | 24 ++++++++ .../asm/x64-win32-masm/bn/x86_64-mont.asm | 43 ++++++++++++- .../asm/x64-win32-masm/bn/x86_64-mont5.asm | 24 ++++++++ deps/openssl/asm/x86-elf-gas/bn/x86-mont.s | 60 +++++++++++-------- deps/openssl/asm/x86-macosx-gas/bn/x86-mont.s | 60 +++++++++++-------- .../asm/x86-win32-masm/bn/x86-mont.asm | 60 +++++++++++-------- 9 files changed, 294 insertions(+), 87 deletions(-) diff --git a/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont.s b/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont.s index ea12bd408cb5..bdeb75d3500b 100644 --- a/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont.s +++ b/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont.s @@ -31,6 +31,21 @@ bn_mul_mont: movq %r11,8(%rsp,%r9,8) .Lmul_body: + + + + + + + subq %rsp,%r11 + andq $-4096,%r11 +.Lmul_page_walk: + movq (%rsp,%r11,1),%r10 + subq $4096,%r11 +.byte 0x66,0x2e + + jnc .Lmul_page_walk + movq %rdx,%r12 movq (%r8),%r8 movq (%r12),%rbx @@ -228,6 +243,15 @@ bn_mul4x_mont: movq %r11,8(%rsp,%r9,8) .Lmul4x_body: + subq %rsp,%r11 + andq $-4096,%r11 +.Lmul4x_page_walk: + movq (%rsp,%r11,1),%r10 + subq $4096,%r11 +.byte 0x2e + + jnc .Lmul4x_page_walk + movq %rdi,16(%rsp,%r9,8) movq %rdx,%r12 movq (%r8),%r8 @@ -610,6 +634,7 @@ bn_mul4x_mont: .align 16 bn_sqr4x_mont: .Lsqr4x_enter: + movq %rsp,%rax pushq %rbx pushq %rbp pushq %r12 @@ -618,13 +643,25 @@ bn_sqr4x_mont: pushq %r15 shll $3,%r9d - xorq %r10,%r10 movq %rsp,%r11 - subq %r9,%r10 + negq %r9 movq (%r8),%r8 - leaq -72(%rsp,%r10,2),%rsp + leaq -72(%rsp,%r9,2),%rsp andq $-1024,%rsp + subq %rsp,%r11 + andq $-4096,%r11 +.Lsqr4x_page_walk: + movq (%rsp,%r11,1),%r10 + subq $4096,%r11 +.byte 0x2e + + jnc .Lsqr4x_page_walk + + movq %r9,%r10 + negq %r9 + leaq -48(%rax),%r11 + diff --git a/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont5.s b/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont5.s index 0cbcc78dfb6b..9259a62a5738 100644 --- a/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont5.s +++ b/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont5.s @@ -32,6 +32,21 @@ bn_mul_mont_gather5: movq %rax,8(%rsp,%r9,8) .Lmul_body: + + + + + + + subq %rsp,%rax + andq $-4096,%rax +.Lmul_page_walk: + movq (%rsp,%rax,1),%r11 + subq $4096,%rax +.byte 0x2e + + jnc .Lmul_page_walk + leaq 128(%rdx),%r12 movdqa 0(%r10),%xmm0 movdqa 16(%r10),%xmm1 @@ -420,6 +435,15 @@ bn_mul4x_mont_gather5: movq %rax,8(%rsp,%r9,8) .Lmul4x_body: + subq %rsp,%rax + andq $-4096,%rax +.Lmul4x_page_walk: + movq (%rsp,%rax,1),%r11 + subq $4096,%rax +.byte 0x2e + + jnc .Lmul4x_page_walk + movq %rdi,16(%rsp,%r9,8) leaq 128(%rdx),%r12 movdqa 0(%r10),%xmm0 diff --git a/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont.s b/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont.s index ece106c49842..859e1405d4f2 100644 --- a/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont.s +++ b/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont.s @@ -31,6 +31,21 @@ L$mul_enter: movq %r11,8(%rsp,%r9,8) L$mul_body: + + + + + + + subq %rsp,%r11 + andq $-4096,%r11 +L$mul_page_walk: + movq (%rsp,%r11,1),%r10 + subq 
$4096,%r11 +.byte 0x66,0x2e + + jnc L$mul_page_walk + movq %rdx,%r12 movq (%r8),%r8 movq (%r12),%rbx @@ -228,6 +243,15 @@ L$mul4x_enter: movq %r11,8(%rsp,%r9,8) L$mul4x_body: + subq %rsp,%r11 + andq $-4096,%r11 +L$mul4x_page_walk: + movq (%rsp,%r11,1),%r10 + subq $4096,%r11 +.byte 0x2e + + jnc L$mul4x_page_walk + movq %rdi,16(%rsp,%r9,8) movq %rdx,%r12 movq (%r8),%r8 @@ -610,6 +634,7 @@ L$mul4x_epilogue: .p2align 4 bn_sqr4x_mont: L$sqr4x_enter: + movq %rsp,%rax pushq %rbx pushq %rbp pushq %r12 @@ -618,13 +643,25 @@ L$sqr4x_enter: pushq %r15 shll $3,%r9d - xorq %r10,%r10 movq %rsp,%r11 - subq %r9,%r10 + negq %r9 movq (%r8),%r8 - leaq -72(%rsp,%r10,2),%rsp + leaq -72(%rsp,%r9,2),%rsp andq $-1024,%rsp + subq %rsp,%r11 + andq $-4096,%r11 +L$sqr4x_page_walk: + movq (%rsp,%r11,1),%r10 + subq $4096,%r11 +.byte 0x2e + + jnc L$sqr4x_page_walk + + movq %r9,%r10 + negq %r9 + leaq -48(%rax),%r11 + diff --git a/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont5.s b/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont5.s index cb4543ea60b1..c91081ac9a3f 100644 --- a/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont5.s +++ b/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont5.s @@ -32,6 +32,21 @@ L$mul_alloca: movq %rax,8(%rsp,%r9,8) L$mul_body: + + + + + + + subq %rsp,%rax + andq $-4096,%rax +L$mul_page_walk: + movq (%rsp,%rax,1),%r11 + subq $4096,%rax +.byte 0x2e + + jnc L$mul_page_walk + leaq 128(%rdx),%r12 movdqa 0(%r10),%xmm0 movdqa 16(%r10),%xmm1 @@ -420,6 +435,15 @@ L$mul4x_alloca: movq %rax,8(%rsp,%r9,8) L$mul4x_body: + subq %rsp,%rax + andq $-4096,%rax +L$mul4x_page_walk: + movq (%rsp,%rax,1),%r11 + subq $4096,%rax +.byte 0x2e + + jnc L$mul4x_page_walk + movq %rdi,16(%rsp,%r9,8) leaq 128(%rdx),%r12 movdqa 0(%r10),%xmm0 diff --git a/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont.asm b/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont.asm index f4518aa3bdb0..c8b8f5037182 100644 --- a/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont.asm +++ b/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont.asm @@ -43,6 +43,21 @@ $L$mul_enter:: mov QWORD PTR[8+r9*8+rsp],r11 $L$mul_body:: + + + + + + + sub r11,rsp + and r11,-4096 +$L$mul_page_walk:: + mov r10,QWORD PTR[r11*1+rsp] + sub r11,4096 +DB 066h,02eh + + jnc $L$mul_page_walk + mov r12,rdx mov r8,QWORD PTR[r8] mov rbx,QWORD PTR[r12] @@ -255,6 +270,15 @@ $L$mul4x_enter:: mov QWORD PTR[8+r9*8+rsp],r11 $L$mul4x_body:: + sub r11,rsp + and r11,-4096 +$L$mul4x_page_walk:: + mov r10,QWORD PTR[r11*1+rsp] + sub r11,4096 +DB 02eh + + jnc $L$mul4x_page_walk + mov QWORD PTR[16+r9*8+rsp],rdi mov r12,rdx mov r8,QWORD PTR[r8] @@ -652,6 +676,7 @@ $L$SEH_begin_bn_sqr4x_mont:: $L$sqr4x_enter:: + mov rax,rsp push rbx push rbp push r12 @@ -660,13 +685,25 @@ $L$sqr4x_enter:: push r15 shl r9d,3 - xor r10,r10 mov r11,rsp - sub r10,r9 + neg r9 mov r8,QWORD PTR[r8] - lea rsp,QWORD PTR[((-72))+r10*2+rsp] + lea rsp,QWORD PTR[((-72))+r9*2+rsp] and rsp,-1024 + sub r11,rsp + and r11,-4096 +$L$sqr4x_page_walk:: + mov r10,QWORD PTR[r11*1+rsp] + sub r11,4096 +DB 02eh + + jnc $L$sqr4x_page_walk + + mov r10,r9 + neg r9 + lea r11,QWORD PTR[((-48))+rax] + diff --git a/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont5.asm b/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont5.asm index 0df45e340306..535b31bf431f 100644 --- a/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont5.asm +++ b/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont5.asm @@ -44,6 +44,21 @@ $L$mul_alloca:: mov QWORD PTR[8+r9*8+rsp],rax $L$mul_body:: + + + + + + + sub rax,rsp + and rax,-4096 +$L$mul_page_walk:: + mov r11,QWORD PTR[rax*1+rsp] + sub rax,4096 +DB 02eh 
+ + jnc $L$mul_page_walk + lea r12,QWORD PTR[128+rdx] movdqa xmm0,XMMWORD PTR[r10] movdqa xmm1,XMMWORD PTR[16+r10] @@ -447,6 +462,15 @@ $L$mul4x_alloca:: mov QWORD PTR[8+r9*8+rsp],rax $L$mul4x_body:: + sub rax,rsp + and rax,-4096 +$L$mul4x_page_walk:: + mov r11,QWORD PTR[rax*1+rsp] + sub rax,4096 +DB 02eh + + jnc $L$mul4x_page_walk + mov QWORD PTR[16+r9*8+rsp],rdi lea r12,QWORD PTR[128+rdx] movdqa xmm0,XMMWORD PTR[r10] diff --git a/deps/openssl/asm/x86-elf-gas/bn/x86-mont.s b/deps/openssl/asm/x86-elf-gas/bn/x86-mont.s index d71cc6441c42..40c8016d1408 100644 --- a/deps/openssl/asm/x86-elf-gas/bn/x86-mont.s +++ b/deps/openssl/asm/x86-elf-gas/bn/x86-mont.s @@ -29,6 +29,14 @@ bn_mul_mont: xorl $2048,%edx subl %edx,%esp andl $-64,%esp + movl %ebp,%eax + subl %esp,%eax + andl $-4096,%eax +.L001page_walk: + movl (%esp,%eax,1),%edx + subl $4096,%eax +.byte 46 + jnc .L001page_walk movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx @@ -52,12 +60,12 @@ bn_mul_mont: leal 4(%edi,%ebx,4),%eax orl %edx,%ebp movl (%edi),%edi - jz .L001bn_sqr_mont + jz .L002bn_sqr_mont movl %eax,28(%esp) movl (%esi),%eax xorl %edx,%edx .align 16 -.L002mull: +.L003mull: movl %edx,%ebp mull %edi addl %eax,%ebp @@ -66,7 +74,7 @@ bn_mul_mont: movl (%esi,%ecx,4),%eax cmpl %ebx,%ecx movl %ebp,28(%esp,%ecx,4) - jl .L002mull + jl .L003mull movl %edx,%ebp mull %edi movl 20(%esp),%edi @@ -84,9 +92,9 @@ bn_mul_mont: movl 4(%esi),%eax adcl $0,%edx incl %ecx - jmp .L0032ndmadd + jmp .L0042ndmadd .align 16 -.L0041stmadd: +.L0051stmadd: movl %edx,%ebp mull %edi addl 32(%esp,%ecx,4),%ebp @@ -97,7 +105,7 @@ bn_mul_mont: adcl $0,%edx cmpl %ebx,%ecx movl %ebp,28(%esp,%ecx,4) - jl .L0041stmadd + jl .L0051stmadd movl %edx,%ebp mull %edi addl 32(%esp,%ebx,4),%eax @@ -120,7 +128,7 @@ bn_mul_mont: adcl $0,%edx movl $1,%ecx .align 16 -.L0032ndmadd: +.L0042ndmadd: movl %edx,%ebp mull %edi addl 32(%esp,%ecx,4),%ebp @@ -131,7 +139,7 @@ bn_mul_mont: adcl $0,%edx cmpl %ebx,%ecx movl %ebp,24(%esp,%ecx,4) - jl .L0032ndmadd + jl .L0042ndmadd movl %edx,%ebp mull %edi addl 32(%esp,%ebx,4),%ebp @@ -147,16 +155,16 @@ bn_mul_mont: movl %edx,32(%esp,%ebx,4) cmpl 28(%esp),%ecx movl %eax,36(%esp,%ebx,4) - je .L005common_tail + je .L006common_tail movl (%ecx),%edi movl 8(%esp),%esi movl %ecx,12(%esp) xorl %ecx,%ecx xorl %edx,%edx movl (%esi),%eax - jmp .L0041stmadd + jmp .L0051stmadd .align 16 -.L001bn_sqr_mont: +.L002bn_sqr_mont: movl %ebx,(%esp) movl %ecx,12(%esp) movl %edi,%eax @@ -167,7 +175,7 @@ bn_mul_mont: andl $1,%ebx incl %ecx .align 16 -.L006sqr: +.L007sqr: movl (%esi,%ecx,4),%eax movl %edx,%ebp mull %edi @@ -179,7 +187,7 @@ bn_mul_mont: cmpl (%esp),%ecx movl %eax,%ebx movl %ebp,28(%esp,%ecx,4) - jl .L006sqr + jl .L007sqr movl (%esi,%ecx,4),%eax movl %edx,%ebp mull %edi @@ -203,7 +211,7 @@ bn_mul_mont: movl 4(%esi),%eax movl $1,%ecx .align 16 -.L0073rdmadd: +.L0083rdmadd: movl %edx,%ebp mull %edi addl 32(%esp,%ecx,4),%ebp @@ -222,7 +230,7 @@ bn_mul_mont: adcl $0,%edx cmpl %ebx,%ecx movl %ebp,24(%esp,%ecx,4) - jl .L0073rdmadd + jl .L0083rdmadd movl %edx,%ebp mull %edi addl 32(%esp,%ebx,4),%ebp @@ -238,7 +246,7 @@ bn_mul_mont: movl %edx,32(%esp,%ebx,4) cmpl %ebx,%ecx movl %eax,36(%esp,%ebx,4) - je .L005common_tail + je .L006common_tail movl 4(%esi,%ecx,4),%edi leal 1(%ecx),%ecx movl %edi,%eax @@ -250,12 +258,12 @@ bn_mul_mont: xorl %ebp,%ebp cmpl %ebx,%ecx leal 1(%ecx),%ecx - je .L008sqrlast + je .L009sqrlast movl %edx,%ebx shrl $1,%edx andl $1,%ebx .align 16 -.L009sqradd: +.L010sqradd: movl (%esi,%ecx,4),%eax movl %edx,%ebp mull %edi @@ -271,13 +279,13 
@@ bn_mul_mont: cmpl (%esp),%ecx movl %ebp,28(%esp,%ecx,4) movl %eax,%ebx - jle .L009sqradd + jle .L010sqradd movl %edx,%ebp addl %edx,%edx shrl $31,%ebp addl %ebx,%edx adcl $0,%ebp -.L008sqrlast: +.L009sqrlast: movl 20(%esp),%edi movl 16(%esp),%esi imull 32(%esp),%edi @@ -292,9 +300,9 @@ bn_mul_mont: adcl $0,%edx movl $1,%ecx movl 4(%esi),%eax - jmp .L0073rdmadd + jmp .L0083rdmadd .align 16 -.L005common_tail: +.L006common_tail: movl 16(%esp),%ebp movl 4(%esp),%edi leal 32(%esp),%esi @@ -302,13 +310,13 @@ bn_mul_mont: movl %ebx,%ecx xorl %edx,%edx .align 16 -.L010sub: +.L011sub: sbbl (%ebp,%edx,4),%eax movl %eax,(%edi,%edx,4) decl %ecx movl 4(%esi,%edx,4),%eax leal 1(%edx),%edx - jge .L010sub + jge .L011sub sbbl $0,%eax andl %eax,%esi notl %eax @@ -316,12 +324,12 @@ bn_mul_mont: andl %eax,%ebp orl %ebp,%esi .align 16 -.L011copy: +.L012copy: movl (%esi,%ebx,4),%eax movl %eax,(%edi,%ebx,4) movl %ecx,32(%esp,%ebx,4) decl %ebx - jge .L011copy + jge .L012copy movl 24(%esp),%esp movl $1,%eax .L000just_leave: diff --git a/deps/openssl/asm/x86-macosx-gas/bn/x86-mont.s b/deps/openssl/asm/x86-macosx-gas/bn/x86-mont.s index 48598cc62dd5..bec6bbe82482 100644 --- a/deps/openssl/asm/x86-macosx-gas/bn/x86-mont.s +++ b/deps/openssl/asm/x86-macosx-gas/bn/x86-mont.s @@ -28,6 +28,14 @@ L_bn_mul_mont_begin: xorl $2048,%edx subl %edx,%esp andl $-64,%esp + movl %ebp,%eax + subl %esp,%eax + andl $-4096,%eax +L001page_walk: + movl (%esp,%eax,1),%edx + subl $4096,%eax +.byte 46 + jnc L001page_walk movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx @@ -51,12 +59,12 @@ L_bn_mul_mont_begin: leal 4(%edi,%ebx,4),%eax orl %edx,%ebp movl (%edi),%edi - jz L001bn_sqr_mont + jz L002bn_sqr_mont movl %eax,28(%esp) movl (%esi),%eax xorl %edx,%edx .align 4,0x90 -L002mull: +L003mull: movl %edx,%ebp mull %edi addl %eax,%ebp @@ -65,7 +73,7 @@ L002mull: movl (%esi,%ecx,4),%eax cmpl %ebx,%ecx movl %ebp,28(%esp,%ecx,4) - jl L002mull + jl L003mull movl %edx,%ebp mull %edi movl 20(%esp),%edi @@ -83,9 +91,9 @@ L002mull: movl 4(%esi),%eax adcl $0,%edx incl %ecx - jmp L0032ndmadd + jmp L0042ndmadd .align 4,0x90 -L0041stmadd: +L0051stmadd: movl %edx,%ebp mull %edi addl 32(%esp,%ecx,4),%ebp @@ -96,7 +104,7 @@ L0041stmadd: adcl $0,%edx cmpl %ebx,%ecx movl %ebp,28(%esp,%ecx,4) - jl L0041stmadd + jl L0051stmadd movl %edx,%ebp mull %edi addl 32(%esp,%ebx,4),%eax @@ -119,7 +127,7 @@ L0041stmadd: adcl $0,%edx movl $1,%ecx .align 4,0x90 -L0032ndmadd: +L0042ndmadd: movl %edx,%ebp mull %edi addl 32(%esp,%ecx,4),%ebp @@ -130,7 +138,7 @@ L0032ndmadd: adcl $0,%edx cmpl %ebx,%ecx movl %ebp,24(%esp,%ecx,4) - jl L0032ndmadd + jl L0042ndmadd movl %edx,%ebp mull %edi addl 32(%esp,%ebx,4),%ebp @@ -146,16 +154,16 @@ L0032ndmadd: movl %edx,32(%esp,%ebx,4) cmpl 28(%esp),%ecx movl %eax,36(%esp,%ebx,4) - je L005common_tail + je L006common_tail movl (%ecx),%edi movl 8(%esp),%esi movl %ecx,12(%esp) xorl %ecx,%ecx xorl %edx,%edx movl (%esi),%eax - jmp L0041stmadd + jmp L0051stmadd .align 4,0x90 -L001bn_sqr_mont: +L002bn_sqr_mont: movl %ebx,(%esp) movl %ecx,12(%esp) movl %edi,%eax @@ -166,7 +174,7 @@ L001bn_sqr_mont: andl $1,%ebx incl %ecx .align 4,0x90 -L006sqr: +L007sqr: movl (%esi,%ecx,4),%eax movl %edx,%ebp mull %edi @@ -178,7 +186,7 @@ L006sqr: cmpl (%esp),%ecx movl %eax,%ebx movl %ebp,28(%esp,%ecx,4) - jl L006sqr + jl L007sqr movl (%esi,%ecx,4),%eax movl %edx,%ebp mull %edi @@ -202,7 +210,7 @@ L006sqr: movl 4(%esi),%eax movl $1,%ecx .align 4,0x90 -L0073rdmadd: +L0083rdmadd: movl %edx,%ebp mull %edi addl 32(%esp,%ecx,4),%ebp @@ -221,7 +229,7 @@ L0073rdmadd: 
adcl $0,%edx cmpl %ebx,%ecx movl %ebp,24(%esp,%ecx,4) - jl L0073rdmadd + jl L0083rdmadd movl %edx,%ebp mull %edi addl 32(%esp,%ebx,4),%ebp @@ -237,7 +245,7 @@ L0073rdmadd: movl %edx,32(%esp,%ebx,4) cmpl %ebx,%ecx movl %eax,36(%esp,%ebx,4) - je L005common_tail + je L006common_tail movl 4(%esi,%ecx,4),%edi leal 1(%ecx),%ecx movl %edi,%eax @@ -249,12 +257,12 @@ L0073rdmadd: xorl %ebp,%ebp cmpl %ebx,%ecx leal 1(%ecx),%ecx - je L008sqrlast + je L009sqrlast movl %edx,%ebx shrl $1,%edx andl $1,%ebx .align 4,0x90 -L009sqradd: +L010sqradd: movl (%esi,%ecx,4),%eax movl %edx,%ebp mull %edi @@ -270,13 +278,13 @@ L009sqradd: cmpl (%esp),%ecx movl %ebp,28(%esp,%ecx,4) movl %eax,%ebx - jle L009sqradd + jle L010sqradd movl %edx,%ebp addl %edx,%edx shrl $31,%ebp addl %ebx,%edx adcl $0,%ebp -L008sqrlast: +L009sqrlast: movl 20(%esp),%edi movl 16(%esp),%esi imull 32(%esp),%edi @@ -291,9 +299,9 @@ L008sqrlast: adcl $0,%edx movl $1,%ecx movl 4(%esi),%eax - jmp L0073rdmadd + jmp L0083rdmadd .align 4,0x90 -L005common_tail: +L006common_tail: movl 16(%esp),%ebp movl 4(%esp),%edi leal 32(%esp),%esi @@ -301,13 +309,13 @@ L005common_tail: movl %ebx,%ecx xorl %edx,%edx .align 4,0x90 -L010sub: +L011sub: sbbl (%ebp,%edx,4),%eax movl %eax,(%edi,%edx,4) decl %ecx movl 4(%esi,%edx,4),%eax leal 1(%edx),%edx - jge L010sub + jge L011sub sbbl $0,%eax andl %eax,%esi notl %eax @@ -315,12 +323,12 @@ L010sub: andl %eax,%ebp orl %ebp,%esi .align 4,0x90 -L011copy: +L012copy: movl (%esi,%ebx,4),%eax movl %eax,(%edi,%ebx,4) movl %ecx,32(%esp,%ebx,4) decl %ebx - jge L011copy + jge L012copy movl 24(%esp),%esp movl $1,%eax L000just_leave: diff --git a/deps/openssl/asm/x86-win32-masm/bn/x86-mont.asm b/deps/openssl/asm/x86-win32-masm/bn/x86-mont.asm index 031be4e7ea51..476e96dc5499 100644 --- a/deps/openssl/asm/x86-win32-masm/bn/x86-mont.asm +++ b/deps/openssl/asm/x86-win32-masm/bn/x86-mont.asm @@ -37,6 +37,14 @@ $L_bn_mul_mont_begin:: xor edx,2048 sub esp,edx and esp,-64 + mov eax,ebp + sub eax,esp + and eax,-4096 +$L001page_walk: + mov edx,DWORD PTR [eax*1+esp] + sub eax,4096 +DB 46 + jnc $L001page_walk mov eax,DWORD PTR [esi] mov ebx,DWORD PTR 4[esi] mov ecx,DWORD PTR 8[esi] @@ -60,12 +68,12 @@ $L_bn_mul_mont_begin:: lea eax,DWORD PTR 4[ebx*4+edi] or ebp,edx mov edi,DWORD PTR [edi] - jz $L001bn_sqr_mont + jz $L002bn_sqr_mont mov DWORD PTR 28[esp],eax mov eax,DWORD PTR [esi] xor edx,edx ALIGN 16 -$L002mull: +$L003mull: mov ebp,edx mul edi add ebp,eax @@ -74,7 +82,7 @@ $L002mull: mov eax,DWORD PTR [ecx*4+esi] cmp ecx,ebx mov DWORD PTR 28[ecx*4+esp],ebp - jl $L002mull + jl $L003mull mov ebp,edx mul edi mov edi,DWORD PTR 20[esp] @@ -92,9 +100,9 @@ $L002mull: mov eax,DWORD PTR 4[esi] adc edx,0 inc ecx - jmp $L0032ndmadd + jmp $L0042ndmadd ALIGN 16 -$L0041stmadd: +$L0051stmadd: mov ebp,edx mul edi add ebp,DWORD PTR 32[ecx*4+esp] @@ -105,7 +113,7 @@ $L0041stmadd: adc edx,0 cmp ecx,ebx mov DWORD PTR 28[ecx*4+esp],ebp - jl $L0041stmadd + jl $L0051stmadd mov ebp,edx mul edi add eax,DWORD PTR 32[ebx*4+esp] @@ -128,7 +136,7 @@ $L0041stmadd: adc edx,0 mov ecx,1 ALIGN 16 -$L0032ndmadd: +$L0042ndmadd: mov ebp,edx mul edi add ebp,DWORD PTR 32[ecx*4+esp] @@ -139,7 +147,7 @@ $L0032ndmadd: adc edx,0 cmp ecx,ebx mov DWORD PTR 24[ecx*4+esp],ebp - jl $L0032ndmadd + jl $L0042ndmadd mov ebp,edx mul edi add ebp,DWORD PTR 32[ebx*4+esp] @@ -155,16 +163,16 @@ $L0032ndmadd: mov DWORD PTR 32[ebx*4+esp],edx cmp ecx,DWORD PTR 28[esp] mov DWORD PTR 36[ebx*4+esp],eax - je $L005common_tail + je $L006common_tail mov edi,DWORD PTR [ecx] mov esi,DWORD PTR 8[esp] mov DWORD 
PTR 12[esp],ecx xor ecx,ecx xor edx,edx mov eax,DWORD PTR [esi] - jmp $L0041stmadd + jmp $L0051stmadd ALIGN 16 -$L001bn_sqr_mont: +$L002bn_sqr_mont: mov DWORD PTR [esp],ebx mov DWORD PTR 12[esp],ecx mov eax,edi @@ -175,7 +183,7 @@ $L001bn_sqr_mont: and ebx,1 inc ecx ALIGN 16 -$L006sqr: +$L007sqr: mov eax,DWORD PTR [ecx*4+esi] mov ebp,edx mul edi @@ -187,7 +195,7 @@ $L006sqr: cmp ecx,DWORD PTR [esp] mov ebx,eax mov DWORD PTR 28[ecx*4+esp],ebp - jl $L006sqr + jl $L007sqr mov eax,DWORD PTR [ecx*4+esi] mov ebp,edx mul edi @@ -211,7 +219,7 @@ $L006sqr: mov eax,DWORD PTR 4[esi] mov ecx,1 ALIGN 16 -$L0073rdmadd: +$L0083rdmadd: mov ebp,edx mul edi add ebp,DWORD PTR 32[ecx*4+esp] @@ -230,7 +238,7 @@ $L0073rdmadd: adc edx,0 cmp ecx,ebx mov DWORD PTR 24[ecx*4+esp],ebp - jl $L0073rdmadd + jl $L0083rdmadd mov ebp,edx mul edi add ebp,DWORD PTR 32[ebx*4+esp] @@ -246,7 +254,7 @@ $L0073rdmadd: mov DWORD PTR 32[ebx*4+esp],edx cmp ecx,ebx mov DWORD PTR 36[ebx*4+esp],eax - je $L005common_tail + je $L006common_tail mov edi,DWORD PTR 4[ecx*4+esi] lea ecx,DWORD PTR 1[ecx] mov eax,edi @@ -258,12 +266,12 @@ $L0073rdmadd: xor ebp,ebp cmp ecx,ebx lea ecx,DWORD PTR 1[ecx] - je $L008sqrlast + je $L009sqrlast mov ebx,edx shr edx,1 and ebx,1 ALIGN 16 -$L009sqradd: +$L010sqradd: mov eax,DWORD PTR [ecx*4+esi] mov ebp,edx mul edi @@ -279,13 +287,13 @@ $L009sqradd: cmp ecx,DWORD PTR [esp] mov DWORD PTR 28[ecx*4+esp],ebp mov ebx,eax - jle $L009sqradd + jle $L010sqradd mov ebp,edx add edx,edx shr ebp,31 add edx,ebx adc ebp,0 -$L008sqrlast: +$L009sqrlast: mov edi,DWORD PTR 20[esp] mov esi,DWORD PTR 16[esp] imul edi,DWORD PTR 32[esp] @@ -300,9 +308,9 @@ $L008sqrlast: adc edx,0 mov ecx,1 mov eax,DWORD PTR 4[esi] - jmp $L0073rdmadd + jmp $L0083rdmadd ALIGN 16 -$L005common_tail: +$L006common_tail: mov ebp,DWORD PTR 16[esp] mov edi,DWORD PTR 4[esp] lea esi,DWORD PTR 32[esp] @@ -310,13 +318,13 @@ $L005common_tail: mov ecx,ebx xor edx,edx ALIGN 16 -$L010sub: +$L011sub: sbb eax,DWORD PTR [edx*4+ebp] mov DWORD PTR [edx*4+edi],eax dec ecx mov eax,DWORD PTR 4[edx*4+esi] lea edx,DWORD PTR 1[edx] - jge $L010sub + jge $L011sub sbb eax,0 and esi,eax not eax @@ -324,12 +332,12 @@ $L010sub: and ebp,eax or esi,ebp ALIGN 16 -$L011copy: +$L012copy: mov eax,DWORD PTR [ebx*4+esi] mov DWORD PTR [ebx*4+edi],eax mov DWORD PTR 32[ebx*4+esp],ecx dec ebx - jge $L011copy + jge $L012copy mov esp,DWORD PTR 24[esp] mov eax,1 $L000just_leave:
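
Note on the regenerated output: every file gains the same upstream OpenSSL
mitigation. bn_mul_mont and its 4x/sqr variants lower the stack pointer in a
single step by an amount derived from the caller-supplied operand length, so
the new code follows each allocation with a "page walk": a loop that loads one
word from every 4096-byte page between the saved and the lowered stack
pointer, top down, forcing the OS guard page to be hit in order rather than
jumped over. The bare prefix bytes (".byte 0x66,0x2e" / "DB 066h,02eh",
".byte 0x2e" / "DB 02eh", and ".byte 46" in the x86 files, 46 being decimal
for 0x2e) sit directly in front of the following jnc; they appear to be
branch-prediction-hint padding emitted by the perlasm generator and do not
change what the loop does.

A minimal C sketch of the probing technique, for reference only; the names
page_walk/new_sp/old_sp/buf and the hard-coded 4096-byte page size are
illustrative assumptions, not taken from the patch:

#include <stddef.h>
#include <stdlib.h>

/* Touch one byte in every 4096-byte page of [new_sp, old_sp], top down,
 * mirroring the .L*_page_walk loops added above. */
static void page_walk(volatile char *new_sp, char *old_sp)
{
    /* and $-4096: round the distance down to a page boundary */
    size_t off = (size_t)(old_sp - (char *)new_sp) & ~(size_t)4095;

    for (;;) {
        (void)new_sp[off];   /* mov (%rsp,%r11,1),%r10: probe this page */
        if (off < 4096)      /* jnc: stop once the offset would underflow */
            break;
        off -= 4096;         /* sub $4096,%r11 */
    }
}

int main(void)
{
    enum { N = 5 * 4096 };
    char *buf = malloc(N);           /* heap stand-in for the stack span */
    if (buf != NULL) {
        page_walk(buf, buf + N - 1); /* probes offsets 4*4096 ... 0 */
        free(buf);
    }
    return 0;
}

The walk runs from the highest page down to offset 0, matching the asm: the
first probe lands just below the old stack pointer and the last one on the
new one, so no guard page in between can be skipped.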