Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

vec macro into_boxed codegen regression #71861

Closed
RustyYato opened this issue May 3, 2020 · 10 comments
Closed

vec macro into_boxed codegen regression #71861

RustyYato opened this issue May 3, 2020 · 10 comments
Labels
A-codegen Area: Code generation C-bug Category: This is a bug. I-slow Issue: Problems and improvements with respect to performance of generated code. ICEBreaker-Cleanup-Crew Helping to "clean up" bugs with minimal examples and bisections P-medium Medium priority regression-from-stable-to-stable Performance or correctness regression from one stable version to another. T-compiler Relevant to the compiler team, which will review and decide on the PR/issue.

Comments

@RustyYato
Copy link
Contributor

RustyYato commented May 3, 2020

This is a regression from stable 1.42.0 to the latest beta & nightly

found because of this: https://internals.rust-lang.org/t/more-efficient-boxed-slice-creation/12271

pub fn bar2(n: usize) -> Box<[u32]> {
    vec![0; n].into_boxed_slice()
}

I think the regression happened because of a missed optimization to inline alloc::raw_vec::RawVec<T,A>::allocate_in::{{closure}}, so it generates unnecessary panic handlers

stable

alloc::raw_vec::RawVec<T,A>::allocate_in::{{closure}}:
        push    rax
        call    qword ptr [rip + alloc::raw_vec::capacity_overflow@GOTPCREL]
        ud2

example::bar2:
        push    r14
        push    rbx
        push    rax
        mov     ecx, 4
        mov     rax, rdi
        mul     rcx
        jo      .LBB1_5
        mov     rbx, rdi
        mov     r14, rax
        test    rax, rax
        je      .LBB1_4
        mov     esi, 4
        mov     rdi, r14
        call    qword ptr [rip + __rust_alloc_zeroed@GOTPCREL]
        test    rax, rax
        je      .LBB1_6
        mov     rcx, rax
.LBB1_4:
        mov     rax, rcx
        mov     rdx, rbx
        add     rsp, 8
        pop     rbx
        pop     r14
        ret
.LBB1_5:
        call    alloc::raw_vec::RawVec<T,A>::allocate_in::{{closure}}
        ud2
.LBB1_6:
        mov     esi, 4
        mov     rdi, r14
        call    qword ptr [rip + alloc::alloc::handle_alloc_error@GOTPCREL]
        ud2

nightly

core::ptr::drop_in_place:
        mov     rsi, qword ptr [rdi + 8]
        test    rsi, rsi
        je      .LBB0_2
        shl     rsi, 2
        test    rsi, rsi
        je      .LBB0_2
        mov     rdi, qword ptr [rdi]
        mov     edx, 4
        jmp     qword ptr [rip + __rust_dealloc@GOTPCREL]
.LBB0_2:
        ret

alloc::raw_vec::RawVec<T,A>::allocate_in::{{closure}}:
        push    rax
        call    qword ptr [rip + alloc::raw_vec::capacity_overflow@GOTPCREL]
        ud2

alloc::raw_vec::RawVec<T,A>::allocate_in::{{closure}}:
        push    rax
        mov     rax, qword ptr [rdi]
        mov     rsi, qword ptr [rdi + 8]
        mov     rdi, rax
        call    qword ptr [rip + alloc::alloc::handle_alloc_error@GOTPCREL]
        ud2

example::bar2:
        push    r15
        push    r14
        push    rbx
        sub     rsp, 32
        mov     ecx, 4
        xor     esi, esi
        mov     rax, rdi
        mul     rcx
        mov     rbx, rax
        setno   al
        jo      .LBB3_18
        mov     r14, rdi
        mov     sil, al
        shl     rsi, 2
        mov     qword ptr [rsp + 8], rbx
        mov     qword ptr [rsp + 16], rsi
        test    rbx, rbx
        je      .LBB3_3
        mov     rdi, rbx
        call    qword ptr [rip + __rust_alloc_zeroed@GOTPCREL]
        mov     rsi, rax
.LBB3_3:
        test    rsi, rsi
        je      .LBB3_19
        mov     rax, rbx
        shr     rax, 2
        mov     qword ptr [rsp + 8], rsi
        mov     qword ptr [rsp + 16], rax
        mov     qword ptr [rsp + 24], r14
        cmp     rax, r14
        je      .LBB3_15
        jb      .LBB3_10
        test    rbx, rbx
        je      .LBB3_15
        lea     r15, [4*r14]
        cmp     rbx, r15
        je      .LBB3_14
        mov     edx, 4
        mov     rdi, rsi
        mov     rsi, rbx
        test    r15, r15
        je      .LBB3_9
        mov     rcx, r15
        call    qword ptr [rip + __rust_realloc@GOTPCREL]
        test    rax, rax
        je      .LBB3_16
        mov     rsi, rax
        mov     rbx, r15
        jmp     .LBB3_14
.LBB3_9:
        call    qword ptr [rip + __rust_dealloc@GOTPCREL]
        xor     ebx, ebx
        mov     esi, 4
.LBB3_14:
        mov     qword ptr [rsp + 8], rsi
        shr     rbx, 2
        mov     qword ptr [rsp + 16], rbx
.LBB3_15:
        mov     rax, rsi
        mov     rdx, r14
        add     rsp, 32
        pop     rbx
        pop     r14
        pop     r15
        ret
.LBB3_18:
        call    alloc::raw_vec::RawVec<T,A>::allocate_in::{{closure}}
        ud2
.LBB3_19:
        lea     rdi, [rsp + 8]
        call    alloc::raw_vec::RawVec<T,A>::allocate_in::{{closure}}
        ud2
.LBB3_10:
        lea     rdi, [rip + .L__unnamed_1]
        lea     rdx, [rip + .L__unnamed_2]
        mov     esi, 36
        call    qword ptr [rip + core::panicking::panic@GOTPCREL]
        ud2
.LBB3_16:
        mov     esi, 4
        mov     rdi, r15
        call    qword ptr [rip + alloc::alloc::handle_alloc_error@GOTPCREL]
        ud2
        mov     rbx, rax
        lea     rdi, [rsp + 8]
        call    core::ptr::drop_in_place
        mov     rdi, rbx
        call    _Unwind_Resume@PLT
        ud2

.L__unnamed_1:
        .ascii  "Tried to shrink to a larger capacity"

.L__unnamed_3:
        .ascii  "/rustc/f05a5240440b3eaef1684a7965860fab40301947/src/libcore/macros/mod.rs"

.L__unnamed_2:
        .quad   .L__unnamed_3
        .asciz  "I\000\000\000\000\000\000\000\n\000\000\000\t\000\000"
@jonas-schievink jonas-schievink added A-codegen Area: Code generation C-bug Category: This is a bug. I-prioritize Issue: Indicates that prioritization has been requested for this issue. I-slow Issue: Problems and improvements with respect to performance of generated code. regression-from-stable-to-beta Performance or correctness regression from stable to beta. T-compiler Relevant to the compiler team, which will review and decide on the PR/issue. labels May 3, 2020
@spastorino
Copy link
Member

Assigning P-high as discussed as part of the Prioritization Working Group process and removing I-prioritize.

@spastorino spastorino added P-high High priority and removed I-prioritize Issue: Indicates that prioritization has been requested for this issue. labels May 5, 2020
@spastorino
Copy link
Member

Changing the priority to P-medium as we've discussed as part of the Prioritization Working Group process, if there's evidence that this regress badly some real world code and someone can prove where and how we can switch this back to P-high.

@spastorino spastorino added P-medium Medium priority and removed P-high High priority labels May 5, 2020
@spastorino
Copy link
Member

Would be nice to identify where this has regressed ...

@rustbot ping cleanup

@rustbot
Copy link
Collaborator

rustbot commented May 21, 2020

Hey Cleanup Crew ICE-breakers! This bug has been identified as a good
"Cleanup ICE-breaking candidate". In case it's useful, here are some
instructions for tackling these sorts of bugs. Maybe take a look?
Thanks! <3

cc @AminArria @chrissimpkins @contrun @DutchGhost @elshize @ethanboxx @h-michael @HallerPatrick @hdhoang @hellow554 @imtsuki @jakevossen5 @kanru @KarlK90 @LeSeulArtichaut @MAdrianMattocks @matheus-consoli @mental32 @nmccarty @Noah-Kennedy @pard68 @PeytonT @pierreN @Redblueflame @RobbieClarken @RobertoSnap @robjtede @SarthakSingh31 @senden9 @shekohex @sinato @spastorino @turboladen @woshilapin @yerke

@rustbot rustbot added the ICEBreaker-Cleanup-Crew Helping to "clean up" bugs with minimal examples and bisections label May 21, 2020
@spastorino
Copy link
Member

This was discussed during today's weekly meeting. Removing I-nominated.

@kanru
Copy link
Contributor

kanru commented May 22, 2020

searched nightlies: from nightly-2020-02-02 to nightly-2020-05-22
regressed nightly: nightly-2020-03-04
searched commits: from 18c275b to 4ad6248
regressed commit: a5de254

bisected with cargo-bisect-rustc v0.5.1

Host triple: x86_64-unknown-linux-gnu
Reproduce with:

cargo bisect-rustc --start 2020-02-02 --prompt -- asm issue71861::bar2 

@kanru
Copy link
Contributor

kanru commented May 22, 2020

Bisected with git bisect
d8e3557 is the first bad commit

@pickfire
Copy link
Contributor

pickfire commented Aug 18, 2020

This might have affected FromIterator too #75636

@cuviper
Copy link
Member

cuviper commented Aug 18, 2020

#75677 removes the panic, "Tried to shrink to a larger capacity". However, I think this issue could be further improved if the compiler could also see that it will never need a realloc, since the vec![0; n] should be created with the exact capacity in the first place.

bors added a commit to rust-lang-ci/rust that referenced this issue Aug 19, 2020
Don't panic in Vec::shrink_to_fit

We can help the compiler see that `Vec::shrink_to_fit` will never reach the panic case in `RawVec::shrink_to_fit`, just by guarding the call only for cases where the capacity is strictly greater. A capacity less than the length is only possible through an unsafe call to `set_len`, which would break the `Vec` invariants, so `shrink_to_fit` can just ignore that.

This removes the panicking code from the examples in both rust-lang#71861 and rust-lang#75636.
@pietroalbini pietroalbini added regression-from-stable-to-stable Performance or correctness regression from one stable version to another. and removed regression-from-stable-to-beta Performance or correctness regression from stable to beta. labels Aug 21, 2020
@istankovic
Copy link
Contributor

I think this is fine now with current nightly:

example::bar2:
        pushq   %r14
        pushq   %rbx
        pushq   %rax
        movq    %rdi, %rbx
        movl    $4, %eax
        testq   %rdi, %rdi
        je      .LBB0_4
        movq    %rbx, %rcx
        shrq    $61, %rcx
        jne     .LBB0_5
        leaq    (,%rbx,4), %r14
        testq   %r14, %r14
        je      .LBB0_4
        movl    $4, %esi
        movq    %r14, %rdi
        callq   *__rust_alloc_zeroed@GOTPCREL(%rip)
        testq   %rax, %rax
        je      .LBB0_6
.LBB0_4:
        movq    %rbx, %rdx
        addq    $8, %rsp
        popq    %rbx
        popq    %r14
        retq
.LBB0_5:
        callq   *alloc::raw_vec::capacity_overflow@GOTPCREL(%rip)
        ud2
.LBB0_6:
        movl    $4, %edi
        movq    %r14, %rsi
        callq   *alloc::alloc::handle_alloc_error@GOTPCREL(%rip)
        ud2

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
A-codegen Area: Code generation C-bug Category: This is a bug. I-slow Issue: Problems and improvements with respect to performance of generated code. ICEBreaker-Cleanup-Crew Helping to "clean up" bugs with minimal examples and bisections P-medium Medium priority regression-from-stable-to-stable Performance or correctness regression from one stable version to another. T-compiler Relevant to the compiler team, which will review and decide on the PR/issue.
Projects
None yet
Development

No branches or pull requests

10 participants