Optimization opportunity for constructor functions #49539

glandium · 2018-03-31T11:44:28Z

Consider the following constructor:

struct Foo([u8; 512]);

impl Foo {
    fn new() -> Foo {
        Foo([42u8; 512])
    }
}

(stupid newtype with large and stupid content to trigger a recognizable memset call)

Now, let's say we use the constructor in some way:

pub fn foo() -> Option<Foo> {
    Some(Foo::new())
}

Typical Rust code would have lots of constructs like this, in more elaborate forms.

The code above compiles to the following straightforward code:

  push rbx
  mov rbx, rdi            // rdi is the pointer we got from the caller to store the (large) Option result.
  lea rdi, [rbx + 1]      // \
  mov esi, 42             // | memset(ptr + 1, 42, 512)
  mov edx, 512            // |
  call memset@PLT         // /
  mov byte ptr [rbx], 1   // set the Option discriminant for Some.
  mov rax, rbx            // return the pointer where the Option is.
  pop rbx
  ret

Now, if for some reason the constructor is not inlined (and that can happen), here is what this becomes:

  push r14
  push rbx
  sub rsp, 520            // prepare space on the stack
  mov rbx, rdi            // rdi is the pointer we got from the caller to store the (large) Option result.
  lea r14, [rsp + 8]      // \
  mov rdi, r14            // | Foo::new(&buffer_on_the_stack)
  call Foo::new           // / meaning Foo::new will call memset(&buffer_on_the_stack, 42, 512)
  lea rdi, [rbx + 1]      // \
  mov edx, 512            // | memcpy(ptr + 1, &buffer_on_the_stack, 512)
  mov rsi, r14            // |
  call memcpy@PLT         // /
  mov byte ptr [rbx], 1   // set the Option discriminant for Some.
  mov rax, rbx            // return the pointer where the Option is.
  add rsp, 520
  pop rbx
  pop r14
  ret

I don't see a reason why this couldn't be the following instead:

  push rbx
  mov rbx, rdi            // rdi is the pointer we got from the caller to store the (large) Option result.
  lea rdi, [rbx + 1]      // \
  call Foo::new           // / Foo::new(ptr + 1)
  mov byte ptr [rbx], 1   // set the Option discriminant for Some.
  mov rax, rbx            // return the pointer where the Option is.
  pop rbx
  ret

which would avoid the useless copy, just as inlining does.

The text was updated successfully, but these errors were encountered:

glandium · 2018-03-31T12:09:03Z

Also note that the pointer we give to Foo::new doesn't even have to be on the stack:

pub fn foo() -> Vec<Foo> {
    vec![Foo::new()]
}

Compiles to the following when inlined:

  push r14
  push rbx
  sub rsp, 56
  mov r14, rdi
  lea rdx, [rsp + 8]
  mov edi, 512
  mov esi, 1
  call __rust_alloc@PLT
  mov rbx, rax
  test rbx, rbx
  je .LBB2_1
  mov esi, 42
  mov edx, 512
  mov rdi, rbx
  call memset@PLT
  mov qword ptr [r14], rbx
  mov qword ptr [r14 + 8], 1
  mov qword ptr [r14 + 16], 1
  mov rax, r14
  add rsp, 56
  pop rbx
  pop r14
  ret

And to the following when not inlined:

  push r15
  push r14
  push rbx
  sub rsp, 528
  mov r14, rdi
  lea rdx, [rsp + 16]
  mov edi, 512
  mov esi, 1
  call __rust_alloc@PLT
  mov rbx, rax
  test rbx, rbx
  je .LBB3_1
  lea r15, [rsp + 16]
  mov rdi, r15
  call Foo::new
  mov edx, 512
  mov rdi, rbx
  mov rsi, r15
  call memcpy@PLT
  mov qword ptr [r14], rbx
  mov qword ptr [r14 + 8], 1
  mov qword ptr [r14 + 16], 1
  mov rax, r14
  add rsp, 528
  pop rbx
  pop r14
  pop r15
  ret

While this could be:

  push r14
  push rbx
  sub rsp, 56
  mov r14, rdi
  lea rdx, [rsp + 8]
  mov edi, 512
  mov esi, 1
  call __rust_alloc@PLT
  mov rbx, rax
  test rbx, rbx
  je .LBB2_1
  mov rdi, rbx
  call Foo::new
  mov qword ptr [r14], rbx
  mov qword ptr [r14 + 8], 1
  mov qword ptr [r14 + 16], 1
  mov rax, r14
  add rsp, 56
  pop rbx
  pop r14
  ret

nox · 2018-03-31T12:54:39Z

Cc @rust-lang/wg-codegen

oli-obk · 2018-03-31T13:37:05Z

Isn't this essentially #13707 (comment) just for tuple struct constructors?

glandium · 2018-03-31T21:09:24Z

maybe?

eddyb · 2018-04-01T14:24:59Z

Please note that looking at assembly can hide the reasons for the generated code.
LLVM IR tends to bake in both some of the reasons and most of the effects, so it's ideal.

But generally, these sorts of things are nowadays present in the MIR, and later stages can't necessarily remove them because they lack the information to make correctness assumptions.

The plan is getting something like #47954 into the compiler in the coming months.

nikic · 2021-03-13T19:42:18Z

This optimizes well on nightly (https://godbolt.org/z/sGza7c). This is because LLVM 12 can perform call slot optimization with a GEP destination.

oli-obk added the WG-llvm Working group: LLVM backend code generation label Mar 31, 2018

XAMPPRocky added C-enhancement Category: An issue proposing an enhancement or a PR with one. A-codegen Area: Code generation T-compiler Relevant to the compiler team, which will review and decide on the PR/issue. labels Jun 29, 2018

jonas-schievink added the I-slow Issue: Problems and improvements with respect to performance of generated code. label Mar 22, 2020

nikic closed this as completed Mar 13, 2021

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Optimization opportunity for constructor functions #49539

Optimization opportunity for constructor functions #49539

glandium commented Mar 31, 2018

glandium commented Mar 31, 2018

nox commented Mar 31, 2018

oli-obk commented Mar 31, 2018

glandium commented Mar 31, 2018

eddyb commented Apr 1, 2018

nikic commented Mar 13, 2021

Optimization opportunity for constructor functions #49539

Optimization opportunity for constructor functions #49539

Comments

glandium commented Mar 31, 2018

glandium commented Mar 31, 2018

nox commented Mar 31, 2018

oli-obk commented Mar 31, 2018

glandium commented Mar 31, 2018

eddyb commented Apr 1, 2018

nikic commented Mar 13, 2021