Auto merge of rust-lang#9400 - lukaslueg:approx_large_enum, r=llogiq
Use `approx_ty_size` for `large_enum_variant`

This builds upon rust-lang#9373 to use the approximate size of each variant for `large_enum_variant`. This lets us lint cases where an `enum` contains generics but is still guaranteed to have a large variant as a lower bound, e.g. with `(T, [u8; 512])`.
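For illustration only (not part of the commit), a minimal sketch of the kind of enum this now catches; `Message` is a made-up name:

```rust
// With `T` assumed zero-sized, `Payload` still counts as at least 512 bytes,
// so the variant size difference is detectable without knowing `T`'s layout.
#[allow(dead_code)]
enum Message<T> {
    Quit,                    // carries no data
    Payload((T, [u8; 512])), // at least 512 bytes, regardless of `T`
}

fn main() {
    // Any instantiation is at least 512 bytes, so the reported lower bound is
    // sound even though the exact size depends on `T`.
    assert!(std::mem::size_of::<Message<u64>>() >= 512);
}
```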

* I've changed the wording from "is ... bytes" to "contains at least" because
  * the size is now an approximate lower bound (e.g. `512` in the example above); the actual size is larger because of `T` itself and its effect on the memory layout.
  * the discriminant is not taken into account in the message. This matters for variants like `A(T)`, which would otherwise be reported as "is at least 0 bytes", which may be misleading.
* If the second-largest variant has no fields, there is a special case "carries no data" instead of "is at least 0 bytes".
* A variant like `A(T)` is reported as "at least 0 bytes", which is technically true; we do not distinguish between "size not determinable" and a genuine ZST.
* The pre-existing generics tests now trigger the lint where they previously did not. AFAICS this is correct.

I guess the above is correct-ish. However, I use the `SubstsRef` obtained via `cx.tcx.type_of(item.def_id)` to substitute the generics in the variants' field types. Is this even applicable, since we start from an `ItemKind`?

changelog: none
bors committed Sep 3, 2022
2 parents 30e4532 + 584000a commit 99ab5fe
Showing 6 changed files with 267 additions and 139 deletions.
96 changes: 58 additions & 38 deletions clippy_lints/src/large_enum_variant.rs
@@ -1,13 +1,12 @@
//! lint when there is a large size difference between variants on an enum
use clippy_utils::source::snippet_with_applicability;
use clippy_utils::{diagnostics::span_lint_and_then, ty::is_copy};
use clippy_utils::{diagnostics::span_lint_and_then, ty::approx_ty_size, ty::is_copy};
use rustc_errors::Applicability;
use rustc_hir::{Item, ItemKind};
use rustc_lint::{LateContext, LateLintPass};
use rustc_middle::lint::in_external_macro;
use rustc_middle::ty::layout::LayoutOf;
use rustc_middle::ty::{Adt, Ty};
use rustc_middle::ty::{Adt, AdtDef, GenericArg, List, Ty};
use rustc_session::{declare_tool_lint, impl_lint_pass};
use rustc_span::source_map::Span;

@@ -17,7 +16,7 @@ declare_clippy_lint! {
/// `enum`s.
///
/// ### Why is this bad?
/// Enum size is bounded by the largest variant. Having a
/// Enum size is bounded by the largest variant. Having one
/// large variant can penalize the memory layout of that enum.
///
/// ### Known problems
@@ -33,8 +32,9 @@ declare_clippy_lint! {
/// use case it may be possible to store the large data in an auxiliary
/// structure (e.g. Arena or ECS).
///
/// The lint will ignore generic types if the layout depends on the
/// generics, even if the size difference will be large anyway.
/// The lint will ignore the impact of generic types to the type layout by
/// assuming every type parameter is zero-sized. Depending on your use case,
/// this may lead to a false positive.
///
/// ### Example
/// ```rust
@@ -83,6 +83,38 @@ struct VariantInfo {
fields_size: Vec<FieldInfo>,
}

fn variants_size<'tcx>(
cx: &LateContext<'tcx>,
adt: AdtDef<'tcx>,
subst: &'tcx List<GenericArg<'tcx>>,
) -> Vec<VariantInfo> {
let mut variants_size = adt
.variants()
.iter()
.enumerate()
.map(|(i, variant)| {
let mut fields_size = variant
.fields
.iter()
.enumerate()
.map(|(i, f)| FieldInfo {
ind: i,
size: approx_ty_size(cx, f.ty(cx.tcx, subst)),
})
.collect::<Vec<_>>();
fields_size.sort_by(|a, b| (a.size.cmp(&b.size)));

VariantInfo {
ind: i,
size: fields_size.iter().map(|info| info.size).sum(),
fields_size,
}
})
.collect::<Vec<_>>();
variants_size.sort_by(|a, b| (b.size.cmp(&a.size)));
variants_size
}

impl_lint_pass!(LargeEnumVariant => [LARGE_ENUM_VARIANT]);

impl<'tcx> LateLintPass<'tcx> for LargeEnumVariant {
@@ -92,57 +124,45 @@ impl<'tcx> LateLintPass<'tcx> for LargeEnumVariant {
}
if let ItemKind::Enum(ref def, _) = item.kind {
let ty = cx.tcx.type_of(item.def_id);
let adt = ty.ty_adt_def().expect("already checked whether this is an enum");
let (adt, subst) = match ty.kind() {
Adt(adt, subst) => (adt, subst),
_ => panic!("already checked whether this is an enum"),
};
if adt.variants().len() <= 1 {
return;
}
let mut variants_size: Vec<VariantInfo> = Vec::new();
for (i, variant) in adt.variants().iter().enumerate() {
let mut fields_size = Vec::new();
for (i, f) in variant.fields.iter().enumerate() {
let ty = cx.tcx.type_of(f.did);
// don't lint variants which have a field of generic type.
match cx.layout_of(ty) {
Ok(l) => {
let fsize = l.size.bytes();
fields_size.push(FieldInfo { ind: i, size: fsize });
},
Err(_) => {
return;
},
}
}
let size: u64 = fields_size.iter().map(|info| info.size).sum();

variants_size.push(VariantInfo {
ind: i,
size,
fields_size,
});
}

variants_size.sort_by(|a, b| (b.size.cmp(&a.size)));
let variants_size = variants_size(cx, *adt, subst);

let mut difference = variants_size[0].size - variants_size[1].size;
if difference > self.maximum_size_difference_allowed {
let help_text = "consider boxing the large fields to reduce the total size of the enum";
span_lint_and_then(
cx,
LARGE_ENUM_VARIANT,
def.variants[variants_size[0].ind].span,
item.span,
"large size difference between variants",
|diag| {
diag.span_label(
item.span,
format!("the entire enum is at least {} bytes", approx_ty_size(cx, ty)),
);
diag.span_label(
def.variants[variants_size[0].ind].span,
&format!("this variant is {} bytes", variants_size[0].size),
format!("the largest variant contains at least {} bytes", variants_size[0].size),
);
diag.span_note(
diag.span_label(
def.variants[variants_size[1].ind].span,
&format!("and the second-largest variant is {} bytes:", variants_size[1].size),
&if variants_size[1].fields_size.is_empty() {
"the second-largest variant carries no data at all".to_owned()
} else {
format!(
"the second-largest variant contains at least {} bytes",
variants_size[1].size
)
},
);

let fields = def.variants[variants_size[0].ind].data.fields();
variants_size[0].fields_size.sort_by(|a, b| (a.size.cmp(&b.size)));
let mut applicability = Applicability::MaybeIncorrect;
if is_copy(cx, ty) || maybe_copy(cx, ty) {
diag.span_note(
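As a plain-Rust sketch (assumed helper name, not the rustc-internal code above), the lint's comparison boils down to sorting the approximate variant sizes in descending order and taking the difference of the two largest:

```rust
// Hypothetical stand-in for the logic above, using plain `u64` sizes instead
// of rustc's internal types.
fn largest_difference(mut approx_sizes: Vec<u64>) -> Option<u64> {
    // Sort descending, mirroring `variants_size.sort_by(|a, b| b.size.cmp(&a.size))`.
    approx_sizes.sort_unstable_by(|a, b| b.cmp(a));
    match approx_sizes.as_slice() {
        [largest, second, ..] => Some(largest - second),
        _ => None, // fewer than two variants: nothing to compare
    }
}

fn main() {
    // Approximate variant sizes of 0, 512 and 4 bytes give a difference of 508,
    // which is then checked against `maximum_size_difference_allowed`.
    assert_eq!(largest_difference(vec![0, 512, 4]), Some(508));
}
```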
7 changes: 4 additions & 3 deletions src/docs/large_enum_variant.txt
@@ -3,7 +3,7 @@ Checks for large size differences between variants on
`enum`s.

### Why is this bad?
Enum size is bounded by the largest variant. Having a
Enum size is bounded by the largest variant. Having one
large variant can penalize the memory layout of that enum.

### Known problems
@@ -19,8 +19,9 @@ still be `Clone`, but that is worse ergonomically. Depending on the
use case it may be possible to store the large data in an auxiliary
structure (e.g. Arena or ECS).

The lint will ignore generic types if the layout depends on the
generics, even if the size difference will be large anyway.
The lint will ignore the impact of generic types to the type layout by
assuming every type parameter is zero-sized. Depending on your use case,
this may lead to a false positive.

### Example
```
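For illustration only (not part of the commit), a sketch of the false positive the updated wording warns about; the type name is made up:

```rust
// The lint assumes `T` is zero-sized, so it sees a 512-byte difference between
// `Fixed` and `Generic` and suggests boxing `Fixed`.
#[allow(dead_code)]
enum MaybeFalsePositive<T> {
    Generic(T),       // counted as 0 bytes by the approximation
    Fixed([u8; 512]), // counted as 512 bytes
}

fn main() {
    // Instantiated with a similarly large `T`, both variants hold about 512
    // bytes, so boxing `Fixed` would barely shrink the enum.
    let with_large_t = std::mem::size_of::<MaybeFalsePositive<[u8; 512]>>();
    assert!(with_large_t >= 512);
}
```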
24 changes: 24 additions & 0 deletions tests/ui/large_enum_variant.rs
@@ -130,6 +130,30 @@ impl<T: Copy> Clone for SomeGenericPossiblyCopyEnum<T> {

impl<T: Copy> Copy for SomeGenericPossiblyCopyEnum<T> {}

enum LargeEnumWithGenerics<T> {
Small,
Large((T, [u8; 512])),
}

struct Foo<T> {
foo: T,
}

enum WithGenerics {
Large([Foo<u64>; 64]),
Small(u8),
}

enum PossiblyLargeEnumWithConst<const U: usize> {
SmallBuffer([u8; 4]),
MightyBuffer([u16; U]),
}

enum LargeEnumOfConst {
Ok,
Error(PossiblyLargeEnumWithConst<256>),
}

fn main() {
large_enum_variant!();
}
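A quick standalone check (not part of the commit) that the const-generic test case above really does produce a large variant once instantiated with `U = 256`:

```rust
// Re-declared here so the snippet is self-contained; mirrors the test above.
#[allow(dead_code)]
enum PossiblyLargeEnumWithConst<const U: usize> {
    SmallBuffer([u8; 4]),
    MightyBuffer([u16; U]),
}

fn main() {
    // `[u16; 256]` occupies 512 bytes, so the difference between the variants
    // is at least 508 bytes.
    assert!(std::mem::size_of::<PossiblyLargeEnumWithConst<256>>() >= 512);
}
```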