Auto merge of rust-lang#9400 - lukaslueg:approx_large_enum, r=llogiq
Use `approx_ty_size` for `large_enum_variant`

This builds upon rust-lang#9373 to use the approximate size of each variant for `large_enum_variant`. This lets us lint cases where an `enum` contains generics but is still guaranteed to have a large variant as a lower bound, e.g. with `(T, [u8; 512])`.
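For illustration only (not part of the commit), a minimal sketch of the kind of enum this now catches; `Message` is a made-up name:

```rust
// With `T` assumed zero-sized, `Payload` still counts as at least 512 bytes,
// so the variant size difference is detectable without knowing `T`'s layout.
#[allow(dead_code)]
enum Message<T> {
    Quit,                    // carries no data
    Payload((T, [u8; 512])), // at least 512 bytes, regardless of `T`
}

fn main() {
    // Any instantiation is at least 512 bytes, so the reported lower bound is
    // sound even though the exact size depends on `T`.
    assert!(std::mem::size_of::<Message<u64>>() >= 512);
}
```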

* I've changed the wording from "is ... bytes" to "contains at least" because
  * the size is now an approximate lower bound (e.g. `512` in the example above); the actual size is larger because of `T` itself and its effect on the memory layout.
  * the discriminant is not taken into account in the message. This matters for variants like `A(T)`, which would otherwise be reported as "is at least 0 bytes", which may be misleading.
* If the second-largest variant has no fields, there is a special case "carries no data" instead of "is at least 0 bytes".
* A variant like `A(T)` is reported as "at least 0 bytes", which is technically true; we do not distinguish between "size not determinable" and a genuine ZST.
* The pre-existing generics tests now trigger the lint where they previously did not. AFAICS this is correct.

I guess the above is correct-ish. However, I use the `SubstsRef` obtained via `cx.tcx.type_of(item.def_id)` to substitute the generics in the variants' field types. Is this even applicable, since we start from an `ItemKind`?

changelog: none
bors committed Sep 3, 2022
2 parents 30e4532 + 584000a commit 99ab5fe
Showing 6 changed files with 267 additions and 139 deletions.
96 changes: 58 additions & 38 deletions clippy_lints/src/large_enum_variant.rs
@@ -1,13 +1,12 @@
//! lint when there is a large size difference between variants on an enum
use clippy_utils::source::snippet_with_applicability;
use clippy_utils::{diagnostics::span_lint_and_then, ty::is_copy};
use clippy_utils::{diagnostics::span_lint_and_then, ty::approx_ty_size, ty::is_copy};
use rustc_errors::Applicability;
use rustc_hir::{Item, ItemKind};
use rustc_lint::{LateContext, LateLintPass};
use rustc_middle::lint::in_external_macro;
use rustc_middle::ty::layout::LayoutOf;
use rustc_middle::ty::{Adt, Ty};
use rustc_middle::ty::{Adt, AdtDef, GenericArg, List, Ty};
use rustc_session::{declare_tool_lint, impl_lint_pass};
use rustc_span::source_map::Span;

@@ -17,7 +16,7 @@ declare_clippy_lint! {
/// `enum`s.
///
/// ### Why is this bad?
/// Enum size is bounded by the largest variant. Having a
/// Enum size is bounded by the largest variant. Having one
/// large variant can penalize the memory layout of that enum.
///
/// ### Known problems
@@ -33,8 +32,9 @@ declare_clippy_lint! {
/// use case it may be possible to store the large data in an auxiliary
/// structure (e.g. Arena or ECS).
///
/// The lint will ignore generic types if the layout depends on the
/// generics, even if the size difference will be large anyway.
/// The lint will ignore the impact of generic types to the type layout by
/// assuming every type parameter is zero-sized. Depending on your use case,
/// this may lead to a false positive.
///
/// ### Example
/// ```rust
@@ -83,6 +83,38 @@ struct VariantInfo {
fields_size: Vec<FieldInfo>,
}

fn variants_size<'tcx>(
cx: &LateContext<'tcx>,
adt: AdtDef<'tcx>,
subst: &'tcx List<GenericArg<'tcx>>,
) -> Vec<VariantInfo> {
let mut variants_size = adt
.variants()
.iter()
.enumerate()
.map(|(i, variant)| {
let mut fields_size = variant
.fields
.iter()
.enumerate()
.map(|(i, f)| FieldInfo {
ind: i,
size: approx_ty_size(cx, f.ty(cx.tcx, subst)),
})
.collect::<Vec<_>>();
fields_size.sort_by(|a, b| (a.size.cmp(&b.size)));

VariantInfo {
ind: i,
size: fields_size.iter().map(|info| info.size).sum(),
fields_size,
}
})
.collect::<Vec<_>>();
variants_size.sort_by(|a, b| (b.size.cmp(&a.size)));
variants_size
}

impl_lint_pass!(LargeEnumVariant => [LARGE_ENUM_VARIANT]);

impl<'tcx> LateLintPass<'tcx> for LargeEnumVariant {
@@ -92,57 +124,45 @@ impl<'tcx> LateLintPass<'tcx> for LargeEnumVariant {
}
if let ItemKind::Enum(ref def, _) = item.kind {
let ty = cx.tcx.type_of(item.def_id);
let adt = ty.ty_adt_def().expect("already checked whether this is an enum");
let (adt, subst) = match ty.kind() {
Adt(adt, subst) => (adt, subst),
_ => panic!("already checked whether this is an enum"),
};
if adt.variants().len() <= 1 {
return;
}
let mut variants_size: Vec<VariantInfo> = Vec::new();
for (i, variant) in adt.variants().iter().enumerate() {
let mut fields_size = Vec::new();
for (i, f) in variant.fields.iter().enumerate() {
let ty = cx.tcx.type_of(f.did);
// don't lint variants which have a field of generic type.
match cx.layout_of(ty) {
Ok(l) => {
let fsize = l.size.bytes();
fields_size.push(FieldInfo { ind: i, size: fsize });
},
Err(_) => {
return;
},
}
}
let size: u64 = fields_size.iter().map(|info| info.size).sum();

variants_size.push(VariantInfo {
ind: i,
size,
fields_size,
});
}

variants_size.sort_by(|a, b| (b.size.cmp(&a.size)));
let variants_size = variants_size(cx, *adt, subst);

let mut difference = variants_size[0].size - variants_size[1].size;
if difference > self.maximum_size_difference_allowed {
let help_text = "consider boxing the large fields to reduce the total size of the enum";
span_lint_and_then(
cx,
LARGE_ENUM_VARIANT,
def.variants[variants_size[0].ind].span,
item.span,
"large size difference between variants",
|diag| {
diag.span_label(
item.span,
format!("the entire enum is at least {} bytes", approx_ty_size(cx, ty)),
);
diag.span_label(
def.variants[variants_size[0].ind].span,
&format!("this variant is {} bytes", variants_size[0].size),
format!("the largest variant contains at least {} bytes", variants_size[0].size),
);
diag.span_note(
diag.span_label(
def.variants[variants_size[1].ind].span,
&format!("and the second-largest variant is {} bytes:", variants_size[1].size),
&if variants_size[1].fields_size.is_empty() {
"the second-largest variant carries no data at all".to_owned()
} else {
format!(
"the second-largest variant contains at least {} bytes",
variants_size[1].size
)
},
);

let fields = def.variants[variants_size[0].ind].data.fields();
variants_size[0].fields_size.sort_by(|a, b| (a.size.cmp(&b.size)));
let mut applicability = Applicability::MaybeIncorrect;
if is_copy(cx, ty) || maybe_copy(cx, ty) {
diag.span_note(
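As a plain-Rust sketch (assumed helper name, not the rustc-internal code above), the lint's comparison boils down to sorting the approximate variant sizes in descending order and taking the difference of the two largest:

```rust
// Hypothetical stand-in for the logic above, using plain `u64` sizes instead
// of rustc's internal types.
fn largest_difference(mut approx_sizes: Vec<u64>) -> Option<u64> {
    // Sort descending, mirroring `variants_size.sort_by(|a, b| b.size.cmp(&a.size))`.
    approx_sizes.sort_unstable_by(|a, b| b.cmp(a));
    match approx_sizes.as_slice() {
        [largest, second, ..] => Some(largest - second),
        _ => None, // fewer than two variants: nothing to compare
    }
}

fn main() {
    // Approximate variant sizes of 0, 512 and 4 bytes give a difference of 508,
    // which is then checked against `maximum_size_difference_allowed`.
    assert_eq!(largest_difference(vec![0, 512, 4]), Some(508));
}
```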
7 changes: 4 additions & 3 deletions src/docs/large_enum_variant.txt
@@ -3,7 +3,7 @@ Checks for large size differences between variants on
`enum`s.

### Why is this bad?
Enum size is bounded by the largest variant. Having a
Enum size is bounded by the largest variant. Having one
large variant can penalize the memory layout of that enum.

### Known problems
@@ -19,8 +19,9 @@ still be `Clone`, but that is worse ergonomically. Depending on the
use case it may be possible to store the large data in an auxiliary
structure (e.g. Arena or ECS).

The lint will ignore generic types if the layout depends on the
generics, even if the size difference will be large anyway.
The lint will ignore the impact of generic types to the type layout by
assuming every type parameter is zero-sized. Depending on your use case,
this may lead to a false positive.

### Example
```
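For illustration only (not part of the commit), a sketch of the false positive the updated wording warns about; the type name is made up:

```rust
// The lint assumes `T` is zero-sized, so it sees a 512-byte difference between
// `Fixed` and `Generic` and suggests boxing `Fixed`.
#[allow(dead_code)]
enum MaybeFalsePositive<T> {
    Generic(T),       // counted as 0 bytes by the approximation
    Fixed([u8; 512]), // counted as 512 bytes
}

fn main() {
    // Instantiated with a similarly large `T`, both variants hold about 512
    // bytes, so boxing `Fixed` would barely shrink the enum.
    let with_large_t = std::mem::size_of::<MaybeFalsePositive<[u8; 512]>>();
    assert!(with_large_t >= 512);
}
```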
24 changes: 24 additions & 0 deletions tests/ui/large_enum_variant.rs
@@ -130,6 +130,30 @@ impl<T: Copy> Clone for SomeGenericPossiblyCopyEnum<T> {

impl<T: Copy> Copy for SomeGenericPossiblyCopyEnum<T> {}

enum LargeEnumWithGenerics<T> {
Small,
Large((T, [u8; 512])),
}

struct Foo<T> {
foo: T,
}

enum WithGenerics {
Large([Foo<u64>; 64]),
Small(u8),
}

enum PossiblyLargeEnumWithConst<const U: usize> {
SmallBuffer([u8; 4]),
MightyBuffer([u16; U]),
}

enum LargeEnumOfConst {
Ok,
Error(PossiblyLargeEnumWithConst<256>),
}

fn main() {
large_enum_variant!();
}
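A quick standalone check (not part of the commit) that the const-generic test case above really does produce a large variant once instantiated with `U = 256`:

```rust
// Re-declared here so the snippet is self-contained; mirrors the test above.
#[allow(dead_code)]
enum PossiblyLargeEnumWithConst<const U: usize> {
    SmallBuffer([u8; 4]),
    MightyBuffer([u16; U]),
}

fn main() {
    // `[u16; 256]` occupies 512 bytes, so the difference between the variants
    // is at least 508 bytes.
    assert!(std::mem::size_of::<PossiblyLargeEnumWithConst<256>>() >= 512);
}
```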