Auto merge of rust-lang#13139 - xFrednet:lintcheck-limit-summery-outp…

…ut, r=Alexendoo Lintcheck: Rework and limit diff output for GH's CI ### Background While working on rust-lang#13136 I found an amazing limitation of GH's CI. The summary can at most have be 1MB of text. Here is the warning message: > $GITHUB_STEP_SUMMARY upload aborted, supports content up to a size of 1024k, got 46731k. For more information see: https://docs.github.com/actions/using-workflows/workflow-commands-for-github-actions#adding-a-markdown-summary [The PR](rust-lang#13136) produced a *casual* 61808 changes. Guess that's why those lints are not *warn-by-default* :P. ### Changes: This PR limits the lintcheck diff output in two ways. 1. The diff is limited to 200 messages per lint per section. Hidden messages are indicated by a message at the end of the section. 2. The output is first written to a file and only the first 1MB is written to ` >> $GITHUB_STEP_SUMMARY`. The entire file is also written to the normal CI log. This helps for cases where several lints change and the total size exceeds the 1MB limit. An example of these changes can be seen here: https://github.com/xFrednet/rust-clippy/actions/runs/10028799118?pr=4 --- changelog: none r? `@Alexendoo` Sorry for bombarding you with so many PR's lately 😅 Feel free to pass some of you reviews to me.
flip1995 · Jul 25, 2024 · be82340 · be82340
2 parents 5e6540f + bdf3e58
commit be82340
Show file tree

Hide file tree

Showing 5 changed files with 167 additions and 43 deletions.
diff --git a/.github/workflows/lintcheck.yml b/.github/workflows/lintcheck.yml
@@ -115,4 +115,20 @@ jobs:
       uses: actions/download-artifact@v4
 
     - name: Diff results
-      run: ./target/debug/lintcheck diff {base,head}/ci_crates_logs.json >> $GITHUB_STEP_SUMMARY
+      # GH's summery has a maximum size of 1024k:
+      # https://docs.github.com/actions/using-workflows/workflow-commands-for-github-actions#adding-a-markdown-summary
+      # That's why we first log to file and then to the summary and logs
+      run: |
+        ./target/debug/lintcheck diff {base,head}/ci_crates_logs.json --truncate >> truncated_diff.md
+        head -c 1024000 truncated_diff.md >> $GITHUB_STEP_SUMMARY
+        cat truncated_diff.md
+        ./target/debug/lintcheck diff {base,head}/ci_crates_logs.json >> full_diff.md
+
+    - name: Upload full diff
+      uses: actions/upload-artifact@v4
+      with:
+        name: diff
+        if-no-files-found: ignore
+        path: |
+          full_diff.md
+          truncated_diff.md
diff --git a/lintcheck/Cargo.toml b/lintcheck/Cargo.toml
@@ -16,7 +16,7 @@ clap = { version = "4.4", features = ["derive", "env"] }
 crossbeam-channel = "0.5.6"
 diff = "0.1.13"
 flate2 = "1.0"
-itertools = "0.12"
+itertools = "0.13"
 rayon = "1.5.1"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0.85"

diff --git a/lintcheck/src/config.rs b/lintcheck/src/config.rs
@@ -53,7 +53,13 @@ pub(crate) struct LintcheckConfig {
 #[derive(Subcommand, Clone, Debug)]
 pub(crate) enum Commands {
     /// Display a markdown diff between two lintcheck log files in JSON format
-    Diff { old: PathBuf, new: PathBuf },
+    Diff {
+        old: PathBuf,
+        new: PathBuf,
+        /// This will limit the number of warnings that will be printed for each lint
+        #[clap(long)]
+        truncate: bool,
+    },
     /// Create a lintcheck crates TOML file containing the top N popular crates
     Popular {
         /// Output TOML file name

diff --git a/lintcheck/src/json.rs b/lintcheck/src/json.rs
@@ -1,12 +1,16 @@
 use std::fs;
 use std::path::Path;
 
-use itertools::EitherOrBoth;
+use itertools::{EitherOrBoth, Itertools};
 use serde::{Deserialize, Serialize};
 
 use crate::ClippyWarning;
 
-#[derive(Deserialize, Serialize)]
+/// This is the total number. 300 warnings results in 100 messages per section.
+const DEFAULT_LIMIT_PER_LINT: usize = 300;
+const TRUNCATION_TOTAL_TARGET: usize = 1000;
+
+#[derive(Debug, Deserialize, Serialize)]
 struct LintJson {
     lint: String,
     krate: String,
@@ -18,7 +22,7 @@ struct LintJson {
 
 impl LintJson {
     fn key(&self) -> impl Ord + '_ {
-        (self.file_name.as_str(), self.byte_pos, self.lint.as_str())
+        (self.lint.as_str(), self.file_name.as_str(), self.byte_pos)
     }
 
     fn info_text(&self, action: &str) -> String {
@@ -52,14 +56,117 @@ fn load_warnings(path: &Path) -> Vec<LintJson> {
     serde_json::from_slice(&file).unwrap_or_else(|e| panic!("failed to deserialize {}: {e}", path.display()))
 }
 
-fn print_warnings(title: &str, warnings: &[LintJson]) {
+pub(crate) fn diff(old_path: &Path, new_path: &Path, truncate: bool) {
+    let old_warnings = load_warnings(old_path);
+    let new_warnings = load_warnings(new_path);
+
+    let mut lint_warnings = vec![];
+
+    for (name, changes) in &itertools::merge_join_by(old_warnings, new_warnings, |old, new| old.key().cmp(&new.key()))
+        .chunk_by(|change| change.as_ref().into_left().lint.to_string())
+    {
+        let mut added = Vec::new();
+        let mut removed = Vec::new();
+        let mut changed = Vec::new();
+        for change in changes {
+            match change {
+                EitherOrBoth::Both(old, new) => {
+                    if old.rendered != new.rendered {
+                        changed.push((old, new));
+                    }
+                },
+                EitherOrBoth::Left(old) => removed.push(old),
+                EitherOrBoth::Right(new) => added.push(new),
+            }
+        }
+
+        if !added.is_empty() || !removed.is_empty() || !changed.is_empty() {
+            lint_warnings.push(LintWarnings {
+                name,
+                added,
+                removed,
+                changed,
+            });
+        }
+    }
+
+    print_summary_table(&lint_warnings);
+    println!();
+
+    if lint_warnings.is_empty() {
+        return;
+    }
+
+    let truncate_after = if truncate {
+        // Max 15 ensures that we at least have five messages per lint
+        DEFAULT_LIMIT_PER_LINT
+            .min(TRUNCATION_TOTAL_TARGET / lint_warnings.len())
+            .max(15)
+    } else {
+        // No lint should ever each this number of lint emissions, so this is equivialent to
+        // No truncation
+        usize::MAX
+    };
+
+    for lint in lint_warnings {
+        print_lint_warnings(&lint, truncate_after);
+    }
+}
+
+#[derive(Debug)]
+struct LintWarnings {
+    name: String,
+    added: Vec<LintJson>,
+    removed: Vec<LintJson>,
+    changed: Vec<(LintJson, LintJson)>,
+}
+
+fn print_lint_warnings(lint: &LintWarnings, truncate_after: usize) {
+    let name = &lint.name;
+    let html_id = to_html_id(name);
+
+    // The additional anchor is added for non GH viewers that don't prefix ID's
+    println!(r#"## `{name}` <a id="user-content-{html_id}"></a>"#);
+    println!();
+
+    print!(
+        r##"{}, {}, {}"##,
+        count_string(name, "added", lint.added.len()),
+        count_string(name, "removed", lint.removed.len()),
+        count_string(name, "changed", lint.changed.len()),
+    );
+    println!();
+
+    print_warnings("Added", &lint.added, truncate_after / 3);
+    print_warnings("Removed", &lint.removed, truncate_after / 3);
+    print_changed_diff(&lint.changed, truncate_after / 3);
+}
+
+fn print_summary_table(lints: &[LintWarnings]) {
+    println!("| Lint                                       | Added   | Removed | Changed |");
+    println!("| ------------------------------------------ | ------: | ------: | ------: |");
+
+    for lint in lints {
+        println!(
+            "| {:<62} | {:>7} | {:>7} | {:>7} |",
+            format!("[`{}`](#user-content-{})", lint.name, to_html_id(&lint.name)),
+            lint.added.len(),
+            lint.removed.len(),
+            lint.changed.len()
+        );
+    }
+}
+
+fn print_warnings(title: &str, warnings: &[LintJson], truncate_after: usize) {
     if warnings.is_empty() {
         return;
     }
 
-    //We have to use HTML here to be able to manually add an id.
-    println!(r#"<h3 id="{title}">{title}</h3>"#);
+    print_h3(&warnings[0].lint, title);
     println!();
+
+    let warnings = truncate(warnings, truncate_after);
+
     for warning in warnings {
         println!("{}", warning.info_text(title));
         println!();
@@ -70,14 +177,16 @@ fn print_warnings(title: &str, warnings: &[LintJson]) {
     }
 }
 
-fn print_changed_diff(changed: &[(LintJson, LintJson)]) {
+fn print_changed_diff(changed: &[(LintJson, LintJson)], truncate_after: usize) {
     if changed.is_empty() {
         return;
     }
 
-    //We have to use HTML here to be able to manually add an id.
-    println!(r#"<h3 id="changed">Changed</h3>"#);
+    print_h3(&changed[0].0.lint, "Changed");
     println!();
+
+    let changed = truncate(changed, truncate_after);
+
     for (old, new) in changed {
         println!("{}", new.info_text("Changed"));
         println!();
@@ -101,51 +210,44 @@ fn print_changed_diff(changed: &[(LintJson, LintJson)]) {
     }
 }
 
-pub(crate) fn diff(old_path: &Path, new_path: &Path) {
-    let old_warnings = load_warnings(old_path);
-    let new_warnings = load_warnings(new_path);
+fn truncate<T>(list: &[T], truncate_after: usize) -> &[T] {
+    if list.len() > truncate_after {
+        println!(
+            "{} warnings have been truncated for this summary.",
+            list.len() - truncate_after
+        );
+        println!();
 
-    let mut added = Vec::new();
-    let mut removed = Vec::new();
-    let mut changed = Vec::new();
-
-    for change in itertools::merge_join_by(old_warnings, new_warnings, |old, new| old.key().cmp(&new.key())) {
-        match change {
-            EitherOrBoth::Both(old, new) => {
-                if old.rendered != new.rendered {
-                    changed.push((old, new));
-                }
-            },
-            EitherOrBoth::Left(old) => removed.push(old),
-            EitherOrBoth::Right(new) => added.push(new),
-        }
+        list.split_at(truncate_after).0
+    } else {
+        list
     }
+}
 
-    print!(
-        r##"{}, {}, {}"##,
-        count_string("added", added.len()),
-        count_string("removed", removed.len()),
-        count_string("changed", changed.len()),
-    );
-    println!();
-    println!();
+fn print_h3(lint: &str, title: &str) {
+    let html_id = to_html_id(lint);
+    // We have to use HTML here to be able to manually add an id.
+    println!(r#"### {title} <a id="user-content-{html_id}-{title}"></a>"#);
+}
 
-    print_warnings("Added", &added);
-    print_warnings("Removed", &removed);
-    print_changed_diff(&changed);
+/// GitHub's markdown parsers doesn't like IDs with `::` and `_`. This simplifies
+/// the lint name for the HTML ID.
+fn to_html_id(lint_name: &str) -> String {
+    lint_name.replace("clippy::", "").replace('_', "-")
 }
 
 /// This generates the `x added` string for the start of the job summery.
 /// It linkifies them if possible to jump to the respective heading.
-fn count_string(label: &str, count: usize) -> String {
+fn count_string(lint: &str, label: &str, count: usize) -> String {
     // Headlines are only added, if anything will be displayed under the headline.
     // We therefore only want to add links to them if they exist
     if count == 0 {
         format!("0 {label}")
     } else {
+        let html_id = to_html_id(lint);
         // GitHub's job summaries don't add HTML ids to headings. That's why we
         // manually have to add them. GitHub prefixes these manual ids with
         // `user-content-` and that's how we end up with these awesome links :D
-        format!("[{count} {label}](#user-content-{label})")
+        format!("[{count} {label}](#user-content-{html_id}-{label})")
     }
 }
diff --git a/lintcheck/src/main.rs b/lintcheck/src/main.rs
@@ -260,7 +260,7 @@ fn main() {
     let config = LintcheckConfig::new();
 
     match config.subcommand {
-        Some(Commands::Diff { old, new }) => json::diff(&old, &new),
+        Some(Commands::Diff { old, new, truncate }) => json::diff(&old, &new, truncate),
         Some(Commands::Popular { output, number }) => popular_crates::fetch(output, number).unwrap(),
         None => lintcheck(config),
     }