Skip to content

Commit

Permalink
Auto merge of #13139 - xFrednet:lintcheck-limit-summery-output, r=Ale…
Browse files Browse the repository at this point in the history
…xendoo

Lintcheck: Rework and limit diff output for GH's CI

### Background

While working on #13136 I found an amazing limitation of GH's CI. The summary can at most have be 1MB of text. Here is the warning message:

> $GITHUB_STEP_SUMMARY upload aborted, supports content up to a size of 1024k, got 46731k. For more information see: https://docs.github.com/actions/using-workflows/workflow-commands-for-github-actions#adding-a-markdown-summary

[The PR](#13136) produced a *casual* 61808 changes. Guess that's why those lints are not *warn-by-default* :P.

### Changes:

This PR limits the lintcheck diff output in two ways.

1. The diff is limited to 200 messages per lint per section. Hidden messages are indicated by a message at the end of the section.
2. The output is first written to a file and only the first 1MB is written to ` >> $GITHUB_STEP_SUMMARY`. The entire file is also written to the normal CI log. This helps for cases where several lints change and the total size exceeds the 1MB limit.

An example of these changes can be seen here: https://github.com/xFrednet/rust-clippy/actions/runs/10028799118?pr=4

---

changelog: none

r? `@Alexendoo`

Sorry for bombarding you with so many PR's lately 😅 Feel free to pass some of you reviews to me.
  • Loading branch information
bors committed Jul 25, 2024
2 parents 5e6540f + bdf3e58 commit be82340
Show file tree
Hide file tree
Showing 5 changed files with 167 additions and 43 deletions.
18 changes: 17 additions & 1 deletion .github/workflows/lintcheck.yml
Original file line number Diff line number Diff line change
Expand Up @@ -115,4 +115,20 @@ jobs:
uses: actions/download-artifact@v4

- name: Diff results
run: ./target/debug/lintcheck diff {base,head}/ci_crates_logs.json >> $GITHUB_STEP_SUMMARY
# GH's summery has a maximum size of 1024k:
# https://docs.github.com/actions/using-workflows/workflow-commands-for-github-actions#adding-a-markdown-summary
# That's why we first log to file and then to the summary and logs
run: |
./target/debug/lintcheck diff {base,head}/ci_crates_logs.json --truncate >> truncated_diff.md
head -c 1024000 truncated_diff.md >> $GITHUB_STEP_SUMMARY
cat truncated_diff.md
./target/debug/lintcheck diff {base,head}/ci_crates_logs.json >> full_diff.md
- name: Upload full diff
uses: actions/upload-artifact@v4
with:
name: diff
if-no-files-found: ignore
path: |
full_diff.md
truncated_diff.md
2 changes: 1 addition & 1 deletion lintcheck/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ clap = { version = "4.4", features = ["derive", "env"] }
crossbeam-channel = "0.5.6"
diff = "0.1.13"
flate2 = "1.0"
itertools = "0.12"
itertools = "0.13"
rayon = "1.5.1"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.85"
Expand Down
8 changes: 7 additions & 1 deletion lintcheck/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,13 @@ pub(crate) struct LintcheckConfig {
#[derive(Subcommand, Clone, Debug)]
pub(crate) enum Commands {
/// Display a markdown diff between two lintcheck log files in JSON format
Diff { old: PathBuf, new: PathBuf },
Diff {
old: PathBuf,
new: PathBuf,
/// This will limit the number of warnings that will be printed for each lint
#[clap(long)]
truncate: bool,
},
/// Create a lintcheck crates TOML file containing the top N popular crates
Popular {
/// Output TOML file name
Expand Down
180 changes: 141 additions & 39 deletions lintcheck/src/json.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
use std::fs;
use std::path::Path;

use itertools::EitherOrBoth;
use itertools::{EitherOrBoth, Itertools};
use serde::{Deserialize, Serialize};

use crate::ClippyWarning;

#[derive(Deserialize, Serialize)]
/// This is the total number. 300 warnings results in 100 messages per section.
const DEFAULT_LIMIT_PER_LINT: usize = 300;
const TRUNCATION_TOTAL_TARGET: usize = 1000;

#[derive(Debug, Deserialize, Serialize)]
struct LintJson {
lint: String,
krate: String,
Expand All @@ -18,7 +22,7 @@ struct LintJson {

impl LintJson {
fn key(&self) -> impl Ord + '_ {
(self.file_name.as_str(), self.byte_pos, self.lint.as_str())
(self.lint.as_str(), self.file_name.as_str(), self.byte_pos)
}

fn info_text(&self, action: &str) -> String {
Expand Down Expand Up @@ -52,14 +56,117 @@ fn load_warnings(path: &Path) -> Vec<LintJson> {
serde_json::from_slice(&file).unwrap_or_else(|e| panic!("failed to deserialize {}: {e}", path.display()))
}

fn print_warnings(title: &str, warnings: &[LintJson]) {
pub(crate) fn diff(old_path: &Path, new_path: &Path, truncate: bool) {
let old_warnings = load_warnings(old_path);
let new_warnings = load_warnings(new_path);

let mut lint_warnings = vec![];

for (name, changes) in &itertools::merge_join_by(old_warnings, new_warnings, |old, new| old.key().cmp(&new.key()))
.chunk_by(|change| change.as_ref().into_left().lint.to_string())
{
let mut added = Vec::new();
let mut removed = Vec::new();
let mut changed = Vec::new();
for change in changes {
match change {
EitherOrBoth::Both(old, new) => {
if old.rendered != new.rendered {
changed.push((old, new));
}
},
EitherOrBoth::Left(old) => removed.push(old),
EitherOrBoth::Right(new) => added.push(new),
}
}

if !added.is_empty() || !removed.is_empty() || !changed.is_empty() {
lint_warnings.push(LintWarnings {
name,
added,
removed,
changed,
});
}
}

print_summary_table(&lint_warnings);
println!();

if lint_warnings.is_empty() {
return;
}

let truncate_after = if truncate {
// Max 15 ensures that we at least have five messages per lint
DEFAULT_LIMIT_PER_LINT
.min(TRUNCATION_TOTAL_TARGET / lint_warnings.len())
.max(15)
} else {
// No lint should ever each this number of lint emissions, so this is equivialent to
// No truncation
usize::MAX
};

for lint in lint_warnings {
print_lint_warnings(&lint, truncate_after);
}
}

#[derive(Debug)]
struct LintWarnings {
name: String,
added: Vec<LintJson>,
removed: Vec<LintJson>,
changed: Vec<(LintJson, LintJson)>,
}

fn print_lint_warnings(lint: &LintWarnings, truncate_after: usize) {
let name = &lint.name;
let html_id = to_html_id(name);

// The additional anchor is added for non GH viewers that don't prefix ID's
println!(r#"## `{name}` <a id="user-content-{html_id}"></a>"#);
println!();

print!(
r##"{}, {}, {}"##,
count_string(name, "added", lint.added.len()),
count_string(name, "removed", lint.removed.len()),
count_string(name, "changed", lint.changed.len()),
);
println!();

print_warnings("Added", &lint.added, truncate_after / 3);
print_warnings("Removed", &lint.removed, truncate_after / 3);
print_changed_diff(&lint.changed, truncate_after / 3);
}

fn print_summary_table(lints: &[LintWarnings]) {
println!("| Lint | Added | Removed | Changed |");
println!("| ------------------------------------------ | ------: | ------: | ------: |");

for lint in lints {
println!(
"| {:<62} | {:>7} | {:>7} | {:>7} |",
format!("[`{}`](#user-content-{})", lint.name, to_html_id(&lint.name)),
lint.added.len(),
lint.removed.len(),
lint.changed.len()
);
}
}

fn print_warnings(title: &str, warnings: &[LintJson], truncate_after: usize) {
if warnings.is_empty() {
return;
}

//We have to use HTML here to be able to manually add an id.
println!(r#"<h3 id="{title}">{title}</h3>"#);
print_h3(&warnings[0].lint, title);
println!();

let warnings = truncate(warnings, truncate_after);

for warning in warnings {
println!("{}", warning.info_text(title));
println!();
Expand All @@ -70,14 +177,16 @@ fn print_warnings(title: &str, warnings: &[LintJson]) {
}
}

fn print_changed_diff(changed: &[(LintJson, LintJson)]) {
fn print_changed_diff(changed: &[(LintJson, LintJson)], truncate_after: usize) {
if changed.is_empty() {
return;
}

//We have to use HTML here to be able to manually add an id.
println!(r#"<h3 id="changed">Changed</h3>"#);
print_h3(&changed[0].0.lint, "Changed");
println!();

let changed = truncate(changed, truncate_after);

for (old, new) in changed {
println!("{}", new.info_text("Changed"));
println!();
Expand All @@ -101,51 +210,44 @@ fn print_changed_diff(changed: &[(LintJson, LintJson)]) {
}
}

pub(crate) fn diff(old_path: &Path, new_path: &Path) {
let old_warnings = load_warnings(old_path);
let new_warnings = load_warnings(new_path);
fn truncate<T>(list: &[T], truncate_after: usize) -> &[T] {
if list.len() > truncate_after {
println!(
"{} warnings have been truncated for this summary.",
list.len() - truncate_after
);
println!();

let mut added = Vec::new();
let mut removed = Vec::new();
let mut changed = Vec::new();

for change in itertools::merge_join_by(old_warnings, new_warnings, |old, new| old.key().cmp(&new.key())) {
match change {
EitherOrBoth::Both(old, new) => {
if old.rendered != new.rendered {
changed.push((old, new));
}
},
EitherOrBoth::Left(old) => removed.push(old),
EitherOrBoth::Right(new) => added.push(new),
}
list.split_at(truncate_after).0
} else {
list
}
}

print!(
r##"{}, {}, {}"##,
count_string("added", added.len()),
count_string("removed", removed.len()),
count_string("changed", changed.len()),
);
println!();
println!();
fn print_h3(lint: &str, title: &str) {
let html_id = to_html_id(lint);
// We have to use HTML here to be able to manually add an id.
println!(r#"### {title} <a id="user-content-{html_id}-{title}"></a>"#);
}

print_warnings("Added", &added);
print_warnings("Removed", &removed);
print_changed_diff(&changed);
/// GitHub's markdown parsers doesn't like IDs with `::` and `_`. This simplifies
/// the lint name for the HTML ID.
fn to_html_id(lint_name: &str) -> String {
lint_name.replace("clippy::", "").replace('_', "-")
}

/// This generates the `x added` string for the start of the job summery.
/// It linkifies them if possible to jump to the respective heading.
fn count_string(label: &str, count: usize) -> String {
fn count_string(lint: &str, label: &str, count: usize) -> String {
// Headlines are only added, if anything will be displayed under the headline.
// We therefore only want to add links to them if they exist
if count == 0 {
format!("0 {label}")
} else {
let html_id = to_html_id(lint);
// GitHub's job summaries don't add HTML ids to headings. That's why we
// manually have to add them. GitHub prefixes these manual ids with
// `user-content-` and that's how we end up with these awesome links :D
format!("[{count} {label}](#user-content-{label})")
format!("[{count} {label}](#user-content-{html_id}-{label})")
}
}
2 changes: 1 addition & 1 deletion lintcheck/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ fn main() {
let config = LintcheckConfig::new();

match config.subcommand {
Some(Commands::Diff { old, new }) => json::diff(&old, &new),
Some(Commands::Diff { old, new, truncate }) => json::diff(&old, &new, truncate),
Some(Commands::Popular { output, number }) => popular_crates::fetch(output, number).unwrap(),
None => lintcheck(config),
}
Expand Down

0 comments on commit be82340

Please sign in to comment.