From f466a15e42d061fc630a186e93f8d9c1a5dbbf01 Mon Sep 17 00:00:00 2001
From: Daniel Muller
Date: Mon, 16 Sep 2024 11:22:27 -0600
Subject: [PATCH] fix: support file list input for format command to bypass OS limits

Add support for '@file' syntax to read format targets from a file
Implement batch processing to handle large numbers of files
Maintain compatibility with direct command-line arguments

This change allows the format command to process an arbitrarily large
number of files by reading targets from a file, bypassing OS command line
length limits. It addresses issues with pre-commit hooks and other tooling
when formatting many files at once.

Note: This introduces a limitation where files starting with '@' cannot be
directly specified as format targets. Users must use the full file path for
such files.

Closes #387
---
 format/private/format.sh | 124 +++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 104 insertions(+), 20 deletions(-)

diff --git a/format/private/format.sh b/format/private/format.sh
index ddbcddf8..d0c47a48 100755
--- a/format/private/format.sh
+++ b/format/private/format.sh
@@ -43,7 +43,7 @@ function on_exit {
       ;;
     *)
       echo >&2 "FAILED: A formatter tool exited with code $code"
-      if [ "$mode" == "check" ]; then
+      if [[ "${mode:-}" == "check" ]]; then
         echo >&2 "Try running '$FIX_CMD' to fix this."
       fi
       ;;
@@ -52,6 +52,90 @@ function on_exit {
 
 trap on_exit EXIT
 
+# Runs run-format for one language, splitting the targets into batches so that
+# no single invocation exceeds the OS limit on command line length.
+#
+# Arguments:
+#   $1   - language, forwarded to run-format
+#   $2   - formatter binary, forwarded to run-format
+#   $3   - formatter flags, forwarded to run-format
+#   $4.. - format targets. A lone argument of the form '@path' is replaced by
+#          the non-empty lines of the file 'path', one target per line.
+# Returns:
+#   1 if the '@path' file does not exist or lists no targets.
+function process_args_in_batches() {
+  local lang="$1"
+  local bin="$2"
+  local flags="$3"
+  shift 3
+  local args=("$@")
+
+  # Uses up to ARG_MAX - 2k, or 128k, whichever is smaller, characters per
+  # command. This was derived from following the defaults from xargs
+  # https://www.gnu.org/software/findutils/manual/html_node/find_html/Limiting-Command-Size.html
+  # Falls back to 128k when getconf cannot report a numeric ARG_MAX.
+  local arg_max
+  local max_batch_size=128000
+  arg_max=$(getconf ARG_MAX 2>/dev/null || true)
+  if [[ "$arg_max" =~ ^[0-9]+$ ]] && ((arg_max - 2048 < max_batch_size)); then
+    max_batch_size=$((arg_max - 2048))
+  fi
+
+  # Check if there's only one argument and it starts with '@'
+  # If so, read the file to get the actual files to format.
+  if [[ ${#args[@]} -eq 1 && "${args[0]}" == @* ]]; then
+    local file="${args[0]:1}" # Strip the '@' symbol
+    if [[ ! -f "$file" ]]; then
+      echo >&2 "Error: File '$file' not found."
+      return 1
+    fi
+    # Read line by line rather than with mapfile, which needs bash 4 or
+    # newer. Blank lines are skipped, and a last line without a trailing
+    # newline is still picked up.
+    local line
+    args=()
+    while IFS= read -r line || [[ -n "$line" ]]; do
+      if [[ -n "$line" ]]; then
+        args+=("$line")
+      fi
+    done < "$file"
+    if [[ ${#args[@]} -eq 0 ]]; then
+      echo >&2 "Error: No arguments found in the specified file."
+      return 1
+    fi
+  fi
+
+  # If no arguments were passed, still run run-format once
+  if [[ ${#args[@]} -eq 0 ]]; then
+    run-format "$lang" "$bin" "$flags"
+    return
+  fi
+
+  # Format files in batches so that we do not exceed the OS limit for line
+  # length when calling subcommands
+  local arg
+  local current_batch_size=0
+  local current_batch=()
+  for arg in "${args[@]}"; do
+    # Only flush a non-empty batch: a single argument longer than the limit
+    # must not cause run-format to be called without any targets.
+    if ((${#current_batch[@]} > 0 && current_batch_size + ${#arg} + 1 >= max_batch_size)); then
+      run-format "$lang" "$bin" "$flags" "${current_batch[@]}"
+      current_batch=()
+      current_batch_size=0
+    fi
+    current_batch+=("$arg")
+    # +1 for space between arguments
+    current_batch_size=$((current_batch_size + ${#arg} + 1))
+  done
+
+  # Process any remaining arguments. Check the element count, because
+  # "$current_batch" expands to the first element only and that may be empty.
+  if ((${#current_batch[@]} > 0)); then
+    run-format "$lang" "$bin" "$flags" "${current_batch[@]}"
+  fi
+}
+
 # Exports a function that is similar to 'git ls-files'
 # ls-files [...]
 function ls-files {
@@ -206,7 +290,7 @@ function run-format {
     Go)
       # gofmt doesn't produce non-zero exit code so we must check for non-empty output
       # https://github.com/golang/go/issues/24230
-      if [ "$mode" == "check" ]; then
+      if [[ "${mode:-}" == "check" ]]; then
         GOFMT_OUT=$(mktemp)
         time {
           echo "$files" | tr \\n \\0 | xargs -0 "$bin" $args > "$GOFMT_OUT"
@@ -239,24 +323,24 @@ function run-format {
 
 # Check if our script is the main entry point, not being sourced by a test
 if [ "${BASH_SOURCE[0]}" -ef "$0" ]; then
-  bin="$(rlocation $tool)"
-  if [ ! -e "$bin" ]; then
-    echo >&2 "cannot locate binary $tool"
-    exit 1
-  fi
+    bin="$(rlocation $tool)"
+    if [ ! -e "$bin" ]; then
+      echo >&2 "cannot locate binary $tool"
+      exit 1
+    fi
 
-  run-format "$lang" "$bin" "${flags:-""}" $@
+    process_args_in_batches "$lang" "$bin" "${flags:-""}" "$@"
 
-  # Currently these aren't exposed as separate languages to the attributes of format_multirun
-  # So we format all these languages as part of "JavaScript".
-  if [[ "$lang" == "JavaScript" ]]; then
-    run-format "JSON" "$bin" "${flags:-""}" $@
-    run-format "TSX" "$bin" "${flags:-""}" $@
-    run-format "TypeScript" "$bin" "${flags:-""}" $@
-    run-format "Vue" "$bin" "${flags:-""}" $@
-  fi
-  if [[ "$lang" == "CSS" ]]; then
-    run-format "Less" "$bin" "${flags:-""}" $@
-    run-format "SCSS" "$bin" "${flags:-""}" $@
-  fi
+    # Handle additional languages for JavaScript and CSS
+    if [[ "$lang" == "JavaScript" ]]; then
+      for sublang in "JSON" "TSX" "TypeScript" "Vue"; do
+        process_args_in_batches "$sublang" "$bin" "${flags:-""}" "$@"
+      done
+    fi
+    if [[ "$lang" == "CSS" ]]; then
+      for sublang in "Less" "SCSS"; do
+        process_args_in_batches "$sublang" "$bin" "${flags:-""}" "$@"
+      done
+    fi
 fi
+