-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement copyright notice detection (#4701)
## Summary Add copyright notice detection to enforce the presence of copyright headers in Python files. Configurable settings include: the relevant regular expression, the author name, and the minimum file size, similar to [flake8-copyright](https://github.com/savoirfairelinux/flake8-copyright). Closes #3579 --------- Signed-off-by: ryan <ryang@waabi.ai> Co-authored-by: Charlie Marsh <charlie.r.marsh@gmail.com>
- Loading branch information
1 parent
9f7cc86
commit ab3c023
Showing
26 changed files
with
465 additions
and
24 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,158 @@ | ||
//! Rules related to copyright notices. | ||
pub(crate) mod rules; | ||
|
||
pub mod settings; | ||
|
||
// Snapshot tests for the `MissingCopyrightNotice` rule. Each test lints a small
// Python snippet via `test_snippet` and hands the resulting diagnostics to
// `assert_messages!`.
#[cfg(test)] | ||
mod tests { | ||
use crate::registry::Rule; | ||
use crate::test::test_snippet; | ||
use crate::{assert_messages, settings}; | ||
|
||
// A plain `# Copyright <year>` header, linted with only the rule enabled.
#[test] | ||
fn notice() { | ||
let diagnostics = test_snippet( | ||
r#" | ||
# Copyright 2023 | ||
import os | ||
"# | ||
.trim(), | ||
&settings::Settings::for_rules(vec![Rule::MissingCopyrightNotice]), | ||
); | ||
assert_messages!(diagnostics); | ||
} | ||
|
||
// Header that includes the optional `(C)` marker before the year.
#[test] | ||
fn notice_with_c() { | ||
let diagnostics = test_snippet( | ||
r#" | ||
# Copyright (C) 2023 | ||
import os | ||
"# | ||
.trim(), | ||
&settings::Settings::for_rules(vec![Rule::MissingCopyrightNotice]), | ||
); | ||
assert_messages!(diagnostics); | ||
} | ||
|
||
// Upper-case `COPYRIGHT`; the default pattern carries the `(?i)` flag.
#[test] | ||
fn notice_with_caps() { | ||
let diagnostics = test_snippet( | ||
r#" | ||
# COPYRIGHT (C) 2023 | ||
import os | ||
"# | ||
.trim(), | ||
&settings::Settings::for_rules(vec![Rule::MissingCopyrightNotice]), | ||
); | ||
assert_messages!(diagnostics); | ||
} | ||
|
||
// Header with a `YYYY-YYYY` year range.
#[test] | ||
fn notice_with_range() { | ||
let diagnostics = test_snippet( | ||
r#" | ||
# Copyright (C) 2021-2023 | ||
import os | ||
"# | ||
.trim(), | ||
&settings::Settings::for_rules(vec![Rule::MissingCopyrightNotice]), | ||
); | ||
assert_messages!(diagnostics); | ||
} | ||
|
||
// `author` is configured as "Ruff" and the notice is followed by "Ruff".
#[test] | ||
fn valid_author() { | ||
let diagnostics = test_snippet( | ||
r#" | ||
# Copyright (C) 2023 Ruff | ||
import os | ||
"# | ||
.trim(), | ||
&settings::Settings { | ||
copyright: super::settings::Settings { | ||
author: Some("Ruff".to_string()), | ||
..super::settings::Settings::default() | ||
}, | ||
..settings::Settings::for_rules(vec![Rule::MissingCopyrightNotice]) | ||
}, | ||
); | ||
assert_messages!(diagnostics); | ||
} | ||
|
||
// `author` is configured as "Ruff" but the notice names "Some Author"; the
// recorded snapshot for this test reports CPY001.
#[test] | ||
fn invalid_author() { | ||
let diagnostics = test_snippet( | ||
r#" | ||
# Copyright (C) 2023 Some Author | ||
import os | ||
"# | ||
.trim(), | ||
&settings::Settings { | ||
copyright: super::settings::Settings { | ||
author: Some("Ruff".to_string()), | ||
..super::settings::Settings::default() | ||
}, | ||
..settings::Settings::for_rules(vec![Rule::MissingCopyrightNotice]) | ||
}, | ||
); | ||
assert_messages!(diagnostics); | ||
} | ||
|
||
// No notice at all, but `min_file_size` (256) exceeds the snippet's length, so
// the rule's size guard skips the file.
#[test] | ||
fn small_file() { | ||
let diagnostics = test_snippet( | ||
r#" | ||
import os | ||
"# | ||
.trim(), | ||
&settings::Settings { | ||
copyright: super::settings::Settings { | ||
min_file_size: 256, | ||
..super::settings::Settings::default() | ||
}, | ||
..settings::Settings::for_rules(vec![Rule::MissingCopyrightNotice]) | ||
}, | ||
); | ||
assert_messages!(diagnostics); | ||
} | ||
|
||
// The notice only appears after twenty long comment lines (well over 1024
// bytes), i.e. outside the leading window that the rule searches.
#[test] | ||
fn late_notice() { | ||
let diagnostics = test_snippet( | ||
r#" | ||
# Content Content Content Content Content Content Content Content Content Content | ||
# Content Content Content Content Content Content Content Content Content Content | ||
# Content Content Content Content Content Content Content Content Content Content | ||
# Content Content Content Content Content Content Content Content Content Content | ||
# Content Content Content Content Content Content Content Content Content Content | ||
# Content Content Content Content Content Content Content Content Content Content | ||
# Content Content Content Content Content Content Content Content Content Content | ||
# Content Content Content Content Content Content Content Content Content Content | ||
# Content Content Content Content Content Content Content Content Content Content | ||
# Content Content Content Content Content Content Content Content Content Content | ||
# Content Content Content Content Content Content Content Content Content Content | ||
# Content Content Content Content Content Content Content Content Content Content | ||
# Content Content Content Content Content Content Content Content Content Content | ||
# Content Content Content Content Content Content Content Content Content Content | ||
# Content Content Content Content Content Content Content Content Content Content | ||
# Content Content Content Content Content Content Content Content Content Content | ||
# Content Content Content Content Content Content Content Content Content Content | ||
# Content Content Content Content Content Content Content Content Content Content | ||
# Content Content Content Content Content Content Content Content Content Content | ||
# Content Content Content Content Content Content Content Content Content Content | ||
# Copyright 2023 | ||
"# | ||
.trim(), | ||
&settings::Settings::for_rules(vec![Rule::MissingCopyrightNotice]), | ||
); | ||
assert_messages!(diagnostics); | ||
} | ||
} |
59 changes: 59 additions & 0 deletions
59
crates/ruff/src/rules/copyright/rules/missing_copyright_notice.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
use ruff_text_size::{TextRange, TextSize}; | ||
|
||
use ruff_diagnostics::{Diagnostic, Violation}; | ||
use ruff_macros::{derive_message_formats, violation}; | ||
use ruff_python_ast::source_code::Locator; | ||
|
||
use crate::settings::Settings; | ||
|
||
/// ## What it does
/// Checks for the absence of copyright notices within Python files.
///
/// Only the beginning of the file (roughly the first 1024 bytes) is searched
/// for a match of the configured `notice-rgx` pattern. Files shorter than
/// `min-file-size` bytes are skipped entirely. When `author` is configured,
/// the matched notice must additionally be followed (ignoring leading
/// whitespace) by that author string.
///
/// ## Why is this bad?
/// In some codebases, it's common to have a license header at the top of every
/// file. This rule ensures that the license header is present.
#[violation] | ||
pub struct MissingCopyrightNotice; | ||
|
||
// Supplies the user-facing message shown for CPY001 diagnostics.
impl Violation for MissingCopyrightNotice { | ||
#[derive_message_formats] | ||
// The message is a fixed string with no interpolated values.
fn message(&self) -> String { | ||
format!("Missing copyright notice at top of file") | ||
} | ||
} | ||
|
||
/// CPY001 | ||
pub(crate) fn missing_copyright_notice( | ||
locator: &Locator, | ||
settings: &Settings, | ||
) -> Option<Diagnostic> { | ||
// Ignore files that are too small to contain a copyright notice. | ||
if locator.len() < settings.copyright.min_file_size { | ||
return None; | ||
} | ||
|
||
// Only search the first 1024 bytes in the file. | ||
let contents = if locator.len() < 1024 { | ||
locator.contents() | ||
} else { | ||
locator.up_to(TextSize::from(1024)) | ||
}; | ||
|
||
// Locate the copyright notice. | ||
if let Some(match_) = settings.copyright.notice_rgx.find(contents) { | ||
match settings.copyright.author { | ||
Some(ref author) => { | ||
// Ensure that it's immediately followed by the author. | ||
if contents[match_.end()..].trim_start().starts_with(author) { | ||
return None; | ||
} | ||
} | ||
None => return None, | ||
} | ||
} | ||
|
||
Some(Diagnostic::new( | ||
MissingCopyrightNotice, | ||
TextRange::default(), | ||
)) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
pub(crate) use missing_copyright_notice::{missing_copyright_notice, MissingCopyrightNotice}; | ||
|
||
mod missing_copyright_notice; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
//! Settings for the `copyright` plugin. | ||
|
||
use once_cell::sync::Lazy; | ||
use regex::Regex; | ||
use serde::{Deserialize, Serialize}; | ||
|
||
use ruff_macros::{CacheKey, CombineOptions, ConfigurationOptions}; | ||
|
||
#[derive( | ||
Debug, PartialEq, Eq, Serialize, Deserialize, Default, ConfigurationOptions, CombineOptions, | ||
)] | ||
#[serde( | ||
deny_unknown_fields, | ||
rename_all = "kebab-case", | ||
rename = "CopyrightOptions" | ||
)] | ||
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))] | ||
pub struct Options { | ||
#[option( | ||
default = r#"(?i)Copyright\s+(\(C\)\s+)?\d{4}([-,]\d{4})*"#, | ||
value_type = "str", | ||
example = r#"notice-rgx = "(?i)Copyright \\(C\\) \\d{4}""# | ||
)] | ||
/// The regular expression used to match the copyright notice, compiled | ||
/// with the [`regex`](https://docs.rs/regex/latest/regex/) crate. | ||
/// | ||
/// Defaults to `(?i)Copyright\s+(\(C\)\s+)?\d{4}(-\d{4})*`, which matches | ||
/// the following: | ||
/// - `Copyright 2023` | ||
/// - `Copyright (C) 2023` | ||
/// - `Copyright 2021-2023` | ||
/// - `Copyright (C) 2021-2023` | ||
pub notice_rgx: Option<String>, | ||
#[option(default = "None", value_type = "str", example = r#"author = "Ruff""#)] | ||
/// Author to enforce within the copyright notice. If provided, the | ||
/// author must be present immediately following the copyright notice. | ||
pub author: Option<String>, | ||
#[option( | ||
default = r#"0"#, | ||
value_type = "int", | ||
example = r#" | ||
# Avoid enforcing a header on files smaller than 1024 bytes. | ||
min-file-size = 1024 | ||
"# | ||
)] | ||
/// A minimum file size (in bytes) required for a copyright notice to | ||
/// be enforced. By default, all files are validated. | ||
pub min_file_size: Option<usize>, | ||
} | ||
|
||
/// Resolved settings for the `copyright` plugin, with every option given a
/// concrete value and the notice pattern already compiled.
#[derive(Debug, CacheKey)] | ||
pub struct Settings { | ||
/// Compiled regular expression used to locate the copyright notice.
pub notice_rgx: Regex, | ||
/// Author that must follow the notice, if one is enforced.
pub author: Option<String>, | ||
/// Minimum file size, in bytes, below which the rule is not enforced.
pub min_file_size: usize, | ||
} | ||
|
||
/// Default notice pattern, compiled lazily on first use: a case-insensitive
/// `Copyright`, an optional `(C)`, then a four-digit year optionally followed
/// by `-YYYY` ranges. The `unwrap` is on a fixed literal pattern, not on user
/// input.
static COPYRIGHT: Lazy<Regex> = | ||
Lazy::new(|| Regex::new(r"(?i)Copyright\s+(\(C\)\s+)?\d{4}(-\d{4})*").unwrap()); | ||
|
||
impl Default for Settings { | ||
fn default() -> Self { | ||
Self { | ||
notice_rgx: COPYRIGHT.clone(), | ||
author: None, | ||
min_file_size: 0, | ||
} | ||
} | ||
} | ||
|
||
impl From<Options> for Settings { | ||
fn from(options: Options) -> Self { | ||
Self { | ||
notice_rgx: options | ||
.notice_rgx | ||
.map(|pattern| Regex::new(&pattern)) | ||
.transpose() | ||
.expect("Invalid `notice-rgx`") | ||
.unwrap_or_else(|| COPYRIGHT.clone()), | ||
author: options.author, | ||
min_file_size: options.min_file_size.unwrap_or_default(), | ||
} | ||
} | ||
} | ||
|
||
impl From<Settings> for Options { | ||
fn from(settings: Settings) -> Self { | ||
Self { | ||
notice_rgx: Some(settings.notice_rgx.to_string()), | ||
author: settings.author, | ||
min_file_size: Some(settings.min_file_size), | ||
} | ||
} | ||
} |
12 changes: 12 additions & 0 deletions
12
crates/ruff/src/rules/copyright/snapshots/ruff__rules__copyright__tests__invalid_author.snap
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
--- | ||
source: crates/ruff/src/rules/copyright/mod.rs | ||
--- | ||
<filename>:1:1: CPY001 Missing copyright notice at top of file | ||
| | ||
1 | # Copyright (C) 2023 Some Author | ||
| CPY001 | ||
2 | | ||
3 | import os | ||
| | ||
|
||
|
Oops, something went wrong.