Skip to content

Commit

Permalink
Implement copyright notice detection (#4701)
Browse files Browse the repository at this point in the history
## Summary

Add copyright notice detection to enforce the presence of copyright
headers in Python files.

Configurable settings include: the relevant regular expression, the
author name, and the minimum file size, similar to
[flake8-copyright](https://github.com/savoirfairelinux/flake8-copyright).

Closes #3579

---------

Signed-off-by: ryan <ryang@waabi.ai>
Co-authored-by: Charlie Marsh <charlie.r.marsh@gmail.com>
  • Loading branch information
Ryang20718 and charliermarsh authored Jun 11, 2023
1 parent 9f7cc86 commit ab3c023
Show file tree
Hide file tree
Showing 26 changed files with 465 additions and 24 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@ quality tools, including:
- [flake8-builtins](https://pypi.org/project/flake8-builtins/)
- [flake8-commas](https://pypi.org/project/flake8-commas/)
- [flake8-comprehensions](https://pypi.org/project/flake8-comprehensions/)
- [flake8-copyright](https://pypi.org/project/flake8-copyright/)
- [flake8-datetimez](https://pypi.org/project/flake8-datetimez/)
- [flake8-debugger](https://pypi.org/project/flake8-debugger/)
- [flake8-django](https://pypi.org/project/flake8-django/)
Expand Down
8 changes: 8 additions & 0 deletions crates/ruff/src/checkers/physical_lines.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use ruff_python_ast::source_code::{Indexer, Locator, Stylist};
use ruff_python_whitespace::UniversalNewlines;

use crate::registry::Rule;
use crate::rules::copyright::rules::missing_copyright_notice;
use crate::rules::flake8_executable::helpers::{extract_shebang, ShebangDirective};
use crate::rules::flake8_executable::rules::{
shebang_missing, shebang_newline, shebang_not_executable, shebang_python, shebang_whitespace,
Expand Down Expand Up @@ -49,6 +50,7 @@ pub(crate) fn check_physical_lines(
let enforce_blank_line_contains_whitespace =
settings.rules.enabled(Rule::BlankLineWithWhitespace);
let enforce_tab_indentation = settings.rules.enabled(Rule::TabIndentation);
let enforce_copyright_notice = settings.rules.enabled(Rule::MissingCopyrightNotice);

let fix_unnecessary_coding_comment = settings.rules.should_fix(Rule::UTF8EncodingDeclaration);
let fix_shebang_whitespace = settings.rules.should_fix(Rule::ShebangLeadingWhitespace);
Expand Down Expand Up @@ -172,6 +174,12 @@ pub(crate) fn check_physical_lines(
}
}

if enforce_copyright_notice {
if let Some(diagnostic) = missing_copyright_notice(locator, settings) {
diagnostics.push(diagnostic);
}
}

diagnostics
}

Expand Down
3 changes: 3 additions & 0 deletions crates/ruff/src/codes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,9 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
(Flake8Simplify, "401") => (RuleGroup::Unspecified, rules::flake8_simplify::rules::IfElseBlockInsteadOfDictGet),
(Flake8Simplify, "910") => (RuleGroup::Unspecified, rules::flake8_simplify::rules::DictGetWithNoneDefault),

// copyright
(Copyright, "001") => (RuleGroup::Unspecified, rules::copyright::rules::MissingCopyrightNotice),

// pyupgrade
(Pyupgrade, "001") => (RuleGroup::Unspecified, rules::pyupgrade::rules::UselessMetaclassType),
(Pyupgrade, "003") => (RuleGroup::Unspecified, rules::pyupgrade::rules::TypeOfPrimitive),
Expand Down
4 changes: 4 additions & 0 deletions crates/ruff/src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ pub enum Linter {
/// [flake8-commas](https://pypi.org/project/flake8-commas/)
#[prefix = "COM"]
Flake8Commas,
/// Copyright-related rules
#[prefix = "CPY"]
Copyright,
/// [flake8-comprehensions](https://pypi.org/project/flake8-comprehensions/)
#[prefix = "C4"]
Flake8Comprehensions,
Expand Down Expand Up @@ -267,6 +270,7 @@ impl Rule {
| Rule::ShebangLeadingWhitespace
| Rule::TrailingWhitespace
| Rule::TabIndentation
| Rule::MissingCopyrightNotice
| Rule::BlankLineWithWhitespace => LintSource::PhysicalLines,
Rule::AmbiguousUnicodeCharacterComment
| Rule::AmbiguousUnicodeCharacterDocstring
Expand Down
158 changes: 158 additions & 0 deletions crates/ruff/src/rules/copyright/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
//! Rules related to copyright notices.
pub(crate) mod rules;

pub mod settings;

// Tests for the copyright rule. Each test lints an inline Python snippet with
// only `Rule::MissingCopyrightNotice` enabled and hands the resulting
// diagnostics to `assert_messages!` (presumably a snapshot assertion — confirm
// against the macro definition).
#[cfg(test)]
mod tests {
use crate::registry::Rule;
use crate::test::test_snippet;
use crate::{assert_messages, settings};

// Bare `Copyright <year>` header, default settings.
#[test]
fn notice() {
let diagnostics = test_snippet(
r#"
# Copyright 2023
import os
"#
.trim(),
&settings::Settings::for_rules(vec![Rule::MissingCopyrightNotice]),
);
assert_messages!(diagnostics);
}

// Header that includes the `(C)` marker before the year.
#[test]
fn notice_with_c() {
let diagnostics = test_snippet(
r#"
# Copyright (C) 2023
import os
"#
.trim(),
&settings::Settings::for_rules(vec![Rule::MissingCopyrightNotice]),
);
assert_messages!(diagnostics);
}

// Upper-case `COPYRIGHT`; exercises the case-insensitive default pattern.
#[test]
fn notice_with_caps() {
let diagnostics = test_snippet(
r#"
# COPYRIGHT (C) 2023
import os
"#
.trim(),
&settings::Settings::for_rules(vec![Rule::MissingCopyrightNotice]),
);
assert_messages!(diagnostics);
}

// Header with a year range (`2021-2023`) rather than a single year.
#[test]
fn notice_with_range() {
let diagnostics = test_snippet(
r#"
# Copyright (C) 2021-2023
import os
"#
.trim(),
&settings::Settings::for_rules(vec![Rule::MissingCopyrightNotice]),
);
assert_messages!(diagnostics);
}

// `author` is configured as "Ruff" and the header names that same author.
#[test]
fn valid_author() {
let diagnostics = test_snippet(
r#"
# Copyright (C) 2023 Ruff
import os
"#
.trim(),
&settings::Settings {
copyright: super::settings::Settings {
author: Some("Ruff".to_string()),
..super::settings::Settings::default()
},
..settings::Settings::for_rules(vec![Rule::MissingCopyrightNotice])
},
);
assert_messages!(diagnostics);
}

// `author` is configured as "Ruff" but the header names a different author,
// so the notice does not satisfy the configured author requirement.
#[test]
fn invalid_author() {
let diagnostics = test_snippet(
r#"
# Copyright (C) 2023 Some Author
import os
"#
.trim(),
&settings::Settings {
copyright: super::settings::Settings {
author: Some("Ruff".to_string()),
..super::settings::Settings::default()
},
..settings::Settings::for_rules(vec![Rule::MissingCopyrightNotice])
},
);
assert_messages!(diagnostics);
}

// No notice at all, but the snippet is far shorter than the configured
// `min_file_size` of 256 bytes.
#[test]
fn small_file() {
let diagnostics = test_snippet(
r#"
import os
"#
.trim(),
&settings::Settings {
copyright: super::settings::Settings {
min_file_size: 256,
..super::settings::Settings::default()
},
..settings::Settings::for_rules(vec![Rule::MissingCopyrightNotice])
},
);
assert_messages!(diagnostics);
}

// The notice only appears after twenty long filler comment lines (well over
// 1024 bytes of content), i.e. late in the file rather than at the top.
#[test]
fn late_notice() {
let diagnostics = test_snippet(
r#"
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Copyright 2023
"#
.trim(),
&settings::Settings::for_rules(vec![Rule::MissingCopyrightNotice]),
);
assert_messages!(diagnostics);
}
}
59 changes: 59 additions & 0 deletions crates/ruff/src/rules/copyright/rules/missing_copyright_notice.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
use ruff_text_size::{TextRange, TextSize};

use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::source_code::Locator;

use crate::settings::Settings;

/// ## What it does
/// Checks for the absence of copyright notices within Python files.
///
/// The notice is matched against the configured `notice-rgx` pattern and is
/// only searched for within the first 1024 bytes of the file. Files smaller
/// than the configured `min-file-size` are not checked. If an `author` is
/// configured, it must immediately follow the matched notice.
///
/// ## Why is this bad?
/// In some codebases, it's common to have a license header at the top of every
/// file. This rule ensures that the license header is present.
#[violation]
pub struct MissingCopyrightNotice;

impl Violation for MissingCopyrightNotice {
/// Fixed, argument-free message reported for CPY001 diagnostics.
#[derive_message_formats]
fn message(&self) -> String {
format!("Missing copyright notice at top of file")
}
}

/// CPY001
///
/// Returns a [`MissingCopyrightNotice`] diagnostic when no acceptable
/// copyright notice is found near the top of the file, and `None` otherwise.
///
/// The check proceeds as follows:
/// - Files shorter than `settings.copyright.min_file_size` bytes are skipped.
/// - Only the first 1024 bytes of the source are searched. The cut-off is
///   moved back to the nearest UTF-8 character boundary so that a multi-byte
///   character straddling byte 1024 cannot cause a slicing panic.
/// - The first match of `settings.copyright.notice_rgx` is located. If an
///   author is configured, the text after the match (ignoring leading
///   whitespace) must start with that author.
///
/// The diagnostic is attached to the default (empty) range at the start of
/// the file.
pub(crate) fn missing_copyright_notice(
    locator: &Locator,
    settings: &Settings,
) -> Option<Diagnostic> {
    // Ignore files that are too small to contain a copyright notice.
    if locator.len() < settings.copyright.min_file_size {
        return None;
    }

    // Only search the first 1024 bytes in the file.
    let contents = if locator.len() < 1024 {
        locator.contents()
    } else {
        // Byte 1024 may fall in the middle of a multi-byte UTF-8 character,
        // and slicing there would panic. Walk back (at most three bytes) to
        // the closest character boundary. Offset 0 is always a boundary, so
        // the loop terminates.
        let source = locator.contents();
        let mut end: u32 = 1024;
        // `u32` to `usize` is a lossless widening for this small constant.
        while !source.is_char_boundary(end as usize) {
            end -= 1;
        }
        locator.up_to(TextSize::from(end))
    };

    // Locate the copyright notice.
    if let Some(match_) = settings.copyright.notice_rgx.find(contents) {
        match settings.copyright.author {
            Some(ref author) => {
                // Ensure that it's immediately followed by the author.
                if contents[match_.end()..].trim_start().starts_with(author) {
                    return None;
                }
            }
            // No author requirement: any matching notice is sufficient.
            None => return None,
        }
    }

    Some(Diagnostic::new(
        MissingCopyrightNotice,
        TextRange::default(),
    ))
}
3 changes: 3 additions & 0 deletions crates/ruff/src/rules/copyright/rules/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
pub(crate) use missing_copyright_notice::{missing_copyright_notice, MissingCopyrightNotice};

mod missing_copyright_notice;
94 changes: 94 additions & 0 deletions crates/ruff/src/rules/copyright/settings.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
//! Settings for the `copyright` plugin.

use once_cell::sync::Lazy;
use regex::Regex;
use serde::{Deserialize, Serialize};

use ruff_macros::{CacheKey, CombineOptions, ConfigurationOptions};

#[derive(
    Debug, PartialEq, Eq, Serialize, Deserialize, Default, ConfigurationOptions, CombineOptions,
)]
#[serde(
    deny_unknown_fields,
    rename_all = "kebab-case",
    rename = "CopyrightOptions"
)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
pub struct Options {
    // The advertised default must stay in sync with the pattern that is
    // actually compiled when `notice-rgx` is unset (year ranges are
    // hyphen-separated only), and with the doc comment below.
    #[option(
        default = r#"(?i)Copyright\s+(\(C\)\s+)?\d{4}(-\d{4})*"#,
        value_type = "str",
        example = r#"notice-rgx = "(?i)Copyright \\(C\\) \\d{4}""#
    )]
    /// The regular expression used to match the copyright notice, compiled
    /// with the [`regex`](https://docs.rs/regex/latest/regex/) crate.
    ///
    /// Defaults to `(?i)Copyright\s+(\(C\)\s+)?\d{4}(-\d{4})*`, which matches
    /// the following:
    /// - `Copyright 2023`
    /// - `Copyright (C) 2023`
    /// - `Copyright 2021-2023`
    /// - `Copyright (C) 2021-2023`
    pub notice_rgx: Option<String>,
    #[option(default = "None", value_type = "str", example = r#"author = "Ruff""#)]
    /// Author to enforce within the copyright notice. If provided, the
    /// author must be present immediately following the copyright notice.
    pub author: Option<String>,
    #[option(
        default = r#"0"#,
        value_type = "int",
        example = r#"
# Avoid enforcing a header on files smaller than 1024 bytes.
min-file-size = 1024
"#
    )]
    /// A minimum file size (in bytes) required for a copyright notice to
    /// be enforced. By default, all files are validated.
    pub min_file_size: Option<usize>,
}

/// Resolved settings for the copyright rule, built from [`Options`] with
/// defaults filled in.
#[derive(Debug, CacheKey)]
pub struct Settings {
/// Compiled pattern used to locate the copyright notice.
pub notice_rgx: Regex,
/// Author that must immediately follow the notice, if one is required.
pub author: Option<String>,
/// Minimum file size, in bytes, below which the rule is not enforced.
pub min_file_size: usize,
}

/// Default notice pattern, used when no `notice-rgx` is configured:
/// case-insensitive `Copyright`, an optional `(C)` marker, a four-digit year,
/// and any number of `-YYYY` suffixes.
///
/// The pattern is a fixed literal, so the `unwrap` can only fail if the
/// literal itself is edited into an invalid regex.
static COPYRIGHT: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?i)Copyright\s+(\(C\)\s+)?\d{4}(-\d{4})*").unwrap());

impl Default for Settings {
fn default() -> Self {
Self {
notice_rgx: COPYRIGHT.clone(),
author: None,
min_file_size: 0,
}
}
}

impl From<Options> for Settings {
fn from(options: Options) -> Self {
Self {
notice_rgx: options
.notice_rgx
.map(|pattern| Regex::new(&pattern))
.transpose()
.expect("Invalid `notice-rgx`")
.unwrap_or_else(|| COPYRIGHT.clone()),
author: options.author,
min_file_size: options.min_file_size.unwrap_or_default(),
}
}
}

impl From<Settings> for Options {
fn from(settings: Settings) -> Self {
Self {
notice_rgx: Some(settings.notice_rgx.to_string()),
author: settings.author,
min_file_size: Some(settings.min_file_size),
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
source: crates/ruff/src/rules/copyright/mod.rs
---
<filename>:1:1: CPY001 Missing copyright notice at top of file
|
1 | # Copyright (C) 2023 Some Author
| CPY001
2 |
3 | import os
|


Loading

0 comments on commit ab3c023

Please sign in to comment.