Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement copyright notice detection #4701

Merged
merged 9 commits into from
Jun 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@ quality tools, including:
- [flake8-builtins](https://pypi.org/project/flake8-builtins/)
- [flake8-commas](https://pypi.org/project/flake8-commas/)
- [flake8-comprehensions](https://pypi.org/project/flake8-comprehensions/)
- [flake8-copyright](https://pypi.org/project/flake8-copyright/)
- [flake8-datetimez](https://pypi.org/project/flake8-datetimez/)
- [flake8-debugger](https://pypi.org/project/flake8-debugger/)
- [flake8-django](https://pypi.org/project/flake8-django/)
Expand Down
8 changes: 8 additions & 0 deletions crates/ruff/src/checkers/physical_lines.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use ruff_python_ast::source_code::{Indexer, Locator, Stylist};
use ruff_python_whitespace::UniversalNewlines;

use crate::registry::Rule;
use crate::rules::copyright::rules::missing_copyright_notice;
use crate::rules::flake8_executable::helpers::{extract_shebang, ShebangDirective};
use crate::rules::flake8_executable::rules::{
shebang_missing, shebang_newline, shebang_not_executable, shebang_python, shebang_whitespace,
Expand Down Expand Up @@ -49,6 +50,7 @@ pub(crate) fn check_physical_lines(
let enforce_blank_line_contains_whitespace =
settings.rules.enabled(Rule::BlankLineWithWhitespace);
let enforce_tab_indentation = settings.rules.enabled(Rule::TabIndentation);
let enforce_copyright_notice = settings.rules.enabled(Rule::MissingCopyrightNotice);

let fix_unnecessary_coding_comment = settings.rules.should_fix(Rule::UTF8EncodingDeclaration);
let fix_shebang_whitespace = settings.rules.should_fix(Rule::ShebangLeadingWhitespace);
Expand Down Expand Up @@ -172,6 +174,12 @@ pub(crate) fn check_physical_lines(
}
}

if enforce_copyright_notice {
if let Some(diagnostic) = missing_copyright_notice(locator, settings) {
diagnostics.push(diagnostic);
}
}

diagnostics
}

Expand Down
3 changes: 3 additions & 0 deletions crates/ruff/src/codes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,9 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
(Flake8Simplify, "401") => (RuleGroup::Unspecified, rules::flake8_simplify::rules::IfElseBlockInsteadOfDictGet),
(Flake8Simplify, "910") => (RuleGroup::Unspecified, rules::flake8_simplify::rules::DictGetWithNoneDefault),

// copyright
(Copyright, "001") => (RuleGroup::Unspecified, rules::copyright::rules::MissingCopyrightNotice),

// pyupgrade
(Pyupgrade, "001") => (RuleGroup::Unspecified, rules::pyupgrade::rules::UselessMetaclassType),
(Pyupgrade, "003") => (RuleGroup::Unspecified, rules::pyupgrade::rules::TypeOfPrimitive),
Expand Down
4 changes: 4 additions & 0 deletions crates/ruff/src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ pub enum Linter {
/// [flake8-commas](https://pypi.org/project/flake8-commas/)
#[prefix = "COM"]
Flake8Commas,
/// Copyright-related rules
#[prefix = "CPY"]
Copyright,
/// [flake8-comprehensions](https://pypi.org/project/flake8-comprehensions/)
#[prefix = "C4"]
Flake8Comprehensions,
Expand Down Expand Up @@ -267,6 +270,7 @@ impl Rule {
| Rule::ShebangLeadingWhitespace
| Rule::TrailingWhitespace
| Rule::TabIndentation
| Rule::MissingCopyrightNotice
| Rule::BlankLineWithWhitespace => LintSource::PhysicalLines,
Rule::AmbiguousUnicodeCharacterComment
| Rule::AmbiguousUnicodeCharacterDocstring
Expand Down
158 changes: 158 additions & 0 deletions crates/ruff/src/rules/copyright/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
//! Rules related to copyright notices.
pub(crate) mod rules;

pub mod settings;

#[cfg(test)]
mod tests {
    use crate::registry::Rule;
    use crate::test::test_snippet;
    use crate::{assert_messages, settings};

    /// Linter settings that enable only `MissingCopyrightNotice`.
    fn notice_rule_only() -> settings::Settings {
        settings::Settings::for_rules(vec![Rule::MissingCopyrightNotice])
    }

    /// Same as `notice_rule_only`, but with the given copyright settings.
    fn with_copyright(copyright: super::settings::Settings) -> settings::Settings {
        settings::Settings {
            copyright,
            ..notice_rule_only()
        }
    }

    /// Plain `Copyright <year>` notice at the top of the snippet.
    #[test]
    fn notice() {
        let source = r#"
# Copyright 2023
import os
"#
        .trim();
        let diagnostics = test_snippet(source, &notice_rule_only());
        assert_messages!(diagnostics);
    }

    /// Notice that includes the `(C)` marker.
    #[test]
    fn notice_with_c() {
        let source = r#"
# Copyright (C) 2023
import os
"#
        .trim();
        let diagnostics = test_snippet(source, &notice_rule_only());
        assert_messages!(diagnostics);
    }

    /// Notice written in upper case.
    #[test]
    fn notice_with_caps() {
        let source = r#"
# COPYRIGHT (C) 2023
import os
"#
        .trim();
        let diagnostics = test_snippet(source, &notice_rule_only());
        assert_messages!(diagnostics);
    }

    /// Notice that carries a year range rather than a single year.
    #[test]
    fn notice_with_range() {
        let source = r#"
# Copyright (C) 2021-2023
import os
"#
        .trim();
        let diagnostics = test_snippet(source, &notice_rule_only());
        assert_messages!(diagnostics);
    }

    /// Notice followed by the author that the settings require.
    #[test]
    fn valid_author() {
        let source = r#"
# Copyright (C) 2023 Ruff
import os
"#
        .trim();
        let copyright = super::settings::Settings {
            author: Some("Ruff".to_string()),
            ..super::settings::Settings::default()
        };
        let diagnostics = test_snippet(source, &with_copyright(copyright));
        assert_messages!(diagnostics);
    }

    /// Notice followed by an author other than the one the settings require.
    #[test]
    fn invalid_author() {
        let source = r#"
# Copyright (C) 2023 Some Author
import os
"#
        .trim();
        let copyright = super::settings::Settings {
            author: Some("Ruff".to_string()),
            ..super::settings::Settings::default()
        };
        let diagnostics = test_snippet(source, &with_copyright(copyright));
        assert_messages!(diagnostics);
    }

    /// Snippet without a notice that is shorter than the configured minimum
    /// file size.
    #[test]
    fn small_file() {
        let source = r#"
import os
"#
        .trim();
        let copyright = super::settings::Settings {
            min_file_size: 256,
            ..super::settings::Settings::default()
        };
        let diagnostics = test_snippet(source, &with_copyright(copyright));
        assert_messages!(diagnostics);
    }

    /// Notice that only appears after more than 1024 bytes of comments.
    #[test]
    fn late_notice() {
        let source = r#"
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Content Content Content Content Content Content Content Content Content Content
# Copyright 2023
"#
        .trim();
        let diagnostics = test_snippet(source, &notice_rule_only());
        assert_messages!(diagnostics);
    }
}
59 changes: 59 additions & 0 deletions crates/ruff/src/rules/copyright/rules/missing_copyright_notice.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
use ruff_text_size::{TextRange, TextSize};

use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::source_code::Locator;

use crate::settings::Settings;

/// ## What it does
/// Checks for the absence of copyright notices within Python files.
///
/// Only the beginning of the file (the first 1024 bytes) is searched, using
/// the configured notice pattern. If an author is configured, the author must
/// immediately follow the matched notice (leading whitespace aside). Files
/// smaller than the configured minimum file size are not checked.
///
/// ## Why is this bad?
/// In some codebases, it's common to have a license header at the top of every
/// file. This rule ensures that the license header is present.
#[violation]
pub struct MissingCopyrightNotice;

// Diagnostic message for CPY001. The violation carries no fields, so the
// message is the same for every file.
impl Violation for MissingCopyrightNotice {
#[derive_message_formats]
fn message(&self) -> String {
format!("Missing copyright notice at top of file")
}
}

/// CPY001
///
/// Returns a `MissingCopyrightNotice` diagnostic when no acceptable copyright
/// notice is found near the top of the file, and `None` otherwise.
///
/// A file is accepted when:
/// - it is smaller than `settings.copyright.min_file_size` bytes, or
/// - `settings.copyright.notice_rgx` matches within the first 1024 bytes and,
///   if `settings.copyright.author` is set, the text following the first match
///   (after skipping leading whitespace) starts with that author.
pub(crate) fn missing_copyright_notice(
    locator: &Locator,
    settings: &Settings,
) -> Option<Diagnostic> {
    /// Maximum number of leading bytes that are searched for the notice.
    const SEARCH_LIMIT: usize = 1024;

    // Ignore files that are too small to contain a copyright notice.
    if locator.len() < settings.copyright.min_file_size {
        return None;
    }

    // Only search the first `SEARCH_LIMIT` bytes in the file.
    let contents = if locator.len() < SEARCH_LIMIT {
        locator.contents()
    } else {
        // A fixed byte offset may land in the middle of a multi-byte UTF-8
        // character. Back up to the nearest character boundary so the
        // cut-off never splits a character. Offset 0 is always a boundary,
        // so the loop terminates.
        let source = locator.contents();
        let mut end = SEARCH_LIMIT;
        while !source.is_char_boundary(end) {
            end -= 1;
        }
        locator.up_to(TextSize::try_from(end).expect("search limit fits in `TextSize`"))
    };

    // Locate the copyright notice.
    if let Some(match_) = settings.copyright.notice_rgx.find(contents) {
        match settings.copyright.author {
            Some(ref author) => {
                // Ensure that it's immediately followed by the author.
                if contents[match_.end()..].trim_start().starts_with(author) {
                    return None;
                }
            }
            None => return None,
        }
    }

    // The diagnostic is not tied to a specific location in the file, so it
    // uses the default (empty) range.
    Some(Diagnostic::new(
        MissingCopyrightNotice,
        TextRange::default(),
    ))
}
3 changes: 3 additions & 0 deletions crates/ruff/src/rules/copyright/rules/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
pub(crate) use missing_copyright_notice::{missing_copyright_notice, MissingCopyrightNotice};

mod missing_copyright_notice;
94 changes: 94 additions & 0 deletions crates/ruff/src/rules/copyright/settings.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
//! Settings for the `copyright` plugin.

use once_cell::sync::Lazy;
use regex::Regex;
use serde::{Deserialize, Serialize};

use ruff_macros::{CacheKey, CombineOptions, ConfigurationOptions};

// User-facing configuration for the copyright rules. Every field is optional;
// unset fields fall back to the defaults applied when converting to `Settings`.
#[derive(
    Debug, PartialEq, Eq, Serialize, Deserialize, Default, ConfigurationOptions, CombineOptions,
)]
#[serde(
    deny_unknown_fields,
    rename_all = "kebab-case",
    rename = "CopyrightOptions"
)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
pub struct Options {
    // NOTE: the advertised default must stay in sync with the `COPYRIGHT`
    // regex, which is the pattern actually used when this option is unset.
    #[option(
        default = r#"(?i)Copyright\s+(\(C\)\s+)?\d{4}(-\d{4})*"#,
        value_type = "str",
        example = r#"notice-rgx = "(?i)Copyright \\(C\\) \\d{4}""#
    )]
    /// The regular expression used to match the copyright notice, compiled
    /// with the [`regex`](https://docs.rs/regex/latest/regex/) crate.
    ///
    /// Defaults to `(?i)Copyright\s+(\(C\)\s+)?\d{4}(-\d{4})*`, which matches
    /// the following:
    /// - `Copyright 2023`
    /// - `Copyright (C) 2023`
    /// - `Copyright 2021-2023`
    /// - `Copyright (C) 2021-2023`
    pub notice_rgx: Option<String>,
    #[option(default = "None", value_type = "str", example = r#"author = "Ruff""#)]
    /// Author to enforce within the copyright notice. If provided, the
    /// author must be present immediately following the copyright notice.
    pub author: Option<String>,
    #[option(
        default = r#"0"#,
        value_type = "int",
        example = r#"
# Avoid enforcing a header on files smaller than 1024 bytes.
min-file-size = 1024
"#
    )]
    /// A minimum file size (in bytes) required for a copyright notice to
    /// be enforced. By default, all files are validated.
    pub min_file_size: Option<usize>,
}

/// Resolved settings for the copyright rules, built from [`Options`] with
/// defaults filled in.
#[derive(Debug, CacheKey)]
pub struct Settings {
/// Compiled pattern used to locate the copyright notice.
pub notice_rgx: Regex,
/// Author that must immediately follow the notice, if any.
pub author: Option<String>,
/// Files smaller than this many bytes are not checked.
pub min_file_size: usize,
}

/// Default notice pattern, compiled lazily on first use: a case-insensitive
/// `Copyright`, an optional `(C)`, and a four-digit year optionally followed
/// by `-YYYY` ranges. Used whenever no `notice-rgx` is configured.
static COPYRIGHT: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?i)Copyright\s+(\(C\)\s+)?\d{4}(-\d{4})*").unwrap());

impl Default for Settings {
    /// Default settings: the built-in notice pattern, no required author, and
    /// no minimum file size.
    fn default() -> Self {
        let notice_rgx = COPYRIGHT.clone();
        Self {
            notice_rgx,
            author: None,
            min_file_size: 0,
        }
    }
}

impl From<Options> for Settings {
fn from(options: Options) -> Self {
Self {
notice_rgx: options
.notice_rgx
.map(|pattern| Regex::new(&pattern))
.transpose()
.expect("Invalid `notice-rgx`")
.unwrap_or_else(|| COPYRIGHT.clone()),
author: options.author,
min_file_size: options.min_file_size.unwrap_or_default(),
}
}
}

impl From<Settings> for Options {
fn from(settings: Settings) -> Self {
Self {
notice_rgx: Some(settings.notice_rgx.to_string()),
author: settings.author,
min_file_size: Some(settings.min_file_size),
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
source: crates/ruff/src/rules/copyright/mod.rs
---
<filename>:1:1: CPY001 Missing copyright notice at top of file
|
1 | # Copyright (C) 2023 Some Author
| CPY001
2 |
3 | import os
|


Loading