diff --git a/CHANGELOG.md b/CHANGELOG.md index 1fe87763c..dbd8f488e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Additions - Running `noseyparker --version` now emits many compile-time details about the build, which can be useful for troubleshooting ([#48](https://github.com/praetorian-inc/noseyparker/issues/48)). +- The `github` and `scan` commands now support accessing GitHub Enterprise Server instances using the new `--github-api-url URL` parameter ([#53](https://github.com/praetorian-inc/noseyparker/pull/53)). + Thank you @AdnaneKhan! + ### Changes - Existing rules were modified to reduce both false positives and false negatives: @@ -115,7 +118,7 @@ docker pull ghcr.io/praetorian-inc/noseyparker:v0.12.0 - PyPI Upload Token - The `report` command now offers rudimentary SARIF support ([#4](https://github.com/praetorian-inc/noseyparker/issues/4)). - Thanks you @Coruscant11! + Thank you @Coruscant11! ### Changes - Several default rules have been revised to improve performance of the matching engine and to produce fewer false positives. diff --git a/Cargo.lock b/Cargo.lock index 0bced35d3..b932d7ae5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2216,6 +2216,7 @@ dependencies = [ "tracing", "tracing-log", "tracing-subscriber", + "url", "vectorscan", "vergen", ] diff --git a/crates/noseyparker-cli/Cargo.toml b/crates/noseyparker-cli/Cargo.toml index 361ac2171..6a551e8f7 100644 --- a/crates/noseyparker-cli/Cargo.toml +++ b/crates/noseyparker-cli/Cargo.toml @@ -56,6 +56,7 @@ serde-sarif = "0.3.6" tracing = "0.1" tracing-log = "0.1" tracing-subscriber = { version = "0.3", features = ["tracing-log", "ansi"] } +url = "2.3" vectorscan = { path = "../vectorscan" } [dev-dependencies] diff --git a/crates/noseyparker-cli/src/bin/noseyparker/args.rs b/crates/noseyparker-cli/src/bin/noseyparker/args.rs index 1b78745b2..80890bb93 100644 --- a/crates/noseyparker-cli/src/bin/noseyparker/args.rs +++ b/crates/noseyparker-cli/src/bin/noseyparker/args.rs @@ -1,6 +1,7 @@ use anyhow::{Context, Result}; use clap::{crate_description, crate_version, ArgAction, Args, Parser, Subcommand, ValueEnum}; use std::path::PathBuf; +use url::Url; use noseyparker::git_url::GitUrl; @@ -211,6 +212,18 @@ impl std::fmt::Display for Mode { pub struct GitHubArgs { #[command(subcommand)] pub command: GitHubCommand, + + /// Use the given URL for GitHub API access + /// + /// If accessing a GitHub Enterprise Server instance, this value should be the entire base URL + /// include the `api/v3` portion, e.g., `https://github.example.com/api/v3`. + #[arg( + long, + value_name = "URL", + default_value_t = Url::parse("https://api.github.com").expect("default API url should parse"), + visible_alias="api-url" + )] + pub github_api_url: Url, } #[derive(Subcommand, Debug)] @@ -359,7 +372,7 @@ pub struct ScanInputArgs { #[arg(long, value_name = "NAME", display_order = 20)] pub github_user: Vec, - /// Name of a GitHub organization to enumerate and scan + /// Name of a GitHub organization to enumerate and scan #[arg( long, visible_alias = "github-org", @@ -367,6 +380,20 @@ pub struct ScanInputArgs { display_order = 20 )] pub github_organization: Vec, + + /// Use the given URL for GitHub API access + /// + /// If accessing a GitHub Enterprise Server instance, this value should be the entire base URL + /// include the `api/v3` portion, e.g., `https://github.example.com/api/v3`. + #[arg( + long, + visible_alias = "api-url", + value_name = "URL", + default_value_t = Url::parse("https://api.github.com").expect("default API url should parse"), + display_order = 30 + )] + pub github_api_url: Url, + } /// This struct represents options to control content discovery. diff --git a/crates/noseyparker-cli/src/bin/noseyparker/cmd_github.rs b/crates/noseyparker-cli/src/bin/noseyparker/cmd_github.rs index a5a48d468..2fdfaf0b7 100644 --- a/crates/noseyparker-cli/src/bin/noseyparker/cmd_github.rs +++ b/crates/noseyparker-cli/src/bin/noseyparker/cmd_github.rs @@ -1,4 +1,5 @@ use anyhow::{bail, Context, Result}; +use url::Url; use crate::args::{GitHubArgs, GitHubReposListArgs, GlobalArgs, Reportable}; use noseyparker::github; @@ -6,18 +7,18 @@ use noseyparker::github; pub fn run(global_args: &GlobalArgs, args: &GitHubArgs) -> Result<()> { use crate::args::{GitHubCommand::*, GitHubReposCommand::*}; match &args.command { - Repos(List(args)) => list_repos(global_args, args), + Repos(List(args_list)) => list_repos(global_args, args_list, args.github_api_url.clone()), } } -fn list_repos(_global_args: &GlobalArgs, args: &GitHubReposListArgs) -> Result<()> { +fn list_repos(_global_args: &GlobalArgs, args: &GitHubReposListArgs, api_url: Url) -> Result<()> { if args.repo_specifiers.is_empty() { bail!("No repositories specified"); } let repo_urls = github::enumerate_repo_urls(&github::RepoSpecifiers { user: args.repo_specifiers.user.clone(), organization: args.repo_specifiers.organization.clone(), - }, None) + }, api_url, None) .context("Failed to enumerate GitHub repositories")?; RepoReporter(repo_urls).report(&args.output_args) } diff --git a/crates/noseyparker-cli/src/bin/noseyparker/cmd_scan.rs b/crates/noseyparker-cli/src/bin/noseyparker/cmd_scan.rs index 3aaba8c27..ec304cb5b 100644 --- a/crates/noseyparker-cli/src/bin/noseyparker/cmd_scan.rs +++ b/crates/noseyparker-cli/src/bin/noseyparker/cmd_scan.rs @@ -80,7 +80,8 @@ pub fn run(global_args: &args::GlobalArgs, args: &args::ScanArgs) -> Result<()> progress_enabled, ); let mut num_found: u64 = 0; - for repo_string in github::enumerate_repo_urls(&repo_specifiers, Some(&mut progress)) + let api_url = args.input_args.github_api_url.clone(); + for repo_string in github::enumerate_repo_urls(&repo_specifiers, api_url, Some(&mut progress)) .context("Failed to enumerate GitHub repositories")? { match GitUrl::from_str(&repo_string) { diff --git a/crates/noseyparker-cli/tests/help/snapshots/test_noseyparker__help__help_github-2.snap b/crates/noseyparker-cli/tests/help/snapshots/test_noseyparker__help__help_github-2.snap index 6ed045a11..c5350654b 100644 --- a/crates/noseyparker-cli/tests/help/snapshots/test_noseyparker__help__help_github-2.snap +++ b/crates/noseyparker-cli/tests/help/snapshots/test_noseyparker__help__help_github-2.snap @@ -1,5 +1,5 @@ --- -source: tests/test_noseyparker_help.rs +source: crates/noseyparker-cli/tests/help/mod.rs expression: stdout --- Interact with GitHub @@ -17,6 +17,15 @@ Commands: Print this message or the help of the given subcommand(s) Options: + --github-api-url + Use the given URL for GitHub API access + + If accessing a GitHub Enterprise Server instance, this value should be the entire base URL + include the `api/v3` portion, e.g., `https://github.example.com/api/v3`. + + [default: https://api.github.com/] + [aliases: api-url] + -h, --help Print help (see a summary with '-h') diff --git a/crates/noseyparker-cli/tests/help/snapshots/test_noseyparker__help__help_github_short-2.snap b/crates/noseyparker-cli/tests/help/snapshots/test_noseyparker__help__help_github_short-2.snap index e3e72c827..65f0b897d 100644 --- a/crates/noseyparker-cli/tests/help/snapshots/test_noseyparker__help__help_github_short-2.snap +++ b/crates/noseyparker-cli/tests/help/snapshots/test_noseyparker__help__help_github_short-2.snap @@ -1,5 +1,5 @@ --- -source: tests/test_noseyparker_help.rs +source: crates/noseyparker-cli/tests/help/mod.rs expression: stdout --- Interact with GitHub @@ -11,7 +11,9 @@ Commands: help Print this message or the help of the given subcommand(s) Options: - -h, --help Print help (see more with '--help') + --github-api-url Use the given URL for GitHub API access [default: + https://api.github.com/] [aliases: api-url] + -h, --help Print help (see more with '--help') Global Options: -v, --verbose... Enable verbose output diff --git a/crates/noseyparker-cli/tests/help/snapshots/test_noseyparker__help__help_scan-2.snap b/crates/noseyparker-cli/tests/help/snapshots/test_noseyparker__help__help_scan-2.snap index fd933be0b..6d9a09ad1 100644 --- a/crates/noseyparker-cli/tests/help/snapshots/test_noseyparker__help__help_scan-2.snap +++ b/crates/noseyparker-cli/tests/help/snapshots/test_noseyparker__help__help_scan-2.snap @@ -1,5 +1,5 @@ --- -source: tests/test_noseyparker_help.rs +source: crates/noseyparker-cli/tests/help/mod.rs expression: stdout --- Scan content for secrets @@ -72,13 +72,22 @@ Input Specifier Options: supported. --github-organization - Name of a GitHub organization to enumerate and scan + Name of a GitHub organization to enumerate and scan [aliases: github-org] --github-user Name of a GitHub user to enumerate and scan + --github-api-url + Use the given URL for GitHub API access + + If accessing a GitHub Enterprise Server instance, this value should be the entire base URL + include the `api/v3` portion, e.g., `https://github.example.com/api/v3`. + + [default: https://api.github.com/] + [aliases: api-url] + Content Discovery Options: --max-file-size Do not scan files larger than the specified size diff --git a/crates/noseyparker-cli/tests/help/snapshots/test_noseyparker__help__help_scan_short-2.snap b/crates/noseyparker-cli/tests/help/snapshots/test_noseyparker__help__help_scan_short-2.snap index c73ee7ec8..9b86fa150 100644 --- a/crates/noseyparker-cli/tests/help/snapshots/test_noseyparker__help__help_scan_short-2.snap +++ b/crates/noseyparker-cli/tests/help/snapshots/test_noseyparker__help__help_scan_short-2.snap @@ -1,5 +1,5 @@ --- -source: tests/test_noseyparker_help.rs +source: crates/noseyparker-cli/tests/help/mod.rs expression: stdout --- Scan content for secrets @@ -15,9 +15,11 @@ Options: Input Specifier Options: [INPUT]... Path to a file, directory, or local Git repository to scan --git-url URL of a Git repository to clone and scan - --github-organization Name of a GitHub organization to enumerate and scan [aliases: + --github-organization Name of a GitHub organization to enumerate and scan [aliases: github-org] --github-user Name of a GitHub user to enumerate and scan + --github-api-url Use the given URL for GitHub API access [default: + https://api.github.com/] [aliases: api-url] Content Discovery Options: --max-file-size Do not scan files larger than the specified size [default: 100] diff --git a/crates/noseyparker/src/github.rs b/crates/noseyparker/src/github.rs index 877c5f3af..3b83d201d 100644 --- a/crates/noseyparker/src/github.rs +++ b/crates/noseyparker/src/github.rs @@ -1,3 +1,5 @@ +use url::Url; + mod auth; mod client; mod client_builder; @@ -19,11 +21,17 @@ use crate::progress::Progress; /// /// This is a high-level wrapper for enumerating GitHub repositories that handles the details of /// creating an async runtime and a GitHub REST API client. -pub fn enumerate_repo_urls(repo_specifiers: &RepoSpecifiers, progress: Option<&mut Progress>) -> anyhow::Result> { +pub fn enumerate_repo_urls( + repo_specifiers: &RepoSpecifiers, + github_url: Url, + progress: Option<&mut Progress>, +) -> anyhow::Result> { use anyhow::{bail, Context}; use tracing::{debug, warn}; let client = ClientBuilder::new() + .base_url(github_url) + .context("Failed to set base URL")? .personal_access_token_from_env() .context("Failed to load access token from environment")? .build() @@ -45,7 +53,9 @@ pub fn enumerate_repo_urls(repo_specifiers: &RepoSpecifiers, progress: Option<&m debug!("GitHub rate limits: {:?}", rate_limit.rate); let repo_enumerator = RepoEnumerator::new(&client); - let repo_urls = repo_enumerator.enumerate_repo_urls(repo_specifiers, progress).await?; + let repo_urls = repo_enumerator + .enumerate_repo_urls(repo_specifiers, progress) + .await?; Ok(repo_urls) // ::, Error>(repo_urls) }); diff --git a/crates/noseyparker/src/github/client.rs b/crates/noseyparker/src/github/client.rs index 31db8e3cf..820fa2673 100644 --- a/crates/noseyparker/src/github/client.rs +++ b/crates/noseyparker/src/github/client.rs @@ -83,7 +83,8 @@ impl Client { } pub async fn get_all(&self, page: Page) -> Result> - where T: serde::de::DeserializeOwned + where + T: serde::de::DeserializeOwned, { let mut results = Vec::new(); let mut next_page = Some(page); @@ -95,30 +96,128 @@ impl Client { } } +/// Create a URL from the given base, path parts, and parameters. +/// +/// The path parts should not contain slashes. +fn url_from_path_parts_and_params( + base_url: Url, + path_parts: &[&str], + params: &[(&str, &str)], +) -> Result { + if base_url.cannot_be_a_base() { + return Err(Error::UrlBaseError(base_url)); + } + + let mut buf = base_url.path().to_string(); + if !buf.ends_with('/') { + buf.push('/'); + } + + for (i, p) in path_parts.iter().enumerate() { + if p.contains('/') { + return Err(Error::UrlSlashError(p.to_string())); + } + if i > 0 { + // do not add a leading slash for the very first path part, or the result comes out + // wrong, as it is unintentionally treated as an absolute path + // + // https://play.rust-lang.org/?version=stable&mode=debug&edition=2021&gist=c2674663bf5e681b5bdb302d1b050237 + buf.push('/'); + } + buf.push_str(p); + } + let url = base_url.join(&buf).map_err(Error::UrlParseError)?; + let url = if params.is_empty() { + Url::parse(url.as_str()).map_err(Error::UrlParseError)? + } else { + Url::parse_with_params(url.as_str(), params).map_err(Error::UrlParseError)? + }; + Ok(url) +} + +#[cfg(test)] +mod test { + use super::*; + use pretty_assertions::assert_eq; + + fn make_url(base_url: &str, path_parts: &[&str], params: &[(&str, &str)]) -> Result { + let base_url = Url::parse(base_url).unwrap(); + url_from_path_parts_and_params(base_url, path_parts, params) + } + + fn testcase_ok(inputs: (&str, &[&str], &[(&str, &str)]), expected: &str) { + let (base_url, path_parts, params) = inputs; + let actual = make_url(base_url, path_parts, params).unwrap(); + let expected = Url::parse(expected).unwrap(); + assert_eq!(expected, actual); + } + + #[test] + fn url_from_path_parts_and_params_1() { + testcase_ok( + ("https://github.example.com/api/v3", &[], &[]), + "https://github.example.com/api/v3/", + ); + } + + #[test] + fn url_from_path_parts_and_params_2() { + testcase_ok( + ("https://github.example.com/api/v3/", &[], &[]), + "https://github.example.com/api/v3/", + ); + } + + #[test] + fn url_from_path_parts_and_params_3() { + testcase_ok( + ("https://github.example.com/api/v3", &["SomeUser", "somerepo.git"], &[]), + "https://github.example.com/api/v3/SomeUser/somerepo.git", + ); + } + + #[test] + fn url_from_path_parts_and_params_4() { + testcase_ok( + ("https://github.example.com/api/v3/", &["SomeUser", "somerepo.git"], &[]), + "https://github.example.com/api/v3/SomeUser/somerepo.git", + ); + } + + #[test] + fn url_from_path_parts_and_params_5() { + testcase_ok( + ("https://api.github.com", &["praetorian-inc", "noseyparker.git"], &[]), + "https://api.github.com/praetorian-inc/noseyparker.git", + ); + } + + #[test] + fn url_from_path_parts_and_params_6() { + let res = make_url("https://api.github.com", &["praetorian-inc", "some/bogus/path/part"], &[]); + // XXX have to resort to match here because `Error` doesn't have an Eq instance + match res { + Err(Error::UrlSlashError(p)) if p == "some/bogus/path/part" => (), + _ => assert!(false), + } + } + + #[test] + fn url_from_path_parts_and_params_7() { + let res = make_url("mailto:blah@example.com", &[], &[]); + // XXX have to resort to match here because `Error` doesn't have an Eq instance + match res { + Err(Error::UrlBaseError(p)) if p.as_str() == "mailto:blah@example.com" => (), + _ => assert!(false), + } + } +} + // private implementation impl Client { /// Construct a `Url` from the given path parts and query parameters. fn make_url(&self, path_parts: &[&str], params: &[(&str, &str)]) -> Result { - // XXX Surely this can be done better - let mut buf = String::new(); - for p in path_parts { - buf.push('/'); - if p.contains('/') { - return Err(Error::UrlSlashError(p.to_string())); - } - buf.push_str(p); - } - let url = self - .base_url - .clone() - .join(&buf) - .map_err(Error::UrlParseError)?; - let url = if params.is_empty() { - Url::parse(url.as_str()).map_err(Error::UrlParseError)? - } else { - Url::parse_with_params(url.as_str(), params).map_err(Error::UrlParseError)? - }; - Ok(url) + url_from_path_parts_and_params(self.base_url.clone(), path_parts, params) } async fn get(&self, path_parts: &[&str]) -> Result { diff --git a/crates/noseyparker/src/github/error.rs b/crates/noseyparker/src/github/error.rs index 2cc115e17..7d2f7585d 100644 --- a/crates/noseyparker/src/github/error.rs +++ b/crates/noseyparker/src/github/error.rs @@ -13,6 +13,7 @@ pub enum Error { /// The duration to wait until trying again wait: Option, }, + UrlBaseError(url::Url), UrlParseError(url::ParseError), UrlSlashError(String), ReqwestError(reqwest::Error), @@ -23,6 +24,7 @@ impl std::fmt::Display for Error { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Error::RateLimited{client_error, ..} => write!(f, "request was rate-limited: {}", client_error.message), + Error::UrlBaseError(u) =>write!(f, "invalid base url: {u}"), Error::UrlParseError(e) => write!(f, "error parsing URL: {e}"), Error::UrlSlashError(p) => write!(f, "error building URL: component {p:?} contains a slash"), Error::ReqwestError(e) => write!(f, "error making request: {e}"), @@ -35,6 +37,7 @@ impl std::error::Error for Error { fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { match self { Error::RateLimited{..} => None, + Error::UrlBaseError(_) => None, Error::UrlParseError(e) => Some(e), Error::UrlSlashError(_) => None, Error::ReqwestError(e) => Some(e),