Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add metadata to rules #62

Merged
merged 4 commits into from
Jul 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 54 additions & 18 deletions crates/noseyparker-cli/src/bin/noseyparker/cmd_rules.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use anyhow::{Context, Result, bail};
use vectorscan::{Pattern, BlockDatabase, Scan, Flag};
use anyhow::{bail, Context, Result};
use regex::Regex;
use std::collections::HashSet;
use vectorscan::{BlockDatabase, Flag, Pattern, Scan};

use tracing::{debug_span, error, error_span, info, warn};

Expand All @@ -16,12 +18,47 @@ pub fn run(global_args: &args::GlobalArgs, args: &args::RulesArgs) -> Result<()>
fn cmd_rules_check(_global_args: &args::GlobalArgs, args: &args::RulesCheckArgs) -> Result<()> {
let _span = debug_span!("cmd_rules_check").entered();

let rules = Rules::from_paths(&args.inputs)
.context("Failed to load input rules")?;
let rules = Rules::from_paths(&args.inputs).context("Failed to load input rules")?;
let mut num_errors = 0;
let mut num_warnings = 0;
let num_rules = rules.rules.len();

// ensure IDs are globally unique
{
let mut seen_ids = HashSet::<&str>::new();
for rule in rules.rules.iter() {
let rule_id = &rule.id;
if !seen_ids.insert(rule_id) {
error!("Rule ID {rule_id} is not unique");
num_errors += 1;
}
}
}

// ensure IDs are well-formed
{
let id_pat = Regex::new(r"^[a-zA-Z0-9]+(?:[.-][a-zA-Z0-9]+)*$")
.expect("ID validator pattern should compile");

for rule in rules.rules.iter() {
let rule_id = &rule.id;
const ID_LIMIT: usize = 20;
let rule_id_len = rule_id.len();
if rule_id_len > ID_LIMIT {
error!("Rule ID {rule_id} is too long ({rule_id_len} characters: \
should be {ID_LIMIT} characters max)");
num_errors += 1;
}

if !id_pat.is_match(rule_id) {
error!("Rule ID {rule_id} is not well-formed: \
it should consist only of alphanumeric sections \
delimited by hyphens or periods");
num_errors += 1;
}
}
}

// compile the rules individually
for (rule_num, rule) in rules.rules.iter().enumerate() {
let stats = check_rule(rule_num, rule)?;
Expand All @@ -30,8 +67,8 @@ fn cmd_rules_check(_global_args: &args::GlobalArgs, args: &args::RulesCheckArgs)
}

// compile the rules all together
let _rules_db = RulesDatabase::from_rules(rules)
.context("Failed to compile rules database")?;
let _rules_db =
RulesDatabase::from_rules(rules).context("Failed to compile combined rules database")?;

if num_warnings == 0 && num_errors == 0 {
println!("{num_rules} rules: no issues detected");
Expand All @@ -40,11 +77,11 @@ fn cmd_rules_check(_global_args: &args::GlobalArgs, args: &args::RulesCheckArgs)
}

if num_errors != 0 {
bail!("{} errors in rules", num_errors);
bail!("{num_errors} errors in rules");
}

if num_warnings != 0 && args.warnings_as_errors {
bail!("{} warnings; warnings being treated as errors", num_warnings);
bail!("{num_warnings} warnings; warnings being treated as errors");
}

Ok(())
Expand All @@ -64,7 +101,6 @@ fn hs_compile_pattern(pat: &str) -> Result<BlockDatabase> {
// Ok(db)
// }


struct CheckStats {
num_warnings: usize,
num_errors: usize,
Expand All @@ -84,7 +120,7 @@ fn check_rule(rule_num: usize, rule: &Rule) -> Result<CheckStats> {

match rule.as_regex() {
Err(e) => {
error!("Regex: failed to compile pattern: {}", e);
error!("Regex: failed to compile pattern: {e}");
num_errors += 1;
}
Ok(pat) => {
Expand All @@ -94,7 +130,7 @@ fn check_rule(rule_num: usize, rule: &Rule) -> Result<CheckStats> {
// Check positive examples
for (example_num, example) in rule.examples.iter().enumerate() {
if pat.find(example.as_bytes()).is_none() {
error!("Regex: failed to match example {}", example_num);
error!("Regex: failed to match example {example_num}");
num_failed += 1;
num_errors += 1;
} else {
Expand All @@ -105,7 +141,7 @@ fn check_rule(rule_num: usize, rule: &Rule) -> Result<CheckStats> {
// Check negative examples
for (example_num, example) in rule.negative_examples.iter().enumerate() {
if pat.find(example.as_bytes()).is_some() {
error!("Regex: incorrectly matched negative example {}", example_num);
error!("Regex: incorrectly matched negative example {example_num}");
num_failed += 1;
num_errors += 1;
} else {
Expand All @@ -115,7 +151,7 @@ fn check_rule(rule_num: usize, rule: &Rule) -> Result<CheckStats> {

let num_total = num_succeeded + num_failed;
if num_total > 0 {
info!("Regex: {}/{} examples succeeded", num_succeeded, num_total);
info!("Regex: {num_succeeded}/{num_total} examples succeeded");
}
}
};
Expand All @@ -130,7 +166,7 @@ fn check_rule(rule_num: usize, rule: &Rule) -> Result<CheckStats> {

match hs_compile_pattern(&rule.uncommented_pattern()) {
Err(e) => {
error!("Vectorscan: failed to compile pattern: {}", e);
error!("Vectorscan: failed to compile pattern: {e}");
num_errors += 1;
}
Ok(db) => {
Expand All @@ -147,7 +183,7 @@ fn check_rule(rule_num: usize, rule: &Rule) -> Result<CheckStats> {
Scan::Continue
})?;
if !matched {
error!("Vectorscan: failed to match example {}", example_num);
error!("Vectorscan: failed to match example {example_num}");
num_failed += 1;
num_errors += 1;
} else {
Expand All @@ -163,7 +199,7 @@ fn check_rule(rule_num: usize, rule: &Rule) -> Result<CheckStats> {
Scan::Continue
})?;
if matched {
error!("Vectorscan: incorrectly matched negative example {}", example_num);
error!("Vectorscan: incorrectly matched negative example {example_num}");
num_failed += 1;
num_errors += 1;
} else {
Expand All @@ -173,15 +209,15 @@ fn check_rule(rule_num: usize, rule: &Rule) -> Result<CheckStats> {

let num_total = num_succeeded + num_failed;
if num_total > 0 {
info!("Vectorscan: {}/{} examples succeeded", num_succeeded, num_total);
info!("Vectorscan: {num_succeeded}/{num_total} examples succeeded");
}
}
}

if num_warnings == 0 && num_errors == 0 {
info!("No issues detected");
} else {
info!("{} errors and {} warnings", num_errors, num_warnings);
info!("{num_errors} errors and {num_warnings} warnings");
}

Ok(CheckStats {
Expand Down
2 changes: 2 additions & 0 deletions crates/noseyparker/data/default/rules/adobe.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
rules:

- name: Adobe OAuth Client Secret
id: np.adobe.1

pattern: |
(?x)(?i)
\b
Expand Down
2 changes: 2 additions & 0 deletions crates/noseyparker/data/default/rules/age.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
rules:

- name: Age Recipient (X25519 public key)
id: np.age.1
pattern: '\bage1[0-9a-z]{58}\b'

examples:
Expand All @@ -13,6 +14,7 @@ rules:


- name: Age Identity (X25519 secret key)
id: np.age.2
pattern: '\bAGE-SECRET-KEY-1[0-9A-Z]{58}\b'

examples:
Expand Down
1 change: 1 addition & 0 deletions crates/noseyparker/data/default/rules/artifactory.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
rules:

- name: Artifactory API Key
id: np.artifactory.1
pattern: '(?i)artifactory.{0,50}\b([a-z0-9]{73})\b'

examples:
Expand Down
14 changes: 14 additions & 0 deletions crates/noseyparker/data/default/rules/aws.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
rules:

- name: AWS API Key
id: np.aws.1

pattern: '\b((?:A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16})\b'

references:
- https://docs.aws.amazon.com/IAM/latest/UserGuide/best-practices.html
- https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html
Expand All @@ -21,7 +24,10 @@ rules:


- name: AWS Secret Access Key
id: np.aws.2

pattern: '(?i)\baws_?(?:secret)?_?(?:access)?_?(?:key)?["'']?\s{0,30}(?::|=>|=)\s{0,30}["'']?([a-z0-9/+=]{40})\b'

references:
- https://docs.aws.amazon.com/IAM/latest/UserGuide/best-practices.html
- https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html
Expand All @@ -44,6 +50,8 @@ rules:


- name: AWS Account ID
id: np.aws.3

pattern: '(?i)aws_?(?:account)_?(?:id)?["''`]?\s{0,30}(?::|=>|=)\s{0,30}["''`]?([0-9]{4}-?[0-9]{4}-?[0-9]{4})'

examples:
Expand Down Expand Up @@ -83,6 +91,7 @@ rules:


- name: AWS Session Token
id: np.aws.4
pattern: '(?i)(?:aws.?session|aws.?session.?token|aws.?token)["''`]?\s{0,30}(?::|=>|=)\s{0,30}["''`]?([a-z0-9/+=]{16,200})[^a-z0-9/+=]'

negative_examples:
Expand All @@ -98,6 +107,7 @@ rules:


- name: Amazon MWS Auth Token
id: np.aws.5
pattern: '(?i)amzn\.mws\.([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})'

examples:
Expand All @@ -108,6 +118,8 @@ rules:


- name: AWS S3 Bucket (subdomain style)
id: np.s3.1

pattern: |
(?x)
(?: ^ | [\s/"'] | %2F )
Expand Down Expand Up @@ -161,6 +173,8 @@ rules:


- name: AWS S3 Bucket (path style)
id: np.s3.2

pattern: |
(?x)
(?: ^ | [\s/"'] | %2F )
Expand Down
4 changes: 4 additions & 0 deletions crates/noseyparker/data/default/rules/azure.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
rules:

- name: Azure Connection String
id: np.azure.1

# XXX There are a bunch of other keys that seem to have secret content assigned to them:
#
# - SharedAccessSignature
Expand Down Expand Up @@ -49,6 +51,8 @@ rules:


- name: Azure App Configuration Connection String
id: np.azure.2

pattern: |
(?x)
(https://[a-zA-Z0-9-]+\.azconfig\.io);
Expand Down
4 changes: 4 additions & 0 deletions crates/noseyparker/data/default/rules/codeclimate.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
rules:

- name: CodeClimate
id: np.codeclimate.1

pattern: '(?i)codeclima.{0,50}\b([a-f0-9]{64})\b'

references:
- https://github.com/codeclimate/ruby-test-reporter/issues/34

examples:
- ' - RAILS_ENV=test CODECLIMATE_REPO_TOKEN=d37a8b9e09642cb73cfcf4e1284815fc3d6a55a7714110187ac59856ae4ab5ad'
1 change: 1 addition & 0 deletions crates/noseyparker/data/default/rules/crates.io.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
rules:

- name: crates.io API Key
id: np.cratesio.1

# It's a 32-character alphanumeric identifier prefixed by `cio`
pattern: '\bcio[a-zA-Z0-9]{32}\b'
Expand Down
6 changes: 6 additions & 0 deletions crates/noseyparker/data/default/rules/digitalocean.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
rules:

- name: DigitalOcean Application Access Token
id: np.digitalocean.1

pattern: |
(?x)(?i)
\b
Expand All @@ -15,6 +17,8 @@ rules:


- name: DigitalOcean Personal Access Token
id: np.digitalocean.2

pattern: |
(?x)(?i)
\b
Expand All @@ -29,6 +33,8 @@ rules:


- name: DigitalOcean Refresh Token
id: np.digitalocean.3

pattern: |
(?x)(?i)
\b
Expand Down
2 changes: 2 additions & 0 deletions crates/noseyparker/data/default/rules/dynatrace.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
rules:

- name: Dynatrace Token
id: np.dynatrace.1

pattern: '\b(dt0[a-zA-Z]{1}[0-9]{2}\.[A-Z0-9]{24}\.[A-Z0-9]{64})\b'

examples:
Expand Down
4 changes: 4 additions & 0 deletions crates/noseyparker/data/default/rules/facebook.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
rules:

- name: Facebook Secret Key
id: np.facebook.1

pattern: |
(?x)(?i)
\b (?: facebook | fb )
Expand All @@ -21,6 +23,8 @@ rules:


- name: Facebook Access Token
id: np.facebook.2

pattern: '\b(EAACEdEose0cBA[a-zA-Z0-9]+)\b'

references:
Expand Down
1 change: 1 addition & 0 deletions crates/noseyparker/data/default/rules/figma.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
rules:

- name: Figma Personal Access Token
id: np.figma.1

# The key material looks like a v4 UUID with an extra 4 hex digits up front
pattern: |
Expand Down
Loading