Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tests(pcaps): download additional pcaps #4728

Merged
merged 4 commits into from
Aug 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions tests/pcap/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,13 @@ publish = false
[features]
default = []
ja4 = [] # Older versions of tshark do not support JA4
download = [] # Download additional pcaps from a list of configured urls

# Crates listed here are only available to the build script (build.rs).
[build-dependencies]
anyhow = "1.0.86"
bytes = "1.7.1"
hex = "0.4.3"
reqwest = { version = "0.12.7", features = ["blocking"] }

[dependencies]
anyhow = "1.0.86"
Expand Down
131 changes: 131 additions & 0 deletions tests/pcap/build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use anyhow::*;
use bytes::Buf;
use bytes::Bytes;
use std::collections::HashMap;
use std::fs::File;
use std::io::copy;
use std::path::Path;
use std::thread;
use std::time::Duration;

/// Builds the table of pcaps to download, mapping local file name to URL.
///
/// The map is empty unless the `download` feature is enabled, so a default
/// build never lists anything to fetch.
///
/// Every entry is pinned to an exact commit of its source repository; see
/// `add_github_urls` for how names and URLs are derived.
fn get_download_urls() -> HashMap<String, String> {
    let mut urls = HashMap::new();
    if !cfg!(feature = "download") {
        return urls;
    }

    // Captures published alongside the JA4 reference implementation.
    let ja4_pcaps = [
        "badcurveball.pcap",
        "browsers-x509.pcapng",
        "chrome-cloudflare-quic-with-secrets.pcapng",
        "http2-with-cookies.pcapng",
        "ipv6.pcapng",
        "latest.pcapng",
        "macos_tcp_flags.pcap",
        "tls-alpn-h2.pcap",
        // TODO: non-ascii alpn handling currently differs between implementations,
        // with no official consensus. Wireshark chose different handling than s2n-tls did.
        // See https://github.com/FoxIO-LLC/ja4/pull/147
        // "tls-non-ascii-alpn.pcapng",
        "tls12.pcap",
        "tls3.pcapng",
    ];
    add_github_urls(
        &mut urls,
        "FoxIO-LLC/ja4",
        "259739593049478dc68c84a436eca75b9f404e6e",
        "pcap",
        &ja4_pcaps,
    );

    // Captures from the Wireshark repository's test/captures directory.
    let wireshark_pcaps = [
        "tls-renegotiation.pcap",
        "tls12-aes128ccm.pcap",
        "tls12-aes256gcm.pcap",
        "tls12-chacha20poly1305.pcap",
        "tls12-dsb.pcapng",
        "tls13-20-chacha20poly1305.pcap",
        "tls13-rfc8446.pcap",
        "rsa-p-lt-q.pcap",
        "rsasnakeoil2.pcap",
        "http2-brotli.pcapng",
    ];
    add_github_urls(
        &mut urls,
        "wireshark/wireshark",
        "2ba9f4c56e162127b85164912399b8e69e47b1d3",
        "test/captures",
        &wireshark_pcaps,
    );

    urls
}

/// Registers one raw.githubusercontent.com download per entry in `files`.
///
/// * `output` - map from local file name to download URL; entries are added here.
/// * `repo` - GitHub repository in `owner/name` form.
/// * `commit` - full 40-character hexadecimal commit hash to pin the download to.
/// * `path` - directory inside the repository that contains the files.
/// * `files` - file names inside `path`.
///
/// The local file name is the repository (with `/` replaced by `_`), an
/// underscore, and the original file name.
///
/// # Panics
///
/// Panics if `commit` is not a 40-character hex string, or if a generated
/// local file name is already present in `output`.
fn add_github_urls(
    output: &mut HashMap<String, String>,
    repo: &str,
    commit: &str,
    path: &str,
    files: &[&str],
) {
    // For safety, only exact commits can be used. No tags or branches.
    assert!(
        commit.len() == 40 && commit.bytes().all(|b| b.is_ascii_hexdigit()),
        "Expected a full 40-character hex commit hash for {}, got {:?}",
        repo,
        commit
    );

    // The repository prefix is the same for every file, so compute it once.
    let prefix = repo.replace('/', "_");
    for file in files {
        let url = format!(
            "https://raw.githubusercontent.com/{}/{}/{}/{}",
            repo, commit, path, file
        );
        let name = format!("{}_{}", prefix, file);
        if output.insert(name, url).is_some() {
            panic!("Duplicate download path for {}:{}", repo, file);
        }
    }
}

/// Fetches `url` and returns the response body.
///
/// Up to five attempts are made. After each failed attempt except the last,
/// the function sleeps and then doubles the delay (5s, 10s, 20s, 40s).
///
/// A response with an HTTP error status (4xx/5xx) counts as a failed attempt,
/// so an error page is never returned as if it were the requested file.
///
/// # Errors
///
/// Returns an error if every attempt fails, or if the backoff delay overflows.
fn download(url: &str) -> Result<Bytes> {
    const ATTEMPTS: u32 = 5;

    let mut delay = Duration::from_secs(5);
    for attempt in 1..=ATTEMPTS {
        let fetched = reqwest::blocking::get(url)
            // `get` succeeds for any completed response; reject 4xx/5xx explicitly.
            .and_then(|response| response.error_for_status())
            .and_then(|response| response.bytes());
        // `Ok` is shadowed by the anyhow glob import, so name the variant fully.
        if let reqwest::Result::Ok(bytes) = fetched {
            return Ok(bytes);
        }

        // No point waiting once there are no attempts left.
        if attempt < ATTEMPTS {
            thread::sleep(delay);
            delay = delay.checked_mul(2).context("Invalid backoff delay")?;
        }
    }
    bail!("Unable to download: {}", url);
}

/// Build script entry point.
///
/// Recreates `$OUT_DIR/downloaded_pcaps`, downloads every configured pcap
/// into it, and exports the directory to the crate being built through the
/// `DOWNLOADED_PCAPS_PATH` compile-time environment variable.
fn main() -> Result<()> {
    let out_dir = std::env::var("OUT_DIR")?;
    let download_path = Path::new(&out_dir).join("downloaded_pcaps");

    // Start from an empty directory. Removal is best-effort: its result is
    // deliberately ignored, and only a failure to create the directory is fatal.
    let _ = std::fs::remove_dir_all(&download_path);
    std::fs::create_dir_all(&download_path).context("Failed to create path")?;
    let download_dir = download_path.display();
    println!("Created directory for downloaded pcaps: {}", download_dir);

    for (name, url) in get_download_urls() {
        let body = download(&url)?;
        let destination = download_path.join(name);
        let mut output = File::create(&destination)?;
        copy(&mut body.reader(), &mut output)?;
        println!("Downloaded pcap \"{}\" to {}", url, destination.display());
    }

    // Only rerun (and re-download) when the build script itself changes.
    println!("cargo:rerun-if-changed=build.rs");
    println!("cargo:rustc-env=DOWNLOADED_PCAPS_PATH={}", download_dir);

    Ok(())
}
3 changes: 3 additions & 0 deletions tests/pcap/src/handshake_message.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,9 @@ impl Builder {
Self::MESSAGE_TYPE.to_string()
};

// We currently don't support QUIC
let filter = filter + " && !quic";

// tshark associates a LOT of metadata with each packet. Filtering that
// metadata (like by using `metadata_whitelist`) significantly improves
// both performance and memory usage.
Expand Down
47 changes: 43 additions & 4 deletions tests/pcap/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,52 @@ pub mod client_hello;
pub mod handshake_message;
pub mod packet;

/// Creates an iterator over all test pcap file paths
pub fn all_pcaps() -> impl Iterator<Item = String> {
std::fs::read_dir("data")
.expect("Missing test pcap file")
use std::path::Path;

/// Lists the entries of the directory `path` as path strings.
///
/// Entries that cannot be read, or whose path is not valid UTF-8, are
/// silently skipped.
///
/// # Panics
///
/// Panics if the directory itself cannot be read.
fn pcaps_from<P: AsRef<Path>>(path: P) -> impl Iterator<Item = String> {
    let folder = path.as_ref();
    let entries = match std::fs::read_dir(folder) {
        Ok(entries) => entries,
        Err(_) => panic!("Unable to read pcap folder: {}", folder.display()),
    };
    entries
        .flatten()
        .filter_map(|entry| entry.path().to_str().map(str::to_owned))
}

/// Creates an iterator over all test pcap file paths.
///
/// Yields the pcaps in the local `data` directory first, followed by those in
/// the directory named by the compile-time `DOWNLOADED_PCAPS_PATH` variable.
pub fn all_pcaps() -> impl Iterator<Item = String> {
    pcaps_from("data").chain(pcaps_from(std::env!("DOWNLOADED_PCAPS_PATH")))
}

#[cfg(test)]
mod tests {
    use super::*;
    use anyhow::*;
    use std::collections::HashSet;

    /// Checks that pcaps come from the expected number of distinct directories.
    #[test]
    fn pcaps_source() -> Result<()> {
        let pcaps = all_pcaps();
        println!("All pcaps: ");

        // Collect the distinct parent directories of every pcap path.
        let mut paths: HashSet<String> = HashSet::new();
        for file_str in pcaps {
            println!("{}", file_str);
            let (path, _file) = file_str.rsplit_once("/").expect("No path");
            paths.insert(path.to_owned());
        }

        // With downloads enabled we expect pcaps from at least two sources
        // (the local data and the downloaded data); otherwise exactly one.
        if cfg!(feature = "download") {
            assert!(paths.len() > 1);
        } else {
            assert!(paths.len() == 1);
        }
        Ok(())
    }
}
Loading