From 03d5e1363b2187af2bd53ca942ba0034438a19c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81ngel=20M?= Date: Thu, 5 Oct 2023 16:57:03 +0200 Subject: [PATCH] feat: manage static assets properly in Unix and Windows (#229) * feat: manage static assets properly in Unix and Windows * feat: add more e2e tests to validate static assets --- crates/server/src/handlers/assets.rs | 274 +++++++++++++++++++++++++-- examples/js-json/public/robots.txt | 2 + examples/js-params/public/main.css | 3 +- tests/data/public/about/index.html | 0 tests/data/public/index.html | 0 tests/data/public/main.css | 0 tests/e2e.rs | 142 ++++++++++++-- 7 files changed, 391 insertions(+), 30 deletions(-) create mode 100644 examples/js-json/public/robots.txt create mode 100644 tests/data/public/about/index.html create mode 100644 tests/data/public/index.html create mode 100644 tests/data/public/main.css diff --git a/crates/server/src/handlers/assets.rs b/crates/server/src/handlers/assets.rs index af2691ae..64c86add 100644 --- a/crates/server/src/handlers/assets.rs +++ b/crates/server/src/handlers/assets.rs @@ -5,9 +5,47 @@ use actix_files::NamedFile; use actix_web::{web::Data, HttpRequest}; use std::{ io::{Error, ErrorKind}, - path::PathBuf, + path::{Component, Path, PathBuf}, }; +/// Clean up invalid components in the paths and returns it. For a file +/// in the public folder, only "normal" components are valid. +fn clean_up_path(uri: &str) -> PathBuf { + // First split the URI as it always uses the /. + let path = PathBuf::from_iter(uri.split('/')); + + let valid_components: Vec> = path + .components() + // Keep only normal components. The relative components should be + // strip by actix, but we're double checking it in case of weird encodings + // that can be interpreted as parent paths. Note this is a path that will + // be appended later to the public folder. + .filter(|c| matches!(c, Component::Normal(_))) + .collect(); + + // Build a new PathBuf based only on valid components + PathBuf::from_iter(valid_components) +} + +/// Build the file path to retrieve and check if it exists. To build, it takes the project +/// root and the parsed path. You can set it the index_folder flag to manage the +/// parsed_path as a folder an look for an index.html inside it. +fn retrieve_asset_path(root_path: &Path, file_path: &Path, index_folder: bool) -> Option { + let public_folder = root_path.join("public"); + let asset_path = if index_folder { + public_folder.join(file_path).join("index.html") + } else { + public_folder.join(file_path) + }; + + // Checks the output path is a child of public folder + if asset_path.starts_with(public_folder) && asset_path.exists() && asset_path.is_file() { + Some(asset_path) + } else { + None + } +} + /// Find a static HTML file in the `public` folder. This function is used /// when there's no direct file to be served. It will look for certain patterns /// like "public/{uri}/index.html" and "public/{uri}.html". @@ -17,20 +55,234 @@ pub async fn handle_assets(req: &HttpRequest) -> Result { let root_path = req.app_data::>().unwrap(); let uri_path = req.path(); - // File path. This is required for the wasm_handler as dynamic routes may capture static files - let file_path = root_path.join(format!("public{uri_path}")); - // A.k.a pretty urls. We may access /about and this matches to /about/index.html - let index_folder_path = root_path.join(format!("public{uri_path}/index.html")); - // Same as before, but the file is located at ./about.html - let html_ext_path = root_path.join(format!("public{uri_path}.html")); + // Double-check the given path path does not contain any unexpected value. + // It was previously sanitized, but this is a double check. + let parsed_path = clean_up_path(uri_path); - if file_path.exists() { + if let Some(file_path) = retrieve_asset_path(root_path, &parsed_path, false) { + // File path. This is required for the wasm_handler as dynamic routes may capture static files NamedFile::open_async(file_path).await - } else if uri_path.ends_with('/') && index_folder_path.exists() { + } else if let Some(index_folder_path) = retrieve_asset_path(root_path, &parsed_path, true) { + // A.k.a pretty urls. We may access /about and this matches to /about/index.html NamedFile::open_async(index_folder_path).await - } else if !uri_path.ends_with('/') && html_ext_path.exists() { - NamedFile::open_async(html_ext_path).await } else { Err(Error::new(ErrorKind::NotFound, "The file is not present")) } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_clean_up_path() { + let tests = if cfg!(target_os = "windows") { + Vec::from([ + ("/", PathBuf::new()), + ("/index.js", PathBuf::from("index.js")), + ("/my-folder/index.js", PathBuf::from("my-folder\\index.js")), + // These scenarios are unlikely as actix already filters the + // URI, but let's test them too + ("/../index.js", PathBuf::from("index.js")), + ("/../../index.js", PathBuf::from("index.js")), + ]) + } else { + Vec::from([ + ("/", PathBuf::new()), + ("/index.js", PathBuf::from("index.js")), + ("////index.js", PathBuf::from("index.js")), + ("/my-folder/index.js", PathBuf::from("my-folder/index.js")), + // These scenarios are unlikely as actix already filters the + // URI, but let's test them too + ("/../index.js", PathBuf::from("index.js")), + ("/../../index.js", PathBuf::from("index.js")), + ]) + }; + + for (uri, path) in tests { + assert_eq!(clean_up_path(uri), path); + } + } + + #[test] + fn relative_asset_path_retrieval() { + let (project_root, tests) = if cfg!(target_os = "windows") { + let project_root = Path::new("..\\..\\tests\\data"); + let tests = Vec::from([ + // Existing files + ( + Path::new("index.html"), + Some(PathBuf::from("..\\..\\tests\\data\\public\\index.html")), + ), + ( + Path::new("main.css"), + Some(PathBuf::from("..\\..\\tests\\data\\public\\main.css")), + ), + // Missing files + (Path::new(""), None), + (Path::new("unknown"), None), + (Path::new("about"), None), + ]); + + (project_root, tests) + } else { + let project_root = Path::new("../../tests/data"); + let tests = Vec::from([ + // Existing files + ( + Path::new("index.html"), + Some(PathBuf::from("../../tests/data/public/index.html")), + ), + ( + Path::new("main.css"), + Some(PathBuf::from("../../tests/data/public/main.css")), + ), + // Missing files + (Path::new(""), None), + (Path::new("unknown"), None), + (Path::new("about"), None), + ]); + + (project_root, tests) + }; + + for (file, asset_path) in tests { + assert_eq!(retrieve_asset_path(project_root, file, false), asset_path); + } + } + + #[test] + fn absolute_asset_path_retrieval() { + let (project_root, tests) = if cfg!(target_os = "windows") { + let project_root = Path::new("..\\..\\tests\\data").canonicalize().unwrap(); + let tests = Vec::from([ + // Existing files + ( + Path::new("index.html"), + Some(project_root.join("public\\index.html")), + ), + ( + Path::new("main.css"), + Some(project_root.join("public\\main.css")), + ), + // Missing files + (Path::new(""), None), + (Path::new("unknown"), None), + (Path::new("about"), None), + ]); + + (project_root, tests) + } else { + let project_root = Path::new("../../tests/data").canonicalize().unwrap(); + + let tests = Vec::from([ + // Existing files + ( + Path::new("index.html"), + Some(project_root.join("public/index.html")), + ), + ( + Path::new("main.css"), + Some(project_root.join("public/main.css")), + ), + // Missing files + (Path::new(""), None), + (Path::new("unknown"), None), + (Path::new("about"), None), + ]); + + (project_root, tests) + }; + + for (file, asset_path) in tests { + assert_eq!(retrieve_asset_path(&project_root, file, false), asset_path); + } + } + + #[test] + fn relative_asset_index_path_retrieval() { + let (project_root, tests) = if cfg!(target_os = "windows") { + let project_root = Path::new("..\\..\\tests\\data"); + let tests = Vec::from([ + // Existing index files + ( + Path::new("about"), + Some(PathBuf::from( + "..\\..\\tests\\data\\public\\about\\index.html", + )), + ), + ( + Path::new(""), + Some(PathBuf::from("..\\..\\tests\\data\\public\\index.html")), + ), + // Missing index files + (Path::new("main.css"), None), + (Path::new("unknown"), None), + ]); + + (project_root, tests) + } else { + let project_root = Path::new("../../tests/data"); + let tests = Vec::from([ + // Existing index files + ( + Path::new("about"), + Some(PathBuf::from("../../tests/data/public/about/index.html")), + ), + ( + Path::new(""), + Some(PathBuf::from("../../tests/data/public/index.html")), + ), + // Missing index files + (Path::new("main.css"), None), + (Path::new("unknown"), None), + ]); + + (project_root, tests) + }; + + for (file, asset_path) in tests { + assert_eq!(retrieve_asset_path(project_root, file, true), asset_path); + } + } + + #[test] + fn absolute_asset_index_path_retrieval() { + let (project_root, tests) = if cfg!(target_os = "windows") { + let project_root = Path::new("..\\..\\tests\\data").canonicalize().unwrap(); + let tests = Vec::from([ + // Existing idnex files + ( + Path::new("about"), + Some(project_root.join("public\\about\\index.html")), + ), + (Path::new(""), Some(project_root.join("public\\index.html"))), + // Missing index files + (Path::new("main.css"), None), + (Path::new("unknown"), None), + ]); + + (project_root, tests) + } else { + let project_root = Path::new("../../tests/data").canonicalize().unwrap(); + + let tests = Vec::from([ + // Existing index files + ( + Path::new("about"), + Some(project_root.join("public/about/index.html")), + ), + (Path::new(""), Some(project_root.join("public/index.html"))), + // Missing index files + (Path::new("main.css"), None), + (Path::new("unknown"), None), + ]); + + (project_root, tests) + }; + + for (file, asset_path) in tests { + assert_eq!(retrieve_asset_path(&project_root, file, true), asset_path); + } + } +} diff --git a/examples/js-json/public/robots.txt b/examples/js-json/public/robots.txt new file mode 100644 index 00000000..1f53798b --- /dev/null +++ b/examples/js-json/public/robots.txt @@ -0,0 +1,2 @@ +User-agent: * +Disallow: / diff --git a/examples/js-params/public/main.css b/examples/js-params/public/main.css index ca178402..ff66985d 100644 --- a/examples/js-params/public/main.css +++ b/examples/js-params/public/main.css @@ -1,3 +1,4 @@ +/* This is just a comment for testing purposes */ body { max-width: 1000px; } @@ -25,4 +26,4 @@ pre>code { p { margin-top: 2rem; -} \ No newline at end of file +} diff --git a/tests/data/public/about/index.html b/tests/data/public/about/index.html new file mode 100644 index 00000000..e69de29b diff --git a/tests/data/public/index.html b/tests/data/public/index.html new file mode 100644 index 00000000..e69de29b diff --git a/tests/data/public/main.css b/tests/data/public/main.css new file mode 100644 index 00000000..e69de29b diff --git a/tests/e2e.rs b/tests/e2e.rs index bfe49a12..96b27251 100644 --- a/tests/e2e.rs +++ b/tests/e2e.rs @@ -6,34 +6,25 @@ mod test { use std::time::Instant; use std::{env, io}; + use reqwest::StatusCode; + // Default timeout when waiting for wws to be ready static DEFAULT_MAX_TIMEOUT: u64 = 30; - #[cfg(not(target_os = "windows"))] fn get_wws_path() -> PathBuf { let path = PathBuf::from(std::env::var("CARGO_MANIFEST_DIR").unwrap()); - // Use release when it's available - let wws_path = if path.join("target/release/wws").exists() { - path.join("target/release/wws") + let binary = if cfg!(target_os = "windows") { + "wws.exe" } else { - path.join("target/debug/wws") + "wws" }; - println!("[E2E] Running wws from {}", wws_path.display()); - - wws_path - } - - #[cfg(target_os = "windows")] - fn get_wws_path() -> PathBuf { - let path = PathBuf::from(std::env::var("CARGO_MANIFEST_DIR").unwrap()); - // Use release when it's available - let wws_path = if path.join("target/release/wws.exe").exists() { - path.join("target/release/wws.exe") + let wws_path = if path.join("target/release").join(binary).exists() { + path.join("target/release").join(binary) } else { - path.join("target/debug/wws.exe") + path.join("target/debug").join(binary) }; println!("[E2E] Running wws from {}", wws_path.display()); @@ -87,6 +78,10 @@ mod test { reqwest::blocking::get(url)?.text() } + fn request_status(url: &str) -> Result { + Ok(reqwest::blocking::get(url)?.status()) + } + // Check the examples/js-json works fn run_end_to_end_test(example: &str, max_timeout: u64, url: &str, expected_text: &str) { println!("[E2E] Running example: {example}"); @@ -113,9 +108,58 @@ mod test { assert!(body.contains(expected_text)); } + /// Runs the given example and ensures it returns a 404 error. + fn run_not_found_test(example: &str, max_timeout: u64, urls: &[&str]) { + println!("[E2E] Running example (not found): {example}"); + let mut codes = Vec::new(); + + let mut child = run(example, max_timeout).expect("Failed to execute command"); + + for url in urls { + let code = match request_status(url) { + Ok(code) => code, + Err(err) => { + eprintln!("[E2E] Error getting the StatusCode from the request to {url}"); + eprintln!("[E2E] Error: {}", err); + + // Make it fail + StatusCode::FOUND + } + }; + + let body = match request_body(url) { + Ok(body) => body, + Err(err) => { + eprintln!("[E2E] Error getting the body from the request to {url}"); + eprintln!("[E2E] Error: {}", err); + String::new() + } + }; + + codes.push((url, code)); + + println!("[E2E] URL: {url} / Status code: {code} / Body: {body}"); + } + + println!("[E2E] Stopping wws process [{}]", &child.id()); + child.kill().expect("Error stopping wws"); + + // Assert all of them at the same time to avoid leaving an instance running + for (_url, code) in codes { + // Test + assert!(matches!(code, StatusCode::NOT_FOUND)); + } + } + #[test] - // Use this approach to run tests sequentially fn test_end_to_end() { + // Run them in a method to ensure they run sequentially + examples_end_to_end(); + not_found_end_to_end(); + } + + // Use this approach to run tests sequentially + fn examples_end_to_end() { // Allow configuring waiting times. It avoids having long waiting times // in development, while making it configurable in the CI let max_timeout = env::var("E2E_MAX_WAITING_TIME").map_or(DEFAULT_MAX_TIMEOUT, |str| { @@ -154,6 +198,11 @@ mod test { "http://localhost:8080/thisisatest", "thisisatest", ), + ( + "js-params", + "http://localhost:8080/main.css", + "This is just a comment for testing purposes", + ), ( "python-basic", "http://localhost:8080/", @@ -175,4 +224,61 @@ mod test { run_end_to_end_test(example, max_timeout, url, expected_text); } } + + fn not_found_end_to_end() { + // Allow configuring waiting times. It avoids having long waiting times + // in development, while making it configurable in the CI + let max_timeout = env::var("E2E_MAX_WAITING_TIME").map_or(DEFAULT_MAX_TIMEOUT, |str| { + str.parse::().ok().unwrap_or(DEFAULT_MAX_TIMEOUT) + }); + let path = PathBuf::from(std::env::var("CARGO_MANIFEST_DIR").unwrap()); + let cargo_toml = path.join("Cargo.toml"); + let cargo_toml_str = if cfg!(target_os = "windows") { + // Actix doesn't allow : and \ characters + // Remove the first two characters (like X:) and the \. + let no_root_path = cargo_toml + .to_string_lossy() + .chars() + .skip(3) + .collect::(); + format!("http://localhost:8080/{}", no_root_path).replace('\\', "/") + } else { + format!("http://localhost:8080{}", cargo_toml.to_string_lossy()) + }; + + // Test we're not exposing wrong files + let tests = [ + ( + "js-basic", + Vec::from([ + "http://localhost:8080/index.js", + "http://localhost:8080/examples/js-basic/index.js", + "http://localhost:8080/Cargo.toml", + // Check path traversal issues + "http://localhost:8080/../README.md", + "http://localhost:8080/%C0AE%C0AE%C0AFREADME.md", + "http://localhost:8080/%2e%2e/README.md", + cargo_toml_str.as_str(), + ]), + ), + ( + "js-json", + Vec::from([ + "http://localhost:8080/handler.js", + "http://localhost:8080/examples/js-json/handler.js", + "http://localhost:8080/Cargo.toml", + "http://localhost:8080/handler.toml", + // Check path traversal issues + "http://localhost:8080/../README.md", + "http://localhost:8080/%C0AE%C0AE%C0AFREADME.md", + "http://localhost:8080/%2e%2e/README.md", + cargo_toml_str.as_str(), + ]), + ), + ]; + + for (example, urls) in tests { + run_not_found_test(example, max_timeout, &urls); + } + } }