diff --git a/src/config.rs b/src/config.rs index 8f1c81c..f93cf8a 100644 --- a/src/config.rs +++ b/src/config.rs @@ -74,6 +74,7 @@ impl ThrottleConfig /// - ```server_cache_period_seconds: u16```: internal cache period if content is not static /// - ```static_content: Option```: all content is immutably cached at launch /// - ```ignore_regexes: Option>```: do not serve content matching any of these patterns +/// - ```generate_sitemap: Option```: sitemap.xml will be automatically generated (and updated) #[derive(Clone, Serialize, Deserialize)] pub struct ContentConfig { @@ -83,7 +84,8 @@ pub struct ContentConfig pub ignore_regexes: Option>, pub browser_cache_period_seconds: u16, pub server_cache_period_seconds: u16, - pub static_content: Option + pub static_content: Option, + pub generate_sitemap: Option } impl ContentConfig @@ -98,7 +100,8 @@ impl ContentConfig ignore_regexes: None, browser_cache_period_seconds: 3600, server_cache_period_seconds: 3600, - static_content: Some(false) + static_content: Some(false), + generate_sitemap: Some(true) } } } diff --git a/src/content/sitemap.rs b/src/content/sitemap.rs index d3ff29f..04fea0a 100644 --- a/src/content/sitemap.rs +++ b/src/content/sitemap.rs @@ -1,6 +1,6 @@ -use std::{clone, collections::BTreeMap, sync::Arc, time::{Duration, Instant, SystemTime}, vec}; -use openssl::{conf::Conf, sha::Sha256}; +use std::{collections::BTreeMap, sync::Arc, time::{Duration, Instant, SystemTime}, vec}; +use openssl::sha::Sha256; use tokio::sync::Mutex; use axum::{response::IntoResponse, routing::get, Router}; @@ -19,6 +19,7 @@ use super::{get_content, mime_type::Mime, Content}; /// [crate::config::ContentConfig::static_content] is false. If /// so and the server cache has expired ([crate::config::ContentConfig::server_cache_period_seconds]) /// then content is automatically refreshed when served +#[derive(Clone)] pub struct ContentTree { uri_stem: String, @@ -38,6 +39,7 @@ impl ContentTree let mut router: Router<(), axum::body::Body> = Router::new(); for (mut content, mutex) in self.contents.clone() { + content.refresh(); router = router.route ( &content.get_uri(), @@ -67,22 +69,24 @@ impl ContentTree router } - fn calculate_hash(&self) -> Vec + fn calculate_hash(&self, with_bodies: bool) -> Vec { let mut sha = Sha256::new(); - for (mut content, _) in self.contents.clone() + let mut content: Vec = self.contents.clone().into_iter().map(|(x, _)| x).collect(); + content.sort_by(|a, b| a.get_uri().cmp(&b.get_uri())); + for mut content in content { - if content.is_stale() + sha.update(content.get_uri().as_bytes()); + if with_bodies && content.is_stale() { content.refresh(); sha.update(&content.byte_body()); - sha.update(content.get_uri().as_bytes()); } } for (_, child) in &self.children { - sha.update(&child.calculate_hash()); + sha.update(&child.calculate_hash(with_bodies)); } sha.finish().to_vec() @@ -207,6 +211,7 @@ impl ContentTree /// /// Convertable to a router, see [ContentTree] for dynamic /// options +#[derive(Clone)] pub struct SiteMap { contents: ContentTree, @@ -222,12 +227,59 @@ impl SiteMap SiteMap { contents: ContentTree::new("/"), content_path, domain, hash: vec![] } } + pub fn from_config(config: &Config, insert_tag: bool, silent: bool) -> SiteMap + { + let mut sitemap = SiteMap::new(config.domain.clone(), config.content.path.clone()); + + match config.content.ignore_regexes.clone() + { + Some(p) => + { + sitemap.build + ( + insert_tag, + silent, + Some(&ContentFilter::new(p)) + ); + }, + None => + { + sitemap.build + ( + insert_tag, + silent, + None + ); + } + }; + + let mut home = Content::new + ( + "/", + &config.content.home.clone(), + config.content.server_cache_period_seconds, + config.content.browser_cache_period_seconds, + insert_tag + ); + match home.load_from_file() + { + Ok(()) => + { + sitemap.push(home); + }, + Err(e) => {crate::debug(format!("Error serving home page resource {}", e), None);} + } + + sitemap + } + pub fn push(&mut self, content: Content) { self.contents.push(content.uri.clone(), content); self.calculate_hash(); } + /// Hash a sitemap by detected uri's pub fn get_hash(&self) -> Vec { self.hash.clone() @@ -235,7 +287,7 @@ impl SiteMap fn calculate_hash(&mut self) { - self.hash = self.contents.calculate_hash(); + self.hash = self.contents.calculate_hash(false); } /// Searches the content path from [SiteMap::new] for [Content] @@ -288,7 +340,6 @@ impl SiteMap } contents.sort_by_key(|x|x.get_uri()); - let mut no_sitemap = true; let mut no_robots = true; tic = Instant::now(); @@ -302,32 +353,31 @@ impl SiteMap None }; - for mut content in contents + let generate_sitemap = match config.content.generate_sitemap + { + Some(b) => b, + None => true + }; + + for content in contents { - if content.get_uri().contains("sitemap.xml") - { - no_sitemap = false; - } if content.get_uri().contains("robots.txt") { no_robots = false; } - crate::debug(format!("Attempting to add content {:?}", content.preview(64)), None); + if generate_sitemap && content.get_uri().contains("sitemap.xml") + { + continue + } + crate::debug(format!("Adding content {:?}", content.preview(64)), None); let path = self.content_path.clone()+"/"; let uri = parse_uri(content.get_uri(), path); - match content.load_from_file() + if short_urls && content.get_content_type().is_html() { - Ok(()) => - { - if short_urls && content.get_content_type().is_html() - { - let short_uri = Regex::new(r"\.\S+$").unwrap().replacen(&uri, 1, ""); - crate::debug(format!("Adding content as short url: {}", short_uri), None); - self.contents.push(short_uri.to_string(), Content::new(&short_uri, &content.path(), server_cache_period, browser_cache_period, tag)); - } - } - Err(e) => {crate::debug(format!("Error adding content {}\n{}", content.get_uri(), e), None);} + let short_uri = Regex::new(r"\.\S+$").unwrap().replacen(&uri, 1, ""); + crate::debug(format!("Adding content as short url: {}", short_uri), None); + self.contents.push(short_uri.to_string(), Content::new(&short_uri, &content.path(), server_cache_period, browser_cache_period, tag)); } self.contents.push(content.uri.clone(), content); @@ -347,7 +397,7 @@ impl SiteMap crate::debug(format!("No robots.txt specified, generating robots.txt"), None); } - if no_sitemap + if generate_sitemap { let path = format!("{}/{}",self.content_path,"sitemap.xml"); write_file_bytes(&path, &self.to_xml()); diff --git a/src/main.rs b/src/main.rs index f123f04..6d867d4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,10 @@ +use std::time::Duration; + +use busser::config::{Config, CONFIG_PATH}; +use busser::content::sitemap::SiteMap; use busser::server::http::ServerHttp; use busser::server::https::Server; use busser::{openssl_version, program_version}; - use tokio::task::spawn; #[tokio::main] @@ -34,12 +37,61 @@ async fn main() { true }; - let server = Server::new(0,0,0,0, insert_tag); - let http_server = ServerHttp::new(0,0,0,0); - let _http_redirect = spawn(http_server.serve()); - server.serve().await; + if args.iter().any(|x| x == "--static-sitemap") + { + busser::debug(format!("Serving with static sitemap"), None); + serve(insert_tag).await; + } + else + { + busser::debug(format!("Serving with dynamic sitemap"), None); + serve_observed(insert_tag).await; + } +} + +/// Serve by observing the site content found at the path [busser::config::ContentConfig] +/// every [busser::config::ContentConfig::server_cache_period_seconds] the sitemap +/// hash (see [busser::content::sitemap::SiteMap::get_hash]) is checked, if it is +/// different the server is re-served. +async fn serve_observed(insert_tag: bool) +{ + let sitemap = SiteMap::from_config(&Config::load_or_default(CONFIG_PATH), insert_tag, false); + let mut hash = sitemap.get_hash(); + + let server = Server::new(0,0,0,0,sitemap); + let mut server_handle = server.get_handle(); + let mut thread_handle = spawn(async move {server.serve()}.await); + + loop + { + let config = Config::load_or_default(CONFIG_PATH); + let sitemap = SiteMap::from_config(&config, insert_tag, false); + let sitemap_hash = sitemap.get_hash(); + if sitemap_hash != hash + { + busser::debug(format!("Sitemap changed, shutting down"), None); + server_handle.shutdown(); + thread_handle.abort(); + + let server = Server::new(0,0,0,0,sitemap); + server_handle = server.get_handle(); + thread_handle = spawn(async move {server.serve()}.await); + hash = sitemap_hash; + busser::debug(format!("Re-served"), None); + } + busser::debug(format!("Next sitemap check: {}s", config.content.server_cache_period_seconds), None); + tokio::time::sleep(Duration::from_secs(config.content.server_cache_period_seconds.into())).await; + } +} + +/// Serve without checking for sitemap changes +async fn serve(insert_tag: bool) +{ + let sitemap = SiteMap::from_config(&Config::load_or_default(CONFIG_PATH), insert_tag, false); + let server = Server::new(0,0,0,0,sitemap); + server.serve().await; } \ No newline at end of file diff --git a/src/server/https.rs b/src/server/https.rs index a4af2c3..274252a 100644 --- a/src/server/https.rs +++ b/src/server/https.rs @@ -1,8 +1,9 @@ use crate:: { - config::{read_config, Config, CONFIG_PATH}, content::{filter::ContentFilter, sitemap::SiteMap, Content}, server::throttle::{handle_throttle, IpThrottler}, task::{schedule_from_option, TaskPool}, CRAB + config::{read_config, Config, CONFIG_PATH}, content::sitemap::SiteMap, server::throttle::{handle_throttle, IpThrottler}, task::{schedule_from_option, TaskPool}, CRAB }; +use core::time; use std::net::{IpAddr, Ipv4Addr, SocketAddr}; use std::path::PathBuf; use std::sync::Arc; @@ -13,27 +14,18 @@ use axum:: middleware, Router }; -use axum_server::tls_rustls::RustlsConfig; +use axum_server::{tls_rustls::RustlsConfig, Handle}; use super::{api::{stats::StatsDigest, ApiRequest}, stats::{hits::{log_stats, HitStats}, StatsSaveTask, StatsDigestTask}}; /// An https server that reads a directory configured with [Config] /// ```.html``` pages and resources, then serves them. -/// # Example -/// ```no_run -/// use busser::server::https::Server; -/// #[tokio::main] -/// async fn main() -/// { -/// let server = Server::new(0,0,0,0,true); -/// server.serve().await; -/// } -/// ``` pub struct Server { addr: SocketAddr, router: Router, config: Config, + handle: Handle, pub tasks: TaskPool } @@ -62,7 +54,7 @@ impl Server b: u8, c: u8, d: u8, - tag: bool + sitemap: SiteMap ) -> Server { @@ -84,40 +76,6 @@ impl Server ); let throttle_state = Arc::new(Mutex::new(requests)); - - let mut sitemap = SiteMap::new(config.domain.clone(), config.content.path.clone()); - - match config.content.ignore_regexes.clone() - { - Some(p) => - { - sitemap.build - ( - tag, - false, - Some(&ContentFilter::new(p)) - ); - }, - None => - { - sitemap.build - ( - tag, - false, - None - ); - } - }; - - let mut home = Content::new("/", &config.content.home.clone(), config.content.server_cache_period_seconds, config.content.browser_cache_period_seconds, tag); - match home.load_from_file() - { - Ok(()) => - { - sitemap.push(home); - }, - Err(e) => {crate::debug(format!("Error serving home page resource {}", e), None);} - } let mut router: Router<(), axum::body::Body> = sitemap.into(); @@ -135,6 +93,7 @@ impl Server addr: SocketAddr::new(IpAddr::V4(Ipv4Addr::new(a,b,c,d)), config.port_https), router, config: config.clone(), + handle: Handle::new(), tasks: TaskPool::new() }; @@ -170,13 +129,18 @@ impl Server self.addr } - pub async fn serve(self: Server) + pub fn get_handle(&self) -> Handle + { + self.handle.clone() + } + + pub async fn serve(self) { // configure https - let cert_path = self.config.cert_path; - let key_path = self.config.key_path; + let cert_path = self.config.cert_path.clone(); + let key_path = self.config.key_path.clone(); let config = match RustlsConfig::from_pem_file( PathBuf::from(cert_path.clone()), @@ -207,12 +171,23 @@ impl Server println!("(or https://127.0.0.1)"); } - self.tasks.run(); + self.tasks.clone().run(); axum_server::bind_rustls(self.addr, config) - .serve(self.router.into_make_service_with_connect_info::()) + .handle(self.handle.clone()) + .serve(self.router.clone().into_make_service_with_connect_info::()) .await .unwrap(); } + pub async fn shutdown(&mut self, graceful: Option) + { + match graceful + { + // not sure if graceful_shutdown defaults to shutdown if None is passed + Some(_) => self.handle.graceful_shutdown(graceful), + None => self.handle.shutdown() + } + } + } \ No newline at end of file diff --git a/src/task/mod.rs b/src/task/mod.rs index 900d58d..c7282cb 100644 --- a/src/task/mod.rs +++ b/src/task/mod.rs @@ -35,6 +35,7 @@ pub trait Task /// A pool of tasks to be executed /// - [Task]s are added to the pool using [TaskPool::add] /// - [TaskPool::run] loops continuously (with sleeps) running tasks when they are available +#[derive(Clone)] pub struct TaskPool { tasks: HashMap>>> diff --git a/tests/common/mod.rs b/tests/common/mod.rs index e69de29..2034cd6 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -0,0 +1 @@ +pub const BAD_UTF8: [u8; 2] = [0xC0, 0xC1]; \ No newline at end of file diff --git a/tests/test_config.rs b/tests/test_config.rs index f3fce42..8134937 100644 --- a/tests/test_config.rs +++ b/tests/test_config.rs @@ -3,7 +3,10 @@ mod common; #[cfg(test)] mod config { - use busser::config::{read_config, Config, ContentConfig, StatsConfig, ThrottleConfig}; + use busser::{config::{read_config, Config, ContentConfig, StatsConfig, ThrottleConfig}, filesystem::file::write_file_bytes}; + use uuid::Uuid; + + use crate::common::BAD_UTF8; #[test] fn test_read_config() @@ -31,6 +34,7 @@ mod config assert_eq!(config.stats.hit_cooloff_seconds, 60); assert_eq!(config.stats.digest_schedule, Some("0 0 1 * * Fri *".to_string())); assert_eq!(config.stats.ignore_regexes.unwrap(), vec!["/favicon.ico".to_string()]); + assert_eq!(config.stats.top_n_digest, None); assert_eq!(config.content.path, "/home/jerboa/Website/"); assert_eq!(config.content.home, "/home/jerboa/Website/jerboa.html"); @@ -38,6 +42,8 @@ mod config assert_eq!(config.content.browser_cache_period_seconds, 3600); assert_eq!(config.content.server_cache_period_seconds, 1); assert_eq!(config.content.ignore_regexes.unwrap(), vec!["/.git", "workspace"]); + assert_eq!(config.content.static_content, None); + assert_eq!(config.content.generate_sitemap, None); } #[test] @@ -156,4 +162,22 @@ mod config assert_eq!(config.content.server_cache_period_seconds, 1); assert_eq!(config.content.ignore_regexes.unwrap(), vec!["/.git", "workspace"]); } + + #[test] + fn test_bad_utf8() + { + let file_name = format!("tests/bad_utf8-{}", Uuid::new_v4()); + write_file_bytes(&file_name, &BAD_UTF8); + assert!(read_config(&file_name).is_none()); + std::fs::remove_file(file_name).unwrap(); + } + + #[test] + fn test_not_json() + { + let file_name = format!("tests/not_json-{}", Uuid::new_v4()); + write_file_bytes(&file_name, "not_json{".as_bytes()); + assert!(read_config(&file_name).is_none()); + std::fs::remove_file(file_name).unwrap(); + } } \ No newline at end of file diff --git a/tests/test_discord.rs b/tests/test_discord.rs index f8e19b5..c6184d5 100644 --- a/tests/test_discord.rs +++ b/tests/test_discord.rs @@ -14,4 +14,11 @@ mod discord assert!(post(&w, "400".to_string()).await.is_ok()); } + + #[tokio::test] + async fn test_err_webhook() + { + let w = Webhook::new("not_a_domain".to_string()); + assert!(post(&w, "400".to_string()).await.is_err()); + } } \ No newline at end of file diff --git a/tests/test_hmac.rs b/tests/test_hmac.rs index 80938cf..065c36a 100644 --- a/tests/test_hmac.rs +++ b/tests/test_hmac.rs @@ -8,6 +8,8 @@ mod hmac use openssl::{hash::MessageDigest, pkey::PKey, sha::sha256, sign::Signer}; use reqwest::StatusCode; + use crate::common::BAD_UTF8; + const TOKEN: &str = "A_SECRET_TOKEN"; const KEY: &str = "HMAC_TOKEN_HEADER_KEY"; @@ -36,5 +38,9 @@ mod hmac headers = HeaderMap::new(); headers.append(KEY, HeaderValue::from_str(&hmac).unwrap()); assert_eq!(is_authentic(&headers, KEY, TOKEN.to_string(), &body), StatusCode::ACCEPTED); + + headers = HeaderMap::new(); + headers.append(KEY, HeaderValue::from_bytes(&BAD_UTF8).unwrap()); + assert_eq!(is_authentic(&headers, KEY, TOKEN.to_string(), &body), StatusCode::BAD_REQUEST); } } \ No newline at end of file diff --git a/tests/test_mime.rs b/tests/test_mime.rs index 19aca6f..9442e6b 100644 --- a/tests/test_mime.rs +++ b/tests/test_mime.rs @@ -7,7 +7,7 @@ mod mime #[test] - fn test_build() + fn test_mime() { assert!(MIME::TextPlain.is_text()); assert!(MIME::TextHtml.is_text()); diff --git a/tests/test_sitemap.rs b/tests/test_sitemap.rs index 77d2fb9..0658b15 100644 --- a/tests/test_sitemap.rs +++ b/tests/test_sitemap.rs @@ -3,7 +3,7 @@ mod common; #[cfg(test)] mod sitemap { - use std::{fs::remove_file, hash::Hash, path::Path, time::SystemTime}; + use std::{fs::remove_file, path::Path, time::SystemTime}; use busser::{content::sitemap::{lastmod, SiteMap}, filesystem::file::read_file_utf8}; use chrono::{DateTime, Datelike, Utc}; @@ -58,11 +58,6 @@ mod sitemap assert_eq!(sitemap_disk, expected_sitemap); assert_eq!("Sitemap: https://test.domain/sitemap.xml", robots_disk); - } - - #[test] - pub fn test_hash() - { let mut sitemap = SiteMap::new("https://test.domain".to_owned(), "tests/pages".to_owned()); @@ -71,13 +66,6 @@ mod sitemap sitemap.build(true, true, None); assert_ne!(sitemap.get_hash(), Vec::::new()); assert!(sitemap.get_hash().len() > 0); - - let mut sitemap_no_short = SiteMap::new("https://test.domain".to_owned(), "tests/pages".to_owned()); - sitemap_no_short.build(true, true, None); - - assert_ne!(sitemap_no_short.get_hash(), Vec::::new()); - assert!(sitemap_no_short.get_hash().len() > 0); - assert_ne!(sitemap_no_short.get_hash(), sitemap.get_hash()); for file in vec!["tests/pages/robots.txt", "tests/pages/sitemap.xml"] {