Removes count from Hit, Adds documentation
Jerboa-app committed May 20, 2024
1 parent 3d21c89 commit 89eca58
Showing 14 changed files with 88 additions and 72 deletions.
9 changes: 2 additions & 7 deletions src/content/mod.rs
@@ -19,15 +19,10 @@ pub mod sitemap;

/// Store web content
///
/// Cf. [crate::content::pages::page::Page] and [crate::content::resources::resource::Resource]
///
/// - [Content::uri] is the served uri of the content (web address)
/// - [Content::body] is a byte body
/// - [Content::disk_path] is a path to the locally stored file on disk representing [Content::body]
/// - [Content::cache_period_seconds] is the cache-control max-age
///
/// - The body is unpopulated until [Content::load_from_file] is called
/// - The body may be converted to a utf8 string using [Content::utf8_body]
/// - A hash of the file is used to check whether it is stale, via [Observed]
/// - Content may have different server-side and browser-side cache ages
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Content
{
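
Read together, these bullets imply a two-phase lifecycle: construct, then populate on demand. A minimal sketch of that flow, purely for illustration: the `Content::new` constructor and the `Result`-returning signatures below are assumptions, since the diff does not show them.

```rust
// Hypothetical usage of Content; Content::new and the error
// handling are assumptions, not shown in this diff.
let mut content = Content::new("/index.html", "./site/index.html", 3600);

// The body is unpopulated until load_from_file is called
content.load_from_file().expect("disk_path should exist");

// The byte body may then be converted to a utf8 string
let html: String = content.utf8_body().expect("body should be valid utf8");
```
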
20 changes: 18 additions & 2 deletions src/content/sitemap.rs
@@ -13,6 +13,11 @@ use crate::server::https::parse_uri;

use super::{get_content, mime_type::Mime, Content};

/// A tree structure representing a uri stem and its content,
/// convertible to a [Router] which monitors the content if
/// [crate::config::ContentConfig::static_content] is false. In
/// that case, when the server cache has expired ([crate::config::ContentConfig::server_cache_period_seconds]),
/// content is automatically refreshed when served
pub struct ContentTree
{
uri_stem: String,
@@ -104,7 +109,8 @@ impl ContentTree
}
}

/// Implements writing to an xml conforming to https://www.sitemaps.org/protocol.html
/// Implements writing to an xml conforming to <https://www.sitemaps.org/protocol.html>
/// with <http://www.google.com/schemas/sitemap-image/1.1> and <http://www.google.com/schemas/sitemap-video/1.1>
pub fn to_xml(&self, domain: String) -> Vec<u8>
{
if self.contents.is_empty() && self.children.is_empty()
@@ -172,6 +178,13 @@ impl ContentTree
}
}

/// Represents the structure of a site.
/// If no sitemap.xml or robots.txt is present,
/// they will be generated by calling [SiteMap::to_xml]
/// and inserting the resulting sitemap.xml
///
/// Convertible to a router; see [ContentTree] for dynamic
/// options
pub struct SiteMap
{
contents: ContentTree,
@@ -191,6 +204,8 @@ impl SiteMap
self.contents.push(content.uri.clone(), content);
}

/// Searches the content path from [SiteMap::new] for [Content].
/// robots.txt and sitemap.xml can be generated and added here
pub fn build
(
&mut self,
@@ -287,7 +302,8 @@ impl SiteMap
}
}

/// Implements writing to an xml conforming to https://www.sitemaps.org/protocol.html
/// Implements writing to an xml conforming to <https://www.sitemaps.org/protocol.html>
/// with <http://www.google.com/schemas/sitemap-image/1.1> and <http://www.google.com/schemas/sitemap-video/1.1>
pub fn to_xml(&self) -> Vec<u8>
{
let mut buffer = Vec::new();
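
Putting the SiteMap doc comments together, a rough driver might look like the following sketch; `SiteMap::new`'s parameters, `build`'s arguments, and the `into_router` conversion are assumptions here, as none of their signatures appear in this diff.

```rust
// Hypothetical flow; constructor arguments and names other
// than build/push/to_xml are assumptions.
let mut sitemap = SiteMap::new("https://example.com".to_string(), "./content".to_string());

// Search the content path for Content; robots.txt and sitemap.xml
// can be generated and inserted at this stage
sitemap.build(/* options elided in this diff */);

// Serialise a sitemap.xml conforming to the sitemaps.org protocol
let xml: Vec<u8> = sitemap.to_xml();

// Convert to a Router; see ContentTree for the dynamic refresh options
let router = sitemap.into_router();
```
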
2 changes: 2 additions & 0 deletions src/filesystem/file.rs
@@ -23,6 +23,8 @@ pub trait File
fn path(&self) -> String;
}

/// Something that may be observed for changes.
/// [Observed::is_stale] is typically backed by a hash comparison
pub trait Observed
{
fn is_stale(&self) -> bool;
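
Since `is_stale` is typically backed by a hash, an implementor would compare a stored digest against a fresh digest of the file on disk. A minimal sketch under that assumption; `WatchedFile` is an illustrative type, not one from this codebase, and any trait items truncated by the diff are omitted.

```rust
use openssl::sha::sha512;

// Illustrative implementor, not a type from this codebase
struct WatchedFile
{
    path: String,
    hash: [u8; 64] // sha512 of the content as last loaded
}

impl Observed for WatchedFile
{
    /// Stale if the bytes on disk no longer hash to the stored digest
    fn is_stale(&self) -> bool
    {
        match std::fs::read(&self.path)
        {
            Ok(bytes) => sha512(&bytes) != self.hash,
            // Treat a missing or unreadable file as stale
            Err(_) => true
        }
    }

    // Further items of Observed are truncated in this diff
    // and omitted from the sketch
}
```
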
4 changes: 4 additions & 0 deletions src/filesystem/folder.rs
@@ -15,6 +15,7 @@ impl fmt::Display for ListDirError {
}
}

/// List all files in path
pub fn list_dir(path: String) -> Result<std::fs::ReadDir, ListDirError>
{
match std::fs::read_dir(path)
@@ -30,6 +31,7 @@ pub fn list_dir(path: String) -> Result<std::fs::ReadDir, ListDirError>
}
}

/// Return a [DirEntry]'s file name, parsed from its [std::ffi::OsString]
pub fn dir_entry_to_path(d: DirEntry) -> Option<String>
{
let file_os_string = d.file_name();
@@ -45,6 +47,7 @@ pub fn dir_entry_to_path(d: DirEntry) -> Option<String>
}
}

/// List all subdirectories of a path
pub fn list_sub_dirs(path: String) -> Vec<String>
{
let mut found_dirs: Vec<String> = vec![];
@@ -100,6 +103,7 @@ pub fn list_sub_dirs(path: String) -> Vec<String>
found_dirs
}

/// List all files, optionally filtered by a [Regex]
pub fn list_dir_by(pattern: Option<Regex>, path: String) -> Vec<String>
{
match std::fs::read_dir(path.clone())
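
As a usage sketch: passing `Some(regex)` filters the listing, while `None` returns every file. The path and pattern below are illustrative only.

```rust
use regex::Regex;

// Illustrative: list files named like YYYY-MM-DD in a stats folder
let pattern = Regex::new(r"\d{4}-\d{2}-\d{2}").ok();
let dated: Vec<String> = list_dir_by(pattern, "./stats".to_string());

// With None, every file in the directory is returned
let all: Vec<String> = list_dir_by(None, "./stats".to_string());
```
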
1 change: 1 addition & 0 deletions src/integrations/mod.rs
@@ -13,6 +13,7 @@ pub mod webhook;

/// Uses openssl to verify the request body via the given hmac_token
/// - hmac_header_key is the location in the https header for the digest
/// - hmac_token must be 64 byte hex, as should be the sent HMAC digest
pub fn is_authentic
(
headers: &HeaderMap,
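
For orientation, here is a sketch of the kind of check this doc comment describes, using the openssl crate's HMAC support. The digest choice (sha256, whose digest is 64 hex characters), the hex decoding, and the standalone function shape are assumptions for illustration, not necessarily what `is_authentic` does internally.

```rust
use openssl::{hash::MessageDigest, memcmp, pkey::PKey, sign::Signer};

/// Minimal hex decoder (assumed helper, not from this codebase)
fn hex_to_bytes(s: &str) -> Option<Vec<u8>>
{
    if s.len() % 2 != 0 { return None }
    (0..s.len()).step_by(2)
        .map(|i| u8::from_str_radix(&s[i..i+2], 16).ok())
        .collect()
}

/// Recompute an HMAC over body with the token and compare it,
/// in constant time, against the hex digest sent in the header
fn verify_hmac(token: &[u8], body: &[u8], sent_digest_hex: &str) -> bool
{
    let Ok(pkey) = PKey::hmac(token) else { return false };
    let Ok(mut signer) = Signer::new(MessageDigest::sha256(), &pkey) else { return false };
    if signer.update(body).is_err() { return false }
    let Ok(ours) = signer.sign_to_vec() else { return false };
    let Some(theirs) = hex_to_bytes(sent_digest_hex) else { return false };
    // memcmp::eq is constant-time but panics on length mismatch,
    // so compare lengths first
    ours.len() == theirs.len() && memcmp::eq(&ours, &theirs)
}
```
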
1 change: 1 addition & 0 deletions src/integrations/webhook.rs
@@ -6,6 +6,7 @@ pub struct Webhook
addr: String
}

/// Wraps a webhook url
impl Webhook
{

2 changes: 1 addition & 1 deletion src/server/api/mod.rs
@@ -15,7 +15,7 @@ use super::stats::hits::HitStats;
pub trait ApiRequest
{
/// Validate a request's hmac given a token read from config.json
/// - See [crate::config::Config] and [crate::web::is_authentic]
/// - See [crate::config::Config] and [crate::integrations::is_authentic]
fn is_authentic(headers: HeaderMap, body: Bytes) -> StatusCode;
/// Deserialise the Bytes body from JSON
fn deserialise_payload(&mut self, headers: HeaderMap, body: Bytes) -> StatusCode;
45 changes: 24 additions & 21 deletions src/server/stats/digest.rs
@@ -6,15 +6,16 @@ use crate::{config::{Config, CONFIG_PATH}, content::is_page, util::matches_one};

use super::hits::{collect_hits, HitStats};

/// A digest of hit statistics
#[derive(Debug, Clone, PartialEq)]
pub struct Digest
{
pub top_hitters: Vec<(String, u16)>,
pub top_pages: Vec<(String, u16)>,
pub top_resources: Vec<(String, u16)>,
pub hits_by_hour_utc: [u16; 24],
pub total_hits: u16,
pub unique_hits: u16
pub top_hitters: Vec<(String, usize)>,
pub top_pages: Vec<(String, usize)>,
pub top_resources: Vec<(String, usize)>,
pub hits_by_hour_utc: [usize; 24],
pub total_hits: usize,
pub unique_hits: usize
}

impl Digest
@@ -33,6 +34,7 @@ impl Digest
}
}

/// Collect hits, both cached and from local files, into a [Digest]
pub fn process_hits(path: String, from: Option<DateTime<chrono::Utc>>, to: Option<DateTime<chrono::Utc>>, top_n: Option<usize>, stats: Option<HitStats>) -> Digest
{

@@ -52,9 +54,9 @@ pub fn process_hits(path: String, from: Option<DateTime<chrono::Utc>>, to: Optio
None => (vec![], config.domain)
};

let mut hitters: HashMap<String, u16> = HashMap::new();
let mut pages: HashMap<String, u16> = HashMap::new();
let mut resources: HashMap<String, u16> = HashMap::new();
let mut hitters: HashMap<String, usize> = HashMap::new();
let mut pages: HashMap<String, usize> = HashMap::new();
let mut resources: HashMap<String, usize> = HashMap::new();

for hit in collect_hits(path, stats, from, to)
{
@@ -65,10 +67,10 @@ pub fn process_hits(path: String, from: Option<DateTime<chrono::Utc>>, to: Optio

match hitters.contains_key(&hit.ip_hash)
{
true => {hitters.insert(hit.ip_hash.clone(), hit.count+hitters[&hit.ip_hash]);},
true => {hitters.insert(hit.ip_hash.clone(), hit.count()+hitters[&hit.ip_hash]);},
false =>
{
hitters.insert(hit.ip_hash, hit.count);
hitters.insert(hit.ip_hash.clone(), hit.count());
digest.unique_hits += 1;
}
}
@@ -77,20 +79,20 @@ pub fn process_hits(path: String, from: Option<DateTime<chrono::Utc>>, to: Optio
{
match pages.contains_key(&hit.path)
{
true => {pages.insert(hit.path.clone(), hit.count+pages[&hit.path]);},
false => {pages.insert(hit.path, hit.count);}
true => {pages.insert(hit.path.clone(), hit.count()+pages[&hit.path]);},
false => {pages.insert(hit.path.clone(), hit.count());}
}
}
else
{
match resources.contains_key(&hit.path)
{
true => {resources.insert(hit.path.clone(), hit.count+resources[&hit.path]);},
false => {resources.insert(hit.path, hit.count);}
true => {resources.insert(hit.path.clone(), hit.count()+resources[&hit.path]);},
false => {resources.insert(hit.path.clone(), hit.count());}
}
}

digest.total_hits += hit.count;
digest.total_hits += hit.count();

for time in hit.times
{
@@ -105,13 +107,13 @@ pub fn process_hits(path: String, from: Option<DateTime<chrono::Utc>>, to: Optio
}
}

let mut all_hitters: Vec<(String, u16)> = hitters.into_iter().collect();
let mut all_pages: Vec<(String, u16)> = pages.into_iter().collect();
let mut all_resources: Vec<(String, u16)> = resources.into_iter().collect();
let mut all_hitters: Vec<(String, usize)> = hitters.into_iter().collect();
let mut all_pages: Vec<(String, usize)> = pages.into_iter().collect();
let mut all_resources: Vec<(String, usize)> = resources.into_iter().collect();

for data in vec![&mut all_hitters, &mut all_pages, &mut all_resources]
{
data.sort_by(|a: &(String, u16), b: &(String, u16)| a.1.cmp(&b.1));
data.sort_by(|a: &(String, usize), b: &(String, usize)| a.1.cmp(&b.1));
data.reverse();
}

@@ -157,6 +159,7 @@ pub fn process_hits(path: String, from: Option<DateTime<chrono::Utc>>, to: Optio

}

/// Format a [Digest] as a message to be posted to Discord
pub fn digest_message(digest: Digest, from: Option<DateTime<chrono::Utc>>, to: Option<DateTime<chrono::Utc>>) -> String
{
let mut msg = String::new();
@@ -204,7 +207,7 @@ pub fn digest_message(digest: Digest, from: Option<DateTime<chrono::Utc>>, to: O
msg
}

pub fn hits_by_hour_text_graph(hits: [u16; 24], symbol: char, size: u8) -> String
pub fn hits_by_hour_text_graph(hits: [usize; 24], symbol: char, size: u8) -> String
{
let mut graph = String::new();

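
A side note on the `contains_key`/`insert` pairs above: std's entry API performs the same accumulation with one hash lookup per hit instead of two. A sketch of the equivalent pattern (a possible simplification, not what this commit does):

```rust
use std::collections::HashMap;

// Equivalent accumulation with a single lookup, replacing the
// contains_key branch followed by insert
fn accumulate(map: &mut HashMap<String, usize>, key: &str, count: usize)
{
    *map.entry(key.to_string()).or_insert(0) += count;
}
```

The one behavioral wrinkle is `unique_hits`, which increments only on first insertion; with the entry API that would need an explicit `Entry::Vacant` check rather than falling out of the match.
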
10 changes: 5 additions & 5 deletions src/server/stats/file.rs
@@ -2,17 +2,17 @@ use crate::{config::{Config, CONFIG_PATH}, filesystem::file::{read_file_utf8, wr

use super::hits::{Hit, HitStats};

/// Save hits to disk.
/// Each stats file takes the current date YYYY-MM-DD as
/// its file name; if multiple saves occur on the same date
/// the file is appended to.
/// See [crate::server::stats::HitStats]
pub struct StatsFile
{
pub hits: Vec<Hit>,
pub path: Option<String>
}

/// Save hits to disk.
/// Each stats files takes the current date YYYY-MM-DD as
/// its file name, if multiple saves occur on the same date
/// the file is appended to
/// See [crate::server::stats::HitStats]
impl StatsFile
{
pub fn new() -> StatsFile
34 changes: 24 additions & 10 deletions src/server/stats/hits.rs
@@ -1,25 +1,39 @@
use std::{collections::HashMap, fs::create_dir, net::{IpAddr, Ipv4Addr, SocketAddr}, sync::Arc, time::Instant};
use std::{collections::HashMap, net::{IpAddr, Ipv4Addr, SocketAddr}, sync::Arc, time::Instant};

use axum::{extract::{ConnectInfo, State}, http::Request, middleware::Next, response::Response};
use chrono::DateTime;
use openssl::sha::sha512;
use reqwest::StatusCode;
use serde::{Deserialize, Serialize};
use tokio::sync::{Mutex, MutexGuard};
use tokio::sync::Mutex;

use crate::{config::{Config, CONFIG_PATH}, filesystem::{file::{read_file_utf8, write_file_bytes}, folder::list_dir_by}, util::{compress, date_to_rfc3339, dump_bytes}};
use crate::{config::{Config, CONFIG_PATH}, filesystem::{file::read_file_utf8, folder::list_dir_by}, util::{date_to_rfc3339, dump_bytes}};

use super::digest::Digest;

/// A hit defined by a uri and an ip (hashed) at given times
/// - [Hit::times] the unique hit times of equivalent events (controlled by [crate::config::StatsConfig::hit_cooloff_seconds])
/// - [Hit::path] the uri defining this hit
/// - [Hit::ip_hash] the sha512 hash of the ip defining this hit
///
/// Further hits inside the [crate::config::StatsConfig::hit_cooloff_seconds] period will be ignored
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct Hit
{
pub count: u16,
pub times: Vec<String>,
pub path: String,
pub ip_hash: String
}

impl Hit
{
pub fn count(&self) -> usize
{
self.times.len()
}
}

/// A collection of Hits indexed by a sha512 of the ip and uri
#[derive(Debug, Clone)]
pub struct HitStats
{
@@ -39,6 +53,7 @@ impl HitStats
}
}

/// Launches a thread to log statistics for this request
pub async fn log_stats<B>
(
ConnectInfo(addr): ConnectInfo<SocketAddr>,
@@ -59,6 +74,7 @@ pub async fn log_stats<B>
Ok(next.run(request).await)
}

/// Updates hit statistics with this request
pub async fn process_hit
(
addr: SocketAddr,
@@ -112,7 +128,6 @@ pub async fn process_hit
return
}
hit.times.push(chrono::offset::Utc::now().to_rfc3339());
hit.count += 1;
hit
},
Err(_e) => {hit}
@@ -122,7 +137,7 @@
},
false =>
{
Hit {path: uri, count: 1, times: vec![chrono::offset::Utc::now().to_rfc3339()], ip_hash: dump_bytes(&ip_hash)}
Hit {path: uri, times: vec![chrono::offset::Utc::now().to_rfc3339()], ip_hash: dump_bytes(&ip_hash)}
}
};

@@ -138,6 +153,7 @@
), Some("PERFORMANCE".to_string()));
}

/// Gathers [Hit]s both from disk and from the cache in [HitStats]
pub fn collect_hits(path: String, stats: Option<HitStats>, from: Option<DateTime<chrono::Utc>>, to: Option<DateTime<chrono::Utc>>) -> Vec<Hit>
{
let stats_files = list_dir_by(None, path);
@@ -189,7 +205,6 @@ pub fn collect_hits(path: String, stats: Option<HitStats>, from: Option<DateTime
for hit in hits_to_filter
{
// check the cached stats are within the time period, then add
let mut count = 0;
let mut times: Vec<String> = vec![];
for i in 0..hit.times.len()
{
@@ -200,13 +215,12 @@ pub fn collect_hits(path: String, stats: Option<HitStats>, from: Option<DateTime
};
if !from.is_some_and(|from| t < from) && !to.is_some_and(|to| t > to)
{
count += 1;
times.push(hit.times[i].clone());
}
}
if count > 0
if times.len() > 0
{
let h = Hit {count, times, ip_hash: hit.ip_hash.clone(), path: hit.path.clone()};
let h = Hit {times, ip_hash: hit.ip_hash.clone(), path: hit.path.clone()};
hits.push(h.clone());
}
}
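
This hunk is the heart of the commit: with the `count` field gone, the count is always derived from `times`, so the two can never drift apart (the old code had to remember to push a timestamp and bump `count` together, and the filtering above needed a parallel counter). A small illustration using the struct as shown; the field values are placeholders.

```rust
// Recording a hit is now a single push; count() is always times.len()
let mut hit = Hit
{
    times: vec![chrono::offset::Utc::now().to_rfc3339()],
    path: "/index.html".to_string(), // placeholder
    ip_hash: "ab12".to_string()      // placeholder for a sha512 hex dump
};
assert_eq!(hit.count(), 1);

// A later equivalent event outside the cooloff period
hit.times.push(chrono::offset::Utc::now().to_rfc3339());
assert_eq!(hit.count(), 2); // nothing separate to keep in sync
```
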
2 changes: 1 addition & 1 deletion src/task/mod.rs
@@ -66,7 +66,7 @@ impl TaskPool

/// Returns a duration to wait for the next runnable process
/// and an information string about that process including
/// a [DateTime<Utc>] when it will be run
/// a [`DateTime<Utc>`] when it will be run
pub async fn waiting_for(&self) -> (tokio::time::Duration, String)
{
if self.tasks.len() == 0
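
For context on this last hunk: rustdoc reads `[DateTime<Utc>]` as an intra-doc link, and the bare `<Utc>` inside it typically triggers an invalid-HTML-tag warning in the rendered docs. Backticks escape the generics while keeping the link. The before and after, shown as plain comments so the broken form is not compiled here:

```rust
// Before: rustdoc warns, parsing <Utc> as an HTML tag
//   /// a [DateTime<Utc>] when it will be run
// After: the backticked path still resolves as an intra-doc link
//   /// a [`DateTime<Utc>`] when it will be run
```
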