From 214710fd6b98b020a3701805b73bb9168a31c3c5 Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Sat, 20 Mar 2021 16:03:02 -0400 Subject: [PATCH 1/4] Give precedence to local shared files over global ones When serving 'essential files', we can either serve the global one, created when building `empty_library`, or the local one, created when building the local crate. Currently we default to the global one, but this causes issues when the file should never have been global in the first place (such as recently for `crates.js`: see https://github.com/rust-lang/docs.rs/issues/1313). This gives precedence to the local file so that the bug will be fixed when rustdoc fixes it, even if we forget to update `ESSENTIAL_FILES_UNVERSIONED`. --- src/web/mod.rs | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/web/mod.rs b/src/web/mod.rs index 1ad93fe51..90bd0d3c1 100644 --- a/src/web/mod.rs +++ b/src/web/mod.rs @@ -171,12 +171,23 @@ impl Handler for CratesfyiHandler { } } - // try serving shared rustdoc resources first, then db/static file handler and last router - // return 404 if none of them return Ok. It is important that the router comes last, - // because it gives the most specific errors, e.g. CrateNotFound or VersionNotFound - self.shared_resource_handler + // This is kind of a mess. + // + // Almost all files should be served through the `router_handler`; eventually + // `shared_resource_handler` should go through the router too, and `database_file_handler` + // could be removed altogether. + // + // Unfortunately, combining `shared_resource_handler` with the `router_handler` breaks + // things, because right now `shared_resource_handler` allows requesting files from *any* + // subdirectory and the router requires us to give a specific path. Changing them to a + // specific path means that buggy docs from 2018 will have missing CSS (#1181) so until + // that's fixed, we need to keep the current (buggy) behavior. + // + // It's important that `router_handler` comes first so that local rustdoc files take + // precedence over global ones (see #1324). + self.router_handler .handle(req) - .or_else(|e| if_404(e, || self.router_handler.handle(req))) + .or_else(|e| if_404(e, || self.shared_resource_handler.handle(req))) .or_else(|e| if_404(e, || self.database_file_handler.handle(req))) .or_else(|e| { let err = if let Some(err) = e.error.downcast_ref::() { From 228ee9fe6d55463912435b035e3d8bd1c34374cd Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Sun, 21 Mar 2021 09:42:31 -0400 Subject: [PATCH 2/4] Remove `database_file_handler`; give precendence to router error messages The database handler was unused; the router already serves the favicon and it doesn't do anything else. Note that 'database handler' is different from 'files served from storage', the router already handles those. This also fixes the long standing bug that all 404 errors are either 'crate not found' or 'resource not found'. --- src/web/error.rs | 11 ++++------- src/web/mod.rs | 16 ++++------------ src/web/rustdoc.rs | 2 +- 3 files changed, 9 insertions(+), 20 deletions(-) diff --git a/src/web/error.rs b/src/web/error.rs index 3bb8bb0b2..36b60346d 100644 --- a/src/web/error.rs +++ b/src/web/error.rs @@ -163,9 +163,6 @@ mod tests { #[test] fn check_404_page_content_resource() { - // Resources with a `.js` and `.ico` extension are special cased in the - // routes_handler which is currently run last. This means that `get("resource.exe")` will - // fail with a `no so such crate` instead of 'no such resource' wrapper(|env| { let page = kuchiki::parse_html().one( env.frontend() @@ -180,7 +177,7 @@ mod tests { .next() .unwrap() .text_contents(), - "The requested crate does not exist", + "The requested resource does not exist", ); Ok(()) @@ -200,7 +197,7 @@ mod tests { .next() .unwrap() .text_contents(), - "The requested crate does not exist", + "The requested version does not exist", ); Ok(()) @@ -219,7 +216,7 @@ mod tests { .next() .unwrap() .text_contents(), - "The requested crate does not exist", + "The requested version does not exist", ); Ok(()) @@ -242,7 +239,7 @@ mod tests { .next() .unwrap() .text_contents(), - "The requested crate does not exist", + "The requested version does not exist", ); Ok(()) diff --git a/src/web/mod.rs b/src/web/mod.rs index 90bd0d3c1..8a254bb60 100644 --- a/src/web/mod.rs +++ b/src/web/mod.rs @@ -105,7 +105,6 @@ use iron::{ status::Status, Chain, Handler, Iron, IronError, IronResult, Listening, Request, Response, Url, }; -use metrics::RequestRecorder; use page::TemplateData; use postgres::Client; use router::NoRoute; @@ -121,7 +120,6 @@ const DEFAULT_BIND: &str = "0.0.0.0:3000"; struct CratesfyiHandler { shared_resource_handler: Box, router_handler: Box, - database_file_handler: Box, inject_extensions: InjectExtensions, } @@ -140,8 +138,6 @@ impl CratesfyiHandler { let inject_extensions = InjectExtensions::new(context, template_data)?; let routes = routes::build_routes(); - let blacklisted_prefixes = routes.page_prefixes(); - let shared_resources = Self::chain(inject_extensions.clone(), rustdoc::SharedResourceHandler); let router_chain = Self::chain(inject_extensions.clone(), routes.iron_router()); @@ -149,10 +145,6 @@ impl CratesfyiHandler { Ok(CratesfyiHandler { shared_resource_handler: Box::new(shared_resources), router_handler: Box::new(router_chain), - database_file_handler: Box::new(routes::BlockBlacklistedPrefixes::new( - blacklisted_prefixes, - Box::new(RequestRecorder::new(file::DatabaseFileHandler, "database")), - )), inject_extensions, }) } @@ -165,7 +157,9 @@ impl Handler for CratesfyiHandler { handle: impl FnOnce() -> IronResult, ) -> IronResult { if e.response.status == Some(status::NotFound) { - handle() + // the routes are ordered from most specific to least; give precedence to the + // original error message. + handle().or(Err(e)) } else { Err(e) } @@ -174,8 +168,7 @@ impl Handler for CratesfyiHandler { // This is kind of a mess. // // Almost all files should be served through the `router_handler`; eventually - // `shared_resource_handler` should go through the router too, and `database_file_handler` - // could be removed altogether. + // `shared_resource_handler` should go through the router too. // // Unfortunately, combining `shared_resource_handler` with the `router_handler` breaks // things, because right now `shared_resource_handler` allows requesting files from *any* @@ -188,7 +181,6 @@ impl Handler for CratesfyiHandler { self.router_handler .handle(req) .or_else(|e| if_404(e, || self.shared_resource_handler.handle(req))) - .or_else(|e| if_404(e, || self.database_file_handler.handle(req))) .or_else(|e| { let err = if let Some(err) = e.error.downcast_ref::() { *err diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs index 195c1a158..828062f40 100644 --- a/src/web/rustdoc.rs +++ b/src/web/rustdoc.rs @@ -516,7 +516,7 @@ pub fn target_redirect_handler(req: &mut Request) -> IronResult { let crate_details = match CrateDetails::new(&mut conn, &name, &version) { Some(krate) => krate, - None => return Err(Nope::ResourceNotFound.into()), + None => return Err(Nope::VersionNotFound.into()), }; // [crate, :name, :version, target-redirect, :target, *path] From ea6e6437be2fad49f871b9ee90d089cd59778793 Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Sun, 21 Mar 2021 09:44:07 -0400 Subject: [PATCH 3/4] Remove dead code and make some functions private --- src/web/file.rs | 37 +++++-------------------------------- src/web/metrics.rs | 4 ++-- 2 files changed, 7 insertions(+), 34 deletions(-) diff --git a/src/web/file.rs b/src/web/file.rs index de9b0ca4f..5dc6f4158 100644 --- a/src/web/file.rs +++ b/src/web/file.rs @@ -1,15 +1,15 @@ //! Database based file handler use crate::storage::{Blob, Storage}; -use crate::{error::Result, Config, Metrics}; -use iron::{status, Handler, IronResult, Request, Response}; +use crate::{error::Result, Config}; +use iron::{status, Response}; #[derive(Debug)] pub(crate) struct File(pub(crate) Blob); impl File { /// Gets file from database - pub fn from_path(storage: &Storage, path: &str, config: &Config) -> Result { + pub(super) fn from_path(storage: &Storage, path: &str, config: &Config) -> Result { let max_size = if path.ends_with(".html") { config.max_file_size_html } else { @@ -20,7 +20,7 @@ impl File { } /// Consumes File and creates a iron response - pub fn serve(self) -> Response { + pub(super) fn serve(self) -> Response { use iron::headers::{CacheControl, CacheDirective, ContentType, HttpDate, LastModified}; let mut response = Response::with((status::Ok, self.0.content)); @@ -44,38 +44,11 @@ impl File { } /// Checks if mime type of file is "application/x-empty" - pub fn is_empty(&self) -> bool { + pub(super) fn is_empty(&self) -> bool { self.0.mime == "application/x-empty" } } -/// Database based file handler for iron -/// -/// This is similar to staticfile crate, but its using getting files from database. -pub struct DatabaseFileHandler; - -impl Handler for DatabaseFileHandler { - fn handle(&self, req: &mut Request) -> IronResult { - let path = req.url.path().join("/"); - let storage = extension!(req, Storage); - let config = extension!(req, Config); - if let Ok(file) = File::from_path(&storage, &path, &config) { - let metrics = extension!(req, Metrics); - - // Because all requests that don't hit another handler go through here, we will get all - // requests successful or not recorded by the RequestRecorder, so we inject an extra - // "database success" route to keep track of how often we succeed vs 404 - metrics - .routes_visited - .with_label_values(&["database success"]) - .inc(); - Ok(file.serve()) - } else { - Err(super::error::Nope::CrateNotFound.into()) - } - } -} - #[cfg(test)] mod tests { use super::*; diff --git a/src/web/metrics.rs b/src/web/metrics.rs index b4b6fcf71..8423360b3 100644 --- a/src/web/metrics.rs +++ b/src/web/metrics.rs @@ -7,7 +7,7 @@ use iron::status::Status; use prometheus::{Encoder, HistogramVec, TextEncoder}; use std::time::{Duration, Instant}; -pub fn metrics_handler(req: &mut Request) -> IronResult { +pub(super) fn metrics_handler(req: &mut Request) -> IronResult { let metrics = extension!(req, Metrics); let pool = extension!(req, Pool); let queue = extension!(req, BuildQueue); @@ -30,7 +30,7 @@ fn duration_to_seconds(d: Duration) -> f64 { d.as_secs() as f64 + nanos } -pub struct RequestRecorder { +pub(super) struct RequestRecorder { handler: Box, route_name: String, } From 8010082aa56a51df1fe325cf0d75d77cd67c5e87 Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Sun, 21 Mar 2021 10:56:41 -0400 Subject: [PATCH 4/4] Fix broken metrics test --- src/web/metrics.rs | 41 ++++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/src/web/metrics.rs b/src/web/metrics.rs index 8423360b3..877363994 100644 --- a/src/web/metrics.rs +++ b/src/web/metrics.rs @@ -108,6 +108,7 @@ impl Drop for RenderingTimesRecorder<'_> { #[cfg(test)] mod tests { use crate::test::{assert_success, wrapper}; + use crate::Context; use std::collections::HashMap; #[test] @@ -133,8 +134,8 @@ mod tests { ("/sitemap.xml", "static resource"), ("/-/static/style.css", "static resource"), ("/-/static/vendored.css", "static resource"), - ("/rustdoc/rcc/0.0.0/rcc/index.html", "database"), - ("/rustdoc/gcc/0.0.0/gcc/index.html", "database"), + ("/rustdoc/rcc/0.0.0/rcc/index.html", "rustdoc page"), + ("/rustdoc/gcc/0.0.0/gcc/index.html", "rustdoc page"), ]; wrapper(|env| { @@ -167,29 +168,43 @@ mod tests { *entry += 2; } + // this shows what the routes were *actually* recorded as, making it easier to update ROUTES if the name changes. + let metrics_serialized = metrics.gather(&env.pool()?, &env.build_queue())?; + let all_routes_visited = metrics_serialized + .iter() + .find(|x| x.get_name() == "docsrs_routes_visited") + .unwrap(); + let routes_visited_pretty: Vec<_> = all_routes_visited + .get_metric() + .iter() + .map(|metric| { + let labels = metric.get_label(); + assert_eq!(labels.len(), 1); // not sure when this would be false + let route = labels[0].get_value(); + let count = metric.get_counter().get_value(); + format!("{}: {}", route, count) + }) + .collect(); + println!("routes: {:?}", routes_visited_pretty); + for (label, count) in expected.iter() { assert_eq!( metrics.routes_visited.with_label_values(&[*label]).get(), - *count + *count, + "routes_visited metrics for {} are incorrect", + label, ); assert_eq!( metrics .response_time .with_label_values(&[*label]) .get_sample_count(), - *count as u64 + *count as u64, + "response_time metrics for {} are incorrect", + label, ); } - // extra metrics for the "database success" hack - assert_eq!( - metrics - .routes_visited - .with_label_values(&["database success"]) - .get(), - 2 - ); - Ok(()) }) }