Skip to content

Commit

Permalink
webapp: Implement client addresses extractor, use for report spam pro…
Browse files Browse the repository at this point in the history
…tection and logging
  • Loading branch information
AMDmi3 committed Mar 2, 2025
1 parent dc6e98c commit 9e2e33d
Show file tree
Hide file tree
Showing 4 changed files with 147 additions and 18 deletions.
108 changes: 108 additions & 0 deletions repology-webapp/src/extractors.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
// SPDX-FileCopyrightText: Copyright 2025 Dmitry Marakasov <[email protected]>
// SPDX-License-Identifier: GPL-3.0-or-later

use std::convert::Infallible;
use std::net::IpAddr;

use anyhow::Result;
use axum::extract::FromRequestParts;
use axum::http::request::Parts;
use tracing::{debug, error};

/// axum extractor for all possible client IP addresses
///
/// Depending on how the [repology] application is deployed, client address
/// needs to be extracted from different places. If application is directly
/// accessible to clients, client IP comes from tokio connection. If an application
/// is behind a reverse proxy, client address is usually passed in some HTTP header
/// such as X-Real-IP. There may be multiple headers, and a header may contain
/// multiple addresses, and some headers may also be spoofed.
///
/// For the sake of client identification or load balancing, this may require
/// application to know how it is deployed, to extract the address specifically
/// from the proper place. However, for the sake of client blacklising, we can
/// just extract all addresses from all possible locations, and avoid configuration.
///
/// Therefore, we use custom extractor and not https://docs.rs/axum-client-ip.
///
/// The current implementation does not really support ALL possible locations,
/// good enough job for how repology is currently deployed. If someone deploys
/// repology another way, and suffers from spam problems, this implementation
/// may be extended.
pub struct PossibleClientAddresses(pub Vec<IpAddr>);

impl<S> FromRequestParts<S> for PossibleClientAddresses
where
S: Send + Sync,
{
type Rejection = Infallible;

async fn from_request_parts(parts: &mut Parts, _state: &S) -> Result<Self, Self::Rejection> {
const SOURCE_HEADERS: &[&str] = &["x-real-ip", "x-forwarded-for"];

fn parse_address(data: &[u8]) -> Result<IpAddr> {
Ok(std::str::from_utf8(data)?.trim().parse()?)
}

let mut addresses: Vec<IpAddr> = vec![];

for header in SOURCE_HEADERS {
let Some(value) = parts.headers.get(*header) else {
continue;
};
// we iterate byte slice to avoid the possibility of malicious user poisoning
// the whole header by injecting invalid utf sequence in previous header content
for part in value.as_bytes().split(|b| *b == b',') {
match parse_address(part) {
Ok(address) => addresses.push(address),
Err(err) => {
let part = String::from_utf8_lossy(part);
error!(%part, %err, "unable to parse client address");
}
}
}
}
debug!(?parts.headers, "headers dump in PossibleClientAddresses extractor");
Ok(Self(addresses))
}
}

#[cfg(test)]
#[coverage(off)]
mod tests {
use std::net::Ipv4Addr;

use http::HeaderValue;

use super::*;

#[tokio::test]
async fn test_simple() {
let mut parts = http::Request::builder()
.header(
"X-Real-IP",
HeaderValue::from_bytes(b" 10.0.0.1 , 10.0.0.2 , garbage, \xff").unwrap(),
)
.header(
"X-Forwarded-For",
HeaderValue::from_bytes(b" 10.1.0.1 , 10.1.0.2 , garbage, \xff").unwrap(),
)
.body(())
.unwrap()
.into_parts()
.0;

assert_eq!(
PossibleClientAddresses::from_request_parts(&mut parts, &())
.await
.unwrap()
.0,
vec![
IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1)),
IpAddr::V4(Ipv4Addr::new(10, 0, 0, 2)),
IpAddr::V4(Ipv4Addr::new(10, 1, 0, 1)),
IpAddr::V4(Ipv4Addr::new(10, 1, 0, 2)),
]
);
}
}
1 change: 1 addition & 0 deletions repology-webapp/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ mod badges;
pub mod config;
mod constants;
mod endpoints;
mod extractors;
mod feeds;
mod font;
mod graphs;
Expand Down
49 changes: 32 additions & 17 deletions repology-webapp/src/views/project/report.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use tracing::error;

use crate::config::AppConfig;
use crate::endpoints::Endpoint;
use crate::extractors::PossibleClientAddresses;
use crate::result::EndpointResult;
use crate::state::AppState;
use crate::template_context::TemplateContext;
Expand Down Expand Up @@ -67,7 +68,7 @@ fn check_new_report(
project_name: &str,
project_is_alive: bool,
too_many_reports: bool,
client_address: &IpAddr,
client_addresses: &[IpAddr],
form: &ReportForm,
config: &AppConfig,
) -> std::result::Result<(), Vec<&'static str>> {
Expand All @@ -80,20 +81,21 @@ fn check_new_report(
if !project_is_alive {
error!(
project_name,
%client_address, "report to gone or nonexisting project"
?client_addresses,
"report to gone or nonexisting project"
);
errors.push("project does not exist or is gone");
}

if too_many_reports {
error!(project_name, %client_address, "too many reports");
error!(project_name, ?client_addresses, "too many reports");
errors.push("too many reports for this project");
}

if form.comment.len() > MAX_REPORT_COMMENT_LENGTH {
error!(
project_name,
%client_address,
?client_addresses,
length = form.comment.len(),
"report comment too long"
);
Expand All @@ -106,27 +108,33 @@ fn check_new_report(
&& !form.need_vuln
&& form.comment.is_empty()
{
error!(project_name, %client_address, "report form is not filled");
error!(project_name, ?client_addresses, "report form is not filled");
errors.push("please fill out the form");
}

if form.comment.contains("<a href") {
error!(project_name, %client_address, "report comment contains HTML");
error!(
project_name,
?client_addresses,
"report comment contains HTML"
);
errors.push("HTML not allowed");
}

if form.need_vuln && !form.comment.contains("nvd.nist.gov/vuln/detail/CVE-") {
error!(
project_name,
%client_address, "missing vulnerability report does not contain NVD link"
?client_addresses,
"missing vulnerability report does not contain NVD link"
);
errors.push("link to NVD entry (e.g. https://nvd.nist.gov/vuln/detail/CVE-*) for missing CVE is required; note that CVE must already have CPE(s) assigned");
}

if config.disabled_reports.contains(project_name) {
error!(
project_name,
%client_address, "report attempt to disabled project"
?client_addresses,
"report attempt to disabled project"
);
errors.push("new reports to this project are disabled, probably due to a big number of incorrect reports or spam");
}
Expand All @@ -136,18 +144,23 @@ fn check_new_report(
if form.comment.contains(keyword) {
error!(
project_name,
%client_address, keyword, "report comment contains spam keyword"
?client_addresses,
keyword,
"report comment contains spam keyword"
);
is_spam = true;
break;
}
}

for spam_network in &config.spam_networks {
if spam_network.contains(client_address.clone()) {
if client_addresses
.iter()
.any(|address| spam_network.contains(*address))
{
error!(
project_name,
%client_address, %spam_network, "report submitter is blacklisted"
?client_addresses, %spam_network, "report submitter is blacklisted"
);
is_spam = true;
break;
Expand All @@ -162,7 +175,8 @@ fn check_new_report(
{
error!(
project_name,
%client_address, "report form filled in meaningless pattern"
?client_addresses,
"report form filled in meaningless pattern"
);
is_spam = true;
}
Expand All @@ -181,7 +195,7 @@ fn check_new_report(
async fn project_report_generic(
project_name: String,
state: &AppState,
form: Option<ReportForm>,
input: Option<(&[std::net::IpAddr], ReportForm)>,
) -> EndpointResult {
let ctx = TemplateContext::new(
Endpoint::ProjectReport,
Expand Down Expand Up @@ -226,12 +240,12 @@ async fn project_report_generic(

let too_many_reports = reports.len() >= crate::constants::MAX_REPORTS;

let errors = if let Some(form) = &form {
let errors = if let Some((client_addresses, form)) = &input {
if let Err(errors) = check_new_report(
&project_name,
!project.is_orphaned(),
too_many_reports,
&"127.0.0.1".parse().unwrap(), // XXX: stub, use real client address
client_addresses,
form,
&state.config,
) {
Expand Down Expand Up @@ -295,7 +309,7 @@ async fn project_report_generic(
reports_disabled: false,
too_many_reports,
afk_till: None,
form: form.unwrap_or(ReportForm::default()),
form: input.map(|(_, form)| form).unwrap_or(ReportForm::default()),
errors,
}
.render()?,
Expand All @@ -307,9 +321,10 @@ async fn project_report_generic(
pub async fn project_report_post(
Path(project_name): Path<String>,
State(state): State<Arc<AppState>>,
PossibleClientAddresses(addresses): PossibleClientAddresses,
Form(form): Form<ReportForm>,
) -> EndpointResult {
project_report_generic(project_name, &state, Some(form)).await
project_report_generic(project_name, &state, Some((&addresses, form))).await
}

#[cfg_attr(not(feature = "coverage"), tracing::instrument(skip(state)))]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,12 @@ async fn test_spam_network(pool: PgPool) {
need_verignore: true,
..Default::default()
};
let response = Request::new(pool, "/project/zsh/report").with_form(form).with_spam_network(&"0.0.0.0/0".parse().unwrap()).perform().await;
let response = Request::new(pool, "/project/zsh/report")
.with_form(form)
.with_spam_network(&"10.0.0.1/32".parse().unwrap())
.with_header("x-real-ip", "10.0.0.0, 10.0.0.1, 10.0.0.2")
.perform()
.await;
assert_eq!(response.status(), http::StatusCode::OK);
assert_eq!(response.header_value_str("content-type").unwrap(), Some("text/html"));
assert!(response.is_html_valid(HtmlValidationFlags::ALLOW_EMPTY_TAGS | HtmlValidationFlags::WARNINGS_ARE_FATAL));
Expand Down

0 comments on commit 9e2e33d

Please sign in to comment.