Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid creating invalid filters for AdGuard cosmetic filter location regexes #395

Merged
merged 1 commit into from
Oct 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 31 additions & 7 deletions src/content_blocking.rs
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,9 @@ impl TryFrom<CosmeticFilter> for CbRule {
type Error = CbRuleCreationFailure;

fn try_from(v: CosmeticFilter) -> Result<Self, Self::Error> {
use crate::filters::cosmetic::{CosmeticFilterLocationType, CosmeticFilterMask};
use crate::filters::cosmetic::{
CosmeticFilterLocationType as LocationType, CosmeticFilterMask,
};

if v.action.is_some() {
return Err(CbRuleCreationFailure::CosmeticActionRulesNotSupported);
Expand All @@ -573,28 +575,29 @@ impl TryFrom<CosmeticFilter> for CbRule {
let mut hostnames_vec = vec![];
let mut not_hostnames_vec = vec![];

let mut any_entities = false;
let mut any_unsupported = false;

// Unwrap is okay here - cosmetic rules must have a '#' character
let sharp_index = find_char(b'#', raw_line.as_bytes()).unwrap();
CosmeticFilter::locations_before_sharp(&raw_line, sharp_index).for_each(
|(location_type, location)| match location_type {
CosmeticFilterLocationType::Entity => any_entities = true,
CosmeticFilterLocationType::NotEntity => any_entities = true,
CosmeticFilterLocationType::Hostname => {
LocationType::Entity | LocationType::NotEntity | LocationType::Unsupported => {
any_unsupported = true
}
LocationType::Hostname => {
if let Ok(encoded) = idna::domain_to_ascii(location) {
hostnames_vec.push(encoded);
}
}
CosmeticFilterLocationType::NotHostname => {
LocationType::NotHostname => {
if let Ok(encoded) = idna::domain_to_ascii(location) {
not_hostnames_vec.push(encoded);
}
}
},
);

if any_entities {
if any_unsupported && hostnames_vec.is_empty() && not_hostnames_vec.is_empty() {
return Err(CbRuleCreationFailure::CosmeticEntitiesUnsupported);
}

Expand Down Expand Up @@ -1403,4 +1406,25 @@ mod filterset_tests {

Ok(())
}

#[test]
fn convert_cosmetic_filter_locations() -> Result<(), ()> {
let list = [
r"/^dizipal\d+\.com$/##.web",
r"/^example\d+\.com$/,test.net,b.*##.ad",
];
let mut set = FilterSet::new(true);
set.add_filters(&list, Default::default());

let (cb_rules, used_rules) = set.into_content_blocking()?;
assert_eq!(used_rules.len(), 1);
assert_eq!(cb_rules.len(), 1);
assert!(cb_rules[0].trigger.if_domain.is_some());
assert_eq!(
cb_rules[0].trigger.if_domain.as_ref().unwrap(),
&["test.net"]
);

Ok(())
}
}
25 changes: 25 additions & 0 deletions src/filters/cosmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ pub(crate) enum CosmeticFilterLocationType {
NotEntity,
Hostname,
NotHostname,
Unsupported,
}

/// Contains hashes of all of the comma separated location items that were populated before the
Expand Down Expand Up @@ -186,6 +187,10 @@ impl CosmeticFilter {
hostname.len()
};
let location = &hostname[start..end];
// AdGuard regex syntax
if location.starts_with('/') {
return Some((CosmeticFilterLocationType::Unsupported, part));
}
Some(match (negation, entity) {
(true, true) => (CosmeticFilterLocationType::NotEntity, location),
(true, false) => (CosmeticFilterLocationType::NotHostname, location),
Expand Down Expand Up @@ -216,6 +221,7 @@ impl CosmeticFilter {
return Err(CosmeticFilterError::LocationModifiersUnsupported);
}

let mut any_unsupported = false;
for (location_type, location) in Self::locations_before_sharp(line, sharp_index) {
let mut hostname = String::new();
if location.is_ascii() {
Expand All @@ -232,8 +238,20 @@ impl CosmeticFilter {
CosmeticFilterLocationType::NotHostname => not_hostnames_vec.push(hash),
CosmeticFilterLocationType::Entity => entities_vec.push(hash),
CosmeticFilterLocationType::Hostname => hostnames_vec.push(hash),
CosmeticFilterLocationType::Unsupported => {
any_unsupported = true;
}
}
}
// Discard the rule altogether if there are no supported location types
if any_unsupported
&& hostnames_vec.is_empty()
&& entities_vec.is_empty()
&& not_hostnames_vec.is_empty()
&& not_entities_vec.is_empty()
{
return Err(CosmeticFilterError::UnsupportedSyntax);
}

/// Sorts `vec` and wraps it in `Some` if it's not empty, or returns `None` if it is.
#[inline]
Expand Down Expand Up @@ -2167,6 +2185,13 @@ mod matching_tests {
assert!(parse_cf(r#"​##a[href^="https://www.g2fame.com/"] > img"#).is_err());
}

#[test]
fn adg_regex() {
assert!(parse_cf(r"/^dizipal\d+\.com$/##.web").is_err());
// Filter is still salvageable if at least one location is supported
assert!(parse_cf(r"/^dizipal\d+\.com,test.net$/##.web").is_ok());
}

#[test]
#[cfg(feature = "css-validation")]
fn abp_has_conversion() {
Expand Down