diff --git a/README.md b/README.md index e6a92fc..73e894a 100644 --- a/README.md +++ b/README.md @@ -70,7 +70,7 @@ module "waf" { } ``` -For a list of all variables please refer to: [terraform-dock](https://github.com/DND-IT/infra-terraform-module/blob/master/waf/terraform-docs.md) +For a list of all variables please refer to: [terraform-docs](terraform-docs.md) ### Waf scope: CLOUDFRONT or REGIONAL diff --git a/athena_queries/count_requests_grouped_by_ip_tenant_endpoint.sql b/athena_queries/count_requests_grouped_by_ip_tenant_endpoint.sql index 8719f07..634c627 100644 --- a/athena_queries/count_requests_grouped_by_ip_tenant_endpoint.sql +++ b/athena_queries/count_requests_grouped_by_ip_tenant_endpoint.sql @@ -1,11 +1,11 @@ /* This query count requests grouped by the ip, terminating rule, action, endpoint and tenant */ -WITH test_dataset AS +WITH test_dataset AS (SELECT httprequest.clientip, terminatingruleid, action, httprequest.uri, header FROM waf_logs CROSS JOIN UNNEST(httprequest.headers) AS t(header) where (action='BLOCK') and (terminatingruleid='Group_1-CH')) SELECT COUNT(*) as count, clientip, terminatingruleid, action, uri, header.value as tenant -FROM test_dataset +FROM test_dataset WHERE LOWER(header.name)='host' GROUP BY clientip, terminatingruleid, action, uri, header.value ORDER BY count DESC diff --git a/athena_queries/per_ip_blocked_requests.sql b/athena_queries/per_ip_blocked_requests.sql index 2c70f0e..6140d86 100644 --- a/athena_queries/per_ip_blocked_requests.sql +++ b/athena_queries/per_ip_blocked_requests.sql @@ -1,6 +1,6 @@ /* This query gets all the blocked requests for a given IP (works for both IPV4 and IPV6) in a give time range */ -SELECT * +SELECT * FROM waf_logs WHERE httprequest.clientip='2a02:121e:7823:0:bc4c:e549:9ae0:c93a' AND "date" >= '2022/11/03' AND "date" < '2022/11/04' AND ("action" LIKE 'BLOCK') diff --git a/athena_queries/waf_logs_table.sql.tftpl b/athena_queries/waf_logs_table.sql.tftpl index d425e17..44af766 100644 --- 
a/athena_queries/waf_logs_table.sql.tftpl +++ b/athena_queries/waf_logs_table.sql.tftpl @@ -1,4 +1,4 @@ -/* +/* the table name waf_logs is the one we'll use in all queries. If changed all queries will need to be accordingly modified. BE AWARE THAT IF A TABLE WITH THE SAME NAME ALREADY EXISTS, IT HAS TO BE DELETED BEFORE CREATING THE NEWONE !!! official query provided by AWS: https://docs.aws.amazon.com/athena/latest/ug/waf-logs.html#to-create-the-waf-table diff --git a/parselycrawlers.tf b/parselycrawlers.tf index 3b75a56..6becfea 100644 --- a/parselycrawlers.tf +++ b/parselycrawlers.tf @@ -1,4 +1,4 @@ -# Parse.ly is an analytics tool used by the Disco team. The use case for whitelisting their crawlers is that they might need to +# Parse.ly is an analytics tool used by the Disco team. The use case for whitelisting their crawlers is that they might need to # trigger a large recrawl after a wrong update to the metadata of the articles locals { parsely_whitelist_json = jsondecode(data.http.parsely_ip_list.response_body) diff --git a/waf.tf b/waf.tf index 71aacfa..a4dd253 100644 --- a/waf.tf +++ b/waf.tf @@ -8,7 +8,7 @@ locals { # Not the "real" regexp for ipv6. The right one has around 1000 characters... parsed_allowed_ipv6 = [ for ip in var.allowed_ips_v6 : - format(regex("^[0-9a-fA-F:]*/\\d{1,2}", ip)) + format(regex("^[0-9a-fA-F:]*/\\d{1,3}", ip)) if ip != "" ] self_ips = [ @@ -416,7 +416,7 @@ resource "aws_wafv2_web_acl" "waf" { } } dynamic "statement" { - # or_statement needs 2 arguments so handle the case when only one article is in the rule + # or_statement needs 2 arguments so handle the case when only one article is in the rule for_each = length(rule.value.articles) > 1 ? 
[1] : [] # if more than one element use or_statement content { or_statement { @@ -604,7 +604,7 @@ resource "aws_wafv2_web_acl" "waf" { dynamic "rule" { # Dont create this rule independently of var.aws_managed_rules_labels if length(var.aws_managed_rules) == 0 - # The rule created by var.aws_managed_rules is the one adding labels to the requests, therefore without the + # The rule created by var.aws_managed_rules is the one adding labels to the requests, therefore without the # rule for var.aws_managed_rules this rule would have no labels to check and therefore should not be created for_each = length(var.aws_managed_rules_labels) > 0 && length(var.aws_managed_rules) > 0 ? [1] : [] content {