From 045121887b3eae2d8d8d8f193dc5d89ff6d3e6c1 Mon Sep 17 00:00:00 2001
From: Ashleigh Brennan
Date: Fri, 9 Feb 2024 10:02:09 -0600
Subject: [PATCH] OBSDOCS-805: Add docs for content filtering

---
 _topic_maps/_topic_map.yml                    |   2 +
 _topic_maps/_topic_map_osd.yml                |   2 +
 _topic_maps/_topic_map_rosa.yml               |   2 +
 .../logging-content-filtering.adoc            |  30 +++++
 .../logging-content-filter-drop-records.adoc  | 108 ++++++++++++++++++
 .../logging-content-filter-prune-records.adoc |  58 ++++++++++
 6 files changed, 202 insertions(+)
 create mode 100644 logging/performance_reliability/logging-content-filtering.adoc
 create mode 100644 modules/logging-content-filter-drop-records.adoc
 create mode 100644 modules/logging-content-filter-prune-records.adoc

diff --git a/_topic_maps/_topic_map.yml b/_topic_maps/_topic_map.yml
index ad21738fdc5e..04b764f39207 100644
--- a/_topic_maps/_topic_map.yml
+++ b/_topic_maps/_topic_map.yml
@@ -2704,6 +2704,8 @@ Topics:
   Topics:
   - Name: Flow control mechanisms
     File: logging-flow-control-mechanisms
+# - Name: Filtering logs by content
+#   File: logging-content-filtering
 - Name: Scheduling resources
   Dir: scheduling_resources
   Topics:
diff --git a/_topic_maps/_topic_map_osd.yml b/_topic_maps/_topic_map_osd.yml
index c04d6063d4b4..f57a45362999 100644
--- a/_topic_maps/_topic_map_osd.yml
+++ b/_topic_maps/_topic_map_osd.yml
@@ -1210,6 +1210,8 @@ Topics:
   Topics:
   - Name: Flow control mechanisms
     File: logging-flow-control-mechanisms
+# - Name: Filtering logs by content
+#   File: logging-content-filtering
 - Name: Scheduling resources
   Dir: scheduling_resources
   Topics:
diff --git a/_topic_maps/_topic_map_rosa.yml b/_topic_maps/_topic_map_rosa.yml
index 18da64957a05..b0214c5be5fd 100644
--- a/_topic_maps/_topic_map_rosa.yml
+++ b/_topic_maps/_topic_map_rosa.yml
@@ -1452,6 +1452,8 @@ Topics:
   Topics:
   - Name: Flow control mechanisms
     File: logging-flow-control-mechanisms
+# - Name: Filtering logs by content
+#   File: logging-content-filtering
 - Name: Scheduling resources
   Dir: scheduling_resources
   Topics:
diff --git a/logging/performance_reliability/logging-content-filtering.adoc b/logging/performance_reliability/logging-content-filtering.adoc
new file mode 100644
index 000000000000..3e668b66c5ad
--- /dev/null
+++ b/logging/performance_reliability/logging-content-filtering.adoc
@@ -0,0 +1,30 @@
+:_mod-docs-content-type: ASSEMBLY
+include::_attributes/common-attributes.adoc[]
+include::_attributes/attributes-openshift-dedicated.adoc[]
+[id="logging-content-filtering"]
+= Filtering logs by content
+:context: logging-content-filtering
+
+toc::[]
+
+Collecting all logs from a cluster might produce a large amount of data, which can be expensive to transport and store.
+
+You can reduce the volume of your log data by filtering out low-priority data that does not need to be stored. {logging-uc} provides content filters that you can use to reduce the volume of log data.
+
+[NOTE]
+====
+Content filters are distinct from `input` selectors. `input` selectors select or ignore entire log streams based on source metadata. Content filters edit log streams to remove and modify records based on the record content.
+====
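+
+All content filters use the same wiring: a named filter is declared under `spec.filters` and attached to one or more pipelines through `filterRefs`. The following minimal sketch shows only that shape; `<filter_name>` and `<pipeline_name>` are placeholders, and the filter-specific configuration is covered in the modules that follow:
+
+[source,yaml]
+----
+spec:
+  filters:
+  - name: <filter_name>
+    type: drop # or prune
+    # filter-specific configuration
+  pipelines:
+  - name: <pipeline_name>
+    filterRefs: ["<filter_name>"]
+----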
+
+Log data volume can be reduced by using one of the following methods:
+
+* xref:../../logging/performance_reliability/logging-content-filtering.adoc#logging-content-filter-drop-records_logging-content-filtering[Configuring content filters to drop unwanted log records]
+* xref:../../logging/performance_reliability/logging-content-filtering.adoc#logging-content-filter-prune-records_logging-content-filtering[Configuring content filters to prune log records]
+
+include::modules/logging-content-filter-drop-records.adoc[leveloffset=+1]
+include::modules/logging-content-filter-prune-records.adoc[leveloffset=+1]
+
+[role="_additional-resources"]
+[id="additional-resources_logging-content-filtering"]
+== Additional resources
+* xref:../../logging/log_collection_forwarding/configuring-log-forwarding.adoc#cluster-logging-collector-log-forwarding-about_configuring-log-forwarding[About forwarding logs to third-party systems]
diff --git a/modules/logging-content-filter-drop-records.adoc b/modules/logging-content-filter-drop-records.adoc
new file mode 100644
index 000000000000..993966bb24b1
--- /dev/null
+++ b/modules/logging-content-filter-drop-records.adoc
@@ -0,0 +1,108 @@
+// Module included in the following assemblies:
+//
+// * logging/performance_reliability/logging-content-filtering.adoc
+
+:_mod-docs-content-type: PROCEDURE
+[id="logging-content-filter-drop-records_{context}"]
+= Configuring content filters to drop unwanted log records
+
+When the `drop` filter is configured, the log collector evaluates log streams according to the filters before forwarding. The collector drops unwanted log records that match the specified configuration.
+
+.Prerequisites
+
+* You have installed the {clo}.
+* You have administrator permissions.
+* You have created a `ClusterLogForwarder` custom resource (CR).
+
+.Procedure
+
+. Add a configuration for a filter to the `filters` spec in the `ClusterLogForwarder` CR.
++
+The following example shows how to configure the `ClusterLogForwarder` CR to drop log records based on regular expressions:
++
+.Example `ClusterLogForwarder` CR
+[source,yaml]
+----
+apiVersion: logging.openshift.io/v1
+kind: ClusterLogForwarder
+metadata:
+# ...
+spec:
+  filters:
+  - name: <filter_name>
+    type: drop # <1>
+    drop: # <2>
+    - test: # <3>
+      - field: .kubernetes.labels."foo-bar/baz" # <4>
+        matches: .+ # <5>
+      - field: .kubernetes.pod_name
+        notMatches: "my-pod" # <6>
+  pipelines:
+  - name: <pipeline_name> # <7>
+    filterRefs: ["<filter_name>"]
+# ...
+----
+<1> Specifies the type of filter. The `drop` filter drops log records that match the filter configuration.
+<2> Specifies configuration options for applying the `drop` filter.
+<3> Specifies the configuration for tests that are used to evaluate whether a log record is dropped.
+** If all the conditions specified for a test are true, the test passes and the log record is dropped.
+** When multiple tests are specified for the `drop` filter configuration, if any of the tests pass, the record is dropped.
+** If there is an error evaluating a condition, for example, the field is missing from the log record being evaluated, that condition evaluates to false.
+<4> Specifies a dot-delimited field path, which is a path to a field in the log record. The path can contain alphanumeric characters and underscores (`a-zA-Z0-9_`), for example, `.kubernetes.namespace_name`. If segments contain characters outside this range, the segment must be in quotes, for example, `.kubernetes.labels."foo.bar-bar/baz"`. You can include multiple field paths in a single `test` configuration, but they must all evaluate to true for the test to pass and the `drop` filter to be applied.
+<5> Specifies a regular expression. If log records match this regular expression, they are dropped. You can set either the `matches` or `notMatches` condition for a single `field` path, but not both.
+<6> Specifies a regular expression. If log records do not match this regular expression, they are dropped. You can set either the `matches` or `notMatches` condition for a single `field` path, but not both.
+<7> Specifies the pipeline that the `drop` filter is applied to.
+
+. Apply the `ClusterLogForwarder` CR by running the following command:
++
+[source,terminal]
+----
+$ oc apply -f <filename>.yaml
+----
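+
+As an informal illustration, the example configuration above drops a hypothetical record like the following, because both conditions of its single test evaluate to true:
+
+[source,yaml]
+----
+# Hypothetical, abbreviated log record
+kubernetes:
+  labels:
+    foo-bar/baz: enabled # non-empty, so the `matches: .+` condition is true
+  pod_name: other-pod # not "my-pod", so the `notMatches` condition is true
+# Both conditions are true, so the test passes and the record is dropped.
+----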
+
+.Additional examples
+
+The following example shows how you can configure the `drop` filter to keep only higher-priority log records:
+
+[source,yaml]
+----
+apiVersion: logging.openshift.io/v1
+kind: ClusterLogForwarder
+metadata:
+# ...
+spec:
+  filters:
+  - name: important
+    type: drop
+    drop:
+    - test:
+      - field: .message
+        notMatches: "(?i)critical|error"
+      - field: .level
+        matches: "info|warning"
+# ...
+----
+
+In addition to including multiple field paths in a single `test` configuration, you can include additional tests, which are treated as _OR_ checks. In the following example, records are dropped if either `test` configuration evaluates to true. However, for the second `test` configuration, both field conditions must be true for the test to evaluate to true:
+
+[source,yaml]
+----
+apiVersion: logging.openshift.io/v1
+kind: ClusterLogForwarder
+metadata:
+# ...
+spec:
+  filters:
+  - name: important
+    type: drop
+    drop:
+    - test:
+      - field: .kubernetes.namespace_name
+        matches: "^open"
+    - test:
+      - field: .log_type
+        matches: "application"
+      - field: .kubernetes.pod_name
+        notMatches: "my-pod"
+# ...
+----
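+
+Read as boolean logic, the example above makes the following drop decision for each record (an informal sketch, not API syntax):
+
+----
+Drop the record if:
+    .kubernetes.namespace_name matches "^open"
+  OR
+    (.log_type matches "application" AND .kubernetes.pod_name does not match "my-pod")
+----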
diff --git a/modules/logging-content-filter-prune-records.adoc b/modules/logging-content-filter-prune-records.adoc
new file mode 100644
index 000000000000..a796daacb4f4
--- /dev/null
+++ b/modules/logging-content-filter-prune-records.adoc
@@ -0,0 +1,58 @@
+// Module included in the following assemblies:
+//
+// * logging/performance_reliability/logging-content-filtering.adoc
+
+:_mod-docs-content-type: PROCEDURE
+[id="logging-content-filter-prune-records_{context}"]
+= Configuring content filters to prune log records
+
+When the `prune` filter is configured, the log collector evaluates log streams according to the filters before forwarding. The collector prunes log records by removing low-value fields such as pod annotations.
+
+.Prerequisites
+
+* You have installed the {clo}.
+* You have administrator permissions.
+* You have created a `ClusterLogForwarder` custom resource (CR).
+
+.Procedure
+
+. Add a configuration for a filter to the `filters` spec in the `ClusterLogForwarder` CR.
++
+The following example shows how to configure the `ClusterLogForwarder` CR to prune log records based on field paths:
++
+[IMPORTANT]
+====
+If both the `in` and `notIn` parameters are specified, records are pruned based on the `notIn` array first, which takes precedence over the `in` array. After records have been pruned by using the `notIn` array, they are then pruned by using the `in` array.
+====
++
+.Example `ClusterLogForwarder` CR
+[source,yaml]
+----
+apiVersion: logging.openshift.io/v1
+kind: ClusterLogForwarder
+metadata:
+# ...
+spec:
+  filters:
+  - name: <filter_name>
+    type: prune # <1>
+    prune: # <2>
+      in: [.kubernetes.annotations, .kubernetes.namespace_id] # <3>
+      notIn: [.kubernetes, .log_type, .message, ."@timestamp"] # <4>
+  pipelines:
+  - name: <pipeline_name> # <5>
+    filterRefs: ["<filter_name>"]
+# ...
+----
+<1> Specifies the type of filter. The `prune` filter prunes log records by configured fields.
+<2> Specifies configuration options for applying the `prune` filter. The `in` and `notIn` fields are specified as arrays of dot-delimited field paths, which are paths to fields in log records. These paths can contain alphanumeric characters and underscores (`a-zA-Z0-9_`), for example, `.kubernetes.namespace_name`. If segments contain characters outside this range, the segment must be in quotes, for example, `.kubernetes.labels."foo.bar-bar/baz"`.
+<3> Optional: Any fields that are specified in this array are removed from the log record.
+<4> Optional: Any fields that are not specified in this array are removed from the log record.
+<5> Specifies the pipeline that the `prune` filter is applied to.
+
+. Apply the `ClusterLogForwarder` CR by running the following command:
++
+[source,terminal]
+----
+$ oc apply -f <filename>.yaml
+----
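+
+As an informal illustration of the precedence rule, applying the example filter above to a hypothetical record works in two passes: the `notIn` array first removes every field other than `.kubernetes`, `.log_type`, `.message`, and `."@timestamp"`, and the `in` array then removes `.kubernetes.annotations` and `.kubernetes.namespace_id` from what remains:
+
+[source,yaml]
+----
+# Hypothetical, abbreviated record before pruning
+"@timestamp": "2024-02-09T16:02:09Z"
+log_type: application
+level: info
+message: hello
+kubernetes:
+  annotations: {workload: example}
+  namespace_id: abc123
+  namespace_name: my-namespace
+  pod_name: my-pod
+---
+# After pruning: the notIn pass removed .level; the in pass then removed
+# .kubernetes.annotations and .kubernetes.namespace_id
+"@timestamp": "2024-02-09T16:02:09Z"
+log_type: application
+message: hello
+kubernetes:
+  namespace_name: my-namespace
+  pod_name: my-pod
+----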