diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 3a7b369a1654..d10301597e2b 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -102,6 +102,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - Added Airflow lightweight module {pull}26220[26220] - Add state_job metricset to Kubernetes module{pull}26479[26479] - Bump AWS SDK version to v0.24.0 for WebIdentity authentication flow {issue}19393[19393] {pull}27126[27126] +- Add Linux pressure metricset {pull}27355[27355] *Packetbeat* diff --git a/metricbeat/docs/fields.asciidoc b/metricbeat/docs/fields.asciidoc index 4e1f12c21936..21a7877be11c 100644 --- a/metricbeat/docs/fields.asciidoc +++ b/metricbeat/docs/fields.asciidoc @@ -43509,6 +43509,242 @@ type: object -- +[float] +=== pressure + +Linux pressure stall information metrics for cpu, memory, and io + + +*`linux.pressure.cpu.some.10.pct`*:: ++ +-- +The average share of time in which at least some tasks were stalled on CPU over a ten second window. + + +type: float + +format: percent + +-- + +*`linux.pressure.cpu.some.60.pct`*:: ++ +-- +The average share of time in which at least some tasks were stalled on CPU over a sixty second window. + + +type: float + +format: percent + +-- + +*`linux.pressure.cpu.some.300.pct`*:: ++ +-- +The average share of time in which at least some tasks were stalled on CPU over a three hundred second window. + + +type: float + +format: percent + +-- + +*`linux.pressure.cpu.some.total.time.us`*:: ++ +-- +The total absolute stall time (in microseconds) in which at least some tasks were stalled on CPU. + + +type: long + +-- + +*`linux.pressure.memory.some.10.pct`*:: ++ +-- +The average share of time in which at least some tasks were stalled on Memory over a ten second window. 
+ + +type: float + +format: percent + +-- + +*`linux.pressure.memory.some.60.pct`*:: ++ +-- +The average share of time in which at least some tasks were stalled on Memory over a sixty second window. + + +type: float + +format: percent + +-- + +*`linux.pressure.memory.some.300.pct`*:: ++ +-- +The average share of time in which at least some tasks were stalled on Memory over a three hundred second window. + + +type: float + +format: percent + +-- + +*`linux.pressure.memory.some.total.time.us`*:: ++ +-- +The total absolute stall time (in microseconds) in which at least some tasks were stalled on memory. + + +type: long + +-- + +*`linux.pressure.memory.full.10.pct`*:: ++ +-- +The average share of time in which all non-idle tasks were stalled on memory simultaneously over a ten second window. + + +type: float + +format: percent + +-- + +*`linux.pressure.memory.full.60.pct`*:: ++ +-- +The average share of time in which all non-idle tasks were stalled on memory simultaneously over a sixty second window. + + +type: float + +format: percent + +-- + +*`linux.pressure.memory.full.300.pct`*:: ++ +-- +The average share of time in which all non-idle tasks were stalled on memory simultaneously over a three hundred second window. + + +type: float + +format: percent + +-- + +*`linux.pressure.memory.full.total.time.us`*:: ++ +-- +The total absolute stall time (in microseconds) in which all non-idle tasks were stalled on memory. + + +type: long + +-- + +*`linux.pressure.io.some.10.pct`*:: ++ +-- +The average share of time in which at least some tasks were stalled on io over a ten second window. + + +type: float + +format: percent + +-- + +*`linux.pressure.io.some.60.pct`*:: ++ +-- +The average share of time in which at least some tasks were stalled on io over a sixty second window. 
+ + +type: float + +format: percent + +-- + +*`linux.pressure.io.some.300.pct`*:: ++ +-- +The average share of time in which at least some tasks were stalled on io over a three hundred second window. + + +type: float + +format: percent + +-- + +*`linux.pressure.io.some.total.time.us`*:: ++ +-- +The total absolute stall time (in microseconds) in which at least some tasks were stalled on io. + + +type: long + +-- + +*`linux.pressure.io.full.10.pct`*:: ++ +-- +The average share of time in which all non-idle tasks were stalled on io simultaneously over a ten second window. + + +type: float + +format: percent + +-- + +*`linux.pressure.io.full.60.pct`*:: ++ +-- +The average share of time in which all non-idle tasks were stalled on io simultaneously over a sixty second window. + + +type: float + +format: percent + +-- + +*`linux.pressure.io.full.300.pct`*:: ++ +-- +The average share of time in which all non-idle tasks were stalled on io simultaneously over a three hundred second window. + + +type: float + +format: percent + +-- + +*`linux.pressure.io.full.total.time.us`*:: ++ +-- +The total absolute stall time (in microseconds) in which all non-idle tasks were stalled on io. 
+ + +type: long + +-- + [[exported-fields-logstash]] == Logstash fields diff --git a/metricbeat/docs/modules/linux.asciidoc b/metricbeat/docs/modules/linux.asciidoc index ab911885afd3..d7bb7f66315f 100644 --- a/metricbeat/docs/modules/linux.asciidoc +++ b/metricbeat/docs/modules/linux.asciidoc @@ -26,6 +26,7 @@ metricbeat.modules: # - ksm # - conntrack # - iostat + # - pressure enabled: true #hostfs: /hostfs @@ -46,6 +47,8 @@ The following metricsets are available: * <> +* <> + include::linux/conntrack.asciidoc[] include::linux/iostat.asciidoc[] @@ -56,3 +59,5 @@ include::linux/memory.asciidoc[] include::linux/pageinfo.asciidoc[] +include::linux/pressure.asciidoc[] + diff --git a/metricbeat/docs/modules/linux/pressure.asciidoc b/metricbeat/docs/modules/linux/pressure.asciidoc new file mode 100644 index 000000000000..9a549746fa8f --- /dev/null +++ b/metricbeat/docs/modules/linux/pressure.asciidoc @@ -0,0 +1,23 @@ +//// +This file is generated! See scripts/mage/docs_collector.go +//// + +[[metricbeat-metricset-linux-pressure]] +=== linux pressure metricset + +beta[] + +include::../../../module/linux/pressure/_meta/docs.asciidoc[] + + +==== Fields + +For a description of each field in the metricset, see the +<> section. + +Here is an example document generated by this metricset: + +[source,json] +---- +include::../../../module/linux/pressure/_meta/data.json[] +---- diff --git a/metricbeat/docs/modules_list.asciidoc b/metricbeat/docs/modules_list.asciidoc index 3e2e0b7f5bfa..183170a44b79 100644 --- a/metricbeat/docs/modules_list.asciidoc +++ b/metricbeat/docs/modules_list.asciidoc @@ -186,11 +186,12 @@ This file is generated! 
See scripts/mage/docs_collector.go .2+| .2+| |<> beta[] |<> beta[] |<> beta[] |image:./images/icon-no.png[No prebuilt dashboards] | -.5+| .5+| |<> beta[] +.6+| .6+| |<> beta[] |<> beta[] |<> beta[] |<> beta[] |<> beta[] +|<> beta[] |<> |image:./images/icon-no.png[No prebuilt dashboards] | .2+| .2+| |<> |<> diff --git a/metricbeat/include/list_common.go b/metricbeat/include/list_common.go index 4ccbd78a325c..de7d7c96993f 100644 --- a/metricbeat/include/list_common.go +++ b/metricbeat/include/list_common.go @@ -100,6 +100,7 @@ import ( _ "github.com/elastic/beats/v7/metricbeat/module/linux/ksm" _ "github.com/elastic/beats/v7/metricbeat/module/linux/memory" _ "github.com/elastic/beats/v7/metricbeat/module/linux/pageinfo" + _ "github.com/elastic/beats/v7/metricbeat/module/linux/pressure" _ "github.com/elastic/beats/v7/metricbeat/module/logstash" _ "github.com/elastic/beats/v7/metricbeat/module/logstash/node" _ "github.com/elastic/beats/v7/metricbeat/module/logstash/node_stats" diff --git a/metricbeat/metricbeat.reference.yml b/metricbeat/metricbeat.reference.yml index 7a10d5d269df..a6211e828714 100644 --- a/metricbeat/metricbeat.reference.yml +++ b/metricbeat/metricbeat.reference.yml @@ -594,6 +594,7 @@ metricbeat.modules: # - ksm # - conntrack # - iostat + # - pressure enabled: true #hostfs: /hostfs diff --git a/metricbeat/module/linux/_meta/config.yml b/metricbeat/module/linux/_meta/config.yml index 490d3245c19f..cac485307965 100644 --- a/metricbeat/module/linux/_meta/config.yml +++ b/metricbeat/module/linux/_meta/config.yml @@ -6,6 +6,7 @@ # - ksm # - conntrack # - iostat + # - pressure enabled: true #hostfs: /hostfs diff --git a/metricbeat/module/linux/fields.go b/metricbeat/module/linux/fields.go index f83e1b6e4254..0094cc54fc04 100644 --- a/metricbeat/module/linux/fields.go +++ b/metricbeat/module/linux/fields.go @@ -32,5 +32,5 @@ func init() { // AssetLinux returns asset data. // This is the base64 encoded zlib format compressed contents of module/linux. 
func AssetLinux() string { - return "eJzEmd9v2zgSx9/zVwwCHNAWVzfp7/rhgNylOBR3uQu27csudoUxObK5pkiVP+y6f/2CpBXLtmQrcaT6pWgkz3z4neHMkH4Oc1qNQQrlv58BOOEkjeE8/v/8DMCQJLQ0hgk5PAPgZJkRpRNajeEfZwCQvguF5l7SGUAuSHI7jo+eg8KCNubDx61KGsPUaF+u/9Jgc2PXrqyjAgpyRjC7flj3UffDtFLOIJvfPWnyFz6766o+LSzh02R8F6QOY31RoFltPWvDOeI6fNbmQOeg8uwOBqxDJ6wTzP49vkMckBltLfzr9iswbcju2GqCroNzo3fZNuRSq2nDwyPw4VMim5Oz0XxJHLgncHojK+QopPCGWsEIjVxlPeFtOEg5I2gD6jQUOCcwWheQawOKlqDVnq410GShB8qKTShwM6pBO5zIduVy7RXvAcd6xsja3Eu5Akto2Ix46/IrGjFVuiHMj5dilkgBSkPIV1EjYi4FEnfivLs9a5DKknFZSErqQ7r/+WJCJmznKqbLGRkCKaxbO6/+SQxwAHWBUpwIuW9+T1E3QwcMldIOJgRRxQZt7gpgzIfMkHVoXA8SxpwHqfXcl0E+wWYwwxjnCcHa76bSpNcNWfGjlpx3IupQSftoHHuWD3WNkLMjQ988WTcqyEzJZiWZzBJr7CS51Lir7VHpvswI1F3+BZewdmkh+uRQkgFLTCuewr4MufnNk0/7KBQfTgvBaNS4jKURjgZeR/T52AvZiseggdjQCmv3aGvr6hCAYZU/jTwqvgYeTVZur5E8GvY/g/Gkem500cw42p2etCnQjWGfbGsBuESxi3ciOC7I4JTAiYLAlqRcnEa2s6ZR8VQQLZkF8UP7dUDVU8o8puxpCcPpvpP0DxS+2qG4mGahMfWDHizDE6GSfE9DGAJlxx3bTB5raM/c0QdIUlM3exToIbflaYkRnglGWTDbU1IkDwk8JEchpBRp+9mncRGfXvz/NL0n3jafgB9Ef0uGkXIBXufxfBvZuTdCTdcD4BZyexN6MkHFl4K7GXgnpPiBwW1c9OatpyO4Tq9bdN6kVzRj3sRpPUzEwsICpQ9egEltY2gvLy7+ttFjb9Sc26KPOXPb7MGrCYeuub4/4GLiP59vapcQ97xrKDEMhnaGppdD1udoOHkJZ2ZvabexNLGIRn8nw4jQ+Nb2k7djMF71Js1XJb55OoBR4B3GQkt0ouF+4XSM2xgaNkM1Dao4rSFH66oCGVffrlLupcwsQ9XHXcvmhB6qTDpQxCNkSqcZLggm4VwcANQhTBuPnpnSnDI2Q9ELblIyFumIZgjjbUyB37NAXGV2N0zuy3415b6UgmE4n4cKspOHFVJBhd66pHm0avnfdG0d7QPHra8dqp1JyocX0LXHMnbgB5fNaci5bG6XWPJR1O6+waoG6jSYH4tmSvl1osNkBcn1MUAuDDE3PGDyK9tv98ppboiGAwveom5huEi/aRzSzhHKAaO7OZwlWkNMoii6RjrSDhfqdtqjYU8vZJTngglSbDUqWfvNpGUoiWdNo2qdukxT6THs5LuihQ1DZQCnNIIrkHpJpvY3EIrHQmlryRPGTeuMn05lapt3dlN9aS/yKZw/R4Lk+6dIUC1/5qfUlKPt1bs0lIvvYzj/Larw+/mh6v4lHAiiFWBaudDqa1U+9Clc/xIRQNYJ7G080aja4vauP440BKcdyn633bGGXlvQ+tepUmvZnojeEm+8aerM3fblDtg3qQsHhkCLUuo0i2xWcYT80K45wt1xt7SMeVtn4K0sOjgpG7r3+P64+YELFDLOz/fNFEPpouTn8lcUMPEOlHaNSdNtQdabUvqe2+Sx9egFGaaLQnRNe045eumarvs6o5+wZa+T+3SXmWvTyFyf0oXKdR8nhwbbhw4ME8/5Ktv5QjtQBzWu0WG6On9RGs1eRA/BQTIWumEsbCkjJyvQhu/ly7GOcn1
z1RrkJuYO3JH95iqdfa63j1zHsOpo5xe7PXgbryUHOxJGCkMEbObV3Iat8vKPi2e3V//+mH3+9OvHw2iXg6NddkV7OTjay65orwZHe9UV7fXgaK+7or0ZHO1NV7S3g6O97Yr2bnC0d13R3g+O9r4r2ofB0T50LrnDt4PLtn5QQSnNyY6eNXZ8PfmT9g4PHUh+wWU1cwqtIDb82hQQumpwsDVpnP0VAAD//2DzKvg=" + return "eJzUml+P28YRwN/vUwwOKBAHtnxnJ05yDwXcXFAErdtDHb+0aInV7lDcarlL7x/JyqcPdpeUKImUyJNIS3oJYkozv5mdv8t7BXNcPYDg0n25AbDcCnyA2/D/tzcAGgUSgw8wRUtuABgaqnlhuZIP8OcbAIi/hVwxJ/AGIOUomHkIj16BJDluxPuPXRX4ADOtXFH+S4PMjVyzMhZzyNFqTk35sK6jrocqKa0mdL5+0qTPf3btqj4tLP7TJHwXpA5jXJ4Tvdp61oZzRLX/lOJApSDTZA0DxhLLjeXUvAzfQQaEamUM/Pz0CajSaHZkNUHXwZlWu2wbcqHkrOHhEXj/KQidozVBfIEMmEOwauNWSAkX3GlsBUOixSoZCG/DgdJqjhtQqyAncwStVA6p0iBxCUru+bUGGiUMQFmxcQk2wxq0JVPR7rlUOckGwDGOUjQmdUKswCDRNEPWan5Fw2dSNRzz+ULMIEogQiNhq+AjpDYeJNk55930rEFKg9omPihxCNf9w+VT1D6dqzNdZqgRBDe2VF79JzLAAdQFEfxEyH3xex61GbFAiZTKwhQheLHBN+sCGOIh0Wgs0XYAF4aYB6HU3BXefZxmkJFwzlOEUu+m0sSvazT891pwrp2ofCUdonHsST7UNXzMTjR+dmjsJEc9Q5MUqBODtLGTpEKRXd8edd1vGYJcx59XCaVKA0EngwI1GKRKsnjsSx+bnx26mEe++DBccIqTRjOWmlsc2Y6g89yGbJ3HqAexoeXG7NHW7OpwAON6/jTy4PESeDJd2b1Gcjbsv3jh0eupVnkz42R3elI6J/YB9sm2DCBLwnfxTgQnC9RkhmB5jmAKlDZMI9tR0+jxWBAN6gWyQ/k6otdjyJzT7dGE8fy+E/TPdHyVoWQxS3xjGgbdS4ZvuIzue+GPwVN2zNhm8lBDB+YOOkCgnNnsLNBjpuVpgeGfcYqJFztQUEQNEdwHR86F4DH9zItgxK+v/3mav6fONG/Az6J/Qk1RWg+v0rDfBnbmNJezcgDcQm5vQt9MiWRLzmwGznLBfydebTB6860XE3iMXzfEOh2/oih1OkzrfiLmBhZEOK8FqFAmHO393d2fNv7YGzXnJh9iztwWe/BqwhLbXN+fcTHxt48fapcQPe8aCuIHQ5MRPciS9TEIjlr8zuwM7jaWJhbeqO9kGO4bXyk/ajsG4+Rgrvkk+WeHBzByssZYKEEsb7hfOB3jKRwNzYicea9YpSAlxlYFMljf7qXUCZEYSuQQdy2bDd1XmbhQhBUyhlNGFghTvxd7AHkI04TVM5GKYUIzwgfBjZ4MRTqgaSThNiYnXxJPXEV2N0zmimF9ylwhOCV+P/cVZCcOK6Qcc7V1SXO2avn3eG0d5AMjWz87VDujK59fQEuNRejAzy6bMx9zydwsScEmwXd9D6saqONgfuw0Y8iXgQ7TFUTVxwAZ10jt+IBRr2i/3StmqUYcD8xrC37zw0V8p3HIdxaJGPF0N8tZpNVIBeF515MOtOMddTvt0WOPX0gwTTnlKOlqUtD2m0lDiUCWNI2qdeoiTqXHsKPuihY2DJUAMsMJvAehlqhr/wZcslAoTS14/LhprHazmYhtcy031pf2Ih+P8+u4IOr+Ki6ozM/cDJtitL16FxpT/uUBbv8TvPDf20PV/Te/EAQpQJW0vtXXqrzvU6R8E+FBygB2Jmw0smbc3vXHkYZglSVi2LQ71tBrBpVvpwqlRHsgOoOs8aapM3fbjztgf4h
d2DN4WiKEirPIxooj5Iey5gh3x2xpGfO2duCtKDo4KWvsPb6fNz7IgnAR5ue+kaIxXpR8Xf6KAqbOglS2MWi6GWScLoQbuE0es0ctUFOV57xr2DNMiRO26bqvM/oJKfsY1ce7zFTpRub6lM5lqobYHBpkH1oYpo6xVbLzg3agDt54JJbEq/PXhVb0ddDgFURhvhuGwhYjcroCpdlevBzrKI8f3rcechNzB+7A/uF93H0et1euY1h1tNu73R68jdcSgx0JA4VGBJo5OTc+Vd787+7bp/d//SX5+Ou/fzmMdj862n1XtDejo73pivZ2dLS3XdG+Gx3tu65o34+O9n1XtHejo73rivbD6Gg/dEX7cXS0H7ui/TQ62k+dS+747eC+rR9UUFIxNJNvGzu+mv4f95aHDiT/Istq5uRKQmj4tSnAd1WvYGvS2B+MNBqz/UeWJwxG8RK1kuk3XSECmJ/2PGT5d7thXqOFe1nu5i+BSAa86whFCzcxKsfJ/V3D3tX+QvHwwnXE5VuvSjOisXoT4Gf8+K6RWPBOsuDhwBIzL185Bkcg8zv9z0+fwqgNBCzK6k+RllwytWy+olhb++6qrTX8i131svft3VUbbDNfJjInmUbWy/BwezPxyid7W2FrDetgX5ALZGqUcLZKz9rrfqrV+nV/X6ubTSqv3q41WctroX75Wrf5ClN22+bOWVu3+hoTd+eo++Zu3fyrS99DF+Tls9QJcWkZvLFRCJBKvuJMHDYRDM+dsESickY8M7GDKy4ssc/mir75Hpxxafl+vsB4ZhkIXrmoMtDfJc0GcnW1zZyrnvle2Xphud7P1s4JXVl7acnc82j7Zmxl9kVlazebWw269mbN1WmNunLBhSXuWVzQJ5+vvjm3B8Iz0vy6m7JP9z8CAAD//wf37qw=" } diff --git a/metricbeat/module/linux/pressure/_meta/data.json b/metricbeat/module/linux/pressure/_meta/data.json new file mode 100644 index 000000000000..295bd1bdbcbc --- /dev/null +++ b/metricbeat/module/linux/pressure/_meta/data.json @@ -0,0 +1,27 @@ +{ + "@timestamp": "2017-10-12T08:05:34.853Z", + "event": { + "dataset": "linux.pressure", + "duration": 115000, + "module": "linux" + }, + "linux": { + "pressure": { + "cpu": { + "some": { + "10": 1.63, + "300": 0.06, + "60": 0.29, + "total": 155911207 + } + } + } + }, + "metricset": { + "name": "pressure", + "period": 10000 + }, + "service": { + "type": "linux" + } +} diff --git a/metricbeat/module/linux/pressure/_meta/docs.asciidoc b/metricbeat/module/linux/pressure/_meta/docs.asciidoc new file mode 100644 index 000000000000..d35776c27807 --- /dev/null +++ b/metricbeat/module/linux/pressure/_meta/docs.asciidoc @@ -0,0 +1 @@ +The Pressure module reports https://www.kernel.org/doc/Documentation/accounting/psi.txt[Pressure Stall Information (PSI)] collected for the `cpu`, `memory`, and `io` files/resources found 
in `/proc/pressure`. PSI metrics are included in Linux kernel versions from 4.20. Some distributions might have PSI support, but have disabled the feature via the `CONFIG_PSI_DEFAULT_DISABLED` setting. To enable PSI metrics, pass `psi=1` on the kernel command line during boot. diff --git a/metricbeat/module/linux/pressure/_meta/fields.yml b/metricbeat/module/linux/pressure/_meta/fields.yml new file mode 100644 index 000000000000..cf3220f3030f --- /dev/null +++ b/metricbeat/module/linux/pressure/_meta/fields.yml @@ -0,0 +1,100 @@ +- name: pressure + type: group + release: beta + description: Linux pressure stall information metrics for cpu, memory, and io + fields: + - name: cpu.some.10.pct + type: float + format: percent + description: > + The average share of time in which at least some tasks were stalled on CPU over a ten second window. + - name: cpu.some.60.pct + type: float + format: percent + description: > + The average share of time in which at least some tasks were stalled on CPU over a sixty second window. + - name: cpu.some.300.pct + type: float + format: percent + description: > + The average share of time in which at least some tasks were stalled on CPU over a three hundred second window. + - name: cpu.some.total.time.us + type: long + description: > + The total absolute stall time (in microseconds) in which at least some tasks were stalled on CPU. + - name: memory.some.10.pct + type: float + format: percent + description: > + The average share of time in which at least some tasks were stalled on Memory over a ten second window. + - name: memory.some.60.pct + type: float + format: percent + description: > + The average share of time in which at least some tasks were stalled on Memory over a sixty second window. + - name: memory.some.300.pct + type: float + format: percent + description: > + The average share of time in which at least some tasks were stalled on Memory over a three hundred second window. 
+ - name: memory.some.total.time.us + type: long + description: > + The total absolute stall time (in microseconds) in which at least some tasks were stalled on memory. + - name: memory.full.10.pct + type: float + format: percent + description: > + The average share of time in which all non-idle tasks were stalled on memory simultaneously over a ten second window. + - name: memory.full.60.pct + type: float + format: percent + description: > + The average share of time in which all non-idle tasks were stalled on memory simultaneously over a sixty second window. + - name: memory.full.300.pct + type: float + format: percent + description: > + The average share of time in which all non-idle tasks were stalled on memory simultaneously over a three hundred second window. + - name: memory.full.total.time.us + type: long + description: > + The total absolute stall time (in microseconds) in which all non-idle tasks were stalled on memory. + - name: io.some.10.pct + type: float + format: percent + description: > + The average share of time in which at least some tasks were stalled on io over a ten second window. + - name: io.some.60.pct + type: float + format: percent + description: > + The average share of time in which at least some tasks were stalled on io over a sixty second window. + - name: io.some.300.pct + type: float + format: percent + description: > + The average share of time in which at least some tasks were stalled on io over a three hundred second window. + - name: io.some.total.time.us + type: long + description: > + The total absolute stall time (in microseconds) in which at least some tasks were stalled on io. + - name: io.full.10.pct + type: float + format: percent + description: > + The average share of time in which all non-idle tasks were stalled on io simultaneously over a ten second window. 
+ - name: io.full.60.pct + type: float + format: percent + description: > + The average share of time in which all non-idle tasks were stalled on io simultaneously over a sixty second window. + - name: io.full.300.pct + type: float + format: percent + description: > + The average share of time in which all non-idle tasks were stalled on io simultaneously over a three hundred second window. + - name: io.full.total.time.us + type: long + description: > + The total absolute stall time (in microseconds) in which all non-idle tasks were stalled on io. diff --git a/metricbeat/module/linux/pressure/_meta/testdata/proc/pressure/cpu b/metricbeat/module/linux/pressure/_meta/testdata/proc/pressure/cpu new file mode 100644 index 000000000000..49932c3025a7 --- /dev/null +++ b/metricbeat/module/linux/pressure/_meta/testdata/proc/pressure/cpu @@ -0,0 +1 @@ +some avg10=5.86 avg60=1.10 avg300=0.23 total=9895236 diff --git a/metricbeat/module/linux/pressure/_meta/testdata/proc/pressure/io b/metricbeat/module/linux/pressure/_meta/testdata/proc/pressure/io new file mode 100644 index 000000000000..16963d05439f --- /dev/null +++ b/metricbeat/module/linux/pressure/_meta/testdata/proc/pressure/io @@ -0,0 +1,2 @@ +some avg10=5.86 avg60=1.10 avg300=0.23 total=9895236 +full avg10=6.86 avg60=2.10 avg300=1.23 total=10895236 diff --git a/metricbeat/module/linux/pressure/_meta/testdata/proc/pressure/memory b/metricbeat/module/linux/pressure/_meta/testdata/proc/pressure/memory new file mode 100644 index 000000000000..16963d05439f --- /dev/null +++ b/metricbeat/module/linux/pressure/_meta/testdata/proc/pressure/memory @@ -0,0 +1,2 @@ +some avg10=5.86 avg60=1.10 avg300=0.23 total=9895236 +full avg10=6.86 avg60=2.10 avg300=1.23 total=10895236 diff --git a/metricbeat/module/linux/pressure/pressure.go b/metricbeat/module/linux/pressure/pressure.go new file mode 100644 index 000000000000..fbd7ddb45152 --- /dev/null +++ b/metricbeat/module/linux/pressure/pressure.go @@ -0,0 
+1,143 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package pressure + +import ( + "fmt" + "path/filepath" + "runtime" + + "github.com/pkg/errors" + "github.com/prometheus/procfs" + + "github.com/elastic/beats/v7/libbeat/common" + "github.com/elastic/beats/v7/libbeat/common/cfgwarn" + "github.com/elastic/beats/v7/metricbeat/mb" + "github.com/elastic/beats/v7/metricbeat/module/linux" +) + +const ( + moduleName = "linux" + metricsetName = "pressure" +) + +// init registers the MetricSet with the central registry as soon as the program +// starts. The New function will be called later to instantiate an instance of +// the MetricSet for each host defined in the module's configuration. After the +// MetricSet has been created then Fetch will begin to be called periodically. +func init() { + mb.Registry.MustAddMetricSet(moduleName, metricsetName, New) +} + +// MetricSet holds any configuration or state information. It must implement +// the mb.MetricSet interface. And this is best achieved by embedding +// mb.BaseMetricSet because it implements all of the required mb.MetricSet +// interface methods except for Fetch. 
+type MetricSet struct { + mb.BaseMetricSet + fs string + procfs procfs.FS +} + +// New creates a new instance of the MetricSet. New is responsible for unpacking +// any MetricSet specific configuration options if there are any. +func New(base mb.BaseMetricSet) (mb.MetricSet, error) { + cfgwarn.Beta(fmt.Sprintf("The %s %s metricset is beta.", moduleName, metricsetName)) + + if runtime.GOOS != "linux" { + return nil, fmt.Errorf("the %v/%v metricset is only supported on Linux", moduleName, metricsetName) + } + + linuxModule, ok := base.Module().(*linux.Module) + if !ok { + return nil, errors.New("unexpected module type") + } + + path := filepath.Join(linuxModule.HostFS, "proc") + procfs, err := procfs.NewFS(path) + if err != nil { + return nil, errors.Wrapf(err, "error creating new Host FS at %s", path) + } + + return &MetricSet{ + BaseMetricSet: base, + fs: linuxModule.HostFS, + procfs: procfs, + }, nil +} + +// Fetch methods implements the data gathering and data conversion to the right +// format. It publishes the event which is then forwarded to the output. In case +// of an error set the Error field of mb.Event or simply call report.Error(). 
+func (m *MetricSet) Fetch(report mb.ReporterV2) error { + events, err := fetchLinuxPSIStats(m) + if err != nil { + return errors.Wrap(err, "error fetching PSI stats") + } + + for _, event := range events { + report.Event(mb.Event{ + MetricSetFields: event, + }) + } + return nil +} + +func fetchLinuxPSIStats(m *MetricSet) ([]common.MapStr, error) { + resources := []string{"cpu", "memory", "io"} + events := []common.MapStr{} + + for _, resource := range resources { + psiMetric, err := m.procfs.PSIStatsForResource(resource) + if err != nil { + return nil, errors.Wrap(err, "check that /proc/pressure is available, and/or enabled") + } + + event := common.MapStr{ + resource: common.MapStr{ + "some": common.MapStr{ + "10": common.MapStr{ + "pct": psiMetric.Some.Avg10, + }, + "60": common.MapStr{ + "pct": psiMetric.Some.Avg60, + }, + "300": common.MapStr{ + "pct": psiMetric.Some.Avg300, + }, + "total": common.MapStr{ + "time": common.MapStr{ + "us": psiMetric.Some.Total, + }, + }, + }, + }, + } + + // /proc/pressure/cpu does not contain 'full' metrics + if resource != "cpu" { + event.Put(resource+".full.10.pct", psiMetric.Full.Avg10) + event.Put(resource+".full.60.pct", psiMetric.Full.Avg60) + event.Put(resource+".full.300.pct", psiMetric.Full.Avg300) + event.Put(resource+".full.total.time.us", psiMetric.Full.Total) + } + + events = append(events, event) + } + return events, nil +} diff --git a/metricbeat/module/linux/pressure/pressure_test.go b/metricbeat/module/linux/pressure/pressure_test.go new file mode 100644 index 000000000000..083efa9d0c0e --- /dev/null +++ b/metricbeat/module/linux/pressure/pressure_test.go @@ -0,0 +1,95 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. 
licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// +build linux + +package pressure + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/elastic/beats/v7/libbeat/common" + + mbtest "github.com/elastic/beats/v7/metricbeat/mb/testing" +) + +func TestFetch(t *testing.T) { + f := mbtest.NewReportingMetricSetV2Error(t, getConfig()) + events, errs := mbtest.ReportingFetchV2Error(f) + + assert.Empty(t, errs) + if !assert.NotEmpty(t, events) { + t.FailNow() + } + t.Logf("%s/%s event: %+v", f.Module().Name(), f.Name(), + events[0].BeatEvent("linux", "pressure").Fields.StringToPrint()) + + resources := []string{"cpu", "memory", "io"} + + for i := range events { + resource := resources[i] + + testEvent := common.MapStr{ + resource: common.MapStr{ + "some": common.MapStr{ + "10": common.MapStr{ + "pct": 5.86, + }, + "60": common.MapStr{ + "pct": 1.10, + }, + "300": common.MapStr{ + "pct": 0.23, + }, + "total": common.MapStr{ + "time": common.MapStr{ + "us": uint64(9895236), + }, + }, + }, + }, + } + // /proc/pressure/cpu does not contain 'full' metrics + if resource != "cpu" { + testEvent.Put(resource+".full.10.pct", 6.86) + testEvent.Put(resource+".full.60.pct", 2.10) + testEvent.Put(resource+".full.300.pct", 1.23) + testEvent.Put(resource+".full.total.time.us", uint64(10895236)) + } + + rawEvent := events[i].BeatEvent("linux", "pressure").Fields["linux"].(common.MapStr)["pressure"] + assert.Equal(t, testEvent, rawEvent) 
+ } +} + +func TestData(t *testing.T) { + f := mbtest.NewReportingMetricSetV2Error(t, getConfig()) + err := mbtest.WriteEventsReporterV2Error(f, t, ".") + if err != nil { + t.Fatal("write", err) + } +} + +func getConfig() map[string]interface{} { + return map[string]interface{}{ + "module": "linux", + "metricsets": []string{"pressure"}, + "hostfs": "./_meta/testdata", + } +} diff --git a/metricbeat/modules.d/linux.yml.disabled b/metricbeat/modules.d/linux.yml.disabled index 22e675cafffa..79fa81b4e0e1 100644 --- a/metricbeat/modules.d/linux.yml.disabled +++ b/metricbeat/modules.d/linux.yml.disabled @@ -9,6 +9,7 @@ # - ksm # - conntrack # - iostat + # - pressure enabled: true #hostfs: /hostfs diff --git a/x-pack/metricbeat/metricbeat.reference.yml b/x-pack/metricbeat/metricbeat.reference.yml index 971d720b10aa..707f9bf654ba 100644 --- a/x-pack/metricbeat/metricbeat.reference.yml +++ b/x-pack/metricbeat/metricbeat.reference.yml @@ -966,6 +966,7 @@ metricbeat.modules: # - ksm # - conntrack # - iostat + # - pressure enabled: true #hostfs: /hostfs