From 1d8b089e0147191f5430b98179ceb54fb3377725 Mon Sep 17 00:00:00 2001 From: SG <13872653+mmguero@users.noreply.github.com> Date: Tue, 3 May 2022 10:55:01 -0600 Subject: [PATCH] added support for 128-bit MURMUR3 variant (#66) * added support for 128bit murmur variant * As logstash-plugins/logstash-filter-fingerprint#42 has not been updated in several years, this commit implements 128-bit murmur3 support and handles the base64 encoding flag correctly as well Co-authored-by: Mike Barretta Co-authored-by: Seth Grover Co-authored-by: kaisecheng <69120390+kaisecheng@users.noreply.github.com> --- CHANGELOG.md | 3 ++ docs/index.asciidoc | 8 ++-- lib/logstash/filters/fingerprint.rb | 40 +++++++++++++++++-- logstash-filter-fingerprint.gemspec | 5 +-- spec/filters/fingerprint_spec.rb | 61 +++++++++++++++++++++++++++++ 5 files changed, 106 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b222abb..abb3abd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 3.4.0 + - Added support for 128bit murmur variant [#66](https://github.com/logstash-plugins/logstash-filter-fingerprint/pull/66). + ## 3.3.2 - [DOC] Clarify behavior when key is set [#65](https://github.com/logstash-plugins/logstash-filter-fingerprint/pull/65). diff --git a/docs/index.asciidoc b/docs/index.asciidoc index 61dfc2f..804c2c1 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -60,7 +60,7 @@ This plugin supports the following configuration options plus the <> |<>|No | <> | <>|No | <> |<>|No -| <> |<>, one of `["SHA1", "SHA256", "SHA384", "SHA512", "MD5", "MURMUR3", "IPV4_NETWORK", "UUID", "PUNCTUATION"]`|Yes +| <> |<>, one of `["SHA1", "SHA256", "SHA384", "SHA512", "MD5", "MURMUR3", "MURMUR3_128", "IPV4_NETWORK", "UUID", "PUNCTUATION"]`|Yes | <> |<>|No | <> |<>|No |======================================================================= @@ -76,7 +76,7 @@ filter plugins.
* Value type is <> * Default value is `false` -When set to `true`, the `SHA1`, `SHA256`, `SHA384`, `SHA512` and `MD5` fingerprint methods will produce +When set to `true`, the `SHA1`, `SHA256`, `SHA384`, `SHA512`, `MD5` and `MURMUR3_128` fingerprint methods will produce base64 encoded rather than hex encoded strings. [id="plugins-{type}s-{plugin}-concatenate_sources"] @@ -174,7 +174,7 @@ With other methods, optionally fill in the HMAC key. ===== `method` * This is a required setting. - * Value can be any of: `SHA1`, `SHA256`, `SHA384`, `SHA512`, `MD5`, `MURMUR3`, `IPV4_NETWORK`, `UUID`, `PUNCTUATION` + * Value can be any of: `SHA1`, `SHA256`, `SHA384`, `SHA512`, `MD5`, `MURMUR3`, `MURMUR3_128`, `IPV4_NETWORK`, `UUID`, `PUNCTUATION` * Default value is `"SHA1"` The fingerprint method to use. @@ -183,7 +183,7 @@ If set to `SHA1`, `SHA256`, `SHA384`, `SHA512`, or `MD5` and a key is set, the corresponding cryptographic hash function and the keyed-hash (HMAC) digest function are used to generate the fingerprint. -If set to `MURMUR3` the non-cryptographic 64 bit MurmurHash function will be used. +If set to `MURMUR3` or `MURMUR3_128` the non-cryptographic MurmurHash function (either the 32-bit or 128-bit implementation, respectively) will be used. If set to `IPV4_NETWORK` the input data needs to be a IPv4 address and the hash value will be the masked-out address using the number of bits diff --git a/lib/logstash/filters/fingerprint.rb b/lib/logstash/filters/fingerprint.rb index a2faf1c..e027109 100644 --- a/lib/logstash/filters/fingerprint.rb +++ b/lib/logstash/filters/fingerprint.rb @@ -23,6 +23,10 @@ # https://en.wikipedia.org/wiki/Universally_unique_identifier[UUID]. # To generate UUIDs, prefer the <>. 
class LogStash::Filters::Fingerprint < LogStash::Filters::Base + + INTEGER_MAX_32BIT = (1 << 31) - 1 + INTEGER_MIN_32BIT = -(1 << 31) + include LogStash::PluginMixins::ECSCompatibilitySupport(:disabled, :v1, :v8 => :v1) config_name "fingerprint" @@ -40,8 +44,8 @@ class LogStash::Filters::Fingerprint < LogStash::Filters::Base # With other methods, optionally fill in the HMAC key. config :key, :validate => :string - # When set to `true`, the `SHA1`, `SHA256`, `SHA384`, `SHA512` and `MD5` fingerprint methods will produce - # base64 encoded rather than hex encoded strings. + # When set to `true`, the `SHA1`, `SHA256`, `SHA384`, `SHA512`, `MD5` and `MURMUR3_128` fingerprint + # methods will produce base64 encoded rather than hex encoded strings. config :base64encode, :validate => :boolean, :default => false # The fingerprint method to use. @@ -51,7 +55,9 @@ class LogStash::Filters::Fingerprint < LogStash::Filters::Base # the fingerprint. When a key set, the keyed-hash (HMAC) digest function will # be used. # - # If set to `MURMUR3` the non-cryptographic MurmurHash function will be used. + # If set to `MURMUR3` or `MURMUR3_128` the non-cryptographic MurmurHash + # function (either the 32-bit or 128-bit implementation, respectively) + # will be used. # # If set to `IPV4_NETWORK` the input data needs to be a IPv4 address and # the hash value will be the masked-out address using the number of bits @@ -64,7 +70,7 @@ class LogStash::Filters::Fingerprint < LogStash::Filters::Base # If set to `UUID`, a # https://en.wikipedia.org/wiki/Universally_unique_identifier[UUID] will # be generated. The result will be random and thus not a consistent hash. 
- config :method, :validate => ['SHA1', 'SHA256', 'SHA384', 'SHA512', 'MD5', "MURMUR3", "IPV4_NETWORK", "UUID", "PUNCTUATION"], :required => true, :default => 'SHA1' + config :method, :validate => ['SHA1', 'SHA256', 'SHA384', 'SHA512', 'MD5', "MURMUR3", "MURMUR3_128", "IPV4_NETWORK", "UUID", "PUNCTUATION"], :required => true, :default => 'SHA1' # When set to `true` and `method` isn't `UUID` or `PUNCTUATION`, the # plugin concatenates the names and values of all fields given in the @@ -102,6 +108,8 @@ def register class << self; alias_method :fingerprint, :fingerprint_ipv4_network; end when :MURMUR3 class << self; alias_method :fingerprint, :fingerprint_murmur3; end + when :MURMUR3_128 + class << self; alias_method :fingerprint, :fingerprint_murmur3_128; end when :UUID # nothing when :PUNCTUATION @@ -210,6 +218,30 @@ def fingerprint_murmur3(value) end end + def fingerprint_murmur3_128(value) + if value.is_a?(Integer) + if (INTEGER_MIN_32BIT <= value) && (value <= INTEGER_MAX_32BIT) + if @base64encode + [MurmurHash3::V128.int32_hash(value, 2).pack("L*")].pack("m").chomp! + else + MurmurHash3::V128.int32_hash(value, 2).pack("L*").unpack("H*")[0] + end + else + if @base64encode + [MurmurHash3::V128.int64_hash(value, 2).pack("L*")].pack("m").chomp! 
+ else + MurmurHash3::V128.int64_hash(value, 2).pack("L*").unpack("H*")[0] + end + end + else + if @base64encode + MurmurHash3::V128.str_base64digest(value.to_s, 2) + else + MurmurHash3::V128.str_hexdigest(value.to_s, 2) + end + end + end + def select_digest(method) case method when :SHA1 diff --git a/logstash-filter-fingerprint.gemspec b/logstash-filter-fingerprint.gemspec index 5dde584..583dbeb 100644 --- a/logstash-filter-fingerprint.gemspec +++ b/logstash-filter-fingerprint.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-filter-fingerprint' - s.version = '3.3.2' + s.version = '3.4.0' s.licenses = ['Apache-2.0'] s.summary = "Fingerprints fields by replacing values with a consistent hash" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" @@ -21,8 +21,7 @@ Gem::Specification.new do |s| # Gem dependencies s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99" - s.add_runtime_dependency "murmurhash3" #(MIT license) + s.add_runtime_dependency "murmurhash3" #(MIT license) s.add_development_dependency 'logstash-devutils' s.add_runtime_dependency 'logstash-mixin-ecs_compatibility_support', '~>1.2' end - diff --git a/spec/filters/fingerprint_spec.rb b/spec/filters/fingerprint_spec.rb index a4ffe8d..48710cc 100644 --- a/spec/filters/fingerprint_spec.rb +++ b/spec/filters/fingerprint_spec.rb @@ -50,6 +50,59 @@ end end + describe "the MURMUR3_128 method" do + let(:fingerprint_method) { "MURMUR3_128" } + + context "string hex encoding" do + it "fingerprints the value" do + expect(fingerprint).to eq("41cbc4056eed401d091dfbeabf7ea9e0") + end + end + + context "string base64 encoding" do + let(:config) { super().merge("base64encode" => true) } + it "fingerprints the value" do + expect(fingerprint).to eq("QcvEBW7tQB0JHfvqv36p4A==") + end + end + + context "int32 hex encoding" do + 
let(:config) { super().merge("base64encode" => false) } + let(:data) { {"clientip" => 123 } } + + it "fingerprints the value" do + expect(fingerprint).to eq("286816c693ac410ed63e1430dcd6f6fe") + end + end + + context "int32 base64 encoding" do + let(:config) { super().merge("base64encode" => true) } + let(:data) { {"clientip" => 123 } } + + it "fingerprints the value" do + expect(fingerprint).to eq("KGgWxpOsQQ7WPhQw3Nb2/g==") + end + end + + context "int64 hex encoding" do + let(:config) { super().merge("base64encode" => false) } + let(:data) { {"clientip" => 2148483647 } } + + it "fingerprints the value" do + expect(fingerprint).to eq("fdc7699a82556c8c584131f0133ee989") + end + end + + context "int64 base64 encoding" do + let(:config) { super().merge("base64encode" => true) } + let(:data) { {"clientip" => 2148483647 } } + + it "fingerprints the value" do + expect(fingerprint).to eq("/cdpmoJVbIxYQTHwEz7piQ==") + end + end + end + describe "the SHA1 method" do let(:fingerprint_method) { "SHA1" } @@ -239,6 +292,14 @@ expect(fingerprint).to eq(743372282) end end + + describe 'MURMUR3_128 Fingerprinting' do + let(:fingerprint_method) { "MURMUR3_128" } + let(:data) { { "@timestamp" => epoch_time } } + it "fingerprints the timestamp correctly" do + expect(fingerprint).to eq("37785b62a8cae473acc315d39b66d86e") + end + end end describe "post fingerprint execution triggers" do