From eee78192d4fa6cca2f616a789826c792c6cb87e0 Mon Sep 17 00:00:00 2001 From: Marco Costa Date: Fri, 19 Apr 2024 16:05:56 -0700 Subject: [PATCH] Add sampling rule glob pattern support Signed-off-by: Marco Costa --- lib/datadog/tracing/sampling/ext.rb | 4 +- lib/datadog/tracing/sampling/matcher.rb | 91 +++++++---- lib/datadog/tracing/sampling/rule.rb | 4 +- lib/datadog/tracing/sampling/rule_sampler.rb | 8 +- lib/datadog/tracing/sampling/span/matcher.rb | 54 ++----- sig/datadog/tracing/sampling/matcher.rbs | 24 +-- spec/datadog/tracing/sampling/matcher_spec.rb | 146 +++--------------- .../tracing/sampling/rule_sampler_spec.rb | 12 +- spec/datadog/tracing/sampling/rule_spec.rb | 42 +++-- 9 files changed, 157 insertions(+), 228 deletions(-) diff --git a/lib/datadog/tracing/sampling/ext.rb b/lib/datadog/tracing/sampling/ext.rb index c51b9b437b6..9968f15bc6a 100644 --- a/lib/datadog/tracing/sampling/ext.rb +++ b/lib/datadog/tracing/sampling/ext.rb @@ -50,9 +50,9 @@ module Decision # Single Span Sampled. SPAN_SAMPLING_RATE = '-8' # Dynamically configured rule, explicitly created by the user. - REMOTE_USER_RULE = '-10' + REMOTE_USER_RULE = '-11' # Dynamically configured rule, automatically generated by Datadog. - REMOTE_DYNAMIC_RULE = '-11' + REMOTE_DYNAMIC_RULE = '-12' end end end diff --git a/lib/datadog/tracing/sampling/matcher.rb b/lib/datadog/tracing/sampling/matcher.rb index aaa9861733d..4bca551c40e 100644 --- a/lib/datadog/tracing/sampling/matcher.rb +++ b/lib/datadog/tracing/sampling/matcher.rb @@ -6,6 +6,9 @@ module Sampling # Checks if a trace conforms to a matching criteria. # @abstract class Matcher + # Pattern that matches any string + MATCH_ALL_PATTERN = '*' + # Returns `true` if the trace should conforms to this rule, `false` otherwise # # @param [TraceOperation] trace @@ -13,20 +16,49 @@ class Matcher def match?(trace) raise NotImplementedError end - end - # A {Datadog::Sampling::Matcher} that supports matching a trace by - # trace name and/or service name. - class SimpleMatcher < Matcher - # Returns `true` for case equality (===) with any object + # Converts a glob pattern String to a case-insensitive String matcher object. + # The match object will only return `true` if it matches the complete String. + # + # The following special characters are supported: + # - `?` matches any single character + # - `*` matches any substring + # + # @param glob [String] + # @return [#match?(String)] + def self.glob_to_regex(glob) + # Optimization for match-all case + return MATCH_ALL if glob == MATCH_ALL_PATTERN + + # Ensure no undesired characters are treated as regex. + glob = Regexp.quote(glob) + + # Our valid special characters, `?` and `*`, were just escaped + # by `Regexp.quote` above. We need to unescape them: + glob.gsub!('\?', '.') # Any single character + glob.gsub!('\*', '.*') # Any substring + + # Patterns have to match the whole input string + glob = "\\A#{glob}\\z" + + Regexp.new(glob, Regexp::IGNORECASE) + end + + # Returns `true` for any input MATCH_ALL = Class.new do - # DEV: A class that implements `#===` is ~20% faster than - # DEV: a `Proc` that always returns `true`. - def ===(other) + def match?(_other) true end + + def inspect + "MATCH_ALL:Matcher('*')" + end end.new + end + # A {Datadog::Sampling::Matcher} that supports matching a trace by + # trace name and/or service name. + class SimpleMatcher < Matcher attr_reader :name, :service, :resource, :tags # @param name [String,Regexp,Proc] Matcher for case equality (===) with the trace name, @@ -35,16 +67,30 @@ def ===(other) # defaults to always match # @param resource [String,Regexp,Proc] Matcher for case equality (===) with the resource name, # defaults to always match - def initialize(name: MATCH_ALL, service: MATCH_ALL, resource: MATCH_ALL, tags: {}) + def initialize( + name: MATCH_ALL_PATTERN, + service: MATCH_ALL_PATTERN, + resource: MATCH_ALL_PATTERN, + tags: {} + ) super() - @name = name - @service = service - @resource = resource + + name = Matcher.glob_to_regex(name) + service = Matcher.glob_to_regex(service) + resource = Matcher.glob_to_regex(resource) + tags = tags.transform_values { |matcher| Matcher.glob_to_regex(matcher) } + + @name = name || Datadog::Tracing::Sampling::Matcher::MATCH_ALL + @service = service || Datadog::Tracing::Sampling::Matcher::MATCH_ALL + @resource = resource || Datadog::Tracing::Sampling::Matcher::MATCH_ALL @tags = tags end def match?(trace) - name === trace.name && service === trace.service && resource === trace.resource && tags_match?(trace) + @name.match?(trace.name) && + @service.match?(trace.service) && + @resource.match?(trace.resource) && + tags_match?(trace) end private @@ -59,27 +105,10 @@ def tags_match?(trace) # can affect exact string matching (e.g. '400' matching '400.0'). tag = format('%g', tag) if tag.is_a?(Numeric) - matcher === tag + matcher.match?(tag) end end end - - # A {Datadog::Tracing::Sampling::Matcher} that allows for arbitrary trace matching - # based on the return value of a provided block. - class ProcMatcher < Matcher - attr_reader :block - - # @yield [name, service] Provides trace name and service to the block - # @yieldreturn [Boolean] Whether the trace conforms to this matcher - def initialize(&block) - super() - @block = block - end - - def match?(trace) - block.call(trace.name, trace.service) - end - end end end end diff --git a/lib/datadog/tracing/sampling/rule.rb b/lib/datadog/tracing/sampling/rule.rb index 051a8315eef..9c18bed7fac 100644 --- a/lib/datadog/tracing/sampling/rule.rb +++ b/lib/datadog/tracing/sampling/rule.rb @@ -60,8 +60,8 @@ class SimpleRule < Rule # defaults to always match # @param sample_rate [Float] Sampling rate between +[0,1]+ def initialize( - name: SimpleMatcher::MATCH_ALL, service: SimpleMatcher::MATCH_ALL, - resource: SimpleMatcher::MATCH_ALL, tags: {}, + name: SimpleMatcher::MATCH_ALL_PATTERN, service: SimpleMatcher::MATCH_ALL_PATTERN, + resource: SimpleMatcher::MATCH_ALL_PATTERN, tags: {}, provenance: Rule::PROVENANCE_LOCAL, sample_rate: 1.0 ) diff --git a/lib/datadog/tracing/sampling/rule_sampler.rb b/lib/datadog/tracing/sampling/rule_sampler.rb index a7d11efe49f..ba5410d27d6 100644 --- a/lib/datadog/tracing/sampling/rule_sampler.rb +++ b/lib/datadog/tracing/sampling/rule_sampler.rb @@ -109,7 +109,13 @@ def update(*args, **kwargs) private def sample_trace(trace) - rule = @rules.find { |r| r.match?(trace) } + puts "sample_trace: #{caller}" + rule = @rules.find do |r| + Datadog.logger.debug do + "Matching #{r.inspect}\n against #{trace.inspect}:\n#{r.match?(trace)}" + end + r.match?(trace) + end return yield(trace) if rule.nil? diff --git a/lib/datadog/tracing/sampling/span/matcher.rb b/lib/datadog/tracing/sampling/span/matcher.rb index aa40ec22516..88480e63d2c 100644 --- a/lib/datadog/tracing/sampling/span/matcher.rb +++ b/lib/datadog/tracing/sampling/span/matcher.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require_relative '../matcher' + module Datadog module Tracing module Sampling @@ -31,29 +33,19 @@ class Matcher # @param name_pattern [String] a pattern to be matched against {SpanOperation#name} # @param service_pattern [String] a pattern to be matched against {SpanOperation#service} def initialize(name_pattern: MATCH_ALL_PATTERN, service_pattern: MATCH_ALL_PATTERN) - @name = pattern_to_regex(name_pattern) - @service = pattern_to_regex(service_pattern) + @name = Sampling::Matcher.glob_to_regex(name_pattern) + @service = Sampling::Matcher.glob_to_regex(service_pattern) end - # {Regexp#match?} was added in Ruby 2.4, and it's measurably - # the least costly way to get a boolean result for a Regexp match. - # @see https://www.ruby-lang.org/en/news/2016/12/25/ruby-2-4-0-released/ - if Regexp.method_defined?(:match?) - # Returns `true` if the span conforms to the configured patterns, - # `false` otherwise - # - # @param [SpanOperation] span - # @return [Boolean] - def match?(span) - # Matching is performed at the end of the lifecycle of a Span, - # thus both `name` and `service` are guaranteed to be not `nil`. - @name.match?(span.name) && @service.match?(span.service) - end - else - # DEV: Remove when support for Ruby 2.3 and older is removed. - def match?(span) - @name === span.name && @service === span.service - end + # Returns `true` if the span conforms to the configured patterns, + # `false` otherwise + # + # @param [SpanOperation] span + # @return [Boolean] + def match?(span) + # Matching is performed at the end of the lifecycle of a Span, + # thus both `name` and `service` are guaranteed to be not `nil`. + @name.match?(span.name) && @service.match?(span.service) end def ==(other) @@ -62,26 +54,6 @@ def ==(other) name == other.name && service == other.service end - - private - - # @param pattern [String] - # @return [Regexp] - def pattern_to_regex(pattern) - # Ensure no undesired characters are treated as regex. - # Our valid special characters, `?` and `*`, - # will be escaped so... - pattern = Regexp.quote(pattern) - - # ...we account for that here: - pattern.gsub!('\?', '.') # Any single character - pattern.gsub!('\*', '.*') # Any substring - - # Patterns have to match the whole input string - pattern = "\\A#{pattern}\\z" - - Regexp.new(pattern) - end end end end diff --git a/sig/datadog/tracing/sampling/matcher.rbs b/sig/datadog/tracing/sampling/matcher.rbs index d1c4c541db9..6c1a42ee9c7 100644 --- a/sig/datadog/tracing/sampling/matcher.rbs +++ b/sig/datadog/tracing/sampling/matcher.rbs @@ -2,21 +2,25 @@ module Datadog module Tracing module Sampling class Matcher - def match?: (untyped trace) -> untyped - end - class SimpleMatcher < Matcher - MATCH_ALL: untyped + MATCH_ALL_PATTERN: String - attr_reader name: untyped + interface _Matcher + def match?: (::String) -> bool + end - attr_reader service: untyped - def initialize: (?name: untyped, ?service: untyped) -> void + MATCH_ALL: _Matcher + + def self.glob_to_regex: (::String) -> _Matcher def match?: (untyped trace) -> untyped end - class ProcMatcher < Matcher - attr_reader block: untyped - def initialize: () ?{ () -> untyped } -> void + class SimpleMatcher < Matcher + attr_reader name: Matcher::_Matcher + attr_reader service: Matcher::_Matcher + attr_reader resource: Matcher::_Matcher + attr_reader tags: Matcher::_Matcher + + def initialize: (?name: untyped, ?service: untyped) -> void def match?: (untyped trace) -> untyped end diff --git a/spec/datadog/tracing/sampling/matcher_spec.rb b/spec/datadog/tracing/sampling/matcher_spec.rb index 04b72c022f0..d50de571cc9 100644 --- a/spec/datadog/tracing/sampling/matcher_spec.rb +++ b/spec/datadog/tracing/sampling/matcher_spec.rb @@ -22,43 +22,41 @@ context 'with a name matcher' do let(:rule) { described_class.new(name: name) } - context 'with a regexp' do + context 'with a glob' do context 'matching' do - let(:name) { /.*/ } + let(:name) { 'operation.*' } it { is_expected.to eq(true) } - end - context 'not matching' do - let(:name) { /^$/ } + context 'case-insensitive' do + let(:name) { 'OPERATION.*' } - it { is_expected.to eq(false) } - end - end + it { is_expected.to eq(true) } + end - context 'with a string' do - context 'matching' do - let(:name) { trace_name.to_s } + context 'single-character wildcard' do + let(:name) { 'operation.nam?' } - it { is_expected.to eq(true) } + it { is_expected.to eq(true) } + end end context 'not matching' do - let(:name) { '' } + let(:name) { 'not.operation' } it { is_expected.to eq(false) } end end - context 'with a proc' do + context 'with a string' do context 'matching' do - let(:name) { ->(n) { n == trace_name } } + let(:name) { 'operation.name' } it { is_expected.to eq(true) } end context 'not matching' do - let(:name) { ->(_n) { false } } + let(:name) { '' } it { is_expected.to eq(false) } end @@ -71,20 +69,6 @@ context 'when trace service name is present' do let(:trace_service) { 'service-1' } - context 'with a regexp' do - context 'matching' do - let(:service) { /.*/ } - - it { is_expected.to eq(true) } - end - - context 'not matching' do - let(:service) { /^$/ } - - it { is_expected.to eq(false) } - end - end - context 'with a string' do context 'matching' do let(:service) { trace_service.to_s } @@ -98,20 +82,6 @@ it { is_expected.to eq(false) } end end - - context 'with a proc' do - context 'matching' do - let(:service) { ->(n) { n == trace_service } } - - it { is_expected.to eq(true) } - end - - context 'not matching' do - let(:service) { ->(_n) { false } } - - it { is_expected.to eq(false) } - end - end end context 'with a tags matcher' do @@ -120,20 +90,6 @@ context 'when span tags are present' do let(:trace_tags) { { 'tag1' => 'value1', 'tag2' => 'value2' } } - context 'with a regexp' do - context 'matching' do - let(:tags) { { 'tag1' => /value.*/, 'tag2' => /.*/ } } - - it { is_expected.to eq(true) } - end - - context 'not matching' do - let(:tags) { { 'tag1' => /value.*/, 'tag2' => /not_value/ } } - - it { is_expected.to eq(false) } - end - end - context 'with a string' do context 'matching' do let(:tags) { trace_tags } @@ -153,15 +109,15 @@ # Metrics are stored as tags, but have numeric values let(:trace_tags) { { 'metric1' => 1.0, 'metric2' => 2 } } - context 'with a regexp' do + context 'with a glob' do context 'matching' do - let(:tags) { { 'metric1' => /1/, 'metric2' => /.*/ } } + let(:tags) { { 'metric1' => '1', 'metric2' => '*' } } it { is_expected.to eq(true) } end context 'not matching' do - let(:tags) { { 'metric1' => /1/, 'metric2' => 3 } } + let(:tags) { { 'metric1' => '1', 'metric2' => '3' } } it { is_expected.to eq(false) } end @@ -191,9 +147,9 @@ context 'when trace service is not present' do let(:trace_service) { nil } - let(:service) { /.*/ } + let(:service) { '*' } - it { is_expected.to eq(false) } + it { is_expected.to eq(true) } end end @@ -203,20 +159,6 @@ context 'when trace resource is present' do let(:trace_resource) { 'resource-1' } - context 'with a regexp' do - context 'matching' do - let(:resource) { /resource-.*/ } - - it { is_expected.to eq(true) } - end - - context 'not matching' do - let(:resource) { /name-.*/ } - - it { is_expected.to eq(false) } - end - end - context 'with a string' do context 'matching' do let(:resource) { 'resource-1' } @@ -230,36 +172,22 @@ it { is_expected.to eq(false) } end end - - context 'with a proc' do - context 'matching' do - let(:resource) { ->(n) { n == 'resource-1' } } - - it { is_expected.to eq(true) } - end - - context 'not matching' do - let(:resource) { ->(_n) { false } } - - it { is_expected.to eq(false) } - end - end end context 'when trace resource is not present' do let(:trace_resource) { nil } - let(:resource) { /.*/ } + let(:resource) { '*' } - it { is_expected.to eq(false) } + it { is_expected.to eq(true) } end end context 'with name, service, resource matchers' do let(:rule) { described_class.new(name: name, service: service, resource: resource) } - let(:name) { /.*/ } - let(:service) { /.*/ } - let(:resource) { /.*/ } + let(:name) { '*' } + let(:service) { '*' } + let(:resource) { '*' } context 'when trace service name is present' do let(:trace_service) { 'service-1' } @@ -270,32 +198,8 @@ context 'when trace service is not present' do let(:trace_service) { nil } - it { is_expected.to eq(false) } + it { is_expected.to eq(true) } end end end end - -RSpec.describe Datadog::Tracing::Sampling::ProcMatcher do - let(:trace_op) { Datadog::Tracing::TraceOperation.new(name: trace_name, service: trace_service) } - let(:trace_name) { 'operation.name' } - let(:trace_service) { nil } - - describe '#match?' do - subject(:match?) { rule.match?(trace_op) } - - let(:rule) { described_class.new(&block) } - - context 'with matching block' do - let(:block) { ->(name, service) { name == trace_name && service == trace_service } } - - it { is_expected.to eq(true) } - end - - context 'with mismatching block' do - let(:block) { ->(_name, _service) { false } } - - it { is_expected.to eq(false) } - end - end -end diff --git a/spec/datadog/tracing/sampling/rule_sampler_spec.rb b/spec/datadog/tracing/sampling/rule_sampler_spec.rb index 9fbbe95e424..bd7488aa025 100644 --- a/spec/datadog/tracing/sampling/rule_sampler_spec.rb +++ b/spec/datadog/tracing/sampling/rule_sampler_spec.rb @@ -109,7 +109,7 @@ let(:rule) { { sample_rate: 0.1, name: 'test-name' } } it 'parses matching any service' do - expect(actual_rule.matcher.name).to eq('test-name') + expect(actual_rule.matcher.name).to eq(/\Atest\-name\z/i) expect(actual_rule.matcher.service).to eq(Datadog::Tracing::Sampling::SimpleMatcher::MATCH_ALL) expect(actual_rule.sampler.sample_rate).to eq(0.1) end @@ -120,7 +120,7 @@ it 'parses matching any name' do expect(actual_rule.matcher.name).to eq(Datadog::Tracing::Sampling::SimpleMatcher::MATCH_ALL) - expect(actual_rule.matcher.service).to eq('test-service') + expect(actual_rule.matcher.service).to eq(/\Atest\-service\z/i) expect(actual_rule.sampler.sample_rate).to eq(0.1) end end @@ -129,7 +129,7 @@ let(:rule) { { sample_rate: 0.1, resource: 'test-resource' } } it 'parses resource matcher' do - expect(actual_rule.matcher.resource).to eq('test-resource') + expect(actual_rule.matcher.resource).to eq(/\Atest\-resource\z/i) end end @@ -137,7 +137,7 @@ let(:rule) { { sample_rate: 0.1, tags: { tag: 'test-tag' } } } it 'parses matching tag' do - expect(actual_rule.matcher.tags).to eq({ 'tag' => 'test-tag' }) + expect(actual_rule.matcher.tags).to eq('tag' => /\Atest\-tag\z/i) expect(actual_rule.sampler.sample_rate).to eq(0.1) end end @@ -272,7 +272,7 @@ it_behaves_like 'a sampled! trace' do let(:expected_sampled) { true } let(:sampling_priority) { 2 } - let(:sampling_decision) { '-10' } + let(:sampling_decision) { '-11' } end end @@ -282,7 +282,7 @@ it_behaves_like 'a sampled! trace' do let(:expected_sampled) { true } let(:sampling_priority) { 2 } - let(:sampling_decision) { '-11' } + let(:sampling_decision) { '-12' } end end end diff --git a/spec/datadog/tracing/sampling/rule_spec.rb b/spec/datadog/tracing/sampling/rule_spec.rb index 399fe5c39cd..6451c32e51d 100644 --- a/spec/datadog/tracing/sampling/rule_spec.rb +++ b/spec/datadog/tracing/sampling/rule_spec.rb @@ -87,22 +87,36 @@ let(:trace_tags) { {} } describe '#initialize' do - subject(:rule) do - described_class.new(name: name, service: service, resource: resource, sample_rate: sample_rate, tags: tags) + context 'with parameters' do + subject(:rule) do + described_class.new(name: name, service: service, resource: resource, sample_rate: sample_rate, tags: tags) + end + + let(:name) { 'name' } + let(:service) { 'service' } + let(:resource) { 'resource' } + let(:tags) { { 'tag key' => 'tag value' } } + let(:sample_rate) { 0.123 } + + it 'initializes with the correct values' do + expect(rule.matcher.name).to eq(/\Aname\z/i) + expect(rule.matcher.service).to eq(/\Aservice\z/i) + expect(rule.matcher.resource).to eq(/\Aresource\z/i) + expect(rule.matcher.tags).to eq('tag key' => /\Atag\ value\z/i) + expect(rule.sampler.sample_rate).to eq(0.123) + end end - let(:name) { double('name') } - let(:service) { double('service') } - let(:resource) { double('resource') } - let(:tags) { { 'tag' => 'value' } } - let(:sample_rate) { 0.123 } - - it 'initializes with the correct values' do - expect(rule.matcher.name).to eq(name) - expect(rule.matcher.service).to eq(service) - expect(rule.matcher.resource).to eq(resource) - expect(rule.matcher.tags).to eq(tags) - expect(rule.sampler.sample_rate).to eq(sample_rate) + context 'with default values' do + subject(:rule) { described_class.new } + + it 'initializes with the correct values' do + expect(rule.matcher.name).to eq(Datadog::Tracing::Sampling::Matcher::MATCH_ALL) + expect(rule.matcher.service).to eq(Datadog::Tracing::Sampling::Matcher::MATCH_ALL) + expect(rule.matcher.resource).to eq(Datadog::Tracing::Sampling::Matcher::MATCH_ALL) + expect(rule.matcher.tags).to eq({}) + expect(rule.sampler.sample_rate).to eq(1.0) + end end end end