Skip to content

Commit

Permalink
fix(otel_processor): wait for runner process termination
Browse files Browse the repository at this point in the history
  • Loading branch information
SergeTupchiy committed Oct 19, 2023
1 parent 363d277 commit 0b307b9
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 7 deletions.
9 changes: 7 additions & 2 deletions apps/opentelemetry/src/otel_batch_processor.erl
Original file line number Diff line number Diff line change
Expand Up @@ -364,10 +364,15 @@ complete_exporting(Data) ->
handed_off_table=undefined}}.

%% Forcefully terminates the export runner process and returns Data with
%% runner_pid and handed_off_table reset to undefined. The clause only
%% applies when a runner pid is set (see the guard).
%% NOTE(review): this is a diff rendering without +/- markers; the two-line
%% record update ending in a period looks like the pre-commit return
%% expression, superseded by the receive that follows it -- confirm against
%% the committed file.
kill_runner(Data=#data{runner_pid=RunnerPid}) when RunnerPid =/= undefined ->
%% Monitor first so that a 'DOWN' message is delivered once the runner is gone.
Mon = erlang:monitor(process, RunnerPid),
%% Remove the link so the runner's death does not send an exit signal here.
erlang:unlink(RunnerPid),
%% 'kill' cannot be trapped by the runner.
erlang:exit(RunnerPid, kill),
Data#data{runner_pid=undefined,
handed_off_table=undefined}.
%% Wait for the runner process termination to be sure that
%% the export table is destroyed and can be safely recreated
receive
{'DOWN', Mon, process, RunnerPid, _} ->
Data#data{runner_pid=undefined, handed_off_table=undefined}
end.

new_export_table(Name) ->
ets:new(Name, [public,
Expand Down
9 changes: 7 additions & 2 deletions apps/opentelemetry/src/otel_simple_processor.erl
Original file line number Diff line number Diff line change
Expand Up @@ -195,10 +195,15 @@ complete_exporting(Data=#data{current_from=From,
[{reply, From, ok}]}.

%% Forcefully terminates the export runner process: monitors it, unlinks it,
%% sends it an untrappable 'kill' and then blocks until the matching 'DOWN'
%% message arrives, returning Data with runner_pid and handed_off_table reset
%% to undefined. The clause only applies when a runner pid is set.
%% NOTE(review): this is a diff rendering without +/- markers, interleaved
%% with Codecov annotations; the two-line record update ending in a period
%% looks like the pre-commit return expression -- confirm against the
%% committed file.
kill_runner(Data=#data{runner_pid=RunnerPid}) when RunnerPid =/= undefined ->
Mon = erlang:monitor(process, RunnerPid),

Check warning on line 198 in apps/opentelemetry/src/otel_simple_processor.erl

View check run for this annotation

Codecov / codecov/patch

apps/opentelemetry/src/otel_simple_processor.erl#L198

Added line #L198 was not covered by tests
erlang:unlink(RunnerPid),
erlang:exit(RunnerPid, kill),
Data#data{runner_pid=undefined,
handed_off_table=undefined}.
%% Wait for the runner process termination to be sure that
%% the export table is destroyed and can be safely recreated
receive

Check warning on line 203 in apps/opentelemetry/src/otel_simple_processor.erl

View check run for this annotation

Codecov / codecov/patch

apps/opentelemetry/src/otel_simple_processor.erl#L203

Added line #L203 was not covered by tests
{'DOWN', Mon, process, RunnerPid, _} ->
Data#data{runner_pid=undefined, handed_off_table=undefined}

Check warning on line 205 in apps/opentelemetry/src/otel_simple_processor.erl

View check run for this annotation

Codecov / codecov/patch

apps/opentelemetry/src/otel_simple_processor.erl#L205

Added line #L205 was not covered by tests
end.

new_export_table(Name) ->
ets:new(Name, [public,
Expand Down
42 changes: 39 additions & 3 deletions apps/opentelemetry/test/otel_batch_processor_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,19 @@
-include_lib("stdlib/include/assert.hrl").
-include_lib("common_test/include/ct.hrl").

-include("otel_span.hrl").
-include_lib("opentelemetry_api/include/opentelemetry.hrl").

%% Common Test callback listing the test cases of this suite.
%% NOTE(review): diff rendering without +/- markers -- the one-element list
%% appears to be the pre-commit version and the two-element list the
%% post-commit version; confirm against the committed file.
all() ->
[exporting_timeout_test].
[exporting_timeout_test,
exporting_runner_timeout_test].

%% verifies that after the runner has to be killed for taking too long
%% that everything is still functional and the exporter does not crash
exporting_timeout_test(_Config) ->
process_flag(trap_exit, true),

{ok, Pid, _} = otel_batch_processor:start_link(#{reg_name => test_processor,
{ok, Pid, _} = otel_batch_processor:start_link(#{name => test_processor,
resource => otel_resource:create([]),
exporter => ?MODULE,
exporting_timeout_ms => 1,
Expand All @@ -30,13 +32,47 @@ exporting_timeout_test(_Config) ->
ok
end.

%% Checks that the batch processor keeps running after its export runner has
%% been spawned and then killed for exceeding the exporting timeout.
exporting_runner_timeout_test(_Config) ->
    process_flag(trap_exit, true),

    ProcessorOpts = #{name => test_processor1,
                      resource => otel_resource:create([]),
                      exporter => ?MODULE,
                      exporting_timeout_ms => 1,
                      scheduled_delay_ms => 1},
    {ok, ProcessorPid, #{reg_name := RegName}} =
        otel_batch_processor:start_link(ProcessorOpts),
    OnEndConfig = #{reg_name => RegName},

    %% Hand over two spans so that a runner process gets spawned; the
    %% exporter's export callback in this suite sleeps for 10 minutes, so
    %% the runner hangs and has to be killed.
    lists:foreach(
      fun(_) ->
              true = otel_batch_processor:on_end(generate_span(), OnEndConfig)
      end,
      [first, second]),

    receive
        {'EXIT', ProcessorPid, _Reason} ->
            %% the processor must survive the runner being killed
            ct:fail(batch_processor_crash)
    after 200 ->
        ok
    end.

%% exporter behaviour

%% Exporter callback: ignores its argument and always returns `{ok, []}'.
init(_ExporterConfig) ->
    {ok, []}.

%% Exporter callback that ignores its arguments and sleeps for ten minutes,
%% so that an export started by the tests in this suite hangs.
%% NOTE(review): diff rendering without +/- markers -- the two-argument head
%% appears to be the pre-commit version, replaced by the four-argument head;
%% confirm against the committed file.
export(_, _) ->
export(_, _, _, _) ->
timer:sleep(timer:minutes(10)).

%% Exporter callback: ignores its argument and always returns `ok'.
shutdown(_ExporterState) ->
    ok.

%% helpers

%% Builds a recording `#span{}' named "test_span" with freshly generated
%% trace and span ids, trace_flags set to 1 and a "test" instrumentation
%% scope.
generate_span() ->
    TraceId = otel_id_generator:generate_trace_id(),
    SpanId = otel_id_generator:generate_span_id(),
    Scope = #instrumentation_scope{name = "test"},
    #span{trace_id = TraceId,
          span_id = SpanId,
          name = "test_span",
          trace_flags = 1,
          is_recording = true,
          instrumentation_scope = Scope}.

0 comments on commit 0b307b9

Please sign in to comment.