From d08bc1a1d5e75c83892d6197ab8e747971ad06cf Mon Sep 17 00:00:00 2001 From: Sam <370182+plantfansam@users.noreply.github.com> Date: Mon, 22 Jan 2024 08:55:22 -0700 Subject: [PATCH] Consider OTLP export failures handleable errors (#1565) * Consider OTLP export failures handleable errors * Update exporter/otlp/lib/opentelemetry/exporter/otlp/exporter.rb Co-authored-by: Francis Bogsanyi * Update exporter/otlp/lib/opentelemetry/exporter/otlp/exporter.rb Co-authored-by: Francis Bogsanyi * don't cache uri_string * fixup test * Use log_failure in backoff? * Make unexpected response its own case * use variables that exist * use variables that exist * Drop extraneous argument * Formatting * just use log_request_failure --------- Co-authored-by: Francis Bogsanyi --- .../lib/opentelemetry/exporter/otlp/exporter.rb | 13 +++++++++---- .../opentelemetry/exporter/otlp/exporter_test.rb | 2 +- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/exporter/otlp/lib/opentelemetry/exporter/otlp/exporter.rb b/exporter/otlp/lib/opentelemetry/exporter/otlp/exporter.rb index 696ea5bb9b..ba068784a0 100644 --- a/exporter/otlp/lib/opentelemetry/exporter/otlp/exporter.rb +++ b/exporter/otlp/lib/opentelemetry/exporter/otlp/exporter.rb @@ -167,8 +167,7 @@ def send_bytes(bytes, timeout:) # rubocop:disable Metrics/CyclomaticComplexity, redo if backoff?(retry_count: retry_count += 1, reason: response.code) FAILURE when Net::HTTPNotFound - OpenTelemetry.handle_error(message: "OTLP exporter received http.code=404 for uri: '#{@path}'") - @metrics_reporter.add_to_counter('otel.otlp_exporter.failure', labels: { 'reason' => response.code }) + log_request_failure(response.code) FAILURE when Net::HTTPBadRequest, Net::HTTPClientError, Net::HTTPServerError log_status(response.body) @@ -180,6 +179,7 @@ def send_bytes(bytes, timeout:) # rubocop:disable Metrics/CyclomaticComplexity, redo if backoff?(retry_after: 0, retry_count: retry_count += 1, reason: response.code) else @http.finish + log_request_failure(response.code) FAILURE end rescue Net::OpenTimeout, Net::ReadTimeout @@ -222,11 +222,16 @@ def log_status(body) klass_or_nil = ::Google::Protobuf::DescriptorPool.generated_pool.lookup(detail.type_name).msgclass detail.unpack(klass_or_nil) if klass_or_nil end.compact - OpenTelemetry.handle_error(message: "OTLP exporter received rpc.Status{message=#{status.message}, details=#{details}}") + OpenTelemetry.handle_error(message: "OTLP exporter received rpc.Status{message=#{status.message}, details=#{details}} for uri=#{@uri}") rescue StandardError => e OpenTelemetry.handle_error(exception: e, message: 'unexpected error decoding rpc.Status in OTLP::Exporter#log_status') end + def log_request_failure(response_code) + OpenTelemetry.handle_error(message: "OTLP exporter received http.code=#{response_code} for uri='#{@uri}' in OTLP::Exporter#send_bytes") + @metrics_reporter.add_to_counter('otel.otlp_exporter.failure', labels: { 'reason' => response_code }) + end + def measure_request_duration start = Process.clock_gettime(Process::CLOCK_MONOTONIC) begin @@ -241,7 +246,7 @@ def measure_request_duration end def backoff?(retry_count:, reason:, retry_after: nil) # rubocop:disable Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity - @metrics_reporter.add_to_counter('otel.otlp_exporter.failure', labels: { 'reason' => reason }) + log_request_failure(reason) return false if retry_count > RETRY_COUNT sleep_interval = nil diff --git a/exporter/otlp/test/opentelemetry/exporter/otlp/exporter_test.rb b/exporter/otlp/test/opentelemetry/exporter/otlp/exporter_test.rb index 39c1da1c8b..aa0232d5d9 100644 --- a/exporter/otlp/test/opentelemetry/exporter/otlp/exporter_test.rb +++ b/exporter/otlp/test/opentelemetry/exporter/otlp/exporter_test.rb @@ -524,7 +524,7 @@ result = exporter.export([span_data]) _(log_stream.string).must_match( - %r{ERROR -- : OpenTelemetry error: OTLP exporter received http\.code=404 for uri: '/v1/traces'} + %r{ERROR -- : OpenTelemetry error: OTLP exporter received http\.code=404 for uri='http://localhost:4318/v1/traces'} ) _(result).must_equal(FAILURE)