From b938b43033fce88f5dd7ee3fc1cd7c153bb7eb23 Mon Sep 17 00:00:00 2001 From: Mario Macias Date: Tue, 3 Sep 2024 12:06:58 +0200 Subject: [PATCH 1/2] Cleanup UID/InstanceID duplicity --- pkg/export/otel/common.go | 2 +- pkg/export/otel/metrics.go | 2 +- pkg/export/prom/prom.go | 4 +-- pkg/internal/request/span_getters.go | 2 -- pkg/internal/svc/svc.go | 7 ++--- pkg/internal/traces/read_decorator.go | 35 +++++----------------- pkg/internal/traces/read_decorator_test.go | 16 ++-------- pkg/transform/k8s.go | 4 +++ 8 files changed, 19 insertions(+), 53 deletions(-) diff --git a/pkg/export/otel/common.go b/pkg/export/otel/common.go index 671e0c32c..0549788e4 100644 --- a/pkg/export/otel/common.go +++ b/pkg/export/otel/common.go @@ -62,7 +62,7 @@ var DefaultBuckets = Buckets{ func getAppResourceAttrs(hostID string, service *svc.ID) []attribute.KeyValue { return append(getResourceAttrs(hostID, service), - semconv.ServiceInstanceID(service.Instance), + semconv.ServiceInstanceID(string(service.UID)), ) } diff --git a/pkg/export/otel/metrics.go b/pkg/export/otel/metrics.go index 2a0720ac8..22c6e9661 100644 --- a/pkg/export/otel/metrics.go +++ b/pkg/export/otel/metrics.go @@ -710,7 +710,7 @@ func otelHistogramConfig(metricName string, buckets []float64, useExponentialHis func (mr *MetricsReporter) metricResourceAttributes(service *svc.ID) attribute.Set { attrs := []attribute.KeyValue{ request.ServiceMetric(service.Name), - semconv.ServiceInstanceID(service.Instance), + semconv.ServiceInstanceID(string(service.UID)), semconv.ServiceNamespace(service.Namespace), semconv.TelemetrySDKLanguageKey.String(service.SDKLanguage.String()), semconv.TelemetrySDKNameKey.String("beyla"), diff --git a/pkg/export/prom/prom.go b/pkg/export/prom/prom.go index e72826d9e..c0f103ccb 100644 --- a/pkg/export/prom/prom.go +++ b/pkg/export/prom/prom.go @@ -711,7 +711,7 @@ func (r *metricsReporter) labelValuesSpans(span *request.Span) []string { span.TraceName(), strconv.Itoa(int(request.SpanStatusCode(span))), span.ServiceGraphKind(), - span.ServiceID.Instance, + string(span.ServiceID.UID), // app instance ID job, "beyla", } @@ -737,7 +737,7 @@ func (r *metricsReporter) labelValuesTargetInfo(service svc.ID) []string { service.HostName, service.Name, service.Namespace, - service.Instance, + string(service.UID), // app instance ID job, service.SDKLanguage.String(), "beyla", diff --git a/pkg/internal/request/span_getters.go b/pkg/internal/request/span_getters.go index 6d4d272b4..b1c1656a3 100644 --- a/pkg/internal/request/span_getters.go +++ b/pkg/internal/request/span_getters.go @@ -57,8 +57,6 @@ func SpanOTELGetters(name attr.Name) (attributes.Getter[*Span, attribute.KeyValu } case attr.Service: getter = func(s *Span) attribute.KeyValue { return ServiceMetric(s.ServiceID.Name) } - case attr.ServiceInstanceID: - getter = func(s *Span) attribute.KeyValue { return semconv.ServiceInstanceID(s.ServiceID.Instance) } case attr.ServiceName: getter = func(s *Span) attribute.KeyValue { return semconv.ServiceName(s.ServiceID.Name) } case attr.ServiceNamespace: diff --git a/pkg/internal/svc/svc.go b/pkg/internal/svc/svc.go index 85251caec..22bb5fbc2 100644 --- a/pkg/internal/svc/svc.go +++ b/pkg/internal/svc/svc.go @@ -59,10 +59,8 @@ const ( // ID stores the metadata attributes of a service/resource // TODO: rename to svc.Attributes type ID struct { - // UID might coincide with other fields (usually, Instance), but UID - // can't be overriden by the user, so it's the only field that can be - // used for internal differentiation of the users. - // UID is not exported in the metrics or traces. + // UID uniquely identifies a service instance. It is not exported + // in the metrics or traces, but it is used to compose the InstanceID UID UID Name string @@ -71,7 +69,6 @@ type ID struct { // again with Kubernetes metadata). Namespace string SDKLanguage InstrumentableType - Instance string Metadata map[attr.Name]string diff --git a/pkg/internal/traces/read_decorator.go b/pkg/internal/traces/read_decorator.go index 68ea8e438..34d0e93bb 100644 --- a/pkg/internal/traces/read_decorator.go +++ b/pkg/internal/traces/read_decorator.go @@ -29,10 +29,6 @@ type InstanceIDConfig struct { // value. Beyla will anyway attach the process ID to the given hostname for composing // the instance ID. OverrideHostname string `yaml:"override_hostname" env:"BEYLA_HOSTNAME"` - // OverrideInstanceID can be optionally set to avoid Beyla composing the instance ID - // and use this value. If you are managing multiple processes from a single Beyla instance, - // all the processes will have the same Instance ID. - OverrideInstanceID string `yaml:"override_instance_id" env:"BEYLA_INSTANCE_ID"` // Undocumented properties aimed at fine-grained tuning @@ -55,7 +51,7 @@ type ReadDecorator struct { type decorator func(spans []request.Span) func ReadFromChannel(ctx context.Context, r *ReadDecorator) pipe.StartFunc[[]request.Span] { - decorate := getDecorator(&r.InstanceID) + decorate := hostNamePIDDecorator(&r.InstanceID) return func(out chan<- []request.Span) { cancelChan := ctx.Done() for { @@ -76,21 +72,6 @@ func ReadFromChannel(ctx context.Context, r *ReadDecorator) pipe.StartFunc[[]req } } -func getDecorator(cfg *InstanceIDConfig) decorator { - hnPidDecorator := hostNamePIDDecorator(cfg) - if cfg.OverrideInstanceID == "" { - return hnPidDecorator - } - return func(spans []request.Span) { - // first decorate normally - hnPidDecorator(spans) - // later, override instance IDs - for i := range spans { - spans[i].ServiceID.Instance = cfg.OverrideInstanceID - } - } -} - func hostNamePIDDecorator(cfg *InstanceIDConfig) decorator { // TODO: periodically update in case the current Beyla instance is created from a VM snapshot running as a different hostname resolver := hostname.CreateResolver(cfg.OverrideHostname, "", cfg.HostnameDNSResolution) @@ -98,8 +79,7 @@ func hostNamePIDDecorator(cfg *InstanceIDConfig) decorator { log := rlog().With("function", "instance_ID_hostNamePIDDecorator") if err != nil { log.Warn("can't read hostname. Leaving empty. Consider overriding"+ - " the BEYLA_HOSTNAME or BEYLA_INSTANCE_ID properties", - "error", err) + " the BEYLA_HOSTNAME property", "error", err) } else { log.Info("using hostname", "hostname", fullHostName) } @@ -109,17 +89,16 @@ func hostNamePIDDecorator(cfg *InstanceIDConfig) decorator { if cfg.InternalIDCacheLen != 0 { cacheLen = cfg.InternalIDCacheLen } - idsCache, _ := lru.New[uint32, string](cacheLen) + uidsCache, _ := lru.New[uint32, svc.UID](cacheLen) return func(spans []request.Span) { for i := range spans { - instanceID, ok := idsCache.Get(spans[i].Pid.HostPID) + uid, ok := uidsCache.Get(spans[i].Pid.HostPID) if !ok { - instanceID = fullHostName + "-" + strconv.Itoa(int(spans[i].Pid.HostPID)) - idsCache.Add(spans[i].Pid.HostPID, instanceID) + uid = svc.UID(fullHostName + "-" + strconv.Itoa(int(spans[i].Pid.HostPID))) + uidsCache.Add(spans[i].Pid.HostPID, uid) } - spans[i].ServiceID.Instance = instanceID - spans[i].ServiceID.UID = svc.UID(instanceID) + spans[i].ServiceID.UID = uid spans[i].ServiceID.HostName = fullHostName } } diff --git a/pkg/internal/traces/read_decorator_test.go b/pkg/internal/traces/read_decorator_test.go index a9ab58a37..516d267ad 100644 --- a/pkg/internal/traces/read_decorator_test.go +++ b/pkg/internal/traces/read_decorator_test.go @@ -27,35 +27,23 @@ func TestReadDecorator(t *testing.T) { type testCase struct { desc string cfg ReadDecorator - expectedID string expectedUID svc.UID expectedHN string } for _, tc := range []testCase{{ desc: "dns", cfg: ReadDecorator{InstanceID: InstanceIDConfig{HostnameDNSResolution: true}}, - expectedID: dnsHostname + "-1234", expectedUID: svc.UID(dnsHostname + "-1234"), expectedHN: dnsHostname, }, { desc: "no-dns", - expectedID: localHostname + "-1234", expectedUID: svc.UID(localHostname + "-1234"), expectedHN: localHostname, }, { desc: "override hostname", cfg: ReadDecorator{InstanceID: InstanceIDConfig{OverrideHostname: "foooo"}}, - expectedID: "foooo-1234", expectedUID: "foooo-1234", expectedHN: "foooo", - }, { - desc: "override HN", - cfg: ReadDecorator{InstanceID: InstanceIDConfig{OverrideInstanceID: "instanceee"}}, - expectedID: "instanceee", - // even if we override instance ID, the UID should be set to a really unique value - // (same as the automatic instanceID value) - expectedUID: svc.UID(localHostname + "-1234"), - expectedHN: localHostname, }} { t.Run(tc.desc, func(t *testing.T) { cfg := tc.cfg @@ -72,9 +60,9 @@ func TestReadDecorator(t *testing.T) { } outSpans := testutil.ReadChannel(t, decoratedOutput, testTimeout) assert.Equal(t, []request.Span{ - {ServiceID: svc.ID{Instance: tc.expectedID, UID: tc.expectedUID, HostName: tc.expectedHN}, + {ServiceID: svc.ID{UID: tc.expectedUID, HostName: tc.expectedHN}, Path: "/foo", Pid: request.PidInfo{HostPID: 1234}}, - {ServiceID: svc.ID{Instance: tc.expectedID, UID: tc.expectedUID, HostName: tc.expectedHN}, + {ServiceID: svc.ID{UID: tc.expectedUID, HostName: tc.expectedHN}, Path: "/bar", Pid: request.PidInfo{HostPID: 1234}}, }, outSpans) }) diff --git a/pkg/transform/k8s.go b/pkg/transform/k8s.go index 7bb674d56..2bed10a5c 100644 --- a/pkg/transform/k8s.go +++ b/pkg/transform/k8s.go @@ -112,6 +112,10 @@ func (md *metadataDecorator) appendMetadata(span *request.Span, info *kube.PodIn if span.ServiceID.Namespace == "" { span.ServiceID.Namespace = info.Namespace } + // overriding the UID here will avoid reusing the OTEL resource reporter + // if the application/process was discovered and reported information + // before the kubernetes metadata was available + // (related issue: /~https://github.com/grafana/beyla/issues/1124) span.ServiceID.UID = svc.UID(info.UID) // if, in the future, other pipeline steps modify the service metadata, we should From db24bed18823be4be8c2b53a52e2d15b180b7aac Mon Sep 17 00:00:00 2001 From: Mario Macias Date: Tue, 3 Sep 2024 12:32:40 +0200 Subject: [PATCH 2/2] rm override_instance_id from documentation --- docs/sources/configure/options.md | 8 -------- 1 file changed, 8 deletions(-) diff --git a/docs/sources/configure/options.md b/docs/sources/configure/options.md index 85c00c474..c8d4c4e74 100644 --- a/docs/sources/configure/options.md +++ b/docs/sources/configure/options.md @@ -604,14 +604,6 @@ instead of trying to automatically resolve the host name. This option takes precedence over `dns`. -| YAML | Environment variable | Type | Default | -| ---------------------- | ------------------- | ------ | ------- | -| `override_instance_id` | `BEYLA_INSTANCE_ID` | string | (unset) | - -If set, Beyla will use this value directly as instance ID of any instrumented -process. If you are managing multiple processes from a single Beyla instance, -all the processes will have the same instance ID. - ### Kubernetes decorator If you run Beyla in a Kubernetes environment, you can configure it to decorate the traces