Skip to content

Commit

Permalink
metrics: send metrics to the otel collector endpoint when active
Browse files Browse the repository at this point in the history
Introduce a meter provider to the buildx cli that will send metrics to
the otel-collector included in docker desktop if enabled.

This will send usage metrics to the desktop application but also send
metrics to a user-provided otlp receiver endpoint through the standard
environment variables.

This introduces a single metric which is the cli count for build and
bake along with the command name and a few additional attributes.

Signed-off-by: Jonathan A. Sternberg <jonathan.sternberg@docker.com>
  • Loading branch information
jsternberg committed Jan 5, 2024
1 parent 81ea718 commit 7094eb8
Show file tree
Hide file tree
Showing 5 changed files with 292 additions and 4 deletions.
9 changes: 9 additions & 0 deletions commands/bake.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/docker/buildx/util/confutil"
"github.com/docker/buildx/util/desktop"
"github.com/docker/buildx/util/dockerutil"
"github.com/docker/buildx/util/metrics"
"github.com/docker/buildx/util/progress"
"github.com/docker/buildx/util/tracing"
"github.com/docker/cli/cli/command"
Expand All @@ -45,6 +46,14 @@ type bakeOptions struct {
func runBake(dockerCli command.Cli, targets []string, in bakeOptions, cFlags commonFlags) (err error) {
ctx := appcontext.Context()

mp, report, err := metrics.MeterProvider(dockerCli)
if err != nil {
return err
}
defer report()

recordVersionInfo(mp, "bake")

ctx, end, err := tracing.TraceCurrentCommand(ctx, "bake")
if err != nil {
return err
Expand Down
41 changes: 41 additions & 0 deletions commands/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,10 @@ import (
"github.com/docker/buildx/util/buildflags"
"github.com/docker/buildx/util/desktop"
"github.com/docker/buildx/util/ioset"
"github.com/docker/buildx/util/metrics"
"github.com/docker/buildx/util/progress"
"github.com/docker/buildx/util/tracing"
"github.com/docker/buildx/version"
"github.com/docker/cli-docs-tool/annotation"
"github.com/docker/cli/cli"
"github.com/docker/cli/cli/command"
Expand All @@ -51,6 +53,9 @@ import (
"github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
"google.golang.org/grpc/codes"
)

Expand Down Expand Up @@ -212,6 +217,15 @@ func (o *buildOptions) toDisplayMode() (progressui.DisplayMode, error) {

func runBuild(dockerCli command.Cli, options buildOptions) (err error) {
ctx := appcontext.Context()

mp, report, err := metrics.MeterProvider(dockerCli)
if err != nil {
return err
}
defer report()

recordVersionInfo(mp, "build")

ctx, end, err := tracing.TraceCurrentCommand(ctx, "build")
if err != nil {
return err
Expand Down Expand Up @@ -926,3 +940,30 @@ func maybeJSONArray(v string) []string {
}
return []string{v}
}

func recordVersionInfo(mp metric.MeterProvider, command string) {
// Still in the process of testing/stabilizing these counters.
if !isExperimental() {
return
}

meter := mp.Meter("github.com/docker/buildx",
metric.WithInstrumentationVersion(version.Version),
)

counter, err := meter.Int64Counter("docker.cli.count",
metric.WithDescription("Number of invocations of the docker buildx command."),
)
if err != nil {
otel.Handle(err)
}

counter.Add(context.Background(), 1,
metric.WithAttributes(
attribute.String("command", command),
attribute.String("package", version.Package),
attribute.String("version", version.Version),
attribute.String("revision", version.Revision),
),
)
}
8 changes: 4 additions & 4 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ require (
github.com/stretchr/testify v1.8.4
github.com/zclconf/go-cty v1.14.1
go.opentelemetry.io/otel v1.19.0
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.42.0
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v0.42.0
go.opentelemetry.io/otel/metric v1.19.0
go.opentelemetry.io/otel/sdk/metric v1.19.0
go.opentelemetry.io/otel/trace v1.19.0
golang.org/x/mod v0.11.0
golang.org/x/sync v0.3.0
Expand Down Expand Up @@ -138,15 +142,11 @@ require (
go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.45.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.45.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlpmetric v0.42.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.42.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v0.42.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.19.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.19.0 // indirect
go.opentelemetry.io/otel/exporters/prometheus v0.42.0 // indirect
go.opentelemetry.io/otel/metric v1.19.0 // indirect
go.opentelemetry.io/otel/sdk v1.19.0 // indirect
go.opentelemetry.io/otel/sdk/metric v1.19.0 // indirect
go.opentelemetry.io/proto/otlp v1.0.0 // indirect
golang.org/x/crypto v0.17.0 // indirect
golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1 // indirect
Expand Down
189 changes: 189 additions & 0 deletions util/metrics/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
package metrics

import (
"context"
"fmt"
"net/url"
"path"
"time"

"github.com/docker/cli/cli/command"
"github.com/moby/buildkit/util/tracing/detect"
"github.com/pkg/errors"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
"go.opentelemetry.io/otel/metric"
"go.opentelemetry.io/otel/metric/noop"
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
"go.opentelemetry.io/otel/sdk/metric/metricdata"
"golang.org/x/sync/errgroup"
)

const (
otelConfigFieldName = "otel"
shutdownTimeout = 2 * time.Second
)

// ReportFunc is invoked to signal the metrics should be sent to the
// desired endpoint. It should be invoked on application shutdown.
type ReportFunc func()

// MeterProvider returns a MeterProvider suitable for CLI usage.
// The primary difference between this metric reader and a more typical
// usage is that metric reporting only happens once when ReportFunc
// is invoked.
func MeterProvider(cli command.Cli) (metric.MeterProvider, ReportFunc, error) {
var exps []sdkmetric.Exporter

if exp, err := dockerOtelExporter(cli); err != nil {
return nil, nil, err
} else if exp != nil {
exps = append(exps, exp)
}

if exp, err := detectOtlpExporter(context.Background()); err != nil {
return nil, nil, err
} else if exp != nil {
exps = append(exps, exp)
}

if len(exps) == 0 {
// No exporters are configured so use a noop provider.
return noop.NewMeterProvider(), func() {}, nil
}

// Use delta temporality because, since this is a CLI program, we can never
// know the cumulative value.
reader := sdkmetric.NewManualReader(
sdkmetric.WithTemporalitySelector(deltaTemporality),
)
mp := sdkmetric.NewMeterProvider(
sdkmetric.WithResource(detect.Resource()),
sdkmetric.WithReader(reader),
)
return mp, reportFunc(reader, exps), nil
}

// reportFunc returns a ReportFunc for collecting ResourceMetrics and then
// exporting them to the configured Exporter.
func reportFunc(reader sdkmetric.Reader, exps []sdkmetric.Exporter) ReportFunc {
return func() {
ctx, cancel := context.WithTimeout(context.Background(), shutdownTimeout)
defer cancel()

var rm metricdata.ResourceMetrics
if err := reader.Collect(ctx, &rm); err != nil {
// Error when collecting metrics. Do not send any.
return
}

var eg errgroup.Group
for _, exp := range exps {
exp := exp
eg.Go(func() error {
_ = exp.Export(ctx, &rm)
_ = exp.Shutdown(ctx)
return nil
})
}

// Can't report an error because we don't allow it to.
_ = eg.Wait()
}
}

// dockerOtelExporter reads the CLI metadata to determine an OTLP exporter
// endpoint for docker metrics to be sent.
//
// This location, configuration, and usage is hard-coded as part of
// sending usage statistics so this metric reporting is not meant to be
// user facing.
func dockerOtelExporter(cli command.Cli) (sdkmetric.Exporter, error) {
endpoint, err := otelExporterOtlpEndpoint(cli)
if endpoint == "" || err != nil {
return nil, err
}

// Parse the endpoint. The docker config expects the endpoint to be
// in the form of a URL to match the environment variable, but this
// option doesn't correspond directly to WithEndpoint.
//
// We pretend we're the same as the environment reader.
u, err := url.Parse(endpoint)
if err != nil {
return nil, errors.Errorf("docker otel endpoint is invalid: %s", err)
}

var opts []otlpmetricgrpc.Option
switch u.Scheme {
case "unix":
// Unix sockets are a bit weird. OTEL seems to imply they
// can be used as an environment variable and are handled properly,
// but they don't seem to be as the behavior of the environment variable
// is to strip the scheme from the endpoint, but the underlying implementation
// needs the scheme to use the correct resolver.
//
// We'll just handle this in a special way and add the unix:// back to the endpoint.
opts = []otlpmetricgrpc.Option{
otlpmetricgrpc.WithEndpoint(fmt.Sprintf("unix://%s", path.Join(u.Host, u.Path))),
otlpmetricgrpc.WithInsecure(),
}
case "http":
opts = []otlpmetricgrpc.Option{
// Omit the scheme when using http or https.
otlpmetricgrpc.WithEndpoint(path.Join(u.Host, u.Path)),
otlpmetricgrpc.WithInsecure(),
}
default:
opts = []otlpmetricgrpc.Option{
// Omit the scheme when using http or https.
otlpmetricgrpc.WithEndpoint(path.Join(u.Host, u.Path)),
}
}

// Hardcoded endpoint from the endpoint.
exp, err := otlpmetricgrpc.New(context.Background(), opts...)
if err != nil {
return nil, err
}
return exp, nil
}

// otelExporterOtlpEndpoint retrieves the OTLP endpoint used for the docker reporter
// from the current context.
func otelExporterOtlpEndpoint(cli command.Cli) (string, error) {
meta, err := cli.ContextStore().GetMetadata(cli.CurrentContext())
if err != nil {
return "", err
}

var otelCfg interface{}
switch m := meta.Metadata.(type) {
case command.DockerContext:
otelCfg = m.AdditionalFields[otelConfigFieldName]
case map[string]interface{}:
otelCfg = m[otelConfigFieldName]
}

if otelCfg == nil {
return "", nil
}

otelMap, ok := otelCfg.(map[string]interface{})
if !ok {
return "", errors.Errorf(
"unexpected type for field %q: %T (expected: %T)",
otelConfigFieldName,
otelCfg,
otelMap,
)
}

// keys from https://opentelemetry.io/docs/concepts/sdk-configuration/otlp-exporter-configuration/
endpoint, _ := otelMap["OTEL_EXPORTER_OTLP_ENDPOINT"].(string)
return endpoint, nil
}

// deltaTemporality sets the Temporality of every instrument to delta.
func deltaTemporality(_ sdkmetric.InstrumentKind) metricdata.Temporality {
return metricdata.DeltaTemporality
}
49 changes: 49 additions & 0 deletions util/metrics/otlp.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package metrics

import (
"context"
"os"

"github.com/pkg/errors"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp"
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
)

// detectOtlpExporter configures a metrics exporter based on environment variables.
// This is similar to the version of this in buildkit, but we need direct access
// to the exporter and the prometheus exporter doesn't work at all in a CLI context.
//
// There's some duplication here which I hope to remove when the detect package
// is refactored or extracted from buildkit so it can be utilized here.
//
// This version of the exporter is public facing in contrast to the
// docker otel collector.
func detectOtlpExporter(ctx context.Context) (sdkmetric.Exporter, error) {
set := os.Getenv("OTEL_METRICS_EXPORTER") == "otlp" || os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT") != "" || os.Getenv("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT") != ""
if !set {
return nil, nil
}

proto := os.Getenv("OTEL_EXPORTER_OTLP_METRICS_PROTOCOL")
if proto == "" {
proto = os.Getenv("OTEL_EXPORTER_OTLP_PROTOCOL")
}
if proto == "" {
proto = "grpc"
}

switch proto {
case "grpc":
return otlpmetricgrpc.New(ctx,
otlpmetricgrpc.WithTemporalitySelector(deltaTemporality),
)
case "http/protobuf":
return otlpmetrichttp.New(ctx,
otlpmetrichttp.WithTemporalitySelector(deltaTemporality),
)
// case "http/json": // unsupported by library
default:
return nil, errors.Errorf("unsupported otlp protocol %v", proto)
}
}

0 comments on commit 7094eb8

Please sign in to comment.