Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

metrics: send metrics to the otel collector endpoint when active #2155

Merged
merged 1 commit into from
Jan 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions commands/bake.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/docker/buildx/util/confutil"
"github.com/docker/buildx/util/desktop"
"github.com/docker/buildx/util/dockerutil"
"github.com/docker/buildx/util/metrics"
"github.com/docker/buildx/util/progress"
"github.com/docker/buildx/util/tracing"
"github.com/docker/cli/cli/command"
Expand All @@ -45,6 +46,14 @@ type bakeOptions struct {
func runBake(dockerCli command.Cli, targets []string, in bakeOptions, cFlags commonFlags) (err error) {
ctx := appcontext.Context()

mp, report, err := metrics.MeterProvider(dockerCli)
if err != nil {
return err
}
defer report()

recordVersionInfo(mp, "bake")

ctx, end, err := tracing.TraceCurrentCommand(ctx, "bake")
if err != nil {
return err
Expand Down
41 changes: 41 additions & 0 deletions commands/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,10 @@ import (
"github.com/docker/buildx/util/buildflags"
"github.com/docker/buildx/util/desktop"
"github.com/docker/buildx/util/ioset"
"github.com/docker/buildx/util/metrics"
"github.com/docker/buildx/util/progress"
"github.com/docker/buildx/util/tracing"
"github.com/docker/buildx/version"
"github.com/docker/cli-docs-tool/annotation"
"github.com/docker/cli/cli"
"github.com/docker/cli/cli/command"
Expand All @@ -51,6 +53,9 @@ import (
"github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
"google.golang.org/grpc/codes"
)

Expand Down Expand Up @@ -212,6 +217,15 @@ func (o *buildOptions) toDisplayMode() (progressui.DisplayMode, error) {

func runBuild(dockerCli command.Cli, options buildOptions) (err error) {
ctx := appcontext.Context()

mp, report, err := metrics.MeterProvider(dockerCli)
if err != nil {
return err
}
defer report()

recordVersionInfo(mp, "build")

ctx, end, err := tracing.TraceCurrentCommand(ctx, "build")
if err != nil {
return err
Expand Down Expand Up @@ -926,3 +940,30 @@ func maybeJSONArray(v string) []string {
}
return []string{v}
}

func recordVersionInfo(mp metric.MeterProvider, command string) {
// Still in the process of testing/stabilizing these counters.
if !isExperimental() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: add extra comment in here saying that we are still in the process of testing/stabilizing these counters.

return
}

meter := mp.Meter("github.com/docker/buildx",
metric.WithInstrumentationVersion(version.Version),
)

counter, err := meter.Int64Counter("docker.cli.count",
metric.WithDescription("Number of invocations of the docker buildx command."),
)
if err != nil {
otel.Handle(err)
}

counter.Add(context.Background(), 1,
metric.WithAttributes(
attribute.String("command", command),
attribute.String("package", version.Package),
attribute.String("version", version.Version),
attribute.String("revision", version.Revision),
),
)
}
8 changes: 4 additions & 4 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ require (
github.com/stretchr/testify v1.8.4
github.com/zclconf/go-cty v1.14.1
go.opentelemetry.io/otel v1.19.0
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.42.0
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v0.42.0
go.opentelemetry.io/otel/metric v1.19.0
go.opentelemetry.io/otel/sdk/metric v1.19.0
go.opentelemetry.io/otel/trace v1.19.0
golang.org/x/mod v0.11.0
golang.org/x/sync v0.3.0
Expand Down Expand Up @@ -138,15 +142,11 @@ require (
go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.45.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.45.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlpmetric v0.42.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.42.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v0.42.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.19.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.19.0 // indirect
go.opentelemetry.io/otel/exporters/prometheus v0.42.0 // indirect
go.opentelemetry.io/otel/metric v1.19.0 // indirect
go.opentelemetry.io/otel/sdk v1.19.0 // indirect
go.opentelemetry.io/otel/sdk/metric v1.19.0 // indirect
go.opentelemetry.io/proto/otlp v1.0.0 // indirect
golang.org/x/crypto v0.17.0 // indirect
golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1 // indirect
Expand Down
189 changes: 189 additions & 0 deletions util/metrics/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
package metrics

import (
"context"
"fmt"
"net/url"
"path"
"time"

"github.com/docker/cli/cli/command"
"github.com/moby/buildkit/util/tracing/detect"
"github.com/pkg/errors"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
"go.opentelemetry.io/otel/metric"
"go.opentelemetry.io/otel/metric/noop"
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
"go.opentelemetry.io/otel/sdk/metric/metricdata"
"golang.org/x/sync/errgroup"
)

const (
otelConfigFieldName = "otel"
shutdownTimeout = 2 * time.Second
)

// ReportFunc is invoked to signal the metrics should be sent to the
// desired endpoint. It should be invoked on application shutdown.
type ReportFunc func()

// MeterProvider returns a MeterProvider suitable for CLI usage.
// The primary difference between this metric reader and a more typical
// usage is that metric reporting only happens once when ReportFunc
// is invoked.
func MeterProvider(cli command.Cli) (metric.MeterProvider, ReportFunc, error) {
var exps []sdkmetric.Exporter

if exp, err := dockerOtelExporter(cli); err != nil {
return nil, nil, err
} else if exp != nil {
exps = append(exps, exp)
}

if exp, err := detectOtlpExporter(context.Background()); err != nil {
return nil, nil, err
} else if exp != nil {
exps = append(exps, exp)
}

if len(exps) == 0 {
// No exporters are configured so use a noop provider.
return noop.NewMeterProvider(), func() {}, nil
}

// Use delta temporality because, since this is a CLI program, we can never
// know the cumulative value.
reader := sdkmetric.NewManualReader(
sdkmetric.WithTemporalitySelector(deltaTemporality),
)
mp := sdkmetric.NewMeterProvider(
sdkmetric.WithResource(detect.Resource()),
sdkmetric.WithReader(reader),
)
return mp, reportFunc(reader, exps), nil
}

// reportFunc returns a ReportFunc for collecting ResourceMetrics and then
// exporting them to the configured Exporter.
func reportFunc(reader sdkmetric.Reader, exps []sdkmetric.Exporter) ReportFunc {
return func() {
ctx, cancel := context.WithTimeout(context.Background(), shutdownTimeout)
defer cancel()

var rm metricdata.ResourceMetrics
if err := reader.Collect(ctx, &rm); err != nil {
// Error when collecting metrics. Do not send any.
return
}

var eg errgroup.Group
for _, exp := range exps {
exp := exp
eg.Go(func() error {
_ = exp.Export(ctx, &rm)
_ = exp.Shutdown(ctx)
return nil
})
}

// Can't report an error because we don't allow it to.
_ = eg.Wait()
}
}

// dockerOtelExporter reads the CLI metadata to determine an OTLP exporter
// endpoint for docker metrics to be sent.
//
// This location, configuration, and usage is hard-coded as part of
// sending usage statistics so this metric reporting is not meant to be
// user facing.
func dockerOtelExporter(cli command.Cli) (sdkmetric.Exporter, error) {
endpoint, err := otelExporterOtlpEndpoint(cli)
if endpoint == "" || err != nil {
return nil, err
}

// Parse the endpoint. The docker config expects the endpoint to be
// in the form of a URL to match the environment variable, but this
// option doesn't correspond directly to WithEndpoint.
//
// We pretend we're the same as the environment reader.
u, err := url.Parse(endpoint)
if err != nil {
return nil, errors.Errorf("docker otel endpoint is invalid: %s", err)
}

var opts []otlpmetricgrpc.Option
switch u.Scheme {
case "unix":
// Unix sockets are a bit weird. OTEL seems to imply they
// can be used as an environment variable and are handled properly,
// but they don't seem to be as the behavior of the environment variable
// is to strip the scheme from the endpoint, but the underlying implementation
// needs the scheme to use the correct resolver.
//
// We'll just handle this in a special way and add the unix:// back to the endpoint.
opts = []otlpmetricgrpc.Option{
otlpmetricgrpc.WithEndpoint(fmt.Sprintf("unix://%s", path.Join(u.Host, u.Path))),
otlpmetricgrpc.WithInsecure(),
}
case "http":
opts = []otlpmetricgrpc.Option{
// Omit the scheme when using http or https.
otlpmetricgrpc.WithEndpoint(path.Join(u.Host, u.Path)),
otlpmetricgrpc.WithInsecure(),
}
default:
opts = []otlpmetricgrpc.Option{
// Omit the scheme when using http or https.
otlpmetricgrpc.WithEndpoint(path.Join(u.Host, u.Path)),
}
}

// Hardcoded endpoint from the endpoint.
exp, err := otlpmetricgrpc.New(context.Background(), opts...)
if err != nil {
return nil, err
}
return exp, nil
}

// otelExporterOtlpEndpoint retrieves the OTLP endpoint used for the docker reporter
// from the current context.
func otelExporterOtlpEndpoint(cli command.Cli) (string, error) {
meta, err := cli.ContextStore().GetMetadata(cli.CurrentContext())
if err != nil {
return "", err
}

var otelCfg interface{}
switch m := meta.Metadata.(type) {
case command.DockerContext:
otelCfg = m.AdditionalFields[otelConfigFieldName]
case map[string]interface{}:
otelCfg = m[otelConfigFieldName]
}

if otelCfg == nil {
return "", nil
}

otelMap, ok := otelCfg.(map[string]interface{})
if !ok {
return "", errors.Errorf(
"unexpected type for field %q: %T (expected: %T)",
otelConfigFieldName,
otelCfg,
otelMap,
)
}

// keys from https://opentelemetry.io/docs/concepts/sdk-configuration/otlp-exporter-configuration/
endpoint, _ := otelMap["OTEL_EXPORTER_OTLP_ENDPOINT"].(string)
return endpoint, nil
}

// deltaTemporality sets the Temporality of every instrument to delta.
func deltaTemporality(_ sdkmetric.InstrumentKind) metricdata.Temporality {
return metricdata.DeltaTemporality
}
49 changes: 49 additions & 0 deletions util/metrics/otlp.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package metrics

import (
"context"
"os"

"github.com/pkg/errors"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp"
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
)

// detectOtlpExporter configures a metrics exporter based on environment variables.
// This is similar to the version of this in buildkit, but we need direct access
// to the exporter and the prometheus exporter doesn't work at all in a CLI context.
//
// There's some duplication here which I hope to remove when the detect package
// is refactored or extracted from buildkit so it can be utilized here.
//
// This version of the exporter is public facing in contrast to the
// docker otel collector.
func detectOtlpExporter(ctx context.Context) (sdkmetric.Exporter, error) {
set := os.Getenv("OTEL_METRICS_EXPORTER") == "otlp" || os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT") != "" || os.Getenv("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT") != ""
if !set {
return nil, nil
}

proto := os.Getenv("OTEL_EXPORTER_OTLP_METRICS_PROTOCOL")
if proto == "" {
proto = os.Getenv("OTEL_EXPORTER_OTLP_PROTOCOL")
}
if proto == "" {
proto = "grpc"
}

switch proto {
case "grpc":
return otlpmetricgrpc.New(ctx,
otlpmetricgrpc.WithTemporalitySelector(deltaTemporality),
)
case "http/protobuf":
return otlpmetrichttp.New(ctx,
otlpmetrichttp.WithTemporalitySelector(deltaTemporality),
)
// case "http/json": // unsupported by library
default:
return nil, errors.Errorf("unsupported otlp protocol %v", proto)
}
}