From 821ed4b2a6216accea0d42463f5c257179407983 Mon Sep 17 00:00:00 2001 From: Katarzyna Kujawa Date: Thu, 9 Jul 2020 13:17:28 +0200 Subject: [PATCH] Replace average scaling ratio with min scaling ratio for perf aggregated Signed-off-by: Katarzyna Kujawa --- docs/runtime_options.md | 2 +- metrics/prometheus.go | 25 +++++++------- metrics/prometheus_test.go | 34 +++++++++++++++++-- .../prometheus_metrics_perf_aggregated | 4 +-- 4 files changed, 47 insertions(+), 18 deletions(-) diff --git a/docs/runtime_options.md b/docs/runtime_options.md index fb28e1fe40..2fc8149fae 100644 --- a/docs/runtime_options.md +++ b/docs/runtime_options.md @@ -144,7 +144,7 @@ Core perf events can be exposed on Prometheus endpoint per CPU or aggregated by - `--disable_metrics="percpu"` - core perf events are aggregated - `--disable_metrics=""` - core perf events are exposed per CPU. -Aggregated form of core perf events significantly decrease volume of data. For aggregated form of core perf events scaling ratio (`container_perf_metric_scaling ratio`) indicates average scaling ratio for specific event. +Aggregated form of core perf events significantly decreases the volume of data. For the aggregated form of core perf events, the scaling ratio (`container_perf_events_scaling_ratio`) indicates the lowest value of the scaling ratio for a specific event, to show the worst precision. ### Perf subsystem introduction diff --git a/metrics/prometheus.go b/metrics/prometheus.go index 6da114e213..d68faf3a39 100644 --- a/metrics/prometheus.go +++ b/metrics/prometheus.go @@ -1586,7 +1586,7 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri valueType: prometheus.GaugeValue, extraLabels: []string{"cpu", "event"}, getValues: func(s *info.ContainerStats) metricValues { - return getAvgCoreScalingRatio(s) + return getMinCoreScalingRatio(s) }, }}...) 
} @@ -1954,22 +1954,23 @@ func getAggregatedCorePerfEvents(s *info.ContainerStats) metricValues { return values } -func getAvgCoreScalingRatio(s *info.ContainerStats) metricValues { +func getMinCoreScalingRatio(s *info.ContainerStats) metricValues { values := make(metricValues, 0) - perfEventStatAgg := make(map[string][]float64) - // collect scaling ratios for event + perfEventStatMin := make(map[string]float64) + // search for minimal value of scaling ratio for specific event for _, perfStat := range s.PerfStats { - perfEventStatAgg[perfStat.Name] = append(perfEventStatAgg[perfStat.Name], perfStat.ScalingRatio) - } - // calculate average scaling ratio - for perfEvent, perfScalingRatio := range perfEventStatAgg { - sumScalingRatio := 0.0 - for _, scalingRatio := range perfScalingRatio { - sumScalingRatio += scalingRatio + if _, ok := perfEventStatMin[perfStat.Name]; !ok { + // found a new event + perfEventStatMin[perfStat.Name] = perfStat.ScalingRatio + } else if perfStat.ScalingRatio < perfEventStatMin[perfStat.Name] { + // found a lower value of scaling ratio so replace the minimal value + perfEventStatMin[perfStat.Name] = perfStat.ScalingRatio } + } + for perfEvent, perfScalingRatio := range perfEventStatMin { values = append(values, metricValue{ - value: sumScalingRatio / float64(len(perfScalingRatio)), + value: perfScalingRatio, labels: []string{"", perfEvent}, timestamp: s.Timestamp, }) diff --git a/metrics/prometheus_test.go b/metrics/prometheus_test.go index 1d8fd33ba5..c2a8c7938f 100644 --- a/metrics/prometheus_test.go +++ b/metrics/prometheus_test.go @@ -170,6 +170,14 @@ func TestGetPerCpuCorePerfEvents(t *testing.T) { } metricVals := getPerCPUCorePerfEvents(containerStats) assert.Equal(t, 4, len(metricVals)) + values := []float64{} + for _, metric := range metricVals { + values = append(values, metric.value) + } + assert.Contains(t, values, 123.0) + assert.Contains(t, values, 456.0) + assert.Contains(t, values, 321.0) + assert.Contains(t, values, 789.0) 
} func TestGetPerCpuCoreScalingRatio(t *testing.T) { @@ -204,9 +212,17 @@ func TestGetPerCpuCoreScalingRatio(t *testing.T) { } metricVals := getPerCPUCoreScalingRatio(containerStats) assert.Equal(t, 4, len(metricVals)) + values := []float64{} + for _, metric := range metricVals { + values = append(values, metric.value) + } + assert.Contains(t, values, 1.0) + assert.Contains(t, values, 0.5) + assert.Contains(t, values, 0.7) + assert.Contains(t, values, 0.3) } -func TestGetCorePerfEventsAggregated(t *testing.T) { +func TestGetAggCorePerfEvents(t *testing.T) { containerStats := &info.ContainerStats{ Timestamp: time.Unix(1395066367, 0), PerfStats: []info.PerfStat{ @@ -238,9 +254,15 @@ func TestGetCorePerfEventsAggregated(t *testing.T) { } metricVals := getAggregatedCorePerfEvents(containerStats) assert.Equal(t, 2, len(metricVals)) + values := []float64{} + for _, metric := range metricVals { + values = append(values, metric.value) + } + assert.Contains(t, values, 579.0) + assert.Contains(t, values, 1110.0) } -func TestGetCoreScalingRatioAverage(t *testing.T) { +func TestGetMinCoreScalingRatio(t *testing.T) { containerStats := &info.ContainerStats{ Timestamp: time.Unix(1395066367, 0), PerfStats: []info.PerfStat{ @@ -270,6 +292,12 @@ func TestGetCoreScalingRatioAverage(t *testing.T) { }, }, } - metricVals := getAvgCoreScalingRatio(containerStats) + metricVals := getMinCoreScalingRatio(containerStats) assert.Equal(t, 2, len(metricVals)) + values := []float64{} + for _, metric := range metricVals { + values = append(values, metric.value) + } + assert.Contains(t, values, 0.5) + assert.Contains(t, values, 0.3) } diff --git a/metrics/testdata/prometheus_metrics_perf_aggregated b/metrics/testdata/prometheus_metrics_perf_aggregated index fffea6fa27..bee60f5141 100644 --- a/metrics/testdata/prometheus_metrics_perf_aggregated +++ b/metrics/testdata/prometheus_metrics_perf_aggregated @@ -6,8 +6,8 @@ cadvisor_version_info{cadvisorRevision="abcdef",cadvisorVersion="0.16.0",dockerV 
container_last_seen{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 1.395066363e+09 1395066363000 # HELP container_perf_events_scaling_ratio Perf event metric scaling ratio. # TYPE container_perf_events_scaling_ratio gauge -container_perf_events_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="",event="instructions",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.75 1395066363000 -container_perf_events_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="",event="instructions_retired",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.499999999995 1395066363000 +container_perf_events_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="",event="instructions",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.5 1395066363000 +container_perf_events_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="",event="instructions_retired",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.33333333333 1395066363000 # HELP container_perf_events_total Perf event metric. # TYPE container_perf_events_total counter container_perf_events_total{container_env_foo_env="prod",container_label_foo_label="bar",cpu="",event="instructions",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 579 1395066363000