Search code examples
gotestingprometheus

How to test dynamic metric registration in custom prometheus exporter?


UPDATE: current source available here.

I'm currently working on a custom prometheus exporter for changedetection.io to expose both scraping and price metrics for all registered watches.

Now that I have a working proof of concept, I'm trying to make the project maintainable and ready for release to the open-source community (e.g. adding tests and documentation and making it as feature-complete as possible).

While writing those tests, I stumbled across a problem when testing the dynamic registration of new watches as they are created in the monitored changedetection.io instance. In order for the exporter to pick them up without a restart, I check the API for newly added watches on each collect run.

Here's the Collect function of the priceCollector:

// Collect emits the price metric of every watch tracked in c.priceMetrics on
// ch. Before doing so it asks the API for the current set of watches and
// starts tracking any watch that is not yet known, so watches created while
// the exporter is running are picked up without a restart.
//
// If fetching the watches fails, the error is logged and the metrics that are
// already tracked are still collected.
func (c *priceCollector) Collect(ch chan<- prometheus.Metric) {
    // check for new watches before collecting metrics
    watches, err := c.ApiClient.getWatches()
    if err != nil {
        log.Errorf("error while fetching watches: %v", err)
    } else {
        for id, watch := range watches {
            if _, known := c.priceMetrics[id]; known {
                continue
            }

            // Track the new metric in this collector only. It is deliberately
            // NOT registered on the global default registerer: its samples are
            // already emitted through the loop below, so registering it as
            // well would expose the same series a second time whenever this
            // collector is registered there too. MustRegister would also
            // panic in the middle of a scrape if the registration were
            // rejected, e.g. when two watches share a title and therefore
            // produce identical descriptors.
            c.priceMetrics[id] = newPriceMetric(prometheus.Labels{"title": watch.Title}, c.ApiClient, id)

            log.Infof("Picked up new watch %s, registered as metric %s", watch.Title, id)
        }
    }

    // collect all tracked metrics
    for _, metric := range c.priceMetrics {
        metric.Collect(ch)
    }
}

The newPriceMetric function simply creates a new priceMetric object consisting of a prometheus.Desc, an ApiClient (class providing access to changedetection.io's API) and a UUID:

// newPriceMetric builds the priceMetric for the watch identified by uuid.
// The given labels are attached to the descriptor as constant labels; the
// descriptor declares no variable labels.
func newPriceMetric(labels prometheus.Labels, apiClient *ApiClient, uuid string) priceMetric {
    fqName := prometheus.BuildFQName(namespace, "watch", "price")
    desc := prometheus.NewDesc(fqName, "Current price of an offer type watch", nil, labels)

    return priceMetric{
        desc:      desc,
        apiClient: apiClient,
        UUID:      uuid,
    }
}

Testing the default behavior works perfectly fine and passes all tests, but when I try to test the behavior of adding a new watch (while the exporter is running, without restarting it), the test fails.

Notes: Both expectMetrics and expectMetricCount are wrapper-functions around prometheus' own testutil.CollectAndCompare and testutil.CollectAndCount. The helper CreateTestApiServer creates a wrapped httptest server which returns JSON payloads based on the passed map[string]*data.WatchItem structure.

// TestAutoregisterPriceCollector checks that a watch added to the API after
// the collector was created shows up in the collected price metrics.
func TestAutoregisterPriceCollector(t *testing.T) {
    const priceMetricName = "changedetectionio_watch_price"

    db := createCollectorTestDb()
    apiServer := testutil.CreateTestApiServer(t, db)
    defer apiServer.Close()

    collector, err := NewPriceCollector(apiServer.URL(), "foo-bar-key")
    if err != nil {
        t.Fatal(err)
    }

    // Baseline: the initial test database is expected to yield two price metrics.
    expectMetricCount(t, collector, 2, priceMetricName)

    // Add a third watch behind the collector's back; the next collect run
    // is expected to pick it up.
    id, item := testutil.NewTestItem("Item 3", 300, "USD")
    db[id] = item

    expectMetrics(t, collector, "price_metrics_autoregister.prom", priceMetricName)
    expectMetricCount(t, collector, 3, priceMetricName)
}

When running that test, the run fails with the following error:

collector_test.go:23: Unexpected metrics returned: gathering metrics failed: collected metric changedetectionio_watch_price label:{name:"title" value:"Item 3"} gauge:{value:300} with unregistered descriptor Desc{fqName: "changedetectionio_watch_price", help: "Current price of an offer type watch", constLabels: {title="Item 3"}, variableLabels: {}}

I currently assume that this error is related to the way testutil.CollectAnd* work internally. According to the function comments, they register the collector on a newly created pedantic Registry which may lead to it not picking up the lazily registered descriptor.

Any thoughts on that?


Solution

  • I'm unsure whether this answers your question, but here's an example:

    package main
    
    import (
        "flag"
        "fmt"
        "log/slog"
        "net/http"
        "sync"
        "time"
    
        "github.com/prometheus/client_golang/prometheus"
        "github.com/prometheus/client_golang/prometheus/collectors"
        "github.com/prometheus/client_golang/prometheus/promhttp"
        "github.com/prometheus/client_golang/prometheus/testutil"
    )
    
    // endpoint is the address the HTTP server listens on; it can be
    // overridden with the -endpoint command-line flag.
    var endpoint = flag.String("endpoint", "0.0.0.0:8080", "The endpoint of the HTTP server")
    
    // TestCollector is an example collector that emits one "foo" sample per
    // entry in values. The embedded RWMutex guards values: Collect reads it
    // under the read lock while main appends to it under the write lock.
    type TestCollector struct {
        sync.RWMutex
    
        // values holds the label values for which a sample is emitted.
        values []string
        // foo is the descriptor shared by every sample this collector emits.
        foo    *prometheus.Desc
    }
    
    // NewTestCollector returns a TestCollector that has no values yet and
    // whose "foo" descriptor declares a single variable label named "labels".
    func NewTestCollector() *TestCollector {
        variableLabels := []string{"labels"}
        desc := prometheus.NewDesc("foo", "foo", variableLabels, nil)
    
        return &TestCollector{foo: desc}
    }
    // Collect sends one constant counter sample with value 1 on ch for every
    // entry of c.values, using that entry as the label value. The read lock
    // is held for the whole call.
    func (c *TestCollector) Collect(ch chan<- prometheus.Metric) {
        c.RLock()
        defer c.RUnlock()
    
        for i := range c.values {
            sample := prometheus.MustNewConstMetric(c.foo, prometheus.CounterValue, 1, c.values[i])
            ch <- sample
        }
    }
    // Describe sends the collector's single descriptor on ch.
    func (c *TestCollector) Describe(ch chan<- *prometheus.Desc) {
        desc := c.foo
        ch <- desc
    }
    
    // main registers a TestCollector on a fresh registry, starts a goroutine
    // that adds a new label value to the collector every 15 seconds (20 in
    // total) and logs the resulting sample count, and serves the registry on
    // /metrics at the configured endpoint.
    func main() {
        flag.Parse()
    
        collector := NewTestCollector()
    
        reg := prometheus.NewRegistry()
        reg.MustRegister(collector)
    
        // Background producer: grows the collector's value set while the
        // HTTP server is running.
        go func() {
            for n := range 20 {
                label := fmt.Sprintf("value-%02d", n)
                slog.Info("Adding value", "value", label)
    
                collector.Lock()
                collector.values = append(collector.values, label)
                collector.Unlock()
    
                count := testutil.CollectAndCount(collector, "foo")
                slog.Info("testutil", "count", count)
    
                time.Sleep(15 * time.Second)
            }
        }()
    
        metricsHandler := promhttp.HandlerFor(reg, promhttp.HandlerOpts{})
        http.Handle("/metrics", metricsHandler)
    
        // ListenAndServe only returns on failure, so whatever it returns is
        // logged as an error.
        listenErr := http.ListenAndServe(*endpoint, nil)
        slog.Error("unable to listen", "err", listenErr)
    }
    

    The metric foo gains a new value (value-00 through value-19) for its label named labels every 15 seconds.

    The count returned by testutil.CollectAndCount increases with each iteration:

    logs:

    2024/04/12 10:43:37 INFO Adding value value=value-00
    2024/04/12 10:43:37 INFO testutil count=1
    2024/04/12 10:43:52 INFO Adding value value=value-01
    2024/04/12 10:43:52 INFO testutil count=2
    2024/04/12 10:44:07 INFO Adding value value=value-02
    2024/04/12 10:44:07 INFO testutil count=3
    2024/04/12 10:44:22 INFO Adding value value=value-03
    2024/04/12 10:44:22 INFO testutil count=4
    2024/04/12 10:44:37 INFO Adding value value=value-04
    2024/04/12 10:44:37 INFO testutil count=5
    2024/04/12 10:44:52 INFO Adding value value=value-05
    

    And:

    curl --silent --get http://localhost:8080/metrics
    
    # HELP foo foo
    # TYPE foo counter
    foo{labels="value-00"} 1
    foo{labels="value-01"} 1
    foo{labels="value-02"} 1
    foo{labels="value-03"} 1
    foo{labels="value-04"} 1
    foo{labels="value-05"} 1