headscale/hscontrol/metrics.go
Kristoffer Dalby c8ebbede54
Simplify map session management (#1931)
This PR removes the complicated session management introduced in https://github.com/juanfont/headscale/pull/1791 which kept track of the sessions in a map, in addition to the channel already kept track of in the notifier.

Instead of trying to close the mapsession, it will now be replaced by the new one and closed after so all new updates goes to the right place.

The map session serve function is also split into a streaming and a non-streaming version for better readability.

RemoveNode in the notifier will not remove a node if the channel is not matching the one that has been passed (e.g. it has been replaced with a new one).

A new tuning parameter has been added to added to set timeout before the notifier gives up to send an update to a node.

Add a keep alive resetter so we wait with sending keep alives if a node has just received an update.

In addition it adds a bunch of env debug flags that can be set:

- `HEADSCALE_DEBUG_HIGH_CARDINALITY_METRICS`: make certain metrics include per node.id, not recommended to use in prod. 
- `HEADSCALE_DEBUG_PROFILING_ENABLED`: activate tracing 
- `HEADSCALE_DEBUG_PROFILING_PATH`: where to store traces 
- `HEADSCALE_DEBUG_DUMP_CONFIG`: calls `spew.Dump` on the config object startup
- `HEADSCALE_DEBUG_DEADLOCK`: enable go-deadlock to dump goroutines if it looks like a deadlock has occured, enabled in integration tests.

Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>
2024-05-24 10:15:34 +02:00

120 lines
3.9 KiB
Go

package hscontrol
import (
"net/http"
"strconv"
"github.com/gorilla/mux"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"tailscale.com/envknob"
)
var debugHighCardinalityMetrics = envknob.Bool("HEADSCALE_DEBUG_HIGH_CARDINALITY_METRICS")
var mapResponseLastSentSeconds *prometheus.GaugeVec
func init() {
if debugHighCardinalityMetrics {
mapResponseLastSentSeconds = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: prometheusNamespace,
Name: "mapresponse_last_sent_seconds",
Help: "last sent metric to node.id",
}, []string{"type", "id"})
}
}
const prometheusNamespace = "headscale"
var (
mapResponseSent = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: prometheusNamespace,
Name: "mapresponse_sent_total",
Help: "total count of mapresponses sent to clients",
}, []string{"status", "type"})
mapResponseUpdateReceived = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: prometheusNamespace,
Name: "mapresponse_updates_received_total",
Help: "total count of mapresponse updates received on update channel",
}, []string{"type"})
mapResponseWriteUpdatesInStream = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: prometheusNamespace,
Name: "mapresponse_write_updates_in_stream_total",
Help: "total count of writes that occured in a stream session, pre-68 nodes",
}, []string{"status"})
mapResponseEndpointUpdates = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: prometheusNamespace,
Name: "mapresponse_endpoint_updates_total",
Help: "total count of endpoint updates received",
}, []string{"status"})
mapResponseReadOnly = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: prometheusNamespace,
Name: "mapresponse_readonly_requests_total",
Help: "total count of readonly requests received",
}, []string{"status"})
mapResponseEnded = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: prometheusNamespace,
Name: "mapresponse_ended_total",
Help: "total count of new mapsessions ended",
}, []string{"reason"})
mapResponseClosed = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: prometheusNamespace,
Name: "mapresponse_closed_total",
Help: "total count of calls to mapresponse close",
}, []string{"return"})
httpDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
Namespace: prometheusNamespace,
Name: "http_duration_seconds",
Help: "Duration of HTTP requests.",
}, []string{"path"})
httpCounter = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: prometheusNamespace,
Name: "http_requests_total",
Help: "Total number of http requests processed",
}, []string{"code", "method", "path"},
)
)
// prometheusMiddleware implements mux.MiddlewareFunc.
func prometheusMiddleware(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
route := mux.CurrentRoute(r)
path, _ := route.GetPathTemplate()
// Ignore streaming and noise sessions
// it has its own router further down.
if path == "/ts2021" || path == "/machine/map" || path == "/derp" || path == "/derp/probe" || path == "/bootstrap-dns" {
next.ServeHTTP(w, r)
return
}
rw := &respWriterProm{ResponseWriter: w}
timer := prometheus.NewTimer(httpDuration.WithLabelValues(path))
next.ServeHTTP(rw, r)
timer.ObserveDuration()
httpCounter.WithLabelValues(strconv.Itoa(rw.status), r.Method, path).Inc()
})
}
type respWriterProm struct {
http.ResponseWriter
status int
written int64
wroteHeader bool
}
func (r *respWriterProm) WriteHeader(code int) {
r.status = code
r.wroteHeader = true
r.ResponseWriter.WriteHeader(code)
}
func (r *respWriterProm) Write(b []byte) (int, error) {
if !r.wroteHeader {
r.WriteHeader(http.StatusOK)
}
n, err := r.ResponseWriter.Write(b)
r.written += int64(n)
return n, err
}