mirror of
https://github.com/juanfont/headscale.git
synced 2025-01-19 02:10:04 +09:00
Added helper methods for subnet failover + unit tests
Added method to perform subnet failover Added tests for subnet failover
This commit is contained in:
parent
b62acff2e3
commit
6718ff71d3
3 changed files with 281 additions and 6 deletions
17
machine.go
17
machine.go
|
@ -138,6 +138,17 @@ func (machine Machine) isExpired() bool {
|
|||
return time.Now().UTC().After(*machine.Expiry)
|
||||
}
|
||||
|
||||
// isOnline returns if the machine is connected to Headscale.
|
||||
// This is really a naive implementation, as we don't really see
|
||||
// if there is a working connection between the client and the server.
|
||||
func (machine *Machine) isOnline() bool {
|
||||
if machine.LastSeen == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
return machine.LastSeen.After(time.Now().Add(-keepAliveInterval))
|
||||
}
|
||||
|
||||
func containsAddresses(inputs []string, addrs []string) bool {
|
||||
for _, addr := range addrs {
|
||||
if contains(inputs, addr) {
|
||||
|
@ -708,9 +719,7 @@ func (h *Headscale) toNode(
|
|||
|
||||
hostInfo := machine.GetHostInfo()
|
||||
|
||||
// A node is Online if it is connected to the control server,
|
||||
// and we now we update LastSeen every keepAliveInterval duration at least.
|
||||
online := machine.LastSeen.After(time.Now().Add(-keepAliveInterval))
|
||||
online := machine.isOnline()
|
||||
|
||||
node := tailcfg.Node{
|
||||
ID: tailcfg.NodeID(machine.ID), // this is the actual ID
|
||||
|
@ -1027,7 +1036,7 @@ func (h *Headscale) EnableRoutes(machine *Machine, routeStrs ...string) error {
|
|||
|
||||
// Mark already as primary if there is only this node offering this subnet
|
||||
// (and is not an exit route)
|
||||
if prefix != ExitRouteV4 && prefix != ExitRouteV6 {
|
||||
if !route.isExitRoute() {
|
||||
route.IsPrimary = h.isUniquePrefix(route)
|
||||
}
|
||||
|
||||
|
|
123
routes.go
123
routes.go
|
@ -4,6 +4,7 @@ import (
|
|||
"fmt"
|
||||
"net/netip"
|
||||
|
||||
"github.com/rs/zerolog/log"
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
|
||||
|
@ -34,6 +35,10 @@ func (r *Route) String() string {
|
|||
return fmt.Sprintf("%s:%s", r.Machine, netip.Prefix(r.Prefix).String())
|
||||
}
|
||||
|
||||
func (r *Route) isExitRoute() bool {
|
||||
return netip.Prefix(r.Prefix) == ExitRouteV4 || netip.Prefix(r.Prefix) == ExitRouteV6
|
||||
}
|
||||
|
||||
func (rs Routes) toPrefixes() []netip.Prefix {
|
||||
prefixes := make([]netip.Prefix, len(rs))
|
||||
for i, r := range rs {
|
||||
|
@ -54,6 +59,23 @@ func (h *Headscale) isUniquePrefix(route Route) bool {
|
|||
return count == 0
|
||||
}
|
||||
|
||||
func (h *Headscale) getPrimaryRoute(prefix netip.Prefix) (*Route, error) {
|
||||
var route Route
|
||||
err := h.db.
|
||||
Preload("Machine").
|
||||
Where("prefix = ? AND advertised = ? AND enabled = ? AND is_primary = ?", IPPrefix(prefix), true, true, true).
|
||||
First(&route).Error
|
||||
if err != nil && err != gorm.ErrRecordNotFound {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err == gorm.ErrRecordNotFound {
|
||||
return nil, gorm.ErrRecordNotFound
|
||||
}
|
||||
|
||||
return &route, nil
|
||||
}
|
||||
|
||||
// getMachinePrimaryRoutes returns the routes that are enabled and marked as primary (for subnet failover)
|
||||
// Exit nodes are not considered for this, as they are never marked as Primary
|
||||
func (h *Headscale) getMachinePrimaryRoutes(m *Machine) ([]Route, error) {
|
||||
|
@ -120,3 +142,104 @@ func (h *Headscale) processMachineRoutes(machine *Machine) error {
|
|||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (h *Headscale) handlePrimarySubnetFailover() error {
|
||||
// first, get all the enabled routes
|
||||
var routes []Route
|
||||
err := h.db.
|
||||
Preload("Machine").
|
||||
Where("advertised = ? AND enabled = ?", true, true).
|
||||
Find(&routes).Error
|
||||
if err != nil && err != gorm.ErrRecordNotFound {
|
||||
log.Error().Err(err).Msg("error getting routes")
|
||||
}
|
||||
|
||||
for _, route := range routes {
|
||||
if route.isExitRoute() {
|
||||
continue
|
||||
}
|
||||
|
||||
if !route.IsPrimary {
|
||||
_, err := h.getPrimaryRoute(netip.Prefix(route.Prefix))
|
||||
if h.isUniquePrefix(route) || err == gorm.ErrRecordNotFound {
|
||||
route.IsPrimary = true
|
||||
err := h.db.Save(&route).Error
|
||||
if err != nil {
|
||||
log.Error().Err(err).Msg("error marking route as primary")
|
||||
|
||||
return err
|
||||
}
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
if route.IsPrimary {
|
||||
if route.Machine.isOnline() {
|
||||
continue
|
||||
}
|
||||
|
||||
// machine offline, find a new primary
|
||||
log.Info().
|
||||
Str("machine", route.Machine.Hostname).
|
||||
Str("prefix", netip.Prefix(route.Prefix).String()).
|
||||
Msgf("machine offline, finding a new primary subnet")
|
||||
|
||||
// find a new primary route
|
||||
var newPrimaryRoutes []Route
|
||||
err := h.db.
|
||||
Preload("Machine").
|
||||
Where("prefix = ? AND machine_id != ? AND advertised = ? AND enabled = ?",
|
||||
route.Prefix,
|
||||
route.MachineID,
|
||||
true, true).
|
||||
Find(&newPrimaryRoutes).Error
|
||||
if err != nil && err != gorm.ErrRecordNotFound {
|
||||
log.Error().Err(err).Msg("error finding new primary route")
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
var newPrimaryRoute *Route
|
||||
for _, r := range newPrimaryRoutes {
|
||||
if r.Machine.isOnline() {
|
||||
newPrimaryRoute = &r
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if newPrimaryRoute == nil {
|
||||
log.Warn().
|
||||
Str("machine", route.Machine.Hostname).
|
||||
Str("prefix", netip.Prefix(route.Prefix).String()).
|
||||
Msgf("no alternative primary route found")
|
||||
continue
|
||||
}
|
||||
|
||||
log.Info().
|
||||
Str("old_machine", route.Machine.Hostname).
|
||||
Str("prefix", netip.Prefix(route.Prefix).String()).
|
||||
Str("new_machine", newPrimaryRoute.Machine.Hostname).
|
||||
Msgf("found new primary route")
|
||||
|
||||
// disable the old primary route
|
||||
route.IsPrimary = false
|
||||
err = h.db.Save(&route).Error
|
||||
if err != nil {
|
||||
log.Error().Err(err).Msg("error disabling old primary route")
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
// enable the new primary route
|
||||
newPrimaryRoute.IsPrimary = true
|
||||
err = h.db.Save(&newPrimaryRoute).Error
|
||||
if err != nil {
|
||||
log.Error().Err(err).Msg("error enabling new primary route")
|
||||
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
|
147
routes_test.go
147
routes_test.go
|
@ -2,6 +2,7 @@ package headscale
|
|||
|
||||
import (
|
||||
"net/netip"
|
||||
"time"
|
||||
|
||||
"gopkg.in/check.v1"
|
||||
"tailscale.com/tailcfg"
|
||||
|
@ -150,7 +151,7 @@ func (s *Suite) TestIsUniquePrefix(c *check.C) {
|
|||
RoutableIPs: []netip.Prefix{route, route2},
|
||||
}
|
||||
machine1 := Machine{
|
||||
ID: 0,
|
||||
ID: 1,
|
||||
MachineKey: "foo",
|
||||
NodeKey: "bar",
|
||||
DiscoKey: "faa",
|
||||
|
@ -175,7 +176,7 @@ func (s *Suite) TestIsUniquePrefix(c *check.C) {
|
|||
RoutableIPs: []netip.Prefix{route2},
|
||||
}
|
||||
machine2 := Machine{
|
||||
ID: 0,
|
||||
ID: 2,
|
||||
MachineKey: "foo",
|
||||
NodeKey: "bar",
|
||||
DiscoKey: "faa",
|
||||
|
@ -209,3 +210,145 @@ func (s *Suite) TestIsUniquePrefix(c *check.C) {
|
|||
c.Assert(err, check.IsNil)
|
||||
c.Assert(len(routes), check.Equals, 0)
|
||||
}
|
||||
|
||||
func (s *Suite) TestSubnetFailover(c *check.C) {
|
||||
namespace, err := app.CreateNamespace("test")
|
||||
c.Assert(err, check.IsNil)
|
||||
|
||||
pak, err := app.CreatePreAuthKey(namespace.Name, false, false, nil, nil)
|
||||
c.Assert(err, check.IsNil)
|
||||
|
||||
_, err = app.GetMachine("test", "test_enable_route_machine")
|
||||
c.Assert(err, check.NotNil)
|
||||
|
||||
prefix, err := netip.ParsePrefix(
|
||||
"10.0.0.0/24",
|
||||
)
|
||||
c.Assert(err, check.IsNil)
|
||||
|
||||
prefix2, err := netip.ParsePrefix(
|
||||
"150.0.10.0/25",
|
||||
)
|
||||
c.Assert(err, check.IsNil)
|
||||
|
||||
hostInfo1 := tailcfg.Hostinfo{
|
||||
RoutableIPs: []netip.Prefix{prefix, prefix2},
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
machine1 := Machine{
|
||||
ID: 1,
|
||||
MachineKey: "foo",
|
||||
NodeKey: "bar",
|
||||
DiscoKey: "faa",
|
||||
Hostname: "test_enable_route_machine",
|
||||
NamespaceID: namespace.ID,
|
||||
RegisterMethod: RegisterMethodAuthKey,
|
||||
AuthKeyID: uint(pak.ID),
|
||||
HostInfo: HostInfo(hostInfo1),
|
||||
LastSeen: &now,
|
||||
}
|
||||
app.db.Save(&machine1)
|
||||
|
||||
err = app.processMachineRoutes(&machine1)
|
||||
c.Assert(err, check.IsNil)
|
||||
|
||||
err = app.EnableRoutes(&machine1, prefix.String())
|
||||
c.Assert(err, check.IsNil)
|
||||
|
||||
err = app.EnableRoutes(&machine1, prefix2.String())
|
||||
c.Assert(err, check.IsNil)
|
||||
|
||||
err = app.handlePrimarySubnetFailover()
|
||||
c.Assert(err, check.IsNil)
|
||||
|
||||
enabledRoutes1, err := app.GetEnabledRoutes(&machine1)
|
||||
c.Assert(err, check.IsNil)
|
||||
c.Assert(len(enabledRoutes1), check.Equals, 2)
|
||||
|
||||
route, err := app.getPrimaryRoute(prefix)
|
||||
c.Assert(err, check.IsNil)
|
||||
c.Assert(route.MachineID, check.Equals, machine1.ID)
|
||||
|
||||
hostInfo2 := tailcfg.Hostinfo{
|
||||
RoutableIPs: []netip.Prefix{prefix2},
|
||||
}
|
||||
machine2 := Machine{
|
||||
ID: 2,
|
||||
MachineKey: "foo",
|
||||
NodeKey: "bar",
|
||||
DiscoKey: "faa",
|
||||
Hostname: "test_enable_route_machine",
|
||||
NamespaceID: namespace.ID,
|
||||
RegisterMethod: RegisterMethodAuthKey,
|
||||
AuthKeyID: uint(pak.ID),
|
||||
HostInfo: HostInfo(hostInfo2),
|
||||
LastSeen: &now,
|
||||
}
|
||||
app.db.Save(&machine2)
|
||||
|
||||
err = app.processMachineRoutes(&machine2)
|
||||
c.Assert(err, check.IsNil)
|
||||
|
||||
err = app.EnableRoutes(&machine2, prefix2.String())
|
||||
c.Assert(err, check.IsNil)
|
||||
|
||||
err = app.handlePrimarySubnetFailover()
|
||||
c.Assert(err, check.IsNil)
|
||||
|
||||
enabledRoutes1, err = app.GetEnabledRoutes(&machine1)
|
||||
c.Assert(err, check.IsNil)
|
||||
c.Assert(len(enabledRoutes1), check.Equals, 2)
|
||||
|
||||
enabledRoutes2, err := app.GetEnabledRoutes(&machine2)
|
||||
c.Assert(err, check.IsNil)
|
||||
c.Assert(len(enabledRoutes2), check.Equals, 1)
|
||||
|
||||
routes, err := app.getMachinePrimaryRoutes(&machine1)
|
||||
c.Assert(err, check.IsNil)
|
||||
c.Assert(len(routes), check.Equals, 2)
|
||||
|
||||
routes, err = app.getMachinePrimaryRoutes(&machine2)
|
||||
c.Assert(err, check.IsNil)
|
||||
c.Assert(len(routes), check.Equals, 0)
|
||||
|
||||
// lets make machine1 lastseen 10 mins ago
|
||||
before := now.Add(-10 * time.Minute)
|
||||
machine1.LastSeen = &before
|
||||
err = app.db.Save(&machine1).Error
|
||||
c.Assert(err, check.IsNil)
|
||||
|
||||
err = app.handlePrimarySubnetFailover()
|
||||
c.Assert(err, check.IsNil)
|
||||
|
||||
routes, err = app.getMachinePrimaryRoutes(&machine1)
|
||||
c.Assert(err, check.IsNil)
|
||||
c.Assert(len(routes), check.Equals, 1)
|
||||
|
||||
routes, err = app.getMachinePrimaryRoutes(&machine2)
|
||||
c.Assert(err, check.IsNil)
|
||||
c.Assert(len(routes), check.Equals, 1)
|
||||
|
||||
machine2.HostInfo = HostInfo(tailcfg.Hostinfo{
|
||||
RoutableIPs: []netip.Prefix{prefix, prefix2},
|
||||
})
|
||||
err = app.db.Save(&machine2).Error
|
||||
c.Assert(err, check.IsNil)
|
||||
|
||||
err = app.processMachineRoutes(&machine2)
|
||||
c.Assert(err, check.IsNil)
|
||||
|
||||
err = app.EnableRoutes(&machine2, prefix.String())
|
||||
c.Assert(err, check.IsNil)
|
||||
|
||||
err = app.handlePrimarySubnetFailover()
|
||||
c.Assert(err, check.IsNil)
|
||||
|
||||
routes, err = app.getMachinePrimaryRoutes(&machine1)
|
||||
c.Assert(err, check.IsNil)
|
||||
c.Assert(len(routes), check.Equals, 0)
|
||||
|
||||
routes, err = app.getMachinePrimaryRoutes(&machine2)
|
||||
c.Assert(err, check.IsNil)
|
||||
c.Assert(len(routes), check.Equals, 2)
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue