make stream shutdown if self-node has been removed (#2125)

* add a shutdown helper that asserts headscale did not panic

Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>

* add test case producing 2118 panic

Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>

* make stream shutdown if self-node has been removed

Currently we read the node from the database, and since it has been
deleted, its ID might come back as nil. Keep the node we already have and
just shut down the stream, so it is cleanly removed from the notifier
(sketched below).

Fixes #2118

Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>

---------

Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>
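
For illustration, here is a minimal sketch of the pattern the fix applies in the long-poll handler. The types below (StateUpdate, session) are hypothetical stand-ins, not headscale's real ones; the actual change is the slices.Contains check visible in the diff further down.

package main

import (
	"fmt"
	"slices"
)

// StateUpdate is a stand-in for headscale's update type; only the
// Removed list of node IDs matters for this sketch.
type StateUpdate struct {
	Removed []uint64
}

// session is a stand-in for the per-node streaming map session.
type session struct {
	nodeID  uint64
	updates chan StateUpdate
}

// serve consumes updates until the channel closes or the session's own
// node is deleted, in which case the stream shuts down cleanly instead
// of re-reading the (now deleted) node from the database.
func (s *session) serve() {
	for update := range s.updates {
		if slices.Contains(update.Removed, s.nodeID) {
			fmt.Println("node removed, closing stream")
			return
		}
		// ... otherwise handle the update and keep streaming ...
	}
}

func main() {
	ch := make(chan StateUpdate, 1)
	s := &session{nodeID: 1, updates: ch}
	ch <- StateUpdate{Removed: []uint64{1}}
	close(ch)
	s.serve()
}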
Kristoffer Dalby 2024-09-11 12:00:32 +02:00 committed by GitHub
parent 4b02dc9565
commit 64319f79ff
8 changed files with 148 additions and 21 deletions

View file

@@ -52,6 +52,7 @@ jobs:
           - TestExpireNode
           - TestNodeOnlineStatus
           - TestPingAllByIPManyUpDown
+          - Test2118DeletingOnlineNodePanics
           - TestEnablingRoutes
           - TestHASubnetRouterFailover
           - TestEnableDisableAutoApprovedRoute

View file

@@ -5,6 +5,7 @@ import (
     "fmt"
     "math/rand/v2"
     "net/http"
+    "slices"
     "sort"
     "strings"
     "time"
@@ -273,6 +274,12 @@ func (m *mapSession) serveLongPoll() {
                 return
             }
 
+            // If the node has been removed from headscale, close the stream
+            if slices.Contains(update.Removed, m.node.ID) {
+                m.tracef("node removed, closing stream")
+                return
+            }
+
             m.tracef("received stream update: %s %s", update.Type.String(), update.Message)
             mapResponseUpdateReceived.WithLabelValues(update.Type.String()).Inc()

View file

@@ -6,8 +6,8 @@ import (
 )
 
 type ControlServer interface {
-    Shutdown() error
-    SaveLog(string) error
+    Shutdown() (string, string, error)
+    SaveLog(string) (string, string, error)
     SaveProfile(string) error
     Execute(command []string) (string, error)
     WriteFile(path string, content []byte) error

View file

@@ -17,10 +17,10 @@ func SaveLog(
     pool *dockertest.Pool,
     resource *dockertest.Resource,
     basePath string,
-) error {
+) (string, string, error) {
     err := os.MkdirAll(basePath, os.ModePerm)
     if err != nil {
-        return err
+        return "", "", err
     }
 
     var stdout bytes.Buffer
@@ -41,28 +41,30 @@ func SaveLog(
         },
     )
     if err != nil {
-        return err
+        return "", "", err
     }
 
     log.Printf("Saving logs for %s to %s\n", resource.Container.Name, basePath)
 
+    stdoutPath := path.Join(basePath, resource.Container.Name+".stdout.log")
     err = os.WriteFile(
-        path.Join(basePath, resource.Container.Name+".stdout.log"),
+        stdoutPath,
         stdout.Bytes(),
         filePerm,
     )
     if err != nil {
-        return err
+        return "", "", err
     }
 
+    stderrPath := path.Join(basePath, resource.Container.Name+".stderr.log")
     err = os.WriteFile(
-        path.Join(basePath, resource.Container.Name+".stderr.log"),
+        stderrPath,
         stderr.Bytes(),
         filePerm,
     )
     if err != nil {
-        return err
+        return "", "", err
     }
 
-    return nil
+    return stdoutPath, stderrPath, nil
 }

View file

@@ -954,3 +954,102 @@ func TestPingAllByIPManyUpDown(t *testing.T) {
         t.Logf("%d successful pings out of %d", success, len(allClients)*len(allIps))
     }
 }
+
+func Test2118DeletingOnlineNodePanics(t *testing.T) {
+    IntegrationSkip(t)
+    t.Parallel()
+
+    scenario, err := NewScenario(dockertestMaxWait())
+    assertNoErr(t, err)
+    defer scenario.ShutdownAssertNoPanics(t)
+
+    // TODO(kradalby): it does not look like the user thing works, only second
+    // get created? maybe only when many?
+    spec := map[string]int{
+        "user1": 1,
+        "user2": 1,
+    }
+
+    err = scenario.CreateHeadscaleEnv(spec,
+        []tsic.Option{},
+        hsic.WithTestName("deletenocrash"),
+        hsic.WithEmbeddedDERPServerOnly(),
+        hsic.WithTLS(),
+        hsic.WithHostnameAsServerURL(),
+    )
+    assertNoErrHeadscaleEnv(t, err)
+
+    allClients, err := scenario.ListTailscaleClients()
+    assertNoErrListClients(t, err)
+
+    allIps, err := scenario.ListTailscaleClientsIPs()
+    assertNoErrListClientIPs(t, err)
+
+    err = scenario.WaitForTailscaleSync()
+    assertNoErrSync(t, err)
+
+    allAddrs := lo.Map(allIps, func(x netip.Addr, index int) string {
+        return x.String()
+    })
+
+    success := pingAllHelper(t, allClients, allAddrs)
+    t.Logf("%d successful pings out of %d", success, len(allClients)*len(allIps))
+
+    headscale, err := scenario.Headscale()
+    assertNoErr(t, err)
+
+    // Test list all nodes after added otherUser
+    var nodeList []v1.Node
+    err = executeAndUnmarshal(
+        headscale,
+        []string{
+            "headscale",
+            "nodes",
+            "list",
+            "--output",
+            "json",
+        },
+        &nodeList,
+    )
+    assert.Nil(t, err)
+    assert.Len(t, nodeList, 2)
+    assert.True(t, nodeList[0].Online)
+    assert.True(t, nodeList[1].Online)
+
+    // Delete the first node, which is online
+    _, err = headscale.Execute(
+        []string{
+            "headscale",
+            "nodes",
+            "delete",
+            "--identifier",
+            // Delete the last added machine
+            fmt.Sprintf("%d", nodeList[0].Id),
+            "--output",
+            "json",
+            "--force",
+        },
+    )
+    assert.Nil(t, err)
+
+    time.Sleep(2 * time.Second)
+
+    // Ensure that the node has been deleted, this did not occur due to a panic.
+    var nodeListAfter []v1.Node
+    err = executeAndUnmarshal(
+        headscale,
+        []string{
+            "headscale",
+            "nodes",
+            "list",
+            "--output",
+            "json",
+        },
+        &nodeListAfter,
+    )
+    assert.Nil(t, err)
+    assert.Len(t, nodeListAfter, 1)
+    assert.True(t, nodeListAfter[0].Online)
+    assert.Equal(t, nodeList[1].Id, nodeListAfter[0].Id)
+}

View file

@@ -398,8 +398,8 @@ func (t *HeadscaleInContainer) hasTLS() bool {
 }
 
 // Shutdown stops and cleans up the Headscale container.
-func (t *HeadscaleInContainer) Shutdown() error {
-    err := t.SaveLog("/tmp/control")
+func (t *HeadscaleInContainer) Shutdown() (string, string, error) {
+    stdoutPath, stderrPath, err := t.SaveLog("/tmp/control")
     if err != nil {
         log.Printf(
             "Failed to save log from control: %s",
@@ -458,12 +458,12 @@ func (t *HeadscaleInContainer) Shutdown() error {
         t.pool.Purge(t.pgContainer)
     }
 
-    return t.pool.Purge(t.container)
+    return stdoutPath, stderrPath, t.pool.Purge(t.container)
 }
 
 // SaveLog saves the current stdout log of the container to a path
 // on the host system.
-func (t *HeadscaleInContainer) SaveLog(path string) error {
+func (t *HeadscaleInContainer) SaveLog(path string) (string, string, error) {
     return dockertestutil.SaveLog(t.pool, t.container, path)
 }

View file

@@ -8,6 +8,7 @@ import (
     "os"
     "sort"
     "sync"
+    "testing"
     "time"
 
     v1 "github.com/juanfont/headscale/gen/go/headscale/v1"
@@ -18,6 +19,7 @@ import (
     "github.com/ory/dockertest/v3"
     "github.com/puzpuzpuz/xsync/v3"
     "github.com/samber/lo"
+    "github.com/stretchr/testify/assert"
     "golang.org/x/sync/errgroup"
     "tailscale.com/envknob"
 )
@@ -187,13 +189,9 @@ func NewScenario(maxWait time.Duration) (*Scenario, error) {
     }, nil
 }
 
-// Shutdown shuts down and cleans up all the containers (ControlServer, TailscaleClient)
-// and networks associated with it.
-// In addition, it will save the logs of the ControlServer to `/tmp/control` in the
-// environment running the tests.
-func (s *Scenario) Shutdown() {
+func (s *Scenario) ShutdownAssertNoPanics(t *testing.T) {
     s.controlServers.Range(func(_ string, control ControlServer) bool {
-        err := control.Shutdown()
+        stdoutPath, stderrPath, err := control.Shutdown()
         if err != nil {
             log.Printf(
                 "Failed to shut down control: %s",
@@ -201,6 +199,16 @@ func (s *Scenario) Shutdown() {
             )
         }
 
+        if t != nil {
+            stdout, err := os.ReadFile(stdoutPath)
+            assert.NoError(t, err)
+            assert.NotContains(t, string(stdout), "panic")
+
+            stderr, err := os.ReadFile(stderrPath)
+            assert.NoError(t, err)
+            assert.NotContains(t, string(stderr), "panic")
+        }
+
         return true
     })
@@ -224,6 +232,14 @@ func (s *Scenario) Shutdown() {
     // }
 }
 
+// Shutdown shuts down and cleans up all the containers (ControlServer, TailscaleClient)
+// and networks associated with it.
+// In addition, it will save the logs of the ControlServer to `/tmp/control` in the
+// environment running the tests.
+func (s *Scenario) Shutdown() {
+    s.ShutdownAssertNoPanics(nil)
+}
+
 // Users returns the name of all users associated with the Scenario.
 func (s *Scenario) Users() []string {
     users := make([]string, 0)

View file

@@ -998,7 +998,9 @@ func (t *TailscaleInContainer) WriteFile(path string, data []byte) error {
 // SaveLog saves the current stdout log of the container to a path
 // on the host system.
 func (t *TailscaleInContainer) SaveLog(path string) error {
-    return dockertestutil.SaveLog(t.pool, t.container, path)
+    // TODO(kradalby): Assert if tailscale logs contains panics.
+    _, _, err := dockertestutil.SaveLog(t.pool, t.container, path)
+    return err
 }
 
 // ReadFile reads a file from the Tailscale container.
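
Taken together, the integration-harness changes above follow one pattern: ControlServer.Shutdown now saves the container's stdout and stderr to files and returns their paths, and ShutdownAssertNoPanics fails the test if either log mentions a panic. A minimal sketch of that check follows; the helper name and the log paths are examples only, not part of headscale's integration API.

package integration_sketch

import (
	"os"
	"strings"
	"testing"
)

// assertLogHasNoPanic fails the test if the saved container log at
// logPath mentions a panic.
func assertLogHasNoPanic(t *testing.T, logPath string) {
	t.Helper()

	data, err := os.ReadFile(logPath)
	if err != nil {
		t.Fatalf("reading %s: %v", logPath, err)
	}
	if strings.Contains(string(data), "panic") {
		t.Errorf("%s contains a panic", logPath)
	}
}

// TestSavedControlLogs shows how such a helper would be used with the
// paths returned by Shutdown; the paths here are examples only.
func TestSavedControlLogs(t *testing.T) {
	for _, p := range []string{
		"/tmp/control/headscale.stdout.log",
		"/tmp/control/headscale.stderr.log",
	} {
		if _, err := os.Stat(p); err != nil {
			t.Skipf("log %s not present in this environment", p)
		}
		assertLogHasNoPanic(t, p)
	}
}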