fixes to extra-record file watcher (#2298)
Some checks are pending
Build / build-nix (push) Waiting to run
Build / build-cross (GOARCH=386 GOOS=linux) (push) Waiting to run
Build / build-cross (GOARCH=amd64 GOOS=darwin) (push) Waiting to run
Build / build-cross (GOARCH=amd64 GOOS=linux) (push) Waiting to run
Build / build-cross (GOARCH=arm GOOS=linux GOARM=5) (push) Waiting to run
Build / build-cross (GOARCH=arm GOOS=linux GOARM=6) (push) Waiting to run
Build / build-cross (GOARCH=arm GOOS=linux GOARM=7) (push) Waiting to run
Build / build-cross (GOARCH=arm64 GOOS=darwin) (push) Waiting to run
Build / build-cross (GOARCH=arm64 GOOS=linux) (push) Waiting to run
Tests / test (push) Waiting to run

* Fix excess error message during writes

Fixes #2290

Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>

* retry filewatcher on removed files

This should handled if files are deleted and added again, and for rename
scenarios.

Fixes #2289

Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>

* test more write and remove in filewatcher

Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>

---------

Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>
This commit is contained in:
Kristoffer Dalby 2024-12-16 11:26:56 +01:00 committed by GitHub
parent 5345f19693
commit ccc895b4c6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 119 additions and 10 deletions

View file

@ -7,6 +7,7 @@ import (
"os"
"sync"
"github.com/cenkalti/backoff/v4"
"github.com/fsnotify/fsnotify"
"github.com/rs/zerolog/log"
"tailscale.com/tailcfg"
@ -83,13 +84,40 @@ func (e *ExtraRecordsMan) Run() {
log.Error().Caller().Msgf("file watcher event channel closing")
return
}
switch event.Op {
case fsnotify.Create, fsnotify.Write, fsnotify.Chmod:
log.Trace().Caller().Str("path", event.Name).Str("op", event.Op.String()).Msg("extra records received filewatch event")
if event.Name != e.path {
continue
}
e.updateRecords()
// If a file is removed or renamed, fsnotify will loose track of it
// and not watch it. We will therefore attempt to re-add it with a backoff.
case fsnotify.Remove, fsnotify.Rename:
err := backoff.Retry(func() error {
if _, err := os.Stat(e.path); err != nil {
return err
}
return nil
}, backoff.NewExponentialBackOff())
if err != nil {
log.Error().Caller().Err(err).Msgf("extra records filewatcher retrying to find file after delete")
continue
}
err = e.watcher.Add(e.path)
if err != nil {
log.Error().Caller().Err(err).Msgf("extra records filewatcher re-adding file after delete failed, giving up.")
return
} else {
log.Trace().Caller().Str("path", e.path).Msg("extra records file re-added after delete")
e.updateRecords()
}
}
case err, ok := <-e.watcher.Errors:
if !ok {
log.Error().Caller().Msgf("file watcher error channel closing")
@ -116,6 +144,11 @@ func (e *ExtraRecordsMan) updateRecords() {
return
}
// If there are no records, ignore the update.
if records == nil {
return
}
e.mu.Lock()
defer e.mu.Unlock()
@ -143,6 +176,12 @@ func readExtraRecordsFromPath(path string) ([]tailcfg.DNSRecord, [32]byte, error
return nil, [32]byte{}, fmt.Errorf("reading path: %s, err: %w", path, err)
}
// If the read was triggered too fast, and the file is not complete, ignore the update
// if the file is empty. A consecutive update will be triggered when the file is complete.
if len(b) == 0 {
return nil, [32]byte{}, nil
}
var records []tailcfg.DNSRecord
err = json.Unmarshal(b, &records)
if err != nil {

View file

@ -146,6 +146,27 @@ func TestResolveMagicDNSExtraRecordsPath(t *testing.T) {
assertCommandOutputContains(t, client, []string{"dig", "test.myvpn.example.com"}, "6.6.6.6")
}
hs, err := scenario.Headscale()
assertNoErr(t, err)
// Write the file directly into place from the docker API.
b0, _ := json.Marshal([]tailcfg.DNSRecord{
{
Name: "docker.myvpn.example.com",
Type: "A",
Value: "2.2.2.2",
},
})
err = hs.WriteFile(erPath, b0)
assertNoErr(t, err)
for _, client := range allClients {
assertCommandOutputContains(t, client, []string{"dig", "docker.myvpn.example.com"}, "2.2.2.2")
}
// Write a new file and move it to the path to ensure the reload
// works when a file is moved atomically into place.
extraRecords = append(extraRecords, tailcfg.DNSRecord{
Name: "otherrecord.myvpn.example.com",
Type: "A",
@ -153,12 +174,6 @@ func TestResolveMagicDNSExtraRecordsPath(t *testing.T) {
})
b2, _ := json.Marshal(extraRecords)
hs, err := scenario.Headscale()
assertNoErr(t, err)
// Write it to a separate file to ensure Docker's API doesnt
// do anything unexpected and rather move it into place to trigger
// a reload.
err = hs.WriteFile(erPath+"2", b2)
assertNoErr(t, err)
_, err = hs.Execute([]string{"mv", erPath + "2", erPath})
@ -168,6 +183,61 @@ func TestResolveMagicDNSExtraRecordsPath(t *testing.T) {
assertCommandOutputContains(t, client, []string{"dig", "test.myvpn.example.com"}, "6.6.6.6")
assertCommandOutputContains(t, client, []string{"dig", "otherrecord.myvpn.example.com"}, "7.7.7.7")
}
// Write a new file and copy it to the path to ensure the reload
// works when a file is copied into place.
b3, _ := json.Marshal([]tailcfg.DNSRecord{
{
Name: "copy.myvpn.example.com",
Type: "A",
Value: "8.8.8.8",
},
})
err = hs.WriteFile(erPath+"3", b3)
assertNoErr(t, err)
_, err = hs.Execute([]string{"cp", erPath + "3", erPath})
assertNoErr(t, err)
for _, client := range allClients {
assertCommandOutputContains(t, client, []string{"dig", "copy.myvpn.example.com"}, "8.8.8.8")
}
// Write in place to ensure pipe like behaviour works
b4, _ := json.Marshal([]tailcfg.DNSRecord{
{
Name: "docker.myvpn.example.com",
Type: "A",
Value: "9.9.9.9",
},
})
command := []string{"echo", fmt.Sprintf("'%s'", string(b4)), ">", erPath}
_, err = hs.Execute([]string{"bash", "-c", strings.Join(command, " ")})
assertNoErr(t, err)
for _, client := range allClients {
assertCommandOutputContains(t, client, []string{"dig", "docker.myvpn.example.com"}, "9.9.9.9")
}
// Delete the file and create a new one to ensure it is picked up again.
_, err = hs.Execute([]string{"rm", erPath})
assertNoErr(t, err)
time.Sleep(2 * time.Second)
// The same paths should still be available as it is not cleared on delete.
for _, client := range allClients {
assertCommandOutputContains(t, client, []string{"dig", "docker.myvpn.example.com"}, "9.9.9.9")
}
// Write a new file, the backoff mechanism should make the filewatcher pick it up
// again.
err = hs.WriteFile(erPath, b3)
assertNoErr(t, err)
for _, client := range allClients {
assertCommandOutputContains(t, client, []string{"dig", "copy.myvpn.example.com"}, "8.8.8.8")
}
}
// TestValidateResolvConf validates that the resolv.conf file