Skip to content

Commit c07b729

Browse files
authored
Merge pull request #5568 from hashicorp/b-nomad-logger-restart
Fixes #5566. Fix a case where the docker logging process may lock up a nomad agent restart. It looks like we have a case where the docker logger is started even though logmon isn't. In such a case, the fifo writer blocks indefinitely, and because the open operation happens in the main goroutine, the nomad agent blocks indefinitely. This fixes the issue by performing the fifo open operation in a background goroutine instead of the main goroutine. We should follow up independently to ensure logmon <-> dockerlogger ordering and consider having task recovery happen in a non-main goroutine with some sensible timeouts.
2 parents 009b750 + 96a54cb commit c07b729

File tree

1 file changed

+66
-24
lines changed

1 file changed

+66
-24
lines changed

drivers/docker/docklog/docker_logger.go

+66-24
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"io"
66
"math/rand"
77
"strings"
8+
"sync"
89
"time"
910

1011
docker "github.com/fsouza/go-dockerclient"
@@ -56,11 +57,12 @@ func NewDockerLogger(logger hclog.Logger) DockerLogger {
5657
type dockerLogger struct {
5758
logger hclog.Logger
5859

59-
stdout io.WriteCloser
60-
stderr io.WriteCloser
61-
cancelCtx context.CancelFunc
60+
stdout io.WriteCloser
61+
stderr io.WriteCloser
62+
stdLock sync.Mutex
6263

63-
doneCh chan interface{}
64+
cancelCtx context.CancelFunc
65+
doneCh chan interface{}
6466
}
6567

6668
// Start log monitoring
@@ -70,35 +72,27 @@ func (d *dockerLogger) Start(opts *StartOpts) error {
7072
return fmt.Errorf("failed to open docker client: %v", err)
7173
}
7274

73-
if d.stdout == nil {
74-
stdout, err := fifo.OpenWriter(opts.Stdout)
75-
if err != nil {
76-
return fmt.Errorf("failed to open fifo for path %s: %v", opts.Stdout, err)
77-
}
78-
d.stdout = stdout
79-
}
80-
if d.stderr == nil {
81-
stderr, err := fifo.OpenWriter(opts.Stderr)
82-
if err != nil {
83-
return fmt.Errorf("failed to open fifo for path %s: %v", opts.Stdout, err)
84-
}
85-
d.stderr = stderr
86-
}
8775
ctx, cancel := context.WithCancel(context.Background())
8876
d.cancelCtx = cancel
8977

9078
go func() {
9179
defer close(d.doneCh)
9280

81+
stdout, stderr, err := d.openStreams(ctx, opts)
82+
if err != nil {
83+
d.logger.Error("log streaming ended with terminal error", "error", err)
84+
return
85+
}
86+
9387
sinceTime := time.Unix(opts.StartTime, 0)
9488
backoff := 0.0
9589

9690
for {
9791
logOpts := docker.LogsOptions{
9892
Context: ctx,
9993
Container: opts.ContainerID,
100-
OutputStream: d.stdout,
101-
ErrorStream: d.stderr,
94+
OutputStream: stdout,
95+
ErrorStream: stderr,
10296
Since: sinceTime.Unix(),
10397
Follow: true,
10498
Stdout: true,
@@ -138,16 +132,64 @@ func (d *dockerLogger) Start(opts *StartOpts) error {
138132

139133
}
140134

135+
// openStreams open logger stdout/stderr; should be called in a background goroutine to avoid locking up
136+
// process to avoid locking goroutine process
137+
func (d *dockerLogger) openStreams(ctx context.Context, opts *StartOpts) (stdout, stderr io.WriteCloser, err error) {
138+
d.stdLock.Lock()
139+
stdoutF, stderrF := d.stdout, d.stderr
140+
d.stdLock.Unlock()
141+
142+
if stdoutF != nil && stderrF != nil {
143+
return stdoutF, stderrF, nil
144+
}
145+
146+
// opening a fifo may block indefinitely until a reader end opens, so
147+
// we preform open() without holding the stdLock, so Stop and interleave.
148+
// This a defensive measure - logmon (the reader end) should be up and
149+
// started before dockerLogger is started
150+
if stdoutF == nil {
151+
stdoutF, err = fifo.OpenWriter(opts.Stdout)
152+
if err != nil {
153+
return nil, nil, err
154+
}
155+
}
156+
157+
if stderrF == nil {
158+
stderrF, err = fifo.OpenWriter(opts.Stderr)
159+
if err != nil {
160+
return nil, nil, err
161+
}
162+
}
163+
164+
if ctx.Err() != nil {
165+
// Stop was called and don't need files anymore
166+
stdoutF.Close()
167+
stderrF.Close()
168+
return nil, nil, ctx.Err()
169+
}
170+
171+
d.stdLock.Lock()
172+
d.stdout, d.stderr = stdoutF, stderrF
173+
d.stdLock.Unlock()
174+
175+
return stdoutF, stderrF, nil
176+
}
177+
141178
// Stop log monitoring
142179
func (d *dockerLogger) Stop() error {
143180
if d.cancelCtx != nil {
144181
d.cancelCtx()
145182
}
146-
if d.stdout != nil {
147-
d.stdout.Close()
183+
184+
d.stdLock.Lock()
185+
stdout, stderr := d.stdout, d.stderr
186+
d.stdLock.Unlock()
187+
188+
if stdout != nil {
189+
stdout.Close()
148190
}
149-
if d.stderr != nil {
150-
d.stderr.Close()
191+
if stderr != nil {
192+
stderr.Close()
151193
}
152194
return nil
153195
}

0 commit comments

Comments
 (0)