Skip to content

Commit 009b750

Browse files
authored
Merge pull request #5479 from hashicorp/b-vault-renewal
vault: fix renewal time
2 parents eeb282c + 888304b commit 009b750

File tree

3 files changed

+103
-27
lines changed

3 files changed

+103
-27
lines changed

CHANGELOG.md

+5-1
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,16 @@ FEATURES:
44

55
* vault: Add initial support for Vault namespaces [[GH-5520](https://github.com/hashicorp/nomad/pull/5520)]
66
* allocations: Add support for restarting allocations in-place [[GH-5502](https://github.com/hashicorp/nomad/pull/5502)]
7-
7+
88
IMPROVEMENTS:
99

1010
* core: Add node name to output of `nomad node status` command in verbose mode [[GH-5224](https://github.com/hashicorp/nomad/pull/5224)]
1111
* core: Add `-verbose` flag to `nomad status` wrapper command [[GH-5516](https://github.com/hashicorp/nomad/pull/5516)]
1212

13+
BUG FIXES:
14+
15+
* vault: Fix renewal time to be 1/2 lease duration with jitter [[GH-5479](https://github.com/hashicorp/nomad/issues/5479)]
16+
1317
## 0.9.0 (April 9, 2019)
1418

1519
__BACKWARDS INCOMPATIBILITIES:__

client/vaultclient/vaultclient.go

+46-22
Original file line numberDiff line numberDiff line change
@@ -194,28 +194,36 @@ func (c *vaultClient) isTracked(id string) bool {
194194
return ok
195195
}
196196

197+
// isRunning returns true if the client is running.
198+
func (c *vaultClient) isRunning() bool {
199+
c.lock.RLock()
200+
defer c.lock.RUnlock()
201+
return c.running
202+
}
203+
197204
// Starts the renewal loop of vault client
198205
func (c *vaultClient) Start() {
206+
c.lock.Lock()
207+
defer c.lock.Unlock()
208+
199209
if !c.config.IsEnabled() || c.running {
200210
return
201211
}
202212

203-
c.lock.Lock()
204213
c.running = true
205-
c.lock.Unlock()
206214

207215
go c.run()
208216
}
209217

210218
// Stops the renewal loop of vault client
211219
func (c *vaultClient) Stop() {
220+
c.lock.Lock()
221+
defer c.lock.Unlock()
222+
212223
if !c.config.IsEnabled() || !c.running {
213224
return
214225
}
215226

216-
c.lock.Lock()
217-
defer c.lock.Unlock()
218-
219227
c.running = false
220228
close(c.stopCh)
221229
}
@@ -235,7 +243,7 @@ func (c *vaultClient) DeriveToken(alloc *structs.Allocation, taskNames []string)
235243
if !c.config.IsEnabled() {
236244
return nil, fmt.Errorf("vault client not enabled")
237245
}
238-
if !c.running {
246+
if !c.isRunning() {
239247
return nil, fmt.Errorf("vault client is not running")
240248
}
241249

@@ -421,21 +429,9 @@ func (c *vaultClient) renew(req *vaultClientRenewalRequest) error {
421429
}
422430
}
423431

424-
duration := leaseDuration / 2
425-
switch {
426-
case leaseDuration < 30:
427-
// Don't bother about introducing randomness if the
428-
// leaseDuration is too small.
429-
default:
430-
// Give a breathing space of 20 seconds
431-
min := 10
432-
max := leaseDuration - min
433-
rand.Seed(time.Now().Unix())
434-
duration = min + rand.Intn(max-min)
435-
}
436-
437432
// Determine the next renewal time
438-
next := time.Now().Add(time.Duration(duration) * time.Second)
433+
renewalDuration := renewalTime(rand.Intn, leaseDuration)
434+
next := time.Now().Add(renewalDuration)
439435

440436
fatal := false
441437
if renewalErr != nil &&
@@ -445,7 +441,7 @@ func (c *vaultClient) renew(req *vaultClientRenewalRequest) error {
445441
strings.Contains(renewalErr.Error(), "permission denied")) {
446442
fatal = true
447443
} else if renewalErr != nil {
448-
c.logger.Debug("renewal error details", "req.increment", req.increment, "lease_duration", leaseDuration, "duration", duration)
444+
c.logger.Debug("renewal error details", "req.increment", req.increment, "lease_duration", leaseDuration, "renewal_duration", renewalDuration)
449445
c.logger.Error("error during renewal of lease or token failed due to a non-fatal error; retrying",
450446
"error", renewalErr, "period", next)
451447
}
@@ -517,7 +513,7 @@ func (c *vaultClient) run() {
517513
}
518514

519515
var renewalCh <-chan time.Time
520-
for c.config.IsEnabled() && c.running {
516+
for c.config.IsEnabled() && c.isRunning() {
521517
// Fetches the candidate for next renewal
522518
renewalReq, renewalTime := c.nextRenewal()
523519
if renewalTime.IsZero() {
@@ -728,3 +724,31 @@ func (h *vaultDataHeapImp) Pop() interface{} {
728724
*h = old[0 : n-1]
729725
return entry
730726
}
727+
728+
// randIntn is the function in math/rand needed by renewalTime. A type is used
729+
// to ease deterministic testing.
730+
type randIntn func(int) int
731+
732+
// renewalTime returns when a token should be renewed given its leaseDuration
733+
// and a randomizer to provide jitter.
734+
//
735+
// Leases < 1m will be not jitter.
736+
func renewalTime(dice randIntn, leaseDuration int) time.Duration {
737+
// Start trying to renew at half the lease duration to allow ample time
738+
// for latency and retries.
739+
renew := leaseDuration / 2
740+
741+
// Don't bother about introducing randomness if the
742+
// leaseDuration is too small.
743+
const cutoff = 30
744+
if renew < cutoff {
745+
return time.Duration(renew) * time.Second
746+
}
747+
748+
// jitter is the amount +/- to vary the renewal time
749+
const jitter = 10
750+
min := renew - jitter
751+
renew = min + dice(jitter*2)
752+
753+
return time.Duration(renew) * time.Second
754+
}

client/vaultclient/vaultclient_test.go

+52-4
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"github.com/hashicorp/nomad/testutil"
1111
vaultapi "github.com/hashicorp/vault/api"
1212
vaultconsts "github.com/hashicorp/vault/helper/consts"
13+
"github.com/stretchr/testify/assert"
1314
"github.com/stretchr/testify/require"
1415
)
1516

@@ -76,8 +77,11 @@ func TestVaultClient_TokenRenewals(t *testing.T) {
7677
}(errCh)
7778
}
7879

79-
if c.heap.Length() != num {
80-
t.Fatalf("bad: heap length: expected: %d, actual: %d", num, c.heap.Length())
80+
c.lock.Lock()
81+
length := c.heap.Length()
82+
c.lock.Unlock()
83+
if length != num {
84+
t.Fatalf("bad: heap length: expected: %d, actual: %d", num, length)
8185
}
8286

8387
time.Sleep(time.Duration(testutil.TestMultiplier()) * time.Second)
@@ -88,8 +92,11 @@ func TestVaultClient_TokenRenewals(t *testing.T) {
8892
}
8993
}
9094

91-
if c.heap.Length() != 0 {
92-
t.Fatalf("bad: heap length: expected: 0, actual: %d", c.heap.Length())
95+
c.lock.Lock()
96+
length = c.heap.Length()
97+
c.lock.Unlock()
98+
if length != 0 {
99+
t.Fatalf("bad: heap length: expected: 0, actual: %d", length)
93100
}
94101
}
95102

@@ -300,3 +307,44 @@ func TestVaultClient_RenewNonexistentLease(t *testing.T) {
300307
t.Fatalf("expected \"%s\" or \"%s\" in error message, got \"%v\"", "lease not found", "lease is not renewable", err.Error())
301308
}
302309
}
310+
311+
// TestVaultClient_RenewalTime_Long asserts that for leases over 1m the renewal
312+
// time is jittered.
313+
func TestVaultClient_RenewalTime_Long(t *testing.T) {
314+
t.Parallel()
315+
316+
// highRoller is a randIntn func that always returns the max value
317+
highRoller := func(n int) int {
318+
return n - 1
319+
}
320+
321+
// lowRoller is a randIntn func that always returns the min value (0)
322+
lowRoller := func(int) int {
323+
return 0
324+
}
325+
326+
assert.Equal(t, 39*time.Second, renewalTime(highRoller, 60))
327+
assert.Equal(t, 20*time.Second, renewalTime(lowRoller, 60))
328+
329+
assert.Equal(t, 309*time.Second, renewalTime(highRoller, 600))
330+
assert.Equal(t, 290*time.Second, renewalTime(lowRoller, 600))
331+
332+
const days3 = 60 * 60 * 24 * 3
333+
assert.Equal(t, (days3/2+9)*time.Second, renewalTime(highRoller, days3))
334+
assert.Equal(t, (days3/2-10)*time.Second, renewalTime(lowRoller, days3))
335+
}
336+
337+
// TestVaultClient_RenewalTime_Short asserts that for leases under 1m the renewal
338+
// time is lease/2.
339+
func TestVaultClient_RenewalTime_Short(t *testing.T) {
340+
t.Parallel()
341+
342+
dice := func(int) int {
343+
require.Fail(t, "dice should not have been called")
344+
panic("unreachable")
345+
}
346+
347+
assert.Equal(t, 29*time.Second, renewalTime(dice, 58))
348+
assert.Equal(t, 15*time.Second, renewalTime(dice, 30))
349+
assert.Equal(t, 1*time.Second, renewalTime(dice, 2))
350+
}

0 commit comments

Comments
 (0)