Skip to content

Commit

Permalink
rename InterruptionEvent.Drained to InterruptionEvent.NodeProcessed
Browse files Browse the repository at this point in the history
  • Loading branch information
cucxabong committed Apr 15, 2021
1 parent 0dbaa7c commit 646ed7c
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 24 deletions.
2 changes: 1 addition & 1 deletion cmd/node-termination-handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ func drainOrCordonIfNecessary(interruptionEventStore *interruptioneventstore.Sto
}
}

interruptionEventStore.MarkAllAsDrained(nodeName)
interruptionEventStore.MarkAllAsProcessed(nodeName)
if nthConfig.WebhookURL != "" {
webhook.Post(nodeMetadata, drainEvent, nthConfig)
}
Expand Down
12 changes: 6 additions & 6 deletions docs/aemm_interruption_testing.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,14 @@ If you run the example above you might notice that the logs are heavily populate
```
2020/09/15 21:13:41 Sending interruption event to the interruption channel
2020/09/15 21:13:41 Got interruption event from channel {InstanceID:i-1234567890abcdef0 InstanceType:m4.xlarge PublicHostname:ec2-192-0-2-54.compute-1.amazonaws.com PublicIP:192.0.2.54 LocalHostname:ip-172-16-34-43.ec2.internal LocalIP:172.16.34.43 AvailabilityZone:us-east-1a} {EventID:spot-itn-47ddfb5e39791606bec3e91fea4cdfa86f86a60ddaf014c8b4af8e008f134b19 Kind:SPOT_ITN Description:Spot ITN received. Instance will be interrupted at 2020-09-15T21:15:41Z
State: NodeName:ip-192-168-123-456.us-east-1.compute.internal StartTime:2020-09-15 21:15:41 +0000 UTC EndTime:0001-01-01 00:00:00 +0000 UTC Drained:false PreDrainTask:0x113c8a0 PostDrainTask:<nil>}
State: NodeName:ip-192-168-123-456.us-east-1.compute.internal StartTime:2020-09-15 21:15:41 +0000 UTC EndTime:0001-01-01 00:00:00 +0000 UTC NodeProcessed:false PreDrainTask:0x113c8a0 PostDrainTask:<nil>}
WARNING: ignoring DaemonSet-managed Pods: default/amazon-ec2-metadata-mock-pszj2, kube-system/aws-node-bl2bj, kube-system/aws-node-termination-handler-2pvjr, kube-system/kube-proxy-fct9f
evicting pod "coredns-67bfd975c5-rgkh7"
evicting pod "coredns-67bfd975c5-6g88n"
2020/09/15 21:13:42 Node "ip-192-168-123-456.us-east-1.compute.internal" successfully cordoned and drained.
2020/09/15 21:13:43 Sending interruption event to the interruption channel
2020/09/15 21:13:43 Got interruption event from channel {InstanceID:i-1234567890abcdef0 InstanceType:m4.xlarge PublicHostname:ec2-192-0-2-54.compute-1.amazonaws.com PublicIP:192.0.2.54 LocalHostname:ip-172-16-34-43.ec2.internal LocalIP:172.16.34.43 AvailabilityZone:us-east-1a} {EventID:spot-itn-97be476b6246aba6401ba36e54437719bfdf987773e9c83fe30336eb7fea9704 Kind:SPOT_ITN Description:Spot ITN received. Instance will be interrupted at 2020-09-15T21:15:43Z
State: NodeName:ip-192-168-123-456.us-east-1.compute.internal StartTime:2020-09-15 21:15:43 +0000 UTC EndTime:0001-01-01 00:00:00 +0000 UTC Drained:false PreDrainTask:0x113c8a0 PostDrainTask:<nil>}
State: NodeName:ip-192-168-123-456.us-east-1.compute.internal StartTime:2020-09-15 21:15:43 +0000 UTC EndTime:0001-01-01 00:00:00 +0000 UTC NodeProcessed:false PreDrainTask:0x113c8a0 PostDrainTask:<nil>}
WARNING: ignoring DaemonSet-managed Pods: default/amazon-ec2-metadata-mock-pszj2, kube-system/aws-node-bl2bj, kube-system/aws-node-termination-handler-2pvjr, kube-system/kube-proxy-fct9f
2020/09/15 21:13:44 Node "ip-192-168-123-456.us-east-1.compute.internal" successfully cordoned and drained.
2020/09/15 21:13:45 Sending interruption event to the interruption channel
Expand All @@ -54,17 +54,17 @@ WARNING: ignoring DaemonSet-managed Pods: default/amazon-ec2-metadata-mock-pszj2

This isn't a mistake: by default, AEMM will respond to any request for metadata with a spot interruption occurring 2 minutes
later than the request time.\* AWS Node Termination Handler polls for events every 2 seconds by default, so the effect is
that new interruption events are found and processed every 2 seconds.
that new interruption events are found and processed every 2 seconds.

In reality there will only be a single interruption event, and you can mock this by setting the `spot.time` parameter of
AEMM when installing it.
AEMM when installing it.
```
helm install amazon-ec2-metadata-mock amazon-ec2-metadata-mock-1.6.0.tgz \
--set aemm.spot.time="2020-09-09T22:40:47Z" \
--namespace default
```

Now when you check the logs you should only see a single event get processed.
Now when you check the logs you should only see a single event get processed.

For more ways of configuring AEMM check out the [Helm configuration page](https://github.com/aws/amazon-ec2-metadata-mock/tree/main/helm/amazon-ec2-metadata-mock).

Expand All @@ -82,7 +82,7 @@ for the local tests that use a kind cluster, and [here](https://github.com/aws/a
for the eks-cluster e2e tests.

Check out the [ReadMe](https://github.com/aws/aws-node-termination-handler/tree/main/test) in our test folder for more
info on the e2e tests.
info on the e2e tests.

---

Expand Down
10 changes: 5 additions & 5 deletions pkg/interruptioneventstore/interruption-event-store.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ func (s *Store) ShouldDrainNode() bool {

func (s *Store) shouldEventDrain(interruptionEvent *monitor.InterruptionEvent) bool {
_, ignored := s.ignoredEvents[interruptionEvent.EventID]
if !ignored && !interruptionEvent.Drained && s.TimeUntilDrain(interruptionEvent) <= 0 {
if !ignored && !interruptionEvent.NodeProcessed && s.TimeUntilDrain(interruptionEvent) <= 0 {
return true
}
return false
Expand All @@ -108,19 +108,19 @@ func (s *Store) TimeUntilDrain(interruptionEvent *monitor.InterruptionEvent) tim
return drainTime.Sub(time.Now())
}

// MarkAllAsDrained should be called after the node has been drained to prevent further unnecessary drain calls to the k8s api
func (s *Store) MarkAllAsDrained(nodeName string) {
// MarkAllAsProcessed should be called after the node has been drained to prevent further unnecessary drain calls to the k8s api
func (s *Store) MarkAllAsProcessed(nodeName string) {
s.Lock()
defer s.Unlock()
for _, interruptionEvent := range s.interruptionEventStore {
if interruptionEvent.NodeName == nodeName {
interruptionEvent.Drained = true
interruptionEvent.NodeProcessed = true
}
}
}

// IgnoreEvent will store an event ID so that monitor loops cannot write to the store with the same event ID
// Drain actions are ignored on the passed in event ID by setting the Drained flag to true
// Drain actions are ignored on the passed in event ID by setting the NodeProcessed flag to true
func (s *Store) IgnoreEvent(eventID string) {
if eventID == "" {
return
Expand Down
22 changes: 11 additions & 11 deletions pkg/interruptioneventstore/interruption-event-store_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,26 +99,26 @@ func TestShouldDrainNode(t *testing.T) {
h.Equals(t, true, store.ShouldDrainNode())
}

func TestMarkAllAsDrained(t *testing.T) {
func TestMarkAllAsProcessed(t *testing.T) {
store := interruptioneventstore.New(config.Config{})
event1 := &monitor.InterruptionEvent{
EventID: "1",
StartTime: time.Now().Add(time.Second * 20),
Drained: false,
NodeName: node1,
EventID: "1",
StartTime: time.Now().Add(time.Second * 20),
NodeProcessed: false,
NodeName: node1,
}
event2 := &monitor.InterruptionEvent{
EventID: "2",
StartTime: time.Now().Add(time.Second * 20),
Drained: false,
NodeName: node1,
EventID: "2",
StartTime: time.Now().Add(time.Second * 20),
NodeProcessed: false,
NodeName: node1,
}

store.AddInterruptionEvent(event1)
store.AddInterruptionEvent(event2)
store.MarkAllAsDrained(node1)
store.MarkAllAsProcessed(node1)

// When events are marked as Drained=true, then they are no longer
// When events are marked as NodeProcessed=true, then they are no longer
// returned by the GetActiveEvent func, so we expect false
_, isActive := store.GetActiveEvent()
h.Equals(t, false, isActive)
Expand Down
2 changes: 1 addition & 1 deletion pkg/monitor/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ type InterruptionEvent struct {
InstanceID string
StartTime time.Time
EndTime time.Time
Drained bool
NodeProcessed bool
InProgress bool
PreDrainTask DrainTask `json:"-"`
PostDrainTask DrainTask `json:"-"`
Expand Down

0 comments on commit 646ed7c

Please sign in to comment.