fix undefined log levels

bwagner5 committed Apr 8, 2021
1 parent a90a50d commit dbef3ef
Showing 12 changed files with 77 additions and 79 deletions.
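The handler logs through rs/zerolog, where `log.Log()` starts an event with no level field: the message carries no severity for downstream filtering, and per zerolog's documentation only `zerolog.Disabled` suppresses such events, so level-less calls ignore the configured log level. This commit replaces them with leveled constructors; note that `log.Err(err)` logs at error level when `err` is non-nil (info level when nil), which is why several `Warn().Err(...)` call sites below become `Err(...)`. A minimal sketch of the difference, assuming only the zerolog library (hypothetical example code, not from this repository):

```go
package main

import (
	"errors"

	"github.com/rs/zerolog"
	"github.com/rs/zerolog/log"
)

func main() {
	// Drop everything below warn severity.
	zerolog.SetGlobalLevel(zerolog.WarnLevel)

	log.Log().Msg("level-less: printed anyway, with no level field to filter on")
	log.Info().Msg("info: suppressed by the warn global level")
	log.Warn().Msg("warn: printed with level=warn")
	// Err(e) attaches the error and picks the level from it: error if non-nil.
	log.Err(errors.New("boom")).Msg("error: printed with level and error fields")
}
```

With explicit levels attached, every message in the diff below can be filtered consistently by the configured log level.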
44 changes: 22 additions & 22 deletions cmd/node-termination-handler.go
@@ -125,7 +125,7 @@ func main() {
wait.PollImmediateUntil(2*time.Second, func() (done bool, err error) {
err = handleRebootUncordon(nthConfig.NodeName, interruptionEventStore, *node)
if err != nil {
log.Log().Err(err).Msgf("Unable to complete the uncordon after reboot workflow on startup, retrying")
log.Err(err).Msgf("Unable to complete the uncordon after reboot workflow on startup, retrying")
}
return false, nil
}, stopCh)
@@ -152,7 +152,7 @@ func main() {
if nthConfig.EnableSQSTerminationDraining {
creds, err := nthConfig.AWSSession.Config.Credentials.Get()
if err != nil {
log.Warn().Err(err).Msg("Unable to get AWS credentials")
log.Err(err).Msg("Unable to get AWS credentials")
}
log.Debug().Msgf("AWS Credentials retrieved from provider: %s", creds.ProviderName)

@@ -171,13 +171,13 @@ func main() {

for _, fn := range monitoringFns {
go func(monitor monitor.Monitor) {
log.Log().Str("event_type", monitor.Kind()).Msg("Started monitoring for events")
log.Info().Str("event_type", monitor.Kind()).Msg("Started monitoring for events")
var previousErr error
var duplicateErrCount int
for range time.Tick(time.Second * 2) {
err := monitor.Monitor()
if err != nil {
log.Log().Str("event_type", monitor.Kind()).Err(err).Msg("There was a problem monitoring for events")
log.Warn().Str("event_type", monitor.Kind()).Err(err).Msg("There was a problem monitoring for events")
metrics.ErrorEventsInc(monitor.Kind())
if previousErr != nil && err.Error() == previousErr.Error() {
duplicateErrCount++
@@ -186,7 +186,7 @@ func main() {
previousErr = err
}
if duplicateErrCount >= duplicateErrThreshold {
log.Log().Msg("Stopping NTH - Duplicate Error Threshold hit.")
log.Warn().Msg("Stopping NTH - Duplicate Error Threshold hit.")
panic(fmt.Sprintf("%v", err))
}
}
@@ -195,11 +195,11 @@ func main() {
}

go watchForInterruptionEvents(interruptionChan, interruptionEventStore)
log.Log().Msg("Started watching for interruption events")
log.Log().Msg("Kubernetes AWS Node Termination Handler has started successfully!")
log.Info().Msg("Started watching for interruption events")
log.Info().Msg("Kubernetes AWS Node Termination Handler has started successfully!")

go watchForCancellationEvents(cancelChan, interruptionEventStore, node, metrics)
log.Log().Msg("Started watching for event cancellations")
log.Info().Msg("Started watching for event cancellations")

var wg sync.WaitGroup

@@ -222,7 +222,7 @@ func main() {
}
}
}
log.Log().Msg("AWS Node Termination Handler is shutting down")
log.Info().Msg("AWS Node Termination Handler is shutting down")
wg.Wait()
log.Debug().Msg("all event processors finished")
}
@@ -260,17 +260,17 @@ func watchForCancellationEvents(cancelChan <-chan monitor.InterruptionEvent, int
nodeName := interruptionEvent.NodeName
interruptionEventStore.CancelInterruptionEvent(interruptionEvent.EventID)
if interruptionEventStore.ShouldUncordonNode(nodeName) {
log.Log().Msg("Uncordoning the node due to a cancellation event")
log.Info().Msg("Uncordoning the node due to a cancellation event")
err := node.Uncordon(nodeName)
if err != nil {
log.Log().Err(err).Msg("Uncordoning the node failed")
log.Info().Err(err).Msg("Uncordoning the node failed")
}
metrics.NodeActionsInc("uncordon", nodeName, err)

node.RemoveNTHLabels(nodeName)
node.RemoveNTHTaints(nodeName)
} else {
log.Log().Msg("Another interruption event is active, not uncordoning the node")
log.Info().Msg("Another interruption event is active, not uncordoning the node")
}
}
}
@@ -280,13 +280,13 @@ func drainOrCordonIfNecessary(interruptionEventStore *interruptioneventstore.Sto
nodeName := drainEvent.NodeName
nodeLabels, err := node.GetNodeLabels(nodeName)
if err != nil {
log.Warn().Err(err).Msgf("Unable to fetch node labels for node '%s' ", nodeName)
log.Err(err).Msgf("Unable to fetch node labels for node '%s' ", nodeName)
}
drainEvent.NodeLabels = nodeLabels
if drainEvent.PreDrainTask != nil {
err := drainEvent.PreDrainTask(*drainEvent, node)
if err != nil {
log.Log().Err(err).Msg("There was a problem executing the pre-drain task")
log.Err(err).Msg("There was a problem executing the pre-drain task")
}
metrics.NodeActionsInc("pre-drain", nodeName, err)
}
@@ -295,35 +295,35 @@ func drainOrCordonIfNecessary(interruptionEventStore *interruptioneventstore.Sto
err := node.Cordon(nodeName)
if err != nil {
if errors.IsNotFound(err) {
log.Warn().Err(err).Msgf("node '%s' not found in the cluster", nodeName)
log.Err(err).Msgf("node '%s' not found in the cluster", nodeName)
} else {
log.Log().Err(err).Msg("There was a problem while trying to cordon the node")
log.Err(err).Msg("There was a problem while trying to cordon the node")
os.Exit(1)
}
} else {
log.Log().Str("node_name", nodeName).Msg("Node successfully cordoned")
log.Info().Str("node_name", nodeName).Msg("Node successfully cordoned")
podNameList, err := node.FetchPodNameList(nodeName)
if err != nil {
log.Log().Err(err).Msgf("Unable to fetch running pods for node '%s' ", nodeName)
log.Err(err).Msgf("Unable to fetch running pods for node '%s' ", nodeName)
}
drainEvent.Pods = podNameList
err = node.LogPods(podNameList, nodeName)
if err != nil {
log.Log().Err(err).Msg("There was a problem while trying to log all pod names on the node")
log.Err(err).Msg("There was a problem while trying to log all pod names on the node")
}
metrics.NodeActionsInc("cordon", nodeName, err)
}
} else {
err := node.CordonAndDrain(nodeName)
if err != nil {
if errors.IsNotFound(err) {
log.Warn().Err(err).Msgf("node '%s' not found in the cluster", nodeName)
log.Err(err).Msgf("node '%s' not found in the cluster", nodeName)
} else {
log.Log().Err(err).Msg("There was a problem while trying to cordon and drain the node")
log.Err(err).Msg("There was a problem while trying to cordon and drain the node")
os.Exit(1)
}
} else {
log.Log().Str("node_name", nodeName).Msg("Node successfully cordoned and drained")
log.Info().Str("node_name", nodeName).Msg("Node successfully cordoned and drained")
metrics.NodeActionsInc("cordon-and-drain", nodeName, err)
}
}
8 changes: 4 additions & 4 deletions pkg/config/config.go
@@ -201,9 +201,9 @@ func ParseCliArgs() (config Config, err error) {
}

if isConfigProvided("pod-termination-grace-period", podTerminationGracePeriodConfigKey) && isConfigProvided("grace-period", gracePeriodConfigKey) {
log.Log().Msg("Deprecated argument \"grace-period\" and the replacement argument \"pod-termination-grace-period\" was provided. Using the newer argument \"pod-termination-grace-period\"")
log.Warn().Msg("Deprecated argument \"grace-period\" and the replacement argument \"pod-termination-grace-period\" was provided. Using the newer argument \"pod-termination-grace-period\"")
} else if isConfigProvided("grace-period", gracePeriodConfigKey) {
log.Log().Msg("Deprecated argument \"grace-period\" was provided. This argument will eventually be removed. Please switch to \"pod-termination-grace-period\" instead.")
log.Warn().Msg("Deprecated argument \"grace-period\" was provided. This argument will eventually be removed. Please switch to \"pod-termination-grace-period\" instead.")
config.PodTerminationGracePeriod = gracePeriod
}

@@ -239,7 +239,7 @@ func (c Config) Print() {
func (c Config) PrintJsonConfigArgs() {
// manually setting fields instead of using log.Log().Interface() to use snake_case instead of PascalCase
// intentionally did not log webhook configuration as there may be secrets
-log.Log().
+log.Info().
Bool("dry_run", c.DryRun).
Str("node_name", c.NodeName).
Str("metadata_url", c.MetadataURL).
@@ -277,7 +277,7 @@ func (c Config) PrintHumanConfigArgs() {
webhookURLDisplay = "<provided-not-displayed>"
}
// intentionally did not log webhook configuration as there may be secrets
-log.Log().Msgf(
+log.Info().Msgf(
"aws-node-termination-handler arguments: \n"+
"\tdry-run: %t,\n"+
"\tnode-name: %s,\n"+
15 changes: 7 additions & 8 deletions pkg/ec2metadata/ec2metadata.go
@@ -223,7 +223,7 @@ func (e *Service) Request(contextPath string) (*http.Response, error) {
if err != nil {
e.v2Token = ""
e.tokenTTL = -1
log.Log().Err(err).Msg("Unable to retrieve an IMDSv2 token, continuing with IMDSv1")
log.Err(err).Msg("Unable to retrieve an IMDSv2 token, continuing with IMDSv1")
} else {
e.v2Token = token
e.tokenTTL = ttl
@@ -267,7 +267,7 @@ func (e *Service) getV2Token() (string, int, error) {
httpReq := func() (*http.Response, error) {
return e.httpClient.Do(req)
}
log.Log().Msg("Trying to get token from IMDSv2")
log.Debug().Msg("Trying to get token from IMDSv2")
resp, err := retry(1, 2*time.Second, httpReq)
if err != nil {
return "", -1, err
@@ -284,7 +284,7 @@ func (e *Service) getV2Token() (string, int, error) {
if err != nil {
return "", -1, fmt.Errorf("IMDS v2 Token TTL header not sent in response: %w", err)
}
log.Log().Msg("Got token from IMDSv2")
log.Debug().Msg("Got token from IMDSv2")
return string(token), ttl, nil
}

@@ -307,8 +307,7 @@ func retry(attempts int, sleep time.Duration, httpReq func() (*http.Response, er
jitter := time.Duration(rand.Int63n(int64(sleep)))
sleep = sleep + jitter/2

log.Log().Msgf("Request failed. Attempts remaining: %d", attempts)
log.Log().Msgf("Sleep for %s seconds", sleep)
log.Warn().Msgf("Request failed. Attempts remaining: %d, sleeping for %s seconds", attempts, sleep)
time.Sleep(sleep)
return retry(attempts, 2*sleep, httpReq)
}
@@ -322,12 +321,12 @@ func (e *Service) GetNodeMetadata() NodeMetadata {
var metadata NodeMetadata
identityDoc, err := e.GetMetadataInfo(IdentityDocPath)
if err != nil {
log.Log().Err(err).Msg("Unable to fetch metadata from IMDS")
log.Err(err).Msg("Unable to fetch metadata from IMDS")
return metadata
}
err = json.NewDecoder(strings.NewReader(identityDoc)).Decode(&metadata)
if err != nil {
log.Log().Msg("Unable to fetch instance identity document from ec2 metadata")
log.Warn().Msg("Unable to fetch instance identity document from ec2 metadata")
metadata.InstanceID, _ = e.GetMetadataInfo(InstanceIDPath)
metadata.InstanceType, _ = e.GetMetadataInfo(InstanceTypePath)
metadata.LocalIP, _ = e.GetMetadataInfo(LocalIPPath)
@@ -340,7 +339,7 @@ func (e *Service) GetNodeMetadata() NodeMetadata {
metadata.PublicIP, _ = e.GetMetadataInfo(PublicIPPath)
metadata.LocalHostname, _ = e.GetMetadataInfo(LocalHostnamePath)

log.Log().Interface("metadata", metadata).Msg("Startup Metadata Retrieved")
log.Info().Interface("metadata", metadata).Msg("Startup Metadata Retrieved")

return metadata
}
2 changes: 1 addition & 1 deletion pkg/interruptioneventstore/interruption-event-store.go
@@ -61,7 +61,7 @@ func (s *Store) AddInterruptionEvent(interruptionEvent *monitor.InterruptionEven

s.Lock()
defer s.Unlock()
log.Log().Interface("event", interruptionEvent).Msg("Adding new event to the event store")
log.Info().Interface("event", interruptionEvent).Msg("Adding new event to the event store")
s.interruptionEventStore[interruptionEvent.EventID] = interruptionEvent
if _, ignored := s.ignoredEvents[interruptionEvent.EventID]; !ignored {
s.atLeastOneEvent = true
6 changes: 3 additions & 3 deletions pkg/monitor/scheduledevent/scheduled-event-monitor.go
@@ -96,7 +96,7 @@ func (m ScheduledEventMonitor) checkForScheduledEvents() ([]monitor.Interruption
notAfter, err = time.Parse(scheduledEventDateFormat, scheduledEvent.NotAfter)
if err != nil {
notAfter = notBefore
log.Log().Err(err).Msg("Unable to parse scheduled event end time, continuing")
log.Err(err).Msg("Unable to parse scheduled event end time, continuing")
}
}
events = append(events, monitor.InterruptionEvent{
@@ -128,7 +128,7 @@ func uncordonAfterRebootPreDrain(interruptionEvent monitor.InterruptionEvent, n
// if the node is already marked as unschedulable, then don't do anything
unschedulable, err := n.IsUnschedulable(nodeName)
if err == nil && unschedulable {
log.Log().Msg("Node is already marked unschedulable, not taking any action to add uncordon label.")
log.Debug().Msg("Node is already marked unschedulable, not taking any action to add uncordon label.")
return nil
} else if err != nil {
return fmt.Errorf("Encountered an error while checking if the node is unschedulable. Not setting an uncordon label: %w", err)
@@ -137,7 +137,7 @@ func uncordonAfterRebootPreDrain(interruptionEvent monitor.InterruptionEvent, n
if err != nil {
return fmt.Errorf("Unable to mark the node for uncordon: %w", err)
}
log.Log().Msg("Successfully applied uncordon after reboot action label to node.")
log.Info().Msg("Successfully applied uncordon after reboot action label to node.")
return nil
}

2 changes: 1 addition & 1 deletion pkg/monitor/sqsevent/asg-lifecycle-event.go
@@ -105,7 +105,7 @@ func (m SQSMonitor) asgTerminationToInterruptionEvent(event EventBridgeEvent, me
interruptionEvent.PreDrainTask = func(interruptionEvent monitor.InterruptionEvent, n node.Node) error {
err := n.TaintASGLifecycleTermination(interruptionEvent.NodeName, interruptionEvent.EventID)
if err != nil {
log.Warn().Err(err).Msgf("Unable to taint node with taint %s:%s", node.ASGLifecycleTerminationTaint, interruptionEvent.EventID)
log.Err(err).Msgf("Unable to taint node with taint %s:%s", node.ASGLifecycleTerminationTaint, interruptionEvent.EventID)
}
return nil
}
2 changes: 1 addition & 1 deletion pkg/monitor/sqsevent/rebalance-recommendation-event.go
@@ -78,7 +78,7 @@ func (m SQSMonitor) rebalanceRecommendationToInterruptionEvent(event EventBridge
interruptionEvent.PreDrainTask = func(interruptionEvent monitor.InterruptionEvent, n node.Node) error {
err := n.TaintRebalanceRecommendation(interruptionEvent.NodeName, interruptionEvent.EventID)
if err != nil {
log.Warn().Err(err).Msgf("Unable to taint node with taint %s:%s", node.RebalanceRecommendationTaint, interruptionEvent.EventID)
log.Err(err).Msgf("Unable to taint node with taint %s:%s", node.RebalanceRecommendationTaint, interruptionEvent.EventID)
}
return nil
}
2 changes: 1 addition & 1 deletion pkg/monitor/sqsevent/spot-itn-event.go
@@ -79,7 +79,7 @@ func (m SQSMonitor) spotITNTerminationToInterruptionEvent(event EventBridgeEvent
interruptionEvent.PreDrainTask = func(interruptionEvent monitor.InterruptionEvent, n node.Node) error {
err := n.TaintSpotItn(interruptionEvent.NodeName, interruptionEvent.EventID)
if err != nil {
log.Warn().Err(err).Msgf("Unable to taint node with taint %s:%s", node.SpotInterruptionTaint, interruptionEvent.EventID)
log.Err(err).Msgf("Unable to taint node with taint %s:%s", node.SpotInterruptionTaint, interruptionEvent.EventID)
}
return nil
}
8 changes: 4 additions & 4 deletions pkg/monitor/sqsevent/sqs-monitor.go
@@ -68,16 +68,16 @@ func (m SQSMonitor) Monitor() error {
switch {
case errors.Is(err, ErrNodeStateNotRunning):
// If the node is no longer running, just log and delete the message. If message deletion fails, count it as an error.
log.Warn().Err(err).Msg("dropping event for an already terminated node")
log.Err(err).Msg("dropping event for an already terminated node")
errs := m.deleteMessages([]*sqs.Message{message})
if len(errs) > 0 {
-log.Warn().Err(errs[0]).Msg("error deleting event for already terminated node")
+log.Err(errs[0]).Msg("error deleting event for already terminated node")
failedEvents++
}

case err != nil:
// Log errors and record as failed events
log.Warn().Err(err).Msg("ignoring event due to error")
log.Err(err).Msg("ignoring event due to error")
failedEvents++

case err == nil && interruptionEvent != nil && interruptionEvent.Kind == SQSTerminateKind:
@@ -176,7 +176,7 @@ func (m SQSMonitor) deleteMessages(messages []*sqs.Message) []error {
if err != nil {
errs = append(errs, err)
}
log.Log().Msgf("SQS Deleted Message: %s", message)
log.Debug().Msgf("SQS Deleted Message: %s", message)
}
return errs
}