Skip to content

Commit 1734c24

Browse files
authored
fix: [NPM] Reduce/Refactor Noisy NPM Logs (#3468)
* removed all logs from npm dataplane (except error/warning logs) * removed all logs from npm controller (except error/warning logs) * restored logs that are ununused by current npm (v2) * removed test files * resolved comments * keep log related to reconciling chain placement when the chain is not in the right place * added bootup logs back * Removed two more noisy logs * Add loglevel config option when printing application insight logs * Updated all non-error/warning logs to commented out and with a vap TODO * fixed typo * small typo fix * updated configmap with loglevel * updated default value * added a default value for loglevel * fixed typo in json * removed comma * changed loglevel to info in configmap * add a short sleep in TestNetPolInBackgroundSkipAddAfterRemove * test remove dataplane changes to see if race condition fixes * Revert "test remove dataplane changes to see if race condition fixes" This reverts commit 08697eb. * test * Revert "test" This reverts commit 449c2af. * test * update dataplane to test if changes are flagged in race * added stop channels to unit tests to avoid race condtiions * add non noisy logs back * increased time * revert time change after RunPeriodicTasks * test with 1000 seconds * 5000 milliseconds * tweaked the delay * update to 1500 for defer * increased to 1500 * increase to 2000 * removed kubernetes
1 parent 190269a commit 1734c24

18 files changed

+305
-143
lines changed

npm/azure-npm.yaml

+2-1
Original file line numberDiff line numberDiff line change
@@ -173,5 +173,6 @@ data:
173173
"PlaceAzureChainFirst": false,
174174
"ApplyInBackground": true,
175175
"NetPolInBackground": true
176-
}
176+
},
177+
"LogLevel": "info"
177178
}

npm/cmd/start.go

+5-1
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,11 @@ func start(config npmconfig.Config, flags npmconfig.Flags) error {
135135
)
136136
}
137137

138-
err = metrics.CreateTelemetryHandle(config.NPMVersion(), version, npm.GetAIMetadata())
138+
logLevel := config.LogLevel
139+
if logLevel == "" {
140+
logLevel = npmconfig.DefaultConfig.LogLevel
141+
}
142+
err = metrics.CreateTelemetryHandle(config.NPMVersion(), version, npm.GetAIMetadata(), logLevel)
139143
if err != nil {
140144
klog.Infof("CreateTelemetryHandle failed with error %v. AITelemetry is not initialized.", err)
141145
}

npm/cmd/start_daemon.go

+5-1
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,11 @@ func startDaemon(config npmconfig.Config) error {
9494
return fmt.Errorf("failed to create dataplane: %w", err)
9595
}
9696

97-
err = metrics.CreateTelemetryHandle(config.NPMVersion(), version, npm.GetAIMetadata())
97+
logLevel := config.LogLevel
98+
if logLevel == "" {
99+
logLevel = npmconfig.DefaultConfig.LogLevel
100+
}
101+
err = metrics.CreateTelemetryHandle(config.NPMVersion(), version, npm.GetAIMetadata(), logLevel)
98102
if err != nil {
99103
klog.Infof("CreateTelemetryHandle failed with error %v. AITelemetry is not initialized.", err)
100104
}

npm/cmd/start_server.go

+5-1
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,11 @@ func startControlplane(config npmconfig.Config, flags npmconfig.Flags) error {
113113
return fmt.Errorf("failed to create NPM controlplane manager: %w", err)
114114
}
115115

116-
err = metrics.CreateTelemetryHandle(config.NPMVersion(), version, npm.GetAIMetadata())
116+
logLevel := config.LogLevel
117+
if logLevel == "" {
118+
logLevel = npmconfig.DefaultConfig.LogLevel
119+
}
120+
err = metrics.CreateTelemetryHandle(config.NPMVersion(), version, npm.GetAIMetadata(), logLevel)
117121
if err != nil {
118122
klog.Infof("CreateTelemetryHandle failed with error %v. AITelemetry is not initialized.", err)
119123
}

npm/config/config.go

+4
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ var DefaultConfig = Config{
5353
NetPolInBackground: true,
5454
EnableNPMLite: false,
5555
},
56+
57+
// Setting LogLevel to "info" by default. Set to "debug" to get application insight logs (creates a listener that outputs diagnosticMessageWriter logs).
58+
LogLevel: "info",
5659
}
5760

5861
type GrpcServerConfig struct {
@@ -82,6 +85,7 @@ type Config struct {
8285
MaxPendingNetPols int `json:"MaxPendingNetPols,omitempty"`
8386
NetPolInvervalInMilliseconds int `json:"NetPolInvervalInMilliseconds,omitempty"`
8487
Toggles Toggles `json:"Toggles,omitempty"`
88+
LogLevel string `json:"LogLevel,omitempty"`
8589
}
8690

8791
type Toggles struct {

npm/metrics/ai-utils.go

+6-2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package metrics
33
import (
44
"fmt"
55
"strconv"
6+
"strings"
67
"time"
78

89
"github.com/Azure/azure-container-networking/aitelemetry"
@@ -21,15 +22,18 @@ var (
2122
)
2223

2324
// CreateTelemetryHandle creates a handler to initialize AI telemetry
24-
func CreateTelemetryHandle(npmVersionNum int, imageVersion, aiMetadata string) error {
25+
func CreateTelemetryHandle(npmVersionNum int, imageVersion, aiMetadata, logLevel string) error {
2526
npmVersion = npmVersionNum
27+
debugMode := strings.EqualFold(logLevel, "debug")
28+
klog.Infof("LogLevel is %s. Debugmode is set to %v.", logLevel, debugMode)
29+
2630
aiConfig := aitelemetry.AIConfig{
2731
AppName: util.AzureNpmFlag,
2832
AppVersion: imageVersion,
2933
BatchSize: util.BatchSizeInBytes,
3034
BatchInterval: util.BatchIntervalInSecs,
3135
RefreshTimeout: util.RefreshTimeoutInSecs,
32-
DebugMode: util.DebugMode,
36+
DebugMode: debugMode,
3337
GetEnvRetryCount: util.GetEnvRetryCount,
3438
GetEnvRetryWaitTimeInSecs: util.GetEnvRetryWaitTimeInSecs,
3539
}

npm/pkg/controlplane/controllers/v2/namespaceController.go

+25-14
Original file line numberDiff line numberDiff line change
@@ -164,14 +164,17 @@ func (nsc *NamespaceController) Run(stopCh <-chan struct{}) {
164164
defer utilruntime.HandleCrash()
165165
defer nsc.workqueue.ShutDown()
166166

167-
klog.Info("Starting Namespace controller\n")
168-
klog.Info("Starting workers")
167+
// TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level
168+
// klog.Info("Starting Namespace controller\n")
169+
// klog.Info("Starting workers")
169170
// Launch workers to process namespace resources
170171
go wait.Until(nsc.runWorker, time.Second, stopCh)
171172

172-
klog.Info("Started workers")
173+
// TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level
174+
// klog.Info("Started workers")
173175
<-stopCh
174-
klog.Info("Shutting down workers")
176+
// TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level
177+
// klog.Info("Shutting down workers")
175178
}
176179

177180
func (nsc *NamespaceController) runWorker() {
@@ -209,7 +212,8 @@ func (nsc *NamespaceController) processNextWorkItem() bool {
209212
// Finally, if no error occurs we Forget this item so it does not
210213
// get queued again until another change happens.
211214
nsc.workqueue.Forget(obj)
212-
klog.Infof("Successfully synced '%s'", key)
215+
// TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level
216+
// klog.Infof("Successfully synced '%s'", key)
213217
return nil
214218
}(obj)
215219
if err != nil {
@@ -321,7 +325,8 @@ func (nsc *NamespaceController) syncAddNamespace(nsObj *corev1.Namespace) error
321325
// Add the namespace to its label's ipset list.
322326
for nsLabelKey, nsLabelVal := range nsObj.ObjectMeta.Labels {
323327
nsLabelKeyValue := util.GetIpSetFromLabelKV(nsLabelKey, nsLabelVal)
324-
klog.Infof("Adding namespace %s to ipset list %s and %s", nsObj.ObjectMeta.Name, nsLabelKey, nsLabelKeyValue)
328+
// TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level
329+
// klog.Infof("Adding namespace %s to ipset list %s and %s", nsObj.ObjectMeta.Name, nsLabelKey, nsLabelKeyValue)
325330
labelIPSets := []*ipsets.IPSetMetadata{
326331
ipsets.NewIPSetMetadata(nsLabelKey, ipsets.KeyLabelOfNamespace),
327332
ipsets.NewIPSetMetadata(nsLabelKeyValue, ipsets.KeyValueLabelOfNamespace),
@@ -344,7 +349,8 @@ func (nsc *NamespaceController) syncAddNamespace(nsObj *corev1.Namespace) error
344349
func (nsc *NamespaceController) syncUpdateNamespace(newNsObj *corev1.Namespace) (metrics.OperationKind, error) {
345350
var err error
346351
newNsName, newNsLabel := newNsObj.ObjectMeta.Name, newNsObj.ObjectMeta.Labels
347-
klog.Infof("NAMESPACE UPDATING:\n namespace: [%s/%v]", newNsName, newNsLabel)
352+
// TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level
353+
// klog.Infof("NAMESPACE UPDATING:\n namespace: [%s/%v]", newNsName, newNsLabel)
348354

349355
// If previous syncAddNamespace failed for some reasons
350356
// before caching npm namespace object or syncUpdateNamespace is called due to namespace creation event,
@@ -373,7 +379,8 @@ func (nsc *NamespaceController) syncUpdateNamespace(newNsObj *corev1.Namespace)
373379
}
374380
toBeRemoved := []*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata(newNsName, ipsets.Namespace)}
375381

376-
klog.Infof("Deleting namespace %s from ipset list %s", newNsName, nsLabelVal)
382+
// TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level
383+
// klog.Infof("Deleting namespace %s from ipset list %s", newNsName, nsLabelVal)
377384
if err = nsc.dp.RemoveFromList(labelSet, toBeRemoved); err != nil {
378385
metrics.SendErrorLogAndMetric(util.NSID, "[UpdateNamespace] Error: failed to delete namespace %s from ipset list %s with err: %v", newNsName, nsLabelVal, err)
379386
return metrics.UpdateOp, fmt.Errorf("failed to remove from list during sync update namespace with err %w", err)
@@ -389,7 +396,8 @@ func (nsc *NamespaceController) syncUpdateNamespace(newNsObj *corev1.Namespace)
389396

390397
// Add the namespace to its label's ipset list.
391398
for _, nsLabelVal := range addToIPSets {
392-
klog.Infof("Adding namespace %s to ipset list %s", newNsName, nsLabelVal)
399+
// TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level
400+
// klog.Infof("Adding namespace %s to ipset list %s", newNsName, nsLabelVal)
393401

394402
var labelSet []*ipsets.IPSetMetadata
395403
if util.IsKeyValueLabelSetName(nsLabelVal) {
@@ -422,29 +430,32 @@ func (nsc *NamespaceController) syncUpdateNamespace(newNsObj *corev1.Namespace)
422430

423431
// cleanDeletedNamespace handles deleting namespace from ipset.
424432
func (nsc *NamespaceController) cleanDeletedNamespace(cachedNsKey string) error {
425-
klog.Infof("NAMESPACE DELETING: [%s]", cachedNsKey)
433+
// TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level
434+
// klog.Infof("NAMESPACE DELETING: [%s]", cachedNsKey)
426435
cachedNsObj, exists := nsc.npmNamespaceCache.NsMap[cachedNsKey]
427436
if !exists {
428437
return nil
429438
}
430-
431-
klog.Infof("NAMESPACE DELETING cached labels: [%s/%v]", cachedNsKey, cachedNsObj.LabelsMap)
439+
// TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level
440+
// klog.Infof("NAMESPACE DELETING cached labels: [%s/%v]", cachedNsKey, cachedNsObj.LabelsMap)
432441

433442
var err error
434443
toBeDeletedNs := []*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata(cachedNsKey, ipsets.Namespace)}
435444
// Delete the namespace from its label's ipset list.
436445
for nsLabelKey, nsLabelVal := range cachedNsObj.LabelsMap {
437446

438447
labelKey := ipsets.NewIPSetMetadata(nsLabelKey, ipsets.KeyLabelOfNamespace)
439-
klog.Infof("Deleting namespace %s from ipset list %s", cachedNsKey, labelKey)
448+
// TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level
449+
// klog.Infof("Deleting namespace %s from ipset list %s", cachedNsKey, labelKey)
440450
if err = nsc.dp.RemoveFromList(labelKey, toBeDeletedNs); err != nil {
441451
metrics.SendErrorLogAndMetric(util.NSID, "[DeleteNamespace] Error: failed to delete namespace %s from ipset list %s with err: %v", cachedNsKey, labelKey, err)
442452
return fmt.Errorf("failed to clean deleted namespace when deleting key with err %w", err)
443453
}
444454

445455
labelIpsetName := util.GetIpSetFromLabelKV(nsLabelKey, nsLabelVal)
446456
labelKeyValue := ipsets.NewIPSetMetadata(labelIpsetName, ipsets.KeyValueLabelOfNamespace)
447-
klog.Infof("Deleting namespace %s from ipset list %s", cachedNsKey, labelIpsetName)
457+
// TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level
458+
// klog.Infof("Deleting namespace %s from ipset list %s", cachedNsKey, labelIpsetName)
448459
if err = nsc.dp.RemoveFromList(labelKeyValue, toBeDeletedNs); err != nil {
449460
metrics.SendErrorLogAndMetric(util.NSID, "[DeleteNamespace] Error: failed to delete namespace %s from ipset list %s with err: %v", cachedNsKey, labelIpsetName, err)
450461
return fmt.Errorf("failed to clean deleted namespace when deleting key value with err %w", err)

npm/pkg/controlplane/controllers/v2/networkPolicyController.go

+8-4
Original file line numberDiff line numberDiff line change
@@ -148,12 +148,15 @@ func (c *NetworkPolicyController) Run(stopCh <-chan struct{}) {
148148
defer utilruntime.HandleCrash()
149149
defer c.workqueue.ShutDown()
150150

151-
klog.Infof("Starting Network Policy worker")
151+
// TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level
152+
// klog.Infof("Starting Network Policy worker")
152153
go wait.Until(c.runWorker, time.Second, stopCh)
153154

154-
klog.Infof("Started Network Policy worker")
155+
// TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level
156+
// klog.Infof("Started Network Policy worker")
155157
<-stopCh
156-
klog.Info("Shutting down Network Policy workers")
158+
// TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level
159+
// klog.Info("Shutting down Network Policy workers")
157160
}
158161

159162
func (c *NetworkPolicyController) runWorker() {
@@ -190,7 +193,8 @@ func (c *NetworkPolicyController) processNextWorkItem() bool {
190193
// Finally, if no error occurs we Forget this item so it does not
191194
// get queued again until another change happens.
192195
c.workqueue.Forget(obj)
193-
klog.Infof("Successfully synced '%s'", key)
196+
// TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level
197+
// klog.Infof("Successfully synced '%s'", key)
194198
return nil
195199
}(obj)
196200
if err != nil {

0 commit comments

Comments
 (0)