Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NETOBSERV-754: add pprof, improve memory footprint #449

Merged
merged 5 commits into from
Oct 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .mk/development.mk
Original file line number Diff line number Diff line change
Expand Up @@ -188,3 +188,19 @@ set-release-kind-downstream:
@echo -e "\n==> Redeploying..."
kubectl rollout status -n $(NAMESPACE) --timeout=60s deployment netobserv-controller-manager
kubectl wait -n $(NAMESPACE) --timeout=60s --for condition=Available=True deployment netobserv-controller-manager

# pprof: enable the operator's profiling endpoint by setting PROFILING_BIND_ADDRESS
# on the manager container, wait for the rollout to complete, then chain into
# pprof-pf to port-forward the endpoint locally. See DEVELOPMENT.md#profiling.
.PHONY: pprof
pprof:
	@echo -e "\n==> Enabling pprof... Check https://github.com/netobserv/network-observability-operator/blob/main/DEVELOPMENT.md#profiling for help."
	kubectl -n $(NAMESPACE) set env deployment netobserv-controller-manager -c "manager" PROFILING_BIND_ADDRESS=:6060
	@echo -e "\n==> Redeploying..."
	kubectl rollout status -n $(NAMESPACE) --timeout=60s deployment netobserv-controller-manager
	kubectl wait -n $(NAMESPACE) --timeout=60s --for condition=Available=True deployment netobserv-controller-manager
	sleep 2
	$(MAKE) pprof-pf

# pprof-pf: port-forward local port 6060 to the operator pod's pprof endpoint.
# Lists the pods first so the forwarded pod is visible in the output.
.PHONY: pprof-pf
pprof-pf:
	@echo -e "\n==> Port-forwarding..."
	kubectl get pods -n $(NAMESPACE)
	kubectl port-forward -n $(NAMESPACE) $(shell kubectl get pod -n $(NAMESPACE) -l app=netobserv-operator -o jsonpath="{.items[0].metadata.name}") 6060
15 changes: 15 additions & 0 deletions DEVELOPMENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -384,3 +384,18 @@ Remove the tag after you tested:
git tag -d "0.0.0-rc0"
git push --delete upstream 0.0.0-rc0
```

## Profiling

You can use `pprof` for profiling. Run the `pprof` make target to enable the profiling endpoint and port-forward it on port 6060:

```bash
make pprof
```

In another terminal, run for instance:

```bash
curl "http://localhost:6060/debug/pprof/heap?gc" -o /tmp/heap
go tool pprof -http localhost:3435 /tmp/heap
```
Original file line number Diff line number Diff line change
Expand Up @@ -687,6 +687,7 @@ spec:
- --flowlogs-pipeline-image=$(RELATED_IMAGE_FLOWLOGS_PIPELINE)
- --console-plugin-image=$(RELATED_IMAGE_CONSOLE_PLUGIN)
- --downstream-deployment=$(DOWNSTREAM_DEPLOYMENT)
- --profiling-bind-address=$(PROFILING_BIND_ADDRESS)
command:
- /manager
env:
Expand All @@ -698,6 +699,7 @@ spec:
value: quay.io/netobserv/network-observability-console-plugin:v0.1.11
- name: DOWNSTREAM_DEPLOYMENT
value: "false"
- name: PROFILING_BIND_ADDRESS
image: quay.io/netobserv/network-observability-operator:1.0.4
imagePullPolicy: Always
livenessProbe:
Expand Down
3 changes: 3 additions & 0 deletions config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ spec:
- --flowlogs-pipeline-image=$(RELATED_IMAGE_FLOWLOGS_PIPELINE)
- --console-plugin-image=$(RELATED_IMAGE_CONSOLE_PLUGIN)
- --downstream-deployment=$(DOWNSTREAM_DEPLOYMENT)
- --profiling-bind-address=$(PROFILING_BIND_ADDRESS)
env:
- name: RELATED_IMAGE_EBPF_AGENT
value: quay.io/netobserv/netobserv-ebpf-agent:v0.3.2
Expand All @@ -37,6 +38,8 @@ spec:
value: quay.io/netobserv/network-observability-console-plugin:v0.1.11
- name: DOWNSTREAM_DEPLOYMENT
value: "false"
- name: PROFILING_BIND_ADDRESS
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why not call it PROFILE_PORT, since what we pass here is just the port, like we do in the agent? Also, please add a profiling md file to the docs, with examples of the different collections and HOWTOs.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the operator already uses this kind of addresses (e.g. for its metric server) so I prefer to be consistent within this repo
I've added some doc in DEVELOPMENT.md

value: ""
image: controller:latest
name: manager
imagePullPolicy: Always
Expand Down
8 changes: 6 additions & 2 deletions controllers/flowcollector_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,9 +243,13 @@ func (r *FlowCollectorReconciler) SetupWithManager(ctx context.Context, mgr ctrl
return err
}

r.watcher = watchers.RegisterWatcher(builder)
ctrl, err := builder.Build(r)
if err != nil {
return err
}
r.watcher = watchers.NewWatcher(ctrl, mgr.GetCache())

return builder.Complete(r)
return nil
}

func (r *FlowCollectorReconciler) setupDiscovery(ctx context.Context, mgr ctrl.Manager, builder *builder.Builder) error {
Expand Down
8 changes: 5 additions & 3 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,10 @@ import (
"context"
"flag"
"fmt"
_ "net/http/pprof"
"os"

"go.uber.org/zap/zapcore"

"github.com/netobserv/network-observability-operator/controllers/operator"

// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
// to ensure that exec-entrypoint and run can make use of them.
osv1alpha1 "github.com/openshift/api/console/v1alpha1"
Expand All @@ -50,6 +48,7 @@ import (
flowsv1beta1 "github.com/netobserv/network-observability-operator/api/v1beta1"
"github.com/netobserv/network-observability-operator/controllers"
"github.com/netobserv/network-observability-operator/controllers/constants"
"github.com/netobserv/network-observability-operator/controllers/operator"
//+kubebuilder:scaffold:imports
)

Expand Down Expand Up @@ -81,12 +80,14 @@ func main() {
var metricsAddr string
var enableLeaderElection bool
var probeAddr string
var pprofAddr string
var versionFlag bool

config := operator.Config{}

flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
flag.StringVar(&pprofAddr, "profiling-bind-address", "", "The address the profiling endpoint binds to, such as ':6060'. Leave unset to disable profiling.")
flag.BoolVar(&enableLeaderElection, "leader-elect", false,
"Enable leader election for controller manager. "+
"Enabling this will ensure there is only one active controller manager.")
Expand Down Expand Up @@ -124,6 +125,7 @@ func main() {
WebhookServer: webhook.NewServer(webhook.Options{
Port: 9443,
}),
PprofBindAddress: pprofAddr,
HealthProbeBindAddress: probeAddr,
LeaderElection: enableLeaderElection,
LeaderElectionID: "7a7ecdcd.netobserv.io",
Expand Down
89 changes: 55 additions & 34 deletions pkg/watchers/watcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,48 +7,47 @@ import (
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/cache"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/log"
rec "sigs.k8s.io/controller-runtime/pkg/reconcile"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"sigs.k8s.io/controller-runtime/pkg/source"

flowslatest "github.com/netobserv/network-observability-operator/api/v1beta1"
"github.com/netobserv/network-observability-operator/controllers/constants"
"github.com/netobserv/network-observability-operator/pkg/helper"
)

// Package-level placeholder providers for the two supported watchable kinds
// (Secrets and ConfigMaps), shared via kindToWatchable.
var (
	secrets SecretWatchable
	configs ConfigWatchable
)

// Watcher registers controller watches on externally-referenced objects
// (Secrets / ConfigMaps, e.g. for certificates) so that changes to them
// re-trigger a FlowCollector reconcile.
type Watcher struct {
	ctrl  controller.Controller // controller on which watches are registered
	cache cache.Cache           // cache used to obtain a dedicated informer per watched object
	// watched records which objects already have an active watch, keyed by kind/namespace/name (see key()).
	watched map[string]interface{}
	// defaultNamespace: presumably the fallback namespace for unqualified references — TODO confirm (usage not visible in this chunk)
	defaultNamespace string
	// NOTE(review): the two fields below appear superseded by the package-level
	// `secrets`/`configs` vars introduced in this change — confirm they should remain.
	secrets SecretWatchable
	configs ConfigWatchable
}

func NewWatcher() Watcher {
return Watcher{
// NewWatcher creates a Watcher bound to the given controller and cache.
// No informer is started at construction time: informers are registered
// lazily, one per watched object, so each one watches strictly the desired
// object rather than the whole cluster. Since watched objects can live in any
// namespace, namespace-based cache restriction cannot be used to limit memory
// consumption, hence this per-object strategy.
func NewWatcher(ctrl controller.Controller, cache cache.Cache) *Watcher {
	w := Watcher{
		ctrl:  ctrl,
		cache: cache,
	}
	w.watched = make(map[string]interface{})
	return &w
}

func RegisterWatcher(builder *builder.Builder) *Watcher {
w := NewWatcher()
w.registerWatches(builder, &w.secrets, flowslatest.RefTypeSecret)
w.registerWatches(builder, &w.configs, flowslatest.RefTypeConfigMap)
return &w
}

func (w *Watcher) registerWatches(builder *builder.Builder, watchable Watchable, kind flowslatest.MountableType) {
builder.Watches(
watchable.ProvidePlaceholder(),
handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, o client.Object) []rec.Request {
if w.isWatched(kind, o.GetName(), o.GetNamespace()) {
// Trigger FlowCollector reconcile
return []rec.Request{{NamespacedName: constants.FlowCollectorName}}
}
return []rec.Request{}
}),
)
// kindToWatchable maps a mountable kind to its shared Watchable provider:
// ConfigMaps resolve to the configs provider, anything else to secrets.
func kindToWatchable(kind flowslatest.MountableType) Watchable {
	switch kind {
	case flowslatest.RefTypeConfigMap:
		return &configs
	default:
		return &secrets
	}
}

func (w *Watcher) Reset(namespace string) {
Expand All @@ -60,8 +59,33 @@ func key(kind flowslatest.MountableType, name, namespace string) string {
return string(kind) + "/" + namespace + "/" + name
}

func (w *Watcher) watch(kind flowslatest.MountableType, name, namespace string) {
w.watched[key(kind, name, namespace)] = true
// watch lazily registers a controller watch for the given object, backed by a
// dedicated informer obtained from the cache. It is idempotent: if the object
// is already being watched, it returns immediately without error.
func (w *Watcher) watch(ctx context.Context, kind flowslatest.MountableType, obj client.Object) error {
	if w.isWatched(kind, obj.GetName(), obj.GetNamespace()) {
		// This watcher was already registered
		return nil
	}
	// Get (starting if necessary) an informer dedicated to this object.
	i, err := w.cache.GetInformer(ctx, obj)
	if err != nil {
		return err
	}
	// Note that currently, watches are never removed (they can't - cf https://github.com/kubernetes-sigs/controller-runtime/issues/1884)
	// This isn't a big deal here, as the number of watches that we set is very limited and not meant to grow over and over
	// (unless user keeps reconfiguring cert references endlessly)
	err = w.ctrl.Watch(
		&source.Informer{Informer: i},
		handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, o client.Object) []reconcile.Request {
			// Events for unwatched objects of the same kind are filtered out here.
			if w.isWatched(kind, o.GetName(), o.GetNamespace()) {
				// Trigger FlowCollector reconcile
				return []reconcile.Request{{NamespacedName: constants.FlowCollectorName}}
			}
			return []reconcile.Request{}
		}),
	)
	if err != nil {
		return err
	}
	// Mark as watched only after the watch was successfully registered.
	w.watched[key(kind, obj.GetName(), obj.GetNamespace())] = true
	return nil
}

func (w *Watcher) isWatched(kind flowslatest.MountableType, name, namespace string) bool {
Expand Down Expand Up @@ -137,19 +161,16 @@ func (w *Watcher) reconcile(ctx context.Context, cl helper.Client, ref objectRef
report := helper.NewChangeReport("Watcher for " + string(ref.kind) + " " + ref.name)
defer report.LogIfNeeded(ctx)

w.watch(ref.kind, ref.name, ref.namespace)
var watchable Watchable
if ref.kind == flowslatest.RefTypeConfigMap {
watchable = &w.configs
} else {
watchable = &w.secrets
}

watchable := kindToWatchable(ref.kind)
obj := watchable.ProvidePlaceholder()
err := cl.Get(ctx, types.NamespacedName{Name: ref.name, Namespace: ref.namespace}, obj)
if err != nil {
return "", err
}
err = w.watch(ctx, ref.kind, obj)
if err != nil {
return "", err
}
digest, err := watchable.GetDigest(obj, ref.keys)
if err != nil {
return "", err
Expand Down
15 changes: 12 additions & 3 deletions pkg/watchers/watcher_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@ import (
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/rest"
"sigs.k8s.io/controller-runtime/pkg/builder"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/cache"
"sigs.k8s.io/controller-runtime/pkg/cache/informertest"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
)

const baseNamespace = "base-ns"
Expand Down Expand Up @@ -112,15 +113,23 @@ var kafkaSaslConfig = flowslatest.SASLConfig{
},
}

// fakeReconcile is a no-op reconcile.Reconciler, used only to build a
// controller in tests.
type fakeReconcile struct{}

// Reconcile does nothing and always reports success.
func (r *fakeReconcile) Reconcile(context.Context, reconcile.Request) (ctrl.Result, error) {
	return ctrl.Result{}, nil
}

// initWatcher builds a minimal controller-runtime manager backed by fake
// informers, creates a controller for it around a no-op reconciler, and
// returns a Watcher wired to both, for use in tests.
func initWatcher(t *testing.T) *Watcher {
	m, err := manager.New(&rest.Config{}, manager.Options{
		NewCache: func(config *rest.Config, opts cache.Options) (cache.Cache, error) {
			return &informertest.FakeInformers{}, nil
		},
	})
	assert.NoError(t, err)
	b := ctrl.NewControllerManagedBy(m).For(&corev1.Pod{})
	// Named "c" rather than "ctrl" to avoid shadowing the ctrl package import.
	c, err := b.Build(&fakeReconcile{})
	assert.NoError(t, err)
	return NewWatcher(c, m.GetCache())
}

func TestGenDigests(t *testing.T) {
Expand Down