From a4bbd9a51043cf5d20ebef117090f83ed75f7819 Mon Sep 17 00:00:00 2001 From: Harshil Goel Date: Fri, 31 Jan 2025 02:04:42 +0530 Subject: [PATCH 1/3] added some metrics --- edgraph/server.go | 8 +++----- posting/mvcc.go | 28 +++++++++++++++++----------- worker/task.go | 2 -- x/metrics.go | 27 +++++++++++++++++++++++++++ 4 files changed, 47 insertions(+), 18 deletions(-) diff --git a/edgraph/server.go b/edgraph/server.go index 2845e086eed..ebfc32fa109 100644 --- a/edgraph/server.go +++ b/edgraph/server.go @@ -1282,11 +1282,9 @@ func (s *Server) doQuery(ctx context.Context, req *Request) (resp *api.Response, l := &query.Latency{} l.Start = time.Now() - // TODO: Following trace messages have been commented out as stringified trace messages allocate - // too much memory. These trace messages need to be generated if tracing is enabled. - // if bool(glog.V(3)) || worker.LogDQLRequestEnabled() { - // glog.Infof("Got a query, DQL form: %+v at %+v", req.req, l.Start.Format(time.RFC3339)) - // } + if bool(glog.V(3)) || worker.LogDQLRequestEnabled() { + glog.Infof("Got a query, DQL form: %+v at %+v", req.req, l.Start.Format(time.RFC3339)) + } isMutation := len(req.req.Mutations) > 0 methodRequest := methodQuery diff --git a/posting/mvcc.go b/posting/mvcc.go index 7c5e9dcaf82..f707bd0021c 100644 --- a/posting/mvcc.go +++ b/posting/mvcc.go @@ -347,9 +347,9 @@ func RemoveCacheFor(key []byte) { type Cache struct { data *ristretto.Cache[[]byte, *CachePL] - numCacheRead int64 - numCacheReadFails int64 - numCacheSave int64 + numCacheRead atomic.Int64 + numCacheReadFails atomic.Int64 + numCacheSave atomic.Int64 } func (c *Cache) wait() { @@ -365,15 +365,14 @@ func (c *Cache) get(key []byte) (*CachePL, bool) { } val, ok := c.data.Get(key) if !ok { - atomic.AddInt64(&c.numCacheReadFails, 1) + c.numCacheReadFails.Add(1) return val, ok } if val.list == nil { - atomic.AddInt64(&c.numCacheReadFails, 1) + c.numCacheReadFails.Add(1) return nil, false } - atomic.AddInt64(&c.numCacheRead, 1) -
c.numCacheRead += 1 + c.numCacheRead.Add(1) return val, true } @@ -381,7 +380,7 @@ func (c *Cache) set(key []byte, i *CachePL) { if c == nil { return } - c.numCacheSave += 1 + c.numCacheSave.Add(1) c.data.Set(key, i, 1) } @@ -407,8 +406,7 @@ type MemoryLayer struct { cache *Cache // metrics - statsHolder *StatsHolder - numDisksRead int64 + statsHolder *StatsHolder } func (ml *MemoryLayer) clear() { @@ -448,6 +446,15 @@ func initMemoryLayer(cacheSize int64, deleteOnUpdates bool) *MemoryLayer { for range ticker.C { // Record the posting list cache hit ratio ostats.Record(context.Background(), x.PLCacheHitRatio.M(m.Ratio())) + // Swap(0) reads and resets a counter atomically so that no increment is lost. + numSaves := ml.cache.numCacheSave.Swap(0) + ostats.Record(context.Background(), x.NumPostingListCacheSave.M(numSaves)) + + numReads := ml.cache.numCacheRead.Swap(0) + ostats.Record(context.Background(), x.NumPostingListCacheRead.M(numReads)) + + numReadFails := ml.cache.numCacheReadFails.Swap(0) + ostats.Record(context.Background(), x.NumPostingListCacheReadFail.M(numReadFails)) } }() @@ -657,7 +664,6 @@ func (ml *MemoryLayer) readFromCache(key []byte, readTs uint64) *List { } func (ml *MemoryLayer) readFromDisk(key []byte, pstore *badger.DB, readTs uint64) (*List, error) { - atomic.AddInt64(&ml.numDisksRead, 1) txn := pstore.NewTransactionAt(readTs, false) defer txn.Discard() diff --git a/worker/task.go b/worker/task.go index 378d7059f38..ccf30d5944a 100644 --- a/worker/task.go +++ b/worker/task.go @@ -838,10 +838,8 @@ func (qs *queryState) handleUidPostings( } if srcFn.fnType == compareAttrFn { - pl.RLock() posting.GetStatsHolder().InsertRecord( q.Attr, []byte(srcFn.tokens[i]), uint64(pl.ApproxLen())) - pl.RUnlock() } switch { diff --git a/x/metrics.go b/x/metrics.go index 92e22065d3e..3c19868b77c 100644 --- a/x/metrics.go +++ b/x/metrics.go @@ -147,6 +147,12 @@ var ( // RaftLeaderChanges records the total number of leader changes seen.
RaftLeaderChanges = ostats.Int64("raft_leader_changes_total", "Total number of leader changes seen", ostats.UnitDimensionless) + NumPostingListCacheRead = ostats.Int64("num_posting_list_cache_reads", + "Number of times cache was read", ostats.UnitDimensionless) + NumPostingListCacheReadFail = ostats.Int64("num_posting_list_cache_reads_fail", + "Number of times cache read failed", ostats.UnitDimensionless) + NumPostingListCacheSave = ostats.Int64("num_posting_list_cache_saves", + "Number of times item was saved in cache", ostats.UnitDimensionless) // Conf holds the metrics config. // TODO: Request statistics, latencies, 500, timeouts @@ -202,6 +208,27 @@ var ( Aggregation: view.Count(), TagKeys: allTagKeys, }, + { + Name: NumPostingListCacheRead.Name(), + Measure: NumPostingListCacheRead, + Description: NumPostingListCacheRead.Description(), + Aggregation: view.Sum(), + TagKeys: allTagKeys, + }, + { + Name: NumPostingListCacheReadFail.Name(), + Measure: NumPostingListCacheReadFail, + Description: NumPostingListCacheReadFail.Description(), + Aggregation: view.Sum(), + TagKeys: allTagKeys, + }, + { + Name: NumPostingListCacheSave.Name(), + Measure: NumPostingListCacheSave, + Description: NumPostingListCacheSave.Description(), + Aggregation: view.Sum(), + TagKeys: allTagKeys, + }, { Name: NumEdges.Name(), Measure: NumEdges, From 66b2eb66a2072f58e60bf6c5b22fa5ce7b3e3ef4 Mon Sep 17 00:00:00 2001 From: Harshil Goel Date: Fri, 31 Jan 2025 02:41:19 +0530 Subject: [PATCH 2/3] fixed stuff --- edgraph/server.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/edgraph/server.go b/edgraph/server.go index ebfc32fa109..41483944d3c 100644 --- a/edgraph/server.go +++ b/edgraph/server.go @@ -1283,7 +1283,8 @@ func (s *Server) doQuery(ctx context.Context, req *Request) (resp *api.Response, l.Start = time.Now() if bool(glog.V(3)) || worker.LogDQLRequestEnabled() { - glog.Infof("Got a query, DQL form: %+v at %+v", req.req, l.Start.Format(time.RFC3339)) +
glog.Infof("Got a query, DQL form: %+v %+v at %+v", + req.req.Query, req.req.Mutations, l.Start.Format(time.RFC3339)) } isMutation := len(req.req.Mutations) > 0 From 5bff73e5feabd729e7775ca578563079f29beeb6 Mon Sep 17 00:00:00 2001 From: Harshil Goel Date: Fri, 31 Jan 2025 02:48:40 +0530 Subject: [PATCH 3/3] trying minio backup test fix --- systest/backup/minio/docker-compose.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/systest/backup/minio/docker-compose.yml b/systest/backup/minio/docker-compose.yml index 550d8d659b6..e712fcc000a 100644 --- a/systest/backup/minio/docker-compose.yml +++ b/systest/backup/minio/docker-compose.yml @@ -23,7 +23,7 @@ services: read_only: true command: /gobin/dgraph ${COVERAGE_OUTPUT} alpha --telemetry "reports=false; sentry=false;" - --my=alpha1:7080 --zero=zero1:5080 --logtostderr -v=2 --security + --my=alpha1:7080 --zero=zero1:5080 --logtostderr --cache "size-mb=500;" -v=2 --security "whitelist=10.0.0.0/8,172.16.0.0/12,192.168.0.0/16;" --tls "ca-cert=/dgraph-tls/ca.crt; server-cert=/dgraph-tls/node.crt; server-key=/dgraph-tls/node.key; internal-port=true; client-cert=/dgraph-tls/client.alpha1.crt; client-key=/dgraph-tls/client.alpha1.key;" @@ -48,7 +48,7 @@ services: read_only: true command: /gobin/dgraph ${COVERAGE_OUTPUT} alpha --telemetry "reports=false; sentry=false;" - --my=alpha2:7080 --zero=zero1:5080 --logtostderr -v=2 --security + --my=alpha2:7080 --zero=zero1:5080 --logtostderr --cache "size-mb=500;" -v=2 --security "whitelist=10.0.0.0/8,172.16.0.0/12,192.168.0.0/16;" --tls "ca-cert=/dgraph-tls/ca.crt; server-cert=/dgraph-tls/node.crt; server-key=/dgraph-tls/node.key; internal-port=true; client-cert=/dgraph-tls/client.alpha2.crt; client-key=/dgraph-tls/client.alpha2.key;" @@ -73,7 +73,7 @@ services: read_only: true command: /gobin/dgraph ${COVERAGE_OUTPUT} alpha --telemetry "reports=false; sentry=false;" - --my=alpha3:7080 --zero=zero1:5080 --logtostderr -v=2 --security + --my=alpha3:7080 
--zero=zero1:5080 --logtostderr --cache "size-mb=500;" -v=2 --security "whitelist=10.0.0.0/8,172.16.0.0/12,192.168.0.0/16;" --tls "ca-cert=/dgraph-tls/ca.crt; server-cert=/dgraph-tls/node.crt; server-key=/dgraph-tls/node.key; internal-port=true; client-cert=/dgraph-tls/client.alpha3.crt; client-key=/dgraph-tls/client.alpha3.key;"