Skip to content

Commit

Permalink
add more metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
jievince committed Dec 15, 2021
1 parent 199dfbd commit 4ba9df9
Show file tree
Hide file tree
Showing 9 changed files with 70 additions and 2 deletions.
1 change: 1 addition & 0 deletions src/common/expression/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ set(expression_test_common_libs
$<TARGET_OBJECTS:network_obj>
$<TARGET_OBJECTS:fs_obj>
$<TARGET_OBJECTS:stats_obj>
$<TARGET_OBJECTS:stats_def_obj>
$<TARGET_OBJECTS:datatypes_obj>
$<TARGET_OBJECTS:time_obj>
$<TARGET_OBJECTS:datetime_parser_obj>
Expand Down
6 changes: 6 additions & 0 deletions src/graph/executor/Executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

#include "common/base/ObjectPool.h"
#include "common/memory/MemoryUtils.h"
#include "common/stats/StatsManager.h"
#include "common/time/ScopedTimer.h"
#include "graph/context/ExecutionContext.h"
#include "graph/context/QueryContext.h"
Expand Down Expand Up @@ -98,6 +99,7 @@
#include "graph/planner/plan/PlanNode.h"
#include "graph/planner/plan/Query.h"
#include "graph/service/GraphFlags.h"
#include "graph/stats/StatsDef.h"
#include "interface/gen-cpp2/graph_types.h"

using folly::stringPrintf;
Expand Down Expand Up @@ -157,9 +159,11 @@ Executor *Executor::makeExecutor(QueryContext *qctx, const PlanNode *node) {
return pool->add(new PassThroughExecutor(node, qctx));
}
case PlanNode::Kind::kAggregate: {
stats::StatsManager::addValue(kNumAggregateExecutors);
return pool->add(new AggregateExecutor(node, qctx));
}
case PlanNode::Kind::kSort: {
stats::StatsManager::addValue(kNumSortExecutors);
return pool->add(new SortExecutor(node, qctx));
}
case PlanNode::Kind::kTopN: {
Expand Down Expand Up @@ -202,6 +206,7 @@ Executor *Executor::makeExecutor(QueryContext *qctx, const PlanNode *node) {
case PlanNode::Kind::kTagIndexFullScan:
case PlanNode::Kind::kTagIndexPrefixScan:
case PlanNode::Kind::kTagIndexRangeScan: {
stats::StatsManager::addValue(kNumIndexScanExecutors);
return pool->add(new IndexScanExecutor(node, qctx));
}
case PlanNode::Kind::kStart: {
Expand Down Expand Up @@ -573,6 +578,7 @@ Status Executor::close() {

Status Executor::checkMemoryWatermark() {
if (node_->isQueryNode() && MemoryUtils::kHitMemoryHighWatermark.load()) {
stats::StatsManager::addValue(kNumOomExecutors);
return Status::Error("Used memory hits the high watermark(%lf) of total system memory.",
FLAGS_system_memory_high_watermark_ratio);
}
Expand Down
1 change: 1 addition & 0 deletions src/graph/executor/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ SET(EXEC_QUERY_TEST_OBJS
$<TARGET_OBJECTS:graph_obj>
$<TARGET_OBJECTS:ssl_obj>
$<TARGET_OBJECTS:version_obj>
$<TARGET_OBJECTS:stats_def_obj>
)

SET(EXEC_QUERY_TEST_LIBS
Expand Down
3 changes: 3 additions & 0 deletions src/graph/service/GraphService.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ folly::Future<AuthResponse> GraphService::future_authenticate(const std::string&
return ctx->finish();
}
stats::StatsManager::addValue(kNumOpenedSessions);
stats::StatsManager::addValue(kNumActiveSessions);
ctx->setSession(sessionPtr);
ctx->resp().sessionId.reset(new int64_t(ctx->session()->id()));
ctx->resp().timeZoneOffsetSeconds.reset(
Expand All @@ -118,6 +119,7 @@ folly::Future<AuthResponse> GraphService::future_authenticate(const std::string&
void GraphService::signout(int64_t sessionId) {
VLOG(2) << "Sign out session " << sessionId;
sessionManager_->removeSession(sessionId);
stats::StatsManager::decValue(kNumActiveSessions);
}

folly::Future<ExecutionResponse> GraphService::future_execute(int64_t sessionId,
Expand All @@ -129,6 +131,7 @@ folly::Future<ExecutionResponse> GraphService::future_execute(int64_t sessionId,
auto future = ctx->future();
stats::StatsManager::addValue(kNumQueries);
stats::StatsManager::addValue(kNumActiveQueries);
stats::StatsManager::addValue(kReceivedBytes, query.size());
// When the sessionId is 0, it means the clients to ping the connection is ok
if (sessionId == 0) {
ctx->resp().errorCode = ErrorCode::E_SESSION_INVALID;
Expand Down
2 changes: 2 additions & 0 deletions src/graph/service/QueryInstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ Status QueryInstance::validateAndOptimize() {

NG_RETURN_IF_ERROR(Validator::validate(sentence_.get(), qctx()));
NG_RETURN_IF_ERROR(findBestPlan());
stats::StatsManager::addValue(kOptimizerLatencyUs, *(qctx_->plan()->optimizeTimeInUs()));

return Status::OK();
}
Expand All @@ -103,6 +104,7 @@ void QueryInstance::onFinish() {
rctx->resp().latencyInUs = latency;
addSlowQueryStats(latency);
rctx->finish();
stats::StatsManager::addValue(kSentBytes, rctx->resp().data->size());

rctx->session()->deleteQuery(qctx_.get());
// The `QueryInstance' is the root node holding all resources during the
Expand Down
4 changes: 4 additions & 0 deletions src/graph/session/ClientSession.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@

#include "graph/session/ClientSession.h"

#include "common/stats/StatsManager.h"
#include "common/time/WallClock.h"
#include "graph/context/QueryContext.h"
#include "graph/stats/StatsDef.h"

namespace nebula {
namespace graph {
Expand Down Expand Up @@ -75,6 +77,7 @@ void ClientSession::markQueryKilled(nebula::ExecutionPlanID epId) {
return;
}
context->second->markKilled();
// stats::StatsManager::addValue(kNumKilledQueries);
VLOG(1) << "Mark query killed in local cache, epId: " << epId;

auto query = session_.queries_ref()->find(epId);
Expand All @@ -91,6 +94,7 @@ void ClientSession::markAllQueryKilled() {
context.second->markKilled();
session_.queries_ref()->clear();
}
// stats::StatsManager::addValue(kNumKilledQueries, contexts_.size());
}
} // namespace graph
} // namespace nebula
4 changes: 4 additions & 0 deletions src/graph/session/GraphSessionManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
#include "graph/session/GraphSessionManager.h"

#include "common/base/Base.h"
#include "common/stats/StatsManager.h"
#include "common/time/WallClock.h"
#include "graph/service/GraphFlags.h"
#include "graph/stats/StatsDef.h"

namespace nebula {
namespace graph {
Expand Down Expand Up @@ -181,6 +183,8 @@ void GraphSessionManager::reclaimExpiredSessions() {
LOG(ERROR) << "Remove session `" << iter->first << "' failed: " << resp.status();
}
iter = activeSessions_.erase(iter);
// stats::StatsManager::decValue(kNumActiveSessions);
// stats::StatsManager::addValue(kNumReclaimedExpiredSessions);
// TODO: Disconnect the connection of the session
}
}
Expand Down
32 changes: 32 additions & 0 deletions src/graph/stats/StatsDef.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,24 @@ stats::CounterId kNumQueryErrosLeaderChanges;
stats::CounterId kNumSentences;
stats::CounterId kQueryLatencyUs;
stats::CounterId kSlowQueryLatencyUs;
stats::CounterId kNumKilledQueries;

stats::CounterId kOptimizerLatencyUs;

stats::CounterId kNumAggregateExecutors;
stats::CounterId kNumSortExecutors;
stats::CounterId kNumIndexScanExecutors;
stats::CounterId kNumOomExecutors;

stats::CounterId kReceivedBytes;
stats::CounterId kSentBytes;

stats::CounterId kNumOpenedSessions;
stats::CounterId kNumAuthFailedSessions;
stats::CounterId kNumAuthFailedSessionsBadUserNamePassword;
stats::CounterId kNumAuthFailedSessionsOutOfMaxAllowed;
stats::CounterId kNumActiveSessions;
stats::CounterId kNumReclaimedExpiredSessions;

void initCounters() {
kNumQueries = stats::StatsManager::registerStats("num_queries", "rate, sum");
Expand All @@ -41,6 +54,22 @@ void initCounters() {
"query_latency_us", 1000, 0, 2000, "avg, p75, p95, p99, p999");
kSlowQueryLatencyUs = stats::StatsManager::registerHisto(
"slow_query_latency_us", 1000, 0, 2000, "avg, p75, p95, p99, p999");
kNumKilledQueries = stats::StatsManager::registerStats("num_killed_queries", "rate, sum");

kOptimizerLatencyUs = stats::StatsManager::registerHisto(
"optimizer_latency_us", 1000, 0, 2000, "avg, p75, p95, p99, p999");

kReceivedBytes = stats::StatsManager::registerHisto(
"received_bytes", 1000, 1, 4194304, "avg, p75, p95, p99, p999"); // 1 Byte ~ 4194304 Bytes
kSentBytes = stats::StatsManager::registerHisto(
"sent_bytes", 1000, 1, 4294967296, "avg, p75, p95, p99, p999"); // 1 Byte ~ 4 GiB

kNumAggregateExecutors =
stats::StatsManager::registerStats("num_aggregate_executors", "rate, sum");
kNumSortExecutors = stats::StatsManager::registerStats("num_sort_executors", "rate, sum");
kNumIndexScanExecutors =
stats::StatsManager::registerStats("num_indexscan_executors", "rate, sum");
kNumOomExecutors = stats::StatsManager::registerStats("num_oom_executors", "rate, sum");

kNumOpenedSessions = stats::StatsManager::registerStats("num_opened_sessions", "rate, sum");
kNumAuthFailedSessions =
Expand All @@ -49,6 +78,9 @@ void initCounters() {
"num_auth_failed_sessions_bad_username_password", "rate, sum");
kNumAuthFailedSessionsOutOfMaxAllowed = stats::StatsManager::registerStats(
"num_auth_failed_sessions_out_of_max_allowed", "rate, sum");
kNumActiveSessions = stats::StatsManager::registerStats("num_active_sessions", "rate, sum");
kNumReclaimedExpiredSessions =
stats::StatsManager::registerStats("num_reclaimed_expired_sessions", "rate, sum");
}

} // namespace nebula
19 changes: 17 additions & 2 deletions src/graph/stats/StatsDef.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ DECLARE_int32(slow_query_threshold_us);

namespace nebula {

// query related
// Query
extern stats::CounterId kNumQueries;
extern stats::CounterId kNumActiveQueries;
extern stats::CounterId kNumSlowQueries;
Expand All @@ -22,12 +22,27 @@ extern stats::CounterId kNumQueryErrosLeaderChanges;
extern stats::CounterId kNumSentences;
extern stats::CounterId kQueryLatencyUs;
extern stats::CounterId kSlowQueryLatencyUs;
extern stats::CounterId kNumKilledQueries;

// session related
extern stats::CounterId kOptimizerLatencyUs;

// Executor
extern stats::CounterId kNumAggregateExecutors;
extern stats::CounterId kNumSortExecutors;
extern stats::CounterId kNumIndexScanExecutors;
extern stats::CounterId kNumOomExecutors;

// Server client traffic
extern stats::CounterId kReceivedBytes;
extern stats::CounterId kSentBytes;

// Session
extern stats::CounterId kNumOpenedSessions;
extern stats::CounterId kNumAuthFailedSessions;
extern stats::CounterId kNumAuthFailedSessionsBadUserNamePassword;
extern stats::CounterId kNumAuthFailedSessionsOutOfMaxAllowed;
extern stats::CounterId kNumActiveSessions;
extern stats::CounterId kNumReclaimedExpiredSessions;

void initCounters();

Expand Down

0 comments on commit 4ba9df9

Please sign in to comment.