Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HIVE-28661: OTEL: Latency in retrieving query end time leads to threa… #5576

Merged
merged 3 commits into from
Dec 12, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 50 additions & 43 deletions service/src/java/org/apache/hive/service/servlet/OTELExporter.java
Original file line number Diff line number Diff line change
Expand Up
@@ -65,8 +65,13 @@ public OTELExporter(OpenTelemetry openTelemetry, SessionManager sessionManager,
@Override
public void run() {
while (true) {
jvmMetrics.setJvmMetrics();
exposeMetricsToOTEL();
try {
jvmMetrics.setJvmMetrics();
exposeMetricsToOTEL();
} catch (Throwable e) {
LOG.error("Exception occurred in OTELExporter thread ", e);
}

try {
Thread.sleep(frequency);
} catch (InterruptedException e) {
Expand Down
Expand Up
@@ -136,55 +141,57 @@ public void exposeMetricsToOTEL() {

Set<String> historicalQueryIDs = new HashSet<>();
for (QueryInfo hQuery : historicalQueries) {
String hQueryId = hQuery.getQueryDisplay().getQueryId();
historicalQueryIDs.add(hQueryId);
Span rootspan = queryIdToSpanMap.remove(hQueryId);
Set<String> completedTasks = queryIdToTasksMap.remove(hQueryId);
if (hQuery.getEndTime() != null) {
String hQueryId = hQuery.getQueryDisplay().getQueryId();
historicalQueryIDs.add(hQueryId);
Span rootspan = queryIdToSpanMap.remove(hQueryId);
Set<String> completedTasks = queryIdToTasksMap.remove(hQueryId);

//For queries that were live till last loop but have ended before start of this loop
if (rootspan != null) {
for (QueryDisplay.TaskDisplay task : hQuery.getQueryDisplay().getTaskDisplays()) {
if (!completedTasks.contains(task.getTaskId())) {
Context parentContext = Context.current().with(rootspan);
tracer.spanBuilder(hQueryId + " - " + task.getTaskId())
.setParent(parentContext).setAllAttributes(addTaskAttributes(task))
.setStartTimestamp(task.getBeginTime(), TimeUnit.MILLISECONDS).startSpan()
.end(task.getEndTime(), TimeUnit.MILLISECONDS);
}
}

//Update the rootSpan name & attributes before ending it
rootspan.updateName(hQueryId + " - completed").setAllAttributes(addQueryAttributes(hQuery))
.end(hQuery.getEndTime(), TimeUnit.MILLISECONDS);
historicalQueryId.add(hQueryId);
}

//For queries that were live till last loop but have ended before start of this loop
if (rootspan != null) {
for (QueryDisplay.TaskDisplay task : hQuery.getQueryDisplay().getTaskDisplays()) {
if (!completedTasks.contains(task.getTaskId())) {
Context parentContext = Context.current().with(rootspan);
//For queries that already ended either before OTEL service started or in between OTEL loops
if (historicalQueryId.add(hQueryId)) {
rootspan = tracer.spanBuilder(hQueryId + " - completed")
.setStartTimestamp(hQuery.getBeginTime(), TimeUnit.MILLISECONDS).startSpan();
Context parentContext = Context.current().with(rootspan);

Span initSpan = tracer.spanBuilder(hQueryId).setParent(parentContext)
.setStartTimestamp(hQuery.getBeginTime(), TimeUnit.MILLISECONDS).startSpan()
.setAttribute("QueryId", hQueryId)
.setAttribute("QueryString", hQuery.getQueryDisplay().getQueryString())
.setAttribute("UserName", hQuery.getUserName())
.setAttribute("ExecutionEngine", hQuery.getExecutionEngine());
if (hQuery.getQueryDisplay().getErrorMessage() != null) {
initSpan.setAttribute("ErrorMessage", hQuery.getQueryDisplay().getErrorMessage());
}
initSpan.end(hQuery.getBeginTime(), TimeUnit.MILLISECONDS);

for (QueryDisplay.TaskDisplay task : hQuery.getQueryDisplay().getTaskDisplays()) {
parentContext = Context.current().with(rootspan);
tracer.spanBuilder(hQueryId + " - " + task.getTaskId())
.setParent(parentContext).setAllAttributes(addTaskAttributes(task))
.setStartTimestamp(task.getBeginTime(), TimeUnit.MILLISECONDS).startSpan()
.end(task.getEndTime(), TimeUnit.MILLISECONDS);
}
}

//Update the rootSpan name & attributes before ending it
rootspan.updateName(hQueryId + " - completed").setAllAttributes(addQueryAttributes(hQuery))
.end(hQuery.getEndTime(), TimeUnit.MILLISECONDS);
historicalQueryId.add(hQueryId);
}

//For queries that already ended either before OTEL service started or in between OTEL loops
if (historicalQueryId.add(hQueryId)) {
rootspan = tracer.spanBuilder(hQueryId + " - completed")
.setStartTimestamp(hQuery.getBeginTime(), TimeUnit.MILLISECONDS).startSpan();
Context parentContext = Context.current().with(rootspan);

Span initSpan = tracer.spanBuilder(hQueryId).setParent(parentContext)
.setStartTimestamp(hQuery.getBeginTime(), TimeUnit.MILLISECONDS).startSpan()
.setAttribute("QueryId", hQueryId)
.setAttribute("QueryString", hQuery.getQueryDisplay().getQueryString())
.setAttribute("UserName", hQuery.getUserName())
.setAttribute("ExecutionEngine", hQuery.getExecutionEngine());
if (hQuery.getQueryDisplay().getErrorMessage() != null) {
initSpan.setAttribute("ErrorMessage", hQuery.getQueryDisplay().getErrorMessage());
rootspan.setAllAttributes(addQueryAttributes(hQuery)).end(hQuery.getEndTime(), TimeUnit.MILLISECONDS);
}
initSpan.end(hQuery.getBeginTime(), TimeUnit.MILLISECONDS);

for (QueryDisplay.TaskDisplay task : hQuery.getQueryDisplay().getTaskDisplays()) {
parentContext = Context.current().with(rootspan);
tracer.spanBuilder(hQueryId + " - " + task.getTaskId())
.setParent(parentContext).setAllAttributes(addTaskAttributes(task))
.setStartTimestamp(task.getBeginTime(), TimeUnit.MILLISECONDS).startSpan()
.end(task.getEndTime(), TimeUnit.MILLISECONDS);
}

rootspan.setAllAttributes(addQueryAttributes(hQuery)).end(hQuery.getEndTime(), TimeUnit.MILLISECONDS);
}
}

Expand Down
Loading