diff --git a/service/src/java/org/apache/hive/service/servlet/OTELExporter.java b/service/src/java/org/apache/hive/service/servlet/OTELExporter.java index 56aac3fc34b3..bd70b179e25d 100644 --- a/service/src/java/org/apache/hive/service/servlet/OTELExporter.java +++ b/service/src/java/org/apache/hive/service/servlet/OTELExporter.java @@ -65,8 +65,13 @@ public OTELExporter(OpenTelemetry openTelemetry, SessionManager sessionManager, @Override public void run() { while (true) { - jvmMetrics.setJvmMetrics(); - exposeMetricsToOTEL(); + try { + jvmMetrics.setJvmMetrics(); + exposeMetricsToOTEL(); + } catch (Throwable e) { + LOG.error("Exception occurred in OTELExporter thread ", e); + } + try { Thread.sleep(frequency); } catch (InterruptedException e) { @@ -136,55 +141,57 @@ public void exposeMetricsToOTEL() { Set historicalQueryIDs = new HashSet<>(); for (QueryInfo hQuery : historicalQueries) { - String hQueryId = hQuery.getQueryDisplay().getQueryId(); - historicalQueryIDs.add(hQueryId); - Span rootspan = queryIdToSpanMap.remove(hQueryId); - Set completedTasks = queryIdToTasksMap.remove(hQueryId); + if (hQuery.getEndTime() != null) { + String hQueryId = hQuery.getQueryDisplay().getQueryId(); + historicalQueryIDs.add(hQueryId); + Span rootspan = queryIdToSpanMap.remove(hQueryId); + Set completedTasks = queryIdToTasksMap.remove(hQueryId); + + //For queries that were live till last loop but have ended before start of this loop + if (rootspan != null) { + for (QueryDisplay.TaskDisplay task : hQuery.getQueryDisplay().getTaskDisplays()) { + if (!completedTasks.contains(task.getTaskId())) { + Context parentContext = Context.current().with(rootspan); + tracer.spanBuilder(hQueryId + " - " + task.getTaskId()) + .setParent(parentContext).setAllAttributes(addTaskAttributes(task)) + .setStartTimestamp(task.getBeginTime(), TimeUnit.MILLISECONDS).startSpan() + .end(task.getEndTime(), TimeUnit.MILLISECONDS); + } + } + + //Update the rootSpan name & attributes before ending it + rootspan.updateName(hQueryId + " - completed").setAllAttributes(addQueryAttributes(hQuery)) + .end(hQuery.getEndTime(), TimeUnit.MILLISECONDS); + historicalQueryId.add(hQueryId); + } - //For queries that were live till last loop but have ended before start of this loop - if (rootspan != null) { - for (QueryDisplay.TaskDisplay task : hQuery.getQueryDisplay().getTaskDisplays()) { - if (!completedTasks.contains(task.getTaskId())) { - Context parentContext = Context.current().with(rootspan); + //For queries that already ended either before OTEL service started or in between OTEL loops + if (historicalQueryId.add(hQueryId)) { + rootspan = tracer.spanBuilder(hQueryId + " - completed") + .setStartTimestamp(hQuery.getBeginTime(), TimeUnit.MILLISECONDS).startSpan(); + Context parentContext = Context.current().with(rootspan); + + Span initSpan = tracer.spanBuilder(hQueryId).setParent(parentContext) + .setStartTimestamp(hQuery.getBeginTime(), TimeUnit.MILLISECONDS).startSpan() + .setAttribute("QueryId", hQueryId) + .setAttribute("QueryString", hQuery.getQueryDisplay().getQueryString()) + .setAttribute("UserName", hQuery.getUserName()) + .setAttribute("ExecutionEngine", hQuery.getExecutionEngine()); + if (hQuery.getQueryDisplay().getErrorMessage() != null) { + initSpan.setAttribute("ErrorMessage", hQuery.getQueryDisplay().getErrorMessage()); + } + initSpan.end(hQuery.getBeginTime(), TimeUnit.MILLISECONDS); + + for (QueryDisplay.TaskDisplay task : hQuery.getQueryDisplay().getTaskDisplays()) { + parentContext = Context.current().with(rootspan); tracer.spanBuilder(hQueryId + " - " + task.getTaskId()) .setParent(parentContext).setAllAttributes(addTaskAttributes(task)) .setStartTimestamp(task.getBeginTime(), TimeUnit.MILLISECONDS).startSpan() .end(task.getEndTime(), TimeUnit.MILLISECONDS); } - } - - //Update the rootSpan name & attributes before ending it - rootspan.updateName(hQueryId + " - completed").setAllAttributes(addQueryAttributes(hQuery)) - .end(hQuery.getEndTime(), TimeUnit.MILLISECONDS); - historicalQueryId.add(hQueryId); - } - //For queries that already ended either before OTEL service started or in between OTEL loops - if (historicalQueryId.add(hQueryId)) { - rootspan = tracer.spanBuilder(hQueryId + " - completed") - .setStartTimestamp(hQuery.getBeginTime(), TimeUnit.MILLISECONDS).startSpan(); - Context parentContext = Context.current().with(rootspan); - - Span initSpan = tracer.spanBuilder(hQueryId).setParent(parentContext) - .setStartTimestamp(hQuery.getBeginTime(), TimeUnit.MILLISECONDS).startSpan() - .setAttribute("QueryId", hQueryId) - .setAttribute("QueryString", hQuery.getQueryDisplay().getQueryString()) - .setAttribute("UserName", hQuery.getUserName()) - .setAttribute("ExecutionEngine", hQuery.getExecutionEngine()); - if (hQuery.getQueryDisplay().getErrorMessage() != null) { - initSpan.setAttribute("ErrorMessage", hQuery.getQueryDisplay().getErrorMessage()); + rootspan.setAllAttributes(addQueryAttributes(hQuery)).end(hQuery.getEndTime(), TimeUnit.MILLISECONDS); } - initSpan.end(hQuery.getBeginTime(), TimeUnit.MILLISECONDS); - - for (QueryDisplay.TaskDisplay task : hQuery.getQueryDisplay().getTaskDisplays()) { - parentContext = Context.current().with(rootspan); - tracer.spanBuilder(hQueryId + " - " + task.getTaskId()) - .setParent(parentContext).setAllAttributes(addTaskAttributes(task)) - .setStartTimestamp(task.getBeginTime(), TimeUnit.MILLISECONDS).startSpan() - .end(task.getEndTime(), TimeUnit.MILLISECONDS); - } - - rootspan.setAllAttributes(addQueryAttributes(hQuery)).end(hQuery.getEndTime(), TimeUnit.MILLISECONDS); } }