diff --git a/projects/control-service/projects/pipelines_control_service/src/main/java/com/vmware/taurus/service/deploy/DataJobsSynchronizer.java b/projects/control-service/projects/pipelines_control_service/src/main/java/com/vmware/taurus/service/deploy/DataJobsSynchronizer.java
index d5d23c308a..dafb0ad4e8 100644
--- a/projects/control-service/projects/pipelines_control_service/src/main/java/com/vmware/taurus/service/deploy/DataJobsSynchronizer.java
+++ b/projects/control-service/projects/pipelines_control_service/src/main/java/com/vmware/taurus/service/deploy/DataJobsSynchronizer.java
@@ -10,6 +10,7 @@
 import com.vmware.taurus.service.model.ActualDataJobDeployment;
 import com.vmware.taurus.service.model.DataJob;
 import com.vmware.taurus.service.model.DesiredDataJobDeployment;
+import com.vmware.taurus.service.monitoring.DataJobSynchronizerMonitor;
 import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
 import net.javacrumbs.shedlock.spring.annotation.SchedulerLock;
@@ -49,6 +50,8 @@ public class DataJobsSynchronizer {
 
   private final ThreadPoolTaskExecutor dataJobsSynchronizerTaskExecutor;
 
+  private final DataJobSynchronizerMonitor dataJobSynchronizerMonitor;
+
   @Value("${datajobs.deployment.configuration.synchronization.task.enabled:false}")
   private boolean synchronizationEnabled;
 
@@ -86,6 +89,7 @@ public void synchronizeDataJobs() {
           "Skipping data job deployment synchronization because deployment names cannot be loaded"
               + " from Kubernetes.",
           e);
+      dataJobSynchronizerMonitor.countSynchronizerFailures();
       return;
     }
 
@@ -210,8 +214,12 @@ private void waitForSynchronizationCompletion(CountDownLatch countDownLatch) {
               + " some time...");
       countDownLatch.await();
       log.info("Data job deployments synchronization has successfully completed.");
+      dataJobSynchronizerMonitor.countSuccessfulSynchronizerInvocation();
     } catch (InterruptedException e) {
       log.error("An error occurred during the data job deployments' synchronization", e);
+      dataJobSynchronizerMonitor.countSynchronizerFailures();
+      // Restore the interrupt status so that the interruption is not silently swallowed here.
+      Thread.currentThread().interrupt();
     }
   }
 }
diff --git a/projects/control-service/projects/pipelines_control_service/src/main/java/com/vmware/taurus/service/monitoring/DataJobSynchronizerMonitor.java b/projects/control-service/projects/pipelines_control_service/src/main/java/com/vmware/taurus/service/monitoring/DataJobSynchronizerMonitor.java
new file mode 100644
index 0000000000..5601ff5b65
--- /dev/null
+++ b/projects/control-service/projects/pipelines_control_service/src/main/java/com/vmware/taurus/service/monitoring/DataJobSynchronizerMonitor.java
@@ -0,0 +1,81 @@
+/*
+ * Copyright 2021-2023 VMware, Inc.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package com.vmware.taurus.service.monitoring;
+
+import io.micrometer.core.instrument.Counter;
+import io.micrometer.core.instrument.MeterRegistry;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Component;
+
+/**
+ * Exposes Micrometer counters that record how many data job deployment synchronization runs
+ * completed and how many failed.
+ */
+@Component
+@Slf4j
+public class DataJobSynchronizerMonitor {
+
+  public static final String DATAJOBS_SUCCESSFUL_SYNCHRONIZER_INVOCATIONS_COUNTER =
+      "vdk.deploy.datajob.synchronizer.successful.invocations.counter";
+  public static final String DATAJOBS_FAILED_SYNCHRONIZER_INVOCATIONS_COUNTER =
+      "vdk.deploy.datajob.synchronizer.failed.invocations.counter";
+
+  private final Counter successfulInvocationsCounter;
+  private final Counter failedInvocationsCounter;
+
+  /**
+   * Registers the successful and failed invocation counters with the given registry.
+   *
+   * @param meterRegistry the registry that both counters are registered with
+   */
+  @Autowired(required = true)
+  public DataJobSynchronizerMonitor(MeterRegistry meterRegistry) {
+    successfulInvocationsCounter =
+        Counter.builder(DATAJOBS_SUCCESSFUL_SYNCHRONIZER_INVOCATIONS_COUNTER)
+            .description(
+                "Counts the number of times the synchronizeDataJobs() method is called and"
+                    + " completes.")
+            .register(meterRegistry);
+
+    failedInvocationsCounter =
+        Counter.builder(DATAJOBS_FAILED_SYNCHRONIZER_INVOCATIONS_COUNTER)
+            .description(
+                "Counts the number of times the synchronizeDataJobs() method failed to finish.")
+            .register(meterRegistry);
+  }
+
+  /**
+   * Records one data job deployment synchronization that was invoked by the DataJobsSynchronizer
+   * and ran to completion.
+   */
+  public void countSuccessfulSynchronizerInvocation() {
+    incrementCounter(successfulInvocationsCounter);
+  }
+
+  /**
+   * Records one data job deployment synchronization that the DataJobsSynchronizer failed to
+   * finish, for example because deployment names could not be loaded from Kubernetes or
+   * because waiting for the synchronization tasks was interrupted.
+   */
+  public void countSynchronizerFailures() {
+    incrementCounter(failedInvocationsCounter);
+  }
+
+  /**
+   * Increments the given counter. An exception thrown by the increment is logged as a warning
+   * and is not propagated to the caller.
+   *
+   * @param counter the counter to increment
+   */
+  private void incrementCounter(Counter counter) {
+    try {
+      counter.increment();
+    } catch (Exception e) {
+      log.warn("Error while trying to increment counter.", e);
+    }
+  }
+}
diff --git a/projects/control-service/projects/pipelines_control_service/src/test/java/com/vmware/taurus/service/monitoring/DataJobSynchronizerMonitorTest.java b/projects/control-service/projects/pipelines_control_service/src/test/java/com/vmware/taurus/service/monitoring/DataJobSynchronizerMonitorTest.java
new file mode 100644
index 0000000000..ecf29ab4c7
--- /dev/null
+++ b/projects/control-service/projects/pipelines_control_service/src/test/java/com/vmware/taurus/service/monitoring/DataJobSynchronizerMonitorTest.java
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2021-2023 VMware, Inc.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package com.vmware.taurus.service.monitoring;
+
+import com.vmware.taurus.ControlplaneApplication;
+import io.micrometer.core.instrument.MeterRegistry;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.test.context.SpringBootTest;
+
+@SpringBootTest(classes = ControlplaneApplication.class)
+public class DataJobSynchronizerMonitorTest {
+
+  @Autowired DataJobSynchronizerMonitor dataJobSynchronizerMonitor;
+
+  @Autowired private MeterRegistry meterRegistry;
+
+  @Test
+  public void testIncrementSuccessfulInvocations() {
+    var counter =
+        meterRegistry.counter(
+            DataJobSynchronizerMonitor.DATAJOBS_SUCCESSFUL_SYNCHRONIZER_INVOCATIONS_COUNTER);
+    Assertions.assertEquals(0.0, counter.count(), 0.001);
+    dataJobSynchronizerMonitor.countSuccessfulSynchronizerInvocation();
+
+    Assertions.assertEquals(1, counter.count(), 0.001);
+  }
+
+  @Test
+  public void testIncrementFailedInvocations() {
+    var counter =
+        meterRegistry.counter(
+            DataJobSynchronizerMonitor.DATAJOBS_FAILED_SYNCHRONIZER_INVOCATIONS_COUNTER);
+    Assertions.assertEquals(0.0, counter.count(), 0.001);
+    dataJobSynchronizerMonitor.countSynchronizerFailures();
+
+    Assertions.assertEquals(1, counter.count(), 0.001);
+  }
+}