From 5e22e78459cc422b1aa3811c613c4e7a2410eda6 Mon Sep 17 00:00:00 2001
From: Miroslav Ivanov
Date: Tue, 23 May 2023 17:08:52 +0300
Subject: [PATCH 1/9] control-service: killed job was shown as successful

Why

We recently got the following feedback from our internal client: a data job was
listed as successful even though it hit the 12-hour limit and was killed; the
logs do not show that either - the last entry in the log just shows the last
object that was sent for ingestion, and there is no summary of the data job.

The problem is caused by the following fix -
https://github.com/vmware/versatile-data-kit/pull/1586. When the job hits the
12-hour limit, the K8S Pod is terminated and we construct a partial
JobExecutionStatus, which enters the following if statement and returns
Optional.empty() rather than the constructed object:
https://github.com/vmware/versatile-data-kit/blob/4763ba877f43b270fbd4770bc1533216f7c5d618/projects/control-service/projects/pipelines_control_service/src/main/java/com/vmware/taurus/service/KubernetesService.java#L1656

As a result, the job execution becomes stuck in the Running status until it is
detected by the emergency logic, which marks such executions as successful
because they no longer have any Pods associated with them.

What

Added validation for an already completed job in a more appropriate place.

Testing Done

Added an integration test.

Signed-off-by: Miroslav Ivanov miroslavi@vmware.com
---
 .../it/DataJobBackoffLimitExceededIT.java     | 71 +++++++++++++++++++
 .../datajobs/it/DataJobDeploymentCrudIT.java  | 52 +++++++-------
 .../it/DataJobInitContainerOOMIT.java         |  8 +--
 .../datajobs/it/DataJobPropertiesIT.java      |  4 --
 .../TestJobImageBuilderDynamicVdkImageIT.java | 29 ++++----
 .../datajobs/it/UploadSourceValidationIT.java |  8 +--
 .../taurus/datajobs/it/common/BaseIT.java     |  3 +
 .../datajobs/it/common/JobExecutionUtil.java  |  6 +-
 .../backoff_limit_exceeded_cron_job.yaml      | 41 +++++++++++
 .../fast_failing_cron_job.yaml                | 41 +++++++++++
 .../taurus/service/KubernetesService.java     |  6 --
 .../execution/JobExecutionService.java        |  6 +-
 .../monitoring/DataJobMonitorTest.java        | 71 +++++++++++++++++++
 13 files changed, 278 insertions(+), 68 deletions(-)
 create mode 100644 projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobBackoffLimitExceededIT.java
 create mode 100644 projects/control-service/projects/pipelines_control_service/src/integration-test/resources/data_job_templates/backoff_limit_exceeded_cron_job.yaml
 create mode 100644 projects/control-service/projects/pipelines_control_service/src/integration-test/resources/data_job_templates/fast_failing_cron_job.yaml

diff --git a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobBackoffLimitExceededIT.java b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobBackoffLimitExceededIT.java
new file mode 100644
index 0000000000..5c146530f9
--- /dev/null
+++ b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobBackoffLimitExceededIT.java
@@ -0,0 +1,71 @@
+/*
+ * Copyright 2021-2023 VMware, Inc.
+ * SPDX-License-Identifier: Apache-2.0 + */ + +package com.vmware.taurus.datajobs.it; + +import com.vmware.taurus.ControlplaneApplication; +import com.vmware.taurus.controlplane.model.data.DataJobExecution; +import com.vmware.taurus.datajobs.it.common.BaseIT; +import com.vmware.taurus.datajobs.it.common.DataJobDeploymentExtension; +import com.vmware.taurus.datajobs.it.common.JobExecutionUtil; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.tuple.ImmutablePair; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.RegisterExtension; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.TestPropertySource; + +import static com.vmware.taurus.datajobs.it.common.JobExecutionUtil.*; + +@Slf4j +@TestPropertySource( + properties = { + // This is a standard cron job template except activeDeadlineSeconds is set to 1 + "datajobs.control.k8s.data.job.template.file=data_job_templates/backoff_limit_exceeded_cron_job.yaml" + }) +@SpringBootTest( + webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT, + classes = ControlplaneApplication.class) +public class DataJobBackoffLimitExceededIT extends BaseIT { + + @RegisterExtension + static DataJobDeploymentExtension dataJobDeploymentExtension = new DataJobDeploymentExtension(); + + @Test + public void testDataJob_causesBackoffLimitExceeded_shouldCompleteWithUserError( + String jobName, String teamName, String username, String deploymentId) throws Exception { + // manually start job execution + ImmutablePair executeDataJobResult = + JobExecutionUtil.executeDataJob(jobName, teamName, username, deploymentId, mockMvc); + String opId = executeDataJobResult.getLeft(); + String executionId = executeDataJobResult.getRight(); + + // Check the data job execution status + testDataJobExecutionRead( + executionId, + DataJobExecution.StatusEnum.USER_ERROR, + opId, + jobName, + teamName, + username, + mockMvc); + testDataJobExecutionList( + executionId, + DataJobExecution.StatusEnum.USER_ERROR, + opId, + jobName, + teamName, + username, + mockMvc); + testDataJobDeploymentExecutionList( + executionId, + DataJobExecution.StatusEnum.USER_ERROR, + opId, + jobName, + teamName, + username, + mockMvc); + } +} diff --git a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobDeploymentCrudIT.java b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobDeploymentCrudIT.java index 9c32ebf485..3e58d91668 100644 --- a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobDeploymentCrudIT.java +++ b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobDeploymentCrudIT.java @@ -11,7 +11,6 @@ import com.vmware.taurus.controlplane.model.data.DataJobMode; import com.vmware.taurus.controlplane.model.data.DataJobVersion; import com.vmware.taurus.datajobs.it.common.BaseIT; -import com.vmware.taurus.datajobs.it.common.JobExecutionUtil; import com.vmware.taurus.service.deploy.JobImageDeployer; import com.vmware.taurus.service.model.JobDeploymentStatus; import org.apache.commons.io.IOUtils; @@ -51,9 +50,6 @@ webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT, classes = ControlplaneApplication.class) public class DataJobDeploymentCrudIT extends BaseIT { - - private static final String TEST_JOB_NAME = - 
JobExecutionUtil.generateJobName(DataJobDeploymentCrudIT.class.getSimpleName()); private static final Object DEPLOYMENT_ID = "testing"; @TestConfiguration @@ -70,7 +66,7 @@ public TaskExecutor taskExecutor() { @BeforeEach public void setup() throws Exception { - String dataJobRequestBody = getDataJobRequestBody(TEST_TEAM_NAME, TEST_JOB_NAME); + String dataJobRequestBody = getDataJobRequestBody(TEST_TEAM_NAME, testJobName); // Execute create job mockMvc @@ -89,7 +85,7 @@ public void setup() throws Exception { s.endsWith( String.format( "/data-jobs/for-team/%s/jobs/%s", - TEST_TEAM_NAME, TEST_JOB_NAME))))); + TEST_TEAM_NAME, testJobName))))); } @Test @@ -104,7 +100,7 @@ public void testDataJobDeploymentCrud() throws Exception { mockMvc .perform( post(String.format( - "/data-jobs/for-team/%s/jobs/%s/sources", TEST_TEAM_NAME, TEST_JOB_NAME)) + "/data-jobs/for-team/%s/jobs/%s/sources", TEST_TEAM_NAME, testJobName)) .content(jobZipBinary) .contentType(MediaType.APPLICATION_OCTET_STREAM)) .andExpect(status().isUnauthorized()); @@ -114,7 +110,7 @@ public void testDataJobDeploymentCrud() throws Exception { mockMvc .perform( post(String.format( - "/data-jobs/for-team/%s/jobs/%s/sources", TEST_TEAM_NAME, TEST_JOB_NAME)) + "/data-jobs/for-team/%s/jobs/%s/sources", TEST_TEAM_NAME, testJobName)) .with(user("user")) .content(jobZipBinary) .contentType(MediaType.APPLICATION_OCTET_STREAM)) @@ -136,7 +132,7 @@ public void testDataJobDeploymentCrud() throws Exception { mockMvc .perform( post(String.format( - "/data-jobs/for-team/%s/jobs/%s/sources", TEST_TEAM_WRONG_NAME, TEST_JOB_NAME)) + "/data-jobs/for-team/%s/jobs/%s/sources", TEST_TEAM_WRONG_NAME, testJobName)) .with(user("user")) .content(jobZipBinary) .contentType(MediaType.APPLICATION_OCTET_STREAM)) @@ -146,7 +142,7 @@ public void testDataJobDeploymentCrud() throws Exception { mockMvc .perform( post(String.format( - "/data-jobs/for-team/%s/jobs/%s/deployments", TEST_TEAM_NAME, TEST_JOB_NAME)) + "/data-jobs/for-team/%s/jobs/%s/deployments", TEST_TEAM_NAME, testJobName)) .content(dataJobDeploymentRequestBody) .contentType(MediaType.APPLICATION_JSON)) .andExpect(status().isUnauthorized()); @@ -155,7 +151,7 @@ public void testDataJobDeploymentCrud() throws Exception { mockMvc .perform( post(String.format( - "/data-jobs/for-team/%s/jobs/%s/deployments", TEST_TEAM_NAME, TEST_JOB_NAME)) + "/data-jobs/for-team/%s/jobs/%s/deployments", TEST_TEAM_NAME, testJobName)) .with(user("user")) .content(dataJobDeploymentRequestBody) .contentType(MediaType.APPLICATION_JSON)) @@ -166,13 +162,13 @@ public void testDataJobDeploymentCrud() throws Exception { .perform( post(String.format( "/data-jobs/for-team/%s/jobs/%s/deployments", - TEST_TEAM_WRONG_NAME, TEST_JOB_NAME)) + TEST_TEAM_WRONG_NAME, testJobName)) .with(user("user")) .content(dataJobDeploymentRequestBody) .contentType(MediaType.APPLICATION_JSON)) .andExpect(status().isNotFound()); - String jobDeploymentName = JobImageDeployer.getCronJobName(TEST_JOB_NAME); + String jobDeploymentName = JobImageDeployer.getCronJobName(testJobName); // Verify job deployment created Optional cronJobOptional = dataJobsKubernetesService.readCronJob(jobDeploymentName); @@ -189,7 +185,7 @@ public void testDataJobDeploymentCrud() throws Exception { .perform( get(String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_NAME, testJobName, DEPLOYMENT_ID)) .contentType(MediaType.APPLICATION_JSON)) .andExpect(status().isUnauthorized()); @@ -199,7 +195,7 @@ public void 
testDataJobDeploymentCrud() throws Exception { .perform( get(String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_NAME, testJobName, DEPLOYMENT_ID)) .with(user("user")) .contentType(MediaType.APPLICATION_JSON)) .andExpect(status().isOk()) @@ -225,7 +221,7 @@ public void testDataJobDeploymentCrud() throws Exception { .perform( get(String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_WRONG_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_WRONG_NAME, testJobName, DEPLOYMENT_ID)) .with(user("user")) .contentType(MediaType.APPLICATION_JSON)) .andExpect(status().isNotFound()); @@ -236,7 +232,7 @@ public void testDataJobDeploymentCrud() throws Exception { patch( String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_NAME, testJobName, DEPLOYMENT_ID)) .content(getDataJobDeploymentEnableRequestBody(false)) .contentType(MediaType.APPLICATION_JSON)) .andExpect(status().isUnauthorized()); @@ -247,7 +243,7 @@ public void testDataJobDeploymentCrud() throws Exception { patch( String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_NAME, testJobName, DEPLOYMENT_ID)) .with(user("user")) .content(getDataJobDeploymentEnableRequestBody(false)) .contentType(MediaType.APPLICATION_JSON)) @@ -259,7 +255,7 @@ public void testDataJobDeploymentCrud() throws Exception { patch( String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_WRONG_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_WRONG_NAME, testJobName, DEPLOYMENT_ID)) .with(user("user")) .content(getDataJobDeploymentEnableRequestBody(false)) .contentType(MediaType.APPLICATION_JSON)) @@ -277,7 +273,7 @@ public void testDataJobDeploymentCrud() throws Exception { patch( String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_NAME, testJobName, DEPLOYMENT_ID)) .with(user("user")) .content(getDataJobDeploymentVdkVersionRequestBody("new_vdk_version_tag")) .contentType(MediaType.APPLICATION_JSON)) @@ -289,7 +285,7 @@ public void testDataJobDeploymentCrud() throws Exception { patch( String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_NAME, testJobName, DEPLOYMENT_ID)) .with(user("user")) .content(getDataJobDeploymentEnableRequestBody(false)) .contentType(MediaType.APPLICATION_JSON)) @@ -301,7 +297,7 @@ public void testDataJobDeploymentCrud() throws Exception { patch( String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_NAME, testJobName, DEPLOYMENT_ID)) .with(user("user")) .content(getDataJobDeploymentVdkVersionRequestBody("")) .contentType(MediaType.APPLICATION_JSON)) @@ -312,7 +308,7 @@ public void testDataJobDeploymentCrud() throws Exception { .perform( get(String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_NAME, testJobName, DEPLOYMENT_ID)) .with(user("user")) .contentType(MediaType.APPLICATION_JSON)) .andExpect(status().isOk()) @@ -324,7 +320,7 @@ public void testDataJobDeploymentCrud() throws Exception { delete( String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_NAME, testJobName, DEPLOYMENT_ID)) .contentType(MediaType.APPLICATION_JSON)) 
.andExpect(status().isUnauthorized()); @@ -334,7 +330,7 @@ public void testDataJobDeploymentCrud() throws Exception { delete( String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_WRONG_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_WRONG_NAME, testJobName, DEPLOYMENT_ID)) .with(user("user")) .contentType(MediaType.APPLICATION_JSON)) .andExpect(status().isNotFound()); @@ -345,7 +341,7 @@ public void testDataJobDeploymentCrud() throws Exception { delete( String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_NAME, testJobName, DEPLOYMENT_ID)) .with(user("user")) .contentType(MediaType.APPLICATION_JSON)) .andExpect(status().isAccepted()); @@ -363,7 +359,7 @@ public void cleanUp() throws Exception { .perform( delete( String.format( - "/data-jobs/for-team/%s/jobs/%s/sources", TEST_TEAM_NAME, TEST_JOB_NAME)) + "/data-jobs/for-team/%s/jobs/%s/sources", TEST_TEAM_NAME, testJobName)) .with(user("user"))) .andExpect(status().isOk()); } @@ -377,7 +373,7 @@ public void testDataJobDeleteSource() throws Exception { mockMvc .perform( post(String.format( - "/data-jobs/for-team/%s/jobs/%s/sources", TEST_TEAM_NAME, TEST_JOB_NAME)) + "/data-jobs/for-team/%s/jobs/%s/sources", TEST_TEAM_NAME, testJobName)) .with(user("user")) .content(jobZipBinary) .contentType(MediaType.APPLICATION_OCTET_STREAM)) diff --git a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobInitContainerOOMIT.java b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobInitContainerOOMIT.java index 86d1e52566..81a9a60219 100644 --- a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobInitContainerOOMIT.java +++ b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobInitContainerOOMIT.java @@ -27,7 +27,7 @@ // This is a standard cron job template except restartPolicy is set to never so that when a // job runs out of memory it is // not retied but instead reports more quickly that it is a platform error - "datajobs.control.k8s.data.job.template.file=fast_failing_cron_job.yaml" + "datajobs.control.k8s.data.job.template.file=data_job_templates/fast_failing_cron_job.yaml" }) @SpringBootTest( webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT, @@ -49,7 +49,7 @@ public void testDataJob_causesOOM_shouldCompleteWithUserError( // Check the data job execution status testDataJobExecutionRead( executionId, - DataJobExecution.StatusEnum.PLATFORM_ERROR, + DataJobExecution.StatusEnum.USER_ERROR, opId, jobName, teamName, @@ -57,7 +57,7 @@ public void testDataJob_causesOOM_shouldCompleteWithUserError( mockMvc); testDataJobExecutionList( executionId, - DataJobExecution.StatusEnum.PLATFORM_ERROR, + DataJobExecution.StatusEnum.USER_ERROR, opId, jobName, teamName, @@ -65,7 +65,7 @@ public void testDataJob_causesOOM_shouldCompleteWithUserError( mockMvc); testDataJobDeploymentExecutionList( executionId, - DataJobExecution.StatusEnum.PLATFORM_ERROR, + DataJobExecution.StatusEnum.USER_ERROR, opId, jobName, teamName, diff --git a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobPropertiesIT.java 
b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobPropertiesIT.java index 6c37587f34..3485d9af91 100644 --- a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobPropertiesIT.java +++ b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobPropertiesIT.java @@ -7,9 +7,7 @@ import com.vmware.taurus.ControlplaneApplication; import com.vmware.taurus.datajobs.it.common.BaseIT; -import com.vmware.taurus.properties.service.PropertiesRepository; import org.junit.jupiter.api.Test; -import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.context.SpringBootTest; import org.springframework.http.HttpHeaders; import org.springframework.http.MediaType; @@ -27,8 +25,6 @@ classes = ControlplaneApplication.class) public class DataJobPropertiesIT extends BaseIT { - @Autowired private PropertiesRepository propertiesRepository; - @Test public void testDataJobProperties() throws Exception { // Setup diff --git a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/TestJobImageBuilderDynamicVdkImageIT.java b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/TestJobImageBuilderDynamicVdkImageIT.java index 494c2f080b..5319cc2331 100644 --- a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/TestJobImageBuilderDynamicVdkImageIT.java +++ b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/TestJobImageBuilderDynamicVdkImageIT.java @@ -11,7 +11,6 @@ import com.vmware.taurus.controlplane.model.data.DataJobMode; import com.vmware.taurus.controlplane.model.data.DataJobVersion; import com.vmware.taurus.datajobs.it.common.BaseIT; -import com.vmware.taurus.datajobs.it.common.JobExecutionUtil; import com.vmware.taurus.service.deploy.JobImageDeployer; import com.vmware.taurus.service.model.JobDeploymentStatus; import org.apache.commons.io.IOUtils; @@ -54,8 +53,6 @@ webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT, classes = ControlplaneApplication.class) public class TestJobImageBuilderDynamicVdkImageIT extends BaseIT { - private static final String TEST_JOB_NAME = - JobExecutionUtil.generateJobName(TestJobImageBuilderDynamicVdkImageIT.class.getSimpleName()); private static final Object DEPLOYMENT_ID = "testing"; @TestConfiguration @@ -72,7 +69,7 @@ public TaskExecutor taskExecutor() { @BeforeEach public void setup() throws Exception { - String dataJobRequestBody = getDataJobRequestBody(TEST_TEAM_NAME, TEST_JOB_NAME); + String dataJobRequestBody = getDataJobRequestBody(TEST_TEAM_NAME, testJobName); // Execute create job mockMvc @@ -91,7 +88,7 @@ public void setup() throws Exception { s.endsWith( String.format( "/data-jobs/for-team/%s/jobs/%s", - TEST_TEAM_NAME, TEST_JOB_NAME))))); + TEST_TEAM_NAME, testJobName))))); } @Test @@ -106,7 +103,7 @@ public void testDataJobDeploymentDynamicVdkVersion() throws Exception { ResultActions resultAction = mockMvc.perform( post(String.format( - "/data-jobs/for-team/%s/jobs/%s/sources", TEST_TEAM_NAME, TEST_JOB_NAME)) + "/data-jobs/for-team/%s/jobs/%s/sources", TEST_TEAM_NAME, testJobName)) .with(user("user")) .content(jobZipBinary) 
.contentType(MediaType.APPLICATION_OCTET_STREAM)); @@ -135,13 +132,13 @@ public void testDataJobDeploymentDynamicVdkVersion() throws Exception { mockMvc .perform( post(String.format( - "/data-jobs/for-team/%s/jobs/%s/deployments", TEST_TEAM_NAME, TEST_JOB_NAME)) + "/data-jobs/for-team/%s/jobs/%s/deployments", TEST_TEAM_NAME, testJobName)) .with(user("user")) .content(dataJobDeploymentRequestBody) .contentType(MediaType.APPLICATION_JSON)) .andExpect(status().isAccepted()); - String jobDeploymentName = JobImageDeployer.getCronJobName(TEST_JOB_NAME); + String jobDeploymentName = JobImageDeployer.getCronJobName(testJobName); // Verify job deployment created Optional cronJobOptional = dataJobsKubernetesService.readCronJob(jobDeploymentName); @@ -159,7 +156,7 @@ public void testDataJobDeploymentDynamicVdkVersion() throws Exception { .perform( get(String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_NAME, testJobName, DEPLOYMENT_ID)) .with(user("user")) .contentType(MediaType.APPLICATION_JSON)) .andExpect(status().isOk()) @@ -186,7 +183,7 @@ public void testDataJobDeploymentDynamicVdkVersion() throws Exception { patch( String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_NAME, testJobName, DEPLOYMENT_ID)) .with(user("user")) .content(getDataJobDeploymentEnableRequestBody(false)) .contentType(MediaType.APPLICATION_JSON)) @@ -204,7 +201,7 @@ public void testDataJobDeploymentDynamicVdkVersion() throws Exception { patch( String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_NAME, testJobName, DEPLOYMENT_ID)) .with(user("user")) .content(getDataJobDeploymentVdkVersionRequestBody("new_vdk_version_tag")) .contentType(MediaType.APPLICATION_JSON)) @@ -215,7 +212,7 @@ public void testDataJobDeploymentDynamicVdkVersion() throws Exception { .perform( get(String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_NAME, testJobName, DEPLOYMENT_ID)) .with(user("user")) .contentType(MediaType.APPLICATION_JSON)) .andExpect(status().isOk()) @@ -225,13 +222,13 @@ public void testDataJobDeploymentDynamicVdkVersion() throws Exception { mockMvc .perform( post(String.format( - "/data-jobs/for-team/%s/jobs/%s/deployments", TEST_TEAM_NAME, TEST_JOB_NAME)) + "/data-jobs/for-team/%s/jobs/%s/deployments", TEST_TEAM_NAME, testJobName)) .with(user("user")) .content(getDataJobDeploymentRequestBody(testJobVersionSha, "3.8")) .contentType(MediaType.APPLICATION_JSON)) .andExpect(status().isAccepted()); - jobDeploymentName = JobImageDeployer.getCronJobName(TEST_JOB_NAME); + jobDeploymentName = JobImageDeployer.getCronJobName(testJobName); // Verify job deployment updated properly cronJobOptional = dataJobsKubernetesService.readCronJob(jobDeploymentName); Assertions.assertTrue(cronJobOptional.isPresent()); @@ -250,7 +247,7 @@ public void testDataJobDeploymentDynamicVdkVersion() throws Exception { delete( String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_NAME, testJobName, DEPLOYMENT_ID)) .with(user("user")) .contentType(MediaType.APPLICATION_JSON)) .andExpect(status().isAccepted()); @@ -268,7 +265,7 @@ public void cleanUp() throws Exception { .perform( delete( String.format( - "/data-jobs/for-team/%s/jobs/%s/sources", TEST_TEAM_NAME, TEST_JOB_NAME)) + 
"/data-jobs/for-team/%s/jobs/%s/sources", TEST_TEAM_NAME, testJobName)) .with(user("user"))) .andExpect(status().isOk()); } diff --git a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/UploadSourceValidationIT.java b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/UploadSourceValidationIT.java index 7dc4719723..00d5b5dda9 100644 --- a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/UploadSourceValidationIT.java +++ b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/UploadSourceValidationIT.java @@ -7,7 +7,6 @@ import com.vmware.taurus.ControlplaneApplication; import com.vmware.taurus.datajobs.it.common.BaseIT; -import com.vmware.taurus.datajobs.it.common.JobExecutionUtil; import org.apache.commons.io.IOUtils; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -28,12 +27,9 @@ webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT, classes = ControlplaneApplication.class) public class UploadSourceValidationIT extends BaseIT { - protected static final String TEST_JOB_NAME = - JobExecutionUtil.generateJobName(UploadSourceValidationIT.class.getSimpleName()); - @BeforeEach public void setup() throws Exception { - String dataJobRequestBody = getDataJobRequestBody(TEST_TEAM_NAME, TEST_JOB_NAME); + String dataJobRequestBody = getDataJobRequestBody(TEST_TEAM_NAME, testJobName); // Execute create job mockMvc .perform( @@ -53,7 +49,7 @@ public void testDataJobUploadSource() throws Exception { mockMvc .perform( post(String.format( - "/data-jobs/for-team/%s/jobs/%s/sources", TEST_TEAM_NAME, TEST_JOB_NAME)) + "/data-jobs/for-team/%s/jobs/%s/sources", TEST_TEAM_NAME, testJobName)) .with(user("user")) .content(jobZipBinary) .contentType(MediaType.APPLICATION_OCTET_STREAM)) diff --git a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/common/BaseIT.java b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/common/BaseIT.java index c986652e75..1ae111c32c 100644 --- a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/common/BaseIT.java +++ b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/common/BaseIT.java @@ -74,6 +74,9 @@ public class BaseIT { private boolean ownsControlNamespace = false; + protected final String testJobName = + JobExecutionUtil.generateJobName(this.getClass().getSimpleName()); + @BeforeEach public void before() { log.info("Running test with: {} bytes of memory.", Runtime.getRuntime().totalMemory()); diff --git a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/common/JobExecutionUtil.java b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/common/JobExecutionUtil.java index 18bb815bad..c90219f85e 100644 --- a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/common/JobExecutionUtil.java +++ b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/common/JobExecutionUtil.java 
@@ -74,9 +74,9 @@ public static DataJobExecution createDataJobExecution( .endTime(endTime) .type(ExecutionType.MANUAL) .status(executionStatus) - .resourcesCpuRequest(1F) - .resourcesCpuLimit(2F) - .resourcesMemoryRequest(500) + .resourcesCpuRequest(0.1F) + .resourcesCpuLimit(1F) + .resourcesMemoryRequest(100) .resourcesMemoryLimit(1000) .message("message") .lastDeployedBy("test_user") diff --git a/projects/control-service/projects/pipelines_control_service/src/integration-test/resources/data_job_templates/backoff_limit_exceeded_cron_job.yaml b/projects/control-service/projects/pipelines_control_service/src/integration-test/resources/data_job_templates/backoff_limit_exceeded_cron_job.yaml new file mode 100644 index 0000000000..a2299b1d96 --- /dev/null +++ b/projects/control-service/projects/pipelines_control_service/src/integration-test/resources/data_job_templates/backoff_limit_exceeded_cron_job.yaml @@ -0,0 +1,41 @@ +# Copyright 2021-2023 VMware, Inc. +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: batch/v1beta1 +kind: CronJob +metadata: + annotations: # merged with additional annotations from TPCS + name: cronjob-template-name # overridden by TPCS +spec: + concurrencyPolicy: Forbid + failedJobsHistoryLimit: 2 + schedule: "*/10 * * * *" # overridden by TPCS + startingDeadlineSeconds: 1800 + successfulJobsHistoryLimit: 1 + suspend: false # overridden by TPCS + jobTemplate: + metadata: + annotations: # merged with additional annotations from TPCS + labels: # merged with additional labels from TPCS + spec: + activeDeadlineSeconds: 1 + backoffLimit: 3 + template: + metadata: + labels: # merged with additional labels from TPCS + spec: + containers: # overridden by TPCS + - command: + - /bin/sh + - -c + - date; echo '************** Cronjob Template ******************' + name: cronjob-template-container-name + image: busybox + imagePullPolicy: IfNotPresent + restartPolicy: Never + securityContext: + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + automountServiceAccountToken: false + ttlSecondsAfterFinished: 600 diff --git a/projects/control-service/projects/pipelines_control_service/src/integration-test/resources/data_job_templates/fast_failing_cron_job.yaml b/projects/control-service/projects/pipelines_control_service/src/integration-test/resources/data_job_templates/fast_failing_cron_job.yaml new file mode 100644 index 0000000000..e497796e69 --- /dev/null +++ b/projects/control-service/projects/pipelines_control_service/src/integration-test/resources/data_job_templates/fast_failing_cron_job.yaml @@ -0,0 +1,41 @@ +# Copyright 2021-2023 VMware, Inc. 
+# SPDX-License-Identifier: Apache-2.0 + +apiVersion: batch/v1beta1 +kind: CronJob +metadata: + annotations: # merged with additional annotations from TPCS + name: cronjob-template-name # overridden by TPCS +spec: + concurrencyPolicy: Forbid + failedJobsHistoryLimit: 2 + schedule: "*/10 * * * *" # overridden by TPCS + startingDeadlineSeconds: 1800 + successfulJobsHistoryLimit: 1 + suspend: false # overridden by TPCS + jobTemplate: + metadata: + annotations: # merged with additional annotations from TPCS + labels: # merged with additional labels from TPCS + spec: + activeDeadlineSeconds: 43200 + backoffLimit: 3 + template: + metadata: + labels: # merged with additional labels from TPCS + spec: + containers: # overridden by TPCS + - command: + - /bin/sh + - -c + - date; echo '************** Cronjob Template ******************' + name: cronjob-template-container-name + image: busybox + imagePullPolicy: IfNotPresent + restartPolicy: Never + securityContext: + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + automountServiceAccountToken: false + ttlSecondsAfterFinished: 600 diff --git a/projects/control-service/projects/pipelines_control_service/src/main/java/com/vmware/taurus/service/KubernetesService.java b/projects/control-service/projects/pipelines_control_service/src/main/java/com/vmware/taurus/service/KubernetesService.java index d026fb6a70..e70bfbf3be 100644 --- a/projects/control-service/projects/pipelines_control_service/src/main/java/com/vmware/taurus/service/KubernetesService.java +++ b/projects/control-service/projects/pipelines_control_service/src/main/java/com/vmware/taurus/service/KubernetesService.java @@ -1582,12 +1582,6 @@ Optional getJobExecutionStatus(V1Job job, JobStatusCondition jobSt jobExecutionStatusBuilder.succeeded( Optional.ofNullable(jobStatusCondition).map(JobStatusCondition::isSuccess).orElse(null)); - // omits events that come after the Data Job completion - if (jobExecutionStatusBuilder.succeeded != null - && StringUtils.isBlank(jobExecutionStatusBuilder.initContainerTerminationReason)) { - return Optional.empty(); - } - return Optional.of(jobExecutionStatusBuilder.build()); } diff --git a/projects/control-service/projects/pipelines_control_service/src/main/java/com/vmware/taurus/service/execution/JobExecutionService.java b/projects/control-service/projects/pipelines_control_service/src/main/java/com/vmware/taurus/service/execution/JobExecutionService.java index b884b26a75..e4327104af 100644 --- a/projects/control-service/projects/pipelines_control_service/src/main/java/com/vmware/taurus/service/execution/JobExecutionService.java +++ b/projects/control-service/projects/pipelines_control_service/src/main/java/com/vmware/taurus/service/execution/JobExecutionService.java @@ -300,7 +300,11 @@ public Optional updateJobExecu // with null. 
var finalStatusSet = new HashSet<>( - List.of(ExecutionStatus.CANCELLED, ExecutionStatus.SUCCEEDED, ExecutionStatus.SKIPPED)); + List.of( + ExecutionStatus.CANCELLED, + ExecutionStatus.SUCCEEDED, + ExecutionStatus.SKIPPED, + ExecutionStatus.USER_ERROR)); ExecutionStatus executionStatus = executionResult.getExecutionStatus(); // Optimization: diff --git a/projects/control-service/projects/pipelines_control_service/src/test/java/com/vmware/taurus/service/monitoring/DataJobMonitorTest.java b/projects/control-service/projects/pipelines_control_service/src/test/java/com/vmware/taurus/service/monitoring/DataJobMonitorTest.java index 11d6ad846d..d3521a1ded 100644 --- a/projects/control-service/projects/pipelines_control_service/src/test/java/com/vmware/taurus/service/monitoring/DataJobMonitorTest.java +++ b/projects/control-service/projects/pipelines_control_service/src/test/java/com/vmware/taurus/service/monitoring/DataJobMonitorTest.java @@ -824,6 +824,77 @@ void testJobExecutionStatus_fromPlatformToUser_shouldUpdateExecution() { gauges.stream().findFirst().get().value()); } + @Test + @Order(34) + void testJobExecutionStatus_fromUserErrorToPlatformError_shouldNotUpdateExecution() { + // Clean up from previous tests + jobsRepository.deleteAll(); + dataJobMonitor.clearDataJobsGaugesNotIn(Collections.emptyList()); + + // Create data job + String jobId = "job-id-test"; + var dataJob = + new DataJob( + jobId, + new JobConfig(), + DeploymentStatus.NONE, + ExecutionStatus.USER_ERROR, + "old-execution-id"); + jobsRepository.save(dataJob); + + // Change status to USER_ERROR + JobExecution jobExecutionUserError = + buildJobExecutionStatus( + jobId, + "last-execution-id", + ExecutionStatus.USER_ERROR.getPodStatus(), + false, + OffsetDateTime.now().minus(Duration.ofDays(2)), + OffsetDateTime.now().minus(Duration.ofDays(1))); + dataJobMonitor.recordJobExecutionStatus(jobExecutionUserError); + + // Check status is saved OK. + Optional actualJobUserError = + jobsRepository.findById(jobExecutionUserError.getJobName()); + Assertions.assertFalse(actualJobUserError.isEmpty()); + Assertions.assertEquals( + ExecutionStatus.USER_ERROR, actualJobUserError.get().getLastExecutionStatus()); + + // Check gauge status + var gaugesUserError = + meterRegistry.find(DataJobMetrics.TAURUS_DATAJOB_TERMINATION_STATUS_METRIC_NAME).gauges(); + Assertions.assertEquals(1, gaugesUserError.size()); + Assertions.assertEquals( + ExecutionStatus.USER_ERROR.getAlertValue().doubleValue(), + gaugesUserError.stream().findFirst().get().value()); + + // Change status to PLATFORM_ERROR + JobExecution jobExecutionPlatformError = + buildJobExecutionStatus( + jobId, + "last-execution-id", + ExecutionStatus.PLATFORM_ERROR.getPodStatus(), + false, + OffsetDateTime.now().minus(Duration.ofDays(2)), + OffsetDateTime.now().minus(Duration.ofDays(1))); + dataJobMonitor.recordJobExecutionStatus(jobExecutionPlatformError); + + // Check status is not saved OK. 
+ Optional actualJobPlatformError = + jobsRepository.findById(jobExecutionUserError.getJobName()); + Assertions.assertFalse(actualJobPlatformError.isEmpty()); + Assertions.assertEquals( + ExecutionStatus.USER_ERROR, actualJobPlatformError.get().getLastExecutionStatus()); + + // Check gauge status + var gaugesPlatformError = + meterRegistry.find(DataJobMetrics.TAURUS_DATAJOB_TERMINATION_STATUS_METRIC_NAME).gauges(); + Assertions.assertEquals(1, gaugesPlatformError.size()); + Assertions.assertEquals( + ExecutionStatus.USER_ERROR.getAlertValue().doubleValue(), + gaugesPlatformError.stream().findFirst().get().value()); + } + private static String randomId(String prefix) { return prefix + UUID.randomUUID(); } From 1d211f304cfd02414129829c8741f8d8d0e0f540 Mon Sep 17 00:00:00 2001 From: Miroslav Ivanov Date: Tue, 23 May 2023 17:11:28 +0300 Subject: [PATCH 2/9] Fixed test --- .../taurus/datajobs/it/DataJobInitContainerOOMIT.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobInitContainerOOMIT.java b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobInitContainerOOMIT.java index 81a9a60219..aa8dc5afd8 100644 --- a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobInitContainerOOMIT.java +++ b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobInitContainerOOMIT.java @@ -49,7 +49,7 @@ public void testDataJob_causesOOM_shouldCompleteWithUserError( // Check the data job execution status testDataJobExecutionRead( executionId, - DataJobExecution.StatusEnum.USER_ERROR, + DataJobExecution.StatusEnum.PLATFORM_ERROR, opId, jobName, teamName, @@ -57,7 +57,7 @@ public void testDataJob_causesOOM_shouldCompleteWithUserError( mockMvc); testDataJobExecutionList( executionId, - DataJobExecution.StatusEnum.USER_ERROR, + DataJobExecution.StatusEnum.PLATFORM_ERROR, opId, jobName, teamName, @@ -65,7 +65,7 @@ public void testDataJob_causesOOM_shouldCompleteWithUserError( mockMvc); testDataJobDeploymentExecutionList( executionId, - DataJobExecution.StatusEnum.USER_ERROR, + DataJobExecution.StatusEnum.PLATFORM_ERROR, opId, jobName, teamName, From aed4af620779b2431fdd61552d9cddb47ed022bd Mon Sep 17 00:00:00 2001 From: Miroslav Ivanov Date: Tue, 23 May 2023 17:56:29 +0300 Subject: [PATCH 3/9] Refactored the code --- .../vmware/taurus/datajobs/it/DataJobInitContainerOOMIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobInitContainerOOMIT.java b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobInitContainerOOMIT.java index aa8dc5afd8..64ab5531a4 100644 --- a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobInitContainerOOMIT.java +++ b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobInitContainerOOMIT.java @@ -42,7 +42,7 @@ public void testDataJob_causesOOM_shouldCompleteWithUserError( String jobName, String teamName, String username, String deploymentId) throws Exception { // manually start job 
execution ImmutablePair executeDataJobResult = - JobExecutionUtil.executeDataJob(jobName, teamName, username, deploymentId, mockMvc); + executeDataJob(jobName, teamName, username, deploymentId, mockMvc); String opId = executeDataJobResult.getLeft(); String executionId = executeDataJobResult.getRight(); From b8e54ccfc1ded39feafd57e5a8fcf9884e3dafaf Mon Sep 17 00:00:00 2001 From: github-actions <> Date: Tue, 23 May 2023 14:57:47 +0000 Subject: [PATCH 4/9] Google Java Format --- .../com/vmware/taurus/datajobs/it/DataJobInitContainerOOMIT.java | 1 - 1 file changed, 1 deletion(-) diff --git a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobInitContainerOOMIT.java b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobInitContainerOOMIT.java index 64ab5531a4..f9b8ea9646 100644 --- a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobInitContainerOOMIT.java +++ b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobInitContainerOOMIT.java @@ -9,7 +9,6 @@ import com.vmware.taurus.controlplane.model.data.DataJobExecution; import com.vmware.taurus.datajobs.it.common.BaseIT; import com.vmware.taurus.datajobs.it.common.DataJobDeploymentExtension; -import com.vmware.taurus.datajobs.it.common.JobExecutionUtil; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.tuple.ImmutablePair; import org.junit.jupiter.api.Test; From fc330c1ed6539990e5923ae6089765636df8f9fd Mon Sep 17 00:00:00 2001 From: Miroslav Ivanov Date: Thu, 25 May 2023 13:15:10 +0300 Subject: [PATCH 5/9] Refactored the code --- .../it/DataJobMainContainerOOMIT.java | 9 ++++++- .../graphql/it/GraphQLExecutionsIT.java | 26 +++++++++++++------ 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobMainContainerOOMIT.java b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobMainContainerOOMIT.java index a3b438f88a..163c081eb4 100644 --- a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobMainContainerOOMIT.java +++ b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobMainContainerOOMIT.java @@ -12,20 +12,27 @@ import com.vmware.taurus.datajobs.it.common.JobExecutionUtil; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.tuple.ImmutablePair; +import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.RegisterExtension; import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.TestPropertySource; @Slf4j @SpringBootTest( webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT, classes = ControlplaneApplication.class) +@TestPropertySource( + properties = { + "datajobs.job.resources.requests.memory=6Mi", + "datajobs.job.resources.limits.memory=6Mi", + }) public class DataJobMainContainerOOMIT extends BaseIT { @RegisterExtension static DataJobDeploymentExtension dataJobDeploymentExtension = new DataJobDeploymentExtension("oom_job.zip"); - // @Test + @Test public void testDataJob_causesOOM_shouldCompleteWithUserError( String jobName, String 
teamName, String username, String deploymentId) throws Exception { // manually start job execution diff --git a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/graphql/it/GraphQLExecutionsIT.java b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/graphql/it/GraphQLExecutionsIT.java index 11c8c389ba..e1f7c10a31 100644 --- a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/graphql/it/GraphQLExecutionsIT.java +++ b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/graphql/it/GraphQLExecutionsIT.java @@ -21,6 +21,8 @@ import org.springframework.boot.test.context.SpringBootTest; import org.springframework.test.web.servlet.request.MockMvcRequestBuilders; +import java.math.BigDecimal; +import java.math.RoundingMode; import java.time.OffsetDateTime; import java.util.List; @@ -165,14 +167,13 @@ public void testExecutions_filterByStartTimeGte_shouldReturnAllProperties() thro jsonPath( "$.data.content[*].deployment.resources.cpuRequest", Matchers.contains( - dataJobExecution1.getResourcesCpuRequest().doubleValue(), - dataJobExecution2.getResourcesCpuRequest().doubleValue()))) - .andExpect( - jsonPath( - "$.data.content[*].deployment.resources.cpuLimit", - Matchers.contains( - dataJobExecution1.getResourcesCpuLimit().doubleValue(), - dataJobExecution2.getResourcesCpuLimit().doubleValue()))) + convertFloatToDouble(dataJobExecution1.getResourcesCpuRequest()), + convertFloatToDouble(dataJobExecution2.getResourcesCpuRequest())))) + .andExpect(jsonPath( + "$.data.content[*].deployment.resources.cpuLimit", + Matchers.contains( + convertFloatToDouble(dataJobExecution1.getResourcesCpuLimit()), + convertFloatToDouble(dataJobExecution2.getResourcesCpuLimit())))) .andExpect( jsonPath( "$.data.content[*].deployment.resources.memoryRequest", @@ -427,4 +428,13 @@ public void testExecutions_filterByTeamNameIn() throws Exception { "$.data.content[*].id", Matchers.not(Matchers.contains(dataJobExecution2.getId())))); } + + + /** + * Helper method that converts Float to Double and rounds it as scale 2. + * It is necessary because tests' checks resolved Float to <0.1F> but it should be <0.1>. 
+ */ + private static Double convertFloatToDouble(Float value) { + return BigDecimal.valueOf(value).setScale(2, RoundingMode.HALF_UP).doubleValue(); + } } From 389e238627f5f2c54d50b70d2a4e1c831ca4f5fc Mon Sep 17 00:00:00 2001 From: github-actions <> Date: Thu, 25 May 2023 10:16:40 +0000 Subject: [PATCH 6/9] Google Java Format --- .../datajobs/it/DataJobMainContainerOOMIT.java | 8 ++++---- .../taurus/graphql/it/GraphQLExecutionsIT.java | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobMainContainerOOMIT.java b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobMainContainerOOMIT.java index 163c081eb4..e5f20ad160 100644 --- a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobMainContainerOOMIT.java +++ b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobMainContainerOOMIT.java @@ -22,10 +22,10 @@ webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT, classes = ControlplaneApplication.class) @TestPropertySource( - properties = { - "datajobs.job.resources.requests.memory=6Mi", - "datajobs.job.resources.limits.memory=6Mi", - }) + properties = { + "datajobs.job.resources.requests.memory=6Mi", + "datajobs.job.resources.limits.memory=6Mi", + }) public class DataJobMainContainerOOMIT extends BaseIT { @RegisterExtension diff --git a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/graphql/it/GraphQLExecutionsIT.java b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/graphql/it/GraphQLExecutionsIT.java index e1f7c10a31..f73a619960 100644 --- a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/graphql/it/GraphQLExecutionsIT.java +++ b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/graphql/it/GraphQLExecutionsIT.java @@ -169,9 +169,10 @@ public void testExecutions_filterByStartTimeGte_shouldReturnAllProperties() thro Matchers.contains( convertFloatToDouble(dataJobExecution1.getResourcesCpuRequest()), convertFloatToDouble(dataJobExecution2.getResourcesCpuRequest())))) - .andExpect(jsonPath( - "$.data.content[*].deployment.resources.cpuLimit", - Matchers.contains( + .andExpect( + jsonPath( + "$.data.content[*].deployment.resources.cpuLimit", + Matchers.contains( convertFloatToDouble(dataJobExecution1.getResourcesCpuLimit()), convertFloatToDouble(dataJobExecution2.getResourcesCpuLimit())))) .andExpect( @@ -429,10 +430,9 @@ public void testExecutions_filterByTeamNameIn() throws Exception { Matchers.not(Matchers.contains(dataJobExecution2.getId())))); } - /** - * Helper method that converts Float to Double and rounds it as scale 2. - * It is necessary because tests' checks resolved Float to <0.1F> but it should be <0.1>. + * Helper method that converts Float to Double and rounds it as scale 2. It is necessary because + * tests' checks resolved Float to <0.1F> but it should be <0.1>. 
*/ private static Double convertFloatToDouble(Float value) { return BigDecimal.valueOf(value).setScale(2, RoundingMode.HALF_UP).doubleValue(); From bd39be3726f2e85d36c0260a12bcb38a1742c89a Mon Sep 17 00:00:00 2001 From: Miroslav Ivanov Date: Thu, 25 May 2023 13:25:53 +0300 Subject: [PATCH 7/9] Refactored the code --- .../datajobs/it/DataJobMainContainerOOMIT.java | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobMainContainerOOMIT.java b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobMainContainerOOMIT.java index e5f20ad160..c964ef9375 100644 --- a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobMainContainerOOMIT.java +++ b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobMainContainerOOMIT.java @@ -22,10 +22,14 @@ webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT, classes = ControlplaneApplication.class) @TestPropertySource( - properties = { - "datajobs.job.resources.requests.memory=6Mi", - "datajobs.job.resources.limits.memory=6Mi", - }) + properties = { + "datajobs.job.resources.requests.memory=6Mi", + "datajobs.job.resources.limits.memory=6Mi", + // This is a standard cron job template except restartPolicy is set to never so that when a + // job runs out of memory it is + // not retied but instead reports more quickly that it is a platform error + "datajobs.control.k8s.data.job.template.file=data_job_templates/fast_failing_cron_job.yaml" + }) public class DataJobMainContainerOOMIT extends BaseIT { @RegisterExtension From fd2e1173ede70a1023e795a1f0ae2d264dfcc9e3 Mon Sep 17 00:00:00 2001 From: github-actions <> Date: Thu, 25 May 2023 10:28:14 +0000 Subject: [PATCH 8/9] Google Java Format --- .../datajobs/it/DataJobMainContainerOOMIT.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobMainContainerOOMIT.java b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobMainContainerOOMIT.java index c964ef9375..1386f7df78 100644 --- a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobMainContainerOOMIT.java +++ b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobMainContainerOOMIT.java @@ -22,14 +22,14 @@ webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT, classes = ControlplaneApplication.class) @TestPropertySource( - properties = { - "datajobs.job.resources.requests.memory=6Mi", - "datajobs.job.resources.limits.memory=6Mi", - // This is a standard cron job template except restartPolicy is set to never so that when a - // job runs out of memory it is - // not retied but instead reports more quickly that it is a platform error - "datajobs.control.k8s.data.job.template.file=data_job_templates/fast_failing_cron_job.yaml" - }) + properties = { + "datajobs.job.resources.requests.memory=6Mi", + "datajobs.job.resources.limits.memory=6Mi", + // This is a standard cron job template except restartPolicy is set to never so that when a + // job runs 
out of memory it is + // not retied but instead reports more quickly that it is a platform error + "datajobs.control.k8s.data.job.template.file=data_job_templates/fast_failing_cron_job.yaml" + }) public class DataJobMainContainerOOMIT extends BaseIT { @RegisterExtension From 0ca9cc9812eb7691f0b6fe16d19473d9b0db0db0 Mon Sep 17 00:00:00 2001 From: Miroslav Ivanov Date: Thu, 25 May 2023 14:15:00 +0300 Subject: [PATCH 9/9] Refactored the code --- ...xceededIT.java => TestDataJobBackoffLimitExceededIT.java} | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) rename projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/{DataJobBackoffLimitExceededIT.java => TestDataJobBackoffLimitExceededIT.java} (91%) diff --git a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobBackoffLimitExceededIT.java b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/TestDataJobBackoffLimitExceededIT.java similarity index 91% rename from projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobBackoffLimitExceededIT.java rename to projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/TestDataJobBackoffLimitExceededIT.java index 5c146530f9..06b673e7eb 100644 --- a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobBackoffLimitExceededIT.java +++ b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/TestDataJobBackoffLimitExceededIT.java @@ -9,7 +9,6 @@ import com.vmware.taurus.controlplane.model.data.DataJobExecution; import com.vmware.taurus.datajobs.it.common.BaseIT; import com.vmware.taurus.datajobs.it.common.DataJobDeploymentExtension; -import com.vmware.taurus.datajobs.it.common.JobExecutionUtil; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.tuple.ImmutablePair; import org.junit.jupiter.api.Test; @@ -28,7 +27,7 @@ @SpringBootTest( webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT, classes = ControlplaneApplication.class) -public class DataJobBackoffLimitExceededIT extends BaseIT { +public class TestDataJobBackoffLimitExceededIT extends BaseIT { @RegisterExtension static DataJobDeploymentExtension dataJobDeploymentExtension = new DataJobDeploymentExtension(); @@ -38,7 +37,7 @@ public void testDataJob_causesBackoffLimitExceeded_shouldCompleteWithUserError( String jobName, String teamName, String username, String deploymentId) throws Exception { // manually start job execution ImmutablePair executeDataJobResult = - JobExecutionUtil.executeDataJob(jobName, teamName, username, deploymentId, mockMvc); + executeDataJob(jobName, teamName, username, deploymentId, mockMvc); String opId = executeDataJobResult.getLeft(); String executionId = executeDataJobResult.getRight();
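
Note for reviewers: the core rule this series establishes is that PATCH 1/9 removes the early return of Optional.empty() in KubernetesService and adds USER_ERROR to the set of final execution statuses in JobExecutionService, so that once an execution is recorded as USER_ERROR, a later partial event (for example one emitted after the Pod of a deadline-exceeded job is already gone) no longer overwrites that status - which is what the new DataJobMonitorTest case asserts. The following is a minimal, self-contained sketch of that status-transition rule, not Control Service code; the class and method names are hypothetical and only the status names are taken from the diff.

// Illustrative sketch only - mirrors the finalStatusSet logic added to
// JobExecutionService and the expectation in DataJobMonitorTest: a recorded
// terminal status (now including USER_ERROR) is never overwritten by a later,
// partial status event.
import java.util.EnumSet;
import java.util.Set;

public class FinalStatusSketch {

  // Status names taken from the diff; the enum itself is a stand-in.
  enum ExecutionStatus { RUNNING, SUCCEEDED, CANCELLED, SKIPPED, USER_ERROR, PLATFORM_ERROR }

  // Terminal statuses that later Kubernetes events must not override.
  private static final Set<ExecutionStatus> FINAL_STATUSES =
      EnumSet.of(
          ExecutionStatus.CANCELLED,
          ExecutionStatus.SUCCEEDED,
          ExecutionStatus.SKIPPED,
          ExecutionStatus.USER_ERROR);

  static ExecutionStatus resolve(ExecutionStatus recorded, ExecutionStatus incoming) {
    // Keep an already-recorded terminal status; otherwise accept the update.
    return FINAL_STATUSES.contains(recorded) ? recorded : incoming;
  }

  public static void main(String[] args) {
    // A job hit activeDeadlineSeconds and was recorded as USER_ERROR...
    ExecutionStatus recorded = ExecutionStatus.USER_ERROR;
    // ...then its Pod disappears and a PLATFORM_ERROR-looking event arrives.
    ExecutionStatus result = resolve(recorded, ExecutionStatus.PLATFORM_ERROR);
    System.out.println(result); // prints USER_ERROR - the terminal status is preserved
  }
}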