diff --git a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobDeploymentCrudIT.java b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobDeploymentCrudIT.java index 9c32ebf485..3e58d91668 100644 --- a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobDeploymentCrudIT.java +++ b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobDeploymentCrudIT.java @@ -11,7 +11,6 @@ import com.vmware.taurus.controlplane.model.data.DataJobMode; import com.vmware.taurus.controlplane.model.data.DataJobVersion; import com.vmware.taurus.datajobs.it.common.BaseIT; -import com.vmware.taurus.datajobs.it.common.JobExecutionUtil; import com.vmware.taurus.service.deploy.JobImageDeployer; import com.vmware.taurus.service.model.JobDeploymentStatus; import org.apache.commons.io.IOUtils; @@ -51,9 +50,6 @@ webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT, classes = ControlplaneApplication.class) public class DataJobDeploymentCrudIT extends BaseIT { - - private static final String TEST_JOB_NAME = - JobExecutionUtil.generateJobName(DataJobDeploymentCrudIT.class.getSimpleName()); private static final Object DEPLOYMENT_ID = "testing"; @TestConfiguration @@ -70,7 +66,7 @@ public TaskExecutor taskExecutor() { @BeforeEach public void setup() throws Exception { - String dataJobRequestBody = getDataJobRequestBody(TEST_TEAM_NAME, TEST_JOB_NAME); + String dataJobRequestBody = getDataJobRequestBody(TEST_TEAM_NAME, testJobName); // Execute create job mockMvc @@ -89,7 +85,7 @@ public void setup() throws Exception { s.endsWith( String.format( "/data-jobs/for-team/%s/jobs/%s", - TEST_TEAM_NAME, TEST_JOB_NAME))))); + TEST_TEAM_NAME, testJobName))))); } @Test @@ -104,7 +100,7 @@ public void testDataJobDeploymentCrud() 
throws Exception { mockMvc .perform( post(String.format( - "/data-jobs/for-team/%s/jobs/%s/sources", TEST_TEAM_NAME, TEST_JOB_NAME)) + "/data-jobs/for-team/%s/jobs/%s/sources", TEST_TEAM_NAME, testJobName)) .content(jobZipBinary) .contentType(MediaType.APPLICATION_OCTET_STREAM)) .andExpect(status().isUnauthorized()); @@ -114,7 +110,7 @@ public void testDataJobDeploymentCrud() throws Exception { mockMvc .perform( post(String.format( - "/data-jobs/for-team/%s/jobs/%s/sources", TEST_TEAM_NAME, TEST_JOB_NAME)) + "/data-jobs/for-team/%s/jobs/%s/sources", TEST_TEAM_NAME, testJobName)) .with(user("user")) .content(jobZipBinary) .contentType(MediaType.APPLICATION_OCTET_STREAM)) @@ -136,7 +132,7 @@ public void testDataJobDeploymentCrud() throws Exception { mockMvc .perform( post(String.format( - "/data-jobs/for-team/%s/jobs/%s/sources", TEST_TEAM_WRONG_NAME, TEST_JOB_NAME)) + "/data-jobs/for-team/%s/jobs/%s/sources", TEST_TEAM_WRONG_NAME, testJobName)) .with(user("user")) .content(jobZipBinary) .contentType(MediaType.APPLICATION_OCTET_STREAM)) @@ -146,7 +142,7 @@ public void testDataJobDeploymentCrud() throws Exception { mockMvc .perform( post(String.format( - "/data-jobs/for-team/%s/jobs/%s/deployments", TEST_TEAM_NAME, TEST_JOB_NAME)) + "/data-jobs/for-team/%s/jobs/%s/deployments", TEST_TEAM_NAME, testJobName)) .content(dataJobDeploymentRequestBody) .contentType(MediaType.APPLICATION_JSON)) .andExpect(status().isUnauthorized()); @@ -155,7 +151,7 @@ public void testDataJobDeploymentCrud() throws Exception { mockMvc .perform( post(String.format( - "/data-jobs/for-team/%s/jobs/%s/deployments", TEST_TEAM_NAME, TEST_JOB_NAME)) + "/data-jobs/for-team/%s/jobs/%s/deployments", TEST_TEAM_NAME, testJobName)) .with(user("user")) .content(dataJobDeploymentRequestBody) .contentType(MediaType.APPLICATION_JSON)) @@ -166,13 +162,13 @@ public void testDataJobDeploymentCrud() throws Exception { .perform( post(String.format( "/data-jobs/for-team/%s/jobs/%s/deployments", - 
TEST_TEAM_WRONG_NAME, TEST_JOB_NAME)) + TEST_TEAM_WRONG_NAME, testJobName)) .with(user("user")) .content(dataJobDeploymentRequestBody) .contentType(MediaType.APPLICATION_JSON)) .andExpect(status().isNotFound()); - String jobDeploymentName = JobImageDeployer.getCronJobName(TEST_JOB_NAME); + String jobDeploymentName = JobImageDeployer.getCronJobName(testJobName); // Verify job deployment created Optional cronJobOptional = dataJobsKubernetesService.readCronJob(jobDeploymentName); @@ -189,7 +185,7 @@ public void testDataJobDeploymentCrud() throws Exception { .perform( get(String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_NAME, testJobName, DEPLOYMENT_ID)) .contentType(MediaType.APPLICATION_JSON)) .andExpect(status().isUnauthorized()); @@ -199,7 +195,7 @@ public void testDataJobDeploymentCrud() throws Exception { .perform( get(String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_NAME, testJobName, DEPLOYMENT_ID)) .with(user("user")) .contentType(MediaType.APPLICATION_JSON)) .andExpect(status().isOk()) @@ -225,7 +221,7 @@ public void testDataJobDeploymentCrud() throws Exception { .perform( get(String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_WRONG_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_WRONG_NAME, testJobName, DEPLOYMENT_ID)) .with(user("user")) .contentType(MediaType.APPLICATION_JSON)) .andExpect(status().isNotFound()); @@ -236,7 +232,7 @@ public void testDataJobDeploymentCrud() throws Exception { patch( String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_NAME, testJobName, DEPLOYMENT_ID)) .content(getDataJobDeploymentEnableRequestBody(false)) .contentType(MediaType.APPLICATION_JSON)) .andExpect(status().isUnauthorized()); @@ -247,7 +243,7 @@ public void testDataJobDeploymentCrud() throws Exception { patch( 
String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_NAME, testJobName, DEPLOYMENT_ID)) .with(user("user")) .content(getDataJobDeploymentEnableRequestBody(false)) .contentType(MediaType.APPLICATION_JSON)) @@ -259,7 +255,7 @@ public void testDataJobDeploymentCrud() throws Exception { patch( String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_WRONG_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_WRONG_NAME, testJobName, DEPLOYMENT_ID)) .with(user("user")) .content(getDataJobDeploymentEnableRequestBody(false)) .contentType(MediaType.APPLICATION_JSON)) @@ -277,7 +273,7 @@ public void testDataJobDeploymentCrud() throws Exception { patch( String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_NAME, testJobName, DEPLOYMENT_ID)) .with(user("user")) .content(getDataJobDeploymentVdkVersionRequestBody("new_vdk_version_tag")) .contentType(MediaType.APPLICATION_JSON)) @@ -289,7 +285,7 @@ public void testDataJobDeploymentCrud() throws Exception { patch( String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_NAME, testJobName, DEPLOYMENT_ID)) .with(user("user")) .content(getDataJobDeploymentEnableRequestBody(false)) .contentType(MediaType.APPLICATION_JSON)) @@ -301,7 +297,7 @@ public void testDataJobDeploymentCrud() throws Exception { patch( String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_NAME, testJobName, DEPLOYMENT_ID)) .with(user("user")) .content(getDataJobDeploymentVdkVersionRequestBody("")) .contentType(MediaType.APPLICATION_JSON)) @@ -312,7 +308,7 @@ public void testDataJobDeploymentCrud() throws Exception { .perform( get(String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_NAME, testJobName, 
DEPLOYMENT_ID)) .with(user("user")) .contentType(MediaType.APPLICATION_JSON)) .andExpect(status().isOk()) @@ -324,7 +320,7 @@ public void testDataJobDeploymentCrud() throws Exception { delete( String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_NAME, testJobName, DEPLOYMENT_ID)) .contentType(MediaType.APPLICATION_JSON)) .andExpect(status().isUnauthorized()); @@ -334,7 +330,7 @@ public void testDataJobDeploymentCrud() throws Exception { delete( String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_WRONG_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_WRONG_NAME, testJobName, DEPLOYMENT_ID)) .with(user("user")) .contentType(MediaType.APPLICATION_JSON)) .andExpect(status().isNotFound()); @@ -345,7 +341,7 @@ public void testDataJobDeploymentCrud() throws Exception { delete( String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_NAME, testJobName, DEPLOYMENT_ID)) .with(user("user")) .contentType(MediaType.APPLICATION_JSON)) .andExpect(status().isAccepted()); @@ -363,7 +359,7 @@ public void cleanUp() throws Exception { .perform( delete( String.format( - "/data-jobs/for-team/%s/jobs/%s/sources", TEST_TEAM_NAME, TEST_JOB_NAME)) + "/data-jobs/for-team/%s/jobs/%s/sources", TEST_TEAM_NAME, testJobName)) .with(user("user"))) .andExpect(status().isOk()); } @@ -377,7 +373,7 @@ public void testDataJobDeleteSource() throws Exception { mockMvc .perform( post(String.format( - "/data-jobs/for-team/%s/jobs/%s/sources", TEST_TEAM_NAME, TEST_JOB_NAME)) + "/data-jobs/for-team/%s/jobs/%s/sources", TEST_TEAM_NAME, testJobName)) .with(user("user")) .content(jobZipBinary) .contentType(MediaType.APPLICATION_OCTET_STREAM)) diff --git a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobInitContainerOOMIT.java 
b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobInitContainerOOMIT.java index 86d1e52566..f9b8ea9646 100644 --- a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobInitContainerOOMIT.java +++ b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobInitContainerOOMIT.java @@ -9,7 +9,6 @@ import com.vmware.taurus.controlplane.model.data.DataJobExecution; import com.vmware.taurus.datajobs.it.common.BaseIT; import com.vmware.taurus.datajobs.it.common.DataJobDeploymentExtension; -import com.vmware.taurus.datajobs.it.common.JobExecutionUtil; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.tuple.ImmutablePair; import org.junit.jupiter.api.Test; @@ -27,7 +26,7 @@ // This is a standard cron job template except restartPolicy is set to never so that when a // job runs out of memory it is // not retied but instead reports more quickly that it is a platform error - "datajobs.control.k8s.data.job.template.file=fast_failing_cron_job.yaml" + "datajobs.control.k8s.data.job.template.file=data_job_templates/fast_failing_cron_job.yaml" }) @SpringBootTest( webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT, @@ -42,7 +41,7 @@ public void testDataJob_causesOOM_shouldCompleteWithUserError( String jobName, String teamName, String username, String deploymentId) throws Exception { // manually start job execution ImmutablePair executeDataJobResult = - JobExecutionUtil.executeDataJob(jobName, teamName, username, deploymentId, mockMvc); + executeDataJob(jobName, teamName, username, deploymentId, mockMvc); String opId = executeDataJobResult.getLeft(); String executionId = executeDataJobResult.getRight(); diff --git 
a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobMainContainerOOMIT.java b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobMainContainerOOMIT.java index a3b438f88a..1386f7df78 100644 --- a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobMainContainerOOMIT.java +++ b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobMainContainerOOMIT.java @@ -12,20 +12,31 @@ import com.vmware.taurus.datajobs.it.common.JobExecutionUtil; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.tuple.ImmutablePair; +import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.RegisterExtension; import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.TestPropertySource; @Slf4j @SpringBootTest( webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT, classes = ControlplaneApplication.class) +@TestPropertySource( + properties = { + "datajobs.job.resources.requests.memory=6Mi", + "datajobs.job.resources.limits.memory=6Mi", + // This is a standard cron job template except restartPolicy is set to never so that when a + // job runs out of memory it is + // not retried but instead reports more quickly that it is a platform error + "datajobs.control.k8s.data.job.template.file=data_job_templates/fast_failing_cron_job.yaml" + }) public class DataJobMainContainerOOMIT extends BaseIT { @RegisterExtension static DataJobDeploymentExtension dataJobDeploymentExtension = new DataJobDeploymentExtension("oom_job.zip"); - // @Test + @Test public void testDataJob_causesOOM_shouldCompleteWithUserError( String jobName, String teamName, String username, String deploymentId) throws Exception { // manually start job execution diff --git 
a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobPropertiesIT.java b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobPropertiesIT.java index 6c37587f34..3485d9af91 100644 --- a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobPropertiesIT.java +++ b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/DataJobPropertiesIT.java @@ -7,9 +7,7 @@ import com.vmware.taurus.ControlplaneApplication; import com.vmware.taurus.datajobs.it.common.BaseIT; -import com.vmware.taurus.properties.service.PropertiesRepository; import org.junit.jupiter.api.Test; -import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.context.SpringBootTest; import org.springframework.http.HttpHeaders; import org.springframework.http.MediaType; @@ -27,8 +25,6 @@ classes = ControlplaneApplication.class) public class DataJobPropertiesIT extends BaseIT { - @Autowired private PropertiesRepository propertiesRepository; - @Test public void testDataJobProperties() throws Exception { // Setup diff --git a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/TestDataJobBackoffLimitExceededIT.java b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/TestDataJobBackoffLimitExceededIT.java new file mode 100644 index 0000000000..06b673e7eb --- /dev/null +++ b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/TestDataJobBackoffLimitExceededIT.java @@ -0,0 +1,70 @@ +/* + * Copyright 2021-2023 VMware, Inc. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +package com.vmware.taurus.datajobs.it; + +import com.vmware.taurus.ControlplaneApplication; +import com.vmware.taurus.controlplane.model.data.DataJobExecution; +import com.vmware.taurus.datajobs.it.common.BaseIT; +import com.vmware.taurus.datajobs.it.common.DataJobDeploymentExtension; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.tuple.ImmutablePair; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.RegisterExtension; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.TestPropertySource; + +import static com.vmware.taurus.datajobs.it.common.JobExecutionUtil.*; + +@Slf4j +@TestPropertySource( + properties = { + // This is a standard cron job template except activeDeadlineSeconds is set to 1 + "datajobs.control.k8s.data.job.template.file=data_job_templates/backoff_limit_exceeded_cron_job.yaml" + }) +@SpringBootTest( + webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT, + classes = ControlplaneApplication.class) +public class TestDataJobBackoffLimitExceededIT extends BaseIT { + + @RegisterExtension + static DataJobDeploymentExtension dataJobDeploymentExtension = new DataJobDeploymentExtension(); + + @Test + public void testDataJob_causesBackoffLimitExceeded_shouldCompleteWithUserError( + String jobName, String teamName, String username, String deploymentId) throws Exception { + // manually start job execution + ImmutablePair executeDataJobResult = + executeDataJob(jobName, teamName, username, deploymentId, mockMvc); + String opId = executeDataJobResult.getLeft(); + String executionId = executeDataJobResult.getRight(); + + // Check the data job execution status + testDataJobExecutionRead( + executionId, + DataJobExecution.StatusEnum.USER_ERROR, + opId, + jobName, + teamName, + username, + mockMvc); + testDataJobExecutionList( + executionId, + DataJobExecution.StatusEnum.USER_ERROR, + opId, + jobName, + teamName, + 
username, + mockMvc); + testDataJobDeploymentExecutionList( + executionId, + DataJobExecution.StatusEnum.USER_ERROR, + opId, + jobName, + teamName, + username, + mockMvc); + } +} diff --git a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/TestJobImageBuilderDynamicVdkImageIT.java b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/TestJobImageBuilderDynamicVdkImageIT.java index 494c2f080b..5319cc2331 100644 --- a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/TestJobImageBuilderDynamicVdkImageIT.java +++ b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/TestJobImageBuilderDynamicVdkImageIT.java @@ -11,7 +11,6 @@ import com.vmware.taurus.controlplane.model.data.DataJobMode; import com.vmware.taurus.controlplane.model.data.DataJobVersion; import com.vmware.taurus.datajobs.it.common.BaseIT; -import com.vmware.taurus.datajobs.it.common.JobExecutionUtil; import com.vmware.taurus.service.deploy.JobImageDeployer; import com.vmware.taurus.service.model.JobDeploymentStatus; import org.apache.commons.io.IOUtils; @@ -54,8 +53,6 @@ webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT, classes = ControlplaneApplication.class) public class TestJobImageBuilderDynamicVdkImageIT extends BaseIT { - private static final String TEST_JOB_NAME = - JobExecutionUtil.generateJobName(TestJobImageBuilderDynamicVdkImageIT.class.getSimpleName()); private static final Object DEPLOYMENT_ID = "testing"; @TestConfiguration @@ -72,7 +69,7 @@ public TaskExecutor taskExecutor() { @BeforeEach public void setup() throws Exception { - String dataJobRequestBody = getDataJobRequestBody(TEST_TEAM_NAME, TEST_JOB_NAME); + String dataJobRequestBody = getDataJobRequestBody(TEST_TEAM_NAME, testJobName); // Execute create job mockMvc @@ 
-91,7 +88,7 @@ public void setup() throws Exception { s.endsWith( String.format( "/data-jobs/for-team/%s/jobs/%s", - TEST_TEAM_NAME, TEST_JOB_NAME))))); + TEST_TEAM_NAME, testJobName))))); } @Test @@ -106,7 +103,7 @@ public void testDataJobDeploymentDynamicVdkVersion() throws Exception { ResultActions resultAction = mockMvc.perform( post(String.format( - "/data-jobs/for-team/%s/jobs/%s/sources", TEST_TEAM_NAME, TEST_JOB_NAME)) + "/data-jobs/for-team/%s/jobs/%s/sources", TEST_TEAM_NAME, testJobName)) .with(user("user")) .content(jobZipBinary) .contentType(MediaType.APPLICATION_OCTET_STREAM)); @@ -135,13 +132,13 @@ public void testDataJobDeploymentDynamicVdkVersion() throws Exception { mockMvc .perform( post(String.format( - "/data-jobs/for-team/%s/jobs/%s/deployments", TEST_TEAM_NAME, TEST_JOB_NAME)) + "/data-jobs/for-team/%s/jobs/%s/deployments", TEST_TEAM_NAME, testJobName)) .with(user("user")) .content(dataJobDeploymentRequestBody) .contentType(MediaType.APPLICATION_JSON)) .andExpect(status().isAccepted()); - String jobDeploymentName = JobImageDeployer.getCronJobName(TEST_JOB_NAME); + String jobDeploymentName = JobImageDeployer.getCronJobName(testJobName); // Verify job deployment created Optional cronJobOptional = dataJobsKubernetesService.readCronJob(jobDeploymentName); @@ -159,7 +156,7 @@ public void testDataJobDeploymentDynamicVdkVersion() throws Exception { .perform( get(String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_NAME, testJobName, DEPLOYMENT_ID)) .with(user("user")) .contentType(MediaType.APPLICATION_JSON)) .andExpect(status().isOk()) @@ -186,7 +183,7 @@ public void testDataJobDeploymentDynamicVdkVersion() throws Exception { patch( String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_NAME, testJobName, DEPLOYMENT_ID)) .with(user("user")) .content(getDataJobDeploymentEnableRequestBody(false)) 
.contentType(MediaType.APPLICATION_JSON)) @@ -204,7 +201,7 @@ public void testDataJobDeploymentDynamicVdkVersion() throws Exception { patch( String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_NAME, testJobName, DEPLOYMENT_ID)) .with(user("user")) .content(getDataJobDeploymentVdkVersionRequestBody("new_vdk_version_tag")) .contentType(MediaType.APPLICATION_JSON)) @@ -215,7 +212,7 @@ public void testDataJobDeploymentDynamicVdkVersion() throws Exception { .perform( get(String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_NAME, testJobName, DEPLOYMENT_ID)) .with(user("user")) .contentType(MediaType.APPLICATION_JSON)) .andExpect(status().isOk()) @@ -225,13 +222,13 @@ public void testDataJobDeploymentDynamicVdkVersion() throws Exception { mockMvc .perform( post(String.format( - "/data-jobs/for-team/%s/jobs/%s/deployments", TEST_TEAM_NAME, TEST_JOB_NAME)) + "/data-jobs/for-team/%s/jobs/%s/deployments", TEST_TEAM_NAME, testJobName)) .with(user("user")) .content(getDataJobDeploymentRequestBody(testJobVersionSha, "3.8")) .contentType(MediaType.APPLICATION_JSON)) .andExpect(status().isAccepted()); - jobDeploymentName = JobImageDeployer.getCronJobName(TEST_JOB_NAME); + jobDeploymentName = JobImageDeployer.getCronJobName(testJobName); // Verify job deployment updated properly cronJobOptional = dataJobsKubernetesService.readCronJob(jobDeploymentName); Assertions.assertTrue(cronJobOptional.isPresent()); @@ -250,7 +247,7 @@ public void testDataJobDeploymentDynamicVdkVersion() throws Exception { delete( String.format( "/data-jobs/for-team/%s/jobs/%s/deployments/%s", - TEST_TEAM_NAME, TEST_JOB_NAME, DEPLOYMENT_ID)) + TEST_TEAM_NAME, testJobName, DEPLOYMENT_ID)) .with(user("user")) .contentType(MediaType.APPLICATION_JSON)) .andExpect(status().isAccepted()); @@ -268,7 +265,7 @@ public void cleanUp() throws Exception { .perform( delete( 
String.format( - "/data-jobs/for-team/%s/jobs/%s/sources", TEST_TEAM_NAME, TEST_JOB_NAME)) + "/data-jobs/for-team/%s/jobs/%s/sources", TEST_TEAM_NAME, testJobName)) .with(user("user"))) .andExpect(status().isOk()); } diff --git a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/UploadSourceValidationIT.java b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/UploadSourceValidationIT.java index 7dc4719723..00d5b5dda9 100644 --- a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/UploadSourceValidationIT.java +++ b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/UploadSourceValidationIT.java @@ -7,7 +7,6 @@ import com.vmware.taurus.ControlplaneApplication; import com.vmware.taurus.datajobs.it.common.BaseIT; -import com.vmware.taurus.datajobs.it.common.JobExecutionUtil; import org.apache.commons.io.IOUtils; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -28,12 +27,9 @@ webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT, classes = ControlplaneApplication.class) public class UploadSourceValidationIT extends BaseIT { - protected static final String TEST_JOB_NAME = - JobExecutionUtil.generateJobName(UploadSourceValidationIT.class.getSimpleName()); - @BeforeEach public void setup() throws Exception { - String dataJobRequestBody = getDataJobRequestBody(TEST_TEAM_NAME, TEST_JOB_NAME); + String dataJobRequestBody = getDataJobRequestBody(TEST_TEAM_NAME, testJobName); // Execute create job mockMvc .perform( @@ -53,7 +49,7 @@ public void testDataJobUploadSource() throws Exception { mockMvc .perform( post(String.format( - "/data-jobs/for-team/%s/jobs/%s/sources", TEST_TEAM_NAME, TEST_JOB_NAME)) + "/data-jobs/for-team/%s/jobs/%s/sources", TEST_TEAM_NAME, testJobName)) 
.with(user("user")) .content(jobZipBinary) .contentType(MediaType.APPLICATION_OCTET_STREAM)) diff --git a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/common/BaseIT.java b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/common/BaseIT.java index c986652e75..1ae111c32c 100644 --- a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/common/BaseIT.java +++ b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/common/BaseIT.java @@ -74,6 +74,9 @@ public class BaseIT { private boolean ownsControlNamespace = false; + protected final String testJobName = + JobExecutionUtil.generateJobName(this.getClass().getSimpleName()); + @BeforeEach public void before() { log.info("Running test with: {} bytes of memory.", Runtime.getRuntime().totalMemory()); diff --git a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/common/JobExecutionUtil.java b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/common/JobExecutionUtil.java index 18bb815bad..c90219f85e 100644 --- a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/common/JobExecutionUtil.java +++ b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/datajobs/it/common/JobExecutionUtil.java @@ -74,9 +74,9 @@ public static DataJobExecution createDataJobExecution( .endTime(endTime) .type(ExecutionType.MANUAL) .status(executionStatus) - .resourcesCpuRequest(1F) - .resourcesCpuLimit(2F) - .resourcesMemoryRequest(500) + .resourcesCpuRequest(0.1F) + .resourcesCpuLimit(1F) + .resourcesMemoryRequest(100) .resourcesMemoryLimit(1000) 
.message("message") .lastDeployedBy("test_user") diff --git a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/graphql/it/GraphQLExecutionsIT.java b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/graphql/it/GraphQLExecutionsIT.java index 11c8c389ba..f73a619960 100644 --- a/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/graphql/it/GraphQLExecutionsIT.java +++ b/projects/control-service/projects/pipelines_control_service/src/integration-test/java/com/vmware/taurus/graphql/it/GraphQLExecutionsIT.java @@ -21,6 +21,8 @@ import org.springframework.boot.test.context.SpringBootTest; import org.springframework.test.web.servlet.request.MockMvcRequestBuilders; +import java.math.BigDecimal; +import java.math.RoundingMode; import java.time.OffsetDateTime; import java.util.List; @@ -165,14 +167,14 @@ public void testExecutions_filterByStartTimeGte_shouldReturnAllProperties() thro jsonPath( "$.data.content[*].deployment.resources.cpuRequest", Matchers.contains( - dataJobExecution1.getResourcesCpuRequest().doubleValue(), - dataJobExecution2.getResourcesCpuRequest().doubleValue()))) + convertFloatToDouble(dataJobExecution1.getResourcesCpuRequest()), + convertFloatToDouble(dataJobExecution2.getResourcesCpuRequest())))) .andExpect( jsonPath( "$.data.content[*].deployment.resources.cpuLimit", Matchers.contains( - dataJobExecution1.getResourcesCpuLimit().doubleValue(), - dataJobExecution2.getResourcesCpuLimit().doubleValue()))) + convertFloatToDouble(dataJobExecution1.getResourcesCpuLimit()), + convertFloatToDouble(dataJobExecution2.getResourcesCpuLimit())))) .andExpect( jsonPath( "$.data.content[*].deployment.resources.memoryRequest", @@ -427,4 +429,12 @@ public void testExecutions_filterByTeamNameIn() throws Exception { "$.data.content[*].id", Matchers.not(Matchers.contains(dataJobExecution2.getId())))); } + + /** + * 
Helper method that converts a Float to a Double rounded to a scale of 2. It is necessary because + * Float#doubleValue() widens e.g. 0.1F to 0.10000000149011612, while the checks expect 0.1. + */ + private static Double convertFloatToDouble(Float value) { + return BigDecimal.valueOf(value).setScale(2, RoundingMode.HALF_UP).doubleValue(); + } } diff --git a/projects/control-service/projects/pipelines_control_service/src/integration-test/resources/data_job_templates/backoff_limit_exceeded_cron_job.yaml b/projects/control-service/projects/pipelines_control_service/src/integration-test/resources/data_job_templates/backoff_limit_exceeded_cron_job.yaml new file mode 100644 index 0000000000..a2299b1d96 --- /dev/null +++ b/projects/control-service/projects/pipelines_control_service/src/integration-test/resources/data_job_templates/backoff_limit_exceeded_cron_job.yaml @@ -0,0 +1,41 @@ +# Copyright 2021-2023 VMware, Inc. +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: batch/v1beta1 +kind: CronJob +metadata: + annotations: # merged with additional annotations from TPCS + name: cronjob-template-name # overridden by TPCS +spec: + concurrencyPolicy: Forbid + failedJobsHistoryLimit: 2 + schedule: "*/10 * * * *" # overridden by TPCS + startingDeadlineSeconds: 1800 + successfulJobsHistoryLimit: 1 + suspend: false # overridden by TPCS + jobTemplate: + metadata: + annotations: # merged with additional annotations from TPCS + labels: # merged with additional labels from TPCS + spec: + activeDeadlineSeconds: 1 + backoffLimit: 3 + template: + metadata: + labels: # merged with additional labels from TPCS + spec: + containers: # overridden by TPCS + - command: + - /bin/sh + - -c + - date; echo '************** Cronjob Template ******************' + name: cronjob-template-container-name + image: busybox + imagePullPolicy: IfNotPresent + restartPolicy: Never + securityContext: + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + automountServiceAccountToken: false + ttlSecondsAfterFinished: 600 diff --git 
a/projects/control-service/projects/pipelines_control_service/src/integration-test/resources/data_job_templates/fast_failing_cron_job.yaml b/projects/control-service/projects/pipelines_control_service/src/integration-test/resources/data_job_templates/fast_failing_cron_job.yaml new file mode 100644 index 0000000000..e497796e69 --- /dev/null +++ b/projects/control-service/projects/pipelines_control_service/src/integration-test/resources/data_job_templates/fast_failing_cron_job.yaml @@ -0,0 +1,41 @@ +# Copyright 2021-2023 VMware, Inc. +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: batch/v1beta1 +kind: CronJob +metadata: + annotations: # merged with additional annotations from TPCS + name: cronjob-template-name # overridden by TPCS +spec: + concurrencyPolicy: Forbid + failedJobsHistoryLimit: 2 + schedule: "*/10 * * * *" # overridden by TPCS + startingDeadlineSeconds: 1800 + successfulJobsHistoryLimit: 1 + suspend: false # overridden by TPCS + jobTemplate: + metadata: + annotations: # merged with additional annotations from TPCS + labels: # merged with additional labels from TPCS + spec: + activeDeadlineSeconds: 43200 + backoffLimit: 3 + template: + metadata: + labels: # merged with additional labels from TPCS + spec: + containers: # overridden by TPCS + - command: + - /bin/sh + - -c + - date; echo '************** Cronjob Template ******************' + name: cronjob-template-container-name + image: busybox + imagePullPolicy: IfNotPresent + restartPolicy: Never + securityContext: + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + automountServiceAccountToken: false + ttlSecondsAfterFinished: 600 diff --git a/projects/control-service/projects/pipelines_control_service/src/main/java/com/vmware/taurus/service/KubernetesService.java b/projects/control-service/projects/pipelines_control_service/src/main/java/com/vmware/taurus/service/KubernetesService.java index d026fb6a70..e70bfbf3be 100644 --- 
a/projects/control-service/projects/pipelines_control_service/src/main/java/com/vmware/taurus/service/KubernetesService.java +++ b/projects/control-service/projects/pipelines_control_service/src/main/java/com/vmware/taurus/service/KubernetesService.java @@ -1582,12 +1582,6 @@ Optional getJobExecutionStatus(V1Job job, JobStatusCondition jobSt jobExecutionStatusBuilder.succeeded( Optional.ofNullable(jobStatusCondition).map(JobStatusCondition::isSuccess).orElse(null)); - // omits events that come after the Data Job completion - if (jobExecutionStatusBuilder.succeeded != null - && StringUtils.isBlank(jobExecutionStatusBuilder.initContainerTerminationReason)) { - return Optional.empty(); - } - return Optional.of(jobExecutionStatusBuilder.build()); } diff --git a/projects/control-service/projects/pipelines_control_service/src/main/java/com/vmware/taurus/service/execution/JobExecutionService.java b/projects/control-service/projects/pipelines_control_service/src/main/java/com/vmware/taurus/service/execution/JobExecutionService.java index b884b26a75..e4327104af 100644 --- a/projects/control-service/projects/pipelines_control_service/src/main/java/com/vmware/taurus/service/execution/JobExecutionService.java +++ b/projects/control-service/projects/pipelines_control_service/src/main/java/com/vmware/taurus/service/execution/JobExecutionService.java @@ -300,7 +300,11 @@ public Optional updateJobExecu // with null. 
var finalStatusSet = new HashSet<>( - List.of(ExecutionStatus.CANCELLED, ExecutionStatus.SUCCEEDED, ExecutionStatus.SKIPPED)); + List.of( + ExecutionStatus.CANCELLED, + ExecutionStatus.SUCCEEDED, + ExecutionStatus.SKIPPED, + ExecutionStatus.USER_ERROR)); ExecutionStatus executionStatus = executionResult.getExecutionStatus(); // Optimization: diff --git a/projects/control-service/projects/pipelines_control_service/src/test/java/com/vmware/taurus/service/monitoring/DataJobMonitorTest.java b/projects/control-service/projects/pipelines_control_service/src/test/java/com/vmware/taurus/service/monitoring/DataJobMonitorTest.java index 11d6ad846d..d3521a1ded 100644 --- a/projects/control-service/projects/pipelines_control_service/src/test/java/com/vmware/taurus/service/monitoring/DataJobMonitorTest.java +++ b/projects/control-service/projects/pipelines_control_service/src/test/java/com/vmware/taurus/service/monitoring/DataJobMonitorTest.java @@ -824,6 +824,77 @@ void testJobExecutionStatus_fromPlatformToUser_shouldUpdateExecution() { gauges.stream().findFirst().get().value()); } + @Test + @Order(34) + void testJobExecutionStatus_fromUserErrorToPlatformError_shouldNotUpdateExecution() { + // Clean up from previous tests + jobsRepository.deleteAll(); + dataJobMonitor.clearDataJobsGaugesNotIn(Collections.emptyList()); + + // Create data job + String jobId = "job-id-test"; + var dataJob = + new DataJob( + jobId, + new JobConfig(), + DeploymentStatus.NONE, + ExecutionStatus.USER_ERROR, + "old-execution-id"); + jobsRepository.save(dataJob); + + // Change status to USER_ERROR + JobExecution jobExecutionUserError = + buildJobExecutionStatus( + jobId, + "last-execution-id", + ExecutionStatus.USER_ERROR.getPodStatus(), + false, + OffsetDateTime.now().minus(Duration.ofDays(2)), + OffsetDateTime.now().minus(Duration.ofDays(1))); + dataJobMonitor.recordJobExecutionStatus(jobExecutionUserError); + + // Check status is saved OK. 
+ Optional actualJobUserError = + jobsRepository.findById(jobExecutionUserError.getJobName()); + Assertions.assertFalse(actualJobUserError.isEmpty()); + Assertions.assertEquals( + ExecutionStatus.USER_ERROR, actualJobUserError.get().getLastExecutionStatus()); + + // Check gauge status + var gaugesUserError = + meterRegistry.find(DataJobMetrics.TAURUS_DATAJOB_TERMINATION_STATUS_METRIC_NAME).gauges(); + Assertions.assertEquals(1, gaugesUserError.size()); + Assertions.assertEquals( + ExecutionStatus.USER_ERROR.getAlertValue().doubleValue(), + gaugesUserError.stream().findFirst().get().value()); + + // Change status to PLATFORM_ERROR + JobExecution jobExecutionPlatformError = + buildJobExecutionStatus( + jobId, + "last-execution-id", + ExecutionStatus.PLATFORM_ERROR.getPodStatus(), + false, + OffsetDateTime.now().minus(Duration.ofDays(2)), + OffsetDateTime.now().minus(Duration.ofDays(1))); + dataJobMonitor.recordJobExecutionStatus(jobExecutionPlatformError); + + // Check status is not saved OK. + Optional actualJobPlatformError = + jobsRepository.findById(jobExecutionUserError.getJobName()); + Assertions.assertFalse(actualJobPlatformError.isEmpty()); + Assertions.assertEquals( + ExecutionStatus.USER_ERROR, actualJobPlatformError.get().getLastExecutionStatus()); + + // Check gauge status + var gaugesPlatformError = + meterRegistry.find(DataJobMetrics.TAURUS_DATAJOB_TERMINATION_STATUS_METRIC_NAME).gauges(); + Assertions.assertEquals(1, gaugesPlatformError.size()); + Assertions.assertEquals( + ExecutionStatus.USER_ERROR.getAlertValue().doubleValue(), + gaugesPlatformError.stream().findFirst().get().value()); + } + private static String randomId(String prefix) { return prefix + UUID.randomUUID(); }