Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

control-service: fix oom tests #2028

Merged
merged 10 commits into from
May 11, 2023
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,22 @@
import com.vmware.taurus.datajobs.it.common.JobExecutionUtil;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.RegisterExtension;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.TestPropertySource;

import static com.vmware.taurus.datajobs.it.common.JobExecutionUtil.*;

@Slf4j
@TestPropertySource(
properties = {
"datajobs.deployment.initContainer.resources.requests.memory=6Mi",
"datajobs.deployment.initContainer.resources.limits.memory=6Mi",
// This is a standard cron job template except restartPolicy is set to Never so that when a
// job runs out of memory it is not retried but instead reports more quickly that it is a
// platform error.
"datajobs.control.k8s.data.job.template.file=fast_failing_cron_job.yaml"
})
@SpringBootTest(
webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT,
Expand All @@ -30,7 +37,7 @@ public class DataJobInitContainerOOMIT extends BaseIT {
@RegisterExtension
static DataJobDeploymentExtension dataJobDeploymentExtension = new DataJobDeploymentExtension();

// @Test
@Test
public void testDataJob_causesOOM_shouldCompleteWithUserError(
String jobName, String teamName, String username, String deploymentId) throws Exception {
// manually start job execution
Expand All @@ -40,7 +47,23 @@ public void testDataJob_causesOOM_shouldCompleteWithUserError(
String executionId = executeDataJobResult.getRight();

// Check the data job execution status
JobExecutionUtil.checkDataJobExecutionStatus(
testDataJobExecutionRead(
executionId,
DataJobExecution.StatusEnum.PLATFORM_ERROR,
opId,
jobName,
teamName,
username,
mockMvc);
testDataJobExecutionList(
executionId,
DataJobExecution.StatusEnum.PLATFORM_ERROR,
opId,
jobName,
teamName,
username,
mockMvc);
testDataJobDeploymentExecutionList(
executionId,
DataJobExecution.StatusEnum.PLATFORM_ERROR,
opId,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ public static ImmutablePair<String, String> executeDataJob(
return ImmutablePair.of(opId, executionId);
}

private static void testDataJobExecutionRead(
public static void testDataJobExecutionRead(
String executionId,
com.vmware.taurus.controlplane.model.data.DataJobExecution.StatusEnum executionStatus,
String opId,
Expand Down Expand Up @@ -198,7 +198,7 @@ private static void testDataJobExecutionRead(
executionId, executionStatus, opId, dataJobExecution[0], jobName, username);
}

private static void testDataJobExecutionList(
public static void testDataJobExecutionList(
String executionId,
com.vmware.taurus.controlplane.model.data.DataJobExecution.StatusEnum executionStatus,
String opId,
Expand Down Expand Up @@ -232,7 +232,7 @@ private static void testDataJobExecutionList(
executionId, executionStatus, opId, dataJobExecutions.get(0), jobName, username);
}

private static void testDataJobDeploymentExecutionList(
public static void testDataJobDeploymentExecutionList(
String executionId,
com.vmware.taurus.controlplane.model.data.DataJobExecution.StatusEnum executionStatus,
String opId,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Copyright 2021-2023 VMware, Inc.
# SPDX-License-Identifier: Apache-2.0

# CronJob template for data job deployments. Fields commented "overridden by TPCS" or
# "merged with ..." are placeholders that TPCS replaces or extends at deployment time.
# NOTE(review): presumably this is the fast-failing template variant used by the
# init-container OOM integration test -- confirm against the referencing test property.

apiVersion: batch/v1beta1
kind: CronJob
metadata:
  annotations: # merged with additional annotations from TPCS
  name: cronjob-template-name # overridden by TPCS
spec:
  concurrencyPolicy: Forbid
  failedJobsHistoryLimit: 2
  schedule: "*/10 * * * *" # overridden by TPCS
  startingDeadlineSeconds: 1800
  successfulJobsHistoryLimit: 1
  suspend: false # overridden by TPCS
  jobTemplate:
    metadata:
      annotations: # merged with additional annotations from TPCS
      labels: # merged with additional labels from TPCS
    spec:
      activeDeadlineSeconds: 43200
      # NOTE(review): with restartPolicy Never the Job controller may still create
      # replacement pods up to this limit -- confirm 3 is intended for a fast-failing template.
      backoffLimit: 3
      template:
        metadata:
          labels: # merged with additional labels from TPCS
        spec:
          containers: # overridden by TPCS
            - command:
                - /bin/sh
                - -c
                - date; echo '************** Cronjob Template ******************'
              name: cronjob-template-container-name
              image: busybox
              imagePullPolicy: IfNotPresent
          # Never: a failed container is not restarted in place, so the failure surfaces sooner.
          restartPolicy: Never
          securityContext:
            runAsUser: 1000
            runAsGroup: 1000
            fsGroup: 1000
          automountServiceAccountToken: false
      ttlSecondsAfterFinished: 600
Original file line number Diff line number Diff line change
Expand Up @@ -332,16 +332,10 @@ private V1beta1CronJob loadConfigurableV1beta1CronjobTemplate() {
return null;
}

// Check whether the configurable datajob template file exists.
File datajobTemplateFile = new File(datajobTemplateFileLocation);
if (!datajobTemplateFile.isFile()) {
log.warn("Datajob template location '{}' is not a file.", datajobTemplateFileLocation);
return null;
}

try {
// Load the configurable datajob template file.
return loadV1beta1CronjobTemplate(datajobTemplateFile);
return loadV1beta1CronjobTemplate(
new ClassPathResource(datajobTemplateFileLocation).getFile());
} catch (Exception e) {
log.error("Error while loading the datajob template file.", e);
return null;
Expand All @@ -355,16 +349,9 @@ private V1CronJob loadConfigurableV1CronjobTemplate() {
return null;
}

// Check whether the configurable datajob template file exists.
File datajobTemplateFile = new File(datajobTemplateFileLocation);
if (!datajobTemplateFile.isFile()) {
log.warn("Datajob template location '{}' is not a file.", datajobTemplateFileLocation);
return null;
}

try {
// Load the configurable datajob template file.
return loadV1CronjobTemplate(datajobTemplateFile);
return loadV1CronjobTemplate(new ClassPathResource(datajobTemplateFileLocation).getFile());
} catch (Exception e) {
log.error("Error while loading the datajob template file.", e);
return null;
Expand Down