Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

control-service: add file filter before job upload #2540

Merged
merged 11 commits into from
Aug 18, 2023
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,8 @@ spec:
value: "{{ .Values.monitoringSync.initialDelayMillis }}"
- name: UPLOAD_VALIDATION_FILETYPES_ALLOWLIST
value: "{{ .Values.uploadValidationFileTypesAllowList }}"
- name: UPLOAD_VALIDATION_FILETYPES_FILTER_LIST
value: "{{ .Values.uploadValidationFileTypesFilterList }}"
- name: DATAJOBS_TEMP_STORAGE_FOLDER
value: /datajobs_temp_storage
{{- if .Values.datajobTemplate.enabled }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,11 @@ uploadGitReadWritePassword: ""
# If set to empty, then all file types are allowed.
uploadValidationFileTypesAllowList: ""

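# Comma separated list of file extensions to strip from a data job before it is uploaded.
# Unlike uploadValidationFileTypesAllowList above, a matching file is deleted from the job
# instead of failing the job upload. The filter runs before the allow list validation, so a
# file removed here is never checked against the allow list and cannot fail the upload.
# If set to empty, no files are filtered out.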
uploadValidationFileTypesFilterList: ""

## [Required] The repository where the data job images will be stored (so write permission must be granted)
## Automatically a repository will be created for each data job in ${deploymentDockerRepository}/data-job-name
## (without https:// scheme)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
/*
* Copyright 2021-2023 VMware, Inc.
* SPDX-License-Identifier: Apache-2.0
*/

package com.vmware.taurus.service.upload;

import com.vmware.taurus.exception.ExternalSystemError;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import lombok.extern.slf4j.Slf4j;

/**
 * Template for validators that inspect every regular file of an unarchived data job before it is
 * uploaded. Subclasses supply the list of validation types, how the type of a file is detected,
 * how a detected type is matched against the list and what happens to a matched file.
 */
@Slf4j
public abstract class AbstractJobFileValidator {

  /**
   * Returns the file types against which the validation logic operates for the currently
   * validated user file.
   *
   * @return the configured validation types; an empty array disables the validator entirely
   */
  abstract String[] getValidationTypes();

  /**
   * Computes the string that represents the type of a user uploaded file and that is later passed
   * to {@link #matchTypes(String)}. E.g. returns the apache tika file type, or the file ending
   * etc.
   *
   * @param filePath path to the file whose type is detected
   * @return the detected type of the file
   * @throws IOException if the file cannot be read
   */
  abstract String detectFileType(Path filePath) throws IOException;

  /**
   * Checks the detected file type against the validation types according to a condition defined
   * by the overriding class.
   *
   * @param detectedType the value returned by {@link #detectFileType(Path)}
   * @return true if the file must be handed to {@link #processMatchedFile(Path, String, String)}
   */
  abstract boolean matchTypes(String detectedType);

  /**
   * Performs an operation on an already matched file, such as throwing an exception, logging or
   * deleting.
   *
   * @param filePath path to the matched file
   * @param jobName the name of the data job the file belongs to
   * @param pathInsideJob path of the file relative to the data job directory
   * @throws IOException if the file cannot be processed
   */
  abstract void processMatchedFile(Path filePath, String jobName, String pathInsideJob)
      throws IOException;

  /**
   * Validates all files of a data job. For every regular file under the job directory the file
   * type is detected and, when it matches the pre-configured list of the extending class, the
   * file is passed to {@link #processMatchedFile(Path, String, String)}. If the list of validation
   * types is empty, no file is inspected.
   *
   * @param jobName the name of the data job whose files are validated
   * @param jobDirectory path to the data job directory where unarchived content of the data job
   *     being uploaded can be seen.
   * @throws ExternalSystemError if the job directory or one of its files cannot be read
   * @throws InvalidJobUpload if the extending class rejects a file
   */
  public void validateJob(String jobName, Path jobDirectory) {
    try {
      validateAllowedFiles(jobName, jobDirectory);
    } catch (IOException e) {
      throw new ExternalSystemError(
          ExternalSystemError.MainExternalSystem.HOST_CONTAINER,
          String.format("Unable to open and process job %s directory.", jobName),
          e);
    }
  }

  /**
   * Walks the job directory and validates each regular file found in it.
   *
   * @param jobName the name of the data job whose files are validated
   * @param jobDirectory root directory of the unarchived data job
   * @throws IOException if the job directory cannot be traversed
   */
  void validateAllowedFiles(String jobName, Path jobDirectory) throws IOException {
    if (getValidationTypes().length == 0) {
      log.debug(
          "List of validation files is empty. That means all files are allowed. No checks are"
              + " done.");
      return;
    }
    // Files.walk keeps directory handles open until the stream is closed, so it has to be
    // closed explicitly; a terminal operation alone does not release them.
    try (var paths = Files.walk(jobDirectory)) {
      paths
          .filter(p -> p.toFile().isFile())
          .forEach(filePath -> validateFile(jobName, filePath, jobDirectory.relativize(filePath)));
    }
  }

  /**
   * Detects the type of a single regular file and processes the file if the type matches.
   *
   * @param jobName the name of the data job the file belongs to
   * @param filePath path to the file
   * @param pathInsideJob path of the file relative to the data job directory
   * @throws InvalidJobUpload if the extending class rejects the file
   */
  private void validateFile(String jobName, Path filePath, Path pathInsideJob)
      throws InvalidJobUpload {
    try {
      var fileType = detectFileType(filePath);
      log.debug("Job {}'s file {} is of type {}", jobName, pathInsideJob, fileType);
      if (matchTypes(fileType)) {
        processMatchedFile(filePath, jobName, pathInsideJob.toString());
      }
    } catch (IOException e) {
      throw new ExternalSystemError(
          ExternalSystemError.MainExternalSystem.HOST_CONTAINER,
          String.format("Unable to open and process file %s", pathInsideJob),
          e);
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ public class JobUpload {
private final GitWrapper gitWrapper;
private final FeatureFlags featureFlags;
private final AuthorizationProvider authorizationProvider;
private final JobUploadValidator jobUploadValidator;
private final JobUploadAllowListValidator jobUploadAllowListValidator;
private final JobUploadFilterListValidator jobUploadFilterListValidator;

@Autowired
public JobUpload(
Expand All @@ -48,13 +49,15 @@ public JobUpload(
GitWrapper gitWrapper,
FeatureFlags featureFlags,
AuthorizationProvider authorizationProvider,
JobUploadValidator jobUploadValidator) {
JobUploadAllowListValidator jobUploadAllowListValidator,
JobUploadFilterListValidator jobUploadFilterListValidator) {
this.datajobsTempStorageFolder = datajobsTempStorageFolder;
this.gitCredentialsProvider = gitCredentialsProvider;
this.gitWrapper = gitWrapper;
this.featureFlags = featureFlags;
this.authorizationProvider = authorizationProvider;
this.jobUploadValidator = jobUploadValidator;
this.jobUploadAllowListValidator = jobUploadAllowListValidator;
this.jobUploadFilterListValidator = jobUploadFilterListValidator;
}

/**
Expand Down Expand Up @@ -116,7 +119,8 @@ public String publishDataJob(String jobName, Resource resource, String reason) {
try (var tempDirPath = new EphemeralFile(datajobsTempStorageFolder, jobName, "deploy")) {
File jobFolder =
FileUtils.unzipDataJob(resource, new File(tempDirPath.toFile(), "job"), jobName);
jobUploadValidator.validateJob(jobName, jobFolder.toPath());
jobUploadFilterListValidator.validateJob(jobName, jobFolder.toPath());
jobUploadAllowListValidator.validateJob(jobName, jobFolder.toPath());

Git git =
gitWrapper.cloneJobRepository(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
* Copyright 2021-2023 VMware, Inc.
* SPDX-License-Identifier: Apache-2.0
*/

package com.vmware.taurus.service.upload;

import java.io.IOException;
import java.nio.file.Path;
import java.util.Arrays;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;

/**
 * Fails the upload of a data job when it contains a file whose detected type is not part of the
 * list configured through the upload.validation.fileTypes.allowlist property.
 */
@Service
class JobUploadAllowListValidator extends AbstractJobFileValidator {

  // One logger per class rather than one per instance.
  private static final Logger log = LoggerFactory.getLogger(JobUploadAllowListValidator.class);

  /**
   * List of file types that are allowed to be uploaded. Full list of file types are documented in
   * https://tika.apache.org/2.5.0/formats.html#Full_list_of_Supported_Formats_in_standard_artifacts
   * If set to empty, then all files are allowed using the allowlist.
   */
  private final String[] allowlist;

  private final FileFormatDetector formatDetector;

  /**
   * @param allowList the allowed file types read from the upload.validation.fileTypes.allowlist
   *     property
   */
  public JobUploadAllowListValidator(
      @Value("${upload.validation.fileTypes.allowlist:}") String[] allowList) {
    this.allowlist = allowList;
    this.formatDetector = new FileFormatDetector();
    log.debug("Job sources upload validator allowlist: {}", Arrays.toString(allowList));
  }

  /** @return the configured allow list of file types */
  @Override
  String[] getValidationTypes() {
    return this.allowlist;
  }

  /**
   * Delegates the detection of the file type to the {@link FileFormatDetector}.
   *
   * @param filePath path to the file whose type is detected
   * @return the type reported by the format detector
   * @throws IOException if the detector fails to read the file
   */
  @Override
  String detectFileType(Path filePath) throws IOException {
    return this.formatDetector.detectFileType(filePath);
  }

  /**
   * Tells whether the detected file type is NOT covered by any entry of the allow list. Such a
   * file is handed to processMatchedFile; a file whose type is in the allow list is accepted and
   * no processing is performed.
   *
   * @param detectedType the type returned by {@link #detectFileType(Path)}
   * @return true if none of the allowed types matches the detected type
   */
  @Override
  boolean matchTypes(String detectedType) {
    return Arrays.stream(getValidationTypes())
        .noneMatch(validationType -> formatDetector.matchTypes(detectedType, validationType));
  }

  /**
   * Rejects the upload because of a file whose type is not in the allow list.
   *
   * @param filePath path to the rejected file; its type is detected again for the error message
   * @param jobName the name of the data job the file belongs to
   * @param pathInsideJob path of the file relative to the data job directory
   * @throws InvalidJobUpload always
   * @throws IOException if the file type cannot be detected for the error message
   */
  @Override
  void processMatchedFile(Path filePath, String jobName, String pathInsideJob) throws IOException {
    throw new InvalidJobUpload(
        jobName,
        String.format(
            "file '%s' was detected to be of type '%s' "
                + "and it is not in the allowed list of file types.",
            pathInsideJob, detectFileType(filePath)),
        "Make sure to remove the forbidden file. "
            + "Current list of allowed file types is "
            + Arrays.toString(getValidationTypes()));
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/*
* Copyright 2021-2023 VMware, Inc.
* SPDX-License-Identifier: Apache-2.0
*/

package com.vmware.taurus.service.upload;

import java.nio.file.Path;
import java.util.Arrays;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.FilenameUtils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;

/**
 * This class filters forbidden files out of the job directory so that the job can be uploaded
 * without them. Forbidden files are specified as a list of file name extensions in the
 * upload.validation.fileTypes.filterlist property; a file is deleted when the extension of its
 * name ends with one of the configured entries. If the property is empty no file is deleted.
 */
@Service
@Slf4j
public class JobUploadFilterListValidator extends AbstractJobFileValidator {

  private final String[] filterList;

  /**
   * @param filterList the file extensions to filter out, read from the
   *     upload.validation.fileTypes.filterlist property
   */
  public JobUploadFilterListValidator(
      @Value("${upload.validation.fileTypes.filterlist:}") String[] filterList) {
    this.filterList = filterList;
  }

  /** @return the configured list of file extensions to filter out */
  @Override
  String[] getValidationTypes() {
    return this.filterList;
  }

  /**
   * Uses the extension of the file name as the type of the file.
   *
   * @param filePath path to the file
   * @return the extension of the file name as computed by {@link FilenameUtils#getExtension}
   */
  @Override
  String detectFileType(Path filePath) {
    return FilenameUtils.getExtension(filePath.getFileName().toString());
  }

  /**
   * Tells whether the detected extension ends with one of the entries of the filter list.
   *
   * @param detectedType the extension returned by {@link #detectFileType(Path)}
   * @return true if the file has to be deleted
   */
  @Override
  boolean matchTypes(String detectedType) {
    return Arrays.stream(getValidationTypes())
        // Every string ends with the empty string, so an empty entry (possibly produced by a
        // stray comma in the configured list) would otherwise match, and delete, every file.
        .filter(validationType -> !validationType.isEmpty())
        .anyMatch(detectedType::endsWith);
  }

  /**
   * Deletes the matched file from the job directory. A failed deletion does not abort the upload
   * but is reported in the log.
   *
   * @param filePath path to the file to delete
   * @param jobName the name of the data job the file belongs to
   * @param pathInsideJob path of the file relative to the data job directory
   */
  @Override
  void processMatchedFile(Path filePath, String jobName, String pathInsideJob) {
    log.debug(
        "Deleting file: {}, from job: {}, before uploading to version control.",
        pathInsideJob,
        jobName);
    // File.delete() signals failure only through its return value.
    if (!filePath.toFile().delete()) {
      log.warn(
          "Could not delete file: {}, from job: {}. The file stays in the job directory.",
          pathInsideJob,
          jobName);
    }
  }
}
Loading