Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

control-service: add file filter before job upload #2540

Merged
merged 11 commits into from
Aug 18, 2023
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,12 @@ spec:
value: "{{ .Values.monitoringSync.initialDelayMillis }}"
- name: UPLOAD_VALIDATION_FILETYPES_ALLOWLIST
value: "{{ .Values.uploadValidationFileTypesAllowList }}"
- name: UPLOAD_VALIDATION_EXTENSIONS_ALLOWLIST
value: "{{ .Values.uploadValidationFileExtensionsAllowList }}"
- name: UPLOAD_VALIDATION_FILETYPES_FILTER_LIST
value: "{{ .Values.uploadValidationFileTypesFilterList }}"
- name: UPLOAD_VALIDATION_EXTENSIONS_FILTER_LIST
value: "{{ .Values.uploadValidationFileExtensionsFilterList }}"
- name: DATAJOBS_TEMP_STORAGE_FOLDER
value: /datajobs_temp_storage
{{- if .Values.datajobTemplate.enabled }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,18 @@ uploadGitReadWritePassword: ""
# Full list of file types are documented in https://tika.apache.org
# If set to empty, then all file types are allowed.
uploadValidationFileTypesAllowList: ""
# List of file extensions (without the leading dot) that are allowed to be uploaded.
# Comma separated list, e.g. "py,csv,sql".
# Only files with extensions that are present in this list will be allowed to be uploaded.
# If the list is empty, all extensions are allowed.
uploadValidationFileExtensionsAllowList: ""
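# List of file types, in the same format as uploadValidationFileTypesAllowList above. Files whose
# detected type is present in this list are deleted from the data job source code before upload,
# instead of failing the job upload. Runs before the allow list checks, so deleted files are never
# evaluated against the allow list. If the list is empty, no files are deleted.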
uploadValidationFileTypesFilterList: ""
# List of file extensions that are automatically deleted from data job source code before upload.
# Comma separated list e.g: "pyc,exe,sh". If the list is empty no files will be deleted.
# Files are first deleted before the allow list performs its checks.
uploadValidationFileExtensionsFilterList: ""

## [Required] The repository where the data job images will be stored (so write permission must be granted)
## Automatically a repository will be created for each data job in ${deploymentDockerRepository}/data-job-name
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/*
* Copyright 2021-2023 VMware, Inc.
* SPDX-License-Identifier: Apache-2.0
*/

package com.vmware.taurus.service.upload;

import com.vmware.taurus.exception.ExternalSystemError;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.FilenameUtils;

@Slf4j
public abstract class AbstractJobFileValidator {

  private final FileFormatDetector formatDetector = new FileFormatDetector();

  /** Returns a list of file types against which the validation logic should perform operations. */
  abstract String[] getValidationTypes();

  /**
   * Returns a list of file extensions against which the validation logic should perform operations.
   */
  abstract String[] getValidationExtensions();

  /**
   * Decides whether a file should be handed to {@link #processMatchedFile}. The condition applied
   * to the detected type and extension is defined by the overriding class.
   *
   * @param detectedType the file type as reported by {@link #detectFileType(Path)}
   * @param detectedExtension the file extension as reported by {@link #detectFileExtension(Path)}
   * @return true if the file must be processed, false if it should be left untouched
   */
  abstract boolean matchTypes(String detectedType, String detectedExtension);

  /**
   * Performs operations on already matched files, such as throwing exception, logging or deleting.
   *
   * @param filePath path of the matched file
   * @param jobName the name of the data job the file belongs to
   * @param pathInsideJob the path of the file relative to the job directory
   * @throws IOException if the file cannot be read or processed
   */
  abstract void processMatchedFile(Path filePath, String jobName, String pathInsideJob)
      throws IOException;

  /**
   * Walks all regular files of a data job and applies the validation defined by the extending
   * class: for every file the type and the extension are detected and, if {@link #matchTypes}
   * reports a match, {@link #processMatchedFile} is invoked. If both the list of validation types
   * and the list of validation extensions are empty, no processing is performed.
   *
   * @param jobName the name of the data job whose files are validated
   * @param jobDirectory path to the data job directory where unarchived content of the data job
   *     being uploaded can be seen.
   * @throws ExternalSystemError if the job directory or one of its files cannot be opened
   */
  public void validateJob(String jobName, Path jobDirectory) {
    try {
      validateAllowedFiles(jobName, jobDirectory);
    } catch (IOException e) {
      throw new ExternalSystemError(
          ExternalSystemError.MainExternalSystem.HOST_CONTAINER,
          String.format("Unable to open and process job %s directory.", jobName),
          e);
    }
  }

  private void validateAllowedFiles(String jobName, Path jobDirectory) throws IOException {
    if (getValidationTypes().length == 0 && getValidationExtensions().length == 0) {
      log.debug(
          "List of validation files is empty. That means all files are allowed. No checks are"
              + " done.");
      return;
    }
    // Files.walk keeps directory handles open until the stream is closed, so it must be
    // wrapped in try-with-resources to avoid leaking them on every upload.
    try (var paths = Files.walk(jobDirectory)) {
      paths
          .filter(p -> p.toFile().isFile())
          .forEach(filePath -> validateFile(jobName, filePath, jobDirectory.relativize(filePath)));
    }
  }

  /** Detects type and extension of a single regular file and processes it if it matches. */
  private void validateFile(String jobName, Path filePath, Path pathInsideJob)
      throws InvalidJobUpload {
    try {
      var fileType = detectFileType(filePath);
      var fileExtension = detectFileExtension(filePath);
      log.debug("Job {}'s file {} is of type {}", jobName, filePath, fileType);
      if (matchTypes(fileType, fileExtension)) {
        processMatchedFile(filePath, jobName, pathInsideJob.toString());
      }
    } catch (IOException e) {
      throw new ExternalSystemError(
          ExternalSystemError.MainExternalSystem.HOST_CONTAINER,
          String.format("Unable to open and process file %s", pathInsideJob),
          e);
    }
  }

  /** Returns the type of the file as detected by the {@link FileFormatDetector}. */
  String detectFileType(Path filePath) throws IOException {
    return this.formatDetector.detectFileType(filePath);
  }

  /** Returns the extension of the file name, as computed by {@code FilenameUtils.getExtension}. */
  String detectFileExtension(Path filePath) {
    return FilenameUtils.getExtension(filePath.getFileName().toString());
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ public class JobUpload {
private final GitWrapper gitWrapper;
private final FeatureFlags featureFlags;
private final AuthorizationProvider authorizationProvider;
private final JobUploadValidator jobUploadValidator;
private final JobUploadAllowListValidator jobUploadAllowListValidator;
private final JobUploadFilterListValidator jobUploadFilterListValidator;

@Autowired
public JobUpload(
Expand All @@ -48,13 +49,15 @@ public JobUpload(
GitWrapper gitWrapper,
FeatureFlags featureFlags,
AuthorizationProvider authorizationProvider,
JobUploadValidator jobUploadValidator) {
JobUploadAllowListValidator jobUploadAllowListValidator,
JobUploadFilterListValidator jobUploadFilterListValidator) {
this.datajobsTempStorageFolder = datajobsTempStorageFolder;
this.gitCredentialsProvider = gitCredentialsProvider;
this.gitWrapper = gitWrapper;
this.featureFlags = featureFlags;
this.authorizationProvider = authorizationProvider;
this.jobUploadValidator = jobUploadValidator;
this.jobUploadAllowListValidator = jobUploadAllowListValidator;
this.jobUploadFilterListValidator = jobUploadFilterListValidator;
}

/**
Expand Down Expand Up @@ -116,7 +119,8 @@ public String publishDataJob(String jobName, Resource resource, String reason) {
try (var tempDirPath = new EphemeralFile(datajobsTempStorageFolder, jobName, "deploy")) {
File jobFolder =
FileUtils.unzipDataJob(resource, new File(tempDirPath.toFile(), "job"), jobName);
jobUploadValidator.validateJob(jobName, jobFolder.toPath());
jobUploadFilterListValidator.validateJob(jobName, jobFolder.toPath());
jobUploadAllowListValidator.validateJob(jobName, jobFolder.toPath());

Git git =
gitWrapper.cloneJobRepository(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
/*
* Copyright 2021-2023 VMware, Inc.
* SPDX-License-Identifier: Apache-2.0
*/

package com.vmware.taurus.service.upload;

import java.io.IOException;
import java.nio.file.Path;
import java.util.Arrays;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;

@Service
@Slf4j
class JobUploadAllowListValidator extends AbstractJobFileValidator {

  /**
   * List of files that are allowed to be uploaded. Full list of file types are documented in
   * https://tika.apache.org/2.5.0/formats.html#Full_list_of_Supported_Formats_in_standard_artifacts
   * If set to empty, then all files are allowed using the allowlist.
   */
  private final String[] allowListTypes;

  /**
   * List of file extensions that are allowed to be uploaded. If set to empty, then files are not
   * restricted by extension.
   */
  private final String[] allowListExtensions;

  private final FileFormatDetector formatDetector;

  public JobUploadAllowListValidator(
      @Value("${upload.validation.fileTypes.allowlist:}") String[] allowListTypes,
      @Value("${upload.validation.fileExtensions.allowlist:}") String[] allowListExtensions) {
    this.allowListTypes = allowListTypes;
    this.allowListExtensions = allowListExtensions;
    this.formatDetector = new FileFormatDetector();
    log.debug(
        "Job sources upload validator allowlistTypes: {} allowListExtensions: {}",
        Arrays.toString(allowListTypes),
        Arrays.toString(allowListExtensions));
  }

  @Override
  String[] getValidationTypes() {
    return this.allowListTypes;
  }

  @Override
  String[] getValidationExtensions() {
    return this.allowListExtensions;
  }

  /**
   * Validate the file in a data job based on its file type and extension: returns true when the
   * file is NOT allowed, i.e. its type is missing from a non-empty list of allowed types or its
   * extension is missing from a non-empty list of allowed extensions. In that case the
   * processMatchedFile method is called; otherwise the file is allowed and no processing is
   * performed.
   */
  @Override
  boolean matchTypes(String detectedType, String detectedExtension) {
    return matchTypes(detectedType) || matchExtensions(detectedExtension);
  }

  /** If the list of validationTypes is empty, all types are allowed. */
  private boolean matchTypes(String detectedType) {
    if (getValidationTypes().length == 0) {
      return false;
    }
    return Arrays.stream(getValidationTypes())
        .noneMatch(validationType -> formatDetector.matchTypes(detectedType, validationType));
  }

  /** If the list of validationExtensions is empty, all extensions are allowed. */
  private boolean matchExtensions(String detectedExtension) {
    if (getValidationExtensions().length == 0) {
      return false;
    }
    // Exact comparison on purpose: a suffix check would let "spy" or "numpy" pass when only "py"
    // is allowed, and an empty entry in the list would allow every extension.
    return Arrays.stream(getValidationExtensions()).noneMatch(detectedExtension::equals);
  }

  /**
   * Rejects the upload by throwing {@link InvalidJobUpload} describing the offending file and the
   * currently configured allow lists.
   */
  @Override
  void processMatchedFile(Path filePath, String jobName, String pathInsideJob) throws IOException {
    throw new InvalidJobUpload(
        jobName,
        String.format(
            "file '%s' was detected to be of type '%s' and extension '%s' "
                + "and it is not in the allowed list of file types or allowed extensions.",
            pathInsideJob, detectFileType(filePath), detectFileExtension(filePath)),
        "Make sure to remove the forbidden file. "
            + "Current list of allowed file types is "
            + Arrays.toString(getValidationTypes())
            + ". Current list of allowed file extensions is "
            + Arrays.toString(getValidationExtensions()));
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
* Copyright 2021-2023 VMware, Inc.
* SPDX-License-Identifier: Apache-2.0
*/

package com.vmware.taurus.service.upload;

import java.nio.file.Path;
import java.util.Arrays;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;

/**
 * This class filters out the job directory from forbidden files so that it can be uploaded without
 * them. Forbidden files are specified in a comma separated list in apache tika format, or file
 * extensions. Full list of supported files can be found - https://tika.apache.org ; This operation
 * is intended to allow the upload of a data job by deleting any files specified in the
 * upload.validation.fileTypes.filterlist or upload.validation.fileExtensions.filterlist
 * properties.
 */
@Service
@Slf4j
public class JobUploadFilterListValidator extends AbstractJobFileValidator {

  private final String[] filterListExtensions;
  private final String[] filterListTypes;
  private final FileFormatDetector formatDetector;

  public JobUploadFilterListValidator(
      @Value("${upload.validation.fileExtensions.filterlist:}") String[] filterListExtensions,
      @Value("${upload.validation.fileTypes.filterlist:}") String[] filterListTypes) {
    this.filterListExtensions = filterListExtensions;
    this.filterListTypes = filterListTypes;
    this.formatDetector = new FileFormatDetector();
  }

  @Override
  String[] getValidationTypes() {
    return this.filterListTypes;
  }

  @Override
  String[] getValidationExtensions() {
    return this.filterListExtensions;
  }

  /**
   * Returns true when the file must be deleted, i.e. its extension is present in the list of
   * filtered extensions or its detected type matches one of the filtered types.
   */
  @Override
  boolean matchTypes(String detectedType, String detectedExtension) {
    // Exact comparison on purpose: a suffix check would also delete e.g. "bash" or "zsh" files
    // when only "sh" is filtered, and an empty entry in the list would delete every file.
    return Arrays.stream(getValidationExtensions()).anyMatch(detectedExtension::equals)
        || Arrays.stream(getValidationTypes())
            .anyMatch(validationType -> formatDetector.matchTypes(detectedType, validationType));
  }

  /**
   * Deletes the matched file from the job directory.
   *
   * @throws InvalidJobUpload if the file could not be deleted
   */
  @Override
  void processMatchedFile(Path filePath, String jobName, String pathInsideJob) {
    log.debug(
        "Deleting file: {}, from job: {}, before uploading to version control.",
        pathInsideJob,
        jobName);

    if (!filePath.toFile().delete()) {
      throw new InvalidJobUpload(
          jobName,
          String.format(
              "File: %s was scheduled for deletion before uploading"
                  + " job code to version control but the operation was unsuccessful.",
              pathInsideJob),
          "Remove the file locally from your data job and deploy it again. "
              + "List of file types that will be scheduled for deletion "
              + Arrays.toString(getValidationTypes())
              + " List of file extensions that will be scheduled for deletion "
              + Arrays.toString(getValidationExtensions()));
    }
  }
}
Loading