Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add service to import data from remote URL vs uploading local file #5406

Merged
merged 10 commits into from
Aug 23, 2022
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
Expand All @@ -48,6 +49,7 @@
import org.apache.commons.exec.Executor;
import org.apache.commons.exec.LogOutputStream;
import org.apache.commons.exec.PumpStreamHandler;
import org.apache.commons.exec.environment.EnvironmentUtils;
import org.apache.commons.exec.util.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -101,8 +103,8 @@ public String obfuscateConsoleLog(String in) {
.matcher(out)
.replaceAll("$1=<redacted> ");

//strip out osm api url
out = out.replaceAll("https?://\\S+?/", "<osmapi>/");
//strip out osm api url, but not other remote data urls
out = out.replaceAll("https?://\\S+?/api/0.6/map", "<osmapi>/api/0.6/map");

return out;
}
Expand Down Expand Up @@ -210,7 +212,15 @@ protected void processLine(String line, int level) {
// }
// });

exitCode = executor.execute(cmdLine);
if (substitutionMap.containsKey("ENV_VARS")) {
List<String> envVars = (List<String>) substitutionMap.get("ENV_VARS");
Map<String, String> procEnv = EnvironmentUtils.getProcEnvironment();
envVars.stream().forEach(var -> EnvironmentUtils.addVariableToEnvironment(procEnv, var));

exitCode = executor.execute(cmdLine, procEnv);
} else {
exitCode = executor.execute(cmdLine);
}
}
catch (Exception e) {
exitCode = CommandResult.FAILURE;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
* This will properly maintain the copyright information. Maxar
* copyrights will be updated automatically.
*
* @copyright Copyright (C) 2016, 2017, 2018, 2019, 2020, 2021 Maxar (http://www.maxar.com/)
* @copyright Copyright (C) 2016, 2017, 2018, 2019, 2020, 2021, 2022 Maxar (http://www.maxar.com/)
*/
package hoot.services.controllers.ingest;

Expand All @@ -33,6 +33,7 @@

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
Expand Down Expand Up @@ -62,6 +63,28 @@ class ImportCommand extends ExternalCommand {

List<String> inputs = filesToImport.stream().map(File::getAbsolutePath).collect(Collectors.toList());

List<String> hootOptions = buildHootOptions(user, translation, isNoneTranslation, advUploadOpts);

String inputName = HOOTAPI_DB_URL + "/" + etlName;

Map<String, Object> substitutionMap = new HashMap<>();
substitutionMap.put("DEBUG_LEVEL", debugLevel);
substitutionMap.put("HOOT_OPTIONS", hootOptions);
substitutionMap.put("INPUT_NAME", inputName);
substitutionMap.put("INPUTS", inputs);

if ((classification == ZIP) && !zipsToImport.isEmpty()) {
//Reading a GDAL dataset in a .gz file or a .zip archive
inputs = zipsToImport.stream().map(zip -> "/vsizip/" + zip.getAbsolutePath()).collect(Collectors.toList());
substitutionMap.put("INPUTS", inputs);
}

String command = "hoot.bin convert --${DEBUG_LEVEL} -C Import.conf ${HOOT_OPTIONS} ${INPUTS} ${INPUT_NAME}";

super.configureCommand(command, substitutionMap, caller);
}

private List<String> buildHootOptions(Users user, String translation, Boolean isNoneTranslation, String advUploadOpts) {
List<String> options = new LinkedList<>();
//TODO: always set remap ids to false??
options.add("hootapi.db.writer.overwrite.map=true");
Expand All @@ -78,6 +101,8 @@ class ImportCommand extends ExternalCommand {
//A boolean gets passed if we don't mean to translate, but the UI has a dummy translation name of None
if (!isNoneTranslation) {
options.add("schema.translation.script=" + translationPath);
} else {
options.add("schema.translation.script=" + new File(HOME_FOLDER, "translations/quick.js").getAbsolutePath());
}

if (advUploadOpts != null && !advUploadOpts.isEmpty()) {
Expand All @@ -95,20 +120,50 @@ class ImportCommand extends ExternalCommand {
}

List<String> hootOptions = toHootOptions(options);
return hootOptions;
}

String inputName = HOOTAPI_DB_URL + "/" + etlName;
ImportCommand(String jobId, String url, String username, String password, String translation, String advUploadOpts,
String etlName, Boolean isNoneTranslation, String debugLevel, UploadClassification classification,
Class<?> caller, Users user) {
super(jobId);
this.workDir = null;

String input;
List<String> envVars = new ArrayList<>();
envVars.add("OSM_USE_CUSTOM_INDEXING=NO"); //allows negative IDs on elements when read by OGR
String[] urlParts = url.split("://");
if (url.startsWith("s3")) {
input = url.replace("s3://", "");
input = "/vsis3/" + input;
if (!username.isEmpty() && !password.isEmpty()) {
envVars.add(String.format("AWS_SECRET_ACCESS_KEY=%s", password));
envVars.add(String.format("AWS_ACCESS_KEY_ID=%s", username));
}
} else if (url.startsWith("http") || url.startsWith("ftp")) {
if (!username.isEmpty() && !password.isEmpty()) {
input = "/vsicurl/" + urlParts[0] + "://" + username + ":" + password + "@" + urlParts[1];
} else {
input = "/vsicurl/" + url;
}
} else {
input = url;
}

Map<String, Object> substitutionMap = new HashMap<>();
if ((classification == ZIP)) {
input = "/vsizip/" + input;
}

List<String> hootOptions = buildHootOptions(user, translation, isNoneTranslation, advUploadOpts);

String inputName = HOOTAPI_DB_URL + "/" + etlName;
java.util.Map<String, Object> substitutionMap = new HashMap<>();
substitutionMap.put("ENV_VARS", envVars);
substitutionMap.put("DEBUG_LEVEL", debugLevel);
substitutionMap.put("HOOT_OPTIONS", hootOptions);
substitutionMap.put("INPUT_NAME", inputName);
substitutionMap.put("INPUTS", inputs);
substitutionMap.put("INPUTS", input);

if ((classification == ZIP) && !zipsToImport.isEmpty()) {
//Reading a GDAL dataset in a .gz file or a .zip archive
inputs = zipsToImport.stream().map(zip -> "/vsizip/" + zip.getAbsolutePath()).collect(Collectors.toList());
substitutionMap.put("INPUTS", inputs);
}

String command = "hoot.bin convert --${DEBUG_LEVEL} -C Import.conf ${HOOT_OPTIONS} ${INPUTS} ${INPUT_NAME}";

Expand All @@ -119,13 +174,14 @@ class ImportCommand extends ExternalCommand {
public CommandResult execute() {
CommandResult commandResult = super.execute();

try {
FileUtils.forceDelete(workDir);
}
catch (IOException ioe) {
logger.error("Error deleting folder: {} ", workDir.getAbsolutePath(), ioe);
if (workDir != null) {
try {
FileUtils.forceDelete(workDir);
}
catch (IOException ioe) {
logger.error("Error deleting folder: {} ", workDir.getAbsolutePath(), ioe);
}
}

return commandResult;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
* This will properly maintain the copyright information. Maxar
* copyrights will be updated automatically.
*
* @copyright Copyright (C) 2016, 2017, 2018, 2019, 2021 Maxar (http://www.maxar.com/)
* @copyright Copyright (C) 2016, 2017, 2018, 2019, 2021, 2022 Maxar (http://www.maxar.com/)
*/
package hoot.services.controllers.ingest;

Expand All @@ -33,7 +33,7 @@
import org.springframework.stereotype.Component;

import hoot.services.models.db.Users;

import hoot.services.controllers.ingest.UploadClassification;

@Component
class ImportCommandFactory {
Expand All @@ -44,4 +44,11 @@ ImportCommand build(String jobId, File workDir, List<File> filesToImport,
return new ImportCommand(jobId, workDir, filesToImport, zipsToImport, translation, advUploadOpts, etlName, isNoneTranslation,
debugLevel, uploadClassification, caller, user);
}

ImportCommand build(String jobId, String url, String username, String password, String translation, String advUploadOpts, String etlName, Boolean isNoneTranslation,
String debugLevel, UploadClassification uploadClassification, Class<?> caller, Users user) {
return new ImportCommand(jobId, url, username, password, translation, advUploadOpts, etlName, isNoneTranslation,
debugLevel, uploadClassification, caller, user);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,6 @@ public class ImportResource {
* [OSM | OGR ] OSM for osm file and OGR for shapefile.
* @param inputName
* optional input name which is used in hoot db. Defaults to the file name.
* @param userEmail
* mail address of the user requesting job
* @param noneTranslation
* ?
* @param multiPart
Expand All @@ -135,7 +133,6 @@ public Response processFileUpload(@Context HttpServletRequest request,
@QueryParam("TRANSLATION") String translation,
@QueryParam("INPUT_TYPE") String inputType,
@QueryParam("INPUT_NAME") String inputName,
@QueryParam("USER_EMAIL") String userEmail,
@QueryParam("NONE_TRANSLATION") Boolean noneTranslation,
@QueryParam("ADV_UPLOAD_OPTS") String advUploadOpts,
@QueryParam("FOLDER_ID") String folderId,
Expand Down Expand Up @@ -235,16 +232,7 @@ public Response processFileUpload(@Context HttpServletRequest request,

List<Command> workflow = new LinkedList<>();

// if id is used for translation then set translation to the file path of that file
try {
long translationId = Long.parseLong(translation);
Translations translationFile = CustomScriptResource.getTranslationForUser(user, translationId, false /*editable*/);
TranslationFolder folder = CustomScriptResource.getTranslationFolderForUser(user, translationFile.getFolderId());

String translationPath = File.separator + translationFile.getDisplayName();
translationPath = folder.getPath() != null ? folder.getPath() + translationPath : translationPath;
translation = translationPath + ".js";
} catch (NumberFormatException exc) {}
translation = lookupTranslation(translation, user);

ExternalCommand importCommand = fileETLCommandFactory.build(jobId, workDir, filesToImport, zipsToImport, translation, advUploadOpts,
etlName, noneTranslation, debugLevel, finalUploadClassification, this.getClass(), user);
Expand Down Expand Up @@ -292,4 +280,81 @@ public Response getOptions() {
}
return Response.ok(template).build();
}


private String lookupTranslation(String translation, Users user) {
// if id is used for translation then set translation to the file path of that file
try {
long translationId = Long.parseLong(translation);
Translations translationFile = CustomScriptResource.getTranslationForUser(user, translationId, false /*editable*/);
TranslationFolder folder = CustomScriptResource.getTranslationFolderForUser(user, translationFile.getFolderId());

String translationPath = File.separator + translationFile.getDisplayName();
translationPath = folder.getPath() != null ? folder.getPath() + translationPath : translationPath;
return translationPath + ".js";
} catch (NumberFormatException exc) {
return translation;
}

}
/**
* Purpose of this service is to provide ingest service for fetching remote (http, s3, ftp) shape
* and osm file and performing ETL operation on the file(s).
*
* GET hoot-services/ingest/upload?TRANSLATION=NFDD.js&INPUT_TYPE=OSM&INPUT_NAME=ToyTest
*
* @param translation
* Translation script used during OGR ETL process.
* @param advUploadOpts
advanced options for importing shapefiles
* @param inputType
* [OSM | OGR ] OSM for osm file and OGR for shapefile.
* @param inputName
* optional input name which is used in hoot db. Defaults to the file name.
* @param noneTranslation
* ?
* @param multiPart
* uploaded files
* @return Array of job status
*/
@GET
@Path("/remote")
@Produces(MediaType.APPLICATION_JSON)
public Response processRemoteFile(@Context HttpServletRequest request,
@QueryParam("TRANSLATION") String translation,
@QueryParam("INPUT_NAME") String inputName,
@QueryParam("URL") String url,
@QueryParam("USERNAME") @DefaultValue("") String username,
@QueryParam("PASSWORD") @DefaultValue("") String password,
@QueryParam("NONE_TRANSLATION") Boolean noneTranslation,
@QueryParam("ADV_UPLOAD_OPTS") String advUploadOpts,
@QueryParam("FOLDER_ID") String folderId,
@QueryParam("DEBUG_LEVEL") @DefaultValue("info") String debugLevel) {

Users user = Users.fromRequest(request);
List<Map<String,Object>> results = new ArrayList<Map<String,Object>>();
String jobId = UUID.randomUUID().toString();
List<Command> workflow = new LinkedList<>();

translation = lookupTranslation(translation, user);

ExternalCommand importCommand = fileETLCommandFactory.build(jobId, url, username, password, translation, advUploadOpts,
inputName, noneTranslation, debugLevel, UploadClassification.ZIP, this.getClass(), user);
workflow.add(importCommand);

long parentFolderId = folderId != null ? Long.parseLong(folderId) : 0;
InternalCommand setFolderCommand = updateParentCommandFactory.build(jobId, parentFolderId, inputName, user, this.getClass());
workflow.add(setFolderCommand);

jobProcessor.submitAsync(new Job(jobId, user.getId(), workflow.toArray(new Command[workflow.size()]), JobType.IMPORT));

Map<String, Object> res = new HashMap<String, Object>();
res.put("jobid", jobId);
res.put("input", url);
res.put("output", inputName);
res.put("status", "success");
results.add(res);

return Response.ok(results).build();
}
}