Skip to content

Commit

Permalink
Enable Read/Execution Project for BQ Queries (#7136)
Browse files Browse the repository at this point in the history
* Use the execution project ID when running the sample-table query
* Revert to the previous behavior for interactive vs. batch
  • Loading branch information
kcibul authored and mmorgantaylor committed Apr 6, 2021
1 parent dd2cb77 commit bded85b
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.lang.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.hellbender.utils.bigquery.BigQueryUtils;
Expand All @@ -20,9 +18,9 @@ public class SampleList {
private Map<Long, String> sampleIdMap = new HashMap<>();
private Map<String, Long> sampleNameMap = new HashMap<>();

public SampleList(String sampleTableName, File sampleFile, boolean printDebugInformation) {
public SampleList(String sampleTableName, File sampleFile, String executionProjectId, boolean printDebugInformation) {
if (sampleTableName != null) {
initializeMaps(new TableReference(sampleTableName, SchemaUtils.SAMPLE_FIELDS), printDebugInformation);
initializeMaps(new TableReference(sampleTableName, SchemaUtils.SAMPLE_FIELDS), executionProjectId, printDebugInformation);
} else if (sampleFile != null) {
initializeMaps(sampleFile);
} else {
Expand Down Expand Up @@ -50,26 +48,8 @@ public Map<Long, String> getMap() {
return sampleIdMap;
}

// protected Map<String, Integer> getSampleNameMap(TableReference sampleTable, List<String> samples, boolean printDebugInformation) {
// Map<String, Integer> results = new HashMap<>();
// // create optional where clause
// String whereClause = "";
// if (samples != null && samples.size() > 0) {
// whereClause = " WHERE " + SchemaUtils.SAMPLE_NAME_FIELD_NAME + " in (\'" + StringUtils.join(samples, "\',\'") + "\') ";
// }
//
// TableResult queryResults = querySampleTable(sampleTable.getFQTableName(), whereClause, printDebugInformation);
//
// // Add our samples to our map:
// for (final FieldValueList row : queryResults.iterateAll()) {
// results.put(row.get(1).getStringValue(), (int) row.get(0).getLongValue());
// }
// return results;
// }


protected void initializeMaps(TableReference sampleTable, boolean printDebugInformation) {
TableResult queryResults = querySampleTable(sampleTable.getFQTableName(), "", printDebugInformation);
protected void initializeMaps(TableReference sampleTable, String executionProjectId, boolean printDebugInformation) {
TableResult queryResults = querySampleTable(sampleTable.getFQTableName(), "", executionProjectId, printDebugInformation);

// Add our samples to our map:
for (final FieldValueList row : queryResults.iterateAll()) {
Expand All @@ -95,15 +75,14 @@ protected void initializeMaps(File cohortSampleFile) {
}
}

private TableResult querySampleTable(String fqSampleTableName, String whereClause, boolean printDebugInformation) {
private TableResult querySampleTable(String fqSampleTableName, String whereClause, String executionProjectId, boolean printDebugInformation) {
// Get the query string:
final String sampleListQueryString =
"SELECT " + SchemaUtils.SAMPLE_ID_FIELD_NAME + ", " + SchemaUtils.SAMPLE_NAME_FIELD_NAME +
" FROM `" + fqSampleTableName + "`" + whereClause;
" FROM `" + fqSampleTableName + "`" + whereClause;


// Execute the query:
final TableResult result = BigQueryUtils.executeQuery(sampleListQueryString);
// Execute the query:
final TableResult result = BigQueryUtils.executeQuery(BigQueryUtils.getBigQueryEndPoint(executionProjectId) , sampleListQueryString, false);

// Show our pretty results:
if (printDebugInformation) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ protected void onStartup() {

vcfWriter = createVCFWriter(IOUtils.getPath(outputVcfPathString));

SampleList sampleIdMap = new SampleList(sampleTableName, cohortSampleFile, printDebugInformation);
SampleList sampleIdMap = new SampleList(sampleTableName, cohortSampleFile, readProjectID, printDebugInformation);
// Map<Integer, String> sampleIdMap;
VCFHeader header = CommonCode.generateRawArrayVcfHeader(new HashSet<>(sampleIdMap.getSampleNames()), reference.getSequenceDictionary());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public void onTraversalStart() {
// Get sample name
final VCFHeader inputVCFHeader = getHeaderForVariants();
TableReference sampleTable = new TableReference(sampleListFQTablename, SchemaUtils.SAMPLE_FIELDS);
sampleNameMap = new SampleList(sampleListFQTablename, null, printDebugInformation);
sampleNameMap = new SampleList(sampleListFQTablename, null, null, printDebugInformation);
tableToCreatorMap = new HashMap<>();

Map<Integer, Set<String>> tableNumberToSampleList = new HashMap<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ public class ExtractCohort extends ExtractTool {
protected void onStartup() {
super.onStartup();

SampleList sampleList = new SampleList(sampleTableName, sampleFileName, printDebugInformation);
SampleList sampleList = new SampleList(sampleTableName, sampleFileName, projectID, printDebugInformation);
Set<String> sampleNames = new HashSet<>(sampleList.getSampleNames());

VCFHeader header = CommonCode.generateVcfHeader(sampleNames, reference.getSequenceDictionary());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,15 @@ public static BigQuery getBigQueryEndPoint() {
return BigQueryOptions.getDefaultInstance().getService();
}

/**
 * Obtains a {@link BigQuery} service handle, optionally bound to a specific Google project.
 *
 * @param executionProjectId The google project id to set on the {@link BigQueryOptions} from which the
 *                           service is built. When {@code null}, this method delegates to the
 *                           no-argument {@link #getBigQueryEndPoint()} instead.
 *
 * @return A {@link BigQuery} object that can be used to interact with a BigQuery data set.
 */
public static BigQuery getBigQueryEndPoint(String executionProjectId) {
    // No explicit project supplied: fall back to the default endpoint.
    if (executionProjectId == null) {
        return getBigQueryEndPoint();
    }

    // Build options carrying the requested project id, then obtain the service from them.
    final BigQueryOptions projectScopedOptions =
            BigQueryOptions.newBuilder().setProjectId(executionProjectId).build();
    return projectScopedOptions.getService();
}

/**
* Executes the given {@code queryString} on the default instance of {@link BigQuery} as created by {@link #getBigQueryEndPoint()}.
* Will block until results are returned.
Expand Down

0 comments on commit bded85b

Please sign in to comment.