ah - optional service account #7140

Merged · 2 commits · Mar 11, 2021
Changes from 1 commit
49 changes: 47 additions & 2 deletions scripts/variantstore/wdl/ImportGenomes.wdl
@@ -4,6 +4,7 @@ workflow ImportGenomes {

input {
Array[File] input_vcfs
Array[File] input_vcf_indexes
File interval_list
String output_directory
File sample_map
@@ -12,6 +13,7 @@ workflow ImportGenomes {
File pet_schema
File vet_schema
File metadata_schema
File? service_account_json
String? drop_state
Boolean? drop_state_includes_greater_than = false

@@ -37,6 +39,7 @@
superpartitioned = "false",
partitioned = "false",
uuid = "",
service_account_json = service_account_json,
preemptible_tries = preemptible_tries,
docker = docker_final
}
@@ -51,6 +54,7 @@
superpartitioned = "true",
partitioned = "true",
uuid = "",
service_account_json = service_account_json,
preemptible_tries = preemptible_tries,
docker = docker_final
}
@@ -65,6 +69,7 @@
superpartitioned = "true",
partitioned = "true",
uuid = "",
service_account_json = service_account_json,
preemptible_tries = preemptible_tries,
docker = docker_final
}
@@ -73,8 +78,10 @@
call CreateImportTsvs {
input:
input_vcf = input_vcfs[i],
input_vcf_index = input_vcf_indexes[i],
interval_list = interval_list,
sample_map = sample_map,
service_account_json = service_account_json,
drop_state = drop_state,
drop_state_includes_greater_than = drop_state_includes_greater_than,
output_directory = output_directory,
@@ -96,6 +103,7 @@
schema = metadata_schema,
table_creation_done = CreateMetadataTables.done,
tsv_creation_done = CreateImportTsvs.done,
service_account_json = service_account_json,
docker = docker_final
}
}
@@ -112,6 +120,7 @@
schema = pet_schema,
table_creation_done = CreatePetTables.done,
tsv_creation_done = CreateImportTsvs.done,
service_account_json = service_account_json,
docker = docker_final
}
}
@@ -128,6 +137,7 @@
schema = vet_schema,
table_creation_done = CreateVetTables.done,
tsv_creation_done = CreateImportTsvs.done,
service_account_json = service_account_json,
docker = docker_final
}
}
@@ -162,20 +172,28 @@ task GetMaxTableId {
task CreateImportTsvs {
input {
File input_vcf
File input_vcf_index
File interval_list
String output_directory
File sample_map
File? service_account_json
String? drop_state
Boolean? drop_state_includes_greater_than = false

# runtime
Int? preemptible_tries
File? gatk_override
String docker

String? for_testing_only
}

Int multiplier = if defined(drop_state) then 4 else 10
Int disk_size = ceil(size(input_vcf, "GB") * multiplier) + 20
#TODO does this affect the memory allocation for the disk?
Int disk_size = 1000
Contributor:

Maybe turn this into a parameter for the task and workflow with a default, so it could be overridden if necessary but works for the "drop GQ60" case. 1000 GB (1 TB) seems like a LOT of disk, though. Is that how big the PET/VET TSVs are?

Contributor Author:

The biggest one I see is 7.2 GB. I guess I'll change it to something like 50 GB.
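
A minimal WDL sketch of that suggestion, for illustration only: the disk size becomes an overridable task input with a default. The disk_size_gb name and the 50 GB default are placeholders taken from this discussion; the change in this commit still hard-codes the value.

    task CreateImportTsvs {
      input {
        File input_vcf
        # Default sized for the "drop GQ60" case; callers can override per run.
        Int disk_size_gb = 50
      }
      command <<<
        echo "processing ~{input_vcf} with ~{disk_size_gb} GB of local disk"
      >>>
      runtime {
        disks: "local-disk " + disk_size_gb + " HDD"
      }
    }

The workflow would expose the same input and pass it through to the task call, so a single override covers the rare oversized input.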

String has_service_account_file = if (defined(service_account_json)) then 'true' else 'false'
# if we are doing a manual localization, we need to set the filename
String updated_input_vcf = if (defined(service_account_json)) then basename(input_vcf) else input_vcf

meta {
description: "Creates a tsv file for import into BigQuery"
@@ -186,6 +204,9 @@
input_vcf: {
localization_optional: true
}
input_vcf_index: {
localization_optional: true
}
}
command <<<
set -e
@@ -194,9 +215,16 @@
export TMPDIR=/tmp

export GATK_LOCAL_JAR=~{default="/root/gatk.jar" gatk_override}
~{for_testing_only}

if [ ~{has_service_account_file} = 'true' ]; then
gcloud auth activate-service-account --key-file='~{service_account_json}'
gsutil cp ~{input_vcf} .
gsutil cp ~{input_vcf_index} .
fi

gatk --java-options "-Xmx7000m" CreateVariantIngestFiles \
-V ~{input_vcf} \
-V ~{updated_input_vcf} \
-L ~{interval_list} \
~{"-IG " + drop_state} \
--ignore-above-gq-threshold ~{drop_state_includes_greater_than} \
@@ -235,16 +263,24 @@ task CreateTables {
String superpartitioned
String partitioned
String uuid
File? service_account_json

# runtime
Int? preemptible_tries
String docker
}

String has_service_account_file = if (defined(service_account_json)) then 'true' else 'false'

command <<<
set -x
set -e

if [ ~{has_service_account_file} = 'true' ]; then
gcloud auth activate-service-account --key-file='~{service_account_json}'
gcloud config set project ~{project_id}
fi

PREFIX=""
if [ -n "~{uuid}" ]; then
PREFIX="~{uuid}_"
@@ -305,16 +341,24 @@ task LoadTable {
String datatype
String superpartitioned
File schema
File? service_account_json
String table_creation_done
Array[String] tsv_creation_done

String docker
}

String has_service_account_file = if (defined(service_account_json)) then 'true' else 'false'

command <<<
set -x
set -e

if [ ~{has_service_account_file} = 'true' ]; then
gcloud auth activate-service-account --key-file='~{service_account_json}'
gcloud config set project ~{project_id}
fi

DIR="~{storage_location}/~{datatype}_tsvs/"

printf -v PADDED_TABLE_ID "%03d" ~{table_id}
@@ -348,3 +392,4 @@ task LoadTable {
cpu: 1
}
}