diff --git a/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java b/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java index 499dcbcefe0..4e0e74162a4 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java @@ -387,8 +387,8 @@ private static FeatureReader getFeatureReader(final Featu protected static FeatureReader getGenomicsDBFeatureReader(final GATKPath path, final File reference, final GenomicsDBOptions genomicsDBOptions) { final String workspace = IOUtils.getGenomicsDBAbsolutePath(path) ; if (workspace == null) { - throw new IllegalArgumentException("Trying to create a GenomicsDBReader from non-GenomicsDB inputpath " + path); - } else if (Files.notExists(IOUtils.getPath(workspace))) { + throw new IllegalArgumentException("Trying to create a GenomicsDBReader from non-GenomicsDB input path " + path); + } else if (Files.notExists(IOUtils.getPath(workspace.endsWith("/") ? workspace : workspace + "/"))) { throw new UserException("GenomicsDB workspace " + path + " does not exist"); } @@ -401,10 +401,10 @@ protected static FeatureReader getGenomicsDBFeatureReader(final try { final GenomicsDBExportConfiguration.ExportConfiguration exportConfigurationBuilder = createExportConfiguration(workspace, callsetJson, vidmapJson, vcfHeader, genomicsDBOptions); - if (genomicsDBOptions.useVCFCodec()) { - return new GenomicsDBFeatureReader<>(exportConfigurationBuilder, new VCFCodec(), Optional.empty()); - } else { + if (genomicsDBOptions.useBCFCodec()) { return new GenomicsDBFeatureReader<>(exportConfigurationBuilder, new BCF2Codec(), Optional.empty()); + } else { + return new GenomicsDBFeatureReader<>(exportConfigurationBuilder, new VCFCodec(), Optional.empty()); } } catch (final IOException e) { throw new UserException("Couldn't create GenomicsDBFeatureReader", e); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBArgumentCollection.java index 32ec38c0818..9bde82973e4 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBArgumentCollection.java @@ -8,11 +8,11 @@ public class GenomicsDBArgumentCollection implements Serializable { private static final long serialVersionUID = 1L; - public static final String USE_VCF_CODEC_LONG_NAME = "genomicsdb-use-vcf-codec"; + public static final String USE_BCF_CODEC_LONG_NAME = "genomicsdb-use-bcf-codec"; public static final String SHARED_POSIXFS_OPTIMIZATIONS = "genomicsdb-shared-posixfs-optimizations"; private static final boolean DEFAULT_CALL_GENOTYPES = false; - private static final boolean DEFAULT_USE_VCF_CODEC = false; + private static final boolean DEFAULT_USE_BCF_CODEC = false; private static final boolean DEFAULT_SHARED_POSIXFS_OPTIMIZATIONS = false; /** @@ -21,18 +21,20 @@ public class GenomicsDBArgumentCollection implements Serializable { public boolean callGenotypes = DEFAULT_CALL_GENOTYPES; public int maxDiploidAltAllelesThatCanBeGenotyped = GenotypeLikelihoods.MAX_DIPLOID_ALT_ALLELES_THAT_CAN_BE_GENOTYPED; - /** - * Currently there is no support for 64-bit fields in BCFCodec. Specifying this option will allow - * for 64-bit width positions and INFO fields and for computed annotation sizes to exceed the 32-bit - * integer space while encoding/decoding with GenomicsDB. + * Currently there is no support for 64-bit fields in BCF2Codec. The VCFCodec allows for 64-bit + * width positions and INFO fields and for computed annotation sizes to exceed the 32-bit + * integer space while encoding/decoding with GenomicsDB. Use the BCF2Codec option if and + * only if performance is an issue. */ @Advanced @Argument( - fullName = USE_VCF_CODEC_LONG_NAME, - doc = "Use VCF Codec Streaming for data from GenomicsDB instead of the default BCF", + fullName = USE_BCF_CODEC_LONG_NAME, + doc = + "Use BCF Codec Streaming for data from GenomicsDB instead of the default VCFCodec. BCFCodec performs slightly better but currently does not support " + + "64-bit width positions and INFO fields and for computed annotation sizes to exceed 32-bit integer space.", optional = true) - public boolean useVCFCodec = DEFAULT_USE_VCF_CODEC; + public boolean useBCFCodec = DEFAULT_USE_BCF_CODEC; @Argument(fullName = SHARED_POSIXFS_OPTIMIZATIONS, doc = "Allow for optimizations to improve the usability and performance for shared Posix Filesystems(e.g. NFS, Lustre). " + diff --git a/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImport.java b/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImport.java index 322c902e1b7..77af3b52bf9 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImport.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImport.java @@ -203,7 +203,7 @@ public final class GenomicsDBImport extends GATKTool { public static final String SHARED_POSIXFS_OPTIMIZATIONS = GenomicsDBArgumentCollection.SHARED_POSIXFS_OPTIMIZATIONS; @Argument(fullName = WORKSPACE_ARG_LONG_NAME, - doc = "Workspace for GenomicsDB. Must be a POSIX file system path, but can be a relative path. " + + doc = "Workspace for GenomicsDB. Can be a POSIX file system absolute or relative path or a HDFS/GCS URL. " + "Use this argument when creating a new GenomicsDB workspace. " + "Either this or "+INCREMENTAL_WORKSPACE_ARG_LONG_NAME+" must be specified." + " Must be an empty or non-existent directory.", @@ -211,7 +211,7 @@ public final class GenomicsDBImport extends GATKTool { private String workspace; @Argument(fullName = INCREMENTAL_WORKSPACE_ARG_LONG_NAME, - doc = "Workspace when updating GenomicsDB. Must be a POSIX file system path, but can be a relative path. " + + doc = "Workspace when updating GenomicsDB. Can be a POSIX file system absolute or relative path or a HDFS/GCS URL. " + "Use this argument when adding new samples to an existing GenomicsDB workspace or "+ "when using the "+INTERVAL_LIST_LONG_NAME+" option. " + "Either this or "+WORKSPACE_ARG_LONG_NAME+" must be specified. " + diff --git a/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBOptions.java b/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBOptions.java index 2334794cb3b..71af5dc48f5 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBOptions.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBOptions.java @@ -12,7 +12,7 @@ public final class GenomicsDBOptions { final private boolean callGenotypes; final private int maxDiploidAltAllelesThatCanBeGenotyped; final private int maxGenotypeCount; - final private boolean useVCFCodec; + final private boolean useBCFCodec; final private boolean sharedPosixFSOptimizations; public GenomicsDBOptions() { @@ -32,7 +32,7 @@ public GenomicsDBOptions(final Path reference, GenomicsDBArgumentCollection geno this.callGenotypes = genomicsdbArgs.callGenotypes; this.maxDiploidAltAllelesThatCanBeGenotyped = genomicsdbArgs.maxDiploidAltAllelesThatCanBeGenotyped; this.maxGenotypeCount = genotypeCalcArgs.MAX_GENOTYPE_COUNT; - this.useVCFCodec = genomicsdbArgs.useVCFCodec; + this.useBCFCodec = genomicsdbArgs.useBCFCodec; this.sharedPosixFSOptimizations = genomicsdbArgs.sharedPosixFSOptimizations; } @@ -52,8 +52,8 @@ public int getMaxGenotypeCount() { return maxGenotypeCount; } - public boolean useVCFCodec() { - return useVCFCodec; + public boolean useBCFCodec() { + return useBCFCodec; } public boolean sharedPosixFSOptimizations() { diff --git a/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java index 4834a2f9279..2975cb6e749 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java @@ -95,6 +95,7 @@ public final class GenomicsDBImportIntegrationTest extends CommandLineProgramTes private static final String INTERVAL_PICARD_STYLE_EXPECTED = toolsTestDir + "GenomicsDBImport/interval_expected.interval_list"; private static final String MULTIPLE_NON_ADJACENT_INTERVALS_THAT_WORK_WITH_COMBINE_GVCFS_PICARD_STYLE_EXPECTED = toolsTestDir + "GenomicsDBImport/multiple_non_adjacent_intervals_combine_gvcfs_expected.interval_list"; + private static final String TEST_INT64_SUPPORT_GENOMICSDB_BUNDLE = GENOMICSDB_TEST_DIR + "/int64_test.tar.gz"; //Consider a gVCF with a REF block chr20:50-150. Importing this data into GenomicsDB using multiple intervals //-L chr20:1-100 and -L chr20:101-200 will cause the REF block to be imported into both the arrays //Now, when reading data from the workspace (assume full scan) - the data is split into 2 REF block intervals chr20:50-100 @@ -1073,6 +1074,27 @@ public void testWithMiscOptionsNoOverwrite() throws IOException { basicWriteAndQueryWithOptions(workspace, options); } + @Test + public void testQueryWithComputationsExceeding32BitsDefault() throws IOException { + final String folder = createTempDir("computations_exceed_32bits").getAbsolutePath(); + IOUtils.extractTarGz(Paths.get(TEST_INT64_SUPPORT_GENOMICSDB_BUNDLE), Paths.get(folder)); + IOUtils.deleteOnExit(IOUtils.getPath(folder)); + final String workspace = folder + "/bigint_genomicsdb_ws"; + checkGenomicsDBAgainstExpected(workspace, new ArrayList(Arrays.asList(new SimpleInterval("1"))), folder+"/expected_combined_bigint.vcf", + folder+"/reference/chr1_10MB.fasta.gz", true, ATTRIBUTES_TO_IGNORE, false, false, true); + } + + // The following test should fail with a Throwable because of limitations in BCF2Codec - see https://github.com/broadinstitute/gatk/issues/6548 + @Test(expectedExceptions = Throwable.class) + public void testQueryWithComputationsExceeding32BitsBCFCodec() throws IOException { + final String folder = createTempDir("computations_exceed_32bits_bcf2codec").getAbsolutePath() + "/testQueryWithComputationsExceed32Bits"; + IOUtils.extractTarGz(Paths.get(TEST_INT64_SUPPORT_GENOMICSDB_BUNDLE), Paths.get(folder)); + IOUtils.deleteOnExit(IOUtils.getPath(folder)); + final String workspace = folder + "/bigint_genomicsdb_ws"; + checkGenomicsDBAgainstExpected(workspace, new ArrayList(Arrays.asList(new SimpleInterval("1"))), folder+"/expected_combined_bigint.vcf", + folder+"/reference/chr1_10MB.fasta.gz", true, ATTRIBUTES_TO_IGNORE, false, false, false); + } + @Test(groups = {"bucket"}) public void testWriteToAndQueryFromGCS() throws IOException { final String workspace = BucketUtils.randomRemotePath(getGCPTestStaging(), "", "") + "/"; diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFsIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFsIntegrationTest.java index a4cd31611dc..692ae9dce9c 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFsIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFsIntegrationTest.java @@ -17,6 +17,7 @@ import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; import org.broadinstitute.hellbender.testutils.GenomicsDBTestUtils; import org.broadinstitute.hellbender.testutils.VariantContextTestUtils; +import org.broadinstitute.hellbender.tools.genomicsdb.GenomicsDBArgumentCollection; import org.broadinstitute.hellbender.tools.genomicsdb.GenomicsDBImport; import org.broadinstitute.hellbender.tools.walkers.annotator.RMSMappingQuality; import org.broadinstitute.hellbender.utils.SimpleInterval; @@ -220,8 +221,6 @@ public void testGenotypesOnly(File input, File expected, List extraArgs, //this is different from the above data provider because we can currently only load a single interval into a genomics db in a sane way //so we need to provide a list of intervals and then look at each one public Object[][] getGVCFsForGenomicsDB(){ - - return new Object[][]{ {getTestFile(BASE_PAIR_GVCF), getTestFile(BASE_PAIR_EXPECTED), new SimpleInterval("20", 1, 11_000_000), b37_reference_20_21}, {CEUTRIO_20_21_GATK3_4_G_VCF, getTestFile("CEUTrio.20.gatk3.7_30_ga4f720357.expected.vcf"), new SimpleInterval("20", 1, 11_000_000), b37_reference_20_21}, @@ -244,10 +243,26 @@ public Object[][] getGVCFsForGenomicsDBOverMultipleIntervals() { //this only tests single-sample @Test(dataProvider = "getGVCFsForGenomicsDB", timeOut = 1000000) - public void assertMatchingGenotypesFromTileDB(File input, File expected, Locatable interval, String reference) throws IOException { + public void assertMatchingGenotypesFromGenomicsDB(File input, File expected, Locatable interval, String reference) throws IOException { final File tempGenomicsDB = GenomicsDBTestUtils.createTempGenomicsDB(input, interval); final String genomicsDBUri = GenomicsDBTestUtils.makeGenomicsDBUri(tempGenomicsDB); runGenotypeGVCFSAndAssertSomething(genomicsDBUri, expected, NO_EXTRA_ARGS, VariantContextTestUtils::assertVariantContextsHaveSameGenotypes, reference); + + // The default option with GenomicsDB input uses VCFCodec for decoding, test BCFCodec explicitly + final List args = new ArrayList(); + args.add("--"+GenomicsDBArgumentCollection.USE_BCF_CODEC_LONG_NAME); + runGenotypeGVCFSAndAssertSomething(genomicsDBUri, expected, args, VariantContextTestUtils::assertVariantContextsHaveSameGenotypes, reference); + } + + private void runAndCheckGenomicsDBOutput(final ArgumentsBuilder args, final File expected, final File output) { + Utils.resetRandomGenerator(); + runCommandLine(args); + + // Note that if this isn't working it will take *FOREVER* + // runs in 0.06 minutes with no input intervals specfied + final List expectedVC = VariantContextTestUtils.getVariantContexts(expected); + final List actualVC = VariantContextTestUtils.getVariantContexts(output); + assertForEachElementInLists(actualVC, expectedVC, VariantContextTestUtils::assertVariantContextsHaveSameGenotypes); } @Test(dataProvider = "getGVCFsForGenomicsDBOverMultipleIntervals") @@ -259,21 +274,17 @@ public void testGenotypeGVCFsMultiIntervalGDBQuery(File input, File expected, Li final ArgumentsBuilder args = new ArgumentsBuilder(); args.addReference(new File(reference)) - .add("V", genomicsDBUri) - .addOutput(output); + .add("V", genomicsDBUri); + args.addOutput(output); intervals.forEach(args::addInterval); args.addRaw("--" + GenomicsDBImport.MERGE_INPUT_INTERVALS_LONG_NAME); args.addRaw("--only-output-calls-starting-in-intervals"); //note that this will restrict calls to just the specified intervals - Utils.resetRandomGenerator(); - runCommandLine(args); - - //Note that if this isn't working it will take *FOREVER* - // runs in 0.06 minutes with no input intervals specfied - final List expectedVC = VariantContextTestUtils.getVariantContexts(expected); - final List actualVC = VariantContextTestUtils.getVariantContexts(output); - assertForEachElementInLists(actualVC, expectedVC, VariantContextTestUtils::assertVariantContextsHaveSameGenotypes); + runAndCheckGenomicsDBOutput(args, expected, output); + // The default option with GenomicsDB input uses VCFCodec for decoding, test BCFCodec explicitly + args.addRaw("--"+GenomicsDBArgumentCollection.USE_BCF_CODEC_LONG_NAME); + runAndCheckGenomicsDBOutput(args, expected, output); } //this tests single-sample with new MQ format @@ -285,6 +296,11 @@ public void assertMatchingAnnotationsFromGenomicsDB_newMQformat(File input, File final VCFHeader header = VCFHeaderReader.readHeaderFrom(new SeekablePathStream(IOUtils.getPath(expected.getAbsolutePath()))); final List attributesToFilter = Stream.concat(ATTRIBUTES_WITH_JITTER.stream(), ATTRIBUTES_TO_IGNORE.stream()).collect(Collectors.toList()); runGenotypeGVCFSAndAssertSomething(genomicsDBUri, expected, NO_EXTRA_ARGS, (a, e) -> VariantContextTestUtils.assertVariantContextsAreEqualAlleleOrderIndependent(a, e, ATTRIBUTES_TO_IGNORE, ATTRIBUTES_WITH_JITTER, header), reference); + + // The default option with GenomicsDB input uses VCFCodec for decoding, test BCFCodec explicitly + final List args = new ArrayList(); + args.add("--"+GenomicsDBArgumentCollection.USE_BCF_CODEC_LONG_NAME); + runGenotypeGVCFSAndAssertSomething(genomicsDBUri, expected, args, (a, e) -> VariantContextTestUtils.assertVariantContextsAreEqualAlleleOrderIndependent(a, e, ATTRIBUTES_TO_IGNORE, ATTRIBUTES_WITH_JITTER, header), reference); } @Test(dataProvider = "gvcfsToGenotype") diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/GnarlyGenotyperIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/GnarlyGenotyperIntegrationTest.java index f25b4a9145b..e872a5d6be1 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/GnarlyGenotyperIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/GnarlyGenotyperIntegrationTest.java @@ -37,28 +37,28 @@ public Object[][] getVCFdata() { //No variants outside requested intervals; no SNPs with QUAL < 60, no INDELs with QUAL < 69?; has star alleles after deletion at chr20:263497; has AC, AF, AN, DP, ExcessHet, FS, MQ, (MQRankSum), (ReadPosRankSum), SOR, QD; has called genotypes {new File[]{getTestFile("sample1.vcf"), getTestFile("sample2.vcf"), getTestFile("sample3.vcf"), getTestFile("sample4.vcf"), getTestFile("sample5.vcf")}, getTestFile("fiveSampleTest.vcf"), null, Arrays.asList(new SimpleInterval("chr20", 251370, 252000), new SimpleInterval("chr20", 263000, 265600)), Arrays.asList("--merge-input-intervals", "--only-output-calls-starting-in-intervals"), b38_reference_20_21}, - // Same as above, but with GenomicsDB using VCFCodec for interchange + // Same as above, but with GenomicsDB using BCF2Codec for interchange {new File[]{getTestFile("sample1.vcf"), getTestFile("sample2.vcf"), getTestFile("sample3.vcf"), getTestFile("sample4.vcf"), getTestFile("sample5.vcf")}, - getTestFile("fiveSampleTest.vcf"), null, Arrays.asList(new SimpleInterval("chr20", 251370, 252000), new SimpleInterval("chr20", 263000, 265600)), Arrays.asList("--merge-input-intervals", "--only-output-calls-starting-in-intervals", "--genomicsdb-use-vcf-codec"), b38_reference_20_21}, + getTestFile("fiveSampleTest.vcf"), null, Arrays.asList(new SimpleInterval("chr20", 251370, 252000), new SimpleInterval("chr20", 263000, 265600)), Arrays.asList("--merge-input-intervals", "--only-output-calls-starting-in-intervals", "--genomicsdb-use-bcf-codec"), b38_reference_20_21}, //lower calling confidence //same as above except (different intervals and) with SNPs with 40 < QUAL < 60 and INDELs with 49 < QUAL < 69 {new File[]{getTestFile("sample1.vcf"), getTestFile("sample2.vcf"), getTestFile("sample3.vcf"), getTestFile("sample4.vcf"), getTestFile("sample5.vcf")}, getTestFile("fiveSampleTest.lowerCallThreshold.vcf"), null, Arrays.asList(new SimpleInterval("chr20", 250865, 348163)), Arrays.asList("-stand-call-conf 10"), b38_reference_20_21}, - // Same as above, but with GenomicsDB using VCFCodec for interchange + // Same as above, but with GenomicsDB using BCF2Codec for interchange {new File[]{getTestFile("sample1.vcf"), getTestFile("sample2.vcf"), getTestFile("sample3.vcf"), getTestFile("sample4.vcf"), getTestFile("sample5.vcf")}, - getTestFile("fiveSampleTest.lowerCallThreshold.vcf"), null, Arrays.asList(new SimpleInterval("chr20", 250865, 348163)), Arrays.asList("-stand-call-conf 10", "--genomicsdb-use-vcf-codec"), b38_reference_20_21}, + getTestFile("fiveSampleTest.lowerCallThreshold.vcf"), null, Arrays.asList(new SimpleInterval("chr20", 250865, 348163)), Arrays.asList("-stand-call-conf 10", "--genomicsdb-use-bcf-codec"), b38_reference_20_21}, //using latest reblocking output with allele-specific annotations //has all of the above plus AS_AltDP, AS_FS, AS_MQ, AS_MQRankSum, AS_QD, AS_ReadPosRankSum {new File[]{new File(getToolTestDataDir() + "/../variantutils/ReblockGVCF/expected.NA12878.AS.chr20snippet.reblocked.g.vcf"), new File(getToolTestDataDir() + "/../variantutils/ReblockGVCF/expected.NA12892.AS.chr20snippet.reblocked.g.vcf")}, getTestFile("twoSampleAS.vcf"), getTestFile("twoSampleASDB.vcf"), Arrays.asList(new SimpleInterval("20")), NO_EXTRA_ARGS, b37_reference_20_21}, - // Same as above, but with GenomicsDB using VCFCodec for interchange + // Same as above, but with GenomicsDB using BCF2Codec for interchange {new File[]{new File(getToolTestDataDir() + "/../variantutils/ReblockGVCF/expected.NA12878.AS.chr20snippet.reblocked.g.vcf"), new File(getToolTestDataDir() + "/../variantutils/ReblockGVCF/expected.NA12892.AS.chr20snippet.reblocked.g.vcf")}, - getTestFile("twoSampleAS.vcf"), getTestFile("twoSampleASDB.vcf"), Arrays.asList(new SimpleInterval("20")), Arrays.asList("--genomicsdb-use-vcf-codec"), b37_reference_20_21}, + getTestFile("twoSampleAS.vcf"), getTestFile("twoSampleASDB.vcf"), Arrays.asList(new SimpleInterval("20")), Arrays.asList("--genomicsdb-use-bcf-codec"), b37_reference_20_21}, //using legacy reblocking data with no raw GT count values //has ExcessHet, calculated from genotypes diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/GenomicsDBImport/int64_test.tar.gz b/src/test/resources/org/broadinstitute/hellbender/tools/GenomicsDBImport/int64_test.tar.gz new file mode 100644 index 00000000000..c309ef7fb0b Binary files /dev/null and b/src/test/resources/org/broadinstitute/hellbender/tools/GenomicsDBImport/int64_test.tar.gz differ