From a4b6f334294797ca87db4b70af2cd7f406ff92f3 Mon Sep 17 00:00:00 2001
From: George Tollefson <gatollefson@gmail.com>
Date: Thu, 1 Nov 2018 16:12:35 -0400
Subject: [PATCH 1/6] finished most tests. Fixed siglist filter.

---
 src/ViVa.jl                                   |   2 +-
 src/vcf_utils_complete.jl                     |  13 +-
 test/.DS_Store                                | Bin 8196 -> 8196 bytes
 test/new_vcf_utils.jl                         | 244 ++++++++----------
 ...olumn_list.txt => select_samples_list.txt} |   0
 viva_cli.jl                                   |  13 +-
 6 files changed, 124 insertions(+), 148 deletions(-)
 rename test/test_files/{select_column_list.txt => select_samples_list.txt} (100%)

diff --git a/src/ViVa.jl b/src/ViVa.jl
index 67780f7..be303b2 100644
--- a/src/ViVa.jl
+++ b/src/ViVa.jl
@@ -4,7 +4,7 @@ using DataFrames #use CSV.jl ? depwarnings
 using PlotlyJS
 using Rsvg
 using Blink
-using CSV
+#using CSV
 using GeneticVariation
 using ArgParse
 using VCFTools
diff --git a/src/vcf_utils_complete.jl b/src/vcf_utils_complete.jl
index 68b749b..fb8964e 100644
--- a/src/vcf_utils_complete.jl
+++ b/src/vcf_utils_complete.jl
@@ -51,7 +51,6 @@ function sort_genotype_array(genotype_array)
     data=genotype_array[:,3:size(genotype_array,2)]
     chrom_positions = [parse(Int, i) for i in genotype_array[:,1:2]]
     genotype_array = hcat(chrom_positions,data)
-
     genotype_array = sortrows(genotype_array, by=x->(x[1],x[2]))
 
 return genotype_array
@@ -122,16 +121,18 @@ function io_sig_list_vcf_filter(sig_list,vcf_filename)
               pos=(sig_list[row,2])
 
               reader = VCF.Reader(open(vcf_filename, "r"))
-
+              tic()
               for record in reader
 
                      if typeof(VCF.chrom(record)) == String
                             chr = string(chr)
-
                             if (VCF.chrom(record) == chr) && (VCF.pos(record) == pos)
                                 push!(vcf_subarray,record)
                             end
 
+
+                        end#remove this if need code below
+#=
                     else
 
                             if (VCF.chrom(record) == chr) && (VCF.pos(record) == pos)
@@ -139,7 +140,9 @@ function io_sig_list_vcf_filter(sig_list,vcf_filename)
 
                             end
                     end
+                    =#
               end
+              toc()
        end
 
        return vcf_subarray
@@ -367,9 +370,7 @@ convert sub from variant filters to gt_num_array and gt_chromosome_labels for pl
 """
 function combined_all_genotype_array_functions(sub)
     genotype_array = generate_genotype_array(sub,"GT")
-
     map!(s->replace(s, "chr", ""), genotype_array, genotype_array)
-
     clean_column1!(genotype_array)
     genotype_array=ViVa.sort_genotype_array(genotype_array)
     geno_dict = define_geno_dict()
@@ -508,7 +509,7 @@ function get_sample_names(reader)
 end
 
 """
-    find_group_label_indices(pheno)
+    find_group_label_indices(pheno,trait_to_group_by,row_to_sort_by)
 find indices and determines names for group 1 and group 2 labels on plots. finds index of center of each sample group to place tick mark and label.
 """
 function find_group_label_indices(pheno,trait_to_group_by,row_to_sort_by)
diff --git a/test/.DS_Store b/test/.DS_Store
index e9bcd34daba8bc5a670b0d044f29f56de51d96ab..c02d3ce4ddfb142ddb2f95a94384b282c69c667d 100644
GIT binary patch
delta 16
XcmZp1XmQxES8(!r0m;qR1oij;I<5vt

delta 18
ZcmZp1XmQxESCEl$@&*Bk&DR9=_y9oD21)<`

diff --git a/test/new_vcf_utils.jl b/test/new_vcf_utils.jl
index a0ee61b..a0825e9 100644
--- a/test/new_vcf_utils.jl
+++ b/test/new_vcf_utils.jl
@@ -26,18 +26,21 @@ sample_names = get_sample_names(reader)
     @test df[3,1] == 2
 end
 
+#functions for variant filters
+
 @testset "io_chromosome_range_vcf_filter" begin
 sub = io_chromosome_range_vcf_filter("chr4:0-400000000",reader)
 println(sub[1:2])
 println(size(sub,2))
 end
 
-@testset "io_sig_list_vcf_filter" begin
+#=
+@testset "filters_with_siglist" begin
 
     @testset "load_siglist" begin
-        sig_list=load_siglist("test_files/significantList_for_proteinstructures.csv")
-        println(sig_list[2:1])
-        println(size(sig_list,1))
+    sig_list=load_siglist("test_files/significantList_for_proteinstructures.csv")
+    println(sig_list[2:1])
+    println(size(sig_list,1))
 
             @testset "clean_column1_siglist!" begin
             clean_column1_siglist!(sig_list)
@@ -45,170 +48,145 @@ end
             println(size(sig_list,1))
             end
 
-        sub=io_sig_list_vcf_filter(sig_list,vcf_filename)
-        println(sub[1,5])
-
-        @testset "pass_chrrange_siglist_filter" begin
-        sub = pass_chrrange_siglist_filter(vcf_filename,sig_list,"chr4:0-400000000")
-        println(sub[1,5])
+            @testset "io_sig_list_vcf_filter" begin
+            sub=io_sig_list_vcf_filter(sig_list,vcf_filename)
+            @test (typeof(sub[1])) == GeneticVariation.VCF.Record
+            @test (length(sub)) ==  13
+            end
 
-        @testset pass_siglist_filter begin
-        sub = pass_siglist_filter(vcf_filename,sig_list)
-        end
+            @testset "pass_chrrange_siglist_filter" begin
+            sub = pass_chrrange_siglist_filter(vcf_filename,sig_list,"chr4:0-400000000")
+            @test (typeof(sub[1])) == GeneticVariation.VCF.Record
+            @test (length(sub)) ==  12
+            end
 
-        @testset "chrrange_siglist_filter" begin
-        sub = chrrange_siglist_filter(vcf_filename,sig_list,"chr4:0-400000000")
-        end
+            @testset "pass_siglist_filter" begin
+            sub = pass_siglist_filter(vcf_filename, sig_list)
+            @test (typeof(sub[1])) == GeneticVariation.VCF.Record
+            @test (length(sub)) ==  12
+            end
 
+            @testset "chrrange_siglist_filter" begin
+            sub = chrrange_siglist_filter(vcf_filename,sig_list,"chr4:0-400000000")
+            @test (typeof(sub[1])) == GeneticVariation.VCF.Record
+            @test (length(sub)) ==  13
+            end
 
-        end
+    end
 
     end
-end
+=#
 
 @testset "io_pass_filter" begin
+    reader = VCF.Reader(open(vcf_filename, "r"))
     sub = io_pass_filter(reader)
-    println(sub[2,1])
+    @test (typeof(sub[1])) == GeneticVariation.VCF.Record
+    @test (length(sub)) ==  1164
 end
 
 @testset "pass_chrrange_filter" begin
+    reader = VCF.Reader(open(vcf_filename, "r"))
     sub = pass_chrrange_filter(reader,"chr4:0-400000000")
+    @test (typeof(sub[1])) == GeneticVariation.VCF.Record
+    @test (length(sub)) ==  856
 end
 
-
-
-
-
-
-#=
-#functions for variant filters
-
-
 #functions for converting vcf record array to numerical array
+@testset "combined_all_genotype_array_functions" begin
 
-"""
-    create_chr_dict()
-creates dict for use in combined_all_genotype_array_functions() for removing 'chr' from chromosome labels to allow sorting variant records by chromosome position.
-"""
-@testset create_chr_dict() begin
-
-end
-
-"""
-    combined_all_genotype_array_functions(sub)
-convert sub from variant filters to gt_num_array and gt_chromosome_labels for plot functions.
-"""
-@testset combined_all_genotype_array_functions(sub) begin
-
-end
-
-"""
-    combined_all_read_depth_array_functions(sub)
-convert sub from variant filters to dp_num_array and dp_chromosome_labels for plot functions.
-"""
-@testset combined_all_read_depth_array_functions(sub) begin
-
-end
-
-"""
-    generate_genotype_array(record_sub::Array{Any,1},genotype_field::String)
-Returns numerical array of genotype values (either genotype or read_depth values) which are translated by another function into num_array
-Where genotype_field is either GT or DP to visualize genotype or read_depth
-"""
-@testset generate_genotype_array(record_sub::Array{Any,1},y) begin
-
-end
-
-"""
-    define_geno_dict()
-returns dictionary of values for use in replace_genotype_with_vals()
-"""
-@testset define_geno_dict() begin
+reader = VCF.Reader(open(vcf_filename, "r"))
+sub = io_pass_filter(reader)
 
-end
+gt_num_array,gt_chromosome_labels=combined_all_genotype_array_functions(sub)
+println(typeof(gt_num_array))
+println(length(gt_num_array))
+println(typeof(gt_chromosome_labels))
+println(length(gt_chromosome_labels))
 
-"""
-    translate_genotype_to_num_array(genotype_array,geno_dict)
-returns a tuple of num_array for plotting, and chromosome labels for plotting as label bar.
-Translates array of genotype values to numerical array of categorical values.
-Genotype values are converted to categorical values. No_call=0, 0/0=400, heterozygous_variant=600, homozygous_variant=800
-"""
-@testset translate_genotype_to_num_array(genotype_array,geno_dict) begin
+    @testset "generate_genotype_array" begin
+    reader = VCF.Reader(open(vcf_filename, "r"))
+    sub = io_pass_filter(reader)
+    genotype_array=generate_genotype_array(sub,"GT")
+    println(typeof(genotype_array))
+    println(length(genotype_array))
+    println(genotype_array[3:5])
+
+    @testset "define_geno_dict" begin
+    geno_dict = define_geno_dict()
+    println(typeof(geno_dict))
+    println(length(geno_dict))
+
+    @testset "translate_genotype_to_num_array" begin
+    gt_num_array,gt_chromosome_labels=translate_genotype_to_num_array(genotype_array,geno_dict)
+    println(typeof(gt_num_array))
+    println(length(gt_num_array))
+    println(typeof(gt_chromosome_labels))
+    println(length(gt_chromosome_labels))
+    end
+    end
+    end
 
 end
 
-"""
-    translate_readdepth_strings_to_num_array(read_depth_array::Array{Any,2})
-Returns array of read_depth as int for plotting and average calculation.
-By default, read depth values over 100 are replaced with 100 to improve heatmap visualization (see read_depth_threshhold() ).
-Where read_depth_array is output of generate_genotype_array() for DP option
-returns a tuple of num_array type Int for average calculation and plotting, and chromosome labels for plotting as label bar
-"""
-@testset translate_readdepth_strings_to_num_array(read_depth_array::Array{Any,2}) begin
+@testset "combined_all_read_depth_array_functions" begin #inside functions same used in combined_all_genotype_array_functions
 
+reader = VCF.Reader(open(vcf_filename, "r"))
+sub = io_pass_filter(reader)
+dp_num_array,dp_chromosome_labels=combined_all_read_depth_array_functions(sub)
+println(typeof(dp_num_array))
+println(length(dp_num_array))
+println(typeof(dp_chromosome_labels))
+println(length(dp_chromosome_labels))
+
+@testset "get_sample_names" begin
+reader = VCF.Reader(open(vcf_filename, "r"))
+sample_names=get_sample_names(reader)
+println("get_sample_names")
+println(typeof(sample_names))
+println(length(sample_names))
+
+@testset "avg_dp_samples" begin
+avg_sample_list=avg_dp_samples(dp_num_array)
+println("avg_sample_list is $avg_sample_list")
+
+@testset "list_sample_names_low_dp" begin
+list=list_sample_names_low_dp(avg_sample_list,sample_names)
+println(list)
 end
 
-
-#functions for sample filters
-
-"""
-    get_sample_names(reader)
-returns sample ids of vcf file as a vector of symbols for naming columns of num_array dataframe object for column filter functions
-"""
-@testset get_sample_names(reader) begin
-
 end
 
-"""
-    find_group_label_indices(pheno)
-find indices and determines names for group 1 and group 2 labels on plots. finds index of center of each sample group to place tick mark and label.
-"""
-@testset find_group_label_indices(pheno,trait_to_group_by,row_to_sort_by) begin
 
-end
-
-"""
-    sortcols_by_phenotype_matrix(pheno_matrix_filename::String,trait_to_group_by::String,num_array::Array{Int64,2}, sample_names::Array{Symbol,2})
-group samples by a common trait using a user generated key matrix ("phenotype matrix")
-"""
-@testset sortcols_by_phenotype_matrix(pheno_matrix_filename::String,trait_to_group_by::String, num_array::Array{Int64,2}, sample_names::Array{Symbol,2}) begin
-
-end
-
-"""
-    select_columns(filename_sample_list::AbstractString, num_array::Array{Int64,2}, sample_names::Array{Symbol,2})
-returns num_array with columns matching user generated list of sample ids to select for analysis. num_array now has sample ids in first row.
-"""
-@testset select_columns(filename_sample_list::AbstractString, num_array::Array{Int64,2}, sample_names::Array{Symbol,2}) begin
 
+@testset "avg_dp_variant" begin
+avg_variant_list=avg_dp_variant(dp_num_array)
+println("avg_dp_variant is $avg_variant_list")
 end
 
+@testset "sortcols_by_phenotype_matrix" begin
+vcf,group_label_pack=sortcols_by_phenotype_matrix("test_files/sample_phenotype_matrix.csv","control,case", dp_num_array, sample_names)
+println(typeof(vcf))
+println(size(vcf,1))
+println(typeof(group_label_pack))
+println(length(group_label_pack))
+
+    @testset "find_group_label_indices" begin
+    pheno = readdlm("test_files/sample_phenotype_matrix.csv", ',')
+    row_to_sort_by = find(x -> x == "control,case", pheno)
+    row_to_sort_by = row_to_sort_by[1]
+    group_label_pack=find_group_label_indices(pheno,"control,case",row_to_sort_by)
+    println(typeof(group_label_pack))
+    println(length(group_label_pack))
+    end
 
-#functions for mathematic analysis
-"""
-    avg_dp_samples(dp_num_array::Array{Int64,2})
-create sample_avg_list vector that lists averages of read depth for each sample for input into avg_sample_dp_line_chart(sample_avg_list)
-dp_num_array must contain dp values as Int64 and be without chromosome position columns
-"""
-@testset avg_dp_samples(dp_num_array::Array{Int64,2}) begin
+    @testset "select_columns" begin
+    dp_num_array=select_columns("test_files/select_samples_list.txt", dp_num_array, sample_names)
+    println(typeof(dp_num_array))
+    println(length(dp_num_array))
+    end
 
 end
-
-
-"""
-    avg_dp_variant(dp_num_array::Array{Int64,2})
-create variant_avg_list vector that lists averages of read depth for each variant for input into avg_variant_dp_line_chart(variant_avg_list)
-"""
-@testset avg_dp_variant(dp_num_array::Array{Int64,2}) begin
-
 end
-
-"""
-    list_sample_names_low_dp(sample_avg_list::Array{Float64,2},sample_names)
-returns list of sample ids that have an average read depth of under 15 across all variant positions
-"""
-@testset list_sample_names_low_dp(sample_avg_list::Array{Float64,1},sample_names) begin
-
 end
 
 """
diff --git a/test/test_files/select_column_list.txt b/test/test_files/select_samples_list.txt
similarity index 100%
rename from test/test_files/select_column_list.txt
rename to test/test_files/select_samples_list.txt
diff --git a/viva_cli.jl b/viva_cli.jl
index 8209662..6171d20 100644
--- a/viva_cli.jl
+++ b/viva_cli.jl
@@ -268,6 +268,8 @@ if parsed_args["heatmap"] == "genotype"
         graphic = ViVa.genotype_heatmap2(gt_num_array,title,chrom_label_info,sample_names)
     end
 
+    println("saving now")
+
     PlotlyJS.savefig(graphic, joinpath("$(parsed_args["output_directory"])" ,"$title.$(parsed_args["save_format"])"), js=:remote)
 
 end
@@ -327,10 +329,7 @@ println("_______________________________________________")
 if parsed_args["avg_dp"] == "sample"
 
     if isdefined(:dp_num_array) == false
-        read_depth_array = ViVa.generate_genotype_array(sub,"DP")
-        clean_column1!(read_depth_array)
-        read_depth_array=ViVa.sort_genotype_array(read_depth_array)
-        dp_num_array,dp_chromosome_labels = translate_readdepth_strings_to_num_array(read_depth_array)
+        dp_num_array,dp_chromosome_labels=combined_all_read_depth_array_functions(sub)
     end
 
     chr_pos_tuple_list = generate_chromosome_positions_for_hover_labels(dp_chromosome_labels)
@@ -343,11 +342,9 @@ if parsed_args["avg_dp"] == "sample"
     PlotlyJS.savefig(graphic, joinpath("$(parsed_args["output_directory"])" ,"Average Sample Read Depth.$(parsed_args["save_format"])"), js=:remote)
 
 elseif parsed_args["avg_dp"] == "variant"
+    
     if isdefined(:dp_num_array) == false
-        read_depth_array = ViVa.generate_genotype_array(sub,"DP")
-        clean_column1!(read_depth_array)
-        read_depth_array=ViVa.sort_genotype_array(read_depth_array)
-        dp_num_array,dp_chromosome_labels = translate_readdepth_strings_to_num_array(read_depth_array)
+        dp_num_array,dp_chromosome_labels=combined_all_read_depth_array_functions(sub)
     end
 
     chr_pos_tuple_list = generate_chromosome_positions_for_hover_labels(dp_chromosome_labels)

From 22f2a8411c7fc84b982ddebe0860dca4549123a2 Mon Sep 17 00:00:00 2001
From: George Tollefson <gatollefson@gmail.com>
Date: Mon, 5 Nov 2018 14:20:45 -0500
Subject: [PATCH 2/6] Wrote all tests for test/new_vcf_utils. All are passing.
 Fixed 4 sig list filter functions to iterate over VCF.Reader object first
 then siglist to make faster. Allowed inputing vcf filename as path to
 filename.

---
 .gitignore                |   1 +
 .travis.yml               |  35 ++--
 ViVa_v2.jl                |  15 --
 open_jupyter_notebook.jl  |   2 -
 src/ViVa.jl               |   8 +-
 src/plot_utils.jl         |   8 +-
 src/vcf_utils_complete.jl | 142 ++++++++--------
 test/.DS_Store            | Bin 8196 -> 8196 bytes
 test/new_vcf_utils.jl     | 329 +++++++++++++++-----------------------
 viva_cli.jl               |  53 +++---
 10 files changed, 255 insertions(+), 338 deletions(-)
 delete mode 100644 ViVa_v2.jl
 delete mode 100644 open_jupyter_notebook.jl

diff --git a/.gitignore b/.gitignore
index ea423ef..5f5d647 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,4 @@
 *.html
 .DS_Store
 .ipynb_checkpoints/
+output
diff --git a/.travis.yml b/.travis.yml
index df211b0..ddcbd6d 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,24 +1,23 @@
-## Documentation: http://docs.travis-ci.com/user/languages/julia/
 language: julia
 
-# os:
-#   - linux
-#   - osx
-#
-# julia:
-#   - 0.6
-#
-# notifications:
-#   email: false
-#
-# git:
-#   depth: 99999999
-#
-# before_script:
-#   - export PATH=$HOME/.local/bin:$PATH
+os:
+  - linux
+  - osx
 
-# after_success:
-#   - julia -e 'import Pkg; Pkg.add("Coverage"); using Coverage; Codecov.submit(Codecov.process_folder())'
+julia:
+  - 0.6
+
+notifications:
+  email: false
+
+git:
+  depth: 99999999
+
+before_script:
+  - export PATH=$HOME/.local/bin:$PATH
+
+after_success:
+  - julia -e 'Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())'
 
 jobs:
   include:
diff --git a/ViVa_v2.jl b/ViVa_v2.jl
deleted file mode 100644
index 9822dfa..0000000
--- a/ViVa_v2.jl
+++ /dev/null
@@ -1,15 +0,0 @@
-
-vcf_filename = "test_4X_191.vcf"
-field_to_visualize = "read_depth" # field(s) = "read_depth", *etc.
-variant_filter = ["range","chr4:0-4000000000"] #"range","chr4:0-4000000000"
-sample_filter = ["reorder_columns", "sample_phenotype_matrix.csv", "case_control_status"] # "select_columns", "select_column_list.txt",
-plot_types = "sample_line_chart"
-save_format = "html"
-plot_title = "Read_depth_test"
-
-
-using ViVa
-using GeneticVariation
-using VCFTools
-plot=ViVa.jupyter_main_new(vcf_filename,field_to_visualize,variant_filter,sample_filter,plot_types,save_format,plot_title)
-
diff --git a/open_jupyter_notebook.jl b/open_jupyter_notebook.jl
deleted file mode 100644
index bcac03b..0000000
--- a/open_jupyter_notebook.jl
+++ /dev/null
@@ -1,2 +0,0 @@
-using IJulia
-notebook()
diff --git a/src/ViVa.jl b/src/ViVa.jl
index be303b2..3e97735 100644
--- a/src/ViVa.jl
+++ b/src/ViVa.jl
@@ -1,13 +1,13 @@
 module ViVa
 
-using DataFrames #use CSV.jl ? depwarnings
+using DataFrames
 using PlotlyJS
 using Rsvg
 using Blink
-#using CSV
 using GeneticVariation
 using ArgParse
 using VCFTools
+#using Base.Filesystem
 
 
 
@@ -42,9 +42,9 @@ export
     avg_variant_dp_line_chart,
     read_depth_threshhold,
     list_variant_positions_low_dp,
-    list_sample_positions_low_dp,
+    list_sample_names_low_dp,
     avg_dp_variant,
-    avg_dp_patients,
+    avg_dp_samples,
     jupyter_main,
     save_numerical_array,
     io_pass_filter,
diff --git a/src/plot_utils.jl b/src/plot_utils.jl
index f86e74f..cdc7941 100644
--- a/src/plot_utils.jl
+++ b/src/plot_utils.jl
@@ -42,8 +42,8 @@ function genotype_heatmap2(input,title,chrom_label_info,sample_names)
        end
 
 """
-           genotype_heatmap_with_groups(input::Array{Int64,2},title::String,chrom_label_info::Tuple{Array{String,1},Array{Int64,1},String},group_label_pack::Array{Any,1},sample_names)
-       generate heatmap of genotype data.
+   genotype_heatmap_with_groups(input::Array{Int64,2},title::String,chrom_label_info::Tuple{Array{String,1},Array{Int64,1},String},group_label_pack::Array{Any,1},sample_names)
+generate heatmap of genotype data.
 """
 function genotype_heatmap_with_groups(input::Array{Int64,2},title::String,chrom_label_info::Tuple{Array{String,1},Array{Int64,1},String},group_label_pack::Array{Any,1},sample_names)
 
@@ -127,7 +127,7 @@ function dp_heatmap2(input, title, chrom_label_info, sample_names)
 end
 
 """
-           dp_heatmap2_with_groups(input::Array{Int64,2},title::String,chrom_label_info::Tuple{Array{String,1},Array{Int64,1},String},group_label_pack::Array{Any,1})
+    dp_heatmap2_with_groups(input::Array{Int64,2},title::String,chrom_label_info::Tuple{Array{String,1},Array{Int64,1},String},group_label_pack::Array{Any,1})
 generate heatmap of read depth data with grouped samples.
 """
 function dp_heatmap2_with_groups(input::Array{Int64,2},title::String,chrom_label_info::Tuple{Array{String,1},Array{Int64,1},String},group_label_pack::Array{Any,1})
@@ -174,7 +174,7 @@ function dp_heatmap2_with_groups(input::Array{Int64,2},title::String,chrom_label
 end
 
 """
-           avg_sample_dp_scatter(sample_avg_list::Array{Float64,1},sample_names)
+    avg_sample_dp_scatter(sample_avg_list::Array{Float64,1},sample_names)
 generate line chart of average read depths of each sample.
 """
 function avg_sample_dp_scatter(sample_avg_list::Array{Float64,1},sample_names)
diff --git a/src/vcf_utils_complete.jl b/src/vcf_utils_complete.jl
index fb8964e..de3b55a 100644
--- a/src/vcf_utils_complete.jl
+++ b/src/vcf_utils_complete.jl
@@ -110,42 +110,37 @@ end
     io_sig_list_vcf_filter(sig_list,vcf_filename)
 returns subarray of variant records matching a list of variant positions returned from load_siglist()
 """
-function io_sig_list_vcf_filter(sig_list,vcf_filename)
+function io_sig_list_vcf_filter(sig_list,vcf_filename) 
 
-       vcf_subarray = Array{Any}(0)
+    reader = VCF.Reader(open(vcf_filename, "r"))
 
-       for row= 1:size(sig_list,1)
-              dimension = size(sig_list,1)
+    vcf_subarray = Array{Any}(0)
 
-              chr=(sig_list[row,1])
-              pos=(sig_list[row,2])
+    for record in reader
 
-              reader = VCF.Reader(open(vcf_filename, "r"))
-              tic()
-              for record in reader
+        for row= 1:size(sig_list,1)
+            dimension = size(sig_list,1)
 
-                     if typeof(VCF.chrom(record)) == String
-                            chr = string(chr)
-                            if (VCF.chrom(record) == chr) && (VCF.pos(record) == pos)
-                                push!(vcf_subarray,record)
-                            end
+            chr=(sig_list[row,1])
+            pos=(sig_list[row,2])
 
+            if typeof(VCF.chrom(record)) == String
+                   chr = string(chr)
+                      if (VCF.chrom(record) == chr) && (VCF.pos(record) == pos)
+                          push!(vcf_subarray,record)
+                      end
 
-                        end#remove this if need code below
-#=
-                    else
+            else
 
-                            if (VCF.chrom(record) == chr) && (VCF.pos(record) == pos)
-                                push!(vcf_subarray,record)
+                      if (VCF.chrom(record) == chr) && (VCF.pos(record) == pos)
+                          push!(vcf_subarray,record)
 
-                            end
-                    end
-                    =#
+                      end
               end
-              toc()
-       end
+        end
+    end
 
-       return vcf_subarray
+    return vcf_subarray
 end
 
 """
@@ -186,16 +181,16 @@ function pass_chrrange_siglist_filter(vcf_filename,sig_list,chr_range::AbstractS
 
     vcf_subarray = Array{Any}(0)
 
-    for row= 1:size(sig_list,1)
+    reader = VCF.Reader(open(vcf_filename, "r"))
+
+    for record in reader
+
+    for row = 1:size(sig_list,1)
            dimension = size(sig_list,1)
 
            chr=(sig_list[row,1])
            pos=(sig_list[row,2])
 
-           reader = VCF.Reader(open(vcf_filename, "r"))
-
-           for record in reader
-
                   if typeof(VCF.chrom(record)) == String
                          chr = string(chr)
 
@@ -258,12 +253,16 @@ returns subarray of vcf records with io_pass_filter and io_chromosome_range_vcf_
     end
 
 """
-        pass_siglist_filter(vcf_filename,sig_list,chr_range::AbstractString)
-    returns subarray of vcf records with io_pass_filter, io_sig_list_vcf_filter, and io_chromosome_range_vcf_filter applied.
+    pass_siglist_filter(vcf_filename,sig_list,chr_range::AbstractString)
+returns subarray of vcf records with io_pass_filter, io_sig_list_vcf_filter, and io_chromosome_range_vcf_filter applied.
 """
-    function pass_siglist_filter(vcf_filename,sig_list)
+function pass_siglist_filter(vcf_filename,sig_list)
 
-        vcf_subarray = Array{Any}(0)
+    vcf_subarray = Array{Any}(0)
+
+    reader = VCF.Reader(open(vcf_filename, "r"))
+
+    for record in reader
 
         for row= 1:size(sig_list,1)
                dimension = size(sig_list,1)
@@ -271,10 +270,6 @@ returns subarray of vcf records with io_pass_filter and io_chromosome_range_vcf_
                chr=(sig_list[row,1])
                pos=(sig_list[row,2])
 
-               reader = VCF.Reader(open(vcf_filename, "r"))
-
-               for record in reader
-
                       if typeof(VCF.chrom(record)) == String
                              chr = string(chr)
 
@@ -286,64 +281,63 @@ returns subarray of vcf records with io_pass_filter and io_chromosome_range_vcf_
 
                              if (VCF.chrom(record) == chr) && (VCF.pos(record) == pos) && (VCF.hasfilter(record)) && (VCF.filter(record) == String["PASS"])
                                  push!(vcf_subarray,record)
-
                              end
                      end
-               end
         end
+    end
 
-        return vcf_subarray
-        end
+    return vcf_subarray
+end
 
 """
     chrrange_siglist_filter(vcf_filename,sig_list,chr_range::AbstractString)
 returns subarray of vcf records with io_pass_filter, io_sig_list_vcf_filter, and io_chromosome_range_vcf_filter applied.
 """
-        function chrrange_siglist_filter(vcf_filename,sig_list,chr_range::AbstractString)
+function chrrange_siglist_filter(vcf_filename,sig_list,chr_range::AbstractString)
 
-            a=split(chr_range,":")
-            chrwhole=a[1]
-            chrnumber=split(chrwhole,"r")
-            string_chr=chrnumber[2]
-            chr=String(string_chr)
-            range=a[2]
-            splitrange=split(range, "-")
-            lower_limit=splitrange[1]
-            chr_range_low=parse(lower_limit)
-            upper_limit=splitrange[2]
-            chr_range_high=parse(upper_limit)
+    a=split(chr_range,":")
+    chrwhole=a[1]
+    chrnumber=split(chrwhole,"r")
+    string_chr=chrnumber[2]
+    chr=String(string_chr)
+    range=a[2]
+    splitrange=split(range, "-")
+    lower_limit=splitrange[1]
+    chr_range_low=parse(lower_limit)
+    upper_limit=splitrange[2]
+    chr_range_high=parse(upper_limit)
 
-            vcf_subarray = Array{Any}(0)
+    vcf_subarray = Array{Any}(0)
 
-            for row= 1:size(sig_list,1)
-                   dimension = size(sig_list,1)
+    reader = VCF.Reader(open(vcf_filename, "r"))
 
-                   chr=(sig_list[row,1])
-                   pos=(sig_list[row,2])
+    for record in reader
 
-                   reader = VCF.Reader(open(vcf_filename, "r"))
+        for row= 1:size(sig_list,1)
+            dimension = size(sig_list,1)
 
-                   for record in reader
+            chr=(sig_list[row,1])
+            pos=(sig_list[row,2])
 
-                          if typeof(VCF.chrom(record)) == String
-                                 chr = string(chr)
+                    if typeof(VCF.chrom(record)) == String
+                             chr = string(chr)
 
-                                 if (VCF.chrom(record) == chr) && (VCF.pos(record) == pos) && ((VCF.chrom(record) == chr)) && ((chr_range_high > VCF.pos(record) > chr_range_low))
-                                     push!(vcf_subarray,record)
-                                 end
+                             if (VCF.chrom(record) == chr) && (VCF.pos(record) == pos) && ((VCF.chrom(record) == chr)) && ((chr_range_high > VCF.pos(record) > chr_range_low))
+                                 push!(vcf_subarray,record)
+                             end
 
-                         else
+                     else
 
-                                 if (VCF.chrom(record) == chr) && (VCF.pos(record) == pos) && ((VCF.chrom(record) == chr)) && ((chr_range_high > VCF.pos(record) > chr_range_low))
-                                     push!(vcf_subarray,record)
+                             if (VCF.chrom(record) == chr) && (VCF.pos(record) == pos) && ((VCF.chrom(record) == chr)) && ((chr_range_high > VCF.pos(record) > chr_range_low))
+                                 push!(vcf_subarray,record)
 
-                                 end
-                         end
-                   end
+                             end
+                     end
             end
+    end
 
-            return vcf_subarray
-            end
+    return vcf_subarray
+end
 
 #functions for converting vcf record array to numerical array
 
diff --git a/test/.DS_Store b/test/.DS_Store
index c02d3ce4ddfb142ddb2f95a94384b282c69c667d..058d747c0a0ffc8e96385d11f1e9bc0b65dcdf0d 100644
GIT binary patch
delta 1325
zcmeIxO>7Kd7zgm@@AI9Bu6Nsdce|u*y4|FtR<~c;Qnnu&QnaDfmP+k*2BX_uI@=Fj
zt0h8E93~Y+99%?PXmLW~B2F&d_`3KA7Y7HI3K#G0Cc?qN$;BLI=J`MK@yz?nobaFU
z-=5)kY9;u1ks}r>t*$E0uc(R_=f|pjmGZP=bv&*xhsRgDX;1&*(b$pr(Mwj`o`=iU
zZSGVwvo{osL_$bb)^qztn`pAD8#|5ONO-$3s$0*p>wP_(51RTmJrsx<LwaK%s+&DL
zbI8z#TMkBzNZ8z=518`k_Lop<vo2*ej&a-K4B-$tjJsLqV9+=;XhaVulBv2(<SP6g
z>(=C>wnQ{zuc2$Hj#wrN7~7T18wPZ_*Af}F#k|lX)HYL&!vjo7CCgW+YFnQX*0`yp
z6YnZjw1)ygJ;XJIB{E9oeP!jU=om^5$D5O>46CYD(b*g6@7Kdo(~e>pr$${TtaDjQ
z$AsugzP?V=bd53Q_7|nUN#P(yIaEO9w3b?_i!>Uh37Vvfbe(R|eR@Do=o!t>b9zB9
z=^eeN5A=~f(=2_ZZ}c4uc4Q(8c__k4tU?KVsKI*Fp&kv`jBRK`yB9mqgMBb?0DXue
zj&U5rah$~j&f@~E;Rfzt3e&iYhj?UlXLs6MP1$PUZ)6+(noN1MD7iTH%&hDsx$dH3
zU*#IVRllTTtYgS9jbKP`A2zh;{;q)8tA{lsysup@zCOZ|3&<BHavV;Pm+x|M#-B2l
zDhoX>l|wQssdBkTcIU{Ra5!^Rud76nn0#)kbd{+UOtSI$>28%;qexgjFCoU{)vj7~
zJ(GTXLAqV9HY#?!RXF{^^GBSUG)0dk%xiihIX=-BngvM=8=T0M7zJ>{1Fz&LMH$Lb
z`wPidRJ5QKUD%0k?3OV5B}^Ye2xAx{7)4A1ox~}e#u*7TiHo?5E4Vs6Bg!$oz&Sgw
Uxgio7><|C<`1!N^Bac%20J(EL&Hw-a

delta 1241
zcmeH_OGs2v7{|YVX`Or1JTpG#3^``0SUQ%YHa2B7X_`@BnAVh%jm}*R%|pFcNvCOg
z7`5oh4WfdGXw`#+s6`uTRYB0ppjt!_)uK&N1g-3hH(ExUcAdq6^Zm~A_djF)G5`2;
zj)&$X-@jNS61CMncX5r+n{*c^eDxl8ab2x9nN*mg%(H64uIR{UVsCQaIdiXVKF*ut
zT(FSd7wL`1Bj!!+G+)?SGewJSy=v~a=bQKP8jAwqzIbd)cvS1*d`Q>YwMfti4{Pg!
zhNg3v7wzj@*>iJvfkTMEP;Yo(C~S=I62>O^+?;H+P?%G8M@kfmg^YED<f5iQP4*!R
zmx*$P2U&+M_qQq)s$1c@(y==nv*w7Us)zBA(isW%YLV$lqE7WPzERmWtPSenc+4vL
z)RoKzw-V@!N26NI(5>=n;b-&s1GY1L_@<z4wDoH-d4BrRjfFy(<GBT?5`Q&KH-#8E
zX$kqMmAXlzej1@;bb?OO1-e2v={8N!eR@QX=?Oih*Yu9w(+B!UpXm#IrEdUK%t00e
z7GNPtQHBcCq7GivqXErmK`S<(4FPON7rGHb6fw94Fo*=ka2Q8$6lZY>S8)y3aRYZS
zVRq#OGF{DiEx$vl`Ew}BtE}|vZ1&u|{GyWb%Iak+)|jpN+h%o2soEZM!Y)#LcG?x2
zl({+Q7rR`XNk`UDDRrs*HdChAqEg9b{37luNoLBxEas_ET|84}W3BVnt18cs;aPNL
zgQT-$Ol;EW5C!U5wMntzt-^_w_*084-+v+gGF_v)Qu=dxNpGa^Pg3>|nnEUW;D8f_
zaG?n0Sd2=zQ6*I`hYu^zG_89*HlrOK=)_j^Ac#JxTtln~{ZhFO14*QC00(gh$8a2{
qa2h7g+<I0}hg-A7_l#yk+he4^{|-GqXf(wmL($lO`~Td&2R{J~eIF?R

diff --git a/test/new_vcf_utils.jl b/test/new_vcf_utils.jl
index a0825e9..c1ee9fd 100644
--- a/test/new_vcf_utils.jl
+++ b/test/new_vcf_utils.jl
@@ -30,22 +30,24 @@ end
 
 @testset "io_chromosome_range_vcf_filter" begin
 sub = io_chromosome_range_vcf_filter("chr4:0-400000000",reader)
-println(sub[1:2])
-println(size(sub,2))
+@test typeof(sub) == Array{Any,1}
+@test size(sub,1) == 1012
+#println("io_chromosome_range_vcf_filter type is $(typeof(sub))")
+#println("io_chromosome_range_vcf_filter size is $(size(sub,1))")
 end
 
-#=
+
 @testset "filters_with_siglist" begin
 
     @testset "load_siglist" begin
     sig_list=load_siglist("test_files/significantList_for_proteinstructures.csv")
-    println(sig_list[2:1])
-    println(size(sig_list,1))
+    #println(sig_list[2:1])
+    #println(size(sig_list,1))
 
             @testset "clean_column1_siglist!" begin
             clean_column1_siglist!(sig_list)
-            println(sig_list[1,2])
-            println(size(sig_list,1))
+            #println(sig_list[1,2])
+            #println(size(sig_list,1))
             end
 
             @testset "io_sig_list_vcf_filter" begin
@@ -75,7 +77,7 @@ end
     end
 
     end
-=#
+
 
 @testset "io_pass_filter" begin
     reader = VCF.Reader(open(vcf_filename, "r"))
@@ -98,237 +100,164 @@ reader = VCF.Reader(open(vcf_filename, "r"))
 sub = io_pass_filter(reader)
 
 gt_num_array,gt_chromosome_labels=combined_all_genotype_array_functions(sub)
-println(typeof(gt_num_array))
-println(length(gt_num_array))
-println(typeof(gt_chromosome_labels))
-println(length(gt_chromosome_labels))
+#println("combined_all_genotype_array_functions gt array is type: $(typeof(gt_num_array))")
+#println("combined_all_genotype_array_functions gt array is length: $(length(gt_num_array))")
+#println("combined_all_genotype_array_functions gt_chromosome_labels is typeof: $(typeof(gt_chromosome_labels))")
+#println("combined_all_genotype_array_functions gt_chromosome_labels is length: $(length(gt_chromosome_labels))")
+@test typeof(gt_num_array) == Array{Int64,2}
+@test length(gt_num_array) == 222324
+@test typeof(gt_chromosome_labels) == Array{Any,2}
+@test length(gt_chromosome_labels) == 2328
+
+    @testset "chromosome_label_generator" begin
+    chrom_label_info = ViVa.chromosome_label_generator(gt_chromosome_labels[:,1])
+    #println("chromosome_label_generator chrom_label_info is type $(typeof(chrom_label_info))")
+    #println("chromosome_label_generator chrom_label_info is length $(length(chrom_label_info))")
+    @test typeof(chrom_label_info) == Tuple{Array{String,1},Array{Int64,1},String}
+    @test size(chrom_label_info,1) == 3
+    end
+
+    @testset "generate_chromosome_positions_for_hover_labels" begin
+    chr_pos_tuple_list = generate_chromosome_positions_for_hover_labels(gt_chromosome_labels)
+    #println("generate_chromosome_positions_for_hover_labels chr_pos_tuple_list is type $(typeof(chr_pos_tuple_list))")
+    #println("generate_chromosome_positions_for_hover_labels chr_pos_tuple_list is length $(length(chr_pos_tuple_list))")
+    @test typeof(chr_pos_tuple_list) == Array{Tuple,1}
+    @test size(chr_pos_tuple_list,1) == 1164
+    end
 
     @testset "generate_genotype_array" begin
     reader = VCF.Reader(open(vcf_filename, "r"))
     sub = io_pass_filter(reader)
     genotype_array=generate_genotype_array(sub,"GT")
-    println(typeof(genotype_array))
-    println(length(genotype_array))
-    println(genotype_array[3:5])
+
+    #println("generate_genotype_array is $(typeof(genotype_array))")
+    #println("generate_genotype_array is $(size(genotype_array,1))")
+    @test typeof(genotype_array) == Array{String,2}
+    @test size(genotype_array,1) == 1164
 
     @testset "define_geno_dict" begin
     geno_dict = define_geno_dict()
-    println(typeof(geno_dict))
-    println(length(geno_dict))
+    #println("define_geno_dict is type is $(typeof(geno_dict))")
+    #println("define_geno_dict is length is $(length(geno_dict))")
+    @test typeof(geno_dict) == Dict{Any,Any}
+    @test length(geno_dict) == 92
 
     @testset "translate_genotype_to_num_array" begin
     gt_num_array,gt_chromosome_labels=translate_genotype_to_num_array(genotype_array,geno_dict)
-    println(typeof(gt_num_array))
-    println(length(gt_num_array))
-    println(typeof(gt_chromosome_labels))
-    println(length(gt_chromosome_labels))
+    #println("translate_genotype_to_num_array gt_num_array type is $(typeof(gt_num_array))")
+    #println("translate_genotype_to_num_array gt_num_array length is $(length(gt_num_array))")
+    #println("translate_genotype_to_num_array gt_chromosome_labels typeof is $(typeof(gt_chromosome_labels))")
+    #println("translate_genotype_to_num_array gt_chromosome_labels length is $(length(gt_chromosome_labels))")
+    @test typeof(gt_num_array) == Array{Int64,2}
+    @test length(gt_num_array) == 222324
+    @test typeof(gt_chromosome_labels) == Array{String,2}
+    @test length(gt_chromosome_labels) == 2328
     end
     end
     end
 
 end
 
-@testset "combined_all_read_depth_array_functions" begin #inside functions same used in combined_all_genotype_array_functions
+@testset "combined_all_read_depth_array_functions" begin
 
 reader = VCF.Reader(open(vcf_filename, "r"))
 sub = io_pass_filter(reader)
 dp_num_array,dp_chromosome_labels=combined_all_read_depth_array_functions(sub)
-println(typeof(dp_num_array))
-println(length(dp_num_array))
-println(typeof(dp_chromosome_labels))
-println(length(dp_chromosome_labels))
-
-@testset "get_sample_names" begin
-reader = VCF.Reader(open(vcf_filename, "r"))
-sample_names=get_sample_names(reader)
-println("get_sample_names")
-println(typeof(sample_names))
-println(length(sample_names))
-
-@testset "avg_dp_samples" begin
-avg_sample_list=avg_dp_samples(dp_num_array)
-println("avg_sample_list is $avg_sample_list")
-
-@testset "list_sample_names_low_dp" begin
-list=list_sample_names_low_dp(avg_sample_list,sample_names)
-println(list)
-end
-
-end
-
 
+#println("combined_all_read_depth_array_functions dp_num_array type is $(typeof(dp_num_array))")
+#println("combined_all_read_depth_array_functions dp_num_array length is $(length(dp_num_array))")
+#println("combined_all_read_depth_array_functions gt_chromosome_labels typeof is $(typeof(dp_chromosome_labels))")
+#println("combined_all_read_depth_array_functions gt_chromosome_labels length is $(length(dp_chromosome_labels))")
+@test typeof(dp_num_array) == Array{Int64,2}
+@test length(dp_num_array) == 222324
+@test typeof(dp_chromosome_labels) == Array{Any,2}
+@test length(dp_chromosome_labels) == 2328
 
-@testset "avg_dp_variant" begin
-avg_variant_list=avg_dp_variant(dp_num_array)
-println("avg_dp_variant is $avg_variant_list")
-end
+    @testset "get_sample_names" begin
+    reader = VCF.Reader(open(vcf_filename, "r"))
+    sample_names=get_sample_names(reader)
+    #println("get_sample_names")
+    #println("get_sample_names sample_names type is $(typeof(sample_names))")
+    #println("get_sample_names sample_names length is $(length(sample_names))")
+    @test typeof(sample_names) == Array{Symbol,2}
+    @test length(sample_names) == 191
+
+            @testset "read_depth_threshhold" begin
+            dp_num_array=read_depth_threshhold(dp_num_array)
+            #println("read_depth_threshhold dp_num_array is type $(typeof(dp_num_array))")
+            #println("read_depth_threshhold dp_num_array is size $(size(dp_num_array,1))")
+            @test typeof(dp_num_array) == Array{Int64,2}
+            @test size(dp_num_array,1) == 1164
+            end
 
-@testset "sortcols_by_phenotype_matrix" begin
-vcf,group_label_pack=sortcols_by_phenotype_matrix("test_files/sample_phenotype_matrix.csv","control,case", dp_num_array, sample_names)
-println(typeof(vcf))
-println(size(vcf,1))
-println(typeof(group_label_pack))
-println(length(group_label_pack))
-
-    @testset "find_group_label_indices" begin
-    pheno = readdlm("test_files/sample_phenotype_matrix.csv", ',')
-    row_to_sort_by = find(x -> x == "control,case", pheno)
-    row_to_sort_by = row_to_sort_by[1]
-    group_label_pack=find_group_label_indices(pheno,"control,case",row_to_sort_by)
-    println(typeof(group_label_pack))
-    println(length(group_label_pack))
-    end
+        @testset "avg_dp_samples" begin
+        avg_sample_list=avg_dp_samples(dp_num_array)
+        #println("avg_dp_samples avg_sample_list type is $(typeof(avg_sample_list))")
+        #println("avg_dp_samples avg_sample_list length is $(length(avg_sample_list))")
+        @test typeof(avg_sample_list) == Array{Float64,1}
+        @test size(avg_sample_list,1) == 191
+
+        @testset "list_sample_names_low_dp" begin
+        list=list_sample_names_low_dp(avg_sample_list,sample_names)
+        #println("list_sample_names_low_dp list type is $(typeof(list))")
+        #println("list_sample_names_low_dp list length is $(length(list))")
+        @test typeof(list) == Array{String,1}
+        @test size(list,1) == 6
+        end
 
-    @testset "select_columns" begin
-    dp_num_array=select_columns("test_files/select_samples_list.txt", dp_num_array, sample_names)
-    println(typeof(dp_num_array))
-    println(length(dp_num_array))
     end
 
-end
-end
-end
-
-"""
-    list_variant_positions_low_dp(variant_avg_list::Array{Float64,2},chrom_labels)
-finds variant positions that have an average read depth of under 15 across all patients
-"""
-function list_variant_positions_low_dp(variant_avg_list::Array{Float64,1},chrom_labels)
+    @testset "avg_dp_variant" begin
+    avg_variant_list=avg_dp_variant(dp_num_array)
+    #println("avg_dp_variant avg_variant_list type is $(typeof(avg_variant_list))")
+    #println("avg_dp_variant avg_variant_list length is $(length(avg_variant_list))")
+    @test typeof(avg_variant_list) == Array{Float64,1}
+    @test size(avg_variant_list,1) == 1164
 
-    low_dp_index_list = Array{Int64}(0)
+        @testset  "list_variant_positions_low_dp" begin
+        list=list_variant_positions_low_dp(avg_variant_list,dp_chromosome_labels)
+        #println("list_variant_positions_low_dp list type is $(typeof(list))")
+        #println("list_variant_positions_low_dp list length is $(length(list))")
+        @test typeof(list) == Array{Tuple{Int64,Int64},1}
+        @test size(list,1) == 33
 
-        for item = 1:length(variant_avg_list)
-            if variant_avg_list[item] < 15
-                push!(low_dp_index_list,item)
-            end
         end
 
-    low_dp_positions = Array{Tuple{Int64,Int64}}(0)
+    end
 
-        for i in low_dp_index_list
-            chrom_position = chrom_labels[i,1],chrom_labels[i,2]
-            push!(low_dp_positions,chrom_position)
+    @testset "sortcols_by_phenotype_matrix" begin
+    vcf,group_label_pack=sortcols_by_phenotype_matrix("test_files/sample_phenotype_matrix.csv","control,case", dp_num_array, sample_names)
+    #println("sortcols_by_phenotype_matrix vcf type is $(typeof(vcf))")
+    #println("sortcols_by_phenotype_matrix vcf size is $(size(vcf,1))")
+    #println("sortcols_by_phenotype_matrix group_label_pack type is $(typeof(group_label_pack))")
+    #println("sortcols_by_phenotype_matrix group_label_pack size is $(size(group_label_pack,1))")
+    @test typeof(vcf) == Array{Int64,2}
+    @test size(vcf,1) == 1164
+    @test typeof(group_label_pack) == Array{Any,1}
+    @test size(group_label_pack,1) == 5
+
+        @testset "find_group_label_indices" begin
+        pheno = readdlm("test_files/sample_phenotype_matrix.csv", ',')
+        row_to_sort_by = find(x -> x == "control,case", pheno)
+        row_to_sort_by = row_to_sort_by[1]
+        group_label_pack=find_group_label_indices(pheno,"control,case",row_to_sort_by)
+        #println("find_group_label_indices group_label_pack type is $(typeof(group_label_pack))")
+        #println("find_group_label_indices group_label_pack length is $(length(group_label_pack))")
+        @test typeof(group_label_pack) == Array{Any,1}
+        @test size(group_label_pack,1) == 5
         end
 
-        return low_dp_positions
-end
-
-#functions for producing objects for plot functions
-
-"""
-    read_depth_threshhold(dp_array::Array{Int64,2})
-sets ceiling for read depth values at dp = 100. All dp over 100 are set to 100 to visualize read depth values between 0 < dp > 100 in better definition
-"""
-function read_depth_threshhold(dp_array::Array{Int64,2})
-
-    dp_array[dp_array[:,:].>100].=100
-
-    return dp_array
-end
-
-"""
-    save_numerical_array(num_array::Matrix{Any},sample_names,chr_labels)
-save numerical array with chr labels and sample ids to working directory
-"""
-function save_numerical_array(num_array,sample_names,chr_labels)
-
-      #samplenames=sample_names
-      #samplenames=Matrix(samplenames)
-
-      headings = hcat("chr","position")
-      sample_names = hcat(headings,sample_names)
-
-      chr_labeled_array_for_plotly=hcat(chr_labels, num_array)
-      labeled_value_matrix_withsamplenames= vcat(sample_names,chr_labeled_array_for_plotly)
-
-      writedlm("AC_gatk406_eh_PASS_withheader_value_matrix_.txt", labeled_value_matrix_withsamplenames, "\t")
-end
-
-"""
-    chromosome_label_generator(chromosome_labels::Array{String,2})
-Returns vector of chr labels and indices to mark chromosomes in plotly heatmap
-Specifically, saves indexes and chrom labels in vectors to pass into heatmap function to ticvals and tictext respectively
-Input is either gt_chromosome_labels or dp_chromosome_labels from translate_gt/dp_to_num_array()
-"""
-
-function chromosome_label_generator(chromosome_labels::Array{Any,1})
-    chrom_label_indices = findfirst.(map(a -> (y -> isequal(a, y)), unique(chromosome_labels)), [chromosome_labels])
-    chrom_labels = unique(chromosome_labels)
-    chrom_labels = [string(i) for i in chrom_labels]
-
-    if length(chrom_labels) > 1
-        for item=2:(length(chrom_labels))
-
-            ratio=((chrom_label_indices[item])-(chrom_label_indices[item-1]))/(length(chromosome_labels))
-            println(ratio)
-
-            if ratio < 0.2
-                font_size = "8"
-                println("font size is $font_size")
-                return chrom_labels,chrom_label_indices,font_size
-            else
-                font_size = "10"
-                println("font size is $font_size")
-
-                return chrom_labels,chrom_label_indices,font_size
-            end
+        @testset "select_columns" begin
+        dp_num_array=select_columns("test_files/select_samples_list.txt", dp_num_array, sample_names)
+        #println("select_columns dp_num_array type is $(typeof(dp_num_array))")
+        #println("select_columns dp_num_array size is $(size(dp_num_array,1))")
+        @test typeof(dp_num_array) == Array{Int64,2}
+        @test size(dp_num_array,1) == 1164
         end
-    else
 
-        font_size = "10"
-        return chrom_labels,chrom_label_indices,font_size
     end
-end
 
-"""
-    checkfor_outputdirectory(path::String)
-Checks to see if output directory exists already. If it doesn't, it creates the new directory to write output files to.
-"""
-function checkfor_outputdirectory(path::String)
-    if isdir(path) == true
-    else
-            mkdir(path)
     end
-end
-
-"""
-    generate_chromosome_positions_for_hover_labels(chr_labels::Array{Any,2})
-creates tuple of genomic locations to set as tick labels. This is automatically store chromosome positions in hover labels. However tick labels are set to hidden with showticklabels=false so they will not crowd the y axis.
-"""
-function generate_chromosome_positions_for_hover_labels(chr_labels::Array{Any,2})
-
-returnXY_column1!(chr_labels) #not working yet
-#println(chr_labels)
-
-chr_pos_tuple_list=Array{Tuple}(0)
-
-    for row = 1:size(chr_labels,1)
-
-        chr=chr_labels[row,1]
-        pos=chr_labels[row,2]
-        chr_pos_tuple=chr,pos
-        push!(chr_pos_tuple_list,chr_pos_tuple)
     end
 
-    return chr_pos_tuple_list
-end
-
-"""
-    returnXY_column1!(chr_label_vector)
-Replace String "23","24","25" with "X","Y","M" in chromosome label vector used for plot labels
-"""
-@testset "returnXY_column1!" begin
-
-end
-
-"""
-    sort_genotype_array(genotype_array)
-sorts genotype array for GT or DP by chromosomal location
-"""
-@testset sort_genotype_array(genotype_array) begin
-
-end
-
-
-=#
 end
diff --git a/viva_cli.jl b/viva_cli.jl
index 6171d20..6cfdd7a 100644
--- a/viva_cli.jl
+++ b/viva_cli.jl
@@ -1,8 +1,3 @@
-#=
-read_depth heatmap up to 15 white, up to 30 light blue,
-don't output files in same folder
-=#
-
 println("Welcome to ViVa.")
 println()
 println("Loading packages:")
@@ -59,7 +54,7 @@ function test_parse_main(ARGS::Vector{String})
     )
 
     @add_arg_table s begin
-        "--vcf_file", "-f"             # vcf filename
+        "--vcf_file", "-f"
         help = "vcf filename in format: file.vcf"
         arg_type = String
         required = true
@@ -73,7 +68,7 @@ function test_parse_main(ARGS::Vector{String})
         arg_type = String
         default = "output"
 
-        "--save_format", "-s"               # format to save graphics in
+        "--save_format", "-s"
         help = "file format you wish to save graphics as (eg. pdf, html, png). Defaults to html"
         arg_type = String
         default = "html"
@@ -129,9 +124,7 @@ end
 
 parsed_args = test_parse_main(ARGS)
 
-#begin main program
-
-#filter vcf and load matrix
+#filter vcf and load matrix of filtered vcf records
 
 vcf_filename = (parsed_args["vcf_file"])
 println("Reading $vcf_filename ...")
@@ -175,7 +168,7 @@ if parsed_args["show_stats"] == true
 end
 
 tic()
-
+#pass_filter
 if parsed_args["pass_filter"] == true && parsed_args["chromosome_range"] == nothing && parsed_args["positions_list"] == nothing
     sub = ViVa.io_pass_filter(reader)
     number_rows = size(sub,1)
@@ -183,6 +176,7 @@ if parsed_args["pass_filter"] == true && parsed_args["chromosome_range"] == noth
     heatmap_input = "pass_filtered"
 end
 
+#chr_range
 if parsed_args["chromosome_range"] != nothing && parsed_args["pass_filter"] == false && parsed_args["positions_list"] == nothing
     sub = ViVa.io_chromosome_range_vcf_filter(parsed_args["chromosome_range"],reader)
     number_rows = size(sub,1)
@@ -190,6 +184,7 @@ if parsed_args["chromosome_range"] != nothing && parsed_args["pass_filter"] == f
     heatmap_input = "range_filtered"
 end
 
+#list
 if parsed_args["positions_list"] != nothing && parsed_args["chromosome_range"] == nothing && parsed_args["pass_filter"] == false
     sig_list =  load_siglist(parsed_args["positions_list"])
     sub = ViVa.io_sig_list_vcf_filter(sig_list,vcf_filename)
@@ -198,6 +193,7 @@ if parsed_args["positions_list"] != nothing && parsed_args["chromosome_range"] =
     heatmap_input = "positions_filtered"
 end
 
+#pass_filter and chr_range and list
 if parsed_args["pass_filter"] == true && parsed_args["chromosome_range"] != nothing && parsed_args["positions_list"] != nothing
     sig_list =  load_siglist(parsed_args["positions_list"])
     sub = ViVa.pass_chrrange_siglist_filter(vcf_filename, sig_list, parsed_args["chromosome_range"])
@@ -205,12 +201,14 @@ if parsed_args["pass_filter"] == true && parsed_args["chromosome_range"] != noth
     println("selected $number_rows variants with Filter status: PASS, that match list of chromosome positions of interest, and are within chromosome range: $(parsed_args["chromosome_range"])")
 end
 
+#pass_filter and chr_range
 if parsed_args["pass_filter"] == true && parsed_args["chromosome_range"] != nothing && parsed_args["positions_list"] == nothing
     sub = ViVa.pass_chrrange_filter(reader, parsed_args["chromosome_range"])
     number_rows = size(sub,1)
     println("selected $number_rows variants with Filter status: PASS and are within chromosome range: $(parsed_args["chromosome_range"])")
 end
 
+#pass_filter and list
 if parsed_args["pass_filter"] == true && parsed_args["chromosome_range"] == nothing && parsed_args["positions_list"] != nothing
     sig_list =  load_siglist(parsed_args["positions_list"])
     sub = ViVa.pass_siglist_filter(vcf_filename, sig_list)
@@ -218,6 +216,15 @@ if parsed_args["pass_filter"] == true && parsed_args["chromosome_range"] == noth
     println("selected $number_rows variants with Filter status: PASS and that match list of chromosome positions of interest")
 end
 
+#chr_range and list
+if parsed_args["pass_filter"] == false && parsed_args["chromosome_range"] != nothing && parsed_args["positions_list"] != nothing
+    sig_list =  load_siglist(parsed_args["positions_list"])
+    sub = ViVa.chrrange_siglist_filter(vcf_filename, sig_list, parsed_args["chromosome_range"])
+    number_rows = size(sub,1)
+    println("selected $number_rows variants that are within chromosome range: $(parsed_args["chromosome_range"]) and that match list of chromosome positions of interest")
+end
+
+#no filters
 if parsed_args["pass_filter"] == false && parsed_args["chromosome_range"] == nothing && parsed_args["positions_list"] == nothing
 
     println("no filters applied. Large vcf files will take a long time to process and heatmap visualizations will lose resolution at this scale unless viewed in interactive html for zooming.")
@@ -232,13 +239,22 @@ if parsed_args["pass_filter"] == false && parsed_args["chromosome_range"] == not
 
 end
 
+println()
+println("Finished Filtering. Total time to filter:")
+toc()
+println("_______________________________________________")
+
+#convert to numeric array for plotting and generate/save heatmaps and scatter plots
+
 if parsed_args["heatmap"] == "genotype"
     gt_num_array,gt_chromosome_labels = combined_all_genotype_array_functions(sub)
 
     if parsed_args["heatmap_title"] != nothing
         title = parsed_args["heatmap_title"]
     else
-        title = "Genotype_$(parsed_args["vcf_file"])"
+        bn = Base.Filesystem.basename(parsed_args["vcf_file"])
+        title = "Genotype_$bn"
+        println(title)
     end
 
     chr_pos_tuple_list = generate_chromosome_positions_for_hover_labels(gt_chromosome_labels)
@@ -249,7 +265,9 @@ if parsed_args["heatmap"] == "genotype"
 
         if parsed_args["select_samples"] != nothing
             println("selecting samples listed in $(parsed_args["select_samples"])")
-            gt_num_array = select_columns(parsed_args["select_samples"], gt_num_array, sample_names)
+            gt_num_array = select_columns(parsed_args["select_samples"],
+                                          gt_num_array,
+                                          sample_names)
         end
 
         group_trait_matrix_filename=((parsed_args["group_samples"])[1])
@@ -319,13 +337,6 @@ if parsed_args["heatmap"] == "read_depth"
 
 end
 
-#println("_______________________________________________")
-println()
-println("Finished Filtering. Total time to filter:")
-toc()
-println("_______________________________________________")
-#println()
-
 if parsed_args["avg_dp"] == "sample"
 
     if isdefined(:dp_num_array) == false
@@ -342,7 +353,7 @@ if parsed_args["avg_dp"] == "sample"
     PlotlyJS.savefig(graphic, joinpath("$(parsed_args["output_directory"])" ,"Average Sample Read Depth.$(parsed_args["save_format"])"), js=:remote)
 
 elseif parsed_args["avg_dp"] == "variant"
-    
+
     if isdefined(:dp_num_array) == false
         dp_num_array,dp_chromosome_labels=combined_all_read_depth_array_functions(sub)
     end

From 8c27f4ab3399b9e4d8c8a4ee3f3befeec7223f03 Mon Sep 17 00:00:00 2001
From: Fernando Gelin <fernando_gelin@brown.edu>
Date: Mon, 5 Nov 2018 14:33:19 -0500
Subject: [PATCH 3/6] Update .travis.yml

---
 .travis.yml | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index ddcbd6d..fe06d90 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -15,7 +15,11 @@ git:
 
 before_script:
   - export PATH=$HOME/.local/bin:$PATH
-
+  
+script:
+  - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi		   
+  - julia --check-bounds=yes -e 'Pkg.clone(pwd()); Pkg.build("ViVa"); Pkg.test("ViVa"; coverage=true)'
+   
 after_success:
   - julia -e 'Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())'
 

From 986b475e4daab2a8cdc13db36aba836e3f85d60f Mon Sep 17 00:00:00 2001
From: Fernando Gelin <fernando_gelin@brown.edu>
Date: Mon, 5 Nov 2018 14:46:26 -0500
Subject: [PATCH 4/6] Update .travis.yml

---
 .travis.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.travis.yml b/.travis.yml
index fe06d90..9ca2b70 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -15,6 +15,7 @@ git:
 
 before_script:
   - export PATH=$HOME/.local/bin:$PATH
+  - julia -e 'Pkg.pin("Blink", v"0.6.2"); Pkg.pin("PlotlyJS", v"0.10.2"); Pkg.build("Blink")'
   
 script:
   - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi		   

From 77f4817fb0501401df8b4775b2939317b675bdc2 Mon Sep 17 00:00:00 2001
From: Fernando Gelin <fernando_gelin@brown.edu>
Date: Mon, 5 Nov 2018 14:52:27 -0500
Subject: [PATCH 5/6] Update .travis.yml

---
 .travis.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 9ca2b70..0992315 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -15,11 +15,10 @@ git:
 
 before_script:
   - export PATH=$HOME/.local/bin:$PATH
-  - julia -e 'Pkg.pin("Blink", v"0.6.2"); Pkg.pin("PlotlyJS", v"0.10.2"); Pkg.build("Blink")'
   
 script:
   - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi		   
-  - julia --check-bounds=yes -e 'Pkg.clone(pwd()); Pkg.build("ViVa"); Pkg.test("ViVa"; coverage=true)'
+  - julia --check-bounds=yes -e 'Pkg.clone(pwd()); Pkg.pin("Blink", v"0.6.2"); Pkg.pin("PlotlyJS", v"0.10.2"); Pkg.build("Blink"); Pkg.build("ViVa"); Pkg.test("ViVa"; coverage=true)'
    
 after_success:
   - julia -e 'Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())'

From ff801a22b1ea62afb0e4de6c6c6826a6d1634ddc Mon Sep 17 00:00:00 2001
From: Fernando Gelin <fernando_gelin@brown.edu>
Date: Mon, 5 Nov 2018 15:15:41 -0500
Subject: [PATCH 6/6] Update ViVa.jl

---
 src/ViVa.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/ViVa.jl b/src/ViVa.jl
index 3e97735..8db9b81 100644
--- a/src/ViVa.jl
+++ b/src/ViVa.jl
@@ -6,7 +6,7 @@ using Rsvg
 using Blink
 using GeneticVariation
 using ArgParse
-using VCFTools
+#using VCFTools
 #using Base.Filesystem