diff --git a/README.md b/README.md
index 4f7153a..3fbb3b1 100644
--- a/README.md
+++ b/README.md
@@ -15,29 +15,36 @@ other tools such as [Sailfish](https://github.com/kingsfordgroup/sailfish) and
 
 QAPA consists of both Python (2.7+ or 3.5+) and R scripts.
 
-1. Install [R](https://www.r-project.org/) and the R packages optparse, dplyr,
-   data.table, and stringr. In the R console, the packages can be installed in
-   one line:
-
-       install.packages(c("optparse", "dplyr", "data.table", "stringr"))
-
-2. Install the latest development version of
+1. Install the latest development version of
    [bedtools](https://github.com/arq5x/bedtools2). *Note: do not use 2.26.0
    stable release as there is a
    [bug](https://github.com/arq5x/bedtools2/issues/435) with the groupBy tool*.
 
-3. Install the latest QAPA Python/R source code from GitHub. QAPA requires the
-   Python packages pandas, numpy, pybedtools, and biopython. These will be
-   automatically installed, if necessary.
+2. Install the latest development version (or the latest
+   [release](https://github.com/morrislab/qapa/releases/latest)) of QAPA from
+   GitHub. QAPA requires the Python packages pandas, numpy, pybedtools, and
+   biopython. These will be automatically installed, if necessary.
 
        git clone https://github.com/morrislab/qapa.git
        cd qapa
       python setup.py install
 
-       # To test if installation is working:
-       cd
-       which qapa
-       qapa -h
+3. Install [R](https://www.r-project.org/) and the R packages optparse, dplyr,
+   data.table, and stringr. In the R console, the packages can be installed in
+   one line:
+
+       install.packages(c("optparse", "dplyr", "data.table", "stringr"))
+
+   Alternatively, run the provided `install_packages.R` helper script from
+   the command line:
+
+       Rscript scripts/install_packages.R
+
+4. To test if installation is working:
+
+       cd           # change out of the qapa source directory
+       which qapa   # should return path of qapa executable
+       qapa -h      # should display help message
 
 # Usage
 
@@ -86,11 +93,15 @@ The following data sources are required:
 
         mysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e "select * from wgEncodeGencodeBasicVM9" mm10 > gencode.basic.txt
 
-    Note that the `-N` option (suppress column headings) is not used here.
+    Alternatively, if you are starting from a GTF/GFF file, you can convert
+    it to genePred format using the UCSC tool
+    [`gtfToGenePred`](http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/gtfToGenePred):
+
+        gtfToGenePred -genePredExt custom_genes.gtf custom_genes.genePred
 
 **B. Poly(A) site annotation**
 
-Two options are available.
+As of v1.2.0, this step is optional. Otherwise, two options are available:
 
 Option 1: standard approach (as described in the [paper](#citation))
 
@@ -109,11 +120,11 @@ Option 1: standard approach (as described in the [paper](#citation))
            txEnd, name2, score, strand from wgEncodeGencodePolyaVM9 where
            name2 = 'polyA_site'" -N mm10 > gencode.polyA_sites.bed
 
-Option 2: use custom BED track (*new in v1.1.0*)
+Option 2: use custom BED track
 
 1. Custom BED track of poly(A) sites
 
-    Alternatively a custom BED file of poly(A) can be used to annotate 3' UTRs.
+    A custom BED file of poly(A) sites can be used to annotate 3' UTRs.
     Each entry must contain the start (0-based) and end coordinate of a poly(A)
     site.
 
@@ -127,14 +138,24 @@ A reference genome in FASTA format is required for extracting sequences from
 
 To extract 3' UTRs from annotation, run:
 
-    qapa build --db ensembl_identifiers.txt -g gencode.polyA_sites.bed
-    -p clusters.mm10.bed gencode.basic.txt > output_utrs.bed
+    qapa build --db ensembl_identifiers.txt -g gencode.polyA_sites.bed -p clusters.mm10.bed
+      gencode.basic.txt > output_utrs.bed
 
-Or when using a custom BED file:
+If using a custom BED file, replace the `-g` and `-p` options with `-o`:
 
     qapa build --db ensembl_identifiers.txt -o custom_sites.bed gencode.basic.txt > output_utrs.bed
 
+If using a custom genePred file converted from GTF, include the `-H`
+option:
+
+    qapa build -H --db ensembl_identifiers.txt -o custom_sites.bed
+      custom_genes.genePred > output_utrs.bed
+
+If bypassing the poly(A) annotation step, include the `-N` option:
+
+    qapa build -N --db ensembl_identifiers.txt gencode.basic.txt > output_utrs.bed
+
 Results will be saved in the file `output_utrs.bed` (default is STDOUT).
 
 To extract sequences from the resulting BED file, use the `fasta` sub-command
diff --git a/qapa/annotate.py b/qapa/annotate.py
index 1b54af8..dcd2d74 100644
--- a/qapa/annotate.py
+++ b/qapa/annotate.py
@@ -9,7 +9,6 @@ from pybedtools import featurefuncs
 import re
 
 
-
 def extend_feature(feature, length=24):
     """Extend the 3' end by length
     """
@@ -117,7 +116,7 @@ def main(args, input_filename, fout=sys.stdout):
         custom_mode = True
         custom = pybedtools.BedTool(args.other)
         sites = sort_bed(custom)
-    else:
+    elif not args.no_annotation:
         pas_filter = re.compile("(DS|TE)$")
         gencode = pybedtools.BedTool(args.gencode_polya)\
             .filter(lambda x: x.name == 'polyA_site')\
@@ -147,12 +146,16 @@ def main(args, input_filename, fout=sys.stdout):
     # - Restore BED format with cut()
     # - Use custom function to resolve overlapping features from groupby
-    overlap_utrs = utrs.intersect(sites, s=True, wa=True, wb=True)\
-        .each(restore_feature)\
-        .each(update_3prime,
-              min_intermediate_pas=args.intermediate_polyasite,
-              custom=custom_mode)\
-        .saveas()
+    if args.no_annotation:
+        overlap_utrs = utrs.each(restore_feature)\
+                           .saveas()
+    else:
+        overlap_utrs = utrs.intersect(sites, s=True, wa=True, wb=True)\
+            .each(restore_feature)\
+            .each(update_3prime,
+                  min_intermediate_pas=args.intermediate_polyasite,
+                  custom=custom_mode)\
+            .saveas()
 
     overlap_utrs = sort_bed(overlap_utrs)\
         .groupby(g=[1, 2, 3, 6, 7, 8, 9], c=[4, 5, 10, 11],
                  o=['collapse'] * 4, delim="|")\
diff --git a/qapa/collapse.py b/qapa/collapse.py
index a93f461..d693a74 100644
--- a/qapa/collapse.py
+++ b/qapa/collapse.py
@@ -11,7 +11,7 @@
 
 class Interval:
 
-    def __init__(self, l):
+    def __init__(self, l, sp=None):
         #l = line.rstrip().split("\t")
         self.chrom = l[0]
         self.start = int(l[1])
@@ -24,6 +24,7 @@ def __init__(self, l):
         self.end2 = int(l[7])
         self.gene_id = l[8]
         #self.utr_id = l[9]
+        self.species = self._guess_species(sp)
 
     def is_forward(self):
         return self.strand == "+"
@@ -45,24 +46,36 @@ def set_score(self):
         else:
             self.score = self.start2 - self.start
 
-    def finalize(self):
+    def finalize(self, species=None):
         '''Print the interval using the highest peak'''
         if self.is_forward():
             utr_co = [self.end2, self.end]
         else:
             utr_co = [self.start, self.start2]
-        new_name = [self.name, self._guess_species(), self.chrom, self.start,
-                    self.end, self.strand, 'utr'] + utr_co
+        new_name = [self.name, self.species, self.chrom,
+                    self.start, self.end, self.strand, 'utr'] + utr_co
         new_name = "_".join([str(x) for x in new_name])
         self.set_score()
         return [self.chrom, self.start,
                self.end, new_name, self.score, self.strand, self.gene_id]
 
-    def _guess_species(self):
+    def set_gene(self, conn):
+        '''Query for Ensembl Gene ID'''
+        query = 'select gene_id from ensembl_id where transcript_id = ?'
+        cur = conn.execute(query, (self.name,))
+        r = cur.fetchone()
+        if r:
+            self.gene_id = r[0]
+        else:
+            self.gene_id = None
+
+    def _guess_species(self, species=None):
         if re.match(r'ENST\d+', self.name):
             return 'hg19'
         elif re.match(r'ENSMUST\d+', self.name):
             return 'mm10'
+        elif species is not None:
+            return species
         warnings.warn('Could not guess species from gene name!', Warning)
         return 'unk'
 
@@ -121,12 +134,8 @@ def merge_bed(args, inputfile):
     overlap_diff_genes = set()
 
     print("Iterating and merging intervals by 3' end", file=sys.stderr)
-    # for line in input:
     for index, line in df.iterrows():
-        if re.match(r'^(?!chr)', line[0]):
-            continue
-
-        my_interval = Interval(line)
+        my_interval = Interval(line, args.species)
 
         if prev_interval is None:
             prev_interval = my_interval
diff --git a/qapa/extract.py b/qapa/extract.py
index dad6665..a9c01d1 100644
--- a/qapa/extract.py
+++ b/qapa/extract.py
@@ -8,8 +8,13 @@
 
 class Row:
 
-    def __init__(self, row):
+    def __init__(self, row, no_header=False):
         l = row.rstrip().split("\t")
+        if no_header:
+            # add a dummy column in front of list to represent bin column in
+            # UCSC genePred tables
+            l.insert(0, "dummy")
+
         self.name = l[1]
         self.chrom = l[2]
         self.strand = l[3]
@@ -75,7 +80,7 @@ def get_3utr_length(self):
         return self.utr3[1] - self.utr3[0]
 
     def is_on_random_chromosome(self):
-        return not re.match(r'chr[0-9XY]+$', self.chrom)
+        return not re.match(r'^(chr)*[0-9XY]+$', self.chrom)
 
     def get_block_sizes(self, n):
         sizes = [0] * n
@@ -120,18 +125,19 @@ def main(args, fout=sys.stdout):
     n = 0
     for row in fileinput.input(args.annotation_file[0],
                                openhook=fileinput.hook_compressed):
-        n = n + 1
-
-        if fileinput.isfirstline():
+
+        if fileinput.isfirstline() and not args.no_header:
             continue
-
+        n = n + 1
+
         if re.match(r"^#", row):
             c = c + 1
             continue
 
-        rowobj = Row(row)
+        rowobj = Row(row, args.no_header)
 
-        if rowobj.is_on_random_chromosome():
+        if not args.no_skip_random_chromosomes and \
+                rowobj.is_on_random_chromosome():
             c = c + 1
             continue
 
@@ -167,8 +173,8 @@ def main(args, fout=sys.stdout):
     fileinput.close()
     # conn.close()
     if float(c) / float(n) > 0.75:
-        print("Warning: More than 75% of entries skipped. Are you using the "
-              "correct database?", file=sys.stderr)
+        print("Warning: %d/%d entries (%0.2f%%) were skipped. Are you using the "
+              "correct database?" % (c, n, 100 * float(c) / float(n)), file=sys.stderr)
 
 
 if __name__ == '__main__':
diff --git a/qapa/fasta.py b/qapa/fasta.py
index 615ac2a..d900a64 100644
--- a/qapa/fasta.py
+++ b/qapa/fasta.py
@@ -7,7 +7,8 @@
 
 def get_sequences(bed_file, genome):
     bed = pybedtools.BedTool(bed_file)
-    return bed.sequence(genome, s=True, name=True, fullHeader=True, split=True)
+    return bed.sequence(genome, s=True, name=True, fullHeader=False,
+                        split=True)
 
 
 def filter_sequences(fasta_file, min_length=100, fout=sys.stdout):
diff --git a/qapa/qapa.py b/qapa/qapa.py
index 2416578..22779d6 100644
--- a/qapa/qapa.py
+++ b/qapa/qapa.py
@@ -2,6 +2,7 @@
 import sys
 import os
 import os.path
+import re
 import argparse
 import tempfile
 
@@ -69,7 +70,7 @@ def getoptions(args=None):
                           help="Ensembl gene identifier table")
     required.add_argument('-g', '--gencode_polya', dest="gencode_polya",
                           help="GENCODE poly(A) site track")
-    required.add_argument('-p', '--polyasite', dest="polyasite",
+    required.add_argument('-p', '--polyasite', dest="polyasite",
                           help="PolyAsite database")
     optional.add_argument('-m', '--min_polyasite', dest="min_polyasite",
                           type=int, default=3,
@@ -94,8 +95,24 @@ def getoptions(args=None):
                           help="Use this option to annotate 3' UTRs with a "
                           "custom BED file of poly(A) sites. This option "
                           "cannot be used in conjunction with -g and -p.")
+    optional.add_argument("-C", "--no_skip_random_chromosomes", default=False,
+                          action="store_true",
+                          help="Disable skipping of chromosomes that don't "
+                          "match the regular expression pattern "
+                          "'^(chr)*[0-9XY]+$'")
     optional.add_argument("-s", "--save", action='store_true',
-                          help="don't automatically delete intermediate files")
+                          help="Don't automatically delete intermediate files")
+    optional.add_argument("-H", "--no_header", action='store_true',
+                          help="Annotation table (genePred) has no header. "
+                          "Use this option if your input table was created "
+                          "using gtfToGenePred -genePredExt.")
+    optional.add_argument("-N", "--no_annotation", action='store_true',
+                          help="Skip annotation step. Use this option if you "
+                          "only have a gene model annotation file to build "
+                          "a 3' UTR library.")
+    optional.add_argument("--species", type=str,
+                          help="Set species. Useful if not using 'hg19' or "
+                          "'mm10', otherwise 'unk' is used.")
 
     build_parser.set_defaults(func=build)
     build_parser._action_groups.append(optional)
@@ -163,12 +180,21 @@ def getoptions(args=None):
                             args.other, args.annotation_file[0]],
                            build_parser)
         if args.other is None and \
-                (args.gencode_polya is None or args.polyasite is None):
+                (args.gencode_polya is None or args.polyasite is None) and \
+                not args.no_annotation:
             parser.error("Missing arguments: -g and/or -p")
         if args.other and (args.gencode_polya or args.polyasite):
             eprint("Option -o (custom BED) will be used for build phase and "
                    "-g and -p will be ignored")
+
+        if args.no_annotation:
+            eprint("Annotation step will be skipped")
+
+        if not (args.species is None or \
+                re.match(r'^[a-zA-Z0-9]+$', args.species)):
+            parser.error("Species must be alphanumeric.")
+
     elif args.subcommand == 'fasta':
         _check_input_files([args.bed_file[0], args.genome], fasta_parser)
     elif args.subcommand == 'quant':
@@ -238,7 +264,7 @@ def quant(args):
         intermediate_name = args.save
 
     try:
-        cmd = "create_merged_data.R --ensembl {} -f {} -F {} {} > {}".format(
+        cmd = "create_merged_data.R --db {} -f {} -F {} {} > {}".format(
             args.db, args.field, args.format, " ".join(args.quant_files),
             intermediate_name)
         # eprint(cmd)
diff --git a/qapa/version.py b/qapa/version.py
index b3ddbc4..58d478a 100644
--- a/qapa/version.py
+++ b/qapa/version.py
@@ -1 +1 @@
-__version__ = '1.1.1'
+__version__ = '1.2.0'
diff --git a/scripts/compute_pau.R b/scripts/compute_pau.R
index b8846c1..59e28b3 100755
--- a/scripts/compute_pau.R
+++ b/scripts/compute_pau.R
@@ -31,10 +31,14 @@ suppressPackageStartupMessages(library(data.table))
 calculate <- function(dt, qscore) {
   # Calculate UTR Index given a data.table
   #
-  # Return as data frame
+  # Return as data table or NULL if dt is empty
   write("Calculating Poly(A) Usage", stderr())
   write(paste("\t", nrow(dt), "rows,", length(unique(dt$Gene)), "genes"),
         stderr())
+  if (nrow(dt) == 0) {
+    return(NULL)
+  }
+
   dt[, c("APA_ID", "pau") := list(update_apa_id(APA_ID, UTR3.Start, UTR3.End),
                                   calc_pau(value)
@@ -102,8 +106,6 @@ create_apa_id <- function(d) {
 
 update_apa_id <- function(x, start, end) {
   # Update APA_ID with proximal, distal or single status - added as another suffix
-  # x <- apa_id(rep(x, length(start)))
-
   if (length(x) == 1) {
     x <- paste(x, "S", sep = "_")
   } else {
@@ -136,7 +138,6 @@ if (file == "-") {
 m <- data.table(read.csv(file, sep="\t", header=T, check.names=FALSE, stringsAsFactors=FALSE))
 setkey(m, Gene)
 m[, APA_ID := apa_id(Gene)]
-# m <- as.data.frame(m)
 
 # Calculate PAU values ####
 write("Melting data frame", stderr())
@@ -174,6 +175,12 @@ pau <- n_sites[pau]
 if (opt$options$expr) {
   # Append a suffix (e.g. .TPM) to the end to distinguish from the pau columns
   write("\nAdding input expression values", stderr())
+  if (nrow(dt.plus) == 0) {
+    dt.plus <- NULL
+  }
+  if (nrow(dt.minus) == 0) {
+    dt.minus <- NULL
+  }
   expr <- rbind(dt.plus, dt.minus)
   expr[,sample := paste0(sample, ".TPM")]
   expr <- dcast(expr, Transcript + Gene + Gene_Name + Chr + LastExon.Start +
diff --git a/scripts/create_merged_data.R b/scripts/create_merged_data.R
index 61167e4..7fc1cba 100755
--- a/scripts/create_merged_data.R
+++ b/scripts/create_merged_data.R
@@ -5,7 +5,7 @@ suppressPackageStartupMessages(library(optparse))
 
 args <- commandArgs(TRUE)
 option.list <- list(
-  make_option(c("-e", "--ensembl"), type="character", default=NULL,
+  make_option(c("--db"), type="character", default=NULL,
              help="Ensembl identifiers database file.
              Required unless -m is specified. [%default]"),
   make_option(c("-f", "--field"), type="character",
@@ -39,7 +39,7 @@ if (length(opt$args) < 1) {
 
 if (opt$options$merge_only) {
   write("Merge-only mode enabled.", stderr())
-} else if (is.null(opt$options$ensembl)) {
+} else if (is.null(opt$options$db)) {
   stop("Ensembl identifiers database is required.")
 }
 
@@ -119,7 +125,13 @@ format_multi_ensembl_ids <- function(ids) {
   # ENSMUST00000111043_ENSMUSG00000048482,ENSMUST00000111044_ENSMUSG00000048482_mm9_chr1
   # becomes
   # ENSMUST00000111043,ENSMUST00000111044_ENSMUSG00000048482_mm9_chr1
-  split_ids <- str_match(ids, "^(ENS.+)_((hg19|mm10).+)")
+  # Test regex: https://regex101.com/r/zuDsy1/1
+  split_ids <- str_match(ids, "^(([^_]+_[^_,]+)(,[^_]+_[^_,]+)*)_(([^_]+).+)")
+
+  if (is.na(split_ids[1])) {
+    stop("Unable to format Ensembl ID by regex")
+  }
+
   # Separate multiple Transcript_Gene name
   ens <- strsplit(split_ids[,2], ",")
   # Split transcript and gene names, then re-arrange to combine transcripts and genes
@@ -129,24 +135,24 @@ format_multi_ensembl_ids <- function(ids) {
     paste(., collapse="_")
   })
   stopifnot(length(ens) == length(ids))
-  apply(cbind(ens, split_ids[,3]), 1, paste, collapse="_")
+  apply(cbind(ens, split_ids[,5]), 1, paste, collapse="_")
 }
 
 separate_ensembl_field <- function(df) {
-  tx_pattern <- "^ENS(MUS)*T.*_(hg\\d+|mm\\d+)_chr[0-9XY]+_\\d+_\\d+_[-+]_utr_\\d+_\\d+"
+  tx_pattern <- "^([^_]+_[^_,]+)(,[^_]+_[^_,]+)*_[^_]+_(chr)*\\w+_\\d+_\\d+_[-+]_utr_\\d+_\\d+"
   if (grepl(tx_pattern, df$Transcript[1], perl = TRUE)) {
     # Format Ensembl Transcript and Ensembl Gene IDs if there are multiple
-    # Remove hg19 info
     # Remove utr tag
     df[, Transcript := str_extract(Transcript, tx_pattern) %>%
          format_multi_ensembl_ids() %>%
-         str_replace("_(hg\\d+|mm\\d+)", "") %>%
+         #str_replace("_(hg\\d+|mm\\d+|unk)", "") %>%
          str_replace("_utr", "")]
     # Split by underscore
-    df[, c("Transcript", "Gene", "Chr", "LastExon.Start", "LastExon.End",
-           "Strand", "UTR3.Start", "UTR3.End") :=
+    df[, c("Transcript", "Gene", "Species", "Chr", "LastExon.Start",
+           "LastExon.End", "Strand", "UTR3.Start", "UTR3.End") :=
          tstrsplit(Transcript, "_", fixed=TRUE)]
+    df[, Species := NULL]
 
     df[, ':=' (
       LastExon.Start = as.numeric(as.character(LastExon.Start)),
@@ -155,16 +161,15 @@ separate_ensembl_field <- function(df) {
       UTR3.End = as.numeric(as.character(UTR3.End))
     )]
     df[, Length := abs(UTR3.End - UTR3.Start)]
-  } else if (grepl("ENS(MUS)*T\\d+", df$Transcript[1])) {
-    df
   } else {
-    stop("Unable to find Ensembl IDs by regex")
+    warning("Unable to find Ensembl IDs by regex")
+    df
   }
 }
 
 #### Add Ensembl Gene ID column ####
 extract_one_transcript <- function(ids) {
-  str_extract(ids, "ENS(MUS)*T\\d+")
+  str_extract(ids, "^[^,]+")
 }
 
 add_ensembl_metadata <- function(df, dbfile, all_genes = FALSE,
@@ -176,27 +181,22 @@ add_ensembl_metadata <- function(df, dbfile, all_genes = FALSE,
   db <- read.table(dbfile, header = TRUE, sep = "\t",
                    stringsAsFactors=FALSE) %>%
     data.table()
 
-  if (all(grepl("ENS(MUS)*T\\d+.*", df$Transcript[1:min(nrow(df), 1000)], perl=TRUE))) {
-    df[, tid := extract_one_transcript(Transcript)]
-
-    if (!all_genes) {
+  df[, tid := extract_one_transcript(Transcript)]
+
+  if (!all_genes) {
       db <- db[Gene.type == "protein_coding"]
-    }
-    gid <- db[, .(tid=Transcript.stable.ID,
-                  Gene=Gene.stable.ID,
-                  Gene_Name=Gene.name)] %>%
+  }
+  gid <- db[, .(tid=Transcript.stable.ID,
+                Gene=Gene.stable.ID,
+                Gene_Name=Gene.name)] %>%
       unique()
-    if ("Gene" %in% colnames(df)) {
+  if ("Gene" %in% colnames(df)) {
      df[, Gene := NULL] # Remove Gene column so it doesn't conflict with gid results
-    }
-    setkey(df, tid)
-    setkey(gid, tid)
-    df <- gid[df]
-    df[, tid := NULL]
-  } else {
-    # should never enter this condition if input is properly prepared
-    stop("Can't identify gene ID column type")
   }
+  setkey(df, tid)
+  setkey(gid, tid)
+  df <- gid[df]
+  df[, tid := NULL]
 
   c <- length(which(!is.na(df$Gene_Name)))
   pc <- paste0("(", round(c / nrow(df)*100, digits=7), "%)")
@@ -222,10 +222,10 @@ merged_data <- join_iterative(opt$args, by = c("Transcript", "Length"),
                               format = opt$options$format)
 
 # Remove random chromosomes
-if (!opt$options$merge_only && all(grepl("chr[0-9XY]+", merged_data$Transcript))) {
-  write("Removing random chromosomes", stderr())
-  merged_data <- merged_data[grep("chr[0-9XY]+_\\d+_\\d+", Transcript),]
-}
+# if (!opt$options$merge_only && all(grepl("chr[0-9XY]+", merged_data$Transcript))) {
+#   write("Removing random chromosomes", stderr())
+#   merged_data <- merged_data[grep("chr[0-9XY]+_\\d+_\\d+", Transcript),]
+# }
 
 if (nrow(merged_data) < 100) {
   warning("Less than 100 rows in final table.")
@@ -235,7 +235,7 @@ if (!opt$options$merge_only) {
   write("Separating Ensembl IDs", stderr())
   separate_ensembl_field(merged_data)
   write("Adding Ensembl metadata", stderr())
-  merged_data <- add_ensembl_metadata(merged_data, opt$options$ensembl,
+  merged_data <- add_ensembl_metadata(merged_data, opt$options$db,
                                       opt$options$all_genes,
                                       opt$options$non_standard)
 }
diff --git a/scripts/install_packages.R b/scripts/install_packages.R
new file mode 100755
index 0000000..92fa8c9
--- /dev/null
+++ b/scripts/install_packages.R
@@ -0,0 +1,25 @@
+#!/usr/bin/env Rscript
+
+writeLines(paste("Using", R.Version()$version.string))
+
+local.lib <- .libPaths()[1]
+pkgs <- c("stringr", "dplyr", "data.table", "optparse")
+
+for (p in pkgs) {
+  if (!p %in% installed.packages(lib.loc = local.lib)) {
+    install.packages(p, dependencies=TRUE,
+                     repos='http://cran.us.r-project.org', lib=local.lib)
+  }
+}
+
+writeLines("\n-------------------------------------")
+writeLines("INSTALL SUMMARY")
+writeLines("-------------------------------------")
+check <- setNames(logical(length(pkgs)), pkgs)
+for (p in pkgs) {
+  check[p] <- p %in% installed.packages(lib.loc = local.lib)
+  writeLines(sprintf("%s: %s", p, ifelse(check[p] == TRUE, "OK", "FAIL")))
+}
+writeLines("-------------------------------------\n")
+status <- as.integer(!all(check))
+q(status=status)
diff --git a/setup.py b/setup.py
index 3e8d6e1..21e1342 100644
--- a/setup.py
+++ b/setup.py
@@ -4,8 +4,7 @@
 from distutils.core import setup
 
 if sys.version_info[:2] < (2, 7):
-    sys.stderr.write("At least Python 2.7, or Python 3.5 or later "
-                     "is required\n")
+    sys.stderr.write("Python 2.7 or later is required\n")
     sys.exit(1)
 elif sys.version_info[0] == 3 and sys.version_info[:2] < (3, 5):
     sys.stderr.write("At least Python 3.5 or later is required.\n")
@@ -44,7 +43,6 @@
         'Programming Language :: Python :: 2.7',
         'Programming Language :: Python :: 3',
         'Programming Language :: Python :: 3.5',
-        'Topic :: Scientific/Engineering :: Bio-Informatics',
     ],
     zip_safe=False
 )
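Reviewer note (not part of the patch): a quick usage sketch combining the new `build` options introduced above (`-H`, `-N`, `--species`). The file names are placeholders and this exact invocation does not appear in the README; the three options are declared independently in `getoptions()`, so combining them should be valid, but treat this as an illustration rather than documented usage.

    # Convert a custom GTF to a headerless genePred table with the UCSC
    # gtfToGenePred tool, as referenced in the README
    gtfToGenePred -genePredExt custom_genes.gtf custom_genes.genePred

    # Build a 3' UTR library from it: skip poly(A) site annotation (-N),
    # read the headerless table (-H), and tag UTR names with a non-default
    # species label (placeholder "rn6") instead of the fallback 'unk'
    qapa build -N -H --species rn6 --db ensembl_identifiers.txt \
        custom_genes.genePred > output_utrs.bed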