diff --git a/pyfaidx/__init__.py b/pyfaidx/__init__.py old mode 100644 new mode 100755 index 8b10603..ee7f900 --- a/pyfaidx/__init__.py +++ b/pyfaidx/__init__.py @@ -4,10 +4,11 @@ """ from __future__ import division - +import bisect import os import re import string +import struct import sys import warnings from collections import namedtuple @@ -15,7 +16,6 @@ from math import ceil from os.path import getmtime from threading import Lock - from six import PY2, PY3, integer_types, string_types from six.moves import zip_longest @@ -312,19 +312,32 @@ def __len__(self): return self.rlen -class BGZFblock(namedtuple('BGZFblock', ['cstart', 'clen', 'ustart', 'ulen'])): - __slots__ = () - - def __getitem__(self, key): - if type(key) == str: - return getattr(self, key) - return tuple.__getitem__(self, key) - - def as_bytes(self): - return struct.pack('= getmtime(self.gzi_indexname): self.read_gzi() @@ -569,8 +583,7 @@ def build_index(self): "Bad sequence name %s at line %s." % (line.rstrip('\n\r'), str(i))) offset += line_blen - thisoffset = fastafile.tell( - ) if self._bgzf else offset + thisoffset = offset else: # check line and advance offset if not blen: blen = line_blen @@ -614,7 +627,41 @@ def build_index(self): % self.indexname) elif isinstance(e, FastaIndexingError): raise e - + + def build_gzi(self): + """ Build the htslib .gzi index format """ + from Bio import bgzf + with open(self.filename, 'rb') as bgzf_file: + self.gzi_index = [] + for i, values in enumerate(bgzf.BgzfBlocks(bgzf_file)): + self.gzi_index.append(BgzfBlock(*values)) + eof = self.gzi_index.pop() + if not eof.empty: + raise IOError("BGZF EOF marker not found. File %s is not a valid BGZF file." % self.filename) + + + def write_gzi(self): + """ Write the on disk format for the htslib .gzi index + https://github.com/samtools/htslib/issues/473""" + with open(self.gzi_indexname, 'wb') as bzi_file: + bzi_file.write(struct.pack('>> unpack_gzi_to_blocks(b'\x02\x00\x00\x00\x00\x00\x00\x00\x1e(\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x8a1\x00\x00\x00\x00\x00\x00\xff\x1c\x01\x00\x00\x00\x00\x00') + ((10270, 65280), (12682, 72959)) + """ + import struct + + n_bytes = len(gzi_bytes) + gzi_ints = struct.Struct('<%sQ' % str(n_bytes // 8)).unpack(gzi_bytes) # little-endian 64-bit unsigned integers + n_blocks = gzi_ints[0] + + block_offsets = tuple(zip(*[iter(gzi_ints[1:])] * 2)) + return block_offsets + +def pack_blocks_to_gzi(block_offsets): + """ Packs the bgzip .gzi format from a tuple of + (compressed offset, uncompressed offset) describing + the BGZF compressed FASTA file. + >>> pack_blocks_to_gzi(((10270, 65280), (12682, 72959))) + b'\x02\x00\x00\x00\x00\x00\x00\x00\x1e(\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x8a1\x00\x00\x00\x00\x00\x00\xff\x1c\x01\x00\x00\x00\x00\x00' + """ + import struct + from itertools import chain + + n_blocks = len(block_offsets) + gzi_bytes = struct.Struct('<%sQ' % str(n_blocks * 2 + 1)).pack(n_blocks, *chain(*block_offsets)) # little-endian 64-bit unsigned integers + + return gzi_bytes if __name__ == "__main__": diff --git a/tests/data/chr17.hg19.part.fa b/tests/data/chr17.hg19.part.fa deleted file mode 100644 index 962d41c..0000000 --- a/tests/data/chr17.hg19.part.fa +++ /dev/null @@ -1,2 +0,0 @@ ->chr17 -AAGCTTCTCACCCTGTTCCTGCATAGATAATTGCATGACAATTGCCTTGTCCCTGCTGAATGTGCTCTGGGGTCTCTGGGGTCTCACCCACGACCAACTCCCTGGGCCTGGCACCAGGGAGCTTAACAAACATCTGTCCAGCGAATACCTGCATCCCTAGAAGTGAAGCCACCGCCCAAAGACACGCCCATGTCCAGCTTAACCTGCATCCCTAGAAGTGAAGGCACCGCCCAAAGACACGCCCATGTCCAGCTTATTCTGCCCAGTTCCTCTCCAGAAAGGCTGCATGGTTGACACACAGTGcctgcgacaaagctgaatgctatcatttaaaaactccttgctggtttgagaggcagaaaatgatatctcatagttgctttactttgcatattttAAAATTGTGACTTTCATGGCATAAATAATACTGGTTTATTACAGAAGCACTAGAAAATGCATGTGGACAAAAGTTGGGATTAGGAGagagaaatgaagacatatgtccacacaaaaacctgttcattgcagctttctaccatcaccaaaaattgcaaacaaccacacgcccttcaactggggaactcatcaacaacaaacttgtggtttacccacacaatggaagaccacttagcaacaaaaaggaccaaactcctggtacatgcaactgacagatgaatctcaaacgcattcctccgtgtgaaagaagccggactcacagggcaacacactatctgactgtttcatgggaaagtctggaaacggcaacaccattgagacagaaaacaggtgagtggttgcctggggccagggaactttctggggtcatattctctgtgttgattctggtggtggaaacaagactgtCccagcctgggtgatacagcgagaccccatctctaccaaaaaattaaaaattagctgggcatggtggtgcatgcctgtagtcccagctattcacagtgctgaggtgggaagatgcttgagcccaggagttcaaggctgcaatgagctatgattgcgccactgcactttggcctggacaacagagcaaaaccctgtctctaaaaaaagaaaagaaaagaaaaaCTCACTGGATATGAATGATAcaggttgaggatccattatctgaaatgcttggaccagatgttttgaattttggattttttcatattttgtaatctttgcagtatatttaccagttcagcatccctaactcaaaaattcaaaaatctgaaatcccaaacgcgccaataagcattccctttgagcgtcatgtcggtgcttggaatgtttggggttttggatttacagctttgggacgctcaacctgTACCTCAATAAACCTGATTTTAAAAAAGTTTGGGGGGATTCCCCTAAGCCCGCCACCCGGAGACAGCGGATTTCCTTAGTTACTTACTATGCTCCTTGGCCATTTCTCTAGGTATTGGTATATTGTGTCTGCTGTGAACTGTCCTTGGCCTGTTTGGTGACGGGTGAGGAGCAGGGACAGAAGGGTCCTGCGTGCCCTGCCTTCACAAGCCCCTGGAAGGAAAGTTGTTTTGGGATCTCTGCACCCTCAGCCTGGACAACTTGTGCCCATCTGGTGACCCCTCACTCAGCCACCAGACTTCCACGACAGGCTCCAGCCTCGGCACCTTCAGCCATGGACAGTTCCGCCAGCGTTGCCCTCTGTTCTGCTGTTTTCTCTACCAGAAGTGCCCTTCCCTCCTCACCTGACCACTCTGGGGAAATCCCTCAGCACCCTCCCTGAGCATACCCTACTCTGGCACAAGCCCACCCTGCAAAGCCCCTGAGGCCCGCCCTGTGGCGTCTCTCCCTCCCTTGCTGTCAGGACAGTGGTCCTGGCCACCGGGGCTCACGGAGCCGCCCTGTGCCGTGTACCTCTGAGCCCTCTGCACAGTGCCTTCTGCTTGCCTGTGGCTTTGAGAAGAaaccccttctggttatacataagacagccagagaagggagttgcccagggtggcacagcacgttgctgccagTTACTGCCATTTTCACGGGCATGAAATGGAGATAACAACAGGAGCGACCGCACAGGCTGCTGAGCGCGTCACACGCAGCCATCGCGCAGCTCAGGGATATTACGTGTAACTCGACATGTCAGCGATTGTCACAGGCACTGCTACTCCTGGGGTTTTCCATCAAACCCTCAAGAGCTGGGCCTGGGGTCAACTTCCGGCCTGGGGAAACTGGGGCAAGTATCACCAGAGATGAGCTTTATAAAAATAATGGTGCTAgctgggcatggtggcttgcacctgtaatcccagcactttgggaggccgagctaggaggatcgtttgagtccagcagtttgagaccagcctggccaatacggcaaaacccagtctctacaaaaaatacaaaaaacaactagccaggcgtggtggtgcacacctgtagtcccagctactcaggaggctgagggggaaggactgcttgagcccaggagtttgaggctgctgtgagctgtgatcgcatcactgcattccagcccggtgacagagtgagtcactgtctcaaaaaagaaaggaagaaataaagaaaacaaATAAAAATAATAGTGCAGACAAAAGGCCTTGACCCATCTAGCTTTGGCCCTCAGCATCAACCGCTAGATACGTCCCTCCCTTTCTTCTGGGGCACAGGTCACACTCTCTTCCAGGTCTAGGATGCAGCTGAGGGGTGCCCCTCTTACCATCTAATCTGTGCCCTTATTTCCTCTGCTTTAGTGAGGAAGAGGCCCCTGGTCCATGAAGGGGCCTTTCAGAGACGGGGACCCCTGAGGAGCCCCGAGCAGCAGCCGTCGTGTCTCACCCAGGGTGTCTGAAACAGATGTGGAGGTCTCGGGTGAGGCGTGGCTCAGATACAGGGAGTGGCCCACAGCTCGGCCTGTCTTTGAAAGGCCACGTGACCTGGCCCACGGCTGGCAGGTGGGACCCAGCTGCAGGGGTCCAGCAGCACCCACAGCAGCCACCTGTGGCAGGGAGGAGCTTGTGGTACAGTGGACAGGCCCTGCCCAGATGGCCCCCCGCCTGCCTGTGGAAGTTGACCAGACCATCTGTCACAGCAGGTAAGACTCTGCTTTCTGGGCAACCCAGCAGGTGACCCTGGAATTCCTGTCCATCTGGCAGGTGGGCATTGAAACTGGTTTAAAAATGTCACACCATAggccgggcacagtggctcacgcctgtaatcccagccctttgggaggccagggtgggtggatcacttgaggtcaggagttcaagaccagcctggccaacatggtgaaaccccgtctactaaaaatacaaaaattagcctggcgtggtggcgcatgcctgtaatcccagctacttgggaagctgagggatgagaactgcttgaacctgggaggcagacgttgcagtgagctgagatcacgccactgcactccagcctgggcaacagagtaagactctgtctcaaaaaaaaaaaaaTCACACCATTTTGGCTTCAGATTGCATATCCTCCTGCAAGGATATATACGCGTGAAATTCAAGTCAATGACAAATCAGAAGAAAAAACATATATATACGCAAACCAGTATCCTACTGTGTGTGTCGTTTGTTGTGTTTTCGACAGCTGTCCGTGTTATAATAATTCctctagttcaaatttattcatttttaacttcatagtaccacattctacacactgcccatgtcccctcaagcttcccctggctcctgcaaccacaaatctactctctgcctctgtgggttgacctattctggacacgtcatagaaatagagtcctgcaacacgtggccgtctgtgtctggcttctctcgcttagcatcttgtttccaaggtcctcccacagtgtagcatgcacctgctacactccttcttagggctgatattccaCGCACCTGCTACACTCCTTCTTATGGCTGATATTCCACGCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACACACCCGCTACACTCCTTCTTAGGGCTGATATTCCACGCACCCGCTACACTCCTTCTTAGGGCTGATATTCCACGCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACGCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACGCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACGCACCTGCTACACTCCTTCTTATGACTGATATTCCACGCACCtgctacactccttcttagggctgatattccactgcagggacagacttcatttgtgtatccattcatcagtggatggacacttggggtgtttccacttttggctgttgtggatagtgctgctatgaacattcctgcacaagttttaggatggacatgtttttcttatctcttgggtatataacaaggagtggaattgccagatcaaatggtgattctgtgtttaactttctgaggaactatcagctgcttcccaaagtggccatcccattattctcaTAttatttttatttgtttattatattttgagagtgtctcgctctgtcaccctggctggagtgcagtggtgtgatctcggctcactgcaatctccacctcccaggttgaagtgattttcctgcctcagcctcccaagtagctgcgattacaggcgcccgccaccacacccagctaatttttttatttttagtagagacgggtttccccatattggccaagctggtctcaaactcctgacctcaggtgatccgtgcgcctcggcctcccaaagtactgggattacaagcacgagccactgcactcagccTAATTTATTACTATTTTTAACTGTAGAGACAAGGTCTCAGTATGTTGCCAAGGCTGGTctcaaattcctgggttcaagcaatcctctcaagtagttaggactacagggacatgccactacaccaggctaatttttaatttttttatagagatggaggtctcactatgttgcccaggatggtctcaaactcctagcctcaagcaatcctcctgccttggcctcccaaagtgttcagaatgtaagtgtaactcactgcacctgCCACATACAATTTTTAAAGTGACAGAAATATGATCATGGCCATGGGAATGGTGCCCCTAGAGCTGTGCCACGAGGAGTACTGGCCTCTTCATGGTGCCAAGATGTCCCTGAGGCCTTAGTCACCTGGGTCCTTGGTGTCCCCTAGGTCAGGGCCATCCCTCTGTCATTCCCCCTCCCTGAAGCACCTGCCCCTCCTTTCTGCTGAACTAAATTCCTCCCCAAGTCTCAGTTTTCCAGAGTCTCCCTGTGAGCTCACACTCATACCTACCTAGTTTCTGAAGAGCCCCGAGCACTGACGGTAGTCACTGTGGCACCTGTAGCACCCTCTCCAAAGGGTCGCCAGCTCCTGCCTGGCTCTCAGAGCTACACAGCCTCTCCTGACCAGGCTCACAGCTCCACAGCTCTGCGGCCCAGGCCACCTGGCATGGCCCCCTGAGTTAGTCTCTCCCCAGGCCCACCATCAGCCCTGTTGGTAGAGCTGGGTGGACTCTTATCCGCATCTGTAGCACCATTATAGGGCTGGGCAAATGTGGGCAGTTGCAAAGGCCTGGCAGAGTTCCTCTGCATCCTCCCCCAGCCTCCTGGCTACCCCCGGCACTGGCCCCGCCTTCTGTCCCCTCCTGCAACTCATGGCCCCTCCTGGGCCCCTCAGTCACAGAGAGGCCTGGACATAGCATCAGGTGATAACtaatacctgcatgaccctgggaagccactcagcttctctgtccctcagtttccccatctgtgaaatgggctggccatgcttaacccctggagttgCCAAGGTAGCCCATCAGGGAACACAGCGCCCCTGTACCTCAGGCACTCCCTGGGTGCCTGCCACCTGGGACCACGGAGCCGGCACACGGACCCCCGTCCTTGGAGGTGAAGACGTGGCAGGTGGTCACGCGCACGGCACACTCACGTTTTCACGTAGGGGTCCGAGTAGCCGTTGGCGTCCATGGCGGCCAGGTGGGCGCACCGCACGATGCCTACCAGCAGGCCTTGCTTCTGTGAGCTGTACTTGAGGGAGATGAGGATGCGGCCCCGCTCCTCCAGGGACTTGTCTTCAGTCTTGTCCACCTGTTGGACGGGACGGTCACTCAGTCCTCACCTGCTCCCACCCCTCTCTTGGCCGTCCTTGGCCTCCTCTTCCTGAGCCCGCCCATCCGGCTGCTGCAGCCGGGCCTGGTCACGGTCCCTGGTGAGTGGCCTCACTGTGAACTCACAGCCCTTCTGTCATCTCTTCCCTCCCAAGGGCTCTTCCAGGGCTGGATCTGACCCACACCCTCCCTGTTCTTGGCTGCATGTGGCCTACGGTGGCCTTCACAGCCCAGCAAGGGCCAGCCcaggggttggcaactgcagcccgggggccagatcaggcccgacgcctggctctgtgtgggatgtgagctaagcatggctcttacccttatcttaaacaatttctttttcaaaaaaatagagacaggggtctcactattttgcccaggctgatctcaaactcctgggctcaagggatcttcccaccttggcctcccaaagtgctgggattacagacatgagccaccgtgcccagctggttcttactttttggaatggcagaaagaaaaatgaaatgaaaaatattttgtgacacatgaaaatgacatgaaattcacacttcagcttccattaacaccgtgttgttggaacgcagccttgccagctccgtgatgcttctctatggctgcttttgccctaaggacagagctgcatggtggccacagattgcgaggcacacagagcctaacattagcgctaagcggcccttcacggGTCTGCAGCCCCCGGCCTGGCGCGCTCGGCTTCCGCGGACGGTCTTCCACCAGGTCCTCCTCACAGCCCGCCCGGACTTTCCCTTGGCTTTGAAGCCGGGTCCCACCTCCACACCTTTGCGCACCCTTAATCCACAGCTCCAGCACCCTCCTCTGACCTCCTCAGTGTTCACCGTTGATAAATTCCTGGTCCTTCTGGGCCTGGGGGGCTTTTCTGACCCTCAGGGTGGGTTGGCCGTCCCCTCTGCGCGCCCACTGCCTCGGGTTTATCCTGTCATGGCCTGGTGTGGCCCCCTGCTTCCTCAGGGCTCTGCCCCAGCCTCTAAGTTCCTTGAAGTTGGAACTCTACCTGTAAAttttgattttgacagagtctgactcttgtcacccaggctggagtgcagtggcatgatcttggctcactacaacctctgcctcccgggtttaagcgattctcctgcctcagccttctgagtagctgggactacagtctaatttttctattttttggtagagacagggttttgccatgtctgccaggctggtctcgaattcctgacctcaagtgatccacccacctcagcctcctgaagtgctgggattacaggtatgagccaccgcatccagATAATACCTGCGTCATCTTTAATACCTCAGGGCTGGGCAAGGCCCTGGCACTAAGGGCCCACTGGAGGCTTGTACTAGTGAAGGCAGGAATGGAGGGACAGATGGTCCCCTGTGCGTTCAACCTCATACAAGCAACTCCAGCCTGGAGGTTCAAAGAAAGGCAGAAAGGTCTGCCCATGACAATGGAGCCTGGTGGATGGAAAAGGGTCACCGTGGCCCCAGTTCCCTGAAGGTGCTACCTGGAGCCTCTCAGGGTGCTGGATGTGGCTCCCTCAGGAGAACCCCGAAGACAGAGTTCTGGTACATTCCTCACCCTGGAGAAGCTGGGAGCCAAGTAGGGAGCCCATCCAGTGCCTTCCCTGCCTCAGCAGCTGCCAGTGCCTGCCTCTCAAGGCAGATCTCAGCTCCAGGCCTCCCCATCCCCAGCCAGCCTGATGCTTCTCCATTCCTTGCCCTCCCGAGACTACCCGGCTTCTTCCTTCTGGCTGCTGCACACCCCAAACCCTCTCTCCTAGGCCCTCAGCCCCCCCAGACAAACCCAGCTTGAGCCCACCCACACACGCCCAGCTTGAGCCCTGTGTCCCCCTGGATCCCCTTAGGACTCTACCAGGCTCCTCTCCCAAGCAGCCCCAGCCACCTCCTTTTGCCCACAACCGTGCCCTGCCTAACACCAAAGTGAGTCCCTCTGGCTACATCTAACCCTTTCTGGAGATGAGACTCCTGTGAGGTGAGCAGGGATGTTGAGcagggaagacatacatgccccaccagccaccttcgccacctcccacgcccagagcagacatggctaatcgatcctagcatttgccctaggctccaatgccacctcagaatccttttcaacacagtgctcaggcagccattcctggtggatcaggccaggcctgcgagatCTCGCTATCTGCCGCTCAGGCAGAGCTTTGTGTGTGAGGGCCTTGATGCCCTGTGCTGCTTGTGTCAGTGTGTGTGGTGTGAAAATTAAACAATATAAACTAGCAGGggccaggcacagtgactcatgcctgtaaccccagcactttgggaggccaaggcgggcagatcacctgaggtcagcagctcaggaccagcctggccaacatggtgaaaccccatctctactaaaaatacaaaaattagccgggtgtggtggcgcatgcctgtaatcccagctattgggaggctgaggcagaagaatcacttgaacccgggaggcggaggttgaagtaagccgacattgcgccactgcactccagcctgggcgacatagactccatctcataaaaaaaaaaaaaaaacaaCCAACCAGCAGGCATATTTTTAGCTCTTTTTTCAGGGGTGGGACATTGTACTTCGGGTTGTTTCATATGAAGACCACTGGGTCTTGCTCAGTATTGACTTAAAACAGATAATGTTCGCAGACTGTAAATTCTAAAACCTACCGCCAGAggcctggcacaggggctcatgcctgtaatcccagcactttgggaggccgaggcaggaggatcactggaggtcaggagtttgagacttgcctggccaacatggcgaacccccgtctctactaaaaataccaaaattagccaggcgtggtggcgcacacctgtaatcccagcactttgggaggctgaggcaggtggatcattcaaggtcaggagtttgagacacctggacaatatggtgaaaccccatctctaccaaatatacaaaaattaaccaggcgtggcggcacacgcctgtagtcccagctactcgggaggctgaggcatgagaattgtttgaactcaggaggtagaggttgcagtgaacagagattttgccactgaactccagcctggatgacagagcaagactcagtctcaaaaaataataataataaaaGTACCACCAGAATGTGGCTGTACTGTCAGGGGTGCATCCCCAGCTGCACTCCTGCGGTCACTGTGAGTCCCTGAACGGCACCAATGGGCCGGTAGCGCATCCAGCAACGCCCTGATCATGGCcacgcacagggacgcacatgctttcacgaacgcacaccacacatgtggacacacacactgtcgcacacagacacgtactgacatatgctcttacacacaattcacacacgagcacacacacacacacgctgacaccccacgtacatacccacGTGGTTGTTTGTTTATGCCAGTGATGAAAACTCAGGAACACTAAGGCAGGGCTGGTGTTGCtttttttttttttttttgagacagagtcttgctcttgctcttgtcacccaggctggagtgcaacggtgcaatctcggctcactgcaacctccgcctctcaggttcaagcgattctcctgcctcagcctcctgagtagctgggattacaggcatgcacccccacacccggctaatttttttatttttagcagagacggggtttcgccatgttggccaggctctctcgaactcttgacctcatgacccacctgccttggcctcctaagatgttgCCTTTCTTAAGTGACATAGACCATGTGGAAAAACCGGGTTACCTGTGGTTAGTGACTAACAATAAAACAGGAAAGGTTATATCCATCACACAAATGTCTGAGGGGGAGAGAATGTGACAAGGAATAAAATTGGATCAAATTCTGCAAAAGTAACTGGGATTCTGGGAAGAAGCCGTGGCCTCAGGCTGACTCGCCCCCGGGGCTTGACTTGGGCTAAGCTCGAGGTGAGTCCACGTCCCCGGGCCCCACTGGGGCTGGGTACACTGGGGACAGCCGCCGGGCTCTGTCCTCCCAAACTTGCCCCTTGCCCAGTCCTCTTAGGGGGACAACGTGCCATCGAGGGGACCATGCCTCCGCCTGTGTCCTGAACGCTGGGAGGCTGAGGCCCCAGGATTTCTCTTGACCCCAGTGGCACGGGGGACTCCTGGCTTCACCAGCCCTATGAACCAGGTGAAGGTGAGGCCATAGACAAGGGAGGATGGGGGAGGGAAGAGGGACATAGAGACCAAGACTCAGAGGGCGTAGCTGCTGGAGCAGGCCGAGGGCAAATCTGTTCTGACATAACGTTGAGACAAATGCCATTTCTAGGAAAGGAtactctgctgtctcctctgcgtatctcacaggcactcaggtctaacatgttccaagcgtgctccttgcgcgtcctgtccacccgtggtccctgctgagtcctctgagtgcagcaaacagcccctcaggcttcagtggctcaggccccaaacctcggatctgcccttccctcacccaaggacgtcctatctgctgcctgcacatctggttcagaatcaagccctcctaccgctgccaaggtcaggccagggttgtgcccatctctccccatctccccaggcctcctgccctctcctccctcttcttcaagccatcctgagcccaggccagcgagcctggtaaaatgtcatcccccatgatccctcagcttagcaccctcccgtggccactcagagtgaaagccagggtccttcctcacctccacccccttgactctccatgctcacctccccggtctcccctcccctctcactctgcccctcATGAGTCCCATCACAGGCAGGAAGTTctgccttcccagcacctgccaccgagccaggtacacagcaggtgctcaatcaatCCTCTCACCGGCAGCTGCTTCTCCAGGCAGATGCTGAAGGTCTTGGTGTGGTTGGGTTTCAGCTTCTTCAGGGGCACACGTGTCTCCCCGATGAACTCATTGTGCCGGAATTTGTCCTCGTCACACACAGAGATCCTAGAGGGGGCGGTGGTGAGGGGCACAGCCAGTGCCTCAGACGCACTGGGCATGGTGGAGGTGTGCGCAGGTAGGGCCAGCCCTGGCTTCTCCTGCCCCAAGCCCTGCCCTGGTCTGGGGTGGGAGACGCACAAGATGCCTGGGCCCTGACAGGGGCAGAGTGTGGCACGATATCAGGCACTGTCCTCATGGACAAGTGTCCTCAGGTTGGAAGAGGGGACAGGAGAAGGCAGAACCAGTGCCAGGAGTAGCCAGGAGGCTGGGAGAGCCGGTTCTCTGGAGGGAACCACCTCCAGCACCCTGAGAGGCCCCAGGAAGCACCTTCAGGGGACTGGGGCCAGGGTGACCCTTTTCCACCCACCAGGCTCCATTGGCGTGGGCAGACCTTTCTGCCTTTCTTCAAGCCCCCAAGCTGGAGTTGCTGGCATGAGGTTGAACCCACAGGGGACCATCTGACCCTCCAGTCCTGCCTTCATTAGGAAAAGCCGGGTGGGAGTAGGGGTTGGGGAGGGAGCAGGCGGCCTGGGACCCTCACCCACCGCAGGGTCTTGCGGATCATGTCTTCATCTGTGATCCCGTAGTAAGTGAGGGTCTCGTTCCATGTGGGGTTCAGAGTGTTACGGAGAGTTTTTGTTCTGAGCTTATTTGCCTGGAGAAGAGAAAAATGATCTTATTAGCATCAAAGTGtgtatcaaacagaacaatggcccccagagatggccacgtgctcatcttggagcctgtgaatgtgttatcaacatggccaagtggactgaggctgcaggtggacttagggttggtaatgagctgacattagaatagggagattatcctagattgttgggtggcccaatgtggtcacagggttcttaaaagcagaagaatggacagagaagacagtcagggacgtcaccaaggaagggggccagagagatgcaatggggcccgctgtgaaggtggaggaaggggccacaagcccaggagggccgatggcctctagaagctggaaggagctaggaaactgtgggctccccgggctccagaaggaatgcagccttgccgacaccttgattttagcccagagagagaccactgctggacttctaacctgcagagcagtccgagagtaaacgcgctgctttaagccacgaagtttgtggtcatttgttgcagcagccgtaggaggctcatccaAGGAGGACCCCACCTCCAGCCCGATGCCACGGGGTAGGTTCTGCAGGAGGCCTGTGTGGAACTGGAGTCTCCTTCCCAGGGCATTCCCTGCCTTTGTGGACCGTCCCCTCTGCCTGGAACCCATCCCGTCCTTGGGTCTGCCTCGGGGGTGGCCTCTCCGAGCTGGAATATTGCTGCCACTCCCTCCTCCTGTGCCAGCGGCTCTGAGCTCCTTGTCTGAATCGGTCTGATGCCTGCCACACCCGgcccaggctgggaggtcagacagcctgggcttccaattccagctctgtggcagcatcaggttccccactgtggaaagggcacaggaatccctctctcccattgctttcaggagctgtttgtaaggagactgctctttataaaacactagggaaagtcctggGGACTTCCTACAACTCGGCAGCCATGCACTGCCGGCTCCAGTCCCACAAATGAAGGGTCACTGAGCACACTTCCCTGGTCATACTCGCCCTCGGCCCTCATATCCCTGAGCCCTTCTTGCAGCATAAGGGCATCAAGACCCTGTGTGGGGAGCCCATTTCTTCCCCAGGAGTAGGTGGTGGGGCACTGCCATTTCTCCTACAGTCCTGCCTTCCCTAGAGAAGGGGGAAGGGCCCCTTCTGGGGTGGTCCTCCTGGGCTGCTGTGGGCCCCAGGCCTACCCATTTAGGCCTCCACGTAAACTCAGACTCTGCCCCATGAAATTAAAAATAAAACAGCACTGAAGTGTAGGATGCACAGAAGGAAGTCTAATTAAGTTCTGACTTTCCATGGCTCAAGGTCACCTTGATGCTTTTATTTAACAGCAATTTTTCTCTCTCCTGTCCGAGATGCACCTGTTGTGCTGGTGCCCTGAGCACATGTGTGGTCGGACGCTTGGGTCATCCGGCGTAGGAATGAGGGGCAGATTctgcccacttcatcccccttgcggagagtcagggaacagcccatcaacacgttagatgctctgagaagtcctgtggtacagaaacctgtttaacaatgtttccccatgttatttaaccacagagtcctttgtttGCACCTAATAGTAACTTCCAGCTGAGAATGCTTCTTATAAAATGCTGGCCTGGAAATTCTCCAGGGGTTTCCCCAGATCAGCAGGTAGGTAACGCTGACTCGTGAGGTCCCCAGGAGGCAACAGGGTGTGGGGCAGACTGGGTGCTCTGTGTGCAGTTGAATGGCTGAGGTCCCATCAGCTTGCTTGGACTCTGAGGGGAGTGGCAGCTGTGGGCCTCCCTGGGTGCCCGCAGCCCAGTCCAGGCCCCAAAGCAAAAGGACCAGAGTGCATTGCCTGAGGTGGTGGGCGGGTGCAGCTGGTGGGCAGGCCTGGGTGGAACCCCTCACCTTACTGGCTCCTGGCAGCAGGTGCAGCTTGACGTAGGGGTCTGCCAGCCCATTGTGGTCCATTGGCTTCAGGCCCTGGGCAGAGAAGAGCAAACGGTGTGAACTGGAAATCGGGGACATGGAACTGACAGGGCCTGCAGATGCCCTTGTCCCCTGGGTCTCCTGGCTGGGCCCATTCATGGCCCCTTTCACAGAAGCCCCCGGGCCCCACACCTCCAGATGGAGGGAGTGAGGATGGCTCAACGCTGATGCAATCTCCCCTCCCCAAAACATCCTGGCCCAGGGCAGCTGTCATGGGGCCTCTGTCAGGACAGACTTTCTCTTGTCCTCATCCTGGAAGCCAGGTGGCTCCCCCTTCTCTGAGCCTCCCTCTCCACCCTGCTTTGCTCAGGCCCTGCCATCATCCTGACTCCCGTCTCCCATCTACCAAGCGTGGCCTCCTGAGTGGCCATGCCCCTCACTGACCCTAAATCCACATAACCCCGGGACACAGTCTGAGACTAGGCCCCATTTCTGGCCATATTTGCCACAACTCCCAGTCAGTCCACAGAGATCACACAAGCAGACTGGCCACAGAGGGTCCCTCCACAAGATGGCACCCCTCTCTCACTCTGGTTCTTTCAGAAAACCCAGCCACGCGTGCACAGCCAGAGGCACACAGAAGCATGGACAGAGAGGGGCTTTGCTGACCTCAGCAGGCTCTTGTCTCTGAAAATTGCATTTGTTTCCTTCTGGGATGGTGCACAGGGATGCAGGGAGGCAGCTGTGGCATCTGTGGGGGTGCAGCCGGCAACCCCAGGATAGTATTCAAAGGGCTTCTACCCAGCCGGGTCAGGGAGGCAGCTCGGCGGCTCACACCAGGCCCATCTCCAAGGGGAGGCTGGGGCTCCCTCCCAGGCCCACCCCACCTCGGTTTGACGGAGCTCTGGGGGATGCGGCAGGGGCTTCCAACCACCTACTTCCCTGCcactggcaaagggcagaggtttgggggcctggctggcttgggctccaggctcagctcctgttcaccagctgggaagtggatgagttactttgcacgctaagcctcagctttatcatctgttaaaggggttggccgggcacggtggctcacgcctgtaatcccagcgcttgagcccaggagttcaagaccagcctgggcaacacagtaagaccctgtctcttcaaaaaactaaaaaattagctgggtggggtggcatgtgcctgtggttccagcaactagagaagctgaggtgggaggatcgcttgaacctgggacacggaggctgcagtgaaccatgatcgcaccactgcactccagcctgggtgacagagtgagaccctgtctcaagtttaaaaaaaaaaaaaaaaaaagggcgggggtgtaatactcccaccttcctagggctggagtgagagggagggagactgtgtggacacagggcctggcaggcagGGTTTGCTCCCTTCCCTGCACGCCAGGCCCCCGTGAGACGCCGGAGAAGCGGTTGAGGTCTGCTGGGTGCATCTGAGCTGTGCCCTGCACTGTGGGGCTGCCTCTGGGACAGGGCCTGGCTCAGTGGCCCAACACGTGCTCAAGGTGACAAGGCTGCTCTTCCAAACCCAACTCCCTCACCCTGCCTTGCACCTTTCCCACCAAGTGCCTGAAGTGCAGAGTTCTGGGAATCTGGTGGCCTGGCCTGACCAGGCGGTGCCTGACTGCTGTGTCTTATTCTTACAGTCCTTATACATTTGTTTGTTGTAGCTTAAAATGTTTCCATTTGGGCCACGTTTCCTGACACGTTTCACTTACAAACCTTTGGGCCACCACATGTTCTGGCGTATGTCTTTCCAGTGGCTCTGGTGGTCCCACCTCCGTACCCCGAGACACCCGTGGGAGCTAGGCTGGGTCGAGGGGCGCTGTCATGCTGGGACGGGAGTGATGCCTGCTCTGTCCTTCAGACCCCGGGACTGTGACGAGGCCCCTGCTGCTGCTCTCTACTCCTCCACCCGGCAGTTCCCTGAGCACCCTGGGCAGCCACACATTTCGCCAAAAGCAAAAGGACCAGCAGGCACTGCCTGAAGCCCTGTCACTGTGCTGCCACCAACTTCTGTGCCCAAACAGGCAGCTTCCCTGGTCCTGACCCGGGGTTCCGCCAGTGCCTCCACCTTCTGTGGGGCTGGCCTCCCACGGAGCCTGACCTCTGTCCACGGAAGGGGAAGGTCGGGAGGCTGTTTCCAGGGCAGGGAGCTGCTAGTGGGGCCCTTGGGCACATGCTCCCCAGCTTGGGAGTTGGCAAGAGACATAAACTGATCCCATGGTTTAGGGGTGGGCCTCACGGGAGGtgacgtggtttggcggtgtccccacccaagtctcatctggagttgtactcccataattcctacaggttgtgggaaggacccagcgggagataactgaatcacaaggggaggtttctcccagactgttcttgacgtagtgaatacgtctcacgagatctgatgatctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACACTCTTCTTGACGTAGTGAATACGTCTCACGAGATCTCATGGTCTGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActcttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActcttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActcttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActcttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctgatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActcttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActcttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActcttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctgatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActcttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActcttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctgatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActcttcttgaggtagtgaatacatctcacgagatctgatggcctgataaggggaaacccgttttccttggctctcattctctctcttgccaccaaacatgtgagaagtgcctttcactttcagccataactgtgaggtcttctcagccacgtgtaacggcaagtccaataaacctctttcctttataaattacacagtctcaggtatgtctttatcggcagcatgaaaatggactaatacaGGGAGGATGGGTGGAAGCGGCCCCGGGGGAGGCCCTGGCTGGTACTGGCACTGAGGGAAGAGATGGGGGGTCTGGCTTTGAGAGGAGAGCTTCTCCCCAAAAACCTAGCCCTGCCCCGCCCTGGGCCTCTCAGAGGCTGTTGCTGGTGAAGTGTTCGGAAGAGGAGCTTTCTAGTCtgaagtatcattcagcctgaaaaggaagttttgacacgtgctgcaatggggatgaagcctgaagacattctgcggagtgaaagaaggcagactcaaaaggacagatcccggggactgcagactcaaaaggacagataccgggagactgcacttacatgaggtccctagaatagtcaaatccatagagaaggaagccaaatggcagcctccccaggggccaggggaggaggaagggggagctgttgtttaatgggtccagtttagttttgcaagctgaagagggctctggagatgggtttcacagcagtgtgaaggtagaaggtacttagcacaactgaactgtacgctaaaaatggttgagCGCTGAGGGAAGAAGTAAAAAAAAGTGGTTGAGGTGGGAAATGTATATCTGTGTATTTTACCACAATAAAAATAAAAAGTCTCCCAGAACTGGTAgtgccaggggccacgtgttaactcatttaatgctcacaacaggcatgtagggcagggacaaccaaccctatttacagatgggcaaactgagactGACCCTTATAAGGGGGGACAAGCAAGGGTGCACCCCAGGGTGTCCAGCCCCCACCCTGGCCCTCCAGAGGCCAGCCCTCCTTCAGCTCACCCACCCTGGGCCCCTCCCCACACCCCAGCCCAGAGCCCCAGCTCTTCCCCAGCCTGCACCACCCCTTCCCTACAGAACTGGATTTACACAGAGAAGGAACTGGGCCTCCCACCCCCACTTCTGATACCTGAGGGATACAGCCCAAAGTGGacacacacttacatgtgtgcacgcacggtaccacacatgtacacacagagacacacatacagccatgtatgtgcatacacacaaacgcacCTGGAGCTGGGAAGGGAAGGCCCTGGTGTCTGGCATGGAGAGAGGAAGGGGTGGGCTTTGGCCAGAGTGGCCTGGCAGCCGGCACCTCTCCAGTCCCCAGGCCTGGACCACCTCTACAAAGTTGGACAGAGGGAAAGGAGGAAGGGTCTAGCTTGGTCTCTACCTTGGCACAGCTGGGATTTGACAAATGCTCAGTTCTGCTCCTAGGGGTGGGCTGGAGCCCCCGCCAGGCAGGGCTGGACATGCCCTGAGTCATAGCATGGGTGGTTCTAGAGAGGGCAGGGGTGGGATGGAGCGTGCAGGCCTCTCAGTGCCCTACCAGGGCCCTGAGGCTTGCGTGGATGGCACTCACACCTACCCATGGCAGTCCACATGTGGCCCAGGCTGGGCTGGGGGACAGCCTGGGGTGGCACGCAGTAGCCTGTCCTGCTGGGTGAGCATGCTGCCAAGGGCAGCCCTTGCTGCCAGGCTGGGAGGAGGGGCAGGGGGCCTGCAGGTTGGGAGGCTGGGTGGGGCCTGGGCCCAGGCAGCTCTGTGGGAAGCCGCTGGATCTGAGCTGGGCTGGCTCAGGCCCTTACATGGCACTACTAGGGAGACTCTACTGGCCATGCAGGCCCTTACCTTGCGAGAGAACATCAATTTTGGCACCTTCCTCCCACAGGGAGCAATGGGGTGAGGGGAAGGGAACAGGACAGTTGAGAACATGGAGCTGACACATGCTTGAGTGGCAGAGCCAGAGGGCAGCACCAGGGACCAGGCCAGGCTGCAGAGGGGAGCACCAGgggccgggccaggctgcagagggcagcaccaggggccgggccaggctgcagagggcagcaccaggggccgggccaggctgcagaggggagcaccaggggccgggccaggctgcagagggcagcaccaggggccgggccaggctgcagagggcagcaccaggggccgggccaggctgCAGAGGGGAGAGCACCAGGGGCCGGGCCAGGCTGCAGAGGGGAGAGCACCAGgggccgggccaggctgcagaggggagcaccaggggccgggccaggctgcagagggcagcaccaggggccgggccaggctgcagaggggagcaccaggggccgggccaggctgcagaggggagcaccaggggccgggccaggctgCAGAGGGGAGAGCACCAGGGGCCGGGCCAGGCTGCAGAGGGGAGAGCACCAGGGGCCGGGCCAGGCTGCAGAGGGGAGAGCACCAGGGGCCGGGCCAGGCTGCAGAGGGCGGCACCAGGGGCCGGGCCAGGTTGCAGAGGGGAGCACCAGgggccgggccaggctgcagaggggagcaccaggggccgggccaggctgcagagggcagcaccaggggccgggccaggctgcagaggggagagcaccaggggccgggccaggctgcagaggggagcaccaggggccgggccaggctgcagagggcagcaccaggggccgggccaggctgCAGAGGGGAGAGCACCAGGGgccgggccaggctgcagaggggagcaccaggggccgggccaggctgcagaggggagcaccaggggccgggccaggctgcagaggggagagcaccaggggccgggccaggctgcagaggggagcaccaggggccgggccaggctgcagagggcagcaccaggggccgggccaggctgcagaggggagcaccaggggccgggccaggctgcagagggcagcaccaggggccgggccaggctgCAGAGGGGAGAGCACCAGGGgccgggccaggctgcagaggggagcaccaggggccgggccaggctgcagagggcagcaccaggggccgggccaggctgcagaggggagagcaccaggggccgggccaggctgcagaggggagcaccaggggccgggccaggctgcagaggggagcaccaggggccgggccaggctgcagaggggagcaccaggggccgggccaggctgcagaggggagcaccagcggccgggccaggctgcagaggggTCCACAGGCACCCACAACCCCAGCCCACGTAGTGAGGCTCAGAGGGCCTCTGGGCTCAGGCCGTGGACACCCTGCCTGGAGTGGCATCGGCCTCCTACAGTGGCTCGGCTTCCAGGGTGCAAAGTGGCGTCCCCACTCCTCAGGGCGTCTGGGAGGCCTGGAGGCACCAGTACCCAACCCGCCCTCCCTTGGCCTGCAGGACAGACATCACCCTGCCCCTCTCTTTCCCTCTCAGCAGCCCCTCCCCAGGCTCGAGGGTCCTCGGTCCAGGCCTTCATCTTCCCATTCTCATCTGTTTCTTTGCTCCCCGCAATGCCTGACTGTCCAAGGCATTTCTTGGGGTTGGGTATTCAAGAAGGTTTAAAGAAGAATTCCTTCCTGGCCCCGCACCCCACACAGCGCAGACATCCAAAAGCCTGGACAGGAACCTGGGGAGTGGTGTGGTCTGGCCTCCCTGACCTAGGCCCTCTTGAGGACCCCGGGGCAGGGAATTTGGGGGCAGGCTGGCGGGGCCTACCTTGGCCTTGGTGATGGTGCAGTGGAGGGCGTTGTTCTCCTGGTCATACAGCAGGCTGAAGTCCAGCGTGCCCAGGGCAGCTGCGGACAGAGGAGGGCACAGGTCCCACCCTGGCCGCATCTTGGAGAGGCTTCGCCTGCCCCTGTGAGACCAGATGAGCCTGGCCTGGGCAGGTGCCACCGTTCCATGGCGgctgccaccaaccaagcacctgctctatgccagcccctcacccatcctcccagtcccacccaaccctgggaagtcacaataatctccccactttccagaggaggagctgagacccagagaggtcaggtgggtcactccagttcCCTGTCTGGCCCAGTGTGTGGTCCACCTGAGCTGGGCACCCAGCCAAAGGAGTTCCTGCCCTCCTGGTGCAACTGCCAGTCTGGGccccgtgcctcagtttccctcacctgtgaaatgTCACAAGGATCACACAAGGCGGAGGAGACGAGGCTTTGAGAGGAACAGGTCCTGGCCAGGAAGATCAGCTGATTTGCTCAACAGTCCCCCAGCCAACACACAGGACTGCAGCTCCTCTGTCTCTCTGGCCCGTTGGGAACCCCGAGCAGGCCGTGAGGAGCCAGCTGGGTCCTCATACTGCTGCCCCCAAGTCTCTCAATGGCAGTGGTAACTCCCAAAAGCCGGGGGGAGGGTGCAGCCATAATTGGGGGAGGTTGCAGCCATAATTGGGTGCAGCTGCCTCCCTCCCCGGGGGCAATCACTCATAGCAGCTGTGGCTTTCCATGCAGAAGCGGCTCCCAGCATGGAGGCCAAGGTGATGGTTGGGGCAGAGCTTGGAGATGATGGTGGGGGGCAGAGCTTAGTGGCAGTCCGCAGACAGCAAGATGCACATTCACAGATGGCTTCAGAAGCCCAGAGCCTGCTCCCAGGCTGCAGGGCTGGTCAAATGGTGTCACattccttcattatttaccaagtgtttacaacatgccagactctaggggatggacatctgtgaggcagagtccctatcccaaggaaagcacagcttaaggggaggaacaaatggaaataatttgccgcagtagagtctggtgagccgggagccatggggagcccatgtgaattgggaccaccagggaaggcttcctggaggaggtgatgcttcacctgagccatgagggatgagtaggagttggccaatggcaaagaggggctgggggaggtgacgattccaaccacaggccaaccagcaggtacgggcttagtggctggtgtgatcccggGATAAGGGAAGGCCAAACTTGAAAGACTGTGTTCTTCAGGCTCTGAGCTAGGCTCCCAACCTGGATCTAGATCCAATTACGGCCACACCTGAccaggcccatcatttacatctcttctgcggctgctttcctacttcagaagcagaccctacggcctgcacagctgaaaatatttactatccagcccttctttatagaacaagtttgcagatctctgGGCGAGTGCTGGGCCCAGGCATAACAAAGTTTCTTGGGTCTGCAGGGAAATTGGCTAATGAAAGGCCACGAGTGTGGAGTGTCCTTTATTCTGAGCTGGTGATGGGAGTGGCATTTGTCTAGATCATCCCCTTCTAACACGGGTCAGGCACATGTCTCTGTCTTCGCAgcagtgtggggtgcaagacctgccctctgacatcagtctcctcggttcaaatcccagcgtgcacttccctgctgtgtggccagcttatctaacgtctttatgcctcagtttactcatctgtgaaatggagacaacgatagtatccacctcacagcacagtgtgcctgcggtgtaggaggtgctcagtagatcattatagaaggagcagatCCCTATGGTGGGTTTTCTAGCTAACAGTACTGGCTTCTCTAAGACCCATTCTGGAATGAAACTGTCCTCAATTGGCTCATTTCCCTCCCTGCCTTTGGGAACAGAACATACAAATAGTCACTTAAATTTTTTGAATAACCTGAGTCATTGCTATGGCCACGATGACTCTTCCGTATGTCACGGTCATGTTCCAGAGGCAAAAGGGGACACGAAGCAACCCCTGCACCAGAGGGTCCCGGGTGGCCCTCTTTTCGGTTTTCTTCATTTTTCTGCCTCTTCCTTTGACATCAGCCTGAAACTCCTATTAGATTACTTGTCTGTGTTGGCGCAGACAGCTCTCACCTATTCAATAATTGTTTCTCCCATGAGACCAGGTGATGATACTTGTTGGAAGTGTGTGCAAACTAAGAGCCAACAGGCCTTGAGTGGCTggtgtggggctcatgcctgtcatcccagcagtttgggaggccgaagcagatggatcacttgagctcaggagttcgagaccagcctggccaacatggcaacaaaaactacaaaaatggccgggtgtggtggctcacgcctataatcccagcactttgggaggctgagacgggtggatcacttgagcccaggatttcaagaccggcctggccaacatggaaacacccatttctacaaaaaatacaaaaatagccgggcatggtggcgcatgcctgtggtcccagctactcgggaggctgaggtgggaggaccgcttgagcccaggaagttgaggctacagtgagtgtgattgtgccactgcatccaacctaggcgacagagcgagatcctgtctcaaaaaaataaataaaTACTAGGTCTTGAGTGAAGCCTGAGGCACGTTCTCCTGAGCAGGGTATGTGTGCAGCAGCAATAGCTGTAGCTGGCCTTCAGCTGCCTGCATGCTAGCTGCCATCTCCACACCAGGCAGGTGAGGCTGCTGGAAGGGAGGTGTATCCCACTGCTCCTGCAGCAATGAACACAGCCACAGCCCGAGATGCACCAATCTGTACTTTTGTAACAATATTCTCTGAAGCTGTTGGCTTTACATGATAGTAAATGGATCTGCTGTTCAGGTCAGGCCTACCTGGGgtttgttcccccaacgaattagccccctgagcacagggaccatgccttggccacacctgtgagaccaacaaacagcagatgcagacacacaTGGCTGGTCCATTCAAACCAACTTGCCCTCCAGGTGCTCCCAGGAGCTGGGATCTGGTGTGACACCCAAGTGTAAAAATGCACATTCTGATTTCTGCCTGTTTCCCAACCCCAGAGAAGGACAGCCAGAAGACAGAGCGGCTGCCTGCTCCCCCTGGGACACCCAGCTCCTGGAGGGGAGAAGCCCCTGCACTGGCTCTACAGAAACCCCTGTCCAAGGAGGGCAGCGATCTTCTCCAACTGCCTGGGGGGAAAACTGACACCCGTCACCACCTCCTGGGAAGGAACAGCACACCCAGTGTGTCACCAGGAAGGGACTGATGAATTTCTTTGAACTCCACTGGAAGGTCTGTAAGAAATGATAGTTTATAAATAGAAGCTCGGTCACAGTTTTGAGAGGCTGGTGAAGCTCTcttacggagcacatggcccaaactcttcatttccctttagagaaatgaggctcagagaggggaggggagctgctcagcgtggcccagcagacccaggcacagaatctaggcctcttcacttccatcctgatagtttctcccttaaagcctactggctcCCAAGAAGttttttttttttgagacagggtcttgttctgttacccaggctggagtacagtggtgcgatcacagttcagcctcccaggctcaaacgatcctctcacctcagcctcccaagtagctgggactacaggcgcccaccaccatgcccggctaacttaaaaaatatatttacaaaaaaagagacagtggtgttggtgcggggggtgcgggggggtaggtctcactacgttgctcaggctggtcttgaactcccgggctcaagcaatcctcccgccttcacctcccaaagtgctgggattacaggcacacacgggccactctgcccTGGTCCAAAaagcttttttttggccactttgtttagctgaaatcttaggaggatgcaactaagaaacaaaaagagagaaaagcagagttgctctaggggaaggagagtgggggctcagCCGCTGCCCTGGAGAGCCCTCCGAGTTGGAAAAGCACCTTGGGCTTGTGCTGGGCGTGAGGAGGGCCCTAAGGGCAGGAGAAGGTGCTTCTCTGTGACCCACAGTGCCCTGCCCTTGGAGCGCTCCAATCACATGACCCCACTGAAGTTTACATCTTCGGAAAAATAATGAGGGTTGGTCACTAATGGGCCTTTGAGAGTTGTCACCCGCTTGTCGATAATTACAGAGCTTTAATGGGGCTAATTCAGAGAAAATCAAGCTCAAGCCCCTGTTGGCTCCTCTAAGGCCGAGCACCCCCTGCTTGGCTGATGTAGACCTGACAGCATCATCATAACCTTGGTTTGCACCTGGACTTTCCTGACACCCTTGGGAATAGGAAATCTGCCCTGAttttttttttttttttttttAAGAAAAATGTCAGGCTCTCAAATACTTTAAAAAAAACAAAACAATTGTGGCAAGGACCATAGAAAATCATCAAGCTTGTGTTGAAAGCATTTTATAGTTGGGGAAACAGCCATTCAGCCAGGCCGGCACtgcaggggggacagagagaaatgagccatcatccctgcttcccaggagcttagagacgagtgTTAAGATTTTTATAAACATCTtcattcatctgctttaaattgttgaaaggttctccatcaactcctggactcagttcaacctccttggtgtggcattcaagaccttgtgtgatatgaccactttggcctcctcataggtctcttccccttctttgagccacttggctctactgttttgcctacactgggctgagtttctgacacttcaggacctgtgtacacagctgtccctctgcgaagagcacctccctttcctcttttccctctgtcagcccacctcctccctcctcccagtcttagggctgagccctctctgtgagtgacctccgcctgccagagctttccatctgggttcttgcctccctgtccctcgctgtggggtcacctgtttgtcttggttttgatgaacaccctcaggtcagaggttgcctctcagcattcttgtatctctgtgcctaacacagagcctgccacatagttggtgcCGTAACATCGAGTGCATTACAAATATCACAGGTCCAGGTAGAACATGACTGGGGCCAAAGAGGTTCTGGGCTCAAAGGAAGGGGCACCACAGCCCATGGGGGCTGGAATATGCCCCCTAACACGGAGAACCTTTGTGGTGGACTCAACCCCTTGTCCTTGCTCCAGACCTGCACAGGCATCGGTCCTGACCCGGTCCTGGGTGAACCACACAGGGCAggtttaacacgggcatgtgatccaattctggccaatgagacaaaaggacaggtctcctggggacttctgggagaggtttcctcgctcttaaactgagacatgagaaaaggaatcggtccttaccaattccttacccagtcaaggaccttattgttacatgtgatgcctggacctgcagcagccaccttggacctgagggTCCTGCTTGGCTGATGGAGACTGACAGCGTCATTCTGACTTTGGTCTGCACCTGAACCTTCCTCACCCAGACACCCTTAGGAATAGGAAACGGGCCTTGATTTTGCTCTACAAGGAAAAACGCACAGCCACGAGCAAAGATGGACAGAACCATCACCCCAACAGTGCTGAGGGGCTGCTGCATTGGTGCACCCTGGTGCAGCAAGACCTCAGCTGtcctcattgtttaagccactctgagttggttatctgtcacttgcaacccatggcactctaGTGATTACagctttgaagaatggactgggcttccacaggtgggtgccgggcaggagagcctctcaggtaggggcagcgtgtgGGGTGTGTGGCCCACGGCCCACCTGGAGGGCAGCACGCCACAGAGGCCACCCTAGGAGAACTGGCAGGGCCGACACTGGGTCCTGCTCCTCATTGGGACATTCACTGCCTTTCCTCCCTGTCATCGCCCCTCTTCGAGATCTGCTGGGAACATGCGAGTTATGTGCTTGTGCTTTGACATGACTTAATAAAGAAAGGGCTTTAAAAAGCCAGGAGAAAAGGCTGAGTAAATAAACAGCACTTCACTCTCTGCCTTGGACAACAGAGCAACCGCCCTCATTTCTTTTTGTCAGAACAGAGATAATCCAATAATTAGGGCAGCAAATAGCATAACCCTGGCTAAAGCCGTAATGAACCATCTGGCTTGAATCATTGAGGCTTATTCTAAGGATCTGGTTGTCACAGCCCAGAGGGGGGCACCGCCTGGGGTACTGGGTGGAAGGCAGTCCCACCCCAGAACCTGTTCCCCCACCATGGACAAGAGCCAAGATTTCTGGGCTTCTGGGCCTAGGATGTTTGTCAGCCTCCCATAGGATCCAAGCCTAGGGTGGGGCCCCCCTCGAAGGGGGCGACAGGGGCCCAAGCTGGCCCTAGCTCAGTGGCACACCGTCCTCCTTAGCTCCTCTGAGCCCGTCTTCAGCATTTCTTTGGCATGTCCTGATAATACGGGTGTGGCCAGCCCACAGTTCAAGCTGGCAAAGCTGCATCCAGCTCTGCCCTGCACTGCTGACCTCATGTGAGTCTGGCCCCTGCACCGCATGTATCTCAGGCCAGGGGCCCAGCCCTGCTGCCAGGACATGCTCCCTGAAGGGCTGCTGAGCTGATGGATTGGAGATGGGGCTGGCTGGCCTGAGTTTCTAGCTTCCTGGTCCCCTCCTTGCTTTGTGGGTTCAGAGACAGCAAAAAAAAGAAAAAGAAAAAGCCTCTTGGCCATTCGCCATGCAGAGGGCCCTCTGTGGCACCAGGAGGGTGGCCTGAAGCTCTGCCCTCCCCTCTTACTTCCTTGGTGCAGCAGCAGAGATGCCAGAAATGGGAACAACTTTCCCATCCGTTCTTCTGGGGGAGGCTTTGGATTCAGGGCAGCCAAAGCAGTTACTTGGGTCCCACTCAGGGCCCACCCAGGGGATCACCAAGTCCAGGGTGAGCTTCGCACAGGGTGCCAGGGGCAGAGGGGAGGGGGGGTGGATCTAGGGCACAGCCCTGAGGGCAAAAAACTCTTCCGACCCAATCTCCCCAAGCTGGCAGGAAAGTGGAGGGACAAGACTGCTCCCCAGCCCCCACGCCCCAGGGCAGGGCCTTCATGTGCCAGGCGCTGGCCCGAGGGCCGTGGTCCAGGCCTCTAGAAAGTGCAAAGCAGGCAGTTTCCCCTACAGGGGCCCTGCTCTAACCGGCCACTGCTGATGGGCCTCCCCAGGTGGGGCGATGGGGGGTCTGTGCCCCGGGGGCACTGGTAATCCCTACCTTCAGCTTCTGGTGGCACATTTGATGCTTGGGAAACTCCAGGCCCGCAGCCCACAGGCCCTGGTGAGTGCCCAGGCCAGGCGCAGACATCCCTGCTGCGCAGGGGAGGGGCAGCACCAGCCCTGGAGAAGGGCCACATCCCGGGAAGGGCTGGGGTTTGACGAGACGCTGGTTTTCCAGGTCTCAGTGACAAGTCTGGAGCCACAGCTGAGCTAGGAGGGGGTTCTCACATGCCATCCCCACCCCGCGCAAACCGACTCCTCACTGGACTGCGACCTCTTCCGGCCTCGGTTTCCCAGCCAGTCCCGGCTCGGGCCGGACAGGCACCCTCGGGGACGGGAAAAGGCGCCAGGAGCGCCCACCGGCCGGGCCTCGGTCCCGGGACTCCGGCGCTTGCCTGCTCCCGGGGGCTCAGGGCTCAGTCCGGGAGGAGGGGGAGCGGGCTGGGGGGCCCTCTCTGCCCGGGGGCCGCGGGCGTAACAGGTGGGCGAAGGTGCGCGGCCCTGGCGAGGGCTGCGGCGGGGTCCATGGACACCGGAGGAGGAAACGCCAAGGTTTTTCCAAAGGACAAGCGGCCCCGCGGTCCTCCTGGTCCTCTGCTCGCGCGCCAGCAAAGCAGCTGCGCTCTGCGGGCCGCCGGGACCACGCGGGAGGCCGGGCCGCTCCCAGCCTCGGGCCCCTCCCCAGCTCGCCCCAGCCCCGACCCTCGGCCGCGAGGCCCTCCCGGAGCGGCTGGCGAGCGGGGAGCGACCGCGCGGCCGGCAGCAACTGGTGTCTCCCCGGGACGCAGCTCCGCCCTTCCCGGGAACAAAAGCAGCCGCCCGCGCCGGAGCTCCGGGAGGGCGGGCTGGCAGGGAGGGGGCGCGGCGCCGGCTCCGAGGAACCCGGCCCCGGAAATGGGACACCCCCAGGGGGTGCCCCCGAACTTCCCTCCTGTCCGGCCTGGGTCGGGGGAGGGCTCGAGGCCGGTGGGCAGGGCGCGGAGAGCGCACCGAGTGCGCCAGGGGCCCGCAAGCCCGCGGCGGGGTTGTGAACCGAGGCAGAGCGCGAGCGCGCGAGGGGGACCGGCGGAGGGAAGCCGCGAGGCCGTGGGGGGGCCGAGCCCGAGCCAGGGGAGGGGGCGCGAAGTCGGCGCGTGGGAAACTTACTGCAGTCGTCCGACTCGTAGCCGTCGGCGTCCGGCTCGTCCTCCGGCGGCTTGGCTGGCGGCCGCGCggggctgggacccgggctggggcccgggctggagccgTAGGCTCCGAAGAGCTGGTCCACATCCTCGTCGTCCTcgcgggcgccgtcggaggggctgcggcggccggcaccggccacagccgggcgcgcgggggcgtccgggggtgcagcggctcggggcccggcgtccgggggcaggccccgcgggAAGCGGGGGAAGTAGTCGGAGATCTGCTTGATGGGACGGATGGGGCCGGGGCACACGTCGATGGCCATATGCTCCTGGATGCTGATGGTCGCCTTCTCCCCGCGCCGCCGGAGGGTCATGCAGGCAGCGccgccccgccccgggcgcggcccggcccggcgcgaccccggcccgggggcggctcagcaggcccggcggggcgcggcGGGGGCTGCGGGCATCGCCGGCCGCGCCCCCGGACGGCCCTGACTTGGCCGCTGCCCGCTCCGCTGCGGACGGCGCGAGCGAGTGCCAGAGGCCGGCAGGCAGGGGGCGGGCCCAGCCCGCGTCACCCGGCAGCAACCAAGCAGGGTGAGTGTGCGGGCTGCGCGGGCGGCGCGGAGCGGAGGGAGCCGCGCGGCGCCACACTCACTCGCACTCGCACTCACACCGGCGTGCACGCCGGCCCGGGACCCCGCGCGCGCACACTCGCGGCCAGGCAGGGCCGCCGGGCGCCTTCCGCTCATGCACGCCCGCGGCACAAGCTGGGAGTCAACCCGGCAGGGACCCGCAGGACGCGCACCCACACGTTCCCGCGCGTGCCAGCCCACGCTGGGCTCCGCTCGCTCTCTCGGGACACACGCAGGTGTGCAAGTGCACACATGCGTGTGCAGAGACACGTGGTGGAAGCATCCGCTCGCTCACACCGTAGGTCACACACGCAAACGTCTGCATGCGCACATGGTTGTTTTAGGAAGCTGTGACACAGTACACCCCCAATGCACAGGCGCGCACACCTGATGGAGCACACACACAGGTGATCAAGGGCACCCAGGGCACAGGCTTCCCTACCCCCAAGCACCCCTAACAAGATGCACAAACATGAGCCCATAGAAGTGATCAGGGGACCCTGGGCACAACTCCTTCTCTCCTCCCCCTCTAGCAGGACGTGGAGTCACACTCCTAGCATGTAGGGACAGGTTGCCTACACACGGGCAGGACATGTACACGCTCAACTGCACAAGGACACCGGGGCTCAGTCTGTGCACACATATTCTCAGGTCACATACAACACAGCACTCTAACCTCGGGTCAGTCCCTCACTTGGTCAGTGAGTACAGTCTGAAGGACCCCCCGGGGCAGGGTGGCTGAGAACCTGCACAGGGCCTGGTCAGGAGGGAGTGAGGGCAGGGCCTGGGCGGTGAGTGATGCAATAAGGCTGGGCGGCACGCTCCCCCCACCCCCACTCCTACTCTAGGCCTCCCACACGGTCAGATCACTAAACAAATCCCAGAGGGCCCAGCCCTGGCTGTCCGGCTTTCCGGGACCAGAGCTCTGTTGGGAACTGCTGCTGCTTGGACAGGTGTGTTCCCGGAAAGCCCTGGGCATGGATGGAATCCTGTTTACCCTCTGGTTTCCACTGATGTGTAAGACACTTAGCTTCTTAGTGGGTGCCTTTGGTGCATCTGAATGAGGGGCTCCAAGCCTCTCTTGTTCCCCCACCATAACCCCTGCAGAGTGATGGGGAGCAGAGGAAAGAGAGGcaaagccttggcctgtggcttccagctgcacagttctggcaggctatttgacctctttgagcctcggtttcctcatctatgaagtgaggctatttccaactgcacagccttgtggcaaggccccatccagcacagacaggtaagaggtgctcagCTGGCTTTCCCTTCTCCCTTCCTTTGGAAAGACAAGACTCATGGTGAGAAGTGATGAGAAATTCATGTTTTGTGGTGAGTTCTGAACTGGGTGGTGGCGGGCACTACCGGCCTTTGAAAACACTGGAGAAACACATGGCATATGTTgtactgcgccaagaacattcacagctgtcatctcattgattccttaaaccaccccacgatgtaggcagggcctgctgttcccatttcacaggggaggaaattagcgctcaggcacaaggatgtgtccagggtgactgctggctggcggcagagctgcgatgagagcccagtgtcctgactgttcgatgcttccactttccctcccttctcccttTCCTCCCCTCCACTACAGAGCTCAGGGGCTCAGAGCAGAGTTGGAAACACAGGTAAAACCTCGTTCCCAAAGCTCATCCTGAGGCTTCTTGGACAGGGGAAGCCCAAACTgaggaggaggggaaggagggaaaaaaaggaggaggaagaggaggacgggaggaggCCAAGAGCCTCAGGGGTTACAGTGGGAATGAACCAGCCGGGGTTCCCTAAAGATAGTCTGAGGTCCTGGTGGGAGAAATATTCAGCCTTCCAAGAGCCAAAGGCCAAGGAAAGGACAGAAGAGCCTGGAAGGGCAGCCTGGCACAGAGGGGTTCTCATTTCCAGGAATGCTTAAGGGGATTAATCCTTAAATAGGCACAGGGTGACTTTTTCTGTGCCAGAGGTGGCTGCATCACAAAATGATTACATAATGACGCAGGTATTAAAATACAGACGCTGAACTGTCATTTTATCATTCCTTTACTAAGGAGCTGGAGGGtgtcgtcatcatcttacagatggggaaactgaggctgcgggaggtcaagtgactagcaagaggcagactggagatgagacctggacgtcctgactGCTGAGCCTGCCAAGGCCCTGTACCTGTGTTCACCGAGAGCTGGCCGCTCTCTCCTGGCCTCTTGTCCTATGGGTCTGGTTTTTGGAGGATCGGCTCATGGCTCTTGGCTCTGCAGGAGCTGGCTCTTGGGGAGGGCTTTGAAGAAGTCAGGTGGAGGGCCCAGCCTCCTAAGCATGGAGCCAGGGAACCCAAGGATGCCCACTGGAGAAACACATGGCATGTTGGGGTCTACCCTTTGGCAGGTGGCAGAGTTGAAATCCACTCTAGGTCTGAATTTGCCCAGCTCTGAGCCCAGCTGAAATGGGGTAGGGCCTCCCCGAGGGATAGAAGTGGATAGAAGTGGTGTTATCTGGGCAAGTGTCCACTTTCtagaaagagcataggtcagggagtaggcagacgtagagtcaaaagtcaggtgtgcctcctcctaagcgtaccggactttgggcaagttgtttaacttaggccctgggtctcatctgcaatgggggagaataagaacaacgttgtgtgagttaaaggtgaggatgggtatacagtgcttagcacagtgcccagtgtgcaactggcactcagggaattgtgattctgttGCCCCTGCCTTCCTGGTGCAAACCGTCCCATTGCAAATCTTCCTGGTGCAAACCGTCGTGGTGCAAACCTTCCTGGTGCAAATCGTCGTGGTGCAAACCTTCCTGGTGCAAACCGTCCCAGTGCAAAACTTCCTGGTGCAAATCGTCCTGGTGCAAACCTTCCTGATGCAAACTGTCATTGCGCTCCCAGTGCCTGCCTTGATTTCTCTACCAGTGACATGTGGTTGGCTGCTCCCTGTCTGCTGCCAGGACCAGGTGAGAAATGGATGCACTTGCCAAGGCTGGGCGCTGGCTGGCATGTGTGGGCATCTCTAAGCAGTTGGATATGTCCAAAGGCTCATCAATCATGTTGCCTTCCATTCCCCATGCTGAGGTGGGCAGCTGGGCAGCTGGACCAGCCTCTGGCAAAGTTAAGTGGATGGAGTCTGCCCTTGGTCACAGTCCTCAGAAGCCCTGTGCCCTTGCCTCTCTGCCTTCTGCTTTTCCCAACAAGCTTCTGAGCTTTCTCCCAACTCCCCACAGACCCTCTCAAAGGCTCCTCCTGTCAAGTGAGATAcagtgggttctttaaaaaatggtccaggtatctgtagcactagcatgtctatttactttgtcactgaggctggagtgcagtggtgtgatcatggctcactgcaacctcgacctcccgaggctcaagtgatcttcccaactcagcctctcaagtatctgggaccaaatgcatgcaccaccatccccagctaatttttaaaatttttgtagagacggggtcttgccatgttaccctggctagtctcaaactcctggctttaaatgattttcctgccctggcctctcaaaatgctgagattacaagcgtgagttgccacacccagcccaggatgtctatcaataatgcagattccaggcccccatctcacacccactgactcagaatatgtgtgtgcacactcaggatgcatcttaacaagcgcccctgatgattctggtgcacagtgaaggttgagactcgctgGGTTAGAGGGTGCTAATGGTTTAACGGTGATTTTCAACCTGCTGGGCTGCCTCTATACTGTTCACATGTGTAACATATGCCCATCAGTGACATCTCTCAATATTTAATGATTCTTCACTGGGGAAGTGAGTGACTAACCCAAGGCAGGTGTTGGAATTTCCAGAGAGGTTTGGCAAAGCCACAGTGGGGAGTCTGATCTCCTCCTGTTTGGGTATTCTGACCTCTTTCCCGGTGGAGAAGtgttgggagcaggccccccaaaatctagccataaactggccccaaaactggccataaacaaaacctctgcagcactgtgacatgttcataatggccctaacgcctccgctggaaggttgtgggtttaccggaatgagggcaaggaacacccggcccgcccaggacggaaaaccccttaaaggcgttcttaagccacaaacaataccgtgagtgatctgtgccttaagaacatgctcctgctgcagttaaccagcccaacctattcctttaattcagcccgtcccttcgtttcccataagggatacttttagttgatttaacatctatagaaacaatgccaatgactggcttgctgttagtaaatacgtgggtaaatctctgttccgggctctcagctctgaaggctgtgagacccctgatttcccactccacacctctatatttctgtgtgtgtgtctttaattcctctagcgccgctgggttagggtctccccgaccaagctggtctcggcaGAGAAGGACCAGTTAATGGCTCTTCTTAGCGTAGGTAACGTGTGTTGTTGAGGAAATGCCCTCTGTGCAGCACTTGGCTGGATGTTCCTTGGTTGGCATCTTGGCTGGTGTCCATGTGCCCGGAGAAGAAGGGCCCTCTCTGACCCAGGCCTAATATGTGTCCCCCGTTTCCACCCTTCCCTGATAACCGGAGGAGATCTTGTCCTGCTGTGCTAATAGAACGTTCTCTGTATTAACAGAAAATTTTAACCCAACAGAGGTGCTTGGATGGAGGAAATCCAAACAATGTTGCTGGTTGATGGAGAGGGGCATTCCAAGGTCTCTTGTCCATTCATTTATTCATGAAGCCACATttcaacaaatatttattgtgcacctgccctgtaacaggccccgagctgtgatctgtagggggctcagtgataaatgaagcCCATTTTTCTTTAGATTCCAGATGATGGAGGGGAAGTCAGGAAGGGAGGTAGGAACCACCACTCTAGGGGTCCCTTGCCTCCTTCTTGGCATACGCTATGGCCTCGGCATGGAGAGCATGATGGAGCACTGATGCCAGCAAGTCAGCTCAACATTTGCAATATAGCGTGTGGCTGCTTAGAGGCCAACTTGATATATTAAGTGATATTAAAAATGCAGCCTATTTCTGATAATTTATGCGCATTCTCATAAACACATTACACTCGTCACAGTACATTTACGAGCACCAGGCCTACCAGTGGGGAAAAGTTAACACCTACCTAATGATGTTGTTTTGCAGATGTCAGATGCTCTGGTGCTGCAGTGATGGGCTCAATGGTGCAAAGAAATGTGAGTCTTTCTTCTATCCTGGGACACCAGAAGGCTGCCCAGGGCTCGGCAGAAGAACCAGGCTCTCTCCCTCCACACTGCCAAGGTCTGTGCTGTGGGATTGGAAAGAGGCCGGGTGAGAGGCTCCCCTGCCAGGAGAGACTTAGCCCAGGATCAGGTTGCCTAAGGCAAGAGATTAGATGTCACAGAGGGATAATGCTGAATCCTCTGGGGAAGCTCTAGAAATGACCTCTGGGAGAAAGAGGGGCAGAGCCTGAGGCTTACATCACAGCACAGATCAGCCTCCCTCCCACAGGCCTGGGGACCACTAGAGCCCAGGGTCTTCATGACTGAAGTGAACATCTCAGCATTATACATGAATTAGAATGAGTAGAAACTCTAACTAGCCCAGGGCTGGTGGAAACCCAGGGCCAACAGCCACAAATGTTCCCTCCAATCGCTTTGAATTCATGATGCAATTCAAAGGAATTAACAATGCAAGTGGTAGCTTTCAATGATAACGTGTTCCACtttttgtgtttgagagagtctcactctgttgcccaggctgaagtgcaatggcacgatctcggttcactgccacctccacctcccgggttcaagcaattttcctgcctcagcctcctgagtagctgggactacaggtgactgccaccatgtttggcgggcgtagttctttgtatttttagtagagatggggtttcaccatgttggccacgctggtcttgaattcctgacctcaggtgatctgcccgcctcagtctcccaaagtgctgggattacaggtgtgaaccaccacacctggcACCCCCCGCCttttttttttttttttttttttttttagagactggaccttgctttgtcacccaggctggaatgccatggagtgatcatggctcactgcagtcttgaactcctgggctcacgtgatcctcccatctcagcctcctcaatagccagaaccacagatatgcaccaccacgtccagttaatctttttatgttttattttttaaagatgggtcttgctatgttgcccaggctTACATGAACTGAGttttacatttactacaaatgtgtgtacctataaacaatatatggtattgttttgcaagctttaaaactttgtaaaaatggcatcatactatacataacctcattaaactcatttttggcttattattatttgtgaagttttttcctgtagatgcatctagctatagtttgtttttattgctgtctactgttccactatgtgaatacgccataatttatttctcttttctcctattgttgatcatttaagctggttcctatgcgtggctatcatcaacatgctgcaatggctattcctgtttcctggtctgtatatgggatttctttttttgagacagggtctcactctattgcccaggctaaagttcagtcgtaccatctcggctcaccacagcctcaacctccctgggctcaggcgatcctcccacctcagcctcccgagtagctgggactgcaggcacataccaccacatctggctaatttttataatttttgtagagaaagggtttgccaggttgcccaggctagtctgaaactcctgagctcaagtgatcctcccacctcagcctcgcaaagtgctgggattacaggcgtgtgccaccacacctggccagggagagtttctttaagagagtttcttggataggaaatttgctgggcctagggtctgtatatctttaccttgactagagagtgcctagctgctccccaaactggttagaaccacggccattctcaccaaccatctctaacagtgccagttcctccttgtccttgtcaacacttggtattgccagacttgaatttttgctagtctggtaggttttttttatgtttaattagttctccttgctaactagtgCTACTTCATTTCTGCTGCTAAGGGTGGGCATGTGCTGTCAATAGATAAATGCAACAGATTAAAAATTGAAGAGCTtccatcaataagggattggctaaatacagtatgcctcacctgtacaatagaatactgcacaatcattaacaaagatgAGTGTGCTGATATGGAAGAGATATTGATATTCTGATGTACTAAATATCTTTTCATCTCCCAGATTTATTGTTACAAAGCAAGAGGCATAAAAAGCATATTCCCTTTGTAAATAAATGAAAAGATATGTATACACATGCATATTTGTATGTATATGCGCAGAATACCTCTGAAAGAATGAACAGGAAACTGGTAACCACAGTTCATCTGGGAAGAGCACTAGAGGACAGGGAAACTTTTTTGCTCTGTGAATTCTTACCACGCATGTGTATTAGCCTGTTGGAAAAAATTAGCcctagaataggcaaattcgtagagactgaaagtagaatagaggttgccagaggttttggggtagagaatagggggtttttatttgatagatgcattttctgtttgagatgatgagagagttctgaaatggatagtggtgatggttgtacaacattgtgattgtacttaatgccactcaactgtacacttaaaagcggttgaaatgggctgggcacggtggctcacacctggaatcccagcgcttcgggaagccaaggtgggcagatcacctgaggtcaggagttcacgaccagcctgaccaacatggtgaaaccccgtctctactaaaaatacaaaaattagctgggcgtggtggtggtcgcctataatcccagctactcaggaggctgaggcaggagaattgcttgaacctgggaggtggaggttgcagtgagccaagatcacgccactgtactccagcctgggcaacagaagtgagacctcatctcaaaaaaaaaaaaTGTTGAAATggcctggcacaatggttcacacctgtaatcccagccctcagggatgccaaggcaagaggatcacttgagcccaggagtttgagaccagcctgggaaagatggtgagactctgtctctacaaaatgttttttaaaaattagctgggtgcagtggtgcacaccctgtggtcccag diff --git a/tests/data/genes.fasta b/tests/data/genes.fasta index 5b0261e..5814cb8 100644 --- a/tests/data/genes.fasta +++ b/tests/data/genes.fasta @@ -1,4 +1,4 @@ ->gi|563317589|dbj|AB821309.1| Homo sapiens FGFR2-AHCYL1 mRNA for FGFR2-AHCYL1 fusion kinase protein, complete cds +>AB821309.1 Homo sapiens FGFR2-AHCYL1 mRNA for FGFR2-AHCYL1 fusion kinase protein, complete cds ATGGTCAGCTGGGGTCGTTTCATCTGCCTGGTCGTGGTCACCATGGCAACCTTGTCCCTGGCCCGGCCCT CCTTCAGTTTAGTTGAGGATACCACATTAGAGCCAGAAGATGCCATCTCATCCGGAGATGATGAGGATGA CACCGATGGTGCGGAAGATTTTGTCAGTGAGAACAGTAACAACAAGAGAGCACCATACTGGACCAACACA @@ -50,7 +50,7 @@ AGCCACAACACAGGCTTTGGCACTGATAGAACTCTATAATGCACCCGAGGGGCGATACAAGCAGGATGTG TACTTGCTTCCTAAGAAAATGGATGAATACGTTGCCAGCTTGCATCTGCCATCATTTGATGCCCACCTTA CAGAGCTGACAGATGACCAAGCAAAATATCTGGGACTCAACAAAAATGGGCCATTCAAACCTAATTATTA CAGATACTAA ->gi|557361099|gb|KF435150.1| Homo sapiens MDM4 protein variant Y (MDM4) mRNA, complete cds, alternatively spliced +>KF435150.1 Homo sapiens MDM4 protein variant Y (MDM4) mRNA, complete cds, alternatively spliced ATGACATCATTTTCCACCTCTGCTCAGTGTTCAACATCTGACAGTGCTTGCAGGATCTCTCCTGGACAAA TCAATCAGGTACGACCAAAACTGCCGCTTTTGAAGATTTTGCATGCAGCAGGTGCGCAAGGTGAAATGTT CACTGTTAAAGAGGTCATGCACTATTTAGGTCAGTACATAATGGTGAAGCAACTTTATGATCAGCAGGAG @@ -58,7 +58,7 @@ CAGCATATGGTATATTGTGGTGGAGATCTTTTGGGAGAACTACTGGGACGTCAGAGCTTCTCCGTGAAAG ACCCAAGCCCTCTCTATGATATGCTAAGAAAGAATCTTGTCACTTTAGCCACTGCTACTACAGCAAAGTG CAGAGGAAAGTTCCACTTCCAGAAAAAGAACTACAGAAGACGATATCCCCACACTGCCTACCTCAGAGCA TAAATGCATACATTCTAGAGAAGGTGATTGAAGTGGGAAAAAATGATGACCTGGAGGACTC ->gi|557361097|gb|KF435149.1| Homo sapiens MDM4 protein variant G (MDM4) mRNA, complete cds +>KF435149.1 Homo sapiens MDM4 protein variant G (MDM4) mRNA, complete cds ATGACATCATTTTCCACCTCTGCTCAGTGTTCAACATCTGACAGTGCTTGCAGGATCTCTCCTGGACAAA TCAATCAGGTACGACCAAAACTGCCGCTTTTGAAGATTTTGCATGCAGCAGGTGCGCAAGGTGAAATGTT CACTGTTAAAGAGGTCATGCACTATTTAGGTCAGTACATAATGGTGAAGCAACTTTATGATCAGCAGGAG @@ -69,7 +69,7 @@ TAAATGCATACATTCTAGAGAAGATGAAGACTTAATTGAAAATTTAGCCCAAGATGAAACATCTAGGCTG GACCTTGGATTTGAGGAGTGGGATGTAGCTGGCCTGCCTTGGTGGTTTTTAGGAAACTTGAGAAGCAACT ATACACCTAGAAGTAATGGCTCAACTGATTTACAGACAAATCAGGTGATTGAAGTGGGAAAAAATGATGA CCTGGAGGACTC ->gi|543583796|ref|NR_104216.1| Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 8, non-coding RNA +>NR_104216.1 Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 8, non-coding RNA CCCCGCCCCTCTGGCGGCCCGCCGTCCCAGACGCGGGAAGAGCTTGGCCGGTTTCGAGTCGCTGGCCTGC AGCTTCCCTGTGGTTTCCCGAGGCTTCCTTGCTTCCCGCTCTGCGAGGAGCCTTTCATCCGAAGGCGGGA CGATGCCGGATAATCGGCAGCCGAGGAACCGGCAGCCGAGGATCCGCTCCGGGAACGAGCCTCGTTCCGC @@ -136,7 +136,7 @@ TGGGCCTCATTACAGTCACAATTGTCTATTCTGTTTCCTACCCTGAACACATTAAAATGGTAGGAACTAA TGCTTGTCTTATTTAATTACTAAAAGCCACCATTTTCTTTGATAGATTGAGCTACAGATTGTAAACTTCA TGTATTTCTTTATAAGTCAACCCTTTTCAAAGATACGCACATCAAACTGAATGAATAAATAAATATTGAG AAGTTGAAAAAAAAAAAAAAAAA ->gi|543583795|ref|NR_104215.1| Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 7, non-coding RNA +>NR_104215.1 Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 7, non-coding RNA CCCCGCCCCTCTGGCGGCCCGCCGTCCCAGACGCGGGAAGAGCTTGGCCGGTTTCGAGTCGCTGGCCTGC AGCTTCCCTGTGGTTTCCCGAGGCTTCCTTGCTTCCCGCTCTGCGAGGAGCCTTTCATCCGAAGGCGGGA CGATGCCGGATAATCGGCAGCCGAGGAACCGGCAGCCGAGGATCCGCTCCGGGAACGAGCCTCGTTCCGC @@ -213,7 +213,7 @@ ATGGGTCAGTGTGGAAAATGCATTAATCATATTCTAAACGTTCATGGGCCTCATTACAGTCACAATTGTC TATTCTGTTTCCTACCCTGAACACATTAAAATGGTAGGAACTAATGCTTGTCTTATTTAATTACTAAAAG CCACCATTTTCTTTGATAGATTGAGCTACAGATTGTAAACTTCATGTATTTCTTTATAAGTCAACCCTTT TCAAAGATACGCACATCAAACTGAATGAATAAATAAATATTGAGAAGTTGAAAAAAAAAAAAAAAAA ->gi|543583794|ref|NR_104212.1| Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 6, non-coding RNA +>NR_104212.1 Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 6, non-coding RNA CCCCGCCCCTCTGGCGGCCCGCCGTCCCAGACGCGGGAAGAGCTTGGCCGGTTTCGAGTCGCTGGCCTGC AGCTTCCCTGTGGTTTCCCGAGGCTTCCTTGCTTCCCGCTCTGCGAGGAGCCTTTCATCCGAAGGCGGGA CGATGCCGGATAATCGGCAGCCGAGGAACCGGCAGCCGAGGATCCGCTCCGGGAACGAGCCTCGTTCCGC @@ -291,7 +291,7 @@ GAAAATGCATTAATCATATTCTAAACGTTCATGGGCCTCATTACAGTCACAATTGTCTATTCTGTTTCCT ACCCTGAACACATTAAAATGGTAGGAACTAATGCTTGTCTTATTTAATTACTAAAAGCCACCATTTTCTT TGATAGATTGAGCTACAGATTGTAAACTTCATGTATTTCTTTATAAGTCAACCCTTTTCAAAGATACGCA CATCAAACTGAATGAATAAATAAATATTGAGAAGTTGAAAAAAAAAAAAAAAAA ->gi|543583788|ref|NM_001282545.1| Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 3, mRNA +>NM_001282545.1 Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 3, mRNA CCCCGCCCCTCTGGCGGCCCGCCGTCCCAGACGCGGGAAGAGCTTGGCCGGTTTCGAGTCGCTGGCCTGC AGCTTCCCTGTGGTTTCCCGAGGCTTCCTTGCTTCCCGCTCTGCGAGGAGCCTTTCATCCGAAGGCGGGA CGATGCCGGATAATCGGCAGCCGAGGAACCGGCAGCCGAGGATCCGCTCCGGGAACGAGCCTCGTTCCGC @@ -352,7 +352,7 @@ CATATTCTAAACGTTCATGGGCCTCATTACAGTCACAATTGTCTATTCTGTTTCCTACCCTGAACACATT AAAATGGTAGGAACTAATGCTTGTCTTATTTAATTACTAAAAGCCACCATTTTCTTTGATAGATTGAGCT ACAGATTGTAAACTTCATGTATTTCTTTATAAGTCAACCCTTTTCAAAGATACGCACATCAAACTGAATG AATAAATAAATATTGAGAAGTTGAAAAAAAAAAAAAAAAA ->gi|543583786|ref|NM_001282543.1| Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 2, mRNA +>NM_001282543.1 Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 2, mRNA CCCCGCCCCTCTGGCGGCCCGCCGTCCCAGACGCGGGAAGAGCTTGGCCGGTTTCGAGTCGCTGGCCTGC AGCTTCCCTGTGGTTTCCCGAGGCTTCCTTGCTTCCCGCTCTGCGAGGAGCCTTTCATCCGAAGGCGGGA CGATGCCGGATAATCGGCAGCCGAGGAACCGGCAGCCGAGGATCCGCTCCGGGAACGAGCCTCGTTCCGC @@ -432,7 +432,7 @@ ACAATTGTCTATTCTGTTTCCTACCCTGAACACATTAAAATGGTAGGAACTAATGCTTGTCTTATTTAAT TACTAAAAGCCACCATTTTCTTTGATAGATTGAGCTACAGATTGTAAACTTCATGTATTTCTTTATAAGT CAACCCTTTTCAAAGATACGCACATCAAACTGAATGAATAAATAAATATTGAGAAGTTGAAAAAAAAAAA AAAAAA ->gi|543583785|ref|NM_000465.3| Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 1, mRNA +>NM_000465.3 Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 1, mRNA CCCCGCCCCTCTGGCGGCCCGCCGTCCCAGACGCGGGAAGAGCTTGGCCGGTTTCGAGTCGCTGGCCTGC AGCTTCCCTGTGGTTTCCCGAGGCTTCCTTGCTTCCCGCTCTGCGAGGAGCCTTTCATCCGAAGGCGGGA CGATGCCGGATAATCGGCAGCCGAGGAACCGGCAGCCGAGGATCCGCTCCGGGAACGAGCCTCGTTCCGC @@ -512,7 +512,7 @@ GTCAGTGTGGAAAATGCATTAATCATATTCTAAACGTTCATGGGCCTCATTACAGTCACAATTGTCTATT CTGTTTCCTACCCTGAACACATTAAAATGGTAGGAACTAATGCTTGTCTTATTTAATTACTAAAAGCCAC CATTTTCTTTGATAGATTGAGCTACAGATTGTAAACTTCATGTATTTCTTTATAAGTCAACCCTTTTCAA AGATACGCACATCAAACTGAATGAATAAATAAATATTGAGAAGTTGAAAAAAAAAAAAAAAAA ->gi|543583740|ref|NM_001282549.1| Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 5, mRNA +>NM_001282549.1 Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 5, mRNA CCCCGCCCCTCTGGCGGCCCGCCGTCCCAGACGCGGGAAGAGCTTGGCCGGTTTCGAGTCGCTGGCCTGC AGCTTCCCTGTGGTTTCCCGAGGCTTCCTTGCTTCCCGCTCTGCGAGGAGCCTTTCATCCGAAGGCGGGA CGATGCCGGATAATCGGCAGCCGAGGAACCGGCAGCCGAGGATCCGCTCCGGGAACGAGCCTCGTTCCGC @@ -570,7 +570,7 @@ GGTCAGTGTGGAAAATGCATTAATCATATTCTAAACGTTCATGGGCCTCATTACAGTCACAATTGTCTAT TCTGTTTCCTACCCTGAACACATTAAAATGGTAGGAACTAATGCTTGTCTTATTTAATTACTAAAAGCCA CCATTTTCTTTGATAGATTGAGCTACAGATTGTAAACTTCATGTATTTCTTTATAAGTCAACCCTTTTCA AAGATACGCACATCAAACTGAATGAATAAATAAATATTGAGAAGTTGAAAAAAAAAAAAAAAAA ->gi|543583738|ref|NM_001282548.1| Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 4, mRNA +>NM_001282548.1 Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 4, mRNA CCCCGCCCCTCTGGCGGCCCGCCGTCCCAGACGCGGGAAGAGCTTGGCCGGTTTCGAGTCGCTGGCCTGC AGCTTCCCTGTGGTTTCCCGAGGCTTCCTTGCTTCCCGCTCTGCGAGGAGCCTTTCATCCGAAGGCGGGA CGATGCCGGATAATCGGCAGCCGAGGAACCGGCAGCCGAGGATCCGCTCCGGGAACGAGCCTCGTTCCGC @@ -630,7 +630,7 @@ AAAATGCATTAATCATATTCTAAACGTTCATGGGCCTCATTACAGTCACAATTGTCTATTCTGTTTCCTA CCCTGAACACATTAAAATGGTAGGAACTAATGCTTGTCTTATTTAATTACTAAAAGCCACCATTTTCTTT GATAGATTGAGCTACAGATTGTAAACTTCATGTATTTCTTTATAAGTCAACCCTTTTCAAAGATACGCAC ATCAAACTGAATGAATAAATAAATATTGAGAAGTTGAAAAAAAAAAAAAAAAA ->gi|530384540|ref|XM_005249645.1| PREDICTED: Homo sapiens BRCA1-associated ATM activator 1 (BRAT1), transcript variant X4, mRNA +>XM_005249645.1 PREDICTED: Homo sapiens BRCA1-associated ATM activator 1 (BRAT1), transcript variant X4, mRNA GTGCTGGGACTACAGAGTCCAGTGTCGTGCTGCTGCAGGAGCACCCCTGCCTGGTGGAGCTGCTGTCCCA TGTGCTGAAAGTCCAGGACCTGAGTTCTGGGGTCCTCTCCTTCTCACTGCGCCTGGCAGGAACCTTCGCA GCCCAGGAAAACTGCTTCCAGTATCTTCAGCAGGGGGAGTTACTACCAGGGCTCTTTGGGGAGCCAGGAC @@ -671,7 +671,7 @@ CCGACTGCTACTGAGCAGAACCAGAGTCTGCCACTGGGGCTCAGGACCAAGGGAGGCAGCACCATGTCCT TCTGTGGGACACTGCCAGCCCCAGGGCTCCAGCCCAGCCCGGTGGATCCTCTGGGGAAGCCAGGACCAGG AGAGAAGCAAGGTCAAGAAATCCCACAGTTTGATGTATTAAAGAAATGACTTATTTCTACTCAAAATAAA TGGCATTGAAGTCTTTCTTTAA ->gi|530384538|ref|XM_005249644.1| PREDICTED: Homo sapiens BRCA1-associated ATM activator 1 (BRAT1), transcript variant X3, mRNA +>XM_005249644.1 PREDICTED: Homo sapiens BRCA1-associated ATM activator 1 (BRAT1), transcript variant X3, mRNA ACCGGATGCTCGGCATGAACCACTAGGCGCCTGGCGGGGGTGATCTGTCGGAGCGACCGGCTTGGCGCCT GCCTGTCCCCAGCCCCTCTCAGCTTGAACTCCTTCCTTCAAGTCTGGGCCCTCGAGGCTTCCAGAGCGGC CTCCAGGGGTGCAGTCTCAGTTCCCCACGCCAGCCGTCTCCGTCCTCCGCCTCCTCCGGGCCTGGCAGGT @@ -715,7 +715,7 @@ AGGACATGCTGGCCACGGGAGGCTTCCTGCAGGGGGACGAGGCCGACTGCTACTGAGCAGAACCAGAGTC TGCCACTGGGGCTCAGGACCAAGGGAGGCAGCACCATGTCCTTCTGTGGGACACTGCCAGCCCCAGGGCT CCAGCCCAGCCCGGTGGATCCTCTGGGGAAGCCAGGACCAGGAGAGAAGCAAGGTCAAGAAATCCCACAG TTTGATGTATTAAAGAAATGACTTATTTCTACTCAAAATAAATGGCATTGAAGTCTTTCTTTAA ->gi|530384536|ref|XM_005249643.1| PREDICTED: Homo sapiens BRCA1-associated ATM activator 1 (BRAT1), transcript variant X2, mRNA +>XM_005249643.1 PREDICTED: Homo sapiens BRCA1-associated ATM activator 1 (BRAT1), transcript variant X2, mRNA TCTGTCGGAGCGACCGGCTTGGCGCCTGCCTGTCCCCAGCCCCTCTCAGCTTGAACTCCTTCCTTCAAGT CTGGGCCCTCGAGGCTTCCAGAGCGGCCTCCAGGGGTGCAGTCTCAGTTCCCCACGCCAGCCGTCTCCGT CCTCCGCCTCCTCCGGGCCTGGCAGGTGGCACTGTCCGGAGGCGGAGCCTTGGGCGAGGGGTGGTTGCGG @@ -761,7 +761,7 @@ GACGAGGCCGACTGCTACTGAGCAGAACCAGAGTCTGCCACTGGGGCTCAGGACCAAGGGAGGCAGCACC ATGTCCTTCTGTGGGACACTGCCAGCCCCAGGGCTCCAGCCCAGCCCGGTGGATCCTCTGGGGAAGCCAG GACCAGGAGAGAAGCAAGGTCAAGAAATCCCACAGTTTGATGTATTAAAGAAATGACTTATTTCTACTCA AAATAAATGGCATTGAAGTCTTTCTTTAA ->gi|530384534|ref|XM_005249642.1| PREDICTED: Homo sapiens BRCA1-associated ATM activator 1 (BRAT1), transcript variant X1, mRNA +>XM_005249642.1 PREDICTED: Homo sapiens BRCA1-associated ATM activator 1 (BRAT1), transcript variant X1, mRNA GCGACCGGCTTGGCGCCTGCCTGTCCCCAGCCCCTCTCAGCTTGAACTCCTTCCTTCAAGTCTGGGCCCT CGAGGCTTCCAGAGCGGCCTCCAGGGGTGCAGTCTCAGTTCCCCACGCCAGCCGTCTCCGTCCTCCGCCT CCTCCGGGCCTGGCAGGTGGCACTGTCCGGAGGCGGAGCCTTGGGCGAGGGGTGGTTGCGGCGGAGGACG @@ -807,7 +807,7 @@ TGCTACTGAGCAGAACCAGAGTCTGCCACTGGGGCTCAGGACCAAGGGAGGCAGCACCATGTCCTTCTGT GGGACACTGCCAGCCCCAGGGCTCCAGCCCAGCCCGGTGGATCCTCTGGGGAAGCCAGGACCAGGAGAGA AGCAAGGTCAAGAAATCCCACAGTTTGATGTATTAAAGAAATGACTTATTTCTACTCAAAATAAATGGCA TTGAAGTCTTTCTTTAA ->gi|530373237|ref|XM_005265508.1| PREDICTED: Homo sapiens BRCA1 associated protein-1 (ubiquitin carboxy-terminal hydrolase) (BAP1), transcript variant X2, mRNA +>XM_005265508.1 PREDICTED: Homo sapiens BRCA1 associated protein-1 (ubiquitin carboxy-terminal hydrolase) (BAP1), transcript variant X2, mRNA GCATGCCCGCATCTGCTGTCCGACAGGCGGAAGACGAGCCCAGAGGCGGAGCAGGGCCGTCGCGCCTTGG TGACGTCTGCCGCCGGCGCGGGCGGGTGACGCGACTGGGCCCGTTGTCTGTGTGTGGGACTGAGGGGCCC CGGGGGCGGTGGGGGCTCCCGGTGGGGGCAGCGGTGGGGAGGGAGGGCCTGGACATGGCGCTGAGGGGCC @@ -848,7 +848,7 @@ GAATAGTCCCAGCTGGAGAGTCCAGGCCCTGGGAATGGGAGGAACCAGGCCACATTCCTTCCATCGTGCC CTGAGGCCTGACACGGCAGATCAGCCCCATAGTGCTCAGGAGGCAGCATCTGGAGTTGGGGCACAGCGAG GTACTGCAGCTTCCTCCACAGCCGGCTGTGGAGCAGCAGGACCTGGCCCTTCTGCCTGGGCAGCAGAATA TATATTTTACCTATCAGAGACATCTATTTTTCTGGGCTCCAACCCAACATGCCACCATGTTGAC ->gi|530373235|ref|XM_005265507.1| PREDICTED: Homo sapiens BRCA1 associated protein-1 (ubiquitin carboxy-terminal hydrolase) (BAP1), transcript variant X1, mRNA +>XM_005265507.1 PREDICTED: Homo sapiens BRCA1 associated protein-1 (ubiquitin carboxy-terminal hydrolase) (BAP1), transcript variant X1, mRNA GCATGCCCGCATCTGCTGTCCGACAGGCGGAAGACGAGCCCAGAGGCGGAGCAGGGCCGTCGCGCCTTGG TGACGTCTGCCGCCGGCGCGGGCGGGTGACGCGACTGGGCCCGTTGTCTGTGTGTGGGACTGAGGGGCCC CGGGGGCGGTGGGGGCTCCCGGTGGGGGCAGCGGTGGGGAGGGAGGGCCTGGACATGGCGCTGAGGGGCC @@ -890,7 +890,7 @@ AGAGTCCAGGCCCTGGGAATGGGAGGAACCAGGCCACATTCCTTCCATCGTGCCCTGAGGCCTGACACGG CAGATCAGCCCCATAGTGCTCAGGAGGCAGCATCTGGAGTTGGGGCACAGCGAGGTACTGCAGCTTCCTC CACAGCCGGCTGTGGAGCAGCAGGACCTGGCCCTTCTGCCTGGGCAGCAGAATATATATTTTACCTATCA GAGACATCTATTTTTCTGGGCTCCAACCCAACATGCCACCATGTTGAC ->gi|530364726|ref|XR_241081.1| PREDICTED: Homo sapiens Mdm4 p53 binding protein homolog (mouse) (MDM4), transcript variant X4, misc_RNA +>XR_241081.1 PREDICTED: Homo sapiens Mdm4 p53 binding protein homolog (mouse) (MDM4), transcript variant X4, misc_RNA GTGTGGGAGGCCGGAAGTTGCGGCTTCATTACTCGCCATTTCAAAATGCTGCCGAGGCCCTAGGATCTGT GACTGCCACCCCTCCCCCCACCCGGGCTCGGCGGGGGAGCGACTCATGGAGCTGCCGTAAGTTTTACCAA CAGACTGCAGTTTCTTCACTACCAAAATGACATCATTTTCCACCTCTGCTCAGTGTTCAACATCTGACAG @@ -906,7 +906,7 @@ GTGATTGAAGTGGGAAAAAATGATGACCTGGAGGACTCTAAGTCCTTAAGTGATGATACCGATGTAGAGG TTACCTCTGAGGATGAGTGGCAGTGTACTGAATGCAAGAAATTTAACTCTCCAAGCAAGAGGTACTGTTT TCGTTGTTGGGCCTTGAGGAAGGATTGGTATTCAGATTGTTCAAAGTTAACCCATTCTCTCTCCACGTCT GATATCACTGCCATACCTGAAAAGGAAAA ->gi|530364725|ref|XR_241080.1| PREDICTED: Homo sapiens Mdm4 p53 binding protein homolog (mouse) (MDM4), transcript variant X3, misc_RNA +>XR_241080.1 PREDICTED: Homo sapiens Mdm4 p53 binding protein homolog (mouse) (MDM4), transcript variant X3, misc_RNA GTGTGGGAGGCCGGAAGTTGCGGCTTCATTACTCGCCATTTCAAAATGCTGCCGAGGCCCTAGGATCTGT GACTGCCACCCCTCCCCCCACCCGGGCTCGGCGGGGGAGCGACTCATGGAGCTGCCGTAAGTTTTACCAA CAGACTGCAGTTTCTTCACTACCAAAATGACATCATTTTCCACCTCTGCTCAGTGTTCAACATCTGACAG @@ -977,7 +977,7 @@ AAAGGGGCCAGACTGTGTTGCCTTCTTGAGCCTGGTCTGACTCCTGAGTGGAAGTCTGATTCCAGGTACA TGAGATAAGCACTAATACCTCCAGTTTGCAGATTAAGAGACTGAGGTCCTGAAGAGGTTAAAGAACTTGG CTCAAGTCACATAGCTGGTGAGCAGCAAGATACAAGAATCAACCCAAGTCCAGGGGGCTGTGTGCCGTTT ACACTTCACATCTGTGCTGCCAGGGCTGTAGCTATAAAAGCTTGAAAACCATTA ->gi|530364724|ref|XR_241079.1| PREDICTED: Homo sapiens Mdm4 p53 binding protein homolog (mouse) (MDM4), transcript variant X2, misc_RNA +>XR_241079.1 PREDICTED: Homo sapiens Mdm4 p53 binding protein homolog (mouse) (MDM4), transcript variant X2, misc_RNA AAGTTGCGGCTTCATTACTCGCCATTTCAAAATGCTGCCGAGGCCCTAGGATCTGTGACTGCCACCCCTC CCCCCACCCGGGCTCGGCGGGGGAGCGACTCATGGAGCTGCCGTAAGTTTTACCAACAGACTGCAGTTTC TTCACTACCAAAATGACATCATTTTCCACCTCTGCTCAGTGTTCAACATCTGACAGTGCTTGCAGGATCT diff --git a/tests/data/genes.fasta.lower b/tests/data/genes.fasta.lower index d88bdc6..7df934b 100644 --- a/tests/data/genes.fasta.lower +++ b/tests/data/genes.fasta.lower @@ -1,4 +1,4 @@ ->gi|563317589|dbj|AB821309.1| Homo sapiens FGFR2-AHCYL1 mRNA for FGFR2-AHCYL1 fusion kinase protein, complete cds +>AB821309.1 Homo sapiens FGFR2-AHCYL1 mRNA for FGFR2-AHCYL1 fusion kinase protein, complete cds atggtcagctggggtcgtttcatctgcctggtcgtggtcaccatggcaaccttgtccctggcccggccct ccttcagtttagttgaggataccacattagagccagaagatgccatctcatccggagatgatgaggatga caccgatggtgcggaagattttgtcagtgagaacagtaacaacaagagagcaccatactggaccaacaca @@ -50,7 +50,7 @@ agccacaacacaggctttggcactgatagaactctataatgcacccgaggggcgatacaagcaggatgtg tacttgcttcctaagaaaatggatgaatacgttgccagcttgcatctgccatcatttgatgcccacctta cagagctgacagatgaccaagcaaaatatctgggactcaacaaaaatgggccattcaaacctaattatta cagatactaa ->gi|557361099|gb|KF435150.1| Homo sapiens MDM4 protein variant Y (MDM4) mRNA, complete cds, alternatively spliced +>KF435150.1 Homo sapiens MDM4 protein variant Y (MDM4) mRNA, complete cds, alternatively spliced atgacatcattttccacctctgctcagtgttcaacatctgacagtgcttgcaggatctctcctggacaaa tcaatcaggtacgaccaaaactgccgcttttgaagattttgcatgcagcaggtgcgcaaggtgaaatgtt cactgttaaagaggtcatgcactatttaggtcagtacataatggtgaagcaactttatgatcagcaggag @@ -58,7 +58,7 @@ cagcatatggtatattgtggtggagatcttttgggagaactactgggacgtcagagcttctccgtgaaag acccaagccctctctatgatatgctaagaaagaatcttgtcactttagccactgctactacagcaaagtg cagaggaaagttccacttccagaaaaagaactacagaagacgatatccccacactgcctacctcagagca taaatgcatacattctagagaaggtgattgaagtgggaaaaaatgatgacctggaggactc ->gi|557361097|gb|KF435149.1| Homo sapiens MDM4 protein variant G (MDM4) mRNA, complete cds +>KF435149.1 Homo sapiens MDM4 protein variant G (MDM4) mRNA, complete cds atgacatcattttccacctctgctcagtgttcaacatctgacagtgcttgcaggatctctcctggacaaa tcaatcaggtacgaccaaaactgccgcttttgaagattttgcatgcagcaggtgcgcaaggtgaaatgtt cactgttaaagaggtcatgcactatttaggtcagtacataatggtgaagcaactttatgatcagcaggag @@ -69,7 +69,7 @@ taaatgcatacattctagagaagatgaagacttaattgaaaatttagcccaagatgaaacatctaggctg gaccttggatttgaggagtgggatgtagctggcctgccttggtggtttttaggaaacttgagaagcaact atacacctagaagtaatggctcaactgatttacagacaaatcaggtgattgaagtgggaaaaaatgatga cctggaggactc ->gi|543583796|ref|NR_104216.1| Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 8, non-coding RNA +>NR_104216.1 Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 8, non-coding RNA ccccgcccctctggcggcccgccgtcccagacgcgggaagagcttggccggtttcgagtcgctggcctgc agcttccctgtggtttcccgaggcttccttgcttcccgctctgcgaggagcctttcatccgaaggcggga cgatgccggataatcggcagccgaggaaccggcagccgaggatccgctccgggaacgagcctcgttccgc @@ -136,7 +136,7 @@ tgggcctcattacagtcacaattgtctattctgtttcctaccctgaacacattaaaatggtaggaactaa tgcttgtcttatttaattactaaaagccaccattttctttgatagattgagctacagattgtaaacttca tgtatttctttataagtcaacccttttcaaagatacgcacatcaaactgaatgaataaataaatattgag aagttgaaaaaaaaaaaaaaaaa ->gi|543583795|ref|NR_104215.1| Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 7, non-coding RNA +>NR_104215.1 Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 7, non-coding RNA ccccgcccctctggcggcccgccgtcccagacgcgggaagagcttggccggtttcgagtcgctggcctgc agcttccctgtggtttcccgaggcttccttgcttcccgctctgcgaggagcctttcatccgaaggcggga cgatgccggataatcggcagccgaggaaccggcagccgaggatccgctccgggaacgagcctcgttccgc @@ -213,7 +213,7 @@ atgggtcagtgtggaaaatgcattaatcatattctaaacgttcatgggcctcattacagtcacaattgtc tattctgtttcctaccctgaacacattaaaatggtaggaactaatgcttgtcttatttaattactaaaag ccaccattttctttgatagattgagctacagattgtaaacttcatgtatttctttataagtcaacccttt tcaaagatacgcacatcaaactgaatgaataaataaatattgagaagttgaaaaaaaaaaaaaaaaa ->gi|543583794|ref|NR_104212.1| Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 6, non-coding RNA +>NR_104212.1 Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 6, non-coding RNA ccccgcccctctggcggcccgccgtcccagacgcgggaagagcttggccggtttcgagtcgctggcctgc agcttccctgtggtttcccgaggcttccttgcttcccgctctgcgaggagcctttcatccgaaggcggga cgatgccggataatcggcagccgaggaaccggcagccgaggatccgctccgggaacgagcctcgttccgc @@ -291,7 +291,7 @@ gaaaatgcattaatcatattctaaacgttcatgggcctcattacagtcacaattgtctattctgtttcct accctgaacacattaaaatggtaggaactaatgcttgtcttatttaattactaaaagccaccattttctt tgatagattgagctacagattgtaaacttcatgtatttctttataagtcaacccttttcaaagatacgca catcaaactgaatgaataaataaatattgagaagttgaaaaaaaaaaaaaaaaa ->gi|543583788|ref|NM_001282545.1| Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 3, mRNA +>NM_001282545.1 Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 3, mRNA ccccgcccctctggcggcccgccgtcccagacgcgggaagagcttggccggtttcgagtcgctggcctgc agcttccctgtggtttcccgaggcttccttgcttcccgctctgcgaggagcctttcatccgaaggcggga cgatgccggataatcggcagccgaggaaccggcagccgaggatccgctccgggaacgagcctcgttccgc @@ -352,7 +352,7 @@ catattctaaacgttcatgggcctcattacagtcacaattgtctattctgtttcctaccctgaacacatt aaaatggtaggaactaatgcttgtcttatttaattactaaaagccaccattttctttgatagattgagct acagattgtaaacttcatgtatttctttataagtcaacccttttcaaagatacgcacatcaaactgaatg aataaataaatattgagaagttgaaaaaaaaaaaaaaaaa ->gi|543583786|ref|NM_001282543.1| Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 2, mRNA +>NM_001282543.1 Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 2, mRNA ccccgcccctctggcggcccgccgtcccagacgcgggaagagcttggccggtttcgagtcgctggcctgc agcttccctgtggtttcccgaggcttccttgcttcccgctctgcgaggagcctttcatccgaaggcggga cgatgccggataatcggcagccgaggaaccggcagccgaggatccgctccgggaacgagcctcgttccgc @@ -432,7 +432,7 @@ acaattgtctattctgtttcctaccctgaacacattaaaatggtaggaactaatgcttgtcttatttaat tactaaaagccaccattttctttgatagattgagctacagattgtaaacttcatgtatttctttataagt caacccttttcaaagatacgcacatcaaactgaatgaataaataaatattgagaagttgaaaaaaaaaaa aaaaaa ->gi|543583785|ref|NM_000465.3| Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 1, mRNA +>NM_000465.3 Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 1, mRNA ccccgcccctctggcggcccgccgtcccagacgcgggaagagcttggccggtttcgagtcgctggcctgc agcttccctgtggtttcccgaggcttccttgcttcccgctctgcgaggagcctttcatccgaaggcggga cgatgccggataatcggcagccgaggaaccggcagccgaggatccgctccgggaacgagcctcgttccgc @@ -512,7 +512,7 @@ gtcagtgtggaaaatgcattaatcatattctaaacgttcatgggcctcattacagtcacaattgtctatt ctgtttcctaccctgaacacattaaaatggtaggaactaatgcttgtcttatttaattactaaaagccac cattttctttgatagattgagctacagattgtaaacttcatgtatttctttataagtcaacccttttcaa agatacgcacatcaaactgaatgaataaataaatattgagaagttgaaaaaaaaaaaaaaaaa ->gi|543583740|ref|NM_001282549.1| Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 5, mRNA +>NM_001282549.1 Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 5, mRNA ccccgcccctctggcggcccgccgtcccagacgcgggaagagcttggccggtttcgagtcgctggcctgc agcttccctgtggtttcccgaggcttccttgcttcccgctctgcgaggagcctttcatccgaaggcggga cgatgccggataatcggcagccgaggaaccggcagccgaggatccgctccgggaacgagcctcgttccgc @@ -570,7 +570,7 @@ ggtcagtgtggaaaatgcattaatcatattctaaacgttcatgggcctcattacagtcacaattgtctat tctgtttcctaccctgaacacattaaaatggtaggaactaatgcttgtcttatttaattactaaaagcca ccattttctttgatagattgagctacagattgtaaacttcatgtatttctttataagtcaacccttttca aagatacgcacatcaaactgaatgaataaataaatattgagaagttgaaaaaaaaaaaaaaaaa ->gi|543583738|ref|NM_001282548.1| Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 4, mRNA +>NM_001282548.1 Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 4, mRNA ccccgcccctctggcggcccgccgtcccagacgcgggaagagcttggccggtttcgagtcgctggcctgc agcttccctgtggtttcccgaggcttccttgcttcccgctctgcgaggagcctttcatccgaaggcggga cgatgccggataatcggcagccgaggaaccggcagccgaggatccgctccgggaacgagcctcgttccgc @@ -630,7 +630,7 @@ aaaatgcattaatcatattctaaacgttcatgggcctcattacagtcacaattgtctattctgtttccta ccctgaacacattaaaatggtaggaactaatgcttgtcttatttaattactaaaagccaccattttcttt gatagattgagctacagattgtaaacttcatgtatttctttataagtcaacccttttcaaagatacgcac atcaaactgaatgaataaataaatattgagaagttgaaaaaaaaaaaaaaaaa ->gi|530384540|ref|XM_005249645.1| PREDICTED: Homo sapiens BRCA1-associated ATM activator 1 (BRAT1), transcript variant X4, mRNA +>XM_005249645.1 PREDICTED: Homo sapiens BRCA1-associated ATM activator 1 (BRAT1), transcript variant X4, mRNA gtgctgggactacagagtccagtgtcgtgctgctgcaggagcacccctgcctggtggagctgctgtccca tgtgctgaaagtccaggacctgagttctggggtcctctccttctcactgcgcctggcaggaaccttcgca gcccaggaaaactgcttccagtatcttcagcagggggagttactaccagggctctttggggagccaggac @@ -671,7 +671,7 @@ ccgactgctactgagcagaaccagagtctgccactggggctcaggaccaagggaggcagcaccatgtcct tctgtgggacactgccagccccagggctccagcccagcccggtggatcctctggggaagccaggaccagg agagaagcaaggtcaagaaatcccacagtttgatgtattaaagaaatgacttatttctactcaaaataaa tggcattgaagtctttctttaa ->gi|530384538|ref|XM_005249644.1| PREDICTED: Homo sapiens BRCA1-associated ATM activator 1 (BRAT1), transcript variant X3, mRNA +>XM_005249644.1 PREDICTED: Homo sapiens BRCA1-associated ATM activator 1 (BRAT1), transcript variant X3, mRNA accggatgctcggcatgaaccactaggcgcctggcgggggtgatctgtcggagcgaccggcttggcgcct gcctgtccccagcccctctcagcttgaactccttccttcaagtctgggccctcgaggcttccagagcggc ctccaggggtgcagtctcagttccccacgccagccgtctccgtcctccgcctcctccgggcctggcaggt @@ -715,7 +715,7 @@ aggacatgctggccacgggaggcttcctgcagggggacgaggccgactgctactgagcagaaccagagtc tgccactggggctcaggaccaagggaggcagcaccatgtccttctgtgggacactgccagccccagggct ccagcccagcccggtggatcctctggggaagccaggaccaggagagaagcaaggtcaagaaatcccacag tttgatgtattaaagaaatgacttatttctactcaaaataaatggcattgaagtctttctttaa ->gi|530384536|ref|XM_005249643.1| PREDICTED: Homo sapiens BRCA1-associated ATM activator 1 (BRAT1), transcript variant X2, mRNA +>XM_005249643.1 PREDICTED: Homo sapiens BRCA1-associated ATM activator 1 (BRAT1), transcript variant X2, mRNA tctgtcggagcgaccggcttggcgcctgcctgtccccagcccctctcagcttgaactccttccttcaagt ctgggccctcgaggcttccagagcggcctccaggggtgcagtctcagttccccacgccagccgtctccgt cctccgcctcctccgggcctggcaggtggcactgtccggaggcggagccttgggcgaggggtggttgcgg @@ -761,7 +761,7 @@ gacgaggccgactgctactgagcagaaccagagtctgccactggggctcaggaccaagggaggcagcacc atgtccttctgtgggacactgccagccccagggctccagcccagcccggtggatcctctggggaagccag gaccaggagagaagcaaggtcaagaaatcccacagtttgatgtattaaagaaatgacttatttctactca aaataaatggcattgaagtctttctttaa ->gi|530384534|ref|XM_005249642.1| PREDICTED: Homo sapiens BRCA1-associated ATM activator 1 (BRAT1), transcript variant X1, mRNA +>XM_005249642.1 PREDICTED: Homo sapiens BRCA1-associated ATM activator 1 (BRAT1), transcript variant X1, mRNA gcgaccggcttggcgcctgcctgtccccagcccctctcagcttgaactccttccttcaagtctgggccct cgaggcttccagagcggcctccaggggtgcagtctcagttccccacgccagccgtctccgtcctccgcct cctccgggcctggcaggtggcactgtccggaggcggagccttgggcgaggggtggttgcggcggaggacg @@ -807,7 +807,7 @@ tgctactgagcagaaccagagtctgccactggggctcaggaccaagggaggcagcaccatgtccttctgt gggacactgccagccccagggctccagcccagcccggtggatcctctggggaagccaggaccaggagaga agcaaggtcaagaaatcccacagtttgatgtattaaagaaatgacttatttctactcaaaataaatggca ttgaagtctttctttaa ->gi|530373237|ref|XM_005265508.1| PREDICTED: Homo sapiens BRCA1 associated protein-1 (ubiquitin carboxy-terminal hydrolase) (BAP1), transcript variant X2, mRNA +>XM_005265508.1 PREDICTED: Homo sapiens BRCA1 associated protein-1 (ubiquitin carboxy-terminal hydrolase) (BAP1), transcript variant X2, mRNA gcatgcccgcatctgctgtccgacaggcggaagacgagcccagaggcggagcagggccgtcgcgccttgg tgacgtctgccgccggcgcgggcgggtgacgcgactgggcccgttgtctgtgtgtgggactgaggggccc cgggggcggtgggggctcccggtgggggcagcggtggggagggagggcctggacatggcgctgaggggcc @@ -848,7 +848,7 @@ gaatagtcccagctggagagtccaggccctgggaatgggaggaaccaggccacattccttccatcgtgcc ctgaggcctgacacggcagatcagccccatagtgctcaggaggcagcatctggagttggggcacagcgag gtactgcagcttcctccacagccggctgtggagcagcaggacctggcccttctgcctgggcagcagaata tatattttacctatcagagacatctatttttctgggctccaacccaacatgccaccatgttgac ->gi|530373235|ref|XM_005265507.1| PREDICTED: Homo sapiens BRCA1 associated protein-1 (ubiquitin carboxy-terminal hydrolase) (BAP1), transcript variant X1, mRNA +>XM_005265507.1 PREDICTED: Homo sapiens BRCA1 associated protein-1 (ubiquitin carboxy-terminal hydrolase) (BAP1), transcript variant X1, mRNA gcatgcccgcatctgctgtccgacaggcggaagacgagcccagaggcggagcagggccgtcgcgccttgg tgacgtctgccgccggcgcgggcgggtgacgcgactgggcccgttgtctgtgtgtgggactgaggggccc cgggggcggtgggggctcccggtgggggcagcggtggggagggagggcctggacatggcgctgaggggcc @@ -890,7 +890,7 @@ agagtccaggccctgggaatgggaggaaccaggccacattccttccatcgtgccctgaggcctgacacgg cagatcagccccatagtgctcaggaggcagcatctggagttggggcacagcgaggtactgcagcttcctc cacagccggctgtggagcagcaggacctggcccttctgcctgggcagcagaatatatattttacctatca gagacatctatttttctgggctccaacccaacatgccaccatgttgac ->gi|530364726|ref|XR_241081.1| PREDICTED: Homo sapiens Mdm4 p53 binding protein homolog (mouse) (MDM4), transcript variant X4, misc_RNA +>XR_241081.1 PREDICTED: Homo sapiens Mdm4 p53 binding protein homolog (mouse) (MDM4), transcript variant X4, misc_RNA gtgtgggaggccggaagttgcggcttcattactcgccatttcaaaatgctgccgaggccctaggatctgt gactgccacccctccccccacccgggctcggcgggggagcgactcatggagctgccgtaagttttaccaa cagactgcagtttcttcactaccaaaatgacatcattttccacctctgctcagtgttcaacatctgacag @@ -906,7 +906,7 @@ gtgattgaagtgggaaaaaatgatgacctggaggactctaagtccttaagtgatgataccgatgtagagg ttacctctgaggatgagtggcagtgtactgaatgcaagaaatttaactctccaagcaagaggtactgttt tcgttgttgggccttgaggaaggattggtattcagattgttcaaagttaacccattctctctccacgtct gatatcactgccatacctgaaaaggaaaa ->gi|530364725|ref|XR_241080.1| PREDICTED: Homo sapiens Mdm4 p53 binding protein homolog (mouse) (MDM4), transcript variant X3, misc_RNA +>XR_241080.1 PREDICTED: Homo sapiens Mdm4 p53 binding protein homolog (mouse) (MDM4), transcript variant X3, misc_RNA gtgtgggaggccggaagttgcggcttcattactcgccatttcaaaatgctgccgaggccctaggatctgt gactgccacccctccccccacccgggctcggcgggggagcgactcatggagctgccgtaagttttaccaa cagactgcagtttcttcactaccaaaatgacatcattttccacctctgctcagtgttcaacatctgacag @@ -977,7 +977,7 @@ aaaggggccagactgtgttgccttcttgagcctggtctgactcctgagtggaagtctgattccaggtaca tgagataagcactaatacctccagtttgcagattaagagactgaggtcctgaagaggttaaagaacttgg ctcaagtcacatagctggtgagcagcaagatacaagaatcaacccaagtccagggggctgtgtgccgttt acacttcacatctgtgctgccagggctgtagctataaaagcttgaaaaccatta ->gi|530364724|ref|XR_241079.1| PREDICTED: Homo sapiens Mdm4 p53 binding protein homolog (mouse) (MDM4), transcript variant X2, misc_RNA +>XR_241079.1 PREDICTED: Homo sapiens Mdm4 p53 binding protein homolog (mouse) (MDM4), transcript variant X2, misc_RNA aagttgcggcttcattactcgccatttcaaaatgctgccgaggccctaggatctgtgactgccacccctc cccccacccgggctcggcgggggagcgactcatggagctgccgtaagttttaccaacagactgcagtttc ttcactaccaaaatgacatcattttccacctctgctcagtgttcaacatctgacagtgcttgcaggatct diff --git a/tests/test_Fasta_bgzip.py b/tests/test_Fasta_bgzip.py index 27f18f0..3bec1c6 100644 --- a/tests/test_Fasta_bgzip.py +++ b/tests/test_Fasta_bgzip.py @@ -25,7 +25,6 @@ def tearDown(self): except EnvironmentError: pass # some tests may delete this file - @expectedFailure def test_build_issue_126(self): """ Samtools BGZF index should be identical to pyfaidx BGZF index """ expect_index = ("gi|563317589|dbj|AB821309.1| 3510 114 70 71\n" @@ -121,6 +120,7 @@ def test_fetch_whole_entry(self): 'TACATTCTAGAGAAGGTGATTGAAGTGGGAAAAAATGATGACCTGGAGGACTC') result = faidx.fetch('gi|557361099|gb|KF435150.1|', 1, 481) + print(result) assert str(result) == expect def test_fetch_middle(self): @@ -185,7 +185,9 @@ def test_fetch_keyerror(self): def test_blank_string(self): """ seq[0:0] should return a blank string mdshw5/pyfaidx#53 """ fasta = Fasta('data/genes.fasta.gz', as_raw=True) - assert fasta['gi|557361099|gb|KF435150.1|'][0:0] == '' + result = fasta['gi|557361099|gb|KF435150.1|'][0:0] + print(result) + assert result == '' def test_slice_from_beginning(self): fasta = Fasta('data/genes.fasta.gz', as_raw=True)