forked from sgusev/GERMLINE
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGERMLINE.cpp
102 lines (83 loc) · 3.07 KB
/
GERMLINE.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
// GERMLINE.cpp: GEnetic Relationship Miner for LINear Extension
#include "GERMLINE.h"
#include "math.h"
#include <iostream>
using namespace std;
ofstream MATCH_FILE;
size_t num_samples;
unsigned long num_matches;
SNPs ALL_SNPS;
Individuals ALL_SAMPLES;
// GERMLINE(): default constructor
GERMLINE::GERMLINE()
{}
// mine(): main function for GERMLINE
void GERMLINE::mine( string params )
{
PolymorphicIndividualsExtractor * pie = inputManager.getPie();
inputManager.getIndividuals();
if ( ! pie->valid() ) return;
string out = inputManager.getOutput();
num_samples = 0;
num_matches = 0;
pie->loadInput();
MatchesBuilder mb( pie );
ofstream fout( ( out + ".log" ).c_str() );
fout << setw(65) << setfill('-') << ' ' << endl << setfill(' ');
fout << " Welcome to GERMLINE, a tool for detecting long segments shared" << endl;
fout << " by descent between pairs of individuals in near-linear time." << endl;
fout << endl;
fout << " For more details, please see the paper [ PMID: 18971310 ]" << endl;
fout << " or the web-site [ http://www.cs.columbia.edu/~gusev/germline/ ]" << endl;
fout << endl;
fout << " GERMLINE was coded by Alexander Gusev and collaborators in " << endl;
fout << " Itsik Pe'er's Computational Biology Lab at Columbia University" << endl;
fout << setw(65) << setfill('-') << ' ' << endl << setfill(' ');
if ( BINARY_OUT ) MATCH_FILE.open( ( out + ".bmatch" ).c_str() , ios::binary );
else MATCH_FILE.open( ( out + ".match" ).c_str() );
fout << params << endl;
fout << setw(65) << setfill('-') << ' ' << endl << setfill(' ');
fout << setw(50) << left << "Minimum match length: " << MIN_MATCH_LEN << " cM" << endl;
fout << setw(50) << "Allowed mismatching bits: " << MAX_ERR_HOM << " " << MAX_ERR_HET << endl;
fout << setw(50) << "Word size: " << MARKER_SET_SIZE << endl;
if ( ROI )
fout << setw(50) << "Target region: " << ALL_SNPS.getROIStart().getSNPID() << " - " << ALL_SNPS.getROIEnd().getSNPID() << endl;
else
fout << setw(50) << "Target region: " << "all" << endl;
time_t timer[2]; time( &timer[0] );
if ( DEBUG ) cout << "DEBUG MODE ON" << endl;
if ( ROI )
{
ALL_SNPS.beginChromosome();
num_sets = (long)ceil((double)ALL_SNPS.currentSize()/(double)MARKER_SET_SIZE);
mb.buildMatches();
ALL_SAMPLES.freeMatches();
ALL_SAMPLES.freeMarkers();
}
else
{
for ( ALL_SNPS.beginChromosome() ; ALL_SNPS.moreChromosome() ; ALL_SNPS.nextChromosome() )
{
num_sets = (long)ceil((double)ALL_SNPS.currentSize()/(double)MARKER_SET_SIZE);
mb.buildMatches();
if ( !SILENT ) cout << "Matches completed ... freeing memory" << endl;
ALL_SAMPLES.freeMatches();
ALL_SAMPLES.freeMarkers();
}
}
time( &timer[1] );
fout << setw(50) << "Total IBD segments: " << num_matches << endl;
fout << setw(50) << "Total runtime (sec): " << difftime( timer[1] , timer[0] ) << endl;
fout.close();
MATCH_FILE.close();
if ( BINARY_OUT )
{
ofstream bmid_out( ( out + ".bmid" ).c_str() );
ALL_SNPS.print( bmid_out );
bmid_out.close();
ofstream bsid_out( ( out + ".bsid" ).c_str() );
ALL_SAMPLES.print( bsid_out );
bsid_out.close();
}
}
// end GERMLINE.cpp