-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathstats_single.h
137 lines (96 loc) · 2.81 KB
/
stats_single.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#ifndef STATS_H
#define STATS_H
#include <algorithm>
#include <cmath>
#include <iostream>
using namespace std;
#include "mathutil.h"
typedef RecursiveSeries<Harmonic_Rec<double, int> > Harmonic;
typedef RecursiveSeries<Harmonic_2_Rec<double, int> > Harmonic_2;
extern Harmonic harmonic;
extern Harmonic_2 harmonic_2;
// number of polymorphic sites
template<class SET_ITER>
int n_segregating_sites(const SET_ITER & start, const SET_ITER & stop)
{
int n = 0;
for (SET_ITER set = start; set != stop; set++)
if (set->n_alleles() > 1)
n++;
return n;
}
// sum of pairwise differences over all sites
template<class SET_ITER>
int pairwise_differences(const SET_ITER & start, const SET_ITER & stop)
{
int sum = 0;
for (SET_ITER set=start; set!=stop; set++)
sum += set->pairwise_difference();
return sum;
}
// number of singletons over all sites
template<class SET_ITER>
int count_singletons(const SET_ITER & start, const SET_ITER & stop)
{
int count = 0;
for (SET_ITER set=start; set!=stop; set++)
count += set->n_singletons();
return count;
}
template<class SET_ITER, class SEQ_ITER>
int count_singletons_for_seq(
const SEQ_ITER & seq_start, const SEQ_ITER & seq_stop,
// !! implicitly assumes same size for seq and sets
const SET_ITER & set_start)
{
int count = 0;
SET_ITER s = set_start;
// go through sites
for (SEQ_ITER i=seq_start; i!=seq_stop; i++, s++)
if (s->is_singleton(*i))
count++;
return count;
}
inline double theta_pi(int n_sequences, int sum_pair_diff)
{
return sum_pair_diff * 2.0 / (n_sequences * (n_sequences-1));
}
inline double theta_W(int n_sequences, int n_segr)
{
return n_segr / harmonic(n_sequences-1);
}
// Tajima's D
double DTajima(int n_sequences, int n_segsites, double theta_pi);
inline double c_sub_n(int n_sequences)
{
if (n_sequences <= 2)
return 1.0;
const double a = harmonic(n_sequences - 1);
const double c = 2 * (n_sequences * a - 2 * (n_sequences - 1));
return c / ((n_sequences - 1) * (n_sequences - 2));
}
inline double d_sub_n(int n_sequences)
{
const double a_n_plus1 = harmonic(n_sequences);
const int nsm1 = n_sequences - 1;
const int nsm2 = n_sequences - 2;
double d = c_sub_n(n_sequences) + double(nsm2) / (nsm1 * nsm1);
d += (2.0/nsm1) * (1.5 - (2*a_n_plus1 - 3)/nsm2 - 1.0/n_sequences);
return d;
}
double fu_li_Fstar(int n_sequences, int n_singletons, int n_mutations, double pi);
double fu_li_Dstar(int n_sequences, int n_singletons, int n_mutations);
template<class ITER>
double R2(const ITER & start, const ITER & stop, double pi, int S)
{
if (S == 0 || start==stop) return undefined();
double sm = 0.0;
int c = 0;
for (ITER i=start; i!=stop; i++, c++)
sm += (*i - pi/2.0) * (*i - pi/2.0);
sm = sqrt(sm/c) / S;
if (sm < 1.0E-15)
sm = 0.0;
return sm;
}
#endif // STATS_H