-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathexample.c
129 lines (97 loc) · 3.11 KB
/
example.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
/**
* @file example.c
* @author Xiyu Peng
*
* An example for calling amplici_core function
*
* Note about formatting. Line widths are at 80 characters, not because we live
* in the 60's but to help force good coding and to reduce complexity. Function
* predeclarations may break this rule so that the entire prototype can be
* found with a simple grep on the source code.
*/
#include <stdlib.h>
#include "libamplici.h"
#include "fastq.h"
#include "error.h"
#include "amplici.h"
#include "io.h"
int main()
{
int err = NO_ERROR;
/* read two input files */
char *fastq_file = "./test/SRR2990088_1_noN_3000_subset1_new.fastq";
char *error_profile_name = NULL;
char *output_file = "test.out";
double low_bound = 2.0;
/* initialize output */
unsigned int K = 0;
unsigned int *cluster_id = NULL;
unsigned int *cluster_size = NULL;
unsigned char *seeds = NULL;
unsigned int *seeds_length = NULL;
double *ll = NULL;
double *abun = NULL;
/* initialize input */
fastq_options *fqo = NULL; /* fastq file options */
fastq_data *fdata = NULL;
unsigned char **dmat = NULL;
unsigned char **qmat = NULL;
if ((err = make_fastq_options(&fqo)))
goto CLEAR_AND_EXIT;
/* encode nucleotides in 2-bits: error raised if ambiguous bases */
fqo->read_encoding = XY_ENCODING;
/* read sequence data */
if (!fastq_file || (err = read_fastq(fastq_file,
&fdata, fqo)))
goto CLEAR_AND_EXIT;
size_t sample_size = fdata->n_reads;
unsigned int max_read_length = fdata->n_max_length;
dmat = malloc(sample_size * sizeof *dmat);
if (!dmat)
goto CLEAR_AND_EXIT;
unsigned char *rptr = fdata->reads;
for (size_t i = 0; i < sample_size; ++i) {
dmat[i] = rptr;
rptr += max_read_length;
}
/* allocate short-cut pointers to quality sequences */
qmat = malloc(sample_size * sizeof *qmat);
if (!qmat)
goto CLEAR_AND_EXIT;
unsigned char *qptr = fdata->quals;
for (size_t i = 0; i < sample_size; i++) {
qmat[i] = qptr;
qptr += max_read_length;
}
/* amplicon clustering */
if((amplici_core(dmat, qmat, sample_size, low_bound, max_read_length,
error_profile_name, fdata->max_quality, fdata->min_quality,
&seeds, &seeds_length, &cluster_id,
&cluster_size, &K, &ll, &abun)))
goto CLEAR_AND_EXIT;
/* print and check */
FILE *fp = NULL;
fp = fopen(output_file, "w");
if (!fp)
goto CLEAR_AND_EXIT;
fprintf(fp, "K: %i\n", K);
fprintf(fp, "assignments: ");
fprint_assignment(fp, cluster_id, sample_size,
K, 2, 1);
fprintf(fp, "cluster sizes: ");
fprint_uints(fp, cluster_size, K, 3, 1);
fprint_fasta(fp, seeds, K, max_read_length, seeds_length, "H");
fclose(fp);
CLEAR_AND_EXIT:
if(fdata) free_fastq(fdata);
if(fqo) free_fastq_options(fqo);
if (dmat) free(dmat);
if (qmat) free(qmat);
if (cluster_id) free(cluster_id);
if (cluster_size) free(cluster_size);
if (seeds) free(seeds);
if (seeds_length) free(seeds_length);
if (ll) free(ll);
if (abun) free(abun);
return(EXIT_FAILURE);
}/* main */