forked from ocxtal/dozeu
-
Notifications
You must be signed in to change notification settings - Fork 0
/
example.2bit.c
116 lines (100 loc) · 3.8 KB
/
example.2bit.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
/**
* Aligning ACACTTCTAGACTTTACCACTA to the following graph;
* sequences are encoded as follows: {'A','C','G','T'} -> {0x0,0x1,0x2,0x3}
* 6
* A
* 0 1 3 5 /7\9
* ACAC-TTGT-AGAC---TTCTA-C-CACGG
* \ / \ / \ /
* ATCC T G
* 2 4 8
*/
/* make sure POSIX APIs are properly activated */
#if defined(__linux__) && !defined(_POSIX_C_SOURCE)
# define _POSIX_C_SOURCE 200112L
#endif
#if defined(__darwin__) && !defined(_BSD_SOURCE)
# define _BSD_SOURCE
#endif
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#define UNITTEST 1
#define DZ_NUCL_2BIT /* use 2bit encoding for input sequences */
#define DZ_FULL_LENGTH_BONUS /* use full-length bonus feature */
#define DZ_CIGAR_OP 0x44493d58 /* 'D', 'I', '=', 'X'; the default is 0x04030201 */
#include "dozeu.h"
int main(int argc, char *argv[])
{
unittest_main(argc, argv); /* just for testing */
/* init score matrix and memory arena */
int8_t const M = 2, X = -3, GI = 5, GE = 1; /* match, mismatch, gap open, and gap extend; g(k) = GI + k + GE for k-length gap */
int8_t const xdrop_threshold = 70, full_length_bonus = 10;
int8_t max_gap_length = (xdrop_threshold - GI) / GE;
int8_t const score_matrix[16] = {
/* ref-side */
/* A C G T */
/* 00 01 10 11 */
/* A 00 */ M, X, X, X,
/* query- C 01 */ X, M, X, X,
/* side G 10 */ X, X, M, X,
/* T 11 */ X, X, X, M
};
struct dz_s *dz = dz_init(
score_matrix,
GI, GE
);
/* Initialize root of alignment */
struct dz_alignment_init_s aln_init = dz_align_init(dz, max_gap_length);
/* pack query */
char const *query = "\x0\x1\x0\x1\x3\x3\x1\x3\x0\x2\x0\x1\x3\x3\x3\x0\x1\x1\x0\x1\x3\x0";
struct dz_query_s const *q = dz_pack_query_forward(
dz,
query, /* char const *seq: query sequence */
full_length_bonus,
22 /* length */
);
/* init node array */
struct dz_forefront_s const *ff[10] = { 0 };
/* fill root: node 0 */
ff[0] = dz_extend(
dz, q,
&aln_init.root, 1, /* struct dz_forefront_s const *ff[], uint64_t n_ffs; incoming forefront and degree; dz_root(dz) for root node */
"\x0\x1\x0\x1", 4, 0, /* reference-side sequence, its length, and node id */
aln_init.xt /* x-drop threshold */
);
/* branching paths: 1, 2 -> 3 */
ff[1] = dz_extend(dz, q, &ff[0], 1, "\x3\x3\x2\x3", 4, 1, aln_init.xt);
ff[2] = dz_extend(dz, q, &ff[0], 1, "\x0\x3\x1\x1", 4, 2, aln_init.xt);
ff[3] = dz_extend(dz, q, &ff[1], 2, "\x0\x2\x0\x1", 4, 3, aln_init.xt); /* "&ff[1], 2" indicates ff[1] and ff[2] are incoming nodes */
/* insertion: -, 4 -> 5 */
ff[4] = dz_extend(dz, q, &ff[3], 1, "\x3", 1, 4, aln_init.xt);
ff[5] = dz_extend(dz, q, &ff[3], 2, "\x3\x3\x1\x3\x0", 5, 5, aln_init.xt); /* "&ff[3], 2" indicates ff[3] and ff[4] are incoming nodes */
/* SNVs: 6, 7, 8 -> 9 */
ff[6] = dz_extend(dz, q, &ff[5], 1, "\x0", 1, 6, aln_init.xt);
ff[7] = dz_extend(dz, q, &ff[5], 1, "\x1", 1, 7, aln_init.xt);
ff[8] = dz_extend(dz, q, &ff[5], 1, "\x2", 1, 8, aln_init.xt);
ff[9] = dz_extend(dz, q, &ff[6], 3, "\x1\x0\x1\x2\x2", 5, 9, aln_init.xt);
/* detect max */
struct dz_forefront_s const *max = NULL;
for(size_t i = 0; i < 10; i++) {
if(max == NULL || ff[i]->max > max->max) { max = ff[i]; }
}
/* traceback */
struct dz_alignment_s const *aln = dz_trace(
dz,
max
);
printf("ref_length(%u), query_length(%u), score(%d), path(%s)\n", aln->ref_length, aln->query_length, aln->score, aln->path);
for(size_t i = 0; i < aln->span_length; i++) {
struct dz_path_span_s const *s = &aln->span[i];
printf("node_id(%u), subpath_length(%u), subpath(%.*s)\n",
s->id,
s[1].offset - s[0].offset,
s[1].offset - s[0].offset, &aln->path[s->offset]
);
}
/* clean up */
dz_destroy(dz);
return(0);
}