-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathNovelTSSPolyA.cpp
129 lines (87 loc) · 2.72 KB
/
NovelTSSPolyA.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>
#include <boost/algorithm/string.hpp>
#include <boost/algorithm/string/split.hpp>
#include <boost/lexical_cast.hpp>
using namespace std;
using namespace boost;
/**
 * One site record as read from a tab-separated input line.
 *
 * The struct holds std::string members, so it is not trivially copyable;
 * byte-wise copying routines (memcpy, qsort) are not safe on it.
 */
struct END {
    string chro;    // second tab-separated field of the input line (presumably the chromosome name)
    string strand;  // third tab-separated field of the input line (presumably "+" or "-")
    int pos;        // fourth tab-separated field, parsed as an integer; not initialised by default
};
int compare_END_sort (const void *a, const void *b) {
if( (*(END*) a).chro > (*(END*) b).chro ) return 1;
else if ( (*(END*) a).chro == (*(END*) b).chro ){
if( (*(END*) a).pos > (*(END*) b).pos ) return 1;
else return -1;
}
else return -1;
}
/**
 * Tells whether `a` sorts strictly after `b` in (chro, pos) order.
 *
 * @return 1 when `a` is strictly greater than `b`; -1 in every other case,
 *         including when both are equal. The function never returns 0.
 */
int compare_two_END (END a, END b) {
    // Same chro: position decides. Different chro: string order decides.
    const bool a_after_b = (a.chro == b.chro) ? (a.pos > b.pos)
                                              : (a.chro > b.chro);
    return a_after_b ? 1 : -1;
}
vector<END> BuildENDVector (ifstream &inf) {
vector<END> end_vec;
while(inf){
string strInput;
getline(inf, strInput);
if(strInput.length() > 0){
vector<string> vec;
split(vec, strInput, is_any_of("\t"));
END end;
end.chro = vec[1];
end.strand = vec[2];
end.pos = lexical_cast<int>(vec[3]);
end_vec.push_back(end);
}
}
qsort(&end_vec[0], end_vec.size(), sizeof(END), compare_END_sort);
return end_vec;
}
/**
 * Bisects `target_vec` (expected to be sorted ascending by (chro, pos)) to
 * find an index next to where `query` would fall.
 *
 * The window [lo, hi] is narrowed until it spans at most two adjacent
 * elements; the lower index is returned, so the caller should inspect both
 * the returned index and the one after it.
 *
 * @param query      record to locate
 * @param target_vec sorted records to search
 * @return lower index of the final window; 0 when the vector holds fewer
 *         than three elements (including when it is empty)
 */
int BinarySearch (END query, vector<END> &target_vec) {
    int lo = 0;
    int hi = target_vec.size() - 1;
    for (; hi - lo > 1; ) {
        const int mid = (lo + hi) / 2;
        // Element at mid sorts strictly after the query: shrink from the top,
        // otherwise shrink from the bottom.
        const bool mid_is_after_query = (compare_two_END(target_vec[mid], query) == 1);
        if (mid_is_after_query) {
            hi = mid;
        } else {
            lo = mid;
        }
    }
    return lo;
}
int main (int argc, char **argv) {
ifstream annoinf(argv[1]);
vector<END> end_vec = BuildENDVector(annoinf);
annoinf.close();
ifstream inf(argv[2]);
while(inf){
string strInput;
getline(inf, strInput);
if(strInput.length() > 0){
vector<string> vec;
split(vec, strInput, is_any_of("\t"));
END query;
query.chro = vec[1];
query.strand = vec[2];
query.pos = lexical_cast<int>(vec[3]);
int locate = BinarySearch (query, end_vec);
bool match = false;
if(query.chro == end_vec[locate].chro && query.strand == end_vec[locate].strand && abs(query.pos - end_vec[locate].pos) < 30) match = true;
else if(locate < end_vec.size() - 1 && query.chro == end_vec[locate + 1].chro && query.strand == end_vec[locate + 1].strand && abs(query.pos - end_vec[locate + 1].pos) < 30) match = true;
else ;
if(!match) cout << strInput << endl;
}
}
annoinf.close(); inf.close();
return 0;
}