// UST // --- VERSION 1.0 ---- // Author: amatur, Last Edited: Nov 1 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace std; bool DEBUGMODE = false; int K = 0; string UNITIG_FILE = "examples/k11.unitigs.fa"; string OUTPUT_FILENAME; enum DEBUGFLAG_T { NONE = 0, UKDEBUG = 6, VERIFYINPUT = 1, INDEGREEPRINT = 2, DFSDEBUGG = 3, PARTICULAR = 4, NODENUMBER_DBG = 5, OLDNEWMAP = 9, PRINTER = 10, SINKSOURCE = 12}; enum ALGOMODE_T { BASIC = 0, INDEGREE_DFS = 1, INDEGREE_DFS_1 = 2, OUTDEGREE_DFS = 3, OUTDEGREE_DFS_1 = 4, INDEGREE_DFS_INVERTED = 5, PLUS_INDEGREE_DFS = 6, RANDOM_DFS = 7, NODEASSIGN = 8, SOURCEFIRST = 9, TWOWAYEXT = 10, PROFILE_ONLY = 11, EPPRIOR=12, GRAPHPRINT = 13, TIGHTUB = 14, BRACKETCOMP = 15}; bool FLG_NEWUB = true; bool FLG_ABUNDANCE = false; DEBUGFLAG_T DBGFLAG = NONE; //NODENUMBER_DBG ALGOMODE_T ALGOMODE = TWOWAYEXT; string mapmode[] = {"basic", "indegree_dfs", "indegree_dfs_initial_sort_only", "outdegree_dfs", "outdegree_dfs_initial_sort_only", "inverted_indegree_dfs", "plus_indegree_dfs", "random_dfs", "node_assign", "source_first", "twoway", "profile_only", "endpoint_priority", "graph_print", "tight_ub", "tip" }; string modefilename[] = {"Fwd", "indegree_dfs", "indegree_dfs_initial_sort_only", "outdegree_dfs", "outdegree_dfs_initial_sort_only", "inverted_indegree_dfs", "plus_indegree_dfs", "random_dfs", "node_assign", "source_first", "", "profile_only", "endpoint_priority", "graph_print", "tight_ub", "Tip" }; typedef tuple fourtuple; // uid, walkid, pos, isTip bool sort_by_walkId (const fourtuple &lhs, const fourtuple &rhs){ return get<1>(lhs) < get<1>(rhs); } bool sort_by_pos (const fourtuple &lhs, const fourtuple &rhs){ return get<2>(lhs) < get<2>(rhs); } bool sort_by_tipstatus (const fourtuple &lhs, const fourtuple &rhs){ return get<3>(lhs) < get<3>(rhs); } typedef struct { int serial = -1; int startPosWithKOverlap; int endPosWithKOVerlap; bool isWalkEnd = false; int pos_in_walk = -100; int finalWalkId = -1; // renders some walkId as invalid int isTip = 0; } new_node_info_t; typedef struct { int serial; int ln; } unitig_struct_t; typedef struct { //1 means +, 0 means - bool left; bool right; int toNode; } edge_t; typedef struct { edge_t edge; int fromNode; } edge_both_t; typedef struct { edge_t edge; int kmerStartIndex; int kmerEndIndex; } newEdge_t; int C_ustitch = 0; int C_twoway_ustitch = 0; int C_tip_ustitch = 0; int V_ustitch = 0; int V_twoway_ustitch = 0; int V_tip_ustitch = 0; int isolated_node_count = 0; int sink_count = 0; int source_count = 0; int sharedparent_count = 0; int sharparentCntRefined = 0; int onecount = 0; struct node_sorter { int node; int sortkey; //bool operator() (struct node_sorter i, struct node_sorter j) { return (i.sortkeyj.sortkey); } int* global_indegree; int* global_outdegree; int* global_plusindegree; int* global_plusoutdegree; int* global_issinksource; //int* global_priority; map, int> inOutCombo; vector > adjList; vector > reverseAdjList; vector unitigs; map newSequences; map newNewSequences; //int is the unitig id (old id) set newNewMarker; vector > newToOld; vector walkFirstNode; //given a walk id, what's the first node of that walk unordered_map > sinkSrcEdges; //int is the unitig id (old id) string getFileName(const string& s) { char sep = '/'; size_t i = s.rfind(sep, s.length()); if (i != string::npos) { return(s.substr(i+1, s.length() - i)); } return(""); } inline string plus_strings(const string& a, const string& b, size_t kmersize) { if (a == "") return b; if (b == "") return a; string ret = a + b.substr(kmersize - 1, b.length() - (kmersize - 1)); return ret; } string delSpaces(string &str) { str.erase(std::remove(str.begin(), str.end(), ' '), str.end()); return str; } bool charToBool(char c) { if (c == '+') { return true; } else { if (c != '-') cout << "Erroneus character in BCALM2 output." << endl; return false; } } string reverseComplement(string base) { size_t len = base.length(); char* out = new char[len + 1]; out[len] = '\0'; for (int i = 0; i < len; i++) { if (base[i] == 'A') out[len - i - 1] = 'T'; else if (base[i] == 'C') out[len - i - 1] = 'G'; else if (base[i] == 'G') out[len - i - 1] = 'C'; else if (base[i] == 'T') out[len - i - 1] = 'A'; } string outString(out); free(out); return outString; } double readTimer() { return clock() / (double) CLOCKS_PER_SEC; } inline string currentDateTime() { // Get current date/time, format is YYYY-MM-DD HH:mm:ss time_t now = time(0); struct tm tstruct; char buf[80]; tstruct = *localtime(&now); strftime(buf, sizeof (buf), "%Y-%m-%d %X\n", &tstruct); return buf; } int countOutArcs(int node) { return (adjList.at(node)).size(); } inline char boolToCharSign(bool sign) { return (sign == true) ? '+' : '-'; } // @@ --- ALL PRINTING CODE --- // void printBCALMGraph(vector > adjList) { for (int i = 0; i < adjList.size(); i++) { cout << i << "# "; for (edge_t edge : adjList.at(i)) { cout << boolToCharSign(edge.left) << ":" << edge.toNode << ":" << boolToCharSign(edge.right) << ", "; } cout << endl; } } class GroupMerger { public: map fwdVisited; map bwdVisited; map bwdWalkId; map fwdWalkId; GroupMerger() { } void connectGroups(int from, int to){ fwdVisited[from] = false; bwdVisited[to] = false; fwdWalkId[from] = to; bwdWalkId[to] = from; } ~GroupMerger() { } }; class DisjointSet { unordered_map parent; public: DisjointSet() { } void make_set(int id) { this->parent[id] = -1; } void Union(int xId, int yId) { int xset = find_set(xId); int yset = find_set(yId); if(xset != yset) { parent[xset] = yset; } } int find_set(int id) { if (parent[id] == -1) return id; return find_set(parent[id]); } ~DisjointSet(){ } }; class Graph { public: size_t V = adjList.size(); int countNewNode = 0; int time = 0; char* color; int* p_dfs; bool* nodeSign; new_node_info_t* oldToNew; bool* saturated; struct node_sorter * sortStruct; bool* countedForLowerBound; DisjointSet disSet; GroupMerger gmerge; Graph() { color = new char[V]; p_dfs = new int[V]; nodeSign = new bool[V]; oldToNew = new new_node_info_t[V]; saturated = new bool[V]; sortStruct = new struct node_sorter[V]; global_indegree = new int[V]; global_outdegree = new int[V]; global_plusindegree = new int[V]; global_plusoutdegree = new int[V]; global_issinksource = new int[V]; //global_priority = new int[V]; countedForLowerBound = new bool[V]; for (int i = 0; i < V; i++) { if(ALGOMODE == TWOWAYEXT || ALGOMODE == BRACKETCOMP ){ disSet.make_set(i); } oldToNew[i].serial = -1; saturated[i] = false; sortStruct[i].sortkey = 0; sortStruct[i].node = i; global_indegree[i] = 0; global_outdegree[i] = 0; global_plusindegree[i] = 0; global_plusoutdegree[i] = 0; global_issinksource[i] = 0; //global_priority[i] = 0; countedForLowerBound[i] = false; } } inline bool sRight(edge_t plusminusedge){ return !(plusminusedge.right == true); } inline bool sLeft(edge_t plusminusedge){ return (plusminusedge.left == true); } void indegreePopulate(){ int xc = 0; for(vector elist: adjList){ for(edge_t e: elist){ global_indegree[e.toNode] += 1; sortStruct[e.toNode].sortkey = sortStruct[e.toNode].sortkey + 1; if(e.right == true){ global_plusindegree[e.toNode] += 1; } if(e.left == true){ global_plusoutdegree[xc] += 1; } } global_outdegree[xc] = elist.size(); xc++; } for(int i = 0; i<5; i++){ for(int j = 0; j<5; j++){ inOutCombo[make_pair(i,j)] = 0; } } for(int i = 0; i a; a = make_pair(global_plusindegree[i], global_plusoutdegree[i]); inOutCombo[a] = (inOutCombo.count(a) ? inOutCombo[a] + 1 : 1 ); if(DBGFLAG == SINKSOURCE){ cout< elist: adjList){ int neighborCount = 0; int spNeighborCount[2]; spNeighborCount[0]=0; spNeighborCount[1]=0; stack countedNodes; set > countedSides; //if(true){ if(FLG_NEWUB == true){ //ENDPOINT SIDE UPPER BOUND - improved for(edge_t e_xy: elist){ //central node: all neighbors of x int y = e_xy.toNode; vector adjY = adjList[y]; bool eligibleSp = true; //pair pairr; for(edge_t e_from_y : adjY){ // check if this neighbor is speacial //pairr =make_pair(e_from_y.toNode, sRight(e_xy) ); if(e_from_y.toNode!=xc){ if(sRight(e_xy) == sLeft(e_from_y)){ eligibleSp = false; break; } } } if(eligibleSp){ spNeighborCount[sLeft(e_xy)]++; } } if(spNeighborCount[0]>1){ sharedparent_count += spNeighborCount[0] - 1 ; } if(spNeighborCount[1]>1){ sharedparent_count += spNeighborCount[1] - 1 ; } } if(FLG_NEWUB == false){ //ENDPOINT SIDE UPPER BOUND for(edge_t e_xy: elist){ int y = e_xy.toNode; vector adjY = adjList[y]; bool eligible = true; pair pairr; for(edge_t e_from_y : adjY){ pairr =make_pair(e_from_y.toNode, sRight(e_xy) ); if(e_from_y.toNode!=xc){ if(sRight(e_xy) == sLeft(e_from_y)){ eligible = false; break; } } } if(eligible){ neighborCount++; } } if(global_issinksource[xc] == 1){ if(neighborCount>1){ sharedparent_count += neighborCount - 1 ; } }else{ if(neighborCount>2){ sharedparent_count += neighborCount - 2 ; } } } //sharedparent_count_wrong =sharedparent_count; //if(true){ if(1==0){ // OLDER UPPER BOUND CALC int neighborCount = 0; for(edge_t e_xy: elist){ int y = e_xy.toNode; if(!countedForLowerBound[y]){ vector adjY = adjList[y]; bool eligible = true; for(edge_t e_from_y : adjY){ if(e_from_y.toNode!=xc){ if(sRight(e_xy) == sLeft(e_from_y) ){ eligible = false; break; } } } if(eligible){ countedForLowerBound[y] = true; //global_priority[y] = 4; neighborCount++; countedNodes.push(y); } } } if(global_issinksource[xc] == 1){ if(neighborCount>1){ sharedparent_count += neighborCount - 1 ; }else{ while(!countedNodes.empty()){ countedForLowerBound[countedNodes.top()] = false; countedNodes.pop(); } } }else{ if(neighborCount>2){ sharedparent_count += neighborCount - 2 ; }else{ while(!countedNodes.empty()){ countedForLowerBound[countedNodes.top()] = false; countedNodes.pop(); } } } } xc++; } //check if not ALGOMODE == INDEGREE_DFS_INVERTED delete [] global_indegree; delete [] global_outdegree; delete [] global_plusindegree; delete [] global_plusoutdegree; } void DFS_visit(int u) { if(ALGOMODE == BRACKETCOMP){ if(global_issinksource[u]==1){ vector adju = adjList.at(u); vector myvector; for (edge_t e : adju) { myvector.push_back(e); } sinkSrcEdges[u] = myvector; return; } } stack s; edge_t uEdge; uEdge.toNode = u; s.push(uEdge); while (!s.empty()) { edge_t xEdge = s.top(); int x = xEdge.toNode; s.pop(); if (color[x] == 'w') { //Original DFS code time = time + 1; color[x] = 'g'; s.push(xEdge); vector adjx = adjList.at(x); if(ALGOMODE == RANDOM_DFS){ random_shuffle ( adjx.begin(), adjx.end() ); } // if(ALGOMODE == EPPRIOR){ // sort( adjx.begin( ), adjx.end( ), [ ]( const edge_t& lhs, const edge_t& rhs ) // { // return global_priority[lhs.toNode] < global_priority[rhs.toNode] ; // }); // } if(ALGOMODE == INDEGREE_DFS){ sort( adjx.begin( ), adjx.end( ), [ ]( const edge_t& lhs, const edge_t& rhs ) { return global_indegree[lhs.toNode] < global_indegree[rhs.toNode]; }); } if(ALGOMODE == PLUS_INDEGREE_DFS){ sort( adjx.begin( ), adjx.end( ), [ ]( const edge_t& lhs, const edge_t& rhs ) { return global_indegree[lhs.toNode] - global_plusindegree[lhs.toNode] > global_indegree[lhs.toNode] - global_plusindegree[rhs.toNode]; }); } if(ALGOMODE == INDEGREE_DFS_INVERTED){ sort( adjx.begin( ), adjx.end( ), [ ]( const edge_t& lhs, const edge_t& rhs ) { return global_indegree[lhs.toNode] > global_indegree[rhs.toNode]; }); } if (ALGOMODE == OUTDEGREE_DFS){ if(p_dfs[x] == -1){ sort( adjx.begin( ), adjx.end( ), [ ]( const edge_t& lhs, const edge_t& rhs ) { return global_outdegree[lhs.toNode] < global_outdegree[rhs.toNode]; }); }else if (nodeSign[x] == false){ sort( adjx.begin( ), adjx.end( ), [ ]( const edge_t& lhs, const edge_t& rhs ) { return global_outdegree[lhs.toNode] - global_plusoutdegree[lhs.toNode] < global_outdegree[rhs.toNode] - global_plusoutdegree[rhs.toNode]; }); }else if (nodeSign[x] == true){ sort( adjx.begin( ), adjx.end( ), [ ]( const edge_t& lhs, const edge_t& rhs ) { return global_plusoutdegree[lhs.toNode] < global_plusoutdegree[rhs.toNode]; }); } } // Now our branching code :: // For a white x // Consider 2 case: // Case 1. p[x] = -1, it can happen in two way, x is the first one ever in this connected component, or no one wanted to take x // either way, if p[x] = -1, i can be representative of a new node in new graph // Case 2. p[x] != -1, so x won't be the representative/head of a newHome. x just gets added to its parent's newHome. int u = unitigs.at(x).ln; //unitig length if (p_dfs[x] == -1) { list xxx; xxx.push_back(x); newToOld.push_back(xxx); oldToNew[x].serial = countNewNode++; // countNewNode starts at 0, then keeps increasing oldToNew[x].finalWalkId = oldToNew[x].serial; //added while doing bracket comp walkFirstNode.push_back(x); oldToNew[x].pos_in_walk = 1; oldToNew[x].startPosWithKOverlap = 1; if (u < K) { oldToNew[x].endPosWithKOVerlap = 1; // do we actually see this? yes if(DBGFLAG == UKDEBUG){ cout<< "node: "<< x<<"u< k ***** u = "< myvector (sortStruct, sortStruct+V); sort (myvector.begin(), myvector.end(), sort_by_key_inverted); copy(myvector.begin(), myvector.end(), sortStruct); } // if(ALGOMODE == EPPRIOR){ // for (int i = 0; i < V; i++) { // sortStruct[i].node = i; // sortStruct[i].sortkey = global_priority[i]; // } // vector myvector (sortStruct, sortStruct+V); // sort (myvector.begin(), myvector.end(), sort_by_key_inverted); // copy(myvector.begin(), myvector.end(), sortStruct); // } if(ALGOMODE == INDEGREE_DFS_INVERTED){ for (int i = 0; i < V; i++) { sortStruct[i].node = i; sortStruct[i].sortkey = global_indegree[i]; } vector myvector (sortStruct, sortStruct+V); sort (myvector.begin(), myvector.end(), sort_by_key_inverted); //random_shuffle ( myvector.begin(), myvector.end() ); copy(myvector.begin(), myvector.end(), sortStruct); } if (ALGOMODE == INDEGREE_DFS || ALGOMODE == INDEGREE_DFS_1 ){ for (int i = 0; i < V; i++) { sortStruct[i].node = i; sortStruct[i].sortkey = global_indegree[i]; } vector myvector (sortStruct, sortStruct+V); sort (myvector.begin(), myvector.end(), sort_by_key); copy(myvector.begin(), myvector.end(), sortStruct); if(DBGFLAG == INDEGREEPRINT){ cout<<"print in degrees"<"< myvector (sortStruct, sortStruct+V); sort (myvector.begin(), myvector.end(), sort_by_key); random_shuffle ( myvector.begin(), myvector.end() ); copy(myvector.begin(), myvector.end(), sortStruct); } if (ALGOMODE == OUTDEGREE_DFS || ALGOMODE == OUTDEGREE_DFS_1){ for (int i = 0; i < V; i++) { sortStruct[i].node = i; sortStruct[i].sortkey = global_outdegree[i]; } vector myvector (sortStruct, sortStruct+V); sort (myvector.begin(), myvector.end(), sort_by_key); copy(myvector.begin(), myvector.end(), sortStruct); } double time_a = readTimer(); for (int i = 0; i < V; i++) { color[i] = 'w'; p_dfs[i] = -1; } cout<<"Basic V loop time: "< lst; lst.push_back(fromnode); lst.push_back(tonode); gmerge.fwdVisited[fromnode] = true; gmerge.bwdVisited[tonode] = true; if(gmerge.fwdVisited.count(tonode)>0){ while(gmerge.fwdVisited[tonode] == false){ gmerge.fwdVisited[tonode] = true; tonode = gmerge.fwdWalkId[tonode]; gmerge.bwdVisited[tonode] = true; lst.push_back(tonode); if(gmerge.fwdVisited.count(tonode)==0) break; } } if(gmerge.bwdVisited.count(fromnode)>0){ while(gmerge.bwdVisited[fromnode] == false){ gmerge.bwdVisited[fromnode] = true; fromnode = gmerge.bwdWalkId[fromnode]; gmerge.fwdVisited[fromnode] = true; lst.push_front(fromnode); if(gmerge.bwdVisited.count(fromnode)==0) break; } } int headOfThisWalk = walkFirstNode[lst.at(0)]; //CHECK AGAIN assert(!lst.empty()); int commonWalkId = lst.at(0); int posOffset = 1; int lastWalk = -1; for(auto i: lst){ // i is new walk id before merging merged[i] = true; walkFirstNode[i] = headOfThisWalk; // travesing the walk list of walk ID i for(int uid: newToOld[i]){ oldToNew[uid].serial = commonWalkId; oldToNew[uid].finalWalkId = commonWalkId; oldToNew[uid].pos_in_walk = posOffset++; } } oldToNew[newToOld[lst.back()].back()].isWalkEnd = true; V_twoway_ustitch ++; } } for (int newNodeNum = 0; newNodeNum sorter; for(int uid = 0 ; uid< V; uid++){ new_node_info_t nd = oldToNew[uid]; sorter.push_back(make_tuple(uid, nd.finalWalkId, nd.pos_in_walk, nd.isTip)); } //stable_sort(sorter.begin(),sorter.end(),sort_by_tipstatus); stable_sort(sorter.begin(),sorter.end(),sort_by_pos); stable_sort(sorter.begin(),sorter.end(),sort_by_walkId); ofstream uidSequence; string uidSeqFilename = "uidSeq.usttemp"; //"uidSeq"+ mapmode[ALGOMODE] +".txt" uidSequence.open(uidSeqFilename); int finalUnitigSerial = 0; for(fourtuple n : sorter){ int uid = get<0>(n); int bcalmid = unitigs.at(uid).serial; // int finalWalkId = get<1>(n); // int pos_in_walk = get<2>(n); // int isTip = get<3>(n); // cout< seq.usttemp").c_str()); system("sort -n -k 2 -o uidSeq.usttemp uidSeq.usttemp"); if(FLG_ABUNDANCE){ system(("awk '(NR%2)' "+UNITIG_FILE+" | cut -f 5 -d ':' | cut -f 1 -d 'L' > count.usttemp").c_str()); // get a separate count file system("paste -d' ' uidSeq.usttemp seq.usttemp count.usttemp > merged.usttemp "); system("sort -n -k 1 -o merged.usttemp merged.usttemp"); system(("cat merged.usttemp | awk '{for (i=4;i<=NF;i+=1) print $i}' > "+getFileName(UNITIG_FILE)+".ust.counts").c_str()); }else{ system("paste -d' ' uidSeq.usttemp seq.usttemp > merged.usttemp "); system("sort -n -k 1 -o merged.usttemp merged.usttemp"); } system("cat merged.usttemp | cut -d' ' -f3 > seq.usttemp"); ifstream sequenceStringFile ("seq.usttemp"); ofstream ustOutputFile (OUTPUT_FILENAME); //ofstream smallKFile("smallK.fa"); //both string and abundance sort //keep string only and output //open the string file if(100==100){ int lastWalk = -1; string walkString = ""; string unitigString = ""; for(fourtuple n : sorter){ int uid = get<0>(n); int finalWalkId = get<1>(n); int pos_in_walk = get<2>(n); //for each line in file string sequenceFromFile = "";//getline getline (sequenceStringFile,sequenceFromFile); if(nodeSign[uid] == false){ unitigString = reverseComplement(sequenceFromFile); }else{ unitigString = sequenceFromFile; } if(finalWalkId!=lastWalk){ if(lastWalk != -1){ //print previous walk //ustOutputFile<<">"<=K){ ustOutputFile<<">"<\n"<< walkString+"A" <\n"<< walkString+"C" <\n"<< walkString+"G" <\n"<< walkString+"T" <\n"<< "A"+walkString <\n"<< "C"+walkString <\n"<< "G"+walkString <\n"<< "T"+walkString <"<=K){ ustOutputFile<<">"<\n"<< walkString+"A" <\n"<< walkString+"C" <\n"<< walkString+"G" <\n"<< walkString+"T" <\n"<< "A"+walkString <\n"<< "C"+walkString <\n"<< "G"+walkString <\n"<< "T"+walkString < m){ m = u.ln; } } return m; } void printNewGraph(Graph &G){ list *newToOld = new list[G.countNewNode]; for (int i = 0; i < G.V; i++) { newToOld[G.oldToNew[i].serial].push_back(i); cout << "old " << i << "-> new" << G.oldToNew[i].serial << endl; } for (int i = 0; i < G.countNewNode; i++) { list adj = newToOld[i]; cout<<"new " << i<<": old (index) "; for(int val : adj){ cout<0 LN:i:13 KC:i:12 km:f:1.3 L:-:0:- L:-:2:- L:+:0:+ L:+:1:- for (int newNodeNum = 0; newNodeNum' << newNodeNum <<" LN:i:"<' << newNodeNum <<">0 LN:i:13 KC:i:12 km:f:1.3 L:-:0:- L:-:2:- L:+:0:+ L:+:1:- " ; myfile.close(); plainfile.close(); } int read_unitig_file(const string& unitigFileName, vector& unitigs) { ifstream unitigFile; unitigFile.open(unitigFileName); string line; int nodeNum; char lnline[20]; char kcline[20]; char kmline[20]; char edgesline[100000]; bool doCont = false; int smallestK = 9999999; getline(unitigFile, line); do { unitig_struct_t unitig_struct; if(FLG_ABUNDANCE){ //>3 LN:i:24 ab:Z:10 10 10 10 10 7 7 7 7 7 3 3 3 3 L:-:0:+ L:-:1:+ L:+:0:- edgesline[0] = '\0'; sscanf(line.c_str(), "%*c %d %s", &unitig_struct.serial, lnline); if( line.find("ab:Z") == string::npos){ cout<<"Incorrect input format. Try using flag -a 0."<0 LN:i:13 KC:i:12 km:f:1.3 L:-:0:- L:-:2:- L:+:0:+ L:+:1:- sscanf(lnline, "%*5c %d", &unitig_struct.ln); if(unitig_struct.ln < smallestK){ smallestK = unitig_struct.ln ; } if(unitig_struct.ln < K){ printf("Wrong k! Try again with correct k value. \n"); exit(2); } } char c1, c2; stringstream ss(edgesline); vector edges; while (getline(ss, line, ' ')) { if (delSpaces(line).length() != 0) { if(DBGFLAG==VERIFYINPUT){ cout<")) { //unitig_struct.sequence = unitig_struct.sequence + line; unitigs.push_back(unitig_struct); } else { doCont = true; break; } } } while (doCont); unitigFile.close(); if(smallestK > K ){ cout<<"\n :::: :::: :::: :::: !!!!!!!!! WARNING !!!!!!!!!!! :::: :::: :::: ::::"<(std::atoi(optarg)); }else{ fprintf(stderr, "Error: use either -a 0 or -a 1 \n"); exit(EXIT_FAILURE); } } break; case 'i': if(optarg) nvalue = optarg; break; //case 'f': //if(optarg) { //FLG_NEWUB = static_cast(std::atoi(optarg)); //} //break; // case 'm': // if(optarg) { // ALGOMODE = static_cast(std::atoi(optarg)); // } // break; //case 'd': //if(optarg) { //DBGFLAG = static_cast(std::atoi(optarg)); //} //break; case 'k': if(optarg) { K = std::atoi(optarg) ; if(K<=0){ fprintf(stderr, "Error: Specify a positive k value.\n"); exit(EXIT_FAILURE); } }else{ fprintf(stderr, "Usage: %s -k -i [-a <0: for no counts, 1: to output separate count, default = 0>]\n", argv[0]); exit(EXIT_FAILURE); } break; default: // fprintf(stderr, "Usage: %s -k -i [-a <0: for no counts, 1: to output separate count, default = 0>]\n", argv[0]); exit(EXIT_FAILURE); } } if(K==0 || strcmp(nvalue, "")==0){ fprintf(stderr, "Usage: %s -k -i [-a <0: for no counts, 1: to output separate count, default = 0>]\n", argv[0]); exit(EXIT_FAILURE); } UNITIG_FILE = string(nvalue); } //*/ ifstream infile(UNITIG_FILE); if(!infile.good()){ fprintf(stderr, "Error: File named \"%s\" cannot be opened.\n", UNITIG_FILE.c_str()); exit(EXIT_FAILURE); } OUTPUT_FILENAME = getFileName(UNITIG_FILE)+".ust.fa"; ofstream globalStatFile; system(("rm -rf "+getFileName(UNITIG_FILE)+".stats.txt").c_str()); globalStatFile.open((getFileName(UNITIG_FILE)+".stats.txt").c_str(), std::fstream::out | std::fstream::app); double startTime = readTimer(); cout << "## Please wait while we read input file.... " << UNITIG_FILE << ": k = "<second)*100.0/V_bcalm); globalStatFile << "PERCENT_DEGREE_"<<(i->first).first << "_" << (i->first).second << "=" << (i->second)*100.0/V_bcalm <<"%" << endl; } //printf("%.2f%%\t\ %.2f%%\t", // isolated_node_count*100.0/V_bcalm, // (sink_count+source_count)*100.0/V_bcalm); //OPTIONAL; derivable globalStatFile << "PERCENT_N_ISOLATED" << "=" << isolated_node_count*100.0/V_bcalm <<"%" << endl; globalStatFile << "PERCENT_N_DEADEND" << "=" << (sink_count+source_count)*100.0/V_bcalm <<"%" << endl; // Iterating the map and printing ordered values // for (auto i = inOutCombo.begin(); i != inOutCombo.end(); i++) { // cout << "(" << i->first.first<< ", "<< i->first.second << ")" << " := " << i->second << '\n'; // } if(ALGOMODE == PROFILE_ONLY){ //printf("\n"); return 0; } //@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@// //##################################################// //@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@// //##################################################// //STARTING DFS cout<<"## Please wait while DFS step is going on... "<" <