-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathsga_bwt_reader.cpp
78 lines (67 loc) · 2.44 KB
/
sga_bwt_reader.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
//-----------------------------------------------
// Copyright 2010 Wellcome Trust Sanger Institute
// Written by Jared Simpson (js18@sanger.ac.uk)
// Released under the GPL
//-----------------------------------------------
//
// SGABWTReader - read sga's bwt file
//
#include "sga_bwt_reader.h"
//
// Construct a reader over the BWT file at `filename`.
// The file is opened in binary mode. If it cannot be opened, an error is
// written to stderr and the process exits with EXIT_FAILURE. On success the
// reader is positioned at the header stage.
SGABWTReader::SGABWTReader(const std::string& filename) : m_stage(IOS_NONE), m_numRunsOnDisk(0), m_numRunsRead(0)
{
    // The stream is heap-allocated here and released in the destructor
    m_pReader = new std::ifstream(filename.c_str(), std::ios::binary);

    const bool opened = m_pReader->is_open();
    if(opened)
    {
        // Ready for readHeader()
        m_stage = IOS_HEADER;
        return;
    }

    std::cerr << "Error: could not open " << filename << " for read\n";
    exit(EXIT_FAILURE);
}
//
// Destroy the reader, releasing the input stream allocated by the constructor.
SGABWTReader::~SGABWTReader()
{
    // Destroying the file stream also closes the underlying file
    delete m_pReader;
}
//
// Read the file header and advance the reader to the BWT string stage.
//
// Fields are read in this order: a 16-bit magic number, the number of
// strings, the number of symbols, the number of runs stored on disk, and the
// BWFlag. Each field is copied as raw bytes straight into the destination
// object, so the on-disk widths and byte order must match the in-memory
// types of this build.
//
// num_strings, num_symbols and flag are output parameters. The run count is
// stored in m_numRunsOnDisk.
//
// Writes an error to stderr and exits with EXIT_FAILURE if the header cannot
// be read in full or the magic number does not equal RLBWT_FILE_MAGIC.
void SGABWTReader::readHeader(size_t& num_strings, size_t& num_symbols, BWFlag& flag)
{
    assert(m_stage == IOS_HEADER);

    // Zero-initialise so a failed read never leaves an indeterminate value
    // to be compared against the expected magic number
    uint16_t magic_number = 0;
    m_pReader->read(reinterpret_cast<char*>(&magic_number), sizeof(magic_number));
    if(m_pReader->fail() || magic_number != RLBWT_FILE_MAGIC)
    {
        std::cerr << "BWT file is not properly formatted, aborting\n";
        exit(EXIT_FAILURE);
    }

    m_pReader->read(reinterpret_cast<char*>(&num_strings), sizeof(num_strings));
    m_pReader->read(reinterpret_cast<char*>(&num_symbols), sizeof(num_symbols));
    m_pReader->read(reinterpret_cast<char*>(&m_numRunsOnDisk), sizeof(m_numRunsOnDisk));
    m_pReader->read(reinterpret_cast<char*>(&flag), sizeof(flag));

    // Once one read fails the stream stays failed, so a single check here
    // catches a header truncated at any of the fields above
    if(m_pReader->fail())
    {
        std::cerr << "BWT file is not properly formatted, aborting\n";
        exit(EXIT_FAILURE);
    }

    //std::cout << "Read magic: " << magic_number << "\n";
    //std::cout << "strings:" << num_strings << "\n";
    //std::cout << "symbols: " << num_symbols << "\n";
    //std::cout << "runs: " << m_numRunsOnDisk << "\n";
    m_stage = IOS_BWSTR;
}
// Read a single base from the BWStr.
// The BWT is stored as runs on disk, so this class keeps an internal buffer
// holding a single run (m_currRun) and emits characters from that buffer,
// reading the next run from disk whenever the buffer is exhausted.
//
// Returns the symbol of the current run, or a newline character once all
// m_numRunsOnDisk runs have been read and fully emitted, to signal the end
// of the BWT.
//
// Writes an error to stderr and exits with EXIT_FAILURE if the file ends
// before the number of runs announced in the header has been read.
// NOTE(review): assumes every run stored on disk has a non-zero count; a
// zero-length run would be decremented while empty -- confirm against writer.
char SGABWTReader::readChar()
{
    assert(m_stage == IOS_BWSTR);
    if(m_currRun.isEmpty())
    {
        // All runs have been read and emitted, return the end marker
        if(m_numRunsRead == m_numRunsOnDisk)
            return '\n';

        // Read one run from disk, copied as raw bytes into the run buffer
        m_pReader->read(reinterpret_cast<char*>(&m_currRun), sizeof(RLUnit));

        // A short read means the file holds fewer runs than the header
        // claims; stop here rather than emit symbols from a stale run
        if(m_pReader->fail())
        {
            std::cerr << "BWT file is truncated, aborting\n";
            exit(EXIT_FAILURE);
        }
        ++m_numRunsRead;
    }

    // Decrement the current run and emit its symbol
    m_currRun.decrementCount();
    return m_currRun.getChar();
}