-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathbwtdisk_reader.cpp
72 lines (62 loc) · 1.9 KB
/
bwtdisk_reader.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
//-----------------------------------------------
// Copyright 2013 Wellcome Trust Sanger Institute
// Written by Jared Simpson (js18@sanger.ac.uk)
// Released under the GPL
//-----------------------------------------------
//
// BWTDiskReader - read a bwtdisk file
//
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include "bwtdisk_reader.h"
//
// Open the bwtdisk file `filename` for reading.
//
// The reader starts in the IOS_HEADER stage with an unknown sentinel
// position (std::string::npos); discardHeader() must be called before any
// symbols are read.
//
// If the file cannot be opened, an error is printed to stderr and the
// process exits with EXIT_FAILURE.
BWTDiskReader::BWTDiskReader(const std::string& filename) : m_stage(IOS_HEADER), m_eof_pos(std::string::npos)
{
    // The file begins with raw binary integers (see discardHeader), so it
    // must be opened in binary mode: text mode may translate or truncate
    // bytes on platforms that distinguish the two.
    m_pReader = new std::ifstream(filename.c_str(), std::ios::in | std::ios::binary);
    if(!m_pReader->is_open())
    {
        std::cerr << "Error: could not open " << filename << " for read\n";
        exit(EXIT_FAILURE);
    }

    // Give the symbol counter a defined value even before the header is
    // consumed; discardHeader() resets it again.
    m_num_read = 0;
}
//
BWTDiskReader::~BWTDiskReader()
{
    // Free the stream object that the constructor allocated with new.
    delete m_pReader;
}
//
// Consume the file header and switch the reader to the IOS_BWSTR stage.
//
// The header is read as two consecutive raw fields of sizeof(size_t) bytes
// each: the first is skipped, the second is stored as the sentinel position
// later returned by getEOFPos(). The count of symbols read is reset to zero.
//
// If the header cannot be read in full (e.g. the file is empty or truncated),
// an error is printed to stderr and the process exits with EXIT_FAILURE,
// matching how the constructor reports an unreadable file.
void BWTDiskReader::discardHeader()
{
    // First field: read only to advance the stream; its value is not used.
    size_t size = 0;
    m_pReader->read(reinterpret_cast<char*>(&size), sizeof(size));

    // Second field: position of the sentinel within the BWT string.
    m_pReader->read(reinterpret_cast<char*>(&m_eof_pos), sizeof(m_eof_pos));

    // A short read sets failbit; once set, the second read is a no-op, so a
    // single check after both reads covers either field being incomplete.
    if(m_pReader->fail())
    {
        std::cerr << "Error: could not read the bwtdisk file header\n";
        exit(EXIT_FAILURE);
    }

    m_stage = IOS_BWSTR;
    m_num_read = 0;
}
// Return the sentinel position that discardHeader() loaded from the file
// header.
//
// The value is std::string::npos until the header has been consumed, so
// calling this earlier trips the assertion below (in builds where asserts
// are enabled).
size_t BWTDiskReader::getEOFPos() const
{
    assert(m_eof_pos != std::string::npos);

    return m_eof_pos;
}
// Read a single symbol of the BWT string.
//
// Each call extracts one byte from the underlying stream and advances the
// count of symbols read. Two positions are rewritten before being returned:
//   - at the sentinel position recorded in the header, '$' is returned
//     instead of whatever byte is stored there;
//   - once the stream is exhausted, '\n' is returned to signal the end of
//     the BWT.
//
// Must only be called after discardHeader() has moved the reader into the
// IOS_BWSTR stage.
char BWTDiskReader::readChar()
{
    assert(m_stage == IOS_BWSTR);

    // get() returns an int so that end-of-file can be told apart from every
    // valid byte value. Keep it as an int until after the EOF test: narrowing
    // to char first makes the test always false where char is unsigned, and
    // makes a genuine 0xFF byte look like EOF where char is signed.
    const int symbol = m_pReader->get();

    char c;

    // bwtdisk writes the sentinel position in the header
    // of the file and emits an arbitrary symbol at this location.
    // We catch this case here and emit a $ which is what SGA expects.
    if(m_num_read == m_eof_pos)
        c = '$';
    else if(symbol == EOF) // SGA expects \n at the file's end
        c = '\n';
    else
        c = static_cast<char>(symbol);

    m_num_read++;
    return c;
}