From 37e7e00fb049c92d7d056be033dd5f79719ffcc9 Mon Sep 17 00:00:00 2001 From: Michael Macias Date: Mon, 20 Jan 2025 10:47:11 -0600 Subject: [PATCH] bam/bai/io/writer/index: Split writers into modules --- noodles-bam/src/bai/async/io/writer/index.rs | 295 +----------------- .../bai/async/io/writer/index/magic_number.rs | 23 ++ .../io/writer/index/reference_sequences.rs | 48 +++ .../writer/index/reference_sequences/bins.rs | 131 ++++++++ .../index/reference_sequences/bins/chunks.rs | 30 ++ .../index/reference_sequences/intervals.rs | 21 ++ .../index/reference_sequences/metadata.rs | 67 ++++ noodles-bam/src/bai/io/writer/index.rs | 240 ++------------ .../src/bai/io/writer/index/magic_number.rs | 10 + .../io/writer/index/reference_sequences.rs | 49 +++ .../writer/index/reference_sequences/bins.rs | 52 +++ .../index/reference_sequences/bins/chunks.rs | 32 ++ .../index/reference_sequences/intervals.rs | 24 ++ .../index/reference_sequences/metadata.rs | 69 ++++ 14 files changed, 585 insertions(+), 506 deletions(-) create mode 100644 noodles-bam/src/bai/async/io/writer/index/magic_number.rs create mode 100644 noodles-bam/src/bai/async/io/writer/index/reference_sequences.rs create mode 100644 noodles-bam/src/bai/async/io/writer/index/reference_sequences/bins.rs create mode 100644 noodles-bam/src/bai/async/io/writer/index/reference_sequences/bins/chunks.rs create mode 100644 noodles-bam/src/bai/async/io/writer/index/reference_sequences/intervals.rs create mode 100644 noodles-bam/src/bai/async/io/writer/index/reference_sequences/metadata.rs create mode 100644 noodles-bam/src/bai/io/writer/index/magic_number.rs create mode 100644 noodles-bam/src/bai/io/writer/index/reference_sequences.rs create mode 100644 noodles-bam/src/bai/io/writer/index/reference_sequences/bins.rs create mode 100644 noodles-bam/src/bai/io/writer/index/reference_sequences/bins/chunks.rs create mode 100644 noodles-bam/src/bai/io/writer/index/reference_sequences/intervals.rs create mode 100644 noodles-bam/src/bai/io/writer/index/reference_sequences/metadata.rs diff --git a/noodles-bam/src/bai/async/io/writer/index.rs b/noodles-bam/src/bai/async/io/writer/index.rs index 0d16a9983..c83efe120 100644 --- a/noodles-bam/src/bai/async/io/writer/index.rs +++ b/noodles-bam/src/bai/async/io/writer/index.rs @@ -1,24 +1,17 @@ -use indexmap::IndexMap; -use noodles_bgzf as bgzf; -use noodles_csi::{ - binning_index::{ - index::{ - reference_sequence::{bin::Chunk, index::LinearIndex, Bin, Metadata}, - ReferenceSequence, - }, - ReferenceSequence as _, - }, - BinningIndex, -}; +mod magic_number; +mod reference_sequences; + +use noodles_csi::BinningIndex; use tokio::io::{self, AsyncWrite, AsyncWriteExt}; -use crate::bai::{Index, MAGIC_NUMBER}; +use self::{magic_number::write_magic_number, reference_sequences::write_reference_sequences}; +use crate::bai::Index; pub(super) async fn write_index(writer: &mut W, index: &Index) -> io::Result<()> where W: AsyncWrite + Unpin, { - write_magic(writer).await?; + write_magic_number(writer).await?; write_reference_sequences(writer, index.reference_sequences()).await?; if let Some(unplaced_unmapped_record_count) = index.unplaced_unmapped_record_count() { @@ -28,168 +21,6 @@ where Ok(()) } -async fn write_magic(writer: &mut W) -> io::Result<()> -where - W: AsyncWrite + Unpin, -{ - writer.write_all(&MAGIC_NUMBER).await -} - -async fn write_reference_sequences( - writer: &mut W, - reference_sequences: &[ReferenceSequence], -) -> io::Result<()> -where - W: AsyncWrite + Unpin, -{ - let n_ref = u32::try_from(reference_sequences.len()) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?; - writer.write_u32_le(n_ref).await?; - - for reference_sequence in reference_sequences { - write_reference_sequence(writer, reference_sequence).await?; - } - - Ok(()) -} - -async fn write_reference_sequence( - writer: &mut W, - reference_sequence: &ReferenceSequence, -) -> io::Result<()> -where - W: AsyncWrite + Unpin, -{ - write_bins( - writer, - reference_sequence.bins(), - reference_sequence.metadata(), - ) - .await?; - - write_intervals(writer, reference_sequence.index()).await?; - - Ok(()) -} - -async fn write_bins( - writer: &mut W, - bins: &IndexMap, - metadata: Option<&Metadata>, -) -> io::Result<()> -where - W: AsyncWrite + Unpin, -{ - let n_bin = u32::try_from(bins.len()) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e)) - .and_then(|n| { - if metadata.is_some() { - n.checked_add(1) - .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidInput, "n_bin overflow")) - } else { - Ok(n) - } - })?; - - writer.write_u32_le(n_bin).await?; - - for (&id, bin) in bins { - write_bin(writer, id, bin).await?; - } - - if let Some(m) = metadata { - write_metadata(writer, m).await?; - } - - Ok(()) -} - -async fn write_bin(writer: &mut W, id: usize, bin: &Bin) -> io::Result<()> -where - W: AsyncWrite + Unpin, -{ - let id = u32::try_from(id).map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?; - writer.write_u32_le(id).await?; - write_chunks(writer, bin.chunks()).await?; - Ok(()) -} - -async fn write_chunks(writer: &mut W, chunks: &[Chunk]) -> io::Result<()> -where - W: AsyncWrite + Unpin, -{ - let n_chunk = - u32::try_from(chunks.len()).map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?; - writer.write_u32_le(n_chunk).await?; - - for chunk in chunks { - write_chunk(writer, chunk).await?; - } - - Ok(()) -} - -async fn write_chunk(writer: &mut W, chunk: &Chunk) -> io::Result<()> -where - W: AsyncWrite + Unpin, -{ - let chunk_beg = u64::from(chunk.start()); - writer.write_u64_le(chunk_beg).await?; - - let chunk_end = u64::from(chunk.end()); - writer.write_u64_le(chunk_end).await?; - - Ok(()) -} - -async fn write_intervals(writer: &mut W, intervals: &[bgzf::VirtualPosition]) -> io::Result<()> -where - W: AsyncWrite + Unpin, -{ - let n_intv = u32::try_from(intervals.len()) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?; - writer.write_u32_le(n_intv).await?; - - for &interval in intervals { - let ioffset = u64::from(interval); - writer.write_u64_le(ioffset).await?; - } - - Ok(()) -} - -async fn write_metadata(writer: &mut W, metadata: &Metadata) -> io::Result<()> -where - W: AsyncWrite + Unpin, -{ - use crate::bai::DEPTH; - - const METADATA_ID: usize = Bin::metadata_id(DEPTH); - const METADATA_CHUNK_COUNT: usize = 2; - - let id = - u32::try_from(METADATA_ID).map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?; - writer.write_u32_le(id).await?; - - let n_chunk = u32::try_from(METADATA_CHUNK_COUNT) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?; - writer.write_u32_le(n_chunk).await?; - - let ref_beg = u64::from(metadata.start_position()); - writer.write_u64_le(ref_beg).await?; - - let ref_end = u64::from(metadata.end_position()); - writer.write_u64_le(ref_end).await?; - - let n_mapped = metadata.mapped_record_count(); - writer.write_u64_le(n_mapped).await?; - - let n_unmapped = metadata.unmapped_record_count(); - writer.write_u64_le(n_unmapped).await?; - - Ok(()) -} - async fn write_unplaced_unmapped_record_count( writer: &mut W, unplaced_unmapped_record_count: u64, @@ -199,115 +30,3 @@ where { writer.write_u64_le(unplaced_unmapped_record_count).await } - -#[cfg(test)] -mod tests { - use super::*; - - #[tokio::test] - async fn test_write_magic() -> io::Result<()> { - let mut buf = Vec::new(); - write_magic(&mut buf).await?; - assert_eq!(buf, b"BAI\x01"); - Ok(()) - } - - #[tokio::test] - async fn test_write_bins() -> io::Result<()> { - let bins = [(8, Bin::new(Vec::new()))].into_iter().collect(); - - let mut buf = Vec::new(); - write_bins(&mut buf, &bins, None).await?; - - let expected = [ - 0x01, 0x00, 0x00, 0x00, // n_bins = 1 - 0x08, 0x00, 0x00, 0x00, // bins[0].bin = 8 - 0x00, 0x00, 0x00, 0x00, // bins[0].n_chunk = 0 - ]; - - assert_eq!(buf, expected); - - Ok(()) - } - - #[tokio::test] - async fn test_write_bins_with_metadata() -> io::Result<()> { - let bins = [(8, Bin::new(Vec::new()))].into_iter().collect(); - let metadata = Metadata::new( - bgzf::VirtualPosition::from(13), - bgzf::VirtualPosition::from(21), - 5, - 0, - ); - - let mut buf = Vec::new(); - write_bins(&mut buf, &bins, Some(&metadata)).await?; - - #[rustfmt::skip] - let expected = [ - 0x02, 0x00, 0x00, 0x00, // n_bins = 2 - - 0x08, 0x00, 0x00, 0x00, // bins[0].bin = 8 - 0x00, 0x00, 0x00, 0x00, // bins[0].n_chunk = 0 - - 0x4a, 0x92, 0x00, 0x00, // bins[1].bin = 37450 - 0x02, 0x00, 0x00, 0x00, // bins[1].n_chunk = 2 - 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // bins[1].chunks[0].chunk_beg = 13 - 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // bins[1].chunks[0].chunk_end = 21 - 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // bins[1].chunks[1].chunk_beg = 5 - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // bins[1].chunks[1].chunk_end = 0 - ]; - - assert_eq!(buf, expected); - - Ok(()) - } - - #[tokio::test] - async fn test_write_bin() -> io::Result<()> { - let bin = Bin::new(vec![Chunk::new( - bgzf::VirtualPosition::from(13), - bgzf::VirtualPosition::from(21), - )]); - - let mut buf = Vec::new(); - write_bin(&mut buf, 8, &bin).await?; - - let expected = [ - 0x08, 0x00, 0x00, 0x00, // bin = 8 - 0x01, 0x00, 0x00, 0x00, // n_chunk = 1 - 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // chunk[0].chunk_beg - 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // chunk[0].chunk_end - ]; - - assert_eq!(buf, expected); - - Ok(()) - } - - #[tokio::test] - async fn test_write_metadata() -> io::Result<()> { - let metadata = Metadata::new( - bgzf::VirtualPosition::from(610), - bgzf::VirtualPosition::from(1597), - 55, - 0, - ); - - let mut buf = Vec::new(); - write_metadata(&mut buf, &metadata).await?; - - let expected = [ - 0x4a, 0x92, 0x00, 0x00, // bin = 37450 - 0x02, 0x00, 0x00, 0x00, // n_chunks = 2 - 0x62, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // ref_beg = 610 - 0x3d, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // ref_end = 1597 - 0x37, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // n_mapped = 55 - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // n_unmapped = 0 - ]; - - assert_eq!(buf, expected); - - Ok(()) - } -} diff --git a/noodles-bam/src/bai/async/io/writer/index/magic_number.rs b/noodles-bam/src/bai/async/io/writer/index/magic_number.rs new file mode 100644 index 000000000..56bf3b961 --- /dev/null +++ b/noodles-bam/src/bai/async/io/writer/index/magic_number.rs @@ -0,0 +1,23 @@ +use tokio::io::{self, AsyncWrite, AsyncWriteExt}; + +use crate::bai::MAGIC_NUMBER; + +pub(super) async fn write_magic_number(writer: &mut W) -> io::Result<()> +where + W: AsyncWrite + Unpin, +{ + writer.write_all(&MAGIC_NUMBER).await +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_write_magic_number() -> io::Result<()> { + let mut buf = Vec::new(); + write_magic_number(&mut buf).await?; + assert_eq!(buf, b"BAI\x01"); + Ok(()) + } +} diff --git a/noodles-bam/src/bai/async/io/writer/index/reference_sequences.rs b/noodles-bam/src/bai/async/io/writer/index/reference_sequences.rs new file mode 100644 index 000000000..88c7bfd2e --- /dev/null +++ b/noodles-bam/src/bai/async/io/writer/index/reference_sequences.rs @@ -0,0 +1,48 @@ +mod bins; +mod intervals; +mod metadata; + +use noodles_csi::binning_index::{ + index::{reference_sequence::index::LinearIndex, ReferenceSequence}, + ReferenceSequence as _, +}; +use tokio::io::{self, AsyncWrite, AsyncWriteExt}; + +use self::{bins::write_bins, intervals::write_intervals, metadata::write_metadata}; + +pub(super) async fn write_reference_sequences( + writer: &mut W, + reference_sequences: &[ReferenceSequence], +) -> io::Result<()> +where + W: AsyncWrite + Unpin, +{ + let n_ref = u32::try_from(reference_sequences.len()) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?; + writer.write_u32_le(n_ref).await?; + + for reference_sequence in reference_sequences { + write_reference_sequence(writer, reference_sequence).await?; + } + + Ok(()) +} + +async fn write_reference_sequence( + writer: &mut W, + reference_sequence: &ReferenceSequence, +) -> io::Result<()> +where + W: AsyncWrite + Unpin, +{ + write_bins( + writer, + reference_sequence.bins(), + reference_sequence.metadata(), + ) + .await?; + + write_intervals(writer, reference_sequence.index()).await?; + + Ok(()) +} diff --git a/noodles-bam/src/bai/async/io/writer/index/reference_sequences/bins.rs b/noodles-bam/src/bai/async/io/writer/index/reference_sequences/bins.rs new file mode 100644 index 000000000..bbfe7ea9c --- /dev/null +++ b/noodles-bam/src/bai/async/io/writer/index/reference_sequences/bins.rs @@ -0,0 +1,131 @@ +mod chunks; + +use indexmap::IndexMap; +use noodles_csi::binning_index::index::reference_sequence::{Bin, Metadata}; +use tokio::io::{self, AsyncWrite, AsyncWriteExt}; + +use self::chunks::write_chunks; +use super::write_metadata; + +pub(super) async fn write_bins( + writer: &mut W, + bins: &IndexMap, + metadata: Option<&Metadata>, +) -> io::Result<()> +where + W: AsyncWrite + Unpin, +{ + let n_bin = u32::try_from(bins.len()) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e)) + .and_then(|n| { + if metadata.is_some() { + n.checked_add(1) + .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidInput, "n_bin overflow")) + } else { + Ok(n) + } + })?; + + writer.write_u32_le(n_bin).await?; + + for (&id, bin) in bins { + write_bin(writer, id, bin).await?; + } + + if let Some(m) = metadata { + write_metadata(writer, m).await?; + } + + Ok(()) +} + +async fn write_bin(writer: &mut W, id: usize, bin: &Bin) -> io::Result<()> +where + W: AsyncWrite + Unpin, +{ + let id = u32::try_from(id).map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?; + writer.write_u32_le(id).await?; + write_chunks(writer, bin.chunks()).await?; + Ok(()) +} + +#[cfg(test)] +mod tests { + use noodles_bgzf as bgzf; + use noodles_csi::binning_index::index::reference_sequence::bin::Chunk; + + use super::*; + + #[tokio::test] + async fn test_write_bins() -> io::Result<()> { + let bins = [(8, Bin::new(Vec::new()))].into_iter().collect(); + + let mut buf = Vec::new(); + write_bins(&mut buf, &bins, None).await?; + + let expected = [ + 0x01, 0x00, 0x00, 0x00, // n_bins = 1 + 0x08, 0x00, 0x00, 0x00, // bins[0].bin = 8 + 0x00, 0x00, 0x00, 0x00, // bins[0].n_chunk = 0 + ]; + + assert_eq!(buf, expected); + + Ok(()) + } + + #[tokio::test] + async fn test_write_bins_with_metadata() -> io::Result<()> { + let bins = [(8, Bin::new(Vec::new()))].into_iter().collect(); + let metadata = Metadata::new( + bgzf::VirtualPosition::from(13), + bgzf::VirtualPosition::from(21), + 5, + 0, + ); + + let mut buf = Vec::new(); + write_bins(&mut buf, &bins, Some(&metadata)).await?; + + #[rustfmt::skip] + let expected = [ + 0x02, 0x00, 0x00, 0x00, // n_bins = 2 + + 0x08, 0x00, 0x00, 0x00, // bins[0].bin = 8 + 0x00, 0x00, 0x00, 0x00, // bins[0].n_chunk = 0 + + 0x4a, 0x92, 0x00, 0x00, // bins[1].bin = 37450 + 0x02, 0x00, 0x00, 0x00, // bins[1].n_chunk = 2 + 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // bins[1].chunks[0].chunk_beg = 13 + 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // bins[1].chunks[0].chunk_end = 21 + 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // bins[1].chunks[1].chunk_beg = 5 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // bins[1].chunks[1].chunk_end = 0 + ]; + + assert_eq!(buf, expected); + + Ok(()) + } + + #[tokio::test] + async fn test_write_bin() -> io::Result<()> { + let bin = Bin::new(vec![Chunk::new( + bgzf::VirtualPosition::from(13), + bgzf::VirtualPosition::from(21), + )]); + + let mut buf = Vec::new(); + write_bin(&mut buf, 8, &bin).await?; + + let expected = [ + 0x08, 0x00, 0x00, 0x00, // bin = 8 + 0x01, 0x00, 0x00, 0x00, // n_chunk = 1 + 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // chunk[0].chunk_beg + 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // chunk[0].chunk_end + ]; + + assert_eq!(buf, expected); + + Ok(()) + } +} diff --git a/noodles-bam/src/bai/async/io/writer/index/reference_sequences/bins/chunks.rs b/noodles-bam/src/bai/async/io/writer/index/reference_sequences/bins/chunks.rs new file mode 100644 index 000000000..3c83146e7 --- /dev/null +++ b/noodles-bam/src/bai/async/io/writer/index/reference_sequences/bins/chunks.rs @@ -0,0 +1,30 @@ +use noodles_csi::binning_index::index::reference_sequence::bin::Chunk; +use tokio::io::{self, AsyncWrite, AsyncWriteExt}; + +pub(super) async fn write_chunks(writer: &mut W, chunks: &[Chunk]) -> io::Result<()> +where + W: AsyncWrite + Unpin, +{ + let n_chunk = + u32::try_from(chunks.len()).map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?; + writer.write_u32_le(n_chunk).await?; + + for chunk in chunks { + write_chunk(writer, chunk).await?; + } + + Ok(()) +} + +async fn write_chunk(writer: &mut W, chunk: &Chunk) -> io::Result<()> +where + W: AsyncWrite + Unpin, +{ + let chunk_beg = u64::from(chunk.start()); + writer.write_u64_le(chunk_beg).await?; + + let chunk_end = u64::from(chunk.end()); + writer.write_u64_le(chunk_end).await?; + + Ok(()) +} diff --git a/noodles-bam/src/bai/async/io/writer/index/reference_sequences/intervals.rs b/noodles-bam/src/bai/async/io/writer/index/reference_sequences/intervals.rs new file mode 100644 index 000000000..2969e9260 --- /dev/null +++ b/noodles-bam/src/bai/async/io/writer/index/reference_sequences/intervals.rs @@ -0,0 +1,21 @@ +use noodles_bgzf as bgzf; +use tokio::io::{self, AsyncWrite, AsyncWriteExt}; + +pub(super) async fn write_intervals( + writer: &mut W, + intervals: &[bgzf::VirtualPosition], +) -> io::Result<()> +where + W: AsyncWrite + Unpin, +{ + let n_intv = u32::try_from(intervals.len()) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?; + writer.write_u32_le(n_intv).await?; + + for &interval in intervals { + let ioffset = u64::from(interval); + writer.write_u64_le(ioffset).await?; + } + + Ok(()) +} diff --git a/noodles-bam/src/bai/async/io/writer/index/reference_sequences/metadata.rs b/noodles-bam/src/bai/async/io/writer/index/reference_sequences/metadata.rs new file mode 100644 index 000000000..3a3abfe84 --- /dev/null +++ b/noodles-bam/src/bai/async/io/writer/index/reference_sequences/metadata.rs @@ -0,0 +1,67 @@ +use noodles_csi::binning_index::index::reference_sequence::{Bin, Metadata}; +use tokio::io::{self, AsyncWrite, AsyncWriteExt}; + +pub(super) async fn write_metadata(writer: &mut W, metadata: &Metadata) -> io::Result<()> +where + W: AsyncWrite + Unpin, +{ + use crate::bai::DEPTH; + + const METADATA_ID: usize = Bin::metadata_id(DEPTH); + const METADATA_CHUNK_COUNT: usize = 2; + + let id = + u32::try_from(METADATA_ID).map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?; + writer.write_u32_le(id).await?; + + let n_chunk = u32::try_from(METADATA_CHUNK_COUNT) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?; + writer.write_u32_le(n_chunk).await?; + + let ref_beg = u64::from(metadata.start_position()); + writer.write_u64_le(ref_beg).await?; + + let ref_end = u64::from(metadata.end_position()); + writer.write_u64_le(ref_end).await?; + + let n_mapped = metadata.mapped_record_count(); + writer.write_u64_le(n_mapped).await?; + + let n_unmapped = metadata.unmapped_record_count(); + writer.write_u64_le(n_unmapped).await?; + + Ok(()) +} + +#[cfg(test)] +mod tests { + use noodles_bgzf as bgzf; + + use super::*; + + #[tokio::test] + async fn test_write_metadata() -> io::Result<()> { + let metadata = Metadata::new( + bgzf::VirtualPosition::from(610), + bgzf::VirtualPosition::from(1597), + 55, + 0, + ); + + let mut buf = Vec::new(); + write_metadata(&mut buf, &metadata).await?; + + let expected = [ + 0x4a, 0x92, 0x00, 0x00, // bin = 37450 + 0x02, 0x00, 0x00, 0x00, // n_chunks = 2 + 0x62, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // ref_beg = 610 + 0x3d, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // ref_end = 1597 + 0x37, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // n_mapped = 55 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // n_unmapped = 0 + ]; + + assert_eq!(buf, expected); + + Ok(()) + } +} diff --git a/noodles-bam/src/bai/io/writer/index.rs b/noodles-bam/src/bai/io/writer/index.rs index b531a5dd5..289044f62 100644 --- a/noodles-bam/src/bai/io/writer/index.rs +++ b/noodles-bam/src/bai/io/writer/index.rs @@ -1,20 +1,13 @@ +mod magic_number; +mod reference_sequences; + use std::io::{self, Write}; use byteorder::{LittleEndian, WriteBytesExt}; -use indexmap::IndexMap; -use noodles_bgzf as bgzf; -use noodles_csi::{ - binning_index::{ - index::{ - reference_sequence::{bin::Chunk, index::LinearIndex, Bin, Metadata}, - ReferenceSequence, - }, - ReferenceSequence as _, - }, - BinningIndex, -}; +use noodles_csi::BinningIndex; -use crate::bai::{Index, MAGIC_NUMBER}; +use self::{magic_number::write_magic_number, reference_sequences::write_reference_sequences}; +use crate::bai::Index; pub(super) fn write_index(writer: &mut W, index: &Index) -> io::Result<()> where @@ -30,167 +23,6 @@ where Ok(()) } -fn write_magic_number(writer: &mut W) -> io::Result<()> -where - W: Write, -{ - writer.write_all(&MAGIC_NUMBER) -} - -fn write_reference_sequences( - writer: &mut W, - reference_sequences: &[ReferenceSequence], -) -> io::Result<()> -where - W: Write, -{ - let n_ref = u32::try_from(reference_sequences.len()) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?; - writer.write_u32::(n_ref)?; - - for reference_sequence in reference_sequences { - write_reference_sequence(writer, reference_sequence)?; - } - - Ok(()) -} - -fn write_reference_sequence( - writer: &mut W, - reference_sequence: &ReferenceSequence, -) -> io::Result<()> -where - W: Write, -{ - write_bins( - writer, - reference_sequence.bins(), - reference_sequence.metadata(), - )?; - - write_intervals(writer, reference_sequence.index())?; - - Ok(()) -} - -fn write_bins( - writer: &mut W, - bins: &IndexMap, - metadata: Option<&Metadata>, -) -> io::Result<()> -where - W: Write, -{ - let n_bin = u32::try_from(bins.len()) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e)) - .and_then(|n| { - if metadata.is_some() { - n.checked_add(1) - .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidInput, "n_bin overflow")) - } else { - Ok(n) - } - })?; - - writer.write_u32::(n_bin)?; - - for (&id, bin) in bins { - write_bin(writer, id, bin)?; - } - - if let Some(metadata) = metadata { - write_metadata(writer, metadata)?; - } - - Ok(()) -} - -fn write_bin(writer: &mut W, id: usize, bin: &Bin) -> io::Result<()> -where - W: Write, -{ - let id = u32::try_from(id).map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?; - writer.write_u32::(id)?; - write_chunks(writer, bin.chunks())?; - Ok(()) -} - -fn write_chunks(writer: &mut W, chunks: &[Chunk]) -> io::Result<()> -where - W: Write, -{ - let n_chunk = - u32::try_from(chunks.len()).map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?; - writer.write_u32::(n_chunk)?; - - for chunk in chunks { - write_chunk(writer, chunk)?; - } - - Ok(()) -} - -fn write_chunk(writer: &mut W, chunk: &Chunk) -> io::Result<()> -where - W: Write, -{ - let chunk_beg = u64::from(chunk.start()); - writer.write_u64::(chunk_beg)?; - - let chunk_end = u64::from(chunk.end()); - writer.write_u64::(chunk_end)?; - - Ok(()) -} - -fn write_intervals(writer: &mut W, intervals: &[bgzf::VirtualPosition]) -> io::Result<()> -where - W: Write, -{ - let n_intv = u32::try_from(intervals.len()) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?; - writer.write_u32::(n_intv)?; - - for interval in intervals { - let ioffset = u64::from(*interval); - writer.write_u64::(ioffset)?; - } - - Ok(()) -} - -fn write_metadata(writer: &mut W, metadata: &Metadata) -> io::Result<()> -where - W: Write, -{ - use crate::bai::DEPTH; - - const METADATA_ID: usize = Bin::metadata_id(DEPTH); - const METADATA_CHUNK_COUNT: usize = 2; - - let id = - u32::try_from(METADATA_ID).map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?; - writer.write_u32::(id)?; - - let n_chunk = u32::try_from(METADATA_CHUNK_COUNT) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?; - writer.write_u32::(n_chunk)?; - - let ref_beg = u64::from(metadata.start_position()); - writer.write_u64::(ref_beg)?; - - let ref_end = u64::from(metadata.end_position()); - writer.write_u64::(ref_end)?; - - let n_mapped = metadata.mapped_record_count(); - writer.write_u64::(n_mapped)?; - - let n_unmapped = metadata.unmapped_record_count(); - writer.write_u64::(n_unmapped)?; - - Ok(()) -} - fn write_unplaced_unmapped_record_count( writer: &mut W, unplaced_unmapped_record_count: u64, @@ -203,7 +35,14 @@ where #[cfg(test)] mod tests { + use noodles_bgzf as bgzf; + use noodles_csi::binning_index::index::{ + reference_sequence::{bin::Chunk, Bin}, + ReferenceSequence, + }; + use super::*; + use crate::bai::MAGIC_NUMBER; #[test] fn test_write_index() -> io::Result<()> { @@ -222,50 +61,15 @@ mod tests { write_index(&mut buf, &index)?; let mut expected = Vec::new(); - // magic - expected.write_all(&MAGIC_NUMBER)?; - // n_ref - expected.write_u32::(1)?; - // n_bin - expected.write_u32::(1)?; - // bin - expected.write_u32::(16385)?; - // n_chunk - expected.write_u32::(1)?; - // chunk_beg - expected.write_u64::(509268599425)?; - // chunk_end - expected.write_u64::(509268599570)?; - // n_intv - expected.write_u32::(1)?; - // ioffset - expected.write_u64::(337)?; - - assert_eq!(buf, expected); - - Ok(()) - } - - #[test] - fn test_write_metadata() -> io::Result<()> { - let metadata = Metadata::new( - bgzf::VirtualPosition::from(610), - bgzf::VirtualPosition::from(1597), - 55, - 0, - ); - - let mut buf = Vec::new(); - write_metadata(&mut buf, &metadata)?; - - let expected = [ - 0x4a, 0x92, 0x00, 0x00, // bin = 37450 - 0x02, 0x00, 0x00, 0x00, // chunks = 2 - 0x62, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // ref_beg = 610 - 0x3d, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // ref_end = 1597 - 0x37, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // n_mapped = 55 - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // n_unmapped = 0 - ]; + expected.write_all(&MAGIC_NUMBER)?; // magic + expected.write_u32::(1)?; // n_ref + expected.write_u32::(1)?; // n_bin + expected.write_u32::(16385)?; // bin + expected.write_u32::(1)?; // n_chunk + expected.write_u64::(509268599425)?; // chunk_beg + expected.write_u64::(509268599570)?; // chunk_end + expected.write_u32::(1)?; // n_intv + expected.write_u64::(337)?; // ioffset assert_eq!(buf, expected); diff --git a/noodles-bam/src/bai/io/writer/index/magic_number.rs b/noodles-bam/src/bai/io/writer/index/magic_number.rs new file mode 100644 index 000000000..94cd3b735 --- /dev/null +++ b/noodles-bam/src/bai/io/writer/index/magic_number.rs @@ -0,0 +1,10 @@ +use std::io::{self, Write}; + +use crate::bai::MAGIC_NUMBER; + +pub(super) fn write_magic_number(writer: &mut W) -> io::Result<()> +where + W: Write, +{ + writer.write_all(&MAGIC_NUMBER) +} diff --git a/noodles-bam/src/bai/io/writer/index/reference_sequences.rs b/noodles-bam/src/bai/io/writer/index/reference_sequences.rs new file mode 100644 index 000000000..25a7761a8 --- /dev/null +++ b/noodles-bam/src/bai/io/writer/index/reference_sequences.rs @@ -0,0 +1,49 @@ +mod bins; +mod intervals; +mod metadata; + +use std::io::{self, Write}; + +use byteorder::{LittleEndian, WriteBytesExt}; +use noodles_csi::binning_index::{ + index::{reference_sequence::index::LinearIndex, ReferenceSequence}, + ReferenceSequence as _, +}; + +use self::{bins::write_bins, intervals::write_intervals, metadata::write_metadata}; + +pub(super) fn write_reference_sequences( + writer: &mut W, + reference_sequences: &[ReferenceSequence], +) -> io::Result<()> +where + W: Write, +{ + let n_ref = u32::try_from(reference_sequences.len()) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?; + writer.write_u32::(n_ref)?; + + for reference_sequence in reference_sequences { + write_reference_sequence(writer, reference_sequence)?; + } + + Ok(()) +} + +fn write_reference_sequence( + writer: &mut W, + reference_sequence: &ReferenceSequence, +) -> io::Result<()> +where + W: Write, +{ + write_bins( + writer, + reference_sequence.bins(), + reference_sequence.metadata(), + )?; + + write_intervals(writer, reference_sequence.index())?; + + Ok(()) +} diff --git a/noodles-bam/src/bai/io/writer/index/reference_sequences/bins.rs b/noodles-bam/src/bai/io/writer/index/reference_sequences/bins.rs new file mode 100644 index 000000000..f2e798075 --- /dev/null +++ b/noodles-bam/src/bai/io/writer/index/reference_sequences/bins.rs @@ -0,0 +1,52 @@ +mod chunks; + +use std::io::{self, Write}; + +use byteorder::{LittleEndian, WriteBytesExt}; +use indexmap::IndexMap; +use noodles_csi::binning_index::index::reference_sequence::{Bin, Metadata}; + +use self::chunks::write_chunks; +use super::write_metadata; + +pub(super) fn write_bins( + writer: &mut W, + bins: &IndexMap, + metadata: Option<&Metadata>, +) -> io::Result<()> +where + W: Write, +{ + let n_bin = u32::try_from(bins.len()) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e)) + .and_then(|n| { + if metadata.is_some() { + n.checked_add(1) + .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidInput, "n_bin overflow")) + } else { + Ok(n) + } + })?; + + writer.write_u32::(n_bin)?; + + for (&id, bin) in bins { + write_bin(writer, id, bin)?; + } + + if let Some(metadata) = metadata { + write_metadata(writer, metadata)?; + } + + Ok(()) +} + +fn write_bin(writer: &mut W, id: usize, bin: &Bin) -> io::Result<()> +where + W: Write, +{ + let id = u32::try_from(id).map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?; + writer.write_u32::(id)?; + write_chunks(writer, bin.chunks())?; + Ok(()) +} diff --git a/noodles-bam/src/bai/io/writer/index/reference_sequences/bins/chunks.rs b/noodles-bam/src/bai/io/writer/index/reference_sequences/bins/chunks.rs new file mode 100644 index 000000000..4fe6e5dbc --- /dev/null +++ b/noodles-bam/src/bai/io/writer/index/reference_sequences/bins/chunks.rs @@ -0,0 +1,32 @@ +use std::io::{self, Write}; + +use byteorder::{LittleEndian, WriteBytesExt}; +use noodles_csi::binning_index::index::reference_sequence::bin::Chunk; + +pub(super) fn write_chunks(writer: &mut W, chunks: &[Chunk]) -> io::Result<()> +where + W: Write, +{ + let n_chunk = + u32::try_from(chunks.len()).map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?; + writer.write_u32::(n_chunk)?; + + for chunk in chunks { + write_chunk(writer, chunk)?; + } + + Ok(()) +} + +fn write_chunk(writer: &mut W, chunk: &Chunk) -> io::Result<()> +where + W: Write, +{ + let chunk_beg = u64::from(chunk.start()); + writer.write_u64::(chunk_beg)?; + + let chunk_end = u64::from(chunk.end()); + writer.write_u64::(chunk_end)?; + + Ok(()) +} diff --git a/noodles-bam/src/bai/io/writer/index/reference_sequences/intervals.rs b/noodles-bam/src/bai/io/writer/index/reference_sequences/intervals.rs new file mode 100644 index 000000000..5e324615a --- /dev/null +++ b/noodles-bam/src/bai/io/writer/index/reference_sequences/intervals.rs @@ -0,0 +1,24 @@ +use std::io::{self, Write}; + +use noodles_bgzf as bgzf; + +use byteorder::{LittleEndian, WriteBytesExt}; + +pub(super) fn write_intervals( + writer: &mut W, + intervals: &[bgzf::VirtualPosition], +) -> io::Result<()> +where + W: Write, +{ + let n_intv = u32::try_from(intervals.len()) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?; + writer.write_u32::(n_intv)?; + + for interval in intervals { + let ioffset = u64::from(*interval); + writer.write_u64::(ioffset)?; + } + + Ok(()) +} diff --git a/noodles-bam/src/bai/io/writer/index/reference_sequences/metadata.rs b/noodles-bam/src/bai/io/writer/index/reference_sequences/metadata.rs new file mode 100644 index 000000000..3994ea8dd --- /dev/null +++ b/noodles-bam/src/bai/io/writer/index/reference_sequences/metadata.rs @@ -0,0 +1,69 @@ +use std::io::{self, Write}; + +use byteorder::{LittleEndian, WriteBytesExt}; +use noodles_csi::binning_index::index::reference_sequence::{Bin, Metadata}; + +pub(super) fn write_metadata(writer: &mut W, metadata: &Metadata) -> io::Result<()> +where + W: Write, +{ + use crate::bai::DEPTH; + + const METADATA_ID: usize = Bin::metadata_id(DEPTH); + const METADATA_CHUNK_COUNT: usize = 2; + + let id = + u32::try_from(METADATA_ID).map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?; + writer.write_u32::(id)?; + + let n_chunk = u32::try_from(METADATA_CHUNK_COUNT) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?; + writer.write_u32::(n_chunk)?; + + let ref_beg = u64::from(metadata.start_position()); + writer.write_u64::(ref_beg)?; + + let ref_end = u64::from(metadata.end_position()); + writer.write_u64::(ref_end)?; + + let n_mapped = metadata.mapped_record_count(); + writer.write_u64::(n_mapped)?; + + let n_unmapped = metadata.unmapped_record_count(); + writer.write_u64::(n_unmapped)?; + + Ok(()) +} + +#[cfg(test)] +mod tests { + use noodles_bgzf as bgzf; + + use super::*; + + #[test] + fn test_write_metadata() -> io::Result<()> { + let metadata = Metadata::new( + bgzf::VirtualPosition::from(610), + bgzf::VirtualPosition::from(1597), + 55, + 0, + ); + + let mut buf = Vec::new(); + write_metadata(&mut buf, &metadata)?; + + let expected = [ + 0x4a, 0x92, 0x00, 0x00, // bin = 37450 + 0x02, 0x00, 0x00, 0x00, // chunks = 2 + 0x62, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // ref_beg = 610 + 0x3d, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // ref_end = 1597 + 0x37, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // n_mapped = 55 + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // n_unmapped = 0 + ]; + + assert_eq!(buf, expected); + + Ok(()) + } +}