From 87f20116c4337eda17a416ebafb8976abc188d87 Mon Sep 17 00:00:00 2001 From: Nils Homer Date: Wed, 22 May 2024 02:20:34 -0700 Subject: [PATCH] fix: bam::Record::new should return a valid record (#361) * bam::Record::new should return a valid record, see https://github.com/rust-bio/rust-htslib/issues/339 --- src/bam/mod.rs | 54 ++++++++++++++++++++++++++++++++++++++++- src/bam/record.rs | 15 ++++++++++-- src/bam/record_serde.rs | 2 ++ 3 files changed, 68 insertions(+), 3 deletions(-) diff --git a/src/bam/mod.rs b/src/bam/mod.rs index f232401e5..ddf1d3221 100644 --- a/src/bam/mod.rs +++ b/src/bam/mod.rs @@ -2451,7 +2451,7 @@ CCCCCCCCCCCCCCCCCCC"[..], where F: Fn(&record::Record) -> Option, { - let mut bam_reader = Reader::from_path(bamfile).unwrap(); // internal functions, just unwarp + let mut bam_reader = Reader::from_path(bamfile).unwrap(); // internal functions, just unwrap let header = header::Header::from_template(bam_reader.header()); let mut sam_writer = Writer::from_path(samfile, &header, Format::Sam).unwrap(); for record in bam_reader.records() { @@ -3002,6 +3002,58 @@ CCCCCCCCCCCCCCCCCCC"[..], assert_eq!(header_refseqs[0].get("LN").unwrap(), "10000000",); } + #[test] + fn test_bam_new() { + // Create the path to write the tmp test BAM + let tmp = tempfile::Builder::new() + .prefix("rust-htslib") + .tempdir() + .expect("Cannot create temp dir"); + let bampath = tmp.path().join("test.bam"); + + // write an unmapped BAM record (uBAM) + { + // Build the header + let mut header = Header::new(); + + // Add the version + header.push_record( + HeaderRecord::new(b"HD") + .push_tag(b"VN", &"1.6") + .push_tag(b"SO", &"unsorted"), + ); + + // Build the writer + let mut writer = Writer::from_path(&bampath, &header, Format::Bam).unwrap(); + + // Build an empty record + let mut record = Record::new(); + + // Write the record (this previously seg-faulted) + assert!(writer.write(&record).is_ok()); + } + + // Read the record + { + // Build th reader + let mut reader = 
Reader::from_path(&bampath).expect("Error opening file."); + + // Read the record + let mut rec = Record::new(); + match reader.read(&mut rec) { + Some(r) => r.expect("Failed to read record."), + None => panic!("No record read."), + }; + + // Check a few things + assert!(rec.is_unmapped()); + assert_eq!(rec.tid(), -1); + assert_eq!(rec.pos(), -1); + assert_eq!(rec.mtid(), -1); + assert_eq!(rec.mpos(), -1); + } + } + #[test] fn test_idxstats_bam() { let mut reader = IndexedReader::from_path("test/test.bam").unwrap(); diff --git a/src/bam/record.rs b/src/bam/record.rs index 83aded002..ced3cc8aa 100644 --- a/src/bam/record.rs +++ b/src/bam/record.rs @@ -114,12 +114,23 @@ fn extranul_from_qname(qname: &[u8]) -> usize { impl Record { /// Create an empty BAM record. pub fn new() -> Self { - Record { + let mut record = Record { inner: unsafe { MaybeUninit::zeroed().assume_init() }, own: true, cigar: None, header: None, - } + }; + // The read/query name needs to be set as empty to properly initialize + // the record + record.set_qname(b""); + // Developer note: these are needed so the returned record is properly + // initialized as unmapped. + record.set_unmapped(); + record.set_tid(-1); + record.set_pos(-1); + record.set_mpos(-1); + record.set_mtid(-1); + record } pub fn from_inner(from: *mut htslib::bam1_t) -> Self { diff --git a/src/bam/record_serde.rs b/src/bam/record_serde.rs index 651867030..1bf8a2fa1 100644 --- a/src/bam/record_serde.rs +++ b/src/bam/record_serde.rs @@ -8,6 +8,8 @@ use serde_bytes::{ByteBuf, Bytes}; use crate::bam::record::Record; fn fix_l_extranul(rec: &mut Record) { + // first, reset the number of extranuls to 0 for calling .qname(); then calculate how many we actually have + rec.inner_mut().core.l_extranul = 0; let l_extranul = rec.qname().iter().rev().take_while(|x| **x == 0u8).count() as u8; rec.inner_mut().core.l_extranul = l_extranul; }