From 62d02da153f12db6dbab7462d16391b1da8d2711 Mon Sep 17 00:00:00 2001 From: Wonwoo Choi Date: Fri, 20 Sep 2024 00:35:24 +0900 Subject: [PATCH] Move container parser states to another module --- jxl/src/container/mod.rs | 268 +----------------------------------- jxl/src/container/parse.rs | 270 +++++++++++++++++++++++++++++++++++++ 2 files changed, 274 insertions(+), 264 deletions(-) create mode 100644 jxl/src/container/parse.rs diff --git a/jxl/src/container/mod.rs b/jxl/src/container/mod.rs index 6ced0e9..c21d85c 100644 --- a/jxl/src/container/mod.rs +++ b/jxl/src/container/mod.rs @@ -6,10 +6,11 @@ // Originally written for jxl-oxide. pub mod box_header; +pub mod parse; use box_header::*; - -use crate::error::{Error, Result}; +use parse::*; +pub use parse::ParseEvent; /// Container format parser. #[derive(Debug, Default)] @@ -59,267 +60,6 @@ enum JxlpIndexState { JxlpFinished, } -/// Iterator that reads over a buffer and emits parser events. -pub struct ParseEvents<'inner, 'buf> { - inner: &'inner mut ContainerParser, - remaining_input: &'buf [u8], - finished: bool, -} - -impl<'inner, 'buf> ParseEvents<'inner, 'buf> { - const CODESTREAM_SIG: [u8; 2] = [0xff, 0x0a]; - const CONTAINER_SIG: [u8; 12] = [0, 0, 0, 0xc, b'J', b'X', b'L', b' ', 0xd, 0xa, 0x87, 0xa]; - - fn new(parser: &'inner mut ContainerParser, input: &'buf [u8]) -> Self { - parser.previous_consumed_bytes = 0; - Self { - inner: parser, - remaining_input: input, - finished: false, - } - } - - fn emit_single(&mut self) -> Result>> { - let state = &mut self.inner.state; - let jxlp_index_state = &mut self.inner.jxlp_index_state; - let buf = &mut self.remaining_input; - - loop { - if buf.is_empty() { - self.finished = true; - return Ok(None); - } - - match state { - DetectState::WaitingSignature => { - if buf.starts_with(&Self::CODESTREAM_SIG) { - tracing::trace!("Codestream signature found"); - *state = DetectState::InCodestream { - kind: BitstreamKind::BareCodestream, - bytes_left: None, - }; - return Ok(Some(ParseEvent::BitstreamKind( - BitstreamKind::BareCodestream, - ))); - } else if buf.starts_with(&Self::CONTAINER_SIG) { - tracing::trace!("Container signature found"); - *state = DetectState::WaitingBoxHeader; - *buf = &buf[Self::CONTAINER_SIG.len()..]; - return Ok(Some(ParseEvent::BitstreamKind(BitstreamKind::Container))); - } else if !Self::CODESTREAM_SIG.starts_with(buf) - && !Self::CONTAINER_SIG.starts_with(buf) - { - tracing::debug!(?buf, "Invalid signature"); - *state = DetectState::InCodestream { - kind: BitstreamKind::Invalid, - bytes_left: None, - }; - return Ok(Some(ParseEvent::BitstreamKind(BitstreamKind::Invalid))); - } else { - return Ok(None); - } - } - DetectState::WaitingBoxHeader => match ContainerBoxHeader::parse(buf)? { - HeaderParseResult::Done { - header, - header_size, - } => { - *buf = &buf[header_size..]; - let tbox = header.box_type(); - if tbox == ContainerBoxType::CODESTREAM { - match jxlp_index_state { - JxlpIndexState::Initial => { - *jxlp_index_state = JxlpIndexState::SingleJxlc; - } - JxlpIndexState::SingleJxlc => { - tracing::debug!("Duplicate jxlc box found"); - return Err(Error::InvalidBox); - } - JxlpIndexState::Jxlp(_) | JxlpIndexState::JxlpFinished => { - tracing::debug!("Found jxlc box instead of jxlp box"); - return Err(Error::InvalidBox); - } - } - - *state = DetectState::InCodestream { - kind: BitstreamKind::Container, - bytes_left: header.box_size().map(|x| x as usize), - }; - } else if tbox == ContainerBoxType::PARTIAL_CODESTREAM { - if let Some(box_size) = header.box_size() { - if box_size < 4 { - return Err(Error::InvalidBox); - } - } - - match jxlp_index_state { - JxlpIndexState::Initial => { - *jxlp_index_state = JxlpIndexState::Jxlp(0); - } - JxlpIndexState::Jxlp(index) => { - *index += 1; - } - JxlpIndexState::SingleJxlc => { - tracing::debug!("jxlp box found after jxlc box"); - return Err(Error::InvalidBox); - } - JxlpIndexState::JxlpFinished => { - tracing::debug!("found another jxlp box after the final one"); - return Err(Error::InvalidBox); - } - } - - *state = DetectState::WaitingJxlpIndex(header); - } else { - let bytes_left = header.box_size().map(|x| x as usize); - *state = DetectState::InAuxBox { header, bytes_left }; - } - } - HeaderParseResult::NeedMoreData => return Ok(None), - }, - DetectState::WaitingJxlpIndex(header) => { - let &[b0, b1, b2, b3, ..] = &**buf else { - return Ok(None); - }; - - let index = u32::from_be_bytes([b0, b1, b2, b3]); - *buf = &buf[4..]; - let is_last = index & 0x80000000 != 0; - let index = index & 0x7fffffff; - - match *jxlp_index_state { - JxlpIndexState::Jxlp(expected_index) if expected_index == index => { - if is_last { - *jxlp_index_state = JxlpIndexState::JxlpFinished; - } - } - JxlpIndexState::Jxlp(expected_index) => { - tracing::debug!( - expected_index, - actual_index = index, - "Out-of-order jxlp box found", - ); - return Err(Error::InvalidBox); - } - state => { - tracing::debug!(?state, "invalid jxlp index state in WaitingJxlpIndex"); - unreachable!("invalid jxlp index state in WaitingJxlpIndex"); - } - } - - *state = DetectState::InCodestream { - kind: BitstreamKind::Container, - bytes_left: header.box_size().map(|x| x as usize - 4), - }; - } - DetectState::InCodestream { - bytes_left: None, .. - } => { - let payload = *buf; - *buf = &[]; - return Ok(Some(ParseEvent::Codestream(payload))); - } - DetectState::InCodestream { - bytes_left: Some(bytes_left), - .. - } => { - let payload = if buf.len() >= *bytes_left { - let (payload, remaining) = buf.split_at(*bytes_left); - *state = DetectState::WaitingBoxHeader; - *buf = remaining; - payload - } else { - let payload = *buf; - *bytes_left -= buf.len(); - *buf = &[]; - payload - }; - return Ok(Some(ParseEvent::Codestream(payload))); - } - DetectState::InAuxBox { - header: _, - bytes_left: None, - } => { - let _payload = *buf; - *buf = &[]; - // FIXME: emit auxiliary box event - } - DetectState::InAuxBox { - header: _, - bytes_left: Some(bytes_left), - } => { - let _payload = if buf.len() >= *bytes_left { - let (payload, remaining) = buf.split_at(*bytes_left); - *state = DetectState::WaitingBoxHeader; - *buf = remaining; - payload - } else { - let payload = *buf; - *bytes_left -= buf.len(); - *buf = &[]; - payload - }; - // FIXME: emit auxiliary box event - } - DetectState::Done(_) => return Ok(None), - } - } - } -} - -impl std::fmt::Debug for ParseEvents<'_, '_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("ParseEvents") - .field("inner", &self.inner) - .field( - "remaining_input", - &format_args!("({} byte(s))", self.remaining_input.len()), - ) - .field("finished", &self.finished) - .finish() - } -} - -impl<'inner, 'buf> Iterator for ParseEvents<'inner, 'buf> { - type Item = Result>; - - fn next(&mut self) -> Option { - if self.finished { - return None; - } - - let initial_buf = self.remaining_input; - let event = self.emit_single(); - - if event.is_err() { - self.finished = true; - } - - self.inner.previous_consumed_bytes += initial_buf.len() - self.remaining_input.len(); - event.transpose() - } -} - -/// Parser event emitted by [`ParseEvents`]. -pub enum ParseEvent<'buf> { - /// Bitstream structure is detected. - BitstreamKind(BitstreamKind), - /// Codestream data is read. - Codestream(&'buf [u8]), -} - -impl std::fmt::Debug for ParseEvent<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::BitstreamKind(kind) => f.debug_tuple("BitstreamKind").field(kind).finish(), - Self::Codestream(buf) => f - .debug_tuple("Codestream") - .field(&format_args!("{} byte(s)", buf.len())) - .finish(), - } - } -} - impl ContainerParser { pub fn new() -> Self { Self::default() @@ -365,7 +105,7 @@ impl ContainerParser { #[cfg(test)] impl ContainerParser { - pub(crate) fn collect_codestream(input: &[u8]) -> Result> { + pub(crate) fn collect_codestream(input: &[u8]) -> crate::error::Result> { let mut parser = Self::new(); let mut codestream = Vec::new(); for event in parser.process_bytes(input) { diff --git a/jxl/src/container/parse.rs b/jxl/src/container/parse.rs new file mode 100644 index 0000000..b386e94 --- /dev/null +++ b/jxl/src/container/parse.rs @@ -0,0 +1,270 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// +// Originally written for jxl-oxide. + +use super::{box_header::*, DetectState, JxlpIndexState, BitstreamKind, ContainerParser}; +use crate::error::{Error, Result}; + +/// Iterator that reads over a buffer and emits parser events. +pub struct ParseEvents<'inner, 'buf> { + inner: &'inner mut ContainerParser, + remaining_input: &'buf [u8], + finished: bool, +} + +impl<'inner, 'buf> ParseEvents<'inner, 'buf> { + const CODESTREAM_SIG: [u8; 2] = [0xff, 0x0a]; + const CONTAINER_SIG: [u8; 12] = [0, 0, 0, 0xc, b'J', b'X', b'L', b' ', 0xd, 0xa, 0x87, 0xa]; + + pub(super) fn new(parser: &'inner mut ContainerParser, input: &'buf [u8]) -> Self { + parser.previous_consumed_bytes = 0; + Self { + inner: parser, + remaining_input: input, + finished: false, + } + } + + fn emit_single(&mut self) -> Result>> { + let state = &mut self.inner.state; + let jxlp_index_state = &mut self.inner.jxlp_index_state; + let buf = &mut self.remaining_input; + + loop { + if buf.is_empty() { + self.finished = true; + return Ok(None); + } + + match state { + DetectState::WaitingSignature => { + if buf.starts_with(&Self::CODESTREAM_SIG) { + tracing::trace!("Codestream signature found"); + *state = DetectState::InCodestream { + kind: BitstreamKind::BareCodestream, + bytes_left: None, + }; + return Ok(Some(ParseEvent::BitstreamKind( + BitstreamKind::BareCodestream, + ))); + } else if buf.starts_with(&Self::CONTAINER_SIG) { + tracing::trace!("Container signature found"); + *state = DetectState::WaitingBoxHeader; + *buf = &buf[Self::CONTAINER_SIG.len()..]; + return Ok(Some(ParseEvent::BitstreamKind(BitstreamKind::Container))); + } else if !Self::CODESTREAM_SIG.starts_with(buf) + && !Self::CONTAINER_SIG.starts_with(buf) + { + tracing::debug!(?buf, "Invalid signature"); + *state = DetectState::InCodestream { + kind: BitstreamKind::Invalid, + bytes_left: None, + }; + return Ok(Some(ParseEvent::BitstreamKind(BitstreamKind::Invalid))); + } else { + return Ok(None); + } + } + DetectState::WaitingBoxHeader => match ContainerBoxHeader::parse(buf)? { + HeaderParseResult::Done { + header, + header_size, + } => { + *buf = &buf[header_size..]; + let tbox = header.box_type(); + if tbox == ContainerBoxType::CODESTREAM { + match jxlp_index_state { + JxlpIndexState::Initial => { + *jxlp_index_state = JxlpIndexState::SingleJxlc; + } + JxlpIndexState::SingleJxlc => { + tracing::debug!("Duplicate jxlc box found"); + return Err(Error::InvalidBox); + } + JxlpIndexState::Jxlp(_) | JxlpIndexState::JxlpFinished => { + tracing::debug!("Found jxlc box instead of jxlp box"); + return Err(Error::InvalidBox); + } + } + + *state = DetectState::InCodestream { + kind: BitstreamKind::Container, + bytes_left: header.box_size().map(|x| x as usize), + }; + } else if tbox == ContainerBoxType::PARTIAL_CODESTREAM { + if let Some(box_size) = header.box_size() { + if box_size < 4 { + return Err(Error::InvalidBox); + } + } + + match jxlp_index_state { + JxlpIndexState::Initial => { + *jxlp_index_state = JxlpIndexState::Jxlp(0); + } + JxlpIndexState::Jxlp(index) => { + *index += 1; + } + JxlpIndexState::SingleJxlc => { + tracing::debug!("jxlp box found after jxlc box"); + return Err(Error::InvalidBox); + } + JxlpIndexState::JxlpFinished => { + tracing::debug!("found another jxlp box after the final one"); + return Err(Error::InvalidBox); + } + } + + *state = DetectState::WaitingJxlpIndex(header); + } else { + let bytes_left = header.box_size().map(|x| x as usize); + *state = DetectState::InAuxBox { header, bytes_left }; + } + } + HeaderParseResult::NeedMoreData => return Ok(None), + }, + DetectState::WaitingJxlpIndex(header) => { + let &[b0, b1, b2, b3, ..] = &**buf else { + return Ok(None); + }; + + let index = u32::from_be_bytes([b0, b1, b2, b3]); + *buf = &buf[4..]; + let is_last = index & 0x80000000 != 0; + let index = index & 0x7fffffff; + + match *jxlp_index_state { + JxlpIndexState::Jxlp(expected_index) if expected_index == index => { + if is_last { + *jxlp_index_state = JxlpIndexState::JxlpFinished; + } + } + JxlpIndexState::Jxlp(expected_index) => { + tracing::debug!( + expected_index, + actual_index = index, + "Out-of-order jxlp box found", + ); + return Err(Error::InvalidBox); + } + state => { + tracing::debug!(?state, "invalid jxlp index state in WaitingJxlpIndex"); + unreachable!("invalid jxlp index state in WaitingJxlpIndex"); + } + } + + *state = DetectState::InCodestream { + kind: BitstreamKind::Container, + bytes_left: header.box_size().map(|x| x as usize - 4), + }; + } + DetectState::InCodestream { + bytes_left: None, .. + } => { + let payload = *buf; + *buf = &[]; + return Ok(Some(ParseEvent::Codestream(payload))); + } + DetectState::InCodestream { + bytes_left: Some(bytes_left), + .. + } => { + let payload = if buf.len() >= *bytes_left { + let (payload, remaining) = buf.split_at(*bytes_left); + *state = DetectState::WaitingBoxHeader; + *buf = remaining; + payload + } else { + let payload = *buf; + *bytes_left -= buf.len(); + *buf = &[]; + payload + }; + return Ok(Some(ParseEvent::Codestream(payload))); + } + DetectState::InAuxBox { + header: _, + bytes_left: None, + } => { + let _payload = *buf; + *buf = &[]; + // FIXME: emit auxiliary box event + } + DetectState::InAuxBox { + header: _, + bytes_left: Some(bytes_left), + } => { + let _payload = if buf.len() >= *bytes_left { + let (payload, remaining) = buf.split_at(*bytes_left); + *state = DetectState::WaitingBoxHeader; + *buf = remaining; + payload + } else { + let payload = *buf; + *bytes_left -= buf.len(); + *buf = &[]; + payload + }; + // FIXME: emit auxiliary box event + } + DetectState::Done(_) => return Ok(None), + } + } + } +} + +impl std::fmt::Debug for ParseEvents<'_, '_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ParseEvents") + .field("inner", &self.inner) + .field( + "remaining_input", + &format_args!("({} byte(s))", self.remaining_input.len()), + ) + .field("finished", &self.finished) + .finish() + } +} + +impl<'inner, 'buf> Iterator for ParseEvents<'inner, 'buf> { + type Item = Result>; + + fn next(&mut self) -> Option { + if self.finished { + return None; + } + + let initial_buf = self.remaining_input; + let event = self.emit_single(); + + if event.is_err() { + self.finished = true; + } + + self.inner.previous_consumed_bytes += initial_buf.len() - self.remaining_input.len(); + event.transpose() + } +} + +/// Parser event emitted by [`ParseEvents`]. +pub enum ParseEvent<'buf> { + /// Bitstream structure is detected. + BitstreamKind(BitstreamKind), + /// Codestream data is read. + Codestream(&'buf [u8]), +} + +impl std::fmt::Debug for ParseEvent<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::BitstreamKind(kind) => f.debug_tuple("BitstreamKind").field(kind).finish(), + Self::Codestream(buf) => f + .debug_tuple("Codestream") + .field(&format_args!("{} byte(s)", buf.len())) + .finish(), + } + } +}