From 028e85ce2ba103ef3fb7415f5ee67e3d5ac2dfea Mon Sep 17 00:00:00 2001 From: Volker Mische Date: Mon, 23 Jan 2023 13:15:20 +0100 Subject: [PATCH] fix: improve codec spec compliance (#174) The DAG-CBOR, DAG-JSON and DAG-PB codecs are not more spec compliant. New negative test fixtures were added to /~https://github.com/ipld/codec-fixtures, with this change all pass. BREAKING CHANGE: DAG-CBOR, DAG-JSON and DAG-PB encoding and decoding is stricter. In DAG-CBOR and DAG-JSON now error when duplicated map keys with the same map are encountered. DAG-PB is now stricter on encoding and decoding. For example are links and data not allowed to be interleaved, they have to be one after another (in any order). When encoding `Ipld` to DAG-PB, the types now need to match, links need to be ordered. The DAG-PB will error if unknown data is encountered during decoding. --- core/src/serde/de.rs | 5 +- dag-cbor/src/decode.rs | 9 ++- dag-cbor/src/error.rs | 5 ++ dag-json/src/codec.rs | 11 ++-- dag-pb/src/codec.rs | 128 +++++++++++++++++++++++++++++++---------- 5 files changed, 119 insertions(+), 39 deletions(-) diff --git a/core/src/serde/de.rs b/core/src/serde/de.rs index c5113f5e..9d68a680 100644 --- a/core/src/serde/de.rs +++ b/core/src/serde/de.rs @@ -164,7 +164,10 @@ impl<'de> de::Deserialize<'de> for Ipld { let mut values = BTreeMap::new(); while let Some((key, value)) = visitor.next_entry()? { - values.insert(key, value); + let prev_value = values.insert(key, value); + if prev_value.is_some() { + return Err(de::Error::custom("Duplicate map key")); + } } Ok(Ipld::Map(values)) diff --git a/dag-cbor/src/decode.rs b/dag-cbor/src/decode.rs index 97b19fe1..1898b99d 100644 --- a/dag-cbor/src/decode.rs +++ b/dag-cbor/src/decode.rs @@ -1,8 +1,8 @@ //! CBOR decoder use crate::cbor::{Major, MajorKind, F32, F64, FALSE, NULL, TRUE}; use crate::error::{ - InvalidCidPrefix, LengthOutOfRange, NumberNotMinimal, NumberOutOfRange, UnexpectedCode, - UnexpectedEof, UnknownTag, + DuplicateKey, InvalidCidPrefix, LengthOutOfRange, NumberNotMinimal, NumberOutOfRange, + UnexpectedCode, UnexpectedEof, UnknownTag, }; use crate::DagCborCodec as DagCbor; use byteorder::{BigEndian, ByteOrder}; @@ -100,7 +100,10 @@ pub fn read_map + Ord, T: Decode>( for _ in 0..len { let key = K::decode(DagCbor, r)?; let value = T::decode(DagCbor, r)?; - map.insert(key, value); + let prev_value = map.insert(key, value); + if prev_value.is_some() { + return Err(DuplicateKey.into()); + } } Ok(map) } diff --git a/dag-cbor/src/error.rs b/dag-cbor/src/error.rs index e236469e..eb10c4f3 100644 --- a/dag-cbor/src/error.rs +++ b/dag-cbor/src/error.rs @@ -115,3 +115,8 @@ pub struct UnexpectedEof; #[derive(Debug, Error)] #[error("Invalid Cid prefix: {0}")] pub struct InvalidCidPrefix(pub u8); + +/// A duplicate key within a map. +#[derive(Debug, Error)] +#[error("Duplicate map key.")] +pub struct DuplicateKey; diff --git a/dag-json/src/codec.rs b/dag-json/src/codec.rs index 8cbd465f..0d97be04 100644 --- a/dag-json/src/codec.rs +++ b/dag-json/src/codec.rs @@ -198,10 +198,13 @@ impl<'de> de::Visitor<'de> for JsonVisitor { } } - let unwrapped = values - .into_iter() - .map(|(key, WrapperOwned(value))| (key, value)) - .collect(); + let mut unwrapped = BTreeMap::new(); + for (key, WrapperOwned(value)) in values { + let prev_value = unwrapped.insert(key, value); + if prev_value.is_some() { + return Err(SerdeError::custom("duplicate map key".to_string())); + } + } Ok(Ipld::Map(unwrapped)) } diff --git a/dag-pb/src/codec.rs b/dag-pb/src/codec.rs index 3e54f3c2..4fe10dbc 100644 --- a/dag-pb/src/codec.rs +++ b/dag-pb/src/codec.rs @@ -129,14 +129,35 @@ impl<'a> TryFrom<&'a Ipld> for PbNodeRef<'a> { fn try_from(ipld: &'a Ipld) -> core::result::Result { let mut node = PbNodeRef::default(); - if let Ipld::List(links) = ipld.get("Links")? { - node.links = links - .iter() - .map(|link| link.try_into()) - .collect::>()? + match ipld.get("Links")? { + Ipld::List(links) => { + let mut prev_name = "".to_string(); + for link in links.iter() { + match link { + Ipld::Map(_) => { + let pb_link: PbLink = link.try_into()?; + // Make sure the links are sorted correctly. + if let Some(ref name) = pb_link.name { + if name.as_bytes() < prev_name.as_bytes() { + // This error message isn't ideal, but the important thing is + // that it errors. + return Err(TypeError::new(TypeErrorType::Link, ipld)); + } + prev_name = name.clone() + } + node.links.push(pb_link) + } + ipld => return Err(TypeError::new(TypeErrorType::Link, ipld)), + } + } + } + ipld => return Err(TypeError::new(TypeErrorType::List, ipld)), } - if let Ok(Ipld::Bytes(data)) = ipld.get("Data") { - node.data = Some(&data[..]); + + match ipld.get("Data") { + Ok(Ipld::Bytes(data)) => node.data = Some(&data[..]), + Ok(ipld) => return Err(TypeError::new(TypeErrorType::Bytes, ipld)), + _ => (), } Ok(node) @@ -147,26 +168,53 @@ impl TryFrom<&Ipld> for PbLink { type Error = TypeError; fn try_from(ipld: &Ipld) -> core::result::Result { - let cid = if let Ipld::Link(cid) = ipld.get("Hash")? { - *cid - } else { - return Err(TypeError::new(TypeErrorType::Link, ipld)); - }; - - let mut link = PbLink { - cid, - name: None, - size: None, - }; + if let Ipld::Map(map) = ipld { + let mut cid = None; + let mut name = None; + let mut size = None; + for (key, value) in map { + match key.as_str() { + "Hash" => { + cid = if let Ipld::Link(cid) = value { + Some(*cid) + } else { + return Err(TypeError::new(TypeErrorType::Link, ipld)); + }; + } + "Name" => { + name = if let Ipld::String(name) = value { + Some(name.clone()) + } else { + return Err(TypeError::new(TypeErrorType::String, ipld)); + } + } + "Tsize" => { + size = if let Ipld::Integer(size) = value { + Some( + u64::try_from(*size) + .map_err(|_| TypeError::new(TypeErrorType::Integer, value))?, + ) + } else { + return Err(TypeError::new(TypeErrorType::Integer, ipld)); + } + } + _ => { + return Err(TypeError::new( + TypeErrorType::Key("Hash, Name or Tsize".to_string()), + TypeErrorType::Key(key.to_string()), + )); + } + } + } - if let Ok(Ipld::String(name)) = ipld.get("Name") { - link.name = Some(name.clone()); - } - if let Ok(Ipld::Integer(size)) = ipld.get("Tsize") { - link.size = Some(*size as u64); + // Name and size are optional, CID is not. + match cid { + Some(cid) => Ok(PbLink { cid, name, size }), + None => Err(TypeError::new(TypeErrorType::Key("Hash".to_string()), ipld)), + } + } else { + Err(TypeError::new(TypeErrorType::Map, ipld)) } - - Ok(link) } } @@ -187,8 +235,10 @@ impl<'a> MessageRead<'a> for PbLink { } Ok(18) => name = Some(r.read_string(bytes)?.to_string()), Ok(24) => size = Some(r.read_uint64(bytes)?), - Ok(t) => { - r.read_unknown(bytes, t)?; + Ok(_) => { + return Err(quick_protobuf::Error::Message( + "unexpected bytes".to_string(), + )) } Err(e) => return Err(e), } @@ -234,12 +284,28 @@ impl MessageWrite for PbLink { impl<'a> MessageRead<'a> for PbNodeRef<'a> { fn from_reader(r: &mut BytesReader, bytes: &'a [u8]) -> quick_protobuf::Result { let mut msg = Self::default(); + let mut links_before_data = false; while !r.is_eof() { match r.next_tag(bytes) { - Ok(18) => msg.links.push(r.read_message::(bytes)?), - Ok(10) => msg.data = Some(r.read_bytes(bytes)?), - Ok(t) => { - r.read_unknown(bytes, t)?; + Ok(18) => { + // Links and data might be in any order, but they may not be interleaved. + if links_before_data { + return Err(quick_protobuf::Error::Message( + "duplicate Links section".to_string(), + )); + } + msg.links.push(r.read_message::(bytes)?) + } + Ok(10) => { + msg.data = Some(r.read_bytes(bytes)?); + if !msg.links.is_empty() { + links_before_data = true + } + } + Ok(_) => { + return Err(quick_protobuf::Error::Message( + "unexpected bytes".to_string(), + )) } Err(e) => return Err(e), }