Skip to content

Commit

Permalink
fix: improve codec spec compliance (#174)
Browse files Browse the repository at this point in the history
The DAG-CBOR, DAG-JSON and DAG-PB codecs are not more spec compliant. New
negative test fixtures were added to /~https://github.com/ipld/codec-fixtures,
with this change all pass.

BREAKING CHANGE: DAG-CBOR, DAG-JSON and DAG-PB encoding and decoding is
stricter.

In DAG-CBOR and DAG-JSON now error when duplicated map keys with the same
map are encountered.

DAG-PB is now stricter on encoding and decoding. For example are links and
data not allowed to be interleaved, they have to be one after another (in
any order).

When encoding `Ipld` to DAG-PB, the types now need to match, links need to
be ordered.

The DAG-PB will error if unknown data is encountered during decoding.
  • Loading branch information
vmx authored Jan 23, 2023
1 parent 36188d5 commit 028e85c
Show file tree
Hide file tree
Showing 5 changed files with 119 additions and 39 deletions.
5 changes: 4 additions & 1 deletion core/src/serde/de.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,10 @@ impl<'de> de::Deserialize<'de> for Ipld {
let mut values = BTreeMap::new();

while let Some((key, value)) = visitor.next_entry()? {
values.insert(key, value);
let prev_value = values.insert(key, value);
if prev_value.is_some() {
return Err(de::Error::custom("Duplicate map key"));
}
}

Ok(Ipld::Map(values))
Expand Down
9 changes: 6 additions & 3 deletions dag-cbor/src/decode.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
//! CBOR decoder
use crate::cbor::{Major, MajorKind, F32, F64, FALSE, NULL, TRUE};
use crate::error::{
InvalidCidPrefix, LengthOutOfRange, NumberNotMinimal, NumberOutOfRange, UnexpectedCode,
UnexpectedEof, UnknownTag,
DuplicateKey, InvalidCidPrefix, LengthOutOfRange, NumberNotMinimal, NumberOutOfRange,
UnexpectedCode, UnexpectedEof, UnknownTag,
};
use crate::DagCborCodec as DagCbor;
use byteorder::{BigEndian, ByteOrder};
Expand Down Expand Up @@ -100,7 +100,10 @@ pub fn read_map<R: Read + Seek, K: Decode<DagCbor> + Ord, T: Decode<DagCbor>>(
for _ in 0..len {
let key = K::decode(DagCbor, r)?;
let value = T::decode(DagCbor, r)?;
map.insert(key, value);
let prev_value = map.insert(key, value);
if prev_value.is_some() {
return Err(DuplicateKey.into());
}
}
Ok(map)
}
Expand Down
5 changes: 5 additions & 0 deletions dag-cbor/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,3 +115,8 @@ pub struct UnexpectedEof;
#[derive(Debug, Error)]
#[error("Invalid Cid prefix: {0}")]
pub struct InvalidCidPrefix(pub u8);

/// A duplicate key within a map.
#[derive(Debug, Error)]
#[error("Duplicate map key.")]
pub struct DuplicateKey;
11 changes: 7 additions & 4 deletions dag-json/src/codec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -198,10 +198,13 @@ impl<'de> de::Visitor<'de> for JsonVisitor {
}
}

let unwrapped = values
.into_iter()
.map(|(key, WrapperOwned(value))| (key, value))
.collect();
let mut unwrapped = BTreeMap::new();
for (key, WrapperOwned(value)) in values {
let prev_value = unwrapped.insert(key, value);
if prev_value.is_some() {
return Err(SerdeError::custom("duplicate map key".to_string()));
}
}
Ok(Ipld::Map(unwrapped))
}

Expand Down
128 changes: 97 additions & 31 deletions dag-pb/src/codec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,14 +129,35 @@ impl<'a> TryFrom<&'a Ipld> for PbNodeRef<'a> {
fn try_from(ipld: &'a Ipld) -> core::result::Result<Self, Self::Error> {
let mut node = PbNodeRef::default();

if let Ipld::List(links) = ipld.get("Links")? {
node.links = links
.iter()
.map(|link| link.try_into())
.collect::<Result<_, _>>()?
match ipld.get("Links")? {
Ipld::List(links) => {
let mut prev_name = "".to_string();
for link in links.iter() {
match link {
Ipld::Map(_) => {
let pb_link: PbLink = link.try_into()?;
// Make sure the links are sorted correctly.
if let Some(ref name) = pb_link.name {
if name.as_bytes() < prev_name.as_bytes() {
// This error message isn't ideal, but the important thing is
// that it errors.
return Err(TypeError::new(TypeErrorType::Link, ipld));
}
prev_name = name.clone()
}
node.links.push(pb_link)
}
ipld => return Err(TypeError::new(TypeErrorType::Link, ipld)),
}
}
}
ipld => return Err(TypeError::new(TypeErrorType::List, ipld)),
}
if let Ok(Ipld::Bytes(data)) = ipld.get("Data") {
node.data = Some(&data[..]);

match ipld.get("Data") {
Ok(Ipld::Bytes(data)) => node.data = Some(&data[..]),
Ok(ipld) => return Err(TypeError::new(TypeErrorType::Bytes, ipld)),
_ => (),
}

Ok(node)
Expand All @@ -147,26 +168,53 @@ impl TryFrom<&Ipld> for PbLink {
type Error = TypeError;

fn try_from(ipld: &Ipld) -> core::result::Result<PbLink, Self::Error> {
let cid = if let Ipld::Link(cid) = ipld.get("Hash")? {
*cid
} else {
return Err(TypeError::new(TypeErrorType::Link, ipld));
};

let mut link = PbLink {
cid,
name: None,
size: None,
};
if let Ipld::Map(map) = ipld {
let mut cid = None;
let mut name = None;
let mut size = None;
for (key, value) in map {
match key.as_str() {
"Hash" => {
cid = if let Ipld::Link(cid) = value {
Some(*cid)
} else {
return Err(TypeError::new(TypeErrorType::Link, ipld));
};
}
"Name" => {
name = if let Ipld::String(name) = value {
Some(name.clone())
} else {
return Err(TypeError::new(TypeErrorType::String, ipld));
}
}
"Tsize" => {
size = if let Ipld::Integer(size) = value {
Some(
u64::try_from(*size)
.map_err(|_| TypeError::new(TypeErrorType::Integer, value))?,
)
} else {
return Err(TypeError::new(TypeErrorType::Integer, ipld));
}
}
_ => {
return Err(TypeError::new(
TypeErrorType::Key("Hash, Name or Tsize".to_string()),
TypeErrorType::Key(key.to_string()),
));
}
}
}

if let Ok(Ipld::String(name)) = ipld.get("Name") {
link.name = Some(name.clone());
}
if let Ok(Ipld::Integer(size)) = ipld.get("Tsize") {
link.size = Some(*size as u64);
// Name and size are optional, CID is not.
match cid {
Some(cid) => Ok(PbLink { cid, name, size }),
None => Err(TypeError::new(TypeErrorType::Key("Hash".to_string()), ipld)),
}
} else {
Err(TypeError::new(TypeErrorType::Map, ipld))
}

Ok(link)
}
}

Expand All @@ -187,8 +235,10 @@ impl<'a> MessageRead<'a> for PbLink {
}
Ok(18) => name = Some(r.read_string(bytes)?.to_string()),
Ok(24) => size = Some(r.read_uint64(bytes)?),
Ok(t) => {
r.read_unknown(bytes, t)?;
Ok(_) => {
return Err(quick_protobuf::Error::Message(
"unexpected bytes".to_string(),
))
}
Err(e) => return Err(e),
}
Expand Down Expand Up @@ -234,12 +284,28 @@ impl MessageWrite for PbLink {
impl<'a> MessageRead<'a> for PbNodeRef<'a> {
fn from_reader(r: &mut BytesReader, bytes: &'a [u8]) -> quick_protobuf::Result<Self> {
let mut msg = Self::default();
let mut links_before_data = false;
while !r.is_eof() {
match r.next_tag(bytes) {
Ok(18) => msg.links.push(r.read_message::<PbLink>(bytes)?),
Ok(10) => msg.data = Some(r.read_bytes(bytes)?),
Ok(t) => {
r.read_unknown(bytes, t)?;
Ok(18) => {
// Links and data might be in any order, but they may not be interleaved.
if links_before_data {
return Err(quick_protobuf::Error::Message(
"duplicate Links section".to_string(),
));
}
msg.links.push(r.read_message::<PbLink>(bytes)?)
}
Ok(10) => {
msg.data = Some(r.read_bytes(bytes)?);
if !msg.links.is_empty() {
links_before_data = true
}
}
Ok(_) => {
return Err(quick_protobuf::Error::Message(
"unexpected bytes".to_string(),
))
}
Err(e) => return Err(e),
}
Expand Down

0 comments on commit 028e85c

Please sign in to comment.