From 36188d562e10dffdb27a71779a6793d7fb302cbf Mon Sep 17 00:00:00 2001 From: Friedel Ziegelmayer Date: Mon, 9 Jan 2023 17:39:38 +0100 Subject: [PATCH] feat: make dag-pb spec compliant (#170) The DAG-PB codec is now spec compliant. The fixtures at /~https://github.com/ipld/codec-fixtures pass. Closes #168,#139 --- Cargo.toml | 4 +- core/Cargo.toml | 6 +- dag-cbor/src/decode.rs | 2 +- dag-json/Cargo.toml | 2 +- dag-pb/Cargo.toml | 7 +- dag-pb/src/codec.rs | 300 +++++++++++++++++++++++++++++++--------- dag-pb/src/dag_pb.proto | 22 --- dag-pb/src/dag_pb.rs | 23 --- dag-pb/src/lib.rs | 13 +- dag-pb/tests/compat.rs | 93 +++++++++++++ macro/Cargo.toml | 2 +- 11 files changed, 343 insertions(+), 131 deletions(-) delete mode 100644 dag-pb/src/dag_pb.proto delete mode 100644 dag-pb/src/dag_pb.rs create mode 100644 dag-pb/tests/compat.rs diff --git a/Cargo.toml b/Cargo.toml index cd491d9e..8410c14b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,7 @@ libipld-json = { version = "0.15.0", path = "dag-json", optional = true } libipld-macro = { version = "0.15.0", path = "macro" } libipld-pb = { version = "0.15.0", path = "dag-pb", optional = true } log = "0.4.14" -multihash = { version = "0.17.0", default-features = false, features = ["multihash-impl"] } +multihash = { version = "0.18.0", default-features = false, features = ["multihash-impl"] } thiserror = "1.0.25" [dev-dependencies] @@ -29,7 +29,7 @@ async-std = { version = "1.9.0", features = ["attributes"] } criterion = "0.3.4" proptest = "1.0.0" model = "0.1.2" -multihash = "0.17.0" +multihash = "0.18.0" [features] default = ["dag-cbor", "dag-json", "dag-pb", "derive"] diff --git a/core/Cargo.toml b/core/Cargo.toml index 031690a6..b7dfab07 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -15,9 +15,9 @@ arb = ["quickcheck", "cid/arb"] [dependencies] anyhow = { version = "1.0.40", default-features = false } -cid = { version = "0.9.0", default-features = false, features = ["alloc"] } +cid = { version = "0.10.0", default-features = false, features = ["alloc"] } core2 = { version = "0.4", default-features = false, features = ["alloc"] } -multihash = { version = "0.17.0", default-features = false, features = ["alloc"] } +multihash = { version = "0.18.0", default-features = false, features = ["alloc"] } multibase = { version = "0.9.1", default-features = false, optional = true } serde = { version = "1.0.132", default-features = false, features = ["alloc"], optional = true } @@ -25,7 +25,7 @@ thiserror = {version = "1.0.25", optional = true } quickcheck = { version = "1.0", optional = true } [dev-dependencies] -multihash = { version = "0.17.0", default-features = false, features = ["multihash-impl", "blake3"] } +multihash = { version = "0.18.0", default-features = false, features = ["multihash-impl", "blake3"] } serde_test = "1.0.132" serde_bytes = "0.11.5" serde_json = "1.0.79" diff --git a/dag-cbor/src/decode.rs b/dag-cbor/src/decode.rs index 47678089..97b19fe1 100644 --- a/dag-cbor/src/decode.rs +++ b/dag-cbor/src/decode.rs @@ -166,7 +166,7 @@ pub fn read_uint(r: &mut R, major: Major) -> Result { 0..=MAX_2BYTE => Err(NumberNotMinimal.into()), value => Ok(value), }, - 27 => match read_u64(r)? as u64 { + 27 => match read_u64(r)? { 0..=MAX_4BYTE => Err(NumberNotMinimal.into()), value => Ok(value), }, diff --git a/dag-json/Cargo.toml b/dag-json/Cargo.toml index 0e40e1b7..ed84906b 100644 --- a/dag-json/Cargo.toml +++ b/dag-json/Cargo.toml @@ -9,6 +9,6 @@ repository = "/~https://github.com/ipfs-rust/rust-ipld" [dependencies] libipld-core = { version = "0.15.0", path = "../core" } -multihash = "0.17.0" +multihash = "0.18.0" serde_json = { version = "1.0.64", features = ["float_roundtrip"] } serde = { version = "1.0.126", features = ["derive"] } diff --git a/dag-pb/Cargo.toml b/dag-pb/Cargo.toml index c825747c..34f4d1aa 100644 --- a/dag-pb/Cargo.toml +++ b/dag-pb/Cargo.toml @@ -9,9 +9,12 @@ repository = "/~https://github.com/ipfs-rust/rust-ipld" [dependencies] libipld-core = { version = "0.15.0", path = "../core" } -prost = "0.11.0" thiserror = "1.0.25" +quick-protobuf = "0.8.1" +bytes = "1.3.0" [dev-dependencies] libipld-macro = { path = "../macro" } -multihash = "0.17.0" +libipld = { path = "../" } +multihash = "0.18.0" +hex = "0.4.3" diff --git a/dag-pb/src/codec.rs b/dag-pb/src/codec.rs index 6691d6af..3e54f3c2 100644 --- a/dag-pb/src/codec.rs +++ b/dag-pb/src/codec.rs @@ -1,78 +1,94 @@ -use crate::dag_pb; use core::convert::{TryFrom, TryInto}; +use std::collections::BTreeMap; + +use bytes::Bytes; use libipld_core::cid::Cid; use libipld_core::error::{Result, TypeError, TypeErrorType}; use libipld_core::ipld::Ipld; -use prost::bytes::{Buf, Bytes}; -use std::collections::BTreeMap; +use quick_protobuf::sizeofs::{sizeof_len, sizeof_varint}; +use quick_protobuf::{BytesReader, MessageRead, MessageWrite, Writer, WriterBackend}; /// A protobuf ipld link. -#[derive(Debug)] +#[derive(Debug, PartialEq, Eq, Clone)] pub struct PbLink { /// Content identifier. pub cid: Cid, /// Name of the link. - pub name: String, + pub name: Option, /// Size of the data. - pub size: u64, + pub size: Option, } /// A protobuf ipld node. -#[derive(Debug)] +#[derive(Debug, PartialEq, Eq, Clone, Default)] pub struct PbNode { /// List of protobuf ipld links. pub links: Vec, /// Binary data blob. - pub data: Box<[u8]>, + pub data: Option, } -use prost::Message; +#[derive(Debug, PartialEq, Eq, Clone, Default)] +pub(crate) struct PbNodeRef<'a> { + links: Vec, + data: Option<&'a [u8]>, +} impl PbNode { pub(crate) fn links(bytes: Bytes, links: &mut impl Extend) -> Result<()> { - let proto = dag_pb::PbNode::decode(bytes)?; - for link in proto.links { - let cid = Cid::try_from(link.hash.as_ref())?; - links.extend(Some(cid)); + let node = PbNode::from_bytes(bytes)?; + for link in node.links { + links.extend(Some(link.cid)); } Ok(()) } /// Deserializes a `PbNode` from bytes. - pub fn from_bytes(bytes: impl Buf) -> Result { - let proto: dag_pb::PbNode = dag_pb::PbNode::decode(bytes)?; - let data = proto.data.to_vec().into_boxed_slice(); - let mut links = Vec::new(); - for link in proto.links { - let cid = Cid::try_from(link.hash.as_ref())?; - let name = link.name; - let size = link.tsize; - links.push(PbLink { cid, name, size }); - } - Ok(PbNode { links, data }) + pub fn from_bytes(buf: Bytes) -> Result { + let mut reader = BytesReader::from_bytes(&buf); + let node = PbNodeRef::from_reader(&mut reader, &buf)?; + let data = node.data.map(|d| buf.slice_ref(d)); + + Ok(PbNode { + links: node.links, + data, + }) } /// Serializes a `PbNode` to bytes. - pub fn into_bytes(self) -> Box<[u8]> { - let links = self - .links - .into_iter() - .map(|link| dag_pb::PbLink { - hash: link.cid.to_bytes().into(), - name: link.name, - tsize: link.size, - }) - .collect::>(); - let proto = dag_pb::PbNode { - data: self.data.into(), - links, - }; + pub fn into_bytes(mut self) -> Box<[u8]> { + // Links must be strictly sorted by name before encoding, leaving stable + // ordering where the names are the same (or absent). + self.links.sort_by(|a, b| { + let a = a.name.as_ref().map(|s| s.as_bytes()).unwrap_or(&[][..]); + let b = b.name.as_ref().map(|s| s.as_bytes()).unwrap_or(&[][..]); + a.cmp(b) + }); - let mut res = Vec::with_capacity(proto.encoded_len()); - proto - .encode(&mut res) - .expect("there is no situation in which the protobuf message can be invalid"); - res.into_boxed_slice() + let mut buf = Vec::with_capacity(self.get_size()); + let mut writer = Writer::new(&mut buf); + self.write_message(&mut writer) + .expect("protobuf to be valid"); + buf.into_boxed_slice() + } +} + +impl PbNodeRef<'_> { + /// Serializes a `PbNode` to bytes. + pub fn into_bytes(mut self) -> Box<[u8]> { + // Links must be strictly sorted by name before encoding, leaving stable + // ordering where the names are the same (or absent). + self.links.sort_by(|a, b| { + let a = a.name.as_ref().map(|s| s.as_bytes()).unwrap_or(&[][..]); + let b = b.name.as_ref().map(|s| s.as_bytes()).unwrap_or(&[][..]); + a.cmp(b) + }); + + let mut buf = Vec::with_capacity(self.get_size()); + let mut writer = Writer::new(&mut buf); + self.write_message(&mut writer) + .expect("protobuf to be valid"); + buf.into_boxed_slice() } } @@ -85,7 +101,9 @@ impl From for Ipld { .map(|link| link.into()) .collect::>(); map.insert("Links".to_string(), links.into()); - map.insert("Data".to_string(), node.data.into()); + if let Some(data) = node.data { + map.insert("Data".to_string(), Ipld::Bytes(data.to_vec())); + } map.into() } } @@ -94,30 +112,34 @@ impl From for Ipld { fn from(link: PbLink) -> Self { let mut map = BTreeMap::::new(); map.insert("Hash".to_string(), link.cid.into()); - map.insert("Name".to_string(), link.name.into()); - map.insert("Tsize".to_string(), link.size.into()); + + if let Some(name) = link.name { + map.insert("Name".to_string(), name.into()); + } + if let Some(size) = link.size { + map.insert("Tsize".to_string(), size.into()); + } map.into() } } -impl TryFrom<&Ipld> for PbNode { +impl<'a> TryFrom<&'a Ipld> for PbNodeRef<'a> { type Error = TypeError; - fn try_from(ipld: &Ipld) -> core::result::Result { - let links = if let Ipld::List(links) = ipld.get("Links")? { - links + fn try_from(ipld: &'a Ipld) -> core::result::Result { + let mut node = PbNodeRef::default(); + + if let Ipld::List(links) = ipld.get("Links")? { + node.links = links .iter() .map(|link| link.try_into()) .collect::>()? - } else { - return Err(TypeError::new(TypeErrorType::List, ipld)); - }; - let data = if let Ipld::Bytes(data) = ipld.get("Data")? { - data.clone().into_boxed_slice() - } else { - return Err(TypeError::new(TypeErrorType::Bytes, ipld)); - }; - Ok(PbNode { links, data }) + } + if let Ok(Ipld::Bytes(data)) = ipld.get("Data") { + node.data = Some(&data[..]); + } + + Ok(node) } } @@ -130,16 +152,156 @@ impl TryFrom<&Ipld> for PbLink { } else { return Err(TypeError::new(TypeErrorType::Link, ipld)); }; - let name = if let Ipld::String(name) = ipld.get("Name")? { - name.clone() - } else { - return Err(TypeError::new(TypeErrorType::String, ipld)); - }; - let size = if let Ipld::Integer(size) = ipld.get("Tsize")? { - *size as u64 - } else { - return Err(TypeError::new(TypeErrorType::Integer, ipld)); + + let mut link = PbLink { + cid, + name: None, + size: None, }; - Ok(PbLink { cid, name, size }) + + if let Ok(Ipld::String(name)) = ipld.get("Name") { + link.name = Some(name.clone()); + } + if let Ok(Ipld::Integer(size)) = ipld.get("Tsize") { + link.size = Some(*size as u64); + } + + Ok(link) + } +} + +impl<'a> MessageRead<'a> for PbLink { + fn from_reader(r: &mut BytesReader, bytes: &'a [u8]) -> quick_protobuf::Result { + let mut cid = None; + let mut name = None; + let mut size = None; + + while !r.is_eof() { + match r.next_tag(bytes) { + Ok(10) => { + let bytes = r.read_bytes(bytes)?; + cid = Some( + Cid::try_from(bytes) + .map_err(|e| quick_protobuf::Error::Message(e.to_string()))?, + ); + } + Ok(18) => name = Some(r.read_string(bytes)?.to_string()), + Ok(24) => size = Some(r.read_uint64(bytes)?), + Ok(t) => { + r.read_unknown(bytes, t)?; + } + Err(e) => return Err(e), + } + } + Ok(PbLink { + cid: cid.ok_or_else(|| quick_protobuf::Error::Message("missing Hash".into()))?, + name, + size, + }) + } +} + +impl MessageWrite for PbLink { + fn get_size(&self) -> usize { + let mut size = 0; + let l = self.cid.encoded_len(); + size += 1 + sizeof_len(l); + + if let Some(ref name) = self.name { + size += 1 + sizeof_len(name.as_bytes().len()); + } + + if let Some(tsize) = self.size { + size += 1 + sizeof_varint(tsize); + } + size + } + + fn write_message(&self, w: &mut Writer) -> quick_protobuf::Result<()> { + let bytes = self.cid.to_bytes(); + w.write_with_tag(10, |w| w.write_bytes(&bytes))?; + + if let Some(ref name) = self.name { + w.write_with_tag(18, |w| w.write_string(name))?; + } + if let Some(size) = self.size { + w.write_with_tag(24, |w| w.write_uint64(size))?; + } + Ok(()) + } +} + +impl<'a> MessageRead<'a> for PbNodeRef<'a> { + fn from_reader(r: &mut BytesReader, bytes: &'a [u8]) -> quick_protobuf::Result { + let mut msg = Self::default(); + while !r.is_eof() { + match r.next_tag(bytes) { + Ok(18) => msg.links.push(r.read_message::(bytes)?), + Ok(10) => msg.data = Some(r.read_bytes(bytes)?), + Ok(t) => { + r.read_unknown(bytes, t)?; + } + Err(e) => return Err(e), + } + } + Ok(msg) + } +} + +impl MessageWrite for PbNode { + fn get_size(&self) -> usize { + let mut size = 0; + if let Some(ref data) = self.data { + size += 1 + sizeof_len(data.len()); + } + + size += self + .links + .iter() + .map(|s| 1 + sizeof_len((s).get_size())) + .sum::(); + + size + } + + fn write_message(&self, w: &mut Writer) -> quick_protobuf::Result<()> { + for s in &self.links { + w.write_with_tag(18, |w| w.write_message(s))?; + } + + if let Some(ref data) = self.data { + w.write_with_tag(10, |w| w.write_bytes(data))?; + } + + Ok(()) + } +} + +impl MessageWrite for PbNodeRef<'_> { + fn get_size(&self) -> usize { + let mut size = 0; + if let Some(data) = self.data { + size += 1 + sizeof_len(data.len()); + } + + size += self + .links + .iter() + .map(|s| 1 + sizeof_len((s).get_size())) + .sum::(); + + size + } + + fn write_message(&self, w: &mut Writer) -> quick_protobuf::Result<()> { + for s in &self.links { + w.write_with_tag(18, |w| w.write_message(s))?; + } + + if let Some(data) = self.data { + w.write_with_tag(10, |w| w.write_bytes(data))?; + } + + Ok(()) } } diff --git a/dag-pb/src/dag_pb.proto b/dag-pb/src/dag_pb.proto deleted file mode 100644 index d084ddb9..00000000 --- a/dag-pb/src/dag_pb.proto +++ /dev/null @@ -1,22 +0,0 @@ -// /~https://github.com/ipld/specs/blob/master/block-layer/codecs/dag-pb.md -syntax = "proto3"; - -package dag_pb; - -// An IPFS MerkleDAG Link -message PBLink { - // binary CID (with no multibase prefix) of the target object - bytes Hash = 1; - // UTF-8 string name - string Name = 2; - // cumulative size of target object - uint64 Tsize = 3; -} - -// An IPFS MerkleDAG Node -message PBNode { - // refs to other objects - repeated PBLink Links = 2; - // opaque user data - bytes Data = 1; -} diff --git a/dag-pb/src/dag_pb.rs b/dag-pb/src/dag_pb.rs deleted file mode 100644 index b2c5e912..00000000 --- a/dag-pb/src/dag_pb.rs +++ /dev/null @@ -1,23 +0,0 @@ -/// An IPFS MerkleDAG Link -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct PbLink { - /// binary CID (with no multibase prefix) of the target object - #[prost(bytes = "bytes", tag = "1")] - pub hash: ::prost::bytes::Bytes, - /// UTF-8 string name - #[prost(string, tag = "2")] - pub name: ::prost::alloc::string::String, - /// cumulative size of target object - #[prost(uint64, tag = "3")] - pub tsize: u64, -} -/// An IPFS MerkleDAG Node -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct PbNode { - /// refs to other objects - #[prost(message, repeated, tag = "2")] - pub links: ::prost::alloc::vec::Vec, - /// opaque user data - #[prost(bytes = "bytes", tag = "1")] - pub data: ::prost::bytes::Bytes, -} diff --git a/dag-pb/src/lib.rs b/dag-pb/src/lib.rs index eb136a15..960713a7 100644 --- a/dag-pb/src/lib.rs +++ b/dag-pb/src/lib.rs @@ -1,19 +1,17 @@ //! Protobuf codec. #![deny(missing_docs)] -#![deny(warnings)] -#![allow(clippy::derive_partial_eq_without_eq)] +use crate::codec::PbNodeRef; pub use crate::codec::{PbLink, PbNode}; + use core::convert::{TryFrom, TryInto}; use libipld_core::cid::Cid; use libipld_core::codec::{Codec, Decode, Encode, References}; use libipld_core::error::{Result, UnsupportedCodec}; use libipld_core::ipld::Ipld; -use prost::bytes::Bytes; use std::io::{Read, Seek, Write}; mod codec; -mod dag_pb; /// Protobuf codec. #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] @@ -37,7 +35,7 @@ impl TryFrom for DagPbCodec { impl Encode for Ipld { fn encode(&self, _: DagPbCodec, w: &mut W) -> Result<()> { - let pb_node: PbNode = self.try_into()?; + let pb_node: PbNodeRef = self.try_into()?; let bytes = pb_node.into_bytes(); w.write_all(&bytes)?; Ok(()) @@ -48,7 +46,8 @@ impl Decode for Ipld { fn decode(_: DagPbCodec, r: &mut R) -> Result { let mut bytes = Vec::new(); r.read_to_end(&mut bytes)?; - Ok(PbNode::from_bytes(Bytes::from(bytes))?.into()) + let node = PbNode::from_bytes(bytes.into())?; + Ok(node.into()) } } @@ -60,7 +59,7 @@ impl References for Ipld { ) -> Result<()> { let mut bytes = Vec::new(); r.read_to_end(&mut bytes)?; - PbNode::links(Bytes::from(bytes), set) + PbNode::links(bytes.into(), set) } } diff --git a/dag-pb/tests/compat.rs b/dag-pb/tests/compat.rs new file mode 100644 index 00000000..d3b152ba --- /dev/null +++ b/dag-pb/tests/compat.rs @@ -0,0 +1,93 @@ +use libipld::{cid::Cid, ipld, prelude::Codec, Ipld, IpldCodec}; + +struct TestCase { + name: &'static str, + node: Ipld, + expected_bytes: &'static str, +} + +#[test] +fn test_compat_roundtrip() { + // Cases based on + // /~https://github.com/ipld/go-codec-dagpb/blob/master/compat_test.go + + let empty_links = Vec::::new(); + let data_some = vec![0, 1, 2, 3, 4]; + let acid = Cid::try_from(&[1, 85, 0, 5, 0, 1, 2, 3, 4][..]).unwrap(); + let zero_name = ""; + let some_name = "some name"; + let zero_tsize: u64 = 0; + let large_tsize: u64 = 9007199254740991; // JavaScript Number.MAX_SAFE_INTEGER + + let cases = [ + TestCase { + name: "Links zero", + node: ipld!({ "Links": empty_links.clone() }), + expected_bytes: "", + }, + TestCase { + name: "Data some Links zero", + node: ipld!({ + "Links": empty_links, + "Data": data_some + }), + expected_bytes: "0a050001020304", + }, + TestCase { + name: "Links Hash some", + node: ipld!({ + "Links": vec![ipld!({ "Hash": acid})], + }), + expected_bytes: "120b0a09015500050001020304", + }, + TestCase { + name: "Links Hash some Name zero", + node: ipld!({ + "Links": vec![ipld!({ + "Hash": acid, + "Name": zero_name, + })] + }), + expected_bytes: "120d0a090155000500010203041200", + }, + TestCase { + name: "Links Hash some Name some", + node: ipld!({ + "Links": vec![ipld!({ + "Hash": acid, + "Name": some_name, + })] + }), + expected_bytes: "12160a090155000500010203041209736f6d65206e616d65", + }, + TestCase { + name: "Links Hash some Tsize zero", + node: ipld!({ + "Links": vec![ipld!({ + "Hash": acid, + "Tsize": zero_tsize, + })] + }), + expected_bytes: "120d0a090155000500010203041800", + }, + TestCase { + name: "Links Hash some Tsize some", + node: ipld!({ + "Links": vec![ipld!({ + "Hash": acid, + "Tsize": large_tsize, + })] + }), + expected_bytes: "12140a0901550005000102030418ffffffffffffff0f", + }, + ]; + + for case in cases { + println!("case {}", case.name); + let result = IpldCodec::DagPb.encode(&case.node).unwrap(); + assert_eq!(result, hex::decode(case.expected_bytes).unwrap()); + + let ipld: Ipld = IpldCodec::DagPb.decode(&result).unwrap(); + assert_eq!(ipld, case.node); + } +} diff --git a/macro/Cargo.toml b/macro/Cargo.toml index ca307712..ce2c9a6d 100644 --- a/macro/Cargo.toml +++ b/macro/Cargo.toml @@ -11,4 +11,4 @@ repository = "/~https://github.com/ipfs-rust/rust-ipld" libipld-core = { version = "0.15.0", path = "../core" } [dev-dependencies] -multihash = { version = "0.17.0", features = ["blake3"] } +multihash = { version = "0.18.0", features = ["blake3"] }