From 1c6c456c28d6094e459643a5eb24bed3eef85424 Mon Sep 17 00:00:00 2001
From: Arnaud Mimart <33665250+amimart@users.noreply.github.com>
Date: Sat, 6 Jan 2024 18:27:07 +0100
Subject: [PATCH] feat(rdf): add rdf dataset querying utils

---
Note: dataset.rs was edited by hand in this patch, so the blob id on its
`index` line no longer matches the content.

 Cargo.lock                       |   1 +
 packages/okp4-rdf/Cargo.toml     |   1 +
 packages/okp4-rdf/src/dataset.rs | 236 +++++++++++++++++++++++++++++++
 packages/okp4-rdf/src/lib.rs     |   1 +
 4 files changed, 239 insertions(+)
 create mode 100644 packages/okp4-rdf/src/dataset.rs

diff --git a/Cargo.lock b/Cargo.lock
index d7922a85..57769dde 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -818,6 +818,7 @@ version = "3.0.0"
 dependencies = [
  "base16ct",
  "cosmwasm-std",
+ "itertools 0.12.0",
  "rio_api",
  "rio_turtle",
  "rio_xml",
diff --git a/packages/okp4-rdf/Cargo.toml b/packages/okp4-rdf/Cargo.toml
index 0c44b4aa..ac5c662f 100644
--- a/packages/okp4-rdf/Cargo.toml
+++ b/packages/okp4-rdf/Cargo.toml
@@ -12,3 +12,4 @@ rio_turtle.workspace = true
 rio_xml.workspace = true
 sha2 = "0.10.8"
 thiserror.workspace = true
+itertools = "0.12.0"
diff --git a/packages/okp4-rdf/src/dataset.rs b/packages/okp4-rdf/src/dataset.rs
new file mode 100644
index 00000000..c43a6228
--- /dev/null
+++ b/packages/okp4-rdf/src/dataset.rs
@@ -0,0 +1,236 @@
+//! Utilities to query an in-memory list of RDF quads by pattern.
+
+use itertools::Itertools;
+use rio_api::model::{GraphName, NamedNode, Quad, Subject, Term};
+use std::slice::Iter;
+
+/// An owned list of quads that can be iterated and filtered by pattern.
+#[derive(Clone)]
+pub struct Dataset<'a> {
+    quads: Vec<Quad<'a>>,
+}
+
+impl<'a> Dataset<'a> {
+    /// Creates a dataset from the given quads.
+    pub fn new(quads: Vec<Quad<'a>>) -> Self {
+        Self { quads }
+    }
+
+    /// Iterates over the quads matching the pattern `(s, p, o, g)`.
+    ///
+    /// A `None` component accepts any value. For `g`, `Some(None)` only accepts quads whose
+    /// `graph_name` is `None`.
+    pub fn match_pattern(
+        &'a self,
+        s: Option<Subject<'a>>,
+        p: Option<NamedNode<'a>>,
+        o: Option<Term<'a>>,
+        g: Option<Option<GraphName<'a>>>,
+    ) -> QuadPatternFilter<'a, Iter<'a, Quad<'a>>> {
+        self.quads.iter().match_pattern((s, p, o, g).into())
+    }
+
+    /// Iterates over the quads that do not match the pattern `(s, p, o, g)`.
+    ///
+    /// Components are interpreted as in [`Dataset::match_pattern`].
+    pub fn skip_pattern(
+        &'a self,
+        s: Option<Subject<'a>>,
+        p: Option<NamedNode<'a>>,
+        o: Option<Term<'a>>,
+        g: Option<Option<GraphName<'a>>>,
+    ) -> QuadPatternFilter<'a, Iter<'a, Quad<'a>>> {
+        self.quads.iter().skip_pattern((s, p, o, g).into())
+    }
+}
+
+impl<'a> From<&'a [Quad<'a>]> for Dataset<'a> {
+    /// Copies the quads of the slice into a new dataset.
+    fn from(quads: &'a [Quad<'a>]) -> Self {
+        Self {
+            quads: quads.to_vec(),
+        }
+    }
+}
+
+impl<'a> IntoIterator for &'a Dataset<'a> {
+    type Item = &'a Quad<'a>;
+    type IntoIter = Iter<'a, Quad<'a>>;
+
+    /// Iterates over the quads by reference.
+    fn into_iter(self) -> Self::IntoIter {
+        self.quads.iter()
+    }
+}
+
+/// A quad pattern in which every `None` component is a wildcard.
+#[derive(Copy, Clone)]
+pub struct QuadPattern<'a> {
+    subject: Option<Subject<'a>>,
+    predicate: Option<NamedNode<'a>>,
+    object: Option<Term<'a>>,
+    graph_name: Option<Option<GraphName<'a>>>,
+}
+
+impl<'a>
+    From<(
+        Option<Subject<'a>>,
+        Option<NamedNode<'a>>,
+        Option<Term<'a>>,
+        Option<Option<GraphName<'a>>>,
+    )> for QuadPattern<'a>
+{
+    /// Builds a pattern from a `(subject, predicate, object, graph_name)` tuple.
+    fn from(
+        value: (
+            Option<Subject<'a>>,
+            Option<NamedNode<'a>>,
+            Option<Term<'a>>,
+            Option<Option<GraphName<'a>>>,
+        ),
+    ) -> Self {
+        Self {
+            subject: value.0,
+            predicate: value.1,
+            object: value.2,
+            graph_name: value.3,
+        }
+    }
+}
+
+impl QuadPattern<'_> {
+    /// Returns `true` when every component set in the pattern equals the one of `quad`.
+    pub fn match_pattern<'a>(self, quad: &'a Quad<'a>) -> bool {
+        self.subject.map_or(true, |s| s == quad.subject)
+            && self.predicate.map_or(true, |p| p == quad.predicate)
+            && self.object.map_or(true, |o| o == quad.object)
+            && self.graph_name.map_or(true, |g| g == quad.graph_name)
+    }
+
+    /// Returns `true` when `quad` does not match the pattern.
+    pub fn skip_pattern<'a>(self, quad: &'a Quad<'a>) -> bool {
+        !self.match_pattern(quad)
+    }
+}
+
+/// Adds pattern filtering and term projection to any iterator over quads.
+pub trait QuadIterator<'a>: Iterator<Item = &'a Quad<'a>> {
+    /// Keeps only the quads matching all the given patterns.
+    fn match_patterns(self, patterns: Vec<QuadPattern<'a>>) -> QuadPatternFilter<'a, Self>
+    where
+        Self: Sized,
+    {
+        QuadPatternFilter::new(self, patterns, QuadPatternFilterKind::Match)
+    }
+
+    /// Drops the quads matching any of the given patterns.
+    fn skip_patterns(self, patterns: Vec<QuadPattern<'a>>) -> QuadPatternFilter<'a, Self>
+    where
+        Self: Sized,
+    {
+        QuadPatternFilter::new(self, patterns, QuadPatternFilterKind::Skip)
+    }
+
+    /// Keeps only the quads matching `pattern`.
+    fn match_pattern(self, pattern: QuadPattern<'a>) -> QuadPatternFilter<'a, Self>
+    where
+        Self: Sized,
+    {
+        self.match_patterns(vec![pattern])
+    }
+
+    /// Drops the quads matching `pattern`.
+    fn skip_pattern(self, pattern: QuadPattern<'a>) -> QuadPatternFilter<'a, Self>
+    where
+        Self: Sized,
+    {
+        self.skip_patterns(vec![pattern])
+    }
+
+    /// Yields each distinct subject of the quads once.
+    fn subjects(self) -> Box<dyn Iterator<Item = Subject<'a>> + 'a>
+    where
+        Self: Sized + 'a,
+    {
+        Box::from(self.map(|quad: &'a Quad<'a>| quad.subject).unique())
+    }
+
+    /// Yields each distinct predicate of the quads once.
+    fn predicates(self) -> Box<dyn Iterator<Item = NamedNode<'a>> + 'a>
+    where
+        Self: Sized + 'a,
+    {
+        Box::from(self.map(|quad: &'a Quad<'a>| quad.predicate).unique())
+    }
+
+    /// Yields each distinct object of the quads once.
+    fn objects(self) -> Box<dyn Iterator<Item = Term<'a>> + 'a>
+    where
+        Self: Sized + 'a,
+    {
+        Box::from(self.map(|quad: &'a Quad<'a>| quad.object).unique())
+    }
+
+    /// Yields each distinct graph name of the quads once.
+    fn graph_names(self) -> Box<dyn Iterator<Item = Option<GraphName<'a>>> + 'a>
+    where
+        Self: Sized + 'a,
+    {
+        Box::from(self.map(|quad: &'a Quad<'a>| quad.graph_name).unique())
+    }
+}
+
+impl<'a, T: ?Sized> QuadIterator<'a> for T where T: Iterator<Item = &'a Quad<'a>> {}
+
+/// Selects whether a [`QuadPatternFilter`] keeps or drops the quads matching its patterns.
+pub enum QuadPatternFilterKind {
+    /// Keep the quads matching all the patterns.
+    Match,
+    /// Drop the quads matching any of the patterns.
+    Skip,
+}
+
+/// Iterator adaptor yielding the quads of `inner` selected by `patterns` and `filter_kind`.
+pub struct QuadPatternFilter<'a, I>
+where
+    I: Iterator<Item = &'a Quad<'a>>,
+{
+    patterns: Vec<QuadPattern<'a>>,
+    filter_kind: QuadPatternFilterKind,
+    inner: I,
+}
+
+impl<'a, I> QuadPatternFilter<'a, I>
+where
+    I: Iterator<Item = &'a Quad<'a>>,
+{
+    /// Creates a filter over `inner` driven by `patterns` and `filter_kind`.
+    pub fn new(
+        inner: I,
+        patterns: Vec<QuadPattern<'a>>,
+        filter_kind: QuadPatternFilterKind,
+    ) -> Self {
+        Self {
+            patterns,
+            inner,
+            filter_kind,
+        }
+    }
+}
+
+impl<'a, I> Iterator for QuadPatternFilter<'a, I>
+where
+    I: Iterator<Item = &'a Quad<'a>>,
+{
+    type Item = &'a Quad<'a>;
+
+    /// Advances `inner` to the next quad accepted by the filter, if any.
+    fn next(&mut self) -> Option<Self::Item> {
+        self.inner.find(|quad| match self.filter_kind {
+            QuadPatternFilterKind::Match => self.patterns.iter().all(|p| p.match_pattern(quad)),
+            // Keep a quad only when no pattern matches it. An `||` fold seeded with `true`
+            // would accept every quad, hence `all`.
+            QuadPatternFilterKind::Skip => self.patterns.iter().all(|p| p.skip_pattern(quad)),
+        })
+    }
+}
diff --git a/packages/okp4-rdf/src/lib.rs b/packages/okp4-rdf/src/lib.rs
index d2cfb8be..e71934c8 100644
--- a/packages/okp4-rdf/src/lib.rs
+++ b/packages/okp4-rdf/src/lib.rs
@@ -1,3 +1,4 @@
+pub mod dataset;
 pub mod normalize;
 pub mod serde;
 pub mod uri;