Skip to content

Commit

Permalink
feat(rdf): add rdf dataset querying utils
Browse files Browse the repository at this point in the history
  • Loading branch information
amimart committed Feb 5, 2024
1 parent 26ca921 commit 1c6c456
Show file tree
Hide file tree
Showing 4 changed files with 222 additions and 0 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions packages/okp4-rdf/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ rio_turtle.workspace = true
rio_xml.workspace = true
sha2 = "0.10.8"
thiserror.workspace = true
itertools = "0.12.0"
219 changes: 219 additions & 0 deletions packages/okp4-rdf/src/dataset.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
use itertools::Itertools;
use rio_api::model::{GraphName, NamedNode, Quad, Subject, Term};
use std::slice::Iter;

/// An in-memory collection of RDF quads that can be iterated and filtered
/// by pattern.
#[derive(Clone)]
pub struct Dataset<'a> {
// Backing storage for the quads held by this dataset.
quads: Vec<Quad<'a>>,
}

impl<'a> Dataset<'a> {
pub fn new(quads: Vec<Quad<'a>>) -> Self {
Self { quads }
}

pub fn match_pattern(
&'a self,
s: Option<Subject<'a>>,
p: Option<NamedNode<'a>>,
o: Option<Term<'a>>,
g: Option<Option<GraphName<'a>>>,
) -> QuadPatternFilter<'a, Iter<'a, Quad<'a>>> {
self.quads.iter().match_pattern((s, p, o, g).into())
}

pub fn skip_pattern(
&'a self,
s: Option<Subject<'a>>,
p: Option<NamedNode<'a>>,
o: Option<Term<'a>>,
g: Option<Option<GraphName<'a>>>,
) -> QuadPatternFilter<'a, Iter<'a, Quad<'a>>> {
self.quads.iter().skip_pattern((s, p, o, g).into())
}
}

impl<'a> From<&'a [Quad<'a>]> for Dataset<'a> {
    /// Copies the quads of the slice into a freshly allocated dataset.
    fn from(quads: &'a [Quad<'a>]) -> Self {
        Self::new(quads.to_vec())
    }
}

impl<'a> IntoIterator for &'a Dataset<'a> {
    type Item = &'a Quad<'a>;
    type IntoIter = Iter<'a, Quad<'a>>;

    /// Iterates over the dataset's quads by reference, in storage order.
    fn into_iter(self) -> Self::IntoIter {
        self.quads.as_slice().iter()
    }
}

/// A pattern used to test quads component by component.
///
/// Each field is optional: `None` places no constraint on the corresponding
/// quad component, while `Some(value)` requires the component to be equal to
/// `value`.
#[derive(Copy, Clone)]
pub struct QuadPattern<'a> {
// Expected subject, or `None` to accept any subject.
subject: Option<Subject<'a>>,
// Expected predicate, or `None` to accept any predicate.
predicate: Option<NamedNode<'a>>,
// Expected object, or `None` to accept any object.
object: Option<Term<'a>>,
// Expected graph name: the outer `None` accepts any graph, while `Some(inner)`
// is compared directly with the quad's own `Option<GraphName>` (so `Some(None)`
// only accepts quads without a graph name).
graph_name: Option<Option<GraphName<'a>>>,
}

impl<'a>
    From<(
        Option<Subject<'a>>,
        Option<NamedNode<'a>>,
        Option<Term<'a>>,
        Option<Option<GraphName<'a>>>,
    )> for QuadPattern<'a>
{
    /// Builds a pattern from a `(subject, predicate, object, graph_name)` tuple,
    /// assigning each element to the field of the same position.
    fn from(
        (subject, predicate, object, graph_name): (
            Option<Subject<'a>>,
            Option<NamedNode<'a>>,
            Option<Term<'a>>,
            Option<Option<GraphName<'a>>>,
        ),
    ) -> Self {
        Self {
            subject,
            predicate,
            object,
            graph_name,
        }
    }
}

impl QuadPattern<'_> {
    /// Returns `true` when every constrained component of the pattern is equal
    /// to the corresponding component of `quad`.
    ///
    /// Unconstrained (`None`) components always pass, so a pattern with no
    /// constraint at all accepts any quad.
    pub fn match_pattern<'a>(self, quad: &'a Quad<'a>) -> bool {
        self.subject.map_or(true, |subject| subject == quad.subject)
            && self
                .predicate
                .map_or(true, |predicate| predicate == quad.predicate)
            && self.object.map_or(true, |object| object == quad.object)
            && self
                .graph_name
                .map_or(true, |graph_name| graph_name == quad.graph_name)
    }

    /// Returns the negation of [`QuadPattern::match_pattern`] for `quad`.
    pub fn skip_pattern<'a>(self, quad: &'a Quad<'a>) -> bool {
        let matched = self.match_pattern(quad);
        !matched
    }
}

/// Extension methods available on any iterator yielding `&Quad` references.
pub trait QuadIterator<'a>: Iterator<Item = &'a Quad<'a>> {
    /// Wraps this iterator in a [`QuadPatternFilter`] of kind `Match` configured
    /// with the given patterns.
    fn match_patterns(self, patterns: Vec<QuadPattern<'a>>) -> QuadPatternFilter<'a, Self>
    where
        Self: Sized,
    {
        let kind = QuadPatternFilterKind::Match;
        QuadPatternFilter::new(self, patterns, kind)
    }

    /// Wraps this iterator in a [`QuadPatternFilter`] of kind `Skip` configured
    /// with the given patterns.
    fn skip_patterns(self, patterns: Vec<QuadPattern<'a>>) -> QuadPatternFilter<'a, Self>
    where
        Self: Sized,
    {
        let kind = QuadPatternFilterKind::Skip;
        QuadPatternFilter::new(self, patterns, kind)
    }

    /// Single-pattern shortcut for [`QuadIterator::match_patterns`].
    fn match_pattern(self, pattern: QuadPattern<'a>) -> QuadPatternFilter<'a, Self>
    where
        Self: Sized,
    {
        let patterns = vec![pattern];
        self.match_patterns(patterns)
    }

    /// Single-pattern shortcut for [`QuadIterator::skip_patterns`].
    fn skip_pattern(self, pattern: QuadPattern<'a>) -> QuadPatternFilter<'a, Self>
    where
        Self: Sized,
    {
        let patterns = vec![pattern];
        self.skip_patterns(patterns)
    }

    /// Returns the subjects of the iterated quads, de-duplicated with
    /// `Itertools::unique`.
    fn subjects(self) -> Box<dyn Iterator<Item = Subject<'a>> + 'a>
    where
        Self: Sized + 'a,
    {
        let subjects = self.map(|q| q.subject);
        Box::new(subjects.unique())
    }

    /// Returns the predicates of the iterated quads, de-duplicated with
    /// `Itertools::unique`.
    fn predicates(self) -> Box<dyn Iterator<Item = NamedNode<'a>> + 'a>
    where
        Self: Sized + 'a,
    {
        let predicates = self.map(|q| q.predicate);
        Box::new(predicates.unique())
    }

    /// Returns the objects of the iterated quads, de-duplicated with
    /// `Itertools::unique`.
    fn objects(self) -> Box<dyn Iterator<Item = Term<'a>> + 'a>
    where
        Self: Sized + 'a,
    {
        let objects = self.map(|q| q.object);
        Box::new(objects.unique())
    }

    /// Returns the graph names of the iterated quads (`None` included),
    /// de-duplicated with `Itertools::unique`.
    fn graph_names(self) -> Box<dyn Iterator<Item = Option<GraphName<'a>>> + 'a>
    where
        Self: Sized + 'a,
    {
        let graph_names = self.map(|q| q.graph_name);
        Box::new(graph_names.unique())
    }
}

/// Blanket implementation: every iterator over `&Quad` references (sized or
/// not) gets the [`QuadIterator`] methods.
impl<'a, T> QuadIterator<'a> for T
where
    T: Iterator<Item = &'a Quad<'a>> + ?Sized,
{
}

/// Selects how a [`QuadPatternFilter`] uses its patterns.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum QuadPatternFilterKind {
    /// Quads are tested with `QuadPattern::match_pattern`.
    Match,
    /// Quads are tested with `QuadPattern::skip_pattern`.
    Skip,
}

/// Iterator adapter that filters the quads of an inner iterator against a
/// list of [`QuadPattern`]s, according to a [`QuadPatternFilterKind`].
pub struct QuadPatternFilter<'a, I>
where
I: Iterator<Item = &'a Quad<'a>>,
{
// Patterns each quad is tested against.
patterns: Vec<QuadPattern<'a>>,
// Whether the patterns are used to match or to skip quads.
filter_kind: QuadPatternFilterKind,
// Source iterator the quads are pulled from.
inner: I,
}

impl<'a, I> QuadPatternFilter<'a, I>
where
I: Iterator<Item = &'a Quad<'a>>,
{
pub fn new(
inner: I,
patterns: Vec<QuadPattern<'a>>,
filter_kind: QuadPatternFilterKind,
) -> Self {
Self {
patterns,
inner,
filter_kind,
}
}
}

impl<'a, I> Iterator for QuadPatternFilter<'a, I>
where
    I: Iterator<Item = &'a Quad<'a>>,
{
    type Item = &'a Quad<'a>;

    /// Advances the inner iterator to the next quad accepted by the filter.
    ///
    /// * `Match` keeps a quad only when every configured pattern matches it.
    /// * `Skip` keeps a quad only when none of the configured patterns matches
    ///   it, i.e. a quad is dropped as soon as one pattern matches.
    ///
    /// With an empty pattern list both kinds let every quad through.
    fn next(&mut self) -> Option<Self::Item> {
        self.inner.find(|quad| match self.filter_kind {
            QuadPatternFilterKind::Match => self.patterns.iter().all(|p| p.match_pattern(quad)),
            // The previous `fold(true, |v, p| v || ..)` was always `true`, so
            // nothing was ever skipped; require every pattern to reject the quad.
            QuadPatternFilterKind::Skip => self.patterns.iter().all(|p| p.skip_pattern(quad)),
        })
    }
}
1 change: 1 addition & 0 deletions packages/okp4-rdf/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
pub mod dataset;
pub mod normalize;
pub mod serde;
pub mod uri;

0 comments on commit 1c6c456

Please sign in to comment.