Skip to content

Commit

Permalink
Merge branch 'parser_api', addresses #25
Browse files Browse the repository at this point in the history
  • Loading branch information
pchampin committed Jan 9, 2020
2 parents 592eb87 + 7d1ac44 commit 9af95c5
Show file tree
Hide file tree
Showing 10 changed files with 359 additions and 363 deletions.
58 changes: 7 additions & 51 deletions sophia/src/error.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! Types for handling errors.
use crate::parser::Location;
use crate::term::TermError;
use std::convert::Infallible;
use std::fmt;

error_chain! {
errors {
Expand Down Expand Up @@ -35,67 +35,23 @@ impl From<Infallible> for Error {
}
}

/// Required until `NtPatserError` is introduced.
/// Required by parser::xml
impl From<TermError> for Error {
    fn from(term_error: TermError) -> Self {
        // Wrap the term error in its dedicated error kind,
        // then promote that kind to a full-fledged `Error`.
        let kind = ErrorKind::TermError(term_error);
        Self::from(kind)
    }
}

/// A position in a parsed stream.
#[derive(Clone, Debug)]
pub enum Position {
// Byte offset (starting at 0)
Offset(usize),
// Line-Column position (both starting at 1)
LiCo(usize, usize),
}

impl fmt::Display for Position {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> fmt::Result {
/// Required by parser::xml
impl crate::parser::WithLocation for Error {
fn location(&self) -> Location {
match self {
Position::Offset(offset) => write!(f, "{}", offset),
Position::LiCo(li, co) => write!(f, "{}:{}", li, co),
Error(ErrorKind::ParserError(_, location), ..) => location.clone(),
_ => Location::Unknown,
}
}
}

/// The location of a ParseError
#[derive(Clone, Debug)]
pub enum Location {
Unknown,
Pos(Position),
Span(Position, Position),
}

impl fmt::Display for Location {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Location::Unknown => write!(f, "?"),
Location::Pos(pos) => write!(f, "{}", pos),
Location::Span(s, e) => write!(f, "{}-{}", s, e),
}
}
}

impl Location {
pub fn from_offset(offset: usize) -> Location {
Location::Pos(Position::Offset(offset))
}
pub fn from_lico(line: usize, column: usize) -> Location {
Location::Pos(Position::LiCo(line, column))
}
pub fn from_offsets(offset1: usize, offset2: usize) -> Location {
Location::Span(Position::Offset(offset1), Position::Offset(offset2))
}
pub fn from_licos(line1: usize, column1: usize, line2: usize, column2: usize) -> Location {
Location::Span(
Position::LiCo(line1, column1),
Position::LiCo(line2, column2),
)
}
}

/// Make a Parser Error with minimal information
pub fn make_parser_error(message: String, line_offset: usize) -> ErrorKind {
ErrorKind::ParserError(message, Location::from_lico(line_offset, 0))
Expand Down
146 changes: 96 additions & 50 deletions sophia/src/parser.rs
Original file line number Diff line number Diff line change
@@ -1,59 +1,105 @@
//! Parsers for standard RDF syntaxes,
//! and tools for building new parsers.
//!
//! # Uniform interface
//!
//! Each parser module defines a `Config` type, that
//! - implements [`Default`],
//! - has three methods `parse_bufread`, `parse_read` and `parse_str`,
//! accepting [`io::BufRead`], [`io::Read`] and [`&str`] respectively,
//! and all returning a [`TripleSource`] or [`QuadSource`].
//!
//! Each parser module also has three functions
//! `parse_bufread`, `parse_read` and `parse_str`,
//! calling the corresponding methods from the default `Config`.
//!
//! [`Default`]: https://doc.rust-lang.org/std/default/trait.Default.html
//! [`io::BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
//! [`io::Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
//! [`&str`]: https://doc.rust-lang.org/std/primitive.str.html
//! [`TripleSource`]: ../triple/stream/trait.TripleSource.html
//! [`QuadSource`]: ../quad/stream/trait.QuadSource.html
//! API for parsing RDF syntaxes.
/// This macro provides a straightforward implementation of the default functions
/// of a parser module producing triples.
#[macro_export]
macro_rules! def_default_triple_parser_api {
() => {
def_default_parser_api!($crate::triple::stream::TripleSource);
};
use crate::quad::stream::QuadSource;
use crate::triple::stream::TripleSource;

mod _location;
pub use _location::*;

/// A generic parser takes some data of type `T`,
/// and returns a [`TripleSource`] or a [`QuadSource`].
///
/// See also [`TripleParser`] and [`QuadParser`].
///
/// [`TripleParser`]: trait.TripleParser.html
/// [`TripleSource`]: ../triple/stream/trait.TripleSource.html
/// [`QuadParser`]: trait.QuadParser.html
/// [`QuadSource`]: ../quad/stream/trait.QuadSource.html
pub trait Parser<T> {
/// The source produced by this parser, generally
/// [`TripleSource`] or [`QuadSource`].
///
/// [`TripleSource`]: ../triple/stream/trait.TripleSource.html
/// [`QuadSource`]: ../quad/stream/trait.QuadSource.html
type Source;

/// The central method of `Parser`: parses data into a (triple or quad) source.
fn parse(&self, data: T) -> Self::Source;

/// Convenient shortcut method for parsing strings.
///
/// It may not be available on some exotic parsers,
/// but will be automatically supported for parsers supporting any
/// [`BufRead`] or [`Read`].
///
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
fn parse_str<'t>(&self, txt: &'t str) -> Self::Source
where
&'t str: IntoParsable<Target = T>,
{
self.parse(txt.into_parsable())
}
}

/// This macro provides a straightforward implementation of the default functions
/// of a parser module producing quads.
#[macro_export]
macro_rules! def_default_quad_parser_api {
() => {
def_default_parser_api!($crate::quad::stream::QuadSource);
};
/// Specialization of [`Parser`] that returns a [`QuadSource`].
/// It also constrains the returned source's error to implement [`WithLocation`].
///
/// [`Parser`]: trait.Parser.html
/// [`QuadSource`]: ../quad/stream/trait.QuadSource.html
/// [`WithLocation`]: trait.WithLocation.html
pub trait QuadParser<T>: Parser<T> {}
impl<T, P> QuadParser<T> for P
where
P: Parser<T>,
<P as Parser<T>>::Source: QuadSource,
<<P as Parser<T>>::Source as QuadSource>::Error: WithLocation,
{
}

macro_rules! def_default_parser_api {
($item: path) => {
/// Shortcut for `Config::default().parse_bufread(bufread)`
#[inline]
pub fn parse_bufread<'a, B: ::std::io::BufRead + 'a>(bufread: B) -> impl $item + 'a {
Config::default().parse_bufread(bufread)
}
/// Shortcut for `Config::default().parse_read(read)`
#[inline]
pub fn parse_read<'a, R: ::std::io::Read + 'a>(read: R) -> impl $item + 'a {
Config::default().parse_read(read)
/// Specialization of [`Parser`] that returns a [`QuadSource`].
/// It also constrains the returned source's error to implement [`WithLocation`].
///
/// [`Parser`]: trait.Parser.html
/// [`QuadSource`]: ../quad/stream/trait.QuadSource.html
/// [`WithLocation`]: trait.WithLocation.html
pub trait TripleParser<T>: Parser<T> {}
impl<T, P> TripleParser<T> for P
where
P: Parser<T>,
<P as Parser<T>>::Source: TripleSource,
<<P as Parser<T>>::Source as TripleSource>::Error: WithLocation,
{
}

/// Conversion of a value into data that a parser can consume;
/// this is the machinery behind [`Parser::parse_str`].
///
/// [`Parser::parse_str`]: trait.Parser.html#method.parse_str
pub trait IntoParsable {
    /// The type of parsable data that `Self` converts into.
    type Target;

    /// Consumes `self` and returns the corresponding parsable data.
    fn into_parsable(self) -> Self::Target;
}

/// A string slice is handed to parsers as its underlying bytes.
impl<'a> IntoParsable for &'a str {
    type Target = &'a [u8];

    fn into_parsable(self) -> Self::Target {
        str::as_bytes(self)
    }
}

/// Define convenience module-level functions for a parser implementation supporting BufRead.
#[macro_export]
macro_rules! def_mod_functions_for_bufread_parser {
($parser_type: ident) => {
/// Convenience function for parsing a BufRead with the default parser.
pub fn parse_bufread<B: std::io::BufRead>(
bufread: B,
) -> <$parser_type as $crate::parser::Parser<B>>::Source {
$parser_type::default().parse(bufread)
}
/// Shortcut for `Config::default().parse_str(txt)`
#[inline]
pub fn parse_str<'a>(txt: &'a str) -> impl $item + 'a {
Config::default().parse_str(txt)

/// Convenience function for parsing a str with the default parser.
pub fn parse_str(txt: &str) -> <$parser_type as $crate::parser::Parser<&[u8]>>::Source {
$parser_type::default().parse_str(txt)
}
};
}
Expand Down
66 changes: 66 additions & 0 deletions sophia/src/parser/_location.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// this module is transparently re-exported by its parent `parser`
use std::fmt;

/// A location in a parsed stream, which can be unknown, a specific point, or a span.
#[derive(Clone, Debug)]
pub enum Location {
/// The location is not known.
Unknown,
/// A specific point in the stream.
Pos(Position),
/// A span between two positions in the stream.
Span(Position, Position),
}

/// A location is displayed as `?` when unknown,
/// as its position when it is a single point,
/// and as both positions separated by `-` when it is a span.
impl fmt::Display for Location {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Location::Span(start, end) => write!(f, "{}-{}", start, end),
            Location::Pos(position) => write!(f, "{}", position),
            Location::Unknown => f.write_str("?"),
        }
    }
}

impl Location {
    /// Creates a point location from a byte offset (starting at 0) in the stream.
    pub fn from_offset(offset: usize) -> Location {
        let position = Position::Offset(offset);
        Location::Pos(position)
    }

    /// Creates a point location from a line and a column (both starting at 1) in the stream.
    pub fn from_lico(line: usize, column: usize) -> Location {
        let position = Position::LiCo(line, column);
        Location::Pos(position)
    }

    /// Creates a location which is a span between two byte offsets (starting at 0) in the stream.
    pub fn from_offsets(offset1: usize, offset2: usize) -> Location {
        let start = Position::Offset(offset1);
        let end = Position::Offset(offset2);
        Location::Span(start, end)
    }

    /// Creates a location which is a span between two line-column positions
    /// (both starting at 1) in the stream.
    pub fn from_licos(line1: usize, column1: usize, line2: usize, column2: usize) -> Location {
        let start = Position::LiCo(line1, column1);
        let end = Position::LiCo(line2, column2);
        Location::Span(start, end)
    }
}

/// A position in a parsed stream.
///
/// Positions can be compared for equality; two positions of different kinds
/// (offset vs. line-column) are never equal.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Position {
    /// Byte offset (starting at 0)
    Offset(usize),
    /// Line-Column position (both starting at 1)
    LiCo(usize, usize),
}

/// A byte offset is displayed as a plain number,
/// a line-column position as `line:column`.
impl fmt::Display for Position {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            Position::LiCo(line, column) => write!(f, "{}:{}", line, column),
            Position::Offset(offset) => write!(f, "{}", offset),
        }
    }
}

/// This trait is meant to be implemented by errors raised by parsers.
///
/// It lets the consumer of an error retrieve the [`Location`],
/// in the parsed stream, that the error is associated with.
///
/// [`Location`]: enum.Location.html
pub trait WithLocation {
/// Returns the location associated with this error.
///
/// `Location::Unknown` is available for errors that have no location to report.
fn location(&self) -> Location;
}
56 changes: 23 additions & 33 deletions sophia/src/parser/gtrig.rs
Original file line number Diff line number Diff line change
@@ -1,50 +1,30 @@
//! Adapter for the Generalized TriG parser from [RIO](https://github.com/Tpt/rio/blob/master/turtle/src/gtrig.rs)
use std::io::{BufRead, BufReader, Cursor, Read};
use std::io::BufRead;

use rio_turtle::GTriGParser;
use rio_turtle::{GTriGParser as RioGTriGParser, TurtleError};

use crate::def_default_quad_parser_api;
use crate::error::*;
use crate::parser::rio_common::*;
use crate::quad::stream::QuadSource;
use crate::parser::Parser;

/// RIO TriG parser configuration.
///
/// For more information,
/// see the [uniform interface] of parsers.
///
/// [uniform interface]: ../index.html#uniform-interface
/// TriG parser based on RIO.
#[derive(Clone, Debug, Default)]
pub struct Config {
pub struct GTriGParser {
pub base: Option<String>,
}

impl Config {
#[inline]
pub fn parse_bufread<'a, B: BufRead + 'a>(
&self,
bufread: B,
) -> impl QuadSource<Error = Error> + 'a {
impl<B: BufRead> Parser<B> for GTriGParser {
type Source = GeneralizedRioSource<RioGTriGParser<B>, TurtleError>;
fn parse(&self, data: B) -> Self::Source {
let base: &str = match &self.base {
Some(base) => &base,
None => "",
};
GeneralizedRioSource::from(GTriGParser::new(bufread, base))
}

#[inline]
pub fn parse_read<'a, R: Read + 'a>(&self, read: R) -> impl QuadSource<Error = Error> + 'a {
self.parse_bufread(BufReader::new(read))
}

#[inline]
pub fn parse_str<'a>(&self, txt: &'a str) -> impl QuadSource<Error = Error> + 'a {
self.parse_bufread(Cursor::new(txt.as_bytes()))
GeneralizedRioSource::from(RioGTriGParser::new(data, base))
}
}

def_default_quad_parser_api! {}
def_mod_functions_for_bufread_parser!(GTriGParser);

// ---------------------------------------------------------------------------------
// tests
Expand All @@ -61,7 +41,17 @@ mod test {
use crate::term::StaticTerm;

#[test]
fn test_simple_trig_string() -> std::result::Result<(), Box<dyn std::error::Error>> {
fn test_is_quad_parser() {
    // Compile-time check that GTriGParser implements QuadParser:
    // if this test compiles, it passes, so no runtime assertion is needed.
    fn check_trait<P: crate::parser::QuadParser<&'static [u8]>>(_: &P) {}
    check_trait(&GTriGParser::default());
}

#[test]
fn test_simple_gtrig_string() -> std::result::Result<(), Box<dyn std::error::Error>> {
let turtle = r#"
@prefix : <http://example.org/ns/> .
Expand All @@ -74,8 +64,8 @@ mod test {
"#;

let mut d = FastDataset::new();
let cfg = Config { base: None };
let c = cfg.parse_str(&turtle).in_dataset(&mut d)?;
let p = GTriGParser { base: None };
let c = p.parse_str(&turtle).in_dataset(&mut d)?;
assert_eq!(c, 3);
assert!(d
.quads_matching(
Expand Down
Loading

0 comments on commit 9af95c5

Please sign in to comment.