Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix utils.parse_object() #46

Merged
merged 10 commits into from
Dec 13, 2022
72 changes: 58 additions & 14 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""Test utils"""
# pylint: disable=invalid-name
# pylint: disable=invalid-name,too-few-public-methods
import dlite
import pytest

Expand All @@ -20,6 +20,51 @@
assert infer_iri(coll.meta) == coll.meta.uri
assert infer_iri(coll) == coll.uuid

# We have no dependencies on pydantic, hence don't assume that it is installed.
# But if it is, infer_iri() should be able to infer IRIs from SOFT7 datamodels.
try:
from pydantic import AnyUrl, BaseModel, Field
except ImportError:
pass
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Try to avoid `except ...: pass` blocks - they are notoriously difficult to reason about.

else:
from typing import Any, Optional

class Property(BaseModel):
"""A property."""

# pylint: disable=unsubscriptable-object
# Yet another pylint bug, see https://github.com/PyCQA/pylint/issues/1498
type: Any = Field(..., description="Valid type name.")
shape: Optional[list[str]] = Field(
None, description="List of dimension expressions."
)
unit: Optional[str] = Field(None, description="Unit of a property.")
description: Optional[str] = Field(
None, description="A human description of the property."
)

class Entity(BaseModel):
"""An entity."""

# pylint: disable=unsubscriptable-object
identity: AnyUrl = Field(..., description="Unique URI identifying the entity.")
description: str = Field("", description="A description of the entity.")
dimensions: Optional[dict[str, str]] = Field(
None, description="Dict mapping dimension names to descriptions."
)
properties: dict[str, Property] = Field(..., description="Dict of properties.")

user = Entity(
identity="http://onto-ns.com/meta/0.1/User",
properties={
"username": Property(type=str, description="username"),
"quota": Property(type=float, unit="GB", description="User quota"),
},
)

assert infer_iri(user) == "http://onto-ns.com/meta/0.1/User"


# Test split_iri()
rdfs = str(RDFS)
assert split_iri(RDFS.subClassOf) == (rdfs, "subClassOf")
Expand Down Expand Up @@ -65,15 +110,15 @@ def h():
# test parse_literal()
assert parse_literal("abc") == Literal("abc", datatype=XSD.string)
assert parse_literal(True) == Literal("True", datatype=XSD.boolean)
assert parse_literal(1) == Literal("1", datatype=XSD.inteter)
assert parse_literal(1) == Literal("1", datatype=XSD.integer)
assert parse_literal(3.14) == Literal("3.14", datatype=XSD.double)
assert parse_literal(f'"3.14"^^{XSD.double}') == Literal("3.14", datatype=XSD.double)


# test parse_object()
assert parse_object("True") == Literal("True", datatype=XSD.boolean)
assert parse_object("False") == Literal("False", datatype=XSD.boolean)
assert parse_object("true") == Literal("true", datatype=XSD.string)
assert parse_object("true") == Literal("true", datatype=XSD.boolean)
assert parse_object("false") == Literal("false", datatype=XSD.boolean)
assert parse_object("True") == Literal("True", datatype=XSD.string)
assert parse_object("0") == Literal("0", datatype=XSD.integer)
assert parse_object("1") == Literal("1", datatype=XSD.integer)
assert parse_object("-1") == Literal("-1", datatype=XSD.integer)
Expand All @@ -97,12 +142,13 @@ def h():
assert parse_object("2022-12-01T12:30:30") == Literal(
"2022-12-01T12:30:30", datatype=XSD.dateTime
)
assert parse_object("2022-12-01 12:30:30.50") == Literal(
"2022-12-01 12:30:30.50", datatype=XSD.dateTime
)
assert parse_object("2022-12-01 12:30:30Z") == Literal(
"2022-12-01 12:30:30Z", datatype=XSD.dateTime
assert parse_object("2022-12-01 12:30:30.500") == Literal(
"2022-12-01 12:30:30.500", datatype=XSD.dateTime
)
# Format not supported in Python < 3.11
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove commented-out code

# assert parse_object("2022-12-01 12:30:30Z") == Literal(
# "2022-12-01 12:30:30Z", datatype=XSD.dateTime
# )
assert parse_object("2022-12-01 12:30:30+01:00") == Literal(
"2022-12-01 12:30:30+01:00", datatype=XSD.dateTime
)
Expand All @@ -112,7 +158,5 @@ def h():
assert parse_object(XSD.int) == XSD.int
assert parse_object(f'"42"^^{XSD.integer}') == Literal("42", datatype=XSD.integer)
assert parse_object(f'"4.2"^^{XSD.double}') == Literal("4.2", datatype=XSD.double)

# __FIXME__: parse_object() currently fails for the following cases:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove commented-out code

# assert parse_object(f'"42"^^{XSD.double}') == Literal("42", datatype=XSD.double)
# assert parse_object(f'"42"^^{XSD.int}') == Literal("42", datatype=XSD.int)
assert parse_object(f'"42"^^{XSD.double}') == Literal("42.0", datatype=XSD.double)
assert parse_object(f'"42"^^{XSD.int}') == Literal("42", datatype=XSD.int)
108 changes: 56 additions & 52 deletions tripper/literal.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from datetime import datetime
from typing import TYPE_CHECKING

from tripper.namespace import OWL, RDF, RDFS, XSD
from tripper.namespace import RDF, RDFS, XSD

if TYPE_CHECKING: # pragma: no cover
from typing import Any, Optional, Union
Expand All @@ -13,15 +13,14 @@ class Literal(str):
"""A literal RDF value.

Arguments:
value (Union[datetime, bytes, bytearray, bool, int, float, str]): The literal
value. See the `datatypes` class attribute for valid supported data types.
A localised string is provided as a string with `lang` set to a language
code.
lang (Optional[str]): A standard language code, like "en", "no", etc. Implies
that the `value` is a localised string.
datatype (Any): Explicit specification of the type of `value`. Should not be
combined with `lang`.

value (Union[datetime, bytes, bytearray, bool, int, float, str]):
The literal value. See the `datatypes` class attribute for valid
supported data types. A localised string is provided as a string
with `lang` set to a language code.
lang (Optional[str]): A standard language code, like "en", "no", etc.
Implies that the `value` is a localised string.
datatype (Any): Explicit specification of the type of `value`. Should
not be combined with `lang`.
"""

lang: "Union[str, None]"
Expand All @@ -30,13 +29,37 @@ class Literal(str):
# Note that the order of datatypes matters - it is used by
# utils.parse_literal() when inferring the datatype of a literal.
# Maps each supported Python type to its RDF datatype IRI(s).
# In the tuple-valued entries the first IRI is the one Literal.__new__
# selects (via `[0]`) when a Python type is passed as `datatype`; the
# whole tuple is what to_python() tests membership against for int,
# float and str.
# NOTE(review): this span appears to show both the former single-IRI
# entries and the new tuple entries of the same diff hunk.
datatypes = {
datetime: XSD.dateTime,
bytes: XSD.hexBinary,
bytearray: XSD.hexBinary,
bool: XSD.boolean,
int: XSD.integer,
float: XSD.double,
str: XSD.string,
datetime: (XSD.dateTime,),
bytes: (XSD.hexBinary,),
bytearray: (XSD.hexBinary,),
bool: (XSD.boolean,),
# NOTE(review): the integer list formerly inlined in to_python() also
# contained XSD.nonNegativeInteger, which is not listed here - confirm
# that dropping it is intended.
int: (
XSD.integer,
XSD.int,
XSD.short,
XSD.long,
XSD.nonPositiveInteger,
XSD.negativeInteger,
XSD.unsignedInt,
XSD.unsignedShort,
XSD.unsignedLong,
XSD.byte,
XSD.unsignedByte,
),
# NOTE(review): the list formerly inlined in to_python() used OWL.real,
# OWL.rational and XSD.dataTimeStamp. Verify that XSD.real, XSD.rational
# and XSD.dateTimeStamp are the intended IRIs, and that a timestamp
# should be converted with float() as to_python() does for this tuple.
float: (XSD.double, XSD.decimal, XSD.dateTimeStamp, XSD.real, XSD.rational),
# NOTE(review): XSD.NMName is carried over unchanged from to_python();
# presumably XSD.NCName was meant - confirm against the XSD datatype list.
str: (
XSD.string,
RDF.PlainLiteral,
RDF.XMLLiteral,
RDFS.Literal,
XSD.anyURI,
XSD.language,
XSD.Name,
XSD.NMName,
XSD.normalizedString,
XSD.token,
XSD.NMTOKEN,
),
}

def __new__(
Expand All @@ -54,7 +77,7 @@ def __new__(
else:
string.lang = None
if datatype:
string.datatype = cls.datatypes.get(datatype, datatype)
string.datatype = cls.datatypes.get(datatype, (datatype,))[0]
elif isinstance(value, str):
string.datatype = None
elif isinstance(value, bool):
Expand All @@ -78,6 +101,18 @@ def __new__(
string.datatype = None
return string

# Hash over the string value together with `lang` and `datatype`, i.e. the
# same three fields that __eq__ compares when both operands are Literals.
# NOTE(review): __eq__ falls back to a plain string comparison for
# non-Literal operands, so a Literal can compare equal to a `str` that
# will generally hash differently - confirm this is acceptable for
# dict/set lookups.
def __hash__(self):
return hash((str(self), self.lang, self.datatype))

# Equality test.
# - Against another Literal: equal only if the string value, `lang` and
#   `datatype` all match.
# - Against anything else: equal if str(other) matches the string value;
#   `lang` and `datatype` are ignored in that case.
def __eq__(self, other):
if isinstance(other, Literal):
return (
str(self) == str(other)
and self.lang == other.lang
and self.datatype == other.datatype
)
# Non-Literal operand: compare on the textual value only.
return str(self) == str(other)

def __repr__(self) -> str:
lang = f", lang='{self.lang}'" if self.lang else ""
datatype = f", datatype='{self.datatype}'" if self.datatype else ""
Expand All @@ -95,46 +130,15 @@ def to_python(self):

if self.datatype == XSD.boolean:
value = False if self == "False" else bool(self)
elif self.datatype in (
XSD.integer,
XSD.int,
XSD.short,
XSD.long,
XSD.nonPositiveInteger,
XSD.negativeInteger,
XSD.nonNegativeInteger,
XSD.unsignedInt,
XSD.unsignedShort,
XSD.unsignedLong,
XSD.byte,
XSD.unsignedByte,
):
elif self.datatype in self.datatypes[int]:
value = int(self)
elif self.datatype in (
XSD.double,
XSD.decimal,
XSD.dataTimeStamp,
OWL.real,
OWL.rational,
):
elif self.datatype in self.datatypes[float]:
value = float(self)
elif self.datatype == XSD.hexBinary:
value = self.encode()
elif self.datatype == XSD.dateTime:
value = datetime.fromisoformat(self)
elif self.datatype and self.datatype not in (
RDF.PlainLiteral,
RDF.XMLLiteral,
RDFS.Literal,
XSD.anyURI,
XSD.language,
XSD.Name,
XSD.NMName,
XSD.normalizedString,
XSD.string,
XSD.token,
XSD.NMTOKEN,
):
elif self.datatype and self.datatype not in self.datatypes[str]:
warnings.warn(f"unknown datatype: {self.datatype} - assuming string")
return value

Expand Down
6 changes: 6 additions & 0 deletions tripper/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,12 @@ def __str__(self):
def __add__(self, other):
return self._iri + str(other)

# Hash of the namespace's base IRI string (`_iri`), the same string that
# __eq__ compares against.
def __hash__(self):
return hash(self._iri)

# Equal to any object whose str() matches this namespace's base IRI, so a
# namespace compares equal to the plain IRI string as well.
def __eq__(self, other):
return self._iri == str(other)


# Pre-defined namespaces
XML = Namespace("http://www.w3.org/XML/1998/namespace")
Expand Down
Loading