Skip to content

Commit

Permalink
Merge pull request #260 from RDFLib/rule_mode
Browse files Browse the repository at this point in the history
The long-awaited standalone SHACL Rule Expander mode
  • Loading branch information
ashleysommer authored Oct 11, 2024
2 parents 81b91a4 + 850b162 commit 6e9c85f
Show file tree
Hide file tree
Showing 23 changed files with 1,488 additions and 317 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ and this project adheres to [Python PEP 440 Versioning](https://www.python.org/d
## [Unreleased]

### Added
- SHACL Rules Expander Mode
- A new alternative Run Mode for PySHACL
- PySHACL will not validate the DataGraph against Shapes and Constraints; instead, it will simply run all SHACL-AF Rules to expand the DataGraph.
- By default it will output a new graph containing the existing DataGraph Triples plus the expanded triples
- Run with inplace mode to expand the new triples directly into the input DataGraph
- Focus Node Filtering
- You can now pass in a list of focus nodes to the validator, and it will only validate those focus nodes.
- Note, you still need to pass in a SHACL Shapes Graph, and the shapes still need to target the focus nodes.
Expand All @@ -19,6 +24,9 @@ and this project adheres to [Python PEP 440 Versioning](https://www.python.org/d
- If you give the validator a list of Shapes to use, and a list of focus nodes, the validator will operate in
a highly-targeted mode: it feeds those focus nodes directly into those given Shapes for validation.
- In this mode, the selected SHACL Shape does not need to specify any focus-targeting mechanisms of its own.
- Combined Rules Expander Mode with Shape Selection
- The combination of SHACL Rules Expander Mode and Shape Selection will allow specialised workflows.
- For example, you can run specific expansion rules from a SHACL Shapes File, based on the new triples required.

### Changed
- Don't make a clone of the DataGraph if the input data graph is ephemeral.
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ COPY . .
RUN chown -R pyshacl:pyshacl /home/pyshacl /app && chmod -R 775 /home/pyshacl /app
USER pyshacl
ENV PATH="/home/pyshacl/.local/bin:$PATH"
RUN pip3 install "poetry>=1.5.0,<2.0"
RUN pip3 install "poetry>=1.8.3,<2.0"
RUN poetry install --no-dev --extras "js http"
USER root
RUN apk del build-dependencies
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ dev-coverage = ["pytest-cov", "coverage", "platformdirs"]

[tool.poetry.scripts]
pyshacl = "pyshacl.cli:main"
pyshacl_rules = "pyshacl.cli_rules:main"
pyshacl_validate = "pyshacl.cli:main"
pyshacl_server = "pyshacl.http:cli"

Expand Down
6 changes: 4 additions & 2 deletions pyshacl/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
# -*- coding: latin-1 -*-
#
from .entrypoints import shacl_rules, validate
from .rule_expand_runner import RuleExpandRunner
from .shape import Shape
from .shapes_graph import ShapesGraph
from .validate import Validator, validate
from .validator import Validator

# version compliant with https://www.python.org/dev/peps/pep-0440/
__version__ = '0.26.0'
# Don't forget to change the version number in pyproject.toml, Dockerfile, and CITATION.cff along with this one

__all__ = ['validate', 'Validator', '__version__', 'Shape', 'ShapesGraph']
__all__ = ['validate', 'shacl_rules', 'Validator', 'RuleExpandRunner', '__version__', 'Shape', 'ShapesGraph']
13 changes: 9 additions & 4 deletions pyshacl/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
import os
import sys

from pyshacl.cli import main
from pyshacl.cli import main as validate_main
from pyshacl.cli_rules import main as rules_main


def str_is_true(s_var: str):
Expand All @@ -16,11 +17,15 @@ def str_is_true(s_var: str):
do_server = os.getenv("PYSHACL_HTTP", "")
do_server = os.getenv("PYSHACL_SERVER", do_server)

if (len(sys.argv) > 1 and str(sys.argv[1]).lower() in ('serve', 'server', '--server')) or (
first_arg = None if len(sys.argv) < 2 else sys.argv[1]

if first_arg is not None and str(first_arg).lower() in ('rules', '--rules'):
rules_main(prog="python3 -m pyshacl")
elif (first_arg is not None and str(first_arg).lower() in ('serve', 'server', '--server')) or (
do_server and str_is_true(do_server)
):
from pyshacl.sh_http import main as http_main

http_main()

main(prog="python3 -m pyshacl")
else:
validate_main(prog="python3 -m pyshacl")
12 changes: 12 additions & 0 deletions pyshacl/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,13 @@ def str_is_true(s_var: str):
help='Send output to a file (defaults to stdout).',
default=sys.stdout,
)
parser.add_argument(
'--rules',
help='Ignore validation options, run PySHACL in Rules Expansion mode. Same as `pyshacl_rules`.',
action='store_true',
dest='do_rules',
default=False,
)
parser.add_argument(
'--server',
help='Ignore all the rest of the options, start the HTTP Server. Same as `pyshacl_server`.',
Expand All @@ -240,6 +247,11 @@ def main(prog: Union[str, None] = None) -> None:

# http_main calls sys.exit(0) and never returns
http_main()
if args.do_rules:
from pyshacl.cli_rules import main as rules_main

# rules_main calls sys.exit(0) and never returns
rules_main()
if not args.data:
# No datafile given, and not starting in server mode.
sys.stderr.write('Input Error. No DataGraph file or endpoint supplied.\n')
Expand Down
274 changes: 274 additions & 0 deletions pyshacl/cli_rules.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,274 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import argparse
import sys
from io import BufferedReader
from typing import Union, cast

from pyshacl import __version__, shacl_rules
from pyshacl.cli import ShowVersion
from pyshacl.errors import (
ConstraintLoadError,
ReportableRuntimeError,
RuleLoadError,
ShapeLoadError,
ValidationFailure,
)

# Command-line interface definition for the standalone SHACL Rules Expander.
# The parsed namespace is consumed by main() below, which maps each option
# onto a keyword argument of shacl_rules().

# RDF serialization names accepted for every input/output format option.
_RDF_FORMAT_CHOICES = ('auto', 'turtle', 'xml', 'trig', 'json-ld', 'nt', 'n3', 'nquads')

parser = argparse.ArgumentParser(
    description='PySHACL {} SHACL Rules Expander command line tool.'.format(str(__version__))
)
# Optional positional so that main() can print its own "no DataGraph" error.
parser.add_argument(
    'data',
    metavar='DataGraph',
    help='The file or endpoint containing the Target Data Graph.',
    default=None,
    nargs='?',
)
parser.add_argument(
    '-s',
    '--shapes',
    '--shacl',
    dest='shacl',
    action='store',
    nargs='?',
    help='A file containing the SHACL Shapes Graph.',
)
parser.add_argument(
    '-e',
    '--ont-graph',
    dest='ont',
    action='store',
    nargs='?',
    help='A file path or URL to a document containing extra ontological information. '
    'RDFS and OWL definitions from this are used to inoculate the DataGraph.',
)
parser.add_argument(
    '-i',
    '--inference',
    dest='inference',
    action='store',
    default='none',
    choices=('none', 'rdfs', 'owlrl', 'both'),
    help='Choose a type of inferencing to run against the Data Graph before running the rules.',
)
parser.add_argument(
    '-im',
    '--imports',
    dest='imports',
    action='store_true',
    default=False,
    help='Allow import of sub-graphs defined in statements with owl:imports.',
)
parser.add_argument(
    '-j',
    '--js',
    dest='js',
    action='store_true',
    default=False,
    help='Enable features from the SHACL-JS Specification.',
)
parser.add_argument(
    '-it',
    '--iterate-rules',
    dest='iterate_rules',
    action='store_true',
    default=False,
    help="Run Shape's SHACL Rules iteratively until the data_graph reaches a steady state.",
)
parser.add_argument(
    '-d',
    '--debug',
    dest='debug',
    action='store_true',
    default=False,
    help='Output additional verbose runtime messages.',
)
# --focus and --shape take a single comma-separated string; main() splits it.
parser.add_argument(
    '--focus',
    dest='focus',
    action='store',
    help='Optional IRIs of focus nodes from the DataGraph, the rules will be applied to only these nodes. '
    'Comma-separated list.',
    nargs="?",
    default=None,
)
parser.add_argument(
    '--shape',
    dest='shape',
    action='store',
    help='Optional IRIs of a NodeShape or PropertyShape from the SHACL ShapesGraph, '
    'only the rules of these shapes will be used to expand the DataGraph. Comma-separated list.',
    nargs="?",
    default=None,
)
parser.add_argument(
    '-f',
    '--format',
    dest='format',
    action='store',
    help='Choose an output format. Default is "trig" for Datasets and "turtle" for Graphs.',
    default='auto',
    choices=_RDF_FORMAT_CHOICES,
)
parser.add_argument(
    '-df',
    '--data-file-format',
    dest='data_file_format',
    action='store',
    help='Explicitly state the RDF File format of the input DataGraph file. Default="auto".',
    default='auto',
    choices=_RDF_FORMAT_CHOICES,
)
parser.add_argument(
    '-sf',
    '--shacl-file-format',
    dest='shacl_file_format',
    action='store',
    help='Explicitly state the RDF File format of the input SHACL file. Default="auto".',
    default='auto',
    choices=_RDF_FORMAT_CHOICES,
)
parser.add_argument(
    '-ef',
    '--ont-file-format',
    dest='ont_file_format',
    action='store',
    help='Explicitly state the RDF File format of the extra ontology file. Default="auto".',
    default='auto',
    choices=_RDF_FORMAT_CHOICES,
)
parser.add_argument('-V', '--version', action=ShowVersion, help='Show PySHACL version and exit.')
parser.add_argument(
    '-o',
    '--output',
    dest='output',
    nargs='?',
    type=argparse.FileType('w'),
    help='Send output to a file (defaults to stdout).',
    default=sys.stdout,
)
# No explicit -h/--help option is registered: argparse adds one automatically.


def main(prog: Union[str, None] = None) -> None:
    """Run the SHACL Rules Expander command line tool.

    Parses the command line, runs ``shacl_rules`` over the requested
    DataGraph, writes the serialized expanded graph to the selected output
    and always terminates the process through ``sys.exit``.

    Exit codes: 0 on success; 1 for input errors or a ValidationFailure;
    2 for shape/constraint/rule load errors and runtime errors; 3 when a
    NotImplementedError is raised.

    :param prog: optional program name to show in usage and help output.
    """
    if prog is not None and len(prog) > 0:
        parser.prog = prog

    # `pyshacl --rules ...` and `python3 -m pyshacl --rules ...` hand over to
    # this function with the mode-selector flag still present in sys.argv.
    # This parser does not define that flag, so drop it instead of letting
    # argparse abort with "unrecognized arguments".
    # NOTE(review): the bare `rules` selector accepted by `python3 -m pyshacl`
    # is left untouched because it cannot be told apart from a data file name.
    cli_args = [arg for arg in sys.argv[1:] if arg.lower() != '--rules']
    args = parser.parse_args(cli_args)
    if not args.data:
        # No DataGraph was given, so there is nothing to expand.
        sys.stderr.write('Input Error. No DataGraph file or endpoint supplied.\n')
        parser.print_usage(sys.stderr)
        sys.exit(1)

    # `-o` given without a value leaves args.output as None (nargs='?' with
    # no const); fall back to stdout rather than crashing on `.write`.
    output = args.output if args.output is not None else sys.stdout

    runner_kwargs = {
        'debug': args.debug,
        'serialize_expanded_graph': True,
    }
    data_file = None
    data_graph: Union[BufferedReader, str]

    if str(args.data).lower().startswith(('http://', 'https://')):
        # An endpoint, not a local path: pass the URL through unopened.
        # NOTE(review): assumes shacl_rules loads remote sources given as str.
        data_graph = args.data
    else:
        try:
            data_file = open(args.data, 'rb')
        except FileNotFoundError:
            sys.stderr.write('Input Error. DataGraph file not found.\n')
            sys.exit(1)
        except PermissionError:
            sys.stderr.write('Input Error. DataGraph file not readable.\n')
            sys.exit(1)
        except OSError as ose:
            # e.g. the path is a directory; report it like other input errors.
            sys.stderr.write('Input Error. DataGraph file could not be opened: {}\n'.format(ose))
            sys.exit(1)
        else:
            # NOTE: This cast is not necessary in Python >= 3.10.
            data_graph = cast(BufferedReader, data_file)

    # Only forward options that differ from their defaults.
    if args.shacl is not None:
        runner_kwargs['shacl_graph'] = args.shacl
    if args.ont is not None:
        runner_kwargs['ont_graph'] = args.ont
    if args.inference != 'none':
        runner_kwargs['inference'] = args.inference
    if args.imports:
        runner_kwargs['do_owl_imports'] = True
    if args.js:
        runner_kwargs['js'] = True
    if args.focus:
        # Skip empty entries produced by stray or trailing commas.
        runner_kwargs['focus_nodes'] = [node.strip() for node in args.focus.split(',') if node.strip()]
    if args.shape:
        runner_kwargs['use_shapes'] = [shape.strip() for shape in args.shape.split(',') if shape.strip()]
    if args.iterate_rules:
        runner_kwargs['iterate_rules'] = True
    for arg_name, kwarg_name in (
        ('shacl_file_format', 'shacl_graph_format'),
        ('ont_file_format', 'ont_graph_format'),
        ('data_file_format', 'data_graph_format'),
    ):
        file_format = getattr(args, arg_name)
        if file_format and file_format != "auto":
            runner_kwargs[kwarg_name] = file_format
    if args.format != "auto":
        runner_kwargs['serialize_expanded_graph_format'] = args.format

    exit_code: Union[int, None] = None
    try:
        output_txt = shacl_rules(data_graph, **runner_kwargs)
        # The runner may hand back an exception object instead of raising it.
        if isinstance(output_txt, BaseException):
            raise output_txt
    except ValidationFailure as vf:
        output.write("Rules Runner generated a Validation Failure result:\n")
        output.write(str(vf.message))
        output.write("\n")
        exit_code = 1
    except ShapeLoadError as sle:
        sys.stderr.write("Rules Runner encountered a Shape Load Error:\n")
        sys.stderr.write(str(sle))
        sys.stderr.write("\n")
        exit_code = 2
    except ConstraintLoadError as cle:
        sys.stderr.write("Rules Runner encountered a Constraint Load Error:\n")
        sys.stderr.write(str(cle))
        sys.stderr.write("\n")
        exit_code = 2
    except RuleLoadError as rle:
        sys.stderr.write("Rules Runner encountered a Rule Load Error:\n")
        sys.stderr.write(str(rle))
        sys.stderr.write("\n")
        exit_code = 2
    except ReportableRuntimeError as rre:
        sys.stderr.write("Rules Runner encountered a Runtime Error:\n")
        sys.stderr.write(str(rre.message))
        sys.stderr.write("\nIf you believe this is a bug in pyshacl, open an Issue on the pyshacl github page.\n")
        exit_code = 2
    except NotImplementedError as nie:
        sys.stderr.write("Rules Runner feature is not implemented:\n")
        if len(nie.args) > 0:
            sys.stderr.write(str(nie.args[0]))
        else:
            sys.stderr.write("No message provided.")
        sys.stderr.write("\nIf your use-case requires this feature, open an Issue on the pyshacl github page.\n")
        exit_code = 3
    except RuntimeError as re:
        import traceback

        traceback.print_tb(re.__traceback__)
        sys.stderr.write(
            "\n\nRules Runner encountered a Runtime Error. Please report this to the PySHACL issue tracker.\n"
        )
        exit_code = 2
    finally:
        # Always release the input file, whether or not the run succeeded.
        if data_file is not None:
            try:
                data_file.close()
            except Exception as e:
                sys.stderr.write("Error closing data file:\n")
                sys.stderr.write(str(e))
    if exit_code is not None:
        sys.exit(exit_code)

    if isinstance(output_txt, bytes):
        output_unicode = output_txt.decode('utf-8')
    else:
        output_unicode = output_txt
    output.write(output_unicode)
    if output is sys.stdout:
        # Never close the process-wide stdout; callers may still need it.
        output.flush()
    else:
        output.close()
    sys.exit(0)


# Script entry point: run the Rules Expander CLI when this module is executed
# directly rather than imported.
if __name__ == "__main__":
    main()
Loading

0 comments on commit 6e9c85f

Please sign in to comment.