diff --git a/quit/__init__.py b/quit/__init__.py index e69de29b..963dda29 100644 --- a/quit/__init__.py +++ b/quit/__init__.py @@ -0,0 +1,10 @@ +from rdflib.plugin import register +from rdflib.query import Processor, UpdateProcessor + +register( + 'sparql', Processor, + 'quit.processor', 'SPARQLProcessor') + +register( + 'sparql', UpdateProcessor, + 'quit.processor', 'SPARQLUpdateProcessor') \ No newline at end of file diff --git a/quit/algebra.py b/quit/algebra.py new file mode 100644 index 00000000..5ce5364c --- /dev/null +++ b/quit/algebra.py @@ -0,0 +1,804 @@ + +""" +Converting the 'parse-tree' output of pyparsing to a SPARQL Algebra expression + +http://www.w3.org/TR/sparql11-query/#sparqlQuery + +""" + +import functools +import operator +import collections + +from rdflib import Literal, Variable, URIRef, BNode + +from rdflib.plugins.sparql.sparql import Prologue, Query +from rdflib.plugins.sparql.parserutils import CompValue, Expr +from rdflib.plugins.sparql.operators import ( + and_, TrueFilter, simplify as simplifyFilters) +from rdflib.paths import ( + InvPath, AlternativePath, SequencePath, MulPath, NegatedPath) + +from pyparsing import ParseResults +from functools import reduce + + +# --------------------------- +# Some convenience methods +def OrderBy(p, expr): + return CompValue('OrderBy', p=p, expr=expr) + + +def ToMultiSet(p): + return CompValue('ToMultiSet', p=p) + + +def Union(p1, p2): + return CompValue('Union', p1=p1, p2=p2) + + +def Join(p1, p2): + return CompValue('Join', p1=p1, p2=p2) + + +def Minus(p1, p2): + return CompValue('Minus', p1=p1, p2=p2) + + +def Graph(term, graph): + return CompValue('Graph', term=term, p=graph) + + +def BGP(triples=None): + return CompValue('BGP', triples=triples or []) + + +def LeftJoin(p1, p2, expr): + return CompValue('LeftJoin', p1=p1, p2=p2, expr=expr) + + +def Filter(expr, p): + return CompValue('Filter', expr=expr, p=p) + + +def Extend(p, expr, var): + return CompValue('Extend', p=p, expr=expr, var=var) + + +def Project(p, PV): + return CompValue('Project', p=p, PV=PV) + + +def Group(p, expr=None): + return CompValue('Group', p=p, expr=expr) + +def Service(term, graph): + return CompValue('Service', term=term, p=graph) + +def _knownTerms(triple, varsknown, varscount): + return (len([_f for _f in (x not in varsknown and + isinstance( + x, (Variable, BNode)) for x in triple) if _f]), + -sum(varscount.get(x, 0) for x in triple), + not isinstance(triple[2], Literal), + ) + + +def reorderTriples(l): + """ + Reorder triple patterns so that we execute the + ones with most bindings first + """ + + def _addvar(term, varsknown): + if isinstance(term, (Variable, BNode)): + varsknown.add(term) + + l = [(None, x) for x in l] + varsknown = set() + varscount = collections.defaultdict(int) + for t in l: + for c in t[1]: + if isinstance(c, (Variable, BNode)): + varscount[c] += 1 + i = 0 + + # Done in steps, sort by number of bound terms + # the top block of patterns with the most bound terms is kept + # the rest is resorted based on the vars bound after the first + # block is evaluated + + # we sort by decorate/undecorate, since we need the value of the sort keys + + while i < len(l): + l[i:] = sorted((_knownTerms(x[ + 1], varsknown, varscount), x[1]) for x in l[i:]) + t = l[i][0][0] # top block has this many terms bound + j = 0 + while i+j < len(l) and l[i+j][0][0] == t: + for c in l[i+j][1]: + _addvar(c, varsknown) + j += 1 + i += 1 + + return [x[1] for x in l] + + +def triples(l): + + l = reduce(lambda x, y: x + y, l) + if (len(l) % 3) != 0: + 
raise Exception('these are not triples')
+    return reorderTriples((l[x], l[x + 1], l[x + 2])
+                          for x in range(0, len(l), 3))
+
+
+def translatePName(p, prologue):
+    """
+    Expand prefixed/relative URIs
+    """
+    if isinstance(p, CompValue):
+        if p.name == 'pname':
+            return prologue.absolutize(p)
+        if p.name == 'literal':
+            return Literal(p.string, lang=p.lang,
+                           datatype=prologue.absolutize(p.datatype))
+    elif isinstance(p, URIRef):
+        return prologue.absolutize(p)
+
+
+def translatePath(p):
+
+    """
+    Translate PropertyPath expressions
+    """
+
+    if isinstance(p, CompValue):
+        if p.name == 'PathAlternative':
+            if len(p.part) == 1:
+                return p.part[0]
+            else:
+                return AlternativePath(*p.part)
+
+        elif p.name == 'PathSequence':
+            if len(p.part) == 1:
+                return p.part[0]
+            else:
+                return SequencePath(*p.part)
+
+        elif p.name == 'PathElt':
+            if not p.mod:
+                return p.part
+            else:
+                if isinstance(p.part, list):
+                    if len(p.part) != 1:
+                        raise Exception('Logic error: unexpected multi-part PathElt')
+
+                    return MulPath(p.part[0], p.mod)
+                else:
+                    return MulPath(p.part, p.mod)
+
+        elif p.name == 'PathEltOrInverse':
+            if isinstance(p.part, list):
+                if len(p.part) != 1:
+                    raise Exception('Logic error: unexpected multi-part PathEltOrInverse')
+                return InvPath(p.part[0])
+            else:
+                return InvPath(p.part)
+
+        elif p.name == 'PathNegatedPropertySet':
+            if isinstance(p.part, list):
+                return NegatedPath(AlternativePath(*p.part))
+            else:
+                return NegatedPath(p.part)
+
+
+def translateExists(e):
+
+    """
+    Translate the graph pattern used by EXISTS and NOT EXISTS
+    http://www.w3.org/TR/sparql11-query/#sparqlCollectFilters
+    """
+
+    def _c(n):
+        if isinstance(n, CompValue):
+            if n.name in ('Builtin_EXISTS', 'Builtin_NOTEXISTS'):
+                n.graph = translateGroupGraphPattern(n.graph)
+
+    e = traverse(e, visitPost=_c)
+
+    return e
+
+
+def collectAndRemoveFilters(parts):
+
+    """
+
+    FILTER expressions apply to the whole group graph pattern in which
+    they appear.
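+
+    As an illustrative example (added commentary, not part of the original
+    module), in a group like the following the FILTER constrains both
+    triple patterns, no matter where it is written inside the braces:
+
+        SELECT ?s WHERE {
+            ?s <p> ?v .
+            FILTER(?v > 5)
+            ?s <q> ?w .
+        }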
+ + http://www.w3.org/TR/sparql11-query/#sparqlCollectFilters + """ + + filters = [] + + i = 0 + while i < len(parts): + p = parts[i] + if p.name == 'Filter': + filters.append(translateExists(p.expr)) + parts.pop(i) + else: + i += 1 + + if filters: + return and_(*filters) + + return None + + +def translateGroupOrUnionGraphPattern(graphPattern): + A = None + + for g in graphPattern.graph: + g = translateGroupGraphPattern(g) + if not A: + A = g + else: + A = Union(A, g) + return A + +def translateServiceGraphPattern(graphPattern): + return Service(graphPattern.term, + translateGroupGraphPattern(graphPattern.graph)) + +def translateGraphGraphPattern(graphPattern): + return Graph(graphPattern.term, + translateGroupGraphPattern(graphPattern.graph)) + + +def translateInlineData(graphPattern): + return ToMultiSet(translateValues(graphPattern)) + + +def translateGroupGraphPattern(graphPattern): + """ + http://www.w3.org/TR/sparql11-query/#convertGraphPattern + """ + + if graphPattern.name == 'SubSelect': + return ToMultiSet(translate(graphPattern)[0]) + + if not graphPattern.part: + graphPattern.part = [] # empty { } + + filters = collectAndRemoveFilters(graphPattern.part) + + g = [] + for p in graphPattern.part: + if p.name == 'TriplesBlock': + # merge adjacent TripleBlocks + if not (g and g[-1].name == 'BGP'): + g.append(BGP()) + g[-1]["triples"] += triples(p.triples) + else: + g.append(p) + + G = BGP() + for p in g: + if p.name == 'OptionalGraphPattern': + A = translateGroupGraphPattern(p.graph) + if A.name == 'Filter': + G = LeftJoin(G, A.p, A.expr) + else: + G = LeftJoin(G, A, TrueFilter) + elif p.name == 'MinusGraphPattern': + G = Minus(p1=G, p2=translateGroupGraphPattern(p.graph)) + elif p.name == 'GroupOrUnionGraphPattern': + G = Join(p1=G, p2=translateGroupOrUnionGraphPattern(p)) + elif p.name == 'GraphGraphPattern': + G = Join(p1=G, p2=translateGraphGraphPattern(p)) + elif p.name == 'InlineData': + G = Join(p1=G, p2=translateInlineData(p)) + elif p.name == 'ServiceGraphPattern': + G = Join(p1=G, p2=translateServiceGraphPattern(p)) + elif p.name in ('BGP', 'Extend'): + G = Join(p1=G, p2=p) + elif p.name == 'Bind': + G = Extend(G, p.expr, p.var) + + else: + raise Exception('Unknown part in GroupGraphPattern: %s - %s' % + (type(p), p.name)) + + if filters: + G = Filter(expr=filters, p=G) + + return G + + +class StopTraversal(Exception): + def __init__(self, rv): + self.rv = rv + + +def _traverse(e, visitPre=lambda n: None, visitPost=lambda n: None): + """ + Traverse a parse-tree, visit each node + + if visit functions return a value, replace current node + """ + _e = visitPre(e) + if _e is not None: + return _e + + if e is None: + return None + + if isinstance(e, (list, ParseResults)): + return [_traverse(x, visitPre, visitPost) for x in e] + elif isinstance(e, tuple): + return tuple([_traverse(x, visitPre, visitPost) for x in e]) + + elif isinstance(e, CompValue): + for k, val in e.items(): + e[k] = _traverse(val, visitPre, visitPost) + + _e = visitPost(e) + if _e is not None: + return _e + + return e + + +def _traverseAgg(e, visitor=lambda n, v: None): + """ + Traverse a parse-tree, visit each node + + if visit functions return a value, replace current node + """ + + res = [] + + if isinstance(e, (list, ParseResults, tuple)): + res = [_traverseAgg(x, visitor) for x in e] + + elif isinstance(e, CompValue): + for k, val in e.items(): + if val != None: + res.append(_traverseAgg(val, visitor)) + + return visitor(e, res) + + +def traverse( + tree, visitPre=lambda n: None, + visitPost=lambda n: 
None, complete=None): + """ + Traverse tree, visit each node with visit function + visit function may raise StopTraversal to stop traversal + if complete!=None, it is returned on complete traversal, + otherwise the transformed tree is returned + """ + try: + r = _traverse(tree, visitPre, visitPost) + if complete is not None: + return complete + return r + except StopTraversal as st: + return st.rv + + +def _hasAggregate(x): + """ + Traverse parse(sub)Tree + return true if any aggregates are used + """ + + if isinstance(x, CompValue): + if x.name.startswith('Aggregate_'): + raise StopTraversal(True) + + +def _aggs(e, A): + """ + Collect Aggregates in A + replaces aggregates with variable references + """ + + # TODO: nested Aggregates? + + if isinstance(e, CompValue) and e.name.startswith('Aggregate_'): + A.append(e) + aggvar = Variable('__agg_%d__' % len(A)) + e["res"] = aggvar + return aggvar + + +def _findVars(x, res): + """ + Find all variables in a tree + """ + if isinstance(x, Variable): + res.add(x) + if isinstance(x, CompValue): + if x.name == "Bind": + res.add(x.var) + return x # stop recursion and finding vars in the expr + elif x.name == 'SubSelect': + if x.projection: + res.update(v.var or v.evar for v in x.projection) + return x + + +def _addVars(x, children): + # import pdb; pdb.set_trace() + if isinstance(x, Variable): + return set([x]) + elif isinstance(x, CompValue): + x["_vars"] = set(reduce(operator.or_, children, set())) + if x.name == "Bind": + return set([x.var]) + elif x.name == 'SubSelect': + if x.projection: + s = set(v.var or v.evar for v in x.projection) + else: + s = set() + + return s + return reduce(operator.or_, children, set()) + + +def _sample(e, v=None): + """ + For each unaggregated variable V in expr + Replace V with Sample(V) + """ + if isinstance(e, CompValue) and e.name.startswith("Aggregate_"): + return e # do not replace vars in aggregates + if isinstance(e, Variable) and v != e: + return CompValue('Aggregate_Sample', vars=e) + + +def _simplifyFilters(e): + if isinstance(e, Expr): + return simplifyFilters(e) + + +def translateAggregates(q, M): + E = [] + A = [] + + # collect/replace aggs in : + # select expr as ?var + if q.projection: + for v in q.projection: + if v.evar: + v.expr = traverse(v.expr, functools.partial(_sample, v=v.evar)) + v.expr = traverse(v.expr, functools.partial(_aggs, A=A)) + + + # having clause + if traverse(q.having, _hasAggregate, complete=False): + q.having = traverse(q.having, _sample) + traverse(q.having, functools.partial(_aggs, A=A)) + + # order by + if traverse(q.orderby, _hasAggregate, complete=False): + q.orderby = traverse(q.orderby, _sample) + traverse(q.orderby, functools.partial(_aggs, A=A)) + + # sample all other select vars + # TODO: only allowed for vars in group-by? 
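+    # Illustrative note (added commentary, not from the original module):
+    # for a query like
+    #
+    #     SELECT ?x (MAX(?y) AS ?max) WHERE { ?x <p> ?y } GROUP BY ?x
+    #
+    # the loop below wraps the plain projection variable ?x in an implicit
+    # Aggregate_Sample with a fresh __agg_N__ result variable, so that after
+    # grouping every projected variable is produced by some aggregate.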
+ if q.projection: + for v in q.projection: + if v.var: + rv = Variable('__agg_%d__' % (len(A) + 1)) + A.append(CompValue('Aggregate_Sample', vars=v.var, res=rv)) + E.append((rv, v.var)) + + return CompValue('AggregateJoin', A=A, p=M), E + + +def translateValues(v): + # if len(v.var)!=len(v.value): + # raise Exception("Unmatched vars and values in ValueClause: "+str(v)) + + res = [] + if not v.var: + return res + if not v.value: + return res + if not isinstance(v.value[0], list): + + for val in v.value: + res.append({v.var[0]: val}) + else: + for vals in v.value: + res.append(dict(list(zip(v.var, vals)))) + + return CompValue('values', res=res) + + +def translate(q): + """ + http://www.w3.org/TR/sparql11-query/#convertSolMod + + """ + + _traverse(q, _simplifyFilters) + + q.where = traverse(q.where, visitPost=translatePath) + + # TODO: Var scope test + VS = set() + traverse(q.where, functools.partial(_findVars, res=VS)) + + # all query types have a where part + M = translateGroupGraphPattern(q.where) + + aggregate = False + if q.groupby: + conditions = [] + # convert "GROUP BY (?expr as ?var)" to an Extend + for c in q.groupby.condition: + if isinstance(c, CompValue) and c.name == 'GroupAs': + M = Extend(M, c.expr, c.var) + c = c.var + conditions.append(c) + + M = Group(p=M, expr=conditions) + aggregate = True + elif traverse(q.having, _hasAggregate, complete=False) or \ + traverse(q.orderby, _hasAggregate, complete=False) or \ + any(traverse(x.expr, _hasAggregate, complete=False) + for x in q.projection or [] if x.evar): + # if any aggregate is used, implicit group by + M = Group(p=M) + aggregate = True + + if aggregate: + M, E = translateAggregates(q, M) + else: + E = [] + + # HAVING + if q.having: + M = Filter(expr=and_(*q.having.condition), p=M) + + # VALUES + if q.valuesClause: + M = Join(p1=M, p2=ToMultiSet(translateValues(q.valuesClause))) + + if not q.projection: + # select * + PV = list(VS) + else: + PV = list() + for v in q.projection: + if v.var: + if v not in PV: + PV.append(v.var) + elif v.evar: + if v not in PV: + PV.append(v.evar) + + E.append((v.expr, v.evar)) + else: + raise Exception("I expected a var or evar here!") + + for e, v in E: + M = Extend(M, e, v) + + # ORDER BY + if q.orderby: + M = OrderBy(M, [CompValue('OrderCondition', expr=c.expr, + order=c.order) for c in q.orderby.condition]) + + # PROJECT + M = Project(M, PV) + + if q.modifier: + if q.modifier == 'DISTINCT': + M = CompValue('Distinct', p=M) + elif q.modifier == 'REDUCED': + M = CompValue('Reduced', p=M) + + if q.limitoffset: + offset = 0 + if q.limitoffset.offset != None: + offset = q.limitoffset.offset.toPython() + + if q.limitoffset.limit != None: + M = CompValue('Slice', p=M, start=offset, + length=q.limitoffset.limit.toPython()) + else: + M = CompValue('Slice', p=M, start=offset) + + return M, PV + + +def simplify(n): + """Remove joins to empty BGPs""" + if isinstance(n, CompValue): + if n.name == 'Join': + if n.p1.name == 'BGP' and len(n.p1.triples) == 0: + return n.p2 + if n.p2.name == 'BGP' and len(n.p2.triples) == 0: + return n.p1 + elif n.name == 'BGP': + n["triples"] = reorderTriples(n.triples) + return n + + +def analyse(n, children): + + if isinstance(n, CompValue): + if n.name == 'Join': + n["lazy"] = all(children) + return False + elif n.name in ('Slice', 'Distinct'): + return False + else: + return all(children) + else: + return True + + +def translatePrologue(p, base, initNs=None, prologue=None): + + if prologue is None: + prologue = Prologue() + prologue.base = "" + if base: + 
prologue.base = base + if initNs: + for k, v in initNs.items(): + prologue.bind(k, v) + + for x in p: + if x.name == 'Base': + prologue.base = x.iri + elif x.name == 'PrefixDecl': + prologue.bind(x.prefix, prologue.absolutize(x.iri)) + + return prologue + + +def translateQuads(quads): + if quads.triples: + alltriples = triples(quads.triples) + else: + alltriples = [] + + allquads = collections.defaultdict(list) + + if quads.quadsNotTriples: + for q in quads.quadsNotTriples: + if q.triples: + allquads[q.term] += triples(q.triples) + + return alltriples, allquads + + +def translateUpdate1(u, prologue): + if u.name in ('Load', 'Clear', 'Drop', 'Create'): + pass # no translation needed + elif u.name in ('Add', 'Move', 'Copy'): + pass + elif u.name in ('InsertData', 'DeleteData', 'DeleteWhere'): + t, q = translateQuads(u.quads) + u["quads"] = q + u["triples"] = t + if u.name in ('DeleteWhere', 'DeleteData'): + pass # TODO: check for bnodes in triples + elif u.name == 'Modify': + if u.delete: + u.delete["triples"], u.delete[ + "quads"] = translateQuads(u.delete.quads) + if u.insert: + u.insert["triples"], u.insert[ + "quads"] = translateQuads(u.insert.quads) + u["where"] = translateGroupGraphPattern(u.where) + else: + raise Exception('Unknown type of update operation: %s' % u) + + u.prologue = prologue + return u + + +def translateUpdate(q, base=None, initNs=None): + """ + Returns a list of SPARQL Update Algebra expressions + """ + + res = [] + prologue = None + if not q.request: + return res + for p, u in zip(q.prologue, q.request): + prologue = translatePrologue(p, base, initNs, prologue) + + # absolutize/resolve prefixes + u = traverse( + u, visitPost=functools.partial(translatePName, prologue=prologue)) + u = _traverse(u, _simplifyFilters) + + u = traverse(u, visitPost=translatePath) + + res.append(translateUpdate1(u, prologue)) + + return res + + +def translateQuery(q, base=None, initNs=None): + """ + Translate a query-parsetree to a SPARQL Algebra Expression + + Return a rdflib.plugins.sparql.sparql.Query object + """ + + # We get in: (prologue, query) + + prologue = translatePrologue(q[0], base, initNs) + + # absolutize/resolve prefixes + q[1] = traverse( + q[1], visitPost=functools.partial(translatePName, prologue=prologue)) + + P, PV = translate(q[1]) + datasetClause = q[1].datasetClause + if q[1].name == 'ConstructQuery': + + template = triples(q[1].template) if q[1].template else None + + res = CompValue(q[1].name, p=P, + template=template, + datasetClause=datasetClause) + else: + res = CompValue(q[1].name, p=P, datasetClause=datasetClause, PV=PV) + + res = traverse(res, visitPost=simplify) + _traverseAgg(res, visitor=analyse) + _traverseAgg(res, _addVars) + + return Query(prologue, res) + + +def pprintAlgebra(q): + def pp(p, ind=" "): + # if isinstance(p, list): + # print "[ " + # for x in p: pp(x,ind) + # print "%s ]"%ind + # return + if not isinstance(p, CompValue): + print(p) + return + print("%s(" % (p.name, )) + for k in p: + print("%s%s =" % (ind, k,), end=' ') + pp(p[k], ind + " ") + print("%s)" % ind) + + try: + pp(q.algebra) + except AttributeError: + # it's update, just a list + for x in q: + pp(x) + +if __name__ == '__main__': + import sys + from rdflib.plugins.sparql import parser + import os.path + + if os.path.exists(sys.argv[1]): + q = file(sys.argv[1]) + else: + q = sys.argv[1] + + pq = parser.parseQuery(q) + print(pq) + tq = translateQuery(pq) + print(pprintAlgebra(tq)) diff --git a/quit/core.py b/quit/core.py index 2131ff18..a0871e6a 100644 --- a/quit/core.py +++ 
b/quit/core.py
@@ -1,19 +1,28 @@
+import pygit2
+
+import quit.deprecated as deprecated
+
+
 from datetime import datetime
 import logging
 from os import makedirs
 from os.path import abspath, exists, isdir, join
-from quit.update import evalUpdate
-from pygit2 import GIT_MERGE_ANALYSIS_UP_TO_DATE
-from pygit2 import GIT_MERGE_ANALYSIS_FASTFORWARD
-from pygit2 import GIT_MERGE_ANALYSIS_NORMAL
+from subprocess import Popen
+
+from pygit2 import GIT_MERGE_ANALYSIS_NORMAL, GIT_MERGE_ANALYSIS_UP_TO_DATE, GIT_MERGE_ANALYSIS_FASTFORWARD
 from pygit2 import GIT_SORT_REVERSE, GIT_RESET_HARD, GIT_STATUS_CURRENT
 from pygit2 import init_repository, clone_repository
 from pygit2 import Repository, Signature, RemoteCallbacks, Keypair, UserPass
-from rdflib import ConjunctiveGraph, Graph, URIRef, BNode
-from subprocess import Popen
-corelogger = logging.getLogger('core.quit')
+from rdflib import ConjunctiveGraph, Graph, URIRef, BNode, Literal, Namespace
+
+from quit.update import evalUpdate
+from quit.namespace import RDF, RDFS, FOAF, XSD, PROV, QUIT, is_a
+from quit.graphs import RevisionGraph, InstanceGraph
+from quit.utils import graphdiff
+
+corelogger = logging.getLogger('core.quit')
 
 class FileReference:
     """A class that manages n-quad files.
@@ -171,8 +180,34 @@ def isversioned(self):
         """Check if a File is part of version control system."""
         return(self.versioning)
 
+class Queryable:
+    """
+    A class that represents a queryable graph-like object.
+    """
+    def __init__(self, **kwargs):
+        self.store = ConjunctiveGraph(identifier='default')
+
+    def query(self, querystring):
+        """Execute a SPARQL select query.
+
+        Args:
+            querystring: A string containing a SPARQL ask or select query.
+        Returns:
+            The SPARQL result set
+        """
+        pass
+
+    def update(self, querystring, versioning=True):
+        """Execute a SPARQL update query and update the store.
-class MemoryStore:
+        This method executes a SPARQL update query and updates and commits all affected files.
+
+        Args:
+            querystring: A string containing a SPARQL update query.
+        """
+        pass
+
+class Store(Queryable):
     """A class that combines and syncronieses n-quad files and an in-memory quad store.
 
     This class contains information about all graphs, their corresponding URIs and
     FileReference object (n-quad) that enables versioning (with git) and persistence.
     """
 
-    def __init__(self):
+    def __init__(self, store):
         """Initialize a new MemoryStore instance."""
         self.logger = logging.getLogger('memory_store.core.quit')
         self.logger.debug('Create an instance of MemoryStore')
-        self.store = ConjunctiveGraph(identifier='default')
+        self.store = store
 
         return
 
@@ -317,6 +352,246 @@ def exit(self):
         """Execute actions on API shutdown."""
         return
 
+class MemoryStore(Store):
+    def __init__(self):
+        store = ConjunctiveGraph(identifier='default')
+        super().__init__(store=store)
+
+class VirtualGraph(Queryable):
+    def __init__(self, rewrites):
+        self.store = InstanceGraph(rewrites)
+
+    def query(self, querystring):
+        return self.store.query(querystring)
+
+    def update(self, querystring, versioning=True):
+        return self.store.update(querystring)
+
+class Quit(object):
+    def __init__(self, config, repository, store):
+        self.config = config
+        self.repository = repository
+        self.store = store
+
+    def sync(self, rebuild=False):
+        """
+        Synchronizes store with repository data.
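+
+        A typical call sequence (illustrative sketch only; assumes an
+        already configured config, repository and store) would be:
+
+            quit = Quit(config, repository, store)
+            quit.sync(rebuild=True)
+
+        which clears the store and replays the provenance information of
+        all commits reachable from the known tags and branches.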
+ """ + if rebuild: + for c in self.store.contexts(): + self.store.remove((None,None,None), c) + + def exists(id): + uri = QUIT['version-' + id] + for _ in self.store.store.quads((uri, None, None, QUIT.default)): + return True + return False + + def traverse(commit, seen): + commits = [] + merges = [] + + while True: + id = commit.id + if id in seen: + break + seen.add(id) + if exists(id): + break + commits.append(commit) + parents = commit.parents + if not parents: + break + commit = parents[0] + if len(parents) > 1: + merges.append((len(commits), parents[1:])) + for idx, parents in reversed(merges): + for parent in parents: + commits[idx:idx] = traverse(parent, seen) + return commits + + seen = set() + + for name in self.repository.tags_or_branches: + initial_commit = self.repository.revision(name); + commits = traverse(initial_commit, seen) + + prov = self.changesets(commits) + self.store.addquads((s, p, o, c) for s, p, o, c in prov.quads()) + + #for commit in commits: + #(_, g) = commit.__prov__() + #self.store += g + + + def instance(self, id, from_git=False): + commit = self.repository.revision(id) + + target_files = self.config.getgraphurifilemap().values() + mapping = {} + + for entry in commit.node().entries(recursive=True): + if not entry.is_file: + continue + + if entry not in target_files: + continue + + public = QUIT[prefix] + private = QUIT[prefix + '-' + entry.blob.id] + + if not from_git: + g = RevisionGraph(entry.blob.id, store=self.store.store.store, identifier=private) + else: + g = RevisionGraph(entry.blob.id, identifier=private) + g.parse(data=entry.content, format='nquads') + + mapping[public] = g + + return VirtualGraph(mapping) + + def changesets(self, commits=None): + if not commits: + return + + g = ConjunctiveGraph(identifier=QUIT.default) + + last = None + + role_author_uri = QUIT['author'] + role_committer_uri = QUIT['committer'] + + if commits: + g.add((role_author_uri, is_a, PROV['Role'])) + g.add((role_committer_uri, is_a, PROV['Role'])) + + while commits: + commit = commits.pop() + rev = commit.id + + # Create the commit + commit_graph = self.instance(commit.id, True) + commit_uri = QUIT['commit-' + commit.id] + + g.add((commit_uri, is_a, PROV['Activity'])) + + if 'import' in commit.properties.keys(): + g.add((commit_uri, is_a, QUIT['Import'])) + g.add((commit_uri, QUIT['dataSource'], URIRef(commit.properties['import'].strip()))) + + g.add((commit_uri, PROV['startedAtTime'], Literal(commit.author_date, datatype = XSD.dateTime))) + g.add((commit_uri, PROV['endedAtTime'], Literal(commit.committer_date, datatype = XSD.dateTime))) + g.add((commit_uri, RDFS['comment'], Literal(commit.message.strip()))) + + # Author + hash = pygit2.hash(commit.author.email).hex + author_uri = QUIT['user-' + hash] + g.add((commit_uri, PROV['wasAssociatedWith'], author_uri)) + + g.add((author_uri, is_a, PROV['Agent'])) + g.add((author_uri, RDFS.label, Literal(commit.author.name))) + g.add((author_uri, FOAF.mbox, Literal(commit.author.email))) + + q_author_uri = BNode() + g.add((commit_uri, PROV['qualifiedAssociation'], q_author_uri)) + g.add((q_author_uri, is_a, PROV['Association'])) + g.add((q_author_uri, PROV['agent'], author_uri)) + g.add((q_author_uri, PROV['role'], role_author_uri)) + + if commit.author.name != commit.committer.name: + # Committer + hash = pygit2.hash(commit.committer.email).hex + committer_uri = QUIT['user-' + hash] + g.add((commit_uri, PROV['wasAssociatedWith'], committer_uri)) + + g.add((committer_uri, is_a, PROV['Agent'])) + g.add((committer_uri, 
                       RDFS.label, Literal(commit.committer.name)))
+                g.add((committer_uri, FOAF.mbox, Literal(commit.committer.email)))
+
+                q_committer_uri = BNode()
+                g.add((commit_uri, PROV['qualifiedAssociation'], q_committer_uri))
+                g.add((q_committer_uri, is_a, PROV['Association']))
+                g.add((q_committer_uri, PROV['agent'], committer_uri))
+                g.add((q_committer_uri, PROV['role'], role_committer_uri))
+            else:
+                g.add((q_author_uri, PROV['role'], role_committer_uri))
+
+            # Parents
+            parent = None
+            parent_graph = None
+
+            if commit.parents:
+                parent = commit.parents[0]
+                parent_graph = self.instance(parent.id, True)
+
+            for parent in commit.parents:
+                parent_uri = QUIT['commit-' + parent.id]
+                g.add((commit_uri, QUIT["preceedingCommit"], parent_uri))
+
+            # Diff
+            diff = graphdiff(parent_graph, commit_graph)
+            for ((resource_uri, hex), changesets) in diff.items():
+                for (op, update_graph) in changesets:
+                    update_uri = QUIT['update-' + hex]
+                    op_uri = QUIT[op + ':' + hex]
+                    g.add((commit_uri, QUIT['updates'], update_uri))
+                    g.add((update_uri, QUIT['graph'], resource_uri))
+                    g.add((update_uri, QUIT[op], op_uri))
+                    g.addN((s, p, o, op_uri) for s, p, o in update_graph)
+
+            # Entities
+            _m = self.config.getgraphurifilemap()
+
+            for entity in commit.node().entries(recursive=True):
+                # TODO: check if the file was changed
+                if entity.is_file:
+                    if entity.name not in _m.values():
+                        continue
+
+                    tmp = ConjunctiveGraph()
+                    tmp.parse(data=entity.content, format='nquads')
+
+                    for context in tmp.contexts():
+                        if context not in _m.keys():
+                            continue
+
+                        public_uri = QUIT[context]
+                        private_uri = QUIT[context + '-' + entity.blob.hex]
+
+                        g.add((private_uri, PROV['specializationOf'], public_uri))
+                        g.add((private_uri, PROV['wasGeneratedBy'], commit_uri))
+                        g.addN((s, p, o, private_uri) for s, p, o in tmp.quads((None, None, None, context)))
+
+        return g
+
+    def commit(self, graph, message, ref='refs/heads/master'):
+        if not isinstance(graph, VirtualGraph):
+            raise Exception('Can only commit instances of VirtualGraph')
+
+        if not graph.is_dirty:
+            return
+
+        index = self.repository.index(ref)
+        for context in graph.contexts():
+            if context.id:
+                continue
+
+            identifier = context.identifier
+            path = self.config.getfileforgraphuri(identifier)
+
+            if len(context) > 0:
+                content = context.serialize(format='nt').decode('UTF-8')
+                index.add(path, content)
+            else:
+                index.remove(path)
+
+        author = self.repository.git_repository.default_signature
+        id = index.commit(str(message), author.name, author.email, ref=ref)
+
+        if id:
+            self.repository.git_repository.set_head(id)
+            if not self.repository.is_bare:
+                self.repository.git_repository.reset(id, pygit2.GIT_RESET_HARD)
 
 class GitRepo:
     """A class that manages a git repository.
diff --git a/quit/deprecated.py b/quit/deprecated.py
new file mode 100644
index 00000000..b5cbf7ad
--- /dev/null
+++ b/quit/deprecated.py
@@ -0,0 +1,16 @@
+import warnings
+import functools
+
+def deprecated(func):
+    """This is a decorator which can be used to mark functions
+    as deprecated.
It will result in a warning being emmitted + when the function is used.""" + + @functools.wraps(func) + def new_func(*args, **kwargs): + warnings.simplefilter('always', DeprecationWarning) #turn off filter + warnings.warn("Call to deprecated function {}.".format(func.__name__), category=DeprecationWarning, stacklevel=2) + warnings.simplefilter('default', DeprecationWarning) #reset filter + return func(*args, **kwargs) + + return new_func \ No newline at end of file diff --git a/quit/evaluate.py b/quit/evaluate.py new file mode 100644 index 00000000..22ba6029 --- /dev/null +++ b/quit/evaluate.py @@ -0,0 +1,460 @@ +""" +These method recursively evaluate the SPARQL Algebra + +evalQuery is the entry-point, it will setup context and +return the SPARQLResult object + +evalPart is called on each level and will delegate to the right method + +A rdflib.plugins.sparql.sparql.QueryContext is passed along, keeping +information needed for evaluation + +A list of dicts (solution mappings) is returned, apart from GroupBy which may +also return a dict of list of dicts + +""" +import collections + +from rdflib import Variable, Graph, BNode, URIRef, Literal + +from rdflib.plugins.sparql import CUSTOM_EVALS +from rdflib.plugins.sparql.parserutils import value +from rdflib.plugins.sparql.sparql import ( + QueryContext, AlreadyBound, FrozenBindings, SPARQLError) +from rdflib.plugins.sparql.evalutils import ( + _filter, _eval, _join, _diff, _minus, _fillTemplate, _ebv) + +from rdflib.plugins.sparql.aggregates import evalAgg + +import quit.service as service + +def evalBGP(ctx, bgp): + + """ + A basic graph pattern + """ + + if not bgp: + yield ctx.solution() + return + + s, p, o = bgp[0] + + _s = ctx[s] + _p = ctx[p] + _o = ctx[o] + + for ss, sp, so in ctx.graph.triples((_s, _p, _o)): + if None in (_s, _p, _o): + c = ctx.push() + else: + c = ctx + + if _s is None: + c[s] = ss + + try: + if _p is None: + c[p] = sp + except AlreadyBound: + continue + + try: + if _o is None: + c[o] = so + except AlreadyBound: + continue + + for x in evalBGP(c, bgp[1:]): + yield x + + +def evalExtend(ctx, extend): + # TODO: Deal with dict returned from evalPart from GROUP BY + + for c in evalPart(ctx, extend.p): + try: + e = _eval(extend.expr, c.forget(ctx)) + if isinstance(e, SPARQLError): + raise e + + yield c.merge({extend.var: e}) + + except SPARQLError: + yield c + + +def evalLazyJoin(ctx, join): + """ + A lazy join will push the variables bound + in the first part to the second part, + essentially doing the join implicitly + hopefully evaluating much fewer triples + """ + for a in evalPart(ctx, join.p1): + c = ctx.thaw(a) + for b in evalPart(c, join.p2): + yield b + + +def evalJoin(ctx, join): + + # TODO: Deal with dict returned from evalPart from GROUP BY + # only ever for join.p1 + + if join.lazy: + return evalLazyJoin(ctx, join) + else: + a = evalPart(ctx, join.p1) + b = set(evalPart(ctx, join.p2)) + return _join(a, b) + + +def evalUnion(ctx, union): + res = set() + for x in evalPart(ctx, union.p1): + res.add(x) + yield x + for x in evalPart(ctx, union.p2): + if x not in res: + yield x + + +def evalMinus(ctx, minus): + a = evalPart(ctx, minus.p1) + b = set(evalPart(ctx, minus.p2)) + return _minus(a, b) + + +def evalLeftJoin(ctx, join): + # import pdb; pdb.set_trace() + for a in evalPart(ctx, join.p1): + ok = False + c = ctx.thaw(a) + for b in evalPart(c, join.p2): + if _ebv(join.expr, b.forget(ctx)): + ok = True + yield b + if not ok: + # we've cheated, the ctx above may contain + # vars bound outside our scope + # before 
we yield a solution without the OPTIONAL part + # check that we would have had no OPTIONAL matches + # even without prior bindings... + if not any(_ebv(join.expr, b) for b in + evalPart(ctx.thaw(a.remember(join.p1._vars)), join.p2)): + + yield a + + +def evalFilter(ctx, part): + # TODO: Deal with dict returned from evalPart! + for c in evalPart(ctx, part.p): + if _ebv(part.expr, c.forget(ctx)): + yield c + + +def evalGraph(ctx, part): + + if ctx.dataset is None: + raise Exception( + "Non-conjunctive-graph doesn't know about " + + "graphs. Try a query without GRAPH.") + + ctx = ctx.clone() + graph = ctx[part.term] + if graph is None: + + for graph in ctx.dataset.contexts(): + + # in SPARQL the default graph is NOT a named graph + if graph == ctx.dataset.default_context: + continue + + c = ctx.pushGraph(graph) + c = c.push() + graphSolution = [{part.term: graph.identifier}] + for x in _join(evalPart(c, part.p), graphSolution): + yield x + + else: + c = ctx.pushGraph(ctx.dataset.get_context(graph)) + for x in evalPart(c, part.p): + yield x + + +def evalValues(ctx, part): + for r in part.p.res: + c = ctx.push() + try: + for k, v in r.items(): + if v != 'UNDEF': + c[k] = v + except AlreadyBound: + continue + + yield c.solution() + + +def evalMultiset(ctx, part): + + if part.p.name == 'values': + return evalValues(ctx, part) + + return evalPart(ctx, part.p) + + +def evalPart(ctx, part): + + # try custom evaluation functions + for name, c in list(CUSTOM_EVALS.items()): + try: + return c(ctx, part) + except NotImplementedError: + pass # the given custome-function did not handle this part + + if part.name == 'BGP': + return evalBGP(ctx, part.triples) # NOTE pass part.triples, not part! + elif part.name == 'Filter': + return evalFilter(ctx, part) + elif part.name == 'Join': + return evalJoin(ctx, part) + elif part.name == 'LeftJoin': + return evalLeftJoin(ctx, part) + elif part.name == 'Graph': + return evalGraph(ctx, part) + elif part.name == 'Union': + return evalUnion(ctx, part) + elif part.name == 'ToMultiSet': + return evalMultiset(ctx, part) + elif part.name == 'Extend': + return evalExtend(ctx, part) + elif part.name == 'Minus': + return evalMinus(ctx, part) + elif part.name == 'Service': + return evalService(ctx, part) + + elif part.name == 'Project': + return evalProject(ctx, part) + elif part.name == 'Slice': + return evalSlice(ctx, part) + elif part.name == 'Distinct': + return evalDistinct(ctx, part) + elif part.name == 'Reduced': + return evalReduced(ctx, part) + + elif part.name == 'OrderBy': + return evalOrderBy(ctx, part) + elif part.name == 'Group': + return evalGroup(ctx, part) + elif part.name == 'AggregateJoin': + return evalAggregateJoin(ctx, part) + + elif part.name == 'SelectQuery': + return evalSelectQuery(ctx, part) + elif part.name == 'AskQuery': + return evalAskQuery(ctx, part) + elif part.name == 'ConstructQuery': + return evalConstructQuery(ctx, part) + + elif part.name == 'ServiceGraphPattern': + raise Exception('SERVICE not implemented') + + elif part.name == 'DescribeQuery': + raise Exception('DESCRIBE not implemented') + + else: + # import pdb ; pdb.set_trace() + raise Exception('I dont know: %s' % part.name) + + +def evalGroup(ctx, group): + + """ + http://www.w3.org/TR/sparql11-query/#defn_algGroup + """ + + p = evalPart(ctx, group.p) + if not group.expr: + return {1: list(p)} + else: + res = collections.defaultdict(list) + for c in p: + k = tuple(_eval(e, c) for e in group.expr) + res[k].append(c) + return res + + +def evalAggregateJoin(ctx, agg): + # import pdb ; 
pdb.set_trace() + p = evalPart(ctx, agg.p) + # p is always a Group, we always get a dict back + + for row in p: + bindings = {} + for a in agg.A: + evalAgg(a, p[row], bindings) + + yield FrozenBindings(ctx, bindings) + + if len(p) == 0: + yield FrozenBindings(ctx) + + + +def evalOrderBy(ctx, part): + + res = evalPart(ctx, part.p) + + for e in reversed(part.expr): + + def val(x): + v = value(x, e.expr, variables=True) + if isinstance(v, Variable): + return (0, v) + elif isinstance(v, BNode): + return (1, v) + elif isinstance(v, URIRef): + return (2, v) + elif isinstance(v, Literal): + return (3, v) + + reverse = bool(e.order and e.order == 'DESC') + res = sorted(res, key=val, reverse=reverse) + + return res + + +def evalSlice(ctx, slice): + # import pdb; pdb.set_trace() + res = evalPart(ctx, slice.p) + i = 0 + while i < slice.start: + next(res) + i += 1 + i = 0 + for x in res: + i += 1 + if slice.length is None: + yield x + else: + if i <= slice.length: + yield x + else: + break + + +def evalReduced(ctx, part): + return evalPart(ctx, part.p) # TODO! + + +def evalDistinct(ctx, part): + res = evalPart(ctx, part.p) + + done = set() + for x in res: + if x not in done: + yield x + done.add(x) + + +def evalProject(ctx, project): + res = evalPart(ctx, project.p) + + return (row.project(project.PV) for row in res) + + +def evalSelectQuery(ctx, query): + + res = {} + res["type_"] = "SELECT" + res["bindings"] = evalPart(ctx, query.p) + res["vars_"] = query.PV + return res + + +def evalAskQuery(ctx, query): + res = {} + res["type_"] = "ASK" + res["askAnswer"] = False + for x in evalPart(ctx, query.p): + res["askAnswer"] = True + break + + return res + + +def evalConstructQuery(ctx, query): + template = query.template + + if not template: + # a construct-where query + template = query.p.p.triples # query->project->bgp ... + + graph = Graph() + + for c in evalPart(ctx, query.p): + graph += _fillTemplate(template, c) + + res = {} + res["type_"] = "CONSTRUCT" + res["graph"] = graph + + return res + +def evalService(ctx, part): + + srv = service.get(part.term) + if srv is None: + raise Exception('SERVICE not implemented') + + else: + c = ctx.pushGraph(srv) + c._dataset = srv + for x in evalPart(c, part.p): + yield x + + +def evalQuery(graph, query, initBindings, base=None): + try: + ctx = QueryContext(graph) + + ctx.prologue = query.prologue + + if initBindings: + for k, v in initBindings.items(): + if not isinstance(k, Variable): + k = Variable(k) + ctx[k] = v + # ctx.push() # nescessary? + + main = query.algebra + + # import pdb; pdb.set_trace() + if main.datasetClause: + if ctx.dataset is None: + raise Exception( + "Non-conjunctive-graph doesn't know about " + + "graphs! Try a query without FROM (NAMED).") + + ctx = ctx.clone() # or push/pop? 
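+            # Commentary (added; not in the original code): the loop below
+            # realizes FROM / FROM NAMED. For a query such as
+            #     SELECT * FROM <g1> FROM NAMED <g2> WHERE { ... }
+            # <g1> is loaded into the default graph of the cloned context,
+            # while <g2> is loaded as a named graph of the dataset.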
+
+            firstDefault = False
+            for d in main.datasetClause:
+                if d.default:
+
+                    if firstDefault:
+                        # replace current default graph
+                        dg = ctx.dataset.get_context(BNode())
+                        ctx = ctx.pushGraph(dg)
+                        firstDefault = True
+
+                    ctx.load(d.default, default=True)
+
+                elif d.named:
+                    g = d.named
+                    ctx.load(g, default=False)
+
+        return evalPart(ctx, main)
+    except Exception:
+        raise
diff --git a/quit/exceptions.py b/quit/exceptions.py
index d378643a..04c20b9f 100644
--- a/quit/exceptions.py
+++ b/quit/exceptions.py
@@ -11,3 +11,39 @@ class MissingConfigurationError(Error):
 
 class MissingFileError(Error):
     pass
+
+class RepositoryNotFound(Error):
+    """
+    Exception raised when a repository is invalid
+    """
+
+class ResourceNotFound(Error):
+    """
+    Thrown when a non-existent resource is requested
+    """
+
+class RevisionNotFound(ResourceNotFound):
+    """
+    Thrown when a non-existent revision is requested
+    """
+    def __init__(self, id):
+        ResourceNotFound.__init__(self, "No commit '%s' in the repository." % id)
+
+class NodeNotFound(ResourceNotFound):
+    """
+    Thrown when a non-existent node is requested
+    """
+    def __init__(self, path, id):
+        ResourceNotFound.__init__(self, "No node '%s' in commit '%s'." % (path, id))
+
+class IndexError(Error):
+    """
+    Thrown during indexing
+    Note: shadows Python's built-in IndexError of the same name.
+    """
+    pass
+
+class ServiceException(Error):
+    """
+    Thrown when requesting a missing service
+    """
+    pass
\ No newline at end of file
diff --git a/quit/git.py b/quit/git.py
new file mode 100644
index 00000000..e6860702
--- /dev/null
+++ b/quit/git.py
@@ -0,0 +1,628 @@
+import os
+import pygit2
+import re
+
+from datetime import datetime, timezone
+from os.path import join
+from pygit2 import Keypair, RemoteCallbacks
+from quit.exceptions import RepositoryNotFound, RevisionNotFound, NodeNotFound
+from rdflib import Graph, Literal, URIRef, ConjunctiveGraph, Dataset, BNode
+from quit.graphs import InstanceGraph, RevisionGraph
+from quit.namespace import FOAF, RDFS, PROV, QUIT, is_a, XSD
+from quit.utils import graphdiff
+
+# roles
+role_author = QUIT['author']
+role_committer = QUIT['committer']
+
+def clean_path(path):
+    path = os.path.normpath(path)
+    if path.startswith(os.sep):
+        path = path[len(os.sep):]
+
+    return path
+
+def _git_timestamp(ts, offset):
+    import quit.utils as tzinfo
+    if offset == 0:
+        tz = timezone.utc
+    else:
+        hours, rem = divmod(abs(offset), 60)
+        tzname = 'UTC%+03d:%02d' % ((hours, -hours)[offset < 0], rem)
+        tz = tzinfo.TZ(offset, tzname)
+    return datetime.fromtimestamp(ts, tz)
+
+class Base(object):
+    def __init__(self):
+        pass
+
+    def __prov__(self):
+        pass
+
+
+class Repository(Base):
+    def __init__(self, path, **params):
+        origin = params.get('origin', None)
+
+        try:
+            self._repository = pygit2.Repository(path)
+        except KeyError:
+            if not params.get('create', False):
+                raise RepositoryNotFound('Repository "%s" does not exist' % path)
+
+            if origin:
+                self.callback = self._callback(origin)
+                pygit2.clone_repository(url=origin, path=path, bare=False)
+            else:
+                pygit2.init_repository(path)
+
+        name = os.path.basename(path).lower()
+
+        self.name = name
+        self.path = path
+        self.params = params
+
+    def _callback(self, origin):
+        """Set a pygit2 callback for user authentication when acting with remotes.
+
+        This method uses the private-public keypair of the SSH host configuration.
+        The keys are expected to be found at ~/.ssh/
+        Warning: Create a keypair that will be used only in QuitStore and do not use
+        existing keypairs.
+
+        Args:
+            username: The git username (mostly) given in the address.
+            passphrase: The passphrase of the private key.
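+
+        Example (illustrative only): for an origin like
+        "git@example.org:user/repo.git" the regular expression below would
+        extract "git" as the username, and the keypair
+        ~/.ssh/id_quit / ~/.ssh/id_quit.pub would be used as credentials.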
+ """ + from os.path import expanduser + ssh = join(expanduser('~'), '.ssh') + pubkey = join(ssh, 'id_quit.pub') + privkey = join(ssh, 'id_quit') + + from re import search + # regex to match username in web git adresses + regex = '(\w+:\/\/)?((.+)@)*([\w\d\.]+)(:[\d]+){0,1}\/*(.*)' + p = search(regex, origin) + username = p.group(3) + + passphrase = '' + + try: + credentials = Keypair(username, pubkey, privkey, passphrase) + except: + self.logger.debug('GitRepo, setcallback: Something went wrong with Keypair') + return + + return RemoteCallbacks(credentials=credentials) + + def _clone(self, origin, path): + try: + self.addRemote('origin', origin) + repo = pygit2.clone_repository(url=origin, path=path, bare=False, callbacks=self.callback) + return repo + except: + raise Exception('Could not clone from', origin) + + @property + def is_empty(self): + return self._repository.is_empty + + @property + def is_bare(self): + return self._repository.is_bare + + def close(self): + self._repository = None + + def revision(self, id='HEAD'): + try: + commit = self._repository.revparse_single(id) + except KeyError: + raise RevisionNotFound(id) + + return Revision(self, commit) + + def revisions(self): + seen_oids = set() + + def iter_commits(): + for name in self.branches: + ref = self._repository.lookup_reference(name) + for commit in git_repos.walk(ref.target, pygit2.GIT_SORT_REVERSE): + oid = commit.oid + if oid not in seen_oids: + seen_oids.add(oid) + yield Revision(self, commit) + return iter_commits() + + @property + def branches(self): + return [x for x in self._repository.listall_references() if x.startswith('refs/heads/')] + + @property + def tags(self): + return [x for x in self._repository.listall_references() if x.startswith('refs/tags/')] + + @property + def tags_or_branches(self): + return [x for x in self._repository.listall_references() if x.startswith('refs/tags/') or x.startswith('refs/heads/')] + + def index(self, revision=None): + index = Index(self) + index.set_revision(revision or 'HEAD') + return index + + def pull(self, remote_name='origin', branch='master'): + for remote in self._repository.remotes: + if remote.name == remote_name: + remote.fetch() + remote_master_id = self._repository.lookup_reference('refs/remotes/origin/%s' % (branch)).target + merge_result, _ = self._repository.merge_analysis(remote_master_id) + + # Up to date, do nothing + if merge_result & pygit2.GIT_MERGE_ANALYSIS_UP_TO_DATE: + return + + # We can just fastforward + elif merge_result & pygit2.GIT_MERGE_ANALYSIS_FASTFORWARD: + self._repository.checkout_tree(self._repository.get(remote_master_id)) + try: + master_ref = self._repository.lookup_reference('refs/heads/%s' % (branch)) + master_ref.set_target(remote_master_id) + except KeyError: + self._repository.create_branch(branch, repo.get(remote_master_id)) + self._repository.head.set_target(remote_master_id) + + elif merge_result & pygit2.GIT_MERGE_ANALYSIS_NORMAL: + self._repository.merge(remote_master_id) + + if self._repository.index.conflicts is not None: + for conflict in repo.index.conflicts: + print('Conflicts found in:', conflict[0].path) + raise AssertionError('Conflicts, ahhhhh!!') + + user = self._repository.default_signature + tree = self._repository.index.write_tree() + commit = self._repository.create_commit('HEAD', + user, + user, + 'Merge!', + tree, + [self._repository.head.target, remote_master_id]) + # We need to do this or git CLI will think we are still merging. 
+ self._repository.state_cleanup() + else: + raise AssertionError('Unknown merge analysis result') + + def push(self, remote_name='origin', ref='refs/heads/master:refs/heads/master'): + for remote in self._repository.remotes: + if remote.name == remote_name: + remote.push(ref) + + def __prov__(self): + + commit_graph = self.instance(commit.id, True) + pass + + +class Revision(Base): + re_parser = re.compile( + r'(?P[\w\-_]+): ((?P[^"\n]+)|"(?P.+)")', + re.DOTALL + ) + + def __init__(self, repository, commit): + + message = commit.message.strip() + properties = self._parse_message(commit.message) + author = Signature(commit.author.name, commit.author.email, _git_timestamp(commit.author.time, commit.author.offset), commit.author.offset) + committer = Signature(commit.committer.name, commit.committer.email, _git_timestamp(commit.committer.time, commit.committer.offset), commit.committer.offset) + + self.id = commit.hex + self.short_id = self.id[:10] + self.message = message + self.author = author + self.author_date = author.datetime + self.committer = committer + self.committer_date = committer.datetime + + self._repository = repository + self._commit = commit + self._parents = None + self._properties = properties + + def _parse_message(self, message): + found = dict() + for line in message.splitlines(): + m = re.match(self.re_parser, line) + if m is not None: + found[m.group('key')] = m.group('value') or m.group('multiline') + else: + break + + return found + + @property + def properties(self): + return self._properties + + @property + def parents(self): + if self._parents is None: + self._parents = [Revision(self._repository, id) + for id in self._commit.parents] + return self._parents + + def node(self, path=None): + return Node(self._repository, self._commit, path) + + def graph(store): + mapping = dict() + + for entry in self.node().entries(recursive=True): + if not entry.is_file: + continue + + for (public_uri, g) in entry.graph(store): + if public_uri is None: + continue + + mapping[public_uri] = g + + return InstanceGraph(mapping) + + def __prov__(self): + + uri = QUIT['commit-' + self.id] + + g = ConjunctiveGraph(identifier=QUIT.default) + + # default activity + g.add((uri, is_a, PROV['Activity'])) + + # special activity + if 'import' in self.properties.keys(): + g.add((uri, is_a, QUIT['Import'])) + g.add((uri, QUIT['dataSource'], URIRef(self.properties['import'].strip()))) + + # properties + g.add((uri, PROV['startedAtTime'], Literal(self.author_date, datatype = XSD.dateTime))) + g.add((uri, PROV['endedAtTime'], Literal(self.committer_date, datatype = XSD.dateTime))) + g.add((uri, RDFS['comment'], Literal(self.message))) + + # parents + for parent in self.parents: + parent_uri = QUIT['commit-' + parent.id] + g.add((uri, QUIT["preceedingCommit"], parent_uri)) + + g.add((role_author, is_a, PROV['Role'])) + g.add((role_committer, is_a, PROV['Role'])) + + # author + (author_uri, author_graph) = self.author.__prov__() + + g += author_graph + g.add((uri, PROV['wasAssociatedWith'], author_uri)) + + qualified_author = BNode() + g.add((uri, PROV['qualifiedAssociation'], qualified_author)) + g.add((qualified_author, is_a, PROV['Association'])) + g.add((qualified_author, PROV['agent'], author_uri)) + g.add((qualified_author, PROV['role'], role_author)) + + # commiter + if self.author.name != self.committer.name: + (committer_uri, committer_graph) = self.committer.__prov__() + + g += committer_graph + g.add((uri, PROV['wasAssociatedWith'], committer_uri)) + + qualified_committer = BNode() + 
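+            # Added commentary: this mirrors the author handling above and
+            # produces a PROV qualified association, e.g. (illustrative):
+            #   <commit-X> prov:qualifiedAssociation _:qc .
+            #   _:qc a prov:Association ;
+            #        prov:agent <user-H> ;
+            #        prov:role quit:committer .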
g.add((uri, PROV['qualifiedAssociation'], qualified_committer)) + g.add((qualified_committer, is_a, PROV['Association'])) + g.add((qualified_committer, PROV['agent'], author_uri)) + g.add((qualified_committer, PROV['role'], role_committer)) + else: + g.add((qualified_author, PROV['role'], role_committer)) + + # diff + diff = graphdiff(parent_graph, commit_graph) + for ((resource_uri, hex), changesets) in diff.items(): + for (op, update_graph) in changesets: + update_uri = QUIT['update-' + hex] + op_uri = QUIT[op + '-' + hex] + g.add((uri, QUIT['updates'], update_uri)) + g.add((update_uri, QUIT['graph'], resource_uri)) + g.add((update_uri, QUIT[op], op_uri)) + g.addN((s, p, o, op_uri) for s, p, o in update_graph) + + # entities + for entity in self.node().entries(recursive=True): + for (entity_uri, entity_graph) in self.committer.__prov__(): + g += entity_graph + g.add((entity_uri, PROV['wasGeneratedBy'], uri)) + + return (uri, g) + + +class Signature(Base): + + def __init__(self, name, email, datetime, offset): + self.name = name + self.email = email + self.offset = offset + self.datetime = datetime + + def __str__(self): + return '{name} <{email}> {date}{offset}'.format(**self.__dict__) + + def __repr__(self): + return '<{0}> {1}'.format(self.__class__.__name__, self.name).encode('UTF-8') + + def __prov__(self): + + hash = pygit2.hash(self.email).hex + uri = QUIT['user-' + hash] + + g = ConjunctiveGraph(identifier=QUIT.default) + + g.add((uri, is_a, PROV['Agent'])) + g.add((uri, RDFS.label, Literal(self.name))) + g.add((uri, FOAF.mbox, Literal(self.email))) + + return (uri,g) + + +class Node(Base): + + DIRECTORY = "dir" + FILE = "file" + + def __init__(self, repository, commit, path=None): + + if path in (None, '', '.'): + self._obj = commit.tree + self.name = '' + self.kind = Node.DIRECTORY + self.tree = self._obj + else: + try: + entry = commit.tree[path] + except KeyError: + raise NodeNotFound(path, commit.hex) + self._obj = repository._repository.get(entry.oid) + self.name = path + if self._obj.type == pygit2.GIT_OBJ_TREE: + self.kind = Node.DIRECTORY + self.tree = self._obj + elif self._obj.type == pygit2.GIT_OBJ_BLOB: + self.kind = Node.FILE + self.blob = self._obj + + self._repository = repository + self._commit = commit + + @property + def is_dir(self) : + return self.kind == Node.DIRECTORY + + @property + def is_file(self) : + return self.kind == Node.FILE + + @property + def dirname(self): + return os.path.dirname(self.name) + + @property + def basename(self): + return os.path.basename(self.name) + + @property + def content(self): + if not self.is_file: + return None + return self.blob.data.decode("utf-8") + + def entries(self, recursive=False): + if isinstance(self._obj, pygit2.Tree): + for entry in self._obj: + dirname = self.is_dir and self.name or self.dirname + node = Node(self._repository, self._commit, '/'.join(x for x in [dirname, entry.name] if x)) + + yield node + if recursive and node.is_dir and node._obj is not None: + for x in node.entries(recursive=True): + yield x + + @property + def content_length(self): + if self.is_file: + return self.blob.size + return None + + def graph(store): + if self.is_file: + + tmp = ConjunctiveGraph() + tmp.parse(data=self.content, format='nquads') + + for context in tmp.context(): + + public_uri = QUIT[context] + private_uri = QUIT[context + '-' + self.blob.hex] + + g = RevisionGraph(entry.blob.hex, identifier=private_uri) + g.parse(data=entry.content, format='nquads') + + yield (public_uri, g) + + + def __prov__(self): + if 
self.is_file: + + tmp = ConjunctiveGraph() + tmp.parse(data=self.content, format='nquads') + + for context in tmp.context(): + g = ConjunctiveGraph(identifier=QUIT.default) + + public_uri = QUIT[context] + private_uri = QUIT[context + '-' + self.blob.hex] + + g.add((private_uri, is_a, PROV['Entity'])) + g.add((private_uri, PROV['specializationOf'], public_uri)) + g.addN((s, p, o, private_uri) for s, p, o, _ in tmp.quads(None, None, None, context)) + + yield (private_uri, g) + +from heapq import heappush, heappop + + +class Index(object): + def __init__(self, repository): + self.repository = repository + self.revision = None + self.stash = {} + self.contents = set() + self.dirty = False + + def set_revision(self, revision): + try: + self.revision = self.repository.revision(revision) + except RevisionNotFound as e: + raise IndexError(e) + + def add(self, path, contents, mode=None): + self._assert_revision() + path = clean_path(path) + + oid = self.repository._repository.create_blob(contents) + + self.stash[path] = (oid, mode or pygit2.GIT_FILEMODE_BLOB) + self.contents.add(contents) + + def remove(self, path): + self._assert_revision() + path = clean_path(path) + + self.stash[path] = (None, None) + + def commit(self, message, author_name, author_email, **kwargs): + self._assert_revision() + if self.dirty: + raise IndexError('Index already commited') + + ref = kwargs.pop('ref', 'HEAD') + commiter_name = kwargs.pop('commiter_name', author_name) + commiter_email = kwargs.pop('commiter_email', author_email) + parents = kwargs.pop('parents', [self.revision.id]) + + author = pygit2.Signature(author_name, author_email) + commiter = pygit2.Signature(commiter_name, commiter_email) + + # Sort index items + items = sorted(self.stash.items(), key=lambda x: (x[1][0], x[0])) + + # Create tree + tree = IndexTree(self.revision) + while len(items) > 0: + path, (oid, mode) = items.pop(0) + + if oid is None: + tree.remove(path) + else: + tree.add(path, oid, mode) + + oid = tree.write() + self.dirty = True + + try: + return self.repository._repository.create_commit(ref, author, commiter, message, oid, parents) + except: + return None + + def _assert_revision(self): + if self.revision is None: + raise IndexError('No base revision') + + +class IndexHeap(object): + def __init__(self): + self._dict = {} + self._heap = [] + + def __len__(self): + return len(self._dict) + + def get(self, path): + return self._dict.get(path) + + def __setitem__(self, path, value): + if path not in self._dict: + n = -path.count(os.sep) if path else 1 + heappush(self._heap, (n, path)) + + self._dict[path] = value + + def popitem(self): + key = heappop(self._heap) + path = key[1] + return path, self._dict.pop(path) + + +class IndexTree(object): + def __init__(self, revision): + self.repository = revision.repository + self.revision = revision + self.builders = IndexHeap() + self.builders[''] = ( + None, self.repository._repository.TreeBuilder(self.revision.commit.tree)) + + def get_builder(self, path): + parts = path.split(os.path.sep) + + # Create builders if needed + for i in range(len(parts)): + _path = os.path.join(*parts[0:i + 1]) + + if self.builders.get(_path): + continue + + args = [] + try: + node = self.revision.node(_path) + if node.is_file: + raise IndexError( + 'Cannot create a tree builder. 
"{0}" is a file'.format(node.name)) + args.append(node.obj.oid) + except NodeNotFound: + pass + + self.builders[_path] = (os.path.dirname( + _path), self.repository._repository.TreeBuilder(*args)) + + return self.builders.get(path)[1] + + def add(self, path, oid, mode): + builder = self.get_builder(os.path.dirname(path)) + builder.insert(os.path.basename(path), oid, mode) + + def remove(self, path): + self.revision.node(path) + builder = self.get_builder(os.path.dirname(path)) + builder.remove(os.path.basename(path)) + + def write(self): + """ + Attach and writes all builders and return main builder oid + """ + # Create trees + while len(self.builders) > 0: + path, (parent, builder) = self.builders.popitem() + if parent is not None: + oid = builder.write() + builder.clear() + self.builders.get(parent)[1].insert( + os.path.basename(path), oid, pygit2.GIT_FILEMODE_TREE) + + oid = builder.write() + builder.clear() + + return oid diff --git a/quit/graphs.py b/quit/graphs.py new file mode 100644 index 00000000..2c9ec68a --- /dev/null +++ b/quit/graphs.py @@ -0,0 +1,186 @@ +from rdflib import Graph, Literal, URIRef, ConjunctiveGraph, Dataset + +class RevisionGraph(ConjunctiveGraph): + + def __init__(self, id = None, store = 'default', identifier = None, namespace_manager = None): + super().__init__(hex, store=store, identifier=identifier, namespace_manager=namespace_manager) + self.id = id + +class InstanceGraph(ConjunctiveGraph): + def __init__(self, graphs): + super().__init__() + + self.graphs = graphs + self.dirty = False + + @property + def is_dirty(self): + return self.dirty + + def destroy(self, configuration): + raise Exception() + + # Transactional interfaces (optional) + def commit(self): + pass + + def rollback(self): + pass + + def open(self, configuration, create = False): + for k, v in self.graphs.items(): + v.open(self, configuration, create) + + def close(self): + for k, v in self.graphs.items(): + v.close() + + def add(self, triple_or_quad): + context = None + if len(triple_or_quad) == 4: + context = triple_or_quad[3] + + if context in self.graphs.keys(): + for k, v in self.graphs.items(): + if context is None: + continue + if k == context: + if not self.graphs[k] or not self.graphs[k].editable: + tmp = RevisionGraph(identifier=k) + tmp += v + self.graphs[k] = tmp + self.dirty = True + self.graphs[k].add(triple_or_quad[:3]) + else: + k = context + if not self.graphs.get(k): + self.graphs[k] = RevisionGraph(identifier=context) + self.dirty = True + self.graphs[k].add(triple_or_quad[:3]) + + def addN(self, quads): + for s,p,o,c in quads: + self.add((s,p,o,c)) + + def remove(self, triple_or_quad): + s,p,o,context = self._spoc(triple_or_quad, default=True) + + for k, v in self.graphs.items(): + if context is None: + continue + if k == context: + if not self.graphs[k].editable: + tmp = RevisionGraph(identifier=k) + tmp += v + self.graphs[k] = tmp + self.dirty = True + self.graphs[k].remove(triple_or_quad[:3]) + + def triples(self, triple_or_quad): + context = None + if len(triple_or_quad) == 4: + context = triple_or_quad[3] + + for k, v in self.graphs.items(): + if context is None or k == context: + for s1, p1, o1 in v.triples(triple_or_quad[:3]): + yield (s1, p1, o1) + + def __contains__(self, triple_or_quad): + context = None + if len(triple_or_quad) == 4: + context = triple_or_quad[3] + + for k, v in self.graphs.items(): + if context is None or k == context: + if triple_or_quad[:3] in v: + return True + return False + + def quads(self, quad): + (s, p, o, context) = quad + for k, v 
diff --git a/quit/namespace.py b/quit/namespace.py
new file mode 100644
index 00000000..f7f6fbdb
--- /dev/null
+++ b/quit/namespace.py
@@ -0,0 +1,10 @@
+from rdflib.namespace import Namespace, RDF, RDFS, FOAF, DC, VOID, XSD
+
+__all__ = ['RDF', 'RDFS', 'FOAF', 'DC', 'VOID', 'XSD', 'PROV', 'QUIT', 'is_a']
+
+# missing namespaces
+PROV = Namespace('http://www.w3.org/ns/prov#')
+QUIT = Namespace('http://quit.aksw.org/')
+
+# simplified properties
+is_a = RDF.type
diff --git a/quit/processor.py b/quit/processor.py
new file mode 100644
index 00000000..984ec551
--- /dev/null
+++ b/quit/processor.py
@@ -0,0 +1,40 @@
+from rdflib.query import Processor, Result, UpdateProcessor
+from rdflib.plugins.sparql.sparql import Query
+from rdflib.plugins.sparql.parser import parseQuery, parseUpdate
+
+from quit.algebra import translateQuery, translateUpdate
+from quit.evaluate import evalQuery
+from quit.update import evalUpdate
+
+class SPARQLUpdateProcessor(UpdateProcessor):
+    def __init__(self, graph):
+        self.graph = graph
+
+    def update(self, strOrQuery, initBindings={}, initNs={}):
+        if isinstance(strOrQuery, str):
+            strOrQuery = translateUpdate(parseUpdate(strOrQuery), initNs=initNs)
+
+        return evalUpdate(self.graph, strOrQuery, initBindings)
+
+
+class SPARQLProcessor(Processor):
+
+    def __init__(self, graph):
+        self.graph = graph
+
+    def query(
+            self, strOrQuery, initBindings={},
+            initNs={}, base=None, DEBUG=False):
+        """
+        Evaluate a query with the given initial bindings, and initial
+        namespaces. The given base is used to resolve relative URIs in
+        the query and will be overridden by any BASE given in the query.
+        """
+
+        if not isinstance(strOrQuery, Query):
+            parsetree = parseQuery(strOrQuery)
+            query = translateQuery(parsetree, base, initNs)
+        else:
+            query = strOrQuery
+
+        return evalQuery(self.graph, query, initBindings, base)
\ No newline at end of file
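rdflib resolves graph.query() and graph.update() through its 'sparql' plugin registry, so with these two classes registered under that name the usual rdflib entry points run through quit's own algebra and evaluation modules. A minimal sketch of the dispatch (data and query strings are made up):

    from rdflib import ConjunctiveGraph

    graph = ConjunctiveGraph()
    graph.update("INSERT DATA { <http://ex.org/s> <http://ex.org/p> 'o' }")

    # graph.query() instantiates the registered SPARQLProcessor with the
    # graph and calls its query() method with the string below.
    for row in graph.query("SELECT ?s WHERE { ?s ?p ?o }"):
        print(row.s)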
diff --git a/quit/provenance.py b/quit/provenance.py
new file mode 100644
index 00000000..e257ba25
--- /dev/null
+++ b/quit/provenance.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+
+import functools as ft
+
+from rdflib import BNode
+from quit.namespace import FOAF, PROV, QUIT, RDFS
+
+class Blame(object):
+    """
+    Reusable Blame object for web client
+    """
+    def __init__(self, quit):
+        self.quit = quit
+
+    def _generate_values(self, quads):
+        result = list()
+
+        for quad in quads:
+            (s, p, o, c) = quad
+
+            # TODO: BNodes are not allowed in VALUES by the SPARQL
+            # specification; use UNDEF for now
+            _s = 'UNDEF' if isinstance(s, BNode) else s.n3()
+            _p = 'UNDEF' if isinstance(p, BNode) else p.n3()
+            _o = 'UNDEF' if isinstance(o, BNode) else o.n3()
+            _c = 'UNDEF' if isinstance(c, BNode) else c.n3()
+
+            result.append((_s, _p, _o, _c))
+        return result
+
+    def run(self, quads=None, commit='master'):
+        """
+        Annotate every quad with the author of the commit that introduced it.
+
+        Args:
+            quads: The quads to annotate; defaults to all quads of the instance.
+            commit: A branch name or commit id to start from.
+        Returns:
+            The SPARQL result set
+        """
+        # Resolve the branch name or ref to a Revision, then materialise
+        # the instance graph for that revision.
+        commit = self.quit.repository.revision(commit)
+        g = self.quit.instance(commit.id)
+
+        if quads is None:
+            quads = g.quads((None, None, None, None))
+        values = self._generate_values(quads)
+        values_string = ft.reduce(lambda acc, quad: acc + '( %s %s %s %s )\n' % quad, values, '')
+
+        q = """
+            SELECT ?s ?p ?o ?context ?commit ?name ?date WHERE {
+                ?commit quit:preceedingCommit* ?y .
+                ?y prov:endedAtTime ?date ;
+                    prov:qualifiedAssociation ?qa ;
+                    quit:updates ?update .
+                ?qa prov:agent ?user ;
+                    prov:role quit:author .
+                ?user foaf:mbox ?email ;
+                    rdfs:label ?name .
+                ?update quit:graph ?context ;
+                    quit:additions ?additions .
+                GRAPH ?additions {
+                    ?s ?p ?o
+                }
+                FILTER NOT EXISTS {
+                    ?y quit:preceedingCommit+ ?z .
+                    ?z quit:updates ?update2 .
+                    ?update2 quit:graph ?context ;
+                        quit:removals ?removals .
+                    GRAPH ?removals {
+                        ?s ?p ?o
+                    }
+                }
+                VALUES (?s ?p ?o ?context) {
+                    %s
+                }
+            }
+            """ % values_string
+
+        return self.quit.store.query(
+            q,
+            initNs={'foaf': FOAF, 'prov': PROV, 'quit': QUIT, 'rdfs': RDFS},
+            initBindings={'commit': QUIT['commit-' + str(commit.id)]})
\ No newline at end of file
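For context, this is roughly how the web layer is expected to call into Blame; it assumes an initialised Quit instance (here named quit) as assembled in quit.web.app, and the branch name is made up:

    from quit.provenance import Blame

    blame = Blame(quit)

    # Annotate every quad reachable from 'master' with the commit,
    # author name and date that introduced it.
    for row in blame.run(commit='master'):
        print(row.s, row.p, row.o, row.context, row.name, row.date)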
diff --git a/quit/quit.py b/quit/quit.py
index 104621d8..d192dfac 100755
--- a/quit/quit.py
+++ b/quit/quit.py
@@ -13,6 +13,8 @@ from quit.helpers import QueryAnalyzer
 from quit.parsers import NQuadsParser
 from quit.utils import splitinformation, sparqlresponse
+from quit.deprecated import deprecated
+from quit.web.app import create_app
 import handleexit
 import logging
 from flask import request, Response
@@ -45,7 +47,6 @@ logger.addHandler(fh)
 logger.addHandler(ch)
 
-
 def __savefiles():
     """Update the files after a update query was executed on the store."""
     for file in config.getfiles():
@@ -60,7 +61,6 @@ def __savefiles():
 
     return
 
-
 def __updategit():
     """Private method to add all updated tracked files."""
     gitrepo.addall()
@@ -70,7 +70,6 @@ def __updategit():
 
     return
 
-
 def __removefile(self, graphuri):
     # TODO actions needed to remove file also from
     #   - directory and
@@ -87,7 +86,6 @@ def __removefile(self, graphuri):
 
     return
 
-
 def __commit(self, message=None):
     """Private method to commit the changes."""
     try:
@@ -97,7 +95,6 @@ def __commit(self, message=None):
 
     return
 
-
 def reloadstore():
     """Create a new (empty) store and parse all known files into it."""
     store = MemoryStore
@@ -115,7 +112,6 @@ def reloadstore():
 
     return
 
-
 def applyupdates(actions):
     """Update files after store was updated."""
     graphsandfiles = config.getgraphurifilemap()
@@ -573,7 +569,7 @@ def pull():
     HTTP Response 201: If pull was possible
     HTTP Response: 403: If pull did not work
     """
-    if store.pull():
+    if gitrepo.pull():
         return '', status.HTTP_201_CREATED
     else:
         return '', status.HTTP_403_FORBIDDEN
@@ -619,8 +615,9 @@ def resultFormat():
     return {"mime": best, "format": formats[best]}
 
 
-def main():
+def main(config):
     """Start the app."""
+    app = create_app(config)
     app.run(debug=True, use_reloader=False)
@@ -653,4 +650,4 @@
 
     # The app is started with an exit handler
     with handleexit.handle_exit(savedexit):
-        main()
+        main(config)
diff --git a/quit/service.py b/quit/service.py
new file mode 100644
index 00000000..d378236c
--- /dev/null
+++ b/quit/service.py
@@ -0,0 +1,23 @@
+from quit.exceptions import ServiceException
+
+_services = {}
+
+class Service(object):
+    """
+    A Service object for SPARQL
+    """
+    def __init__(self, name, graph):
+        self.name = name
+        self.graph = graph
+
+def register(name, graph):
+    s = Service(name, graph)
+    _services[name] = s
+
+
+def get(name):
+    try:
+        s = _services[name]
+    except KeyError:
+        raise ServiceException("No service registered for %s" % name)
+    return s.graph
\ No newline at end of file
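The registry round-trip is then straightforward; the service IRI below is made up:

    from rdflib import Graph, URIRef
    from quit import service

    name = URIRef('http://example.org/service')  # hypothetical service IRI
    service.register(name, Graph())

    # get() returns the registered graph; unknown names raise ServiceException.
    graph = service.get(name)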
graphsInRequest, 'data': data, 'GraphObject': GraphObject}
 
 
+def graphdiff(g1, g2):
+    """
+    Diff between graph instances; should be replaced by/included in quit diff
+    """
+    diffs = {}
+    uris = set()
+
+    if g1 and isinstance(g1, InstanceGraph):
+        uris |= set(g1.graphs.keys())
+    if g2 and isinstance(g2, InstanceGraph):
+        uris |= set(g2.graphs.keys())
+
+    for uri in uris:
+        oid = getattr(g2.graphs[uri], 'id', None) if g2 is not None and uri in g2.graphs else None
+        changes = diffs.get((uri, oid), [])
+
+        if (g1 is not None and uri in g1.graphs.keys()) and (g2 is not None and uri in g2.graphs.keys()):
+            in_both, in_first, in_second = graph_diff(
+                to_isomorphic(g1.graphs[uri]), to_isomorphic(g2.graphs[uri]))
+
+            if len(in_second) > 0:
+                changes.append(('additions', in_second))
+            if len(in_first) > 0:
+                changes.append(('removals', in_first))
+        elif g1 is not None and uri in g1.graphs.keys():
+            changes.append(('removals', g1.graphs[uri]))
+        elif g2 is not None and uri in g2.graphs.keys():
+            changes.append(('additions', g2.graphs[uri]))
+        else:
+            continue
+
+        diffs[(uri, oid)] = changes
+
+    return diffs
+
 def _sigterm_handler(signum, frame):
     sys.exit(0)
 _sigterm_handler.__enter_ctx__ = False
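A sketch of graphdiff's output shape; identifiers and data are made up, and the imports in the utils hunk above (rdflib.compare, quit.graphs) are assumed:

    from rdflib import URIRef, Literal
    from quit.graphs import RevisionGraph, InstanceGraph
    from quit.utils import graphdiff

    uri = URIRef('http://example.org/graph1')

    old = RevisionGraph(identifier=uri)
    new = RevisionGraph(identifier=uri)
    new.add((URIRef('http://example.org/s'),
             URIRef('http://example.org/p'),
             Literal('o')))

    # graphdiff maps (graph uri, revision id) to a list of
    # ('additions' | 'removals', graph) pairs.
    for (graph_uri, rev), changes in graphdiff(
            InstanceGraph({uri: old}), InstanceGraph({uri: new})).items():
        for kind, changed in changes:
            print(graph_uri, rev, kind, len(changed))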
+        return
+
+    import logging
+
+    app.logger.setLevel(logging.DEBUG)
+
+    # logging format
+    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+
+    # create file handler which logs even debug messages
+    fh = logging.FileHandler('quit.log')
+    fh.setLevel(logging.DEBUG)
+    fh.setFormatter(formatter)
+
+    # create console handler with a higher log level
+    ch = logging.StreamHandler()
+    ch.setLevel(logging.ERROR)
+    ch.setFormatter(formatter)
+
+    # add the handlers to the logger
+    app.logger.addHandler(fh)
+    app.logger.addHandler(ch)
+
+def register_hook(app):
+    import time
+
+    @app.before_request
+    def before_request():
+        g.start = time.time()
+
+    @app.after_request
+    def after_request(response):
+        diff = time.time() - g.start
+        app.logger.debug('Request took %fs', diff)
+        return response
+
+
+def register_errorhandlers(app):
+
+    @app.errorhandler(404)
+    def page_not_found(error):
+        return render_template("404.html"), 404
+
+def render_template(template_name_or_list, **kwargs):
+
+    quit = current_app.config['quit']
+
+    context = {
+        'available_branches': quit.repository.branches,
+        'available_tags': quit.repository.tags,
+    }
+    context.update(kwargs)
+
+    return rt(template_name_or_list, **context)
+
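Bootstrapping then looks roughly like this; the QuitConfiguration arguments are an assumption, its actual signature lives in quit/conf.py:

    from quit.conf import QuitConfiguration
    from quit.web.app import create_app

    # Hypothetical configuration; adjust to the real QuitConfiguration API.
    config = QuitConfiguration(configfile='config.ttl')

    app = create_app(config)
    app.run(debug=True, use_reloader=False)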
"+traceback.format_exc()+"
", 400 \ No newline at end of file diff --git a/quit/web/modules/endpoint.py b/quit/web/modules/endpoint.py new file mode 100644 index 00000000..cb9f8ba5 --- /dev/null +++ b/quit/web/modules/endpoint.py @@ -0,0 +1,109 @@ +import re + +from flask import Blueprint, flash, redirect, request, url_for, current_app +from quit.web.app import render_template + +import sys, traceback + +__all__ = [ 'endpoint' ] + +endpoint = Blueprint('endpoint', __name__) + +pattern = re.compile(r""" + (?P(CONSTRUCT|SELECT|ASK|DESCRIBE|INSERT|DELETE|CREATE|CLEAR|DROP|LOAD|COPY|MOVE|ADD)) + """, re.VERBOSE | re.IGNORECASE) + +def parse_query_type(query): + try: + query_type = pattern.search(query).group("queryType").upper() + except AttributeError: + query_type = None + + return query_type + +@endpoint.route("/sparql", defaults={'branch_or_ref': 'master'}, methods=['POST', 'GET']) +@endpoint.route("/sparql/", methods=['POST', 'GET']) +def sparql(branch_or_ref): + """Process a SPARQL query (Select or Update). + + Returns: + HTTP Response with query result: If query was a valid select query. + HTTP Response 200: If request contained a valid update query. + HTTP Response 400: If request doesn't contain a valid sparql query. + """ + quit = current_app.config['quit'] + + try: + q = request.values.get('query', None) + if not q: + q = request.values.get('update', None) + + ref = request.values.get('ref', None) or 'refs/heads/master' + + res = "" + if q: + query_type = parse_query_type(q) + graph = quit.instance(branch_or_ref) + + if query_type in ['SELECT', 'CONSTRUCT', 'ASK', 'DESCRIBE']: + res = graph.query(q) + else: + res = graph.update(q) + quit.commit(graph, "Test Query", ref, query=q) + + context = { + 'result': res, + 'query': q, + } + + return render_template('sparql.html', **context) + except Exception as e: + print(e) + return render_template('sparql.html') + +@endpoint.route("/add", methods=['POST']) +def add(): + """Add nquads to the store. + + Returns: + HTTP Response 201: If data was processed (even if no data was added) + HTTP Response: 403: If Request contains non valid nquads + """ + try: + data = checkrequest(request) + + + for graphuri in data['graphs']: + if not store.graphexists(graphuri): + logger.debug('Graph ' + graphuri + ' is not part of the store') + return '', status.HTTP_403_FORBIDDEN + + addtriples(data) + + return '', 201 + except: + return '', 403 + + +@endpoint.route("/delete", methods=['POST']) +def delete(): + """Delete nquads from the store. + + Returns: + HTTP Response 201: If data was processed (even if no data was deleted) + HTTP Response: 403: If Request contains non valid nquads + """ + try: + values = checkrequest(request) + + + for graphuri in values['graphs']: + if not store.graphexists(graphuri): + logger.debug('Graph ' + graphuri + ' is not part of the store') + return '', status.HTTP_403_FORBIDDEN + + deletetriples(values) + + return '', 201 + except: + return '', 403 \ No newline at end of file diff --git a/quit/web/modules/git.py b/quit/web/modules/git.py new file mode 100644 index 00000000..fba6bd88 --- /dev/null +++ b/quit/web/modules/git.py @@ -0,0 +1,49 @@ +import re + +from flask import Blueprint, flash, redirect, request, url_for, current_app +from quit.web.app import render_template + +import sys, traceback + +__all__ = [ 'git' ] + +git = Blueprint('git', __name__) + +@git.route("/pull", methods=['POST', 'GET']) +def pull(): + """Pull from remote. 
diff --git a/quit/web/modules/git.py b/quit/web/modules/git.py
new file mode 100644
index 00000000..fba6bd88
--- /dev/null
+++ b/quit/web/modules/git.py
@@ -0,0 +1,49 @@
+import re
+
+from flask import Blueprint, flash, redirect, request, url_for, current_app
+from quit.web.app import render_template
+
+import sys, traceback
+
+__all__ = ['git']
+
+git = Blueprint('git', __name__)
+
+@git.route("/pull", methods=['POST', 'GET'])
+def pull():
+    """Pull from remote.
+
+    Returns:
+        HTTP Response 201: If pull was possible
+        HTTP Response 403: If pull did not work
+    """
+    #if current_app.config['repository'].pull():
+    #    return '', status.HTTP_201_CREATED
+    #else:
+    #    return '', status.HTTP_403_FORBIDDEN
+    try:
+        quit = current_app.config['quit']
+        quit.repository.pull()
+        return '', 201
+    except Exception:
+        return '', 403
+
+@git.route("/push", methods=['POST', 'GET'])
+def push():
+    """Push to remote.
+
+    Returns:
+        HTTP Response 201: If push was possible
+        HTTP Response 403: If push did not work
+    """
+    #if current_app.config['repository'].push():
+    #    return '', status.HTTP_201_CREATED
+    #else:
+    #    return '', status.HTTP_403_FORBIDDEN
+
+    try:
+        quit = current_app.config['quit']
+        quit.repository.push()
+        return '', 201
+    except Exception:
+        return '', 403
diff --git a/quit/web/templates/404.html b/quit/web/templates/404.html
new file mode 100644
index 00000000..360cb5f0
--- /dev/null
+++ b/quit/web/templates/404.html
@@ -0,0 +1,7 @@
+{% extends "default.html" %}
+
+{% block quit_content %}
+
+404 - Not Found
+
+{% endblock %}
\ No newline at end of file
diff --git a/quit/web/templates/base.html b/quit/web/templates/base.html
new file mode 100644
index 00000000..1fbcfd0c
--- /dev/null
+++ b/quit/web/templates/base.html
@@ -0,0 +1,34 @@
+{% block doc -%}
+<!DOCTYPE html>
+<html>
+  {%- block html %}
+  <head>
+    {%- block head %}
+    <title>{% block title %}{{title|default}}{% endblock title %}</title>
+
+    {%- block metas %}
+
+    {%- endblock metas %}
+
+    {%- block styles %}
+
+
+    {%- endblock styles %}
+    {%- endblock head %}
+  </head>
+  <body>
+    {% block body -%}
+    {% block navbar %}
+    {%- endblock navbar %}
+    {% block content -%}
+    {%- endblock content %}
+
+    {% block scripts %}
+
+
+    {%- endblock scripts %}
+    {%- endblock body %}
+  </body>
+  {%- endblock html %}
+</html>
+{% endblock doc -%}
\ No newline at end of file
diff --git a/quit/web/templates/blame.html b/quit/web/templates/blame.html
new file mode 100644
index 00000000..ee4d80e1
--- /dev/null
+++ b/quit/web/templates/blame.html
@@ -0,0 +1,11 @@
+{% extends "base.html" %}
+
+{% block quit_content %}
+
+{% if results|length > 0 %}
+    {{ results }}
+{% else %}
+    No results found
+{% endif %}
+
+{% endblock %}
\ No newline at end of file
diff --git a/quit/web/templates/default.html b/quit/web/templates/default.html
new file mode 100644
index 00000000..911c06da
--- /dev/null
+++ b/quit/web/templates/default.html
@@ -0,0 +1,28 @@
+{% extends "base.html" %}
+
+{% block title %}Quit{% endblock %}
+
+{% block navbar %}
+
+{% endblock %}
+
+{% block content %}
+<div>
+  <div>
+    <div>
+      {% block quit_content %}
+      {% endblock %}
+    </div>
+  </div>
+</div>
+{% endblock %}
+
+{% block scripts %}
+{{super()}}
+{% endblock %}
+
+{% block styles %}
+{{super()}}
+{% endblock %}
diff --git a/quit/web/templates/sparql.html b/quit/web/templates/sparql.html
new file mode 100644
index 00000000..9dddea7e
--- /dev/null
+++ b/quit/web/templates/sparql.html
@@ -0,0 +1,20 @@
+{% extends "default.html" %}
+
+{% block quit_content %}
+
+{% endblock %}
+
+{% block scripts %}
+{{super()}}
+
+
+{% endblock %}
+
+{% block styles %}
+{{super()}}
+
+{% endblock %}
\ No newline at end of file