-
Notifications
You must be signed in to change notification settings - Fork 7
/
patterns.lisp
629 lines (569 loc) · 24.2 KB
/
patterns.lisp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
;;; -*- show-trailing-whitespace: t; indent-tabs-mode: nil -*-
;;; Copyright (c) 2007,2008 Ivan Shvedunov. All rights reserved.
;;; Copyright (c) 2007,2008 David Lichteblau. All rights reserved.
;;; Redistribution and use in source and binary forms, with or without
;;; modification, are permitted provided that the following conditions
;;; are met:
;;;
;;; * Redistributions of source code must retain the above copyright
;;; notice, this list of conditions and the following disclaimer.
;;;
;;; * Redistributions in binary form must reproduce the above
;;; copyright notice, this list of conditions and the following
;;; disclaimer in the documentation and/or other materials
;;; provided with the distribution.
;;;
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
(in-package :xpath)
;;;; API
(defparameter *allow-variables-in-patterns* t
"If set to T, predicates in patterns are permitted to reference variables
using $var syntax. If set to NIL, such variable references signal a
compilation-time error. The default is T. Bind this variable to NIL to
enable compatibility with XSLT 1.0.
@see{compute-patterns}")
(defstruct (pattern
(:constructor %make-pattern
(key thunk priority value expression)))
priority
value
key
thunk
;; for profiler output only:
expression)
(setf (documentation 'pattern 'type)
"Represents a parsed XSLT pattern.
@see-constructor{compute-patterns}
@see-slot{pattern-value}
@see-slot{pattern-priority}
@see{make-pattern-matcher}")
(setf (documentation 'pattern-value 'function)
"@arg[instance]{a @class{pattern}}
@return{an object}
Return the user-specified value that will be returned by pattern
matchers if this pattern matches.
@see{matching-value}
@see{matching-values}")
(setf (documentation 'pattern-priority 'function)
"@arg[instance]{a @class{pattern}}
@return{an integer}
@short{Return the priority of this pattern.}
When several patters would match the same node, the pattern matcher
will only consider the patterns that have the highest priority.
@see{matching-value}
@see{matching-values}")
(defun matching-values (matcher node)
"@arg[matcher]{a pattern matching function}
@arg[node]{any node implementing the XPath protocol}
@return{an object}
@short{Apply a pattern matcher to node, and return one or more matching
values.}
For use with @code{matcher} functions that have been returned by
@fun{make-pattern-matcher} or a higher-level function like
@fun{make-pattern-matcher*}.
The resulting list will contain the user-specified values as
returned by @fun{pattern-value} on the patterns for this matcher, in
any order. Duplicates under @code{eql} will have been removed from
the list.
@see{node-matches-p}
@see{pattern-case}
@see{pattern-ecase}"
(with-float-traps-masked ()
(funcall matcher node)))
(defun matching-value (matcher node &optional (default nil))
"@arg[matcher]{a pattern matching function}
@arg[node]{any node implementing the XPath protocol}
@arg[default]{an object}
@return{an object}
@short{Apply a pattern matcher to node, and return exactly one value.}
For use with @code{matcher} functions that have been returned by
@fun{make-pattern-matcher} or a higher-level function like
@fun{make-pattern-matcher*}.
If exactly one pattern matches, or several patterns for the same value
match, the user-specified values as determined by @fun{pattern-value}
will be returned by this function.
If no pattern matches, @code{default} will be returned instead.
If more than one pattern of highest priority and different values
match, an @code{xpath-error} will be signalled.
@see{node-matches-p}
@see{pattern-case}
@see{pattern-ecase}"
(let ((matching-values (matching-values matcher node)))
(cond
((null matching-values)
default)
((null (cdr matching-values))
(car matching-values))
(t
(xpath-error "conflicting patterns")))))
(defun parse-pattern-expression (str)
"@arg[str]{a string}
@return{a s-expression-based pattern expression}
Parses an XSLT pattern into an s-expression."
(let ((form (parse-xpath str)))
(unless (consp form)
(xpath-error "not a valid pattern: ~A" str))
(labels ((process-inner-form (form)
(cond ((not (or (eq (car form) :path)
(and (eq (car form) :filter)
(let ((filter (second form)))
(and (consp filter)
(member (car filter)
'(:key :id))))
(equal (third form) '(:true)))
(member (car form) '(:key :id))))
(xpath-error "not a valid pattern: ~A ~A" str form))
((not (valid-expression-p form))
(xpath-error "invalid pattern filter"))
(t (list form))))
(process-form (form)
(if (eq (car form) :union)
(mapcan #'process-inner-form (rest form))
(process-inner-form form))))
(cons :patterns (process-form form)))))
(defun make-pattern-matcher* (expression environment)
"@arg[expression]{a string or s-expression}
@arg[environment]{an @code{environment}}
@return{the pattern matcher, a function}
@short{Create a pattern matcher for a single pattern.}
This function is a convenience wrapper around @fun{compute-patterns}
and @fun{make-pattern-matcher}.
The resulting matcher will return T if the specified @code{expression}
matches, or NIL if it doesn't.
@see{compute-patterns}
@see{matching-value}
@see{matching-values}"
(make-pattern-matcher (compute-patterns expression 42 t environment)))
(defun make-pattern-matcher (patterns)
"@arg[patterns]{a list of @class{pattern}s}
@return{the pattern matcher, a function}
@short{Create a pattern matcher that distinguishes between
multiple patterns.}
This function combines several patterns, and creates a matcher function
for use with @fun{matching-value} or @fun{matching-values}.
The matcher function will compare a node against each pattern, and
find the highest-priority pattern or patterns that match the node.
@see{compute-patterns}"
(let ((name-patterns (make-hash-table :test 'equal))
(namespace-patterns (make-hash-table :test 'equal))
(type-patterns (make-hash-table))
(other-patterns '()))
(loop
for cp in patterns
for key = (pattern-key cp)
for type = (car key)
for name = (cdr key)
for spec = (list (pattern-priority cp)
(pattern-thunk cp)
(pattern-value cp))
do
(cond
(name
(if (car name)
(push spec (gethash name name-patterns))
(push spec (gethash (cdr name) namespace-patterns))))
(type
(push spec (gethash type type-patterns)))
(t
(push spec other-patterns))))
(maybe-wrap-profiling
patterns
(lambda (node)
(let ((results nil)
(result-priority nil))
(flet ((process-spec (spec)
(destructuring-bind (priority thunk value)
spec
(when (and (or (null result-priority)
(<= result-priority priority))
(funcall thunk node))
(cond
((null result-priority)
(setf result-priority priority))
((< result-priority priority)
(setf result-priority priority)
(setf results nil)))
(pushnew value results)))))
(let ((name (xpath-protocol:local-name node))
(uri (xpath-protocol:namespace-uri node))
(type (node-type node)))
(when name
(mapc #'process-spec (gethash (cons name uri) name-patterns))
(mapc #'process-spec (gethash uri namespace-patterns)))
(mapc #'process-spec (gethash type type-patterns))
(mapc #'process-spec other-patterns)))
results)))))
(defun compute-patterns (expression priority value environment)
"@arg[expression]{a string or s-expression}
@arg[priority]{an integer}
@arg[value]{an object}
@arg[environment]{an @code{environment}}
@return{a list of @class{pattern}s}
@short{Parse an XSLT pattern expression into one or more pattern objects.}
Parses an expression, resolves its namespace-, variable-, and
function-references using the specified @code{environment}, and creates
a @class{pattern} object for the expression (if it does not use a union)
or one @class{pattern} object for each sub-expression that is being
joined into the union.
The specified @code{priority} is used as the @fun{pattern-priority},
and the specified @code{value} is used as the @fun{pattern-value}.
@see{make-pattern-matcher*}
@see{make-pattern-matcher}"
(multiple-value-bind (keys thunks subexpressions)
(compile-pattern-expression expression environment)
(mapcar (lambda (key thunk subexpression)
(%make-pattern key thunk priority value subexpression))
keys
thunks
subexpressions)))
(defun node-matches-p (node pattern-expression)
"@arg[node]{any node implementing the XPath protocol}
@arg[pattern-expression]{a string or s-expression}
@return{a boolean}
@short{Determine whether @code{node} matches the pattern expression.}
The expression is compiled using the dynamic environment.
@see{with-namespaces}
@see{with-variables}
@see{pattern-case}
@see{pattern-ecase}"
(matching-value
(if (functionp pattern-expression)
pattern-expression
(make-pattern-matcher* pattern-expression
(make-dynamic-environment *dynamic-namespaces*)))
node))
(define-compiler-macro node-matches-p (node pattern)
(once-only (pattern)
`(matching-value
(with-cache ((,pattern)
(*dynamic-namespaces* :test namespaces-match-p)
(*profiling-enabled-p* :test eql))
(make-pattern-matcher*
,pattern
(make-dynamic-environment *dynamic-namespaces*)))
,node)))
(defmacro pattern-ecase (node &rest clauses)
"@arg[node]{any node implementing the XPath protocol}
@arg[clauses]{cases of the form (expression &rest body)}
@return{The value returned by the matching clause body.}
@short{Match a node against static expressions.}
Evaluates @code{node}, and matches them against the specified XSLT
patterns. The first matching pattern will be chosen, i.e. earlier
clauses have higher priority that later clauses.
Expressions are compiled using the dynamic environment.
If no clause matches, an error will be signalled.
@see{with-namespaces}
@see{pattern-case}
@see{node-matches-p}
@see{with-variables}"
`(pattern-case ,node
,@clauses
(t (error "pattern-ecase fell through"))))
(defmacro pattern-case (node &body clauses)
"@arg[node]{any node implementing the XPath protocol}
@arg[clauses]{cases of the form (expression &rest body)}
@return{The value returned by the matching clause body, or nil.}
@short{Match a node against static expressions.}
Evaluates @code{node}, and matches them against the specified XSLT
patterns. The first matching pattern will be chosen, i.e. earlier
clauses have higher priority that later clauses.
Expressions are compiled using the dynamic environment.
As a special case, the last expression can be @code{t}, in which case
it matches unconditionally.
@see{with-namespaces}
@see{pattern-ecase}
@see{node-matches-p}
@see{with-variables}"
(let* ((otherwise-body nil)
(patterns
(loop
for priority downfrom 0
for ((expression . body) . rest) on clauses
if (eq expression t)
do
(if rest
(xpath-error "t clause not at end of pattern-case")
(setf otherwise-body body))
else collect
`(compute-patterns ,expression
,priority
(lambda () ,@body)
(make-dynamic-environment
*dynamic-namespaces*)))))
`(funcall
(matching-value
(with-cache ((*dynamic-namespaces* :test namespaces-match-p)
(*profiling-enabled-p* :test eql))
(make-pattern-matcher (append ,@patterns)))
,node
(lambda () ,@otherwise-body)))))
;;;; Implementation
(deftype pattern-expr ()
'(or string (cons (eql :patterns) (cons t null))))
(defun valid-expression-p (expr)
(cond
((atom expr) t)
((eq (first expr) :path)
(every (lambda (x)
(let ((filter (third x)))
(or (null filter) (valid-expression-p filter))))
(cdr expr)))
((eq (first expr) :variable)
*allow-variables-in-patterns*)
(t
(every #'valid-expression-p (cdr expr)))))
(defun compile-pattern-expression (pattern environment)
(unless (typep pattern 'pattern-expr)
(xpath-error "invalid pattern designator: ~A" pattern))
(compile-pattern-expression/sexpr (if (stringp pattern)
(parse-pattern-expression pattern)
pattern)
environment))
(defun step-key (step environment)
(destructuring-bind (axis test &rest predicates)
step
(declare (ignore predicates))
(let* ((type (axis-principal-node-type axis))
(attributep (eq type :attribute))
(name
(etypecase test
(string
(multiple-value-bind (local-name uri)
(decode-qname test environment attributep)
(cons local-name uri)))
(list
(ecase (first test)
(:processing-instruction
(setf type :processing-instruction)
nil)
(:namespace
(let ((prefix (second test)))
(cons nil (find-namespace prefix environment attributep))))
(:qname
;; This case is just an alternative to the string case for
;; the convenience of callers that have a split name already.
(destructuring-bind (prefix local-name) (rest test)
(let ((uri (find-namespace prefix environment attributep)))
(cons local-name uri))))))
(t
(ecase test
(* nil)
(:node
(setf type nil)
nil)
(:text
(setf type :text)
nil)
(:processing-instruction
(setf type :processing-instruction)
nil)
(:comment
(setf type :comment)
nil))))))
(cons type name))))
(defun compile-pattern-expression/sexpr (pattern environment)
(assert (eq (car pattern) :patterns))
(values
(mapcar (lambda (x) (subpattern-key x environment)) (cdr pattern))
(mapcar (lambda (x) (compile-subpattern x environment)) (cdr pattern))
(cdr pattern)))
(defun subpattern-key (subpattern environment)
(ecase (car subpattern)
((:path :filter)
(step-key (car (last subpattern)) environment))
((:key :id) nil)))
(defun %compile-subpattern (subpattern environment)
(ecase (car subpattern)
(:path
(let ((steps (cdr subpattern)))
(if (eq (caar steps) :root)
(values (lambda (node)
(xpath-protocol:node-type-p node :document))
(cdr steps))
(values (constantly t) steps))))
((:key :id)
(%compile-subpattern `(:filter ,subpattern (:true)) environment))
(:filter
(destructuring-bind (filter predicate &rest steps) (rest subpattern)
(let* ((filter-thunk (compile-xpath/sexpr filter environment))
(predicate-thunk
(compile-predicates (list predicate) environment)))
(values (lambda (starting-node)
(let ((initial-node-set
(funcall filter-thunk (make-context starting-node))))
(unless (typep initial-node-set 'node-set)
(xpath-error "not a node set: ~A" initial-node-set))
(find-in-pipe starting-node
(funcall predicate-thunk
(sorted-pipe-of initial-node-set)
starting-node)
:test #'xpath-protocol:node-equal)))
(nth-value 1 (%compile-subpattern (cons :path steps)
environment))))))))
(defun compile-subpattern (subpattern environment)
(multiple-value-bind (starting-node-test steps)
(%compile-subpattern subpattern environment)
(make-reverse-location-path
starting-node-test
(mapcar (lambda (step)
(make-reverse-location-step step environment))
steps)
(mapcar (lambda (step)
(make-reverse-predicate-check step environment))
steps))))
;; Example: Consider
;; (make-reverse-location-step '(:child "bar") env)
;; with the document
;; <foo> <bar> <baz/> </bar> </foo>
;;
;; Then a call to the closure with the path
;; (#<element node for bar>
;; #<element node for baz>)
;; will return a list of one path, with the next parent prepended:
;; ((#<element node for foo>
;; #<element node for bar>
;; #<element node for baz>))
;;
;; But a call to the closure with the path
;; (#<any-other-element>)
;; will return
;; ()
;; indicating failure.
;;
;; More than one path can be returned if the descendant-or-self axis is used,
;; one for each ancestor.
;;
(defun make-reverse-location-step (step environment)
(destructuring-bind (axis node-test &rest predicates)
step
(declare (ignore predicates))
(let ((principal-node-type (axis-principal-node-type axis))
(node-test-thunk-1
(compile-node-test node-test environment (eq axis :attribute)))
(node-test-thunk-2
(case axis
(:attribute
(lambda (node)
(xpath-protocol:node-type-p node :attribute)))
((:child :descendant-or-self)
(lambda (node)
(not (xpath-protocol:node-type-p node :attribute))))
(t
(xpath-error "invalid as a pattern axis: ~A" axis))))
(upstep
(case axis
((or :child :attribute)
(lambda (current-node rest-of-path)
(let ((parent (xpath-protocol:parent-node current-node)))
(if parent
(list (list* parent current-node rest-of-path))
nil))))
(:descendant-or-self
(lambda (current-node rest-of-path)
(loop
for parent = current-node
then (xpath-protocol:parent-node parent)
while parent
collect (list* parent
current-node
rest-of-path))))
(t
(xpath-error "invalid as a pattern axis: ~A" axis)))))
(lambda (current-path)
(destructuring-bind (current-node &rest rest-of-path)
current-path
(if (and (funcall node-test-thunk-1 current-node principal-node-type)
(funcall node-test-thunk-2 current-node))
(funcall upstep current-node rest-of-path)
nil))))))
;; FIXME: optimize this for the case where there aren't any predicates.
(defun make-reverse-predicate-check (step environment)
(destructuring-bind (axis node-test &rest predicates)
step
(if predicates
(let* ((node-test-fun
(compile-node-test node-test environment (eq axis :attribute)))
(thunk
(make-location-step axis node-test-fun predicates environment)))
(lambda (parent node starting-node)
(and (find-in-pipe node
(funcall thunk parent starting-node)
:test #'xpath-protocol:node-equal)
t)))
(constantly t))))
(defun make-reverse-location-path (starting-node-check steps predicate-checks)
;; - Run from back to front, computing "paths" of nodes, with one node
;; for each step in the location path (not including parents that the
;; location path doesn't match on).
;;
;; This list can have more than one path only if // is used, and multiple
;; parents might be candidates for the starting point.
;;
;; Predicates are ignored.
;;
;; - In a second step, check each such path to see if the predicates are
;; a match by working front the front to the back as usual.
;;
;; We can do this only after having determined the starting node,
;; because a predicate might use current().
;;
;; front-to-back is similar to `make-location-path', but doesn't
;; have to follow descendent-or-self fully, and doesn't accumulate a full
;; list of nodes.
(labels ((back-to-front (steps)
(if steps
(let ((head-thunk (first steps))
(rest-thunk (back-to-front (rest steps))))
(lambda (target-nodes)
(loop
for tail in (funcall rest-thunk target-nodes)
nconc (funcall head-thunk tail))))
(lambda (target-nodes)
(mapcar #'list target-nodes))))
(front-to-back (checks)
(if checks
(let ((check (first checks))
(rest-thunk (front-to-back (rest checks))))
(lambda (parent path starting-node)
(let ((node (car path)))
(and (funcall check parent node starting-node)
(funcall rest-thunk
node
(cdr path)
starting-node)))))
(constantly t))))
(let ((btf-thunk (back-to-front steps))
(ftb-thunk (front-to-back predicate-checks)))
(lambda (node)
(some (lambda (path)
(destructuring-bind (starting-node &rest child-path)
path
(and (funcall starting-node-check starting-node)
(funcall ftb-thunk
starting-node
child-path
starting-node))))
(funcall btf-thunk (list node)))))))
;;; FIXME: replace xpath-protocol:node-type-p with a new generic function
;;; xpath-protocol:node-type then implement node-type-p as an ordinary
;;; function on top of that.
(defun node-type (node)
(dolist (type '(:element
:attribute
:text
:document
:namespace
:processing-instruction
:comment))
(when (xpath-protocol:node-type-p node type)
(return type))))