-
-
Notifications
You must be signed in to change notification settings - Fork 21
/
special-tags.lisp
189 lines (167 loc) · 7.2 KB
/
special-tags.lisp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
(in-package #:org.shirakumo.plump.parser)
(declaim (inline starts-with ends-with))
(defun starts-with (find string)
(and (<= (length find) (length string))
(string= find string :start2 0 :end2 (length find))))
(defun ends-with (find string)
(let ((end (length string)))
(and (<= (length find) (length string))
(string= find string :start2 (- end (length find)) :end2 end))))
;; We simply ignore closing tags.
;; We can do this because the matching of the proper
;; closing tag in READ-CHILDREN happens before this
;; even has a chance to dispatch. Thus only
;; inappropriate or badly ordered closing tags are
;; handled by this, which are best left ignored.
;; That way the order of the closing tags is
;; restored naturally by the reading algorithm.
(define-tag-dispatcher (invalid-closing-tag *tag-dispatchers* *xml-tags* *html-tags*) (name)
(and (< 0 (length name)) (char= (elt name 0) #\/)))
(define-tag-parser invalid-closing-tag (name)
(consume-until (make-matcher (is #\>)))
(advance)
(dolist (tag *tagstack*)
(when (string-equal name tag :start2 1)
(throw tag NIL)))
NIL)
;; Comments are special nodes. We try to handle them
;; with a bit of grace, but having the inner content
;; be read in the best way possible is hard to get
;; right due to various commenting styles.
(define-tag-dispatcher (comment *tag-dispatchers* *xml-tags* *html-tags*) (name)
(starts-with "!--" name))
(define-tag-parser comment (name)
(make-comment
*root*
(decode-entities
(if (and (ends-with "--" name)
(char= (or (peek) #\!) #\>))
(prog1 (subseq name 3 (max 3 (- (length name) 2)))
(advance))
(prog1 (concatenate
'string (subseq name 3)
(consume-until (make-matcher (is "-->"))))
(advance-n 3))))))
;; Special handling for the doctype tag
(define-tag-dispatcher (doctype *tag-dispatchers* *xml-tags* *html-tags*) (name)
(string-equal name "!DOCTYPE"))
(define-tag-parser doctype (name)
(let ((declaration (read-tag-contents)))
(when (char= (or (consume) #\ ) #\/)
(advance)) ;; Consume closing
(make-doctype *root* (string-trim " " declaration))))
;; Special handling for the XML header
(define-tag-dispatcher (xml-header *tag-dispatchers* *xml-tags*) (name)
(string-equal name "?xml"))
(define-tag-parser xml-header (name)
(let* ((attrs-string (consume-until (make-matcher (and (is #\?) (next (is #\>))))))
(attrs (with-lexer-environment (attrs-string)
(read-attributes))))
(unless (gethash "version" attrs)
(setf (gethash "version" attrs) "1.0"))
(advance-n 2)
(make-xml-header *root* :attributes attrs)))
;; Special handling for CDATA sections
(define-tag-dispatcher (cdata *tag-dispatchers* *xml-tags*) (name)
(starts-with "![CDATA[" name))
(define-tag-parser cdata (name)
;; KLUDGE: Since tag names can contain [ and ] we need to
;; take special care of cases where there is a token in the
;; cdata without any characters that would stop the tag
;; name reading.
(let ((text (if (string= name "]]" :start1 (- (length name) 2))
(prog1 (subseq name 8 (- (length name) 2))
(advance-n 1))
(prog1 (concatenate 'string
(subseq name 8)
(consume-until (make-matcher (is "]]>"))))
(advance-n 3)))))
(make-cdata *root* :text text)))
;; Shorthand macro to define self-closing elements
(defmacro define-self-closing-element (tag &rest lists)
`(progn
(define-tag-dispatcher (,tag ,@lists) (name)
(string-equal name ,(string tag)))
(define-tag-parser ,tag (name)
(let ((attrs (read-attributes)))
(when (char= (or (consume) #\ ) #\/)
(advance)) ;; Consume closing
(make-element *root* ,(string-downcase tag) :attributes attrs)))
(define-tag-printer ,tag (node)
(plump-dom::wrs "<" (tag-name node))
(serialize (attributes node) *stream*)
(if (< 0 (length (children node)))
(progn
(plump-dom::wrs ">")
(loop for child across (children node)
do (serialize child *stream*))
(plump-dom::wrs "</" (tag-name node) ">"))
(plump-dom::wrs ">"))
T)))
;; According to http://www.w3.org/html/wg/drafts/html/master/syntax.html#void-elements
;; area, base, br, col, embed, hr, img, input, keygen, link, menuitem, meta, param, source, track, wbr
(macrolet ((define-all (&rest tags)
`(progn ,@(loop for tag in tags collect `(define-self-closing-element ,tag *tag-dispatchers* *html-tags*)))))
(define-all area base br col embed hr img input keygen link menuitem meta param source track wbr))
;; wildcard dispatcher
(define-wildcard-dispatcher html-default *tag-dispatchers* *html-tags*)
;; default html5 behavior
(define-tag-printer html-default (node)
(plump-dom::wrs "<" (tag-name node))
(serialize (attributes node) *stream*)
(plump-dom::wrs ">")
(loop for child across (children node)
do (serialize child *stream*))
(plump-dom::wrs "</" (tag-name node) ">")
T)
(define-tag-parser html-default (name)
(read-standard-tag name))
(defun read-fulltext-element-content (name)
(with-output-to-string (out)
(tagbody
start (let ((next (peek)))
(case next
((NIL) (go end))
(#\< (advance) (go tag))
(T (write-char next out) (advance) (go start))))
tag (case (peek)
(#\/ (advance) (go name))
(T (write-char #\< out) (go start)))
name (let ((tag (consume-until (make-matcher (not :name)))))
(cond ((and (string-equal tag name) (eql #\> (peek)))
(advance) (go end))
(T
(write-string "</" out)
(write-string tag out) (go start))))
end)))
;; Some tags accept arbitrary text and no sub-elements.
(defmacro define-fulltext-element (tag &rest lists)
(let ((name (string-downcase tag)))
`(progn
(define-tag-dispatcher (,tag ,@lists) (name)
(string-equal name ,name))
(define-tag-parser ,tag (name)
(let* ((closing (consume))
(attrs (if (member closing *whitespace*)
(prog1 (read-attributes)
(setf closing (consume)))
(make-attribute-map))))
(case closing
(#\/
(advance)
(make-element *root* ,name :attributes attrs))
(#\>
(let ((*root* (make-fulltext-element *root* ,name :attributes attrs))
(string (read-fulltext-element-content name)))
(make-text-node *root* string)
*root*)))))
(define-tag-printer ,tag (node)
(plump-dom::wrs "<" (tag-name node))
(serialize (attributes node) *stream*)
(plump-dom::wrs ">")
(loop for child across (children node)
do (serialize child *stream*))
(plump-dom::wrs "</" (tag-name node) ">")
T))))
(define-fulltext-element style *tag-dispatchers* *html-tags*)
(define-fulltext-element script *tag-dispatchers* *html-tags*)