<?xml version="1.0" encoding="UTF-8"?>
<!-- profile.rng (forked from acdh-oeaw/xsl-tokenizer) -->
<grammar ns="" xmlns="http://relaxng.org/ns/structure/1.0" datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes" xmlns:sch="http://purl.oclc.org/dsdl/schematron">
<documentation xmlns="http://relaxng.org/ns/compatibility/annotations/1.0">This grammar defines the structure of a Tokenization Profile Definition Document used by the xsl-tokenizer. For more information see http://github.com/acdh-oeaw/xsl-tokenizer.</documentation>
<define name="anyElement">
  <documentation xmlns="http://relaxng.org/ns/compatibility/annotations/1.0">A class of any element in any namespace with any number of attributes of any name, containing any mix of text and further elements of the same class.</documentation>
  <!-- Wildcard pattern for embedded foreign markup; it is referenced from the
       xsl:stylesheet slot inside postProcessing. -->
  <element>
    <anyName/>
    <!-- Any attribute, from any namespace, any number of times. -->
    <zeroOrMore>
      <attribute>
        <anyName/>
      </attribute>
    </zeroOrMore>
    <!-- "mixed" interleaves text with the child elements. Without it, character
         data (e.g. the content of xsl:text or literal text inside a template)
         would make the embedded markup invalid. -->
    <mixed>
      <zeroOrMore>
        <ref name="anyElement"/>
      </zeroOrMore>
    </mixed>
  </element>
</define>
<define name="profile.atts">
  <documentation xmlns="http://relaxng.org/ns/compatibility/annotations/1.0">The class of attributes on the root element profile.</documentation>
  <!-- All three attributes below are required; none is wrapped in an optional pattern. -->

  <!-- Creation timestamp, typed as xs:dateTime. -->
  <attribute name="created">
    <data type="dateTime"/>
  </attribute>

  <!-- Free-text identifier (an attribute without a pattern child means text;
       it is spelled out here). -->
  <attribute name="id">
    <documentation xmlns="http://relaxng.org/ns/compatibility/annotations/1.0">The ID of this tokenization profile. This is used by the webapplication xtx (see https://github.com/acdh-oeaw/xtx).</documentation>
    <text/>
  </attribute>

  <!-- The attribute itself must be present; its value is either empty or an xs:dateTime. -->
  <attribute name="last-updated">
    <documentation xmlns="http://relaxng.org/ns/compatibility/annotations/1.0">Timestamp of last profile update. This is used by the webapplication xtx (see https://github.com/acdh-oeaw/xtx).</documentation>
    <choice>
      <empty/>
      <data type="dateTime"/>
    </choice>
  </attribute>
</define>
<define name="md.class">
  <documentation xmlns="http://relaxng.org/ns/compatibility/annotations/1.0">The class of metadata elements</documentation>
  <!-- Ordered sequence: name, creator, (contact)?, description. All are plain text. -->

  <!-- Required: profile name. -->
  <element name="name">
    <documentation xmlns="http://relaxng.org/ns/compatibility/annotations/1.0">The name of the Profile</documentation>
    <text/>
  </element>

  <!-- Required: who created the profile. -->
  <element name="creator">
    <documentation xmlns="http://relaxng.org/ns/compatibility/annotations/1.0">Name of the creator.</documentation>
    <text/>
  </element>

  <!-- The only optional member of the sequence. -->
  <optional>
    <element name="contact">
      <documentation xmlns="http://relaxng.org/ns/compatibility/annotations/1.0">Contact of the creator.</documentation>
      <text/>
    </element>
  </optional>

  <!-- Required: free-text description. -->
  <element name="description">
    <documentation xmlns="http://relaxng.org/ns/compatibility/annotations/1.0">A description of the profile (purpose, scope, projects etc.)</documentation>
    <text/>
  </element>
</define>
<define name="params.class">
  <documentation xmlns="http://relaxng.org/ns/compatibility/annotations/1.0">The class of tokenizer parameters: zero or more param elements, each identified by one of a fixed set of keys and carrying its setting in @value and/or as text content.</documentation>

  <!-- Schematron: the boolean switches must carry a value castable to xs:boolean.
       A missing @value also fails, since an empty sequence is not castable. -->
  <sch:pattern>
    <sch:rule context="param[@key = ('preserve-ws','debug','useLexicon')]">
      <sch:assert test="@value castable as xs:boolean">@value in param with @key <sch:value-of select="@key"/> must be castable to xs:boolean.</sch:assert>
    </sch:rule>
  </sch:pattern>

  <!-- Schematron: a lexicon is only required when useLexicon is actually switched on.
       The context is restricted to the two true lexical forms of xs:boolean
       ('true' and '1', whitespace-normalized), so useLexicon="false" no longer
       demands a lexicon parameter. Non-boolean values are reported by the rule above. -->
  <sch:pattern>
    <sch:rule context="param[@key = 'useLexicon'][normalize-space(@value) = ('true','1')]">
      <sch:assert test="exists(../param[@key = 'lexicon'])">Parameter with key 'lexicon' must be defined if useLexicon is set to true. The parameter must contain one lexicon tokens per line.</sch:assert>
    </sch:rule>
  </sch:pattern>

  <!-- Schematron: the lexicon parameter must carry xml:space. -->
  <sch:pattern>
    <sch:rule context="param[@key = 'lexicon']">
      <sch:assert test="@xml:space">Missing attribute @xml:space</sch:assert>
    </sch:rule>
  </sch:pattern>

  <zeroOrMore>
    <element name="param">
      <!-- Closed list of recognised parameter keys. -->
      <attribute name="key">
        <choice>
          <value>ws-regex</value>
          <value>pc-regex</value>
          <value>preserve-ws</value>
          <value>debug</value>
          <value>useLexicon</value>
          <value>lexicon</value>
        </choice>
      </attribute>
      <optional>
        <attribute name="value">
          <text/>
        </attribute>
      </optional>
      <!-- xml:space, if present, may only be "preserve". -->
      <optional>
        <attribute ns="http://www.w3.org/XML/1998/namespace" name="space">
          <value>preserve</value>
        </attribute>
      </optional>
      <!-- text already matches the empty string, so no optional wrapper is needed. -->
      <text/>
    </element>
  </zeroOrMore>
</define>
<start>
  <!-- Document root: a profile element holding metadata, parameters, optional
       post-processing, namespace declarations and the five expression lists, in this order. -->
  <element name="profile">
    <ref name="profile.atts"/>
    <element name="about">
      <documentation xmlns="http://relaxng.org/ns/compatibility/annotations/1.0">Basic metadata about this Profile Definition Document.</documentation>
      <ref name="md.class"/>
    </element>
    <element name="parameters">
      <ref name="params.class"/>
    </element>
    <optional>
      <element name="postProcessing">
        <optional>
          <!-- An embedded XSLT stylesheet. It must be allowed to carry attributes
               (XSLT requires at least @version on xsl:stylesheet) and any number of
               top-level children. The previous model allowed no attributes and
               exactly one child, which rejected ordinary stylesheets. -->
          <element name="stylesheet" ns="http://www.w3.org/1999/XSL/Transform">
            <zeroOrMore>
              <attribute>
                <anyName/>
              </attribute>
            </zeroOrMore>
            <zeroOrMore>
              <ref name="anyElement"/>
            </zeroOrMore>
          </element>
        </optional>
      </element>
    </optional>
    <!-- Prefix-to-URI bindings; the prefixes are what the Schematron rule on
         expression elements looks up. -->
    <zeroOrMore>
      <element name="namespace">
        <attribute name="prefix">
          <data type="NCName"/>
        </attribute>
        <data type="anyURI"/>
      </element>
    </zeroOrMore>
    <!-- The five containers below are all required, but each may be empty
         (borderDefElts.class is zero or more expressions). -->
    <element name="copy">
      <ref name="borderDefElts.class"/>
    </element>
    <element name="ignore">
      <ref name="borderDefElts.class"/>
    </element>
    <element name="in-word-tags">
      <ref name="borderDefElts.class"/>
    </element>
    <element name="floating-blocks">
      <ref name="borderDefElts.class"/>
    </element>
    <element name="structure">
      <ref name="borderDefElts.class"/>
    </element>
    <!-- Optional list of named document attributes; when the wrapper is present it
         needs at least one doc-attribute, each holding exactly one expression. -->
    <optional>
      <element name="doc-attributes">
        <oneOrMore>
          <element name="doc-attribute">
            <attribute name="name">
              <data type="NCName"/>
            </attribute>
            <ref name="expression.class"/>
          </element>
        </oneOrMore>
      </element>
    </optional>
  </element>
</start>
<define name="borderDefElts.class">
  <!-- Shared content model of the expression-list containers:
       either nothing at all, or one or more expression elements. -->
  <choice>
    <empty/>
    <oneOrMore>
      <ref name="expression.class"/>
    </oneOrMore>
  </choice>
</define>
<define name="expression.class">
  <documentation xmlns="http://relaxng.org/ns/compatibility/annotations/1.0">Expression elements contain XPath expression to locate elements in the input document. We try to ensure (very simplistically) that a used namespace prefix is declared.</documentation>
  <!-- Schematron: only the leading prefix of the expression is checked.
       The context regex matches optional whitespace, an NCName
       ([\i-[:]][\c-[:]]*), and a single colon that is not followed by another colon.
       - Requiring a non-colon after the colon keeps axis specifiers such as
         "child::x" from being read as a namespace prefix "child".
       - Using NCName classes instead of \w also covers prefixes containing
         "_", "-" or ".", which \w (no punctuation in XSD regex) does not match.
       normalize-space() strips the tolerated leading whitespace before the
       prefix is cut off at the first colon. -->
  <sch:pattern>
    <sch:rule context="expression[matches(., '^\s*[\i-[:]][\c-[:]]*:[^:]')]">
      <sch:let name="prefix" value="substring-before(normalize-space(.), ':')"/>
      <sch:assert test="$prefix = root()//namespace/@prefix">Missing namespace element for prefix <sch:value-of select="$prefix"/>.</sch:assert>
    </sch:rule>
  </sch:pattern>
  <element name="expression">
    <text/>
  </element>
</define>
</grammar>