forked from prettydiff/prettydiff
-
Notifications
You must be signed in to change notification settings - Fork 0
/
markupmin.js
380 lines (358 loc) · 16.1 KB
/
markupmin.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
/*
This code may be used internally to Travelocity without limitation,
exclusion, or restriction. If this code is used externally the
following comment must be included everywhere this code is used.
*/
/***********************************************************************
This is written by Austin Cheney on 7 May 2009. Anybody may use this
code without permission so long as this comment exists verbatim in each
instance of its use.
http://www.travelocity.com/
http://mailmarkup.org/
**********************************************************************/
/*
If the comments argument is supplied with the value "comments" this
function presumes it is being used as a tool of beautification. In
this case JSMin is not used or needed and the next statements are
irrelevant. This is a minification application for markup languages.
Its only requirement is that the modified jsmin.js be included prior to
the inclusion of this code. The required jsmin.js is customized to
independently support CSS and JavaScript minification. It must be
obtained from the following location.
http://prettydiff.com/fulljsmin.js
Minification is achieved according to this pattern:
1) It looks for syntax characters inside tags. Whitespace is
tokenized inside tags and removes all whitespace directly next to
a syntax character except quotes that do not occur directly next
or one space away from an equal sign. Numbers, hyphens (dash,
minus), underscores, alpha characters, and ampersands were not
considered in this logic.
3) It takes the contents of script and style tags, runs jsmin against
this content, removes the original content, and then returns the
code minified with jsmin.
4) It removes all comments.
5) Then at the end it removes even more spaces, using loops and the
replace method, to tokenize additional spaces introduced by the
prior logic except single spaces adjacent to singleton tags or
content.
Arguments:
* x = source code
* comments = whether or not to preserve comments. Accepted values are
"comments" and "beautify". The only difference is that "beautify"
will preserve the comments with inline CSS and JavaScript for use
with the markup_beauty.js application.
* presume_html = This lets the application know to expect HTML with
singleton tags that look like starting tags, such as "<br>" instead
of "<br/>". The tag names used by this argument are located in the
array named "html" at the top of this code. This argument accepts a
Boolean
value.
* top_comments is passed through to the modified fulljsmin.js. This
informs JSMin to preserve all comments before the first line of code
in CSS and JavaScript code. This argument accepts a Boolean value.
markupmin is composed of these child objects:
* it: This self initiating function performs a single pass through
the data looking for markup tags, script blocks, style blocks, and
comments.
* markupspace: This function is executed by the it function when a
tag is encountered that is not a comment, style block, or script
block. This function serves to remove all spaces around syntax
characters inside tags except double and single quotes.
* markupcomment: This function removes opening comment characters and
every character after until the closing comment is found.
* markupscript: This function is fired if a script or style tag is
found. It checks if jsmin exists as a named object, or otherwise
immediately exits. This function finds the opening tag, and
records it. Then it takes all the characters inside the concerned
tag and passes them to jsmin. The opening tag and output of jsmin
is returned only after the original opening tag and tag content are
removed.
At the end all other white space is tokenized and spaces around the
opening and closing of tags, except those adjacent to content, is
removed.
*/
/**
 * Minifies a string of markup: whitespace inside tags and content is
 * collapsed, markup comments are removed unless asked to be kept, and the
 * content of script and style blocks is handed to a global `jsmin`
 * function when one is available.
 *
 * @param {string} x - markup source code.
 * @param {string} [comments] - "comments" or "beautify" keeps markup
 *     comments.  "beautify" and "diff" skip whitespace normalisation of
 *     the opening script/style tag, and "beautify" also skips jsmin.
 * @param {boolean} [presume_html] - when exactly `true`, tag names listed
 *     in the local `html` array are treated as singleton tags.
 * @param {boolean} [top_comments] - passed through unchanged as the fifth
 *     argument of `jsmin`.
 * @returns {string} the minified markup.
 */
var markupmin = function (x, comments, presume_html, top_comments) {
    "use strict";
    //i, a, b and c are shared on purpose: the closures below both read
    //and advance the scan position "i" of the main loop.
    var i,
        a,
        b,
        c,
        y,
        Y,
        verbose = (/^\s+$/),
        white = (/\s/),
        html = ["br", "meta", "link", "img", "hr", "base", "basefont", "area", "col", "frame", "input", "param"],
        //Collects the characters of one tag, starting at index "i" and
        //stopping at the next ">", blanks those indexes, normalises
        //the whitespace around syntax characters and stores the whole
        //tag at the index of its closing ">".
        markupspace = function () {
            var d = "",
                Y = x.length;
            for (a = i; a < Y; a += 1) {
                if (x[a] === ">") {
                    break;
                } else {
                    d = d + x[a];
                    x[a] = "";
                }
            }
            d = d.replace(/\s+/g, " ").replace(/\s*,\s+/g, ", ").replace(/\s*\/\s*/g, "/").replace(/\s*=\s*/g, "=").replace(/\s*:\s*/g, ":").replace(/ \="/g, "=\"").replace(/ \='/g, "='") + ">";
            i = a;
            x[i] = d;
        },
        //Blanks every character of a markup comment up to and
        //including the closing "-->".  If the comment is never closed
        //all remaining characters are blanked.  When comments are to
        //be kept the collected text is written back, prefixed with a
        //space, at the current index.
        markupcomment = function () {
            var Y = x.length;
            c = "";
            for (b = i; b < Y; b += 1) {
                if (x[b] === "-" && x[b + 1] === "-" && x[b + 2] === ">") {
                    x[b] = "";
                    x[b + 1] = "";
                    x[b + 2] = "";
                    i = b + 2;
                    break;
                } else if (comments !== "comments" && comments !== "beautify") {
                    x[b] = "";
                } else {
                    c = c + x[b];
                    x[b] = "";
                }
            }
            if (comments === "comments" || comments === "beautify") {
                c = " " + c + "-->";
                x[i] = c;
            }
        },
        //Gathers a whole script or style block ("z" is the tag name),
        //passes its content to jsmin unless comments === "beautify",
        //and stores opening tag + content + closing tag at the index
        //of the closing tag's ">".
        markupscript = function (z) {
            var e = [],
                f,
                h = "",
                j = "</" + z,
                m,
                Y = x.length,
                cdataStart = (/^(\s*\/+<!\[+[A-Z]+\[+)/),
                cdataEnd = (/(\/+\]+>\s*)$/),
                scriptStart = (/^(\s*<\!\-\-)/),
                scriptEnd = (/(\/+\-\->\s*)$/),
                cs = "",
                ce = "";
            //typeof is required here: comparing an undeclared
            //identifier against undefined throws a ReferenceError
            //instead of allowing the documented quiet exit.
            if (typeof jsmin !== "function") {
                return;
            }
            //Find the start of the closing tag, case insensitively.
            for (c = i; c < Y; c += 1) {
                if ((y.slice(c, c + j.length)).toLowerCase() === j) {
                    f = c;
                    break;
                }
            }
            //No closing tag: stop the main scan here and leave the
            //remaining characters untouched.
            if (f === undefined) {
                i = Y;
                return;
            }
            //Move everything from the current index up to the closing
            //tag into "e", blanking the source indexes as they go.
            for (c = i; c < f; c += 1) {
                if (x[c - 1] !== ">") {
                    e.push(x[c]);
                    x[c] = "";
                } else {
                    break;
                }
            }
            //The first collected entry is kept apart as the prefix
            //"m"; after markupspace has run it is the opening tag.
            m = e[0];
            e.splice(0, 1);
            if (white.test(e[0])) {
                e.splice(0, 1);
            }
            //Collect the closing tag up to, but excluding, its ">".
            for (f; f < Y; f += 1) {
                if (x[f] !== ">") {
                    h = h + x[f];
                    x[f] = "";
                } else {
                    break;
                }
            }
            h = h + ">";
            i = f;
            if (e.join("") === "") {
                x[i] = m + h;
                return;
            }
            e = e.join("");
            if (comments !== "beautify") {
                //Leading/trailing CDATA or comment wrappers are cut
                //off before minification and restored afterwards.
                if (cdataStart.test(e)) {
                    cs = e.match(cdataStart)[0];
                    e = e.replace(cdataStart, "");
                } else if (scriptStart.test(e)) {
                    cs = e.match(scriptStart)[0];
                    e = e.replace(scriptStart, "");
                }
                if (cdataEnd.test(e)) {
                    ce = e.match(cdataEnd)[0];
                    e = e.replace(cdataEnd, "");
                } else if (scriptEnd.test(e)) {
                    ce = e.match(scriptEnd)[0];
                    e = e.replace(scriptEnd, "");
                }
                if (z === "style") {
                    e = cs + jsmin(e, 3, "css", true, top_comments) + ce;
                } else {
                    e = cs + jsmin(e, 3, "javascript", false, top_comments) + ce;
                }
            }
            //Strip leading whitespace in one step.  Trimming with an
            //index loop while shortening the string skipped characters
            //and could delete code that followed the whitespace.
            e = e.replace(/^\s+/, "");
            x[i] = m + e + h;
        },
        //Keeps a server side block verbatim.  "end" is its two
        //character terminator; everything before the terminator's
        //last character is stored at the starting index.
        preserve = function (end) {
            var Y = x.length;
            b = "";
            for (c = i; c < Y; c += 1) {
                if (x[c - 1] + x[c] === end) {
                    break;
                }
            }
            for (a = i; a < c; a += 1) {
                b += x[a];
                x[a] = "";
            }
            x[i] = b;
            i = c;
        },
        //Collects text content up to the next "<" and stores it, with
        //whitespace runs collapsed to one space, at the index just
        //before that "<".
        content = function () {
            var Y = x.length;
            b = "";
            for (a = i; a < Y; a += 1) {
                if (x[a] === "<") {
                    break;
                } else {
                    b = b + x[a];
                    x[a] = "";
                }
            }
            i = a - 1;
            x[i] = b.replace(/\s+/g, " ");
        },
        //This self invoking function is the action piece of
        //markupmin.  It is a single loop that executes the closures
        //described above when comments, tags, style blocks, and/or
        //script blocks are encountered.  "y" keeps the untouched
        //source string while "x" becomes an array of characters that
        //the closures overwrite with whole tokens.
        it = (function () {
            var a,
                b,
                c = x.length;
            y = x;
            x = x.split("");
            for (i = 0; i < x.length; i += 1) {
                if ((y.slice(i, i + 7)).toLowerCase() === "<script") {
                    //Read the attributes of the opening tag only.  The
                    //scan starts directly after "<script" so that a
                    //bare "<script>" yields no attribute text rather
                    //than the beginning of the script body.
                    a = [];
                    for (b = i + 7; b < c; b += 1) {
                        if (y.charAt(b) === ">") {
                            break;
                        }
                        a.push(y.charAt(b));
                    }
                    a = a.join("").toLowerCase().replace(/'/g, "\"");
                    if (comments !== "beautify" && comments !== "diff") {
                        markupspace();
                    }
                    //Only blocks without a type, or with a JavaScript
                    //type, are passed on for minification.
                    if (a.indexOf("type=\"") === -1 || a.indexOf("type=\"text/javascript\"") !== -1 || a.indexOf("type=\"application/javascript\"") !== -1 || a.indexOf("type=\"application/x-javascript\"") !== -1 || a.indexOf("type=\"text/ecmascript\"") !== -1 || a.indexOf("type=\"application/ecmascript\"") !== -1) {
                        markupscript("script");
                    }
                } else if ((y.slice(i, i + 6)).toLowerCase() === "<style") {
                    //Same attribute scan as for script blocks.
                    a = [];
                    for (b = i + 6; b < c; b += 1) {
                        if (y.charAt(b) === ">") {
                            break;
                        }
                        a.push(y.charAt(b));
                    }
                    a = a.join("").toLowerCase().replace(/'/g, "\"");
                    if (comments !== "beautify" && comments !== "diff") {
                        markupspace();
                    }
                    if (a.indexOf("type=\"") === -1 || a.indexOf("type=\"text/css\"") !== -1) {
                        markupscript("style");
                    }
                } else if (y.slice(i, i + 4) === "<!--" && x[i + 4] !== "#") {
                    markupcomment();
                } else if (y.slice(i, i + 5) === "<?php") {
                    preserve("?>");
                } else if (y.slice(i, i + 2) === "<%") {
                    preserve("%>");
                } else if ((x[i] === "<" && x[i + 1] !== "!") || (x[i] === "<" && x[i + 1] === "!" && x[i + 2] !== "-")) {
                    markupspace();
                } else if (x[i] === undefined) {
                    x[i] = "";
                } else if (x[i - 1] !== undefined && x[i - 1].charAt(x[i - 1].length - 1) === ">") {
                    content();
                }
            }
        }());
    //The following loop pushes not empty indexes from the "x" array
    //into another temporary array: "i".
    i = [];
    Y = x.length;
    for (a = 0; a < Y; a += 1) {
        if (x[a] !== "") {
            i.push(x[a]);
        }
    }
    //The following loop pushes indexes from temporary array "i"
    //into the newly emptied array "x" that are not consecutive runs
    //of white space.
    x = [];
    Y = i.length;
    for (a = 0; a < Y; a += 1) {
        if (!verbose.test(i[a]) || (verbose.test(i[a]) && !verbose.test(i[a + 1]))) {
            x.push(i[a]);
        }
    }
    //The following loop blanks whitespace only indexes that sit
    //between two closing tags when the first closing tag follows
    //content.
    Y = x.length;
    for (a = 2; a < Y; a += 1) {
        c = 0;
        //This is a cheat to look at vocabulary to determine if a
        //tag is a singleton opposed to looking at only syntax.
        if (presume_html === true) {
            b = "";
            for (i = 1; i < x[a].length; i += 1) {
                if (/[a-z]/i.test(x[a].charAt(i))) {
                    b += x[a].charAt(i);
                } else {
                    break;
                }
            }
            for (i = 0; i < html.length; i += 1) {
                if (b === html[i] && x[a].charAt(0) === "<") {
                    c = 1;
                    break;
                }
            }
        }
        if (verbose.test(x[a - 1])) {
            //"a > 2" guards the x[a - 3] lookup, which does not exist
            //while a === 2.  With nothing before the first closing tag
            //there is no preceding content, so the whitespace stays.
            if (c !== 1 &&
                    x[a].charAt(0) === "<" && x[a].charAt(1) === "/" &&
                    x[a].charAt(x[a].length - 2) !== "/" &&
                    x[a - 1] !== " " &&
                    x[a - 2].charAt(0) === "<" && x[a - 2].charAt(1) === "/" &&
                    a > 2 && x[a - 3].charAt(0) !== "<") {
                x[a - 1] = "";
            }
        }
    }
    //Final clean up on the joined string: whitespace next to tag
    //delimiters, comment ends and server side block starts is reduced
    //to at most one space, and whitespace before ">" or "/>" is
    //removed.
    x = x.join("").replace(/-->\s+/g, "--> ").replace(/\s+<\?php/g, " <?php").replace(/\s+<%/g, " <%").replace(/\s*>\s+/g, "> ").replace(/\s+<\s*/g, " <").replace(/\s+\/>/g, "/>").replace(/\s+>/g, ">");
    if (white.test(x.charAt(0))) {
        x = x.slice(1, x.length);
    }
    return x;
};