-
-
Notifications
You must be signed in to change notification settings - Fork 361
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
12 changed files
with
5,097 additions
and
420 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,153 +1,210 @@ | ||
'use strict' | ||
|
||
var ccount = require('ccount') | ||
var decode = require('parse-entities') | ||
var decimal = require('is-decimal') | ||
var alphabetical = require('is-alphabetical') | ||
var whitespace = require('is-whitespace-character') | ||
var locate = require('../locate/url') | ||
|
||
module.exports = url | ||
url.locator = locate | ||
url.notInLink = true | ||
|
||
var quotationMark = '"' | ||
var apostrophe = "'" | ||
var leftParenthesis = '(' | ||
var rightParenthesis = ')' | ||
var comma = ',' | ||
var dot = '.' | ||
var colon = ':' | ||
var semicolon = ';' | ||
var lessThan = '<' | ||
var atSign = '@' | ||
var leftSquareBracket = '[' | ||
var rightSquareBracket = ']' | ||
|
||
var http = 'http://' | ||
var https = 'https://' | ||
var mailto = 'mailto:' | ||
|
||
var protocols = [http, https, mailto] | ||
|
||
var protocolsLength = protocols.length | ||
var exclamationMark = 33 // '!' | ||
var ampersand = 38 // '&' | ||
var rightParenthesis = 41 // ')' | ||
var asterisk = 42 // '*' | ||
var comma = 44 // ',' | ||
var dash = 45 // '-' | ||
var dot = 46 // '.' | ||
var colon = 58 // ':' | ||
var semicolon = 59 // ';' | ||
var questionMark = 63 // '?' | ||
var lessThan = 60 // '<' | ||
var underscore = 95 // '_' | ||
var tilde = 126 // '~' | ||
|
||
var leftParenthesisCharacter = '(' | ||
var rightParenthesisCharacter = ')' | ||
|
||
function url(eat, value, silent) { | ||
var self = this | ||
var subvalue | ||
var content | ||
var character | ||
var gfm = self.options.gfm | ||
var tokenizers = self.inlineTokenizers | ||
var length = value.length | ||
var previousDot = -1 | ||
var protocolless = false | ||
var dots | ||
var lastTwoPartsStart | ||
var start | ||
var index | ||
var position | ||
var protocol | ||
var match | ||
var length | ||
var queue | ||
var parenCount | ||
var nextCharacter | ||
var tokenizers | ||
var pathStart | ||
var path | ||
var code | ||
var end | ||
var leftCount | ||
var rightCount | ||
var content | ||
var children | ||
var url | ||
var exit | ||
|
||
if (!self.options.gfm) { | ||
if (!gfm) { | ||
return | ||
} | ||
|
||
// `WWW.` doesn’t work. | ||
if (value.slice(0, 4) === 'www.') { | ||
protocolless = true | ||
index = 4 | ||
} else if (value.slice(0, 7).toLowerCase() === 'http://') { | ||
index = 7 | ||
} else if (value.slice(0, 8).toLowerCase() === 'https://') { | ||
index = 8 | ||
} else { | ||
return | ||
} | ||
|
||
subvalue = '' | ||
index = -1 | ||
// Act as if the starting boundary is a dot. | ||
previousDot = index - 1 | ||
|
||
while (++index < protocolsLength) { | ||
protocol = protocols[index] | ||
match = value.slice(0, protocol.length) | ||
// Parse a valid domain. | ||
start = index | ||
dots = [] | ||
|
||
if (match.toLowerCase() === protocol) { | ||
subvalue = match | ||
break | ||
while (index < length) { | ||
code = value.charCodeAt(index) | ||
|
||
if (code === dot) { | ||
// Dots may not appear after each other. | ||
if (previousDot === index - 1) { | ||
break | ||
} | ||
|
||
dots.push(index) | ||
previousDot = index | ||
index++ | ||
continue | ||
} | ||
|
||
if ( | ||
decimal(code) || | ||
alphabetical(code) || | ||
code === dash || | ||
code === underscore | ||
) { | ||
index++ | ||
continue | ||
} | ||
|
||
break | ||
} | ||
|
||
// Ignore a final dot: | ||
if (code === dot) { | ||
dots.pop() | ||
index-- | ||
} | ||
|
||
if (!subvalue) { | ||
// If there are no dots, exit. | ||
if (dots[0] === undefined) { | ||
return | ||
} | ||
|
||
index = subvalue.length | ||
length = value.length | ||
queue = '' | ||
parenCount = 0 | ||
// If there is an underscore in the last two domain parts, exit: | ||
// `www.example.c_m` and `www.ex_ample.com` are not OK, but | ||
// `www.sub_domain.example.com` is. | ||
lastTwoPartsStart = dots.length < 2 ? start : dots[dots.length - 2] + 1 | ||
|
||
if (value.slice(lastTwoPartsStart, index).indexOf('_') !== -1) { | ||
return | ||
} | ||
|
||
/* istanbul ignore if - never used (yet) */ | ||
if (silent) { | ||
return true | ||
} | ||
|
||
end = index | ||
pathStart = index | ||
|
||
// Parse a path. | ||
while (index < length) { | ||
character = value.charAt(index) | ||
code = value.charCodeAt(index) | ||
|
||
if (whitespace(character) || character === lessThan) { | ||
if (whitespace(code) || code === lessThan) { | ||
break | ||
} | ||
|
||
index++ | ||
|
||
if ( | ||
character === dot || | ||
character === comma || | ||
character === colon || | ||
character === semicolon || | ||
character === quotationMark || | ||
character === apostrophe || | ||
character === rightParenthesis || | ||
character === rightSquareBracket | ||
code === exclamationMark || | ||
code === asterisk || | ||
code === comma || | ||
code === dot || | ||
code === colon || | ||
code === questionMark || | ||
code === underscore || | ||
code === tilde | ||
) { | ||
nextCharacter = value.charAt(index + 1) | ||
|
||
if (!nextCharacter || whitespace(nextCharacter)) { | ||
break | ||
} | ||
// Empty | ||
} else { | ||
end = index | ||
} | ||
} | ||
|
||
if (character === leftParenthesis || character === leftSquareBracket) { | ||
parenCount++ | ||
} | ||
index = end | ||
|
||
if (character === rightParenthesis || character === rightSquareBracket) { | ||
parenCount-- | ||
// If the path ends in a closing paren, and the count of closing parens is | ||
// higher than the opening count, then remove the superfluous closing parens. | ||
if (value.charCodeAt(index - 1) === rightParenthesis) { | ||
path = value.slice(pathStart, index) | ||
leftCount = ccount(path, leftParenthesisCharacter) | ||
rightCount = ccount(path, rightParenthesisCharacter) | ||
|
||
if (parenCount < 0) { | ||
break | ||
} | ||
while (rightCount > leftCount) { | ||
index = pathStart + path.lastIndexOf(rightParenthesisCharacter) | ||
path = value.slice(pathStart, index) | ||
rightCount-- | ||
} | ||
|
||
queue += character | ||
index++ | ||
} | ||
|
||
if (!queue) { | ||
return | ||
} | ||
if (value.charCodeAt(index - 1) === semicolon) { | ||
// GitHub doesn’t document this, but final semicolons aren’t part of the | ||
// URL either. | ||
index-- | ||
|
||
subvalue += queue | ||
content = subvalue | ||
// If the path ends in what looks like an entity, it’s not part of the path. | ||
if (alphabetical(value.charCodeAt(index - 1))) { | ||
end = index - 2 | ||
|
||
if (protocol === mailto) { | ||
position = queue.indexOf(atSign) | ||
while (alphabetical(value.charCodeAt(end))) { | ||
end-- | ||
} | ||
|
||
if (position === -1 || position === length - 1) { | ||
return | ||
if (value.charCodeAt(end) === ampersand) { | ||
index = end | ||
} | ||
} | ||
|
||
content = content.slice(mailto.length) | ||
} | ||
|
||
/* istanbul ignore if - never used (yet) */ | ||
if (silent) { | ||
return true | ||
content = value.slice(0, index) | ||
url = decode(content, {nonTerminated: false}) | ||
|
||
if (protocolless) { | ||
url = 'http://' + url | ||
} | ||
|
||
exit = self.enterLink() | ||
|
||
// Temporarily remove all tokenizers except text in url. | ||
tokenizers = self.inlineTokenizers | ||
self.inlineTokenizers = {text: tokenizers.text} | ||
|
||
content = self.tokenizeInline(content, eat.now()) | ||
|
||
children = self.tokenizeInline(content, eat.now()) | ||
self.inlineTokenizers = tokenizers | ||
|
||
exit() | ||
|
||
return eat(subvalue)({ | ||
type: 'link', | ||
title: null, | ||
url: decode(subvalue, {nonTerminated: false}), | ||
children: content | ||
}) | ||
return eat(content)({type: 'link', title: null, url: url, children: children}) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.