From 249b04405812d532839f6be618310d799f8d0c75 Mon Sep 17 00:00:00 2001 From: Robin Stocker Date: Fri, 5 Feb 2016 17:41:30 +1100 Subject: [PATCH] Treat more special characters as trailing delimiters (#3) * `;` is now treated the same as `,` and `:` * `<` and `>` now also need to match, same as other brackets * `/` can still be within or at the end of an URL, but if it's within a group of other delimiters, it behaves as a delimiter Together, these new rules result in `">`, `"/>` and `");` being excluded at the end of links, while hopefully not messing with the overall heuristics too much. --- .../nibor/autolink/internal/UrlScanner.java | 18 +++++++++++++- .../org/nibor/autolink/AutolinkUrlTest.java | 24 +++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/nibor/autolink/internal/UrlScanner.java b/src/main/java/org/nibor/autolink/internal/UrlScanner.java index cc45894..0dd2ef4 100644 --- a/src/main/java/org/nibor/autolink/internal/UrlScanner.java +++ b/src/main/java/org/nibor/autolink/internal/UrlScanner.java @@ -54,6 +54,7 @@ private int findLast(CharSequence input, int beginIndex) { int round = 0; int square = 0; int curly = 0; + int angle = 0; boolean doubleQuote = false; boolean singleQuote = false; int last = beginIndex; @@ -67,13 +68,22 @@ private int findLast(CharSequence input, int beginIndex) { case '\u000B': case '\f': case '\r': + // These can never be part of an URL, so stop now break loop; case '?': case '!': case '.': case ',': case ':': + case ';': + // These may be part of an URL but not at the end continue loop; + case '/': + // This may be part of an URL and at the end, but not if the previous character can't be the end of an URL + if (last != i - 1) { + continue loop; + } + break; case '(': round++; break; @@ -92,6 +102,12 @@ private int findLast(CharSequence input, int beginIndex) { case '}': curly--; break; + case '<': + angle++; + break; + case '>': + angle--; + break; case '"': doubleQuote = 
!doubleQuote; break; @@ -102,7 +118,7 @@ private int findLast(CharSequence input, int beginIndex) { last = i; continue loop; } - if (round >= 0 && square >= 0 && curly >= 0 && !doubleQuote && !singleQuote) { + if (round >= 0 && square >= 0 && curly >= 0 && angle >= 0 && !doubleQuote && !singleQuote) { last = i; } } diff --git a/src/test/java/org/nibor/autolink/AutolinkUrlTest.java b/src/test/java/org/nibor/autolink/AutolinkUrlTest.java index 23bb7ad..62e808f 100644 --- a/src/test/java/org/nibor/autolink/AutolinkUrlTest.java +++ b/src/test/java/org/nibor/autolink/AutolinkUrlTest.java @@ -87,6 +87,7 @@ public void delimiterSeparation() { assertLinked("http://example.org/:", "|http://example.org/|:"); assertLinked("http://example.org/?", "|http://example.org/|?"); assertLinked("http://example.org/!", "|http://example.org/|!"); + assertLinked("http://example.org/;", "|http://example.org/|;"); } @Test @@ -94,6 +95,7 @@ public void matchingPunctuation() { assertLinked("http://example.org/a(b)", "|http://example.org/a(b)|"); assertLinked("http://example.org/a[b]", "|http://example.org/a[b]|"); assertLinked("http://example.org/a{b}", "|http://example.org/a{b}|"); + assertLinked("http://example.org/a<b>", "|http://example.org/a<b>|"); assertLinked("http://example.org/a\"b\"", "|http://example.org/a\"b\"|"); assertLinked("http://example.org/a'b'", "|http://example.org/a'b'|"); assertLinked("(http://example.org/)", "(|http://example.org/|)"); @@ -110,11 +112,33 @@ public void matchingPunctuationTricky() { assertLinked("[(http://example.org/)]", "[(|http://example.org/|)]"); assertLinked("(http://example.org/).", "(|http://example.org/|)."); assertLinked("(http://example.org/.)", "(|http://example.org/|.)"); + assertLinked("http://example.org/>", "|http://example.org/|>"); // not sure about these: assertLinked("http://example.org/(", "|http://example.org/(|"); assertLinked("http://example.org/]()", "|http://example.org/|]()"); } + @Test + public void html() { + 
assertLinked("http://example.org\">", "|http://example.org|\">"); + assertLinked("http://example.org'>", "|http://example.org|'>"); + assertLinked("http://example.org\"/>", "|http://example.org|\"/>"); + assertLinked("http://example.org'/>", "|http://example.org|'/>"); + } + + @Test + public void css() { + assertLinked("http://example.org\");", "|http://example.org|\");"); + assertLinked("http://example.org');", "|http://example.org|');"); + } + + @Test + public void slash() { + assertLinked("http://example.org/", "|http://example.org/|"); + assertLinked("http://example.org/a/", "|http://example.org/a/|"); + assertLinked("http://example.org//", "|http://example.org//|"); + } + @Test public void multiple() { assertLinked("http://one.org/ http://two.org/", "|http://one.org/| |http://two.org/|");