Skip to content

Commit

Permalink
Require Node.js 10
Browse files Browse the repository at this point in the history
Fixes #60
Fixes #31
Fixes #34
  • Loading branch information
sindresorhus committed Aug 29, 2020
1 parent 4fe40da commit 010c957
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 45 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
language: node_js
node_js:
- '14'
- '12'
- '10'
- '8'
22 changes: 11 additions & 11 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,17 @@
const urlRegex = require('url-regex-safe');
const normalizeUrl = require('normalize-url');

// Collects the query-parameter values of `url` that are themselves URLs.
//
// Takes a URL string and returns a Set of the parameter values for which
// `urlRegex({exact: true})` matches.
//
// This diff hunk lists the removed line followed by the added line for each
// renamed identifier: `getUrlsFromQueryParams` -> `getUrlsFromQueryParameters`
// and `ret` -> `returnValue`.
const getUrlsFromQueryParams = url => {
const ret = new Set();
const getUrlsFromQueryParameters = url => {
const returnValue = new Set();
// A leading `//` or `www.` is replaced so that `new URL()` receives an
// absolute URL. `new URL()` throws on input it cannot parse, and nothing in
// this function catches that.
// NOTE(review): the pattern contains only non-capturing groups, so `$2` does
// not refer to any capture and appears to end up literally in the host name.
// Only `searchParams` is read afterwards, so the result is presumably
// unaffected - confirm.
const {searchParams} = (new URL(url.replace(/^(?:\/\/|(?:www\.))/i, 'http://$2')));

// Parameter names are ignored; only the values are tested.
for (const [, value] of searchParams) {
if (urlRegex({exact: true}).test(value)) {
ret.add(value);
returnValue.add(value);
}
}

return ret;
return returnValue;
};

module.exports = (text, options = {}) => {
Expand All @@ -24,12 +24,12 @@ module.exports = (text, options = {}) => {
throw new TypeError('The `exclude` option must be an array');
}

const ret = new Set();
const returnValue = new Set();

const add = url => {
try {
ret.add(normalizeUrl(url.trim().replace(/\.+$/, ''), options));
} catch (_) {}
returnValue.add(normalizeUrl(url.trim().replace(/\.+$/, ''), options));
} catch {}
};

const urls = text.match(
Expand All @@ -41,21 +41,21 @@ module.exports = (text, options = {}) => {
add(url);

if (options.extractFromQueryString) {
const qsUrls = getUrlsFromQueryParams(url);
const qsUrls = getUrlsFromQueryParameters(url);
for (const qsUrl of qsUrls) {
add(qsUrl);
}
}
}

for (const excludedItem of options.exclude || []) {
for (const item of ret) {
for (const item of returnValue) {
const regex = new RegExp(excludedItem);
if (regex.test(item)) {
ret.delete(item);
returnValue.delete(item);
}
}
}

return ret;
return returnValue;
};
2 changes: 1 addition & 1 deletion license
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MIT License

Copyright (c) Sindre Sorhus <sindresorhus@gmail.com> (sindresorhus.com)
Copyright (c) Sindre Sorhus <sindresorhus@gmail.com> (https://sindresorhus.com)

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

Expand Down
11 changes: 6 additions & 5 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@
"description": "Get all URLs in a string",
"license": "MIT",
"repository": "sindresorhus/get-urls",
"funding": "https://github.com/sponsors/sindresorhus",
"author": {
"name": "Sindre Sorhus",
"email": "sindresorhus@gmail.com",
"url": "sindresorhus.com"
"url": "https://sindresorhus.com"
},
"engines": {
"node": ">=10.12.0"
Expand All @@ -30,12 +31,12 @@
"string"
],
"dependencies": {
"normalize-url": "^4.3.0",
"url-regex-safe": "0.0.5"
"normalize-url": "^5.1.0",
"url-regex-safe": "^1.0.2"
},
"devDependencies": {
"ava": "^2.4.0",
"tsd": "^0.9.0",
"xo": "^0.25.3"
"tsd": "^0.13.1",
"xo": "^0.33.0"
}
}
12 changes: 4 additions & 8 deletions readme.md
Original file line number Diff line number Diff line change
@@ -1,19 +1,17 @@
# get-urls [![Build Status](https://travis-ci.org/sindresorhus/get-urls.svg?branch=master)](https://travis-ci.org/sindresorhus/get-urls)
# get-urls [![Build Status](https://travis-ci.com/sindresorhus/get-urls.svg?branch=master)](https://travis-ci.com/github/sindresorhus/get-urls)

> Get all URLs in a string
The URLs will be [normalized](https://github.com/sindresorhus/normalize-url).

*Don't use this for any kind of security-related validation.*


## Install

```
$ npm install get-urls
```


## Usage

```js
Expand All @@ -25,7 +23,6 @@ getUrls(text);
//=> Set {'http://sindresorhus.com', 'http://yeoman.io'}
```


## API

### getUrls(text, options?)
Expand All @@ -44,28 +41,27 @@ All the `normalize-url` [options](https://github.com/sindresorhus/normalize-url#

#### extractFromQueryString

Type: `boolean`<br>
Type: `boolean`\
Default: `false`

Extract URLs that appear as query parameters in the found URLs.

#### exclude

Type: `string[]`<br>
Type: `string[]`\
Default: `[]`

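Exclude URLs that match any of the patterns in the given array. Each entry is used as a regular expression source (`new RegExp(entry)`), so an entry such as `w3schools` excludes every URL that contains it.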

#### requireSchemeOrWww

Type: `boolean`<br>
Type: `boolean`\
Default: `true`

Require URLs to have a scheme or leading `www.` to be considered a URL. When `false`, candidates are matched against a list of valid TLDs, so it will match URLs like `unicorn.education`.

Does not affect URLs in query parameters if using the `extractFromQueryString` option.


## Related

- [get-urls-cli](https://github.com/sindresorhus/get-urls-cli) - CLI for this module
Expand Down
75 changes: 56 additions & 19 deletions test.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,23 @@ import test from 'ava';
import getUrls from '.';

// Runs `getUrls` over the contents of `fixture.txt` and expects exactly the
// seven URLs listed below.
// The assertion appears twice in this diff hunk: first in its removed form
// (arguments on the call line), then in the added form (one argument per line).
test('get unique cleaned-up urls from a string', t => {
t.deepEqual(getUrls(fs.readFileSync('fixture.txt', 'utf8')), new Set([
'http://google.com',
'http://todomvc.com',
'http://yeoman.io',
'http://twitter.com/sindresorhus',
'https://tastejs.com',
'http://example.com',
'http://github.com'
]));
t.deepEqual(
getUrls(fs.readFileSync('fixture.txt', 'utf8')),
new Set([
'http://google.com',
'http://todomvc.com',
'http://yeoman.io',
'http://twitter.com/sindresorhus',
'https://tastejs.com',
'http://example.com',
'http://github.com'
])
);
});

test('do not get nested urls from query strings', t => {
const text = 'You can read http://www.awin1.com/cread.php?a=b&p=https%3A%2F%2Fuk.hotels.com%2Fhotel%2Fdetails.html%3Ftab%3Ddescription%26hotelId%3D287452%26q-localised-check-in%3D15%2F12%2F2017%26q-localised-check-out%3D19%2F12%2F2017%26q-room-0-adults%3D2%26q-room-0-children%3D0%26locale%3Den_GB%26pos%3DHCOM_UK for more info';

t.deepEqual(
getUrls(text),
new Set([
Expand All @@ -26,6 +30,7 @@ test('do not get nested urls from query strings', t => {

test('get nested urls from query strings', t => {
const text = 'You can read http://www.awin1.com/cread.php?a=b&p=https%3A%2F%2Fuk.hotels.com%2Fhotel%2Fdetails.html%3Ftab%3Ddescription%26hotelId%3D287452%26q-localised-check-in%3D15%2F12%2F2017%26q-localised-check-out%3D19%2F12%2F2017%26q-room-0-adults%3D2%26q-room-0-children%3D0%26locale%3Den_GB%26pos%3DHCOM_UK for more info';

t.deepEqual(
getUrls(text, {extractFromQueryString: true}),
new Set([
Expand All @@ -37,6 +42,7 @@ test('get nested urls from query strings', t => {

test('don\'t strip hash when stripHash is set to false', t => {
const text = 'You can read http://www.foobar.com/document.html#about for more info';

t.deepEqual(
getUrls(text, {stripHash: false}),
new Set(['http://foobar.com/document.html#about'])
Expand Down Expand Up @@ -75,15 +81,19 @@ test('finds urls beginning with `www`', t => {

// Appends three extra URLs to the fixture text and passes three `exclude`
// entries, one matching each of them. The expected Set contains none of the
// appended URLs; it is the same seven URLs expected from the fixture alone.
// The assertion appears twice in this diff hunk: removed form first, then the
// added, reformatted form.
test('exclude matching urls', t => {
const text = `${fs.readFileSync('fixture.txt', 'utf8')} http://w3.org/2000/svg, http://foobar.com/document.html, https://www.w3schools.com/`;
t.deepEqual(getUrls(text, {exclude: ['http://w3.org/2000/svg', 'foobar.com', 'w3schools']}), new Set([
'http://google.com',
'http://todomvc.com',
'http://yeoman.io',
'http://twitter.com/sindresorhus',
'https://tastejs.com',
'http://example.com',
'http://github.com'
]));

t.deepEqual(
getUrls(text, {exclude: ['http://w3.org/2000/svg', 'foobar.com', 'w3schools']}),
new Set([
'http://google.com',
'http://todomvc.com',
'http://yeoman.io',
'http://twitter.com/sindresorhus',
'https://tastejs.com',
'http://example.com',
'http://github.com'
])
);
});

test('throw TypeError for non-array `exclude` option', t => {
Expand All @@ -94,6 +104,7 @@ test('throw TypeError for non-array `exclude` option', t => {

test('get urls without scheme', t => {
const text = 'Lorem ipsum dolor sit amet, //sindresorhus.com consectetuer adipiscing http://yeoman.io elit. www.github.com';

t.deepEqual(
getUrls(text, {
extractFromQueryString: true
Expand All @@ -108,6 +119,7 @@ test('get urls without scheme', t => {

test('get schemeless url from query string', t => {
const text = 'You can read http://www.awin1.com/cread.php?a=b&p=%2F%2Fuk.hotels.com%2Fhotel%2Fdetails.html%3Ftab%3Ddescription%26hotelId%3D287452%26q-localised-check-in%3D15%2F12%2F2017%26q-localised-check-out%3D19%2F12%2F2017%26q-room-0-adults%3D2%26q-room-0-children%3D0%26locale%3Den_GB%26pos%3DHCOM_UK for more info';

t.deepEqual(
getUrls(text, {
extractFromQueryString: true
Expand All @@ -121,10 +133,12 @@ test('get schemeless url from query string', t => {

test('requireSchemeOrWww turned off', t => {
const text = 'Here is a URL: sindresorhus.com here is another: unicorn.education';

t.deepEqual(
getUrls(text, {
requireSchemeOrWww: false
}), new Set([
}),
new Set([
'http://sindresorhus.com',
'http://unicorn.education'
])
Expand Down Expand Up @@ -164,3 +178,26 @@ test('throw an error when the text argument is not a string', t => {
getUrls();
}, TypeError);
});

// A `)` placed directly after a URL must not become part of the extracted URL.
test('handles parens', t => {
const text = 'foo https://sindresorhus.com/some/example) foo';

t.deepEqual(
getUrls(text),
new Set([
'https://sindresorhus.com/some/example'
])
);
});

// The input is a Markdown image nested inside a Markdown link. Both the image
// URL and the link target are expected, without the surrounding brackets and
// parentheses.
test('handles Markdown', t => {
const text = 'foo [![](https://sindresorhus.com/unicorn.png)](https://sindresorhus.com/?foo=bar) foo';

t.deepEqual(
getUrls(text),
new Set([
'https://sindresorhus.com/unicorn.png',
'https://sindresorhus.com/?foo=bar'
])
);
});

0 comments on commit 010c957

Please sign in to comment.