From e9f222855543eead97721befcf70d4c4939dbc11 Mon Sep 17 00:00:00 2001 From: Artur Barseghyan Date: Mon, 11 Jan 2021 22:28:08 +0100 Subject: [PATCH] Prepare 0.12.5 --- CHANGELOG.rst | 12 +++ LICENSE_GPL2.0.txt | 2 +- LICENSE_LGPL_2.1.txt | 2 +- LICENSE_MPL_1.1.txt | 4 +- docs/changelog.rst | 12 +++ setup.py | 2 +- src/tld/__init__.py | 4 +- src/tld/res/effective_tld_names.dat.txt | 90 ++++++++++++++----- .../effective_tld_names_public_only.dat.txt | 90 ++++++++++++++----- src/tld/tests/test_core.py | 45 +++++++--- src/tld/utils.py | 4 +- 11 files changed, 203 insertions(+), 64 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index eb329fc..bd223d4 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -15,6 +15,18 @@ are used for versioning (schema follows below): 0.3.4 to 0.4). - All backwards incompatible changes are mentioned in this document. +0.12.5 +------ +2021-01-11 + +.. note:: + + Release dedicated to defenders of Armenia and Artsakh (Nagorno Karabakh) + and all the victims of Turkish and Azerbaijani aggression. + +- Fixed lower-cased `parsed_url` attributes (`SplitResult`) when getting + tld as object (`as_object=True`). + 0.12.4 ------ 2021-01-02 diff --git a/LICENSE_GPL2.0.txt b/LICENSE_GPL2.0.txt index 4da28c3..0330576 100644 --- a/LICENSE_GPL2.0.txt +++ b/LICENSE_GPL2.0.txt @@ -291,7 +291,7 @@ convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. tld - Extract the top level domain (TLD) from the URL given. - Copyright (C) 2013-2020 Artur Barseghyan + Copyright (C) 2013-2021 Artur Barseghyan This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/LICENSE_LGPL_2.1.txt b/LICENSE_LGPL_2.1.txt index 4699cb5..2f6caa7 100644 --- a/LICENSE_LGPL_2.1.txt +++ b/LICENSE_LGPL_2.1.txt @@ -471,7 +471,7 @@ convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. tld - Extract the top level domain (TLD) from the URL given. - Copyright (C) 2013-2020 Artur Barseghyan + Copyright (C) 2013-2021 Artur Barseghyan This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public diff --git a/LICENSE_MPL_1.1.txt b/LICENSE_MPL_1.1.txt index 930256b..66dc0e1 100644 --- a/LICENSE_MPL_1.1.txt +++ b/LICENSE_MPL_1.1.txt @@ -445,10 +445,10 @@ EXHIBIT A -Mozilla Public License. under the License. tld - Extract the top level domain (TLD) from the URL given. - Copyright (C) 2013-2020 Artur Barseghyan + Copyright (C) 2013-2021 Artur Barseghyan The Initial Developer of the Original Code is Artur Barseghyan, - copyright (C) 2013-2020. All Rights Reserved. + copyright (C) 2013-2021. All Rights Reserved. Alternatively, the contents of this file may be used under the terms of the GPL 2.0 license or LGPL 2.1 license, in which case the diff --git a/docs/changelog.rst b/docs/changelog.rst index eb329fc..bd223d4 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -15,6 +15,18 @@ are used for versioning (schema follows below): 0.3.4 to 0.4). - All backwards incompatible changes are mentioned in this document. +0.12.5 +------ +2021-01-11 + +.. note:: + + Release dedicated to defenders of Armenia and Artsakh (Nagorno Karabakh) + and all the victims of Turkish and Azerbaijani aggression. + +- Fixed lower-cased `parsed_url` attributes (`SplitResult`) when getting + tld as object (`as_object=True`). + 0.12.4 ------ 2021-01-02 diff --git a/setup.py b/setup.py index bc447b6..ae0c996 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ except: readme = '' -version = '0.12.4' +version = '0.12.5' py_where = './src' py_package_dir = 'src' diff --git a/src/tld/__init__.py b/src/tld/__init__.py index a1bdf61..a49d629 100644 --- a/src/tld/__init__.py +++ b/src/tld/__init__.py @@ -9,9 +9,9 @@ ) __title__ = 'tld' -__version__ = '0.12.4' +__version__ = '0.12.5' __author__ = 'Artur Barseghyan' -__copyright__ = '2013-2020 Artur Barseghyan' +__copyright__ = '2013-2021 Artur Barseghyan' __license__ = 'MPL-1.1 OR GPL-2.0-only OR LGPL-2.1-or-later' __all__ = ( 'get_fld', diff --git a/src/tld/res/effective_tld_names.dat.txt b/src/tld/res/effective_tld_names.dat.txt index 6c36c21..0a7fb83 100644 --- a/src/tld/res/effective_tld_names.dat.txt +++ b/src/tld/res/effective_tld_names.dat.txt @@ -7111,7 +7111,7 @@ org.zw // newGTLDs -// List of new gTLDs imported from https://www.icann.org/resources/registries/gtlds/v2/gtlds.json on 2020-11-21T18:09:21Z +// List of new gTLDs imported from https://www.icann.org/resources/registries/gtlds/v2/gtlds.json on 2020-12-21T16:35:44Z // This list is auto-generated, don't edit it manually. // aaa : 2015-02-26 American Automobile Association, Inc. aaa @@ -7335,7 +7335,7 @@ autos // avianca : 2015-01-08 Avianca Holdings S.A. avianca -// aws : 2015-06-25 Amazon Registry Services, Inc. +// aws : 2015-06-25 AWS Registry LLC aws // axa : 2013-12-19 AXA Group Operations SAS @@ -7656,9 +7656,6 @@ cbre // cbs : 2015-08-06 CBS Domains Inc. cbs -// ceb : 2015-04-09 The Corporate Executive Board Company -ceb - // center : 2013-11-07 Binky Moon, LLC center @@ -7839,7 +7836,7 @@ credit // creditcard : 2014-03-20 Binky Moon, LLC creditcard -// creditunion : 2015-01-22 CUNA Performance Resources, LLC +// creditunion : 2015-01-22 DotCooperation LLC creditunion // cricket : 2014-10-09 dot Cricket Limited @@ -8922,9 +8919,6 @@ ltda // lundbeck : 2015-08-06 H. Lundbeck A/S lundbeck -// lupin : 2014-11-07 LUPIN LIMITED -lupin - // luxe : 2014-01-09 Minds + Machines Group Limited luxe @@ -9207,7 +9201,7 @@ nyc // obi : 2014-09-25 OBI Group Holding SE & Co. KGaA obi -// observer : 2015-04-30 Top Level Spectrum, Inc. +// observer : 2015-04-30 Dog Beach, LLC observer // off : 2015-07-23 Johnson Shareholdings, Inc. @@ -9474,7 +9468,7 @@ realestate // realtor : 2014-05-29 Real Estate Domains LLC realtor -// realty : 2015-03-19 Fegistry, LLC +// realty : 2015-03-19 Dog Beach, LLC realty // recipes : 2013-10-17 Binky Moon, LLC @@ -9756,9 +9750,6 @@ show // showtime : 2015-08-06 CBS Domains Inc. showtime -// shriram : 2014-01-23 Shriram Capital Ltd. -shriram - // silk : 2015-06-25 Amazon Registry Services, Inc. silk @@ -11177,6 +11168,11 @@ edgestack.me // Submitted by Peter Palfrader / Debian Sysadmin Team debian.net +// Deno Land Inc : https://deno.com/ +// Submitted by Luca Casonato +deno.dev +deno-staging.dev + // deSEC : https://desec.io/ // Submitted by Peter Thomassen dedyn.io @@ -11198,6 +11194,10 @@ shop.th // Submitted by Paul Fang drayddns.com +// DreamCommerce : https://shoper.pl/ +// Submitted by Konrad Kotarba +shoparena.pl + // DreamHost : http://www.dreamhost.com/ // Submitted by Andrew Farmer dreamhosters.com @@ -11771,10 +11771,6 @@ myfast.host fastvps.site myfast.space -// Featherhead : https://featherhead.xyz/ -// Submitted by Simon Menke -fhapp.xyz - // Fedora : https://fedoraproject.org/ // submitted by Patrick Uiterwijk fedorainfracloud.org @@ -11812,6 +11808,10 @@ filegear-sg.me // Submitted by Chris Raynor firebaseapp.com +// FLAP : https://www.flap.cloud +// Submitted by Louis Chemineau +flap.id + // fly.io: https://fly.io // Submitted by Kurt Mackey fly.dev @@ -11870,6 +11870,10 @@ gentlentapis.com lab.ms cdn-edges.net +// Ghost Foundation : https://ghost.org +// Submitted by Matt Hanley +ghost.io + // GitHub, Inc. // Submitted by Patrick Toomey github.io @@ -12016,6 +12020,11 @@ graphox.us // Submitted by Tyler Todd awsmppl.com +// GünstigBestellen : https://günstigbestellen.de +// Submitted by Furkan Akkoc +günstigbestellen.de +günstigliefern.de + // Hakaran group: http://hakaran.cz // Submited by Arseniy Sokolov fin.ci @@ -12060,18 +12069,16 @@ secaas.hk // HOSTBIP REGISTRY : https://www.hostbip.com/ // Submitted by Atanunu Igbunuroghene -bpl.biz orx.biz -ng.city biz.gl -ng.ink col.ng firm.ng gen.ng ltd.ng ngo.ng -ng.school +edu.scot sch.so +org.yt // HostyHosting (hostyhosting.com) hostyhosting.io @@ -12089,6 +12096,14 @@ moonscale.net // Submitted by Hannu Aronsson iki.fi +// Incsub, LLC: https://incsub.com/ +// Submitted by Aaron Edwards +smushcdn.com +wphostedmail.com +wpmucdn.com +tempurl.host +wpmudev.host + // Individual Network Berlin e.V. : https://www.in-berlin.de/ // Submitted by Christian Seitz dyn-berlin.de @@ -12303,6 +12318,11 @@ oya.to co.krd edu.krd +// Krellian Ltd. : https://krellian.com +// Submitted by Ben Francis +krellian.net +webthings.io + // LCube - Professional hosting e.K. : https://www.lcube-webhosting.de // Submitted by Lars Laehn git-repos.de @@ -12360,6 +12380,10 @@ loginline.io loginline.services loginline.site +// Lõhmus Family, The +// Submitted by Heiki Lõhmus +lohmus.me + // LubMAN UMCS Sp. z o.o : https://lubman.pl/ // Submitted by Ireneusz Maliszewski krasnik.pl @@ -12765,6 +12789,10 @@ opensocial.site // Submitted by Sven Marnach opencraft.hosting +// OpenResearch GmbH: https://openresearch.com/ +// Submitted by Philipp Schmid +orsites.com + // Opera Software, A.S.A. // Submitted by Yngve Pettersen operaunite.com @@ -13052,6 +13080,10 @@ shiftedit.io // Submitted by Alex Bowers myshopblocks.com +// Shopify : https://www.shopify.com +// Submitted by Alex Richter +myshopify.com + // Shopit : https://www.shopitcommerce.com/ // Submitted by Craig McMahon shopitsite.com @@ -13399,6 +13431,7 @@ daemon.panel.gg // WoltLab GmbH : https://www.woltlab.com // Submitted by Tim Düsterhus +woltlab-demo.com myforum.community community-pro.de diskussionsbereich.de @@ -13491,4 +13524,17 @@ impertrix.com // GignoSystemJapan: http://gsj.bz // Submitted by GignoSystemJapan gsj.bz + +// Rusnames Limited: http://rusnames.ru/ +// Submitted by Sergey Zotov +биз.рус +ком.рус +крым.рус +мир.рус +мск.рус +орг.рус +самара.рус +сочи.рус +спб.рус +я.рус // ===END PRIVATE DOMAINS=== diff --git a/src/tld/res/effective_tld_names_public_only.dat.txt b/src/tld/res/effective_tld_names_public_only.dat.txt index 6c36c21..0a7fb83 100644 --- a/src/tld/res/effective_tld_names_public_only.dat.txt +++ b/src/tld/res/effective_tld_names_public_only.dat.txt @@ -7111,7 +7111,7 @@ org.zw // newGTLDs -// List of new gTLDs imported from https://www.icann.org/resources/registries/gtlds/v2/gtlds.json on 2020-11-21T18:09:21Z +// List of new gTLDs imported from https://www.icann.org/resources/registries/gtlds/v2/gtlds.json on 2020-12-21T16:35:44Z // This list is auto-generated, don't edit it manually. // aaa : 2015-02-26 American Automobile Association, Inc. aaa @@ -7335,7 +7335,7 @@ autos // avianca : 2015-01-08 Avianca Holdings S.A. avianca -// aws : 2015-06-25 Amazon Registry Services, Inc. +// aws : 2015-06-25 AWS Registry LLC aws // axa : 2013-12-19 AXA Group Operations SAS @@ -7656,9 +7656,6 @@ cbre // cbs : 2015-08-06 CBS Domains Inc. cbs -// ceb : 2015-04-09 The Corporate Executive Board Company -ceb - // center : 2013-11-07 Binky Moon, LLC center @@ -7839,7 +7836,7 @@ credit // creditcard : 2014-03-20 Binky Moon, LLC creditcard -// creditunion : 2015-01-22 CUNA Performance Resources, LLC +// creditunion : 2015-01-22 DotCooperation LLC creditunion // cricket : 2014-10-09 dot Cricket Limited @@ -8922,9 +8919,6 @@ ltda // lundbeck : 2015-08-06 H. Lundbeck A/S lundbeck -// lupin : 2014-11-07 LUPIN LIMITED -lupin - // luxe : 2014-01-09 Minds + Machines Group Limited luxe @@ -9207,7 +9201,7 @@ nyc // obi : 2014-09-25 OBI Group Holding SE & Co. KGaA obi -// observer : 2015-04-30 Top Level Spectrum, Inc. +// observer : 2015-04-30 Dog Beach, LLC observer // off : 2015-07-23 Johnson Shareholdings, Inc. @@ -9474,7 +9468,7 @@ realestate // realtor : 2014-05-29 Real Estate Domains LLC realtor -// realty : 2015-03-19 Fegistry, LLC +// realty : 2015-03-19 Dog Beach, LLC realty // recipes : 2013-10-17 Binky Moon, LLC @@ -9756,9 +9750,6 @@ show // showtime : 2015-08-06 CBS Domains Inc. showtime -// shriram : 2014-01-23 Shriram Capital Ltd. -shriram - // silk : 2015-06-25 Amazon Registry Services, Inc. silk @@ -11177,6 +11168,11 @@ edgestack.me // Submitted by Peter Palfrader / Debian Sysadmin Team debian.net +// Deno Land Inc : https://deno.com/ +// Submitted by Luca Casonato +deno.dev +deno-staging.dev + // deSEC : https://desec.io/ // Submitted by Peter Thomassen dedyn.io @@ -11198,6 +11194,10 @@ shop.th // Submitted by Paul Fang drayddns.com +// DreamCommerce : https://shoper.pl/ +// Submitted by Konrad Kotarba +shoparena.pl + // DreamHost : http://www.dreamhost.com/ // Submitted by Andrew Farmer dreamhosters.com @@ -11771,10 +11771,6 @@ myfast.host fastvps.site myfast.space -// Featherhead : https://featherhead.xyz/ -// Submitted by Simon Menke -fhapp.xyz - // Fedora : https://fedoraproject.org/ // submitted by Patrick Uiterwijk fedorainfracloud.org @@ -11812,6 +11808,10 @@ filegear-sg.me // Submitted by Chris Raynor firebaseapp.com +// FLAP : https://www.flap.cloud +// Submitted by Louis Chemineau +flap.id + // fly.io: https://fly.io // Submitted by Kurt Mackey fly.dev @@ -11870,6 +11870,10 @@ gentlentapis.com lab.ms cdn-edges.net +// Ghost Foundation : https://ghost.org +// Submitted by Matt Hanley +ghost.io + // GitHub, Inc. // Submitted by Patrick Toomey github.io @@ -12016,6 +12020,11 @@ graphox.us // Submitted by Tyler Todd awsmppl.com +// GünstigBestellen : https://günstigbestellen.de +// Submitted by Furkan Akkoc +günstigbestellen.de +günstigliefern.de + // Hakaran group: http://hakaran.cz // Submited by Arseniy Sokolov fin.ci @@ -12060,18 +12069,16 @@ secaas.hk // HOSTBIP REGISTRY : https://www.hostbip.com/ // Submitted by Atanunu Igbunuroghene -bpl.biz orx.biz -ng.city biz.gl -ng.ink col.ng firm.ng gen.ng ltd.ng ngo.ng -ng.school +edu.scot sch.so +org.yt // HostyHosting (hostyhosting.com) hostyhosting.io @@ -12089,6 +12096,14 @@ moonscale.net // Submitted by Hannu Aronsson iki.fi +// Incsub, LLC: https://incsub.com/ +// Submitted by Aaron Edwards +smushcdn.com +wphostedmail.com +wpmucdn.com +tempurl.host +wpmudev.host + // Individual Network Berlin e.V. : https://www.in-berlin.de/ // Submitted by Christian Seitz dyn-berlin.de @@ -12303,6 +12318,11 @@ oya.to co.krd edu.krd +// Krellian Ltd. : https://krellian.com +// Submitted by Ben Francis +krellian.net +webthings.io + // LCube - Professional hosting e.K. : https://www.lcube-webhosting.de // Submitted by Lars Laehn git-repos.de @@ -12360,6 +12380,10 @@ loginline.io loginline.services loginline.site +// Lõhmus Family, The +// Submitted by Heiki Lõhmus +lohmus.me + // LubMAN UMCS Sp. z o.o : https://lubman.pl/ // Submitted by Ireneusz Maliszewski krasnik.pl @@ -12765,6 +12789,10 @@ opensocial.site // Submitted by Sven Marnach opencraft.hosting +// OpenResearch GmbH: https://openresearch.com/ +// Submitted by Philipp Schmid +orsites.com + // Opera Software, A.S.A. // Submitted by Yngve Pettersen operaunite.com @@ -13052,6 +13080,10 @@ shiftedit.io // Submitted by Alex Bowers myshopblocks.com +// Shopify : https://www.shopify.com +// Submitted by Alex Richter +myshopify.com + // Shopit : https://www.shopitcommerce.com/ // Submitted by Craig McMahon shopitsite.com @@ -13399,6 +13431,7 @@ daemon.panel.gg // WoltLab GmbH : https://www.woltlab.com // Submitted by Tim Düsterhus +woltlab-demo.com myforum.community community-pro.de diskussionsbereich.de @@ -13491,4 +13524,17 @@ impertrix.com // GignoSystemJapan: http://gsj.bz // Submitted by GignoSystemJapan gsj.bz + +// Rusnames Limited: http://rusnames.ru/ +// Submitted by Sergey Zotov +биз.рус +ком.рус +крым.рус +мир.рус +мск.рус +орг.рус +самара.рус +сочи.рус +спб.рус +я.рус // ===END PRIVATE DOMAINS=== diff --git a/src/tld/tests/test_core.py b/src/tld/tests/test_core.py index 68f3d2a..47c675e 100644 --- a/src/tld/tests/test_core.py +++ b/src/tld/tests/test_core.py @@ -7,7 +7,7 @@ from tempfile import gettempdir from typing import Type -from urllib.parse import urlsplit +from urllib.parse import urlsplit, SplitResult from faker import Faker # type: ignore @@ -648,13 +648,13 @@ def test_15_fail_get_tld_names(self): get_tld(**kwargs) @log_info - def test_15_fail_get_fld_wrong_kwargs(self): + def test_16_fail_get_fld_wrong_kwargs(self): """Test fail `get_fld` with wrong kwargs.""" with self.assertRaises(TldImproperlyConfigured): get_fld(self.good_url, as_object=True) @log_info - def test_16_fail_parse_tld(self): + def test_17_fail_parse_tld(self): """Test fail `parse_tld`. Assert raise TldIOError on wrong `NAMES_SOURCE_URL` for `parse_tld`. @@ -670,7 +670,7 @@ def test_16_fail_parse_tld(self): self.assertEqual(parsed_tld, (None, None, None)) @log_info - def test_17_get_tld_names_and_reset_tld_names(self): + def test_18_get_tld_names_and_reset_tld_names(self): """Test fail `get_tld_names` and repair using `reset_tld_names`.""" tmp_filename = join( gettempdir(), @@ -712,14 +712,14 @@ def test_17_get_tld_names_and_reset_tld_names(self): @internet_available_only @log_info - def test_18_update_tld_names_cli(self): + def test_19_update_tld_names_cli(self): """Test the return code of the CLI version of `update_tld_names`.""" reset_tld_names() res = update_tld_names_cli() self.assertEqual(res, 0) @log_info - def test_19_parse_tld_custom_tld_names_good_patterns(self): + def test_20_parse_tld_custom_tld_names_good_patterns(self): """Test `parse_tld` good URL patterns for custom tld names.""" res = [] @@ -735,7 +735,7 @@ def test_19_parse_tld_custom_tld_names_good_patterns(self): return res @log_info - def test_20_tld_custom_tld_names_good_patterns_pass_parsed_object(self): + def test_21_tld_custom_tld_names_good_patterns_pass_parsed_object(self): """Test `get_tld` good URL patterns for custom tld names.""" res = [] for data in self.good_patterns_custom_parser: @@ -771,7 +771,7 @@ def test_20_tld_custom_tld_names_good_patterns_pass_parsed_object(self): return res @log_info - def test_21_reset_tld_names_for_custom_parser(self): + def test_22_reset_tld_names_for_custom_parser(self): """Test `reset_tld_names` for `tld_names_local_path`.""" res = [] parser_class = self.get_custom_parser_class() @@ -814,7 +814,7 @@ def test_21_reset_tld_names_for_custom_parser(self): return res @log_info - def test_22_fail_define_custom_parser_class_without_uid(self): + def test_23_fail_define_custom_parser_class_without_uid(self): """Test fail define custom parser class without `uid`.""" class CustomParser(BaseTLDSourceParser): pass @@ -832,7 +832,7 @@ class AnotherCustomParser(BaseTLDSourceParser): AnotherCustomParser.get_tld_names() @log_info - def test_23_len_trie_nodes(self): + def test_24_len_trie_nodes(self): """Test len of the trie nodes.""" get_tld('http://delusionalinsanity.com') tld_names = get_tld_names_container() @@ -842,7 +842,7 @@ def test_23_len_trie_nodes(self): ) @log_info - def test_24_get_tld_names_no_arguments(self): + def test_25_get_tld_names_no_arguments(self): """Test len of the trie nodes.""" tld_names = get_tld_names() self.assertGreater( @@ -850,6 +850,29 @@ def test_24_get_tld_names_no_arguments(self): 0 ) + @log_info + def test_26_case(self): + res = get_tld( + 'https://MyDomain.com/AsDrFt?QUeRY=12aA', + fail_silently=True, + search_private=False, + as_object=True + ) + self.assertEqual(res.tld, 'com') + self.assertEqual(res.domain, 'mydomain') + self.assertEqual(res.subdomain, '') + self.assertEqual(res.fld, 'mydomain.com') + self.assertEqual( + res.parsed_url, + SplitResult( + scheme='https', + netloc='MyDomain.com', + path='/AsDrFt', + query='QUeRY=12aA', + fragment='' + ) + ) + if __name__ == '__main__': unittest.main() diff --git a/src/tld/utils.py b/src/tld/utils.py index 6947843..972336d 100644 --- a/src/tld/utils.py +++ b/src/tld/utils.py @@ -320,8 +320,6 @@ def process_url( ) if not isinstance(url, SplitResult): - url = url.lower() - if ( fix_protocol and not url.startswith(('//', 'http://', 'https://')) ): @@ -341,6 +339,8 @@ def process_url( else: raise TldBadUrl(url=url) + domain_name = domain_name.lower() + # This will correctly handle dots at the end of domain name in URLs like # https://github.com............/barseghyanartur/tld/ if domain_name.endswith('.'):