From c97bfbf31fd300d50d60d9e0559d499f11120651 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sun, 24 Apr 2022 21:14:00 +0100 Subject: [PATCH 01/16] gh-81790: support "UNC" device paths in ntpath.splitdrive() --- Lib/ntpath.py | 21 ++++++++++++++++---- Lib/pathlib.py | 53 ++++---------------------------------------------- 2 files changed, 21 insertions(+), 53 deletions(-) diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 041ebc75cb127c..d0a73799f1d6d6 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -146,17 +146,30 @@ def splitdrive(p): sep = b'\\' altsep = b'/' colon = b':' + unc2 = b'\\\\' + unc4 = b'\\\\?\\' + unc8 = b'\\\\?\\UNC\\' else: sep = '\\' altsep = '/' colon = ':' + unc2 = '\\\\' + unc4 = '\\\\?\\' + unc8 = '\\\\?\\UNC\\' normp = p.replace(altsep, sep) - if (normp[0:2] == sep*2) and (normp[2:3] != sep): + if normp[:8] == unc8: + normp = sep * 8 + normp[8:] + offset = 6 + elif normp[:4] == unc4: + offset = 4 + else: + offset = 0 + if (normp[offset:offset + 2] == unc2) and (normp[offset + 2:offset + 3] != sep): # is a UNC path: # vvvvvvvvvvvvvvvvvvvv drive letter or UNC path # \\machine\mountpoint\directory\etc\... # directory ^^^^^^^^^^^^^^^ - index = normp.find(sep, 2) + index = normp.find(sep, offset + 2) if index == -1: return p[:0], p index2 = normp.find(sep, index + 1) @@ -167,8 +180,8 @@ def splitdrive(p): if index2 == -1: index2 = len(p) return p[:index2], p[index2:] - if normp[1:2] == colon: - return p[:2], p[2:] + if normp[offset + 1:offset + 2] == colon: + return p[:offset + 2], p[offset + 2:] return p[:0], p diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 4763ab54f6ba81..aa82a3c2c7d8be 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -120,9 +120,6 @@ class _WindowsFlavour(_Flavour): is_supported = (os.name == 'nt') - drive_letters = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ') - ext_namespace_prefix = '\\\\?\\' - reserved_names = ( {'CON', 'PRN', 'AUX', 'NUL', 'CONIN$', 'CONOUT$'} | {'COM%s' % c for c in '123456789\xb9\xb2\xb3'} | @@ -145,43 +142,11 @@ class _WindowsFlavour(_Flavour): # even with the '\\?\' prefix. def splitroot(self, part, sep=sep): - first = part[0:1] - second = part[1:2] - if (second == sep and first == sep): - # XXX extended paths should also disable the collapsing of "." - # components (according to MSDN docs). - prefix, part = self._split_extended_path(part) - first = part[0:1] - second = part[1:2] + drv, rest = self.pathmod.splitdrive(part) + if drv[:1] == sep or rest[:1] == sep: + return drv, sep, rest.lstrip(sep) else: - prefix = '' - third = part[2:3] - if (second == sep and first == sep and third != sep): - # is a UNC path: - # vvvvvvvvvvvvvvvvvvvvv root - # \\machine\mountpoint\directory\etc\... - # directory ^^^^^^^^^^^^^^ - index = part.find(sep, 2) - if index != -1: - index2 = part.find(sep, index + 1) - # a UNC path can't have two slashes in a row - # (after the initial two) - if index2 != index + 1: - if index2 == -1: - index2 = len(part) - if prefix: - return prefix + part[1:index2], sep, part[index2+1:] - else: - return part[:index2], sep, part[index2+1:] - drv = root = '' - if second == ':' and first in self.drive_letters: - drv = part[:2] - part = part[2:] - first = third - if first == sep: - root = first - part = part.lstrip(sep) - return prefix + drv, root, part + return drv, '', rest def casefold(self, s): return s.lower() @@ -192,16 +157,6 @@ def casefold_parts(self, parts): def compile_pattern(self, pattern): return re.compile(fnmatch.translate(pattern), re.IGNORECASE).fullmatch - def _split_extended_path(self, s, ext_prefix=ext_namespace_prefix): - prefix = '' - if s.startswith(ext_prefix): - prefix = s[:4] - s = s[4:] - if s.startswith('UNC\\'): - prefix += s[:3] - s = '\\' + s[3:] - return prefix, s - def is_reserved(self, parts): # NOTE: the rules for reserved names seem somewhat complicated # (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not From 2da18baea29bfc9a3d91176666feb5f40d29dbb8 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sun, 24 Apr 2022 22:27:11 +0100 Subject: [PATCH 02/16] Add docs, news, tests. --- Doc/library/os.path.rst | 2 +- Lib/pathlib.py | 15 --------------- Lib/test/test_ntpath.py | 16 ++++++++++++++++ ...2022-04-24-22-26-45.gh-issue-81790.M5Rvpm.rst | 3 +++ 4 files changed, 20 insertions(+), 16 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-04-24-22-26-45.gh-issue-81790.M5Rvpm.rst diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst index c201b1460ede30..7e2ae79016beba 100644 --- a/Doc/library/os.path.rst +++ b/Doc/library/os.path.rst @@ -469,7 +469,7 @@ the :mod:`glob` module.) ("c:", "/dir") If the path contains a UNC path, drive will contain the host name - and share, up to but not including the fourth separator:: + and share:: >>> splitdrive("//host/computer/dir") ("//host/computer", "/dir") diff --git a/Lib/pathlib.py b/Lib/pathlib.py index aa82a3c2c7d8be..dd7b2dea1a6ae5 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -126,21 +126,6 @@ class _WindowsFlavour(_Flavour): {'LPT%s' % c for c in '123456789\xb9\xb2\xb3'} ) - # Interesting findings about extended paths: - # * '\\?\c:\a' is an extended path, which bypasses normal Windows API - # path processing. Thus relative paths are not resolved and slash is not - # translated to backslash. It has the native NT path limit of 32767 - # characters, but a bit less after resolving device symbolic links, - # such as '\??\C:' => '\Device\HarddiskVolume2'. - # * '\\?\c:/a' looks for a device named 'C:/a' because slash is a - # regular name character in the object namespace. - # * '\\?\c:\foo/bar' is invalid because '/' is illegal in NT filesystems. - # The only path separator at the filesystem level is backslash. - # * '//?/c:\a' and '//?/c:/a' are effectively equivalent to '\\.\c:\a' and - # thus limited to MAX_PATH. - # * Prior to Windows 8, ANSI API bytes paths are limited to MAX_PATH, - # even with the '\\?\' prefix. - def splitroot(self, part, sep=sep): drv, rest = self.pathmod.splitdrive(part) if drv[:1] == sep or rest[:1] == sep: diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index 7211ed861762b4..1b4014ae736e94 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -117,6 +117,22 @@ def test_splitdrive(self): # Issue #19911: UNC part containing U+0130 self.assertEqual(ntpath.splitdrive('//conky/MOUNTPOİNT/foo/bar'), ('//conky/MOUNTPOİNT', '/foo/bar')) + # gh-81790: support device namespace, including UNC drives. + tester('ntpath.splitdrive("//?/c:")', ("//?/c:", "")) + tester('ntpath.splitdrive("//?/c:/")', ("//?/c:", "/")) + tester('ntpath.splitdrive("//?/c:/dir")', ("//?/c:", "/dir")) + tester('ntpath.splitdrive("//?/UNC/")', ("", "//?/UNC/")) + tester('ntpath.splitdrive("//?/UNC/server/")', ("//?/UNC/server/", "")) + tester('ntpath.splitdrive("//?/UNC/server/share")', ("//?/UNC/server/share", "")) + tester('ntpath.splitdrive("//?/UNC/server/share/dir")', ("//?/UNC/server/share", "/dir")) + tester('ntpath.splitdrive("\\\\?\\c:")', ("\\\\?\\c:", "")) + tester('ntpath.splitdrive("\\\\?\\c:\\")', ("\\\\?\\c:", "\\")) + tester('ntpath.splitdrive("\\\\?\\c:\\dir")', ("\\\\?\\c:", "\\dir")) + tester('ntpath.splitdrive("\\\\?\\UNC\\")', ("", "\\\\?\\UNC\\")) + tester('ntpath.splitdrive("\\\\?\\UNC\\server\\")', ("\\\\?\\UNC\\server\\", "")) + tester('ntpath.splitdrive("\\\\?\\UNC\\server\\share")', ("\\\\?\\UNC\\server\\share", "")) + tester('ntpath.splitdrive("\\\\?\\UNC\\server\\share\\dir")', + ("\\\\?\\UNC\\server\\share", "\\dir")) def test_split(self): tester('ntpath.split("c:\\foo\\bar")', ('c:\\foo', 'bar')) diff --git a/Misc/NEWS.d/next/Library/2022-04-24-22-26-45.gh-issue-81790.M5Rvpm.rst b/Misc/NEWS.d/next/Library/2022-04-24-22-26-45.gh-issue-81790.M5Rvpm.rst new file mode 100644 index 00000000000000..faed78d72c0b45 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-04-24-22-26-45.gh-issue-81790.M5Rvpm.rst @@ -0,0 +1,3 @@ +:func:`os.path.splitdrive` now understands DOS device paths (beginning +``\\?\``), including UNC links (beginning ``\\?\UNC\``). Contributed by +Barney Gale. From 49f9373ad5f2f69d6d15a00e2595bf54c522cfc6 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sun, 24 Apr 2022 23:07:51 +0100 Subject: [PATCH 03/16] Add a couple of comments. --- Lib/ntpath.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Lib/ntpath.py b/Lib/ntpath.py index d0a73799f1d6d6..cf12c230587933 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -157,13 +157,14 @@ def splitdrive(p): unc4 = '\\\\?\\' unc8 = '\\\\?\\UNC\\' normp = p.replace(altsep, sep) + offset = 0 if normp[:8] == unc8: + # is a DOS device path with a UNC link, e.g. \\?\UNC\server\share\dir\file normp = sep * 8 + normp[8:] offset = 6 elif normp[:4] == unc4: + # is a DOS device path without a UNC link, e.g. \\?\c:\dir\file offset = 4 - else: - offset = 0 if (normp[offset:offset + 2] == unc2) and (normp[offset + 2:offset + 3] != sep): # is a UNC path: # vvvvvvvvvvvvvvvvvvvv drive letter or UNC path From 57840d405db2f9965f219929cab9ae79a521734a Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Mon, 25 Apr 2022 00:54:43 +0100 Subject: [PATCH 04/16] Improve variable names a little --- Lib/ntpath.py | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/Lib/ntpath.py b/Lib/ntpath.py index cf12c230587933..bd27d7b03d47eb 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -146,31 +146,28 @@ def splitdrive(p): sep = b'\\' altsep = b'/' colon = b':' - unc2 = b'\\\\' - unc4 = b'\\\\?\\' - unc8 = b'\\\\?\\UNC\\' + dev_prefix = b'\\\\?\\' + unc_prefix = b'UNC\\' else: sep = '\\' altsep = '/' colon = ':' - unc2 = '\\\\' - unc4 = '\\\\?\\' - unc8 = '\\\\?\\UNC\\' + dev_prefix = '\\\\?\\' + unc_prefix = 'UNC\\' normp = p.replace(altsep, sep) - offset = 0 - if normp[:8] == unc8: - # is a DOS device path with a UNC link, e.g. \\?\UNC\server\share\dir\file - normp = sep * 8 + normp[8:] - offset = 6 - elif normp[:4] == unc4: - # is a DOS device path without a UNC link, e.g. \\?\c:\dir\file - offset = 4 - if (normp[offset:offset + 2] == unc2) and (normp[offset + 2:offset + 3] != sep): + start = 0 + if normp[:4] == dev_prefix: + start = 4 + if normp[4:8] == unc_prefix: + # prepend slashes to machine name + normp = 8 * sep + normp[8:] + start = 6 + if (normp[start:start + 2] == sep*2) and (normp[start + 2:start + 3] != sep): # is a UNC path: # vvvvvvvvvvvvvvvvvvvv drive letter or UNC path # \\machine\mountpoint\directory\etc\... # directory ^^^^^^^^^^^^^^^ - index = normp.find(sep, offset + 2) + index = normp.find(sep, start + 2) if index == -1: return p[:0], p index2 = normp.find(sep, index + 1) @@ -181,8 +178,8 @@ def splitdrive(p): if index2 == -1: index2 = len(p) return p[:index2], p[index2:] - if normp[offset + 1:offset + 2] == colon: - return p[:offset + 2], p[offset + 2:] + if normp[start + 1:start + 2] == colon: + return p[:start + 2], p[start + 2:] return p[:0], p From 264d79e3429223a637472d96daf2ff572038c842 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Mon, 25 Apr 2022 20:54:18 +0100 Subject: [PATCH 05/16] Address review feedback --- Lib/ntpath.py | 52 ++++++++++++++++++++++------------------- Lib/test/test_ntpath.py | 2 ++ 2 files changed, 30 insertions(+), 24 deletions(-) diff --git a/Lib/ntpath.py b/Lib/ntpath.py index bd27d7b03d47eb..dd6533f4cdeadc 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -146,40 +146,44 @@ def splitdrive(p): sep = b'\\' altsep = b'/' colon = b':' - dev_prefix = b'\\\\?\\' + prefix = b'\\\\?\\' unc_prefix = b'UNC\\' else: sep = '\\' altsep = '/' colon = ':' - dev_prefix = '\\\\?\\' + prefix = '\\\\?\\' unc_prefix = 'UNC\\' normp = p.replace(altsep, sep) - start = 0 - if normp[:4] == dev_prefix: - start = 4 + if normp[:4] == prefix: if normp[4:8] == unc_prefix: - # prepend slashes to machine name - normp = 8 * sep + normp[8:] - start = 6 - if (normp[start:start + 2] == sep*2) and (normp[start + 2:start + 3] != sep): - # is a UNC path: - # vvvvvvvvvvvvvvvvvvvv drive letter or UNC path - # \\machine\mountpoint\directory\etc\... - # directory ^^^^^^^^^^^^^^^ - index = normp.find(sep, start + 2) - if index == -1: + start = 8 + elif normp[5:6] == colon and not normp[6:7].strip(sep): + return p[:6], p[6:] + else: return p[:0], p - index2 = normp.find(sep, index + 1) - # a UNC path can't have two slashes in a row - # (after the initial two) - if index2 == index + 1: + else: + if normp[:2] == sep*2 and normp[2:3] != sep: + start = 2 + elif normp[1:2] == colon: + return p[:2], p[2:] + else: return p[:0], p - if index2 == -1: - index2 = len(p) - return p[:index2], p[index2:] - if normp[start + 1:start + 2] == colon: - return p[:start + 2], p[start + 2:] + # is a UNC path: + # vvvvvvvvvvvvvvvvvvvv drive letter or UNC path + # \\machine\mountpoint\directory\etc\... + # directory ^^^^^^^^^^^^^^^ + index = normp.find(sep, start) + if index == -1: + return p[:0], p + index2 = normp.find(sep, index + 1) + # a UNC path can't have two slashes in a row + # (after the initial two) + if index2 == index + 1: + return p[:0], p + if index2 == -1: + index2 = len(p) + return p[:index2], p[index2:] return p[:0], p diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index 1b4014ae736e94..8014c522159100 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -121,6 +121,7 @@ def test_splitdrive(self): tester('ntpath.splitdrive("//?/c:")', ("//?/c:", "")) tester('ntpath.splitdrive("//?/c:/")', ("//?/c:", "/")) tester('ntpath.splitdrive("//?/c:/dir")', ("//?/c:", "/dir")) + tester('ntpath.splitdrive("//?/c:blah")', ("", "//?/c:blah")) tester('ntpath.splitdrive("//?/UNC/")', ("", "//?/UNC/")) tester('ntpath.splitdrive("//?/UNC/server/")', ("//?/UNC/server/", "")) tester('ntpath.splitdrive("//?/UNC/server/share")', ("//?/UNC/server/share", "")) @@ -128,6 +129,7 @@ def test_splitdrive(self): tester('ntpath.splitdrive("\\\\?\\c:")', ("\\\\?\\c:", "")) tester('ntpath.splitdrive("\\\\?\\c:\\")', ("\\\\?\\c:", "\\")) tester('ntpath.splitdrive("\\\\?\\c:\\dir")', ("\\\\?\\c:", "\\dir")) + tester('ntpath.splitdrive("\\\\?\\c:blah")', ("", "\\\\?\\c:blah")) tester('ntpath.splitdrive("\\\\?\\UNC\\")', ("", "\\\\?\\UNC\\")) tester('ntpath.splitdrive("\\\\?\\UNC\\server\\")', ("\\\\?\\UNC\\server\\", "")) tester('ntpath.splitdrive("\\\\?\\UNC\\server\\share")', ("\\\\?\\UNC\\server\\share", "")) From 0652b1256b2af13fe22ccd552534be8756c27013 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Mon, 25 Apr 2022 21:35:39 +0100 Subject: [PATCH 06/16] Add a few comments --- Lib/ntpath.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Lib/ntpath.py b/Lib/ntpath.py index dd6533f4cdeadc..a4fb7683a9841a 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -157,17 +157,23 @@ def splitdrive(p): normp = p.replace(altsep, sep) if normp[:4] == prefix: if normp[4:8] == unc_prefix: + # e.g. \\?\UNC\server\share\dir\file start = 8 elif normp[5:6] == colon and not normp[6:7].strip(sep): + # e.g. \\?\c:\dir\file return p[:6], p[6:] else: + # anything else with a \\?\ prefix return p[:0], p else: if normp[:2] == sep*2 and normp[2:3] != sep: + # e.g. \\server\share\dir\file start = 2 elif normp[1:2] == colon: + # e.g. c:\dir\file return p[:2], p[2:] else: + # anything else return p[:0], p # is a UNC path: # vvvvvvvvvvvvvvvvvvvv drive letter or UNC path From bcc561cb8dfa234ce99842e653101566a8b024a6 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Tue, 26 Apr 2022 18:46:40 +0100 Subject: [PATCH 07/16] Fix volume-by-guid path support --- Lib/ntpath.py | 7 ++++++- Lib/test/test_ntpath.py | 4 ++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/Lib/ntpath.py b/Lib/ntpath.py index a4fb7683a9841a..c11cf97f0b0e66 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -148,17 +148,22 @@ def splitdrive(p): colon = b':' prefix = b'\\\\?\\' unc_prefix = b'UNC\\' + vol_prefix = b'VOLUME' else: sep = '\\' altsep = '/' colon = ':' prefix = '\\\\?\\' unc_prefix = 'UNC\\' + vol_prefix = 'VOLUME' normp = p.replace(altsep, sep) if normp[:4] == prefix: - if normp[4:8] == unc_prefix: + if normp[4:8].upper() == unc_prefix: # e.g. \\?\UNC\server\share\dir\file start = 8 + elif normp[4:10].upper() == vol_prefix: + # e.g. \\?\VOLUME{...}\dir\file + start = 2 elif normp[5:6] == colon and not normp[6:7].strip(sep): # e.g. \\?\c:\dir\file return p[:6], p[6:] diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index 8014c522159100..d1840ee608e61c 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -126,6 +126,8 @@ def test_splitdrive(self): tester('ntpath.splitdrive("//?/UNC/server/")', ("//?/UNC/server/", "")) tester('ntpath.splitdrive("//?/UNC/server/share")', ("//?/UNC/server/share", "")) tester('ntpath.splitdrive("//?/UNC/server/share/dir")', ("//?/UNC/server/share", "/dir")) + tester('ntpath.splitdrive("//?/VOLUME{00000000-0000-0000-0000-000000000000}/spam")', + ('//?/VOLUME{00000000-0000-0000-0000-000000000000}', '/spam')) tester('ntpath.splitdrive("\\\\?\\c:")', ("\\\\?\\c:", "")) tester('ntpath.splitdrive("\\\\?\\c:\\")', ("\\\\?\\c:", "\\")) tester('ntpath.splitdrive("\\\\?\\c:\\dir")', ("\\\\?\\c:", "\\dir")) @@ -135,6 +137,8 @@ def test_splitdrive(self): tester('ntpath.splitdrive("\\\\?\\UNC\\server\\share")', ("\\\\?\\UNC\\server\\share", "")) tester('ntpath.splitdrive("\\\\?\\UNC\\server\\share\\dir")', ("\\\\?\\UNC\\server\\share", "\\dir")) + tester('ntpath.splitdrive("\\\\?\\VOLUME{00000000-0000-0000-0000-000000000000}\\spam")', + ('\\\\?\\VOLUME{00000000-0000-0000-0000-000000000000}', '\\spam')) def test_split(self): tester('ntpath.split("c:\\foo\\bar")', ('c:\\foo', 'bar')) From 40c1eafec5fe0f003d484ca95ceb91b1277c127f Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Tue, 26 Apr 2022 21:23:16 +0100 Subject: [PATCH 08/16] Update Lib/ntpath.py Co-authored-by: Eryk Sun --- Lib/ntpath.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/ntpath.py b/Lib/ntpath.py index c11cf97f0b0e66..0de09210b5ac47 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -148,7 +148,7 @@ def splitdrive(p): colon = b':' prefix = b'\\\\?\\' unc_prefix = b'UNC\\' - vol_prefix = b'VOLUME' + vol_prefix = b'VOLUME{' else: sep = '\\' altsep = '/' From 34358c8ae10444caa2a098d0a72079c63dcb894d Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Tue, 26 Apr 2022 21:23:22 +0100 Subject: [PATCH 09/16] Update Lib/ntpath.py Co-authored-by: Eryk Sun --- Lib/ntpath.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 0de09210b5ac47..3c79e5957ef898 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -161,7 +161,7 @@ def splitdrive(p): if normp[4:8].upper() == unc_prefix: # e.g. \\?\UNC\server\share\dir\file start = 8 - elif normp[4:10].upper() == vol_prefix: + elif normp[4:11].upper() == vol_prefix: # e.g. \\?\VOLUME{...}\dir\file start = 2 elif normp[5:6] == colon and not normp[6:7].strip(sep): From 0213a72eade04d30e72141b58a4ce0864d33aaf3 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Tue, 26 Apr 2022 21:28:51 +0100 Subject: [PATCH 10/16] Update Lib/ntpath.py Co-authored-by: Eryk Sun --- Lib/ntpath.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 3c79e5957ef898..822d3a4b483f14 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -155,7 +155,7 @@ def splitdrive(p): colon = ':' prefix = '\\\\?\\' unc_prefix = 'UNC\\' - vol_prefix = 'VOLUME' + vol_prefix = 'VOLUME{' normp = p.replace(altsep, sep) if normp[:4] == prefix: if normp[4:8].upper() == unc_prefix: From a7fa62b5055f122d57f04ffaf5025186b4b85dea Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sat, 28 May 2022 14:48:25 +0100 Subject: [PATCH 11/16] Revise implementation to affect only paths beginning `\\?\UNC\`. --- Lib/ntpath.py | 60 +++++++------------ Lib/test/test_ntpath.py | 9 ++- ...2-04-24-22-26-45.gh-issue-81790.M5Rvpm.rst | 5 +- 3 files changed, 29 insertions(+), 45 deletions(-) diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 822d3a4b483f14..659f2c976fb10c 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -146,55 +146,35 @@ def splitdrive(p): sep = b'\\' altsep = b'/' colon = b':' - prefix = b'\\\\?\\' - unc_prefix = b'UNC\\' - vol_prefix = b'VOLUME{' + unc_prefix = b'\\\\?\\UNC\\' else: sep = '\\' altsep = '/' colon = ':' - prefix = '\\\\?\\' - unc_prefix = 'UNC\\' - vol_prefix = 'VOLUME{' + unc_prefix = '\\\\?\\UNC\\' normp = p.replace(altsep, sep) - if normp[:4] == prefix: - if normp[4:8].upper() == unc_prefix: - # e.g. \\?\UNC\server\share\dir\file + if (normp[0:2] == sep * 2) and (normp[2:3] != sep): + # is a UNC path: + # vvvvvvvvvvvvvvvvvvvv drive letter or UNC path + # \\machine\mountpoint\directory\etc\... + # directory ^^^^^^^^^^^^^^^ + if normp[:8].upper() == unc_prefix: start = 8 - elif normp[4:11].upper() == vol_prefix: - # e.g. \\?\VOLUME{...}\dir\file - start = 2 - elif normp[5:6] == colon and not normp[6:7].strip(sep): - # e.g. \\?\c:\dir\file - return p[:6], p[6:] else: - # anything else with a \\?\ prefix - return p[:0], p - else: - if normp[:2] == sep*2 and normp[2:3] != sep: - # e.g. \\server\share\dir\file start = 2 - elif normp[1:2] == colon: - # e.g. c:\dir\file - return p[:2], p[2:] - else: - # anything else + index = normp.find(sep, start) + if index == -1: + return p[:0], p + index2 = normp.find(sep, index + 1) + # a UNC path can't have two slashes in a row + # (after the initial two) + if index2 == index + 1: return p[:0], p - # is a UNC path: - # vvvvvvvvvvvvvvvvvvvv drive letter or UNC path - # \\machine\mountpoint\directory\etc\... - # directory ^^^^^^^^^^^^^^^ - index = normp.find(sep, start) - if index == -1: - return p[:0], p - index2 = normp.find(sep, index + 1) - # a UNC path can't have two slashes in a row - # (after the initial two) - if index2 == index + 1: - return p[:0], p - if index2 == -1: - index2 = len(p) - return p[:index2], p[index2:] + if index2 == -1: + index2 = len(p) + return p[:index2], p[index2:] + if normp[1:2] == colon: + return p[:2], p[2:] return p[:0], p diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index d1840ee608e61c..43fd33081ce9f5 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -121,17 +121,19 @@ def test_splitdrive(self): tester('ntpath.splitdrive("//?/c:")', ("//?/c:", "")) tester('ntpath.splitdrive("//?/c:/")', ("//?/c:", "/")) tester('ntpath.splitdrive("//?/c:/dir")', ("//?/c:", "/dir")) - tester('ntpath.splitdrive("//?/c:blah")', ("", "//?/c:blah")) tester('ntpath.splitdrive("//?/UNC/")', ("", "//?/UNC/")) tester('ntpath.splitdrive("//?/UNC/server/")', ("//?/UNC/server/", "")) tester('ntpath.splitdrive("//?/UNC/server/share")', ("//?/UNC/server/share", "")) tester('ntpath.splitdrive("//?/UNC/server/share/dir")', ("//?/UNC/server/share", "/dir")) tester('ntpath.splitdrive("//?/VOLUME{00000000-0000-0000-0000-000000000000}/spam")', ('//?/VOLUME{00000000-0000-0000-0000-000000000000}', '/spam')) + tester('ntpath.splitdrive("//?/BootPartition/")', ("//?/BootPartition", "/")) + tester('ntpath.splitdrive("//?/Harddisk0Partition2/")', ("//?/Harddisk0Partition2", "/")) + tester('ntpath.splitdrive("//?/HarddiskVolume2/")', ("//?/HarddiskVolume2", "/")) + tester('ntpath.splitdrive("\\\\?\\c:")', ("\\\\?\\c:", "")) tester('ntpath.splitdrive("\\\\?\\c:\\")', ("\\\\?\\c:", "\\")) tester('ntpath.splitdrive("\\\\?\\c:\\dir")', ("\\\\?\\c:", "\\dir")) - tester('ntpath.splitdrive("\\\\?\\c:blah")', ("", "\\\\?\\c:blah")) tester('ntpath.splitdrive("\\\\?\\UNC\\")', ("", "\\\\?\\UNC\\")) tester('ntpath.splitdrive("\\\\?\\UNC\\server\\")', ("\\\\?\\UNC\\server\\", "")) tester('ntpath.splitdrive("\\\\?\\UNC\\server\\share")', ("\\\\?\\UNC\\server\\share", "")) @@ -139,6 +141,9 @@ def test_splitdrive(self): ("\\\\?\\UNC\\server\\share", "\\dir")) tester('ntpath.splitdrive("\\\\?\\VOLUME{00000000-0000-0000-0000-000000000000}\\spam")', ('\\\\?\\VOLUME{00000000-0000-0000-0000-000000000000}', '\\spam')) + tester('ntpath.splitdrive("\\\\?\\BootPartition\\")', ("\\\\?\\BootPartition", "\\")) + tester('ntpath.splitdrive("\\\\?\\Harddisk0Partition2\\")', ("\\\\?\\Harddisk0Partition2", "\\")) + tester('ntpath.splitdrive("\\\\?\\HarddiskVolume2\\")', ("\\\\?\\HarddiskVolume2", "\\")) def test_split(self): tester('ntpath.split("c:\\foo\\bar")', ('c:\\foo', 'bar')) diff --git a/Misc/NEWS.d/next/Library/2022-04-24-22-26-45.gh-issue-81790.M5Rvpm.rst b/Misc/NEWS.d/next/Library/2022-04-24-22-26-45.gh-issue-81790.M5Rvpm.rst index faed78d72c0b45..8894493e97410f 100644 --- a/Misc/NEWS.d/next/Library/2022-04-24-22-26-45.gh-issue-81790.M5Rvpm.rst +++ b/Misc/NEWS.d/next/Library/2022-04-24-22-26-45.gh-issue-81790.M5Rvpm.rst @@ -1,3 +1,2 @@ -:func:`os.path.splitdrive` now understands DOS device paths (beginning -``\\?\``), including UNC links (beginning ``\\?\UNC\``). Contributed by -Barney Gale. +:func:`os.path.splitdrive` now understands DOS device paths with UNC +links (beginning ``\\?\UNC\``). Contributed by Barney Gale. From 992f59d9037ac031e6093e31210a7cf34db217b8 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sat, 28 May 2022 15:57:26 +0100 Subject: [PATCH 12/16] Undo formatting change. --- Lib/ntpath.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 659f2c976fb10c..872a425dc45a7a 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -153,7 +153,7 @@ def splitdrive(p): colon = ':' unc_prefix = '\\\\?\\UNC\\' normp = p.replace(altsep, sep) - if (normp[0:2] == sep * 2) and (normp[2:3] != sep): + if (normp[0:2] == sep*2) and (normp[2:3] != sep): # is a UNC path: # vvvvvvvvvvvvvvvvvvvv drive letter or UNC path # \\machine\mountpoint\directory\etc\... From 3f163ffadf155df7bb48a421431fc4c34d08e904 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sat, 28 May 2022 16:17:40 +0100 Subject: [PATCH 13/16] Re-introduce comment with some interesting findings on extended paths. --- Lib/ntpath.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 872a425dc45a7a..d9d4b39fc8334f 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -118,6 +118,21 @@ def join(path, *paths): raise +# Interesting findings about extended paths: +# * '\\?\c:\a' is an extended path, which bypasses normal Windows API +# path processing. Thus relative paths are not resolved and slash is not +# translated to backslash. It has the native NT path limit of 32767 +# characters, but a bit less after resolving device symbolic links, +# such as '\??\C:' => '\Device\HarddiskVolume2'. +# * '\\?\c:/a' looks for a device named 'C:/a' because slash is a +# regular name character in the object namespace. +# * '\\?\c:\foo/bar' is invalid because '/' is illegal in NT filesystems. +# The only path separator at the filesystem level is backslash. +# * '//?/c:\a' and '//?/c:/a' are effectively equivalent to '\\.\c:\a' and +# thus limited to MAX_PATH. +# * Prior to Windows 8, ANSI API bytes paths are limited to MAX_PATH, +# even with the '\\?\' prefix. + # Split a path in a drive specification (a drive letter followed by a # colon) and the path specification. # It is always true that drivespec + pathspec == p From 4244684aef0ea83d89a8b854b6b6ecabed550b64 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Tue, 31 May 2022 13:39:02 +0100 Subject: [PATCH 14/16] Fix handling of `\\?\UNC` (no trailing slash) --- Lib/ntpath.py | 6 +++--- Lib/test/test_ntpath.py | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Lib/ntpath.py b/Lib/ntpath.py index d9d4b39fc8334f..fc2754ad86e9f2 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -161,19 +161,19 @@ def splitdrive(p): sep = b'\\' altsep = b'/' colon = b':' - unc_prefix = b'\\\\?\\UNC\\' + unc_prefix = b'\\\\?\\UNC' else: sep = '\\' altsep = '/' colon = ':' - unc_prefix = '\\\\?\\UNC\\' + unc_prefix = '\\\\?\\UNC' normp = p.replace(altsep, sep) if (normp[0:2] == sep*2) and (normp[2:3] != sep): # is a UNC path: # vvvvvvvvvvvvvvvvvvvv drive letter or UNC path # \\machine\mountpoint\directory\etc\... # directory ^^^^^^^^^^^^^^^ - if normp[:8].upper() == unc_prefix: + if normp[:8].upper().rstrip(sep) == unc_prefix: start = 8 else: start = 2 diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index 43fd33081ce9f5..eb3e75b5a55b71 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -121,6 +121,7 @@ def test_splitdrive(self): tester('ntpath.splitdrive("//?/c:")', ("//?/c:", "")) tester('ntpath.splitdrive("//?/c:/")', ("//?/c:", "/")) tester('ntpath.splitdrive("//?/c:/dir")', ("//?/c:", "/dir")) + tester('ntpath.splitdrive("//?/UNC")', ("", "//?/UNC")) tester('ntpath.splitdrive("//?/UNC/")', ("", "//?/UNC/")) tester('ntpath.splitdrive("//?/UNC/server/")', ("//?/UNC/server/", "")) tester('ntpath.splitdrive("//?/UNC/server/share")', ("//?/UNC/server/share", "")) @@ -134,6 +135,7 @@ def test_splitdrive(self): tester('ntpath.splitdrive("\\\\?\\c:")', ("\\\\?\\c:", "")) tester('ntpath.splitdrive("\\\\?\\c:\\")', ("\\\\?\\c:", "\\")) tester('ntpath.splitdrive("\\\\?\\c:\\dir")', ("\\\\?\\c:", "\\dir")) + tester('ntpath.splitdrive("\\\\?\\UNC")', ("", "\\\\?\\UNC")) tester('ntpath.splitdrive("\\\\?\\UNC\\")', ("", "\\\\?\\UNC\\")) tester('ntpath.splitdrive("\\\\?\\UNC\\server\\")', ("\\\\?\\UNC\\server\\", "")) tester('ntpath.splitdrive("\\\\?\\UNC\\server\\share")', ("\\\\?\\UNC\\server\\share", "")) From 471e3b4c3f68f888f8f0534a3062e377460013be Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Tue, 31 May 2022 14:38:09 +0100 Subject: [PATCH 15/16] Remove extending paths findings comment. --- Lib/ntpath.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/Lib/ntpath.py b/Lib/ntpath.py index fc2754ad86e9f2..955539ae458b33 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -118,21 +118,6 @@ def join(path, *paths): raise -# Interesting findings about extended paths: -# * '\\?\c:\a' is an extended path, which bypasses normal Windows API -# path processing. Thus relative paths are not resolved and slash is not -# translated to backslash. It has the native NT path limit of 32767 -# characters, but a bit less after resolving device symbolic links, -# such as '\??\C:' => '\Device\HarddiskVolume2'. -# * '\\?\c:/a' looks for a device named 'C:/a' because slash is a -# regular name character in the object namespace. -# * '\\?\c:\foo/bar' is invalid because '/' is illegal in NT filesystems. -# The only path separator at the filesystem level is backslash. -# * '//?/c:\a' and '//?/c:/a' are effectively equivalent to '\\.\c:\a' and -# thus limited to MAX_PATH. -# * Prior to Windows 8, ANSI API bytes paths are limited to MAX_PATH, -# even with the '\\?\' prefix. - # Split a path in a drive specification (a drive letter followed by a # colon) and the path specification. # It is always true that drivespec + pathspec == p From 8f9ab9a9d232d8c2dbe4d2bc3dc9efc4041ea684 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Thu, 9 Jun 2022 18:22:36 +0100 Subject: [PATCH 16/16] Remove unnecessary tests --- Lib/test/test_ntpath.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index eb3e75b5a55b71..d50984d5fb153f 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -129,8 +129,6 @@ def test_splitdrive(self): tester('ntpath.splitdrive("//?/VOLUME{00000000-0000-0000-0000-000000000000}/spam")', ('//?/VOLUME{00000000-0000-0000-0000-000000000000}', '/spam')) tester('ntpath.splitdrive("//?/BootPartition/")', ("//?/BootPartition", "/")) - tester('ntpath.splitdrive("//?/Harddisk0Partition2/")', ("//?/Harddisk0Partition2", "/")) - tester('ntpath.splitdrive("//?/HarddiskVolume2/")', ("//?/HarddiskVolume2", "/")) tester('ntpath.splitdrive("\\\\?\\c:")', ("\\\\?\\c:", "")) tester('ntpath.splitdrive("\\\\?\\c:\\")', ("\\\\?\\c:", "\\")) @@ -144,8 +142,6 @@ def test_splitdrive(self): tester('ntpath.splitdrive("\\\\?\\VOLUME{00000000-0000-0000-0000-000000000000}\\spam")', ('\\\\?\\VOLUME{00000000-0000-0000-0000-000000000000}', '\\spam')) tester('ntpath.splitdrive("\\\\?\\BootPartition\\")', ("\\\\?\\BootPartition", "\\")) - tester('ntpath.splitdrive("\\\\?\\Harddisk0Partition2\\")', ("\\\\?\\Harddisk0Partition2", "\\")) - tester('ntpath.splitdrive("\\\\?\\HarddiskVolume2\\")', ("\\\\?\\HarddiskVolume2", "\\")) def test_split(self): tester('ntpath.split("c:\\foo\\bar")', ('c:\\foo', 'bar'))