diff --git a/src/sys/paths.nim b/src/sys/paths.nim
index 68773e0..b6f4516 100644
--- a/src/sys/paths.nim
+++ b/src/sys/paths.nim
@@ -26,10 +26,34 @@ type
     ##
     ## * Any trailing slash will be removed.
     ##
+    ## For Windows, the path will always be represented under the following rules:
+    ##
+    ## * Any `/` separator will be converted to `\`.
+    ##
+    ## * Any `\..` at the root component will be collapsed into `\`.
+    ##
+    ## * Any `.` path element will be omitted, unless it's the only element or is necessary
+    ##   for disambiguation.
+    ##
+    ## * Any `\\` will be converted to `\`, unless they occur at the beginning of the path.
+    ##
+    ## * Any trailing backslashes will be removed if they are not significant.
+    ##
+    ## * For DOS paths, the drive letter is always in uppercase.
+    ##
+    ## This type does not support Windows' native NT paths (paths starting with `\??`) and
+    ## will treat them as relative paths. The `\\?` prefix should be used to
+    ## handle them instead.
+    ##
     ## The path is never an empty string.
 
   ComponentKind* {.pure.} = enum
     ## The type of path component
+    Prefix ## The prefix in which a rooted path will start from.
+           ##
+           ## A path might have more than one prefix (ie. UNC host and shares).
+           ## In such cases the prefixes can be concatenated into one using
+           ## the `Separator`.
     Root
     PreviousDir
     Element
@@ -46,6 +70,10 @@ type
 
 when defined(posix):
   include private/paths_posix
+elif defined(windows):
+  include private/paths_windows
+else:
+  {.error: "This module has not been ported to your operating system.".}
 
 const
   Separator* = SeparatorImpl
@@ -123,6 +151,13 @@ func join*[T: string | Nulless | Path](base: var Path, parts: varargs[T])
   ## joined with prior entries.
   ##
   ## If any of `parts` contains `NUL`, `ValueError` will be raised.
+  ##
+  ## **Platform specific details**
+  ##
+  ## * On Windows, drive-relative paths can only be created if the base itself is
+  ##   pointing to a drive-relative entry (ie. `C:relative`). A bare drive like `C:`
+  ##   will always be joined into a drive-absolute path to reduce the surprise factor.
+  ##
   joinImpl()
 
 func toPath*(p: sink Path): Path =
diff --git a/src/sys/private/paths_windows.nim b/src/sys/private/paths_windows.nim
new file mode 100644
index 0000000..2295228
--- /dev/null
+++ b/src/sys/private/paths_windows.nim
@@ -0,0 +1,256 @@
+#
+#            Abstractions for operating system services
+#                   Copyright (c) 2023 Leorize
+#
+# Licensed under the terms of the MIT license which can be found in
+# the file "license.txt" included with this distribution. Alternatively,
+# the full text can be found at: https://spdx.org/licenses/MIT.html
+
+import ".."/strings
+
+import strsliceutils
+
+const
+  SeparatorImpl = '\\'
+  ValidSeparatorsImpl = {SeparatorImpl, '/'}
+
+template componentSlicesImpl() {.dirty.} =
+  # Splits `s` on every valid separator and feeds the pieces through a small
+  # state machine, yielding `(kind, slice)` pairs with `slice` indexing `s`.
+  type
+    State = enum
+      Start
+      MaybeRoot
+      FoundPrefix
+      DosDrivePrefix
+      UncPrefix
+      AtRoot
+      PathElement
+
+  var state = Start
+
+  for slice in s.splitSlices(ValidSeparators):
+    var
+      stay = true
+      slice = slice
+    # `stay` re-runs the state machine on the same slice after a transition
+    while stay:
+      stay = false
+      case state
+      of Start:
+        # Drive letter and maybe a path component
+        if s.slice(slice).hasDosDrive:
+          yield (Prefix, 0 .. 1)
+          state = DosDrivePrefix
+          if slice.len > 2:
+            # This is a relative path (ie. C:abc)
+            # Trim the drive letter and switch gear
+            slice = slice.a + 2 .. slice.b
+            state = PathElement
+            stay = true
+
+        # Single \
+        elif slice.len == 0:
+          state = MaybeRoot
+
+        else:
+          state = PathElement
+          stay = true
+
+      of MaybeRoot:
+        # Double \
+        if slice.len == 0:
+          state = FoundPrefix
+        else:
+          yield (Root, 0 ..< 0)
+          state = AtRoot
+          stay = true
+
+      of FoundPrefix:
+        # Skip redundant separators, the prefix slice only keeps the last two
+        if slice.len == 0:
+          discard "Still looking for the prefix"
+        # Special prefix for NT and DOS paths
+        elif slice.len == 1 and s[slice.a] in {'.', '?'}:
+          state = AtRoot
+          yield (Prefix, slice.a - 2 .. slice.b)
+          yield (Root, 0 ..< 0)
+        # UNC otherwise
+        else:
+          state = UncPrefix
+          yield (Prefix, slice.a - 2 .. slice.b)
+
+      of DosDrivePrefix:
+        # There is something after the DOS drive, this is a rooted path
+        yield (Root, 0 ..< 0)
+        state = AtRoot
+        stay = true
+
+      of UncPrefix:
+        if slice.len > 0:
+          state = AtRoot
+          yield (Prefix, slice)
+          yield (Root, 0 ..< 0)
+
+      of AtRoot:
+        if s.slice(slice) == "." or s.slice(slice) == "..":
+          discard ". and .. at root is still root"
+        elif slice.len > 0:
+          state = PathElement
+          yield (Element, slice)
+
+      of PathElement:
+        if slice.len > 0:
+          if s.slice(slice) == "..":
+            yield (PreviousDir, slice)
+          elif s.slice(slice) != ".":
+            yield (Element, slice)
+
+  case state
+  # Nothing after we found '\' or '\\'
+  # Then it's just a root.
+  of MaybeRoot, FoundPrefix:
+    yield (Root, 0 ..< 0)
+  # Incomplete UNC path
+  # Cap it off with a root.
+  of UncPrefix:
+    yield (Root, 0 ..< 0)
+  else:
+    discard "Every other state has already yielded what it found"
+
+func isNotDos(p: Path | Nulless | openarray[char]): bool =
+  ## Returns whether `p` is not a DOS path, ie. it starts with two separators.
+  p.len > 1 and p[0] in ValidSeparatorsImpl and p[1] in ValidSeparatorsImpl
+
+func hasDosDrive(p: Path | Nulless | openarray[char]): bool =
+  ## Returns whether `p` is prefixed with a DOS drive.
+  p.len > 1 and p[1] == ':'
+
+func isIncompleteUnc(p: Path): bool =
+  ## Returns whether `p` is a UNC path without a share.
+  if not p.isNotDos:
+    return false
+
+  for kind, slice in p.componentSlices:
+    case kind
+    of Prefix:
+      if p.slice(slice) == r"\\." or p.slice(slice) == r"\\?":
+        return false
+
+      # One prefix is a lone host, a second one means the share is there
+      result = not result
+    else:
+      break
+
+template isAbsoluteImpl(): bool {.dirty.} =
+  p.isNotDos() or (p.len > 2 and p.slice(1..2) == r":\")
+
+template joinImpl() {.dirty.} =
+  # Temporarily empty out the base if it's the current directory.
+  #
+  # It will be inserted for disambiguation as needed.
+  if base == ".":
+    base.string.setLen 0
+
+  var needTrailingSep = base.isIncompleteUnc
+  for part in parts.items:
+    for kind, slice in part.componentSlices:
+      # The piece of `part` to be appended, empty if there is nothing to add.
+      var piece = slice
+      case kind
+      of Root:
+        # All parts should be relative to the current base.
+        piece = 0 ..< 0
+      of Prefix:
+        if part.slice(slice).isNotDos:
+          # Treat these like a regular subfolder, for example:
+          #
+          # * `a` join `\\?\C:` => `a\?\C:`
+          # * `.` join `\\?\C:` => `?\C:`
+          #
+          # At this point, only these can be found:
+          #
+          # * `\\?` and `\\.`
+          # * `\\string without backslash`
+          #
+          # Skip the first two (back)slashes
+          piece = slice.a + 2 .. slice.b
+
+          if part.slice(piece) == ".":
+            # Skipped to avoid redundant current-directory symbols
+            piece = 0 ..< 0
+      else:
+        discard "Elements and `..` are appended as-is"
+
+      if piece.len > 0:
+        if base.len == 0:
+          # If the result looks like it starts with a DOS drive
+          if part.slice(piece).hasDosDrive:
+            # Add `.\` to disambiguate
+            base.string.add r".\"
+
+        # Keep every joined piece, prefixes included, in its own component.
+        elif base[^1] != Separator:
+          base.string.add Separator
+
+        base.string.add part.slice(piece)
+
+      if needTrailingSep and base[^1] != Separator:
+        base.string.add Separator
+        needTrailingSep = false
+
+  # If the path is empty
+  if base.len == 0:
+    # Set it to "."
+    base.string.add '.'
+
+template toPathImpl() {.dirty.} =
+  # Build the normalized path in a local buffer. `s.len` is a good capacity
+  # estimate since normalization rarely makes the path longer.
+  var path = newStringOfCap(s.len)
+
+  var afterPrefix = false
+  for kind, slice in s.componentSlices:
+    case kind
+    of Prefix:
+      if s.slice(slice).isNotDos:
+        # Skips the first two (back)slashes
+        let slice = slice.a + 2 .. slice.b
+        # Add our own
+        path.add r"\\"
+        # And the rest
+        path.add s.slice(slice)
+      elif s.slice(slice).hasDosDrive:
+        # Normalize the drive by uppercasing it
+        path.add: toAsciiUpper s[slice.a]
+        path.add ':'
+      else:
+        # UNC share name
+        path.add '\\'
+        path.add s.slice(slice)
+
+      afterPrefix = true
+    of Root:
+      path.add Separator
+
+      afterPrefix = false
+    else:
+      # Add separator as needed
+      if afterPrefix:
+        discard "Don't add separator after a prefix to handle drive-relative paths"
+      elif path.len > 0 and path[^1] != Separator:
+        path.add Separator
+      # Disambiguates an element that looked like a drive
+      elif path.len == 0 and s.slice(slice).hasDosDrive:
+        path.add r".\"
+
+      path.add s.slice(slice)
+
+      afterPrefix = false
+
+  # The path is never an empty string
+  if path.len == 0:
+    path.add '.'
+
+  # Hand the finished buffer over only once normalization is complete
+  result = Path(path)
diff --git a/tests/paths/twindows.nim b/tests/paths/twindows.nim
new file mode 100644
index 0000000..fe031db
--- /dev/null
+++ b/tests/paths/twindows.nim
@@ -0,0 +1,219 @@
+#
+#            Abstractions for operating system services
+#                   Copyright (c) 2023 Leorize
+#
+# Licensed under the terms of the MIT license which can be found in
+# the file "license.txt" included with this distribution. Alternatively,
+# the full text can be found at: https://spdx.org/licenses/MIT.html
+
+when defined(windows):
+  import pkg/balls
+  import sys/paths
+
+  suite "Windows path handling tests":
+    test "toPath() normalization":
+      const tests = [
+        # Normalized
+        # -- Relative paths
+        ("abc", "abc"),
+        ("a/b", r"a\b"),
+        ("abc/def", r"abc\def"),
+        (r"abc\..\def", r"abc\..\def"),
+        (r"abc\def\ghi", r"abc\def\ghi"),
+        # -- Rooted paths
+        (r"\", r"\"),
+        ("/abc", r"\abc"),
+        (r"\abc\def", r"\abc\def"),
+        ("/abc/../def", r"\abc\..\def"),
+        # -- Root-relative path
+        (r"\..", r"\"),
+        ("/../../abc", r"\abc"),
+        # -- Parent-relative path
+        ("..", ".."),
+        ("../..", r"..\.."),
+        (r"..\..\abc", r"..\..\abc"),
+        # -- Current directory
+        (".", "."),
+        # -- Paths starting with dot
+        ("...", "..."),
+        (".abc", ".abc"),
+        ("..abc", "..abc"),
+        ("../...", r"..\..."),
+        (r"...\...", r"...\..."),
+        ("abc/.def", r"abc\.def"),
+        (r"abc\..def\.ghi", r"abc\..def\.ghi"),
+        ("/abc/.def/.ghi", r"\abc\.def\.ghi"),
+        (r"\abc\..def\.ghi", r"\abc\..def\.ghi"),
+
+        # Empty path
+        ("", "."),
+
+        # Drive-qualified path
+        # -- Relative paths
+        ("c:abc", "C:abc"),
+        ("a:a/b", r"A:a\b"),
+        ("1:abc/def", r"1:abc\def"),
+        (r"@:abc\..\def", r"@:abc\..\def"),
+        (r"D:abc\def\ghi", r"D:abc\def\ghi"),
+        # -- Absolute paths
+        (r"A:\", r"A:\"),
+        ("a:/abc", r"A:\abc"),
+        (r"#:\abc\def", r"#:\abc\def"),
+        ("Q:/abc/../def", r"Q:\abc\..\def"),
+        # -- Root-relative path
+        (r"c:\..", r"C:\"),
+        ("D:/../../abc", r"D:\abc"),
+        # -- Parent-relative path
+        ("R:..", "R:.."),
+        ("R:../..", r"R:..\.."),
+        (r"r:..\..\abc", r"R:..\..\abc"),
+        # -- Current directory
+        ("C:.", "C:"),
+        ("c:", "C:"),
+        # -- Paths starting with dot
+        ("C:...", "C:..."),
+        ("D:.abc", "D:.abc"),
+        ("b:..abc", "B:..abc"),
+        ("A:../...", r"A:..\..."),
+        (r"F:...\...", r"F:...\..."),
+        ("z:abc/.def", r"Z:abc\.def"),
+        (r"a:abc\..def\.ghi", r"A:abc\..def\.ghi"),
+        ("c:/abc/.def/.ghi", r"C:\abc\.def\.ghi"),
+        (r"d:\abc\..def\.ghi", r"D:\abc\..def\.ghi"),
+
+        # NT-qualified path
+        # -- Absolute paths
+        (r"\\.", r"\\.\"),
+        ("//?/abc", r"\\?\abc"),
+        (r"\\?\abc\def", r"\\?\abc\def"),
+        ("//./abc/../def", r"\\.\abc\..\def"),
+        # -- Root-relative path
+        (r"\\?\..", r"\\?\"),
+        ("//./../../abc", r"\\.\abc"),
+        ("//?/../../abc", r"\\?\abc"),
+
+        # UNC-qualified path
+        # -- Absolute paths
+        ("//hostonly", r"\\hostonly\"),
+        (r"\\host\share", r"\\host\share\"),
+        ("//another/c$", r"\\another\c$\"),
+        (r"\\host\share\abc\def", r"\\host\share\abc\def"),
+        ("//host/share/abc/../def", r"\\host\share\abc\..\def"),
+        # -- Root-relative path
+        (r"\\host\share\", r"\\host\share"),
+        ("//host/share/../../abc", r"\\host\share\abc"),
+        ("//hostonly/..", r"\\hostonly\..\"),
+
+        # Trailing slash
+        # -- Relative paths
+        ("abc/", "abc"),
+        (r"a\b\", r"a\b"),
+        ("abc/def/", r"abc\def"),
+        # -- Rooted paths
+        ("/////", r"\"),
+        (r"\abc\", r"\abc"),
+        ("/abc/def/", r"\abc\def"),
+        (r"\abc\..\def\", r"\abc\..\def"),
+        # -- Parent-relative paths
+        ("../", ".."),
+        (r"..\..\", r"..\.."),
+        ("../../abc/", r"..\..\abc"),
+        # -- Current directory
+        (r".\", "."),
+        ("./././.", "."),
+        # -- Paths starting with dot
+        (".../", "..."),
+        (r".abc\", ".abc"),
+        ("..abc/", "..abc"),
+        (r"..\...\", r"..\..."),
+        (".../.../", r"...\..."),
+        (r"abc\.def\", r"abc\.def"),
+        ("abc/..def/.ghi/", r"abc\..def\.ghi"),
+        (r"\abc\.def\.ghi\", r"\abc\.def\.ghi"),
+        ("/abc/..def/.ghi/", r"\abc\..def\.ghi"),
+
+        # Double slash
+        # -- Relative paths
+        (r"abc\\", "abc"),
+        ("abc///", "abc"),
+        (r"abc\\\\", "abc"),
+        ("a//b", r"a\b"),
+        (r"abc\\def", r"abc\def"),
+        ("abc//..////def", r"abc\..\def"),
+        # -- Rooted paths
+        (r"\abc\\", r"\abc"),
+        ("/abc///", r"\abc"),
+        (r"\abc\\\\", r"\abc"),
+        ("/abc//def", r"\abc\def"),
+        (r"\abc\\..\\\\def", r"\abc\..\def"),
+        # -- Parent-relative paths
+        ("..////...", r"..\..."),
+        (r"..\\...\\\\", r"..\..."),
+        ("...//...", r"...\..."),
+        (r"...\\...\\\\", r"...\..."),
+        ("abc////.def", r"abc\.def"),
+        (r"abc\\\\..def\.ghi\", r"abc\..def\.ghi"),
+        ("/abc////.def/.ghi/", r"\abc\.def\.ghi"),
+        (r"\abc\\\\..def\.ghi\", r"\abc\..def\.ghi"),
+        # -- UNC path
+        ("//host/////share", r"\\host\share"),
+        (r"\\\\host", r"\\host\"),
+        ("///host", r"\\host\"),
+
+        # Dot element
+        # -- Relative paths
+        (r"abc\.", "abc"),
+        ("a/./b", r"a\b"),
+        (r"abc\.\def", r"abc\def"),
+        ("abc/.././def", r"abc\..\def"),
+        (r".\abc\.", r"abc"),
+        ("./a/./b", r"a\b"),
+        ("./abc/./def", r"abc\def"),
+        (r".\abc\..\.\def", r"abc\..\def"),
+        # -- Rooted paths
+        ("/abc/.", r"\abc"),
+        (r"\abc\.\def", r"\abc\def"),
+        ("/abc/.././def", r"\abc\..\def"),
+        # -- Parent-relative path
+        (r"..\.", r".."),
+        (r".\..", r".."),
+        (".././..", r"..\.."),
+        (r"..\.\..\.\abc", r"..\..\abc"),
+        # -- Paths starting with dot
+        ("./.../", "..."),
+        (r"...\.", "..."),
+        (".abc/.", ".abc"),
+        (r"..abc\.", "..abc"),
+        (".././...", r"..\..."),
+        (r"...\.\...", r"...\..."),
+        ("abc/./.def", r"abc\.def"),
+        (r"abc\..def\.\.ghi\", r"abc\..def\.ghi"),
+        ("/./abc/.def/.ghi/", r"\abc\.def\.ghi"),
+        (r"\.\abc\.\..def\.ghi\", r"\abc\..def\.ghi"),
+        ("./C:/notadrive", r".\C:\notadrive")
+      ]
+
+      for (orig, target) in tests:
+        let normalized = orig.toPath.string
+        check normalized == target,
+          "expected '" & target & "' but got '" & normalized & '\''
+        checkpoint "passed:", "'" & orig & "'", "->", "'" & normalized & "'"
+
+    test "Joining paths":
+      const tests = [
+        ("", @["a"], "a"),
+        ("", @["a", "b"], r"a\b"),
+        # Base swapping is not allowed
+        ("", @["C:", "Windows"], r".\C:\Windows"),
+        ("", @[r"\\host\share", "foo/bar"], r"host\share\foo\bar"),
+        # Always produce rooted drive paths
+        ("c:", @["Windows", "System32"], r"C:\Windows\System32"),
+        # Can complete UNC paths
+        ("//host", @["share", "..", "other"], r"\\host\share\other")
+      ]
+
+      for (base, parts, target) in tests:
+        var path = base.toPath
+        path.join parts
+        check path.string == target,
+          "expected '" & target & "' but got '" & path.string & '\''