From 1366e3453fdee2b7d3121e78d05de7ac3356e17d Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Thu, 29 Feb 2024 18:04:39 -0500 Subject: [PATCH] add _readdirx for returning more object info gathered during dir scan (#53377) (cherry picked from commit 989c4db50e0ca59b890af2f1800004ecf91c0faf) --- base/file.jl | 85 ++++++++++++++++++++++++++++++++++++++++++++++++---- test/file.jl | 8 +++++ test/misc.jl | 5 ++-- 3 files changed, 91 insertions(+), 7 deletions(-) diff --git a/base/file.jl b/base/file.jl index 6347f042e3422..7f5caaaf96c6b 100644 --- a/base/file.jl +++ b/base/file.jl @@ -913,7 +913,79 @@ julia> readdir(abspath("base"), join=true) "/home/JuliaUser/dev/julia/base/weakkeydict.jl" ``` """ -function readdir(dir::AbstractString; join::Bool=false, sort::Bool=true) +readdir(; join::Bool=false, kwargs...) = readdir(join ? pwd() : "."; join, kwargs...)::Vector{String} +readdir(dir::AbstractString; kwargs...) = _readdir(dir; return_objects=false, kwargs...)::Vector{String} + +# this might be better as an Enum but they're not available here +# UV_DIRENT_T +const UV_DIRENT_UNKNOWN = Cint(0) +const UV_DIRENT_FILE = Cint(1) +const UV_DIRENT_DIR = Cint(2) +const UV_DIRENT_LINK = Cint(3) +const UV_DIRENT_FIFO = Cint(4) +const UV_DIRENT_SOCKET = Cint(5) +const UV_DIRENT_CHAR = Cint(6) +const UV_DIRENT_BLOCK = Cint(7) + +""" + DirEntry + +A type representing a filesystem entry that contains the name of the entry, the directory, and +the raw type of the entry. The full path of the entry can be obtained lazily by accessing the +`path` field. The type of the entry can be checked for by calling [`isfile`](@ref), [`isdir`](@ref), +[`islink`](@ref), [`isfifo`](@ref), [`issocket`](@ref), [`ischardev`](@ref), and [`isblockdev`](@ref) +""" +struct DirEntry + dir::String + name::String + rawtype::Cint +end +function Base.getproperty(obj::DirEntry, p::Symbol) + if p === :path + return joinpath(obj.dir, obj.name) + else + return getfield(obj, p) + end +end +Base.propertynames(::DirEntry) = (:dir, :name, :path, :rawtype) +Base.isless(a::DirEntry, b::DirEntry) = a.dir == b.dir ? isless(a.name, b.name) : isless(a.dir, b.dir) +Base.hash(o::DirEntry, h::UInt) = hash(o.dir, hash(o.name, hash(o.rawtype, h))) +Base.:(==)(a::DirEntry, b::DirEntry) = a.name == b.name && a.dir == b.dir && a.rawtype == b.rawtype +joinpath(obj::DirEntry, args...) = joinpath(obj.path, args...) +isunknown(obj::DirEntry) = obj.rawtype == UV_DIRENT_UNKNOWN +islink(obj::DirEntry) = isunknown(obj) ? islink(obj.path) : obj.rawtype == UV_DIRENT_LINK +isfile(obj::DirEntry) = (isunknown(obj) || islink(obj)) ? isfile(obj.path) : obj.rawtype == UV_DIRENT_FILE +isdir(obj::DirEntry) = (isunknown(obj) || islink(obj)) ? isdir(obj.path) : obj.rawtype == UV_DIRENT_DIR +isfifo(obj::DirEntry) = (isunknown(obj) || islink(obj)) ? isfifo(obj.path) : obj.rawtype == UV_DIRENT_FIFO +issocket(obj::DirEntry) = (isunknown(obj) || islink(obj)) ? issocket(obj.path) : obj.rawtype == UV_DIRENT_SOCKET +ischardev(obj::DirEntry) = (isunknown(obj) || islink(obj)) ? ischardev(obj.path) : obj.rawtype == UV_DIRENT_CHAR +isblockdev(obj::DirEntry) = (isunknown(obj) || islink(obj)) ? isblockdev(obj.path) : obj.rawtype == UV_DIRENT_BLOCK +realpath(obj::DirEntry) = realpath(obj.path) + +""" + _readdirx(dir::AbstractString=pwd(); sort::Bool = true) -> Vector{DirEntry} + +Return a vector of [`DirEntry`](@ref) objects representing the contents of the directory `dir`, +or the current working directory if not given. If `sort` is true, the returned vector is +sorted by name. + +Unlike [`readdir`](@ref), `_readdirx` returns [`DirEntry`](@ref) objects, which contain the name of the +file, the directory it is in, and the type of the file which is determined during the +directory scan. This means that calls to [`isfile`](@ref), [`isdir`](@ref), [`islink`](@ref), [`isfifo`](@ref), +[`issocket`](@ref), [`ischardev`](@ref), and [`isblockdev`](@ref) can be made on the +returned objects without further stat calls. However, for some filesystems, the type of the file +cannot be determined without a stat call. In these cases the `rawtype` field of the [`DirEntry`](@ref)) +object will be 0 (`UV_DIRENT_UNKNOWN`) and [`isfile`](@ref) etc. will fall back to a `stat` call. + +```julia +for obj in _readdirx() + isfile(obj) && println("\$(obj.name) is a file with path \$(obj.path)") +end +``` +""" +_readdirx(dir::AbstractString=pwd(); sort::Bool=true) = _readdir(dir; return_objects=true, sort)::Vector{DirEntry} + +function _readdir(dir::AbstractString; return_objects::Bool=false, join::Bool=false, sort::Bool=true) # Allocate space for uv_fs_t struct req = Libc.malloc(_sizeof_uv_fs) try @@ -923,11 +995,16 @@ function readdir(dir::AbstractString; join::Bool=false, sort::Bool=true) err < 0 && uv_error("readdir($(repr(dir)))", err) # iterate the listing into entries - entries = String[] + entries = return_objects ? DirEntry[] : String[] ent = Ref{uv_dirent_t}() while Base.UV_EOF != ccall(:uv_fs_scandir_next, Cint, (Ptr{Cvoid}, Ptr{uv_dirent_t}), req, ent) name = unsafe_string(ent[].name) - push!(entries, join ? joinpath(dir, name) : name) + if return_objects + rawtype = ent[].typ + push!(entries, DirEntry(dir, name, rawtype)) + else + push!(entries, join ? joinpath(dir, name) : name) + end end # Clean up the request string @@ -941,8 +1018,6 @@ function readdir(dir::AbstractString; join::Bool=false, sort::Bool=true) Libc.free(req) end end -readdir(; join::Bool=false, sort::Bool=true) = - readdir(join ? pwd() : ".", join=join, sort=sort) """ walkdir(dir; topdown=true, follow_symlinks=false, onerror=throw) diff --git a/test/file.jl b/test/file.jl index c32f655eb677e..a476895af442a 100644 --- a/test/file.jl +++ b/test/file.jl @@ -31,6 +31,8 @@ if !Sys.iswindows() || Sys.windows_version() >= Sys.WINDOWS_VISTA_VER symlink(subdir, dirlink) @test stat(dirlink) == stat(subdir) @test readdir(dirlink) == readdir(subdir) + @test map(o->o.names, Base.Filesystem._readdirx(dirlink)) == map(o->o.names, Base.Filesystem._readdirx(subdir)) + @test realpath.(Base.Filesystem._readdirx(dirlink)) == realpath.(Base.Filesystem._readdirx(subdir)) # relative link relsubdirlink = joinpath(subdir, "rel_subdirlink") @@ -38,6 +40,7 @@ if !Sys.iswindows() || Sys.windows_version() >= Sys.WINDOWS_VISTA_VER symlink(reldir, relsubdirlink) @test stat(relsubdirlink) == stat(subdir2) @test readdir(relsubdirlink) == readdir(subdir2) + @test Base.Filesystem._readdirx(relsubdirlink) == Base.Filesystem._readdirx(subdir2) # creation of symlink to directory that does not yet exist new_dir = joinpath(subdir, "new_dir") @@ -56,6 +59,7 @@ if !Sys.iswindows() || Sys.windows_version() >= Sys.WINDOWS_VISTA_VER mkdir(new_dir) touch(foo_file) @test readdir(new_dir) == readdir(nedlink) + @test realpath.(Base.Filesystem._readdirx(new_dir)) == realpath.(Base.Filesystem._readdirx(nedlink)) rm(foo_file) rm(new_dir) @@ -1438,6 +1442,10 @@ rm(dirwalk, recursive=true) touch(randstring()) end @test issorted(readdir()) + @test issorted(Base.Filesystem._readdirx()) + @test map(o->o.name, Base.Filesystem._readdirx()) == readdir() + @test map(o->o.path, Base.Filesystem._readdirx()) == readdir(join=true) + @test count(isfile, readdir(join=true)) == count(isfile, Base.Filesystem._readdirx()) end end end diff --git a/test/misc.jl b/test/misc.jl index 6597ecf8a8428..e870c7f491c13 100644 --- a/test/misc.jl +++ b/test/misc.jl @@ -1363,9 +1363,10 @@ end @test isdefined(KwdefWithEsc_TestModule, :Struct) @testset "exports of modules" begin - for (_, mod) in Base.loaded_modules + @testset "$mod" for (_, mod) in Base.loaded_modules mod === Main && continue # Main exports everything - for v in names(mod) + @testset "$v" for v in names(mod) + isdefined(mod, v) || @error "missing $v in $mod" @test isdefined(mod, v) end end