Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
Signed-off-by: Carmen Bianca BAKKER <carmenbianca@fsfe.org>
  • Loading branch information
carmenbianca committed Jul 8, 2024
1 parent 3e3872e commit 5d7759f
Show file tree
Hide file tree
Showing 3 changed files with 125 additions and 149 deletions.
185 changes: 97 additions & 88 deletions src/reuse/covered_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,107 +14,116 @@
from pathlib import Path
from typing import Generator, Optional

import attrs

from . import (
_IGNORE_DIR_PATTERNS,
_IGNORE_FILE_PATTERNS,
_IGNORE_MESON_PARENT_DIR_PATTERNS,
)
from ._util import StrPath
from .vcs import VCSStrategy
from .vcs import VCSStrategy, VCSStrategyNone

_LOGGER = logging.getLogger(__name__)


def is_path_ignored(
path: Path,
include_submodules: bool = False,
include_meson_subprojects: bool = False,
vcs_strategy: Optional[VCSStrategy] = None,
) -> bool:
"""Is *path* ignored by some mechanism?"""
# pylint: disable=too-many-return-statements,too-many-branches
name = path.name
parent_parts = path.parent.parts
parent_dir = parent_parts[-1] if len(parent_parts) > 0 else ""

if path.is_symlink():
_LOGGER.debug("skipping symlink '%s'", path)
return True

if path.is_file():
for pattern in _IGNORE_FILE_PATTERNS:
if pattern.match(name):
return True
# Suppressing this error because I simply don't want to deal
# with that here.
with contextlib.suppress(OSError):
if path.stat().st_size == 0:
_LOGGER.debug("skipping 0-sized file '%s'", path)
return True
@attrs.define
class ProjectSeed:
"""Some core values from which all or most other values of :class:`Project`
can be derived.
elif path.is_dir():
for pattern in _IGNORE_DIR_PATTERNS:
if pattern.match(name):
return True
if not include_meson_subprojects:
for pattern in _IGNORE_MESON_PARENT_DIR_PATTERNS:
if pattern.match(parent_dir):
_LOGGER.info(
"ignoring '%s' because it is a Meson subproject", path
)
return True
if (
not include_submodules
and vcs_strategy
and vcs_strategy.is_submodule(path)
):
_LOGGER.info("ignoring '%s' because it is a submodule", path)
return True

if vcs_strategy and vcs_strategy.is_ignored(path):
return True
Technically *vcs_strategy* can also be derived from *root*, but that is
neither here nor there.
"""

return False
root: Path = attrs.field(converter=Path)
include_submodules: bool = False
include_meson_subprojects: bool = False
vcs_strategy: VCSStrategy = attrs.field()

@vcs_strategy.default
def _default_vcs_strategy(self) -> VCSStrategy:
    """Supply the default for *vcs_strategy*: a :class:`VCSStrategyNone`
    constructed from this seed's *root*.
    """
    return VCSStrategyNone(self.root)

def is_path_ignored(
self,
path: Path,
) -> bool:
"""Is *path* ignored by some mechanism?"""
# pylint: disable=too-many-return-statements,too-many-branches
name = path.name
parent_parts = path.parent.parts
parent_dir = parent_parts[-1] if len(parent_parts) > 0 else ""

if path.is_symlink():
_LOGGER.debug("skipping symlink '%s'", path)
return True

if path.is_file():
for pattern in _IGNORE_FILE_PATTERNS:
if pattern.match(name):
return True
# Suppressing this error because I simply don't want to deal
# with that here.
with contextlib.suppress(OSError):
if path.stat().st_size == 0:
_LOGGER.debug("skipping 0-sized file '%s'", path)
return True

def all_files(
directory: StrPath,
include_submodules: bool = False,
include_meson_subprojects: bool = False,
vcs_strategy: Optional[VCSStrategy] = None,
) -> Generator[Path, None, None]:
"""Yield all Covered Files in *directory* and its subdirectories according
to the REUSE Specification.
"""
directory = Path(directory)

for root_str, dirs, files in os.walk(directory):
root = Path(root_str)
_LOGGER.debug("currently walking in '%s'", root)

# Don't walk ignored directories
for dir_ in list(dirs):
the_dir = root / dir_
if is_path_ignored(
the_dir,
include_submodules=include_submodules,
include_meson_subprojects=include_meson_subprojects,
vcs_strategy=vcs_strategy,
):
_LOGGER.debug("ignoring '%s'", the_dir)
dirs.remove(dir_)

# Filter files.
for file_ in files:
the_file = root / file_
if is_path_ignored(
the_file,
include_submodules=include_submodules,
include_meson_subprojects=include_meson_subprojects,
vcs_strategy=vcs_strategy,
elif path.is_dir():
for pattern in _IGNORE_DIR_PATTERNS:
if pattern.match(name):
return True
if not self.include_meson_subprojects:
for pattern in _IGNORE_MESON_PARENT_DIR_PATTERNS:
if pattern.match(parent_dir):
_LOGGER.info(
"ignoring '%s' because it is a Meson subproject",
path,
)
return True
if not self.include_submodules and self.vcs_strategy.is_submodule(
path
):
_LOGGER.debug("ignoring '%s'", the_file)
continue
_LOGGER.info("ignoring '%s' because it is a submodule", path)
return True

if self.vcs_strategy.is_ignored(path):
return True

_LOGGER.debug("yielding '%s'", the_file)
yield the_file
return False

def all_files(
    self, directory: Optional[StrPath] = None
) -> Generator[Path, None, None]:
    """Walk *directory* recursively and yield every Covered File, as defined
    by the REUSE Specification.

    When *directory* is omitted, the walk starts at the seed's *root*.
    Directories for which :meth:`is_path_ignored` is true are pruned, so
    they are never descended into; files for which it is true are skipped.
    """
    start = Path(self.root if directory is None else directory)

    for current_str, subdirs, filenames in os.walk(start):
        current = Path(current_str)
        _LOGGER.debug("currently walking in '%s'", current)

        # Prune ignored directories. The slice assignment mutates the list
        # that os.walk handed out, which is what stops it from descending.
        kept = []
        for subdir in subdirs:
            candidate = current / subdir
            if self.is_path_ignored(candidate):
                _LOGGER.debug("ignoring '%s'", candidate)
            else:
                kept.append(subdir)
        subdirs[:] = kept

        # Yield every file that is not ignored.
        for filename in filenames:
            candidate = current / filename
            if not self.is_path_ignored(candidate):
                _LOGGER.debug("yielding '%s'", candidate)
                yield candidate
            else:
                _LOGGER.debug("ignoring '%s'", candidate)
29 changes: 16 additions & 13 deletions src/reuse/global_licensing.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from gettext import gettext as _
from pathlib import Path, PurePath
from typing import (
TYPE_CHECKING,
Any,
Callable,
Collection,
Expand Down Expand Up @@ -44,6 +45,9 @@
from .covered_files import all_files
from .vcs import VCSStrategy

if TYPE_CHECKING:
from .project import ProjectSeedProtocol

_LOGGER = logging.getLogger(__name__)

_T = TypeVar("_T")
Expand Down Expand Up @@ -479,6 +483,13 @@ def directory(self) -> PurePath:
return PurePath(self.source).parent


@attrs.define
class _ProjectSeed:
    """Minimal stand-in seed used by :meth:`NestedReuseTOML.from_file` when
    the caller passes no ``project`` keyword argument.

    NOTE(review): presumably mirrors the fields of ``ProjectSeed`` in
    ``covered_files`` (minus *vcs_strategy*) -- confirm before relying on it.
    """

    # Built from ``Path(path)`` in ``from_file``.
    root: Path
    # Both flags are off unless explicitly enabled.
    include_submodules: bool = False
    include_meson_subprojects: bool = False

@attrs.define
class NestedReuseTOML(GlobalLicensing):
"""A class that represents a hierarchy of :class:`ReuseTOML` objects."""
Expand All @@ -488,18 +499,14 @@ class NestedReuseTOML(GlobalLicensing):
@classmethod
def from_file(cls, path: StrPath, **kwargs: Any) -> "GlobalLicensing":
"""TODO: *path* is a directory instead of a file."""
include_submodules: bool = kwargs.get("include_submodules", False)
include_meson_subprojects: bool = kwargs.get(
"include_meson_subprojects", False
project: "ProjectSeedProtocol" = kwargs.get(
"project", _ProjectSeed(Path(path))
)
vcs_strategy: Optional[VCSStrategy] = kwargs.get("vcs_strategy")
tomls = [
ReuseTOML.from_file(toml_path)
for toml_path in cls.find_reuse_tomls(
path,
include_submodules=include_submodules,
include_meson_subprojects=include_meson_subprojects,
vcs_strategy=vcs_strategy,
project, vcs_strategy=vcs_strategy
)
]
return cls(reuse_tomls=tomls, source=str(path))
Expand Down Expand Up @@ -561,18 +568,14 @@ def reuse_info_of(
@classmethod
def find_reuse_tomls(
cls,
path: StrPath,
include_submodules: bool = False,
include_meson_subprojects: bool = False,
project: "ProjectSeedProtocol",
vcs_strategy: Optional[VCSStrategy] = None,
) -> Generator[Path, None, None]:
"""Find all REUSE.toml files in *path*."""
return (
item
for item in all_files(
path,
include_submodules=include_submodules,
include_meson_subprojects=include_meson_subprojects,
project,
vcs_strategy=vcs_strategy,
)
if item.name == "REUSE.toml"
Expand Down
Loading

0 comments on commit 5d7759f

Please sign in to comment.