diff --git a/compiler_gym/datasets/tar_dataset.py b/compiler_gym/datasets/tar_dataset.py index b6b8ab80e4..6b20403439 100644 --- a/compiler_gym/datasets/tar_dataset.py +++ b/compiler_gym/datasets/tar_dataset.py @@ -92,9 +92,12 @@ def install(self) -> None: # Remove any partially-completed prior extraction. shutil.rmtree(self.site_data_path / "contents", ignore_errors=True) - logger.info("Downloading %s dataset", self.name) + logger.warning( + "Installing the %s dataset. This may take a few moments ...", self.name + ) + tar_data = io.BytesIO(download(self.tar_urls, self.tar_sha256)) - logger.info("Unpacking %s dataset", self.name) + logger.info("Unpacking %s dataset to %s", self.name, self.site_data_path) with tarfile.open( fileobj=tar_data, mode=f"r:{self.tar_compression}" ) as arc: diff --git a/compiler_gym/third_party/llvm/__init__.py b/compiler_gym/third_party/llvm/__init__.py index 58be6ba2cc..80781616f4 100644 --- a/compiler_gym/third_party/llvm/__init__.py +++ b/compiler_gym/third_party/llvm/__init__.py @@ -4,6 +4,7 @@ # LICENSE file in the root directory of this source tree. """Module for resolving paths to LLVM binaries and libraries.""" import io +import logging import shutil import sys import tarfile @@ -16,6 +17,8 @@ from compiler_gym.util.download import download from compiler_gym.util.runfiles_path import cache_path, site_data_path +logger = logging.getLogger(__name__) + # The data archive containing LLVM binaries and libraries. _LLVM_URL, _LLVM_SHA256 = { "darwin": ( @@ -37,6 +40,10 @@ def _download_llvm_files(destination: Path) -> Path: """Download and unpack the LLVM data pack.""" + logger.warning( + "Installing the CompilerGym LLVM environment runtime. This may take a few moments ..." + ) + # Tidy up an incomplete unpack. shutil.rmtree(destination, ignore_errors=True)