Skip to content

Commit

Permalink
Merge pull request #57 from TaekyungHeo/install-better
Browse files Browse the repository at this point in the history
Enhance installation process with detailed error handling and Docker image caching
  • Loading branch information
amaslenn authored Jun 4, 2024
2 parents 2e1f499 + ade14f7 commit 967e7d7
Show file tree
Hide file tree
Showing 20 changed files with 1,103 additions and 371 deletions.
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
bokeh==3.4.1
pandas==2.2.1
requests==2.32.0
tbparse==0.0.8
toml==0.10.2
16 changes: 11 additions & 5 deletions src/cloudai/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,13 +129,17 @@ def handle_install_and_uninstall(args: argparse.Namespace) -> None:
if installer.is_installed(test_templates):
print("Cloud AI is already installed.")
else:
installer.install(test_templates)
print("Installation completed.")
result = installer.install(test_templates)
if not result:
print(result)
sys.exit(1)

elif args.mode == "uninstall":
logging.info("Uninstalling test templates.")
installer.uninstall(test_templates)
print("Uninstallation completed.")
result = installer.uninstall(test_templates)
if not result:
print(result)
sys.exit(1)


def handle_dry_run_and_run(args: argparse.Namespace) -> None:
Expand Down Expand Up @@ -169,8 +173,10 @@ def handle_dry_run_and_run(args: argparse.Namespace) -> None:
if args.mode == "run":
logging.info("Checking if test templates are installed.")
installer = Installer(system)
if not installer.is_installed(test_templates):
result = installer.is_installed(test_templates)
if not result:
print("Cloud AI has not been installed. Please run install mode first.")
print(result)
sys.exit(1)

test_scenario.pretty_print()
Expand Down
88 changes: 66 additions & 22 deletions src/cloudai/_core/base_installer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Iterable

from cloudai._core.install_status_result import InstallStatusResult

from .system import System
from .test_template import TestTemplate

Expand Down Expand Up @@ -57,60 +59,102 @@ def _is_binary_installed(self, binary_name: str) -> bool:
self.logger.debug(f"Checking if binary '{binary_name}' is installed.")
return shutil.which(binary_name) is not None

def _check_prerequisites(self) -> None:
def _check_prerequisites(self) -> InstallStatusResult:
"""
Check if common prerequisites are installed.
This method should be overridden in derived classes for specific prerequisite checks.
Raises
EnvironmentError: If a required binary is not installed.
Returns
InstallStatusResult: Result containing the status and any error message.
"""
self.logger.info("Checking for common prerequisites.")
return InstallStatusResult(True)

def is_installed(self, test_templates: Iterable[TestTemplate]) -> bool:
def is_installed(self, test_templates: Iterable[TestTemplate]) -> InstallStatusResult:
"""
Check if the necessary components for the provided test templates are already installed.
Verify the installation status of each test template.
Args:
test_templates (Iterable[TestTemplate]): The list of test templates to
check for installation.
test_templates (Iterable[TestTemplate]): The list of test templates to check for installation.
Returns:
bool: True if all test templates are installed, False otherwise.
InstallStatusResult: Result containing the installation status and error message if not installed.
"""
self.logger.info("Verifying installation status of test templates.")
return all(test_template.is_installed() for test_template in test_templates)

def install(self, test_templates: Iterable[TestTemplate]) -> None:
not_installed = {}
for test_template in test_templates:
try:
if not test_template.is_installed():
not_installed[test_template.name] = "Not installed"
except Exception as e:
not_installed[test_template.name] = str(e)

if not_installed:
return InstallStatusResult(False, "Some test templates are not installed.", not_installed)
else:
return InstallStatusResult(True, "All test templates are installed.")

def install(self, test_templates: Iterable[TestTemplate]) -> InstallStatusResult:
"""
Install the necessary components if they are not already installed.
Raises an exception if installation fails for any component.
Args:
test_templates (Iterable[TestTemplate]): The test templates.
Returns:
InstallStatusResult: Result containing the installation status and error message if any.
"""
self.logger.info("Starting installation of test templates.")
self._check_prerequisites()
prerequisites_result = self._check_prerequisites()
if not prerequisites_result:
return prerequisites_result

install_results = {}
with ThreadPoolExecutor() as executor:
futures = [executor.submit(test_template.install) for test_template in test_templates]
futures = {executor.submit(test_template.install): test_template for test_template in test_templates}
for future in as_completed(futures):
future.result()

def uninstall(self, test_templates: Iterable[TestTemplate]) -> None:
test_template = futures[future]
try:
future.result()
install_results[test_template.name] = "Success"
except Exception as e:
self.logger.error(f"Installation failed for {test_template.name}: {e}")
install_results[test_template.name] = str(e)

all_success = all(result == "Success" for result in install_results.values())
if all_success:
return InstallStatusResult(True, "All test templates installed successfully.")
else:
return InstallStatusResult(False, "Some test templates failed to install.", install_results)

def uninstall(self, test_templates: Iterable[TestTemplate]) -> InstallStatusResult:
"""
Uninstalls the benchmarks or test templates.
Raises an exception if uninstallation fails for any component.
Uninstall the benchmarks or test templates.
Args:
test_templates (Iterable[TestTemplate]): The test templates.
Returns:
InstallStatusResult: Result containing the uninstallation status and error message if any.
"""
self.logger.info("Uninstalling test templates.")
uninstall_results = {}
with ThreadPoolExecutor() as executor:
futures = [executor.submit(test_template.uninstall) for test_template in test_templates]
futures = {executor.submit(test_template.uninstall): test_template for test_template in test_templates}
for future in as_completed(futures):
future.result()
test_template = futures[future]
try:
future.result()
uninstall_results[test_template.name] = "Success"
except Exception as e:
self.logger.error(f"Uninstallation failed for {test_template.name}: {e}")
uninstall_results[test_template.name] = str(e)

all_success = all(result == "Success" for result in uninstall_results.values())
if all_success:
return InstallStatusResult(True, "All test templates uninstalled successfully.")
else:
return InstallStatusResult(False, "Some test templates failed to uninstall.", uninstall_results)
46 changes: 46 additions & 0 deletions src/cloudai/_core/install_status_result.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Dict, Optional


class InstallStatusResult:
    """
    Class representing the result of an installation, uninstallation, or status check.

    Instances are truthy when the operation succeeded, so callers can write
    ``if not result: print(result)``.

    Attributes:
        success (bool): Indicates whether the operation was successful.
        message (str): A message providing additional information about the result.
        details (Dict[str, str]): Per-test-template details about the result; empty when none were given.
    """

    def __init__(self, success: bool, message: str = "", details: Optional[Dict[str, str]] = None) -> None:
        """
        Initialize the InstallStatusResult.

        Args:
            success (bool): Indicates whether the operation was successful.
            message (str): A message providing additional information about the result.
            details (Optional[Dict[str, str]]): A dictionary containing details about the result for each
                test template. ``None`` (or an empty mapping) is stored as a fresh empty dict.
        """
        self.success = success
        self.message = message
        self.details = details if details else {}

    def __bool__(self) -> bool:
        """Return True when the operation succeeded."""
        # __bool__ must return a real bool; coerce so that a truthy non-bool
        # ``success`` (e.g. an int status) does not raise TypeError.
        return bool(self.success)

    def __str__(self) -> str:
        """Return the message, followed by one ``key: value`` line per detail entry when details exist."""
        if not self.details:
            return self.message
        details_str = "\n".join(f"{key}: {value}" for key, value in self.details.items())
        return f"{self.message}\n{details_str}"

    def __repr__(self) -> str:
        """Return an unambiguous representation for debugging and logging."""
        return (
            f"{type(self).__name__}(success={self.success!r}, "
            f"message={self.message!r}, details={self.details!r})"
        )
26 changes: 11 additions & 15 deletions src/cloudai/_core/install_strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,43 +14,39 @@

from abc import abstractmethod

from .install_status_result import InstallStatusResult
from .test_template_strategy import TestTemplateStrategy


class InstallStrategy(TestTemplateStrategy):
"""
Abstract base class defining the interface for installation strategies across different system environments.
This class provides methods to check if necessary components are installed, to install those components, and
to uninstall them if needed.
"""
"""Abstract base class defining the interface for installation strategies across different system environments."""

@abstractmethod
def is_installed(self) -> bool:
def is_installed(self) -> InstallStatusResult:
"""
Check if the necessary components are already installed on the system.
Returns
bool: True if the necessary components are installed, False otherwise.
InstallStatusResult: Result containing the installation status and error message if not installed.
"""
pass
return InstallStatusResult(success=True)

@abstractmethod
def install(self) -> None:
def install(self) -> InstallStatusResult:
"""
Perform installation operations for a specific system.
Returns
None
InstallStatusResult: Result containing the installation status and error message if installation failed.
"""
pass
return InstallStatusResult(success=True)

@abstractmethod
def uninstall(self) -> None:
def uninstall(self) -> InstallStatusResult:
"""
Perform uninstallation operations for a specific system.
Returns
None
InstallStatusResult: Result containing the uninstallation status and error message if uninstallation failed.
"""
pass
return InstallStatusResult(success=True)
33 changes: 24 additions & 9 deletions src/cloudai/_core/test_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

from .command_gen_strategy import CommandGenStrategy
from .grading_strategy import GradingStrategy
from .install_status_result import InstallStatusResult
from .install_strategy import InstallStrategy
from .job_id_retrieval_strategy import JobIdRetrievalStrategy
from .job_status_result import JobStatusResult
Expand Down Expand Up @@ -84,27 +85,41 @@ def __repr__(self) -> str:
"""
return f"TestTemplate(name={self.name})"

def is_installed(self) -> bool:
def is_installed(self) -> InstallStatusResult:
"""
Check if the test template is already installed on the specified system.
Returns
bool: True if installed, False otherwise.
InstallStatusResult: Result containing the installation status and error message if not installed.
"""
if self.install_strategy is not None:
return self.install_strategy.is_installed()
else:
return True
return InstallStatusResult(success=True)

def install(self) -> None:
"""Install the test template at the specified location using the system's installation strategy."""
def install(self) -> InstallStatusResult:
"""
Install the test template at the specified location using the system's installation strategy.
Returns
InstallStatusResult: Result containing the installation status and error message if installation failed.
"""
if self.install_strategy is not None:
self.install_strategy.install()
return self.install_strategy.install()
else:
return InstallStatusResult(success=True)

def uninstall(self) -> None:
"""Uninstall the test template from the specified location using the system's uninstallation strategy."""
def uninstall(self) -> InstallStatusResult:
"""
Uninstall the test template from the specified location using the system's uninstallation strategy.
Returns
InstallStatusResult: Result containing the uninstallation status and error message if uninstallation failed.
"""
if self.install_strategy is not None:
self.install_strategy.uninstall()
return self.install_strategy.uninstall()
else:
return InstallStatusResult(success=True)

def gen_exec_command(
self,
Expand Down
Loading

0 comments on commit 967e7d7

Please sign in to comment.