Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tarball alea for later user installation #215

Merged
merged 6 commits into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 45 additions & 33 deletions alea/submitters/htcondor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from pathlib import Path
from tqdm import tqdm
from utilix.x509 import _validate_x509_proxy
from utilix.tarball import Tarball
from Pegasus.api import (
Arch,
Operation,
Expand Down Expand Up @@ -128,7 +129,7 @@ def requirements(self):
return _requirements

def _tar_h5_files(self, directory, template_tarball="templates.tar.gz"):
"""Tar all .h5 templates in the directory and its subdirectories into a tarball."""
"""Tar all needed templates into a flat tarball."""
# Create a tar.gz archive
with tarfile.open(template_tarball, "w:gz") as tar:
tar.add(directory, arcname=os.path.basename(directory))
Expand Down Expand Up @@ -172,8 +173,8 @@ def _modify_yaml(self):
def update_template_filenames(node):
if isinstance(node, dict):
for key, value in node.items():
if key == "template_filename":
filename = value.split("/")[-1]
if key in ["template_filename", "spectrum_name"]:
filename = os.path.basename(value)
node[key] = filename
else:
update_template_filenames(value)
Expand All @@ -192,32 +193,6 @@ def update_template_filenames(node):
f"written to {self.modified_statistical_model_config}"
)

def _contains_subdirectories(self, directory):
"""Check if the specified directory contains any subdirectories.

Args:
directory (str): The path to the directory to check.

Returns:
bool: True if there are subdirectories inside the given directory, False otherwise.

"""
# List all entries in the directory
try:
for entry in os.listdir(directory):
# Check if the entry is a directory
if os.path.isdir(os.path.join(directory, entry)):
return True
except FileNotFoundError:
print("The specified directory does not exist.")
return False
except PermissionError:
print("Permission denied for accessing the directory.")
return False

# If no subdirectories are found
return False

def _setup_workflow_id(self):
"""Set up the workflow ID."""
# If you have named the workflow, use that name. Otherwise, use the current time as name.
Expand Down Expand Up @@ -346,7 +321,7 @@ def _generate_rc(self):
"""Generate the ReplicaCatalog for the workflow.

1. The input files for the job, which are the templates in tarball,
the yaml files and alea_run_toymc.
the yaml files, toydata files, alea_run_toymc.py and install.sh.
2. The output files for the job, which are the toydata and the output files.
Since the outputs are not known in advance, we will add them in the job definition.

Expand Down Expand Up @@ -383,10 +358,10 @@ def _generate_rc(self):
"file://{}".format(self.top_dir / "alea/submitters/run_toymc_wrapper.sh"),
)
# Add alea_run_toymc
self.f_alea_run_toymc = File("alea_run_toymc")
self.f_alea_run_toymc = File("alea_run_toymc.py")
rc.add_replica(
"local",
"alea_run_toymc",
"alea_run_toymc.py",
"file://{}".format(self.top_dir / "alea/scripts/alea_run_toymc.py"),
)
# Add combine executable
Expand All @@ -403,9 +378,39 @@ def _generate_rc(self):
"separate.sh",
"file://{}".format(self.top_dir / "alea/submitters/separate.sh"),
)
# Untar and install the packages
self.f_install = File("install.sh")
rc.add_replica(
"local",
"install.sh",
"file://{}".format(self.top_dir / "alea/submitters/install.sh"),
)

return rc

def make_tarballs(self):
    """Create tarballs of the packages that are installed from a git repository.

    For every package in the hard-coded list (currently only ``alea``):

    * if ``Tarball.get_installed_git_repo`` returns a falsy value, no tarball is
      made; in that case the package must not be user-installed either,
      otherwise a ``RuntimeError`` is raised;
    * otherwise a tarball is created under ``self.generated_dir`` and recorded.

    Returns:
        tuple: ``(tarballs, tarball_paths)``, two parallel lists holding the
            ``File`` object built from each tarball name and the matching
            tarball path.

    Raises:
        RuntimeError: If a package has no installed git repo but is reported as
            user-installed.

    """
    tarballs = []
    tarball_paths = []
    for package_name in ["alea"]:
        _tarball = Tarball(self.generated_dir, package_name)
        if not Tarball.get_installed_git_repo(package_name):
            # Packages should not be non-editable user-installed
            # NOTE(review): presumably such a copy would shadow the environment's
            # own installation on the worker; confirm against utilix.
            if Tarball.is_user_installed(package_name):
                raise RuntimeError(
                    f"You should not install {package_name} "
                    "in non-editable user-installed mode."
                )
            # Nothing to ship for this package
            continue

        _tarball.create_tarball()
        tarball = File(_tarball.tarball_name)
        tarball_path = _tarball.tarball_path
        logger.warning(
            f"Using tarball of user installed package {package_name} at {tarball_path}."
        )
        tarballs.append(tarball)
        tarball_paths.append(tarball_path)
    return tarballs, tarball_paths

def _initialize_job(
self,
name="run_toymc_wrapper",
Expand Down Expand Up @@ -527,6 +532,11 @@ def _generate_workflow(self, name="run_toymc_wrapper"):
self.tc = self._generate_tc()
self.rc = self._generate_rc()

# Tarball the editable self-installed packages
tarballs, tarball_paths = self.make_tarballs()
for tarball, tarball_path in zip(tarballs, tarball_paths):
self.rc.add_replica("local", tarball, tarball_path)

# Iterate over the tickets and generate jobs
combine_i = 0
new_to_combine = True
Expand All @@ -549,7 +559,7 @@ def _generate_workflow(self, name="run_toymc_wrapper"):
in ["read", "generate_and_store", "generate", "no_toydata"]
):
raise NotImplementedError(
"Only generate_and_store toydata mode is supported on OSG."
f"{args_dict['toydata_mode']} toydata mode is not supported on OSG."
)

# Create a job with base requirements
Expand Down Expand Up @@ -578,6 +588,8 @@ def _generate_workflow(self, name="run_toymc_wrapper"):
self.f_statistical_model_config,
self.f_run_toymc_wrapper,
self.f_alea_run_toymc,
self.f_install,
*tarballs,
)

if not args_dict["only_toydata"]:
Expand Down
34 changes: 34 additions & 0 deletions alea/submitters/install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/bin/bash

# Install the packages shipped as <package>.tar.gz in the current directory.
# Each tarball found is unpacked into ./<package> and pip-installed with
# --user --no-deps; packages without a tarball are skipped.
#
# NOTE(review): run_toymc_wrapper.sh sources this file (". install.sh"), so
# "set -e" and the variables below stay active in the calling shell; confirm
# that this is intended.

set -e

# List of packages
packages=("alea")

# Loop through each package
for package in "${packages[@]}"
do
    # Check if the tarball exists
    if [ ! -f "$package.tar.gz" ]; then
        echo "Tarball $package.tar.gz not found. Skipping $package."
        echo
        continue
    fi

    echo "Installing $package:"

    # Create a directory for the package
    mkdir -p "$package"

    # Extract the tarball to the package directory, dropping the archive's
    # top-level folder so the sources land directly in ./$package
    tar -xzf "$package.tar.gz" -C "$package" --strip-components=1

    # Install the package in very quiet mode by -qq
    pip install "./$package" --user --no-deps -qq

    # Verify the installation by importing the package
    python -c "import $package; print($package.__file__)"

    echo "$package installation complete."
    echo
done
7 changes: 5 additions & 2 deletions alea/submitters/run_toymc_wrapper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ OUTPUT_FILENAME=$(echo "$output_filename" | sed "s/'/\"/g")
SEED=$(echo "$seed" | sed "s/'/\"/g")
METADATA=$(echo "$metadata" | sed "s/'/\"/g")

# Installing customized packages
. install.sh

# Extract tarballs input
mkdir -p templates
START=$(date +%s)
Expand All @@ -110,7 +113,7 @@ echo "These are the contents of templates/:"
ls -lh templates/

# Print the command
echo "Running command: python3 ./alea_run_toymc.py \\
echo "Running command: python alea_run_toymc.py \\
--statistical_model $STATISTICAL_MODEL \\
--poi $POI \\
--hypotheses $HYPOTHESES \\
Expand All @@ -134,7 +137,7 @@ echo "Running command: python3 ./alea_run_toymc.py \\
--metadata $METADATA"

# Run the toy MC
time python3 ./alea_run_toymc \
time python alea_run_toymc.py \
--statistical_model $STATISTICAL_MODEL \
--poi $POI \
--hypotheses $HYPOTHESES \
Expand Down
Loading