From 4ed3d62fee77cd81c32930b8217b7c1f65572994 Mon Sep 17 00:00:00 2001 From: dachengx Date: Wed, 18 Sep 2024 23:57:53 -0500 Subject: [PATCH 1/5] Tarball alea for later user installation --- alea/submitters/htcondor.py | 38 +++++++++++++++++++++++++++++++++++++ alea/submitters/install.sh | 34 +++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) create mode 100644 alea/submitters/install.sh diff --git a/alea/submitters/htcondor.py b/alea/submitters/htcondor.py index 6875b2c..182911a 100644 --- a/alea/submitters/htcondor.py +++ b/alea/submitters/htcondor.py @@ -8,6 +8,7 @@ from pathlib import Path from tqdm import tqdm from utilix.x509 import _validate_x509_proxy +from utilix.tarball import Tarball from Pegasus.api import ( Arch, Operation, @@ -403,9 +404,39 @@ def _generate_rc(self): "separate.sh", "file://{}".format(self.top_dir / "alea/submitters/separate.sh"), ) + # Untar and install the packages + self.f_install = File("install.sh") + rc.add_replica( + "local", + "install.sh", + "file://{}".format(self.top_dir / "alea/submitters/install.sh"), + ) return rc + def make_tarballs(self): + """Make tarballs of Ax-based packages if they are in editable user-installed mode.""" + tarballs = [] + tarball_paths = [] + for package_name in ["alea"]: + _tarball = Tarball(self.generated_dir, package_name) + if not Tarball.get_installed_git_repo(package_name): + # Packages should not be non-editable user-installed + if Tarball.is_user_installed(package_name): + raise RuntimeError( + f"You should install {package_name} in non-editable user-installed mode." + ) + else: + _tarball.create_tarball() + tarball = File(_tarball.tarball_name) + tarball_path = _tarball.tarball_path + logger.warning( + f"Using tarball of user installed package {package_name} at {tarball_path}." + ) + tarballs.append(tarball) + tarball_paths.append(tarball_path) + return tarballs, tarball_paths + def _initialize_job( self, name="run_toymc_wrapper", @@ -527,6 +558,11 @@ def _generate_workflow(self, name="run_toymc_wrapper"): self.tc = self._generate_tc() self.rc = self._generate_rc() + # Tarball the editable self-installed packages + tarballs, tarball_paths = self.make_tarballs() + for tarball, tarball_path in zip(tarballs, tarball_paths): + self.rc.add_replica("local", tarball, tarball_path) + # Iterate over the tickets and generate jobs combine_i = 0 new_to_combine = True @@ -578,6 +614,8 @@ def _generate_workflow(self, name="run_toymc_wrapper"): self.f_statistical_model_config, self.f_run_toymc_wrapper, self.f_alea_run_toymc, + self.f_install, + *tarballs, ) if not args_dict["only_toydata"]: diff --git a/alea/submitters/install.sh b/alea/submitters/install.sh new file mode 100644 index 0000000..96e6d02 --- /dev/null +++ b/alea/submitters/install.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +set -e + +# List of packages +packages=("alea") + +# Loop through each package +for package in "${packages[@]}" +do + # Check if the tarball exists + if [ ! -f "$package.tar.gz" ]; then + echo "Tarball $package.tar.gz not found. Skipping $package." + echo + continue + fi + + echo "Installing $package:" + + # Create a directory for the package + mkdir -p $package + + # Extract the tarball to the package directory + tar -xzf $package.tar.gz -C $package --strip-components=1 + + # Install the package in very quiet mode by -qq + pip install ./$package --user --no-deps -qq + + # Verify the installation by importing the package + python -c "import $package; print($package.__file__)" + + echo "$package installation complete." + echo +done From 36fd6221bf252a5a0defdd9afc30f207a821503d Mon Sep 17 00:00:00 2001 From: dachengx Date: Wed, 18 Sep 2024 23:59:06 -0500 Subject: [PATCH 2/5] Execute install.sh --- alea/submitters/run_toymc_wrapper.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/alea/submitters/run_toymc_wrapper.sh b/alea/submitters/run_toymc_wrapper.sh index 5583284..bdb0538 100644 --- a/alea/submitters/run_toymc_wrapper.sh +++ b/alea/submitters/run_toymc_wrapper.sh @@ -91,6 +91,9 @@ OUTPUT_FILENAME=$(echo "$output_filename" | sed "s/'/\"/g") SEED=$(echo "$seed" | sed "s/'/\"/g") METADATA=$(echo "$metadata" | sed "s/'/\"/g") +# Installing customized packages +. install.sh + # Extract tarballs input mkdir -p templates START=$(date +%s) From 93a12a54882b43c6a9ae7d9fcc5586f7dd7ebc21 Mon Sep 17 00:00:00 2001 From: dachengx Date: Thu, 19 Sep 2024 01:23:08 -0500 Subject: [PATCH 3/5] Fix bugs --- alea/submitters/htcondor.py | 11 ++--------- alea/submitters/run_toymc_wrapper.sh | 4 ++-- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/alea/submitters/htcondor.py b/alea/submitters/htcondor.py index 517e911..46c05ef 100644 --- a/alea/submitters/htcondor.py +++ b/alea/submitters/htcondor.py @@ -173,8 +173,8 @@ def _modify_yaml(self): def update_template_filenames(node): if isinstance(node, dict): for key, value in node.items(): - if key == "template_filename": - filename = value.split("/")[-1] + if key in ["template_filename", "spectrum_name"]: + filename = os.path.basename(value) node[key] = filename else: update_template_filenames(value) @@ -383,13 +383,6 @@ def _generate_rc(self): "run_toymc_wrapper.sh", "file://{}".format(self.top_dir / "alea/submitters/run_toymc_wrapper.sh"), ) - # Add alea_run_toymc - self.f_alea_run_toymc = File("alea_run_toymc") - rc.add_replica( - "local", - "alea_run_toymc", - "file://{}".format(self.top_dir / "alea/scripts/alea_run_toymc.py"), - ) # Add combine executable self.f_combine = File("combine.sh") rc.add_replica( diff --git a/alea/submitters/run_toymc_wrapper.sh b/alea/submitters/run_toymc_wrapper.sh index 22bc4d8..df3039c 100644 --- a/alea/submitters/run_toymc_wrapper.sh +++ b/alea/submitters/run_toymc_wrapper.sh @@ -113,7 +113,7 @@ echo "These are the contents of templates/:" ls -lh templates/ # Print the command -echo "Running command: python3 ./alea_run_toymc.py \\ +echo "Running command: alea_run_toymc \\ --statistical_model $STATISTICAL_MODEL \\ --poi $POI \\ --hypotheses $HYPOTHESES \\ @@ -137,7 +137,7 @@ echo "Running command: python3 ./alea_run_toymc.py \\ --metadata $METADATA" # Run the toy MC -time python3 ./alea_run_toymc \ +time alea_run_toymc \ --statistical_model $STATISTICAL_MODEL \ --poi $POI \ --hypotheses $HYPOTHESES \ From 78cef2140cb354a9831cc0c7aaaafd0d1dd53b4b Mon Sep 17 00:00:00 2001 From: dachengx Date: Thu, 19 Sep 2024 07:28:24 -0500 Subject: [PATCH 4/5] Fix bug --- alea/submitters/htcondor.py | 37 +++++++--------------------- alea/submitters/run_toymc_wrapper.sh | 4 +-- 2 files changed, 11 insertions(+), 30 deletions(-) diff --git a/alea/submitters/htcondor.py b/alea/submitters/htcondor.py index 46c05ef..038d8e3 100644 --- a/alea/submitters/htcondor.py +++ b/alea/submitters/htcondor.py @@ -129,7 +129,7 @@ def requirements(self): return _requirements def _tar_h5_files(self, directory, template_tarball="templates.tar.gz"): - """Tar all .h5 templates in the directory and its subdirectories into a tarball.""" + """Tar all needed templates into a flat tarball.""" # Create a tar.gz archive with tarfile.open(template_tarball, "w:gz") as tar: tar.add(directory, arcname=os.path.basename(directory)) @@ -193,32 +193,6 @@ def update_template_filenames(node): f"written to {self.modified_statistical_model_config}" ) - def _contains_subdirectories(self, directory): - """Check if the specified directory contains any subdirectories. - - Args: - directory (str): The path to the directory to check. - - Returns: - bool: True if there are subdirectories inside the given directory, False otherwise. - - """ - # List all entries in the directory - try: - for entry in os.listdir(directory): - # Check if the entry is a directory - if os.path.isdir(os.path.join(directory, entry)): - return True - except FileNotFoundError: - print("The specified directory does not exist.") - return False - except PermissionError: - print("Permission denied for accessing the directory.") - return False - - # If no subdirectories are found - return False - def _setup_workflow_id(self): """Set up the workflow ID.""" # If you have named the workflow, use that name. Otherwise, use the current time as name. @@ -383,6 +357,13 @@ def _generate_rc(self): "run_toymc_wrapper.sh", "file://{}".format(self.top_dir / "alea/submitters/run_toymc_wrapper.sh"), ) + # Add alea_run_toymc + self.f_alea_run_toymc = File("alea_run_toymc") + rc.add_replica( + "local", + "alea_run_toymc", + "file://{}".format(self.top_dir / "alea/scripts/alea_run_toymc.py"), + ) # Add combine executable self.f_combine = File("combine.sh") rc.add_replica( @@ -578,7 +559,7 @@ def _generate_workflow(self, name="run_toymc_wrapper"): in ["read", "generate_and_store", "generate", "no_toydata"] ): raise NotImplementedError( - "Only generate_and_store toydata mode is supported on OSG." + f"{args_dict['toydata_mode']} toydata mode is not supported on OSG." ) # Create a job with base requirements diff --git a/alea/submitters/run_toymc_wrapper.sh b/alea/submitters/run_toymc_wrapper.sh index df3039c..dd7e917 100644 --- a/alea/submitters/run_toymc_wrapper.sh +++ b/alea/submitters/run_toymc_wrapper.sh @@ -113,7 +113,7 @@ echo "These are the contents of templates/:" ls -lh templates/ # Print the command -echo "Running command: alea_run_toymc \\ +echo "Running command: python alea_run_toymc.py \\ --statistical_model $STATISTICAL_MODEL \\ --poi $POI \\ --hypotheses $HYPOTHESES \\ @@ -137,7 +137,7 @@ echo "Running command: alea_run_toymc \\ --metadata $METADATA" # Run the toy MC -time alea_run_toymc \ +time python alea_run_toymc.py \ --statistical_model $STATISTICAL_MODEL \ --poi $POI \ --hypotheses $HYPOTHESES \ From c7dd500d6f07611885f1bf8f6b9dc26bf065f76a Mon Sep 17 00:00:00 2001 From: dachengx Date: Thu, 19 Sep 2024 09:39:26 -0500 Subject: [PATCH 5/5] Fix bug of missing files --- alea/submitters/htcondor.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/alea/submitters/htcondor.py b/alea/submitters/htcondor.py index 038d8e3..b27f2a6 100644 --- a/alea/submitters/htcondor.py +++ b/alea/submitters/htcondor.py @@ -321,7 +321,7 @@ def _generate_rc(self): """Generate the ReplicaCatalog for the workflow. 1. The input files for the job, which are the templates in tarball, - the yaml files and alea_run_toymc. + the yaml files, toydata files, alea_run_toymc.py and install.sh. 2. The output files for the job, which are the toydata and the output files. Since the outputs are not known in advance, we will add them in the job definition. @@ -358,10 +358,10 @@ def _generate_rc(self): "file://{}".format(self.top_dir / "alea/submitters/run_toymc_wrapper.sh"), ) # Add alea_run_toymc - self.f_alea_run_toymc = File("alea_run_toymc") + self.f_alea_run_toymc = File("alea_run_toymc.py") rc.add_replica( "local", - "alea_run_toymc", + "alea_run_toymc.py", "file://{}".format(self.top_dir / "alea/scripts/alea_run_toymc.py"), ) # Add combine executable