Commit: Save log for OSG jobs (#208)
dachengx authored Sep 7, 2024
1 parent c954d83 commit 09f35f5
Showing 3 changed files with 18 additions and 11 deletions.
alea/submitters/combine.sh (2 changes: 1 addition & 1 deletion)

@@ -13,7 +13,7 @@ ls -lh
 output_filename=$workflow_id-combined_output.tar.gz
 
 # Tar all the .h5 files into the output file
-tar -czf $output_filename *.h5
+tar -czf $output_filename *.h5 *.h5.log
 
 # Check the output
 echo "Checking the output"
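The shell change above simply widens the tar glob so that the per-job stdout logs travel in the combined archive alongside the results. A rough Python equivalent of what the updated line does (the workflow id here is illustrative, not from the repo):

import glob
import tarfile

workflow_id = "example-workflow"  # stand-in for the $workflow_id shell variable
output_filename = f"{workflow_id}-combined_output.tar.gz"

with tarfile.open(output_filename, "w:gz") as tar:
    # Bundle both the .h5 results and the new .h5.log stdout captures
    for path in sorted(glob.glob("*.h5") + glob.glob("*.h5.log")):
        tar.add(path)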
alea/submitters/htcondor.py (20 changes: 13 additions & 7 deletions)

@@ -613,22 +613,28 @@ def _generate_workflow(self, name="run_toymc_wrapper"):

             if not args_dict["only_toydata"]:
                 output_filename = args_dict["output_filename"]
-                job.add_outputs(File(os.path.basename(output_filename)), stage_out=False)
-                combine_job.add_inputs(File(os.path.basename(output_filename)))
+                output_filename_base = os.path.basename(output_filename)
+                job.add_outputs(File(output_filename_base), stage_out=False)
+                job.set_stdout(File(f"{output_filename_base}.log"), stage_out=False)
+                combine_job.add_inputs(File(output_filename_base))
+                combine_job.add_inputs(File(f"{output_filename_base}.log"))
 
             toydata_filename = args_dict["toydata_filename"]
+            toydata_filename_base = os.path.basename(toydata_filename)
             if args_dict["toydata_mode"] == "read":
                 if not os.path.exists(toydata_filename):
                     raise ValueError(f"Can not find {toydata_filename} containing toydata.")
                 # Add toydata as input if needed
                 self.rc.add_replica(
                     "local",
-                    os.path.basename(toydata_filename),
+                    toydata_filename_base,
                     f"file://{toydata_filename}",
                 )
-                job.add_inputs(File(os.path.basename(toydata_filename)))
+                job.add_inputs(File(toydata_filename_base))
             elif args_dict["toydata_mode"] == "generate_and_store":
                 # Only add the toydata file if instructed to do so
-                job.add_outputs(File(os.path.basename(toydata_filename)), stage_out=False)
-                combine_job.add_inputs(File(os.path.basename(toydata_filename)))
+                job.add_outputs(File(toydata_filename_base), stage_out=False)
+                combine_job.add_inputs(File(toydata_filename_base))
 
             # Add the arguments into the job
             # Using escaped argument to avoid the shell syntax error
@@ -706,7 +712,7 @@ def _check_filename_unique(self):
     def submit(self, **kwargs):
         """Serve as the main function to submit the workflow."""
         if os.path.exists(self.runs_dir):
-            raise RuntimeError(f"Workflow already exists at {self.runs_dir}. Exiting.")
+            raise RuntimeError(f"Workflow already exists at {self.runs_dir}.")
         self._validate_x509_proxy()
 
         # 0o755 means read/write/execute for owner, read/execute for everyone else
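For context, the pattern these htcondor.py hunks use is Pegasus's stdout capture: a job's stdout can be declared as a File so the planner tracks it like any other output and a downstream job can consume it. A minimal sketch with the Pegasus 5.x Python API (workflow, job, and file names are hypothetical, not taken from alea):

from Pegasus.api import File, Job, Workflow

wf = Workflow("toymc-example")  # hypothetical workflow name

output = File("toymc_output.h5")
log = File("toymc_output.h5.log")

toymc_job = Job("run_toymc_wrapper")
toymc_job.add_outputs(output, stage_out=False)
# Capture stdout as a tracked file instead of letting it vanish on the worker
toymc_job.set_stdout(log, stage_out=False)

combine_job = Job("combine")
# The combine job now receives the log alongside the data file
combine_job.add_inputs(output, log)

wf.add_jobs(toymc_job, combine_job)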
alea/submitters/slurm.py (7 changes: 4 additions & 3 deletions)

@@ -42,7 +42,6 @@ def __init__(self, *args, **kwargs):
         self.batchq_arguments = {**BATCHQ_DEFAULT_ARGUMENTS, **self.slurm_configurations}
         self._check_batchq_arguments()
         super().__init__(*args, **kwargs)
-        self.log_dir = self.outputfolder
 
     def _submit(self, job, **kwargs):
         """Submits job to batch queue which actually runs the analysis.
@@ -61,7 +60,7 @@ def _submit(self, job, **kwargs):

         log = kwargs.pop("log", None)
         if log is None:
-            log = os.path.join(self.log_dir, f"{jobname.lower()}.log")
+            log = os.path.join(self.outputfolder, f"{jobname.lower()}.log")
 
         kwargs_to_pop = []
         for key, val in kwargs.items():
@@ -103,6 +102,8 @@ def submit(self, **kwargs):
                 time.sleep(30)
             batchq_kwargs["jobname"] = f"{_jobname}_{job:03d}"
             if last_output_filename is not None:
-                batchq_kwargs["log"] = os.path.join(self.log_dir, f"{last_output_filename}.log")
+                batchq_kwargs["log"] = os.path.join(
+                    self.outputfolder, f"{last_output_filename}.log"
+                )
             self.logging.debug(f"Call '_submit' with job: {job} and kwargs: {batchq_kwargs}.")
             self._submit(script, **batchq_kwargs)
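The slurm.py hunks drop the redundant self.log_dir alias and derive log paths from self.outputfolder directly. A small standalone distillation of the resulting path logic (not the class itself; the example folder and job name are made up):

import os

def default_log_path(outputfolder: str, jobname: str) -> str:
    # Mirrors the fallback in _submit: per-job logs live next to the outputs
    return os.path.join(outputfolder, f"{jobname.lower()}.log")

print(default_log_path("/scratch/alea_outputs", "ToyMC_000"))
# -> /scratch/alea_outputs/toymc_000.log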
