From a21072d677e38c57c20ad3c389116b7b8814259a Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Thu, 12 Dec 2024 12:16:10 -0700 Subject: [PATCH 1/4] fix add_default_workflow --- qiita_db/archive.py | 19 ++++++++++++++++-- qiita_db/metadata_template/prep_template.py | 14 +++++++++++-- qiita_db/util.py | 22 +++++++++++++++++++++ 3 files changed, 51 insertions(+), 4 deletions(-) diff --git a/qiita_db/archive.py b/qiita_db/archive.py index aab68f783..2d9505afb 100644 --- a/qiita_db/archive.py +++ b/qiita_db/archive.py @@ -125,12 +125,27 @@ def get_merging_scheme_from_job(cls, job): parent_cmd_name = pcmd.name parent_parameters = parent_pparameters.values parent_merging_scheme = pcmd.merging_scheme - - return qdb.util.human_merging_scheme( + phms = None + if not parent_merging_scheme['ignore_parent_command']: + gp = parent.parents[0] + gp_params = gp.processing_parameters + gp_cmd = gp_params.command + + phms = qdb.util.human_merging_scheme( + parent_cmd_name, parent_merging_scheme, gp_cmd.name, + gp_cmd.merging_scheme, parent_parameters, [], + gp_params.values) + + hms = qdb.util.human_merging_scheme( acmd.name, acmd.merging_scheme, parent_cmd_name, parent_merging_scheme, job.parameters.values, [], parent_parameters) + if phms is None: + hms = qdb.util.merge_overlapping_strings(hms, phms) + + return hms + @classmethod def retrieve_feature_values(cls, archive_merging_scheme=None, features=None): diff --git a/qiita_db/metadata_template/prep_template.py b/qiita_db/metadata_template/prep_template.py index 117156d9b..b78850e0d 100644 --- a/qiita_db/metadata_template/prep_template.py +++ b/qiita_db/metadata_template/prep_template.py @@ -808,14 +808,24 @@ def _get_node_info(workflow, node): parent_cmd_name = None parent_merging_scheme = None + phms = None if pcmd is not None: parent_cmd_name = pcmd.name parent_merging_scheme = pcmd.merging_scheme + if not parent_merging_scheme['ignore_parent_command']: + phms = _get_node_info(workflow, parent) - return qdb.util.human_merging_scheme( + hms = qdb.util.human_merging_scheme( ccmd.name, ccmd.merging_scheme, parent_cmd_name, parent_merging_scheme, cparams, [], pparams) + # if the parent should not ignore its parent command, then we need + # to merge the previous result with the new one + if phms is not None: + hms = qdb.util.merge_overlapping_strings(hms, phms) + + return hms + def _get_predecessors(workflow, node): # recursive method to get predecessors of a given node pred = [] @@ -989,7 +999,7 @@ def _get_predecessors(workflow, node): init_artifacts = { wkartifact_type: f'{starting_job.id}:'} else: - init_artifacts = {wkartifact_type: self.artifact.id} + init_artifacts = {wkartifact_type: str(self.artifact.id)} cmds_to_create.reverse() current_job = None diff --git a/qiita_db/util.py b/qiita_db/util.py index c7346e15a..54e50f277 100644 --- a/qiita_db/util.py +++ b/qiita_db/util.py @@ -2954,3 +2954,25 @@ def merge_rows(rows): row['node_model']] qdb.sql_connection.TRN.add(sql, sql_args=to_insert) qdb.sql_connection.TRN.execute() + + +def merge_overlapping_strings(str1, str2): + """Helper function to merge 2 overlapping strings + + Parameters + ---------- + str1: str + Initial string + str2: str + End string + + Returns + ---------- + str + The merged strings + """ + overlap = "" + for i in range(1, min(len(str1), len(str2)) + 1): + if str1.endswith(str2[:i]): + overlap = str2[:i] + return str1 + str2[len(overlap):] From 92989d6e2d0184c191384c59f430148e623a4e90 Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Thu, 12 Dec 2024 12:37:16 -0700 Subject: [PATCH 2/4] fix get_merging_scheme_from_job --- qiita_db/archive.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/qiita_db/archive.py b/qiita_db/archive.py index 2d9505afb..e6f6a8781 100644 --- a/qiita_db/archive.py +++ b/qiita_db/archive.py @@ -116,6 +116,7 @@ def get_merging_scheme_from_job(cls, job): acmd = job.command parent = job.input_artifacts[0] parent_pparameters = parent.processing_parameters + phms = None if parent_pparameters is None: parent_cmd_name = None parent_parameters = None @@ -125,7 +126,6 @@ def get_merging_scheme_from_job(cls, job): parent_cmd_name = pcmd.name parent_parameters = parent_pparameters.values parent_merging_scheme = pcmd.merging_scheme - phms = None if not parent_merging_scheme['ignore_parent_command']: gp = parent.parents[0] gp_params = gp.processing_parameters @@ -141,7 +141,7 @@ def get_merging_scheme_from_job(cls, job): parent_cmd_name, parent_merging_scheme, job.parameters.values, [], parent_parameters) - if phms is None: + if phms is not None: hms = qdb.util.merge_overlapping_strings(hms, phms) return hms From b8188ddc2b462b95129530d3d35b40f5ead87f44 Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Thu, 12 Dec 2024 12:57:21 -0700 Subject: [PATCH 3/4] gp_params is not None --- qiita_db/archive.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/qiita_db/archive.py b/qiita_db/archive.py index e6f6a8781..d411d598a 100644 --- a/qiita_db/archive.py +++ b/qiita_db/archive.py @@ -129,12 +129,12 @@ def get_merging_scheme_from_job(cls, job): if not parent_merging_scheme['ignore_parent_command']: gp = parent.parents[0] gp_params = gp.processing_parameters - gp_cmd = gp_params.command - - phms = qdb.util.human_merging_scheme( - parent_cmd_name, parent_merging_scheme, gp_cmd.name, - gp_cmd.merging_scheme, parent_parameters, [], - gp_params.values) + if gp_params is not None: + gp_cmd = gp_params.command + phms = qdb.util.human_merging_scheme( + parent_cmd_name, parent_merging_scheme, + gp_cmd.name, gp_cmd.merging_scheme, + parent_parameters, [], gp_params.values) hms = qdb.util.human_merging_scheme( acmd.name, acmd.merging_scheme, From 843bbeac7d4042793d1068c5afa06741d498c2df Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Thu, 12 Dec 2024 14:07:37 -0700 Subject: [PATCH 4/4] str(y.id) --- qiita_db/metadata_template/prep_template.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qiita_db/metadata_template/prep_template.py b/qiita_db/metadata_template/prep_template.py index b78850e0d..059ccb55f 100644 --- a/qiita_db/metadata_template/prep_template.py +++ b/qiita_db/metadata_template/prep_template.py @@ -881,7 +881,7 @@ def _get_predecessors(workflow, node): 'artifact transformation'] merging_schemes = { qdb.archive.Archive.get_merging_scheme_from_job(j): { - x: y.id for x, y in j.outputs.items()} + x: str(y.id) for x, y in j.outputs.items()} # we are going to select only the jobs that were a 'success', that # are not 'hidden' and that have an output - jobs that are not # hidden and a successs but that do not have outputs are jobs which