Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix add_default_workflow #3446

Open
wants to merge 4 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions qiita_db/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ def get_merging_scheme_from_job(cls, job):
acmd = job.command
parent = job.input_artifacts[0]
parent_pparameters = parent.processing_parameters
phms = None
if parent_pparameters is None:
parent_cmd_name = None
parent_parameters = None
Expand All @@ -125,12 +126,26 @@ def get_merging_scheme_from_job(cls, job):
parent_cmd_name = pcmd.name
parent_parameters = parent_pparameters.values
parent_merging_scheme = pcmd.merging_scheme

return qdb.util.human_merging_scheme(
if not parent_merging_scheme['ignore_parent_command']:
gp = parent.parents[0]
gp_params = gp.processing_parameters
if gp_params is not None:
gp_cmd = gp_params.command
phms = qdb.util.human_merging_scheme(
parent_cmd_name, parent_merging_scheme,
gp_cmd.name, gp_cmd.merging_scheme,
parent_parameters, [], gp_params.values)

hms = qdb.util.human_merging_scheme(
acmd.name, acmd.merging_scheme,
parent_cmd_name, parent_merging_scheme,
job.parameters.values, [], parent_parameters)

if phms is not None:
hms = qdb.util.merge_overlapping_strings(hms, phms)

return hms

@classmethod
def retrieve_feature_values(cls, archive_merging_scheme=None,
features=None):
Expand Down
16 changes: 13 additions & 3 deletions qiita_db/metadata_template/prep_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -808,14 +808,24 @@ def _get_node_info(workflow, node):

parent_cmd_name = None
parent_merging_scheme = None
phms = None
if pcmd is not None:
parent_cmd_name = pcmd.name
parent_merging_scheme = pcmd.merging_scheme
if not parent_merging_scheme['ignore_parent_command']:
phms = _get_node_info(workflow, parent)

return qdb.util.human_merging_scheme(
hms = qdb.util.human_merging_scheme(
ccmd.name, ccmd.merging_scheme, parent_cmd_name,
parent_merging_scheme, cparams, [], pparams)

# if the parent should not ignore its parent command, then we need
# to merge the previous result with the new one
if phms is not None:
hms = qdb.util.merge_overlapping_strings(hms, phms)

return hms

def _get_predecessors(workflow, node):
# recursive method to get predecessors of a given node
pred = []
Expand Down Expand Up @@ -871,7 +881,7 @@ def _get_predecessors(workflow, node):
'artifact transformation']
merging_schemes = {
qdb.archive.Archive.get_merging_scheme_from_job(j): {
x: y.id for x, y in j.outputs.items()}
x: str(y.id) for x, y in j.outputs.items()}
Copy link
Contributor

@charles-cowart charles-cowart Dec 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The conversion to string for y.id is also not obvious here. Perhaps a comment as to why it's necessary would be appropriate?

# we are going to select only the jobs that were a 'success', that
# are not 'hidden' and that have an output - jobs that are not
# hidden and a successs but that do not have outputs are jobs which
Expand Down Expand Up @@ -989,7 +999,7 @@ def _get_predecessors(workflow, node):
init_artifacts = {
wkartifact_type: f'{starting_job.id}:'}
else:
init_artifacts = {wkartifact_type: self.artifact.id}
init_artifacts = {wkartifact_type: str(self.artifact.id)}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I hate to say it but having to cast an internal id to string is a little bit concerning to me. Internal code should use the numeric form of the artifact_id and it should also be easy to pass the numeric version for any REST call. May I ask why this is needed? I feel like even if it's needed, it would be much clearer to cast it right before it's used to make it obvious why it's needed. Sorry if that's not clear.


cmds_to_create.reverse()
current_job = None
Expand Down
22 changes: 22 additions & 0 deletions qiita_db/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -2954,3 +2954,25 @@ def merge_rows(rows):
row['node_model']]
qdb.sql_connection.TRN.add(sql, sql_args=to_insert)
qdb.sql_connection.TRN.execute()


def merge_overlapping_strings(str1, str2):
"""Helper function to merge 2 overlapping strings

Parameters
----------
str1: str
Initial string
str2: str
End string

Returns
----------
str
The merged strings
"""
overlap = ""
for i in range(1, min(len(str1), len(str2)) + 1):
if str1.endswith(str2[:i]):
overlap = str2[:i]
return str1 + str2[len(overlap):]
Loading