Skip to content

Commit

Permalink
Fix on providing empty "Name" values
Browse files Browse the repository at this point in the history
in "Experiments"
- now properly
  • Loading branch information
iquasere committed Nov 4, 2024
1 parent 2c1fb9f commit d1bbc3f
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 16 deletions.
13 changes: 13 additions & 0 deletions workflow/mosca.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,13 @@ def save_config(config_data, filename, output_format):


def validate_exps(exps_data):
def set_name(files: str, data_type: str) -> str:
    """Derive a default experiment name from the "Files" value of a row.

    Args:
        files: Path of the input. Either a single file, a comma-separated
            pair of files, or (for protein data) a folder.
        data_type: Value of the row's "Data type" column.

    Returns:
        * ``data_type == 'protein'``: the last path component, i.e. the
          folder name (``input/mp1`` -> ``mp1``).
        * comma-separated files: the last path component, cut at its first
          comma and then at its first ``_R``
          (``in/s1_R1.fq,in/s1_R2.fq`` -> ``s1``).
        * otherwise: the last path component up to its first ``.fa``
          (``in/sample.fasta`` -> ``sample``; also matches ``.fastq``).
    """
    # Drop trailing slashes first: without this, a folder given as
    # "input/mp1/" would produce an empty name.
    basename = files.rstrip('/').split('/')[-1]
    if data_type == 'protein':
        return basename     # which is the foldername (e.g. input/mp1 -> mp1)
    if ',' in files:
        # NOTE: the basename is whatever follows the last '/', so when the
        # paths carry directories it comes from the last listed file.
        return basename.split(',')[0].split('_R')[0]
    return basename.split('.fa')[0]
exps = pd.DataFrame(exps_data)
reserved_words = [
'if', 'else', 'repeat', 'while', 'function', 'for', 'in', 'next', 'break', 'TRUE', 'FALSE', 'NULL', 'Inf',
Expand All @@ -62,6 +69,12 @@ def validate_exps(exps_data):
if not bool(good_pattern.match(name)):
sys.exit(f'INVALID "NAME" in "experiments": {name} starts with a number or has a special character.\n'
f'Please use only letters, numbers, dots (.) and underscores (_).')
# Fill in missing names: a row whose "Name" is null or an empty string gets a
# name derived from its "Files" and "Data type" values through set_name.
for i in range(len(exps)):
    row = exps.iloc[i]
    if pd.isnull(row['Name']) or row['Name'] == '':
        exps.iloc[i, exps.columns.get_loc('Name')] = set_name(row['Files'], row['Data type'])
    # NOTE(review): disabled leftover kept for reference; it refers to `config`
    # and `EXPS`, which are not visible in this function - confirm before
    # re-enabling.
    # if not config['do_assembly']:
    #     EXPS.iloc[i]['Sample'] = EXPS.iloc[i]['Name']
# "Name" must be unique across rows. duplicated() flags every repeat after the
# first occurrence; collect those values so the message can list them (joining
# the boolean returned by .any() would raise TypeError instead of exiting
# with the intended message).
duplicated_names = exps.loc[exps['Name'].duplicated(), 'Name'].unique()
if len(duplicated_names) > 0:
    sys.exit(f'ERROR: Multiple rows with same "Name" value: {",".join(map(str, duplicated_names))}.')

Expand Down
16 changes: 0 additions & 16 deletions workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,6 @@ validate(config, schema="../schemas/config.schema.yaml")

# Output directory of the run, read from the "output" entry of the config.
OUTPUT = config["output"]
# Experiments table: a DataFrame built from the "experiments" entry of the config.
EXPS = pd.DataFrame(config["experiments"])

def set_name(files: str, data_type: str) -> str:
    """Derive a default experiment name from the "Files" value of a row.

    Args:
        files: Path of the input. Either a single file, a comma-separated
            pair of files, or (for protein data) a folder.
        data_type: Value of the row's "Data type" column.

    Returns:
        * ``data_type == 'protein'``: the last path component, i.e. the
          folder name (``input/mp1`` -> ``mp1``).
        * comma-separated files: the last path component, cut at its first
          comma and then at its first ``_R``
          (``in/s1_R1.fq,in/s1_R2.fq`` -> ``s1``).
        * otherwise: the last path component up to its first ``.fa``
          (``in/sample.fasta`` -> ``sample``; also matches ``.fastq``).
    """
    # Drop trailing slashes first: without this, a folder given as
    # "input/mp1/" would produce an empty name.
    basename = files.rstrip('/').split('/')[-1]
    if data_type == 'protein':
        return basename     # which is the foldername (e.g. input/mp1 -> mp1)
    if ',' in files:
        # NOTE: the basename is whatever follows the last '/', so when the
        # paths carry directories it comes from the last listed file.
        return basename.split(',')[0].split('_R')[0]
    return basename.split('.fa')[0]

# Give every experiment a usable "Name": rows where it is null or an empty
# string receive one derived from their "Files" and "Data type" values.
for i in range(len(EXPS)):
    _row = EXPS.iloc[i]
    _has_name = not (pd.isnull(_row['Name']) or _row['Name'] == '')
    if _has_name:
        continue
    _name_position = EXPS.columns.get_loc('Name')
    EXPS.iloc[i, _name_position] = set_name(_row['Files'], _row['Data type'])
    # Disabled: when assembly is off, "Sample" would take the row's "Name".
    # if not config['do_assembly']:
    #     EXPS.iloc[i]['Sample'] = EXPS.iloc[i]['Name']

# Make sure the output directory exists (parents are created as needed; an
# already existing directory is not an error).
pathlib.Path(f"{OUTPUT}").mkdir(parents=True, exist_ok=True)
# Write the experiments table into it as tab-separated text, without the index.
EXPS.to_csv(f"{OUTPUT}/exps.tsv", sep = '\t', index = False)

Expand Down

0 comments on commit d1bbc3f

Please sign in to comment.