Skip to content

Commit

Permalink
Merge 916e0b0 into 7738ce1
Browse files Browse the repository at this point in the history
  • Loading branch information
CatarinaCarolina authored Jun 7, 2024
2 parents 7738ce1 + 916e0b0 commit 3804c69
Show file tree
Hide file tree
Showing 11 changed files with 354 additions and 241 deletions.
23 changes: 1 addition & 22 deletions big_scape/cli/cli_common_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def common_all(fn):
type=click.Path(
exists=True, dir_okay=False, file_okay=True, path_type=Path
),
default="./config.ini",
default="./config.yml",
help="Path to BiG-SCAPE config file, which stores values for a "
"series of advanced use parameters. (default: ./config.yml).",
),
Expand Down Expand Up @@ -205,16 +205,6 @@ def common_cluster_query(fn):
"file will not be used for the analysis (default: final)."
),
),
click.option(
"--cds_overlap_cutoff",
type=click.FloatRange(min=0, max=1),
default=0.1,
help=(
"Specify at which overlap percentage (as a decimal) two CDS in a gbk "
"are considered to overlap. This preserves longest overlapping CDS "
"(default=0.1)."
),
),
click.option(
"-p",
"--pfam_path",
Expand All @@ -223,17 +213,6 @@ def common_cluster_query(fn):
),
help="Path to Pfam database file.",
),
# hmmer parameters
click.option(
"--domain_overlap_cutoff",
type=click.FloatRange(min=0, max=1),
default=0.1,
help=(
"Specify at which overlap percentage (as a decimal) two domains "
"in a CDS are considered to overlap. Domain with the "
"best score is kept (default=0.1)."
),
),
click.option(
# TODO: implement
"--domain_includelist_path",
Expand Down
189 changes: 149 additions & 40 deletions big_scape/cli/config.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Contains config class and method to parse config file """

# from python
import configparser
import yaml
from pathlib import Path


Expand All @@ -17,6 +17,10 @@ class BigscapeConfig:
MIN_BGC_LENGTH: int = 0
MAX_BGC_LENGTH: int = 500000

# CDS and DOMAIN
CDS_OVERLAP_CUTOFF: float = 0.1
DOMAIN_OVERLAP_CUTOFF: float = 0.1

# LCS
REGION_MIN_LCS_LEN: int = 3
PROTO_MIN_LCS_LEN: int = 3
Expand All @@ -37,6 +41,115 @@ class BigscapeConfig:
# TREE
TOP_FREQS: int = 3

# ANCHOR DOMAINS
ANCHOR_DOMAINS = [
"PF02801",
"PF02624",
"PF00109",
"PF00501",
"PF02797",
"PF01397",
"PF03936",
"PF00432",
"PF00195",
"PF00494",
"PF00668",
"PF05147",
]

# LEGACY ANTISMASH CLASSES
LEGACY_ANTISMASH_CLASSES = {
"pks1_products": {"t1pks", "T1PKS"},
"pksother_products": {
"transatpks",
"t2pks",
"t3pks",
"otherks",
"hglks",
"transAT-PKS",
"transAT-PKS-like",
"T2PKS",
"T3PKS",
"PKS-like",
"hglE-KS",
},
"nrps_products": {"nrps", "NRPS", "NRPS-like", "thioamide-NRP", "NAPAA"},
"ripps_products": {
"lantipeptide",
"thiopeptide",
"bacteriocin",
"linaridin",
"cyanobactin",
"glycocin",
"LAP",
"lassopeptide",
"sactipeptide",
"bottromycin",
"head_to_tail",
"microcin",
"microviridin",
"proteusin",
"lanthipeptide",
"lipolanthine",
"RaS-RiPP",
"fungal-RiPP",
"TfuA-related",
"guanidinotides",
"RiPP-like",
"lanthipeptide-class-i",
"lanthipeptide-class-ii",
"lanthipeptide-class-iii",
"lanthipeptide-class-iv",
"lanthipeptide-class-v",
"ranthipeptide",
"redox-cofactor",
"thioamitides",
"epipeptide",
"cyclic-lactone-autoinducer",
"spliceotide",
"RRE-containing",
},
"saccharide_products": {
"amglyccycl",
"oligosaccharide",
"cf_saccharide",
"saccharide",
},
"others_products": {
"acyl_amino_acids",
"arylpolyene",
"aminocoumarin",
"ectoine",
"butyrolactone",
"nucleoside",
"melanin",
"phosphoglycolipid",
"phenazine",
"phosphonate",
"other",
"cf_putative",
"resorcinol",
"indole",
"ladderane",
"PUFA",
"furan",
"hserlactone",
"fused",
"cf_fatty_acid",
"siderophore",
"blactam",
"fatty_acid",
"PpyS-KS",
"CDPS",
"betalactone",
"PBDE",
"tropodithietic-acid",
"NAGGN",
"halogenated",
"pyrrolidine",
},
}

@staticmethod
def parse_config(run: dict) -> None:
"""parses config file
Expand All @@ -47,56 +160,56 @@ def parse_config(run: dict) -> None:

config_file_path = run["config_file_path"]

config = configparser.ConfigParser()
config.read(config_file_path)
with open(config_file_path) as f:
config = yaml.load(f, Loader=yaml.FullLoader)

# PROFILER
BigscapeConfig.PROFILER_UPDATE_INTERVAL = float(
config["PROFILER"]["PROFILER_UPDATE_INTERVAL"]
)
BigscapeConfig.PROFILER_UPDATE_INTERVAL = config["PROFILER_UPDATE_INTERVAL"]

# INPUT
BigscapeConfig.MERGED_CAND_CLUSTER_TYPE = config["INPUT"][
"MERGED_CAND_CLUSTER_TYPE"
].split(",")
BigscapeConfig.MIN_BGC_LENGTH = int(config["INPUT"]["MIN_BGC_LENGTH"])
BigscapeConfig.MAX_BGC_LENGTH = int(config["INPUT"]["MAX_BGC_LENGTH"])
BigscapeConfig.MERGED_CAND_CLUSTER_TYPE = config["MERGED_CAND_CLUSTER_TYPE"]
BigscapeConfig.MIN_BGC_LENGTH = config["MIN_BGC_LENGTH"]
BigscapeConfig.MAX_BGC_LENGTH = config["MAX_BGC_LENGTH"]

# CDS and DOMAIN
BigscapeConfig.CDS_OVERLAP_CUTOFF = config["CDS_OVERLAP_CUTOFF"]
BigscapeConfig.DOMAIN_OVERLAP_CUTOFF = config["DOMAIN_OVERLAP_CUTOFF"]

# LCS
BigscapeConfig.REGION_MIN_LCS_LEN = int(config["LCS"]["REGION_MIN_LCS_LEN"])
BigscapeConfig.PROTO_MIN_LCS_LEN = int(config["LCS"]["PROTO_MIN_LCS_LEN"])
BigscapeConfig.REGION_MIN_LCS_LEN = config["REGION_MIN_LCS_LEN"]
BigscapeConfig.PROTO_MIN_LCS_LEN = config["PROTO_MIN_LCS_LEN"]

# EXPAND
BigscapeConfig.REGION_MIN_EXPAND_LEN = int(
config["EXPAND"]["REGION_MIN_EXPAND_LEN"]
)
BigscapeConfig.REGION_MIN_EXPAND_LEN_BIO = int(
config["EXPAND"]["REGION_MIN_EXPAND_LEN_BIO"]
)
BigscapeConfig.PROTO_MIN_EXPAND_LEN = int(
config["EXPAND"]["PROTO_MIN_EXPAND_LEN"]
)
BigscapeConfig.NO_MIN_CLASSES = config["EXPAND"]["NO_MIN_CLASSES"].split(",")
BigscapeConfig.EXPAND_MATCH_SCORE = int(config["EXPAND"]["EXPAND_MATCH_SCORE"])
BigscapeConfig.EXPAND_MISMATCH_SCORE = int(
config["EXPAND"]["EXPAND_MISMATCH_SCORE"]
)
BigscapeConfig.EXPAND_GAP_SCORE = int(config["EXPAND"]["EXPAND_GAP_SCORE"])
BigscapeConfig.EXPAND_MAX_MATCH_PERC = float(
config["EXPAND"]["EXPAND_MAX_MATCH_PERC"]
)
BigscapeConfig.REGION_MIN_EXPAND_LEN = config["REGION_MIN_EXPAND_LEN"]
BigscapeConfig.REGION_MIN_EXPAND_LEN_BIO = config["REGION_MIN_EXPAND_LEN_BIO"]
BigscapeConfig.PROTO_MIN_EXPAND_LEN = config["PROTO_MIN_EXPAND_LEN"]
BigscapeConfig.NO_MIN_CLASSES = config["NO_MIN_CLASSES"]
BigscapeConfig.EXPAND_MATCH_SCORE = config["EXPAND_MATCH_SCORE"]
BigscapeConfig.EXPAND_MISMATCH_SCORE = config["EXPAND_MISMATCH_SCORE"]
BigscapeConfig.EXPAND_GAP_SCORE = config["EXPAND_GAP_SCORE"]
BigscapeConfig.EXPAND_MAX_MATCH_PERC = config["EXPAND_MAX_MATCH_PERC"]

# CLUSTER
BigscapeConfig.PREFERENCE = float(config["CLUSTER"]["PREFERENCE"])
BigscapeConfig.PREFERENCE = config["PREFERENCE"]

# TREE
BigscapeConfig.TOP_FREQS = int(config["TREE"]["TOP_FREQS"])
BigscapeConfig.TOP_FREQS = config["TOP_FREQS"]

# ANCHOR DOMAINS
BigscapeConfig.ANCHOR_DOMAINS = config["ANCHOR_DOMAINS"]

# LEGACY ANTISMASH CLASSES
legacy_classes = config["LEGACY_ANTISMASH_CLASSES"]
for group, classes in legacy_classes.items():
if isinstance(classes, list):
legacy_classes[group] = set(classes)
BigscapeConfig.LEGACY_ANTISMASH_CLASSES = legacy_classes

# write config log
BigscapeConfig.write_config_log(run, config)

@staticmethod
def write_config_log(run: dict, config: configparser.ConfigParser) -> None:
def write_config_log(run: dict, config: dict) -> None:
"""writes config log file
Args:
Expand All @@ -107,9 +220,5 @@ def write_config_log(run: dict, config: configparser.ConfigParser) -> None:
config_log_path = Path(str(log_path).replace(".log", ".config.log"))

with open(config_log_path, "w") as config_log:
for section in config.sections():
config_log.write(f"\n[{section}]\n")
for key in config[section]:
value = config[section][key]
key = key.upper()
config_log.write(f"{key} = {value}\n")
for key, value in config.items():
config_log.write(f"{key}: {value}\n")
112 changes: 0 additions & 112 deletions big_scape/cli/constants.py
Original file line number Diff line number Diff line change
@@ -1,115 +1,3 @@
"""Contains constant values"""

DB_SCHEMA_PATH = "big_scape/data/schema.sql"

# TODO: add comments with notes as in BS1
# TODO: move to config file
LEGACY_ANCHOR_DOMAINS = [
"PF02801",
"PF02624",
"PF00109",
"PF00501",
"PF02797",
"PF01397",
"PF03936",
"PF00432",
"PF00195",
"PF00494",
"PF00668",
"PF05147",
]

# according to the current (2021-05) antiSMASH rules:
# prodigiosin and PpyS-KS -> PKS
# CDPS -> NRPS
ANTISMASH_CLASSES = {
"pks1_products": {"t1pks", "T1PKS"},
"pksother_products": {
"transatpks",
"t2pks",
"t3pks",
"otherks",
"hglks",
"transAT-PKS",
"transAT-PKS-like",
"T2PKS",
"T3PKS",
"PKS-like",
"hglE-KS",
},
"nrps_products": {"nrps", "NRPS", "NRPS-like", "thioamide-NRP", "NAPAA"},
"ripps_products": {
"lantipeptide",
"thiopeptide",
"bacteriocin",
"linaridin",
"cyanobactin",
"glycocin",
"LAP",
"lassopeptide",
"sactipeptide",
"bottromycin",
"head_to_tail",
"microcin",
"microviridin",
"proteusin",
"lanthipeptide",
"lipolanthine",
"RaS-RiPP",
"fungal-RiPP",
"TfuA-related",
"guanidinotides",
"RiPP-like",
"lanthipeptide-class-i",
"lanthipeptide-class-ii",
"lanthipeptide-class-iii",
"lanthipeptide-class-iv",
"lanthipeptide-class-v",
"ranthipeptide",
"redox-cofactor",
"thioamitides",
"epipeptide",
"cyclic-lactone-autoinducer",
"spliceotide",
"RRE-containing",
},
"saccharide_products": {
"amglyccycl",
"oligosaccharide",
"cf_saccharide",
"saccharide",
},
"others_products": {
"acyl_amino_acids",
"arylpolyene",
"aminocoumarin",
"ectoine",
"butyrolactone",
"nucleoside",
"melanin",
"phosphoglycolipid",
"phenazine",
"phosphonate",
"other",
"cf_putative",
"resorcinol",
"indole",
"ladderane",
"PUFA",
"furan",
"hserlactone",
"fused",
"cf_fatty_acid",
"siderophore",
"blactam",
"fatty_acid",
"PpyS-KS",
"CDPS",
"betalactone",
"PBDE",
"tropodithietic-acid",
"NAGGN",
"halogenated",
"pyrrolidine",
},
}
Loading

0 comments on commit 3804c69

Please sign in to comment.