Skip to content

Commit

Permalink
add graph construction from a dataframe (#264)
Browse files Browse the repository at this point in the history
* add graph construction from a dataframe

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* update changelog

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
a-r-j and pre-commit-ci[bot] authored Feb 13, 2023
1 parent ea8be9f commit 87985a1
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 14 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
* [Logging] - [#242](https://github.com/a-r-j/graphein/pull/242) Adds control of protein graph construction logging. Resolves [#238](https://github.com/a-r-j/graphein/issues/238)

#### Protein
* [Feature] - [#264](https://github.com/a-r-j/graphein/pull/264) Adds entrypoint to `graphein.protein.graphs.construct_graph` for passing in a BioPandas dataframe directly.
* [Feature] - [#229](https://github.com/a-r-j/graphein/pull/229) Adds support for filtering KNN edges based on self-loops and chain membership. Contribution by @anton-bushuiev.
* [Feature] - [#234](https://github.com/a-r-j/graphein/pull/234) Adds support for aggregating node features over residues (`graphein.protein.features.sequence.utils.aggregate_feature_over_residues`).
* [Bugfix] - [#234](https://github.com/a-r-j/graphein/pull/234) fixes use of nullcontext in silent graph construction.
Expand Down
36 changes: 22 additions & 14 deletions graphein/protein/graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -622,6 +622,7 @@ def construct_graph(
pdb_path: Optional[str] = None,
uniprot_id: Optional[str] = None,
pdb_code: Optional[str] = None,
df: Optional[pd.DataFrame] = None,
chain_selection: str = "all",
model_index: int = 1,
df_processing_funcs: Optional[List[Callable]] = None,
Expand All @@ -632,7 +633,8 @@ def construct_graph(
verbose: bool = True,
) -> nx.Graph:
"""
Constructs protein structure graph from a ``pdb_code`` or ``pdb_path``.
Constructs protein structure graph from a ``pdb_code``, ``pdb_path``,
``uniprot_id`` or a BioPandas DataFrame containing ``ATOM`` data.
Users can provide a :class:`~graphein.protein.config.ProteinGraphConfig`
object to specify construction parameters.
Expand All @@ -655,6 +657,9 @@ def construct_graph(
:param uniprot_id: UniProt accession ID to build graph from AlphaFold2DB.
Default is ``None``.
:type uniprot_id: str, optional
:param df: Pandas dataframe containing ATOM data to build graph from.
Default is ``None``.
:type df: pd.DataFrame, optional
:param chain_selection: String of polypeptide chains to include in graph.
E.g ``"ABDF"`` or ``"all"``. Default is ``"all"``.
:type chain_selection: str
Expand Down Expand Up @@ -683,9 +688,14 @@ def construct_graph(
:rtype: nx.Graph
"""

if pdb_code is None and pdb_path is None and uniprot_id is None:
if (
pdb_code is None
and pdb_path is None
and uniprot_id is None
and df is None
):
raise ValueError(
"Either a PDB ID, UniProt ID or a path to a local PDB file"
"Either a PDB ID, UniProt ID, a dataframe or a path to a local PDB file"
" must be specified to construct a graph"
)

Expand All @@ -698,10 +708,6 @@ def construct_graph(
with context as progress:
if verbose:
task1 = progress.add_task("Reading PDB file...", total=1)
# Get name from pdb_file is no pdb_code is provided
# if pdb_path and (pdb_code is None and uniprot_id is None):
# pdb_code = get_protein_name_from_filename(pdb_path)
# pdb_code = pdb_code if len(pdb_code) == 4 else None
progress.advance(task1)

# If config params are provided, overwrite them
Expand Down Expand Up @@ -730,13 +736,15 @@ def construct_graph(
if config.edge_metadata_functions is None
else config.edge_metadata_functions
)

raw_df = read_pdb_to_dataframe(
pdb_path,
pdb_code,
uniprot_id,
model_index=model_index,
)
if df is None:
raw_df = read_pdb_to_dataframe(
pdb_path,
pdb_code,
uniprot_id,
model_index=model_index,
)
else:
raw_df = df

if verbose:
task2 = progress.add_task("Processing PDB dataframe...", total=1)
Expand Down

0 comments on commit 87985a1

Please sign in to comment.