Skip to content

Commit

Permalink
updated the example directory (#337)
Browse files Browse the repository at this point in the history
* updated the example directory

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
ChristopherMancuso and pre-commit-ci[bot] authored Nov 15, 2024
1 parent e8a4a91 commit 77fab34
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 157 deletions.
30 changes: 30 additions & 0 deletions example/example_download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import os
import os.path as osp
import pathlib
import shutil
import time

import numpy as np

import geneplexus

# Set up the data directory next to this script
homedir = pathlib.Path(__file__).absolute().parent
datadir = osp.join(homedir, "data")
os.makedirs(datadir, exist_ok=True)

# The data files are split by species. To download everything, pass either
# the full list
#     species = ["Human", "Mouse", "Fly", "Worm", "Zebrafish", "Yeast"]
# or
#     species = "All"
# To download only a subset, list just the desired species, e.g. for Mouse
# and Human only:
#     species = ["Human", "Mouse"]
geneplexus.download.download_select_data(
    datadir,
    species=["Human", "Mouse", "Fly", "Worm", "Zebrafish", "Yeast"],
)
20 changes: 8 additions & 12 deletions example/example_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,23 +38,19 @@
# Get the data from URL
geneplexus.download.download_select_data(
datadir,
tasks="All",
networks="STRING",
features="SixSpeciesN2V",
sp_trn="Human",
sp_tst="Mouse",
gsc="GO",
species=["Human", "Mouse"],
)

# Run through the pipeline
# First initialize the geneplexus object
myclass = geneplexus.GenePlexus(
file_loc=fp_data,
gsc="Combined",
features="SixSpeciesN2V",
file_loc=datadir,
net_type="STRING",
features="SixSpeciesN2V",
sp_trn="Human",
sp_tst="Mouse",
sp_res="Mouse",
gsc_trn="Combined",
gsc_res="Combined",
)

# Load the input genes into the class and set up positives/negatives
Expand All @@ -65,7 +61,7 @@

# This makes the tables that have the model weight similarity to other models
# trained on known GO and DisGeNet sets
df_sim_GO, df_sim_Dis, weights_GO, weights_Dis = myclass.make_sim_dfs()
df_sim, sim_weights = myclass.make_sim_dfs()

# Return an edgelist
df_edge, isolated_genes, df_edge_sym, isolated_genes_sym = myclass.make_small_edgelist(num_nodes=50)
Expand All @@ -75,5 +71,5 @@

# Save a few things for checking
df_probs.to_csv(osp.join(outdir, "df_probs.tsv"), sep="\t", header=True, index=False)
df_sim_GO.to_csv(osp.join(outdir, "df_sim_GO.tsv"), sep="\t", header=True, index=False)
df_sim.to_csv(osp.join(outdir, "df_sim_GO.tsv"), sep="\t", header=True, index=False)
df_convert_out_subset.to_csv(osp.join(outdir, "df_convert_out_subset.tsv"), sep="\t", header=True, index=False)
73 changes: 0 additions & 73 deletions example/test_download.py

This file was deleted.

65 changes: 0 additions & 65 deletions example/test_download_slurm.py

This file was deleted.

13 changes: 6 additions & 7 deletions geneplexus/geneplexus.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,20 +382,19 @@ def fit_and_predict(
:attr:`GenePlexus.mdl_weights` (array of float)
Trained model parameters.
:attr:`GenePlexus.probs` (array of float)
Genome-wide gene prediction scores. A high value indicates the
relevance of the gene to the input gene list.
:attr:`GenePlexus.avgps` (array of float)
Cross validation results. Performance is measured using
log2(auprc/prior).
:attr:`GenePlexus.df_probs` (DataFrame)
A table with 7 columns: **Entrez** (the gene Entrez ID), **Symbol**
(the gene Symbol), **Name** (the gene Name), **Probability** (the
probability of a gene being part of the input gene list),
**Known/Novel** (whether the gene is in the input gene list),
**Class-Label** (positive, negative, or neutral), **Rank** (rank of
relevance of the gene to the input gene list).
:attr:`GenePlexus.avgps` (array of float)
Cross validation results. Performance is measured using
log2(auprc/prior).
:attr:`GenePlexus.probs` (array of float)
Genome-wide gene prediction scores. A high value indicates the
relevance of the gene to the input gene list.
"""
self._get_pos_and_neg_genes()
self.mdl_weights, self.probs, self.avgps = _geneplexus._run_sl(
Expand Down

0 comments on commit 77fab34

Please sign in to comment.