Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add majority voting parser option to celltypist example; improve log msg #142

Merged
merged 3 commits into from
Jan 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 6 additions & 7 deletions dance/modules/single_modality/cell_type_annotation/celltypist.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,17 +274,17 @@ def to_plots(self, folder: str, plot_probability: bool = False, format: str = 'p
if not os.path.isdir(folder):
raise FileNotFoundError(f" Output folder {folder} does not exist. Please provide a valid folder")
if 'X_umap' in self.adata.obsm:
logger.info(" Detected existing UMAP coordinates, will plot the results accordingly")
logger.info("Detected existing UMAP coordinates, will plot the results accordingly")
elif 'connectivities' in self.adata.obsp:
logger.info(" Generating UMAP coordinates based on the neighborhood graph")
sc.tl.umap(self.adata)
else:
logger.info(" Constructing the neighborhood graph and generating UMAP coordinates")
logger.info("Constructing the neighborhood graph and generating UMAP coordinates")
adata = self.adata.copy()
self.adata.obsm['X_pca'], self.adata.obsp['connectivities'], self.adata.obsp['distances'], self.adata.uns[
'neighbors'] = Classifier._construct_neighbor_graph(adata)
sc.tl.umap(self.adata)
logger.info(" Plotting the results")
logger.info("Plotting the results")
sc.settings.set_figure_params(figsize=[6.4, 6.4], format=format)
self.adata.obs[self.predicted_labels.columns] = self.predicted_labels
for column in self.predicted_labels:
Expand Down Expand Up @@ -464,13 +464,12 @@ def over_cluster(self, resolution: Optional[float] = None) -> pd.Series:

"""
if 'connectivities' not in self.adata.obsp:
logger.info(" Can not detect a neighborhood graph, will construct one before the over-clustering")
logger.info("Can not detect a neighborhood graph, will construct one before the over-clustering")
adata = self.adata.copy()
self.adata.obsm['X_pca'], self.adata.obsp['connectivities'], self.adata.obsp['distances'], self.adata.uns[
'neighbors'] = Classifier._construct_neighbor_graph(adata)
else:
logger.info(
" Detected a neighborhood graph in the input object, will run over-clustering on the basis of it")
logger.info("Detected a neighborhood graph in the input object, will run overclustering on the basis of it")
if resolution is None:
if self.adata.n_obs < 5000:
resolution = 5
Expand All @@ -484,7 +483,7 @@ def over_cluster(self, resolution: Optional[float] = None) -> pd.Series:
resolution = 25
else:
resolution = 30
logger.info(f" Over-clustering input data with resolution set to {resolution}")
logger.info(f"Over-clustering input data with resolution set to {resolution}")
sc.tl.leiden(self.adata, resolution=resolution, key_added='over_clustering')
return self.adata.obs.pop('over_clustering')

Expand Down
14 changes: 5 additions & 9 deletions examples/single_modality/cell_type_annotation/celltypist.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,15 @@

if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--cell_type_test", type=str, help="name for the cell type information for test data",
default="Cell_type")
parser.add_argument("--cell_type_train", type=str, help="name for the cell type information for training data",
default="Cell_type")
parser.add_argument("--log_level", type=str, default="INFO", choices=LOGLEVELS)
parser.add_argument("--max_iter", type=int, help="Max iteration during training", default=200)
parser.add_argument("--majority_voting", action="store_true",
help="Whether to refine the predicted labels via majority voting after over-clustering.")
parser.add_argument("--n_jobs", type=int, help="Number of jobs", default=10)
parser.add_argument("--random_seed", type=int, default=10)
parser.add_argument("--species", default="mouse", type=str)
parser.add_argument("--test_dataset", type=int, nargs="+", default=[1759],
help="List testing training dataset ids.")
parser.add_argument("--test_dataset", nargs="+", default=[1759], help="List of testing dataset ids.")
parser.add_argument("--tissue", default="Spleen", type=str)
parser.add_argument("--train_dataset", type=int, nargs="+", default=[1970], help="List of training dataset ids.")
parser.add_argument("--train_dataset", nargs="+", default=[1970], help="List of training dataset ids.")
parser.add_argument("--not_use_SGD", action="store_true",
help="Training algorithm -- whether it will be stochastic gradient descent.")

Expand All @@ -48,7 +44,7 @@
# Train and evaluate the model
model = Celltypist()
model.fit(x_train, y_train, n_jobs=args.n_jobs, max_iter=args.max_iter, use_SGD=not args.not_use_SGD)
pred = model.predict(x_test)
pred = model.predict(x_test, majority_voting=args.majority_voting)
score = model.score(pred, y_test)
print(f"{score=:.4f}")
"""To reproduce CellTypist benchmarks, please refer to command lines below:
Expand Down