diff --git a/dance/modules/single_modality/clustering/graphsc.py b/dance/modules/single_modality/clustering/graphsc.py
index 5c9b0a4f..17d6d97d 100644
--- a/dance/modules/single_modality/clustering/graphsc.py
+++ b/dance/modules/single_modality/clustering/graphsc.py
@@ -9,10 +9,7 @@
 """
 
-import time
-
 import dgl
-import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 import scanpy as sc
@@ -24,32 +21,90 @@
 from dgl.nn.pytorch import GraphConv
 from dgl.utils import expand_as_pair
 from sklearn.cluster import KMeans
-from sklearn.decomposition import PCA
-from sklearn.metrics import adjusted_rand_score, calinski_harabasz_score, normalized_mutual_info_score, silhouette_score
 from torch.nn.functional import binary_cross_entropy_with_logits as BCELoss
 from tqdm import tqdm
 
+from dance.modules.base import BaseClusteringMethod
 from dance.transforms import AnnDataTransform, Compose, SetConfig
 from dance.transforms.graph import PCACellFeatureGraph
-from dance.typing import LogLevel
+from dance.typing import Any, Literal, LogLevel, Optional
+from dance.utils import get_device
 
 
-class GraphSC:
+class GraphSC(BaseClusteringMethod):
     """GraphSC class.
 
     Parameters
    ----------
-    args : argparse.Namespace
-        a Namespace contains arguments of GCNAE. For details of parameters in parser args, please refer to link
-        (parser help document).
+    agg
+        Aggregation layer.
+    activation
+        Activation function.
+    in_feats
+        Dimension of input features.
+    n_hidden
+        Number of hidden layers.
+    hidden_dim
+        Input dimension of hidden layer 1.
+    hidden_1
+        Output dimension of hidden layer 1.
+    hidden_2
+        Output dimension of hidden layer 2.
+    dropout
+        Dropout rate.
+    n_layers
+        Number of graph convolutional layers.
+    hidden_relu
+        Use relu activation in hidden layers or not.
+    hidden_bn
+        Use batch norm in hidden layers or not.
+    cluster_method
+        Method for clustering.
+    num_workers
+        Number of workers.
+    device
+        Computation device to use.
 
     """
 
-    def __init__(self, args):
+    def __init__(
+        self,
+        agg: str = "sum",
+        activation: str = "relu",
+        in_feats: int = 50,
+        n_hidden: int = 1,
+        hidden_dim: int = 200,
+        hidden_1: int = 300,
+        hidden_2: int = 0,
+        dropout: float = 0.1,
+        n_layers: int = 1,
+        hidden_relu: bool = False,
+        hidden_bn: bool = False,
+        n_clusters: int = 10,
+        cluster_method: Literal["kmeans", "leiden"] = "kmeans",
+        num_workers: int = 1,
+        device: str = "auto",
+    ):
         super().__init__()
-        self.args = args
-        self.device = get_device(args.use_cpu)
-        self.model = GCNAE(args).to(self.device)
+        self.n_layers = n_layers
+        self.n_clusters = n_clusters
+        self.cluster_method = cluster_method
+        self.num_workers = num_workers
+        self.device = get_device(device)
+
+        self.model = GCNAE(
+            agg=agg,
+            activation=activation,
+            in_feats=in_feats,
+            n_hidden=n_hidden,
+            hidden_dim=hidden_dim,
+            hidden_1=hidden_1,
+            hidden_2=hidden_2,
+            dropout=dropout,
+            n_layers=n_layers,
+            hidden_relu=hidden_relu,
+            hidden_bn=hidden_bn,
+        ).to(self.device)
 
     @staticmethod
     def preprocessing_pipeline(n_top_genes: int = 3000, normalize_weights: str = "log_per_cell", n_components: int = 50,
@@ -90,300 +145,231 @@ def preprocessing_pipeline(n_top_genes: int = 3000, normalize_weights: str = "lo
         return Compose(*transforms, log_level=log_level)
 
-    def fit(self, g, n_epochs, n_clusters, lr, cluster=["KMeans"]):
+    def fit(
+        self,
+        g: dgl.DGLGraph,
+        y: Optional[Any] = None,
+        *,
+        n_epochs: int = 100,
+        lr: float = 1e-5,
+        batch_size: int = 128,
+        show_epoch_ari: bool = False,
+        eval_epoch: bool = False,
+    ):
         """Train graph-sc.
 
         Parameters
         ----------
-        g : dgl.DGLGraph
-            input cell-gene graph.
-        n_epochs : int
-            number of epochs.
-        n_clusters : int
-            number of clusters.
-        lr : float
-            learning rate.
-        cluster : list optional
-            clustering method.
+        g
+            Input cell-gene graph.
+        y
+            Not used, for compatibility with the BaseClusteringMethod class.
+        n_epochs
+            Number of epochs.
+        lr
+            Learning rate.
+        batch_size
+            Batch size.
+        show_epoch_ari
+            Show ARI score for each epoch.
+        eval_epoch
+            Evaluate every epoch and keep the embedding with the best ARI.
 
         """
         g.ndata["order"] = g.ndata["label"] = g.ndata["feat_id"]
         train_ids = np.where(g.ndata["label"] != -1)[0]
-        sampler = dgl.dataloading.MultiLayerFullNeighborSampler(self.args.n_layers)
-        dataloader = dgl.dataloading.NodeDataLoader(g, train_ids, sampler, batch_size=self.args.batch_size,
-                                                    shuffle=True, drop_last=False, num_workers=self.args.num_workers)
+        sampler = dgl.dataloading.MultiLayerFullNeighborSampler(self.n_layers)
+        dataloader = dgl.dataloading.NodeDataLoader(g, train_ids, sampler, batch_size=batch_size, shuffle=True,
+                                                    drop_last=False, num_workers=self.num_workers)
 
-        device = get_device(self.args.use_cpu)
+        device = self.device
         optim = torch.optim.Adam(self.model.parameters(), lr=lr)
         losses = []
-        aris_kmeans = []
+        aris = []
         Z = {}
 
-        with dataloader.enable_cpu_affinity():
-            for epoch in tqdm(range(n_epochs)):
-                self.model.train()
-                z = []
-                y = []
-                order = []
-
-                for input_nodes, output_nodes, blocks in dataloader:
-                    blocks = [b.to(device) for b in blocks]
-                    input_features = blocks[0].srcdata['features']
-                    g = blocks[-1]
-
-                    adj_logits, emb = self.model.forward(blocks, input_features)
-                    z.extend(emb.detach().cpu().numpy())
-                    if "label" in blocks[-1].dstdata:
-                        y.extend(blocks[-1].dstdata["label"].cpu().numpy())
-                    order.extend(blocks[-1].dstdata["order"].cpu().numpy())
-
-                    adj = g.adjacency_matrix().to_dense().to(device)
-                    adj = adj[g.dstnodes()]
-                    pos_weight = torch.Tensor([float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()])
-                    factor = float((adj.shape[0] * adj.shape[0] - adj.sum()) * 2)
-                    if factor == 0:
-                        factor = 1
-                    norm = adj.shape[0] * adj.shape[0] / factor
-                    adj_logits, _ = self.model.forward(blocks, input_features)
-                    loss = norm * BCELoss(adj_logits, adj.to(device), pos_weight=pos_weight.to(device))
-                    optim.zero_grad()
-                    loss.backward()
-                    optim.step()
-                    losses.append(loss.item())
-
-                z = np.array(z)
-                y = np.array(y)
-                order = np.array(order)
-                order = np.argsort(order)
-                z = z[order]
-                y = y[order]
-                if pd.isnull(y[0]):
-                    y = None
+        for epoch in tqdm(range(n_epochs)):
+            self.model.train()
+            z = []
+            y = []
+            order = []
+
+            for input_nodes, output_nodes, blocks in dataloader:
+                blocks = [b.to(device) for b in blocks]
+                input_features = blocks[0].srcdata["features"]
+                g = blocks[-1]
+
+                adj_logits, emb = self.model.forward(blocks, input_features)
+                z.extend(emb.detach().cpu().numpy())
+                if "label" in blocks[-1].dstdata:
+                    y.extend(blocks[-1].dstdata["label"].cpu().numpy())
+                order.extend(blocks[-1].dstdata["order"].cpu().numpy())
+
+                adj = g.adjacency_matrix().to_dense().to(device)
+                adj = adj[g.dstnodes()]
+                pos_weight = torch.Tensor([float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()])
+                factor = float((adj.shape[0] * adj.shape[0] - adj.sum()) * 2)
+                if factor == 0:
+                    factor = 1
+                norm = adj.shape[0] * adj.shape[0] / factor
+                adj_logits, _ = self.model.forward(blocks, input_features)
+                loss = norm * BCELoss(adj_logits, adj.to(device), pos_weight=pos_weight.to(device))
+                optim.zero_grad()
+                loss.backward()
+                optim.step()
+                losses.append(loss.item())
+
+            z = np.array(z)
+            y = np.array(y)
+            order = np.array(order)
+            order = np.argsort(order)
+            z = z[order]
+            y = y[order]
+            if pd.isnull(y[0]):
+                y = None
+            self.z = z
+
+            if eval_epoch:
+                score = self.score(None, y)
+                aris.append(score)
+                if show_epoch_ari:
+                    print(f"epoch {epoch}, ARI {score}")
+                z_ = {f"epoch{epoch}": z}
+                Z = {**Z, **z_}
+
+            elif epoch == n_epochs - 1:
+                self.z = z
 
-                if self.args.eval_epoch:
-                    score = self.score(y, n_clusters, cluster=cluster)
-                    aris_kmeans.append(score["kmeans_ari"])
-                    if self.args.show_epoch_ari:
-                        print(f'epoch {epoch}, ARI {score.get("kmeans_ari")}')
-                    z_ = {f'epoch{epoch}': z}
-                    Z = {**Z, **z_}
-
-                elif epoch == n_epochs - 1:
-                    self.z = z
 
+        if eval_epoch:
+            index = np.argmax(aris)
+            self.z = Z[f"epoch{index}"]
 
-        if self.args.eval_epoch:
-            index = np.argmax(aris_kmeans)
-            self.z = Z[f'epoch{index}']
-
-    def predict(self, n_clusters, cluster=["KMeans"]):
+    def predict(self, x: Optional[Any] = None):
         """Get predictions from the graph autoencoder model.
 
         Parameters
         ----------
-        n_clusters : int
-            number of clusters.
-        cluster : list optional
-            clustering method.
+        x
+            Not used, for compatibility with BaseClusteringMethod class.
 
         Returns
         -------
-        pred : dict
-            prediction of given clustering method.
+        pred
+            Predictions from the configured clustering method.
 
         """
-        z = self.z
-        pred = {}
-
-        if "KMeans" in cluster:
-            kmeans = KMeans(n_clusters=n_clusters, init="k-means++", random_state=5, n_init=10)
-            kmeans_pred = {"kmeans_pred": kmeans.fit_predict(z)}
-            pred = {**pred, **kmeans_pred}
-
-        if "Leiden" in cluster:
-            leiden_pred = {"leiden_pred": run_leiden(z)}
-            pred = {**pred, **leiden_pred}
-
+        if self.cluster_method == "kmeans":
+            kmeans = KMeans(n_clusters=self.n_clusters, init="k-means++", random_state=5, n_init=10)
+            pred = kmeans.fit_predict(self.z)
+        elif self.cluster_method == "leiden":
+            pred = run_leiden(self.z)
+        else:
+            raise ValueError(f"Unknown clustering method {self.cluster_method}, available options are: 'kmeans', 'leiden'")
         return pred
 
-    def score(self, y, n_clusters, plot=False, cluster=["KMeans"]):
-        """Evaluate the graph autoencoder model.
-
-        Parameters
-        ----------
-        y : list
-            true labels.
-        n_clusters : int
-            number of clusters.
-        plot : bool optional
-            show plot or not.
-        cluster : list optional
-            clustering method.
-
-        Returns
-        -------
-        scores : dict
-            metric evaluation scores.
-
-        """
-        z = self.z
-        self.model.eval()
-
-        k_start = time.time()
-        scores = {"ae_end": k_start}
-
-        if "KMeans" in cluster:
-            kmeans = KMeans(n_clusters=n_clusters, init="k-means++", random_state=5, n_init=10)
-            kmeans_pred = kmeans.fit_predict(z)
-            ari_k = None
-            nmi_k = None
-            if y is not None:
-                ari_k = round(adjusted_rand_score(y, kmeans_pred), 4)
-                nmi_k = round(normalized_mutual_info_score(y, kmeans_pred), 4)
-            sil_k = silhouette_score(z, kmeans_pred)
-            cal_k = calinski_harabasz_score(z, kmeans_pred)
-            k_end = time.time()
-            scores_k = {
-                "kmeans_ari": ari_k,
-                "kmeans_nmi": nmi_k,
-                "kmeans_sil": sil_k,
-                "kmeans_cal": cal_k,
-                "kmeans_pred": kmeans_pred,
-                "kmeans_time": k_end - k_start,
-            }
-            scores = {**scores, **scores_k}
-
-        if "Leiden" in cluster:
-            l_start = time.time()
-            leiden_pred = run_leiden(z)
-            ari_l = None
-            nmi_l = None
-            if y is not None:
-                ari_l = round(adjusted_rand_score(y, leiden_pred), 4)
-                nmi_l = round(normalized_mutual_info_score(y, leiden_pred), 4)
-            sil_l = silhouette_score(z, leiden_pred)
-            cal_l = calinski_harabasz_score(z, leiden_pred)
-            l_end = time.time()
-            scores_l = {
-                "leiden_ari": ari_l,
-                "leiden_nmi": nmi_l,
-                "leiden_sil": sil_l,
-                "leiden_cal": cal_l,
-                "leiden_pred": leiden_pred,
-                "leiden_time": l_end - l_start,
-            }
-            scores = {**scores, **scores_l}
-
-        if plot:
-            pca = PCA(2).fit_transform(z)
-            plt.figure(figsize=(12, 4))
-            plt.subplot(131)
-            plt.title("Ground truth")
-            plt.scatter(pca[:, 0], pca[:, 1], c=y, s=4)
-
-            plt.subplot(132)
-            plt.title("K-Means pred")
-            plt.scatter(pca[:, 0], pca[:, 1], c=kmeans_pred, s=4)
-
-            plt.subplot(133)
-            plt.title("Leiden pred")
-            plt.scatter(pca[:, 0], pca[:, 1], c=leiden_pred, s=4)
-            plt.show()
-        return scores
-
 
 class GCNAE(nn.Module):
     """Graph convolutional autoencoder class.
 
     Parameters
     ----------
-    args : argparse.Namespace
-        a Namespace contains arguments of scDSC. For details of parameters in parser args, please refer to link
-        (parser help document).
-    agg : str
-        aggregation layer.
-    activation :str
-        activation function.
-    in_feats : int
-        dimension of input feature
-    n_hidden : int
-        number of hidden layer.
-    hidden_dim :int
-        input dimension of hidden layer 1.
-    hidden_1 : int
-        output dimension of hidden layer 1.
-    hidden_2 : int
-        output dimension of hidden layer 2.
-    dropout : float
-        dropout rate.
-    n_layers :int
-        number of graph convolutional layers.
-    hidden_relu : bool
-        use relu activation in hidden layers or not.
-    hidden_bn : bool
-        use batch norm in hidden layers or not.
+    agg
+        Aggregation layer.
+    activation
+        Activation function.
+    in_feats
+        Dimension of input features.
+    n_hidden
+        Number of hidden layers.
+    hidden_dim
+        Input dimension of hidden layer 1.
+    hidden_1
+        Output dimension of hidden layer 1.
+    hidden_2
+        Output dimension of hidden layer 2.
+    dropout
+        Dropout rate.
+    n_layers
+        Number of graph convolutional layers.
+    hidden_relu
+        Use relu activation in hidden layers or not.
+    hidden_bn
+        Use batch norm in hidden layers or not.
 
     Returns
     -------
-    adj_rec :
-        reconstructed adjacency matrix.
-    x :
-        embedding.
+    adj_rec
+        Reconstructed adjacency matrix.
+    x
+        Embedding.
 
""" - def __init__(self, args): - + def __init__( + self, + *, + agg: str, + activation: str, + in_feats: int, + n_hidden: int, + hidden_dim: int, + hidden_1: int, + hidden_2: int, + dropout: float, + n_layers: int, + hidden_relu: bool, + hidden_bn: bool, + ): super().__init__() - self.args = args - self.agg = args.agg - if args.activation == 'gelu': + self.agg = agg + + if activation == "gelu": activation = F.gelu - elif args.activation == 'prelu': + elif activation == "prelu": activation = F.prelu - elif args.activation == 'relu': + elif activation == "relu": activation = F.relu - elif args.activation == 'leaky_relu': + elif activation == "leaky_relu": activation = F.leaky_relu - if args.n_hidden == 0: + if n_hidden == 0: hidden = None - elif args.n_hidden == 1: - hidden = [args.hidden_1] - elif args.n_hidden == 2: - hidden = [args.hidden_1, args.hidden_2] + elif n_hidden == 1: + hidden = [hidden_1] + elif n_hidden == 2: + hidden = [hidden_1, hidden_2] - if args.dropout != 0: - self.dropout = nn.Dropout(p=args.dropout) + if dropout != 0: + self.dropout = nn.Dropout(p=dropout) else: self.dropout = None - self.layer1 = WeightedGraphConv(in_feats=args.in_feats, out_feats=args.hidden_dim, activation=activation) - if args.n_layers == 2: - self.layer2 = WeightedGraphConv(in_feats=args.hidden_dim, out_feats=args.hidden_dim, activation=activation) + self.layer1 = WeightedGraphConv(in_feats=in_feats, out_feats=hidden_dim, activation=activation) + if n_layers == 2: + self.layer2 = WeightedGraphConv(in_feats=hidden_dim, out_feats=hidden_dim, activation=activation) self.decoder = InnerProductDecoder(activation=lambda x: x) self.hidden = hidden if hidden is not None: enc = [] for i, s in enumerate(hidden): if i == 0: - enc.append(nn.Linear(args.hidden_dim, hidden[i])) + enc.append(nn.Linear(hidden_dim, hidden[i])) else: enc.append(nn.Linear(hidden[i - 1], hidden[i])) - if args.hidden_bn and i != len(hidden): + if hidden_bn and i != len(hidden): enc.append(nn.BatchNorm1d(hidden[i])) - if args.hidden_relu and i != len(hidden): + if hidden_relu and i != len(hidden): enc.append(nn.ReLU()) self.encoder = nn.Sequential(*enc) def forward(self, blocks, features): - x = blocks[0].srcdata['features'] + x = blocks[0].srcdata["features"] for i in range(len(blocks)): with blocks[i].local_scope(): if self.dropout is not None: x = self.dropout(x) - blocks[i].srcdata['h'] = x + blocks[i].srcdata["h"] = x if i == 0: x = self.layer1(blocks[i], x, agg=self.agg) else: @@ -399,15 +385,15 @@ class InnerProductDecoder(nn.Module): Parameters ---------- - activation : optional - activation function. - dropout : float optional - dropout rate. + activation + Activation function. + dropout + Dropout rate. Returns ------- - adj : - reconstructed adjacency matrix. + adj + Reconstructed adjacency matrix. """ @@ -430,29 +416,29 @@ def edge_selection_simple(self, edges): Parameters ---------- - edges : - edges of graph. + edges + Edges of graph. """ - return {'m': edges.src['h'] * edges.data['weight']} + return {"m": edges.src["h"] * edges.data["weight"]} def forward(self, graph, feat, weight=None, agg="sum"): with graph.local_scope(): if not self._allow_zero_in_degree: if (graph.in_degrees() == 0).any(): - raise DGLError('There are 0-in-degree nodes in the graph, ' - 'output for those nodes will be invalid. ' - 'This is harmful for some applications, ' - 'causing silent performance regression. ' - 'Adding self-loop on the input graph by ' - 'calling `g = dgl.add_self_loop(g)` will resolve ' - 'the issue. 
Setting ``allow_zero_in_degree`` ' - 'to be `True` when constructing this module will ' - 'suppress the check and let the code run.') + raise DGLError("There are 0-in-degree nodes in the graph, " + "output for those nodes will be invalid. " + "This is harmful for some applications, " + "causing silent performance regression. " + "Adding self-loop on the input graph by " + "calling `g = dgl.add_self_loop(g)` will resolve " + "the issue. Setting ``allow_zero_in_degree`` " + "to be `True` when constructing this module will " + "suppress the check and let the code run.") # (BarclayII) For RGCN on heterogeneous graphs we need to support GCN on bipartite. feat_src, feat_dst = expand_as_pair(feat, graph) - if self._norm == 'both': + if self._norm == "both": degs = graph.out_degrees().float().clamp(min=1) norm = torch.pow(degs, -0.5) shp = norm.shape + (1, ) * (feat_src.dim() - 1) @@ -461,24 +447,24 @@ def forward(self, graph, feat, weight=None, agg="sum"): if weight is not None: if self.weight is not None: - raise DGLError('External weight is provided while at the same time the' - ' module has defined its own weight parameter. Please' - ' create the module with flag weight=False.') + raise DGLError("External weight is provided while at the same time the" + " module has defined its own weight parameter. Please" + " create the module with flag weight=False.") else: weight = self.weight if weight is not None: feat_src = torch.matmul(feat_src, weight) - graph.srcdata['h'] = feat_src + graph.srcdata["h"] = feat_src if agg == "sum": - graph.update_all(self.edge_selection_simple, fn.sum(msg='m', out='h')) + graph.update_all(self.edge_selection_simple, fn.sum(msg="m", out="h")) if agg == "mean": - graph.update_all(self.edge_selection_simple, fn.mean(msg='m', out='h')) - rst = graph.dstdata['h'] - if self._norm != 'none': + graph.update_all(self.edge_selection_simple, fn.mean(msg="m", out="h")) + rst = graph.dstdata["h"] + if self._norm != "none": degs = graph.in_degrees().float().clamp(min=1) - if self._norm == 'both': + if self._norm == "both": norm = torch.pow(degs, -0.5) else: norm = 1.0 / degs @@ -504,20 +490,20 @@ def edge_selection_simple(self, edges): Parameters ---------- - edges : - edges of graph. + edges + Edges of graph. """ - number_of_edges = edges.src['h'].shape[0] + number_of_edges = edges.src["h"].shape[0] indices = np.expand_dims(np.array([self.gene_num + 1] * number_of_edges, dtype=np.int32), axis=1) - src_id, dst_id = edges.src['id'].cpu().numpy(), edges.dst['id'].cpu().numpy() + src_id, dst_id = edges.src["id"].cpu().numpy(), edges.dst["id"].cpu().numpy() indices = np.where((src_id >= 0) & (dst_id < 0), src_id, indices) # gene->cell indices = np.where((dst_id >= 0) & (src_id < 0), dst_id, indices) # cell->gene indices = np.where((dst_id >= 0) & (src_id >= 0), self.gene_num, indices) # gene-gene - h = edges.src['h'] * self.alpha[indices.squeeze()] - return {'m': h} + h = edges.src["h"] * self.alpha[indices.squeeze()] + return {"m": h} - # return {'m': h * edges.data['weight']} + # return {"m": h * edges.data["weight"]} def forward(self, graph, feat, weight=None, alpha=None, gene_num=None): self.alpha = alpha @@ -525,20 +511,20 @@ def forward(self, graph, feat, weight=None, alpha=None, gene_num=None): with graph.local_scope(): if not self._allow_zero_in_degree: if (graph.in_degrees() == 0).any(): - raise DGLError('There are 0-in-degree nodes in the graph, ' - 'output for those nodes will be invalid. ' - 'This is harmful for some applications, ' - 'causing silent performance regression. 
'
-                               'Adding self-loop on the input graph by '
-                               'calling `g = dgl.add_self_loop(g)` will resolve '
-                               'the issue. Setting ``allow_zero_in_degree`` '
-                               'to be `True` when constructing this module will '
-                               'suppress the check and let the code run.')
+                raise DGLError("There are 0-in-degree nodes in the graph, "
+                               "output for those nodes will be invalid. "
+                               "This is harmful for some applications, "
+                               "causing silent performance regression. "
+                               "Adding self-loop on the input graph by "
+                               "calling `g = dgl.add_self_loop(g)` will resolve "
+                               "the issue. Setting ``allow_zero_in_degree`` "
+                               "to be `True` when constructing this module will "
+                               "suppress the check and let the code run.")
 
             # (BarclayII) For RGCN on heterogeneous graphs we need to support GCN on bipartite.
             feat_src, feat_dst = expand_as_pair(feat, graph)
             # print(f"feat_src : {feat_src.shape}, feat_dst {feat_dst.shape}")
-            if self._norm == 'both':
+            if self._norm == "both":
                 degs = graph.out_degrees().float().clamp(min=1)
                 norm = torch.pow(degs, -0.5)
                 shp = norm.shape + (1, ) * (feat_src.dim() - 1)
@@ -547,22 +533,22 @@ def forward(self, graph, feat, weight=None, alpha=None, gene_num=None):
 
         if weight is not None:
             if self.weight is not None:
-                raise DGLError('External weight is provided while at the same time the'
-                               ' module has defined its own weight parameter. Please'
-                               ' create the module with flag weight=False.')
+                raise DGLError("External weight is provided while at the same time the"
+                               " module has defined its own weight parameter. Please"
+                               " create the module with flag weight=False.")
             else:
                 feat_src = torch.matmul(feat_src, weight)
         else:
             weight = self.weight
 
-        graph.srcdata['h'] = feat_src
-        graph.update_all(self.edge_selection_simple, fn.sum(msg='m', out='h'))
-        rst = graph.dstdata['h']
+        graph.srcdata["h"] = feat_src
+        graph.update_all(self.edge_selection_simple, fn.sum(msg="m", out="h"))
+        rst = graph.dstdata["h"]
 
-        if self._norm != 'none':
+        if self._norm != "none":
             degs = graph.in_degrees().float().clamp(min=1)
-            if self._norm == 'both':
+            if self._norm == "both":
                 norm = torch.pow(degs, -0.5)
             else:
                 norm = 1.0 / degs
@@ -584,39 +570,18 @@ def run_leiden(data):
 
     Parameters
     ----------
-    data :
-        data for leiden
+    data
+        Data for Leiden.
 
     Returns
     -------
-    pred : list
-        prediction of leiden.
+    pred
+        Predictions from Leiden clustering.
 
     """
     adata = sc.AnnData(data)
-    sc.pp.neighbors(adata, use_rep='X', n_neighbors=300, n_pcs=0)
+    sc.pp.neighbors(adata, use_rep="X", n_neighbors=300, n_pcs=0)
     sc.tl.leiden(adata)
-    pred = adata.obs['leiden'].to_list()
+    pred = adata.obs["leiden"].to_list()
     pred = [int(x) for x in pred]
     return pred
-
-
-def get_device(use_cpu=False):
-    """Get device for training.
-
-    Parameters
-    ----------
-    use_cpu : bool optional
-        use cpu or not.
-
-    Returns
-    -------
-    device :
-        torch device.
-
-    """
-    if torch.cuda.is_available() and not use_cpu:
-        device = torch.device('cuda')
-    else:
-        device = torch.device('cpu')
-    return device
diff --git a/dance/utils/__init__.py b/dance/utils/__init__.py
index 30a85b97..d60e02a3 100644
--- a/dance/utils/__init__.py
+++ b/dance/utils/__init__.py
@@ -8,6 +8,13 @@
 
 from torch.utils.data import Dataset
 
+def get_device(device: str) -> str:
+    """Resolve "auto" into an explicit device string ("cuda" if available, else "cpu")."""
+    if device == "auto":
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+    return device
+
+
 def hexdigest(x: str, /) -> str:
     return hashlib.md5(x.encode()).hexdigest()
diff --git a/examples/single_modality/clustering/graphsc.py b/examples/single_modality/clustering/graphsc.py
index 650dcc8c..469ff5f1 100644
--- a/examples/single_modality/clustering/graphsc.py
+++ b/examples/single_modality/clustering/graphsc.py
@@ -10,7 +10,7 @@
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("-e", "--epochs", default=100, type=int)
-    parser.add_argument("-cpu", "--use_cpu", default=False, action="store_true")
+    parser.add_argument("-dv", "--device", default="auto")
     parser.add_argument("-if", "--in_feats", default=50, type=int)
     parser.add_argument("-bs", "--batch_size", default=128, type=int)
     parser.add_argument("-nw", "--normalize_weights", default="log_per_cell", choices=["log_per_cell", "per_cell"])
@@ -31,8 +31,8 @@
     parser.add_argument("-ng", "--nb_genes", type=int, default=3000)
     parser.add_argument("-nr", "--num_run", type=int, default=1)
     parser.add_argument("-nbw", "--num_workers", type=int, default=1)
-    parser.add_argument("-eve", "--eval_epoch", default=True, action="store_true")
-    parser.add_argument("-show", "--show_epoch_ari", default=False, action="store_true")
+    parser.add_argument("-eve", "--eval_epoch", action="store_true")
+    parser.add_argument("-show", "--show_epoch_ari", action="store_true")
     parser.add_argument("-plot", "--plot", default=False, action="store_true")
     parser.add_argument("-dd", "--data_dir", default="./data", type=str)
     parser.add_argument("-data", "--dataset", default="10X_PBMC",
@@ -56,13 +56,15 @@
     for run in range(args.num_run):
         set_seed(run)
-        model = GraphSC(args)
-        model.fit(graph, args.epochs, n_clusters, args.learning_rate, cluster=["KMeans", "Leiden"])
-        pred = model.predict(n_clusters, cluster=["KMeans", "Leiden"])
-        print(f"kmeans_pred (first 10): {pred.get('kmeans_pred')[:10]}")
-        print(f"leiden_pred (first 10): {pred.get('leiden_pred')[:10]}")
-        score = model.score(y, n_clusters, plot=False, cluster=["KMeans", "Leiden"])
-        print(f"kmeans_ari: {score.get('kmeans_ari')}, leiden_ari: {score.get('leiden_ari')}")
+        model = GraphSC(agg=args.agg, activation=args.activation, in_feats=args.in_feats, n_hidden=args.n_hidden,
+                        hidden_dim=args.hidden_dim, hidden_1=args.hidden_1, hidden_2=args.hidden_2,
+                        dropout=args.dropout, n_layers=args.n_layers, hidden_relu=args.hidden_relu,
+                        hidden_bn=args.hidden_bn, n_clusters=n_clusters, cluster_method="leiden",
+                        num_workers=args.num_workers, device=args.device)
+        model.fit(graph, n_epochs=args.epochs, lr=args.learning_rate, show_epoch_ari=args.show_epoch_ari,
+                  eval_epoch=args.eval_epoch)
+        score = model.score(None, y)
+        print(f"{score=:.4f}")
 
 """Reproduction information
 10X PBMC:
 python graphsc.py --dataset 10X_PBMC
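
For reference, the refactored interface can be exercised on its own as sketched below. This is a minimal, hypothetical usage sketch, not part of the patch: it assumes `graph` is a cell-gene DGLGraph produced by `GraphSC.preprocessing_pipeline` and `y` holds ground-truth labels, prepared as in the example script above, and that `score` (inherited from `BaseClusteringMethod`) returns the ARI, as the updated example implies.

    # Hypothetical end-to-end sketch of the new API; `graph` and `y` are assumed
    # to be prepared as in examples/single_modality/clustering/graphsc.py.
    from dance.modules.single_modality.clustering.graphsc import GraphSC

    model = GraphSC(in_feats=50, n_clusters=10, cluster_method="kmeans", device="auto")
    model.fit(graph, n_epochs=100, lr=1e-5, batch_size=128)  # train the graph autoencoder
    pred = model.predict()      # KMeans labels computed on the learned embedding self.z
    ari = model.score(None, y)  # ARI of predicted vs. true labels
    print(f"{ari=:.4f}")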