OmicsML · RemyLau · Feb 15, 2023 · Feb 15, 2023
diff --git a/dance/modules/multi_modality/match_modality/cmae.py b/dance/modules/multi_modality/match_modality/cmae.py
@@ -4,12 +4,12 @@
 
 Reference
 ---------
-Yang, Karren Dai, et al. "Multi-domain translation between single-cell imaging and sequencing data using autoencoders." Nature communications 12.1 (2021): 1-10.
+Yang, Karren Dai, et al. "Multi-domain translation between single-cell imaging and sequencing data using autoencoders."
+Nature communications 12.1 (2021): 1-10.
 
 """
 import math
 import os
-import sys
 
 import torch
 import torch.nn as nn
@@ -159,9 +159,10 @@ def __init__(self, input_dim, params, shared_layer=False):
         self.dec = nn.Sequential(*decoder_layers)
 
     def forward(self, images):
-        # This is a reduced VAE implementation where we assume the outputs are multivariate Gaussian distribution with mean = hiddens and std_dev = all ones.
+        # This is a reduced VAE implementation where we assume the outputs are multivariate Gaussian distribution
+        # with mean = hiddens and std_dev = all ones.
         hiddens = self.encode(images)
-        if self.training == True:
+        if self.training:
             noise = Variable(torch.randn(hiddens.size()).cuda(hiddens.data.get_device()))
             images_recon = self.decode(hiddens + noise)
         else:
@@ -259,7 +260,8 @@ class CMAE(nn.Module):
     Parameters
     ----------
     hyperparameters : dictionary
-        A dictionary that contains arguments of CMAE. For details of parameters in parser args, please refer to link (parser help document).
+        A dictionary that contains arguments of CMAE. For details of parameters in parser args, please refer to link
+        (parser help document).
 
     """
 
@@ -319,7 +321,6 @@ def predict(self, mod1, mod2, metric='l1'):
             Joint embedding of input modalities.
 
         """
-
         with torch.no_grad():
             emb1, _ = self.gen_a.encode(mod1)
             emb2, _ = self.gen_b.encode(mod2)
@@ -348,7 +349,6 @@ def score(self, mod1, mod2, labels):
             Matching accuracy.
 
         """
-
         with torch.no_grad():
             pred = self.predict(mod1, mod2)
             return (pred[torch.arange(pred.shape[0]).long(), labels.long()].mean()).item()
@@ -419,17 +419,21 @@ def _gen_update(self, x_a, x_b, super_a, super_b, hyperparameters, a_labels=None
         class_weight = hyperparameters['gan_w'] if "class_w" not in hyperparameters else hyperparameters["class_w"]
 
         # total loss
-        self.loss_gen_total = hyperparameters['gan_w'] * self.loss_latent_a + \
-                              hyperparameters['gan_w'] * self.loss_latent_b + \
-                              class_weight * self.loss_class_a + \
-                              class_weight * self.loss_class_b + \
-                              hyperparameters['recon_x_w'] * self.loss_gen_recon_x_a + \
-                              hyperparameters['recon_x_w'] * self.loss_gen_recon_x_b + \
-                              hyperparameters['super_w'] * self.loss_supervision
+        self.loss_gen_total = (
+            hyperparameters['gan_w'] * self.loss_latent_a
+            + hyperparameters['gan_w'] * self.loss_latent_b
+            + class_weight * self.loss_class_a
+            + class_weight * self.loss_class_b
+            + hyperparameters['recon_x_w'] * self.loss_gen_recon_x_a
+            + hyperparameters['recon_x_w'] * self.loss_gen_recon_x_b
+            + hyperparameters['super_w'] * self.loss_supervision
+        )  # yapf: disable
 
         if variational:
-            self.loss_gen_total += hyperparameters['recon_kl_w'] * self.loss_gen_recon_kl_a + \
-                                   hyperparameters['recon_kl_w'] * self.loss_gen_recon_kl_b
+            self.loss_gen_total += (
+                hyperparameters['recon_kl_w'] * self.loss_gen_recon_kl_a
+                + hyperparameters['recon_kl_w'] * self.loss_gen_recon_kl_b
+            )  # yapf: disable
 
         self.loss_gen_total.backward()
         self.gen_opt.step()
@@ -496,7 +500,7 @@ def resume(self, checkpoint_dir):
         state_dict = torch.load(os.path.join(checkpoint_dir, 'optimizer.pt'))
         self.dis_opt.load_state_dict(state_dict['dis'])
         self.gen_opt.load_state_dict(state_dict['gen'])
-        # Reinitilize schedulers
+        # Reinitialize schedulers
         self.dis_scheduler = get_scheduler(self.dis_opt, hyperparameters, iterations)
         self.gen_scheduler = get_scheduler(self.gen_opt, hyperparameters, iterations)
         print('Resume from iteration %d' % iterations)
@@ -512,10 +516,6 @@ def save(self, snapshot_dir, iterations):
         iterations : int
             Current number of training iterations.
 
-        Returns
-        -------
-        None.
-
         """
         # Save generators, discriminators, and optimizers
         gen_name = os.path.join(snapshot_dir, 'gen_%08d.pt' % (iterations + 1))
@@ -546,10 +546,6 @@ def fit(self, train_mod1, train_mod2, aux_labels=None, checkpoint_directory='./c
         val_ratio : float
             Ratio for automatic train-validation split.
 
-        Returns
-        -------
-        None.
-
         """
         hyperparameters = self.hyperparameters
         idx = torch.randperm(train_mod1.shape[0])
@@ -586,8 +582,6 @@ def fit(self, train_mod1, train_mod2, aux_labels=None, checkpoint_directory='./c
                         self._gen_update(mod1, mod2, mod1, mod2, hyperparameters, variational=False)
             print('Matching score:', self.score(train_mod1[val_idx], train_mod2[val_idx],
                                                 torch.arange(val_idx.shape[0])))
-            # print('Matching score:',
-            #       self.evaluate(test_mod1, test_mod2, labels))
 
             iterations += 1
             if iterations >= hyperparameters['max_epochs']:

diff --git a/dance/modules/multi_modality/match_modality/scmm.py b/dance/modules/multi_modality/match_modality/scmm.py
@@ -4,20 +4,19 @@
 
 Reference
 ---------
-Minoura, Kodai, et al. "A mixture-of-experts deep generative model for integrated analysis of single-cell multiomics data." Cell reports methods 1.5 (2021): 100071.
+Minoura, Kodai, et al. A mixture-of-experts deep generative model for integrated analysis of single-cell multiomics
+data. Cell reports methods 1.5 (2021): 100071.
 
 """
 import math
 import os
 
 import numpy as np
-import pandas as pd
-import scipy.sparse as sp
 import torch
 import torch.distributions as dist
 import torch.nn as nn
 import torch.nn.functional as F
-from numpy import prod, sqrt
+from numpy import prod
 from pyro.distributions.zero_inflated import ZeroInflatedNegativeBinomial
 from sklearn.cluster import DBSCAN, KMeans
 from sklearn.neighbors import NearestNeighbors
@@ -131,7 +130,7 @@ def pz_params(self):
     @property
     def qz_x_params(self):
         if self._qz_x_params is None:
-            raise NameError("qz_x params not initalised yet!")
+            raise NameError("qz_x params not initialized yet!")
         return self._qz_x_params
 
     @staticmethod
@@ -365,9 +364,11 @@ class MMVAE(nn.Module):
     Parameters
     ----------
     subtask : str
-        Name of the subtask which is composed of the name of two modality. This parameter will indicate some modality-specific features in the model.
+        Name of the subtask, which is composed of the names of the two modalities. This parameter indicates some
+        modality-specific features in the model.
     params : argparse.Namespace
-        A Namespace object that contains arguments of MMVAE. For details of parameters in parser args, please refer to link (parser help document).
+        A Namespace object that contains arguments of MMVAE. For details of parameters in parser args, please refer to
+        link (parser help document).
 
     """
 
@@ -502,17 +503,11 @@ def fit(self, x_train, y_train, val_ratio=0.15):
         ----------
         x_train : torch.Tensor
             Input modality for training.
-
         y_train : torch.Tensor
             Target modality for training.
-
         val_ratio : float
             Ratio for automatic train-validation split.
 
-        Returns
-        -------
-        None.
-
         """
 
         start_early_stop = self.params.deterministic_warmup
@@ -535,13 +530,8 @@ def fit(self, x_train, y_train, val_ratio=0.15):
 
         train_mod1 = x_train.float().to(self.params.device)
         train_mod2 = y_train.float().to(self.params.device)
-        # test_mod1 = x_test.float().to(self.params.device)
-        # test_mod2 = y_test.float().to(self.params.device)
-        # labels = labels.float().to(self.params.device)
-
-        vals = []
-        tr = []
 
+        tr, vals = [], []
         for epoch in range(1, self.params.epochs + 1):
             self.train()
             b_loss = 0
@@ -576,7 +566,6 @@ def fit(self, x_train, y_train, val_ratio=0.15):
             if epoch % 10 == 0:
                 print('Valid Matching score:',
                       self.score(train_mod1[val_idx], train_mod2[val_idx], torch.eye(val_idx.shape[0])))
-                # print('Test Matching score:', self.score(test_mod1, test_mod2, labels))
 
             if epoch > start_early_stop and min(vals) != min(vals[-10:]):
                 print('Early stopped.')
@@ -593,7 +582,8 @@ def score(self, mod1, mod2, labels=None, metric='minkowski'):
         mod2 : torch.Tensor
             Features of modality 2.
         labels : torch.Tensor optional
-            Labels of matching modality, i.e. cell correspondence between two modalities. Required when metric is not 'loss'.
+            Labels of matching modality, i.e. cell correspondence between two modalities. Required when metric is not
+            'loss'.
         metric : str optional
             Metric of the score function, by default to be 'minkowski'.
 
@@ -662,7 +652,7 @@ def predict(self, mod1, mod2, metric='minkowski'):
         pred = []
         with torch.no_grad():
             for i, batch_idx in enumerate(data_loader):
-                dataT = [mod1[batch_idx].float().to(), mod2[batch_idx].float().to()]
+                dataT = [mod1[batch_idx], mod2[batch_idx]]
                 lats = self._get_latents(dataT, sampling=False)
                 if i == 0:
                     pred = lats