atong01 · kilianFatras · Jan 24, 2024 · Jan 18, 2024
diff --git a/examples/2D_tutorials/SF2M_tutorial.ipynb b/examples/2D_tutorials/SF2M_tutorial.ipynb
@@ -126,7 +126,7 @@
     "with torch.no_grad():\n",
     "    traj = node.trajectory(\n",
     "        x0,\n",
-    "        t_span=torch.linspace(0, 1, 100).to(device),\n",
+    "        t_span=torch.linspace(0, 1, 100, device=device),\n",
     "    )\n",
     "\n",
     "\n",

diff --git a/examples/images/cifar10/compute_fid.py b/examples/images/cifar10/compute_fid.py
@@ -70,14 +70,14 @@
 
 def gen_1_img(unused_latent):
     with torch.no_grad():
-        x = torch.randn(500, 3, 32, 32).to(device)
+        x = torch.randn(500, 3, 32, 32, device=device)
         if FLAGS.integration_method == "euler":
             print("Use method: ", FLAGS.integration_method)
-            t_span = torch.linspace(0, 1, FLAGS.integration_steps + 1).to(device)
+            t_span = torch.linspace(0, 1, FLAGS.integration_steps + 1, device=device)
             traj = node.trajectory(x, t_span=t_span)
         else:
             print("Use method: ", FLAGS.integration_method)
-            t_span = torch.linspace(0, 1, 2).to(device)
+            t_span = torch.linspace(0, 1, 2, device=device)
             traj = odeint(
                 new_net, x, t_span, rtol=FLAGS.tol, atol=FLAGS.tol, method=FLAGS.integration_method
             )

diff --git a/examples/images/cifar10/utils_cifar.py b/examples/images/cifar10/utils_cifar.py
@@ -34,8 +34,8 @@ def generate_samples(model, parallel, savedir, step, net_="normal"):
     node_ = NeuralODE(model_, solver="euler", sensitivity="adjoint")
     with torch.no_grad():
         traj = node_.trajectory(
-            torch.randn(64, 3, 32, 32).to(device),
-            t_span=torch.linspace(0, 1, 100).to(device),
+            torch.randn(64, 3, 32, 32, device=device),
+            t_span=torch.linspace(0, 1, 100, device=device),
         )
         traj = traj[-1, :].view([-1, 3, 32, 32]).clip(-1, 1)
         traj = traj / 2 + 0.5

diff --git a/examples/images/conditional_mnist.ipynb b/examples/images/conditional_mnist.ipynb
@@ -117,21 +117,21 @@
    ],
    "source": [
     "USE_TORCH_DIFFEQ = True\n",
-    "generated_class_list = torch.arange(10).repeat(10).to(device)\n",
+    "generated_class_list = torch.arange(10, device=device).repeat(10)\n",
     "with torch.no_grad():\n",
     "    if USE_TORCH_DIFFEQ:\n",
     "        traj = torchdiffeq.odeint(\n",
     "            lambda t, x: model.forward(t, x, generated_class_list),\n",
-    "            torch.randn(100, 1, 28, 28).to(device),\n",
-    "            torch.linspace(0, 1, 2).to(device),\n",
+    "            torch.randn(100, 1, 28, 28, device=device),\n",
+    "            torch.linspace(0, 1, 2, device=device),\n",
     "            atol=1e-4,\n",
     "            rtol=1e-4,\n",
     "            method=\"dopri5\",\n",
     "        )\n",
     "    else:\n",
     "        traj = node.trajectory(\n",
-    "            torch.randn(100, 1, 28, 28).to(device),\n",
-    "            t_span=torch.linspace(0, 1, 2).to(device),\n",
+    "            torch.randn(100, 1, 28, 28, device=device),\n",
+    "            t_span=torch.linspace(0, 1, 2, device=device),\n",
     "        )\n",
     "grid = make_grid(\n",
     "    traj[-1, :100].view([-1, 1, 28, 28]).clip(-1, 1), value_range=(-1, 1), padding=0, nrow=10\n",
@@ -206,21 +206,21 @@
    ],
    "source": [
     "USE_TORCH_DIFFEQ = True\n",
-    "generated_class_list = torch.arange(10).repeat(10).to(device)\n",
+    "generated_class_list = torch.arange(10, device=device).repeat(10)\n",
     "with torch.no_grad():\n",
     "    if USE_TORCH_DIFFEQ:\n",
     "        traj = torchdiffeq.odeint(\n",
     "            lambda t, x: model.forward(t, x, generated_class_list),\n",
-    "            torch.randn(100, 1, 28, 28).to(device),\n",
-    "            torch.linspace(0, 1, 2).to(device),\n",
+    "            torch.randn(100, 1, 28, 28, device=device),\n",
+    "            torch.linspace(0, 1, 2, device=device),\n",
     "            atol=1e-4,\n",
     "            rtol=1e-4,\n",
     "            method=\"dopri5\",\n",
     "        )\n",
     "    else:\n",
     "        traj = node.trajectory(\n",
-    "            torch.randn(100, 1, 28, 28).to(device),\n",
-    "            t_span=torch.linspace(0, 1, 2).to(device),\n",
+    "            torch.randn(100, 1, 28, 28, device=device),\n",
+    "            t_span=torch.linspace(0, 1, 2, device=device),\n",
     "        )\n",
     "grid = make_grid(\n",
     "    traj[-1, :100].view([-1, 1, 28, 28]).clip(-1, 1), value_range=(-1, 1), padding=0, nrow=10\n",
@@ -326,24 +326,24 @@
    ],
    "source": [
     "USE_TORCH_DIFFEQ = True\n",
-    "generated_class_list = torch.arange(10).repeat(10).to(device)\n",
+    "generated_class_list = torch.arange(10, device=device).repeat(10)\n",
     "\n",
     "node = NeuralODE(model, solver=\"euler\", sensitivity=\"adjoint\", atol=1e-4, rtol=1e-4)\n",
     "# Evaluate the ODE\n",
     "with torch.no_grad():\n",
     "    if USE_TORCH_DIFFEQ:\n",
     "        traj = torchdiffeq.odeint(\n",
     "            lambda t, x: model.forward(t, x, generated_class_list),\n",
-    "            torch.randn(100, 1, 28, 28).to(device),\n",
-    "            torch.linspace(0, 1, 2).to(device),\n",
+    "            torch.randn(100, 1, 28, 28, device=device),\n",
+    "            torch.linspace(0, 1, 2, device=device),\n",
     "            atol=1e-4,\n",
     "            rtol=1e-4,\n",
     "            method=\"dopri5\",\n",
     "        )\n",
     "    else:\n",
     "        traj = node.trajectory(\n",
-    "            torch.randn(100, 1, 28, 28).to(device),\n",
-    "            t_span=torch.linspace(0, 1, 2).to(device),\n",
+    "            torch.randn(100, 1, 28, 28, device=device),\n",
+    "            t_span=torch.linspace(0, 1, 2, device=device),\n",
     "        )\n",
     "grid = make_grid(\n",
     "    traj[-1, :100].view([-1, 1, 28, 28]).clip(-1, 1), value_range=(-1, 1), padding=0, nrow=10\n",
@@ -395,13 +395,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "sde = SDE(model, score_model, labels=torch.arange(10).repeat(10).to(device), sigma=0.1)\n",
+    "sde = SDE(model, score_model, labels=torch.arange(10, device=device).repeat(10), sigma=0.1)\n",
     "with torch.no_grad():\n",
     "    sde_traj = torchsde.sdeint(\n",
     "        sde,\n",
     "        # x0.view(x0.size(0), -1),\n",
-    "        torch.randn(100, 1 * 28 * 28).to(device),\n",
-    "        ts=torch.linspace(0, 1, 2).to(device),\n",
+    "        torch.randn(100, 1 * 28 * 28, device=device),\n",
+    "        ts=torch.linspace(0, 1, 2, device=device),\n",
     "        dt=0.01,\n",
     "    )"
    ]

diff --git a/examples/images/mnist_example.ipynb b/examples/images/mnist_example.ipynb
@@ -124,8 +124,8 @@
    "source": [
     "with torch.no_grad():\n",
     "    traj = node.trajectory(\n",
-    "        torch.randn(100, 1, 28, 28).to(device),\n",
-    "        t_span=torch.linspace(0, 1, 2).to(device),\n",
+    "        torch.randn(100, 1, 28, 28, device=device),\n",
+    "        t_span=torch.linspace(0, 1, 2, device=device),\n",
     "    )\n",
     "grid = make_grid(\n",
     "    traj[-1, :100].view([-1, 1, 28, 28]).clip(-1, 1), value_range=(-1, 1), padding=0, nrow=10\n",
@@ -236,8 +236,8 @@
     "# Evaluate the ODE\n",
     "with torch.no_grad():\n",
     "    traj = node.trajectory(\n",
-    "        torch.randn(100, 1, 28, 28).to(device),\n",
-    "        t_span=torch.linspace(0, 1, 1000).to(device),\n",
+    "        torch.randn(100, 1, 28, 28, device=device),\n",
+    "        t_span=torch.linspace(0, 1, 1000, device=device),\n",
     "    )\n",
     "grid = make_grid(\n",
     "    traj[-1, :100].view([-1, 1, 28, 28]).clip(-1, 1), value_range=(-1, 1), padding=0, nrow=10\n",
@@ -290,8 +290,8 @@
     "    sde_traj = torchsde.sdeint(\n",
     "        sde,\n",
     "        # x0.view(x0.size(0), -1),\n",
-    "        torch.randn(50, 1 * 28 * 28).to(device),\n",
-    "        ts=torch.linspace(0, 1, 2).to(device),\n",
+    "        torch.randn(50, 1 * 28 * 28, device=device),\n",
+    "        ts=torch.linspace(0, 1, 2, device=device),\n",
     "        dt=0.01,\n",
     "    )"
    ]

diff --git a/examples/single_cell/single-cell_example.ipynb b/examples/single_cell/single-cell_example.ipynb
@@ -469,7 +469,7 @@
     "with torch.no_grad():\n",
     "    traj = node.trajectory(\n",
     "        x0.to(device),\n",
-    "        t_span=torch.linspace(0, n_times - 1, 400).to(device),\n",
+    "        t_span=torch.linspace(0, n_times - 1, 400, device=device),\n",
     "    ).cpu()"
    ]
   },
@@ -529,7 +529,7 @@
     "    sde_traj = torchsde.sdeint(\n",
     "        sde,\n",
     "        x0.to(device),\n",
-    "        ts=torch.linspace(0, n_times - 1, 400).to(device),\n",
+    "        ts=torch.linspace(0, n_times - 1, 400, device=device),\n",
     "    ).cpu()"
    ]
   },
@@ -594,7 +594,7 @@
     "with torch.no_grad():\n",
     "    traj = node.trajectory(\n",
     "        x0[2].repeat(20).view(20, 2).to(device),\n",
-    "        t_span=torch.linspace(0, n_times - 1, 400).to(device),\n",
+    "        t_span=torch.linspace(0, n_times - 1, 400, device=device),\n",
     "    ).cpu()\n",
     "# plot_trajectories(traj.cpu().numpy())"
    ]
@@ -609,7 +609,7 @@
     "    sde_traj = torchsde.sdeint(\n",
     "        sde,\n",
     "        x0[2].repeat(20).view(20, 2).to(device),\n",
-    "        ts=torch.linspace(0, n_times - 1, 400).to(device),\n",
+    "        ts=torch.linspace(0, n_times - 1, 400, device=device),\n",
     "    ).cpu()"
    ]
   },
@@ -731,14 +731,14 @@
     "with torch.no_grad():\n",
     "    traj = node.trajectory(\n",
     "        x0[1].repeat(15).view(15, 2).to(device),\n",
-    "        t_span=torch.linspace(1, n_times - 1, 300).to(device),\n",
+    "        t_span=torch.linspace(1, n_times - 1, 300, device=device),\n",
     "    ).cpu()\n",
     "\n",
     "with torch.no_grad():\n",
     "    sde_traj = torchsde.sdeint(\n",
     "        sde,\n",
     "        x0[1].repeat(15).view(15, 2).to(device),\n",
-    "        ts=torch.linspace(1, n_times - 1, 300).to(device),\n",
+    "        ts=torch.linspace(1, n_times - 1, 300, device=device),\n",
     "    ).cpu()\n",
     "\n",
     "traj = traj.detach().cpu().numpy()"

diff --git a/runner/src/models/cfm_module.py b/runner/src/models/cfm_module.py
@@ -143,7 +143,7 @@
 
     def preprocess_batch(self, X, training=False):
         """Converts a batch of data into matched a random pair of (x0, x1)"""
-        t_select = torch.zeros(1).to(X.device)
+        t_select = torch.zeros(1, device=X.device)
         if self.is_trajectory:
             batch_size, times, dim = X.shape
             if not hasattr(self.datamodule, "HAS_JOINT_PLANS"):
@@ -168,7 +168,7 @@
 
             if training and self.hparams.leaveout_timepoint > 0:
                 # Select random except for the leftout timepoint
-                t_select = torch.randint(times - 2, size=(batch_size,)).to(X.device)
+                t_select = torch.randint(times - 2, size=(batch_size,), device=X.device)
                 t_select[t_select >= self.hparams.leaveout_timepoint] += 1
             else:
                 t_select = torch.randint(times - 1, size=(batch_size,))
@@ -623,12 +623,12 @@
 
     def preprocess_batch(self, X, training=False):
         """Converts a batch of data into matched a random pair of (x0, x1)"""
-        t_select = torch.zeros(1).to(X.device)
+        t_select = torch.zeros(1, device=X.device)
         if self.is_trajectory:
             batch_size, times, dim = X.shape
             if training and self.hparams.leaveout_timepoint > 0:
                 # Select random except for the leftout timepoint
-                t_select = torch.randint(times - 2, size=(batch_size,)).to(X.device)
+                t_select = torch.randint(times - 2, size=(batch_size,), device=X.device)
                 t_select[t_select >= self.hparams.leaveout_timepoint] += 1
             else:
                 t_select = torch.randint(times - 1, size=(batch_size,))
@@ -1011,7 +1011,7 @@
             # Randomly sample a batch from the stored data.
             idx = torch.randint(self.stored_data.shape[0], size=(X.shape[0],))
             X = self.stored_data[idx]
-            t_select = torch.zeros(1).to(X.device)
+            t_select = torch.zeros(1, device=X.device)
             return X[:, 0], X[:, 1], t_select
         return super().preprocess_batch(X, training)
 

diff --git a/runner/src/models/components/nn.py b/runner/src/models/components/nn.py
@@ -94,8 +94,10 @@ def timestep_embedding(timesteps, dim, max_period=10000):
     """
     half = dim // 2
     freqs = th.exp(
-        -math.log(max_period) * th.arange(start=0, end=half, dtype=th.float32) / half
-    ).to(device=timesteps.device)
+        -math.log(max_period)
+        * th.arange(start=0, end=half, dtype=th.float32, device=timesteps.device)
+        / half
+    )
     args = timesteps[:, None].float() * freqs[None]
     embedding = th.cat([th.cos(args), th.sin(args)], dim=-1)
     if dim % 2:

diff --git a/runner/src/models/components/plotting.py b/runner/src/models/components/plotting.py
@@ -31,10 +31,12 @@
     points_real = 50
     Y, X, T = np.mgrid[wmin:wmax:points, wmin:wmax:points, 0 : ts - 1 : 7j]
     gridpoints = torch.tensor(
-        np.stack([X.flatten(), Y.flatten()], axis=1), requires_grad=True
+        np.stack([X.flatten(), Y.flatten()], axis=1), requires_grad=True, device=device
     ).type(torch.float32)
-    times = torch.tensor(T.flatten(), requires_grad=True).type(torch.float32)[:, None]
-    out = model(times.to(device), gridpoints.to(device))
+    times = torch.tensor(T.flatten(), requires_grad=True, device=device).type(torch.float32)[
+        :, None
+    ]
+    out = model(times, gridpoints)
     out = out.reshape([points_real, points_real, 7, dim])
     out = out.cpu().detach().numpy()
     # Stream over time

diff --git a/torchcfm/models/unet/nn.py b/torchcfm/models/unet/nn.py
@@ -94,8 +94,10 @@ def timestep_embedding(timesteps, dim, max_period=10000):
     """
     half = dim // 2
     freqs = th.exp(
-        -math.log(max_period) * th.arange(start=0, end=half, dtype=th.float32) / half
-    ).to(device=timesteps.device)
+        -math.log(max_period)
+        * th.arange(start=0, end=half, dtype=th.float32, device=timesteps.device)
+        / half
+    )
     args = timesteps[:, None].float() * freqs[None]
     embedding = th.cat([th.cos(args), th.sin(args)], dim=-1)
     if dim % 2: