
Switch to using a TryFrom trait rather than From. (#683)
* Switch to using a TryFrom trait rather than From.

* Adapt the examples/tests/libraries to the new TryFrom traits.

* Temporarily deactivate the win-test CI.
LaurentMazare authored Apr 26, 2023
1 parent da968d9 commit 2de3ceb
Showing 33 changed files with 442 additions and 324 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/rust-ci.yml
@@ -26,7 +26,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ubuntu-latest, windows-2019, macOS-latest]
+        os: [ubuntu-latest, macOS-latest]
         rust: [stable, nightly]
     steps:
       - uses: actions/checkout@v2
5 changes: 5 additions & 0 deletions CHANGELOG.md
@@ -13,6 +13,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Automated conversion of list arguments: all the generated functions that take
   as input a slice of int or float can now be used directly with int values or
   fixed length arrays [682](https://github.com/LaurentMazare/tch-rs/pull/682).
+- Replace the `From<Tensor>` traits with `TryFrom` versions,
+  [683](https://github.com/LaurentMazare/tch-rs/pull/683). This is a breaking
+  change. Note that the old version would also flatten the tensor if needed to
+  reduce the number of dimensions; with the new version this has to be done
+  explicitly.
 
 ## v0.11.0 - 2023-03-20
 ### Added
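For readers adapting their code, the sketch below contrasts the old and new conversions. It is a minimal illustration, assuming (as the changed examples in this commit suggest) that the `TryFrom` implementations return `Result<_, TchError>` and that the `Vec` conversions now reject multi-dimensional tensors instead of silently flattening them; the tensor values are illustrative only.

```rust
use tch::{Device, Kind, Tensor};

fn main() -> Result<(), tch::TchError> {
    // Scalar extraction: `f64::from(&t)` used to panic on failure;
    // `f64::try_from(&t)` surfaces the failure as an error value instead.
    let t = Tensor::of_slice(&[3.0f64]).sum(Kind::Double);
    let v = f64::try_from(&t)?;
    assert_eq!(v, 3.0);

    // The old `Vec::<f64>::from` would flatten a multi-dimensional tensor
    // implicitly; with `TryFrom`, flatten explicitly before converting.
    let m = Tensor::ones(&[2, 3], (Kind::Double, Device::Cpu));
    let flat = Vec::<f64>::try_from(m.flatten(0, -1))?;
    assert_eq!(flat.len(), 6);
    Ok(())
}
```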
4 changes: 2 additions & 2 deletions examples/char-rnn/main.rs
@@ -31,7 +31,7 @@ fn sample(data: &TextData, lstm: &LSTM, linear: &Linear, device: Device) -> Stri
             .squeeze_dim(0)
             .softmax(-1, Kind::Float)
             .multinomial(1, false);
-        last_label = i64::from(sampled_y);
+        last_label = i64::try_from(sampled_y).unwrap();
         result.push(data.label_to_char(last_label))
     }
     result
@@ -58,7 +58,7 @@ pub fn main() -> Result<()> {
                 .view([BATCH_SIZE * SEQ_LEN, labels])
                 .cross_entropy_for_logits(&ys.to_device(device).view([BATCH_SIZE * SEQ_LEN]));
             opt.backward_step_clip(&loss, 0.5);
-            sum_loss += f64::from(loss);
+            sum_loss += f64::try_from(loss)?;
             cnt_loss += 1.0;
         }
         println!("Epoch: {} loss: {:5.3}", epoch, sum_loss / cnt_loss);
4 changes: 2 additions & 2 deletions examples/custom-optimizer/main.rs
@@ -38,8 +38,8 @@ pub fn run() -> Result<()> {
         println!(
             "epoch: {:4} train loss: {:8.5} test acc: {:5.2}%",
             epoch,
-            f64::from(&loss),
-            100. * f64::from(&test_accuracy),
+            f64::try_from(&loss)?,
+            100. * f64::try_from(&test_accuracy)?,
         );
     }
     Ok(())
4 changes: 2 additions & 2 deletions examples/min-gpt/main.rs
@@ -130,7 +130,7 @@ fn sample(data: &TextData, gpt: &impl ModuleT, input: Tensor) -> String {
     for _index in 0..SAMPLING_LEN {
         let logits = input.apply_t(gpt, false).i((0, -1, ..));
         let sampled_y = logits.softmax(-1, Kind::Float).multinomial(1, true);
-        let last_label = i64::from(&sampled_y);
+        let last_label = i64::try_from(&sampled_y).unwrap();
         result.push(data.label_to_char(last_label));
         input = Tensor::cat(&[input, sampled_y.view([1, 1])], 1).narrow(1, 1, BLOCK_SIZE);
     }
@@ -175,7 +175,7 @@ pub fn main() -> Result<()> {
                 .view([BATCH_SIZE * BLOCK_SIZE, labels])
                 .cross_entropy_for_logits(&ys.view([BATCH_SIZE * BLOCK_SIZE]));
             opt.backward_step_clip(&loss, 0.5);
-            sum_loss += f64::from(loss);
+            sum_loss += f64::try_from(loss)?;
             cnt_loss += 1.0;
             idx += 1;
             if idx % 10000 == 0 {
4 changes: 2 additions & 2 deletions examples/mnist/mnist_nn.rs
@@ -26,8 +26,8 @@ pub fn run() -> Result<()> {
         println!(
             "epoch: {:4} train loss: {:8.5} test acc: {:5.2}%",
             epoch,
-            f64::from(&loss),
-            100. * f64::from(&test_accuracy),
+            f64::try_from(&loss)?,
+            100. * f64::try_from(&test_accuracy)?,
         );
     }
     Ok(())
2 changes: 1 addition & 1 deletion examples/neural-style-transfer/main.rs
@@ -63,7 +63,7 @@ pub fn main() -> Result<()> {
         let loss = style_loss * STYLE_WEIGHT + content_loss;
         opt.backward_step(&loss);
         if step_idx % 1000 == 0 {
-            println!("{} {}", step_idx, f64::from(loss));
+            println!("{} {}", step_idx, f64::try_from(loss)?);
             imagenet::save_image(&input_var, format!("out{step_idx}.jpg"))?;
         }
     }
8 changes: 4 additions & 4 deletions examples/reinforcement-learning/a2c.rs
@@ -90,11 +90,11 @@ pub fn train() -> cpython::PyResult<()> {
             let (critic, actor) = tch::no_grad(|| model(&s_states.get(s)));
             let probs = actor.softmax(-1, Float);
             let actions = probs.multinomial(1, true).squeeze_dim(-1);
-            let step = env.step(Vec::<i64>::from(&actions))?;
+            let step = env.step(Vec::<i64>::try_from(&actions).unwrap())?;
 
             sum_rewards += &step.reward;
-            total_rewards += f64::from((&sum_rewards * &step.is_done).sum(Float));
-            total_episodes += f64::from(step.is_done.sum(Float));
+            total_rewards += f64::try_from((&sum_rewards * &step.is_done).sum(Float)).unwrap();
+            total_episodes += f64::try_from(step.is_done.sum(Float)).unwrap();
 
             let masks = Tensor::from(1f32) - step.is_done;
             sum_rewards *= &masks;
@@ -162,7 +162,7 @@ pub fn sample<T: AsRef<std::path::Path>>(weight_file: T) -> cpython::PyResult<()
         let (_critic, actor) = tch::no_grad(|| model(obs));
         let probs = actor.softmax(-1, Float);
         let actions = probs.multinomial(1, true).squeeze_dim(-1);
-        let step = env.step(Vec::<i64>::from(&actions))?;
+        let step = env.step(Vec::<i64>::try_from(&actions).unwrap())?;
 
         let masks = Tensor::from(1f32) - step.is_done;
         obs = frame_stack.update(&step.obs, Some(&masks));
2 changes: 1 addition & 1 deletion examples/reinforcement-learning/ddpg.rs
@@ -317,7 +317,7 @@ pub fn run() -> cpython::PyResult<()> {
 
         let mut total_reward = 0.0;
         for _ in 0..EPISODE_LENGTH {
-            let mut actions = 2.0 * f64::from(agent.actions(&obs));
+            let mut actions = 2.0 * f64::try_from(agent.actions(&obs)).unwrap();
             actions = actions.clamp(-2.0, 2.0);
 
             let action_vec = vec![actions];
2 changes: 1 addition & 1 deletion examples/reinforcement-learning/policy_gradient.rs
@@ -48,7 +48,7 @@ pub fn run() -> cpython::PyResult<()> {
             let action = tch::no_grad(|| {
                 obs.unsqueeze(0).apply(&model).softmax(1, Float).multinomial(1, true)
             });
-            let action = i64::from(action);
+            let action = i64::try_from(action).unwrap();
             let step = env.step(action)?;
             steps.push(step.copy_with_obs(&obs));
             obs = if step.is_done { env.reset()? } else { step.obs };
9 changes: 5 additions & 4 deletions examples/reinforcement-learning/ppo.rs
@@ -94,11 +94,12 @@ pub fn train() -> cpython::PyResult<()> {
             let (critic, actor) = tch::no_grad(|| model(&s_states.get(s)));
             let probs = actor.softmax(-1, Kind::Float);
             let actions = probs.multinomial(1, true).squeeze_dim(-1);
-            let step = env.step(Vec::<i64>::from(&actions))?;
+            let step = env.step(Vec::<i64>::try_from(&actions).unwrap())?;
 
             sum_rewards += &step.reward;
-            total_rewards += f64::from((&sum_rewards * &step.is_done).sum(Kind::Float));
-            total_episodes += f64::from(step.is_done.sum(Kind::Float));
+            total_rewards +=
+                f64::try_from((&sum_rewards * &step.is_done).sum(Kind::Float)).unwrap();
+            total_episodes += f64::try_from(step.is_done.sum(Kind::Float)).unwrap();
 
             let masks = Tensor::from(1f32) - step.is_done;
             sum_rewards *= &masks;
@@ -171,7 +172,7 @@ pub fn sample<T: AsRef<std::path::Path>>(weight_file: T) -> cpython::PyResult<()
         let (_critic, actor) = tch::no_grad(|| model(obs));
         let probs = actor.softmax(-1, Kind::Float);
         let actions = probs.multinomial(1, true).squeeze_dim(-1);
-        let step = env.step(Vec::<i64>::from(&actions))?;
+        let step = env.step(Vec::<i64>::try_from(&actions).unwrap())?;
 
         let masks = Tensor::from(1f32) - step.is_done;
         obs = frame_stack.update(&step.obs, Some(&masks));
2 changes: 1 addition & 1 deletion examples/stable-diffusion/main.rs
@@ -2418,7 +2418,7 @@ impl DDIMScheduler {
             ),
         };
         let alphas: Tensor = 1.0 - betas;
-        let alphas_cumprod = Vec::<f64>::from(alphas.cumprod(0, Kind::Double));
+        let alphas_cumprod = Vec::<f64>::try_from(alphas.cumprod(0, Kind::Double)).unwrap();
         Self { alphas_cumprod, timesteps, step_ratio, config }
     }
2 changes: 1 addition & 1 deletion examples/transfer-learning/main.rs
@@ -36,7 +36,7 @@ pub fn main() -> Result<()> {
         sgd.backward_step(&loss);
 
         let test_accuracy = test_images.apply(&linear).accuracy_for_logits(&dataset.test_labels);
-        println!("{} {:.2}%", epoch_idx, 100. * f64::from(test_accuracy));
+        println!("{} {:.2}%", epoch_idx, 100. * f64::try_from(test_accuracy)?);
     }
     Ok(())
 }
6 changes: 3 additions & 3 deletions examples/translation/main.rs
@@ -132,7 +132,7 @@ impl Model {
             let target_tensor = Tensor::of_slice(&[s as i64]).to_device(self.device);
             loss = loss + output.nll_loss(&target_tensor);
             let (_, output) = output.topk(1, -1, true, true);
-            if self.decoder_eos == i64::from(&output) as usize {
+            if self.decoder_eos == i64::try_from(&output).unwrap() as usize {
                 break;
             }
             prev = if use_teacher_forcing { target_tensor } else { output };
@@ -155,7 +155,7 @@ impl Model {
         for _i in 0..MAX_LENGTH {
             let (output, state_) = self.decoder.forward(&prev, &state, &enc_outputs, true);
             let (_, output) = output.topk(1, -1, true, true);
-            let output_ = i64::from(&output) as usize;
+            let output_ = i64::try_from(&output).unwrap() as usize;
             output_seq.push(output_);
             if self.decoder_eos == output_ {
                 break;
@@ -208,7 +208,7 @@ pub fn main() -> Result<()> {
         let (input_, target) = pairs.choose(&mut rng).unwrap();
         let loss = model.train_loss(input_, target, &mut rng);
         opt.backward_step(&loss);
-        loss_stats.update(f64::from(loss) / target.len() as f64);
+        loss_stats.update(f64::try_from(loss)? / target.len() as f64);
         if idx % 1000 == 0 {
             println!("{} {}", idx, loss_stats.avg_and_reset());
             for _pred_index in 1..5 {
2 changes: 1 addition & 1 deletion examples/vae/main.rs
@@ -83,7 +83,7 @@ pub fn main() -> Result<()> {
             let (recon_batch, mu, logvar) = vae.forward(&bimages);
             let loss = loss(&recon_batch, &bimages, &mu, &logvar);
             opt.backward_step(&loss);
-            train_loss += f64::from(&loss);
+            train_loss += f64::try_from(&loss)?;
             samples += bimages.size()[0] as f64;
         }
         println!("Epoch: {}, loss: {}", epoch, train_loss / samples);
2 changes: 1 addition & 1 deletion examples/yolo/main.rs
@@ -65,7 +65,7 @@ pub fn report(pred: &Tensor, img: &Tensor, w: i64, h: i64) -> Result<Tensor> {
     let mut bboxes: Vec<Vec<Bbox>> = (0..nclasses).map(|_| vec![]).collect();
     // Extract the bounding boxes for which confidence is above the threshold.
     for index in 0..npreds {
-        let pred = Vec::<f64>::from(pred.get(index));
+        let pred = Vec::<f64>::try_from(pred.get(index))?;
         let confidence = pred[4];
         if confidence > CONFIDENCE_THRESHOLD {
             let mut class_index = 0;
2 changes: 1 addition & 1 deletion src/data.rs
@@ -192,7 +192,7 @@ impl Iterator for TextDataIter {
             None
         } else {
             self.batch_index += 1;
-            let indexes = Vec::<i64>::from(&self.indexes.i(start..start + size));
+            let indexes = Vec::<i64>::try_from(&self.indexes.i(start..start + size)).unwrap();
             let batch: Vec<_> = indexes.iter().map(|&i| self.data.i(i..i + self.seq_len)).collect();
             let batch: Vec<_> = batch.iter().collect();
             Some(Tensor::stack(&batch, 0))
3 changes: 3 additions & 0 deletions src/error.rs
@@ -56,6 +56,9 @@ pub enum TchError {
     #[error(transparent)]
     Zip(#[from] ZipError),
 
+    #[error(transparent)]
+    NdArray(#[from] ndarray::ShapeError),
+
     /// Errors returned by the safetensors library.
     #[error("safetensors error {path}: {err}")]
     SafeTensorError { path: String, err: safetensors::SafeTensorError },
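With the new variant, `ndarray` shape errors convert into `TchError` through the `#[from]` attribute, so they can be propagated with `?`. A small sketch of the idea; the `to_matrix` helper is hypothetical and not part of this commit:

```rust
use ndarray::Array2;
use tch::TchError;

// Hypothetical helper: thanks to `NdArray(#[from] ndarray::ShapeError)`,
// the `?` below converts the ShapeError into a TchError automatically.
fn to_matrix(data: Vec<f64>, rows: usize, cols: usize) -> Result<Array2<f64>, TchError> {
    Ok(Array2::from_shape_vec((rows, cols), data)?)
}
```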
4 changes: 2 additions & 2 deletions src/nn/linear.rs
@@ -65,8 +65,8 @@ fn matches_pytorch() {
     let linear = Linear { ws, bs };
     let output = linear.forward(&input);
 
-    let delta_output: f32 = (&output - &expected_output).norm().into();
-    let delta_original: f32 = (&original_output - &expected_output).norm().into();
+    let delta_output: f32 = (&output - &expected_output).norm().try_into().unwrap();
+    let delta_original: f32 = (&original_output - &expected_output).norm().try_into().unwrap();
 
     // The `matmul()` implementation is close, but `linear()` is at least as close or closer.
     assert!(output.allclose(&expected_output, 1e-5, 1e-8, false));
2 changes: 1 addition & 1 deletion src/nn/module.rs
@@ -26,7 +26,7 @@ pub trait ModuleT: std::fmt::Debug + Send {
         for (xs, ys) in Iter2::new(xs, ys, batch_size).return_smaller_last_batch() {
             let acc = self.forward_t(&xs.to_device(d), false).accuracy_for_logits(&ys.to_device(d));
             let size = xs.size()[0] as f64;
-            sum_accuracy += f64::from(&acc) * size;
+            sum_accuracy += f64::try_from(&acc).unwrap() * size;
             sample_count += size;
         }
         sum_accuracy / sample_count
2 changes: 1 addition & 1 deletion src/nn/optimizer.rs
@@ -231,7 +231,7 @@ impl Optimizer {
         for var in v.trainable_variables.iter() {
             norms.push(var.tensor.grad().norm());
         }
-        let total_norm = f64::from(Tensor::stack(&norms, 0).norm());
+        let total_norm = f64::try_from(Tensor::stack(&norms, 0).norm()).unwrap();
         let clip_coef = max / (total_norm + 1e-6);
         if clip_coef < 1.0 {
             for var in v.trainable_variables.iter() {