chore: improve #28

Merged 9 commits on Jan 22, 2024
63 changes: 54 additions & 9 deletions .github/workflows/rust.yml
@@ -18,32 +18,77 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest]
include:
- name: Windows x86_64
os: windows-2022
- name: Linux x86_64
os: ubuntu-latest
#TODO: android?
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Install rust toolchain
uses: dtolnay/rust-toolchain@1.71

- name: Install cargo-nextest
uses: taiki-e/install-action@v2
with:
tool: cargo-nextest

- name: (linux) install llvmpipe, lavapipe, vulkan sdk, alsa
if: matrix.os == 'ubuntu-latest'
shell: bash
run: |
set -e
sudo apt-get update -y -qq
# vulkan sdk
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
sudo add-apt-repository ppa:kisak/kisak-mesa
sudo apt-get update
sudo apt install -y libegl1-mesa libgl1-mesa-dri libxcb-xfixes0-dev vulkan-sdk mesa-vulkan-drivers pkg-config libasound2-dev

- name: (windows) install dxc
if: matrix.os == 'windows-2022'
uses: napokue/setup-dxc@v1.1.0

- name: (windows) install warp
if: matrix.os == 'windows-2022'
shell: bash
run: |
set -e
curl.exe -L https://www.nuget.org/api/v2/package/Microsoft.Direct3D.WARP/1.0.7.1 -o warp.zip
7z.exe e warp.zip -owarp build/native/amd64/d3d10warp.dll
mkdir -p target/ratchet/debug/deps
cp -v warp/d3d10warp.dll target/ratchet/debug/
cp -v warp/d3d10warp.dll target/ratchet/debug/deps

- name: (windows) install mesa
if: matrix.os == 'windows-2022'
shell: bash
run: |
set -e
curl.exe -L https://github.com/pal1000/mesa-dist-win/releases/download/23.2.1/mesa3d-23.2.1-release-msvc.7z -o mesa.7z
7z.exe e mesa.7z -omesa x64/{opengl32.dll,libgallium_wgl.dll,libglapi.dll,vulkan_lvp.dll,lvp_icd.x86_64.json}
cp -v mesa/* target/ratchet/debug/
cp -v mesa/* target/ratchet/debug/deps
echo "VK_DRIVER_FILES=$PWD/mesa/lvp_icd.x86_64.json" >> "$GITHUB_ENV"
echo "GALLIUM_DRIVER=llvmpipe" >> "$GITHUB_ENV"

- name: Setup python
uses: actions/setup-python@v5
with:
python-version: '3.10.6'
cache: 'pip'
- run: pip install -r requirements.txt
- name: Run tests
run: cargo test tensor -- --test-threads=1 --nocapture
- name: Install wasm-pack
run: |
curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
- name: Run integration tests
run: (cd crates/ratchet-integration-tests;sh run-tests.sh)

- name: run tests
shell: bash
run: |
set -e
cargo nextest run --features pyo3 --no-fail-fast

# - name: Install wasm-pack
# run: |
# curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh

# - name: Run integration tests
# run: (cd crates/ratchet-integration-tests;sh run-tests.sh)
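The new matrix runs the suite on Windows and Linux runners that have no physical GPU: kisak-mesa provides llvmpipe/lavapipe as a software Vulkan driver on Linux, while WARP plus Mesa's lvp ICD cover D3D and Vulkan on Windows, with VK_DRIVER_FILES and GALLIUM_DRIVER steering the loaders. A minimal sketch of the kind of smoke test this setup enables, assuming the wgpu API used elsewhere in this PR (the test itself is illustrative, not part of the diff):

#[test]
fn headless_adapter_is_available() {
    // On a CI runner with no physical GPU, a software rasterizer
    // (llvmpipe on Linux, WARP on Windows) should still enumerate.
    let instance = wgpu::Instance::default();
    let adapter = instance.enumerate_adapters(wgpu::Backends::all()).next();
    assert!(
        adapter.is_some(),
        "no adapter found; is a software rasterizer installed?"
    );
}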
2 changes: 1 addition & 1 deletion crates/ratchet-core/build.rs
@@ -54,7 +54,7 @@ fn embed_kernels() -> anyhow::Result<()> {

writeln!(
&mut file,
" m.insert(\"{}\", include_str!(\"{}\"));",
" m.insert(\"{}\", include_str!(r\"{}\"));",
name,
path.display()
)?;
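The raw-string tweak above (include_str!(r"{}")) is what lets the generated kernel table compile on Windows, where path.display() emits backslashes that a normal string literal would parse as escape sequences. A standalone illustration with a hypothetical path:

fn main() {
    // On Windows, path.display() yields a literal like this one.
    let path = r"C:\Users\ci\ratchet\crates\ratchet-core\kernels\add_scalar.wgsl";
    // Without the r prefix, the generated include_str!("C:\Users\...") hits
    // invalid escape sequences (\U is not a valid Rust escape) and the
    // crate fails to compile.
    println!("m.insert(\"add_scalar\", include_str!(r\"{}\"));", path);
}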
1 change: 0 additions & 1 deletion crates/ratchet-core/src/gpu/device.rs
@@ -131,7 +131,6 @@ impl WgpuDevice {
let backends = wgpu::util::backend_bits_from_env().unwrap_or(wgpu::Backends::PRIMARY);
let adapter = instance
.enumerate_adapters(backends)
.into_iter()
.max_by_key(|adapter| match adapter.get_info().device_type {
DeviceType::DiscreteGpu => 5,
DeviceType::Other => 4,
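Dropping .into_iter() reflects that enumerate_adapters already returns an iterator in the wgpu version targeted here. A standalone sketch of the full selection strategy; only the first two match arms appear in the hunk above, the remaining arms are assumed for illustration:

use wgpu::DeviceType;

fn pick_adapter(instance: &wgpu::Instance) -> Option<wgpu::Adapter> {
    let backends = wgpu::util::backend_bits_from_env().unwrap_or(wgpu::Backends::PRIMARY);
    // Prefer real hardware; software rasterizers such as llvmpipe and WARP
    // report DeviceType::Cpu and are chosen only when nothing else exists,
    // which is exactly the fallback the CI runners rely on.
    instance
        .enumerate_adapters(backends)
        .max_by_key(|adapter| match adapter.get_info().device_type {
            DeviceType::DiscreteGpu => 5,
            DeviceType::Other => 4,
            DeviceType::IntegratedGpu => 3,
            DeviceType::VirtualGpu => 2,
            DeviceType::Cpu => 1,
        })
}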
2 changes: 1 addition & 1 deletion crates/ratchet-core/src/gpu/pools/pipeline_layout_pool.rs
@@ -10,7 +10,7 @@ use super::{
slotmap::new_key_type! { pub struct PipelineLayoutHandle; }

#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub(crate) struct PipelineLayoutDescriptor {
pub struct PipelineLayoutDescriptor {
pub entries: RVec<BindGroupLayoutHandle>,
}

6 changes: 3 additions & 3 deletions crates/ratchet-core/src/kernels.rs
@@ -7,19 +7,19 @@ lazy_static! {
m.insert(
"qgemm_vec4",
include_str!(
"/Users/fleetwood/Code/ratchet/crates/ratchet-core/kernels/qgemm_vec4.wgsl"
r"/Users/fleetwood/Code/ratchet/crates/ratchet-core/kernels/qgemm_vec4.wgsl"
),
);
m.insert(
"sgemm_scalar",
include_str!(
"/Users/fleetwood/Code/ratchet/crates/ratchet-core/kernels/sgemm_scalar.wgsl"
r"/Users/fleetwood/Code/ratchet/crates/ratchet-core/kernels/sgemm_scalar.wgsl"
),
);
m.insert(
"add_scalar",
include_str!(
"/Users/fleetwood/Code/ratchet/crates/ratchet-core/kernels/add_scalar.wgsl"
r"/Users/fleetwood/Code/ratchet/crates/ratchet-core/kernels/add_scalar.wgsl"
),
);
m
126 changes: 114 additions & 12 deletions crates/ratchet-core/src/tensor.rs
@@ -332,6 +332,105 @@ impl Tensor {
}
}

pub fn deep_clone(&self) -> Tensor {
let storage_guard = self.storage();
let storage = storage_guard.as_ref().unwrap();
let cloned_storage = storage.deep_clone().unwrap();
Tensor::new(
LazyOp::Const,
self.view.clone(),
Some(cloned_storage),
self.device.clone(),
)
}
}

impl Tensor {
pub fn all_close(&self, other: &Self, atol: f32, rtol: f32) -> anyhow::Result<()> {
if self.shape() != other.shape() {
anyhow::bail!("Shape mismatch {:?} != {:?}", self.shape(), other.shape())
}

let self_nd = self.to_ndarray_view::<f32>();
let other_nd = other.to_ndarray_view::<f32>();
let mut stats = CloseStats::new(atol, rtol);

ndarray::indices_of(&self_nd).into_iter().for_each(|idx| {
let (a, b) = (self_nd[&idx], other_nd[&idx]);
stats.update(&a, &b, idx);
});

if stats.fail_count > 0 {
anyhow::bail!(
"{} samples not close - AVGE={} MAE={} at {:?}",
stats.fail_count,
stats.avg_error(),
stats.max_abs_error,
stats.max_abs_error_idxs,
);
} else {
println!(
"All close - AVGE={} MAE={} at {:?}",
stats.avg_error(),
stats.max_abs_error,
stats.max_abs_error_idxs
);
Ok(())
}
}
}

struct CloseStats {
total_error: f32,
max_abs_error: f32,
max_abs_error_idxs: Option<ndarray::IxDyn>,
element_count: usize,
fail_count: usize,
atol: f32,
rtol: f32,
}

impl CloseStats {
fn new(atol: f32, rtol: f32) -> Self {
Self {
total_error: 0.0,
max_abs_error: 0.0,
max_abs_error_idxs: None,
element_count: 0,
fail_count: 0,
atol,
rtol,
}
}

fn update(&mut self, a: &f32, b: &f32, index: ndarray::IxDyn) {
let abs_diff = (a - b).abs();
self.total_error += abs_diff;
self.element_count += 1;

if abs_diff > self.max_abs_error {
self.max_abs_error = abs_diff;
self.max_abs_error_idxs = Some(index);
}

if !self.is_close(a, b, abs_diff) {
self.fail_count += 1;
}
}

fn avg_error(&self) -> f32 {
self.total_error / self.element_count as f32
}

fn is_close(&self, a: &f32, b: &f32, abs_diff: f32) -> bool {
(a.is_nan() && b.is_nan())
|| (a.is_infinite() && b.is_infinite() && a.signum() == b.signum())
|| abs_diff <= self.atol + self.rtol * b.abs()
}
}
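CloseStats mirrors the numpy.allclose criterion: values match when |a - b| <= atol + rtol * |b|, NaNs are treated as equal to each other, and same-signed infinities pass. A self-contained sketch of the rule with a couple of worked cases:

fn is_close(a: f32, b: f32, atol: f32, rtol: f32) -> bool {
    (a.is_nan() && b.is_nan())
        || (a.is_infinite() && b.is_infinite() && a.signum() == b.signum())
        || (a - b).abs() <= atol + rtol * b.abs()
}

fn main() {
    // diff ~9.0e-5 is under the ~2.0e-4 budget of atol + rtol * |b|
    assert!(is_close(1.0, 1.00009, 1e-4, 1e-4));
    // diff 0.01 blows past the budget
    assert!(!is_close(1.0, 1.01, 1e-4, 1e-4));
    // NaN == NaN by this definition, unlike IEEE comparison
    assert!(is_close(f32::NAN, f32::NAN, 1e-4, 1e-4));
}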

/// Conversion to and from numpy arrays
impl Tensor {
#[cfg(feature = "pyo3")]
pub fn into_ndarray<T: TensorDType>(self) -> ArrayD<T> {
assert!(self.device().is_cpu());
@@ -346,6 +445,20 @@ impl Tensor {
}
}

#[cfg(feature = "pyo3")]
pub fn to_ndarray_view<T: TensorDType>(&self) -> ArrayViewD<T> {
assert!(self.device().is_cpu());
let shape = self.shape().to_vec();
if self.num_bytes() != 0 {
let storage_guard = self.storage();
let buffer = storage_guard.as_ref().unwrap().try_cpu().unwrap();
let (ptr, _) = buffer.inner().into_raw_parts();
unsafe { ArrayViewD::from_shape_ptr(shape, ptr as *const T) }
} else {
ArrayViewD::from_shape(shape, &[]).unwrap()
}
}
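to_ndarray_view reinterprets the tensor's CPU buffer as an ndarray view with no copy, so its soundness rests on the pointer staying valid, aligned, and contiguous for the lifetime of the view. The underlying pattern, as a standalone sketch over a plain byte slice:

use ndarray::ArrayViewD;

// Minimal sketch: view a contiguous f32 buffer as an ndarray without copying.
// The returned view borrows `bytes`, so the buffer cannot be freed under it.
fn view_as_f32<'a>(bytes: &'a [u8], shape: &[usize]) -> ArrayViewD<'a, f32> {
    let len: usize = shape.iter().product();
    assert_eq!(bytes.len(), len * std::mem::size_of::<f32>());
    assert_eq!(bytes.as_ptr() as usize % std::mem::align_of::<f32>(), 0);
    // Safety: length and alignment checked above; lifetime tied to `bytes`.
    unsafe { ArrayViewD::from_shape_ptr(shape.to_vec(), bytes.as_ptr() as *const f32) }
}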

#[cfg(feature = "pyo3")]
pub fn to_py<'s, 'p: 's, T: TensorDType + numpy::Element>(
&'s self,
@@ -358,18 +471,6 @@
);
PyArray::from_owned_array(*py, self.deep_clone().into_ndarray::<T>())
}

pub fn deep_clone(&self) -> Tensor {
let storage_guard = self.storage();
let storage = storage_guard.as_ref().unwrap();
let cloned_storage = storage.deep_clone().unwrap();
Tensor::new(
LazyOp::Const,
self.view.clone(),
Some(cloned_storage),
self.device.clone(),
)
}
}

#[cfg(feature = "pyo3")]
@@ -444,6 +545,7 @@ def matmul(a, b):
let c_gpu = a_gpu.matmul(&b_gpu)?;
c_gpu.resolve()?;
let d_gpu = c_gpu.to(Device::CPU)?;
ground?.all_close(&d_gpu, 1e-4, 1e-4)?;
Ok(())
}
}
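The reworked test closes the loop by checking the GPU result against a Python ground truth with all_close. Generating such a ground truth through pyo3 might look roughly like the sketch below; the helper name, the inline snippet, and the pyo3 0.20-era gil-ref API are assumptions rather than code from this PR:

use pyo3::prelude::*;

// Hypothetical helper: evaluate a small numpy snippet and pull the result
// back into Rust for an all_close-style comparison.
fn matmul_ground_truth() -> PyResult<Vec<Vec<f32>>> {
    Python::with_gil(|py| {
        let code = r#"
import numpy as np
def matmul(a, b):
    return np.matmul(a, b)
"#;
        let module = PyModule::from_code(py, code, "ground.py", "ground")?;
        let a = vec![vec![1.0f32, 2.0], vec![3.0, 4.0]];
        let b = vec![vec![5.0f32, 6.0], vec![7.0, 8.0]];
        module.getattr("matmul")?.call1((a, b))?.extract()
    })
}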
2 changes: 1 addition & 1 deletion crates/ratchet-loader/src/ggml.rs
@@ -192,7 +192,7 @@ impl GGMLWriter {
model: &GGMLModel<M>,
) -> std::io::Result<()> {
M::write_header(&model.header, writer)?;
for (name, tensor) in &model.tensors {
for (_name, _tensor) in &model.tensors {
//Self::write_single(writer, tensor)?;
}
todo!()