chore: improve #28

Merged 9 commits on Jan 22, 2024
63 changes: 54 additions & 9 deletions .github/workflows/rust.yml
@@ -18,32 +18,77 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest]
include:
- name: Windows x86_64
os: windows-2022
- name: Linux x86_64
os: ubuntu-latest
#TODO: android?
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Install rust toolchain
uses: dtolnay/rust-toolchain@1.71

- name: Install cargo-nextest
uses: taiki-e/install-action@v2
with:
tool: cargo-nextest

- name: (linux) install llvmpipe, lavapipe, vulkan sdk, alsa
if: matrix.os == 'ubuntu-latest'
shell: bash
run: |
set -e
sudo apt-get update -y -qq
# vulkan sdk
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
sudo add-apt-repository ppa:kisak/kisak-mesa
sudo apt-get update
sudo apt install -y libegl1-mesa libgl1-mesa-dri libxcb-xfixes0-dev vulkan-sdk mesa-vulkan-drivers pkg-config libasound2-dev

- name: (windows) install dxc
if: matrix.os == 'windows-2022'
uses: napokue/setup-dxc@v1.1.0

- name: (windows) install warp
if: matrix.os == 'windows-2022'
shell: bash
run: |
set -e
curl.exe -L https://www.nuget.org/api/v2/package/Microsoft.Direct3D.WARP/1.0.7.1 -o warp.zip
7z.exe e warp.zip -owarp build/native/amd64/d3d10warp.dll
mkdir -p target/ratchet/debug/deps
cp -v warp/d3d10warp.dll target/ratchet/debug/
cp -v warp/d3d10warp.dll target/ratchet/debug/deps

- name: (windows) install mesa
if: matrix.os == 'windows-2022'
shell: bash
run: |
set -e
curl.exe -L https://github.com/pal1000/mesa-dist-win/releases/download/23.2.1/mesa3d-23.2.1-release-msvc.7z -o mesa.7z
7z.exe e mesa.7z -omesa x64/{opengl32.dll,libgallium_wgl.dll,libglapi.dll,vulkan_lvp.dll,lvp_icd.x86_64.json}
cp -v mesa/* target/ratchet/debug/
cp -v mesa/* target/ratchet/debug/deps
echo "VK_DRIVER_FILES=$PWD/mesa/lvp_icd.x86_64.json" >> "$GITHUB_ENV"
echo "GALLIUM_DRIVER=llvmpipe" >> "$GITHUB_ENV"

- name: Setup python
uses: actions/setup-python@v5
with:
python-version: '3.10.6'
cache: 'pip'
- run: pip install -r requirements.txt
- name: Run tests
run: cargo test tensor -- --test-threads=1 --nocapture
- name: Install wasm-pack
run: |
curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
- name: Run integration tests
run: (cd crates/ratchet-integration-tests;sh run-tests.sh)

- name: run tests
shell: bash
run: |
set -e
cargo nextest run --features pyo3 --no-fail-fast

# - name: Install wasm-pack
# run: |
# curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh

# - name: Run integration tests
# run: (cd crates/ratchet-integration-tests;sh run-tests.sh)
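The new matrix runs the suite on Windows and Linux runners that have no physical GPU: kisak-mesa provides llvmpipe/lavapipe as a software Vulkan driver on Linux, while WARP plus Mesa's lvp ICD cover D3D and Vulkan on Windows, with VK_DRIVER_FILES and GALLIUM_DRIVER steering the loaders. A minimal sketch of the kind of smoke test this setup enables, assuming the wgpu API used elsewhere in this PR (the test itself is illustrative, not part of the diff):

#[test]
fn headless_adapter_is_available() {
    // On a CI runner with no physical GPU, a software rasterizer
    // (llvmpipe on Linux, WARP on Windows) should still enumerate.
    let instance = wgpu::Instance::default();
    let adapter = instance.enumerate_adapters(wgpu::Backends::all()).next();
    assert!(
        adapter.is_some(),
        "no adapter found; is a software rasterizer installed?"
    );
}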
2 changes: 1 addition & 1 deletion crates/ratchet-core/build.rs
@@ -54,7 +54,7 @@ fn embed_kernels() -> anyhow::Result<()> {

writeln!(
&mut file,
" m.insert(\"{}\", include_str!(\"{}\"));",
" m.insert(\"{}\", include_str!(r\"{}\"));",
name,
path.display()
)?;
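The raw-string tweak above (include_str!(r"{}")) is what lets the generated kernel table compile on Windows, where path.display() emits backslashes that a normal string literal would parse as escape sequences. A standalone illustration with a hypothetical path:

fn main() {
    // On Windows, path.display() yields a literal like this one.
    let path = r"C:\Users\ci\ratchet\crates\ratchet-core\kernels\add_scalar.wgsl";
    // Without the r prefix, the generated include_str!("C:\Users\...") hits
    // invalid escape sequences (\U is not a valid Rust escape) and the
    // crate fails to compile.
    println!("m.insert(\"add_scalar\", include_str!(r\"{}\"));", path);
}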
1 change: 0 additions & 1 deletion crates/ratchet-core/src/gpu/device.rs
@@ -131,7 +131,6 @@ impl WgpuDevice {
let backends = wgpu::util::backend_bits_from_env().unwrap_or(wgpu::Backends::PRIMARY);
let adapter = instance
.enumerate_adapters(backends)
.into_iter()
.max_by_key(|adapter| match adapter.get_info().device_type {
DeviceType::DiscreteGpu => 5,
DeviceType::Other => 4,
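Dropping .into_iter() reflects that enumerate_adapters already returns an iterator in the wgpu version targeted here. A standalone sketch of the full selection strategy; only the first two match arms appear in the hunk above, the remaining arms are assumed for illustration:

use wgpu::DeviceType;

fn pick_adapter(instance: &wgpu::Instance) -> Option<wgpu::Adapter> {
    let backends = wgpu::util::backend_bits_from_env().unwrap_or(wgpu::Backends::PRIMARY);
    // Prefer real hardware; software rasterizers such as llvmpipe and WARP
    // report DeviceType::Cpu and are chosen only when nothing else exists,
    // which is exactly the fallback the CI runners rely on.
    instance
        .enumerate_adapters(backends)
        .max_by_key(|adapter| match adapter.get_info().device_type {
            DeviceType::DiscreteGpu => 5,
            DeviceType::Other => 4,
            DeviceType::IntegratedGpu => 3,
            DeviceType::VirtualGpu => 2,
            DeviceType::Cpu => 1,
        })
}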
2 changes: 1 addition & 1 deletion crates/ratchet-core/src/gpu/pools/pipeline_layout_pool.rs
@@ -10,7 +10,7 @@ use super::{
slotmap::new_key_type! { pub struct PipelineLayoutHandle; }

#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub(crate) struct PipelineLayoutDescriptor {
pub struct PipelineLayoutDescriptor {
pub entries: RVec<BindGroupLayoutHandle>,
}

6 changes: 3 additions & 3 deletions crates/ratchet-core/src/kernels.rs
@@ -7,19 +7,19 @@ lazy_static! {
m.insert(
"qgemm_vec4",
include_str!(
"/Users/fleetwood/Code/ratchet/crates/ratchet-core/kernels/qgemm_vec4.wgsl"
r"/Users/fleetwood/Code/ratchet/crates/ratchet-core/kernels/qgemm_vec4.wgsl"
),
);
m.insert(
"sgemm_scalar",
include_str!(
"/Users/fleetwood/Code/ratchet/crates/ratchet-core/kernels/sgemm_scalar.wgsl"
r"/Users/fleetwood/Code/ratchet/crates/ratchet-core/kernels/sgemm_scalar.wgsl"
),
);
m.insert(
"add_scalar",
include_str!(
"/Users/fleetwood/Code/ratchet/crates/ratchet-core/kernels/add_scalar.wgsl"
r"/Users/fleetwood/Code/ratchet/crates/ratchet-core/kernels/add_scalar.wgsl"
),
);
m
126 changes: 114 additions & 12 deletions crates/ratchet-core/src/tensor.rs
@@ -332,6 +332,105 @@ impl Tensor {
}
}

pub fn deep_clone(&self) -> Tensor {
let storage_guard = self.storage();
let storage = storage_guard.as_ref().unwrap();
let cloned_storage = storage.deep_clone().unwrap();
Tensor::new(
LazyOp::Const,
self.view.clone(),
Some(cloned_storage),
self.device.clone(),
)
}
}

impl Tensor {
pub fn all_close(&self, other: &Self, atol: f32, rtol: f32) -> anyhow::Result<()> {
if self.shape() != other.shape() {
anyhow::bail!("Shape mismatch {:?} != {:?}", self.shape(), other.shape())
}

let self_nd = self.to_ndarray_view::<f32>();
let other_nd = other.to_ndarray_view::<f32>();
let mut stats = CloseStats::new(atol, rtol);

ndarray::indices_of(&self_nd).into_iter().for_each(|idx| {
let (a, b) = (self_nd[&idx], other_nd[&idx]);
stats.update(&a, &b, idx);
});

if stats.fail_count > 0 {
anyhow::bail!(
"{} samples not close - AVGE={} MAE={} at {:?}",
stats.fail_count,
stats.avg_error(),
stats.max_abs_error,
stats.max_abs_error_idxs,
);
} else {
println!(
"All close - AVGE={} MAE={} at {:?}",
stats.avg_error(),
stats.max_abs_error,
stats.max_abs_error_idxs
);
Ok(())
}
}
}

struct CloseStats {
total_error: f32,
max_abs_error: f32,
max_abs_error_idxs: Option<ndarray::IxDyn>,
element_count: usize,
fail_count: usize,
atol: f32,
rtol: f32,
}

impl CloseStats {
fn new(atol: f32, rtol: f32) -> Self {
Self {
total_error: 0.0,
max_abs_error: 0.0,
max_abs_error_idxs: None,
element_count: 0,
fail_count: 0,
atol,
rtol,
}
}

fn update(&mut self, a: &f32, b: &f32, index: ndarray::IxDyn) {
let abs_diff = (a - b).abs();
self.total_error += abs_diff;
self.element_count += 1;

if abs_diff > self.max_abs_error {
self.max_abs_error = abs_diff;
self.max_abs_error_idxs = Some(index);
}

if !self.is_close(a, b, abs_diff) {
self.fail_count += 1;
}
}

fn avg_error(&self) -> f32 {
self.total_error / self.element_count as f32
}

fn is_close(&self, a: &f32, b: &f32, abs_diff: f32) -> bool {
(a.is_nan() && b.is_nan())
|| (a.is_infinite() && b.is_infinite() && a.signum() == b.signum())
|| abs_diff <= self.atol + self.rtol * b.abs()
}
}
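CloseStats mirrors the numpy.allclose criterion: values match when |a - b| <= atol + rtol * |b|, NaNs are treated as equal to each other, and same-signed infinities pass. A self-contained sketch of the rule with a couple of worked cases:

fn is_close(a: f32, b: f32, atol: f32, rtol: f32) -> bool {
    (a.is_nan() && b.is_nan())
        || (a.is_infinite() && b.is_infinite() && a.signum() == b.signum())
        || (a - b).abs() <= atol + rtol * b.abs()
}

fn main() {
    // diff ~9.0e-5 is under the ~2.0e-4 budget of atol + rtol * |b|
    assert!(is_close(1.0, 1.00009, 1e-4, 1e-4));
    // diff 0.01 blows past the budget
    assert!(!is_close(1.0, 1.01, 1e-4, 1e-4));
    // NaN == NaN by this definition, unlike IEEE comparison
    assert!(is_close(f32::NAN, f32::NAN, 1e-4, 1e-4));
}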

/// Conversion to and from numpy arrays
impl Tensor {
#[cfg(feature = "pyo3")]
pub fn into_ndarray<T: TensorDType>(self) -> ArrayD<T> {
assert!(self.device().is_cpu());
@@ -346,6 +445,20 @@ impl Tensor {
}
}

#[cfg(feature = "pyo3")]
pub fn to_ndarray_view<T: TensorDType>(&self) -> ArrayViewD<T> {
assert!(self.device().is_cpu());
let shape = self.shape().to_vec();
if self.num_bytes() != 0 {
let storage_guard = self.storage();
let buffer = storage_guard.as_ref().unwrap().try_cpu().unwrap();
let (ptr, _) = buffer.inner().into_raw_parts();
unsafe { ArrayViewD::from_shape_ptr(shape, ptr as *const T) }
} else {
ArrayViewD::from_shape(shape, &[]).unwrap()
}
}
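to_ndarray_view reinterprets the tensor's CPU buffer as an ndarray view with no copy, so its soundness rests on the pointer staying valid, aligned, and contiguous for the lifetime of the view. The underlying pattern, as a standalone sketch over a plain byte slice:

use ndarray::ArrayViewD;

// Minimal sketch: view a contiguous f32 buffer as an ndarray without copying.
// The returned view borrows `bytes`, so the buffer cannot be freed under it.
fn view_as_f32<'a>(bytes: &'a [u8], shape: &[usize]) -> ArrayViewD<'a, f32> {
    let len: usize = shape.iter().product();
    assert_eq!(bytes.len(), len * std::mem::size_of::<f32>());
    assert_eq!(bytes.as_ptr() as usize % std::mem::align_of::<f32>(), 0);
    // Safety: length and alignment checked above; lifetime tied to `bytes`.
    unsafe { ArrayViewD::from_shape_ptr(shape.to_vec(), bytes.as_ptr() as *const f32) }
}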

#[cfg(feature = "pyo3")]
pub fn to_py<'s, 'p: 's, T: TensorDType + numpy::Element>(
&'s self,
@@ -358,18 +471,6 @@
);
PyArray::from_owned_array(*py, self.deep_clone().into_ndarray::<T>())
}

pub fn deep_clone(&self) -> Tensor {
let storage_guard = self.storage();
let storage = storage_guard.as_ref().unwrap();
let cloned_storage = storage.deep_clone().unwrap();
Tensor::new(
LazyOp::Const,
self.view.clone(),
Some(cloned_storage),
self.device.clone(),
)
}
}

#[cfg(feature = "pyo3")]
@@ -444,6 +545,7 @@ def matmul(a, b):
let c_gpu = a_gpu.matmul(&b_gpu)?;
c_gpu.resolve()?;
let d_gpu = c_gpu.to(Device::CPU)?;
ground?.all_close(&d_gpu, 1e-4, 1e-4)?;
Ok(())
}
}
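The reworked test closes the loop by checking the GPU result against a Python ground truth with all_close. Generating such a ground truth through pyo3 might look roughly like the sketch below; the helper name, the inline snippet, and the pyo3 0.20-era gil-ref API are assumptions rather than code from this PR:

use pyo3::prelude::*;

// Hypothetical helper: evaluate a small numpy snippet and pull the result
// back into Rust for an all_close-style comparison.
fn matmul_ground_truth() -> PyResult<Vec<Vec<f32>>> {
    Python::with_gil(|py| {
        let code = r#"
import numpy as np
def matmul(a, b):
    return np.matmul(a, b)
"#;
        let module = PyModule::from_code(py, code, "ground.py", "ground")?;
        let a = vec![vec![1.0f32, 2.0], vec![3.0, 4.0]];
        let b = vec![vec![5.0f32, 6.0], vec![7.0, 8.0]];
        module.getattr("matmul")?.call1((a, b))?.extract()
    })
}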
2 changes: 1 addition & 1 deletion crates/ratchet-loader/src/ggml.rs
@@ -192,7 +192,7 @@ impl GGMLWriter {
model: &GGMLModel<M>,
) -> std::io::Result<()> {
M::write_header(&model.header, writer)?;
for (name, tensor) in &model.tensors {
for (_name, _tensor) in &model.tensors {
//Self::write_single(writer, tensor)?;
}
todo!()