Skip to content
This repository has been archived by the owner on Jun 24, 2024. It is now read-only.

Commit

Permalink
Merge pull request #428 from LLukas22/feat/ggml-update
Browse files Browse the repository at this point in the history
Build against newer GGML version
  • Loading branch information
philpax authored Nov 12, 2023
2 parents 52c2bb6 + 5e4b35f commit e5e0fe1
Show file tree
Hide file tree
Showing 25 changed files with 1,268 additions and 420 deletions.
5 changes: 5 additions & 0 deletions binaries/generate-ggml-bindings/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ fn generate_main(ggml_path: &Path, src_path: &Path) {
.allowlist_file(r".*ggml.h")
.header(ggml_path.join("k_quants.h").to_string_lossy())
.allowlist_file(r".*k_quants.h")
.header(ggml_path.join("ggml-alloc.h").to_string_lossy())
.allowlist_file(r".*ggml-alloc.h")
// Suppress some warnings
.raw_line("#![allow(non_upper_case_globals)]")
.raw_line("#![allow(non_camel_case_types)]")
Expand Down Expand Up @@ -88,6 +90,9 @@ fn generate_metal(ggml_path: &Path, src_path: &Path) {
generate_extra("metal", ggml_path, src_path, |b| {
b.header(ggml_path.join("ggml-metal.h").to_string_lossy())
.allowlist_file(r".*ggml-metal\.h")
.raw_line("use super::ggml_tensor;")
.raw_line("use super::ggml_log_callback;")
.raw_line("use super::ggml_cgraph;")
});
}

Expand Down
2 changes: 1 addition & 1 deletion binaries/llm-test/configs/mpt.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
{
"Inference": {
"input": "When a llama rides a crab, ",
"output": "When a llama rides a crab,  the llama is called the \"crab rider\".\nThe crabs are very popular in South America, especially Brazil. They have been used as transportation for many years and they can carry up to five people at once!",
"output": "When a llama rides a crab,  the llama is called the \"crab rider\"\nThe Llamas are an animal that can be found in The Maze. They have no special abilities, but they do drop Llamaskin and occasionally some other items when killed by players or monsters alike (see below). It's unknown if there was ever any sort of breeding system for these animals as it seems to only exist on this one world so far; however their existence has been confirmed through player reports from multiple worlds where people claim having seen them before being able see anything else about what happened after seeing just 1-2 at most per game session which makes me believe",
"maximum_token_count": 128
}
},
Expand Down
13 changes: 4 additions & 9 deletions crates/ggml/src/accelerator/metal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ pub struct MetalContext {

impl MetalContext {
/// Create a new Metal context
pub fn new(n_threads: usize) -> Self {
let raw = unsafe { metal::ggml_metal_init(n_threads.try_into().unwrap()) };
pub fn new() -> Self {
let raw = unsafe { metal::ggml_metal_init(1) };

MetalContext {
contexts: vec![],
Expand Down Expand Up @@ -83,19 +83,14 @@ impl MetalContext {
unsafe {
metal::ggml_metal_graph_compute(
self.ptr.as_ptr(),
graph.inner as *mut ggml_sys::ggml_cgraph as *mut metal::ggml_cgraph,
graph.inner as *mut ggml_sys::ggml_cgraph,
);
}
}

/// Reads a tensor from Metal
///
/// Forwards this context's raw pointer and the tensor's raw pointer to
/// `metal::ggml_metal_get_tensor`. Nothing is returned to the caller.
pub fn get_tensor(&self, tensor: &Tensor) {
// NOTE(review): the two `unsafe` blocks below look like the before/after sides of the diff
// hunk for the same call; the first casts the tensor pointer to `*mut metal::ggml_tensor`,
// the second passes it uncast. Presumably only one of them exists in the real file; confirm.
unsafe {
metal::ggml_metal_get_tensor(
self.ptr.as_ptr(),
tensor.ptr.as_ptr() as *mut metal::ggml_tensor,
)
}
unsafe { metal::ggml_metal_get_tensor(self.ptr.as_ptr(), tensor.ptr.as_ptr()) }
}
}

Expand Down
1 change: 1 addition & 0 deletions crates/ggml/src/accelerator/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ pub fn initialize(device: i32) {
//TODO: Make this configurable
sys::cuda::ggml_init_cublas();
sys::cuda::ggml_cuda_set_main_device(device);
sys::cuda::ggml_cuda_set_mul_mat_q(true);
let split = 1.0f32;
sys::cuda::ggml_cuda_set_tensor_split(&split as *const f32);
}
Expand Down
59 changes: 27 additions & 32 deletions crates/ggml/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ pub struct Context {
/// allocated tensors. Tensors are owned by the object, so a [`Tensor`]
/// contains a `Weak` reference underneath and doesn't let you do anything
/// with it if the underlying context has been deallocated.
inner: Arc<ContextInner>,
pub inner: Arc<ContextInner>,

/// The storage for this context. This is stored so that the buffer can be dropped when the context is dropped.
storage: Option<ContextStorage>,
Expand All @@ -31,7 +31,7 @@ pub struct Context {
}

/// Contains state shared between a context and its tensors
pub(crate) struct ContextInner {
pub struct ContextInner {
pub ptr: NonNull<sys::ggml_context>,

/// Offloaded tensors. Used to free them when the context is dropped.
Expand Down Expand Up @@ -73,7 +73,12 @@ impl ContextInner {
/// Controls how the context uses memory.
pub enum ContextStorage {
/// Use the provided buffer as memory.
Buffer(Buffer),
Buffer {
/// The buffer to use as memory.
buffer: Buffer,
/// Whether to allocate tensors into this buffer.
allocate: bool,
},
/// Use the provided memory mapped file as memory.
Mmap(Mmap),
/// Allocate `mem_size` bytes of memory.
Expand All @@ -94,7 +99,10 @@ impl ContextStorage {
/// Returns the `Buffer` if this is a `Buffer` variant.
///
/// Every other variant yields `None`.
pub fn as_buffer(&self) -> Option<&Buffer> {
match self {
// NOTE(review): this tuple-style arm and the struct-style arm after it look like the
// before/after sides of the diff hunk; presumably only one matches the actual shape of
// the `Buffer` variant. Confirm against the enum definition.
Self::Buffer(v) => Some(v),
Self::Buffer {
buffer: v,
// The `allocate` flag is not needed to hand out the buffer, so it is ignored.
allocate: _,
} => Some(v),
_ => None,
}
}
Expand All @@ -115,7 +123,16 @@ impl PartialEq for ContextStorage {
fn eq(&self, other: &Self) -> bool {
use ContextStorage::*;
match (self, other) {
(Buffer(l0), Buffer(r0)) => l0 == r0,
(
Buffer {
buffer: l0,
allocate: l1,
},
Buffer {
buffer: r0,
allocate: r1,
},
) => l0 == r0 && l1 == r1,
(Mmap(l0), Mmap(r0)) => l0.as_ptr() == r0.as_ptr(),
(Allocate { mem_size: l }, Allocate { mem_size: r }) => l == r,
_ => false,
Expand All @@ -130,10 +147,10 @@ impl Context {
/// Creates a new [Context] with the given storage.
pub fn new(storage: ContextStorage) -> Self {
let init_params = match &storage {
ContextStorage::Buffer(buffer) => sys::ggml_init_params {
ContextStorage::Buffer { buffer, allocate } => sys::ggml_init_params {
mem_size: buffer.size(),
mem_buffer: buffer.data,
no_alloc: false,
no_alloc: !allocate,
},
ContextStorage::Mmap(mmap) => sys::ggml_init_params {
mem_size: mmap.len(),
Expand All @@ -160,8 +177,8 @@ impl Context {

/// Creates a new [Context] with the specified buffer.
/// The buffer will be used by GGML.
pub fn new_with_buffer(buffer: Buffer) -> Self {
Self::new(ContextStorage::Buffer(buffer))
pub fn new_with_buffer(buffer: Buffer, allocate: bool) -> Self {
Self::new(ContextStorage::Buffer { buffer, allocate })
}

/// Creates a new [Context] with the specified memory mapped file.
Expand Down Expand Up @@ -206,28 +223,6 @@ impl Context {
unsafe { sys::ggml_used_mem(self.as_ptr()) }
}

/// Installs `scratch_buffer` as the scratch buffer of this [Context].
///
/// Passing `None` hands GGML a zero-sized scratch region with a null data
/// pointer, which disables the scratch buffer.
pub fn use_scratch<'a>(&'a self, scratch_buffer: Option<&'a Buffer>) {
    // Build the scratch descriptor up front; the offset always starts at zero.
    let scratch = match scratch_buffer {
        Some(buf) => sys::ggml_scratch {
            offs: 0,
            size: buf.size(),
            data: buf.data,
        },
        None => sys::ggml_scratch {
            offs: 0,
            size: 0,
            data: std::ptr::null_mut(),
        },
    };
    // SAFETY: this just passes (most likely uninitialized) memory buffer to the ggml C API
    unsafe {
        sys::ggml_set_scratch(self.as_ptr(), scratch);
    }
}

/// Creates a new 1D tensor.
pub fn new_tensor_1d(&self, typ: Type, ne0: usize) -> Tensor {
let raw = unsafe { sys::ggml_new_tensor_1d(self.as_ptr(), typ.into(), usize_to_i64(ne0)) };
Expand Down Expand Up @@ -294,7 +289,7 @@ impl Context {

/// Creates a new tensor with the values of `a`, but normalized.
///
/// The raw tensor produced by `sys::ggml_norm` is wrapped with `new_tensor_raw` before
/// being returned.
pub fn op_norm(&self, a: &Tensor) -> Tensor {
// NOTE(review): the two bindings below look like the before/after sides of the diff hunk.
// The second shadows the first and additionally passes `crate::DEFAULT_EPS`, presumably the
// epsilon argument expected by the updated `ggml_norm`; confirm against the GGML header.
let tensor = unsafe { sys::ggml_norm(self.as_ptr(), a.ptr.as_ptr()) };
let tensor = unsafe { sys::ggml_norm(self.as_ptr(), a.ptr.as_ptr(), crate::DEFAULT_EPS) };
self.new_tensor_raw(tensor)
}

Expand Down
Loading

0 comments on commit e5e0fe1

Please sign in to comment.