Skip to content
This repository has been archived by the owner on Jun 24, 2024. It is now read-only.

Standalone loader #125

Merged
merged 46 commits into from
Apr 22, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
bdbea68
Add loader stub for GGJT
iacore Apr 6, 2023
b0a666f
Add loading code for ggjt
iacore Apr 6, 2023
9eefdc5
code cleanup that doesn't change anything
iacore Apr 6, 2023
c212c53
more code cleanup
iacore Apr 6, 2023
bfaec3a
minor change
iacore Apr 7, 2023
b6044ee
Add non-mmap loader for GGJT
iacore Apr 7, 2023
1872dda
Prefer traits in loader.rs
iacore Apr 7, 2023
ec1fca7
cargo fmt
iacore Apr 7, 2023
cc846ae
cargo clippy --fix
iacore Apr 7, 2023
bf847dd
Remove ggml::Tensor::set_data
iacore Apr 7, 2023
ea7094c
fix(llama): buffer tokens until valid UTF-8
philpax Apr 7, 2023
c848d5e
Add standalone loader
iacore Apr 8, 2023
8390593
Move loader to standalone crate llama-loader
iacore Apr 8, 2023
15fe19b
[llama-loader] Support non-copy loader
iacore Apr 8, 2023
2e9311d
Use functions from the new crate
iacore Apr 8, 2023
4dd0fc5
Merge branch 'main' into llama-loader
philpax Apr 13, 2023
c40e36e
Merge branch 'main' of github.com:rustformers/llama-rs into llama-loader
philpax Apr 13, 2023
34429e0
refactor(llama): pass mut tensors down
philpax Apr 13, 2023
38e7d58
feat/loader Make hparams configurable
iacore Apr 14, 2023
5dfc55d
feat/loader Add hook to support multi-part model loading
iacore Apr 14, 2023
48efd74
rename llama-loader to ggml-loader
iacore Apr 14, 2023
0fbbedd
Merge branch 'main' into llama-loader
philpax Apr 19, 2023
d65996d
fix
jon-chuang Apr 12, 2023
267d8ae
no_alloc
jon-chuang Apr 12, 2023
81a6979
chore: fix clippy
philpax Apr 19, 2023
80d189e
refactor(util): make find_all_model_files error
philpax Apr 19, 2023
85e1148
UnsupportedElementtype -> UnsupportedElementType
philpax Apr 19, 2023
3f29992
feat: experimental loader2 wire-up (incomplete)
philpax Apr 19, 2023
94951c4
fix dead doc link
philpax Apr 19, 2023
69f355b
feat: turn mmap on by default, add --no-mmap
philpax Apr 19, 2023
17bc0cc
Fix loading GGJT
iacore Apr 20, 2023
6641ae9
minor fix
iacore Apr 20, 2023
3910b6a
Add mmap
iacore Apr 20, 2023
e4834bd
cargo fmt
iacore Apr 20, 2023
c380cee
Make loader2 default
iacore Apr 20, 2023
5b9788b
fix: remove dbg!(start_pos)
philpax Apr 22, 2023
cbf0756
fix: respect --no-mmap
philpax Apr 22, 2023
8813b0f
Merge branch 'main' of github.com:rustformers/llama-rs into llama-loader
philpax Apr 22, 2023
430abfe
chore: remove old comments
philpax Apr 22, 2023
bf6a917
chore: remove unused error case
philpax Apr 22, 2023
9b908ae
fix: remove some panics
philpax Apr 22, 2023
d8c4ca6
feat: remove AlreadyAdded error
philpax Apr 22, 2023
cabc4c9
minor fix
iacore Apr 22, 2023
1930496
fix: Vocabulary::push_token is infallible
philpax Apr 22, 2023
bdb9856
fix: bail on multipart models with loader2
philpax Apr 22, 2023
b41fe14
refactor: make Vocabulary::push_token pub(crate)
philpax Apr 22, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 10 additions & 22 deletions llama-rs/src/loader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,37 +84,25 @@ pub(crate) fn load(
// Load vocabulary
// ===============
let vocabulary = {
let mut id_to_token = vec![];
let mut id_to_token_score = vec![];
let mut token_to_id = HashMap::new();
let mut max_token_length = 0;
let mut vocab = Vocabulary::default();

for i in 0..hparams.n_vocab {
let len = read_i32(&mut reader)?;
let id = i as TokenId;
let token = read_bytes_with_len(&mut reader, len.try_into()?)?;
max_token_length = max_token_length.max(token.len());
id_to_token.push(token.clone());
token_to_id.insert(token, TokenId::try_from(i)?);

// Token score, currently unused
match model_type {
ContainerType::GGMF | ContainerType::GGJT => {
let score = read_f32(&mut reader)?;
id_to_token_score.push(score);
}

let score = match model_type {
ContainerType::GGMF | ContainerType::GGJT => read_f32(&mut reader)?,
ContainerType::GGML => {
// Legacy model, set empty score
id_to_token_score.push(0.);
0.
}
}
}
};

Vocabulary {
id_to_token,
id_to_token_score,
token_to_id,
max_token_length,
vocab.push_token(id, token, score)?;
}

vocab
};

// for the big tensors, we have the option to store the data in 16-bit
Expand Down
10 changes: 1 addition & 9 deletions llama-rs/src/vocabulary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,6 @@ pub enum AddTokenError {
/// The actual ID.
actual_id: TokenId,
},
#[error("a token with the same id already exists, id={id}")]
/// A token with the same ID was already added.
AlreadyAdded {
/// The ID of the token that was already added.
id: TokenId,
},
}

impl Vocabulary {
Expand All @@ -65,9 +59,7 @@ impl Vocabulary {
self.max_token_length = self.max_token_length.max(content.len());
self.id_to_token.push(content.clone());
self.id_to_token_score.push(score);
if self.token_to_id.insert(content, id).is_some() {
iacore marked this conversation as resolved.
Show resolved Hide resolved
return Err(AddTokenError::AlreadyAdded { id });
}
self.token_to_id.insert(content, id);
Ok(())
}

Expand Down