Bugfix: missing hparam type_vocab_size
#32
base: master
Changes from 3 commits
dd15c0f
a3c8548
7ef3126
79825d8
@@ -23,6 +23,7 @@ struct bert_hparams
     int32_t n_intermediate = 1536;
     int32_t n_head = 12;
     int32_t n_layer = 6;
+    int32_t n_vocab_size = 2;
     int32_t f16 = 1;
 };
@@ -364,6 +365,7 @@ struct bert_ctx * bert_load_from_file(const char *fname)
     fin.read((char *)&hparams.n_intermediate, sizeof(hparams.n_intermediate));
     fin.read((char *)&hparams.n_head, sizeof(hparams.n_head));
     fin.read((char *)&hparams.n_layer, sizeof(hparams.n_layer));
+    fin.read((char *)&hparams.n_vocab_size, sizeof(hparams.n_vocab_size));
Review comment: so does here (the added positional read is part of the same format break discussed below; see the sketch after this hunk).
     fin.read((char *)&hparams.f16, sizeof(hparams.f16));

     printf("%s: n_vocab = %d\n", __func__, hparams.n_vocab);
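Side note (my addition, not part of this PR): because the hparams header is read positionally, an older .bin file written without the new field would feed the bytes of f16 into n_vocab_size and then read past the end of the header, so existing model files have to be reconverted. A minimal sketch of one way to avoid that break, assuming a hypothetical format_version field in the header; the current format has no such field, so this is illustration, not a drop-in fix:

    // Sketch only, not part of this PR: one way to avoid breaking old files
    // would be to gate the extra read on a format version. The current .bin
    // header has no version field, so format_version is hypothetical and is
    // hard-coded here just to keep the sketch self-contained.
    const int32_t format_version = 2;  // assume: read near the top of the header
    if (format_version >= 2) {
        // newer files carry the extra hparam
        fin.read((char *)&hparams.n_vocab_size, sizeof(hparams.n_vocab_size));
    } else {
        // older files: fall back to plain BERT's type_vocab_size of 2 (segment A/B)
        hparams.n_vocab_size = 2;
    }
    fin.read((char *)&hparams.f16, sizeof(hparams.f16));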
@@ -372,6 +374,7 @@ struct bert_ctx * bert_load_from_file(const char *fname)
     printf("%s: n_intermediate = %d\n", __func__, hparams.n_intermediate);
     printf("%s: n_head = %d\n", __func__, hparams.n_head);
     printf("%s: n_layer = %d\n", __func__, hparams.n_layer);
+    printf("%s: n_vocab_size = %d\n", __func__, hparams.n_vocab_size);
     printf("%s: f16 = %d\n", __func__, hparams.f16);
     }
@@ -489,11 +492,13 @@ struct bert_ctx * bert_load_from_file(const char *fname)
     const int n_intermediate = hparams.n_intermediate;
     const int n_max_tokens = hparams.n_max_tokens;
     const int n_vocab = hparams.n_vocab;
+    const int n_vocab_size = hparams.n_vocab_size;

     model.layers.resize(n_layer);

     model.word_embeddings = ggml_new_tensor_2d(ctx, wtype, n_embd, n_vocab);
-    model.token_type_embeddings = ggml_new_tensor_2d(ctx, wtype, n_embd, 2);
+    model.token_type_embeddings = ggml_new_tensor_2d(ctx, wtype, n_embd, n_vocab_size);
Review comment: @skeskinen
     model.position_embeddings = ggml_new_tensor_2d(ctx, wtype, n_embd, n_max_tokens);

     model.ln_e_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);
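Side note (my addition): the second dimension of token_type_embeddings is indexed by the token-type (segment) id at inference time, which is why hard-coding 2 breaks checkpoints whose type_vocab_size differs. A minimal sketch of that lookup, reusing the surrounding names; ctx0 and N stand in for the graph context and token count, and the actual graph-building code in the loader's eval path is assumed, not quoted:

    // Sketch: token-type embedding lookup for N tokens, all in segment 0.
    // ggml_get_rows selects rows 0..n_vocab_size-1 of token_type_embeddings,
    // so the tensor really must have n_vocab_size rows.
    struct ggml_tensor * token_types = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
    ggml_set_zero(token_types);  // every token gets type id 0
    struct ggml_tensor * tt_embd =
        ggml_get_rows(ctx0, model.token_type_embeddings, token_types);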
Review comment: This is a breaking change to the model file format, since it adds a new field. More changes like this are likely in the future, so we may want to move to GGUF.
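For illustration only (my addition, not from this PR): GGUF stores hyperparameters as named key/value metadata instead of a fixed positional header, so adding a field like type_vocab_size would not invalidate older files. A rough sketch of what the lookup could look like with ggml's GGUF reader; the key name "bert.type_vocab_size" is an assumption, and the gguf_* declarations live in ggml.h or gguf.h depending on the ggml version:

    // Sketch only: read type_vocab_size from GGUF key/value metadata.
    // Assumes ggml's gguf_* API; the key name is hypothetical.
    #include "ggml.h"

    static int32_t read_type_vocab_size(const char * fname) {
        struct gguf_init_params params = { /*no_alloc =*/ true, /*ctx =*/ NULL };
        struct gguf_context * gctx = gguf_init_from_file(fname, params);
        if (gctx == NULL) {
            return 2;  // unreadable file: fall back to the plain-BERT default
        }
        int32_t n_vocab_size = 2;  // default when the key is absent
        const int key_id = gguf_find_key(gctx, "bert.type_vocab_size");
        if (key_id >= 0) {
            n_vocab_size = (int32_t) gguf_get_val_u32(gctx, key_id);
        }
        gguf_free(gctx);
        return n_vocab_size;
    }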