Update for compat with quantize-stats
comex committed Apr 8, 2023
1 parent 3bc64a6 commit 4ae05a2
Showing 3 changed files with 21 additions and 9 deletions.
examples/quantize-stats/quantize-stats.cpp (3 additions, 5 deletions)
@@ -266,15 +266,13 @@ int main(int argc, char ** argv) {
         }
     }
 
-    // Sort tensors for consistent output
-    const auto tensors = llama_internal_get_tensor_map(ctx);
-    std::map<std::string, struct ggml_tensor *> tensors_sorted { tensors.begin(), tensors.end() };
+    const auto &tensors = llama_internal_get_tensor_map(ctx);
 
     // check layer tensors
     int included_layers = 0;
     int64_t max_nelements = 0;
     bool is_f16 = false;
-    for (const auto& kv_tensor : tensors_sorted) {
+    for (const auto& kv_tensor : tensors) {
         if (!layer_included(params, kv_tensor.first)) {
             continue;
         }
@@ -315,7 +313,7 @@ int main(int argc, char ** argv) {
 
     error_stats global_stats {};
 
-    for (const auto& kv_tensor : tensors_sorted) {
+    for (const auto& kv_tensor : tensors) {
         if (!layer_included(params, kv_tensor.first)) {
             continue;
         }
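Note: the deleted std::map copy existed only to get a deterministic iteration order, because std::unordered_map iterates in an unspecified order. The accessor now returns a std::vector of (name, tensor) pairs, which iterates in insertion order, i.e. the order the tensors appear in the model file, so the sort step can simply be dropped. A standalone sketch of the property being relied on (toy values, not the llama.cpp API):

// Standalone sketch: a vector of pairs preserves insertion (file) order,
// so no sorted copy is needed for reproducible output.
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

int main() {
    // Stand-in for the real tensor map; values would be ggml_tensor pointers.
    std::vector<std::pair<std::string, int>> tensors_by_name = {
        {"tok_embeddings.weight", 0},
        {"layers.0.attention.wq.weight", 1},
        {"output.weight", 2},
    };
    for (const auto & kv : tensors_by_name) {
        printf("%s -> %d\n", kv.first.c_str(), kv.second); // always file order
    }
    return 0;
}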
llama.cpp (17 additions, 3 deletions)
@@ -1,3 +1,4 @@
+// XXX: CHECK REGEX
 #include "llama_util.h"
 #include "llama.h"

@@ -148,6 +149,9 @@ struct llama_model {
     llama_mlock mlock_buf;
     llama_mlock mlock_mmap;
 
+    // for quantize-stats only
+    std::vector<std::pair<std::string, struct ggml_tensor *>> tensors_by_name;
+
     ~llama_model() {
         if (ctx) {
             ggml_free(ctx);
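Storing a vector of pairs rather than a map trades keyed lookup for stable ordering. Since tensors_by_name exists only for quantize-stats and tests, a linear scan is enough if lookup by name is ever needed; a hypothetical helper (not part of this commit) might look like:

// Hypothetical helper, not in this commit: keyed lookup over the vector
// is a linear scan, which is fine for test/stats tooling.
#include <algorithm>
#include <string>
#include <utility>
#include <vector>

struct ggml_tensor; // opaque here; the real definition lives in ggml.h

static struct ggml_tensor * find_tensor_by_name(
        const std::vector<std::pair<std::string, struct ggml_tensor *>> & tensors,
        const std::string & name) {
    auto it = std::find_if(tensors.begin(), tensors.end(),
        [&name](const std::pair<std::string, struct ggml_tensor *> & kv) {
            return kv.first == name;
        });
    return it == tensors.end() ? nullptr : it->second;
}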
@@ -634,10 +638,13 @@ struct llama_model_loader {
         return tensor;
     }
 
-    void load_all_data(llama_progress_callback progress_callback, void * progress_callback_user_data, llama_mlock * lmlock) {
+    void done_getting_tensors() {
         if (num_ggml_tensors_created != tensors_map.tensors.size()) {
             throw std::string("llama.cpp: file contained more tensors than expected");
         }
+    }
+
+    void load_all_data(llama_progress_callback progress_callback, void * progress_callback_user_data, llama_mlock * lmlock) {
         size_t data_size = 0;
         for (const llama_load_tensor & lt : tensors_map.tensors) {
             data_size += lt.size;
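The tensor-count check used to run at the top of load_all_data(). Splitting it out as done_getting_tensors() lets the model loader confirm that every tensor in the file was claimed by a get_tensor() call before anything touches tensor data (see the call site in the next hunk). A toy model of the resulting lifecycle, with simplified stand-in types rather than the real loader API:

// Toy sketch of the loader lifecycle after this change.
#include <cstddef>
#include <stdexcept>
#include <string>
#include <vector>

struct toy_loader {
    std::vector<std::string> tensors_in_file; // parsed from the file header
    size_t num_tensors_created = 0;

    void get_tensor(const std::string & /*name*/) {
        // ... create the ggml tensor for this file entry ...
        num_tensors_created++;
    }

    // Now callable on its own, before any data is read.
    void done_getting_tensors() const {
        if (num_tensors_created != tensors_in_file.size()) {
            throw std::runtime_error("file contained more tensors than expected");
        }
    }

    void load_all_data() {
        // ... mmap or read the tensor data ...
    }
};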
@@ -928,6 +935,13 @@ static void llama_model_load_internal(
         }
     }
 
+    ml->done_getting_tensors();
+
+    // populate `tensors_by_name`
+    for (llama_load_tensor & lt : ml->tensors_map.tensors) {
+        model.tensors_by_name.emplace_back(lt.name, lt.ggml_tensor);
+    }
+
     ml->load_all_data(progress_callback, progress_callback_user_data, use_mlock ? &lctx.model.mlock_mmap : NULL);
 
     model.mapping = std::move(ml->mapping);
@@ -1887,6 +1901,6 @@ const char * llama_print_system_info(void) {
 }
 
 // For internal test use
-std::unordered_map<std::string, struct ggml_tensor *>& llama_internal_get_tensor_map(struct llama_context * ctx) {
-    return ctx->model.tensors;
+std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx) {
+    return ctx->model.tensors_by_name;
 }
llama.h (1 addition, 1 deletion)
@@ -174,7 +174,7 @@ extern "C" {
 //
 // Internal function exposed for tests and benchmarks
 //
-std::unordered_map<std::string, struct ggml_tensor *>& llama_internal_get_tensor_map(struct llama_context * ctx);
+std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx);
 #endif

#endif
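For callers the only change is the element type: iteration code stays the same, but the pairs now arrive in file order. A hypothetical test snippet (assuming the surrounding #ifdef guard is enabled at compile time and ctx holds a loaded model):

// Hypothetical test helper, not from this commit: dump every tensor
// in file order via the internal accessor.
#include <cstdio>
#include "ggml.h"
#include "llama.h"

static void dump_tensor_names(struct llama_context * ctx) {
    for (const auto & kv : llama_internal_get_tensor_map(ctx)) {
        printf("%-48s %lld elements\n",
               kv.first.c_str(), (long long) ggml_nelements(kv.second));
    }
}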
