Skip to content

Commit

Permalink
Merge branch 'upstream' into concedo_experimental
Browse files Browse the repository at this point in the history
# Conflicts:
#	Makefile
#	README.md
#	examples/server/CMakeLists.txt
#	ggml/src/CMakeLists.txt
  • Loading branch information
LostRuins committed Sep 15, 2024
2 parents 53bf0fb + 3c7989f commit ab41e32
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 5 deletions.
2 changes: 1 addition & 1 deletion convert_hf_to_gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -1487,7 +1487,7 @@ def prepare_tensors(self):
raise ValueError(f"Unprocessed norms: {norms}")


@Model.register("LlamaForCausalLM", "MistralForCausalLM", "MixtralForCausalLM")
@Model.register("LLaMAForCausalLM", "LlamaForCausalLM", "MistralForCausalLM", "MixtralForCausalLM")
class LlamaModel(Model):
model_arch = gguf.MODEL_ARCH.LLAMA

Expand Down
12 changes: 12 additions & 0 deletions examples/server/public/loading.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<!DOCTYPE html>
<!--
  Placeholder page telling the user that the model is still loading.
  The meta refresh below asks the browser to reload this URL every 5 seconds.
-->
<html lang="en">
<head>
    <meta charset="utf-8">
    <title>Loading model</title>
    <meta http-equiv="refresh" content="5">
</head>
<body>
    <div id="loading">
        The model is loading. Please wait.<br/>
        The user interface will appear soon.
    </div>
</body>
</html>
13 changes: 11 additions & 2 deletions examples/server/server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "system-prompts.js.hpp"
#include "prompt-formats.js.hpp"
#include "json-schema-to-grammar.mjs.hpp"
#include "loading.html.hpp"

#include <atomic>
#include <chrono>
Expand Down Expand Up @@ -2593,10 +2594,16 @@ int main(int argc, char ** argv) {
return false;
};

auto middleware_server_state = [&res_error, &state](const httplib::Request &, httplib::Response & res) {
auto middleware_server_state = [&res_error, &state](const httplib::Request & req, httplib::Response & res) {
server_state current_state = state.load();
if (current_state == SERVER_STATE_LOADING_MODEL) {
res_error(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE));
auto tmp = string_split(req.path, '.');
if (req.path == "/" || tmp.back() == "html") {
res.set_content(reinterpret_cast<const char*>(loading_html), loading_html_len, "text/html; charset=utf-8");
res.status = 503;
} else {
res_error(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE));
}
return false;
}
return true;
Expand Down Expand Up @@ -2987,6 +2994,8 @@ int main(int argc, char ** argv) {
}, [&](json error_data) {
server_sent_event(sink, "error", error_data);
});
static const std::string ev_done = "data: [DONE]\n\n";
sink.write(ev_done.data(), ev_done.size());
sink.done();
return true;
};
Expand Down
2 changes: 2 additions & 0 deletions examples/server/tests/features/steps/steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -1020,6 +1020,8 @@ async def oai_chat_completions(user_prompt,
event_data = line.split(': ', 1)
assert event_data[0] == 'data', f'Bad event code received: ```{event_data}```'
chunk_raw = event_data[1]
if chunk_raw == '[DONE]':
break

chunk = json.loads(chunk_raw)
assert len(chunk['choices']) == 1, f"no choices provided, line ```{line}```"
Expand Down
2 changes: 1 addition & 1 deletion ggml/src/ggml.c
Original file line number Diff line number Diff line change
Expand Up @@ -3419,7 +3419,7 @@ double ggml_type_sizef(enum ggml_type type) {
}

// Returns the printable name of `type`, or "NONE" when `type` is not below GGML_TYPE_COUNT.
// The range check comes before the type_traits lookup, so an out-of-range value never
// indexes the table.
GGML_CALL const char * ggml_type_name(enum ggml_type type) {
    return type < GGML_TYPE_COUNT ? type_traits[type].type_name : "NONE";
}

GGML_CALL bool ggml_is_quantized(enum ggml_type type) {
Expand Down
2 changes: 1 addition & 1 deletion src/llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15903,7 +15903,7 @@ static void llama_set_inputs(llama_context & lctx, const llama_ubatch & batch) {

// clear unused states
for (int i = 0; i < n_kv; ++i) {
uint32_t cell_id = i + kv_self.head;
const uint32_t cell_id = i + kv_self.head;
llama_kv_cell & kv_cell = lctx.kv_self.cells[cell_id];

data[i] = (float) (kv_cell.src >= 0);
Expand Down

0 comments on commit ab41e32

Please sign in to comment.