Skip to content

Commit

Permalink
server : fixes
Browse files (browse the repository at this point in the history)
ggml-ci
  • Loading branch information
ggerganov committed Nov 22, 2024
1 parent 7dc6ae5 commit 0c74590
Showing 1 changed file with 13 additions and 16 deletions.
29 changes: 13 additions & 16 deletions examples/server/server.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -645,18 +645,16 @@ struct server_context {

// Clear any sampling context
for (server_slot & slot : slots) {
if (slot.smpl != nullptr) {
llama_free(slot.ctx_dft);
slot.ctx_dft = nullptr;
common_sampler_free(slot.smpl);
slot.smpl = nullptr;

common_speculative_free(slot.spec);
slot.spec = nullptr;
llama_free(slot.ctx_dft);
slot.ctx_dft = nullptr;

common_sampler_free(slot.smpl);
slot.smpl = nullptr;
common_speculative_free(slot.spec);
slot.spec = nullptr;

llama_batch_free(slot.batch_spec);
}
llama_batch_free(slot.batch_spec);
}

llama_batch_free(batch);
Expand Down Expand Up @@ -688,15 +686,9 @@ struct server_context {

auto params_dft = params;

params_dft.model = params.model_draft;
params_dft.model = params.model_draft;
params_dft.n_gpu_layers = params.n_gpu_layers_draft;

if (params.draft_cpuparams.n_threads > 0) {
params_dft.cpuparams.n_threads = params.draft_cpuparams.n_threads;
}

params_dft.cpuparams_batch.n_threads = params.draft_cpuparams_batch.n_threads;

common_init_result llama_init_dft = common_init_from_params(params_dft);

model_dft = llama_init_dft.model;
Expand All @@ -708,10 +700,15 @@ struct server_context {

if (!common_speculative_are_compatible(ctx, llama_init_dft.context)) {
SRV_ERR("the draft model '%s' is not compatible with the target model '%s'\n", params.model_draft.c_str(), params.model.c_str());

llama_free (llama_init_dft.context);
llama_free_model(llama_init_dft.model);

return false;
}

cparams_dft = common_context_params_to_llama(params);
cparams_dft.n_batch = llama_n_ctx(llama_init_dft.context);

// the context is not needed - we will create one for each slot
llama_free(llama_init_dft.context);
Expand Down

0 comments on commit 0c74590

Please sign in to comment.