
Commit b665a51

added more presets for Nemo-based models

* updated Readme, will come back to it soon

MaggotHATE committed Aug 16, 2024
1 parent 6e084a5
Showing 16 changed files with 147,916 additions and 147,494 deletions.
Makefile (7 changes: 3 additions & 4 deletions)
@@ -444,14 +444,14 @@ OBJS_GGUF = \
ifdef OPENBLAS64
CXXFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64)
CFLAGS += $(shell pkg-config --cflags-only-other openblas64)
-LDFLAGS += $(shell pkg-config --libs openblas64)
+LDFLAGS += $(shell pkg-config --libs openblas64) --static
OBJS_GGUF += $(TMP)t_ggml-blas.o
endif # GGML_OPENBLAS

ifdef OPENBLAS
CXXFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas)
CFLAGS += $(shell pkg-config --cflags-only-other openblas)
-LDFLAGS += $(shell pkg-config --libs openblas)
+LDFLAGS += $(shell pkg-config --libs openblas) --static
OBJS_GGUF += $(TMP)t_ggml-blas.o
endif # GGML_OPENBLAS

@@ -563,7 +563,6 @@ endif
CXXFLAGS_CL += -lclblast -lOpenCL
CXXFLAGS_UI_CL += -lclblast -lOpenCL

-
#OBJS_GGUF_CL = $(TMP)cl_ggml-quants.o $(TMP)cl_ggml-opencl-gguf.o $(TMP)cl_ggml.o $(TMP)cl_ggml-alloc.o $(TMP)cl_ggml-backend.o $(TMP)cl_llama.o $(TMP)cl_sampling.o $(TMP)cl_common.o $(TMP)cl_grammar-parser.o
OBJS_GGUF_CL = \
$(TMP)clt_ggml.o \
@@ -583,7 +582,7 @@ OBJS_GGUF_CL = \
$(TMP)clt_unicode.o \
$(TMP)clt_unicode-data.o \
$(TMP)clt_sgemm.o

$(TMP)clt_ggml-opencl-gguf.o: $(ggmlsrc_f)/ggml-opencl.cpp $(ggmlsrc_f)/ggml-opencl.h
$(CXX) $(CXXFLAGS_CL) -c $< -o $@

README.md (1 change: 1 addition & 0 deletions)
@@ -58,6 +58,7 @@ Libraries:
* `make chat_cl` for Clblast build
* `make chat_vk` for Vulkan build
* `make chatTest`, `make chatTest_cl` and `make chatTest_vk` for building the debugging program
+* for CPU-only builds use `OPENBLAS64=1` to enable OpenBLAS (helps with prompt processing)
* if your GPU/iGPU don't support Vulkan, compile with SDL2=1
* if you need Windows console for debugging, compile with CONW=1
* see more in makefile
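Usage note (not part of the diff): with this flag, a CPU-only binary would presumably be built as `make chat OPENBLAS64=1`, taking `chat` as the base target implied by the list above; pkg-config must be able to locate `openblas64`, since the Makefile hunk above queries it for compile flags and libraries.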
base/common.cpp (6 changes: 0 additions & 6 deletions)
@@ -1294,12 +1294,6 @@ std::string llama_detokenize(llama_context * ctx, const std::vector<llama_token>
return text;
}

-bool llama_should_add_bos_token(const llama_model * model) {
-const int add_bos = llama_add_bos_token(model);
-
-return add_bos != -1 ? bool(add_bos) : (llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM);
-}
-
//
// YAML utils
//
base/common.h (4 changes: 0 additions & 4 deletions)
@@ -217,10 +217,6 @@ std::string llama_detokenize(
const std::vector<llama_token> & tokens,
bool special = true);

-// Uses the value from the model metadata if possible, otherwise
-// defaults to true when model type is SPM, otherwise false.
-bool llama_should_add_bos_token(const llama_model * model);
-
//
// YAML utils
//
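Editor's sketch, not part of this commit: the two hunks above drop `llama_should_add_bos_token()` from common, and a caller that still wants the old fallback behavior could inline the deleted logic. This assumes `llama_add_bos_token()` still returns -1 when the model metadata does not specify a value, as in the removed body.

#include <stdbool.h>
#include "llama.h"

// Sketch only: the deleted helper's logic, inlined at the call site.
// -1 from llama_add_bos_token() means "not specified in metadata".
static bool should_add_bos(const struct llama_model * model) {
    const int add_bos = llama_add_bos_token(model);
    if (add_bos != -1) {
        return add_bos != 0; // explicit metadata value wins
    }
    return llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM; // SPM models default to BOS
}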
base/ggml/ggml-backend.c (13 changes: 5 additions & 8 deletions)
@@ -1018,10 +1018,6 @@ static bool ggml_is_view_op(enum ggml_op op) {
#define GGML_SCHED_MAX_BACKENDS 16
#endif

-#ifndef GGML_SCHED_MAX_SPLITS
-#define GGML_SCHED_MAX_SPLITS 2048
-#endif
-
#ifndef GGML_SCHED_MAX_SPLIT_INPUTS
#define GGML_SCHED_MAX_SPLIT_INPUTS GGML_MAX_SRC
#endif
@@ -1125,7 +1121,8 @@ static int ggml_backend_sched_backend_from_buffer(ggml_backend_sched_t sched, co
}

#if 0
-static char causes[GGML_DEFAULT_GRAPH_SIZE*16 + GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS][128]; // debug only
+#define GGML_SCHED_MAX_SPLITS_DEBUG 4096
+static char causes[GGML_DEFAULT_GRAPH_SIZE*16 + GGML_SCHED_MAX_SPLITS_DEBUG*GGML_SCHED_MAX_SPLIT_INPUTS][128]; // debug only
#define SET_CAUSE(node, ...) sprintf(causes[hash_id(node)], __VA_ARGS__)
#define GET_CAUSE(node) causes[hash_id(node)]
#else
@@ -1549,7 +1546,6 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
sched->splits = realloc(sched->splits, sched->splits_capacity * sizeof(struct ggml_backend_sched_split));
GGML_ASSERT(sched->splits != NULL);
}
-GGML_ASSERT(i_split < GGML_SCHED_MAX_SPLITS);
split = &sched->splits[i_split];
split->backend_id = node_backend_id;
split->i_start = i;
@@ -1865,13 +1861,14 @@ ggml_backend_sched_t ggml_backend_sched_new(
sched->hv_tensor_backend_ids = malloc(sched->hash_set.size * sizeof(sched->hv_tensor_backend_ids[0]));
sched->hv_tensor_copies = malloc(sched->hash_set.size * sched->n_backends * sched->n_copies * sizeof(struct ggml_tensor *));

-const size_t nodes_size = graph_size + GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS*2;
+const size_t ggml_sched_max_splits = graph_size; // at most there is one split for each node in the graph
+const size_t nodes_size = graph_size + ggml_sched_max_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2;
sched->node_backend_ids = calloc(nodes_size, sizeof(sched->node_backend_ids[0]));
sched->leaf_backend_ids = calloc(nodes_size, sizeof(sched->leaf_backend_ids[0]));
sched->prev_node_backend_ids = calloc(nodes_size, sizeof(sched->prev_node_backend_ids[0]));
sched->prev_leaf_backend_ids = calloc(nodes_size, sizeof(sched->prev_leaf_backend_ids[0]));

-sched->context_buffer_size = GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + ggml_graph_overhead_custom(graph_size, false);
+sched->context_buffer_size = ggml_sched_max_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + ggml_graph_overhead_custom(graph_size, false);
sched->context_buffer = malloc(sched->context_buffer_size);

const int initial_splits_capacity = 16;
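Editor's sketch, not from the commit: dropping the fixed `GGML_SCHED_MAX_SPLITS` cap works because a split always starts at a graph node, so a graph of N nodes can produce at most N splits; the hunks above size the backend-id arrays and the context buffer from that bound instead of the old constant. The worst-case arithmetic, with `max_split_inputs` standing in for `GGML_SCHED_MAX_SPLIT_INPUTS` (defined as `GGML_MAX_SRC` earlier in the file):

#include <stddef.h>

// Sketch of the new worst-case sizing once the fixed split cap is gone.
// Each split may pull in up to max_split_inputs input tensors; the
// factor of 2 mirrors the allocation in ggml_backend_sched_new above.
static size_t sched_nodes_size(size_t graph_size, size_t max_split_inputs) {
    const size_t max_splits = graph_size; // one split per node at most
    return graph_size + max_splits * max_split_inputs * 2;
}

The practical effect is that very large graphs no longer trip the removed GGML_ASSERT; the cost is that these buffers now scale with the graph size rather than a constant.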
(diffs for the remaining 11 of the 16 changed files were not loaded)
