- Native backend returns 768-dim vectors via llama.cpp / C wrapper (/usr/local/lib/libllama_wrap.so)
- Wired :native into embed-object dispatch and exported from passepartout package
- Model preloads at daemon startup with EMBEDDING_PROVIDER=native (~30s)
- Lazy loading via *embedding-backend* :native also works (first call ~45s)
- C wrapper bridges CFFI pointer params to llama.cpp struct-by-value API
- Correct struct layouts: llama_model_params(72B), llama_context_params(136B), llama_batch(56B)
- BERT pooling: llama_get_embeddings_seq, llama_tokenize takes vocab* not model*
- FiveAM tests pass: dimensions, self-similarity, semantic ranking
- Fixed pre-existing HITL crash: boundp guard for *hitl-pending* in core-loop-act
- Lazy load guard prevents double-load of native file in embedding-native-ensure-loaded
- ROADMAP: v0.4.0 items marked DONE, v0.4.1 native embedding updated with actual implementation
25 lines
919 B
C
25 lines
919 B
C
// C wrapper for llama.cpp — bridges CFFI pointer params to struct-by-value
|
|
// Compile: gcc -shared -fPIC -I/tmp/llama.cpp/include -o libllama_wrap.so llama_wrap.c -L/usr/local/lib -lllama
|
|
|
|
#include <stddef.h>
#include <stdint.h>

#include <llama.h>
|
|
|
|
struct llama_model * llama_wrap_model_load(const char * path, struct llama_model_params * params) {
|
|
return llama_model_load_from_file(path, *params);
|
|
}
|
|
|
|
struct llama_context * llama_wrap_new_context(struct llama_model * model, struct llama_context_params * params) {
|
|
return llama_init_from_model(model, *params);
|
|
}
|
|
|
|
int32_t llama_wrap_encode(struct llama_context * ctx, struct llama_batch * batch) {
|
|
return llama_encode(ctx, *batch);
|
|
}
|
|
|
|
void llama_wrap_batch_init(struct llama_batch * batch, int32_t n_tokens, int32_t embd, int32_t n_seq_max) {
|
|
*batch = llama_batch_init(n_tokens, embd, n_seq_max);
|
|
}
|
|
|
|
void llama_wrap_batch_free(struct llama_batch * batch) {
|
|
llama_batch_free(*batch);
|
|
}
|