module llama.model;

import llama.llama;
import llama.owned;

/// Default model params with `nGpuLayers` GPU layers.
llama_model_params modelParams(int nGpuLayers = 0) @nogc nothrow
{
    auto p = llama_model_default_params();
    p.n_gpu_layers = nGpuLayers;
    return p;
}

/// A loaded llama model that frees itself on destruction.
struct LlamaModel
{
    mixin Owned!(llama_model, llama_model_free);

    /// Load from a GGUF file with pre-built params. Check `if (model)` after loading.
    static LlamaModel loadFromFile(const(char)* path, llama_model_params params) @nogc nothrow
    {
        return LlamaModel(llama_model_load_from_file(path, params));
    }

    /// Load from a D string path, with optional GPU layer count.
    /// Check `if (model)` after loading.
    static LlamaModel loadFromFile(string path, int nGpuLayers = 0)
    {
        import std.string : toStringz;

        // Single code path for loading: defer to the C-string overload.
        return loadFromFile(path.toStringz, modelParams(nGpuLayers));
    }

    /// Load only the vocabulary (no weights). Useful for tokenization without inference.
    /// Check `if (model)` after loading.
    static LlamaModel loadVocabOnly(string path)
    {
        import std.string : toStringz;

        auto p = modelParams(0);
        p.vocab_only = true;
        return loadFromFile(path.toStringz, p);
    }

    /// Model vocabulary.
    @property const(llama_vocab)* vocab() @nogc nothrow { return llama_model_get_vocab(_ptr); }

    /// Number of tokens in the vocabulary.
    @property int nVocab() @nogc nothrow
    {
        return llama_vocab_n_tokens(cast(llama_vocab*) llama_model_get_vocab(_ptr));
    }

    @property int nEmbd() @nogc nothrow { return llama_model_n_embd(_ptr); } /// Embedding size.
    @property int nLayer() @nogc nothrow { return llama_model_n_layer(_ptr); } /// Number of layers.
    @property int nHead() @nogc nothrow { return llama_model_n_head(_ptr); } /// Attention head count.

    @property bool hasEncoder() @nogc nothrow { return llama_model_has_encoder(_ptr); } /// True for encoder-decoder models (e.g. T5).
    @property bool hasDecoder() @nogc nothrow { return llama_model_has_decoder(_ptr); } /// Whether llama.cpp reports a decoder for this model.
    @property bool isRecurrent() @nogc nothrow { return llama_model_is_recurrent(_ptr); } /// True for recurrent models (Mamba, RWKV, etc.).

    /// Decoder start token reported by the model; falls back to the vocabulary's
    /// BOS token when the model reports `LLAMA_TOKEN_NULL`.
    @property llama_token decoderStartToken() @nogc nothrow
    {
        llama_token t = llama_model_decoder_start_token(_ptr);
        if (t == LLAMA_TOKEN_NULL)
            t = llama_vocab_bos(cast(llama_vocab*) llama_model_get_vocab(_ptr));
        return t;
    }

    @property int nCtxTrain() @nogc nothrow { return llama_model_n_ctx_train(_ptr); } /// Training context length.
    @property ulong nParams() @nogc nothrow { return llama_model_n_params(_ptr); } /// Total parameter count.
    @property ulong size() @nogc nothrow { return llama_model_size(_ptr); } /// Model size in bytes.

    /// Short description string (architecture + size). Returns `""` on failure.
    @property string desc() @trusted
    {
        char[256] buf;
        auto handle = _ptr;
        return readCString(buf[],
            (char* dst, size_t cap) => llama_model_desc(handle, dst, cap));
    }

    /++
        Jinja chat template embedded in the model (or the named variant).
        Returns `null` if none is available.
        Pass `name = null` for the default template.
    +/
    const(char)* chatTemplate(const(char)* name = null) @trusted @nogc nothrow
    {
        return llama_model_chat_template(_ptr, name);
    }

    // ── Metadata access ───────────────────────────────────────────────────────

    /// Number of key/value metadata pairs.
    @property int metaCount() @nogc nothrow { return llama_model_meta_count(_ptr); }

    /// Metadata key name at `index`. Returns `""` on failure.
    string metaKeyAt(int index) @trusted
    {
        char[512] buf;
        auto handle = _ptr;
        return readCString(buf[],
            (char* dst, size_t cap) => llama_model_meta_key_by_index(handle, index, dst, cap));
    }

    /// Metadata value (as string) at `index`. Returns `""` on failure.
    /// Values longer than the internal stack buffer are fetched in full.
    string metaValAt(int index) @trusted
    {
        char[4096] buf;
        auto handle = _ptr;
        return readCString(buf[],
            (char* dst, size_t cap) => llama_model_meta_val_str_by_index(handle, index, dst, cap));
    }

    /// Metadata value (as string) for the given `key`. Returns `""` on failure.
    /// Values longer than the internal stack buffer are fetched in full.
    string metaVal(string key) @trusted
    {
        import std.string : toStringz;

        char[4096] buf;
        auto handle = _ptr;
        // Convert once, outside the callback, so a retry reuses the same C string.
        const(char)* ckey = key.toStringz;
        return readCString(buf[],
            (char* dst, size_t cap) => llama_model_meta_val_str(handle, ckey, dst, cap));
    }
}

/++
    Runs a C string getter and returns its output as an owned D string.

    `read(dst, cap)` is expected to write at most `cap` bytes (terminator
    included) into `dst` and return the length of the string, or a negative
    value on failure.

    NOTE(review): this assumes the getters return the length of the *full*
    string even when the output was truncated (snprintf-style), which is how
    llama.cpp implements them; confirm against the `llama.h` in use. Under
    that contract the previous `buf[0 .. n]` slice could run past the buffer.

    Params:
        scratch = caller-provided buffer tried first (typically on the stack)
        read    = callback performing the actual C call

    Returns: the string, or `""` when the getter fails or yields nothing.
+/
private string readCString(scope char[] scratch,
                           scope int delegate(char* dst, size_t cap) read) @trusted
{
    const int reported = read(scratch.ptr, scratch.length);
    if (reported <= 0)
        return "";

    const size_t needed = cast(size_t) reported;
    if (needed < scratch.length)
        return scratch[0 .. needed].idup;

    // The value did not fit: retry with room for the full string plus the
    // terminating NUL instead of slicing beyond `scratch`.
    auto heap = new char[needed + 1];
    const int second = read(heap.ptr, heap.length);
    if (second <= 0)
        return "";

    // Clamp defensively in case the reported length changed between calls.
    const size_t got = cast(size_t) second;
    const size_t len = got < heap.length ? got : heap.length - 1;
    return heap[0 .. len].idup;
}