1 module llama.model;
2 
3 import llama.llama;
4 import llama.owned;
5 
/++
Build model-loading parameters from the library defaults.

Params:
    nGpuLayers = value stored in the `n_gpu_layers` field (defaults to 0).

Returns: the result of `llama_model_default_params()` with `n_gpu_layers`
         replaced by `nGpuLayers`; every other field keeps its default.
+/
llama_model_params modelParams(int nGpuLayers = 0) @nogc nothrow
{
    llama_model_params params = llama_model_default_params();
    params.n_gpu_layers = nGpuLayers;
    return params;
}
13 
/++
A loaded llama model handle.

Ownership of the underlying `llama_model*` is provided by the `Owned` mixin,
instantiated with `llama_model_free` as the release function. The accessors
below forward the stored pointer straight to the C API, so check
`if (model)` after loading and before calling them.
+/
struct LlamaModel
{
    mixin Owned!(llama_model, llama_model_free);

    // ── Loading ───────────────────────────────────────────────────────────────

    /// Load from a GGUF file with pre-built params. Check `if (model)` after loading.
    static LlamaModel loadFromFile(const(char)* path, llama_model_params params) @nogc nothrow
    {
        return LlamaModel(llama_model_load_from_file(path, params));
    }

    /// Load from a D string path, with optional GPU layer count.
    /// Check `if (model)` after loading.
    static LlamaModel loadFromFile(string path, int nGpuLayers = 0)
    {
        import std.string : toStringz;
        return LlamaModel(llama_model_load_from_file(path.toStringz, modelParams(nGpuLayers)));
    }

    /// Load only the vocabulary (no weights). Useful for tokenization without inference.
    /// Check `if (model)` after loading.
    static LlamaModel loadVocabOnly(string path)
    {
        import std.string : toStringz;
        auto p = modelParams(0);
        p.vocab_only = true;
        return LlamaModel(llama_model_load_from_file(path.toStringz, p));
    }

    // ── Basic properties ──────────────────────────────────────────────────────

    /// Model vocabulary.
    @property const(llama_vocab)* vocab() @nogc nothrow { return llama_model_get_vocab(_ptr); }

    /// Number of tokens in the vocabulary.
    @property int nVocab() @nogc nothrow
    {
        return llama_vocab_n_tokens(cast(llama_vocab*) llama_model_get_vocab(_ptr));
    }

    /// Embedding size.
    @property int nEmbd() @nogc nothrow { return llama_model_n_embd(_ptr); }

    /// Number of layers.
    @property int nLayer() @nogc nothrow { return llama_model_n_layer(_ptr); }

    /// Attention head count.
    @property int nHead() @nogc nothrow { return llama_model_n_head(_ptr); }

    /// True for encoder-decoder models (e.g. T5).
    @property bool hasEncoder() @nogc nothrow { return llama_model_has_encoder(_ptr); }

    /// Result of `llama_model_has_decoder` for this model.
    @property bool hasDecoder() @nogc nothrow { return llama_model_has_decoder(_ptr); }

    /// True for recurrent models (Mamba, RWKV, etc.).
    @property bool isRecurrent() @nogc nothrow { return llama_model_is_recurrent(_ptr); }

    /// Start token for the decoder. When the model reports `LLAMA_TOKEN_NULL`
    /// (no dedicated decoder start token), the vocabulary's BOS token is
    /// returned instead.
    @property llama_token decoderStartToken() @nogc nothrow
    {
        llama_token t = llama_model_decoder_start_token(_ptr);
        if (t == LLAMA_TOKEN_NULL)
            t = llama_vocab_bos(cast(llama_vocab*) llama_model_get_vocab(_ptr));
        return t;
    }

    /// Training context length.
    @property int nCtxTrain() @nogc nothrow { return llama_model_n_ctx_train(_ptr); }

    /// Total parameter count.
    @property ulong nParams() @nogc nothrow { return llama_model_n_params(_ptr); }

    /// Model size in bytes.
    @property ulong size() @nogc nothrow { return llama_model_size(_ptr); }

    /// Short description string (architecture + size). Returns `""` on failure.
    @property string desc() @trusted
    {
        char[256] buf;
        return fetchText((char[] dst) => llama_model_desc(_ptr, dst.ptr, dst.length), buf[]);
    }

    /++
    Jinja chat template embedded in the model (or the named variant).
    Returns `null` if none is available.
    Pass `name = null` for the default template.
    +/
    const(char)* chatTemplate(const(char)* name = null) @trusted @nogc nothrow
    {
        return llama_model_chat_template(_ptr, name);
    }

    // ── Metadata access ───────────────────────────────────────────────────────

    /// Number of key/value metadata pairs.
    @property int metaCount() @nogc nothrow { return llama_model_meta_count(_ptr); }

    /// Metadata key name at `index`. Returns `""` on failure.
    string metaKeyAt(int index) @trusted
    {
        char[512] buf;
        return fetchText(
            (char[] dst) => llama_model_meta_key_by_index(_ptr, index, dst.ptr, dst.length),
            buf[]);
    }

    /// Metadata value (as string) at `index`. Returns `""` on failure.
    /// Values longer than the internal stack buffer are returned in full.
    string metaValAt(int index) @trusted
    {
        char[4096] buf;
        return fetchText(
            (char[] dst) => llama_model_meta_val_str_by_index(_ptr, index, dst.ptr, dst.length),
            buf[]);
    }

    /// Metadata value (as string) for the given `key`. Returns `""` on failure.
    /// Values longer than the internal stack buffer are returned in full.
    string metaVal(string key) @trusted
    {
        import std.string : toStringz;
        char[4096] buf;
        // Convert once; the getter may be invoked twice if the value is long.
        const(char)* ckey = key.toStringz;
        return fetchText(
            (char[] dst) => llama_model_meta_val_str(_ptr, ckey, dst.ptr, dst.length),
            buf[]);
    }

    /++
    Run a string getter that follows `snprintf` conventions and copy its
    output into a GC-owned `string`.

    The llama.cpp getters used here return the full length of the source
    string (excluding the terminator) even when the destination was too small
    and the output was truncated, or a negative value on failure. Slicing the
    destination with that length unchecked would read past its end, so the
    length is validated first and the getter is re-run with a buffer of the
    reported size when the first attempt did not fit.

    Params:
        fill  = invokes the C getter on the supplied destination and returns
                its result.
        first = initial (typically stack) destination buffer.

    Returns: the complete string, or `""` if the getter reports failure.
    +/
    private static string fetchText(scope int delegate(char[] dst) fill, char[] first)
    {
        const int n = fill(first);
        if (n <= 0)
            return "";

        const size_t needed = cast(size_t) n;
        // Strictly smaller: one byte of the buffer is taken by the terminator.
        if (needed < first.length)
            return first[0 .. needed].idup;

        // Output was truncated: retry with room for the whole string + NUL.
        auto big = new char[](needed + 1);
        const int m = fill(big);
        if (m <= 0)
            return "";

        // Never trust the second length beyond what the buffer can hold.
        size_t got = cast(size_t) m;
        if (got > needed)
            got = needed;
        return big[0 .. got].idup;
    }
}