module llama.chat;

import llama.llama;

/++
Apply a Jinja chat template to a list of messages.

Thin wrapper around `llama_chat_apply_template` that takes D slices instead of
pointer/length pairs.

Params:
    tmpl   = NUL-terminated template string. May be `null` to use an empty
             string (the model's embedded template is selected elsewhere).
    chat   = Messages to format, in conversation order.
    addAss = Set to `true` to append the assistant-turn prefix so the model
             continues from there.
    buf    = Destination buffer for the formatted prompt. The C API takes a
             32-bit capacity, so at most `int.max` bytes of `buf` are offered.

Returns:
    The value reported by `llama_chat_apply_template`, i.e. the size in bytes
    of the formatted prompt. If the return value exceeds `buf.length`, the
    buffer was too small: reallocate and call again. A non-positive value means
    no usable output was produced (presumably an error; see `llama.h`).
+/
int chatApplyTemplate(const(char)* tmpl,
        scope const(llama_chat_message)[] chat,
        bool addAss,
        scope char[] buf) @trusted @nogc nothrow
{
    // A plain cast(int) would wrap for buffers longer than int.max and could
    // hand the C side a negative or garbage capacity, so clamp instead.
    const int capacity = buf.length > int.max ? int.max : cast(int) buf.length;

    return llama_chat_apply_template(tmpl,
            cast(llama_chat_message*) chat.ptr, chat.length,
            addAss, buf.ptr, capacity);
}

/++
Apply a chat template and return the result as a D string.

Starts with a 1024-byte buffer and retries once with a buffer of the reported
size (plus one spare byte) if the first attempt was too small.

Params:
    tmpl   = NUL-terminated template string, forwarded to `chatApplyTemplate`.
    chat   = Messages to format, in conversation order.
    addAss = Append the assistant-turn prefix (default `true`).

Returns:
    A GC-allocated copy of the formatted prompt, or `""` when the underlying
    call reports a non-positive length.
+/
string applyTemplate(const(char)* tmpl,
        scope const(llama_chat_message)[] chat,
        bool addAss = true) @trusted
{
    char[] buf = new char[](1024);
    int n = chatApplyTemplate(tmpl, chat, addAss, buf);

    // Check the sign first: comparing a negative int against a size_t would
    // promote it to a huge unsigned value.
    if (n > 0 && cast(size_t) n > buf.length)
    {
        // Size arithmetic is done in size_t so that n + 1 cannot overflow int.
        buf = new char[](cast(size_t) n + 1);
        n = chatApplyTemplate(tmpl, chat, addAss, buf);
    }

    if (n <= 0)
        return "";

    // Never slice past the buffer, even if the second call unexpectedly
    // reports a larger size than the first; bounds checks may be disabled in
    // @trusted code for release builds.
    const size_t len = cast(size_t) n <= buf.length ? cast(size_t) n : buf.length;
    return buf[0 .. len].idup;
}

/++
Names of all built-in chat templates supported by `llama_chat_apply_template`.

Calls `llama_chat_builtin_templates` twice: once with a `null` output to query
the count, then again to fill an array of C string pointers, which are copied
into D strings.

Returns:
    A GC-allocated slice of D strings; empty when the count query reports
    nothing.
+/
string[] builtinTemplates() @trusted
{
    import std.string : fromStringz;

    // First call: query count.
    const int total = llama_chat_builtin_templates(null, 0);
    if (total <= 0) return [];

    // Second call: fill the pointer array.
    auto ptrs = new const(char)*[](total);
    const int reported = llama_chat_builtin_templates(ptrs.ptr, total);
    if (reported <= 0) return [];

    // Only trust as many entries as both calls agree on; assumes the second
    // call reports the list size the same way the first one does.
    const size_t filled = reported < total ? reported : total;

    auto result = new string[](filled);
    foreach (i, p; ptrs[0 .. filled])
        result[i] = fromStringz(p).idup;
    return result;
}