1 /**
2  * MIT License
3  *
4  * Copyright (c) 2025 Matheus C. França
5  *
6  * Permission is granted to use, modify, and distribute this software
7  * under the terms of the MIT License.
8  */
9 
10 /++
11  + Module providing a D language binding for the Ollama REST API.
12  +
13  + This module defines the `OllamaClient` class, which facilitates interaction with an Ollama server
14  + for tasks such as text generation, chat interactions, model management, embeddings, and tool calling.
15  + It supports both native Ollama endpoints and OpenAI-compatible endpoints, using `std.net.curl` for
16  + HTTP requests and `std.json` for JSON processing.
17  +
18  + Examples:
19  +     ---
20  +     import ollama.client;
21  +     import std.stdio;
22  +
23  +     void main() {
24  +         auto client = new OllamaClient();
25  +         auto response = client.generate("llama3", "What is the weather like?");
26  +         writeln(response["response"].str);
27  +     }
28  +     ---
29  +
30  + See_Also:
31  +     - $(LINK2 https://github.com/ollama/ollama/blob/main/docs/api.md, Ollama API Documentation)
32  +     - $(LINK2 https://github.com/ollama/ollama/blob/main/docs/openai.md, OpenAI Compatibility)
33  +/
34 module ollama.client;
35 
36 import std;
37 
38 @safe:
39 
40 // ---------------------------------------------------------------------------
41 // Helper: build a JSONValue object from a JSONValue[string] AA.
42 // Using JSONValue(JSONValue[string]) constructor is @safe; the .object
43 // property setter is @system and must be avoided in @safe code.
44 // ---------------------------------------------------------------------------
private JSONValue makeObject(JSONValue[string] fields) @safe
{
    // Wrap the AA via the @safe JSONValue constructor; the .object property
    // setter is @system and must not be used in @safe code.
    auto obj = JSONValue(fields);
    return obj;
}
49 
50 /++
51  + Typed options for controlling model generation behavior.
52  +
53  + Only non-default (explicitly set) fields are serialized to JSON.
54  + Float fields use `float.nan` as the "unset" sentinel; integer fields use `0`.
55  +
56  + Examples:
57  +     ---
58  +     OllamaOptions opts;
59  +     opts.temperature = 0.8f;
60  +     opts.num_ctx     = 4096;
61  +     opts.stop        = ["<|end|>", "\n\n"];
62  +     ---
63  +/
struct OllamaOptions
{
    float temperature    = float.nan; /// Sampling temperature (0 = deterministic).
    int   top_k          = 0;         /// Top-K sampling; 0 = disabled.
    float top_p          = float.nan; /// Nucleus sampling threshold.
    float min_p          = float.nan; /// Minimum probability threshold.
    float repeat_penalty = float.nan; /// Penalty for repeated tokens.
    int   repeat_last_n  = 0;         /// Tokens considered for repeat penalty.
    int   seed           = 0;         /// Random seed; 0 = random.
    int   num_predict    = 0;         /// Max tokens to generate; 0 = unlimited.
    int   num_ctx        = 0;         /// Context window size; 0 = model default.
    string[] stop;                    /// Stop sequences; empty = disabled.
    int   mirostat       = 0;         /// Mirostat strategy (0=off,1=v1,2=v2).
    float mirostat_tau   = float.nan; /// Mirostat target entropy.
    float mirostat_eta   = float.nan; /// Mirostat learning rate.

    /++
     + Serializes only the explicitly-set fields to a `JSONValue` object.
     +
     + Float fields use `float.nan` as the "unset" sentinel, so an explicit
     + `0.0` IS serialized; integer fields treat values `> 0` as set.
     +
     + Returns: A `JSONValue` object containing only the non-default fields.
     +/
    JSONValue toJson() const @safe
    {
        import std.math : isNaN;
        JSONValue[string] obj;

        // Record a float field unless it is the NaN "unset" sentinel.
        void putFloat(string key, float value)
        {
            if (!isNaN(value)) obj[key] = JSONValue(value);
        }
        // Record an integer field only when it is strictly positive.
        void putInt(string key, int value)
        {
            if (value > 0) obj[key] = JSONValue(value);
        }

        putFloat("temperature",    temperature);
        putInt  ("top_k",          top_k);
        putFloat("top_p",          top_p);
        putFloat("min_p",          min_p);
        putFloat("repeat_penalty", repeat_penalty);
        putInt  ("repeat_last_n",  repeat_last_n);
        putInt  ("seed",           seed);
        putInt  ("num_predict",    num_predict);
        putInt  ("num_ctx",        num_ctx);
        putInt  ("mirostat",       mirostat);
        putFloat("mirostat_tau",   mirostat_tau);
        putFloat("mirostat_eta",   mirostat_eta);

        if (stop.length > 0)
        {
            auto seqs = new JSONValue[](stop.length);
            foreach (i, s; stop) seqs[i] = JSONValue(s);
            obj["stop"] = JSONValue(seqs);
        }
        // JSONValue(JSONValue[string]) is the @safe way to build an object.
        return JSONValue(obj);
    }
}
112 
113 ///
114 unittest
115 {
116     // Default options serialize to an empty JSON object
117     OllamaOptions def;
118     auto j0 = def.toJson();
119     assert(j0.type == JSONType.object);
120     assert(j0.objectNoRef.length == 0, "Default OllamaOptions should be empty");
121 
122     // Only set fields appear in output
123     OllamaOptions opts;
124     opts.temperature = 0.5f;  // 0.5 is exactly representable in float and double
125     opts.top_k       = 40;
126     opts.num_ctx     = 4096;
127     opts.stop        = ["<|end|>"];
128     auto j = opts.toJson();
129     assert(j["temperature"].type == JSONType.float_);
130     assert(j["temperature"].floating == 0.5);  // exact double comparison
131     assert(j["top_k"].integer == 40);
132     assert(j["num_ctx"].integer == 4096);
133     assert(j["stop"].arrayNoRef[0].str == "<|end|>");
134     assert("top_p"          !in j);
135     assert("min_p"          !in j);
136     assert("repeat_penalty" !in j);
137     assert("mirostat"       !in j);
138 
139     // temperature = 0.0 is a valid explicit value and must be included
140     OllamaOptions zeroTemp;
141     zeroTemp.temperature = 0.0f;
142     auto jz = zeroTemp.toJson();
143     assert("temperature" in jz);
144     assert(jz["temperature"].floating == 0.0);
145 }
146 
147 /++
148  + Function schema for tool/function calling definitions.
149  +
150  + Used inside `Tool` when registering callable tools with the model.
151  +/
struct ToolFunction
{
    string    name;        /// Function name as called by the model.
    string    description; /// Human-readable description.
    JSONValue parameters;  /// JSON Schema object defining the function's parameters.

    /++
     + Serializes this definition for the Ollama API `tools` array.
     +
     + "parameters" is emitted only when set; a default-constructed
     + `JSONValue` has type `null_` and is omitted.
     +
     + Returns: A `JSONValue` with "name", "description", and optionally "parameters".
     +/
    JSONValue toJson() const @safe
    {
        JSONValue[string] obj;
        obj["name"]        = JSONValue(name);
        obj["description"] = JSONValue(description);
        if (parameters.type != JSONType.null_)
            obj["parameters"] = parameters;
        return JSONValue(obj);
    }
}
174 
175 /++
176  + A tool (function) definition passed to `chat()` to enable tool/function calling.
177  +
178  + Examples:
179  +     ---
180  +     auto schema = parseJSON(`{
181  +         "type": "object",
182  +         "properties": {"location": {"type": "string"}},
183  +         "required": ["location"]
184  +     }`);
185  +     auto tool = Tool("function", ToolFunction("get_weather", "Get current weather", schema));
186  +     auto resp = client.chat("llama3", messages, JSONValue.init, false, [tool]);
187  +     ---
188  +/
struct Tool
{
    string       type = "function"; /// Tool type; currently always "function".
    ToolFunction function_;         /// The function definition.

    /++
     + Serializes this tool for the Ollama API `tools` array.
     +
     + Returns: A `JSONValue` with "type" and "function" fields.
     +/
    JSONValue toJson() const @safe
    {
        JSONValue[string] obj;
        obj["type"] = JSONValue(type);
        // The JSON key is "function"; the trailing underscore on the D field
        // only avoids the `function` keyword.
        obj["function"] = function_.toJson();
        return JSONValue(obj);
    }
}
208 
209 /++
210  + Represents a tool/function call made by the model in a chat response.
211  +
212  + Access via `response["message"]["tool_calls"]` when the model calls a tool.
213  +/
struct ToolCall
{
    string    id;        /// Optional tool call identifier.
    string    name;      /// Name of the function called.
    JSONValue arguments; /// Arguments passed to the function (JSON object).

    /++
     + Serializes this call in the Ollama API tool-call format.
     +
     + "arguments" and "id" are emitted only when present; a null-typed
     + `arguments` and an empty `id` are omitted.
     +
     + Returns: A `JSONValue` with "function" containing "name" and "arguments".
     +/
    JSONValue toJson() const @safe
    {
        JSONValue[string] fn;
        fn["name"] = JSONValue(name);
        if (arguments.type != JSONType.null_)
            fn["arguments"] = arguments;

        JSONValue[string] outer;
        outer["function"] = JSONValue(fn);
        if (id.length > 0)
            outer["id"] = JSONValue(id);
        return JSONValue(outer);
    }
}
237 
238 ///
239 unittest
240 {
241     // ToolFunction serialization
242     auto tf = ToolFunction("get_weather", "Fetch weather data",
243         parseJSON(`{"type":"object","properties":{"city":{"type":"string"}}}`));
244     auto jtf = tf.toJson();
245     assert(jtf["name"].str == "get_weather");
246     assert(jtf["description"].str == "Fetch weather data");
247     assert(jtf["parameters"]["type"].str == "object");
248 
249     // Tool serialization — JSON key must be "function" (not "function_")
250     auto tool = Tool("function", tf);
251     auto jt = tool.toJson();
252     assert(jt["type"].str == "function");
253     assert("function" in jt);
254     assert(jt["function"]["name"].str == "get_weather");
255 
256     // ToolCall with id and arguments
257     auto tc = ToolCall("call-1", "get_weather", parseJSON(`{"city":"Paris"}`));
258     auto jtc = tc.toJson();
259     assert(jtc["id"].str == "call-1");
260     assert(jtc["function"]["name"].str == "get_weather");
261     assert(jtc["function"]["arguments"]["city"].str == "Paris");
262 
263     // ToolCall without id — id key must be absent
264     auto tc2 = ToolCall("", "sum", parseJSON(`{"a":1,"b":2}`));
265     auto jtc2 = tc2.toJson();
266     assert("id" !in jtc2);
267     assert(jtc2["function"]["name"].str == "sum");
268 }
269 
270 /++
271  + Represents a single message in a chat interaction.
272  +
273  + Supports text, base64-encoded images (multimodal), and tool call results.
274  + Backward compatible: `Message("user", "hello")` still compiles.
275  +/
struct Message
{
    string     role;       /// Sender role: "user", "assistant", or "system".
    string     content;    /// Text content of the message.
    string[]   images;     /// Optional base64-encoded images for multimodal input.
    ToolCall[] tool_calls; /// Optional tool calls made by the assistant.

    /++
     + Serializes this message for the Ollama API.
     +
     + "images" and "tool_calls" are emitted only when non-empty, so plain
     + text messages stay minimal.
     +
     + Returns: A `JSONValue` with "role", "content", and optionally "images"
     + and "tool_calls".
     +/
    JSONValue toJson() const @safe
    {
        JSONValue[string] obj;
        obj["role"]    = JSONValue(role);
        obj["content"] = JSONValue(content);

        if (images.length > 0)
        {
            auto imgList = new JSONValue[](images.length);
            foreach (i, img; images) imgList[i] = JSONValue(img);
            obj["images"] = JSONValue(imgList);
        }
        if (tool_calls.length > 0)
        {
            auto callList = new JSONValue[](tool_calls.length);
            foreach (i, tc; tool_calls) callList[i] = tc.toJson();
            obj["tool_calls"] = JSONValue(callList);
        }
        return JSONValue(obj);
    }
}
310 
311 ///
312 unittest
313 {
314     // Basic message — no optional fields
315     auto m = Message("user", "Hello, world!");
316     auto j = m.toJson();
317     assert(j["role"].str == "user");
318     assert(j["content"].str == "Hello, world!");
319     assert("images"     !in j);
320     assert("tool_calls" !in j);
321 
322     // Message with images
323     auto m2 = Message("user", "What is in this image?", ["aGVsbG8="]);
324     auto j2 = m2.toJson();
325     assert(j2["images"].arrayNoRef.length == 1);
326     assert(j2["images"][0].str == "aGVsbG8=");
327 
328     // Message with tool_calls
329     auto tc = ToolCall("id-1", "search", parseJSON(`{"query":"D language"}`));
330     auto m3 = Message("assistant", "", null, [tc]);
331     auto j3 = m3.toJson();
332     assert(j3["tool_calls"].arrayNoRef.length == 1);
333     assert(j3["tool_calls"][0]["function"]["name"].str == "search");
334 
335     // Backward compatibility: two-field initialization still compiles
336     Message m4;
337     m4.role    = "system";
338     m4.content = "You are a helpful assistant.";
339     auto j4 = m4.toJson();
340     assert(j4["role"].str == "system");
341 }
342 
/// Callback type used by the streaming methods (`generateStream`, `chatStream`).
/// Receives one fully-parsed NDJSON chunk per call; `chunk["done"]` is
/// `true` on the final chunk. Must be `@safe` to be callable from `@safe` code.
alias StreamCallback = void delegate(JSONValue chunk) @safe;
347 
348 /++
349  + A client class for interacting with the Ollama REST API.
350  +
351  + Provides methods for text generation, chat, embeddings, tool calling, and model
352  + management using `std.net.curl` for HTTP and `std.json` for JSON.
353  +
354  + Examples:
355  +     ---
356  +     auto client = new OllamaClient();
357  +     auto resp = client.chat("llama3", [Message("user", "Hi there!")]);
358  +     writeln(resp["message"]["content"].str);
359  +     ---
360  +/
361 class OllamaClient
362 {
363     private string   host;
364     private Duration timeout = 60.seconds;
365 
366     /++
367      + Constructs a new Ollama client.
368      +
369      + Params:
370      +     host = Base URL of the Ollama server. Defaults to `DEFAULT_HOST`.
371      +/
    this(string host = DEFAULT_HOST) @safe
    {
        // NOTE(review): DEFAULT_HOST is declared elsewhere in this module and
        // is not visible in this excerpt — presumably the local Ollama server
        // URL; confirm against its declaration.
        this.host = host;
    }
376 
377     /++
378      + Sets the timeout for HTTP requests.
379      +
380      + Params:
381      +     timeout = Duration to wait before timing out.
382      +/
    void setTimeOut(Duration timeout) @safe
    {
        // Stored and applied as the CONNECT timeout of every subsequent request
        // (see post/get/del/postStream); it does not bound the transfer itself.
        this.timeout = timeout;
    }
387 
388     // -----------------------------------------------------------------------
389     // Private HTTP helpers
390     // -----------------------------------------------------------------------
391 
392     private JSONValue post(string url, JSONValue data, bool stream = false) @trusted
393     {
394         auto client = HTTP();
395         client.addRequestHeader("Content-Type", "application/json");
396         client.connectTimeout(timeout);
397 
398         auto jsonStr = data.toString();
399         auto response = std.net.curl.post(url, jsonStr, client);
400         auto jsonResponse = parseJSON(response);
401 
402         enforce("error" !in jsonResponse,
403             "HTTP request failed: " ~ ("message" in jsonResponse["error"]
404                 ? jsonResponse["error"]["message"].str : "Unknown error"));
405         return jsonResponse;
406     }
407 
408     private JSONValue get(string url) @trusted
409     {
410         auto client = HTTP();
411         client.connectTimeout(timeout);
412 
413         auto response = std.net.curl.get(url, client);
414         auto jsonResponse = parseJSON(response);
415         enforce("error" !in jsonResponse,
416             "HTTP request failed: " ~ ("message" in jsonResponse["error"]
417                 ? jsonResponse["error"]["message"].str : "Unknown error"));
418         return jsonResponse;
419     }
420 
421     /++
422      + HTTP DELETE with a JSON body, used by `deleteModel`.
423      +
424      + The Ollama API requires HTTP DELETE for `/api/delete`. `std.net.curl` has no
425      + free `del()` function; we use the `HTTP` class directly, setting the body via
426      + `postData` and then overriding the method to DELETE.
427      +/
428     private JSONValue del(string url, JSONValue data) @trusted
429     {
430         auto jsonStr = data.toString();
431         auto http = HTTP(url);
432         http.addRequestHeader("Content-Type", "application/json");
433         http.connectTimeout(timeout);
434         http.postData = cast(const(void)[]) jsonStr;
435         http.method   = HTTP.Method.del;
436 
437         char[] respBuf;
438         http.onReceive = (ubyte[] chunk) {
439             respBuf ~= cast(char[]) chunk;
440             return chunk.length;
441         };
442         http.perform();
443 
444         if (respBuf.length == 0)
445             return JSONValue((JSONValue[string]).init); // empty 200 OK = success
446 
447         auto jsonResp = parseJSON(respBuf);
448         enforce("error" !in jsonResp,
449             "HTTP request failed: " ~ ("message" in jsonResp["error"]
450                 ? jsonResp["error"]["message"].str : "Unknown error"));
451         return jsonResp;
452     }
453 
454     /++
455      + Low-level streaming POST helper.
456      +
457      + Sends `data` to `url` and dispatches each newline-delimited JSON chunk
458      + to `onChunk` as it arrives, enabling token-by-token streaming from
459      + `/api/generate` and `/api/chat`.
460      +/
    private void postStream(string url, JSONValue data,
        StreamCallback onChunk) @trusted
    {
        auto jsonStr = data.toString();
        auto http = HTTP(url);
        http.addRequestHeader("Content-Type", "application/json");
        http.connectTimeout(timeout);
        http.postData = cast(const(void)[]) jsonStr;

        // Accumulates bytes across onReceive calls: one NDJSON line may arrive
        // split over several network chunks, and one chunk may hold many lines.
        char[] lineBuf;
        http.onReceive = (ubyte[] chunk) {
            lineBuf ~= cast(char[]) chunk;
            size_t start = 0;
            // Dispatch every complete ('\n'-terminated) line in the buffer.
            foreach (i; 0 .. lineBuf.length)
            {
                if (lineBuf[i] == '\n')
                {
                    if (i > start) // skip empty lines (consecutive newlines)
                        onChunk(parseJSON(lineBuf[start .. i]));
                    start = i + 1;
                }
            }
            // Keep only the unterminated tail; .dup detaches it so the next
            // append cannot alias memory of already-dispatched data.
            lineBuf = lineBuf[start .. $].dup;
            return chunk.length; // report full consumption to curl
        };
        http.perform();
        // Flush a final line that arrived without a trailing newline.
        if (lineBuf.length > 0)
            onChunk(parseJSON(lineBuf));
    }
490 
491     // -----------------------------------------------------------------------
492     // Generation
493     // -----------------------------------------------------------------------
494 
495     /++
496      + Generates text based on a prompt using the specified model.
497      +
498      + Params:
499      +     model     = Model name (e.g. "llama3.1:8b").
500      +     prompt    = Input text.
501      +     options   = Raw `JSONValue` generation options (backward-compatible).
502      +     stream    = Whether to stream the response (not fully supported).
503      +     system    = Optional system prompt.
504      +     images    = Optional base64-encoded images for multimodal input.
505      +     format    = Structured output: `JSONValue("json")` or a JSON Schema.
506      +     suffix    = Text appended after the generated response.
507      +     keepAlive = How long to keep the model loaded (e.g. "5m", "0").
508      +     opts      = Typed `OllamaOptions`; takes precedence over `options`.
509      +
510      + Returns: A `JSONValue` containing `"response"`, `"done"`, and metadata.
511      +/
512     JSONValue generate(
513         string        model,
514         string        prompt,
515         JSONValue     options   = JSONValue.init,
516         bool          stream    = false,
517         string        system    = null,
518         string[]      images    = null,
519         JSONValue     format    = JSONValue.init,
520         string        suffix    = null,
521         string        keepAlive = null,
522         OllamaOptions opts      = OllamaOptions.init,
523     ) @safe
524     {
525         auto url = host ~ "/api/generate";
526 
527         JSONValue[string] fields = [
528             "model":  JSONValue(model),
529             "prompt": JSONValue(prompt),
530             "stream": JSONValue(stream),
531         ];
532 
533         // Typed OllamaOptions takes precedence over raw JSONValue options
534         auto optsJson = opts.toJson();
535         if (optsJson.objectNoRef.length > 0)
536             fields["options"] = optsJson;
537         else if (options.type != JSONType.null_)
538             fields["options"] = options;
539 
540         if (system.length    > 0) fields["system"]     = JSONValue(system);
541         if (suffix.length    > 0) fields["suffix"]     = JSONValue(suffix);
542         if (keepAlive.length > 0) fields["keep_alive"] = JSONValue(keepAlive);
543         if (format.type != JSONType.null_)
544             fields["format"] = format;
545 
546         if (images.length > 0)
547         {
548             JSONValue[] arr;
549             foreach (img; images) arr ~= JSONValue(img);
550             fields["images"] = JSONValue(arr);
551         }
552 
553         return post(url, makeObject(fields), stream);
554     }
555 
556     /++
557      + Streaming text generation — calls `onChunk` for every response token.
558      +
559      + Each call to `onChunk` receives one NDJSON chunk. The chunk contains a
560      + `"response"` string token and a boolean `"done"`. The final chunk has
561      + `"done": true` and carries usage/timing metadata.
562      +
563      + Params:
564      +     model     = Model name (e.g. "llama3.1:8b").
565      +     prompt    = Input prompt.
566      +     onChunk   = Callback invoked per chunk; must be `@safe`.
567      +     system    = Optional system prompt.
568      +     images    = Optional base64-encoded images (multimodal).
569      +     format    = Structured output: `JSONValue("json")` or JSON Schema.
570      +     keepAlive = How long to keep the model loaded.
571      +     opts      = Typed generation options.
572      +/
573     void generateStream(
574         string        model,
575         string        prompt,
576         StreamCallback onChunk,
577         string        system    = null,
578         string[]      images    = null,
579         JSONValue     format    = JSONValue.init,
580         string        keepAlive = null,
581         OllamaOptions opts      = OllamaOptions.init,
582     ) @safe
583     {
584         auto url = host ~ "/api/generate";
585         JSONValue[string] fields = [
586             "model":  JSONValue(model),
587             "prompt": JSONValue(prompt),
588             "stream": JSONValue(true),
589         ];
590         auto optsJson = opts.toJson();
591         if (optsJson.objectNoRef.length > 0) fields["options"]    = optsJson;
592         if (system.length    > 0)            fields["system"]     = JSONValue(system);
593         if (keepAlive.length > 0)            fields["keep_alive"] = JSONValue(keepAlive);
594         if (format.type != JSONType.null_)   fields["format"]     = format;
595         if (images.length > 0)
596         {
597             JSONValue[] arr;
598             foreach (img; images) arr ~= JSONValue(img);
599             fields["images"] = JSONValue(arr);
600         }
601         postStream(url, makeObject(fields), onChunk);
602     }
603 
604     // -----------------------------------------------------------------------
605     // Chat
606     // -----------------------------------------------------------------------
607 
608     /++
609      + Engages in a chat interaction using the specified model and message history.
610      +
611      + Params:
612      +     model     = Model name.
613      +     messages  = Array of `Message` structs (conversation history).
614      +     options   = Raw `JSONValue` generation options (backward-compatible).
615      +     stream    = Whether to stream the response (not fully supported).
616      +     tools     = Optional tool definitions for tool/function calling.
617      +     format    = Structured output schema or `JSONValue("json")`.
618      +     keepAlive = How long to keep the model loaded.
619      +     opts      = Typed `OllamaOptions`.
620      +
621      + Returns: A `JSONValue` with `"message"`, `"done"`, and metadata. When the
622      +          model calls a tool, `response["message"]["tool_calls"]` is populated.
623      +/
624     JSONValue chat(
625         string        model,
626         Message[]     messages,
627         JSONValue     options   = JSONValue.init,
628         bool          stream    = false,
629         Tool[]        tools     = null,
630         JSONValue     format    = JSONValue.init,
631         string        keepAlive = null,
632         OllamaOptions opts      = OllamaOptions.init,
633     ) @safe
634     {
635         auto url = host ~ "/api/chat";
636 
637         JSONValue[] msgArray;
638         foreach (msg; messages) msgArray ~= msg.toJson();
639 
640         JSONValue[string] fields = [
641             "model":    JSONValue(model),
642             "messages": JSONValue(msgArray),
643             "stream":   JSONValue(stream),
644         ];
645 
646         auto optsJson = opts.toJson();
647         if (optsJson.objectNoRef.length > 0)
648             fields["options"] = optsJson;
649         else if (options.type != JSONType.null_)
650             fields["options"] = options;
651 
652         if (keepAlive.length > 0)
653             fields["keep_alive"] = JSONValue(keepAlive);
654         if (format.type != JSONType.null_)
655             fields["format"] = format;
656 
657         if (tools.length > 0)
658         {
659             JSONValue[] arr;
660             foreach (t; tools) arr ~= t.toJson();
661             fields["tools"] = JSONValue(arr);
662         }
663 
664         return post(url, makeObject(fields), stream);
665     }
666 
667     /++
668      + Streaming chat — calls `onChunk` for every assistant token.
669      +
670      + Each chunk contains `"message": {"role": "assistant", "content": "<token>"}`.
671      + The final chunk has `"done": true` and carries usage metadata.
672      +
673      + Params:
674      +     model     = Model name.
675      +     messages  = Conversation history.
676      +     onChunk   = Callback invoked per chunk; must be `@safe`.
677      +     tools     = Optional tool definitions.
678      +     format    = Structured output schema or `JSONValue("json")`.
679      +     keepAlive = How long to keep the model loaded.
680      +     opts      = Typed generation options.
681      +/
682     void chatStream(
683         string        model,
684         Message[]     messages,
685         StreamCallback onChunk,
686         Tool[]        tools     = null,
687         JSONValue     format    = JSONValue.init,
688         string        keepAlive = null,
689         OllamaOptions opts      = OllamaOptions.init,
690     ) @safe
691     {
692         auto url = host ~ "/api/chat";
693         JSONValue[] msgArray;
694         foreach (msg; messages) msgArray ~= msg.toJson();
695 
696         JSONValue[string] fields = [
697             "model":    JSONValue(model),
698             "messages": JSONValue(msgArray),
699             "stream":   JSONValue(true),
700         ];
701         auto optsJson = opts.toJson();
702         if (optsJson.objectNoRef.length > 0) fields["options"]    = optsJson;
703         if (keepAlive.length > 0)            fields["keep_alive"] = JSONValue(keepAlive);
704         if (format.type != JSONType.null_)   fields["format"]     = format;
705         if (tools.length > 0)
706         {
707             JSONValue[] arr;
708             foreach (t; tools) arr ~= t.toJson();
709             fields["tools"] = JSONValue(arr);
710         }
711         postStream(url, makeObject(fields), onChunk);
712     }
713 
714     // -----------------------------------------------------------------------
715     // Embeddings
716     // -----------------------------------------------------------------------
717 
718     /++
719      + Generates an embedding vector for a single text input.
720      +
721      + Params:
722      +     model     = Embedding model name (e.g. "nomic-embed-text").
723      +     input     = Text to embed.
724      +     keepAlive = How long to keep the model loaded.
725      +
726      + Returns: A `JSONValue` with an `"embeddings"` array containing one vector.
727      +/
728     JSONValue embed(string model, string input, string keepAlive = null) @safe
729     {
730         auto url = host ~ "/api/embed";
731         JSONValue[string] fields = ["model": JSONValue(model), "input": JSONValue(input)];
732         if (keepAlive.length > 0)
733             fields["keep_alive"] = JSONValue(keepAlive);
734         return post(url, makeObject(fields));
735     }
736 
737     /++
738      + Generates embedding vectors for a batch of text inputs.
739      +
740      + Params:
741      +     model     = Embedding model name.
742      +     inputs    = Array of texts to embed.
743      +     keepAlive = How long to keep the model loaded.
744      +
745      + Returns: A `JSONValue` with an `"embeddings"` array, one vector per input.
746      +/
747     JSONValue embed(string model, string[] inputs, string keepAlive = null) @safe
748     {
749         auto url = host ~ "/api/embed";
750         JSONValue[] arr;
751         foreach (inp; inputs) arr ~= JSONValue(inp);
752         JSONValue[string] fields = ["model": JSONValue(model), "input": JSONValue(arr)];
753         if (keepAlive.length > 0)
754             fields["keep_alive"] = JSONValue(keepAlive);
755         return post(url, makeObject(fields));
756     }
757 
758     // -----------------------------------------------------------------------
759     // Model Management
760     // -----------------------------------------------------------------------
761 
762     /++
763      + Lists all locally available models.
764      +
765      + Returns: Pretty-printed JSON string of model details.
766      +/
767     string listModels() @safe
768     {
769         return get(host ~ "/api/tags").toPrettyString();
770     }
771 
772     /++
773      + Retrieves detailed information about a specific model.
774      +
775      + Params:
776      +     model = Model name to query.
777      +
778      + Returns: Pretty-printed JSON string of model metadata.
779      +/
780     string showModel(string model) @safe
781     {
782         return post(host ~ "/api/show",
783             makeObject(["name": JSONValue(model)])).toPrettyString();
784     }
785 
786     /++
787      + Creates a custom model from a modelfile.
788      +
789      + Params:
790      +     name      = New model name.
791      +     modelfile = Modelfile content string.
792      +
793      + Returns: A `JSONValue` with creation status.
794      +/
795     JSONValue createModel(string name, string modelfile) @safe
796     {
797         return post(host ~ "/api/create",
798             makeObject(["name": JSONValue(name), "modelfile": JSONValue(modelfile)]));
799     }
800 
801     /++
802      + Copies an existing model to a new name.
803      +
804      + Params:
805      +     source      = Source model name.
806      +     destination = Destination model name.
807      +
808      + Returns: A `JSONValue` with copy status.
809      +/
810     JSONValue copy(string source, string destination) @safe
811     {
812         return post(host ~ "/api/copy",
813             makeObject(["source": JSONValue(source), "destination": JSONValue(destination)]));
814     }
815 
816     /++
817      + Deletes a model from the Ollama server.
818      +
819      + Uses HTTP DELETE as required by the Ollama API specification.
820      +
821      + Params:
822      +     name = Model name to delete.
823      +
824      + Returns: A `JSONValue` (empty object on success).
825      +/
826     JSONValue deleteModel(string name) @safe
827     {
828         return del(host ~ "/api/delete", makeObject(["name": JSONValue(name)]));
829     }
830 
831     /++
832      + Downloads a model from the Ollama registry.
833      +
834      + Params:
835      +     name   = Model name to pull.
836      +     stream = Whether to stream progress (not fully supported).
837      +
838      + Returns: A `JSONValue` with pull status.
839      +/
840     JSONValue pull(string name, bool stream = false) @safe
841     {
842         return post(host ~ "/api/pull",
843             makeObject(["name": JSONValue(name), "stream": JSONValue(stream)]), stream);
844     }
845 
846     /++
847      + Uploads a model to the Ollama registry.
848      +
849      + Params:
850      +     name   = Model name to push.
851      +     stream = Whether to stream progress (not fully supported).
852      +
853      + Returns: A `JSONValue` with push status.
854      +/
855     JSONValue push(string name, bool stream = false) @safe
856     {
857         return post(host ~ "/api/push",
858             makeObject(["name": JSONValue(name), "stream": JSONValue(stream)]), stream);
859     }
860 
861     // -----------------------------------------------------------------------
862     // Server Operations
863     // -----------------------------------------------------------------------
864 
865     /++
866      + Retrieves the Ollama server version string.
867      +
868      + Returns: Version string (e.g. "0.6.2").
869      +/
870     string getVersion() @safe
871     {
872         return get(host ~ "/api/version")["version"].str;
873     }
874 
875     /++
876      + Lists currently running (loaded) models.
877      +
878      + Returns: Pretty-printed JSON string with model names, sizes, and expiry.
879      +/
880     string ps() @safe
881     {
882         return get(host ~ "/api/ps").toPrettyString();
883     }
884 
885 
886     // -----------------------------------------------------------------------
887     // OpenAI-Compatible Endpoints
888     // -----------------------------------------------------------------------
889 
890     /++
891      + Performs an OpenAI-style chat completion.
892      +
893      + Params:
894      +     model       = Model name.
895      +     messages    = Chat history as `Message` array.
896      +     maxTokens   = Maximum tokens to generate (0 = unlimited).
897      +     temperature = Sampling temperature (default 1.0).
898      +     stream      = Whether to stream (not fully supported).
899      +
900      + Returns: A `JSONValue` in OpenAI `ChatCompletion` format.
901      +/
902     JSONValue chatCompletions(
903         string    model,
904         Message[] messages,
905         int       maxTokens   = 0,
906         float     temperature = 1.0,
907         bool      stream      = false,
908     ) @trusted
909     {
910         auto url = host ~ "/v1/chat/completions";
911         JSONValue[] msgArray;
912         foreach (msg; messages) msgArray ~= msg.toJson();
913 
914         JSONValue[string] fields = [
915             "model":       JSONValue(model),
916             "messages":    JSONValue(msgArray),
917             "stream":      JSONValue(stream),
918             "temperature": JSONValue(temperature),
919         ];
920         if (maxTokens > 0)
921             fields["max_tokens"] = JSONValue(maxTokens);
922 
923         return post(url, makeObject(fields), stream);
924     }
925 
926     /++
927      + Performs an OpenAI-style text completion.
928      +
929      + Params:
930      +     model       = Model name.
931      +     prompt      = Input prompt.
932      +     maxTokens   = Maximum tokens to generate (0 = unlimited).
933      +     temperature = Sampling temperature (default 1.0).
934      +     stream      = Whether to stream (not fully supported).
935      +
936      + Returns: A `JSONValue` in OpenAI `Completion` format.
937      +/
938     JSONValue completions(
939         string model,
940         string prompt,
941         int    maxTokens   = 0,
942         float  temperature = 1.0,
943         bool   stream      = false,
944     ) @trusted
945     {
946         auto url = host ~ "/v1/completions";
947         JSONValue[string] fields = [
948             "model":       JSONValue(model),
949             "prompt":      JSONValue(prompt),
950             "stream":      JSONValue(stream),
951             "temperature": JSONValue(temperature),
952         ];
953         if (maxTokens > 0)
954             fields["max_tokens"] = JSONValue(maxTokens);
955 
956         return post(url, makeObject(fields), stream);
957     }
958 
959     /++
960      + Lists models in OpenAI-compatible format.
961      +
962      + Returns: Pretty-printed JSON string of model data.
963      +/
964     string getModels() @safe
965     {
966         return get(host ~ "/v1/models").toPrettyString();
967     }
968 }
969 
/// Default host URL for a locally running Ollama server (loopback, port 11434).
enum DEFAULT_HOST = "http://127.0.0.1:11434";