/**
 * MIT License
 *
 * Copyright (c) 2025 Matheus C. França
 *
 * Permission is granted to use, modify, and distribute this software
 * under the terms of the MIT License.
 */

/++
 + D language binding for the Ollama REST API.
 +
 + Exposes the `OllamaClient` class for talking to an Ollama server: text
 + generation, chat interactions, model management, embeddings, and tool
 + calling. Both the native Ollama endpoints and the OpenAI-compatible
 + endpoints are covered, with `std.net.curl` doing the HTTP work and
 + `std.json` the JSON handling.
 +
 + Examples:
 + ---
 + import ollama.client;
 + import std.stdio;
 +
 + void main() {
 +     auto client = new OllamaClient();
 +     auto response = client.generate("llama3", "What is the weather like?");
 +     writeln(response["response"].str);
 + }
 + ---
 +
 + See_Also:
 +   - $(LINK2 https://github.com/ollama/ollama/blob/main/docs/api.md, Ollama API Documentation)
 +   - $(LINK2 https://github.com/ollama/ollama/blob/main/docs/openai.md, OpenAI Compatibility)
 +/
module ollama.client;

import std;

@safe:

// ---------------------------------------------------------------------------
// Helper: wrap a JSONValue[string] AA in a JSONValue object.
// The JSONValue(JSONValue[string]) constructor is @safe, whereas the
// .object property setter is @system and therefore off-limits here.
// ---------------------------------------------------------------------------
private JSONValue makeObject(JSONValue[string] fields) @safe
{
    return JSONValue(fields);
}

/++
 + Typed options controlling model generation behavior.
 +
 + Only fields that were explicitly set (i.e. differ from their defaults) are
 + serialized to JSON. Float fields use `float.nan` as the "unset" sentinel;
 + integer fields use `0`.
 +
 + Examples:
 + ---
 + OllamaOptions opts;
 + opts.temperature = 0.8f;
 + opts.num_ctx = 4096;
 + opts.stop = ["<|end|>", "\n\n"];
 + ---
 +/
struct OllamaOptions
{
    float temperature = float.nan;    /// Sampling temperature (0 = deterministic).
    int top_k = 0;                    /// Top-K sampling; 0 = disabled.
    float top_p = float.nan;          /// Nucleus sampling threshold.
    float min_p = float.nan;          /// Minimum probability threshold.
    float repeat_penalty = float.nan; /// Penalty for repeated tokens.
    int repeat_last_n = 0;            /// Tokens considered for repeat penalty.
    int seed = 0;                     /// Random seed; 0 = random.
    int num_predict = 0;              /// Max tokens to generate; 0 = unlimited.
    int num_ctx = 0;                  /// Context window size; 0 = model default.
    string[] stop;                    /// Stop sequences; empty = disabled.
    int mirostat = 0;                 /// Mirostat strategy (0=off,1=v1,2=v2).
    float mirostat_tau = float.nan;   /// Mirostat target entropy.
    float mirostat_eta = float.nan;   /// Mirostat learning rate.

    /++
     + Serializes only non-default fields into a `JSONValue` object.
     +
     + Builds a `JSONValue[string]` AA (a safe D operation) and wraps it.
     +
     + Returns: A `JSONValue` object holding exactly the explicitly-set fields.
     +/
    JSONValue toJson() const @safe
    {
        import std.math : isNaN;

        JSONValue[string] payload;

        // Floats are emitted whenever they hold a real number (NaN = unset).
        void putFloat(string key, float value)
        {
            if (!isNaN(value))
                payload[key] = JSONValue(value);
        }

        // Integers are emitted only when strictly positive (0 = unset).
        void putInt(string key, int value)
        {
            if (value > 0)
                payload[key] = JSONValue(value);
        }

        putFloat("temperature", temperature);
        putInt("top_k", top_k);
        putFloat("top_p", top_p);
        putFloat("min_p", min_p);
        putFloat("repeat_penalty", repeat_penalty);
        putInt("repeat_last_n", repeat_last_n);
        putInt("seed", seed);
        putInt("num_predict", num_predict);
        putInt("num_ctx", num_ctx);
        putInt("mirostat", mirostat);
        putFloat("mirostat_tau", mirostat_tau);
        putFloat("mirostat_eta", mirostat_eta);

        if (stop.length > 0)
            payload["stop"] = JSONValue(stop.map!(s => JSONValue(s)).array);

        return makeObject(payload);
    }
}

///
unittest
{
    // A default-initialized struct must serialize to an empty object.
    OllamaOptions blank;
    auto emptyJson = blank.toJson();
    assert(emptyJson.type == JSONType.object);
    assert(emptyJson.objectNoRef.length == 0, "Default OllamaOptions should be empty");

    // Only explicitly-set fields may appear in the output.
    OllamaOptions cfg;
    cfg.temperature = 0.5f; // 0.5 is exactly representable in float and double
    cfg.top_k = 40;
    cfg.num_ctx = 4096;
    cfg.stop = ["<|end|>"];
    auto payload = cfg.toJson();
    assert(payload["temperature"].type == JSONType.float_);
    assert(payload["temperature"].floating == 0.5); // exact double comparison
    assert(payload["top_k"].integer == 40);
    assert(payload["num_ctx"].integer == 4096);
    assert(payload["stop"].arrayNoRef[0].str == "<|end|>");
    assert("top_p" !in payload);
    assert("min_p" !in payload);
    assert("repeat_penalty" !in payload);
    assert("mirostat" !in payload);

    // temperature = 0.0 is a valid explicit value and must be included
    OllamaOptions cold;
    cold.temperature = 0.0f;
    auto coldJson = cold.toJson();
    assert("temperature" in coldJson);
    assert(coldJson["temperature"].floating == 0.0);
}

/++
 + Function schema used when registering callable tools with the model.
 +
 + Embedded inside `Tool` definitions passed to `chat()`.
 +/
struct ToolFunction
{
    string name;          /// Function name as called by the model.
    string description;   /// Human-readable description.
    JSONValue parameters; /// JSON Schema object defining the function's parameters.

    /++
     + Converts to a JSON object for the Ollama API `tools` array.
     +
     + Returns: A `JSONValue` with "name", "description", and optionally "parameters".
     +/
    JSONValue toJson() const @safe
    {
        JSONValue[string] obj;
        obj["name"] = JSONValue(name);
        obj["description"] = JSONValue(description);
        // A null `parameters` means "no schema given" and is omitted entirely.
        if (parameters.type != JSONType.null_)
            obj["parameters"] = parameters;
        return makeObject(obj);
    }
}

/++
 + A tool (function) definition passed to `chat()` to enable tool/function calling.
 +
 + Examples:
 + ---
 + auto schema = parseJSON(`{
 +     "type": "object",
 +     "properties": {"location": {"type": "string"}},
 +     "required": ["location"]
 + }`);
 + auto tool = Tool("function", ToolFunction("get_weather", "Get current weather", schema));
 + auto resp = client.chat("llama3", messages, JSONValue.init, false, [tool]);
 + ---
 +/
struct Tool
{
    string type = "function"; /// Tool type; currently always "function".
    ToolFunction function_;   /// The function definition.

    /++
     + Converts to a JSON object for the Ollama API `tools` array.
     +
     + Returns: A `JSONValue` with "type" and "function" fields.
198 +/ 199 JSONValue toJson() const @safe 200 { 201 JSONValue[string] fields = [ 202 "type": JSONValue(type), 203 "function": function_.toJson(), 204 ]; 205 return makeObject(fields); 206 } 207 } 208 209 /++ 210 + Represents a tool/function call made by the model in a chat response. 211 + 212 + Access via `response["message"]["tool_calls"]` when the model calls a tool. 213 +/ 214 struct ToolCall 215 { 216 string id; /// Optional tool call identifier. 217 string name; /// Name of the function called. 218 JSONValue arguments; /// Arguments passed to the function (JSON object). 219 220 /++ 221 + Converts to a JSON object matching the Ollama API tool call format. 222 + 223 + Returns: A `JSONValue` with "function" containing "name" and "arguments". 224 +/ 225 JSONValue toJson() const @safe 226 { 227 JSONValue[string] funcFields = ["name": JSONValue(name)]; 228 if (arguments.type != JSONType.null_) 229 funcFields["arguments"] = arguments; 230 231 JSONValue[string] fields = ["function": makeObject(funcFields)]; 232 if (id.length > 0) 233 fields["id"] = JSONValue(id); 234 return makeObject(fields); 235 } 236 } 237 238 /// 239 unittest 240 { 241 // ToolFunction serialization 242 auto tf = ToolFunction("get_weather", "Fetch weather data", 243 parseJSON(`{"type":"object","properties":{"city":{"type":"string"}}}`)); 244 auto jtf = tf.toJson(); 245 assert(jtf["name"].str == "get_weather"); 246 assert(jtf["description"].str == "Fetch weather data"); 247 assert(jtf["parameters"]["type"].str == "object"); 248 249 // Tool serialization — JSON key must be "function" (not "function_") 250 auto tool = Tool("function", tf); 251 auto jt = tool.toJson(); 252 assert(jt["type"].str == "function"); 253 assert("function" in jt); 254 assert(jt["function"]["name"].str == "get_weather"); 255 256 // ToolCall with id and arguments 257 auto tc = ToolCall("call-1", "get_weather", parseJSON(`{"city":"Paris"}`)); 258 auto jtc = tc.toJson(); 259 assert(jtc["id"].str == "call-1"); 260 
assert(jtc["function"]["name"].str == "get_weather"); 261 assert(jtc["function"]["arguments"]["city"].str == "Paris"); 262 263 // ToolCall without id — id key must be absent 264 auto tc2 = ToolCall("", "sum", parseJSON(`{"a":1,"b":2}`)); 265 auto jtc2 = tc2.toJson(); 266 assert("id" !in jtc2); 267 assert(jtc2["function"]["name"].str == "sum"); 268 } 269 270 /++ 271 + Represents a single message in a chat interaction. 272 + 273 + Supports text, base64-encoded images (multimodal), and tool call results. 274 + Backward compatible: `Message("user", "hello")` still compiles. 275 +/ 276 struct Message 277 { 278 string role; /// Sender role: "user", "assistant", or "system". 279 string content; /// Text content of the message. 280 string[] images; /// Optional base64-encoded images for multimodal input. 281 ToolCall[] tool_calls; /// Optional tool calls made by the assistant. 282 283 /++ 284 + Converts the message to a JSON object for the Ollama API. 285 + 286 + Returns: A `JSONValue` with "role", "content", and optionally "images" 287 + and "tool_calls". 
288 +/ 289 JSONValue toJson() const @safe 290 { 291 JSONValue[string] fields = [ 292 "role": JSONValue(role), 293 "content": JSONValue(content), 294 ]; 295 if (images.length > 0) 296 { 297 JSONValue[] arr; 298 foreach (img; images) arr ~= JSONValue(img); 299 fields["images"] = JSONValue(arr); 300 } 301 if (tool_calls.length > 0) 302 { 303 JSONValue[] arr; 304 foreach (tc; tool_calls) arr ~= tc.toJson(); 305 fields["tool_calls"] = JSONValue(arr); 306 } 307 return makeObject(fields); 308 } 309 } 310 311 /// 312 unittest 313 { 314 // Basic message — no optional fields 315 auto m = Message("user", "Hello, world!"); 316 auto j = m.toJson(); 317 assert(j["role"].str == "user"); 318 assert(j["content"].str == "Hello, world!"); 319 assert("images" !in j); 320 assert("tool_calls" !in j); 321 322 // Message with images 323 auto m2 = Message("user", "What is in this image?", ["aGVsbG8="]); 324 auto j2 = m2.toJson(); 325 assert(j2["images"].arrayNoRef.length == 1); 326 assert(j2["images"][0].str == "aGVsbG8="); 327 328 // Message with tool_calls 329 auto tc = ToolCall("id-1", "search", parseJSON(`{"query":"D language"}`)); 330 auto m3 = Message("assistant", "", null, [tc]); 331 auto j3 = m3.toJson(); 332 assert(j3["tool_calls"].arrayNoRef.length == 1); 333 assert(j3["tool_calls"][0]["function"]["name"].str == "search"); 334 335 // Backward compatibility: two-field initialization still compiles 336 Message m4; 337 m4.role = "system"; 338 m4.content = "You are a helpful assistant."; 339 auto j4 = m4.toJson(); 340 assert(j4["role"].str == "system"); 341 } 342 343 /// Callback type used by the streaming methods. 344 /// Receives one fully-parsed NDJSON chunk per call; `chunk["done"]` is 345 /// `true` on the final chunk. 346 alias StreamCallback = void delegate(JSONValue chunk) @safe; 347 348 /++ 349 + A client class for interacting with the Ollama REST API. 
350 + 351 + Provides methods for text generation, chat, embeddings, tool calling, and model 352 + management using `std.net.curl` for HTTP and `std.json` for JSON. 353 + 354 + Examples: 355 + --- 356 + auto client = new OllamaClient(); 357 + auto resp = client.chat("llama3", [Message("user", "Hi there!")]); 358 + writeln(resp["message"]["content"].str); 359 + --- 360 +/ 361 class OllamaClient 362 { 363 private string host; 364 private Duration timeout = 60.seconds; 365 366 /++ 367 + Constructs a new Ollama client. 368 + 369 + Params: 370 + host = Base URL of the Ollama server. Defaults to `DEFAULT_HOST`. 371 +/ 372 this(string host = DEFAULT_HOST) @safe 373 { 374 this.host = host; 375 } 376 377 /++ 378 + Sets the timeout for HTTP requests. 379 + 380 + Params: 381 + timeout = Duration to wait before timing out. 382 +/ 383 void setTimeOut(Duration timeout) @safe 384 { 385 this.timeout = timeout; 386 } 387 388 // ----------------------------------------------------------------------- 389 // Private HTTP helpers 390 // ----------------------------------------------------------------------- 391 392 private JSONValue post(string url, JSONValue data, bool stream = false) @trusted 393 { 394 auto client = HTTP(); 395 client.addRequestHeader("Content-Type", "application/json"); 396 client.connectTimeout(timeout); 397 398 auto jsonStr = data.toString(); 399 auto response = std.net.curl.post(url, jsonStr, client); 400 auto jsonResponse = parseJSON(response); 401 402 enforce("error" !in jsonResponse, 403 "HTTP request failed: " ~ ("message" in jsonResponse["error"] 404 ? 
jsonResponse["error"]["message"].str : "Unknown error")); 405 return jsonResponse; 406 } 407 408 private JSONValue get(string url) @trusted 409 { 410 auto client = HTTP(); 411 client.connectTimeout(timeout); 412 413 auto response = std.net.curl.get(url, client); 414 auto jsonResponse = parseJSON(response); 415 enforce("error" !in jsonResponse, 416 "HTTP request failed: " ~ ("message" in jsonResponse["error"] 417 ? jsonResponse["error"]["message"].str : "Unknown error")); 418 return jsonResponse; 419 } 420 421 /++ 422 + HTTP DELETE with a JSON body, used by `deleteModel`. 423 + 424 + The Ollama API requires HTTP DELETE for `/api/delete`. `std.net.curl` has no 425 + free `del()` function; we use the `HTTP` class directly, setting the body via 426 + `postData` and then overriding the method to DELETE. 427 +/ 428 private JSONValue del(string url, JSONValue data) @trusted 429 { 430 auto jsonStr = data.toString(); 431 auto http = HTTP(url); 432 http.addRequestHeader("Content-Type", "application/json"); 433 http.connectTimeout(timeout); 434 http.postData = cast(const(void)[]) jsonStr; 435 http.method = HTTP.Method.del; 436 437 char[] respBuf; 438 http.onReceive = (ubyte[] chunk) { 439 respBuf ~= cast(char[]) chunk; 440 return chunk.length; 441 }; 442 http.perform(); 443 444 if (respBuf.length == 0) 445 return JSONValue((JSONValue[string]).init); // empty 200 OK = success 446 447 auto jsonResp = parseJSON(respBuf); 448 enforce("error" !in jsonResp, 449 "HTTP request failed: " ~ ("message" in jsonResp["error"] 450 ? jsonResp["error"]["message"].str : "Unknown error")); 451 return jsonResp; 452 } 453 454 /++ 455 + Low-level streaming POST helper. 456 + 457 + Sends `data` to `url` and dispatches each newline-delimited JSON chunk 458 + to `onChunk` as it arrives, enabling token-by-token streaming from 459 + `/api/generate` and `/api/chat`. 
460 +/ 461 private void postStream(string url, JSONValue data, 462 StreamCallback onChunk) @trusted 463 { 464 auto jsonStr = data.toString(); 465 auto http = HTTP(url); 466 http.addRequestHeader("Content-Type", "application/json"); 467 http.connectTimeout(timeout); 468 http.postData = cast(const(void)[]) jsonStr; 469 470 char[] lineBuf; 471 http.onReceive = (ubyte[] chunk) { 472 lineBuf ~= cast(char[]) chunk; 473 size_t start = 0; 474 foreach (i; 0 .. lineBuf.length) 475 { 476 if (lineBuf[i] == '\n') 477 { 478 if (i > start) 479 onChunk(parseJSON(lineBuf[start .. i])); 480 start = i + 1; 481 } 482 } 483 lineBuf = lineBuf[start .. $].dup; 484 return chunk.length; 485 }; 486 http.perform(); 487 if (lineBuf.length > 0) 488 onChunk(parseJSON(lineBuf)); 489 } 490 491 // ----------------------------------------------------------------------- 492 // Generation 493 // ----------------------------------------------------------------------- 494 495 /++ 496 + Generates text based on a prompt using the specified model. 497 + 498 + Params: 499 + model = Model name (e.g. "llama3.1:8b"). 500 + prompt = Input text. 501 + options = Raw `JSONValue` generation options (backward-compatible). 502 + stream = Whether to stream the response (not fully supported). 503 + system = Optional system prompt. 504 + images = Optional base64-encoded images for multimodal input. 505 + format = Structured output: `JSONValue("json")` or a JSON Schema. 506 + suffix = Text appended after the generated response. 507 + keepAlive = How long to keep the model loaded (e.g. "5m", "0"). 508 + opts = Typed `OllamaOptions`; takes precedence over `options`. 509 + 510 + Returns: A `JSONValue` containing `"response"`, `"done"`, and metadata. 
511 +/ 512 JSONValue generate( 513 string model, 514 string prompt, 515 JSONValue options = JSONValue.init, 516 bool stream = false, 517 string system = null, 518 string[] images = null, 519 JSONValue format = JSONValue.init, 520 string suffix = null, 521 string keepAlive = null, 522 OllamaOptions opts = OllamaOptions.init, 523 ) @safe 524 { 525 auto url = host ~ "/api/generate"; 526 527 JSONValue[string] fields = [ 528 "model": JSONValue(model), 529 "prompt": JSONValue(prompt), 530 "stream": JSONValue(stream), 531 ]; 532 533 // Typed OllamaOptions takes precedence over raw JSONValue options 534 auto optsJson = opts.toJson(); 535 if (optsJson.objectNoRef.length > 0) 536 fields["options"] = optsJson; 537 else if (options.type != JSONType.null_) 538 fields["options"] = options; 539 540 if (system.length > 0) fields["system"] = JSONValue(system); 541 if (suffix.length > 0) fields["suffix"] = JSONValue(suffix); 542 if (keepAlive.length > 0) fields["keep_alive"] = JSONValue(keepAlive); 543 if (format.type != JSONType.null_) 544 fields["format"] = format; 545 546 if (images.length > 0) 547 { 548 JSONValue[] arr; 549 foreach (img; images) arr ~= JSONValue(img); 550 fields["images"] = JSONValue(arr); 551 } 552 553 return post(url, makeObject(fields), stream); 554 } 555 556 /++ 557 + Streaming text generation — calls `onChunk` for every response token. 558 + 559 + Each call to `onChunk` receives one NDJSON chunk. The chunk contains a 560 + `"response"` string token and a boolean `"done"`. The final chunk has 561 + `"done": true` and carries usage/timing metadata. 562 + 563 + Params: 564 + model = Model name (e.g. "llama3.1:8b"). 565 + prompt = Input prompt. 566 + onChunk = Callback invoked per chunk; must be `@safe`. 567 + system = Optional system prompt. 568 + images = Optional base64-encoded images (multimodal). 569 + format = Structured output: `JSONValue("json")` or JSON Schema. 570 + keepAlive = How long to keep the model loaded. 571 + opts = Typed generation options. 
572 +/ 573 void generateStream( 574 string model, 575 string prompt, 576 StreamCallback onChunk, 577 string system = null, 578 string[] images = null, 579 JSONValue format = JSONValue.init, 580 string keepAlive = null, 581 OllamaOptions opts = OllamaOptions.init, 582 ) @safe 583 { 584 auto url = host ~ "/api/generate"; 585 JSONValue[string] fields = [ 586 "model": JSONValue(model), 587 "prompt": JSONValue(prompt), 588 "stream": JSONValue(true), 589 ]; 590 auto optsJson = opts.toJson(); 591 if (optsJson.objectNoRef.length > 0) fields["options"] = optsJson; 592 if (system.length > 0) fields["system"] = JSONValue(system); 593 if (keepAlive.length > 0) fields["keep_alive"] = JSONValue(keepAlive); 594 if (format.type != JSONType.null_) fields["format"] = format; 595 if (images.length > 0) 596 { 597 JSONValue[] arr; 598 foreach (img; images) arr ~= JSONValue(img); 599 fields["images"] = JSONValue(arr); 600 } 601 postStream(url, makeObject(fields), onChunk); 602 } 603 604 // ----------------------------------------------------------------------- 605 // Chat 606 // ----------------------------------------------------------------------- 607 608 /++ 609 + Engages in a chat interaction using the specified model and message history. 610 + 611 + Params: 612 + model = Model name. 613 + messages = Array of `Message` structs (conversation history). 614 + options = Raw `JSONValue` generation options (backward-compatible). 615 + stream = Whether to stream the response (not fully supported). 616 + tools = Optional tool definitions for tool/function calling. 617 + format = Structured output schema or `JSONValue("json")`. 618 + keepAlive = How long to keep the model loaded. 619 + opts = Typed `OllamaOptions`. 620 + 621 + Returns: A `JSONValue` with `"message"`, `"done"`, and metadata. When the 622 + model calls a tool, `response["message"]["tool_calls"]` is populated. 
623 +/ 624 JSONValue chat( 625 string model, 626 Message[] messages, 627 JSONValue options = JSONValue.init, 628 bool stream = false, 629 Tool[] tools = null, 630 JSONValue format = JSONValue.init, 631 string keepAlive = null, 632 OllamaOptions opts = OllamaOptions.init, 633 ) @safe 634 { 635 auto url = host ~ "/api/chat"; 636 637 JSONValue[] msgArray; 638 foreach (msg; messages) msgArray ~= msg.toJson(); 639 640 JSONValue[string] fields = [ 641 "model": JSONValue(model), 642 "messages": JSONValue(msgArray), 643 "stream": JSONValue(stream), 644 ]; 645 646 auto optsJson = opts.toJson(); 647 if (optsJson.objectNoRef.length > 0) 648 fields["options"] = optsJson; 649 else if (options.type != JSONType.null_) 650 fields["options"] = options; 651 652 if (keepAlive.length > 0) 653 fields["keep_alive"] = JSONValue(keepAlive); 654 if (format.type != JSONType.null_) 655 fields["format"] = format; 656 657 if (tools.length > 0) 658 { 659 JSONValue[] arr; 660 foreach (t; tools) arr ~= t.toJson(); 661 fields["tools"] = JSONValue(arr); 662 } 663 664 return post(url, makeObject(fields), stream); 665 } 666 667 /++ 668 + Streaming chat — calls `onChunk` for every assistant token. 669 + 670 + Each chunk contains `"message": {"role": "assistant", "content": "<token>"}`. 671 + The final chunk has `"done": true` and carries usage metadata. 672 + 673 + Params: 674 + model = Model name. 675 + messages = Conversation history. 676 + onChunk = Callback invoked per chunk; must be `@safe`. 677 + tools = Optional tool definitions. 678 + format = Structured output schema or `JSONValue("json")`. 679 + keepAlive = How long to keep the model loaded. 680 + opts = Typed generation options. 
681 +/ 682 void chatStream( 683 string model, 684 Message[] messages, 685 StreamCallback onChunk, 686 Tool[] tools = null, 687 JSONValue format = JSONValue.init, 688 string keepAlive = null, 689 OllamaOptions opts = OllamaOptions.init, 690 ) @safe 691 { 692 auto url = host ~ "/api/chat"; 693 JSONValue[] msgArray; 694 foreach (msg; messages) msgArray ~= msg.toJson(); 695 696 JSONValue[string] fields = [ 697 "model": JSONValue(model), 698 "messages": JSONValue(msgArray), 699 "stream": JSONValue(true), 700 ]; 701 auto optsJson = opts.toJson(); 702 if (optsJson.objectNoRef.length > 0) fields["options"] = optsJson; 703 if (keepAlive.length > 0) fields["keep_alive"] = JSONValue(keepAlive); 704 if (format.type != JSONType.null_) fields["format"] = format; 705 if (tools.length > 0) 706 { 707 JSONValue[] arr; 708 foreach (t; tools) arr ~= t.toJson(); 709 fields["tools"] = JSONValue(arr); 710 } 711 postStream(url, makeObject(fields), onChunk); 712 } 713 714 // ----------------------------------------------------------------------- 715 // Embeddings 716 // ----------------------------------------------------------------------- 717 718 /++ 719 + Generates an embedding vector for a single text input. 720 + 721 + Params: 722 + model = Embedding model name (e.g. "nomic-embed-text"). 723 + input = Text to embed. 724 + keepAlive = How long to keep the model loaded. 725 + 726 + Returns: A `JSONValue` with an `"embeddings"` array containing one vector. 727 +/ 728 JSONValue embed(string model, string input, string keepAlive = null) @safe 729 { 730 auto url = host ~ "/api/embed"; 731 JSONValue[string] fields = ["model": JSONValue(model), "input": JSONValue(input)]; 732 if (keepAlive.length > 0) 733 fields["keep_alive"] = JSONValue(keepAlive); 734 return post(url, makeObject(fields)); 735 } 736 737 /++ 738 + Generates embedding vectors for a batch of text inputs. 739 + 740 + Params: 741 + model = Embedding model name. 742 + inputs = Array of texts to embed. 
743 + keepAlive = How long to keep the model loaded. 744 + 745 + Returns: A `JSONValue` with an `"embeddings"` array, one vector per input. 746 +/ 747 JSONValue embed(string model, string[] inputs, string keepAlive = null) @safe 748 { 749 auto url = host ~ "/api/embed"; 750 JSONValue[] arr; 751 foreach (inp; inputs) arr ~= JSONValue(inp); 752 JSONValue[string] fields = ["model": JSONValue(model), "input": JSONValue(arr)]; 753 if (keepAlive.length > 0) 754 fields["keep_alive"] = JSONValue(keepAlive); 755 return post(url, makeObject(fields)); 756 } 757 758 // ----------------------------------------------------------------------- 759 // Model Management 760 // ----------------------------------------------------------------------- 761 762 /++ 763 + Lists all locally available models. 764 + 765 + Returns: Pretty-printed JSON string of model details. 766 +/ 767 string listModels() @safe 768 { 769 return get(host ~ "/api/tags").toPrettyString(); 770 } 771 772 /++ 773 + Retrieves detailed information about a specific model. 774 + 775 + Params: 776 + model = Model name to query. 777 + 778 + Returns: Pretty-printed JSON string of model metadata. 779 +/ 780 string showModel(string model) @safe 781 { 782 return post(host ~ "/api/show", 783 makeObject(["name": JSONValue(model)])).toPrettyString(); 784 } 785 786 /++ 787 + Creates a custom model from a modelfile. 788 + 789 + Params: 790 + name = New model name. 791 + modelfile = Modelfile content string. 792 + 793 + Returns: A `JSONValue` with creation status. 794 +/ 795 JSONValue createModel(string name, string modelfile) @safe 796 { 797 return post(host ~ "/api/create", 798 makeObject(["name": JSONValue(name), "modelfile": JSONValue(modelfile)])); 799 } 800 801 /++ 802 + Copies an existing model to a new name. 803 + 804 + Params: 805 + source = Source model name. 806 + destination = Destination model name. 807 + 808 + Returns: A `JSONValue` with copy status. 
809 +/ 810 JSONValue copy(string source, string destination) @safe 811 { 812 return post(host ~ "/api/copy", 813 makeObject(["source": JSONValue(source), "destination": JSONValue(destination)])); 814 } 815 816 /++ 817 + Deletes a model from the Ollama server. 818 + 819 + Uses HTTP DELETE as required by the Ollama API specification. 820 + 821 + Params: 822 + name = Model name to delete. 823 + 824 + Returns: A `JSONValue` (empty object on success). 825 +/ 826 JSONValue deleteModel(string name) @safe 827 { 828 return del(host ~ "/api/delete", makeObject(["name": JSONValue(name)])); 829 } 830 831 /++ 832 + Downloads a model from the Ollama registry. 833 + 834 + Params: 835 + name = Model name to pull. 836 + stream = Whether to stream progress (not fully supported). 837 + 838 + Returns: A `JSONValue` with pull status. 839 +/ 840 JSONValue pull(string name, bool stream = false) @safe 841 { 842 return post(host ~ "/api/pull", 843 makeObject(["name": JSONValue(name), "stream": JSONValue(stream)]), stream); 844 } 845 846 /++ 847 + Uploads a model to the Ollama registry. 848 + 849 + Params: 850 + name = Model name to push. 851 + stream = Whether to stream progress (not fully supported). 852 + 853 + Returns: A `JSONValue` with push status. 854 +/ 855 JSONValue push(string name, bool stream = false) @safe 856 { 857 return post(host ~ "/api/push", 858 makeObject(["name": JSONValue(name), "stream": JSONValue(stream)]), stream); 859 } 860 861 // ----------------------------------------------------------------------- 862 // Server Operations 863 // ----------------------------------------------------------------------- 864 865 /++ 866 + Retrieves the Ollama server version string. 867 + 868 + Returns: Version string (e.g. "0.6.2"). 869 +/ 870 string getVersion() @safe 871 { 872 return get(host ~ "/api/version")["version"].str; 873 } 874 875 /++ 876 + Lists currently running (loaded) models. 877 + 878 + Returns: Pretty-printed JSON string with model names, sizes, and expiry. 
879 +/ 880 string ps() @safe 881 { 882 return get(host ~ "/api/ps").toPrettyString(); 883 } 884 885 886 // ----------------------------------------------------------------------- 887 // OpenAI-Compatible Endpoints 888 // ----------------------------------------------------------------------- 889 890 /++ 891 + Performs an OpenAI-style chat completion. 892 + 893 + Params: 894 + model = Model name. 895 + messages = Chat history as `Message` array. 896 + maxTokens = Maximum tokens to generate (0 = unlimited). 897 + temperature = Sampling temperature (default 1.0). 898 + stream = Whether to stream (not fully supported). 899 + 900 + Returns: A `JSONValue` in OpenAI `ChatCompletion` format. 901 +/ 902 JSONValue chatCompletions( 903 string model, 904 Message[] messages, 905 int maxTokens = 0, 906 float temperature = 1.0, 907 bool stream = false, 908 ) @trusted 909 { 910 auto url = host ~ "/v1/chat/completions"; 911 JSONValue[] msgArray; 912 foreach (msg; messages) msgArray ~= msg.toJson(); 913 914 JSONValue[string] fields = [ 915 "model": JSONValue(model), 916 "messages": JSONValue(msgArray), 917 "stream": JSONValue(stream), 918 "temperature": JSONValue(temperature), 919 ]; 920 if (maxTokens > 0) 921 fields["max_tokens"] = JSONValue(maxTokens); 922 923 return post(url, makeObject(fields), stream); 924 } 925 926 /++ 927 + Performs an OpenAI-style text completion. 928 + 929 + Params: 930 + model = Model name. 931 + prompt = Input prompt. 932 + maxTokens = Maximum tokens to generate (0 = unlimited). 933 + temperature = Sampling temperature (default 1.0). 934 + stream = Whether to stream (not fully supported). 935 + 936 + Returns: A `JSONValue` in OpenAI `Completion` format. 
937 +/ 938 JSONValue completions( 939 string model, 940 string prompt, 941 int maxTokens = 0, 942 float temperature = 1.0, 943 bool stream = false, 944 ) @trusted 945 { 946 auto url = host ~ "/v1/completions"; 947 JSONValue[string] fields = [ 948 "model": JSONValue(model), 949 "prompt": JSONValue(prompt), 950 "stream": JSONValue(stream), 951 "temperature": JSONValue(temperature), 952 ]; 953 if (maxTokens > 0) 954 fields["max_tokens"] = JSONValue(maxTokens); 955 956 return post(url, makeObject(fields), stream); 957 } 958 959 /++ 960 + Lists models in OpenAI-compatible format. 961 + 962 + Returns: Pretty-printed JSON string of model data. 963 +/ 964 string getModels() @safe 965 { 966 return get(host ~ "/v1/models").toPrettyString(); 967 } 968 } 969 970 /// Default host URL for the Ollama server. 971 enum DEFAULT_HOST = "http://127.0.0.1:11434";