Hi!
In previous posts I shared how to host a Llama 2 model locally with Ollama and chat with it (view post).
I also found OllamaSharp (NuGet package and repo).
_OllamaSharp is a .NET binding for the Ollama API, making it easy to interact with Ollama using your favorite .NET languages._
So I decided to try it and build a Chat Completion and a Text Generation implementation for Semantic Kernel using this library.
The full test is a console app that uses both services with Semantic Kernel.
Text Generation Service
The Text Generation Service is the easy one. Just implement the Microsoft.SemanticKernel.TextGeneration.ITextGenerationService interface, and the code looks like this:
```csharp
// Copyright (c) 2024
// Author : Bruno Capuano
// Change Log :
// - Sample Text Generation Service for Ollama models
//
// The MIT License (MIT)
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

using Microsoft.SemanticKernel;
using OllamaSharp;

namespace sk_ollamacsharp
{
    public class OllamaTextGenerationService : Microsoft.SemanticKernel.TextGeneration.ITextGenerationService
    {
        // public properties for the Ollama endpoint url and the model name
        public string ModelUrl { get; set; }
        public string ModelName { get; set; }

        public IReadOnlyDictionary<string, object?> Attributes => throw new NotImplementedException();

        public IAsyncEnumerable<StreamingTextContent> GetStreamingTextContentsAsync(string prompt, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
        {
            throw new NotImplementedException();
        }

        public async Task<IReadOnlyList<TextContent>> GetTextContentsAsync(string prompt, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
        {
            // call the Ollama completion endpoint and wrap the answer in a TextContent
            var ollama = new OllamaApiClient(ModelUrl, ModelName);
            var completionResponse = await ollama.GetCompletion(prompt, null, cancellationToken);
            var textContent = new TextContent(completionResponse.Response);
            return new List<TextContent> { textContent };
        }
    }
}
```
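In the sample, GetStreamingTextContentsAsync just throws NotImplementedException. If you need the streaming entry point to at least return something, here is a minimal sketch of a non-streaming fallback: it reuses the same OllamaSharp GetCompletion call from above and yields the whole answer as a single chunk (no real token-by-token streaming):

```csharp
// Minimal sketch: non-streaming fallback for GetStreamingTextContentsAsync.
// It reuses the same GetCompletion call and yields the full response as one chunk.
public async IAsyncEnumerable<StreamingTextContent> GetStreamingTextContentsAsync(
    string prompt,
    PromptExecutionSettings? executionSettings = null,
    Kernel? kernel = null,
    [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default)
{
    var ollama = new OllamaApiClient(ModelUrl, ModelName);
    var completionResponse = await ollama.GetCompletion(prompt, null, cancellationToken);
    yield return new StreamingTextContent(completionResponse.Response);
}
```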
Chat Completion Service
The chat completion service requires implementing the IChatCompletionService interface. The code looks like this:
```csharp
// Copyright (c) 2024
// Author : Bruno Capuano
// Change Log :
// - Sample Chat Completion Service for Ollama models
//
// The MIT License (MIT)
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using OllamaSharp;
using OllamaSharp.Models.Chat;

namespace sk_ollamacsharp
{
    public class OllamaChatCompletionService : IChatCompletionService
    {
        // public properties for the Ollama endpoint url and the model name
        public string ModelUrl { get; set; }
        public string ModelName { get; set; }

        public IReadOnlyDictionary<string, object?> Attributes => throw new NotImplementedException();

        public async Task<IReadOnlyList<ChatMessageContent>> GetChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
        {
            var ollama = new OllamaApiClient(ModelUrl, ModelName);
            var chat = new Chat(ollama, _ => { });

            // forward the system messages from the chat history to the Ollama chat
            foreach (var message in chatHistory)
            {
                if (message.Role == AuthorRole.System)
                {
                    await chat.SendAs(ChatRole.System, message.Content);
                }
            }

            // send the last message as the question and read the model answer
            var lastMessage = chatHistory.LastOrDefault();
            string question = lastMessage.Content;
            var history = (await chat.Send(question, cancellationToken)).ToArray();
            var chatResponse = history.Last().Content;
            chatHistory.AddAssistantMessage(chatResponse);
            return chatHistory;
        }

        public IAsyncEnumerable<StreamingChatMessageContent> GetStreamingChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
        {
            throw new NotImplementedException();
        }
    }
}
```
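The same applies to GetStreamingChatMessageContentsAsync, which is also left as NotImplementedException in the sample. A minimal sketch of a fallback that simply wraps the non-streaming call above and yields the last message as a single chunk (again, not real streaming) could look like this:

```csharp
// Minimal sketch: non-streaming fallback for GetStreamingChatMessageContentsAsync.
// It reuses GetChatMessageContentsAsync and yields the last message as one chunk.
public async IAsyncEnumerable<StreamingChatMessageContent> GetStreamingChatMessageContentsAsync(
    ChatHistory chatHistory,
    PromptExecutionSettings? executionSettings = null,
    Kernel? kernel = null,
    [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default)
{
    var messages = await GetChatMessageContentsAsync(chatHistory, executionSettings, kernel, cancellationToken);
    var last = messages[^1];
    yield return new StreamingChatMessageContent(last.Role, last.Content);
}
```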
Test Chat Completion and Text Generation Services
With both services implemented, we can now use Semantic Kernel to access them.
The following code:
- Creates two services, text generation and chat completion, both backed by the OllamaSharp implementations.
- Creates a Semantic Kernel builder, registers both services, and builds a kernel.
- Uses the kernel to run a text generation sample, and then a chat history sample.
- In the chat sample, it also uses a system message to define the assistant's behavior for the conversation.
- This is a test; there is plenty of room for improvement here.
```csharp
// Copyright (c) 2024
// Author : Bruno Capuano
// Change Log :
// - Sample console application to use llama2 LLM running locally in Ubuntu with Semantic Kernel
//
// The MIT License (MIT)
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

using Microsoft.Extensions.DependencyInjection;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.TextGeneration;
using sk_ollamacsharp;

// llama2 running locally with Ollama in Ubuntu (WSL)
var ollamaChat = new OllamaChatCompletionService();
ollamaChat.ModelUrl = "http://localhost:11434";
ollamaChat.ModelName = "llama2";

var ollamaText = new OllamaTextGenerationService();
ollamaText.ModelUrl = "http://localhost:11434";
ollamaText.ModelName = "llama2";

// semantic kernel builder: register both services and build the kernel
var builder = Kernel.CreateBuilder();
builder.Services.AddKeyedSingleton<IChatCompletionService>("ollamaChat", ollamaChat);
builder.Services.AddKeyedSingleton<ITextGenerationService>("ollamaText", ollamaText);
var kernel = builder.Build();

// text generation
var textGen = kernel.GetRequiredService<ITextGenerationService>();
var response = await textGen.GetTextContentsAsync("The weather in January in Toronto is usually ");
Console.WriteLine(response[^1].Text);

// chat
var chat = kernel.GetRequiredService<IChatCompletionService>();
var history = new ChatHistory();
history.AddSystemMessage("You are a useful assistant that replies using a funny style and emojis. Your name is Goku.");
history.AddUserMessage("hi, who are you?");

// print response
var result = await chat.GetChatMessageContentsAsync(history);
Console.WriteLine(result[^1].Content);
```
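Since OllamaChatCompletionService appends the assistant reply to the ChatHistory it receives, the sample can be extended into a small interactive console loop. The loop below is a sketch and not part of the repo sample; keep in mind that the service above only forwards the system messages plus the latest message to Ollama, so earlier turns are not replayed:

```csharp
// Sketch: keep the conversation going from the console.
// The custom service already appends the assistant reply to the history,
// so each turn only needs to add the user message and print the latest answer.
while (true)
{
    Console.Write("Q: ");
    var userMessage = Console.ReadLine();
    if (string.IsNullOrWhiteSpace(userMessage))
        break;

    history.AddUserMessage(userMessage);
    var answer = await chat.GetChatMessageContentsAsync(history);
    Console.WriteLine(answer[^1].Content);
}
```

Pressing Enter on an empty line exits the loop.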
The full code is available here: https://github.com/elbruno/semantickernel-localLLMs. The main README of the repo still needs to be updated.
Happy coding!
Greetings
El Bruno
More posts in my blog ElBruno.com.
More info in https://beacons.ai/elbruno