DEV Community

SupermanSpace
SupermanSpace

Posted on

16

Unity + OpenAI Vision and Voice

Image

Hey Unity devs!🌟

Let us explore how to integrate OpenAI with Unity today, using two powerful scripts from this amazing GitHub repository. Big props to the creative minds behind it! 🙌

TextToSpeech Script 🗣️:

Have you ever wanted to convert text to speech without any effort? The TextToSpeech script can do just that! It uses OpenAI's magic to transform your text into a masterwork of audio. Here's a little sample of what it can achieve:

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using UnityEngine.Networking;
using System.Text;
using System.IO;

/// <summary>
/// When the component starts, posts a fixed text to the OpenAI audio/speech
/// endpoint and writes the returned audio bytes to an MP3 file.
/// </summary>
public class TextToSpeech : MonoBehaviour
{
    // Placeholder value; substitute a real key and keep real keys out of source control.
    private string apiKey = "YOUR_API_KEY";
    private string baseUrl = "https://api.openai.com/v1/audio/speech";
    private string model = "tts-1";
    private string voice = "alloy";
    private string inputText = "Hello World, This is a test to see the TTS of OpenAI!";
    // Relative path, so it is resolved against the process's current working directory.
    private string audioFileName = "speech.mp3";

    /// <summary>
    /// JSON body of the speech request. JsonUtility only serializes the fields of
    /// [Serializable] types; handing it an anonymous type yields "{}", which the
    /// API rejects.
    /// </summary>
    [System.Serializable]
    private class SpeechRequest
    {
        public string model;
        public string voice;
        public string input;
    }

    private void Start()
    {
        StartCoroutine(GenerateSpeech());
    }

    /// <summary>
    /// Coroutine that sends the speech request and, on success, saves the response
    /// bytes to <c>audioFileName</c>. Failures are reported through Debug.LogError.
    /// </summary>
    private IEnumerator GenerateSpeech()
    {
        var payload = new SpeechRequest
        {
            model = model,
            voice = voice,
            input = inputText
        };

        // Convert the payload to a JSON string.
        string jsonPayload = JsonUtility.ToJson(payload);

        using (UnityWebRequest www = new UnityWebRequest(baseUrl, "POST"))
        {
            www.uploadHandler = new UploadHandlerRaw(Encoding.UTF8.GetBytes(jsonPayload));
            www.downloadHandler = new DownloadHandlerBuffer();
            www.SetRequestHeader("Authorization", "Bearer " + apiKey);
            www.SetRequestHeader("Content-Type", "application/json");

            yield return www.SendWebRequest();

            if (www.result != UnityWebRequest.Result.Success)
            {
                // The response body carries the API's explanation of the failure,
                // so log it next to the transport-level error.
                Debug.LogError("Failed to generate speech: " + www.error + "\n" + www.downloadHandler.text);
                yield break;
            }

            try
            {
                // Saving the audio data as an MP3 file.
                File.WriteAllBytes(audioFileName, www.downloadHandler.data);
                Debug.Log("Audio file saved as: " + audioFileName);
            }
            catch (IOException e)
            {
                Debug.LogError("Failed to save audio file: " + e.Message);
            }
            catch (System.UnauthorizedAccessException e)
            {
                Debug.LogError("Failed to save audio file: " + e.Message);
            }
        }
    }
}
Enter fullscreen mode Exit fullscreen mode

Just enter your text, hit play, and voilà! Your text is now a melody. 🎉

Image

OpenAI Vision Script 📸:

Let us move on to discuss the AI Vision script, a script that lets you use OpenAI's GPT-4 Vision model to intelligently query images. Notice how simple it is:

using System.Collections;
using System.Collections.Generic;
using System.IO;
using UnityEngine;
using UnityEngine.Networking;

/// <summary>
/// Sends a text question together with a list of image URLs to the OpenAI chat
/// completions endpoint and logs the raw JSON response.
/// </summary>
public class AIVision : MonoBehaviour
{
    [SerializeField] private string openAIUrl = "https://api.openai.com/v1/chat/completions";
    // Placeholder value; substitute a real key and keep real keys out of source control.
    [SerializeField] private string apiKey = "YOUR_API_KEY";
    // NOTE(review): this is a preview model name; confirm it is still offered before relying on it.
    [SerializeField] private string model = "gpt-4-vision-preview";
    [SerializeField] private int maxTokens = 300;

    public string[] imageUrls;
    public string queryMessage = "What are in these images? Is there any difference between them?";

    // JsonUtility only serializes fields of [Serializable] types (anonymous types
    // come out as "{}"), so every JSON fragment of the request has its own small class.

    /// <summary>Scalar request options; the messages array is spliced in separately.</summary>
    [System.Serializable]
    private class RequestOptions
    {
        public string model;
        public int max_tokens;
    }

    /// <summary>Content part carrying the text question.</summary>
    [System.Serializable]
    private class TextPart
    {
        public string type = "text";
        public string text;
    }

    /// <summary>Wrapper object for an image location.</summary>
    [System.Serializable]
    private class ImageUrl
    {
        public string url;
    }

    /// <summary>Content part carrying one image reference.</summary>
    [System.Serializable]
    private class ImagePart
    {
        public string type = "image_url";
        public ImageUrl image_url;
    }

    void Start()
    {
        // imageUrls may never have been assigned, so check for null before reading Length.
        if (imageUrls != null && imageUrls.Length > 0)
        {
            StartCoroutine(PostImageQueryRequest(imageUrls));
        }
    }

    /// <summary>UI hook: sends the query for the currently configured image URLs.</summary>
    public void OnClickSend()
    {
        StartCoroutine(PostImageQueryRequest(imageUrls));
    }

    /// <summary>
    /// Coroutine that posts the query for <paramref name="urls"/> and logs either the
    /// response body or the error.
    /// </summary>
    /// <param name="urls">Image URLs to attach; null or empty sends the text question alone.</param>
    IEnumerator PostImageQueryRequest(string[] urls)
    {
        string json = BuildRequestJson(urls);

        // Construct the request directly: UnityWebRequest.Post(url, string) would first
        // build a body out of its second argument, which is not wanted here.
        using (UnityWebRequest webRequest = new UnityWebRequest(openAIUrl, "POST"))
        {
            byte[] jsonToSend = new System.Text.UTF8Encoding().GetBytes(json);
            webRequest.uploadHandler = new UploadHandlerRaw(jsonToSend);
            webRequest.uploadHandler.contentType = "application/json";
            webRequest.downloadHandler = new DownloadHandlerBuffer();
            webRequest.SetRequestHeader("Content-Type", "application/json");
            webRequest.SetRequestHeader("Authorization", "Bearer " + apiKey);

            yield return webRequest.SendWebRequest();

            if (webRequest.result != UnityWebRequest.Result.Success)
            {
                // The response body carries the API's explanation (e.g. for a 400),
                // so log it next to the transport-level error.
                Debug.LogError("Error: " + webRequest.error + "\n" + webRequest.downloadHandler.text);
            }
            else
            {
                Debug.Log("Response: " + webRequest.downloadHandler.text);
            }
        }
    }

    /// <summary>
    /// Builds the request JSON: the scalar options plus a single user message whose
    /// content is the array of parts from <see cref="BuildImageQueryMessages"/>.
    /// </summary>
    private string BuildRequestJson(string[] urls)
    {
        object[] parts = BuildImageQueryMessages(urls);
        var partJson = new string[parts.Length];
        for (int i = 0; i < parts.Length; i++)
        {
            // Serialized one by one because JsonUtility cannot emit an array of mixed types.
            partJson[i] = JsonUtility.ToJson(parts[i]);
        }

        string options = JsonUtility.ToJson(new RequestOptions { model = model, max_tokens = maxTokens });

        // Drop the closing brace of the options object and append the messages array to it.
        return options.Substring(0, options.Length - 1)
            + ",\"messages\":[{\"role\":\"user\",\"content\":["
            + string.Join(",", partJson)
            + "]}]}";
    }

    /// <summary>
    /// Returns the content parts of the user message: the text question first, then
    /// one image part per non-blank URL.
    /// </summary>
    private object[] BuildImageQueryMessages(string[] urls)
    {
        var messages = new List<object>
        {
            new TextPart { text = queryMessage }
        };

        if (urls != null)
        {
            foreach (var url in urls)
            {
                // Blank entries (e.g. empty lines in a URL list file) are not valid images.
                if (string.IsNullOrWhiteSpace(url))
                {
                    continue;
                }

                messages.Add(new ImagePart { image_url = new ImageUrl { url = url.Trim() } });
            }
        }

        return messages.ToArray();
    }

    /// <summary>
    /// Reads image URLs from a text file, one per line, and sends the query for them.
    /// </summary>
    /// <param name="filePath">Path of the file holding the URLs.</param>
    public void LoadImagesFromFile(string filePath)
    {
        if (!File.Exists(filePath))
        {
            Debug.LogError("File not found: " + filePath);
            return;
        }

        try
        {
            string[] lines = File.ReadAllLines(filePath);
            StartCoroutine(PostImageQueryRequest(lines));
        }
        catch (IOException e)
        {
            Debug.LogError("Error reading the file: " + e.Message);
        }
        catch (System.UnauthorizedAccessException e)
        {
            Debug.LogError("Error reading the file: " + e.Message);
        }
    }
}
Enter fullscreen mode Exit fullscreen mode

With just a snippet of code, you're equipped to query images and receive insightful responses. How cool is that? 😎

Image

How to Get Started:

  1. API Key: Grab your OpenAI API key.
  2. Integration: Copy these scripts into your Unity project.
  3. Customisation: Tweak parameters to fit your needs.
  4. Run: Hit play and enjoy!

Feel free to explore the full capabilities, and don't forget to give a shoutout to the creators of this repository!

Heroku

Build apps, not infrastructure.

Dealing with servers, hardware, and infrastructure can take up your valuable time. Discover the benefits of Heroku, the PaaS of choice for developers since 2007.

Visit Site

Top comments (1)

Collapse
 
_aa3f9424156679923b704 profile image
이현민 •

Hi, I use your code.
then Error: HTTP/1.1 400 Bad Request occured.
Do you know how to solve this error?
I didn't edit your code except adding imageurl and apikey

The Most Contextual AI Development Assistant

Pieces.app image

Our centralized storage agent works on-device, unifying various developer tools to proactively capture and enrich useful materials, streamline collaboration, and solve complex problems through a contextual understanding of your unique workflow.

👥 Ideal for solo developers, teams, and cross-company projects

Learn more

👋 Kindness is contagious

Please leave a ❤️ or a friendly comment on this post if you found it helpful!

Okay