DEV Community

New Bing
New Bing

Posted on

How to use Weaviate to store OpenAI embedding vectors in a Golang program

Recently, I wanted to use a vector database to store OpenAI embedding vectors.
I researched a variety of vector databases, such as Pinecone, Redis, Qdrant, Milvus, Weaviate, Chroma, and pgvector. After comparing them, I chose Weaviate as my vector database. Weaviate is developed in Golang, which makes it easier for me to install and test.

How to install Weaviate on Debian 12

First, Download Weaviate

Go to github.com/weaviate/weaviate, then find the version that is suitable for your operating system.
I chose to download weaviate-v1.20.3-linux-amd64.tar.gz

mkdir vectordb
cd vectordb
wget https://github.com/weaviate/weaviate/releases/download/v1.20.3/weaviate-v1.20.3-linux-amd64.tar.gz
Enter fullscreen mode Exit fullscreen mode

Then install Weaviate

First, extract the archive, then add the configuration.

tar -zxvf weaviate-v1.20.3-linux-amd64.tar.gz
Enter fullscreen mode Exit fullscreen mode

You need to configure authentication and data persistence through environment variables, as shown below. We use API-key authentication for testing.

export AUTHENTICATION_APIKEY_ENABLED=true
export AUTHENTICATION_APIKEY_ALLOWED_KEYS=q8Z5HN4U2w8jUDRGeGxyKkqvwjPg7w4P
export AUTHENTICATION_APIKEY_USERS=newbing@example.com
export PERSISTENCE_DATA_PATH=/home/newbing/vectordb/data
Enter fullscreen mode Exit fullscreen mode

Launch Weaviate

After extracting the archive and setting the environment variables, you can launch an instance of Weaviate.

./weaviate --host=127.0.0.1 --port=8181 --scheme=http
Enter fullscreen mode Exit fullscreen mode

Launch with supervisor

If you don't want to start or stop by hand, you can use Supervisor to manage the Weaviate instance.
The supervisor config file:

[program:weaviate]
directory=/home/newbing/vectordb/
command=/home/newbing/vectordb/weaviate --host=127.0.0.1 --port=8181 --scheme=http
autostart=true
autorestart=true
user=www
environment=HOME="/home/www",AUTHENTICATION_APIKEY_ENABLED="true",AUTHENTICATION_APIKEY_ALLOWED_KEYS="q8Z5HN4U2w8jUDRGeGxyKkqvwjPg7w4P",AUTHENTICATION_APIKEY_USERS="newbing@example.com",PERSISTENCE_DATA_PATH="/home/newbing/vectordb/data"
numprocs=1
redirect_stderr=true
stdout_logfile=/var/log/supervisor/weaviate.log
Enter fullscreen mode Exit fullscreen mode

Use Weaviate in Golang

Create weaviate client

package vector

import (
    "github.com/rs/zerolog/log"
    "github.com/spf13/viper"
    "github.com/weaviate/weaviate-go-client/v4/weaviate"
    "github.com/weaviate/weaviate-go-client/v4/weaviate/auth"
    "net/url"
)

var (
    // _cli caches the client built by GetClient so that later calls can
    // return it without creating a new one. It is nil until GetClient succeeds.
    _cli *weaviate.Client
)

// GetClient returns the package-wide Weaviate client, building it on first use.
//
// The endpoint is read from the viper key "weaviate.api" and parsed as a URL;
// its host and scheme are passed to the client configuration. The API key is
// read from the viper key "weaviate.key". Once a client has been created it is
// stored in _cli and returned by every later call.
//
// A parse failure or a client-construction failure is logged and returned
// together with a nil client.
//
// NOTE(review): _cli is read and written here without any synchronization.
func GetClient() (*weaviate.Client, error) {
    // Fast path: a client was already created by an earlier call.
    if _cli != nil {
        return _cli, nil
    }

    endpoint, err := url.Parse(viper.GetString("weaviate.api"))
    if err != nil {
        log.Error().Err(err).
            Str("func", "init").
            Str("package", "vector").
            Msg("parse api addr failed")
        return nil, err
    }

    created, err := weaviate.NewClient(weaviate.Config{
        Host:       endpoint.Host,
        Scheme:     endpoint.Scheme,
        AuthConfig: auth.ApiKey{Value: viper.GetString("weaviate.key")},
    })
    if err != nil {
        log.Error().Err(err).
            Str("func", "init").
            Str("package", "vector").
            Msg("create client failed")
        // Leave the cache empty so the next call tries again.
        _cli = nil
        return nil, err
    }

    _cli = created
    return _cli, nil
}

Enter fullscreen mode Exit fullscreen mode

Store vector to Weaviate


// Create stores one object, together with its vector, in Weaviate.
//
// class is the name of the Weaviate class to write to, props holds the
// object's properties, and vector is the embedding to attach to the object
// (for example one produced by OpenAI). The object reported back by the
// client is returned; any error from GetClient or from the create call is
// returned unchanged.
func Create(ctx context.Context, class string, props map[string]string, vector []float32) (*models.Object, error) {
    client, err := GetClient()
    if err != nil {
        return nil, err
    }

    creator := client.Data().Creator().
        WithClassName(class).
        WithProperties(props).
        WithVector(vector)

    wrapper, err := creator.Do(ctx)
    if err != nil {
        return nil, err
    }
    return wrapper.Object, nil
}
Enter fullscreen mode Exit fullscreen mode

Search vector with similarity

// Near runs a nearVector similarity search against class and returns the
// matching rows.
//
// fields lists the properties to return for each row; "_additional" with
// "certainty" and "distance" is always requested as well. vector is the query
// embedding, maxDistance is the largest distance a row may have from vector
// (smaller means more similar), and limit caps the number of rows, with 0
// meaning 10.
//
// An error is returned when the client cannot be obtained, the request fails,
// the response carries GraphQL errors, or the "Get" payload does not have the
// expected shape. A response without a "Get" key yields an empty slice.
func Near(ctx context.Context, class string, fields []string, vector []float32, maxDistance float32, limit int) ([]any, error) {
    if limit == 0 {
        limit = 10
    }
    cli, err := GetClient()
    if err != nil {
        return nil, err
    }

    // Length 0 with reserved capacity: a non-zero length would leave
    // empty-named fields in front of the requested ones.
    gqlFields := make([]graphql.Field, 0, len(fields)+1)
    for _, name := range fields {
        gqlFields = append(gqlFields, graphql.Field{Name: name})
    }
    gqlFields = append(gqlFields, graphql.Field{
        Name: "_additional",
        Fields: []graphql.Field{
            {Name: "certainty"}, // only supported if distance==cosine
            {Name: "distance"},  // always supported
        },
    })

    nearVector := cli.GraphQL().NearVectorArgBuilder().
        WithVector(vector).
        WithDistance(maxDistance)

    res, err := cli.GraphQL().Get().
        WithClassName(class).
        WithFields(gqlFields...).
        WithNearVector(nearVector).
        WithLimit(limit).
        Do(ctx)
    if err != nil {
        return nil, err
    }
    // A GraphQL failure arrives in the response body with a nil transport
    // error; report it instead of returning an empty result.
    for _, gqlErr := range res.Errors {
        if gqlErr != nil {
            return nil, errors.New("graphql query failed: " + gqlErr.Message)
        }
    }

    getRes, ok := res.Data["Get"]
    if !ok {
        return []any{}, nil
    }
    getMap, ok := getRes.(map[string]any)
    if !ok {
        return nil, errors.New("no get data found")
    }
    list, ok := getMap[class]
    if !ok {
        return nil, errors.New("data not found")
    }
    rows, ok := list.([]any)
    if !ok {
        return nil, errors.New("data not array list")
    }
    return rows, nil
}
Enter fullscreen mode Exit fullscreen mode

Find vector by attribute

// FindByAttribute returns the rows of class whose property key equals value.
//
// fields lists the properties to return for each row; "_additional" with
// "vector" is always requested as well so each row carries its vector.
//
// An error is returned when the client cannot be obtained, the request fails,
// the response carries GraphQL errors, or the "Get" payload does not have the
// expected shape. A response without a "Get" key yields an empty slice.
func FindByAttribute(ctx context.Context, class string, fields []string, key, value string) ([]any, error) {
    cli, err := GetClient()
    if err != nil {
        return nil, err
    }

    // Length 0 with reserved capacity: a non-zero length would leave
    // empty-named fields in front of the requested ones.
    gqlFields := make([]graphql.Field, 0, len(fields)+1)
    for _, name := range fields {
        gqlFields = append(gqlFields, graphql.Field{Name: name})
    }
    gqlFields = append(gqlFields, graphql.Field{
        Name: "_additional",
        Fields: []graphql.Field{
            {Name: "vector"}, // always supported
        },
    })

    where := filters.Where().
        WithPath([]string{key}).
        WithOperator(filters.Equal).
        WithValueString(value)

    res, err := cli.GraphQL().Get().
        WithClassName(class).
        WithFields(gqlFields...).
        WithWhere(where).
        Do(ctx)
    if err != nil {
        return nil, err
    }
    // A GraphQL failure arrives in the response body with a nil transport
    // error; report it instead of returning an empty result.
    for _, gqlErr := range res.Errors {
        if gqlErr != nil {
            return nil, errors.New("graphql query failed: " + gqlErr.Message)
        }
    }

    getRes, ok := res.Data["Get"]
    if !ok {
        return []any{}, nil
    }
    getMap, ok := getRes.(map[string]any)
    if !ok {
        return nil, errors.New("no get data found")
    }
    list, ok := getMap[class]
    if !ok {
        return nil, errors.New("data not found")
    }
    rows, ok := list.([]any)
    if !ok {
        return nil, errors.New("data not array list")
    }
    return rows, nil
}
Enter fullscreen mode Exit fullscreen mode

The complete Weaviate store, search, and find code

package vector

import (
    "context"
    "errors"
    "github.com/weaviate/weaviate-go-client/v4/weaviate/filters"
    "github.com/weaviate/weaviate-go-client/v4/weaviate/graphql"
    "github.com/weaviate/weaviate/entities/models"
)

// Create stores one object, together with its vector, in Weaviate.
//
// class is the name of the Weaviate class to write to, props holds the
// object's properties, and vector is the embedding to attach to the object
// (for example one produced by OpenAI). The object reported back by the
// client is returned; any error from GetClient or from the create call is
// returned unchanged.
func Create(ctx context.Context, class string, props map[string]string, vector []float32) (*models.Object, error) {
    client, err := GetClient()
    if err != nil {
        return nil, err
    }

    creator := client.Data().Creator().
        WithClassName(class).
        WithProperties(props).
        WithVector(vector)

    wrapper, err := creator.Do(ctx)
    if err != nil {
        return nil, err
    }
    return wrapper.Object, nil
}

// Near runs a nearVector similarity search against class and returns the
// matching rows.
//
// fields lists the properties to return for each row; "_additional" with
// "certainty" and "distance" is always requested as well. vector is the query
// embedding, maxDistance is the largest distance a row may have from vector
// (smaller means more similar), and limit caps the number of rows, with 0
// meaning 10.
//
// An error is returned when the client cannot be obtained, the request fails,
// the response carries GraphQL errors, or the "Get" payload does not have the
// expected shape. A response without a "Get" key yields an empty slice.
func Near(ctx context.Context, class string, fields []string, vector []float32, maxDistance float32, limit int) ([]any, error) {
    if limit == 0 {
        limit = 10
    }
    cli, err := GetClient()
    if err != nil {
        return nil, err
    }

    // Length 0 with reserved capacity: a non-zero length would leave
    // empty-named fields in front of the requested ones.
    gqlFields := make([]graphql.Field, 0, len(fields)+1)
    for _, name := range fields {
        gqlFields = append(gqlFields, graphql.Field{Name: name})
    }
    gqlFields = append(gqlFields, graphql.Field{
        Name: "_additional",
        Fields: []graphql.Field{
            {Name: "certainty"}, // only supported if distance==cosine
            {Name: "distance"},  // always supported
        },
    })

    nearVector := cli.GraphQL().NearVectorArgBuilder().
        WithVector(vector).
        WithDistance(maxDistance)

    res, err := cli.GraphQL().Get().
        WithClassName(class).
        WithFields(gqlFields...).
        WithNearVector(nearVector).
        WithLimit(limit).
        Do(ctx)
    if err != nil {
        return nil, err
    }
    // A GraphQL failure arrives in the response body with a nil transport
    // error; report it instead of returning an empty result.
    for _, gqlErr := range res.Errors {
        if gqlErr != nil {
            return nil, errors.New("graphql query failed: " + gqlErr.Message)
        }
    }

    getRes, ok := res.Data["Get"]
    if !ok {
        return []any{}, nil
    }
    getMap, ok := getRes.(map[string]any)
    if !ok {
        return nil, errors.New("no get data found")
    }
    list, ok := getMap[class]
    if !ok {
        return nil, errors.New("data not found")
    }
    rows, ok := list.([]any)
    if !ok {
        return nil, errors.New("data not array list")
    }
    return rows, nil
}

// FindByAttribute returns the rows of class whose property key equals value.
//
// fields lists the properties to return for each row; "_additional" with
// "vector" is always requested as well so each row carries its vector.
//
// An error is returned when the client cannot be obtained, the request fails,
// the response carries GraphQL errors, or the "Get" payload does not have the
// expected shape. A response without a "Get" key yields an empty slice.
func FindByAttribute(ctx context.Context, class string, fields []string, key, value string) ([]any, error) {
    cli, err := GetClient()
    if err != nil {
        return nil, err
    }

    // Length 0 with reserved capacity: a non-zero length would leave
    // empty-named fields in front of the requested ones.
    gqlFields := make([]graphql.Field, 0, len(fields)+1)
    for _, name := range fields {
        gqlFields = append(gqlFields, graphql.Field{Name: name})
    }
    gqlFields = append(gqlFields, graphql.Field{
        Name: "_additional",
        Fields: []graphql.Field{
            {Name: "vector"}, // always supported
        },
    })

    where := filters.Where().
        WithPath([]string{key}).
        WithOperator(filters.Equal).
        WithValueString(value)

    res, err := cli.GraphQL().Get().
        WithClassName(class).
        WithFields(gqlFields...).
        WithWhere(where).
        Do(ctx)
    if err != nil {
        return nil, err
    }
    // A GraphQL failure arrives in the response body with a nil transport
    // error; report it instead of returning an empty result.
    for _, gqlErr := range res.Errors {
        if gqlErr != nil {
            return nil, errors.New("graphql query failed: " + gqlErr.Message)
        }
    }

    getRes, ok := res.Data["Get"]
    if !ok {
        return []any{}, nil
    }
    getMap, ok := getRes.(map[string]any)
    if !ok {
        return nil, errors.New("no get data found")
    }
    list, ok := getMap[class]
    if !ok {
        return nil, errors.New("data not found")
    }
    rows, ok := list.([]any)
    if !ok {
        return nil, errors.New("data not array list")
    }
    return rows, nil
}
Enter fullscreen mode Exit fullscreen mode

Finally, I'd like to introduce my new project: the GPT2API.

What is GPT2API?

Website: https://aicanvas.app/gpt
GPT2API is a platform that helps you build APIs to make ChatGPT easier to use. You can build an API and share it with the community, or you can call APIs from API.Hub, which hosts the APIs shared by the community.

Features:

  1. Build API with ChatGPT commands.
  2. Test API on the website.
  3. Share it with the community.
  4. Have the community extend the API.
  5. Sample code for your project.
  6. Cheap price for calling ChatGPT: $1 for 600K tokens.

If you have any questions about GPT2API or programming, you can contact me on Twitter. You are very welcome to try GPT2API. I hope to get your comments.

Top comments (0)