Skip to content

Embeddings

📦 embeddings package

Create embeddings

1
2
3
4
5
6
7
8
embedding, err := embeddings.CreateEmbedding(
    ollamaUrl,
    llm.Query4Embedding{
        Model:  "all-minilm",
        Prompt: "Jean-Luc Picard is a fictional character in the Star Trek franchise.",
    },
    "Picard", // identifier
)

Vector stores

A vector store lets you store embeddings and search them efficiently.

In memory vector store

Create a store:

1
2
3
store := embeddings.MemoryVectorStore{
    Records: make(map[string]llm.VectorRecord),
}

Save embeddings:

store.Save(embedding)

Search embeddings:

// Create an embedding from the question, using the same model as for the
// stored document.
embeddingFromQuestion, err := embeddings.CreateEmbedding(
    ollamaUrl,
    llm.Query4Embedding{
        Model:  "all-minilm",
        Prompt: "Who is Jean-Luc Picard?",
    },
    "question",
)
if err != nil {
    log.Fatalln("😡:", err)
}

// Find the nearest vector; a failed search must not be silently ignored.
similarity, err := store.SearchMaxSimilarity(embeddingFromQuestion)
if err != nil {
    log.Fatalln("😡:", err)
}

// Wrap the prompt of the matching record in <context><doc> tags.
documentsContent := `<context><doc>` + similarity.Prompt + `</doc></context>`

👀 You will find a complete example in examples/08-embeddings.

Bbolt vector store

Bbolt is an embedded key/value database for Go.

Create a store, or open an existing one:

// Declare the store, then point it at the database file.
// NOTE(review): the Redis and Elasticsearch examples on this page receive an
// error from Initialize — confirm whether this call returns one as well.
var store embeddings.BboltVectorStore
store.Initialize("../embeddings.db")

Note

👀 You will find a complete example in:

Redis vector store

Create a store, or open an existing one:

1
2
3
4
5
6
redisStore := embeddings.RedisVectorStore{}

// Connect to the Redis server at localhost:6379.
// NOTE(review): the second argument is presumably the password and the third
// the name under which records are stored — confirm against Initialize.
err := redisStore.Initialize("localhost:6379", "", "chronicles-bucket")

if err != nil {
    log.Fatalln("😡:", err)
}

Note

👀 You will find a complete example in:

Elasticsearch vector store

Create a store, or open an existing one:

// Read the certificate file named by ELASTIC_CERT_PATH.
cert, err := os.ReadFile(os.Getenv("ELASTIC_CERT_PATH"))
if err != nil {
    // Not fatal: execution continues with whatever ReadFile returned, as it
    // did before, but the failure is now reported instead of being hidden.
    log.Println("😡:", err)
}

// Address and credentials also come from the environment; the last argument
// is the name "chronicles-index".
elasticStore := embeddings.ElasticSearchStore{}
err = elasticStore.Initialize(
    []string{
        os.Getenv("ELASTIC_ADDRESS"),
    },
    os.Getenv("ELASTIC_USERNAME"),
    os.Getenv("ELASTIC_PASSWORD"),
    cert,
    "chronicles-index",
)
if err != nil {
    log.Fatalln("😡:", err)
}

Note

👀 You will find a complete example in:

Additional data

You can attach additional data to a vector record (embedding):

1
2
3
// NOTE(review): presumably these give access to the extra text, reference and
// metadata attached to the vector record — confirm against llm.VectorRecord.
embedding.Text()
embedding.Reference()
embedding.MetaData()

Create embeddings

ollamaUrl := "http://localhost:11434"
embeddingsModel := "all-minilm"

store := embeddings.BboltVectorStore{}
store.Initialize("../embeddings.db")

// Parse all golang source code of the examples
// Create embeddings from documents and save them in the store
counter := 0
_, err := content.ForEachFile("../../examples", ".go", func(path string) error {
    data, err := os.ReadFile(path)
    if err != nil {
        // An unreadable file is handed back to ForEachFile as an error.
        return err
    }

    fmt.Println("📝 Creating embedding from:", path)
    counter++
    embedding, err := embeddings.CreateEmbedding(
        ollamaUrl,
        llm.Query4Embedding{
            Model:  embeddingsModel,
            Prompt: string(data),
        },
        strconv.Itoa(counter), // don't forget the id (unique identifier)
    )
    if err != nil {
        // Best effort: report the failure and carry on with the next file.
        fmt.Println("😡:", err)
        return nil
    }
    // Report the vector size only once the embedding is known to be valid.
    fmt.Println("📦 Created: ", len(embedding.Embedding))

    // A failed save is reported too, without stopping the walk.
    if _, err := store.Save(embedding); err != nil {
        fmt.Println("😡:", err)
    }
    return nil
})
if err != nil {
    log.Fatalln("😡:", err)
}
// Answer a question with a chat model, using the stored embeddings to build
// the context.
ollamaUrl := "http://localhost:11434"
embeddingsModel := "all-minilm"
chatModel := "magicoder:latest"

store := embeddings.BboltVectorStore{}
store.Initialize("../embeddings.db")

systemContent := `You are a Golang developer and an expert in computer programming.
Please make friendly answer for the noobs. Use the provided context and doc to answer.
Add source code examples if you can.`

// Question for the Chat system
userContent := `How to create a stream chat completion with Parakeet?`

// Create an embedding from the user question
embeddingFromQuestion, err := embeddings.CreateEmbedding(
    ollamaUrl,
    llm.Query4Embedding{
        Model:  embeddingsModel,
        Prompt: userContent,
    },
    "question",
)
if err != nil {
    log.Fatalln("😡:", err)
}
fmt.Println("🔎 searching for similarity...")

// NOTE(review): 0.3 is presumably the minimum similarity a record needs to be
// returned — confirm against SearchSimilarities.
similarities, err := store.SearchSimilarities(embeddingFromQuestion, 0.3)
if err != nil {
    // Without the search result there is no context to give to the model.
    log.Fatalln("😡:", err)
}

// Generate the context from the similarities
// This will generate a string with a content like this one:
// `<context><doc>...</doc><doc>...</doc></context>`
documentsContent := embeddings.GenerateContextFromSimilarities(similarities)

fmt.Println("🎉 similarities", len(similarities))

options := llm.SetOptions(map[string]interface{}{
    option.Temperature: 0.4,
    option.RepeatLastN: 2,
})

query := llm.Query{
    Model: chatModel,
    Messages: []llm.Message{
        {Role: "system", Content: systemContent},
        {Role: "system", Content: documentsContent},
        {Role: "user", Content: userContent},
    },
    Options: options,
    // The query goes to ChatStream below, so mark it as a streaming query.
    Stream: true,
}

fmt.Println("")
fmt.Println("🤖 answer:")

// Answer the question; the callback prints each answer it receives.
_, err = completion.ChatStream(ollamaUrl, query,
    func(answer llm.Answer) error {
        fmt.Print(answer.Message.Content)
        return nil
    })

if err != nil {
    log.Fatalln("😡:", err)
}

Other similarity search methods

SearchMaxSimilarity searches the BboltVectorStore for the vector record with the highest cosine similarity to the given embeddingFromQuestion:

// Check the search error instead of discarding it.
similarity, err := store.SearchMaxSimilarity(embeddingFromQuestion)
if err != nil {
    log.Fatalln("😡:", err)
}

SearchTopNSimilarities searches the BboltVectorStore for vector records whose cosine similarity to the given embeddingFromQuestion is greater than or equal to the given limit, and returns the top n records:

// limit and n are supplied by the caller.
// Check the search error instead of discarding it.
similarities, err := store.SearchTopNSimilarities(embeddingFromQuestion, limit, n)
if err != nil {
    log.Fatalln("😡:", err)
}