const korvus = require("korvus");
const openai = require("openai");
// --- Korvus setup -----------------------------------------------------------
// Name of the Collection this demo stores its documents in.
const COLLECTION_NAME = "openai-text-generation-demo";

// Pipeline schema: every upserted document's `text` key is chunked with the
// recursive-character splitter, and each chunk is embedded with
// mixedbread-ai/mxbai-embed-large-v1 for semantic search.
const pipelineSchema = {
  text: {
    splitter: { model: "recursive_character" },
    semantic_search: {
      model: "mixedbread-ai/mxbai-embed-large-v1",
    },
  },
};

const collection = korvus.newCollection(COLLECTION_NAME);
const pipeline = korvus.newPipeline("v1", pipelineSchema);

// --- OpenAI setup -----------------------------------------------------------
// Reads OPENAI_API_KEY from the environment (the SDK's default behavior, made
// explicit here for clarity).
const client = new openai.OpenAI({
  apiKey: process.env['OPENAI_API_KEY'], // This is the default and can be omitted
});
/**
 * End-to-end RAG demo: register the Pipeline, upsert two documents, run a
 * vector search for a question, then feed the best-matching chunk to gpt-4o
 * as context for a chat completion.
 *
 * @returns {Promise<void>}
 * @throws {Error} if the vector search returns no results, or if any
 *   korvus/OpenAI call rejects.
 */
const main = async () => {
  // Add our Pipeline to our Collection
  await collection.add_pipeline(pipeline);

  // Upsert our documents
  // The `text` key of our documents will be split and embedded per our Pipeline specification above
  const documents = [
    {
      id: "1",
      text: "Korvus is incredibly fast and easy to use.",
    },
    {
      id: "2",
      text: "Tomatoes are incredible on burgers.",
    },
  ];
  await collection.upsert_documents(documents);

  // Perform vector_search
  // We are querying for the string "Is Korvus fast?"
  // Notice that the `mixedbread-ai/mxbai-embed-large-v1` embedding model takes a prompt parameter when embedding for search
  // We specify that we only want to return the `id` of documents. If the `document` key was blank it would return the entire document with every result
  // Limit the results to 5. In our case we only have two documents in our Collection so we will only get two results
  const query = "Is Korvus fast?";
  const results = await collection.vector_search(
    {
      query: {
        fields: {
          text: {
            query: query,
            parameters: {
              prompt:
                "Represent this sentence for searching relevant passages: ",
            }
          },
        },
      },
      document: {
        keys: [
          "id"
        ]
      },
      limit: 5,
    },
    pipeline);
  console.log("Our search results: ");
  console.log(results);

  // Guard: results[0].chunk would otherwise throw an opaque TypeError when the
  // Collection is empty or the search matched nothing.
  if (results.length === 0) {
    throw new Error("vector_search returned no results; cannot build a prompt");
  }

  // After retrieving the context, we build our prompt for gpt-4o and make our completion request
  const context = results[0].chunk;
  console.log("Model output: ");
  const chatCompletion = await client.chat.completions.create({
    messages: [{ role: 'user', content: `Answer the question:\n\n${query}\n\nGiven the context:\n\n${context}` }],
    model: 'gpt-4o',
  });
  console.dir(chatCompletion, {depth: 10});
};

// Surface failures instead of leaving an unhandled promise rejection, and
// signal them to the shell via the exit code.
main()
  .then(() => console.log("DONE!"))
  .catch((err) => {
    console.error(err);
    process.exitCode = 1;
  });