content_copy
// Pipeline schema, keyed by document field:
//   - abstract: semantic search with an embedding model, plus full text
//     search using the "english" configuration.
//   - body: split with the "recursive_character" splitter, then semantic
//     search with a second embedding model.
const pipelineSchema = {
  abstract: {
    semantic_search: {
      model: "mixedbread-ai/mxbai-embed-large-v1",
    },
    full_text_search: { configuration: "english" },
  },
  body: {
    splitter: { model: "recursive_character" },
    semantic_search: {
      model: "Alibaba-NLP/gte-base-en-v1.5",
    },
  },
};
const pipeline = korvus.newPipeline("test_pipeline", pipelineSchema);

// Create the collection and attach the pipeline to it.
const collection = korvus.newCollection("test_collection");
await collection.add_pipeline(pipeline);
content_copy
pipeline = Pipeline(
"test_pipeline",
{
"abstract": {
"semantic_search": {
"model": "mixedbread-ai/mxbai-embed-large-v1",
},
"full_text_search": {"configuration": "english"},
},
"body": {
"splitter": {"model": "recursive_character"},
"semantic_search": {
"model": "Alibaba-NLP/gte-base-en-v1.5",
},
},
},
)
collection = Collection("test_collection")
content_copy
// Pipeline schema, keyed by document field:
//   - abstract: semantic search with an embedding model, plus full text
//     search using the "english" configuration.
//   - body: split with the "recursive_character" splitter, then semantic
//     search with a second embedding model.
let pipeline_schema = serde_json::json!({
    "abstract": {
        "semantic_search": {
            "model": "mixedbread-ai/mxbai-embed-large-v1",
        },
        "full_text_search": {"configuration": "english"},
    },
    "body": {
        "splitter": {"model": "recursive_character"},
        "semantic_search": {
            "model": "Alibaba-NLP/gte-base-en-v1.5",
        },
    },
});
let mut pipeline = Pipeline::new("test_pipeline", Some(pipeline_schema.into()))?;

// Create the collection and attach the pipeline to it.
let mut collection = Collection::new("test_collection", None)?;
collection.add_pipeline(&mut pipeline).await?;
content_copy
/*
 * Pipeline schema passed as a JSON string, keyed by document field:
 *   - abstract: semantic search with an embedding model, plus full text
 *     search using the "english" configuration.
 *   - body: split with the "recursive_character" splitter, then semantic
 *     search with a second embedding model.
 * Adjacent string literals are concatenated by the compiler, so the JSON
 * can be indented here without adding whitespace to the string itself.
 */
PipelineC *pipeline = korvus_pipelinec_new(
    "test_pipeline",
    "{"
        "\"abstract\": {"
            "\"semantic_search\": {"
                "\"model\": \"mixedbread-ai/mxbai-embed-large-v1\""
            "},"
            "\"full_text_search\": {\"configuration\": \"english\"}"
        "},"
        "\"body\": {"
            "\"splitter\": {\"model\": \"recursive_character\"},"
            "\"semantic_search\": {"
                "\"model\": \"Alibaba-NLP/gte-base-en-v1.5\""
            "}"
        "}"
    "}");

/* Create the collection and attach the pipeline to it. */
CollectionC *collection = korvus_collectionc_new("test_collection", NULL);
korvus_collectionc_add_pipeline(collection, pipeline);
content_copy
// The same question is used for every search clause below.
const question = "What is the best database?";

// Search request: full text search on `abstract`, semantic search on
// `abstract` and `body` (each with its own boost), restricted by a filter
// on `user_id` and capped at 10 results.
const searchRequest = {
  query: {
    full_text_search: {
      abstract: { query: question, boost: 1.2 },
    },
    semantic_search: {
      abstract: { query: question, boost: 2.0 },
      body: {
        query: question,
        boost: 1.25,
        parameters: {
          prompt: "Represent this sentence for searching relevant passages: ",
        },
      },
    },
    filter: { user_id: { $eq: 1 } },
  },
  limit: 10,
};

const results = await collection.search(searchRequest, pipeline);
content_copy
results = await collection.search(
{
"query": {
"full_text_search": {
"abstract": {"query": "What is the best database?", "boost": 1.2}
},
"semantic_search": {
"abstract": {
"query": "What is the best database?",
"boost": 2.0,
},
"body": {
"query": "What is the best database?",
"boost": 1.25,
"parameters": {
"prompt": "Represent this sentence for searching relevant passages: ",
},
},
},
"filter": {"user_id": {"$eq": 1}},
},
"limit": 10,
},
pipeline,
)
content_copy
// The same question is used for every search clause below.
let question = "What is the best database?";

// Search request: full text search on "abstract", semantic search on
// "abstract" and "body" (each with its own boost), restricted by a filter
// on "user_id" and capped at 10 results.
let search_request = serde_json::json!({
    "query": {
        "full_text_search": {
            "abstract": {"query": question, "boost": 1.2}
        },
        "semantic_search": {
            "abstract": {"query": question, "boost": 2.0},
            "body": {
                "query": question,
                "boost": 1.25,
                "parameters": {
                    "prompt": "Represent this sentence for searching relevant passages: ",
                },
            },
        },
        "filter": {"user_id": {"$eq": 1}},
    },
    "limit": 10,
});

let results = collection
    .search(search_request.into(), &mut pipeline)
    .await?;
content_copy
/*
 * Search request passed as a JSON string: full text search on "abstract",
 * semantic search on "abstract" and "body" (each with its own boost),
 * restricted by a filter on "user_id" and capped at 10 results.
 * The string must be a complete JSON object, so it is wrapped in { ... }.
 * Continuation lines stay at column 0 because any leading whitespace after
 * a backslash-newline would become part of the string.
 */
char * results = korvus_collectionc_search(collection, "{\
\"query\": {\
\"full_text_search\": {\
\"abstract\": {\"query\": \"What is the best database?\", \"boost\": 1.2}\
},\
\"semantic_search\": {\
\"abstract\": {\
\"query\": \"What is the best database?\",\
\"boost\": 2.0\
},\
\"body\": {\
\"query\": \"What is the best database?\",\
\"boost\": 1.25,\
\"parameters\": {\
\"prompt\": \"Represent this sentence for searching relevant passages: \"\
}\
}\
},\
\"filter\": {\"user_id\": {\"$eq\": 1}}\
},\
\"limit\": 10\
}", pipeline);
Let's break this query down a little bit more. We are asking for a maximum of 10 documents ranked by `full_text_search` on the `abstract` and `semantic_search` on the `abstract` and `body`. We are also filtering out all documents that do not have the key `user_id` equal to `1`. The `full_text_search` provides a score for the `abstract`, and `semantic_search` provides scores for the `abstract` and the `body`. The `boost` parameter is a multiplier applied to these scores before they are summed together and sorted by `score` `DESC`.
More information and examples on this coming soon...