Semantic Search Using an Instructor Model

This example shows how to use Instructor models with the pgml SDK for more advanced use cases.

Imports and Setup

JavaScript:
const pgml = require("pgml");
require("dotenv").config();

Python:
from pgml import Collection, Model, Splitter, Pipeline
from datasets import load_dataset
from dotenv import load_dotenv

Initialize Collection

JavaScript:
// Name under which the example collection is requested from the SDK.
const collectionName = "my_javascript_qai_collection";
const collection = pgml.newCollection(collectionName);

Python:
# Name under which the example collection is requested from the SDK.
collection_name = "squad_collection_1"
collection = Collection(collection_name)

Create Pipeline

JavaScript:
// Model parameters: the instruction string supplied alongside documents.
const documentParameters = {
  instruction: "Represent the Wikipedia document for retrieval: ",
};

// Instructor model, requested from the "pgml" source.
const model = pgml.newModel("hkunlp/instructor-base", "pgml", documentParameters);

// Splitter created with no arguments.
const splitter = pgml.newSplitter();

// Combine model and splitter into a named pipeline and register it on the collection.
const pipeline = pgml.newPipeline("my_javascript_qai_pipeline", model, splitter);
await collection.add_pipeline(pipeline);

Python:
model = Model("hkunlp/instructor-base", parameters={
"instruction": "Represent the Wikipedia document for retrieval: "
})
pipeline = Pipeline("squad_instruction", model, Splitter())
await collection.add_pipeline(pipeline)

Upsert Documents

// Placeholder document: replace the "..." values with a real id and text.
const placeholderDocument = { id: "...", text: "..." };

// The collection takes an array of documents.
const documents = [placeholderDocument];

await collection.upsert_documents(documents);

Python:
data = load_dataset("squad")
documents = [
{"id": ..., "text": ...} for r in data
]
await collection.upsert_documents(documents)

Query

JavaScript:
// Query-side parameters: the instruction string supplied alongside the question.
// `query` (the question text) is expected to be defined by the caller.
const queryParameters = {
  instruction:
    "Represent the Wikipedia question for retrieving supporting documents: ",
};

// Build the vector-recall query against the pipeline and fetch every result.
const recallQuery = collection.query().vector_recall(query, pipeline, queryParameters);
const queryResults = await recallQuery.fetch_all();

Python:
results = await collection.query()
.vector_recall(query, pipeline, {
"instruction": "Represent the Wikipedia question for retrieving supporting documents: "
})
.fetch_all()