Semantic Search Using an Instructor Model

This example shows how to use Instructor models with the pgml SDK for more advanced use cases.

Imports and Setup

JavaScript:
const pgml = require("pgml");

Python:
from pgml import Collection, Model, Splitter, Pipeline
from datasets import load_dataset
from dotenv import load_dotenv

Initialize Collection

JavaScript:
const collection = pgml.newCollection("my_javascript_qai_collection");

Python:
collection = Collection("squad_collection_1")

Create Pipeline

JavaScript:
const model = pgml.newModel("hkunlp/instructor-base", "pgml", {
instruction: "Represent the Wikipedia document for retrieval: ",
const pipeline = pgml.newPipeline(
await collection.add_pipeline(pipeline);

Python:
model = Model("hkunlp/instructor-base", parameters={
"instruction": "Represent the Wikipedia document for retrieval: "
pipeline = Pipeline("squad_instruction", model, Splitter())
await collection.add_pipeline(pipeline)

Upsert Documents

JavaScript:

const documents = [
    id: "...",
    text: "...",

await collection.upsert_documents(documents);

Python:
data = load_dataset("squad")
documents = [
{"id": ..., "text": ...} for r in data
await collection.upsert_documents(documents)


Query

JavaScript:
const queryResults = await collection
.vector_recall(query, pipeline, {
"Represent the Wikipedia question for retrieving supporting documents: ",

Python:
results = await collection.query()
.vector_recall(query, pipeline, {
"instruction": "Represent the Wikipedia question for retrieving supporting documents: "