Semantic search and retrieval-augmented generation (RAG) patterns with vector databases.

Embedding Strategies

import { OpenAI } from 'openai'

// Shared client used by the embedding helpers below; constructed with no explicit options.
const openai = new OpenAI()

// Embeds every input string, sending the inputs in slices of at most 2048 per
// request (the per-request input cap noted for text-embedding-3-small) and
// collecting the vectors in the order the responses list them.
async function embedTexts(texts: string[]): Promise<number[][]> {
  const maxInputsPerRequest = 2048
  const vectors: number[][] = []
  let offset = 0

  while (offset < texts.length) {
    const { data } = await openai.embeddings.create({
      model: 'text-embedding-3-small',  // natively 1536 dimensions
      input: texts.slice(offset, offset + maxInputsPerRequest),
      dimensions: 512,                  // request shortened (Matryoshka) vectors
    })
    for (const item of data) {
      vectors.push(item.embedding)
    }
    offset += maxInputsPerRequest
  }

  return vectors
}

// Query side of asymmetric retrieval: the query is tagged with the
// `search_query: ` prefix and embedded as a single-item batch.
async function embedForSearch(query: string): Promise<number[]> {
  const prefixedQuery = `search_query: ${query}`
  const vectors = await embedTexts([prefixedQuery])
  return vectors[0]
}

// Document side of asymmetric retrieval: the document is tagged with the
// `search_document: ` prefix and embedded as a single-item batch.
async function embedForStorage(document: string): Promise<number[]> {
  const prefixedDocument = `search_document: ${document}`
  const vectors = await embedTexts([prefixedDocument])
  return vectors[0]
}

Semantic search and retrieval-augmented generation (RAG) patterns with vector databases.

Embedding Strategies

import { OpenAI } from 'openai'

// Shared client used by the embedding helpers below; constructed with no explicit options.
// NOTE(review): `openai` is also declared earlier in this file; a second `const`
// with the same name in one scope will not compile — confirm whether this
// repeated section is intentional.
const openai = new OpenAI()

// Embeds every input string, sending the inputs in slices of at most 2048 per
// request (the per-request input cap noted for text-embedding-3-small) and
// collecting the vectors in the order the responses list them.
async function embedTexts(texts: string[]): Promise<number[][]> {
  const maxInputsPerRequest = 2048
  const vectors: number[][] = []
  let offset = 0

  while (offset < texts.length) {
    const { data } = await openai.embeddings.create({
      model: 'text-embedding-3-small',  // natively 1536 dimensions
      input: texts.slice(offset, offset + maxInputsPerRequest),
      dimensions: 512,                  // request shortened (Matryoshka) vectors
    })
    for (const item of data) {
      vectors.push(item.embedding)
    }
    offset += maxInputsPerRequest
  }

  return vectors
}

// Query side of asymmetric retrieval: the query is tagged with the
// `search_query: ` prefix and embedded as a single-item batch.
async function embedForSearch(query: string): Promise<number[]> {
  const prefixedQuery = `search_query: ${query}`
  const vectors = await embedTexts([prefixedQuery])
  return vectors[0]
}

// Document side of asymmetric retrieval: the document is tagged with the
// `search_document: ` prefix and embedded as a single-item batch.
async function embedForStorage(document: string): Promise<number[]> {
  const prefixedDocument = `search_document: ${document}`
  const vectors = await embedTexts([prefixedDocument])
  return vectors[0]
}

// A slice of a source document prepared for embedding.
interface Chunk {
  id: string
  text: string
  metadata: {
    sourceId: string    // left as '' by the chunkers in this section
    chunkIndex: number  // 0-based position of the chunk in the returned array
    startChar: number   // offset of the chunk's first character in the source text
    endChar: number     // offset just past the chunk's last character
  }
}

// Builds a chunk from the half-open range [start, end) of `source`, trimming
// surrounding whitespace and adjusting the offsets to match. Returns undefined
// when the range holds nothing but whitespace, so callers never emit blank chunks.
function buildChunk(
  source: string,
  start: number,
  end: number,
  chunkIndex: number
): Chunk | undefined {
  const raw = source.slice(start, end)
  const trimmed = raw.trim()
  if (!trimmed) return undefined

  const startChar = start + (raw.length - raw.trimStart().length)
  return {
    id: crypto.randomUUID(),
    text: trimmed,
    metadata: {
      sourceId: '',
      chunkIndex,
      startChar,
      endChar: startChar + trimmed.length,
    },
  }
}

// Separator-based character splitting with overlap.
// Tries paragraph breaks, then line breaks, then sentence ends, then spaces,
// and packs the resulting parts into chunks of roughly `chunkSize` characters,
// each one repeating up to `overlap` trailing characters of its predecessor.
function chunkText(
  text: string,
  chunkSize: number = 512,
  overlap: number = 50
): Chunk[] {
  const separators = ['\n\n', '\n', '. ', ' ']
  return recursiveSplit(text, separators, chunkSize, overlap)
}

// Splits `text` on the first entry of `separators` that occurs in it (falling
// back to per-character splitting) and greedily packs consecutive parts into
// chunks. Every chunk is a contiguous, whitespace-trimmed range of `text`, so
// `text.slice(startChar, endChar) === chunk.text` always holds.
//
// `chunkSize` is a soft target: a chunk may exceed it when the carried-over
// overlap plus the next part is longer, or when a single part is longer than
// `chunkSize`, because parts are not split again with finer separators.
// Returns an empty array for empty or whitespace-only input.
function recursiveSplit(
  text: string,
  separators: string[],
  chunkSize: number,
  overlap: number
): Chunk[] {
  const chunks: Chunk[] = []
  const emit = (start: number, end: number): void => {
    const chunk = buildChunk(text, start, end, chunks.length)
    if (chunk) chunks.push(chunk)
  }

  if (text.length <= chunkSize) {
    emit(0, text.length)
    return chunks
  }

  const separator = separators.find(s => text.includes(s)) ?? ''

  // Whole number of characters to carry over. Clamped to [0, chunkSize - 1]:
  // `slice(-0)` would keep the entire previous chunk, and an overlap as large
  // as the chunk itself would make every chunk swallow all of its predecessors.
  const keep = Math.max(0, Math.trunc(Math.min(overlap, chunkSize - 1)) || 0)

  let start = 0      // offset where the chunk being assembled begins
  let end = 0        // offset just past it; start === end means nothing is pending
  let partStart = 0  // offset of the current part within `text`

  for (const part of text.split(separator)) {
    const partEnd = partStart + part.length

    if (start === end) {
      // Nothing pending: the part opens a new chunk (leading separators are dropped).
      start = partStart
      end = partEnd
    } else if (partEnd - start > chunkSize) {
      // Appending the part would overflow: flush, then restart from the overlap
      // tail of the flushed chunk, or from the part itself when overlap is off.
      emit(start, end)
      start = keep > 0 ? Math.max(start, end - keep) : partStart
      end = partEnd
    } else {
      end = partEnd
    }

    partStart = partEnd + separator.length
  }

  if (end > start) emit(start, end)
  return chunks
}

// Semantic chunking: split at topic boundaries using embeddings.
// The text is cut into sentences (runs ending in '.', '!' or '?', plus any
// unterminated tail), each sentence is embedded, and a new chunk starts
// wherever the cosine similarity between neighbouring sentences falls below
// `threshold`. Chunks are contiguous ranges of `text` with real character
// offsets. Returns an empty array for empty or whitespace-only input.
async function semanticChunk(text: string, threshold: number = 0.3): Promise<Chunk[]> {
  // Locate each non-blank sentence as a trimmed [start, end) range of `text`.
  const sentences: Array<{ start: number; end: number }> = []
  for (const match of text.matchAll(/[^.!?]+(?:[.!?]+|$)/g)) {
    const raw = match[0]
    const body = raw.trim()
    if (!body) continue
    const start = (match.index ?? 0) + (raw.length - raw.trimStart().length)
    sentences.push({ start, end: start + body.length })
  }

  // Zero or one sentence: there is no boundary to look for, so skip the
  // embedding request. (With no sentence at all, e.g. punctuation-only text,
  // the whole text becomes the single chunk.)
  if (sentences.length <= 1) {
    const only = sentences[0] ?? { start: 0, end: text.length }
    const chunk = buildChunk(text, only.start, only.end, 0)
    return chunk ? [chunk] : []
  }

  const embeddings = await embedTexts(sentences.map(s => text.slice(s.start, s.end)))

  const groups: Array<{ start: number; end: number }> = [{ ...sentences[0] }]
  for (let i = 1; i < sentences.length; i++) {
    const similarity = cosineSimilarity(embeddings[i - 1], embeddings[i])
    if (similarity < threshold) {
      // Low similarity = topic boundary = new chunk
      groups.push({ ...sentences[i] })
    } else {
      groups[groups.length - 1].end = sentences[i].end
    }
  }

  return groups.map((group, chunkIndex) => ({
    id: crypto.randomUUID(),
    text: text.slice(group.start, group.end),
    metadata: {
      sourceId: '',
      chunkIndex,
      startChar: group.start,
      endChar: group.end,
    },
  }))
}

Vector DB Patterns

Embedding Strategies

Vector DB Patterns

Embedding Strategies

Chunking Strategies for RAG

Vector Search with Metadata Filtering

Hybrid Search (Vector + Keyword)

Reranking

Checklist

Anti-Patterns

OpenAI Whisper

Voice Call

Prose

Clawhub

Sherpa ONNX TTS

OpenAI Whisper API