Design schemas, write queries for MongoDB and PostgreSQL. Use for database design, SQL/NoSQL queries, aggregation pipelines, indexes, migrations, replication, performance optimization, psql CLI.
Unified guide for working with MongoDB (document-oriented) and PostgreSQL (relational) databases. Choose the right database for your use case and master both systems.
Use when:
Database utility scripts in scripts/:
# Generate migration
python scripts/db_migrate.py --db mongodb --generate "add_user_index"
# Run backup
python scripts/db_backup.py --db postgres --output /backups/
# Check performance
python scripts/db_performance_check.py --db mongodb --threshold 100ms
MongoDB:
PostgreSQL:
The following sections are inlined from the skill's reference files for Cursor compatibility.
Aggregation pipeline for complex data transformations, analytics, and multi-stage processing.
Aggregation processes documents through multiple stages. Each stage transforms documents and passes results to next stage.
db.collection.aggregate([
{ /* Stage 1 */ },
{ /* Stage 2 */ },
{ /* Stage 3 */ }
])
// Filter early in pipeline for efficiency
db.orders.aggregate([
{ $match: { status: "completed", total: { $gte: 100 } } },
// Subsequent stages process only matched documents
])
// Multiple conditions
db.orders.aggregate([
{ $match: {
$and: [
{ orderDate: { $gte: startDate } },
{ status: { $in: ["completed", "shipped"] } }
]
}}
])
// Select and reshape fields
db.orders.aggregate([
{ $project: {
orderNumber: 1,
total: 1,
customerName: "$customer.name",
year: { $year: "$orderDate" },
_id: 0 // Exclude _id
}}
])
// Computed fields
db.orders.aggregate([
{ $project: {
total: 1,
tax: { $multiply: ["$total", 0.1] },
grandTotal: { $add: ["$total", { $multiply: ["$total", 0.1] }] }
}}
])
// Group and count
db.orders.aggregate([
{ $group: {
_id: "$status",
count: { $sum: 1 }
}}
])
// Multiple aggregations
db.orders.aggregate([
{ $group: {
_id: "$customerId",
totalSpent: { $sum: "$total" },
orderCount: { $sum: 1 },
avgOrderValue: { $avg: "$total" },
maxOrder: { $max: "$total" },
minOrder: { $min: "$total" }
}}
])
// Group by multiple fields
db.sales.aggregate([
{ $group: {
_id: {
year: { $year: "$date" },
month: { $month: "$date" },
product: "$productId"
},
revenue: { $sum: "$amount" }
}}
])
// Sort by field
db.orders.aggregate([
{ $sort: { total: -1 } } // -1: descending, 1: ascending
])
// Sort by multiple fields
db.orders.aggregate([
{ $sort: { status: 1, orderDate: -1 } }
])
// Limit results
db.orders.aggregate([
{ $sort: { orderDate: -1 } },
{ $limit: 10 }
])
// Pagination
const page = 2;
const pageSize = 20;
db.orders.aggregate([
{ $sort: { orderDate: -1 } },
{ $skip: (page - 1) * pageSize },
{ $limit: pageSize }
])
// Simple join
db.orders.aggregate([
{ $lookup: {
from: "customers",
localField: "customerId",
foreignField: "_id",
as: "customer"
}},
{ $unwind: "$customer" } // Convert array to object
])
// Pipeline join (more powerful)
db.orders.aggregate([
{ $lookup: {
from: "products",
let: { items: "$items" },
pipeline: [
{ $match: { $expr: { $in: ["$_id", "$$items.productId"] } } },
{ $project: { name: 1, price: 1 } }
],
as: "productDetails"
}}
])
// Unwind array field
db.orders.aggregate([
{ $unwind: "$items" }
])
// Preserve null/empty arrays
db.orders.aggregate([
{ $unwind: {
path: "$items",
preserveNullAndEmptyArrays: true
}}
])
// Include array index
db.orders.aggregate([
{ $unwind: {
path: "$items",
includeArrayIndex: "itemIndex"
}}
])
// Add computed fields
db.orders.aggregate([
{ $addFields: {
totalWithTax: { $multiply: ["$total", 1.1] },
year: { $year: "$orderDate" }
}}
])
// Promote subdocument to root
db.orders.aggregate([
{ $replaceRoot: { newRoot: "$customer" } }
])
// Merge fields
db.orders.aggregate([
{ $replaceRoot: {
newRoot: { $mergeObjects: ["$customer", { orderId: "$_id" }] }
}}
])
// Basic math
db.products.aggregate([
{ $project: {
name: 1,
profit: { $subtract: ["$price", "$cost"] },
margin: { $multiply: [
{ $divide: [
{ $subtract: ["$price", "$cost"] },
"$price"
]},
100
]}
}}
])
// Other operators: $add, $multiply, $divide, $mod, $abs, $ceil, $floor, $round
// String manipulation
db.users.aggregate([
{ $project: {
fullName: { $concat: ["$firstName", " ", "$lastName"] },
email: { $toLower: "$email" },
initials: { $concat: [
{ $substr: ["$firstName", 0, 1] },
{ $substr: ["$lastName", 0, 1] }
]}
}}
])
// Other: $toUpper, $trim, $split, $substr, $regexMatch
// Date extraction
db.events.aggregate([
{ $project: {
event: 1,
year: { $year: "$timestamp" },
month: { $month: "$timestamp" },
day: { $dayOfMonth: "$timestamp" },
hour: { $hour: "$timestamp" },
dayOfWeek: { $dayOfWeek: "$timestamp" }
}}
])
// Date math
db.events.aggregate([
{ $project: {
event: 1,
expiresAt: { $add: ["$createdAt", 1000 * 60 * 60 * 24 * 30] }, // +30 days
ageInDays: { $divide: [
{ $subtract: [new Date(), "$createdAt"] },
1000 * 60 * 60 * 24
]}
}}
])
// Array operations
db.posts.aggregate([
{ $project: {
title: 1,
tagCount: { $size: "$tags" },
firstTag: { $arrayElemAt: ["$tags", 0] },
lastTag: { $arrayElemAt: ["$tags", -1] },
hasMongoDBTag: { $in: ["mongodb", "$tags"] }
}}
])
// Array filtering
db.posts.aggregate([
{ $project: {
title: 1,
activeTags: {
$filter: {
input: "$tags",
as: "tag",
cond: { $ne: ["$$tag.status", "deprecated"] }
}
}
}}
])
// $cond (ternary)
db.products.aggregate([
{ $project: {
name: 1,
status: {
$cond: {
if: { $gte: ["$stock", 10] },
then: "In Stock",
else: "Low Stock"
}
}
}}
])
// $switch (multiple conditions)
db.orders.aggregate([
{ $project: {
status: 1,
priority: {
$switch: {
branches: [
{ case: { $gte: ["$total", 1000] }, then: "High" },
{ case: { $gte: ["$total", 100] }, then: "Medium" }
],
default: "Low"
}
}
}}
])
// Daily sales
db.orders.aggregate([
{ $match: { orderDate: { $gte: startDate } } },
{ $group: {
_id: {
year: { $year: "$orderDate" },
month: { $month: "$orderDate" },
day: { $dayOfMonth: "$orderDate" }
},
revenue: { $sum: "$total" },
orderCount: { $sum: 1 }
}},
{ $sort: { "_id.year": 1, "_id.month": 1, "_id.day": 1 } }
])
// Multiple aggregations in one query
db.products.aggregate([
{ $match: { category: "electronics" } },
{ $facet: {
priceRanges: [
{ $bucket: {
groupBy: "$price",
boundaries: [0, 100, 500, 1000, 5000],
default: "5000+",
output: { count: { $sum: 1 } }
}}
],
topBrands: [
{ $group: { _id: "$brand", count: { $sum: 1 } } },
{ $sort: { count: -1 } },
{ $limit: 5 }
],
avgPrice: [
{ $group: { _id: null, avg: { $avg: "$price" } } }
]
}}
])
// Running totals and moving averages
db.sales.aggregate([
{ $setWindowFields: {
partitionBy: "$region",
sortBy: { date: 1 },
output: {
runningTotal: {
$sum: "$amount",
window: { documents: ["unbounded", "current"] }
},
movingAvg: {
$avg: "$amount",
window: { documents: [-7, 0] } // Last 7 days
}
}
}}
])
// Full-text search (requires text index)
db.articles.aggregate([
{ $match: { $text: { $search: "mongodb database" } } },
{ $addFields: { score: { $meta: "textScore" } } },
{ $sort: { score: -1 } },
{ $limit: 10 }
])
// Find nearby locations
db.places.aggregate([
{ $geoNear: {
near: { type: "Point", coordinates: [lon, lat] },
distanceField: "distance",
maxDistance: 5000,
spherical: true
}},
{ $limit: 10 }
])
db.collection.aggregate(pipeline, { allowDiskUse: true })
db.collection.explain("executionStats").aggregate(pipeline)
MongoDB Atlas is fully-managed cloud database service with automated backups, monitoring, and scaling.
mongodb+srv://username:[email protected]/database?retryWrites=true&w=majority
// Node.js
const { MongoClient } = require("mongodb");
const uri = "mongodb+srv://...";
const client = new MongoClient(uri);
await client.connect();
const db = client.db("myDatabase");
# Python
from pymongo import MongoClient
uri = "mongodb+srv://..."
client = MongoClient(uri)
db = client.myDatabase
// Via Atlas UI: Database → Add Database
// Via shell
use myNewDatabase
db.createCollection("myCollection")
// Via driver
const db = client.db("myNewDatabase");
await db.createCollection("myCollection");
// Set validation rules in Atlas UI or via shell
db.createCollection("users", {
validator: {
$jsonSchema: {
bsonType: "object",
required: ["email", "name"],
properties: {
email: { bsonType: "string", pattern: "^.+@.+$" },
age: { bsonType: "int", minimum: 0 }
}
}
}
})
// IP Whitelist (Atlas UI → Network Access)
// - Add IP Address: specific IPs
// - 0.0.0.0/0: allow from anywhere (dev only)
// - VPC Peering: private connection
// Connection string includes options
mongodb+srv://cluster.mongodb.net/?retryWrites=true&w=majority&ssl=true
// Create via Atlas UI → Database Access
// - Username/password authentication
// - AWS IAM authentication
// - X.509 certificates
// Roles:
// - atlasAdmin: full access
// - readWriteAnyDatabase: read/write all databases
// - readAnyDatabase: read-only all databases
// - read/readWrite: database-specific
// Encryption at rest (automatic on M10+)
// Encryption in transit (TLS/SSL, always enabled)
// Client-Side Field Level Encryption (CSFLE)
const autoEncryptionOpts = {
keyVaultNamespace: "encryption.__keyVault",
kmsProviders: {
aws: {
accessKeyId: process.env.AWS_ACCESS_KEY_ID,
secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY
}
}
};
const client = new MongoClient(uri, { autoEncryption: autoEncryptionOpts });
// Automatic continuous backups
// - Snapshots every 6-24 hours
// - Oplog for point-in-time recovery
// - Retention: 2+ days configurable
// Restore via Atlas UI:
// 1. Clusters → cluster name → Backup tab
// 2. Select snapshot or point in time
// 3. Download or restore to cluster
# Export using mongodump
mongodump --uri="mongodb+srv://user:[email protected]/mydb" --out=/backup
# Restore using mongorestore
mongorestore --uri="mongodb+srv://..." /backup/mydb
// Atlas UI → Metrics
// Key metrics:
// - Operations per second
// - Query execution times
// - Connections
// - Network I/O
// - Disk usage
// - CPU utilization
// Real-time Performance panel
// - Current operations
// - Slow queries
// - Index suggestions
// Configure via Atlas UI → Alerts
// Alert types:
// - High connections (> threshold)
// - High CPU usage (> 80%)
// - Disk usage (> 90%)
// - Replication lag
// - Backup failures
// Notification channels:
// - Email
// - SMS
// - Slack
// - PagerDuty
// - Webhook
// Automatic index recommendations
// Atlas UI → Performance Advisor
// Analyzes:
// - Slow queries
// - Missing indexes
// - Redundant indexes
// - Index usage statistics
// Provides:
// - Index creation commands
// - Expected performance improvement
// - Schema design suggestions
// Atlas UI → Search → Create Index
// JSON definition
{
"mappings": {
"dynamic": false,
"fields": {
"title": {
"type": "string",
"analyzer": "lucene.standard"
},
"description": {
"type": "string",
"analyzer": "lucene.english"
},
"tags": {
"type": "string"
}
}
}
}
// Aggregation pipeline with $search
db.articles.aggregate([
{
$search: {
text: {
query: "mongodb database tutorial",
path: ["title", "description"],
fuzzy: { maxEdits: 1 }
}
}
},
{ $limit: 10 },
{
$project: {
title: 1,
description: 1,
score: { $meta: "searchScore" }
}
}
])
// Autocomplete
db.articles.aggregate([
{
$search: {
autocomplete: {
query: "mong",
path: "title",
tokenOrder: "sequential"
}
}
}
])
// For AI similarity search (embeddings)
{
"fields": [
{
"type": "vector",
"path": "embedding",
"numDimensions": 1536, // OpenAI embeddings
"similarity": "cosine"
}
]
}
// Search by similarity
db.products.aggregate([
{
$vectorSearch: {
index: "vector_index",
path: "embedding",
queryVector: [0.123, 0.456, ...], // 1536 dimensions
numCandidates: 100,
limit: 10
}
},
{
$project: {
name: 1,
description: 1,
score: { $meta: "vectorSearchScore" }
}
}
])
// Federated database instance
// Query data from:
// - Atlas clusters
// - AWS S3
// - HTTP endpoints
// Create virtual collection
{
"databases": [{
"name": "federated",
"collections": [{
"name": "sales",
"dataSources": [{
"storeName": "s3Store",
"path": "/sales/*.json"
}]
}]
}]
}
// Query like normal collection
use federated
db.sales.find({ region: "US" })
// Atlas UI → Charts → New Dashboard
// Data source: Atlas cluster
// Chart types: bar, line, pie, scatter, etc.
// Embed in application
<iframe
src="https://charts.mongodb.com/charts-project/embed/charts?id=..."
width="800"
height="600"
/>
# Install
npm install -g mongodb-atlas-cli
# Login
atlas auth login
# List clusters
atlas clusters list
# Create cluster
atlas clusters create myCluster --provider AWS --region US_EAST_1 --tier M10
# Manage users
atlas dbusers create --username myuser --password mypass
# Backups
atlas backups snapshots list --clusterName myCluster
const client = new MongoClient(uri, {
maxPoolSize: 50,
minPoolSize: 10
});
Enable authentication - Always use database users, not Atlas users
Restrict network access - IP whitelist or VPC peering
Monitor regularly - Set up alerts for key metrics
Index optimization - Use Performance Advisor recommendations
Backup verification - Regularly test restores
Right-size clusters - Start small, scale as needed
Multi-region - For global applications (M10+)
Read preferences - Use secondaries for read-heavy workloads
const client = new MongoClient(uri, {
readPreference: "secondaryPreferred"
});
const uri = process.env.MONGODB_URI;
// Check IP whitelist
// Verify credentials
// Test connection string
// Verbose logging
const client = new MongoClient(uri, {
serverSelectionTimeoutMS: 5000,
loggerLevel: "debug"
});
// Check Performance Advisor
// Review slow query logs
// Analyze index usage
db.collection.aggregate([{ $indexStats: {} }])
// Check connection count
db.serverStatus().connections
// MongoNetworkError: IP not whitelisted
// → Add IP to Network Access
// Authentication failed: wrong credentials
// → Verify username/password in Database Access
// Timeout: connection string or network issue
// → Check connection string format, DNS resolution
CRUD operations (Create, Read, Update, Delete) in MongoDB with query operators and atomic updates.
// Insert single document
db.users.insertOne({
name: "Alice",
email: "[email protected]",
age: 30,
createdAt: new Date()
})
// Returns: { acknowledged: true, insertedId: ObjectId("...") }
// Insert multiple documents
db.users.insertMany([
{ name: "Bob", age: 25 },
{ name: "Charlie", age: 35 },
{ name: "Diana", age: 28 }
])
// With ordered: false (continue on error)
db.users.insertMany(docs, { ordered: false })
// Find all documents
db.users.find()
// Find with filter
db.users.find({ age: { $gte: 18 } })
// Projection (select fields)
db.users.find({ status: "active" }, { name: 1, email: 1, _id: 0 })
// Cursor operations
db.users.find()
.sort({ createdAt: -1 })
.limit(10)
.skip(20)
// Get single document
db.users.findOne({ email: "[email protected]" })
// With projection
db.users.findOne({ _id: ObjectId("...") }, { name: 1, email: 1 })
// Count matching documents
db.users.countDocuments({ status: "active" })
// Fast estimate (uses metadata)
db.users.estimatedDocumentCount()
// Get unique values
db.users.distinct("status")
db.users.distinct("city", { country: "USA" })
// Update first matching document
db.users.updateOne(
{ email: "[email protected]" },
{ $set: { status: "verified" } }
)
// Upsert (insert if not exists)
db.users.updateOne(
{ email: "[email protected]" },
{ $set: { name: "New User" } },
{ upsert: true }
)
// Update all matching documents
db.users.updateMany(
{ lastLogin: { $lt: cutoffDate } },
{ $set: { status: "inactive" } }
)
// Multiple updates
db.users.updateMany(
{ status: "pending" },
{
$set: { status: "active" },
$currentDate: { updatedAt: true }
}
)
// Replace entire document (except _id)
db.users.replaceOne(
{ _id: ObjectId("...") },
{ name: "Alice", email: "[email protected]", age: 31 }
)
// Delete first matching document
db.users.deleteOne({ email: "[email protected]" })
// Delete all matching documents
db.users.deleteMany({ status: "deleted" })
// Delete all documents in collection
db.users.deleteMany({})
// $eq (equals)
db.users.find({ age: { $eq: 30 } })
db.users.find({ age: 30 }) // Implicit $eq
// $ne (not equals)
db.users.find({ status: { $ne: "deleted" } })
// $gt, $gte, $lt, $lte
db.users.find({ age: { $gt: 18, $lte: 65 } })
// $in (in array)
db.users.find({ status: { $in: ["active", "pending"] } })
// $nin (not in array)
db.users.find({ status: { $nin: ["deleted", "banned"] } })
// $and (implicit for multiple conditions)
db.users.find({ age: { $gte: 18 }, status: "active" })
// $and (explicit)
db.users.find({
$and: [
{ age: { $gte: 18 } },
{ status: "active" }
]
})
// $or
db.users.find({
$or: [
{ status: "active" },
{ verified: true }
]
})
// $not
db.users.find({ age: { $not: { $lt: 18 } } })
// $nor (not any condition)
db.users.find({
$nor: [
{ status: "deleted" },
{ status: "banned" }
]
})
// $exists
db.users.find({ phoneNumber: { $exists: true } })
db.users.find({ deletedAt: { $exists: false } })
// $type
db.users.find({ age: { $type: "int" } })
db.users.find({ age: { $type: ["int", "double"] } })
// $all (contains all elements)
db.posts.find({ tags: { $all: ["mongodb", "database"] } })
// $elemMatch (array element matches all conditions)
db.products.find({
reviews: {
$elemMatch: { rating: { $gte: 4 }, verified: true }
}
})
// $size (array length)
db.posts.find({ tags: { $size: 3 } })
// $regex (regular expression)
db.users.find({ name: { $regex: /^A/i } })
db.users.find({ email: { $regex: "@example\\.com$" } })
// Text search (requires text index)
db.articles.find({ $text: { $search: "mongodb database" } })
// $set (set field value)
db.users.updateOne(
{ _id: userId },
{ $set: { status: "active", updatedAt: new Date() } }
)
// $unset (remove field)
db.users.updateOne(
{ _id: userId },
{ $unset: { tempField: "" } }
)
// $rename (rename field)
db.users.updateMany(
{},
{ $rename: { "oldName": "newName" } }
)
// $currentDate (set to current date)
db.users.updateOne(
{ _id: userId },
{ $currentDate: { lastModified: true } }
)
// $inc (increment)
db.posts.updateOne(
{ _id: postId },
{ $inc: { views: 1, likes: 5 } }
)
// $mul (multiply)
db.products.updateOne(
{ _id: productId },
{ $mul: { price: 1.1 } } // 10% increase
)
// $min (update if new value is less)
db.scores.updateOne(
{ _id: scoreId },
{ $min: { lowestScore: 50 } }
)
// $max (update if new value is greater)
db.scores.updateOne(
{ _id: scoreId },
{ $max: { highestScore: 100 } }
)
// $push (add to array)
db.posts.updateOne(
{ _id: postId },
{ $push: { comments: { author: "Alice", text: "Great!" } } }
)
// $push with $each (multiple elements)
db.posts.updateOne(
{ _id: postId },
{ $push: { tags: { $each: ["mongodb", "database"] } } }
)
// $addToSet (add if not exists)
db.users.updateOne(
{ _id: userId },
{ $addToSet: { interests: "coding" } }
)
// $pull (remove matching elements)
db.users.updateOne(
{ _id: userId },
{ $pull: { tags: "deprecated" } }
)
// $pop (remove first/last element)
db.users.updateOne(
{ _id: userId },
{ $pop: { notifications: -1 } } // -1: first, 1: last
)
// $ (update first matching array element)
db.posts.updateOne(
{ _id: postId, "comments.author": "Alice" },
{ $set: { "comments.$.text": "Updated comment" } }
)
// $[] (update all array elements)
db.posts.updateOne(
{ _id: postId },
{ $set: { "comments.$[].verified": true } }
)
// $[<identifier>] (filtered positional)
db.posts.updateOne(
{ _id: postId },
{ $set: { "comments.$[elem].flagged": true } },
{ arrayFilters: [{ "elem.rating": { $lt: 2 } }] }
)
// Find and update (returns old document by default)
db.users.findOneAndUpdate(
{ email: "[email protected]" },
{ $set: { status: "active" } }
)
// Return new document
db.users.findOneAndUpdate(
{ email: "[email protected]" },
{ $set: { status: "active" } },
{ returnNewDocument: true }
)
// Upsert and return new
db.counters.findOneAndUpdate(
{ _id: "sequence" },
{ $inc: { value: 1 } },
{ upsert: true, returnNewDocument: true }
)
// Find and replace entire document
db.users.findOneAndReplace(
{ _id: ObjectId("...") },
{ name: "Alice", email: "[email protected]" },
{ returnNewDocument: true }
)
// Find and delete (returns deleted document)
const deletedUser = db.users.findOneAndDelete(
{ email: "[email protected]" }
)
// Ordered bulk write (stops on first error)
db.users.bulkWrite([
{ insertOne: { document: { name: "Alice" } } },
{ updateOne: {
filter: { name: "Bob" },
update: { $set: { age: 25 } }
}},
{ deleteOne: { filter: { name: "Charlie" } } }
])
// Unordered (continues on errors)
db.users.bulkWrite(operations, { ordered: false })
Index types, strategies, and performance optimization techniques for MongoDB.
Indexes improve query performance by allowing MongoDB to scan fewer documents. Without indexes, MongoDB performs collection scans (reads every document).
// Check if query uses index
db.users.find({ email: "[email protected]" }).explain("executionStats")
// Key metrics:
// - executionTimeMillis: query duration
// - totalDocsExamined: documents scanned
// - nReturned: documents returned
// - stage: IXSCAN (index) vs COLLSCAN (full scan)
// Create index on single field
db.users.createIndex({ email: 1 }) // 1: ascending, -1: descending
// Use case: queries filtering by email
db.users.find({ email: "[email protected]" })
// Drop index
db.users.dropIndex({ email: 1 })
db.users.dropIndex("email_1") // By name
// Index on multiple fields (order matters!)
db.orders.createIndex({ status: 1, createdAt: -1 })
// Supports queries on:
// 1. { status: "..." }
// 2. { status: "...", createdAt: ... }
// Does NOT efficiently support: { createdAt: ... } alone
// Left-to-right prefix rule
db.orders.createIndex({ a: 1, b: 1, c: 1 })
// Supports: {a}, {a,b}, {a,b,c}
// Not: {b}, {c}, {b,c}
// Create text index
db.articles.createIndex({ title: "text", body: "text" })
// Only one text index per collection
db.articles.createIndex({
title: "text",
body: "text",
tags: "text"
}, {
weights: {
title: 10, // Title matches weighted higher
body: 5,
tags: 3
}
})
// Search
db.articles.find({ $text: { $search: "mongodb database" } })
// Search with score
db.articles.find(
{ $text: { $search: "mongodb" } },
{ score: { $meta: "textScore" } }
).sort({ score: { $meta: "textScore" } })
// 2dsphere index (spherical geometry)
db.places.createIndex({ location: "2dsphere" })
// Document format
db.places.insertOne({
name: "Coffee Shop",
location: {
type: "Point",
coordinates: [-73.97, 40.77] // [longitude, latitude]
}
})
// Find nearby
db.places.find({
location: {
$near: {
$geometry: { type: "Point", coordinates: [-73.97, 40.77] },
$maxDistance: 5000 // meters
}
}
})
// Within polygon
db.places.find({
location: {
$geoWithin: {
$geometry: {
type: "Polygon",
coordinates: [[
[lon1, lat1], [lon2, lat2], [lon3, lat3], [lon1, lat1]
]]
}
}
}
})
// Index all fields in subdocuments
db.products.createIndex({ "attributes.$**": 1 })
// Supports queries on any nested field
db.products.find({ "attributes.color": "red" })
db.products.find({ "attributes.size": "large" })
// Specific paths only
db.products.createIndex(
{ "$**": 1 },
{ wildcardProjection: { "attributes.color": 1, "attributes.size": 1 } }
)
// Hashed index (for even distribution in sharding)
db.users.createIndex({ userId: "hashed" })
// Use case: shard key
sh.shardCollection("mydb.users", { userId: "hashed" })
// Delete documents after specified time
db.sessions.createIndex(
{ createdAt: 1 },
{ expireAfterSeconds: 3600 } // 1 hour
)
// Documents automatically deleted after createdAt + 3600 seconds
// Background task runs every 60 seconds
// Index only documents matching filter
db.orders.createIndex(
{ customerId: 1 },
{ partialFilterExpression: { status: "active" } }
)
// Index only used when query includes filter
db.orders.find({ customerId: "123", status: "active" }) // Uses index
db.orders.find({ customerId: "123" }) // Does not use index
// Enforce uniqueness
db.users.createIndex({ email: 1 }, { unique: true })
// Compound unique index
db.users.createIndex({ firstName: 1, lastName: 1 }, { unique: true })
// Sparse unique index (null values not indexed)
db.users.createIndex({ email: 1 }, { unique: true, sparse: true })
// Index only documents with field present
db.users.createIndex({ phoneNumber: 1 }, { sparse: true })
// Useful for optional fields
// Documents without phoneNumber not in index
// Show all indexes
db.collection.getIndexes()
// Index statistics
db.collection.aggregate([{ $indexStats: {} }])
// Background index (doesn't block operations)
db.collection.createIndex({ field: 1 }, { background: true })
// Index name
db.collection.createIndex({ field: 1 }, { name: "custom_index_name" })
// Case-insensitive index (collation)
db.collection.createIndex(
{ name: 1 },
{ collation: { locale: "en", strength: 2 } }
)
// Hide index (test before dropping)
db.collection.hideIndex("index_name")
// Check performance without index
// ...
// Unhide or drop
db.collection.unhideIndex("index_name")
db.collection.dropIndex("index_name")
// Rebuild all indexes (after data changes)
db.collection.reIndex()
// Useful after bulk deletions to reclaim space
// Query covered by index (no document fetch)
db.users.createIndex({ email: 1, name: 1 })
// Covered query (all fields in index)
db.users.find(
{ email: "[email protected]" },
{ email: 1, name: 1, _id: 0 } // Must exclude _id
)
// Check with explain: stage should be "IXSCAN" with no "FETCH"
// MongoDB can use multiple indexes
db.collection.createIndex({ a: 1 })
db.collection.createIndex({ b: 1 })
// Query may use both indexes
db.collection.find({ a: 1, b: 1 })
// Usually compound index is better
db.collection.createIndex({ a: 1, b: 1 })
// Force specific index
db.orders.find({ status: "active", city: "NYC" })
.hint({ status: 1, createdAt: -1 })
// Force no index (for testing)
db.orders.find({ status: "active" }).hint({ $natural: 1 })
// Optimal compound index order: Equality → Sort → Range
// Query
db.orders.find({
status: "completed", // Equality
category: "electronics" // Equality
}).sort({
orderDate: -1 // Sort
}).limit(10)
// Optimal index
db.orders.createIndex({
status: 1, // Equality first
category: 1, // Equality
orderDate: -1 // Sort last
})
// With range
db.orders.find({
status: "completed", // Equality
total: { $gte: 100 } // Range
}).sort({
orderDate: -1 // Sort
})
// Optimal index
db.orders.createIndex({
status: 1, // Equality
orderDate: -1, // Sort
total: 1 // Range last
})
// Query planner (default)
db.collection.find({ field: value }).explain()
// Execution stats
db.collection.find({ field: value }).explain("executionStats")
// All execution stats
db.collection.find({ field: value }).explain("allPlansExecution")
// Good performance indicators:
// - executionTimeMillis < 100ms
// - totalDocsExamined ≈ nReturned (examine only what's needed)
// - stage: "IXSCAN" (using index)
// - totalKeysExamined ≈ nReturned (index selectivity)
// Bad indicators:
// - stage: "COLLSCAN" (full collection scan)
// - totalDocsExamined >> nReturned (scanning too many docs)
// - executionTimeMillis > 1000ms
// High selectivity = good (returns few documents)
// Low selectivity = bad (returns many documents)
// Check selectivity
db.collection.aggregate([
{ $group: { _id: "$status", count: { $sum: 1 } } }
])
// Good for indexing: email, userId, orderId
// Bad for indexing: gender, status (few unique values)
// Always filter by tenant first
db.data.createIndex({ tenantId: 1, createdAt: -1 })
// All queries include tenantId
db.data.find({ tenantId: "tenant1", createdAt: { $gte: date } })
// Index on timestamp descending (recent data accessed more)
db.events.createIndex({ timestamp: -1 })
// Compound with filter fields
db.events.createIndex({ userId: 1, timestamp: -1 })
// Index foreign key fields
db.orders.createIndex({ customerId: 1 })
db.customers.createIndex({ _id: 1 }) // Default _id index
// Aggregation $lookup uses these indexes
db.collection.aggregate([{ $indexStats: {} }])
db.collection.stats().indexSizes
/pattern/ can't use index, /^pattern/ can// Current operations
db.currentOp()
// Slow queries (enable profiling)
db.setProfilingLevel(1, { slowms: 100 })
db.system.profile.find().sort({ ts: -1 }).limit(10)
// Index statistics
db.collection.aggregate([
{ $indexStats: {} },
{ $sort: { "accesses.ops": -1 } }
])
// Collection statistics
db.collection.stats()
// Check index sizes
db.collection.stats().indexSizes
// Total index size
db.collection.totalIndexSize()
// Recommend: indexes fit in RAM
// Monitor: db.serverStatus().mem
User management, backups, replication, maintenance, and production database administration.
-- Create user with password
CREATE USER appuser WITH PASSWORD 'secure_password';
-- Create superuser
CREATE USER admin WITH SUPERUSER PASSWORD 'admin_password';
-- Create role without login
CREATE ROLE readonly;
-- Create user with attributes
CREATE USER developer WITH
PASSWORD 'dev_pass'
CREATEDB
VALID UNTIL '2025-12-31';
-- Change password
ALTER USER appuser WITH PASSWORD 'new_password';
-- Add attributes
ALTER USER appuser WITH CREATEDB CREATEROLE;
-- Remove attributes
ALTER USER appuser WITH NOSUPERUSER;
-- Rename user
ALTER USER oldname RENAME TO newname;
-- Set connection limit
ALTER USER appuser CONNECTION LIMIT 10;
-- Create role hierarchy
CREATE ROLE readonly;
CREATE ROLE readwrite;
-- Grant role to user
GRANT readonly TO appuser;
GRANT readwrite TO developer;
-- Revoke role
REVOKE readonly FROM appuser;
-- Role membership
\du
-- Grant database access
GRANT CONNECT ON DATABASE mydb TO appuser;
-- Grant schema usage
GRANT USAGE ON SCHEMA public TO appuser;
-- Revoke access
REVOKE CONNECT ON DATABASE mydb FROM appuser;
-- Grant table permissions
GRANT SELECT ON users TO appuser;
GRANT SELECT, INSERT, UPDATE ON orders TO appuser;
GRANT ALL PRIVILEGES ON products TO appuser;
-- Grant on all tables
GRANT SELECT ON ALL TABLES IN SCHEMA public TO readonly;
-- Revoke permissions
REVOKE INSERT ON users FROM appuser;
-- Grant specific columns
GRANT SELECT (id, name, email) ON users TO appuser;
GRANT UPDATE (status) ON orders TO appuser;
-- Grant sequence usage (for SERIAL/auto-increment)
GRANT USAGE, SELECT ON SEQUENCE users_id_seq TO appuser;
GRANT ALL ON ALL SEQUENCES IN SCHEMA public TO appuser;
-- Grant execute on function
GRANT EXECUTE ON FUNCTION get_user(integer) TO appuser;
-- Set default privileges for future objects
ALTER DEFAULT PRIVILEGES IN SCHEMA public
GRANT SELECT ON TABLES TO readonly;
ALTER DEFAULT PRIVILEGES IN SCHEMA public
GRANT SELECT, INSERT, UPDATE, DELETE ON TABLES TO readwrite;
ALTER DEFAULT PRIVILEGES IN SCHEMA public
GRANT USAGE ON SEQUENCES TO readwrite;
-- Show table permissions
\dp users
-- Show role memberships
\du
-- Query permissions
SELECT grantee, privilege_type
FROM information_schema.role_table_grants
WHERE table_name = 'users';
# Dump database to SQL file
pg_dump mydb > mydb.sql
# Custom format (compressed, allows selective restore)
pg_dump -Fc mydb > mydb.dump
# Directory format (parallel dump)
pg_dump -Fd mydb -j 4 -f mydb_dir
# Specific table
pg_dump -t users mydb > users.sql
# Multiple tables
pg_dump -t users -t orders mydb > tables.sql
# Schema only
pg_dump -s mydb > schema.sql
# Data only
pg_dump -a mydb > data.sql
# Exclude table
pg_dump --exclude-table=logs mydb > mydb.sql
# With compression
pg_dump -Fc -Z 9 mydb > mydb.dump
# Dump all databases
pg_dumpall > all_databases.sql
# Only globals (roles, tablespaces)
pg_dumpall --globals-only > globals.sql
# Restore from custom format
pg_restore -d mydb mydb.dump
# Restore specific table
pg_restore -d mydb -t users mydb.dump
# List contents
pg_restore -l mydb.dump
# Parallel restore
pg_restore -d mydb -j 4 mydb.dump
# Clean database first
pg_restore -d mydb --clean mydb.dump
# Create database if not exists
pg_restore -C -d postgres mydb.dump
# Restore SQL dump
psql mydb < mydb.sql
# Create database and restore
createdb mydb
psql mydb < mydb.sql
# Single transaction
psql -1 mydb < mydb.sql
# Stop on error
psql --set ON_ERROR_STOP=on mydb < mydb.sql
#!/bin/bash
# backup.sh
# Configuration
DB_NAME="mydb"
BACKUP_DIR="/backups"
DATE=$(date +%Y%m%d_%H%M%S)
RETENTION_DAYS=7
# Create backup
pg_dump -Fc "$DB_NAME" > "$BACKUP_DIR/${DB_NAME}_${DATE}.dump"
# Remove old backups
find "$BACKUP_DIR" -name "${DB_NAME}_*.dump" -mtime +$RETENTION_DAYS -delete
# Log
echo "Backup completed: ${DB_NAME}_${DATE}.dump"
# Enable WAL archiving (postgresql.conf)
wal_level = replica
archive_mode = on
archive_command = 'cp %p /archive/%f'
max_wal_senders = 3
# Base backup
pg_basebackup -D /backup/base -Ft -z -P
# Restore to point in time
# 1. Stop PostgreSQL
# 2. Restore base backup
# 3. Create recovery.conf with recovery_target_time
# 4. Start PostgreSQL
-- Create replication user
CREATE USER replicator WITH REPLICATION PASSWORD 'replica_pass';
-- Configure postgresql.conf
wal_level = replica
max_wal_senders = 3
wal_keep_size = 64MB
-- Configure pg_hba.conf
host replication replicator replica_ip/32 md5
# Stop replica PostgreSQL
systemctl stop postgresql
# Remove data directory
rm -rf /var/lib/postgresql/data/*
# Clone from primary
pg_basebackup -h primary_host -D /var/lib/postgresql/data -U replicator -P -R
# Start replica
systemctl start postgresql
# Check replication status
SELECT * FROM pg_stat_replication; -- On primary
-- Create publication
CREATE PUBLICATION my_publication FOR ALL TABLES;
-- Or specific tables
CREATE PUBLICATION my_publication FOR TABLE users, orders;
-- Check publications
\dRp
SELECT * FROM pg_publication;
-- Create subscription
CREATE SUBSCRIPTION my_subscription
CONNECTION 'host=primary_host dbname=mydb user=replicator password=replica_pass'
PUBLICATION my_publication;
-- Check subscriptions
\dRs
SELECT * FROM pg_subscription;
-- Monitor replication
SELECT * FROM pg_stat_subscription;
-- Database size
SELECT pg_size_pretty(pg_database_size('mydb'));
-- Table sizes
SELECT schemaname, tablename,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) AS size
FROM pg_tables
ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC;
-- Index sizes
SELECT schemaname, tablename, indexname,
pg_size_pretty(pg_relation_size(indexrelid)) AS size
FROM pg_stat_user_indexes
ORDER BY pg_relation_size(indexrelid) DESC;
-- Current connections
SELECT count(*) FROM pg_stat_activity;
-- Connections by database
SELECT datname, count(*) FROM pg_stat_activity GROUP BY datname;
-- Connection limit
SHOW max_connections;
-- Kill connection
SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE pid = 12345;
-- Active queries
SELECT pid, usename, state, query, query_start
FROM pg_stat_activity
WHERE state != 'idle';
-- Long-running queries
SELECT pid, now() - query_start AS duration, query
FROM pg_stat_activity
WHERE state != 'idle'
ORDER BY duration DESC;
-- Blocking queries
SELECT blocked.pid AS blocked_pid,
blocked.query AS blocked_query,
blocking.pid AS blocking_pid,
blocking.query AS blocking_query
FROM pg_stat_activity blocked
JOIN pg_stat_activity blocking
ON blocking.pid = ANY(pg_blocking_pids(blocked.pid));
-- Should be > 0.99 for good performance
SELECT
sum(heap_blks_hit) / (sum(heap_blks_hit) + sum(heap_blks_read)) AS cache_hit_ratio
FROM pg_statio_user_tables;
-- Check for table bloat (requires pgstattuple extension)
CREATE EXTENSION pgstattuple;
SELECT schemaname, tablename,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) AS size,
pgstattuple(schemaname||'.'||tablename) AS stats
FROM pg_tables
WHERE schemaname = 'public';
-- Reclaim storage
VACUUM users;
-- Verbose
VACUUM VERBOSE users;
-- Full (locks table, rewrites)
VACUUM FULL users;
-- With analyze
VACUUM ANALYZE users;
-- All tables
VACUUM;
-- Check last vacuum
SELECT schemaname, tablename, last_vacuum, last_autovacuum
FROM pg_stat_user_tables;
-- Configure postgresql.conf
autovacuum = on
autovacuum_vacuum_threshold = 50
autovacuum_vacuum_scale_factor = 0.2
autovacuum_analyze_threshold = 50
autovacuum_analyze_scale_factor = 0.1
-- Rebuild index
REINDEX INDEX idx_users_email;
-- Rebuild all indexes on table
REINDEX TABLE users;
-- Rebuild database indexes
REINDEX DATABASE mydb;
-- Concurrently (doesn't lock)
REINDEX INDEX CONCURRENTLY idx_users_email;
-- Update statistics
ANALYZE users;
-- Specific columns
ANALYZE users(email, status);
-- All tables
ANALYZE;
-- Verbose
ANALYZE VERBOSE users;
SHOW config_file;
# Memory
shared_buffers = 4GB # 25% of RAM
work_mem = 64MB # Per operation
maintenance_work_mem = 512MB # VACUUM, CREATE INDEX
effective_cache_size = 12GB # OS cache estimate
# Query Planner
random_page_cost = 1.1 # Lower for SSD
effective_io_concurrency = 200 # Concurrent disk ops
# Connections
max_connections = 100
superuser_reserved_connections = 3
# Logging
log_destination = 'stderr'
logging_collector = on
log_directory = 'log'
log_filename = 'postgresql-%Y-%m-%d.log'
log_rotation_age = 1d
log_min_duration_statement = 100 # Log slow queries
# Replication
wal_level = replica
max_wal_senders = 3
wal_keep_size = 64MB
# Autovacuum
autovacuum = on
autovacuum_vacuum_scale_factor = 0.2
autovacuum_analyze_scale_factor = 0.1
-- Reload config without restart
SELECT pg_reload_conf();
-- Or from shell
pg_ctl reload
# postgresql.conf
ssl = on
ssl_cert_file = '/path/to/server.crt'
ssl_key_file = '/path/to/server.key'
ssl_ca_file = '/path/to/ca.crt'
# TYPE DATABASE USER ADDRESS METHOD
# Local connections
local all postgres peer
local all all md5
# Remote connections
host all all 0.0.0.0/0 md5
host all all ::0/0 md5
# Replication
host replication replicator replica_ip/32 md5
# SSL required
hostssl all all 0.0.0.0/0 md5
-- Enable RLS
ALTER TABLE users ENABLE ROW LEVEL SECURITY;
-- Create policy
CREATE POLICY user_policy ON users
USING (user_id = current_user_id());
-- Drop policy
DROP POLICY user_policy ON users;
-- View policies
\d+ users
Backups
Monitoring
Security
Maintenance
Configuration
Replication
Query optimization, indexing strategies, EXPLAIN analysis, and performance tuning for PostgreSQL.
-- Show query plan
EXPLAIN SELECT * FROM users WHERE id = 1;
-- Output shows:
-- - Execution plan nodes
-- - Estimated costs
-- - Estimated rows
-- Execute query and show actual performance
EXPLAIN ANALYZE SELECT * FROM users WHERE age > 18;
-- Shows:
-- - Actual execution time
-- - Actual rows returned
-- - Planning time
-- - Execution time
-- Verbose output
EXPLAIN (VERBOSE) SELECT * FROM users;
-- Show buffer usage
EXPLAIN (ANALYZE, BUFFERS) SELECT * FROM users WHERE active = true;
-- JSON format
EXPLAIN (FORMAT JSON, ANALYZE) SELECT * FROM users;
-- All options
EXPLAIN (ANALYZE, BUFFERS, VERBOSE, TIMING, COSTS)
SELECT * FROM users WHERE id = 1;
-- Full table scan (reads all rows)
EXPLAIN SELECT * FROM users WHERE name = 'Alice';
-- Output: Seq Scan on users
-- Indicates: no suitable index or small table
-- Uses index to find rows
EXPLAIN SELECT * FROM users WHERE id = 1;
-- Output: Index Scan using users_pkey on users
-- Best for: selective queries, small result sets
-- Query covered by index (no table access)
CREATE INDEX idx_users_email_name ON users(email, name);
EXPLAIN SELECT email, name FROM users WHERE email = '[email protected]';
-- Output: Index Only Scan using idx_users_email_name
-- Best performance: no heap fetch needed
-- Combines multiple indexes or handles large result sets
EXPLAIN SELECT * FROM users WHERE age > 18 AND status = 'active';
-- Output:
-- Bitmap Heap Scan on users
-- Recheck Cond: ...
-- -> Bitmap Index Scan on idx_age
-- Good for: moderate selectivity
-- For each row in outer table, scan inner table
EXPLAIN SELECT * FROM orders o
JOIN customers c ON o.customer_id = c.id
WHERE c.id = 1;
-- Output: Nested Loop
-- Best for: small outer table, indexed inner table
-- Build hash table from smaller table
EXPLAIN SELECT * FROM orders o
JOIN customers c ON o.customer_id = c.id;
-- Output: Hash Join
-- Best for: large tables, equality conditions
-- Both inputs sorted on join key
EXPLAIN SELECT * FROM orders o
JOIN customers c ON o.customer_id = c.id
ORDER BY o.customer_id;
-- Output: Merge Join
-- Best for: pre-sorted data, large sorted inputs
-- General purpose index
CREATE INDEX idx_users_email ON users(email);
CREATE INDEX idx_orders_date ON orders(order_date);
-- Supports: =, <, <=, >, >=, BETWEEN, IN, IS NULL
-- Supports: ORDER BY, MIN/MAX
-- Multiple columns (order matters!)
CREATE INDEX idx_users_status_created ON users(status, created_at);
-- Supports queries on:
-- - status
-- - status, created_at
-- Does NOT support: created_at alone
-- Column order: most selective first
-- Exception: match query WHERE/ORDER BY patterns
-- Index subset of rows
CREATE INDEX idx_active_users ON users(email)
WHERE status = 'active';
-- Smaller index, faster queries with matching WHERE clause
-- Query must include WHERE status = 'active' to use index
-- Index on computed value
CREATE INDEX idx_users_lower_email ON users(LOWER(email));
-- Query must use same expression
SELECT * FROM users WHERE LOWER(email) = '[email protected]';
-- For array, JSONB, full-text search
CREATE INDEX idx_products_tags ON products USING GIN(tags);
CREATE INDEX idx_documents_data ON documents USING GIN(data);
-- Array queries
SELECT * FROM products WHERE tags @> ARRAY['featured'];
-- JSONB queries
SELECT * FROM documents WHERE data @> '{"status": "active"}';
-- For geometric data, range types, full-text
CREATE INDEX idx_locations_geom ON locations USING GiST(geom);
-- Geometric queries
SELECT * FROM locations WHERE geom && ST_MakeEnvelope(...);
-- Equality comparisons only
CREATE INDEX idx_users_hash_email ON users USING HASH(email);
-- Only supports: =
-- Rarely used (B-tree usually better)
-- For very large tables with natural clustering
CREATE INDEX idx_logs_brin_created ON logs USING BRIN(created_at);
-- Tiny index size, good for append-only data
-- Best for: time-series, logging, large tables
-- Bad
SELECT * FROM users WHERE id = 1;
-- Good (only needed columns)
SELECT id, name, email FROM users WHERE id = 1;
-- Limit result set
SELECT * FROM users ORDER BY created_at DESC LIMIT 10;
-- PostgreSQL can stop early with LIMIT
-- Create index matching sort order
CREATE INDEX idx_users_created_desc ON users(created_at DESC);
-- Query uses index for sorting
SELECT * FROM users ORDER BY created_at DESC LIMIT 10;
-- Include all queried columns in index
CREATE INDEX idx_users_email_name_status ON users(email, name, status);
-- Query covered by index (no table access)
SELECT name, status FROM users WHERE email = '[email protected]';
-- Prefer EXISTS for large subqueries
-- Bad
SELECT * FROM customers
WHERE id IN (SELECT customer_id FROM orders WHERE total > 1000);
-- Good
SELECT * FROM customers c
WHERE EXISTS (SELECT 1 FROM orders o WHERE o.customer_id = c.id AND o.total > 1000);
-- Filter before joining
-- Bad
SELECT * FROM orders o
JOIN customers c ON o.customer_id = c.id
WHERE o.status = 'completed' AND c.country = 'USA';
-- Good (filter in subquery)
SELECT * FROM (
SELECT * FROM orders WHERE status = 'completed'
) o
JOIN (
SELECT * FROM customers WHERE country = 'USA'
) c ON o.customer_id = c.id;
-- Or use CTE
WITH filtered_orders AS (
SELECT * FROM orders WHERE status = 'completed'
),
filtered_customers AS (
SELECT * FROM customers WHERE country = 'USA'
)
SELECT * FROM filtered_orders o
JOIN filtered_customers c ON o.customer_id = c.id;
-- Bad (index not used)
SELECT * FROM users WHERE LOWER(email) = '[email protected]';
-- Good (create expression index)
CREATE INDEX idx_users_lower_email ON users(LOWER(email));
-- Then query uses index
-- Or store lowercase separately
ALTER TABLE users ADD COLUMN email_lower TEXT;
UPDATE users SET email_lower = LOWER(email);
CREATE INDEX idx_users_email_lower ON users(email_lower);
-- Analyze table (update statistics)
ANALYZE users;
-- Analyze specific columns
ANALYZE users(email, status);
-- Analyze all tables
ANALYZE;
-- Auto-analyze (configured in postgresql.conf)
autovacuum_analyze_threshold = 50
autovacuum_analyze_scale_factor = 0.1
-- Last analyze time
SELECT schemaname, tablename, last_analyze, last_autoanalyze
FROM pg_stat_user_tables;
-- Statistics targets (adjust for important columns)
ALTER TABLE users ALTER COLUMN email SET STATISTICS 1000;
-- Reclaim storage, update statistics
VACUUM users;
-- Verbose output
VACUUM VERBOSE users;
-- Full vacuum (rewrites table, locks table)
VACUUM FULL users;
-- Analyze after vacuum
VACUUM ANALYZE users;
-- Check autovacuum status
SELECT schemaname, tablename, last_vacuum, last_autovacuum
FROM pg_stat_user_tables;
-- Configure in postgresql.conf
autovacuum = on
autovacuum_vacuum_threshold = 50
autovacuum_vacuum_scale_factor = 0.2
-- Rebuild index
REINDEX INDEX idx_users_email;
-- Rebuild all indexes on table
REINDEX TABLE users;
-- Rebuild all indexes in schema
REINDEX SCHEMA public;
-- Current queries
SELECT pid, usename, state, query, query_start
FROM pg_stat_activity
WHERE state != 'idle';
-- Long-running queries
SELECT pid, now() - query_start AS duration, query
FROM pg_stat_activity
WHERE state != 'idle' AND now() - query_start > interval '5 minutes'
ORDER BY duration DESC;
-- Enable slow query logging (postgresql.conf)
log_min_duration_statement = 100 -- milliseconds
-- Or per session
SET log_min_duration_statement = 100;
-- Logs appear in PostgreSQL log files
-- Enable extension
CREATE EXTENSION pg_stat_statements;
-- View query statistics
SELECT query, calls, total_exec_time, mean_exec_time, rows
FROM pg_stat_statements
ORDER BY mean_exec_time DESC
LIMIT 10;
-- Reset statistics
SELECT pg_stat_statements_reset();
-- Index usage statistics
SELECT schemaname, tablename, indexname, idx_scan, idx_tup_read, idx_tup_fetch
FROM pg_stat_user_indexes
ORDER BY idx_scan;
-- Unused indexes (idx_scan = 0)
SELECT schemaname, tablename, indexname
FROM pg_stat_user_indexes
WHERE idx_scan = 0 AND indexname NOT LIKE '%_pkey';
-- Index sizes
SELECT schemaname, tablename, indexname,
pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
ORDER BY pg_relation_size(indexrelid) DESC;
-- Tables with sequential scans
SELECT schemaname, tablename, seq_scan, seq_tup_read
FROM pg_stat_user_tables
WHERE seq_scan > 0
ORDER BY seq_tup_read DESC;
-- Consider adding indexes to high seq_scan tables
# Shared buffers (25% of RAM)
shared_buffers = 4GB
# Work memory (per operation)
work_mem = 64MB
# Maintenance work memory (VACUUM, CREATE INDEX)
maintenance_work_mem = 512MB
# Effective cache size (estimate of OS cache)
effective_cache_size = 12GB
# Random page cost (lower for SSD)
random_page_cost = 1.1
# Effective IO concurrency (number of concurrent disk operations)
effective_io_concurrency = 200
# Cost of parallel query startup
parallel_setup_cost = 1000
parallel_tuple_cost = 0.1
# Max connections
max_connections = 100
# Connection pooling recommended (pgBouncer)
Index strategy
Query optimization
Statistics
Monitoring
Maintenance
Configuration
Testing
Command-line interface for PostgreSQL: connection, meta-commands, scripting, and interactive usage.
# Connect to database
psql -U username -d database -h hostname -p 5432
# Connect using URI
psql postgresql://username:password@hostname:5432/database
# Environment variables
export PGUSER=postgres
export PGPASSWORD=mypassword
export PGHOST=localhost
export PGPORT=5432
export PGDATABASE=mydb
psql
# Format: hostname:port:database:username:password
# chmod 600 ~/.pgpass