Name: Ckdatabases
Author: droppii

Ckdatabases | Skills Pool

# Generate migration
python scripts/db_migrate.py --db mongodb --generate "add_user_index"

# Run backup
python scripts/db_backup.py --db postgres --output /backups/

# Check performance
python scripts/db_performance_check.py --db mongodb --threshold 100ms

db.collection.aggregate([
  { /* Stage 1 */ },
  { /* Stage 2 */ },
  { /* Stage 3 */ }
])

// Filter early in pipeline for efficiency
db.orders.aggregate([
  { $match: { status: "completed", total: { $gte: 100 } } },
  // Subsequent stages process only matched documents
])

// Multiple conditions
db.orders.aggregate([
  { $match: {
    $and: [
      { orderDate: { $gte: startDate } },
      { status: { $in: ["completed", "shipped"] } }
    ]
  }}
])

// Select and reshape fields
db.orders.aggregate([
  { $project: {
    orderNumber: 1,
    total: 1,
    customerName: "$customer.name",
    year: { $year: "$orderDate" },
    _id: 0  // Exclude _id
  }}
])

// Computed fields
db.orders.aggregate([
  { $project: {
    total: 1,
    tax: { $multiply: ["$total", 0.1] },
    grandTotal: { $add: ["$total", { $multiply: ["$total", 0.1] }] }
  }}
])

// Group and count
db.orders.aggregate([
  { $group: {
    _id: "$status",
    count: { $sum: 1 }
  }}
])

// Multiple aggregations
db.orders.aggregate([
  { $group: {
    _id: "$customerId",
    totalSpent: { $sum: "$total" },
    orderCount: { $sum: 1 },
    avgOrderValue: { $avg: "$total" },
    maxOrder: { $max: "$total" },
    minOrder: { $min: "$total" }
  }}
])

// Group by multiple fields
db.sales.aggregate([
  { $group: {
    _id: {
      year: { $year: "$date" },
      month: { $month: "$date" },
      product: "$productId"
    },
    revenue: { $sum: "$amount" }
  }}
])

// Sort by field
db.orders.aggregate([
  { $sort: { total: -1 } }  // -1: descending, 1: ascending
])

// Sort by multiple fields
db.orders.aggregate([
  { $sort: { status: 1, orderDate: -1 } }
])

// Limit results
db.orders.aggregate([
  { $sort: { orderDate: -1 } },
  { $limit: 10 }
])

// Pagination
const page = 2;
const pageSize = 20;
db.orders.aggregate([
  { $sort: { orderDate: -1 } },
  { $skip: (page - 1) * pageSize },
  { $limit: pageSize }
])

// Simple join
db.orders.aggregate([
  { $lookup: {
    from: "customers",
    localField: "customerId",
    foreignField: "_id",
    as: "customer"
  }},
  { $unwind: "$customer" }  // Convert array to object
])

// Pipeline join (more powerful)
db.orders.aggregate([
  { $lookup: {
    from: "products",
    let: { items: "$items" },
    pipeline: [
      { $match: { $expr: { $in: ["$_id", "$$items.productId"] } } },
      { $project: { name: 1, price: 1 } }
    ],
    as: "productDetails"
  }}
])

// Unwind array field
db.orders.aggregate([
  { $unwind: "$items" }
])

// Preserve null/empty arrays
db.orders.aggregate([
  { $unwind: {
    path: "$items",
    preserveNullAndEmptyArrays: true
  }}
])

// Include array index
db.orders.aggregate([
  { $unwind: {
    path: "$items",
    includeArrayIndex: "itemIndex"
  }}
])

// Add computed fields
db.orders.aggregate([
  { $addFields: {
    totalWithTax: { $multiply: ["$total", 1.1] },
    year: { $year: "$orderDate" }
  }}
])

// Promote subdocument to root
db.orders.aggregate([
  { $replaceRoot: { newRoot: "$customer" } }
])

// Merge fields
db.orders.aggregate([
  { $replaceRoot: {
    newRoot: { $mergeObjects: ["$customer", { orderId: "$_id" }] }
  }}
])

// Basic math
db.products.aggregate([
  { $project: {
    name: 1,
    profit: { $subtract: ["$price", "$cost"] },
    margin: { $multiply: [
      { $divide: [
        { $subtract: ["$price", "$cost"] },
        "$price"
      ]},
      100
    ]}
  }}
])

// Other operators: $add, $multiply, $divide, $mod, $abs, $ceil, $floor, $round

// String manipulation
db.users.aggregate([
  { $project: {
    fullName: { $concat: ["$firstName", " ", "$lastName"] },
    email: { $toLower: "$email" },
    initials: { $concat: [
      { $substr: ["$firstName", 0, 1] },
      { $substr: ["$lastName", 0, 1] }
    ]}
  }}
])

// Other: $toUpper, $trim, $split, $substr, $regexMatch

// Date extraction
db.events.aggregate([
  { $project: {
    event: 1,
    year: { $year: "$timestamp" },
    month: { $month: "$timestamp" },
    day: { $dayOfMonth: "$timestamp" },
    hour: { $hour: "$timestamp" },
    dayOfWeek: { $dayOfWeek: "$timestamp" }
  }}
])

// Date math
db.events.aggregate([
  { $project: {
    event: 1,
    expiresAt: { $add: ["$createdAt", 1000 * 60 * 60 * 24 * 30] }, // +30 days
    ageInDays: { $divide: [
      { $subtract: [new Date(), "$createdAt"] },
      1000 * 60 * 60 * 24
    ]}
  }}
])

// Array operations
db.posts.aggregate([
  { $project: {
    title: 1,
    tagCount: { $size: "$tags" },
    firstTag: { $arrayElemAt: ["$tags", 0] },
    lastTag: { $arrayElemAt: ["$tags", -1] },
    hasMongoDBTag: { $in: ["mongodb", "$tags"] }
  }}
])

// Array filtering
db.posts.aggregate([
  { $project: {
    title: 1,
    activeTags: {
      $filter: {
        input: "$tags",
        as: "tag",
        cond: { $ne: ["$$tag.status", "deprecated"] }
      }
    }
  }}
])

// $cond (ternary)
db.products.aggregate([
  { $project: {
    name: 1,
    status: {
      $cond: {
        if: { $gte: ["$stock", 10] },
        then: "In Stock",
        else: "Low Stock"
      }
    }
  }}
])

// $switch (multiple conditions)
db.orders.aggregate([
  { $project: {
    status: 1,
    priority: {
      $switch: {
        branches: [
          { case: { $gte: ["$total", 1000] }, then: "High" },
          { case: { $gte: ["$total", 100] }, then: "Medium" }
        ],
        default: "Low"
      }
    }
  }}
])

// Daily sales
db.orders.aggregate([
  { $match: { orderDate: { $gte: startDate } } },
  { $group: {
    _id: {
      year: { $year: "$orderDate" },
      month: { $month: "$orderDate" },
      day: { $dayOfMonth: "$orderDate" }
    },
    revenue: { $sum: "$total" },
    orderCount: { $sum: 1 }
  }},
  { $sort: { "_id.year": 1, "_id.month": 1, "_id.day": 1 } }
])

// Multiple aggregations in one query
db.products.aggregate([
  { $match: { category: "electronics" } },
  { $facet: {
    priceRanges: [
      { $bucket: {
        groupBy: "$price",
        boundaries: [0, 100, 500, 1000, 5000],
        default: "5000+",
        output: { count: { $sum: 1 } }
      }}
    ],
    topBrands: [
      { $group: { _id: "$brand", count: { $sum: 1 } } },
      { $sort: { count: -1 } },
      { $limit: 5 }
    ],
    avgPrice: [
      { $group: { _id: null, avg: { $avg: "$price" } } }
    ]
  }}
])

// Running totals and moving averages
db.sales.aggregate([
  { $setWindowFields: {
    partitionBy: "$region",
    sortBy: { date: 1 },
    output: {
      runningTotal: {
        $sum: "$amount",
        window: { documents: ["unbounded", "current"] }
      },
      movingAvg: {
        $avg: "$amount",
        window: { documents: [-7, 0] }  // Last 7 days
      }
    }
  }}
])

// Full-text search (requires text index)
db.articles.aggregate([
  { $match: { $text: { $search: "mongodb database" } } },
  { $addFields: { score: { $meta: "textScore" } } },
  { $sort: { score: -1 } },
  { $limit: 10 }
])

// Find nearby locations
db.places.aggregate([
  { $geoNear: {
    near: { type: "Point", coordinates: [lon, lat] },
    distanceField: "distance",
    maxDistance: 5000,
    spherical: true
  }},
  { $limit: 10 }
])

db.collection.aggregate(pipeline, { allowDiskUse: true })

db.collection.explain("executionStats").aggregate(pipeline)

mongodb+srv://username:[email protected]/database?retryWrites=true&w=majority

// Node.js
const { MongoClient } = require("mongodb");
const uri = "mongodb+srv://...";
const client = new MongoClient(uri);

await client.connect();
const db = client.db("myDatabase");

# Python
from pymongo import MongoClient
uri = "mongodb+srv://..."
client = MongoClient(uri)
db = client.myDatabase

// Via Atlas UI: Database → Add Database
// Via shell
use myNewDatabase
db.createCollection("myCollection")

// Via driver
const db = client.db("myNewDatabase");
await db.createCollection("myCollection");

// Set validation rules in Atlas UI or via shell
db.createCollection("users", {
  validator: {
    $jsonSchema: {
      bsonType: "object",
      required: ["email", "name"],
      properties: {
        email: { bsonType: "string", pattern: "^.+@.+$" },
        age: { bsonType: "int", minimum: 0 }
      }
    }
  }
})

// IP Whitelist (Atlas UI → Network Access)
// - Add IP Address: specific IPs
// - 0.0.0.0/0: allow from anywhere (dev only)
// - VPC Peering: private connection

// Connection string includes options
mongodb+srv://cluster.mongodb.net/?retryWrites=true&w=majority&ssl=true

// Create via Atlas UI → Database Access
// - Username/password authentication
// - AWS IAM authentication
// - X.509 certificates

// Roles:
// - atlasAdmin: full access
// - readWriteAnyDatabase: read/write all databases
// - readAnyDatabase: read-only all databases
// - read/readWrite: database-specific

// Encryption at rest (automatic on M10+)
// Encryption in transit (TLS/SSL, always enabled)

// Client-Side Field Level Encryption (CSFLE)
const autoEncryptionOpts = {
  keyVaultNamespace: "encryption.__keyVault",
  kmsProviders: {
    aws: {
      accessKeyId: process.env.AWS_ACCESS_KEY_ID,
      secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY
    }
  }
};

const client = new MongoClient(uri, { autoEncryption: autoEncryptionOpts });

// Automatic continuous backups
// - Snapshots every 6-24 hours
// - Oplog for point-in-time recovery
// - Retention: 2+ days configurable

// Restore via Atlas UI:
// 1. Clusters → cluster name → Backup tab
// 2. Select snapshot or point in time
// 3. Download or restore to cluster

# Export using mongodump
mongodump --uri="mongodb+srv://user:[email protected]/mydb" --out=/backup

# Restore using mongorestore
mongorestore --uri="mongodb+srv://..." /backup/mydb

// Atlas UI → Metrics
// Key metrics:
// - Operations per second
// - Query execution times
// - Connections
// - Network I/O
// - Disk usage
// - CPU utilization

// Real-time Performance panel
// - Current operations
// - Slow queries
// - Index suggestions

// Configure via Atlas UI → Alerts
// Alert types:
// - High connections (> threshold)
// - High CPU usage (> 80%)
// - Disk usage (> 90%)
// - Replication lag
// - Backup failures

// Notification channels:
// - Email
// - SMS
// - Slack
// - PagerDuty
// - Webhook

// Automatic index recommendations
// Atlas UI → Performance Advisor

// Analyzes:
// - Slow queries
// - Missing indexes
// - Redundant indexes
// - Index usage statistics

// Provides:
// - Index creation commands
// - Expected performance improvement
// - Schema design suggestions

// Atlas UI → Search → Create Index

// JSON definition
{
  "mappings": {
    "dynamic": false,
    "fields": {
      "title": {
        "type": "string",
        "analyzer": "lucene.standard"
      },
      "description": {
        "type": "string",
        "analyzer": "lucene.english"
      },
      "tags": {
        "type": "string"
      }
    }
  }
}

// Aggregation pipeline with $search
db.articles.aggregate([
  {
    $search: {
      text: {
        query: "mongodb database tutorial",
        path: ["title", "description"],
        fuzzy: { maxEdits: 1 }
      }
    }
  },
  { $limit: 10 },
  {
    $project: {
      title: 1,
      description: 1,
      score: { $meta: "searchScore" }
    }
  }
])

// Autocomplete
db.articles.aggregate([
  {
    $search: {
      autocomplete: {
        query: "mong",
        path: "title",
        tokenOrder: "sequential"
      }
    }
  }
])

// For AI similarity search (embeddings)
{
  "fields": [
    {
      "type": "vector",
      "path": "embedding",
      "numDimensions": 1536,  // OpenAI embeddings
      "similarity": "cosine"
    }
  ]
}

// Search by similarity
db.products.aggregate([
  {
    $vectorSearch: {
      index: "vector_index",
      path: "embedding",
      queryVector: [0.123, 0.456, ...],  // 1536 dimensions
      numCandidates: 100,
      limit: 10
    }
  },
  {
    $project: {
      name: 1,
      description: 1,
      score: { $meta: "vectorSearchScore" }
    }
  }
])

// Federated database instance
// Query data from:
// - Atlas clusters
// - AWS S3
// - HTTP endpoints

// Create virtual collection
{
  "databases": [{
    "name": "federated",
    "collections": [{
      "name": "sales",
      "dataSources": [{
        "storeName": "s3Store",
        "path": "/sales/*.json"
      }]
    }]
  }]
}

// Query like normal collection
use federated
db.sales.find({ region: "US" })

// Atlas UI → Charts → New Dashboard
// Data source: Atlas cluster
// Chart types: bar, line, pie, scatter, etc.

// Embed in application
<iframe
  src="https://charts.mongodb.com/charts-project/embed/charts?id=..."
  width="800"
  height="600"
/>

# Install
npm install -g mongodb-atlas-cli

# Login
atlas auth login

# List clusters
atlas clusters list

# Create cluster
atlas clusters create myCluster --provider AWS --region US_EAST_1 --tier M10

# Manage users
atlas dbusers create --username myuser --password mypass

# Backups
atlas backups snapshots list --clusterName myCluster

const client = new MongoClient(uri, {
  maxPoolSize: 50,
  minPoolSize: 10
});

const client = new MongoClient(uri, {
  readPreference: "secondaryPreferred"
});

const uri = process.env.MONGODB_URI;

// Check IP whitelist
// Verify credentials
// Test connection string

// Verbose logging
const client = new MongoClient(uri, {
  serverSelectionTimeoutMS: 5000,
  loggerLevel: "debug"
});

// Check Performance Advisor
// Review slow query logs
// Analyze index usage
db.collection.aggregate([{ $indexStats: {} }])

// Check connection count
db.serverStatus().connections

// MongoNetworkError: IP not whitelisted
// → Add IP to Network Access

// Authentication failed: wrong credentials
// → Verify username/password in Database Access

// Timeout: connection string or network issue
// → Check connection string format, DNS resolution

// Insert single document
db.users.insertOne({
  name: "Alice",
  email: "[email protected]",
  age: 30,
  createdAt: new Date()
})

// Returns: { acknowledged: true, insertedId: ObjectId("...") }

// Insert multiple documents
db.users.insertMany([
  { name: "Bob", age: 25 },
  { name: "Charlie", age: 35 },
  { name: "Diana", age: 28 }
])

// With ordered: false (continue on error)
db.users.insertMany(docs, { ordered: false })

// Find all documents
db.users.find()

// Find with filter
db.users.find({ age: { $gte: 18 } })

// Projection (select fields)
db.users.find({ status: "active" }, { name: 1, email: 1, _id: 0 })

// Cursor operations
db.users.find()
  .sort({ createdAt: -1 })
  .limit(10)
  .skip(20)

// Get single document
db.users.findOne({ email: "[email protected]" })

// With projection
db.users.findOne({ _id: ObjectId("...") }, { name: 1, email: 1 })

// Count matching documents
db.users.countDocuments({ status: "active" })

// Fast estimate (uses metadata)
db.users.estimatedDocumentCount()

// Get unique values
db.users.distinct("status")
db.users.distinct("city", { country: "USA" })

// Update first matching document
db.users.updateOne(
  { email: "[email protected]" },
  { $set: { status: "verified" } }
)

// Upsert (insert if not exists)
db.users.updateOne(
  { email: "[email protected]" },
  { $set: { name: "New User" } },
  { upsert: true }
)

// Update all matching documents
db.users.updateMany(
  { lastLogin: { $lt: cutoffDate } },
  { $set: { status: "inactive" } }
)

// Multiple updates
db.users.updateMany(
  { status: "pending" },
  {
    $set: { status: "active" },
    $currentDate: { updatedAt: true }
  }
)

// Replace entire document (except _id)
db.users.replaceOne(
  { _id: ObjectId("...") },
  { name: "Alice", email: "[email protected]", age: 31 }
)

// Delete first matching document
db.users.deleteOne({ email: "[email protected]" })

// Delete all matching documents
db.users.deleteMany({ status: "deleted" })

// Delete all documents in collection
db.users.deleteMany({})

// $eq (equals)
db.users.find({ age: { $eq: 30 } })
db.users.find({ age: 30 })  // Implicit $eq

// $ne (not equals)
db.users.find({ status: { $ne: "deleted" } })

// $gt, $gte, $lt, $lte
db.users.find({ age: { $gt: 18, $lte: 65 } })

// $in (in array)
db.users.find({ status: { $in: ["active", "pending"] } })

// $nin (not in array)
db.users.find({ status: { $nin: ["deleted", "banned"] } })

// $and (implicit for multiple conditions)
db.users.find({ age: { $gte: 18 }, status: "active" })

// $and (explicit)
db.users.find({
  $and: [
    { age: { $gte: 18 } },
    { status: "active" }
  ]
})

// $or
db.users.find({
  $or: [
    { status: "active" },
    { verified: true }
  ]
})

// $not
db.users.find({ age: { $not: { $lt: 18 } } })

// $nor (not any condition)
db.users.find({
  $nor: [
    { status: "deleted" },
    { status: "banned" }
  ]
})

// $exists
db.users.find({ phoneNumber: { $exists: true } })
db.users.find({ deletedAt: { $exists: false } })

// $type
db.users.find({ age: { $type: "int" } })
db.users.find({ age: { $type: ["int", "double"] } })

// $all (contains all elements)
db.posts.find({ tags: { $all: ["mongodb", "database"] } })

// $elemMatch (array element matches all conditions)
db.products.find({
  reviews: {
    $elemMatch: { rating: { $gte: 4 }, verified: true }
  }
})

// $size (array length)
db.posts.find({ tags: { $size: 3 } })

// $regex (regular expression)
db.users.find({ name: { $regex: /^A/i } })
db.users.find({ email: { $regex: "@example\\.com$" } })

// Text search (requires text index)
db.articles.find({ $text: { $search: "mongodb database" } })

// $set (set field value)
db.users.updateOne(
  { _id: userId },
  { $set: { status: "active", updatedAt: new Date() } }
)

// $unset (remove field)
db.users.updateOne(
  { _id: userId },
  { $unset: { tempField: "" } }
)

// $rename (rename field)
db.users.updateMany(
  {},
  { $rename: { "oldName": "newName" } }
)

// $currentDate (set to current date)
db.users.updateOne(
  { _id: userId },
  { $currentDate: { lastModified: true } }
)

// $inc (increment)
db.posts.updateOne(
  { _id: postId },
  { $inc: { views: 1, likes: 5 } }
)

// $mul (multiply)
db.products.updateOne(
  { _id: productId },
  { $mul: { price: 1.1 } }  // 10% increase
)

// $min (update if new value is less)
db.scores.updateOne(
  { _id: scoreId },
  { $min: { lowestScore: 50 } }
)

// $max (update if new value is greater)
db.scores.updateOne(
  { _id: scoreId },
  { $max: { highestScore: 100 } }
)

// $push (add to array)
db.posts.updateOne(
  { _id: postId },
  { $push: { comments: { author: "Alice", text: "Great!" } } }
)

// $push with $each (multiple elements)
db.posts.updateOne(
  { _id: postId },
  { $push: { tags: { $each: ["mongodb", "database"] } } }
)

// $addToSet (add if not exists)
db.users.updateOne(
  { _id: userId },
  { $addToSet: { interests: "coding" } }
)

// $pull (remove matching elements)
db.users.updateOne(
  { _id: userId },
  { $pull: { tags: "deprecated" } }
)

// $pop (remove first/last element)
db.users.updateOne(
  { _id: userId },
  { $pop: { notifications: -1 } }  // -1: first, 1: last
)

// $ (update first matching array element)
db.posts.updateOne(
  { _id: postId, "comments.author": "Alice" },
  { $set: { "comments.$.text": "Updated comment" } }
)

// $[] (update all array elements)
db.posts.updateOne(
  { _id: postId },
  { $set: { "comments.$[].verified": true } }
)

// $[<identifier>] (filtered positional)
db.posts.updateOne(
  { _id: postId },
  { $set: { "comments.$[elem].flagged": true } },
  { arrayFilters: [{ "elem.rating": { $lt: 2 } }] }
)

// Find and update (returns old document by default)
db.users.findOneAndUpdate(
  { email: "[email protected]" },
  { $set: { status: "active" } }
)

// Return new document
db.users.findOneAndUpdate(
  { email: "[email protected]" },
  { $set: { status: "active" } },
  { returnNewDocument: true }
)

// Upsert and return new
db.counters.findOneAndUpdate(
  { _id: "sequence" },
  { $inc: { value: 1 } },
  { upsert: true, returnNewDocument: true }
)

// Find and replace entire document
db.users.findOneAndReplace(
  { _id: ObjectId("...") },
  { name: "Alice", email: "[email protected]" },
  { returnNewDocument: true }
)

// Find and delete (returns deleted document)
const deletedUser = db.users.findOneAndDelete(
  { email: "[email protected]" }
)

// Ordered bulk write (stops on first error)
db.users.bulkWrite([
  { insertOne: { document: { name: "Alice" } } },
  { updateOne: {
    filter: { name: "Bob" },
    update: { $set: { age: 25 } }
  }},
  { deleteOne: { filter: { name: "Charlie" } } }
])

// Unordered (continues on errors)
db.users.bulkWrite(operations, { ordered: false })

// Check if query uses index
db.users.find({ email: "[email protected]" }).explain("executionStats")

// Key metrics:
// - executionTimeMillis: query duration
// - totalDocsExamined: documents scanned
// - nReturned: documents returned
// - stage: IXSCAN (index) vs COLLSCAN (full scan)

// Create index on single field
db.users.createIndex({ email: 1 })  // 1: ascending, -1: descending

// Use case: queries filtering by email
db.users.find({ email: "[email protected]" })

// Drop index
db.users.dropIndex({ email: 1 })
db.users.dropIndex("email_1")  // By name

// Index on multiple fields (order matters!)
db.orders.createIndex({ status: 1, createdAt: -1 })

// Supports queries on:
// 1. { status: "..." }
// 2. { status: "...", createdAt: ... }
// Does NOT efficiently support: { createdAt: ... } alone

// Left-to-right prefix rule
db.orders.createIndex({ a: 1, b: 1, c: 1 })
// Supports: {a}, {a,b}, {a,b,c}
// Not: {b}, {c}, {b,c}

// Create text index
db.articles.createIndex({ title: "text", body: "text" })

// Only one text index per collection
db.articles.createIndex({
  title: "text",
  body: "text",
  tags: "text"
}, {
  weights: {
    title: 10,    // Title matches weighted higher
    body: 5,
    tags: 3
  }
})

// Search
db.articles.find({ $text: { $search: "mongodb database" } })

// Search with score
db.articles.find(
  { $text: { $search: "mongodb" } },
  { score: { $meta: "textScore" } }
).sort({ score: { $meta: "textScore" } })

// 2dsphere index (spherical geometry)
db.places.createIndex({ location: "2dsphere" })

// Document format
db.places.insertOne({
  name: "Coffee Shop",
  location: {
    type: "Point",
    coordinates: [-73.97, 40.77]  // [longitude, latitude]
  }
})

// Find nearby
db.places.find({
  location: {
    $near: {
      $geometry: { type: "Point", coordinates: [-73.97, 40.77] },
      $maxDistance: 5000  // meters
    }
  }
})

// Within polygon
db.places.find({
  location: {
    $geoWithin: {
      $geometry: {
        type: "Polygon",
        coordinates: [[
          [lon1, lat1], [lon2, lat2], [lon3, lat3], [lon1, lat1]
        ]]
      }
    }
  }
})

// Index all fields in subdocuments
db.products.createIndex({ "attributes.$**": 1 })

// Supports queries on any nested field
db.products.find({ "attributes.color": "red" })
db.products.find({ "attributes.size": "large" })

// Specific paths only
db.products.createIndex(
  { "$**": 1 },
  { wildcardProjection: { "attributes.color": 1, "attributes.size": 1 } }
)

// Hashed index (for even distribution in sharding)
db.users.createIndex({ userId: "hashed" })

// Use case: shard key
sh.shardCollection("mydb.users", { userId: "hashed" })

// Delete documents after specified time
db.sessions.createIndex(
  { createdAt: 1 },
  { expireAfterSeconds: 3600 }  // 1 hour
)

// Documents automatically deleted after createdAt + 3600 seconds
// Background task runs every 60 seconds

// Index only documents matching filter
db.orders.createIndex(
  { customerId: 1 },
  { partialFilterExpression: { status: "active" } }
)

// Index only used when query includes filter
db.orders.find({ customerId: "123", status: "active" })  // Uses index
db.orders.find({ customerId: "123" })  // Does not use index

// Enforce uniqueness
db.users.createIndex({ email: 1 }, { unique: true })

// Compound unique index
db.users.createIndex({ firstName: 1, lastName: 1 }, { unique: true })

// Sparse unique index (null values not indexed)
db.users.createIndex({ email: 1 }, { unique: true, sparse: true })

// Index only documents with field present
db.users.createIndex({ phoneNumber: 1 }, { sparse: true })

// Useful for optional fields
// Documents without phoneNumber not in index

// Show all indexes
db.collection.getIndexes()

// Index statistics
db.collection.aggregate([{ $indexStats: {} }])

// Background index (doesn't block operations)
db.collection.createIndex({ field: 1 }, { background: true })

// Index name
db.collection.createIndex({ field: 1 }, { name: "custom_index_name" })

// Case-insensitive index (collation)
db.collection.createIndex(
  { name: 1 },
  { collation: { locale: "en", strength: 2 } }
)

// Hide index (test before dropping)
db.collection.hideIndex("index_name")

// Check performance without index
// ...

// Unhide or drop
db.collection.unhideIndex("index_name")
db.collection.dropIndex("index_name")

// Rebuild all indexes (after data changes)
db.collection.reIndex()

// Useful after bulk deletions to reclaim space

// Query covered by index (no document fetch)
db.users.createIndex({ email: 1, name: 1 })

// Covered query (all fields in index)
db.users.find(
  { email: "[email protected]" },
  { email: 1, name: 1, _id: 0 }  // Must exclude _id
)

// Check with explain: stage should be "IXSCAN" with no "FETCH"

// MongoDB can use multiple indexes
db.collection.createIndex({ a: 1 })
db.collection.createIndex({ b: 1 })

// Query may use both indexes
db.collection.find({ a: 1, b: 1 })

// Usually compound index is better
db.collection.createIndex({ a: 1, b: 1 })

// Force specific index
db.orders.find({ status: "active", city: "NYC" })
  .hint({ status: 1, createdAt: -1 })

// Force no index (for testing)
db.orders.find({ status: "active" }).hint({ $natural: 1 })

// Optimal compound index order: Equality → Sort → Range

// Query
db.orders.find({
  status: "completed",        // Equality
  category: "electronics"     // Equality
}).sort({
  orderDate: -1               // Sort
}).limit(10)

// Optimal index
db.orders.createIndex({
  status: 1,      // Equality first
  category: 1,    // Equality
  orderDate: -1   // Sort last
})

// With range
db.orders.find({
  status: "completed",        // Equality
  total: { $gte: 100 }       // Range
}).sort({
  orderDate: -1               // Sort
})

// Optimal index
db.orders.createIndex({
  status: 1,      // Equality
  orderDate: -1,  // Sort
  total: 1        // Range last
})

// Query planner (default)
db.collection.find({ field: value }).explain()

// Execution stats
db.collection.find({ field: value }).explain("executionStats")

// All execution stats
db.collection.find({ field: value }).explain("allPlansExecution")

// Good performance indicators:
// - executionTimeMillis < 100ms
// - totalDocsExamined ≈ nReturned (examine only what's needed)
// - stage: "IXSCAN" (using index)
// - totalKeysExamined ≈ nReturned (index selectivity)

// Bad indicators:
// - stage: "COLLSCAN" (full collection scan)
// - totalDocsExamined >> nReturned (scanning too many docs)
// - executionTimeMillis > 1000ms

// High selectivity = good (returns few documents)
// Low selectivity = bad (returns many documents)

// Check selectivity
db.collection.aggregate([
  { $group: { _id: "$status", count: { $sum: 1 } } }
])

// Good for indexing: email, userId, orderId
// Bad for indexing: gender, status (few unique values)

// Always filter by tenant first
db.data.createIndex({ tenantId: 1, createdAt: -1 })

// All queries include tenantId
db.data.find({ tenantId: "tenant1", createdAt: { $gte: date } })

// Index on timestamp descending (recent data accessed more)
db.events.createIndex({ timestamp: -1 })

// Compound with filter fields
db.events.createIndex({ userId: 1, timestamp: -1 })

// Index foreign key fields
db.orders.createIndex({ customerId: 1 })
db.customers.createIndex({ _id: 1 })  // Default _id index

// Aggregation $lookup uses these indexes

db.collection.aggregate([{ $indexStats: {} }])

db.collection.stats().indexSizes

// Current operations
db.currentOp()

// Slow queries (enable profiling)
db.setProfilingLevel(1, { slowms: 100 })
db.system.profile.find().sort({ ts: -1 }).limit(10)

// Index statistics
db.collection.aggregate([
  { $indexStats: {} },
  { $sort: { "accesses.ops": -1 } }
])

// Collection statistics
db.collection.stats()

// Check index sizes
db.collection.stats().indexSizes

// Total index size
db.collection.totalIndexSize()

// Recommend: indexes fit in RAM
// Monitor: db.serverStatus().mem

-- Create user with password
CREATE USER appuser WITH PASSWORD 'secure_password';

-- Create superuser
CREATE USER admin WITH SUPERUSER PASSWORD 'admin_password';

-- Create role without login
CREATE ROLE readonly;

-- Create user with attributes
CREATE USER developer WITH
  PASSWORD 'dev_pass'
  CREATEDB
  VALID UNTIL '2025-12-31';

-- Change password
ALTER USER appuser WITH PASSWORD 'new_password';

-- Add attributes
ALTER USER appuser WITH CREATEDB CREATEROLE;

-- Remove attributes
ALTER USER appuser WITH NOSUPERUSER;

-- Rename user
ALTER USER oldname RENAME TO newname;

-- Set connection limit
ALTER USER appuser CONNECTION LIMIT 10;

-- Create role hierarchy
CREATE ROLE readonly;
CREATE ROLE readwrite;

-- Grant role to user
GRANT readonly TO appuser;
GRANT readwrite TO developer;

-- Revoke role
REVOKE readonly FROM appuser;

-- Role membership
\du

-- Grant database access
GRANT CONNECT ON DATABASE mydb TO appuser;

-- Grant schema usage
GRANT USAGE ON SCHEMA public TO appuser;

-- Revoke access
REVOKE CONNECT ON DATABASE mydb FROM appuser;

-- Grant table permissions
GRANT SELECT ON users TO appuser;
GRANT SELECT, INSERT, UPDATE ON orders TO appuser;
GRANT ALL PRIVILEGES ON products TO appuser;

-- Grant on all tables
GRANT SELECT ON ALL TABLES IN SCHEMA public TO readonly;

-- Revoke permissions
REVOKE INSERT ON users FROM appuser;

-- Grant specific columns
GRANT SELECT (id, name, email) ON users TO appuser;
GRANT UPDATE (status) ON orders TO appuser;

-- Grant sequence usage (for SERIAL/auto-increment)
GRANT USAGE, SELECT ON SEQUENCE users_id_seq TO appuser;
GRANT ALL ON ALL SEQUENCES IN SCHEMA public TO appuser;

-- Grant execute on function
GRANT EXECUTE ON FUNCTION get_user(integer) TO appuser;

-- Set default privileges for future objects
ALTER DEFAULT PRIVILEGES IN SCHEMA public
GRANT SELECT ON TABLES TO readonly;

ALTER DEFAULT PRIVILEGES IN SCHEMA public
GRANT SELECT, INSERT, UPDATE, DELETE ON TABLES TO readwrite;

ALTER DEFAULT PRIVILEGES IN SCHEMA public
GRANT USAGE ON SEQUENCES TO readwrite;

-- Show table permissions
\dp users

-- Show role memberships
\du

-- Query permissions
SELECT grantee, privilege_type
FROM information_schema.role_table_grants
WHERE table_name = 'users';

# Dump database to SQL file
pg_dump mydb > mydb.sql

# Custom format (compressed, allows selective restore)
pg_dump -Fc mydb > mydb.dump

# Directory format (parallel dump)
pg_dump -Fd mydb -j 4 -f mydb_dir

# Specific table
pg_dump -t users mydb > users.sql

# Multiple tables
pg_dump -t users -t orders mydb > tables.sql

# Schema only
pg_dump -s mydb > schema.sql

# Data only
pg_dump -a mydb > data.sql

# Exclude table
pg_dump --exclude-table=logs mydb > mydb.sql

# With compression
pg_dump -Fc -Z 9 mydb > mydb.dump

# Dump all databases
pg_dumpall > all_databases.sql

# Only globals (roles, tablespaces)
pg_dumpall --globals-only > globals.sql

# Restore from custom format
pg_restore -d mydb mydb.dump

# Restore specific table
pg_restore -d mydb -t users mydb.dump

# List contents
pg_restore -l mydb.dump

# Parallel restore
pg_restore -d mydb -j 4 mydb.dump

# Clean database first
pg_restore -d mydb --clean mydb.dump

# Create database if not exists
pg_restore -C -d postgres mydb.dump

# Restore SQL dump
psql mydb < mydb.sql

# Create database and restore
createdb mydb
psql mydb < mydb.sql

# Single transaction
psql -1 mydb < mydb.sql

# Stop on error
psql --set ON_ERROR_STOP=on mydb < mydb.sql

#!/bin/bash
# backup.sh

# Configuration
DB_NAME="mydb"
BACKUP_DIR="/backups"
DATE=$(date +%Y%m%d_%H%M%S)
RETENTION_DAYS=7

# Create backup
pg_dump -Fc "$DB_NAME" > "$BACKUP_DIR/${DB_NAME}_${DATE}.dump"

# Remove old backups
find "$BACKUP_DIR" -name "${DB_NAME}_*.dump" -mtime +$RETENTION_DAYS -delete

# Log
echo "Backup completed: ${DB_NAME}_${DATE}.dump"

# Enable WAL archiving (postgresql.conf)
wal_level = replica
archive_mode = on
archive_command = 'cp %p /archive/%f'
max_wal_senders = 3

# Base backup
pg_basebackup -D /backup/base -Ft -z -P

# Restore to point in time
# 1. Stop PostgreSQL
# 2. Restore base backup
# 3. Create recovery.conf with recovery_target_time
# 4. Start PostgreSQL

-- Create replication user
CREATE USER replicator WITH REPLICATION PASSWORD 'replica_pass';

-- Configure postgresql.conf
wal_level = replica
max_wal_senders = 3
wal_keep_size = 64MB

-- Configure pg_hba.conf
host replication replicator replica_ip/32 md5

# Stop replica PostgreSQL
systemctl stop postgresql

# Remove data directory
rm -rf /var/lib/postgresql/data/*

# Clone from primary
pg_basebackup -h primary_host -D /var/lib/postgresql/data -U replicator -P -R

# Start replica
systemctl start postgresql

# Check replication status
SELECT * FROM pg_stat_replication;  -- On primary

-- Create publication
CREATE PUBLICATION my_publication FOR ALL TABLES;

-- Or specific tables
CREATE PUBLICATION my_publication FOR TABLE users, orders;

-- Check publications
\dRp
SELECT * FROM pg_publication;

-- Create subscription
CREATE SUBSCRIPTION my_subscription
CONNECTION 'host=primary_host dbname=mydb user=replicator password=replica_pass'
PUBLICATION my_publication;

-- Check subscriptions
\dRs
SELECT * FROM pg_subscription;

-- Monitor replication
SELECT * FROM pg_stat_subscription;

-- Database size
SELECT pg_size_pretty(pg_database_size('mydb'));

-- Table sizes
SELECT schemaname, tablename,
  pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) AS size
FROM pg_tables
ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC;

-- Index sizes
SELECT schemaname, tablename, indexname,
  pg_size_pretty(pg_relation_size(indexrelid)) AS size
FROM pg_stat_user_indexes
ORDER BY pg_relation_size(indexrelid) DESC;

-- Current connections
SELECT count(*) FROM pg_stat_activity;

-- Connections by database
SELECT datname, count(*) FROM pg_stat_activity GROUP BY datname;

-- Connection limit
SHOW max_connections;

-- Kill connection
SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE pid = 12345;

-- Active queries
SELECT pid, usename, state, query, query_start
FROM pg_stat_activity
WHERE state != 'idle';

-- Long-running queries
SELECT pid, now() - query_start AS duration, query
FROM pg_stat_activity
WHERE state != 'idle'
ORDER BY duration DESC;

-- Blocking queries
SELECT blocked.pid AS blocked_pid,
       blocked.query AS blocked_query,
       blocking.pid AS blocking_pid,
       blocking.query AS blocking_query
FROM pg_stat_activity blocked
JOIN pg_stat_activity blocking
  ON blocking.pid = ANY(pg_blocking_pids(blocked.pid));

-- Should be > 0.99 for good performance
SELECT
  sum(heap_blks_hit) / (sum(heap_blks_hit) + sum(heap_blks_read)) AS cache_hit_ratio
FROM pg_statio_user_tables;

-- Check for table bloat (requires pgstattuple extension)
CREATE EXTENSION pgstattuple;

SELECT schemaname, tablename,
  pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) AS size,
  pgstattuple(schemaname||'.'||tablename) AS stats
FROM pg_tables
WHERE schemaname = 'public';

-- Reclaim storage
VACUUM users;

-- Verbose
VACUUM VERBOSE users;

-- Full (locks table, rewrites)
VACUUM FULL users;

-- With analyze
VACUUM ANALYZE users;

-- All tables
VACUUM;

-- Check last vacuum
SELECT schemaname, tablename, last_vacuum, last_autovacuum
FROM pg_stat_user_tables;

-- Configure postgresql.conf
autovacuum = on
autovacuum_vacuum_threshold = 50
autovacuum_vacuum_scale_factor = 0.2
autovacuum_analyze_threshold = 50
autovacuum_analyze_scale_factor = 0.1

-- Rebuild index
REINDEX INDEX idx_users_email;

-- Rebuild all indexes on table
REINDEX TABLE users;

-- Rebuild database indexes
REINDEX DATABASE mydb;

-- Concurrently (doesn't lock)
REINDEX INDEX CONCURRENTLY idx_users_email;

-- Update statistics
ANALYZE users;

-- Specific columns
ANALYZE users(email, status);

-- All tables
ANALYZE;

-- Verbose
ANALYZE VERBOSE users;

SHOW config_file;

# Memory
shared_buffers = 4GB                 # 25% of RAM
work_mem = 64MB                      # Per operation
maintenance_work_mem = 512MB         # VACUUM, CREATE INDEX
effective_cache_size = 12GB          # OS cache estimate

# Query Planner
random_page_cost = 1.1               # Lower for SSD
effective_io_concurrency = 200       # Concurrent disk ops

# Connections
max_connections = 100
superuser_reserved_connections = 3

# Logging
log_destination = 'stderr'
logging_collector = on
log_directory = 'log'
log_filename = 'postgresql-%Y-%m-%d.log'
log_rotation_age = 1d
log_min_duration_statement = 100     # Log slow queries

# Replication
wal_level = replica
max_wal_senders = 3
wal_keep_size = 64MB

# Autovacuum
autovacuum = on
autovacuum_vacuum_scale_factor = 0.2
autovacuum_analyze_scale_factor = 0.1

-- Reload config without restart
SELECT pg_reload_conf();

-- Or from shell
pg_ctl reload

# postgresql.conf
ssl = on
ssl_cert_file = '/path/to/server.crt'
ssl_key_file = '/path/to/server.key'
ssl_ca_file = '/path/to/ca.crt'

# TYPE  DATABASE        USER            ADDRESS                 METHOD

# Local connections
local   all             postgres                                peer
local   all             all                                     md5

# Remote connections
host    all             all             0.0.0.0/0               md5
host    all             all             ::0/0                   md5

# Replication
host    replication     replicator      replica_ip/32           md5

# SSL required
hostssl all             all             0.0.0.0/0               md5

-- Enable RLS
ALTER TABLE users ENABLE ROW LEVEL SECURITY;

-- Create policy
CREATE POLICY user_policy ON users
  USING (user_id = current_user_id());

-- Drop policy
DROP POLICY user_policy ON users;

-- View policies
\d+ users

-- Show query plan
EXPLAIN SELECT * FROM users WHERE id = 1;

-- Output shows:
-- - Execution plan nodes
-- - Estimated costs
-- - Estimated rows

-- Execute query and show actual performance
EXPLAIN ANALYZE SELECT * FROM users WHERE age > 18;

-- Shows:
-- - Actual execution time
-- - Actual rows returned
-- - Planning time
-- - Execution time

-- Verbose output
EXPLAIN (VERBOSE) SELECT * FROM users;

-- Show buffer usage
EXPLAIN (ANALYZE, BUFFERS) SELECT * FROM users WHERE active = true;

-- JSON format
EXPLAIN (FORMAT JSON, ANALYZE) SELECT * FROM users;

-- All options
EXPLAIN (ANALYZE, BUFFERS, VERBOSE, TIMING, COSTS)
SELECT * FROM users WHERE id = 1;

-- Full table scan (reads all rows)
EXPLAIN SELECT * FROM users WHERE name = 'Alice';

-- Output: Seq Scan on users
-- Indicates: no suitable index or small table

-- Uses index to find rows
EXPLAIN SELECT * FROM users WHERE id = 1;

-- Output: Index Scan using users_pkey on users
-- Best for: selective queries, small result sets

-- Query covered by index (no table access)
CREATE INDEX idx_users_email_name ON users(email, name);
EXPLAIN SELECT email, name FROM users WHERE email = '[email protected]';

-- Output: Index Only Scan using idx_users_email_name
-- Best performance: no heap fetch needed

-- Combines multiple indexes or handles large result sets
EXPLAIN SELECT * FROM users WHERE age > 18 AND status = 'active';

-- Output:
-- Bitmap Heap Scan on users
--   Recheck Cond: ...
--   -> Bitmap Index Scan on idx_age

-- Good for: moderate selectivity

-- For each row in outer table, scan inner table
EXPLAIN SELECT * FROM orders o
JOIN customers c ON o.customer_id = c.id
WHERE c.id = 1;

-- Output: Nested Loop
-- Best for: small outer table, indexed inner table

-- Build hash table from smaller table
EXPLAIN SELECT * FROM orders o
JOIN customers c ON o.customer_id = c.id;

-- Output: Hash Join
-- Best for: large tables, equality conditions

-- Both inputs sorted on join key
EXPLAIN SELECT * FROM orders o
JOIN customers c ON o.customer_id = c.id
ORDER BY o.customer_id;

-- Output: Merge Join
-- Best for: pre-sorted data, large sorted inputs

-- General purpose index
CREATE INDEX idx_users_email ON users(email);
CREATE INDEX idx_orders_date ON orders(order_date);

-- Supports: =, <, <=, >, >=, BETWEEN, IN, IS NULL
-- Supports: ORDER BY, MIN/MAX

-- Multiple columns (order matters!)
CREATE INDEX idx_users_status_created ON users(status, created_at);

-- Supports queries on:
-- - status
-- - status, created_at
-- Does NOT support: created_at alone

-- Column order: most selective first
-- Exception: match query WHERE/ORDER BY patterns

-- Index subset of rows
CREATE INDEX idx_active_users ON users(email)
WHERE status = 'active';

-- Smaller index, faster queries with matching WHERE clause
-- Query must include WHERE status = 'active' to use index

-- Index on computed value
CREATE INDEX idx_users_lower_email ON users(LOWER(email));

-- Query must use same expression
SELECT * FROM users WHERE LOWER(email) = '[email protected]';

-- For array, JSONB, full-text search
CREATE INDEX idx_products_tags ON products USING GIN(tags);
CREATE INDEX idx_documents_data ON documents USING GIN(data);

-- Array queries
SELECT * FROM products WHERE tags @> ARRAY['featured'];

-- JSONB queries
SELECT * FROM documents WHERE data @> '{"status": "active"}';

-- For geometric data, range types, full-text
CREATE INDEX idx_locations_geom ON locations USING GiST(geom);

-- Geometric queries
SELECT * FROM locations WHERE geom && ST_MakeEnvelope(...);

-- Equality comparisons only
CREATE INDEX idx_users_hash_email ON users USING HASH(email);

-- Only supports: =
-- Rarely used (B-tree usually better)

-- For very large tables with natural clustering
CREATE INDEX idx_logs_brin_created ON logs USING BRIN(created_at);

-- Tiny index size, good for append-only data
-- Best for: time-series, logging, large tables

-- Bad
SELECT * FROM users WHERE id = 1;

-- Good (only needed columns)
SELECT id, name, email FROM users WHERE id = 1;

-- Limit result set
SELECT * FROM users ORDER BY created_at DESC LIMIT 10;

-- PostgreSQL can stop early with LIMIT

-- Create index matching sort order
CREATE INDEX idx_users_created_desc ON users(created_at DESC);

-- Query uses index for sorting
SELECT * FROM users ORDER BY created_at DESC LIMIT 10;

-- Include all queried columns in index
CREATE INDEX idx_users_email_name_status ON users(email, name, status);

-- Query covered by index (no table access)
SELECT name, status FROM users WHERE email = '[email protected]';

-- Prefer EXISTS for large subqueries
-- Bad
SELECT * FROM customers
WHERE id IN (SELECT customer_id FROM orders WHERE total > 1000);

-- Good
SELECT * FROM customers c
WHERE EXISTS (SELECT 1 FROM orders o WHERE o.customer_id = c.id AND o.total > 1000);

-- Filter before joining
-- Bad
SELECT * FROM orders o
JOIN customers c ON o.customer_id = c.id
WHERE o.status = 'completed' AND c.country = 'USA';

-- Good (filter in subquery)
SELECT * FROM (
  SELECT * FROM orders WHERE status = 'completed'
) o
JOIN (
  SELECT * FROM customers WHERE country = 'USA'
) c ON o.customer_id = c.id;

-- Or use CTE
WITH filtered_orders AS (
  SELECT * FROM orders WHERE status = 'completed'
),
filtered_customers AS (
  SELECT * FROM customers WHERE country = 'USA'
)
SELECT * FROM filtered_orders o
JOIN filtered_customers c ON o.customer_id = c.id;

-- Bad (index not used)
SELECT * FROM users WHERE LOWER(email) = '[email protected]';

-- Good (create expression index)
CREATE INDEX idx_users_lower_email ON users(LOWER(email));
-- Then query uses index

-- Or store lowercase separately
ALTER TABLE users ADD COLUMN email_lower TEXT;
UPDATE users SET email_lower = LOWER(email);
CREATE INDEX idx_users_email_lower ON users(email_lower);

-- Analyze table (update statistics)
ANALYZE users;

-- Analyze specific columns
ANALYZE users(email, status);

-- Analyze all tables
ANALYZE;

-- Auto-analyze (configured in postgresql.conf)
autovacuum_analyze_threshold = 50
autovacuum_analyze_scale_factor = 0.1

-- Last analyze time
SELECT schemaname, tablename, last_analyze, last_autoanalyze
FROM pg_stat_user_tables;

-- Statistics targets (adjust for important columns)
ALTER TABLE users ALTER COLUMN email SET STATISTICS 1000;

-- Reclaim storage, update statistics
VACUUM users;

-- Verbose output
VACUUM VERBOSE users;

-- Full vacuum (rewrites table, locks table)
VACUUM FULL users;

-- Analyze after vacuum
VACUUM ANALYZE users;

-- Check autovacuum status
SELECT schemaname, tablename, last_vacuum, last_autovacuum
FROM pg_stat_user_tables;

-- Configure in postgresql.conf
autovacuum = on
autovacuum_vacuum_threshold = 50
autovacuum_vacuum_scale_factor = 0.2

-- Rebuild index
REINDEX INDEX idx_users_email;

-- Rebuild all indexes on table
REINDEX TABLE users;

-- Rebuild all indexes in schema
REINDEX SCHEMA public;

-- Current queries
SELECT pid, usename, state, query, query_start
FROM pg_stat_activity
WHERE state != 'idle';

-- Long-running queries
SELECT pid, now() - query_start AS duration, query
FROM pg_stat_activity
WHERE state != 'idle' AND now() - query_start > interval '5 minutes'
ORDER BY duration DESC;

-- Enable slow query logging (postgresql.conf)
log_min_duration_statement = 100  -- milliseconds

-- Or per session
SET log_min_duration_statement = 100;

-- Logs appear in PostgreSQL log files

-- Enable extension
CREATE EXTENSION pg_stat_statements;

-- View query statistics
SELECT query, calls, total_exec_time, mean_exec_time, rows
FROM pg_stat_statements
ORDER BY mean_exec_time DESC
LIMIT 10;

-- Reset statistics
SELECT pg_stat_statements_reset();

-- Index usage statistics
SELECT schemaname, tablename, indexname, idx_scan, idx_tup_read, idx_tup_fetch
FROM pg_stat_user_indexes
ORDER BY idx_scan;

-- Unused indexes (idx_scan = 0)
SELECT schemaname, tablename, indexname
FROM pg_stat_user_indexes
WHERE idx_scan = 0 AND indexname NOT LIKE '%_pkey';

-- Index sizes
SELECT schemaname, tablename, indexname,
  pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
ORDER BY pg_relation_size(indexrelid) DESC;

-- Tables with sequential scans
SELECT schemaname, tablename, seq_scan, seq_tup_read
FROM pg_stat_user_tables
WHERE seq_scan > 0
ORDER BY seq_tup_read DESC;

-- Consider adding indexes to high seq_scan tables

# Shared buffers (25% of RAM)
shared_buffers = 4GB

# Work memory (per operation)
work_mem = 64MB

# Maintenance work memory (VACUUM, CREATE INDEX)
maintenance_work_mem = 512MB

# Effective cache size (estimate of OS cache)
effective_cache_size = 12GB

# Random page cost (lower for SSD)
random_page_cost = 1.1

# Effective IO concurrency (number of concurrent disk operations)
effective_io_concurrency = 200

# Cost of parallel query startup
parallel_setup_cost = 1000
parallel_tuple_cost = 0.1

# Max connections
max_connections = 100

# Connection pooling recommended (pgBouncer)

# Connect to database
psql -U username -d database -h hostname -p 5432

# Connect using URI
psql postgresql://username:password@hostname:5432/database

# Environment variables
export PGUSER=postgres
export PGPASSWORD=mypassword
export PGHOST=localhost
export PGPORT=5432
export PGDATABASE=mydb
psql

# Format: hostname:port:database:username:password
# chmod 600 ~/.pgpass

Ckdatabases

Databases Skill

When to Use This Skill

Reference Navigation

Database Design

Ckdatabases

Databases Skill

When to Use This Skill

Reference Navigation

Database Design

MongoDB References

PostgreSQL References

Python Utilities

Best Practices

Resources

Reference Workflows

mongodb aggregation

MongoDB Aggregation Pipeline

Pipeline Concept

Core Pipeline Stages

$match (Filter Documents)

$project (Reshape Documents)

$group (Aggregate Data)

$sort (Order Results)

$limit / $skip (Pagination)

$lookup (Join Collections)

$unwind (Deconstruct Arrays)

$addFields (Add New Fields)

$replaceRoot (Replace Document Root)

Aggregation Operators

Arithmetic Operators

String Operators

Date Operators

Array Operators

Conditional Operators

Advanced Patterns

Time-Based Aggregation

Faceted Search

Window Functions

Text Search with Aggregation

Geospatial Aggregation

Performance Tips

Best Practices

mongodb atlas

MongoDB Atlas Cloud Platform

Quick Start

Create Free Cluster

Connection String Format

Connect

Cluster Tiers

M0 (Free Tier)

M10+ (Dedicated Clusters)

Serverless

Database Configuration

Create Database

Schema Validation

Security

Network Access

Database Users

Encryption

Backups and Snapshots

Cloud Backups (M10+)

Manual Backups

Monitoring and Alerts

Metrics Dashboard

Alerts

Performance Advisor

Atlas Search (Full-Text Search)

Create Search Index

Search Queries

Atlas Vector Search (AI/ML)

Create Vector Search Index

Vector Search Query

Data Federation

Query Across Sources

Atlas Charts (Embedded Analytics)

Create Dashboard

Atlas CLI

Best Practices

Troubleshooting