Mask and redact sensitive data (PII, PCI, PHI) in databases, logs, and APIs. Use when protecting PII in dev/staging environments, redacting sensitive data from logs, or anonymizing data for analytics while preserving data structure.
-- Create masked view for dev access
CREATE OR REPLACE VIEW users_masked AS
SELECT
id,
-- Mask name: keep first letter + ***
LEFT(first_name, 1) || '***' AS first_name,
LEFT(last_name, 1) || '***' AS last_name,
-- Mask email: preserve domain
REGEXP_REPLACE(email, '^([^@])([^@]*)(@.+)$', '\1***\3') AS email,
-- Mask phone: show only last 4
'***-***-' || RIGHT(phone, 4) AS phone,
-- Mask SSN: show only last 4
'***-**-' || RIGHT(ssn, 4) AS ssn,
-- Keep non-sensitive fields as-is
created_at,
status,
country
FROM users;
-- Grant dev team access to masked view only (not base table)
GRANT SELECT ON users_masked TO dev_team;
REVOKE SELECT ON users FROM dev_team;
-- Column-level masking function using pgcrypto for format-preserving
CREATE OR REPLACE FUNCTION mask_pan(pan TEXT) RETURNS TEXT AS $$
BEGIN
RETURN RPAD(LEFT(pan, 6), LENGTH(pan) - 4, '*') || RIGHT(pan, 4);
END;
$$ LANGUAGE plpgsql IMMUTABLE;
-- Dynamic masking based on current user role
CREATE OR REPLACE FUNCTION get_user_data(p_user_id UUID)
RETURNS TABLE (name TEXT, email TEXT, phone TEXT) AS $$
BEGIN
IF current_user = 'admin_role' THEN
RETURN QUERY SELECT u.name, u.email, u.phone FROM users u WHERE u.id = p_user_id;
ELSE
RETURN QUERY SELECT
LEFT(u.name, 1) || '***',
REGEXP_REPLACE(u.email, '^([^@])([^@]*)(@.+)$', '\1***\3'),
'***-***-' || RIGHT(u.phone, 4)
FROM users u WHERE u.id = p_user_id;
END IF;
END;
$$ LANGUAGE plpgsql SECURITY DEFINER;
Microsoft Presidio — Auto-Detection
# Presidio automatically detects and masks PII using NLP
from presidio_analyzer import AnalyzerEngine
from presidio_anonymizer import AnonymizerEngine, AnonymizerConfig
from presidio_anonymizer.entities import OperatorConfig
analyzer = AnalyzerEngine()
anonymizer = AnonymizerEngine()
def mask_text_presidio(text: str, masking_style: str = "replace") -> str:
"""Auto-detect and mask PII using Presidio NLP."""
results = analyzer.analyze(text=text, language="en")
if masking_style == "replace":
# Replace with type label: [EMAIL_ADDRESS]
operators = {
"DEFAULT": OperatorConfig("replace", {"new_value": "[REDACTED]"}),
"EMAIL_ADDRESS": OperatorConfig("replace", {"new_value": "[EMAIL]"}),
"PHONE_NUMBER": OperatorConfig("replace", {"new_value": "[PHONE]"}),
"PERSON": OperatorConfig("replace", {"new_value": "[NAME]"}),
"US_SSN": OperatorConfig("replace", {"new_value": "[SSN]"}),
}
elif masking_style == "hash":
# Hash for consistent pseudonymization (same input → same output)
operators = {"DEFAULT": OperatorConfig("hash", {"hash_type": "sha256"})}
anonymized = anonymizer.anonymize(
text=text,
analyzer_results=results,
operators=operators
)
return anonymized.text
# Example
text = "Contact John Smith at [email protected] or 555-123-4567"
print(mask_text_presidio(text))
# → "Contact [NAME] at [EMAIL] or [PHONE]"
Production DB → Dev DB Pipeline
#!/bin/bash
# mask-db-for-dev.sh — Safe production → dev data pipeline
set -e
PROD_DB="postgresql://prod-server/app"
DEV_DB="postgresql://dev-server/app_dev"
echo "Dumping production schema..."
pg_dump --schema-only $PROD_DB > schema.sql
echo "Applying schema to dev..."
psql $DEV_DB < schema.sql
echo "Copying and masking data..."
psql $PROD_DB -c "\COPY (
SELECT
id,
LEFT(first_name, 1) || 'XXXX' AS first_name,
'User' AS last_name,
'user_' || id || '@example.com' AS email,
'555-000-' || LPAD((ROW_NUMBER() OVER())::TEXT, 4, '0') AS phone,
created_at,
status
FROM users
) TO STDOUT WITH CSV" | psql $DEV_DB -c "\COPY users_masked FROM STDIN WITH CSV"
echo "Done. Dev database ready with masked data."
Compliance Checklist
PII inventory completed (what data, where it lives)