Create, run, and monitor Databricks jobs for workflow automation. ALWAYS asks users about compute preferences (serverless vs jobs compute) before creating jobs. Use when scheduling notebooks, scripts, or pipelines, managing job runs, or monitoring job execution and logs.
This skill helps you create, configure, run, and monitor Databricks jobs using the Databricks CLI. Jobs enable automated execution of notebooks, scripts, and data pipelines on schedules or triggers.
IMPORTANT: Before creating any job, you MUST ask the user about their compute preferences. Never assume serverless or jobs compute: the choice determines cost, startup latency, and whether the job can run at all, since some workspaces support only serverless compute.
Use the AskUserQuestion tool to ask:
{
"questions": [{
"question": "What type of compute would you like to use for this job?",
"header": "Compute Type",
"multiSelect": false,
"options": [
{
"label": "Serverless Compute (Recommended)",
"description": "Fast startup, automatically managed, pay-per-use. Requires workspace support."
},
{
"label": "Jobs Compute (Classic)",
"description": "Traditional job clusters with full control over configuration and instance types."
},
{
"label": "Existing Cluster",
"description": "Use an already running cluster. Good for testing but not recommended for production jobs."
}
]
}]
}
If user chooses Jobs Compute, ask for cluster details:
{
"questions": [
{
"question": "What Spark version should the cluster use?",
"header": "Spark Version",
"multiSelect": false,
"options": [
{"label": "15.4.x-scala2.12 (Latest LTS)", "description": "Latest long-term support version with newest features"},
{"label": "14.3.x-scala2.12 (Previous LTS)", "description": "Stable previous LTS version"},
{"label": "13.3.x-scala2.12", "description": "Older stable version"},
{"label": "Custom", "description": "I'll specify a different version"}
]
},
{
"question": "What instance type should the cluster use?",
"header": "Instance Type",
"multiSelect": false,
"options": [
{"label": "i3.xlarge (General purpose)", "description": "Balanced compute and memory, good for most workloads"},
{"label": "r5.xlarge (Memory optimized)", "description": "Higher memory for data-intensive operations"},
{"label": "c5.xlarge (Compute optimized)", "description": "Higher CPU for compute-intensive tasks"},
{"label": "Single Node", "description": "No workers, runs on driver only (for small jobs)"}
]
},
{
"question": "How many worker nodes?",
"header": "Workers",
"multiSelect": false,
"options": [
{"label": "0 (Single Node)", "description": "No workers, driver only"},
{"label": "2 workers", "description": "Small parallel processing"},
{"label": "4 workers", "description": "Medium parallel processing"},
{"label": "8+ workers", "description": "Large parallel processing"}
]
}
]
}
If user chooses Existing Cluster, ask for cluster ID:
{
"questions": [{
"question": "What is the cluster ID?",
"header": "Cluster ID",
"multiSelect": false,
"options": [
{"label": "List available clusters", "description": "Show me running clusters to choose from"},
{"label": "I have a cluster ID", "description": "I'll provide the cluster ID directly"}
]
}]
}
The recommended way to create jobs is using JSON configuration:
Create job configuration file (job_config.json):
{
"name": "My ETL Job",
"tasks": [
{
"task_key": "extract_data",
"notebook_task": {
"notebook_path": "/Workspace/Users/[email protected]/extract_notebook",
"source": "WORKSPACE"
},
"job_cluster_key": "default_cluster"
}
],
"job_clusters": [
{
"job_cluster_key": "default_cluster",
"new_cluster": {
"spark_version": "14.3.x-scala2.12",
"node_type_id": "i3.xlarge",
"num_workers": 2
}
}
]
}
Create the job:
# Create job from config file
databricks jobs create --json @job_config.json
# Or create with inline JSON
databricks jobs create --json '{
"name": "Simple Job",
"tasks": [{
"task_key": "main_task",
"notebook_task": {
"notebook_path": "/Workspace/Users/[email protected]/notebook",
"source": "WORKSPACE"
},
"existing_cluster_id": "1234-567890-abcdef12"
}]
}'
Prompt user for job details:
#!/bin/bash
# Prompt for job details
read -p "Enter job name: " JOB_NAME
read -p "Enter notebook path (e.g., /Workspace/Users/[email protected]/notebook): " NOTEBOOK_PATH
read -p "Enter cluster ID (or press Enter to create new cluster): " CLUSTER_ID
# Build JSON config
if [[ -n "$CLUSTER_ID" ]]; then
# Use existing cluster
JOB_JSON=$(cat <<EOF
{
"name": "$JOB_NAME",
"tasks": [{
"task_key": "main_task",
"notebook_task": {
"notebook_path": "$NOTEBOOK_PATH",
"source": "WORKSPACE"
},
"existing_cluster_id": "$CLUSTER_ID"
}]
}
EOF
)
else
# Create new cluster
read -p "Enter Spark version (e.g., 14.3.x-scala2.12): " SPARK_VERSION
read -p "Enter node type (e.g., i3.xlarge): " NODE_TYPE
read -p "Enter number of workers: " NUM_WORKERS
JOB_JSON=$(cat <<EOF
{
"name": "$JOB_NAME",
"tasks": [{
"task_key": "main_task",
"notebook_task": {
"notebook_path": "$NOTEBOOK_PATH",
"source": "WORKSPACE"
},
"job_cluster_key": "job_cluster"
}],
"job_clusters": [{
"job_cluster_key": "job_cluster",
"new_cluster": {
"spark_version": "$SPARK_VERSION",
"node_type_id": "$NODE_TYPE",
"num_workers": $NUM_WORKERS
}
}]
}
EOF
)
fi
# Create job
echo "$JOB_JSON" > /tmp/job_config.json
JOB_ID=$(databricks jobs create --json @/tmp/job_config.json | jq -r '.job_id')
echo "✅ Job created with ID: $JOB_ID"
Run an existing job and get the run ID:
# Run job by ID
databricks jobs run-now --job-id <job-id>
# Run job and capture run ID
RUN_ID=$(databricks jobs run-now --job-id <job-id> | jq -r '.run_id')
echo "Job run started with ID: $RUN_ID"
Interactive job run:
# List available jobs
databricks jobs list --output json | jq -r '.jobs[] | "\(.job_id): \(.settings.name)"'
# Prompt for job ID
read -p "Enter job ID to run: " JOB_ID
# Run job
RUN_ID=$(databricks jobs run-now --job-id "$JOB_ID" | jq -r '.run_id')
echo "✅ Job run started with ID: $RUN_ID"
Check the status of a job run:
# Get run status
databricks jobs get-run --run-id <run-id>
# Get just the state
databricks jobs get-run --run-id <run-id> | jq -r '.state.life_cycle_state'
# Poll until completion
while true; do
STATE=$(databricks jobs get-run --run-id <run-id> | jq -r '.state.life_cycle_state')
echo "Current state: $STATE"
if [[ "$STATE" == "TERMINATED" ]] || [[ "$STATE" == "SKIPPED" ]] || [[ "$STATE" == "INTERNAL_ERROR" ]]; then
RESULT=$(databricks jobs get-run --run-id <run-id> | jq -r '.state.result_state')
echo "Final result: $RESULT"
break
fi
sleep 10
done
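The polling loop above runs forever if a run hangs. A hedged variant wraps it in a reusable function with a timeout (wait_for_run is a helper name used here, not a Databricks CLI command):

```shell
# wait_for_run: poll a run until it reaches a terminal state or a timeout
# expires. A sketch; wait_for_run is a local helper, not a Databricks command.
wait_for_run() {
  local run_id="$1" timeout="${2:-3600}" interval=10 elapsed=0 state
  while (( elapsed < timeout )); do
    state=$(databricks jobs get-run --run-id "$run_id" | jq -r '.state.life_cycle_state')
    echo "Current state: $state"
    case "$state" in
      TERMINATED|SKIPPED|INTERNAL_ERROR)
        # Terminal state reached: report the final result and stop polling
        databricks jobs get-run --run-id "$run_id" | jq -r '"Final result: \(.state.result_state)"'
        return 0 ;;
    esac
    sleep "$interval"
    (( elapsed += interval ))
  done
  echo "Timed out after ${timeout}s waiting for run $run_id" >&2
  return 1
}
```

Call it as `wait_for_run "$RUN_ID" 1800` to give up after 30 minutes instead of blocking indefinitely.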
Monitor with helper script:
# Monitor job run until completion
python .claude/skills/databricks-job-orchestrator/scripts/job_helper.py monitor <run-id>
Get output and logs from a job run:
# Get run output
databricks jobs get-run-output --run-id <run-id>
# Get run output as JSON
databricks jobs get-run-output --run-id <run-id> --output json | jq -r '.notebook_output.result'
# Get error message if failed
databricks jobs get-run --run-id <run-id> | jq -r '.state.state_message'
For workspaces that support serverless, use environments instead of clusters:
{
"name": "Serverless Job",
"tasks": [{
"task_key": "main_task",
"spark_python_task": {
"python_file": "/Workspace/Users/[email protected]/script.py"
},
"environment_key": "default"
}],
"environments": [{
"environment_key": "default",
"spec": {
"client": "1"
}
}]
}
Key points:
Set environment_key on the task and define it in the environments array

For traditional job clusters with full control:
{
"name": "Jobs Compute Job",
"tasks": [{
"task_key": "main_task",
"spark_python_task": {
"python_file": "/Workspace/Users/[email protected]/script.py"
},
"job_cluster_key": "job_cluster"
}],
"job_clusters": [{
"job_cluster_key": "job_cluster",
"new_cluster": {
"spark_version": "15.4.x-scala2.12",
"node_type_id": "i3.xlarge",
"num_workers": 2,
"spark_conf": {
"spark.speculation": "true"
},
"custom_tags": {
"project": "analytics"
}
}
}]
}
Key points:
Use job_cluster_key to reference the cluster configuration

For small jobs that don't need distributed processing:
{
"name": "Single Node Job",
"tasks": [{
"task_key": "main_task",
"spark_python_task": {
"python_file": "/Workspace/Users/[email protected]/script.py"
},
"job_cluster_key": "single_node"
}],
"job_clusters": [{
"job_cluster_key": "single_node",
"new_cluster": {
"spark_version": "15.4.x-scala2.12",
"node_type_id": "i3.xlarge",
"num_workers": 0,
"spark_conf": {
"spark.master": "local[*, 4]",
"spark.databricks.cluster.profile": "singleNode"
},
"custom_tags": {
"ResourceClass": "SingleNode"
}
}
}]
}
Key points:
Set num_workers: 0 for a single-node cluster

Use an already running cluster (not recommended for production):
{
"name": "Existing Cluster Job",
"tasks": [{
"task_key": "main_task",
"spark_python_task": {
"python_file": "/Workspace/Users/[email protected]/script.py"
},
"existing_cluster_id": "1234-567890-abcdef12"
}]
}
Key points:
No cluster startup wait, but the job shares the cluster with interactive users, and all-purpose compute is billed at a higher rate than jobs compute
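When the user picks "List available clusters", one way to surface running clusters is a small wrapper around the CLI (a sketch; the JSON shape varies by CLI version, which the `(.clusters // .)` fallback covers):

```shell
# One way to find a cluster ID for existing_cluster_id: list clusters and keep
# the running ones. A sketch: (.clusters // .) handles both the legacy CLI
# object shape and the newer CLI's bare-array output.
running_clusters() {
  databricks clusters list --output json |
    jq -r '(.clusters // .)[] | select(.state == "RUNNING")
           | "\(.cluster_id): \(.cluster_name)"'
}
```

Run `running_clusters` after authenticating, then copy the chosen ID into existing_cluster_id.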
Run a notebook:
{
"name": "Notebook Job",
"tasks": [{
"task_key": "run_notebook",
"notebook_task": {
"notebook_path": "/Workspace/Users/[email protected]/my_notebook",
"source": "WORKSPACE",
"base_parameters": {
"param1": "value1",
"param2": "value2"
}
},
"existing_cluster_id": "1234-567890-abcdef12"
}]
}
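The base_parameters above are defaults; they can be overridden per run with run-now's notebook_params field (the job ID and parameter names below are placeholders):

```shell
# Override a notebook task's base_parameters for a single run. The job ID
# (12345) and parameter names are placeholders; keys must match the widgets
# the notebook reads with dbutils.widgets.get.
RUN_NOW_JSON='{
  "job_id": 12345,
  "notebook_params": {"param1": "override1", "param2": "override2"}
}'
databricks jobs run-now --json "$RUN_NOW_JSON"
```

Parameters not listed in notebook_params keep their base_parameters values for that run.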
Run a Python script:
{
"name": "Python Script Job",
"tasks": [{
"task_key": "run_script",
"spark_python_task": {
"python_file": "dbfs:/scripts/my_script.py",
"parameters": ["arg1", "arg2"]
},
"existing_cluster_id": "1234-567890-abcdef12"
}]
}
Run a JAR file:
{
"name": "JAR Job",
"tasks": [{
"task_key": "run_jar",
"spark_jar_task": {
"main_class_name": "com.example.Main",
"parameters": ["arg1", "arg2"]
},
"libraries": [{
"jar": "dbfs:/jars/my-app.jar"
}],
"existing_cluster_id": "1234-567890-abcdef12"
}]
}
Chain multiple tasks with dependencies:
{
"name": "Multi-Task Workflow",
"tasks": [
{
"task_key": "extract",
"notebook_task": {
"notebook_path": "/Workspace/Users/[email protected]/extract",
"source": "WORKSPACE"
},
"job_cluster_key": "shared_cluster"
},
{
"task_key": "transform",
"depends_on": [{"task_key": "extract"}],
"notebook_task": {
"notebook_path": "/Workspace/Users/[email protected]/transform",
"source": "WORKSPACE"
},
"job_cluster_key": "shared_cluster"
},
{
"task_key": "load",
"depends_on": [{"task_key": "transform"}],
"notebook_task": {
"notebook_path": "/Workspace/Users/[email protected]/load",
"source": "WORKSPACE"
},
"job_cluster_key": "shared_cluster"
}
],
"job_clusters": [{
"job_cluster_key": "shared_cluster",
"new_cluster": {
"spark_version": "14.3.x-scala2.12",
"node_type_id": "i3.xlarge",
"num_workers": 2
}
}]
}
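For a multi-task run like this, each task carries its own state in the get-run response. A sketch that summarizes them (field names assume the Jobs API 2.1 get-run response shape):

```shell
# Summarize the per-task states of a multi-task run. A sketch; field names
# follow the Jobs API 2.1 get-run response.
summarize_run_tasks() {
  jq -r '.tasks[]
         | "\(.task_key): \(.state.life_cycle_state) (\(.state.result_state // "in progress"))"'
}
databricks jobs get-run --run-id "$RUN_ID" | summarize_run_tasks
```

This makes it easy to see which task in the extract/transform/load chain failed without opening the UI.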
Add a schedule to run automatically:
{
"name": "Daily ETL Job",
"schedule": {
"quartz_cron_expression": "0 0 2 * * ?",
"timezone_id": "America/New_York",
"pause_status": "UNPAUSED"
},
"tasks": [{
"task_key": "etl_task",
"notebook_task": {
"notebook_path": "/Workspace/Users/[email protected]/etl_notebook",
"source": "WORKSPACE"
},
"existing_cluster_id": "1234-567890-abcdef12"
}]
}
Common cron schedules:
0 0 2 * * ? (daily at 2:00 AM)
0 0 * * * ? (every hour)
0 0/15 * * * ? (every 15 minutes)
0 0 9 ? * MON-FRI (weekdays at 9:00 AM)
0 0 0 1 * ? (midnight on the first of each month)
# List all jobs
databricks jobs list
# List jobs with details (JSON)
databricks jobs list --output json | jq -r '.jobs[] | "\(.job_id): \(.settings.name)"'
# Filter by name
databricks jobs list --output json | jq -r '.jobs[] | select(.settings.name | contains("ETL")) | "\(.job_id): \(.settings.name)"'
# Get job configuration
databricks jobs get --job-id <job-id>
# Get just the job name
databricks jobs get --job-id <job-id> | jq -r '.settings.name'
# Get job tasks
databricks jobs get --job-id <job-id> | jq -r '.settings.tasks[] | .task_key'
# Update job settings
databricks jobs update --job-id <job-id> --json @updated_config.json
# Reset entire job configuration
databricks jobs reset --job-id <job-id> --json @new_config.json
# Delete job by ID
databricks jobs delete --job-id <job-id>
# Interactive delete
read -p "Enter job ID to delete: " JOB_ID
read -p "Are you sure you want to delete job $JOB_ID? (yes/no): " CONFIRM
if [[ "$CONFIRM" == "yes" ]]; then
databricks jobs delete --job-id "$JOB_ID"
echo "✅ Job deleted"
else
echo "❌ Cancelled"
fi
# List recent runs for a job
databricks jobs list-runs --job-id <job-id>
# List all recent runs
databricks jobs list-runs
# Get latest run for a job
databricks jobs list-runs --job-id <job-id> --limit 1 | jq -r '.runs[0]'
# Cancel a run
databricks jobs cancel-run --run-id <run-id>
# Cancel all active runs for a job
databricks jobs list-runs --job-id <job-id> --active-only | \
jq -r '.runs[] | .run_id' | \
while read RUN_ID; do
databricks jobs cancel-run --run-id "$RUN_ID"
echo "Cancelled run $RUN_ID"
done
Symptoms: "Only serverless compute is supported in the workspace" or "An environment is required for serverless task"
Solutions:
Serverless-only workspace:
{
"tasks": [{
"environment_key": "default"
}],
"environments": [{
"environment_key": "default",
"spec": {"client": "1"}
}]
}
Missing environment for serverless:
Add environment_key to the task and define the environments array

Classic compute workspace:
Use job_clusters or existing_cluster_id instead of environments

Always ask the user about compute preferences before creating jobs!
Symptoms: Job creation returns error
Solutions:
Validate JSON syntax:
cat job_config.json | jq .
Check required fields:
name is required at the top level
Each task needs a task_key and a task type (notebook_task, spark_python_task, etc.)

Verify cluster exists:
databricks clusters get --cluster-id <cluster-id>
Check notebook path exists:
databricks workspace get-status /Workspace/path/to/notebook
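The required-field checks can be scripted as a quick pre-flight test (a sketch covering only the basics: a top-level name and a task_key on every task):

```shell
# Sanity-check a job config file before sending it to the CLI. A sketch that
# checks only the basics; it does not validate cluster or task settings.
check_job_config() {
  jq -e '.name and (.tasks | length > 0) and all(.tasks[]; .task_key != null)' \
    "$1" >/dev/null
}
```

Usage: `check_job_config job_config.json && databricks jobs create --json @job_config.json`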
Symptoms: Job run state shows INTERNAL_ERROR or fails immediately
Solutions:
Check job configuration:
databricks jobs get --job-id <job-id>
Verify cluster can start:
databricks clusters start --cluster-id <cluster-id>
Check for library conflicts or missing dependencies
Review job run error message:
databricks jobs get-run --run-id <run-id> | jq -r '.state.state_message'
Symptoms: Empty output or error when getting logs
Solutions:
Ensure run has completed:
databricks jobs get-run --run-id <run-id> | jq -r '.state.life_cycle_state'
Check if run was successful:
databricks jobs get-run --run-id <run-id> | jq -r '.state.result_state'
For failed runs, check error message instead:
databricks jobs get-run --run-id <run-id> | jq -r '.state.state_message'
Symptoms: Scheduled job not running automatically
Solutions:
Check schedule is not paused:
databricks jobs get --job-id <job-id> | jq -r '.settings.schedule.pause_status'
Verify cron expression is valid:
Check timezone is correct:
databricks jobs get --job-id <job-id> | jq -r '.settings.schedule.timezone_id'
Symptoms: Job cluster terminates unexpectedly
Solutions:
Upload code first, then create job:
# Upload notebook
databricks workspace import notebook.ipynb /Workspace/Users/user@example.com/notebook --format JUPYTER
# Create job to run it
databricks jobs create --json @job_config.json
Test locally before creating job:
from databricks.connect import DatabricksSession
spark = DatabricksSession.builder.getOrCreate()
# Test your code locally first
df = spark.read.table("catalog.schema.table")
df.show()
# Once working, upload to workspace and create job
After creating jobs:
A helper script for common job operations:
# Create job from template
python .claude/skills/databricks-job-orchestrator/scripts/job_helper.py create
# List all jobs
python .claude/skills/databricks-job-orchestrator/scripts/job_helper.py list
# Run a job
python .claude/skills/databricks-job-orchestrator/scripts/job_helper.py run <job-id>
# Monitor job run until completion
python .claude/skills/databricks-job-orchestrator/scripts/job_helper.py monitor <run-id>
# Get job logs
python .claude/skills/databricks-job-orchestrator/scripts/job_helper.py logs <run-id>
# Cancel job run
python .claude/skills/databricks-job-orchestrator/scripts/job_helper.py cancel <run-id>
The helper provides: