Name: Spreadsheet Source Validated
Author: HKUDS

スキルを検索.../

Spreadsheet Source Validated | Skills Pool

import pandas as pd
from openpyxl import load_workbook

def validate_spreadsheet_integrity(file_path, required_sheets=None, required_columns=None):
    """Check spreadsheet structure before processing."""
    try:
        # Check file is readable
        wb = load_workbook(file_path, read_only=True)
        
        # Verify required sheets exist
        if required_sheets:
            missing_sheets = [s for s in required_sheets if s not in wb.sheetnames]
            if missing_sheets:
                print(f"ERROR: Missing sheets: {missing_sheets}", file=sys.stderr)
                wb.close()
                return False
        
        # Verify required columns (sample first sheet)
        if required_columns:
            ws = wb.active
            headers = [cell.value for cell in ws[1]]
            missing_cols = [c for c in required_columns if c not in headers]
            if missing_cols:
                print(f"ERROR: Missing columns: {missing_cols}", file=sys.stderr)
                wb.close()
                return False
        
        wb.close()
        print(f"VALIDATED: Spreadsheet structure OK")
        return True
        
    except Exception as e:
        print(f"ERROR: Cannot validate spreadsheet: {str(e)}", file=sys.stderr)
        return False

import requests
from urllib3.exceptions import SSLError, MaxRetryError

def validate_external_source(url, timeout=30, max_retries=2):
    """Verify external data source is accessible."""
    for attempt in range(max_retries + 1):
        try:
            response = requests.head(url, timeout=timeout, allow_redirects=True)
            if response.status_code == 200:
                print(f"VALIDATED: External source accessible ({url})")
                return True
            else:
                print(f"WARNING: External source returned {response.status_code}", file=sys.stderr)
        except (SSLError, MaxRetryError, requests.ConnectionError) as e:
            if attempt == max_retries:
                print(f"ERROR: External source inaccessible after {max_retries + 1} attempts: {url}", file=sys.stderr)
                return False
            print(f"RETRY {attempt + 1}/{max_retries}: {str(e)}", file=sys.stderr)
    
    return False

ALTERNATIVE_SOURCES = {
    'epa_water_data': [
        'https://dataservices.epa.illinois.gov/swap',  # Primary
        'https://www.epa.gov/safewater/data-and-reports',  # Federal fallback
        'https://waterdata.usgs.gov/nwis',  # USGS fallback
    ],
    'financial_data': [
        'internal_database.xlsx',  # Primary
        'backup_financial_data.csv',  # Local backup
        'request_from_stakeholder',  # Manual acquisition
    ]
}

def try_alternative_sources(source_key):
    """Iterate through alternative sources until one succeeds."""
    alternatives = ALTERNATIVE_SOURCES.get(source_key, [])
    
    for i, source in enumerate(alternatives):
        print(f"Attempting alternative {i + 1}/{len(alternatives)}: {source}")
        if source.startswith('http'):
            if validate_external_source(source):
                return source
        else:
            if Path(source).exists():
                print(f"SUCCESS: Alternative source found: {source}")
                return source
    
    print(f"ERROR: All alternatives exhausted for {source_key}", file=sys.stderr)
    return None

def report_data_access_failure(source, error_type, alternatives_tried=0):
    """Standardized error reporting for data access failures."""
    error_report = {
        'timestamp': datetime.now().isoformat(),
        'source': source,
        'error_type': error_type,
        'alternatives_tried': alternatives_tried,
        'action_required': 'Manual data acquisition or source configuration update'
    }
    
    print("=" * 60, file=sys.stderr)
    print("DATA ACCESS FAILURE REPORT", file=sys.stderr)
    print("=" * 60, file=sys.stderr)
    for key, value in error_report.items():
        print(f"{key}: {value}", file=sys.stderr)
    print("=" * 60, file=sys.stderr)
    
    return error_report

# Step 1: Write validation script
cat > validate_sources.py << 'EOF'
import sys
from pathlib import Path

sources = ['input.xlsx', 'config.json']
missing = [s for s in sources if not Path(s).exists()]

if missing:
    print(f"BLOCKED: Missing sources: {missing}", file=sys.stderr)
    sys.exit(1)

print("VALIDATED: All sources available")
EOF

# Step 2: Run validation
python3 validate_sources.py || exit 1

# Step 3: Write processing script
cat > process_spreadsheet.py << 'EOF'
import openpyxl
# Your spreadsheet code here
EOF

# Step 4: Execute processing
python3 process_spreadsheet.py

# Step 5: Clean up (optional)
rm validate_sources.py

#!/usr/bin/env python3
"""
Complete workflow: Validate -> Process -> Report
"""
import sys
from pathlib import Path
from datetime import datetime
from openpyxl import load_workbook

def main():
    # PHASE 1: Validation
    print(f"[{datetime.now().isoformat()}] Starting validation...")
    
    source_file = 'input_data.xlsx'
    if not Path(source_file).exists():
        print(f"ERROR: Source file not found: {source_file}", file=sys.stderr)
        # Check for alternatives
        for alt in ['backup_input.xlsx', 'data_backup.csv']:
            if Path(alt).exists():
                print(f"FALLBACK: Using alternative: {alt}")
                source_file = alt
                break
        else:
            print("ERROR: No alternative sources available", file=sys.stderr)
            sys.exit(1)
    
    # PHASE 2: Processing
    print(f"[{datetime.now().isoformat()}] Processing {source_file}...")
    try:
        wb = load_workbook(source_file)
        ws = wb.active
        
        # Your operations here
        for row in ws.iter_rows(min_row=2):
            pass  # Process data
        
        wb.save('output.xlsx')
        print(f"SUCCESS: Processed {ws.max_row - 1} rows")
        
    except Exception as e:
        print(f"ERROR: Processing failed: {str(e)}", file=sys.stderr)
        sys.exit(1)
    
    # PHASE 3: Cleanup
    print(f"[{datetime.now().isoformat()}] Complete")

if __name__ == '__main__':
    main()

Library	Best For
`openpyxl`	Reading/writing .xlsx files, formatting, formulas
`pandas`	Data manipulation, analysis, merging datasets
`xlrd`	Reading older .xls files (read-only)
`xlsxwriter`	Creating new .xlsx files with advanced formatting
`requests`	Validating external API/data sources
`pathlib`	Cross-platform file path validation

Spreadsheet Source Validated

Validated Python Execution for Spreadsheet Tasks

Pre-Execution Data Validation

Step 1: Verify Data Source Availability

Spreadsheet Source Validated

Validated Python Execution for Spreadsheet Tasks

Pre-Execution Data Validation

Step 1: Verify Data Source Availability

Step 2: Verify Data Integrity

Step 3: External Data Source Verification

When to Use This Skill

Handling Inaccessible Data Sources

Protocol for Failed Data Access

Alternative Data Source Identification

Error Reporting Protocol

Why Direct Execution?

How to Use

Recommended Pattern: Validate Then Execute

Complete Workflow Example

Best Practices

When NOT to Use This Skill

Common Libraries

Troubleshooting

Prose

Coding Agent (bash-first)

Create Prompt

Strategic Compact

Strategic Compact

Strategic Compact