Name: Medical OCR
Author: Fadil369

搜索技能.../

Medical OCR | Skills Pool

from google.cloud import vision
import io

def extract_medical_text(image_path):
    """Extract text from a medical image using Google Cloud Vision.

    Args:
        image_path: Path of the image file; it is read in binary mode.

    Returns:
        The ``description`` of the first text annotation in the response,
        or an empty string when the response has no text annotations.

    Raises:
        RuntimeError: If the Vision response carries an error message.
        OSError: If the image file cannot be opened or read.
    """
    client = vision.ImageAnnotatorClient()

    with io.open(image_path, 'rb') as image_file:
        content = image_file.read()

    image = vision.Image(content=content)
    response = client.text_detection(image=image)

    # A failed request can come back as a populated ``error`` field with no
    # annotations. Without this check the failure would be reported to the
    # caller as "no text in the image".
    if response.error.message:
        raise RuntimeError(
            f"Vision API text detection failed: {response.error.message}"
        )

    texts = response.text_annotations
    if texts:
        return texts[0].description
    return ""

import re

def extract_medications(text):
    """Extract medication names and dosages from free text.

    This is a regex heuristic, not a drug dictionary: a medication is a
    capitalized word ending in ``in``, ``ol``, ``ide`` or ``ine`` followed by
    whitespace and a dose such as ``500mg``, ``0.25 mg`` or ``10 mL``.

    Args:
        text: Text to scan. An empty or ``None`` value yields no results.

    Returns:
        A list of ``{"name": ..., "dosage": ...}`` dicts in order of
        appearance; ``dosage`` is the matched text exactly as written.
    """
    if not text:
        return []

    med_pattern = (
        r'([A-Z][a-z]+(?:in|ol|ide|ine))'   # capitalized name with a common suffix
        r'\s+'
        # Integer or decimal amount; the trailing \b stops "5 gallons" from
        # being read as "5 g".
        r'(\d+(?:\.\d+)?\s*(?:mg|mcg|g|mL)\b)'
    )

    return [
        {"name": name, "dosage": dosage}
        for name, dosage in re.findall(med_pattern, text)
    ]

def extract_vital_signs(text):
    """Pull blood pressure, heart rate and temperature readings out of text.

    Each vital is looked up with its own case-insensitive pattern; only the
    first occurrence is kept, and the value is returned as the matched string.

    Returns:
        A dict containing any of the keys ``blood_pressure``, ``heart_rate``
        and ``temperature`` that were found (empty if none matched).
    """
    # (result key, pattern) pairs; group 1 of each pattern is the reading.
    vital_patterns = (
        ('blood_pressure', r'BP[:\s]+(\d{2,3}/\d{2,3})'),
        ('heart_rate', r'HR[:\s]+(\d{2,3})'),
        ('temperature', r'Temp[:\s]+(\d{2,3}\.?\d*)'),
    )

    found = {}
    for field, pattern in vital_patterns:
        match = re.search(pattern, text, re.I)
        if match:
            found[field] = match.group(1)
    return found

def classify_medical_document(text):
    """Classify a medical document by keyword lookup.

    The text is lower-cased and checked against keyword groups in a fixed
    order; the first group with any keyword present as a substring wins.

    Returns:
        One of ``'prescription'``, ``'lab_results'``, ``'clinical_notes'``,
        ``'discharge_summary'`` or ``'unknown'``.
    """
    lowered = text.lower()

    # Order matters: earlier rules take precedence over later ones.
    keyword_rules = (
        ('prescription', ('prescription', 'rx', 'sig:')),
        ('lab_results', ('lab results', 'test results', 'specimen')),
        ('clinical_notes', ('progress note', 'soap', 'assessment')),
        ('discharge_summary', ('discharge', 'summary')),
    )

    for label, keywords in keyword_rules:
        for keyword in keywords:
            if keyword in lowered:
                return label
    return 'unknown'

def generate_fhir_medication_request(medication_data):
    """Generate a FHIR R4 MedicationRequest dict from extracted data.

    Args:
        medication_data: Mapping with required keys ``name`` and ``dosage``
            (free-text dose such as ``"500 mg"``). Optional keys:
            ``frequency`` (default 1), ``dose_value`` and ``dose_unit``.

    Returns:
        A dict shaped like a MedicationRequest resource. The medication is
        emitted as ``medicationCodeableConcept`` (the R4 choice-type element
        name). ``doseAndRate`` is included only when a numeric dose is
        available, either from ``dose_value`` or parsed from the leading
        number of ``dosage``.

    Raises:
        KeyError: If ``name`` or ``dosage`` is missing.
    """
    dosage_text = medication_data['dosage']

    dose_value = medication_data.get('dose_value')
    dose_unit = medication_data.get('dose_unit')
    if dose_value is None:
        # Fall back to the free-text dosage ("500 mg", "0.25mg") so the
        # structured quantity is not emitted with a null value.
        parsed = re.match(r'\s*(\d+(?:\.\d+)?)\s*([A-Za-z]+)', str(dosage_text))
        if parsed:
            number, parsed_unit = parsed.groups()
            dose_value = float(number) if '.' in number else int(number)
            dose_unit = dose_unit or parsed_unit

    dosage_instruction = {
        "text": dosage_text,
        "timing": {
            "repeat": {
                # NOTE(review): passed through unchanged; callers supplying a
                # code such as "BID" should convert it to a count first.
                "frequency": medication_data.get('frequency', 1),
                "period": 1,
                "periodUnit": "d"
            }
        },
    }
    if dose_value is not None:
        dosage_instruction["doseAndRate"] = [{
            "doseQuantity": {
                "value": dose_value,
                "unit": dose_unit or 'mg'
            }
        }]

    return {
        "resourceType": "MedicationRequest",
        "status": "active",
        "intent": "order",
        "medicationCodeableConcept": {
            "text": medication_data['name']
        },
        "dosageInstruction": [dosage_instruction]
    }

# Lookup table from medical abbreviation to its plain-English expansion.
# Abbreviations with two common spellings are listed under each spelling so a
# direct lookup works; the original combined keys ("SC/SQ", "EKG/ECG") are
# kept for backward compatibility.
MEDICAL_ABBREVIATIONS = {
    # Frequency
    "BID": "Twice daily",
    "TID": "Three times daily",
    "QID": "Four times daily",
    "QD": "Once daily",
    "PRN": "As needed",
    "STAT": "Immediately",
    "AC": "Before meals",
    "PC": "After meals",
    "HS": "At bedtime",

    # Route
    "PO": "By mouth / Oral",
    "IV": "Intravenous",
    "IM": "Intramuscular",
    "SC/SQ": "Subcutaneous",
    "SC": "Subcutaneous",
    "SQ": "Subcutaneous",
    "SL": "Sublingual",
    "TOP": "Topical",

    # Clinical
    "NPO": "Nothing by mouth",
    "SOB": "Shortness of breath",
    "N/V": "Nausea and vomiting",
    "CBC": "Complete Blood Count",
    "CMP": "Comprehensive Metabolic Panel",
    "CXR": "Chest X-Ray",
    "EKG/ECG": "Electrocardiogram",
    "EKG": "Electrocardiogram",
    "ECG": "Electrocardiogram",
    "BP": "Blood Pressure",
    "HR": "Heart Rate",
    "RR": "Respiratory Rate",
    "Temp": "Temperature",
}

def process_medical_document(image_path):
    """Run the complete pipeline for one medical document image.

    Steps: OCR the image, classify the document type, extract medications
    and vital signs, build one FHIR MedicationRequest per medication, and
    score the result.

    Args:
        image_path: Path of the image passed to ``extract_medical_text``.

    Returns:
        A dict with keys ``raw_text``, ``document_type``,
        ``extracted_entities`` (``medications``, ``vitals``, ``type``),
        ``fhir_resources`` and ``confidence``.
    """
    # Step 1: OCR extraction
    raw_text = extract_medical_text(image_path)

    # Step 2: Document classification
    doc_type = classify_medical_document(raw_text)

    # Step 3: Entity extraction
    entities = {
        'medications': extract_medications(raw_text),
        'vitals': extract_vital_signs(raw_text),
        'type': doc_type
    }

    # Step 4: FHIR resource generation
    fhir_resources = [
        generate_fhir_medication_request(med)
        for med in entities['medications']
    ]

    # Step 5: Confidence scoring. This replaces a fixed 0.85 that was
    # returned even when nothing was extracted. The OCR step only hands back
    # a string, which has no `confidence` attribute, so the score currently
    # reflects entity extraction alone.
    confidence = calculate_confidence(raw_text, entities)

    return {
        'raw_text': raw_text,
        'document_type': doc_type,
        'extracted_entities': entities,
        'fhir_resources': fhir_resources,
        'confidence': confidence
    }

def calculate_confidence(ocr_result, extracted_entities):
    """Calculate an overall confidence score as a mean of partial scores.

    Args:
        ocr_result: Any object; if it has a non-``None`` ``confidence``
            attribute, that value is included in the mean.
        extracted_entities: Mapping that may contain ``medications`` and
            ``vitals``. Missing keys (or ``None``) count as "nothing found".

    Returns:
        The arithmetic mean of the collected scores, or ``0.0`` when no
        score could be collected.
    """
    entities = extracted_entities or {}
    scores = []

    # OCR confidence, when the OCR result exposes one
    ocr_confidence = getattr(ocr_result, 'confidence', None)
    if ocr_confidence is not None:
        scores.append(ocr_confidence)

    # Fixed heuristic scores for each kind of entity that was found.
    # .get() keeps a partially-populated entities dict from raising KeyError.
    if entities.get('medications'):
        scores.append(0.9)  # High confidence for structured meds

    if entities.get('vitals'):
        scores.append(0.85)

    return sum(scores) / len(scores) if scores else 0.0

def validate_extracted_data(data):
    """Validate extracted medical data and collect human-readable warnings.

    Args:
        data: Mapping that may contain ``medications`` and ``vitals``
            (a mapping that may hold ``blood_pressure`` as
            ``"systolic/diastolic"``).

    Returns:
        A list of warning strings; empty when nothing is flagged. A blood
        pressure value that cannot be parsed produces a warning instead of
        raising.
    """
    warnings = []

    # Check for missing critical fields
    if not data.get('medications'):
        warnings.append("No medications detected")

    # Validate vital signs ranges
    vitals = data.get('vitals') or {}
    bp = vitals.get('blood_pressure')
    if bp is not None:
        try:
            # Wrong part count ("120", "1/2/3") and non-numeric parts both
            # surface as ValueError here.
            systolic, diastolic = map(int, str(bp).split('/'))
        except ValueError:
            warnings.append(f"Unparseable BP value: {bp}")
        else:
            if systolic > 180 or diastolic > 120:
                warnings.append(f"Critical BP value: {bp}")

    return warnings

def audit_log_ocr_operation(user_id, document_id, operation):
    """Build an audit-log entry for an OCR operation.

    The entry is only constructed and returned; persisting it is left to the
    caller.

    Args:
        user_id: Identifier of the user performing the operation.
        document_id: Identifier of the document being processed.
        operation: Name of the operation being logged.

    Returns:
        A dict with a timezone-aware UTC ISO-8601 ``timestamp``, the three
        arguments, ``phi_accessed`` set to ``True`` and a fixed
        ``justification`` string.
    """
    # Imported here because nothing else in this file imports datetime.
    # An aware UTC timestamp is used instead of the deprecated naive
    # datetime.utcnow().
    from datetime import datetime, timezone

    log_entry = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "user_id": user_id,
        "document_id": document_id,
        "operation": operation,
        "phi_accessed": True,
        "justification": "Clinical data extraction for patient care"
    }
    # Store in secure audit log
    return log_entry

def handle_ocr_errors(image_path):
    """Run OCR on an image and report the outcome as a dict instead of raising.

    Returns:
        ``{"success": True, "text": ...}`` when text was extracted;
        an ``error``/``suggestion`` dict when no text was detected;
        an ``error``/``image_path``/``suggestion`` dict when the OCR call
        raised an exception.
    """
    try:
        extracted = extract_medical_text(image_path)
    except Exception as exc:
        # Boundary handler: any failure is turned into an error payload.
        return {
            "error": str(exc),
            "image_path": image_path,
            "suggestion": "Check image format and API credentials"
        }

    if extracted:
        return {"success": True, "text": extracted}

    return {
        "error": "No text detected",
        "suggestion": "Improve image quality or check orientation"
    }

# Example 1: run the full pipeline on a prescription image.
# Input: prescription.jpg
result = process_medical_document("prescription.jpg")

# Output (illustrative only): the dict literal below is a bare expression
# statement. It is evaluated and discarded; it is not compared with `result`.
# The `[...]` placeholder is a list holding the Ellipsis object.
{
    "document_type": "prescription",
    "medications": [
        {"name": "Metformin", "dosage": "500mg", "frequency": "BID"},
        {"name": "Lisinopril", "dosage": "10mg", "frequency": "QD"}
    ],
    "fhir_resources": [...],
    "confidence": 0.92
}

# Example 2: run the same pipeline on a lab-results image.
# Input: lab_results.jpg
result = process_medical_document("lab_results.jpg")

# Output (illustrative only): another discarded dict literal. No code in this
# file builds the "tests" list shown here.
{
    "document_type": "lab_results",
    "tests": [
        {"name": "Hemoglobin A1C", "value": "7.2", "unit": "%"},
        {"name": "Fasting Glucose", "value": "126", "unit": "mg/dL"}
    ],
    "confidence": 0.88
}

Medical Ocr

Medical OCR Processing Skill

Overview

When to Use This Skill

Core Capabilities

1. OCR Text Extraction

Medical Ocr

Medical OCR Processing Skill

Overview

When to Use This Skill

Core Capabilities

1. OCR Text Extraction

2. Medical Entity Recognition

3. Document Type Classification

4. FHIR Resource Generation

Medical Abbreviation Dictionary

Workflow Patterns

Complete OCR Pipeline

Quality Assurance

Confidence Scoring

Validation Rules

HIPAA Compliance Guidelines

Integration with Agents

Error Handling

Best Practices

Examples

Example 1: Process Prescription

Example 2: Extract Lab Results

Performance Metrics

Updates and Maintenance

Feishu Doc

Summarize

Nano Pdf

Diffs

Customs Trade Compliance

Nutrient Document Processing