Name: Agency Email Intelligence Engineer
Author: omeraltn

Skills suchen.../

Agency Email Intelligence Engineer | Skills Pool

# Connect to email source and fetch raw messages
import imaplib
import email
from email import policy

def fetch_thread(imap_conn, thread_ids):
    """Fetch and parse raw messages, preserving full MIME structure."""
    messages = []
    for msg_id in thread_ids:
        _, data = imap_conn.fetch(msg_id, "(RFC822)")
        raw = data[0][1]
        parsed = email.message_from_bytes(raw, policy=policy.default)
        messages.append({
            "message_id": parsed["Message-ID"],
            "in_reply_to": parsed["In-Reply-To"],
            "references": parsed["References"],
            "from": parsed["From"],
            "to": parsed["To"],
            "cc": parsed["CC"],
            "date": parsed["Date"],
            "subject": parsed["Subject"],
            "body": extract_body(parsed),
            "attachments": extract_attachments(parsed)
        })
    return messages

def reconstruct_thread(messages):
    """Build conversation topology from message headers.
    
    Key challenges:
    - Forwarded chains collapse multiple conversations into one message body
    - Quoted replies duplicate content (20-msg thread = ~4-5x token bloat)
    - Thread forks when people reply to different messages in the chain
    """
    # Build reply graph from In-Reply-To and References headers
    graph = {}
    for msg in messages:
        parent_id = msg["in_reply_to"]
        graph[msg["message_id"]] = {
            "parent": parent_id,
            "children": [],
            "message": msg
        }
    
    # Link children to parents
    for msg_id, node in graph.items():
        if node["parent"] and node["parent"] in graph:
            graph[node["parent"]]["children"].append(msg_id)
    
    # Deduplicate quoted content
    for msg_id, node in graph.items():
        node["message"]["unique_body"] = strip_quoted_content(
            node["message"]["body"],
            get_parent_bodies(node, graph)
        )
    
    return graph

def strip_quoted_content(body, parent_bodies):
    """Remove quoted text that duplicates parent messages.
    
    Handles multiple quoting styles:
    - Prefix quoting: lines starting with '>'
    - Delimiter quoting: '---Original Message---', 'On ... wrote:'
    - Outlook XML quoting: nested <div> blocks with specific classes
    """
    lines = body.split("\n")
    unique_lines = []
    in_quote_block = False
    
    for line in lines:
        if is_quote_delimiter(line):
            in_quote_block = True
            continue
        if in_quote_block and not line.strip():
            in_quote_block = False
            continue
        if not in_quote_block and not line.startswith(">"):
            unique_lines.append(line)
    
    return "\n".join(unique_lines)

def extract_structured_context(thread_graph):
    """Extract structured data from reconstructed thread.
    
    Produces:
    - Participant map with roles and activity patterns
    - Decision timeline (explicit commitments + implicit agreements)
    - Action items with correct participant attribution
    - Attachment references linked to discussion context
    """
    participants = build_participant_map(thread_graph)
    decisions = extract_decisions(thread_graph, participants)
    action_items = extract_action_items(thread_graph, participants)
    attachments = link_attachments_to_context(thread_graph)
    
    return {
        "thread_id": get_root_id(thread_graph),
        "message_count": len(thread_graph),
        "participants": participants,
        "decisions": decisions,
        "action_items": action_items,
        "attachments": attachments,
        "timeline": build_timeline(thread_graph)
    }

def extract_action_items(thread_graph, participants):
    """Extract action items with correct attribution.
    
    Critical: In a flattened thread, 'I' refers to different people
    in different messages. Without preserved From: headers, an LLM
    will misattribute tasks. This function binds each commitment
    to the actual sender of that message.
    """
    items = []
    for msg_id, node in thread_graph.items():
        sender = node["message"]["from"]
        commitments = find_commitments(node["message"]["unique_body"])
        for commitment in commitments:
            items.append({
                "task": commitment,
                "owner": participants[sender]["normalized_name"],
                "source_message": msg_id,
                "date": node["message"]["date"]
            })
    return items

def build_agent_context(thread_graph, query, token_budget=4000):
    """Assemble context for an AI agent, respecting token limits.
    
    Uses hybrid retrieval:
    1. Semantic search for query-relevant message segments
    2. Full-text search for exact entity/keyword matches
    3. Metadata filters (date range, participant, has_attachment)
    
    Returns structured JSON with source citations so the agent
    can ground its reasoning in specific messages.
    """
    # Retrieve relevant segments using hybrid search
    semantic_hits = semantic_search(query, thread_graph, top_k=20)
    keyword_hits = fulltext_search(query, thread_graph)
    merged = reciprocal_rank_fusion(semantic_hits, keyword_hits)
    
    # Assemble context within token budget
    context_blocks = []
    token_count = 0
    for hit in merged:
        block = format_context_block(hit)
        block_tokens = count_tokens(block)
        if token_count + block_tokens > token_budget:
            break
        context_blocks.append(block)
        token_count += block_tokens
    
    return {
        "query": query,
        "context": context_blocks,
        "metadata": {
            "thread_id": get_root_id(thread_graph),
            "messages_searched": len(thread_graph),
            "segments_returned": len(context_blocks),
            "token_usage": token_count
        },
        "citations": [
            {
                "message_id": block["source_message"],
                "sender": block["sender"],
                "date": block["date"],
                "relevance_score": block["score"]
            }
            for block in context_blocks
        ]
    }

# Example: LangChain tool wrapper
from langchain.tools import tool

@tool
def email_ask(query: str, datasource_id: str) -> dict:
    """Ask a natural language question about email threads.
    
    Returns a structured answer with source citations grounded
    in specific messages from the thread.
    """
    thread_graph = load_indexed_thread(datasource_id)
    context = build_agent_context(thread_graph, query)
    return context

@tool
def email_search(query: str, datasource_id: str, filters: dict = None) -> list:
    """Search across email threads using hybrid retrieval.
    
    Supports filters: date_range, participants, has_attachment,
    thread_subject, label.
    
    Returns ranked message segments with metadata.
    """
    results = hybrid_search(query, datasource_id, filters)
    return [format_search_result(r) for r in results]

Agency Email Intelligence Engineer

Email Intelligence Engineer Agent

🧠 Your Identity & Memory

🎯 Your Core Mission

Agency Email Intelligence Engineer

Email Intelligence Engineer Agent

🧠 Your Identity & Memory

🎯 Your Core Mission

Email Data Pipeline Engineering

Context Assembly for AI Agents

Production Email Processing

🚨 Critical Rules You Must Follow

Email Structure Awareness

Data Privacy and Security

📋 Your Core Capabilities

Email Parsing & Processing

Structural Analysis

Retrieval & Context Assembly

Integration Patterns

🔄 Your Workflow Process

Step 1: Email Ingestion & Normalization

Step 2: Thread Reconstruction & Deduplication

Step 3: Structural Analysis & Extraction

Step 4: Context Assembly & Tool Interface

💭 Your Communication Style

🎯 Your Success Metrics

🚀 Advanced Capabilities

Email-Specific Failure Mode Handling

Enterprise Scale Patterns

Quality Measurement & Monitoring

Openai Whisper

Voice Call

Prose

Clawhub

Sherpa Onnx Tts

Openai Whisper Api