Use when proving parsing completeness theorems — that a parsing function returns .ok when given well-formed input, or that spec-level success implies native success. Also use when proving position bounds, eliminator lemmas, or chaining monadic parsing steps in Except/Option.
Patterns distilled from 8+ parsing completeness theorems across DEFLATE and Zstd decoders. These proofs show that when a specification says input is valid, the native parser succeeds.
When the spec function returns some (result, rest), prove that the native function
returns .ok (result', state') with correspondence between result/result'
and rest/state'.
theorem readBits_complete (br : BitReader) (n val : Nat) (rest : List Bool)
(hwf : br.bitOff < 8) (hn : n ≤ 32)
(hspec : Spec.readBitsLSB n br.toBits = some (val, rest)) :
∃ br', br.readBits n = .ok (val.toUInt32, br') ∧ br'.toBits = rest := by
When the native function returns .ok, prove bounds/invariants about the result.
theorem parseHeader_ok_elim (data : ByteArray) (pos : Nat) (result pos' : ...)
(h : parseHeader data pos = .ok (result, pos')) :
pos' ≥ pos + 2 ∧ pos' ≤ data.size := by
Both follow the same core proof technique.
simp only [parseFunction, bind, Except.bind] at h
Use simp only with bind/Except.bind to expose the monadic chain. For
do-notation, this reveals the sequence of operations as nested matches.
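As a rough sketch (shape only, names hypothetical), a two-step parser's hypothesis before and after the unfolding looks like:

```lean
-- Before:
--   h : parseFunction data pos = .ok (result, pos')
-- After simp only [parseFunction, bind, Except.bind] at h:
--   h : (match step1 data pos with
--        | .ok (v, pos₁) =>
--          match step2 data pos₁ with
--          | .ok (w, pos₂) => pure (combine v w, pos₂)
--          | .error e => .error e
--        | .error e => .error e) = .ok (result, pos')
-- Each nested match is then eliminated with the cases/nomatch pattern.
```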
For each if guard then ... else throw:
by_cases h1 : guard_condition
· rw [if_pos h1] at h; ... -- success branch
· rw [if_neg h1] at h; exact nomatch h -- error branch is impossible
The hypothesis h : ... = .ok _ contradicts throw via nomatch.
For each match intermediateCall with | .ok v => ... | .error e => ...:
cases hstep : intermediateCall with
| error e => simp only [hstep] at h; exact nomatch h
| ok v =>
rw [hstep] at h; dsimp only [Bind.bind, Except.bind] at h
-- continue with v available
At the end of the chain, h has the form pure (result, pos) = .ok (r, p).
Extract with:
simp only [pure, Pure.pure, Except.pure] at h
obtain ⟨rfl, rfl⟩ := h
Or for single values: obtain rfl := h.
The goal is now in terms of the extracted values. Close with rfl,
exact ⟨rfl, rfl⟩, or combine bound lemmas with omega.
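The full hypothesis-side recipe fits in a self-contained toy example (parseByte and parseByte_ok_elim are hypothetical, not from the codebase):

```lean
def parseByte (data : ByteArray) (pos : Nat) : Except String (UInt8 × Nat) :=
  if pos < data.size then .ok (data[pos]!, pos + 1) else .error "eof"

theorem parseByte_ok_elim {data : ByteArray} {pos : Nat} {b : UInt8} {pos' : Nat}
    (h : parseByte data pos = .ok (b, pos')) :
    pos' = pos + 1 ∧ pos' ≤ data.size := by
  unfold parseByte at h
  by_cases h1 : pos < data.size
  · rw [if_pos h1] at h
    -- .ok (data[pos]!, pos + 1) = .ok (b, pos') → componentwise equalities
    simp only [Except.ok.injEq, Prod.mk.injEq] at h
    obtain ⟨rfl, rfl⟩ := h
    exact ⟨rfl, by omega⟩
  · rw [if_neg h1] at h
    exact nomatch h  -- .error "eof" = .ok _ is impossible
```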
theorem parseLiteralsSection_treeless_complete
(data : ByteArray) (pos : Nat) (prevHuff : Option ZstdHuffmanTable)
(sect : ZstdLiteralsSection) (pos' : Nat)
(h : parseLiteralsSection data pos prevHuff = .ok (sect, pos')) :
pos' ≥ pos ∧ pos' ≤ data.size := by
simp only [parseLiteralsSection, bind, Except.bind] at h
-- Guard: bounds check
by_cases h1 : data.size < pos + 1
· rw [if_pos h1] at h; exact nomatch h
· rw [if_neg h1] at h
simp only [pure, Pure.pure, Except.pure] at h
-- Case split on block type
by_cases h2 : blockType == 0
· rw [if_pos h2] at h
cases hraw : parseRawLiterals data pos with
| error e => simp only [hraw] at h; exact nomatch h
| ok v =>
rw [hraw] at h
dsimp only [Bind.bind, Except.bind] at h
obtain ⟨rfl, rfl⟩ := h
exact ⟨by omega, parseRawLiterals_le_size hraw⟩
· rw [if_neg h2] at h; ...
Do-notation for side-effect-free binds produces PUnit.unit. Clean with:
simp only [pure, Pure.pure, Except.pure] at h
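For example (shape only, names hypothetical), a guard written as a bare statement in do-notation binds a PUnit value:

```lean
-- do
--   if data.size < pos + 1 then throw "eof"  -- this bind : Except String PUnit
--   pure (data[pos]!, pos + 1)
-- After simp only [bind, Except.bind] at h, the guard's success branch
-- leaves a PUnit.unit artifact in h; the extra
-- simp only [pure, Pure.pure, Except.pure] at h normalizes it away.
```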
getElem! with bounds
When the function uses data[pos]! (panic-indexed), use:
simp only [getElem!_def] at h
split at h
· -- bounds hold: have the actual element
· -- bounds fail: contradiction with guard
When composing two parsing steps, each with its own _le_size lemma:
have h1_le := step1_le_size hstep1 -- pos₁ ≤ data.size
have h2_le := step2_le_size hstep2 -- pos₂ ≤ data.size
have h1_ge := step1_pos_ge hstep1 -- pos₁ ≥ pos + k₁
have h2_ge := step2_pos_ge hstep2 -- pos₂ ≥ pos₁ + k₂
omega
For parsers using well-founded recursion (e.g., Huffman tree building),
use f.induct for structural induction:
theorem parseLoop_complete : ... := by
induction fuel using parseLoop.induct generalizing acc
· -- base case
· -- recursive case: unfold one step, apply IH
BEq to Prop bridging
Guards often use == (BEq) but proofs need = (Prop). Bridge with:
simp only [beq_iff_eq] at h2 -- converts (x == y) = true to x = y
Or for the negative:
simp only [bne_iff_ne] at h2
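A minimal, self-contained instance of the bridge (standard simp lemma, nothing codebase-specific):

```lean
example (x y : Nat) (h2 : (x == y) = true) : x = y := by
  simp only [beq_iff_eq] at h2  -- h2 : x = y
  exact h2
```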
When a parser dispatches on a format discriminant (e.g., 2-bit sizeFormat):
by_cases h2 : sizeFormat == 0
· rw [if_pos h2] at h; ...
· rw [if_neg h2] at h
by_cases h3 : sizeFormat == 1
· rw [if_pos h3] at h; ...
· rw [if_neg h3] at h
by_cases h4 : sizeFormat == 2
· rw [if_pos h4] at h; ...
· rw [if_neg h4] at h
-- last case: sizeFormat == 3 (use omega to establish)
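One way to pin down the last case, assuming sizeFormat is a Nat of the form raw &&& 3 (so Nat.and_le_right bounds it; names hypothetical):

```lean
-- After simp only [beq_iff_eq] at h2 h3 h4:
--   h2 : ¬sizeFormat = 0, h3 : ¬sizeFormat = 1, h4 : ¬sizeFormat = 2
have hbound : sizeFormat ≤ 3 := Nat.and_le_right  -- sizeFormat = raw &&& 3
have hfmt : sizeFormat = 3 := by omega
```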
Parsing completeness proofs live in Zip/Spec/ but reference functions
in Zip/Native/. The native parsing functions must NOT be private: if the
target function is private, change it to protected or public before
starting the proof — don't discover this mid-proof. Check visibility early:
grep -n 'private def targetFunction'.
Factor shared case analysis into a reusable eliminator:
-- Eliminator: extracts all useful facts from parse success
theorem parseX_ok_elim (h : parseX data pos = .ok (result, pos')) :
pos' ≥ pos + minSize ∧ pos' ≤ data.size ∧ result.field ∈ validRange := by
...
-- Downstream theorems use the eliminator instead of re-analyzing
theorem parseY_complete (h : parseY data pos = .ok ...) : ... := by
have ⟨hge, hle, hvalid⟩ := parseX_ok_elim hX
...
This avoids duplicating the same by_cases/cases/nomatch chain in
every theorem that depends on parseX succeeding.
When proving that a parsed struct's fields equal specific expressions over
the input bytes, after unfold_except and split at h on the match:
- h has the form {field1 := ..., field2 := ...} = result ∧ pos + k = afterPos;
use obtain ⟨rfl, rfl⟩ := h to substitute result and afterPos.
- For unconditional field equations (e.g., hdr.blockSize = raw >>> 3): close with rfl.
- For conditional characterizations (e.g., (typeVal=0 → .raw) ∧ (typeVal=1 → .rle)):
use simp_all because you need the heq✝ : matchDiscrim = N hypothesis to rule out
impossible implications.
- Always use explicit case splits (not <;>) because different branches may need
different closers (rfl vs simp_all vs exact nomatch h).
-- Example: field characterization for parseBlockHeader
theorem parseBlockHeader_blockType_eq ... := by
unfold Zip.Native.parseBlockHeader at h
unfold_except
split at h
· exact nomatch h -- guard failure
· split at h
· obtain ⟨rfl, rfl⟩ := h; simp_all -- typeVal = 0
· obtain ⟨rfl, rfl⟩ := h; simp_all -- typeVal = 1
· obtain ⟨rfl, rfl⟩ := h; simp_all -- typeVal = 2
· exact nomatch h -- reserved type
match hresult, not simp only
When the goal has existentials (∃ x y z, f = .ok (x, y, z)), do NOT
unfold the function in the goal — simp only [f, bind, ...] explodes
because it must distribute under ∃. Instead, match on the result:
match hresult : parseFunction data pos with
| .ok (a, b, c) => exact ⟨a, b, c, rfl⟩
| .error _ =>
exfalso
simp only [parseFunction, bind, Except.bind, ...] at hresult
-- Now hresult is a hypothesis, no existentials to blow up
No let bindings in size helpers
Helper definitions used in hsize hypotheses (e.g., rawLiteralsSectionSize)
must NOT use let bindings. After unfold/delta, let bindings become
opaque have terms that block split from finding if expressions.
Bad (blocks split at hsize after unfolding):
def rawSize (data : ByteArray) (pos : Nat) : Nat :=
let sizeFormat := ((data[pos]! >>> 2) &&& 3).toNat
if sizeFormat == 0 then 1 + ... else ...
Good (inlines everything so split sees the if directly):
def rawSize (data : ByteArray) (pos : Nat) : Nat :=
if ((data[pos]! >>> 2 &&& 3).toNat == 0) then 1 + ... else ...
After unfold rawSize at hsize; split at hsize, each branch gets the
concrete value. Use contradiction to eliminate branches that conflict
with the outer split at hresult context.
split at h auto-resolves with context hypotheses
When split at h encounters if cond then A else B and the context
already contains h✝ : cond = true (or ¬cond = true), the if is
automatically resolved — no second split is needed. This happens when
the conditions in a helper definition match conditions from an outer
split at hresult.
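Sketch of the interaction (hypothetical names):

```lean
-- Outer split produced:  hc✝ : (sizeFormat == 0) = true
-- hsize (after unfold):  (if sizeFormat == 0 then 1 else 2) = k
-- split at hsize now yields only the then-branch, hsize : 1 = k;
-- the else-branch is discharged automatically against hc✝.
```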
Completeness proofs that construct the .ok result (existential goals)
work on the goal, not a hypothesis. The split at h / dsimp [letFun] at h
patterns from invariant proofs do NOT transfer directly.
Invariant proof (hypothesis-side) — destructuring h : f = .ok (...):
split at h; · exact nomatch h -- error branch impossible
dsimp only [letFun] at h -- reduce let bindings in h
split at h; · exact nomatch h -- next guard
Completeness proof (goal-side) — constructing ∃ ..., f = .ok (...):
simp only [hguard_false, ↓reduceIte] -- eliminate guard where possible
split -- case-split on if/match in goal
· -- error case: derive contradiction
rename_i hbad; exact absurd hbad (by omega)
· -- success case: continue
split
· rename_i hbad; exact absurd hbad (by ...)
· apply ih; ... -- recursive step
Key differences:
- split (not split at h) for goal-side conditionals
- dsimp only [letFun] often does nothing on the goal — just omit it
- absurd + contradiction to close impossible branches (not nomatch)
- simp only [..., ↓reduceIte] works for guards with known-false conditions
Avoid bare simp to close error branches: simp may succeed but is fragile.
Prefer exact nomatch h — it's faster, more robust, and communicates intent
clearly.
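A self-contained instance of the preferred closer:

```lean
example (e : String) (v : Nat)
    (h : (Except.error e : Except String Nat) = .ok v) : False :=
  nomatch h  -- distinct constructors can never be equal
```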
Unfold ONE function at a time. If parseA calls parseB calls parseC,
unfold only parseA, then case-split on the parseB call. Don't
simp only [parseA, parseB, parseC] — the term explodes.
dsimp only after rw
After rw [hstep] at h where hstep : f = .ok v, the bind/match may
not reduce automatically. Follow with:
dsimp only [Bind.bind, Except.bind] at h
to collapse match (.ok v) with | .ok x => ... | .error e => ....
Composed completeness theorems prove that a high-level operation succeeds given only raw-byte-level preconditions. They chain together:
- sub-parser completeness (parseBlockHeader_succeeds)
- field characterization (parseBlockHeader_blockType_eq)
- the composition/step lemma (decompressBlocksWF_single_raw)
theorem composed_completeness (data : ByteArray) (off : Nat)
(hsize : data.size ≥ off + minBytes)
(htypeVal : rawByteExpr = expectedValue)
(hlastBit : ...)
(hpayload : data.size ≥ off + headerSize + payloadSize) :
∃ result pos', topLevelFunction data off ... = .ok (result, pos') := by
-- Step 1: Derive that the parser's guard is satisfiable
-- e.g., typeVal ≠ 3 (reserved) from typeVal = 0/1/2
have htypeNe3 : rawTypeExpr ≠ 3 := by rw [htypeVal]; decide
-- Step 2: Obtain parse result via sub-function completeness
obtain ⟨hdr, afterHdr, hparse⟩ := parseFunction_succeeds data off hsize htypeNe3
-- Step 3: Extract field characterizations
have htype := (parseFunction_blockType_eq ... hparse).1 htypeVal
have hlast_eq := parseFunction_lastBlock_eq ... hparse
have hbs_eq := parseFunction_blockSize_eq ... hparse
have hpos_eq := parseFunction_pos_eq ... hparse
-- Step 4: Derive high-level constraints from raw-byte hypotheses
-- Thread htypeVal/hlastBit/hblockSize through the characterization rewrites
have hlast : hdr.lastBlock = true := by rw [hlast_eq, hlastBit]; decide
have hbs : ¬ hdr.blockSize > maxSize := by rw [hbs_eq]; exact Nat.not_lt.mpr ...
-- Step 5: Obtain sub-operation result via its completeness theorem
have hpayload' : data.size ≥ afterHdr + neededBytes := by rw [hpos_eq]; omega
obtain ⟨block, afterBlock, hraw⟩ := subOperation_succeeds data afterHdr ... hpayload'
-- Step 6: Close via the composition lemma
have hoff : ¬ data.size ≤ off := by omega
exact ⟨_, _, composition_lemma ... hoff hparse hbs htype hraw hlast⟩
Preconditions are raw-byte expressions. The theorem's hypotheses use
data[off]! expressions, not parsed struct fields. This makes the theorem
usable without first running the parser — the caller only needs to know
the bytes at the relevant offsets.
Field characterization theorems are the bridge. Theorems like
parseBlockHeader_blockType_eq translate between raw-byte expressions
(rawTypeExpr = 0) and struct field values (hdr.blockType = .raw).
These must exist before composed completeness can be proved.
Payload size flows through hpos_eq. The sub-operation's payload
requirement (data.size ≥ afterHdr + payloadBytes) is derived by
rewriting afterHdr via the position characterization (hpos_eq : afterHdr = off + headerSize) and combining with the raw-byte payload
hypothesis.
decide closes typeVal implications. After rewriting with the
characterization theorem, goals like 0 = 0 → .raw = .raw or
(1 : UInt32) &&& 1 = 1 → ... = true close with decide.
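These close by evaluation; for instance (standalone, not codebase-specific):

```lean
-- decide evaluates the decidable hypothesis and conclusion outright
example : (1 : UInt32) &&& 1 = 1 → (true = true) := by decide
```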
decompressBlocksWF succeeds for specific block types (raw, RLE, compressed).
Before writing a composed completeness theorem, ensure these exist:
- parseFunction_succeeds — sub-parser completeness
- parseFunction_fieldName_eq — field characterization for each relevant field
- subOperation_succeeds — sub-operation completeness
- composition_lemma — the step/single theorem that wires everything together
See Zip/Spec/Zstd.lean: decompressBlocksWF_succeeds_single_raw,
decompressBlocksWF_succeeds_single_rle for concrete examples.
Two-block composed completeness extends the single-block 6-step recipe to prove that two consecutive blocks (first non-last, second last) succeed. The key addition is position threading — the second block's offset depends on the first block's output position.
Explicit off2 parameter
Introduce an explicit off2 parameter for the second block's offset and
a hypothesis relating it to the first block's computed position:
theorem decompressBlocksWF_succeeds_two_raw_blocks
(data : ByteArray) (off off2 : Nat)
-- Block 1: non-last raw block
(hsize1 : data.size ≥ off + 3)
(htypeVal1 : (data[off]! >>> 1 &&& 3).toNat = 0)
(hlastBit1 : data[off]! &&& 1 = 0) -- non-last
(hblockSize1 : ...)
(hpayload1 : data.size ≥ off + 3 + blockSize1.toNat)
-- Position threading
(hoff2 : off2 = off + 3 + blockSize1.toNat)
-- Block 2: last raw block (same pattern as single-block)
(hsize2 : data.size ≥ off2 + 3)
(htypeVal2 : (data[off2]! >>> 1 &&& 3).toNat = 0)
(hlastBit2 : data[off2]! &&& 1 = 1) -- last
... :
∃ result pos', decompressBlocksWF data off ... = .ok (result, pos') := by
subst Technique
After obtaining the first block's result and position characterization,
use subst to unify off2 with the computed position:
-- Steps 1-5 for block 1 (same as single-block recipe)
obtain ⟨hdr1, afterHdr1, hparse1⟩ := parseBlockHeader_succeeds ...
have hpos1_eq := parseBlockHeader_pos_eq ... hparse1
obtain ⟨block1, afterBlock1, hraw1⟩ := decompressRawBlock_succeeds ...
have hAfterBlock1_eq := decompressRawBlock_pos_eq ... hraw1
-- Unify off2 with afterBlock1
have hoff2_eq : off2 = afterBlock1 := by rw [hoff2, hpos1_eq]; omega
subst hoff2_eq
-- Now all block 2 hypotheses use afterBlock1 directly
-- Steps 1-6 for block 2 (same as single-block recipe)
...
-- Close via step theorem + single-block theorem
exact ⟨_, _, raw_step ... hparse1 hraw1 hlast1_false (single_raw ... hparse2 ...)⟩
Why subst works here: After subst hoff2_eq, every hypothesis
mentioning off2 is rewritten to use afterBlock1. This avoids manual
rw chains and prevents variable confusion.
Direction matters: subst eliminates the later-introduced variable.
If hoff2_eq : off2 = afterBlock1, it eliminates off2, keeping
afterBlock1. Post-subst, reference the surviving variable.
Two-block theorems form a matrix over block types:
| Block 1 \ Block 2 | Raw | RLE | Compressed |
|---|---|---|---|
| Raw | _two_raw_blocks | _raw_then_rle | _raw_then_compressed |
| RLE | _rle_then_raw | _two_rle_blocks | _rle_then_compressed |
| Compressed | _compressed_then_raw | _compressed_then_rle | _two_compressed |
Each entry follows the same pattern; only the step/single theorems and
position formulas differ (off + 3 + blockSize for raw, off + 4 for
RLE, variable for compressed).
Two-block theorems use explicit composition (step + single). For N-block
frames, use induction on the WF recursion instead — see the
lean-content-preservation skill's "N-Block Content via Induction"
section. Two-block theorems remain valuable as compact, self-contained
worked examples of the position-threading pattern.
Composed completeness scales across abstraction levels. Each level adds a wrapper around the previous level's theorem:
decompressBlocksWF_succeeds_* (block loop level)
↓
decompressFrame_succeeds_* (frame level: + header/dict/checksum)
↓
decompressZstd_succeeds_* (API level: + magic/end-of-data)
Frame-level theorems wrap block-level completeness with frame header parsing and dictionary/checksum/size checks:
theorem decompressFrame_succeeds_single_raw
(data : ByteArray) (off : Nat)
(hsize : data.size ≥ off + frameHeaderMinSize)
-- Frame header hypotheses
(hmagic : ...)
-- Block hypotheses (universally quantified over parsed header)
(hblock : ∀ hdr afterHdr, parseFrameHeader data off = .ok (hdr, afterHdr) →
... block byte conditions ...) :
∃ content pos', decompressFrame data off ... = .ok (content, pos') := by
obtain ⟨hdr, afterHdr, hparse⟩ := parseFrameHeader_succeeds ...
have ⟨htype, hlast, ...⟩ := hblock hdr afterHdr hparse
obtain ⟨result, pos', hblocks⟩ := decompressBlocksWF_succeeds_single_raw ...
exact ⟨_, _, decompressFrame_single_block ... hparse hblocks ...⟩
Key technique: Block hypotheses are universally quantified over
(hdr, afterHdr) from parseFrameHeader. This lets the theorem
state byte-level conditions that depend on the header size (which
varies by frame header format).
API-level theorems add the final wrapper:
theorem decompressZstd_succeeds_single_raw_frame ...
(hterm : ∀ content pos', decompressFrame ... = .ok (content, pos') →
pos' ≥ data.size) :
∃ output, decompressZstd data = .ok output := by
obtain ⟨content, pos', hframe⟩ := decompressFrame_succeeds_single_raw ...
exact ⟨_, decompressZstd_single_frame hframe (hterm content pos' hframe)⟩
hterm pattern: The "frame fills data" condition is stated as a
universally quantified hypothesis (∀ content pos', ... → pos' ≥ data.size)
rather than a byte-level assertion. This is cleaner than computing the
exact end position from byte-level conditions (which would require
threading all position computations).
Except/Option bind unfolding