Markdown format validation and quality checker. Use when (1) checking markdown syntax errors, (2) validating document structure, (3) fixing formatting issues, (4) ensuring markdown best practices, (5) reviewing generated markdown files.
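Headings:
- Space after `#` symbols
Lists:
- Consistent list markers (`-`, `*`, or `+`)
Links:
- Well-formed `[text](url)` syntax
Images:
Code Blocks:
- Fences opened and closed with ```
Tables:
- Header separator row using `---`
Formatting: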
Read the markdown file and check for syntax errors:
def check_markdown_syntax(file_path):
    """Scan a markdown file line by line and report basic syntax problems.

    Checks performed outside fenced code blocks (lines between ``` fences
    are skipped):

    * ATX headings (1-6 ``#`` at column 0) that lack the space after the
      ``#`` run.
    * List items whose ``-``, ``*`` or ``+`` marker is not followed by a
      space or tab.  Thematic breaks / front-matter delimiters (``---``),
      lines starting with a doubled marker (``**bold**``) and lines that
      open with an ``*emphasis*`` span are not treated as list items.
    * Trailing whitespace.

    A fence that is never closed is reported against the line that opened it.

    Args:
        file_path: Path of the UTF-8 encoded markdown file to check.

    Returns:
        A list of human-readable issue strings, in file order; empty when
        nothing was found.

    Raises:
        OSError: If the file cannot be opened.
    """
    import re  # local import keeps this snippet self-contained

    # 1-6 '#' directly followed by something that is neither whitespace nor
    # another '#'.  '## Title' does not match; '##Title' does.
    heading_without_space = re.compile(r'#{1,6}[^#\s]')
    # Three or more identical -, * or _ (optionally space separated).
    thematic_break = re.compile(r'([-*_])(?:[ \t]*\1){2,}$')
    # '*text*' opening the line is emphasis, not a bullet.
    emphasis_start = re.compile(r'\*[^*\s][^*]*\*')

    with open(file_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    issues = []
    in_code_block = False
    code_block_start = 0

    for i, line in enumerate(lines, 1):
        stripped = line.strip()

        # Toggle fenced-code state; remember where the fence was opened.
        if stripped.startswith('```'):
            if in_code_block:
                in_code_block = False
            else:
                in_code_block = True
                code_block_start = i

        # Skip checks inside code blocks (including the opening fence line).
        if in_code_block:
            continue

        # Heading format: only the missing-space case is an error.
        if heading_without_space.match(line):
            issues.append(f"Line {i}: Missing space after # in heading")

        # List format.
        if stripped.startswith(('-', '*', '+')):
            marker, rest = stripped[0], stripped[1:]
            not_a_list_item = (
                thematic_break.match(stripped)
                or rest.startswith(marker)
                or emphasis_start.match(stripped)
            )
            # A bare marker (empty list item) is valid, hence the `rest` test.
            if not not_a_list_item and rest and rest[0] not in ' \t':
                issues.append(f"Line {i}: Missing space after list marker")

        # Trailing whitespace: compare with only the newline removed.
        if line.rstrip() != line.rstrip('\n'):
            issues.append(f"Line {i}: Trailing whitespace")

    # Unclosed code block at end of file.
    if in_code_block:
        issues.append(f"Line {code_block_start}: Unclosed code block")

    return issues
Verify document structure:
def check_structure(file_path):
    """Check the heading hierarchy of a markdown file.

    Reports two kinds of problems for ATX headings found outside fenced
    code blocks:

    * a heading that is more than one level deeper than the heading before
      it (the first heading is compared against level 0);
    * every H1 after the first one, wherever it appears in the document.

    Args:
        file_path: Path of the UTF-8 encoded markdown file to check.

    Returns:
        A list of issue strings in document order; empty when the structure
        is fine.

    Raises:
        OSError: If the file cannot be opened.
    """
    import re  # local import keeps this snippet self-contained

    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # [ \t]+ rather than \s+ so the separator can never swallow a newline
    # and pull the following paragraph into the heading text.
    heading_re = re.compile(r'^(#{1,6})[ \t]+(.+)$')

    issues = []
    prev_level = 0
    seen_h1 = False
    in_code_block = False

    for line in content.split('\n'):
        # '# comment' lines inside fenced code are not headings.
        if line.strip().startswith('```'):
            in_code_block = not in_code_block
            continue
        if in_code_block:
            continue

        match = heading_re.match(line)
        if match is None:
            continue

        level = len(match.group(1))
        text = match.group(2)

        # Check for skipped levels
        if level > prev_level + 1:
            issues.append(f"Heading '{text}': Skipped level (H{prev_level} → H{level})")

        # Check for multiple H1, even when other headings sit between them.
        if level == 1:
            if seen_h1:
                issues.append("Multiple H1 headings found")
            seen_h1 = True

        prev_level = level

    return issues
Check all links:
import os
from pathlib import Path
def check_links(file_path):
    """Report relative markdown links whose target file does not exist.

    Every ``[text](url)`` occurrence in the file is examined.  Links that
    carry a URI scheme (``https:``, ``mailto:``, ``ftp:``, ``tel:`` ...),
    protocol-relative links (``//host/...``) and in-page anchors (``#id``)
    are skipped.  For the remaining links an optional quoted title,
    surrounding ``<...>``, ``#fragment`` and ``?query`` are removed and
    percent-escapes are decoded before the path is resolved relative to the
    directory containing ``file_path``.

    Args:
        file_path: Path of the UTF-8 encoded markdown file to check.

    Returns:
        A list of ``"Broken link: [text](url)"`` strings (with the link
        exactly as written), in document order.

    Raises:
        OSError: If the file cannot be opened.
    """
    import re  # local imports keep this snippet self-contained
    from urllib.parse import unquote

    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    issues = []
    base_dir = Path(file_path).parent

    # Scheme needs >= 2 characters so a Windows drive letter ("C:") is not
    # mistaken for one.
    external_re = re.compile(r'^(?:[a-zA-Z][a-zA-Z0-9+.-]+:|//)')
    # Optional link title:  (path "Title")  or  (path 'Title')
    title_re = re.compile(r"""\s+(?:"[^"]*"|'[^']*')$""")

    # Find all markdown links
    for text, url in re.findall(r'\[([^\]]+)\]\(([^\)]+)\)', content):
        target = title_re.sub('', url.strip())
        if target.startswith('<') and target.endswith('>'):
            target = target[1:-1]

        # Skip external URLs and in-page anchors
        if target.startswith('#') or external_re.match(target):
            continue

        # Only the path part names a file on disk.
        path_part = target.split('#', 1)[0].split('?', 1)[0]
        if not path_part:
            continue

        # Check relative file paths
        if not (base_dir / unquote(path_part)).exists():
            issues.append(f"Broken link: [{text}]({url})")

    return issues
def fix_heading_hierarchy(content):
    """Ensure proper heading hierarchy.

    Rewrites ATX headings so that no heading is more than one level deeper
    than its parent and the outermost headings are H1.  Headings are never
    made deeper than they were written.  Headings that were siblings in the
    input (same original level under the same parent) stay siblings in the
    output.

    Only lines that start at column 0 with 1-6 ``#`` followed by a space,
    tab or end of line are treated as headings; ``#hashtag`` text and any
    line inside a ``` fenced code block is left untouched.

    Args:
        content: Markdown text with ``\\n`` line separators.

    Returns:
        The text with heading levels adjusted; all other lines unchanged.
    """
    import re  # local import keeps this snippet self-contained

    heading_re = re.compile(r'^(#{1,6})(?=[ \t]|$)')

    fixed_lines = []
    # Stack of (original_level, emitted_level) for the currently open
    # ancestors.  Mapping via the original level is what keeps siblings
    # aligned: comparing against the previous *emitted* level would demote
    # the second of two skipped siblings one step further than the first.
    open_levels = []
    in_code_block = False

    for line in content.split('\n'):
        if line.strip().startswith('```'):
            in_code_block = not in_code_block
        elif not in_code_block:
            match = heading_re.match(line)
            if match:
                original_level = len(match.group(1))
                # Close every heading that is not an ancestor of this one.
                while open_levels and open_levels[-1][0] >= original_level:
                    open_levels.pop()
                level = open_levels[-1][1] + 1 if open_levels else 1
                open_levels.append((original_level, level))
                line = '#' * level + line[original_level:]
        fixed_lines.append(line)

    return '\n'.join(fixed_lines)
def fix_list_formatting(content):
    """Ensure consistent list formatting.

    For every line whose first non-blank character is ``-``, ``*`` or ``+``
    the text after the marker is normalised to exactly one space
    (``-item`` and ``-   item`` both become ``- item``) and trailing
    whitespace is removed.  The original leading indentation is preserved
    verbatim, tabs included.

    Lines that merely look like list items are left untouched:

    * anything inside a ``` fenced code block;
    * thematic breaks and front-matter delimiters (``---``, ``* * *``);
    * lines starting with a doubled marker, e.g. ``**bold**``;
    * lines opening with an ``*emphasis*`` span.

    Args:
        content: Markdown text with ``\\n`` line separators.

    Returns:
        The text with list markers normalised.
    """
    import re  # local import keeps this snippet self-contained

    thematic_break = re.compile(r'^([-*_])(?:[ \t]*\1){2,}$')
    emphasis_start = re.compile(r'^\*[^*\s][^*]*\*')

    fixed_lines = []
    in_code_block = False

    for line in content.split('\n'):
        stripped = line.strip()

        if stripped.startswith('```'):
            in_code_block = not in_code_block
            fixed_lines.append(line)
            continue

        if in_code_block or not stripped.startswith(('-', '*', '+')):
            fixed_lines.append(line)
            continue

        marker, rest = stripped[0], stripped[1:]
        if (
            thematic_break.match(stripped)
            or rest.startswith(marker)
            or emphasis_start.match(stripped)
        ):
            # Rule, bold or emphasis: inserting a space would corrupt it.
            fixed_lines.append(line)
            continue

        indent = line[:len(line) - len(line.lstrip())]
        item_text = rest.lstrip()
        # A bare marker is a valid empty item; do not add a trailing space.
        fixed_lines.append(f'{indent}{marker} {item_text}' if item_text else f'{indent}{marker}')

    return '\n'.join(fixed_lines)
def remove_trailing_whitespace(content, *, keep_hard_breaks=False):
    """Remove trailing whitespace from all lines.

    Args:
        content: Text with ``\\n`` line separators.
        keep_hard_breaks: When true, a non-blank line that ends with two or
            more spaces keeps exactly two of them, because in Markdown that
            sequence is a hard line break and stripping it changes the
            rendered output.  Defaults to ``False`` (strip everything).

    Returns:
        The text with trailing whitespace removed from every line.
    """
    def _clean(line):
        stripped = line.rstrip()
        # Whitespace-only lines carry no break, so they are always emptied.
        if keep_hard_breaks and stripped and line.endswith('  '):
            return stripped + '  '
        return stripped

    return '\n'.join(_clean(line) for line in content.split('\n'))
# Install the markdownlint CLI globally with npm
npm install -g markdownlint-cli
# Check a single file
markdownlint document.md
# Fix automatically (same check, run with the --fix flag)
markdownlint --fix document.md
# Check every file matching the glob (quoted so the shell does not expand it)
markdownlint "**/*.md"
# Install the remark CLI and the recommended lint preset globally with npm
npm install -g remark-cli remark-preset-lint-recommended
# Check a single file
remark document.md
# Fix automatically (run with --output; NOTE(review): with no path this is expected to overwrite the input file - confirm against remark-cli docs)
remark document.md --output
uv add markdown-it-py
from markdown_it import MarkdownIt

# Create a parser with no arguments and tokenize the document text.
# `content` must already hold the markdown source; it is not defined here.
md = MarkdownIt()
tokens = md.parse(content)

# Validate structure: print each token's type and tag, one per line.
for tok in tokens:
    print(f"{tok.type}: {tok.tag}")
- Use `-` for unordered lists
- Use `**bold**`, not `__bold__`
- Use `*italic*`, not `_italic_`
- Use fenced code blocks (```), not indented
- Use `/` in paths
Before finalizing markdown:
def quick_fix_markdown(file_path):
    """Rewrite a markdown file in place after running the common fixers.

    The file is read as UTF-8, passed through ``remove_trailing_whitespace``,
    ``fix_list_formatting`` and ``fix_heading_hierarchy`` in that order,
    written back to the same path, and a confirmation line is printed.

    Args:
        file_path: Path of the markdown file to fix.
    """
    with open(file_path, 'r', encoding='utf-8') as source:
        text = source.read()

    # Each fixer receives the output of the one before it.
    fixers = (
        remove_trailing_whitespace,
        fix_list_formatting,
        fix_heading_hierarchy,
    )
    for apply_fix in fixers:
        text = apply_fix(text)

    with open(file_path, 'w', encoding='utf-8') as target:
        target.write(text)

    print(f"✓ Fixed: {file_path}")
After converting PDF to markdown:
# Markdown Validation Report
**File:** `document.md`
**Date:** 2024-01-20
## Summary
- ✓ Syntax: Valid
- ⚠ Structure: 2 issues
- ✗ Links: 1 broken link
- ✓ Images: All valid
## Issues Found
### Structure Issues
1. Line 45: Skipped heading level (H2 → H4)
2. Line 78: Multiple H1 headings
### Broken Links
1. Line 123: `[Guide](../missing.md)` - File not found
## Recommendations
1. Fix heading hierarchy at line 45
2. Change second H1 to H2
3. Update or remove broken link at line 123
| Tool | Purpose | Installation |
|---|---|---|
| markdownlint | Comprehensive linting | npm install -g markdownlint-cli |
| remark | Markdown processor | npm install -g remark-cli |
| markdown-it-py | Python parser | uv add markdown-it-py |
| mdformat | Python formatter | uv add mdformat |
For detailed linting rules: See references/linting_rules.md
For style guide: See references/style_guide.md