Name: Race Data Import
Author: matsubo

搵技能.../

Race Data Import | Skills Pool

node -e "
const path = require('path');
const fs = require('fs');
const base = process.cwd();
const data = JSON.parse(fs.readFileSync('race-info.json'));
const missing = data.events.filter(e => {
  if (!e.image) return false;
  try { fs.accessSync(path.join(base, e.image)); return false; } catch { return true; }
});
if (missing.length === 0) console.log('All images OK');
else missing.forEach(e => console.log('MISSING:', e.id, e.image));
"

# Step 1: Find image file names via API
curl -s "https://commons.wikimedia.org/w/api.php?action=query&generator=categorymembers&gcmtitle=Category:CATEGORY_NAME&gcmtype=file&prop=imageinfo&iiprop=url|size&format=json&gcmlimit=10" \
  | python3 -c "
import json,sys
d=json.load(sys.stdin)
for p in d.get('query',{}).get('pages',{}).values():
  ii=p.get('imageinfo',[{}])
  url=ii[0].get('url','') if ii else ''
  size=ii[0].get('size',0) if ii else 0
  if url and size > 50000 and any(url.lower().endswith(e) for e in ['.jpg','.jpeg','.png']):
    print(url)
"

# Step 2: Download and convert to webp (use magick, NOT ffmpeg — ffmpeg lacks webp encoder)
curl -s -L -H "User-Agent: Mozilla/5.0" "DIRECT_URL" -o /tmp/src.jpg
magick /tmp/src.jpg -resize 300x200\> images/EVENT_ID.webp

# Search by keyword
curl -s "https://commons.wikimedia.org/w/api.php?action=query&list=search&srsearch=KEYWORD+Japan&srnamespace=6&format=json&srlimit=5" \
  | python3 -c "import json,sys; [print(r['title']) for r in json.load(sys.stdin)['query']['search']]"

# Get direct URL from file title
curl -s "https://commons.wikimedia.org/w/api.php?action=query&titles=File:FILENAME.jpg&prop=imageinfo&iiprop=url&format=json" \
  | python3 -c "import json,sys; p=list(json.load(sys.stdin)['query']['pages'].values())[0]; print(p['imageinfo'][0]['url'])"

head -5 master/YEAR/RACE_ID/result.tsv | cat -A | head -5

import re

with open('result.tsv') as f:
    lines = f.readlines()

header = lines[0]
fixed = [header]
for line in lines[1:]:
    cols = line.rstrip('\n').split('\t')
    # Detect if first col looks like a name fragment (not a number)
    if cols and cols[0] and not re.match(r'^\d+$', cols[0]):
        line = '\t' + line  # prepend empty bib column
    fixed.append(line)

with open('result.tsv', 'w') as f:
    f.writelines(fixed)

node -e "
const fs = require('fs');
const data = JSON.parse(fs.readFileSync('race-info.json'));
// Check a specific edition
const event = data.events.find(e => e.id === 'EVENT_ID');
const edition = event.editions.find(e => e.year === YEAR);
const cat = edition.categories[0];
const allCols = [...(cat.segments||[]).flatMap(s=>s.columns||[]), ...(cat.meta_columns||[]).map(m=>m.column)];
const tsv = fs.readFileSync(cat.result_tsv, 'utf8').split('\n')[0].split('\t');
console.log('Expected cols:', allCols.length, allCols);
console.log('TSV header cols:', tsv.length, tsv);
"

# 1. Validate race-info.json schema
bunx ajv-cli validate -s race-info-schema.json -d race-info.json

# 2. Validate any new weather files
bunx ajv-cli validate -s weather-schema.json -d master/YEAR/ID/weather-data.json

# 3. Build dist/data.json
bun run build

# 4. Confirm athlete count increased reasonably
node -e "const d=JSON.parse(require('fs').readFileSync('dist/data.json')); console.log('Categories:', d.categories?.length, 'Athletes:', d.results?.length ?? Object.values(d).filter(Array.isArray).flat().length)"

Race Data Import

Race Data Import Checklist

Mandatory Checklist (run through every time)

1. Image Policy

Race Data Import

Race Data Import Checklist

Mandatory Checklist (run through every time)

1. Image Policy

Verify missing images

Downloading images from Wikimedia Commons

Searching for images when category is unknown

2. TSV Column Alignment

Common pitfall: missing bib (No.) column

Detecting split names

Validate column count matches race-info.json

3. Build & Validate

Things Mac

Trello

Production Scheduling

Jira Integration

Production Scheduling

Cost Aware Llm Pipeline