# Project_Velocity/backend/scripts/build_oracle_runtime_codebook.py

from __future__ import annotations

import json
from pathlib import Path

from backend.oracle.codebook_service import (
    _repo_root,
    _safe_load_json,
    _normalize_examples,
)


def _select_preferred_examples(corpora: list) -> list:
    """Collapse duplicate examples, keeping one entry per dedup key.

    Two examples are duplicates when they share ``subchapter_id`` and have the
    same ``template_name`` and ``title`` ignoring case. A later duplicate
    replaces the stored one when either:

    * it comes from the ``gpt_5_4`` pack and the stored one does not, or
    * it is canonical and the stored one is not.

    NOTE(review): the second rule lets a canonical example from any pack
    replace a non-canonical ``gpt_5_4`` one, so the outcome depends on the
    order of *corpora* — confirm this precedence is intended.

    Args:
        corpora: Example objects exposing ``subchapter_id``, ``template_name``,
            ``title``, ``source_pack`` and ``is_canonical``.

    Returns:
        The surviving examples, ordered by the first appearance of their key.
    """
    preferred: dict = {}
    for candidate in corpora:
        key = (
            candidate.subchapter_id,
            candidate.template_name.lower(),
            candidate.title.lower(),
        )
        current = preferred.get(key)
        if current is None:
            preferred[key] = candidate
            continue
        upgrades_to_gpt = (
            candidate.source_pack == "gpt_5_4" and current.source_pack != "gpt_5_4"
        )
        upgrades_to_canonical = candidate.is_canonical and not current.is_canonical
        if upgrades_to_gpt or upgrades_to_canonical:
            # Re-assigning an existing key keeps its original position in the dict.
            preferred[key] = candidate
    return list(preferred.values())


def _build_chapter_index(examples: list) -> list:
    """Derive the chapter/subchapter listing from the merged examples.

    The first example seen for a chapter (or subchapter) supplies its name.

    Returns:
        Chapter dicts sorted by ``chapter_id``; each carries its subchapters in
        the order they were first encountered.
    """
    chapters: dict = {}
    for example in examples:
        chapter = chapters.setdefault(
            example.chapter_id,
            {
                "chapter_id": example.chapter_id,
                "name": example.chapter_name,
                "subchapters": {},
            },
        )
        chapter["subchapters"].setdefault(
            example.subchapter_id,
            {
                "subchapter_id": example.subchapter_id,
                "name": example.subchapter_name,
            },
        )

    return [
        {
            "chapter_id": chapter["chapter_id"],
            "name": chapter["name"],
            "subchapters": list(chapter["subchapters"].values()),
        }
        for chapter in sorted(chapters.values(), key=lambda item: item["chapter_id"])
    ]


def _serialize_example(example) -> dict:
    """Convert one example object into its JSON-ready ``seed_examples`` entry."""
    return {
        "example_id": example.example_id,
        "chapter_id": example.chapter_id,
        "subchapter_id": example.subchapter_id,
        "title": example.title,
        "template_name": example.template_name,
        "component_type": example.component_type,
        "accepted_shapes": list(example.accepted_shapes),
        "example_json": example.example_json,
        "quality_notes": example.quality_notes,
        "is_canonical": example.is_canonical,
        "source_pack": example.source_pack,
        "surface_targets": list(example.surface_targets),
        "policy_tags": list(example.policy_tags),
        "backend_contract_hints": example.backend_contract_hints,
    }


def main() -> None:
    """Merge the available Oracle seed packs into one runtime codebook file.

    Loads every source pack that exists on disk, de-duplicates the examples,
    derives the chapter listing, and writes the result as UTF-8 JSON to
    ``backend/oracle/oracle_runtime_codebook_merged.json`` under the repo root.
    A source pack whose file is missing is skipped, with a notice on stderr.
    """
    # Local import: stderr is only needed for the skipped-source notice.
    import sys

    root = _repo_root()
    schema_dir = (
        root
        / ".Agent Context"
        / "Sprint 1"
        / "Sayan Multi-Surface and Oracle Delivery Pack"
        / "Sample JSON Schema"
    )
    primary_path = (
        schema_dir
        / "GPT 5.4"
        / "oracle_canvas_json_expansion_pack"
        / "db"
        / "oracle_template_seed_db_expanded_v1.pretty.json"
    )
    secondary_path = (
        schema_dir
        / "Claude Sonnet 4.6"
        / "oracle_template_expansion"
        / "oracle_template_seed_db_expanded.json"
    )
    fallback_path = root / "backend" / "oracle" / "oracle_template_seed_db.json"
    output_path = root / "backend" / "oracle" / "oracle_runtime_codebook_merged.json"

    # Listed in priority order; the same labels are recorded in the output's _meta.
    sources = (
        (primary_path, "gpt_5_4"),
        (secondary_path, "claude_sonnet_4_6"),
        (fallback_path, "runtime_seed_fallback"),
    )

    corpora = []
    for path, label in sources:
        if not path.exists():
            # Previously skipped silently, which hid mistyped or relocated packs.
            print(f"Skipping missing Oracle source pack {label!r}: {path}", file=sys.stderr)
            continue
        # Presumably tags each example's source_pack with `label` — confirm in
        # backend.oracle.codebook_service.
        corpora.extend(_normalize_examples(_safe_load_json(path), label))

    examples = _select_preferred_examples(corpora)

    payload = {
        "_meta": {
            "generated_by": "backend/scripts/build_oracle_runtime_codebook.py",
            "source_priority": [label for _, label in sources],
            "example_count": len(examples),
        },
        "chapters": _build_chapter_index(examples),
        "seed_examples": [
            _serialize_example(example)
            for example in sorted(
                examples,
                key=lambda item: (
                    item.chapter_id,
                    item.subchapter_id,
                    item.template_name.lower(),
                    item.title.lower(),
                ),
            )
        ],
    }

    output_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
    print(f"Wrote merged Oracle runtime codebook to {output_path}")


# Run the build only when executed as a script, not when imported.
if __name__ == "__main__":
    main()