Files
Project_Velocity/backend/scripts/build_oracle_runtime_codebook.py
2026-04-20 01:43:39 +05:30

104 lines
3.9 KiB
Python

from __future__ import annotations
import json
from pathlib import Path
from backend.oracle.codebook_service import (
_repo_root,
_safe_load_json,
_normalize_examples,
)
def main() -> None:
    """Merge the Oracle template seed corpora into one runtime codebook file.

    Each candidate source file that exists on disk is read with
    ``_safe_load_json`` and converted with ``_normalize_examples`` (tagged
    with that source's pack label); missing files are skipped. Examples that
    share the same ``(subchapter_id, template_name, title)`` key -- the two
    names compared in lower case -- are collapsed to a single winner. The
    winners, together with a chapter/subchapter index derived from them, are
    serialised as indented UTF-8 JSON to
    ``backend/oracle/oracle_runtime_codebook_merged.json`` beneath the
    directory returned by ``_repo_root()``, and the output path is printed.
    """
    root = _repo_root()
    schema_samples = (
        root
        / ".Agent Context"
        / "Sprint 1"
        / "Sayan Multi-Surface and Oracle Delivery Pack"
        / "Sample JSON Schema"
    )
    oracle_dir = root / "backend" / "oracle"
    output_path = oracle_dir / "oracle_runtime_codebook_merged.json"

    # (label, file) pairs in load order. Order matters: for a duplicated key
    # the first example loaded keeps its slot unless a later one qualifies
    # under the replacement rule below.
    sources = (
        (
            "gpt_5_4",
            schema_samples
            / "GPT 5.4"
            / "oracle_canvas_json_expansion_pack"
            / "db"
            / "oracle_template_seed_db_expanded_v1.pretty.json",
        ),
        (
            "claude_sonnet_4_6",
            schema_samples
            / "Claude Sonnet 4.6"
            / "oracle_template_expansion"
            / "oracle_template_seed_db_expanded.json",
        ),
        (
            "runtime_seed_fallback",
            oracle_dir / "oracle_template_seed_db.json",
        ),
    )

    collected = []
    for label, source_file in sources:
        if not source_file.exists():
            continue
        collected.extend(_normalize_examples(_safe_load_json(source_file), label))

    # Collapse duplicates. A later example takes over an occupied slot when
    # it comes from the "gpt_5_4" pack and the current holder does not, or
    # otherwise when it is canonical and the current holder is not.
    # NOTE(review): the canonical rule is applied regardless of pack, so a
    # canonical example from another pack can displace a non-canonical
    # "gpt_5_4" one -- confirm this matches the intended source priority.
    winners = {}
    for candidate in collected:
        identity = (
            candidate.subchapter_id,
            candidate.template_name.lower(),
            candidate.title.lower(),
        )
        incumbent = winners.get(identity)
        takes_slot = (
            incumbent is None
            or (
                candidate.source_pack == "gpt_5_4"
                and incumbent.source_pack != "gpt_5_4"
            )
            or (candidate.is_canonical and not incumbent.is_canonical)
        )
        if takes_slot:
            winners[identity] = candidate
    merged = list(winners.values())

    # Chapter index: the first example seen for a chapter / subchapter
    # supplies its display name; later examples leave the entry untouched.
    chapter_index: dict[str, dict] = {}
    for item in merged:
        if item.chapter_id not in chapter_index:
            chapter_index[item.chapter_id] = {
                "chapter_id": item.chapter_id,
                "name": item.chapter_name,
                "subchapters": {},
            }
        known_subchapters = chapter_index[item.chapter_id]["subchapters"]
        if item.subchapter_id not in known_subchapters:
            known_subchapters[item.subchapter_id] = {
                "subchapter_id": item.subchapter_id,
                "name": item.subchapter_name,
            }

    def _example_order(item):
        # Sort key for the exported examples (names compared in lower case).
        return (
            item.chapter_id,
            item.subchapter_id,
            item.template_name.lower(),
            item.title.lower(),
        )

    def _export(item):
        # Plain-JSON view of one example; sequence fields are copied to lists.
        return {
            "example_id": item.example_id,
            "chapter_id": item.chapter_id,
            "subchapter_id": item.subchapter_id,
            "title": item.title,
            "template_name": item.template_name,
            "component_type": item.component_type,
            "accepted_shapes": list(item.accepted_shapes),
            "example_json": item.example_json,
            "quality_notes": item.quality_notes,
            "is_canonical": item.is_canonical,
            "source_pack": item.source_pack,
            "surface_targets": list(item.surface_targets),
            "policy_tags": list(item.policy_tags),
            "backend_contract_hints": item.backend_contract_hints,
        }

    # Chapters are emitted sorted by id; subchapters keep insertion order.
    ordered_chapters = sorted(
        chapter_index.values(), key=lambda entry: entry["chapter_id"]
    )
    chapter_rows = [
        {
            "chapter_id": entry["chapter_id"],
            "name": entry["name"],
            "subchapters": list(entry["subchapters"].values()),
        }
        for entry in ordered_chapters
    ]
    example_rows = [_export(item) for item in sorted(merged, key=_example_order)]

    payload = {
        "_meta": {
            "generated_by": "backend/scripts/build_oracle_runtime_codebook.py",
            "source_priority": ["gpt_5_4", "claude_sonnet_4_6", "runtime_seed_fallback"],
            "example_count": len(merged),
        },
        "chapters": chapter_rows,
        "seed_examples": example_rows,
    }

    rendered = json.dumps(payload, ensure_ascii=False, indent=2)
    output_path.write_text(rendered, encoding="utf-8")
    print(f"Wrote merged Oracle runtime codebook to {output_path}")
# Build the codebook only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()