104 lines
3.9 KiB
Python
104 lines
3.9 KiB
Python
from __future__ import annotations
|
|
|
|
import json
|
|
from pathlib import Path
|
|
|
|
from backend.oracle.codebook_service import (
|
|
_repo_root,
|
|
_safe_load_json,
|
|
_normalize_examples,
|
|
)
|
|
|
|
|
|
def main() -> None:
    """Merge the Oracle template seed packs into a single runtime codebook.

    Every seed pack that exists on disk is loaded in priority order (GPT 5.4
    pack, Claude Sonnet 4.6 pack, bundled runtime seed).  Examples sharing the
    same (subchapter id, lower-cased template name, lower-cased title) are
    collapsed to one.  A later duplicate replaces the kept one only when it
    comes from the ``gpt_5_4`` pack and the kept one does not, or when it is
    canonical and the kept one is not.

    The chapter/subchapter index is derived from the surviving examples and
    the result is written as pretty-printed UTF-8 JSON to
    ``backend/oracle/oracle_runtime_codebook_merged.json``.
    """
    repo = _repo_root()
    schema_dir = (
        repo
        / ".Agent Context"
        / "Sprint 1"
        / "Sayan Multi-Surface and Oracle Delivery Pack"
        / "Sample JSON Schema"
    )
    oracle_dir = repo / "backend" / "oracle"
    target = oracle_dir / "oracle_runtime_codebook_merged.json"

    # Load order matters: on a duplicate key the first-seen example is kept
    # unless one of the replacement rules below applies.
    sources = [
        (
            schema_dir
            / "GPT 5.4"
            / "oracle_canvas_json_expansion_pack"
            / "db"
            / "oracle_template_seed_db_expanded_v1.pretty.json",
            "gpt_5_4",
        ),
        (
            schema_dir
            / "Claude Sonnet 4.6"
            / "oracle_template_expansion"
            / "oracle_template_seed_db_expanded.json",
            "claude_sonnet_4_6",
        ),
        (oracle_dir / "oracle_template_seed_db.json", "runtime_seed_fallback"),
    ]

    loaded = []
    for source_file, pack_label in sources:
        if not source_file.exists():
            # Missing packs are skipped silently.
            continue
        loaded.extend(_normalize_examples(_safe_load_json(source_file), pack_label))

    winners = {}
    for candidate in loaded:
        identity = (
            candidate.subchapter_id,
            candidate.template_name.lower(),
            candidate.title.lower(),
        )
        incumbent = winners.get(identity)
        if incumbent is None:
            winners[identity] = candidate
            continue
        # Replace the incumbent when the candidate is from the gpt_5_4 pack
        # and the incumbent is not, or when only the candidate is canonical.
        if (
            candidate.source_pack == "gpt_5_4" and incumbent.source_pack != "gpt_5_4"
        ) or (candidate.is_canonical and not incumbent.is_canonical):
            winners[identity] = candidate

    merged = list(winners.values())

    # chapter_id -> chapter record; subchapters are keyed by id while building
    # so each one is registered once, in first-seen order.
    chapter_index: dict[str, dict] = {}
    for entry in merged:
        chapter_record = chapter_index.setdefault(
            entry.chapter_id,
            {
                "chapter_id": entry.chapter_id,
                "name": entry.chapter_name,
                "subchapters": {},
            },
        )
        subchapter_stub = {
            "subchapter_id": entry.subchapter_id,
            "name": entry.subchapter_name,
        }
        chapter_record["subchapters"].setdefault(entry.subchapter_id, subchapter_stub)

    def _example_order(item):
        # Stable, case-insensitive ordering for the emitted seed examples.
        return (
            item.chapter_id,
            item.subchapter_id,
            item.template_name.lower(),
            item.title.lower(),
        )

    def _as_record(item):
        # Key order here is the key order in the written JSON.
        return {
            "example_id": item.example_id,
            "chapter_id": item.chapter_id,
            "subchapter_id": item.subchapter_id,
            "title": item.title,
            "template_name": item.template_name,
            "component_type": item.component_type,
            "accepted_shapes": list(item.accepted_shapes),
            "example_json": item.example_json,
            "quality_notes": item.quality_notes,
            "is_canonical": item.is_canonical,
            "source_pack": item.source_pack,
            "surface_targets": list(item.surface_targets),
            "policy_tags": list(item.policy_tags),
            "backend_contract_hints": item.backend_contract_hints,
        }

    chapter_records = []
    # The index is keyed by chapter_id, so sorting its keys orders the chapters
    # by chapter_id.
    for chapter_id in sorted(chapter_index):
        record = chapter_index[chapter_id]
        chapter_records.append(
            {
                "chapter_id": record["chapter_id"],
                "name": record["name"],
                "subchapters": list(record["subchapters"].values()),
            }
        )

    seed_records = [_as_record(item) for item in sorted(merged, key=_example_order)]

    document = {
        "_meta": {
            "generated_by": "backend/scripts/build_oracle_runtime_codebook.py",
            "source_priority": ["gpt_5_4", "claude_sonnet_4_6", "runtime_seed_fallback"],
            "example_count": len(merged),
        },
        "chapters": chapter_records,
        "seed_examples": seed_records,
    }

    rendered = json.dumps(document, ensure_ascii=False, indent=2)
    target.write_text(rendered, encoding="utf-8")
    print(f"Wrote merged Oracle runtime codebook to {target}")
|
|
|
|
|
|
# Build the merged codebook only when this file is executed as a script;
# importing the module must not touch the filesystem.
if __name__ == "__main__":
    main()
|