feat: Oracle Canvas Component Schema and Qwen 3.6 integration (#31)

Co-authored-by: Sagnik <sagnik7896@gmail.com>
Reviewed-on: #31
2026-04-20 01:43:39 +05:30
parent 57144e1bd3
commit e519339cc9
129 changed files with 625213 additions and 262 deletions
--- a/backend/oracle/codebook_service.py
+++ b/backend/oracle/codebook_service.py
@@ -0,0 +1,340 @@
+"""
+oracle/codebook_service.py
+Loads, normalizes, and retrieves Oracle Canvas codebook examples from the
+expanded GPT and Claude seed packs delivered in Sprint 1.
+
+The runtime treats the GPT pack as the primary normalized corpus and uses the
+Claude pack as a supplement when it adds unique examples or metadata.
+"""
+from __future__ import annotations
+
+import hashlib
+import json
+import logging
+import re
+from dataclasses import dataclass
+from functools import lru_cache
+from pathlib import Path
+from typing import Any
+
+# Module-scoped logger; handlers and levels are left to the application.
+logger = logging.getLogger(__name__)
+
+# A token is a maximal run of lowercase ASCII letters and digits. Callers are
+# expected to lowercase their input before matching.
+_TOKEN_RE = re.compile(r"[a-z0-9]+")
+
+# Tokens that ``_tokenize`` discards before any term matching takes place.
+_STOPWORDS = {
+    # Grammatical filler.
+    "a", "an", "and", "as", "at", "for", "from", "in", "into", "is", "me",
+    "of", "on", "or", "that", "the", "this", "to", "with",
+    # Request verbs and politeness.
+    "build", "get", "give", "please", "render", "show",
+    # Generic Oracle Canvas vocabulary.
+    "canvas", "chart", "client", "clients", "list", "oracle", "surface", "view",
+}
+
+
+@dataclass(frozen=True)
+class CodebookExample:
+    """One normalized codebook example, as built by ``_normalize_examples``.
+
+    Attributes cannot be reassigned (``frozen=True``) and list-valued inputs
+    are stored as tuples. The two ``dict`` fields are still ordinary mutable
+    dicts, so treat them as read-only; they also make instances unhashable.
+    """
+
+    # Id supplied by the pack, or a generated hash when the pack omits it.
+    example_id: str
+    # Chapter id and its display name (the id itself when no name is known).
+    chapter_id: str
+    chapter_name: str
+    # Subchapter id and its display name (the id itself when no name is known).
+    subchapter_id: str
+    subchapter_name: str
+    # Human-readable title; falls back to the template name.
+    title: str
+    # Template name; falls back to the title.
+    template_name: str
+    # Component kind, e.g. "summary_card" (the default when unspecified).
+    component_type: str
+    accepted_shapes: tuple[str, ...]
+    # Example payload copied from the pack's "example_json" entry.
+    example_json: dict[str, Any]
+    quality_notes: str
+    is_canonical: bool
+    # Label of the source file this example was loaded from.
+    source_pack: str
+    surface_targets: tuple[str, ...]
+    policy_tags: tuple[str, ...]
+    backend_contract_hints: dict[str, Any]
+    # Search tokens derived from the title, template name, component type,
+    # chapter/subchapter names, quality notes and policy tags.
+    score_terms: tuple[str, ...]
+
+
+def _repo_root() -> Path:
+    """Return the directory two levels above the one containing this module."""
+    module_file = Path(__file__).resolve()
+    # parents[0] is this module's directory; parents[2] is its grandparent.
+    return module_file.parents[2]
+
+
+def _safe_load_json(path: Path) -> dict[str, Any]:
+    """Read *path* as UTF-8 JSON and return its top-level object.
+
+    Args:
+        path: Location of the JSON file.
+
+    Returns:
+        The decoded top-level JSON object. An empty dict is returned, and a
+        warning is logged, when the file cannot be read, is not valid JSON,
+        or its top-level value is not an object.
+    """
+    log = logging.getLogger(__name__)
+    try:
+        with path.open("r", encoding="utf-8") as handle:
+            payload = json.load(handle)
+    except (OSError, ValueError) as exc:
+        # json.JSONDecodeError and UnicodeDecodeError are both ValueError
+        # subclasses, so this covers malformed content as well as I/O errors.
+        log.warning("Skipping unreadable codebook source %s: %s", path, exc)
+        return {}
+    if not isinstance(payload, dict):
+        # Callers use dict methods on the result; a list or scalar at the top
+        # level would otherwise fail later with an AttributeError.
+        log.warning(
+            "Skipping codebook source %s: expected a JSON object, got %s",
+            path,
+            type(payload).__name__,
+        )
+        return {}
+    return payload
+
+
+def _tokenize(value: str) -> list[str]:
+    """Split *value* into lowercase alphanumeric tokens, dropping noise.
+
+    Tokens listed in ``_STOPWORDS`` and single-character tokens are removed.
+    The remaining tokens keep their original order, duplicates included.
+    """
+    kept: list[str] = []
+    for token in _TOKEN_RE.findall(value.lower()):
+        if len(token) <= 1 or token in _STOPWORDS:
+            continue
+        kept.append(token)
+    return kept
+
+
+def _make_template_id(example: dict[str, Any]) -> str:
+    """Derive a stable 16-hex-character id from an example's identity fields.
+
+    The id is the first 16 hex digits of the SHA-1 of
+    ``chapter_id|subchapter_id|template_name|component_type``.
+
+    Args:
+        example: Mapping that may contain the four identity keys. Missing or
+            ``None`` values count as empty strings; other non-string values
+            are converted with ``str``.
+
+    Returns:
+        A 16-character lowercase hexadecimal string.
+    """
+    identity_keys = ("chapter_id", "subchapter_id", "template_name", "component_type")
+    parts: list[str] = []
+    for key in identity_keys:
+        value = example.get(key)
+        # str.join rejects non-strings, and raw JSON records can carry null
+        # or numeric values here, so coerce instead of raising TypeError.
+        parts.append("" if value is None else str(value))
+    base = "|".join(parts)
+    return hashlib.sha1(base.encode("utf-8")).hexdigest()[:16]
+
+
+def _chapter_maps(payload: dict[str, Any]) -> tuple[dict[str, str], dict[str, str]]:
+    """Build id-to-name lookups for the chapters and subchapters in *payload*.
+
+    Args:
+        payload: Decoded pack. Its optional ``"chapters"`` list holds objects
+            with ``chapter_id``, ``name`` and an optional ``"subchapters"``
+            list of objects with ``subchapter_id`` and ``name``.
+
+    Returns:
+        ``(chapters, subchapters)``: two dicts mapping each non-empty id to
+        its stripped name (``""`` when the name is missing or null).
+        Subchapters are collected even when their parent has no usable id.
+    """
+
+    def _clean(value: Any) -> str:
+        # str(None) would yield the literal "None"; treat null as absent.
+        return "" if value is None else str(value).strip()
+
+    chapters: dict[str, str] = {}
+    subchapters: dict[str, str] = {}
+    # ``or []`` also covers an explicit JSON null, which ``.get(key, [])``
+    # would pass through and fail to iterate.
+    for chapter in payload.get("chapters") or []:
+        if not isinstance(chapter, dict):
+            continue
+        chapter_id = _clean(chapter.get("chapter_id"))
+        if chapter_id:
+            chapters[chapter_id] = _clean(chapter.get("name"))
+        for subchapter in chapter.get("subchapters") or []:
+            if not isinstance(subchapter, dict):
+                continue
+            sub_id = _clean(subchapter.get("subchapter_id"))
+            if sub_id:
+                subchapters[sub_id] = _clean(subchapter.get("name"))
+    return chapters, subchapters
+
+
+def _normalize_examples(payload: dict[str, Any], source_pack: str) -> list[CodebookExample]:
+    """Convert the raw examples of one pack into ``CodebookExample`` records.
+
+    Args:
+        payload: Decoded pack. Examples are read from ``"seed_examples"``,
+            or from ``"examples"`` when the former is missing or empty.
+        source_pack: Label stored on every record to identify its origin.
+
+    Returns:
+        One record per raw example object, in pack order. Entries that are
+        not JSON objects are skipped. Missing titles, template names and
+        component types fall back to each other or to fixed defaults.
+    """
+
+    def _text(value: Any) -> str:
+        # str(None) would yield the literal "None"; treat null as absent.
+        return "" if value is None else str(value).strip()
+
+    chapter_names, subchapter_names = _chapter_maps(payload)
+    raw_examples = payload.get("seed_examples") or payload.get("examples") or []
+    normalized: list[CodebookExample] = []
+    for raw in raw_examples:
+        if not isinstance(raw, dict):
+            continue
+        chapter_id = _text(raw.get("chapter_id"))
+        subchapter_id = _text(raw.get("subchapter_id"))
+        title = str(raw.get("title") or raw.get("template_name") or "Oracle Component").strip()
+        template_name = str(raw.get("template_name") or title).strip()
+        component_type = str(raw.get("component_type") or "summary_card").strip()
+        example_json = raw.get("example_json") or {}
+        # Computed once so the stored field and the search terms agree; a
+        # null value must not leak into the terms as the token "none".
+        quality_notes = str(raw.get("quality_notes") or "")
+        policy_tags = tuple(raw.get("policy_tags") or [])
+        terms = _tokenize(
+            " ".join(
+                [
+                    title,
+                    template_name,
+                    component_type.replace("_", " "),
+                    chapter_names.get(chapter_id, ""),
+                    subchapter_names.get(subchapter_id, ""),
+                    quality_notes,
+                    # str() guards against non-string tags breaking the join.
+                    " ".join(str(tag) for tag in policy_tags),
+                ]
+            )
+        )
+        normalized.append(
+            CodebookExample(
+                example_id=str(raw.get("example_id") or _make_template_id(raw)),
+                chapter_id=chapter_id,
+                chapter_name=chapter_names.get(chapter_id, chapter_id),
+                subchapter_id=subchapter_id,
+                subchapter_name=subchapter_names.get(subchapter_id, subchapter_id),
+                title=title,
+                template_name=template_name,
+                component_type=component_type,
+                accepted_shapes=tuple(raw.get("accepted_shapes") or []),
+                example_json=example_json,
+                quality_notes=quality_notes,
+                is_canonical=bool(raw.get("is_canonical")),
+                source_pack=source_pack,
+                surface_targets=tuple(raw.get("surface_targets") or []),
+                policy_tags=policy_tags,
+                backend_contract_hints=dict(raw.get("backend_contract_hints") or {}),
+                score_terms=tuple(terms),
+            )
+        )
+    return normalized
+
+
+class OracleCodebookService:
+    """Read-only access to the Oracle Canvas codebook examples.
+
+    Construction only computes the candidate file locations. The files are
+    parsed on the first ``load()`` call and the result is cached; ``stats``,
+    ``list_templates``, ``search_examples`` and ``synthesize_template`` all
+    read from that cached corpus.
+    """
+
+    def __init__(self) -> None:
+        """Compute the paths of the merged, primary, secondary and fallback packs."""
+        root = _repo_root()
+        oracle_dir = root / "backend" / "oracle"
+        schema_dir = (
+            root
+            / ".Agent Context"
+            / "Sprint 1"
+            / "Sayan Multi-Surface and Oracle Delivery Pack"
+            / "Sample JSON Schema"
+        )
+        self.runtime_merged_path = oracle_dir / "oracle_runtime_codebook_merged.json"
+        self.primary_path = (
+            schema_dir
+            / "GPT 5.4"
+            / "oracle_canvas_json_expansion_pack"
+            / "db"
+            / "oracle_template_seed_db_expanded_v1.pretty.json"
+        )
+        self.secondary_path = (
+            schema_dir
+            / "Claude Sonnet 4.6"
+            / "oracle_template_expansion"
+            / "oracle_template_seed_db_expanded.json"
+        )
+        self.fallback_path = oracle_dir / "oracle_template_seed_db.json"
+
+    @staticmethod
+    def _template_id_for(example: CodebookExample) -> str:
+        """Return the template id shared by all examples of one template."""
+        return _make_template_id(
+            {
+                "chapter_id": example.chapter_id,
+                "subchapter_id": example.subchapter_id,
+                "template_name": example.template_name,
+                "component_type": example.component_type,
+            }
+        )
+
+    # NOTE(review): lru_cache on a method keys the cache on ``self`` and keeps
+    # that instance alive. It is left in place because ``load.cache_clear()``
+    # may be relied on elsewhere -- confirm before replacing it.
+    @lru_cache(maxsize=1)
+    def load(self) -> dict[str, Any]:
+        """Parse, merge and de-duplicate the available codebook sources.
+
+        When the merged runtime file exists, it and the fallback seed file
+        are the only sources. Otherwise the primary pack, the secondary pack
+        and the fallback seed file are read, in that order. Missing files
+        are skipped.
+
+        Returns:
+            A dict with ``"examples"`` (de-duplicated ``CodebookExample``
+            list), ``"source_summary"`` (``"label:count"`` strings) and
+            ``"template_count"`` (number of distinct templates). The same
+            dict object is returned on every call; do not mutate it.
+        """
+        corpora: list[CodebookExample] = []
+        sources_loaded: list[str] = []
+        source_paths: list[tuple[Path, str]]
+        if self.runtime_merged_path.exists():
+            source_paths = [
+                (self.runtime_merged_path, "runtime_merged"),
+                (self.fallback_path, "runtime_seed_fallback"),
+            ]
+        else:
+            source_paths = [
+                (self.primary_path, "gpt_5_4"),
+                (self.secondary_path, "claude_sonnet_4_6"),
+                (self.fallback_path, "runtime_seed_fallback"),
+            ]
+
+        for path, label in source_paths:
+            if not path.exists():
+                continue
+            payload = _safe_load_json(path)
+            examples = _normalize_examples(payload, label)
+            if examples:
+                corpora.extend(examples)
+                sources_loaded.append(f"{label}:{len(examples)}")
+
+        deduped: dict[tuple[str, str, str], CodebookExample] = {}
+        for example in corpora:
+            key = (example.subchapter_id, example.template_name.lower(), example.title.lower())
+            existing = deduped.get(key)
+            if existing is None:
+                deduped[key] = example
+                continue
+            # A GPT-pack example replaces a non-GPT one; otherwise a canonical
+            # example replaces a non-canonical one; otherwise first seen wins.
+            if example.source_pack == "gpt_5_4" and existing.source_pack != "gpt_5_4":
+                deduped[key] = example
+            elif example.is_canonical and not existing.is_canonical:
+                deduped[key] = example
+
+        examples = list(deduped.values())
+        logger.info("Oracle codebook loaded from %s", ", ".join(sources_loaded) or "no sources")
+        return {
+            "examples": examples,
+            "source_summary": sources_loaded,
+            "template_count": len({(e.chapter_id, e.subchapter_id, e.template_name, e.component_type) for e in examples}),
+        }
+
+    def stats(self) -> dict[str, Any]:
+        """Return the example count, template count and per-source summary."""
+        data = self.load()
+        examples: list[CodebookExample] = data["examples"]
+        return {
+            "example_count": len(examples),
+            "template_count": data["template_count"],
+            "source_summary": data["source_summary"],
+        }
+
+    def list_templates(
+        self,
+        *,
+        category: str | None = None,
+        status: str | None = None,
+        search: str | None = None,
+        limit: int = 50,
+        offset: int = 0,
+    ) -> dict[str, Any]:
+        """List catalog templates, optionally filtered and paginated.
+
+        Args:
+            category: Case-insensitive chapter or subchapter name to keep.
+            status: Accepted for interface compatibility and ignored.
+            search: Free text. Its tokens must overlap an example's score
+                terms. A search made only of stopwords or one-character
+                tokens falls back to a case-insensitive substring match on
+                the template name, title, chapter and subchapter names.
+                Blank text applies no filter.
+            limit: Maximum number of templates returned; negatives count as 0.
+            offset: Number of templates skipped; negatives count as 0.
+
+        Returns:
+            ``{"total": <matches before pagination>, "templates": [...]}``
+            with templates ordered by category, then name.
+        """
+        del status  # runtime codebook templates are always active catalog entries
+        examples: list[CodebookExample] = self.load()["examples"]
+
+        # Loop-invariant filter inputs, computed once.
+        wanted_category = category.lower() if category else None
+        search_terms = set(_tokenize(search)) if search else set()
+        # Only consulted when tokenization removed everything (e.g. "chart").
+        search_text = search.strip().lower() if search else ""
+
+        templates: dict[str, dict[str, Any]] = {}
+        for example in examples:
+            if wanted_category is not None and wanted_category not in {
+                example.chapter_name.lower(),
+                example.subchapter_name.lower(),
+            }:
+                continue
+            if search_terms:
+                if search_terms.isdisjoint(example.score_terms):
+                    continue
+            elif search_text:
+                haystack = " ".join(
+                    (example.template_name, example.title, example.chapter_name, example.subchapter_name)
+                ).lower()
+                if search_text not in haystack:
+                    continue
+            template_id = self._template_id_for(example)
+            if template_id in templates:
+                # The first example seen for a template defines its record.
+                continue
+            templates[template_id] = {
+                "templateId": template_id,
+                "tenantId": "_system",
+                "name": example.template_name,
+                "category": example.chapter_name,
+                "status": "catalog_active",
+                "origin": "premade",
+                "version": "codebook-v2",
+                "acceptedShapes": list(example.accepted_shapes),
+                "description": f"{example.subchapter_name} · {example.title}",
+                "chapterId": example.chapter_id,
+                "subchapterId": example.subchapter_id,
+                "componentType": example.component_type,
+                "sourcePack": example.source_pack,
+                "useCount": 0,
+                "updatedAt": None,
+                "createdAt": None,
+            }
+        ordered = sorted(templates.values(), key=lambda item: (item["category"], item["name"]))
+        # Negative values would make the slice count from the end of the list.
+        start = max(offset, 0)
+        size = max(limit, 0)
+        return {
+            "total": len(ordered),
+            "templates": ordered[start: start + size],
+        }
+
+    @staticmethod
+    def _relevance_score(example: CodebookExample, prompt_terms: set[str], lowered_prompt: str) -> int:
+        """Score *example* against a prompt; higher means a better match."""
+
+        def mentioned(label: str) -> bool:
+            # An empty label is a substring of every prompt and must not
+            # count as a mention.
+            needle = label.lower()
+            return bool(needle.strip()) and needle in lowered_prompt
+
+        score = 6 * len(prompt_terms.intersection(example.score_terms))
+        if mentioned(example.template_name):
+            score += 24
+        if mentioned(example.subchapter_name):
+            score += 20
+        if mentioned(example.chapter_name):
+            score += 14
+        if mentioned(example.component_type.replace("_", " ")):
+            score += 12
+        # Quality bonuses apply regardless of textual overlap.
+        if example.is_canonical:
+            score += 8
+        if "live_data_first" in example.policy_tags:
+            score += 4
+        return score
+
+    def search_examples(self, prompt: str, *, limit: int = 8) -> list[CodebookExample]:
+        """Return the best-scoring examples for *prompt*.
+
+        Each example scores 6 points per shared search term, plus bonuses
+        when the prompt contains its template, subchapter, chapter or
+        component-type name, plus fixed bonuses for canonical and
+        ``live_data_first`` examples. Because the fixed bonuses do not depend
+        on the prompt, such examples are always candidates.
+
+        Args:
+            prompt: Free-text request.
+            limit: Maximum number of results; values <= 0 yield an empty list.
+
+        Returns:
+            Up to *limit* examples with a positive score, best first, at most
+            one per (subchapter id, template name) pair. Ties are ordered by
+            chapter id, subchapter id, then title.
+        """
+        if limit <= 0:
+            return []
+
+        prompt_terms = set(_tokenize(prompt))
+        lowered_prompt = prompt.lower()
+
+        scored: list[tuple[int, CodebookExample]] = []
+        for example in self.load()["examples"]:
+            score = self._relevance_score(example, prompt_terms, lowered_prompt)
+            if score > 0:
+                scored.append((score, example))
+
+        scored.sort(key=lambda item: (-item[0], item[1].chapter_id, item[1].subchapter_id, item[1].title))
+        selected: list[CodebookExample] = []
+        seen: set[tuple[str, str]] = set()
+        for _, example in scored:
+            dedupe_key = (example.subchapter_id, example.template_name)
+            if dedupe_key in seen:
+                continue
+            seen.add(dedupe_key)
+            selected.append(example)
+            if len(selected) >= limit:
+                break
+        return selected
+
+    def synthesize_template(self, prompt: str, data_shapes: list[str] | None = None) -> dict[str, Any]:
+        """Return a template record for *prompt*.
+
+        Args:
+            prompt: Free-text request used to find the best codebook match.
+            data_shapes: Shapes to report when the match declares none, or
+                when there is no match.
+
+        Returns:
+            A ``catalog_active`` record describing the best match, including
+            an independent copy of its example JSON. When nothing matches, a
+            ``tenant_draft`` record whose id is derived from the prompt.
+        """
+        import copy  # local import: the module's import block is not part of this block
+
+        best = self.search_examples(prompt, limit=1)
+        match = best[0] if best else None
+        # Copy so the returned record never aliases the caller's list.
+        shapes = list(data_shapes or [])
+        if match is None:
+            return {
+                "templateId": hashlib.sha1(prompt.encode("utf-8")).hexdigest()[:16],
+                "tenantId": "_system",
+                "name": "Oracle Synthesized Draft",
+                "category": "Custom",
+                "status": "tenant_draft",
+                "origin": "synthesized",
+                "version": "1.0.0",
+                "acceptedShapes": shapes,
+                "description": f"Draft synthesized from prompt: {prompt[:120]}",
+            }
+
+        return {
+            "templateId": self._template_id_for(match),
+            "tenantId": "_system",
+            "name": match.template_name,
+            "category": match.chapter_name,
+            "status": "catalog_active",
+            "origin": "premade",
+            "version": "codebook-v2",
+            "acceptedShapes": list(match.accepted_shapes or shapes),
+            "description": f"Best codebook match · {match.subchapter_name}",
+            "componentType": match.component_type,
+            "chapterId": match.chapter_id,
+            "subchapterId": match.subchapter_id,
+            "sourcePack": match.source_pack,
+            # Deep copy: the original dict lives in the cached corpus, and a
+            # caller filling it in would otherwise corrupt later results.
+            "exampleJson": copy.deepcopy(match.example_json),
+        }
+
+
+# Shared module-level instance. Constructing it only computes file paths; no
+# codebook file is opened until a method that calls ``load()`` is invoked.
+codebook_service = OracleCodebookService()