feat: Oracle Canvas Component Schema and Qwen 3.6 integration (#31)
Co-authored-by: Sagnik <sagnik7896@gmail.com> Reviewed-on: #31
This commit was merged in pull request #31.
This commit is contained in:
340
backend/oracle/codebook_service.py
Normal file
340
backend/oracle/codebook_service.py
Normal file
@@ -0,0 +1,340 @@
|
||||
"""
|
||||
oracle/codebook_service.py
|
||||
Loads, normalizes, and retrieves Oracle Canvas codebook examples from the
|
||||
expanded GPT and Claude seed packs delivered in Sprint 1.
|
||||
|
||||
The runtime treats the GPT pack as the primary normalized corpus and uses the
|
||||
Claude pack as a supplement when it adds unique examples or metadata.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# A token is a maximal run of lowercase ASCII letters and digits; everything
# else (spaces, underscores, punctuation) acts as a separator.
_TOKEN_RE = re.compile(r"[a-z0-9]+")

# Words dropped during tokenization so they never count as a search match.
_STOPWORDS = {
    "a",
    "an",
    "and",
    "as",
    "at",
    "build",
    "canvas",
    "chart",
    "client",
    "clients",
    "for",
    "from",
    "get",
    "give",
    "in",
    "into",
    "is",
    "list",
    "me",
    "of",
    "on",
    "or",
    "oracle",
    "please",
    "render",
    "show",
    "surface",
    "that",
    "the",
    "this",
    "to",
    "view",
    "with",
}
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class CodebookExample:
    """One normalized codebook example, immutable once built.

    Instances are produced by the normalizer in this module from the raw
    entries of a seed pack; sequence-valued fields are stored as tuples.
    """

    # --- identity and placement in the chapter hierarchy ---
    example_id: str
    chapter_id: str
    chapter_name: str
    subchapter_id: str
    subchapter_name: str

    # --- template description ---
    title: str
    template_name: str
    component_type: str
    accepted_shapes: tuple[str, ...]
    example_json: dict[str, Any]
    quality_notes: str
    is_canonical: bool

    # --- provenance and policy metadata ---
    source_pack: str  # label of the source file the example was loaded from
    surface_targets: tuple[str, ...]
    policy_tags: tuple[str, ...]
    backend_contract_hints: dict[str, Any]

    # --- retrieval ---
    score_terms: tuple[str, ...]  # tokens matched against search prompts
|
||||
|
||||
|
||||
def _repo_root() -> Path:
    """Return the directory two levels above the folder containing this file."""
    this_file = Path(__file__).resolve()
    ancestors = this_file.parents
    return ancestors[2]
|
||||
|
||||
|
||||
def _safe_load_json(path: Path) -> dict[str, Any]:
    """Read *path* as UTF-8 text and return the parsed JSON document.

    I/O and JSON decoding errors are not caught here; they propagate to the
    caller.
    """
    raw_text = path.read_text(encoding="utf-8")
    return json.loads(raw_text)
|
||||
|
||||
|
||||
def _tokenize(value: str) -> list[str]:
    """Split *value* into lowercase search tokens.

    Tokens keep their original order and duplicates are preserved.  Stopwords
    and single-character tokens are dropped.
    """
    kept: list[str] = []
    for token in _TOKEN_RE.findall(value.lower()):
        if len(token) <= 1 or token in _STOPWORDS:
            continue
        kept.append(token)
    return kept
|
||||
|
||||
|
||||
def _make_template_id(example: dict[str, Any]) -> str:
    """Derive a stable 16-hex-character identifier for a template.

    The identifier is the truncated SHA-1 of the chapter id, subchapter id,
    template name and component type joined with ``|``.

    Args:
        example: Mapping that may contain ``chapter_id``, ``subchapter_id``,
            ``template_name`` and ``component_type``.

    Returns:
        The first 16 hexadecimal characters of the SHA-1 digest.
    """
    parts: list[str] = []
    for field in ("chapter_id", "subchapter_id", "template_name", "component_type"):
        value = example.get(field)
        # Raw JSON entries may carry an explicit null or a non-string value;
        # treat null like a missing key and render anything else as text so
        # the join below cannot raise TypeError.
        parts.append("" if value is None else str(value))
    digest = hashlib.sha1("|".join(parts).encode("utf-8")).hexdigest()
    return digest[:16]
|
||||
|
||||
|
||||
def _chapter_maps(payload: dict[str, Any]) -> tuple[dict[str, str], dict[str, str]]:
    """Build id-to-name lookups for the chapters and subchapters in *payload*.

    Args:
        payload: Parsed seed pack; its ``chapters`` list holds entries with
            ``chapter_id``, ``name`` and a nested ``subchapters`` list whose
            entries carry ``subchapter_id`` and ``name``.

    Returns:
        ``(chapters, subchapters)``, each mapping a stripped id to a stripped
        name.  Entries whose id is empty after stripping are skipped.
    """
    chapters: dict[str, str] = {}
    subchapters: dict[str, str] = {}
    # ``or []`` also covers an explicit JSON null, which ``.get(key, [])``
    # would hand back as None and fail to iterate.
    for chapter in payload.get("chapters") or []:
        chapter_id = str(chapter.get("chapter_id", "")).strip()
        if chapter_id:
            chapters[chapter_id] = str(chapter.get("name", "")).strip()
        # Subchapters are collected even when the parent chapter has no id.
        for subchapter in chapter.get("subchapters") or []:
            sub_id = str(subchapter.get("subchapter_id", "")).strip()
            if sub_id:
                subchapters[sub_id] = str(subchapter.get("name", "")).strip()
    return chapters, subchapters
|
||||
|
||||
|
||||
def _normalize_examples(payload: dict[str, Any], source_pack: str) -> list[CodebookExample]:
    """Convert the raw examples of one seed pack into ``CodebookExample`` objects.

    Raw entries are read from ``seed_examples`` or, when that is missing or
    empty, from ``examples``.  Missing titles, template names and component
    types receive defaults, chapter and subchapter names are resolved through
    the pack's own chapter listing, and each example gets the search tokens
    derived from its descriptive text.

    Args:
        payload: Parsed seed pack.
        source_pack: Label recorded on every example produced from this pack.

    Returns:
        The normalized examples, in the order they appear in the pack.
    """
    chapter_lookup, subchapter_lookup = _chapter_maps(payload)
    entries = payload.get("seed_examples") or payload.get("examples") or []

    def _build(entry: dict[str, Any]) -> CodebookExample:
        chapter_key = str(entry.get("chapter_id", "")).strip()
        subchapter_key = str(entry.get("subchapter_id", "")).strip()
        heading = str(entry.get("title") or entry.get("template_name") or "Oracle Component").strip()
        template = str(entry.get("template_name") or heading).strip()
        kind = str(entry.get("component_type") or "summary_card").strip()
        sample = entry.get("example_json") or {}

        # Everything descriptive about the example feeds the search tokens.
        searchable_text = " ".join(
            (
                heading,
                template,
                kind.replace("_", " "),
                chapter_lookup.get(chapter_key, ""),
                subchapter_lookup.get(subchapter_key, ""),
                str(entry.get("quality_notes", "")),
                " ".join(entry.get("policy_tags", []) or []),
            )
        )
        tokens = _tokenize(searchable_text)

        return CodebookExample(
            # Fall back to a derived id when the pack does not supply one.
            example_id=str(entry.get("example_id") or _make_template_id(entry)),
            chapter_id=chapter_key,
            # Unknown ids are shown as-is instead of a name.
            chapter_name=chapter_lookup.get(chapter_key, chapter_key),
            subchapter_id=subchapter_key,
            subchapter_name=subchapter_lookup.get(subchapter_key, subchapter_key),
            title=heading,
            template_name=template,
            component_type=kind,
            accepted_shapes=tuple(entry.get("accepted_shapes") or []),
            example_json=sample,
            quality_notes=str(entry.get("quality_notes") or ""),
            is_canonical=bool(entry.get("is_canonical")),
            source_pack=source_pack,
            surface_targets=tuple(entry.get("surface_targets") or []),
            policy_tags=tuple(entry.get("policy_tags") or []),
            backend_contract_hints=dict(entry.get("backend_contract_hints") or {}),
            score_terms=tuple(tokens),
        )

    return [_build(entry) for entry in entries]
|
||||
|
||||
|
||||
class OracleCodebookService:
    """Loads the Oracle codebook from disk and answers catalog and search queries."""

    def __init__(self) -> None:
        """Resolve the candidate codebook file locations under the repository root."""
        root = _repo_root()
        oracle_dir = root / "backend" / "oracle"
        schema_dir = (
            root
            / ".Agent Context"
            / "Sprint 1"
            / "Sayan Multi-Surface and Oracle Delivery Pack"
            / "Sample JSON Schema"
        )
        self.runtime_merged_path = oracle_dir / "oracle_runtime_codebook_merged.json"
        self.primary_path = (
            schema_dir
            / "GPT 5.4"
            / "oracle_canvas_json_expansion_pack"
            / "db"
            / "oracle_template_seed_db_expanded_v1.pretty.json"
        )
        self.secondary_path = (
            schema_dir
            / "Claude Sonnet 4.6"
            / "oracle_template_expansion"
            / "oracle_template_seed_db_expanded.json"
        )
        self.fallback_path = oracle_dir / "oracle_template_seed_db.json"

    # NOTE(review): lru_cache on a method keys on ``self`` and keeps the
    # instance alive for the cache's lifetime.  It is kept as-is so that any
    # external ``load.cache_clear()`` callers keep working.
    @lru_cache(maxsize=1)
    def load(self) -> dict[str, Any]:
        """Load, merge and de-duplicate the codebook examples (cached).

        When the merged runtime file exists it is used together with the seed
        fallback; otherwise the GPT pack, the Claude pack and the seed
        fallback are read in that order.  Files that do not exist are skipped.

        Returns:
            A dict with ``examples`` (de-duplicated ``CodebookExample`` list),
            ``source_summary`` (``"label:count"`` strings for each source that
            contributed examples) and ``template_count`` (number of distinct
            chapter/subchapter/template/component combinations).
        """
        corpora: list[CodebookExample] = []
        sources_loaded: list[str] = []
        source_paths: list[tuple[Path, str]]
        if self.runtime_merged_path.exists():
            source_paths = [
                (self.runtime_merged_path, "runtime_merged"),
                (self.fallback_path, "runtime_seed_fallback"),
            ]
        else:
            source_paths = [
                (self.primary_path, "gpt_5_4"),
                (self.secondary_path, "claude_sonnet_4_6"),
                (self.fallback_path, "runtime_seed_fallback"),
            ]

        for path, label in source_paths:
            if not path.exists():
                continue
            payload = _safe_load_json(path)
            examples = _normalize_examples(payload, label)
            if examples:
                corpora.extend(examples)
                sources_loaded.append(f"{label}:{len(examples)}")

        deduped: dict[tuple[str, str, str], CodebookExample] = {}
        for example in corpora:
            key = (example.subchapter_id, example.template_name.lower(), example.title.lower())
            existing = deduped.get(key)
            if existing is None:
                deduped[key] = example
                continue
            # On a collision, a GPT-pack example replaces a non-GPT one;
            # failing that, a canonical example replaces a non-canonical one;
            # otherwise the first example seen is kept.
            if example.source_pack == "gpt_5_4" and existing.source_pack != "gpt_5_4":
                deduped[key] = example
            elif example.is_canonical and not existing.is_canonical:
                deduped[key] = example

        examples = list(deduped.values())
        logger.info("Oracle codebook loaded from %s", ", ".join(sources_loaded) or "no sources")
        return {
            "examples": examples,
            "source_summary": sources_loaded,
            "template_count": len(
                {(e.chapter_id, e.subchapter_id, e.template_name, e.component_type) for e in examples}
            ),
        }

    def stats(self) -> dict[str, Any]:
        """Return example count, template count and the per-source summary."""
        data = self.load()
        examples: list[CodebookExample] = data["examples"]
        return {
            "example_count": len(examples),
            "template_count": data["template_count"],
            "source_summary": data["source_summary"],
        }

    def list_templates(
        self,
        *,
        category: str | None = None,
        status: str | None = None,
        search: str | None = None,
        limit: int = 50,
        offset: int = 0,
    ) -> dict[str, Any]:
        """List catalog templates, optionally filtered and paginated.

        Args:
            category: Case-insensitive chapter or subchapter name to match.
            status: Accepted for interface compatibility and ignored.
            search: Free text; a template is kept when at least one of its
                score terms matches a search token.
            limit: Maximum number of templates returned; negative values are
                treated as 0.
            offset: Number of templates to skip; negative values are treated
                as 0.

        Returns:
            ``{"total": <matches before pagination>, "templates": [<page>]}``
            with templates sorted by category, then name.
        """
        del status  # runtime codebook templates are always active catalog entries
        examples: list[CodebookExample] = self.load()["examples"]

        # Filter inputs do not change per example, so prepare them once.
        wanted_category = category.lower() if category else None
        search_terms = set(_tokenize(search)) if search else None

        templates: dict[str, dict[str, Any]] = {}
        for example in examples:
            if wanted_category is not None and wanted_category not in {
                example.chapter_name.lower(),
                example.subchapter_name.lower(),
            }:
                continue
            # A search made only of stopwords yields no tokens and therefore
            # matches nothing.
            if search_terms is not None and search_terms.isdisjoint(example.score_terms):
                continue
            template_id = _make_template_id(
                {
                    "chapter_id": example.chapter_id,
                    "subchapter_id": example.subchapter_id,
                    "template_name": example.template_name,
                    "component_type": example.component_type,
                }
            )
            # The first example seen for a template id supplies its record.
            if template_id in templates:
                continue
            templates[template_id] = {
                "templateId": template_id,
                "tenantId": "_system",
                "name": example.template_name,
                "category": example.chapter_name,
                "status": "catalog_active",
                "origin": "premade",
                "version": "codebook-v2",
                "acceptedShapes": list(example.accepted_shapes),
                "description": f"{example.subchapter_name} · {example.title}",
                "chapterId": example.chapter_id,
                "subchapterId": example.subchapter_id,
                "componentType": example.component_type,
                "sourcePack": example.source_pack,
                "useCount": 0,
                "updatedAt": None,
                "createdAt": None,
            }

        ordered = sorted(templates.values(), key=lambda item: (item["category"], item["name"]))
        # Negative values would turn the slice into an end-relative one and
        # return an arbitrary window, so clamp both to zero.
        start = max(offset, 0)
        page_size = max(limit, 0)
        return {
            "total": len(ordered),
            "templates": ordered[start: start + page_size],
        }

    def search_examples(self, prompt: str, *, limit: int = 8) -> list[CodebookExample]:
        """Return the best-scoring examples for *prompt*, highest score first.

        Scoring: 6 points per prompt token shared with the example's score
        terms; +24 / +20 / +14 / +12 when the template name, subchapter name,
        chapter name or component type (underscores read as spaces) occurs in
        the prompt; +8 for canonical examples; +4 for examples tagged
        ``live_data_first``.  Only one example per (subchapter, template
        name) pair is returned.

        Args:
            prompt: Free-text request.
            limit: Maximum number of examples; values <= 0 yield an empty list.
        """
        if limit <= 0:
            return []

        lowered_prompt = prompt.lower()
        prompt_terms = set(_tokenize(prompt))

        def mentioned(name: str) -> bool:
            # An empty string is a substring of every prompt, so an example
            # with a blank name must not earn the bonus.
            return bool(name) and name in lowered_prompt

        scored: list[tuple[int, CodebookExample]] = []
        for example in self.load()["examples"]:
            score = 6 * len(prompt_terms.intersection(example.score_terms))
            if mentioned(example.template_name.lower()):
                score += 24
            if mentioned(example.subchapter_name.lower()):
                score += 20
            if mentioned(example.chapter_name.lower()):
                score += 14
            if mentioned(example.component_type.replace("_", " ").lower()):
                score += 12
            # NOTE(review): the two bonuses below are independent of the
            # prompt, so canonical or live_data_first examples always pass
            # the ``score > 0`` filter -- confirm this is intended.
            if example.is_canonical:
                score += 8
            if "live_data_first" in example.policy_tags:
                score += 4
            if score > 0:
                scored.append((score, example))

        scored.sort(key=lambda item: (-item[0], item[1].chapter_id, item[1].subchapter_id, item[1].title))

        selected: list[CodebookExample] = []
        seen: set[tuple[str, str]] = set()
        for _, example in scored:
            dedupe_key = (example.subchapter_id, example.template_name)
            if dedupe_key in seen:
                continue
            seen.add(dedupe_key)
            selected.append(example)
            if len(selected) >= limit:
                break
        return selected

    def synthesize_template(self, prompt: str, data_shapes: list[str] | None = None) -> dict[str, Any]:
        """Return the best catalog template for *prompt*, or a draft when none matches.

        Args:
            prompt: Free-text request.
            data_shapes: Shapes used for the draft, and for a matched template
                that declares no accepted shapes of its own.

        Returns:
            A template record.  A matched record carries the codebook metadata
            and the example JSON; an unmatched one is a ``tenant_draft`` whose
            id is derived from the prompt.
        """
        match = next(iter(self.search_examples(prompt, limit=1)), None)
        shapes = data_shapes or []
        if match is None:
            return {
                "templateId": hashlib.sha1(prompt.encode("utf-8")).hexdigest()[:16],
                "tenantId": "_system",
                "name": "Oracle Synthesized Draft",
                "category": "Custom",
                "status": "tenant_draft",
                "origin": "synthesized",
                "version": "1.0.0",
                # Copy so the record does not alias the caller's list.
                "acceptedShapes": list(shapes),
                "description": f"Draft synthesized from prompt: {prompt[:120]}",
            }

        return {
            "templateId": _make_template_id(
                {
                    "chapter_id": match.chapter_id,
                    "subchapter_id": match.subchapter_id,
                    "template_name": match.template_name,
                    "component_type": match.component_type,
                }
            ),
            "tenantId": "_system",
            "name": match.template_name,
            "category": match.chapter_name,
            "status": "catalog_active",
            "origin": "premade",
            "version": "codebook-v2",
            "acceptedShapes": list(match.accepted_shapes or shapes),
            "description": f"Best codebook match · {match.subchapter_name}",
            "componentType": match.component_type,
            "chapterId": match.chapter_id,
            "subchapterId": match.subchapter_id,
            "sourcePack": match.source_pack,
            "exampleJson": match.example_json,
        }
|
||||
|
||||
|
||||
# Shared module-level service instance, created at import time.
codebook_service = OracleCodebookService()
|
||||
Reference in New Issue
Block a user