feat: Oracle Canvas Component Schema and Qwen 3.6 integration (#31)

Co-authored-by: Sagnik <sagnik7896@gmail.com>
Reviewed-on: #31
2026-04-20 01:43:39 +05:30
parent 57144e1bd3
commit e519339cc9
129 changed files with 625213 additions and 262 deletions
--- a/backend/oracle/prompt_orchestrator.py
+++ b/backend/oracle/prompt_orchestrator.py
@@ -17,6 +17,8 @@ from .policy_service import PolicyContext, PolicyService
 from .canvas_service import canvas_service
 from .data_access_gateway import data_access_gateway
 from .persona_service import persona_service
+from .codebook_service import codebook_service, CodebookExample
+from backend.services.runtime_llm_service import runtime_llm_service
 from backend.services.nemoclaw_runtime import nemoclaw_runtime

 try:
@@ -26,15 +28,30 @@ except Exception:  # pragma: no cover

 logger = logging.getLogger(__name__)

-_NEMOCLAW_URL = os.getenv("NEMOCLAW_API_URL", "")
-_NEMOCLAW_API_KEY = os.getenv("NEMOCLAW_API_KEY", "")
 _DB_URL = os.getenv("DATABASE_URL", "")

 policy_svc = PolicyService()


-def _now() -> str:
-    return datetime.now(timezone.utc).isoformat()
+def _now() -> datetime:
+    """Return the current time as a timezone-aware UTC ``datetime``."""
+    return datetime.now(tz=timezone.utc)
+
+
+def _iso(value: datetime | None) -> str | None:
+    """Format ``value`` with ``datetime.isoformat``; ``None`` is passed through."""
+    return None if value is None else value.isoformat()
+
+
+def _coerce_datetime(value: datetime | str | None) -> datetime | None:
+    """Return ``value`` as a datetime, parsing stripped ISO-8601 text; None if absent or unparseable."""
+    if value is None or isinstance(value, datetime):
+        return value
+    text = value.strip() if isinstance(value, str) else ""  # parse the same text that is checked for emptiness
+    try:
+        return datetime.fromisoformat(text) if text else None
+    except ValueError:
+        return None


 # ── Execution store ───────────────────────────────────────────────────────────
@@ -52,10 +69,10 @@ _INTENT_KEYWORDS: dict[str, list[str]] = {
    "pipeline_board": ["pipeline", "stage", "kanban", "deal", "funnel"],
    "bar_chart": ["bar", "compare", "source", "channel", "distribution", "ranked", "lead", "whale"],
    "geo_map": ["map", "geographic", "location", "district", "region", "area", "dubai"],
-    "table": ["table", "list", "broker", "performance", "leaderboard", "rank", "top"],
+    "table": ["table", "list", "broker", "performance", "leaderboard", "rank", "top", "contact", "client", "account", "crm"],
    "line_chart": ["trend", "time", "monthly", "weekly", "absorption", "forecast"],
    "kpi_tile": ["kpi", "total", "summary", "attainment", "quota", "how many"],
-    "activity_stream": ["timeline", "activity", "history", "follow-up", "queue", "contact"],
+    "activity_stream": ["timeline", "activity", "history", "follow-up", "queue", "contact", "interaction", "message", "call", "email"],
 }


@@ -109,6 +126,129 @@ _DATASET_MAP: dict[str, str] = {
    "activity_stream": "lead_activity_log",
 }

+_CODEBOOK_COMPONENT_MAP: dict[str, str] = {  # codebook component_type -> planner component type
+    "summary_card": "kpi_tile",
+    "summary_strip": "kpi_tile",
+    "metric_card_group": "kpi_tile",
+    "compact_alert_card": "kpi_tile",
+    "gauge_stack": "kpi_tile",
+    "lead_profile_card": "table",
+    "property_card": "table",
+    "data_table": "table",
+    "leaderboard_table": "table",
+    "matrix_grid": "table",
+    "interaction_timeline": "activity_stream",
+    "message_thread_summary": "activity_stream",
+    "timeline": "activity_stream",
+    "bar_chart": "bar_chart",
+    "line_chart": "line_chart",
+    "heatmap": "geo_map",
+    "geo_map": "geo_map",
+    "pipeline_board": "pipeline_board",
+}
+
+
+def _component_plan_type_from_codebook(example: CodebookExample) -> str:  # unmapped component types default to "table"
+    return _CODEBOOK_COMPONENT_MAP.get(example.component_type, "table")
+
+
+def _dataset_for_codebook(example: CodebookExample, prompt: str, component_plan_type: str | None = None) -> str:
+    """Choose a dataset name: fixed per plan type first, then prompt keywords, then codebook chapter names."""
+    chapter, subchapter = example.chapter_name.lower(), example.subchapter_name.lower()
+    plan_type = component_plan_type or _component_plan_type_from_codebook(example)
+    text = prompt.lower()
+    def mentions(*terms: str) -> bool:
+        return any(term in text for term in terms)
+    if plan_type == "activity_stream":
+        return "crm_interaction_timeline"
+    if plan_type == "pipeline_board":
+        return "crm_opportunity_pipeline"
+    if plan_type == "line_chart" and mentions("trend", "time", "history", "growth"):
+        return "crm_property_interest_rollup"
+    if mentions("contact", "client 360", "crm", "account", "lead"):
+        if mentions("timeline", "message", "call", "email"):
+            return "crm_interaction_timeline"
+        if mentions("pipeline", "opportunit"):
+            return "crm_opportunity_pipeline"
+        if mentions("interest", "project", "property"):
+            return "crm_property_interest_rollup"
+        return "crm_contacts_overview"
+    # No CRM keyword in the prompt: decide from the codebook chapter/subchapter names instead.
+    if "client" in chapter or "client" in subchapter or "contact" in subchapter:
+        return "crm_contacts_overview"
+    if "opportun" in chapter or "pipeline" in subchapter:
+        return "crm_opportunity_pipeline"
+    if "interaction" in chapter or "communication" in chapter or "timeline" in subchapter:
+        return "crm_interaction_timeline"
+    if "property" in chapter or "inventory" in chapter or "interest" in subchapter:
+        return "crm_property_interest_rollup"
+    return _DATASET_MAP.get(plan_type, "oracle_aggregated_metric")
+
+
+def _build_codebook_retrieval_plan(
+    prompt: str, tenant_id: str, actor_role: str, matches: list[CodebookExample]
+) -> dict[str, Any]:
+    """Build a retrieval plan (at most 4 components) from the prompt and its codebook matches."""
+    row_limit = 50 if actor_role in ("senior_broker", "junior_broker") else 200
+    # An empty ``matches`` list is tolerated: no exemplar, so no templateRef/titleHint is emitted.
+    exemplar = matches[0] if matches else None
+    desired_types = _detect_component_types(prompt)
+    if not desired_types:
+        desired_types = ["table"] if exemplar is None else [_component_plan_type_from_codebook(exemplar)]
+
+    title_hints: dict[str, str] = {}
+    for example in matches:
+        title_hints.setdefault(_component_plan_type_from_codebook(example), example.title)
+    components: list[dict[str, Any]] = []
+    for component_plan_type in desired_types[:4]:
+        if exemplar is None:
+            dataset = _DATASET_MAP.get(component_plan_type, "oracle_aggregated_metric")
+        else:
+            dataset = _dataset_for_codebook(exemplar, prompt, component_plan_type)
+        component: dict[str, Any] = {
+            "suggestedType": component_plan_type,
+            "dataset": dataset,
+            "privacyTier": "standard",
+            "rowLimit": row_limit,
+            "joins": [],
+            "queryTemplate": f"SELECT * FROM {dataset} WHERE tenant_id = :tenant_id LIMIT :limit",
+            "queryParameters": {"tenant_id": tenant_id, "limit": row_limit},
+        }
+        if exemplar is not None:
+            component["templateRef"] = {
+                "exampleId": exemplar.example_id,
+                "templateName": exemplar.template_name,
+                "componentType": exemplar.component_type,
+                "chapterName": exemplar.chapter_name,
+                "subchapterName": exemplar.subchapter_name,
+                "sourcePack": exemplar.source_pack,
+            }
+            component["titleHint"] = title_hints.get(component_plan_type, exemplar.title)
+        components.append(component)
+
+    return {
+        "planId": str(uuid.uuid4()),
+        "components": components,
+        "semanticModelVersion": "oracle_codebook_v2026_04_19_01",
+        "intentClass": "analytical",
+        "planner": "codebook_retrieval",
+    }
+
+
+_RUNTIME_ALLOWED_DATASETS = {  # allowlist: LLM-proposed components naming any other dataset are skipped
+    "deals",
+    "lead_daily_snapshot",
+    "lead_geo_interest_rollup",
+    "broker_performance",
+    "inventory_absorption",
+    "oracle_aggregated_metric",
+    "lead_activity_log",
+    "crm_contacts_overview",
+    "crm_opportunity_pipeline",
+    "crm_property_interest_rollup",
+    "crm_interaction_timeline",
+}
+

 class PromptOrchestrator:
    """
@@ -155,18 +295,35 @@ class PromptOrchestrator:
            "prompt": prompt,
            "intentClass": "analytical",
            "status": "planning",
-            "modelRuntime": "nemoclaw_hosted" if _NEMOCLAW_URL else "deterministic_fallback",
+            "modelRuntime": "runtime_llm" if runtime_llm_service._provider_catalog() else "deterministic_fallback",
            "semanticModelVersion": "oracle_semantic_v2026_04_08_01",
            "warnings": warnings,
            "componentsCreated": [],
            "clientRequestId": client_request_id,
            "createdAt": now,
+            "codebookMatches": [],
        }
        _DEMO_EXECUTIONS[execution_id] = execution
        await self._persist_execution(execution)

        # ── Step 1: Build retrieval plan ──────────────────────────────────────
-        if _NEMOCLAW_URL and _NEMOCLAW_API_KEY:
+        codebook_matches = codebook_service.search_examples(prompt, limit=4)
+        execution["codebookMatches"] = [
+            {
+                "exampleId": match.example_id,
+                "templateName": match.template_name,
+                "componentType": match.component_type,
+                "chapterName": match.chapter_name,
+                "subchapterName": match.subchapter_name,
+                "sourcePack": match.source_pack,
+            }
+            for match in codebook_matches
+        ]
+
+        if codebook_matches:
+            retrieval_plan = _build_codebook_retrieval_plan(prompt, tenant_id, actor_role, codebook_matches)
+            execution["status"] = "validated"
+        elif runtime_llm_service._provider_catalog():
            try:
                retrieval_plan = await self._call_nemoclaw(prompt, conversation_context or [], ctx)
                execution["status"] = "validated"
@@ -298,7 +455,7 @@ class PromptOrchestrator:
            comp: dict[str, Any] = {
                "componentId": component_id,
                "type": mapped_type,
-                "title": self._generate_title(prompt, ctype),
+                "title": str(plan.get("titleHint") or self._generate_title(prompt, ctype)),
                "description": f"Generated from: \"{prompt[:80]}\"",
                "dataSourceDescriptor": {
                    "descriptorId": str(uuid.uuid4()),
@@ -321,7 +478,7 @@ class PromptOrchestrator:
                    "promptExecutionId": execution_id,
                    "sourceBranchId": branch_id,
                    "createdBy": actor_id,
-                    "createdAt": _now(),
+                    "createdAt": _iso(_now()),
                },
                "renderingHints": self._rendering_hints(ctype),
                "layout": {
@@ -413,7 +570,7 @@ class PromptOrchestrator:
                "promptExecutionId": execution_id,
                "sourceBranchId": branch_id,
                "createdBy": actor_id,
-                "createdAt": _now(),
+                "createdAt": _iso(_now()),
            },
            "renderingHints": {"estimatedHeightPx": 180, "skeletonVariant": "text", "virtualizationPriority": 4},
            "layout": {
@@ -560,7 +717,7 @@ class PromptOrchestrator:
                "promptExecutionId": execution_id,
                "sourceBranchId": branch_id,
                "createdBy": actor_id,
-                "createdAt": _now(),
+                "createdAt": _iso(_now()),
            },
            "renderingHints": {"estimatedHeightPx": 140, "skeletonVariant": "generic", "virtualizationPriority": 5},
            "layout": {
@@ -601,24 +758,80 @@ class PromptOrchestrator:
        ctx: PolicyContext,
    ) -> dict[str, Any]:
        """
-        Calls the Nemoclaw hosted model endpoint.
-        Raises on failure so the orchestrator can fall back to demo.
+        Uses the shared runtime LLM service to propose a retrieval plan.
+        Raises on malformed output so the orchestrator can fall back safely.
        """
-        import httpx  # type: ignore
-        async with httpx.AsyncClient(timeout=30.0) as client:
-            resp = await client.post(
-                f"{_NEMOCLAW_URL}/v1/oracle/plan",
-                headers={"Authorization": f"Bearer {_NEMOCLAW_API_KEY}"},
-                json={
-                    "prompt": prompt,
-                    "conversationContext": context,
-                    "tenantId": ctx.tenant_id,
-                    "actorRole": ctx.actor_role,
-                    "semanticModelVersion": "oracle_semantic_v2026_04_08_01",
+        row_limit = 50 if ctx.actor_role in ("senior_broker", "junior_broker") else 200
+        system_prompt = (
+            "You are the Oracle planner for Project Velocity. "
+            "Return JSON only. "
+            "Choose up to 4 analytical components for the prompt. "
+            "Allowed component types: pipeline_board, bar_chart, geo_map, table, line_chart, kpi_tile, activity_stream. "
+            "Allowed datasets: deals, lead_daily_snapshot, lead_geo_interest_rollup, broker_performance, inventory_absorption, "
+            "oracle_aggregated_metric, lead_activity_log, crm_contacts_overview, crm_opportunity_pipeline, "
+            "crm_property_interest_rollup, crm_interaction_timeline. "
+            "Return an object with keys semanticModelVersion, intentClass, components. "
+            "Each component must include suggestedType, dataset, and titleHint. "
+            "Do not emit SQL. Do not invent datasets outside the allowlist."
+        )
+        response = await runtime_llm_service.chat(
+            provider_id=None,
+            model=None,
+            system_prompt=system_prompt,
+            messages=[
+                *context,
+                {
+                    "role": "user",
+                    "content": json.dumps(
+                        {
+                            "prompt": prompt,
+                            "tenantId": ctx.tenant_id,
+                            "actorRole": ctx.actor_role,
+                            "rowLimit": row_limit,
+                        }
+                    ),
                },
+            ],
+            temperature=0.1,
+            response_format="json",
+            metadata={"planner": "oracle_canvas"},
+        )
+        payload = response.get("message", {}).get("parsedJson") or {}
+        components_payload = payload.get("components")
+        if not isinstance(components_payload, list) or not components_payload:
+            raise ValueError("Runtime LLM planner returned no components.")
+
+        normalized_components: list[dict[str, Any]] = []
+        for raw_component in components_payload[:4]:
+            if not isinstance(raw_component, dict):
+                continue
+            suggested_type = str(raw_component.get("suggestedType", "")).strip()
+            dataset = str(raw_component.get("dataset", "")).strip()
+            if suggested_type not in _DATASET_MAP or dataset not in _RUNTIME_ALLOWED_DATASETS:
+                continue
+            normalized_components.append(
+                {
+                    "suggestedType": suggested_type,
+                    "dataset": dataset,
+                    "privacyTier": "standard",
+                    "rowLimit": row_limit,
+                    "joins": [],
+                    "queryTemplate": f"SELECT * FROM {dataset} WHERE tenant_id = :tenant_id LIMIT :limit",
+                    "queryParameters": {"tenant_id": ctx.tenant_id, "limit": row_limit},
+                    "titleHint": str(raw_component.get("titleHint", "")).strip() or self._generate_title(prompt, suggested_type),
+                }
            )
-            resp.raise_for_status()
-            return resp.json()  # type: ignore[no-any-return]
+
+        if not normalized_components:
+            raise ValueError("Runtime LLM planner returned no valid whitelisted components.")
+
+        return {
+            "planId": str(uuid.uuid4()),
+            "components": normalized_components,
+            "semanticModelVersion": str(payload.get("semanticModelVersion") or "oracle_runtime_llm_v2026_04_19_01"),
+            "intentClass": str(payload.get("intentClass") or "analytical"),
+            "planner": "runtime_llm",
+        }

    async def get_execution(self, execution_id: str) -> dict[str, Any] | None:
        return _DEMO_EXECUTIONS.get(execution_id)
@@ -668,8 +881,8 @@ class PromptOrchestrator:
                execution.get("summary"),
                execution.get("componentsCreated", []),
                execution.get("clientRequestId"),
-                execution["createdAt"],
-                execution.get("completedAt"),
+                _coerce_datetime(execution["createdAt"]),
+                _coerce_datetime(execution.get("completedAt")),
            )
        finally:
            await conn.close()