fix: Oracle Canvas JSON Component Generation planning and orchestration logic

2026-04-24 05:14:11 +05:30
parent 9f27e6a017
commit cf602822b0
6 changed files with 1555 additions and 115 deletions
--- a/backend/oracle/execution_profiler.py
+++ b/backend/oracle/execution_profiler.py
@@ -0,0 +1,202 @@
 """
 oracle/execution_profiler.py
 Post-execution quality checks for Oracle natural DB queries.
 """
 from __future__ import annotations
 import re
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
 from typing import Any
 _STALE_THRESHOLD_DAYS = 365
@dataclass
 class QualityIssue:
    code: str
    description: str
    severity: str
    replan_hint: str
@dataclass
 class ProfileResult:
    passed: bool
    row_count: int
    issues: list[QualityIssue] = field(default_factory=list)
    replan_hints: list[str] = field(default_factory=list)
    suggested_component_type: str | None = None
 def _extract_cardinality_from_prompt(prompt: str) -> int | None:
    lowered = prompt.lower()
    numeric_match = re.search(r"\b(?:top|last|latest|recent|first|show|which)\s+(\d{1,4})\b", lowered)
    if numeric_match:
        return int(numeric_match.group(1))
    words = {
        "one": 1,
        "two": 2,
        "three": 3,
        "four": 4,
        "five": 5,
        "six": 6,
        "seven": 7,
        "eight": 8,
        "nine": 9,
        "ten": 10,
        "eleven": 11,
        "twelve": 12,
        "fifteen": 15,
        "twenty": 20,
    }
    word_match = re.search(
        r"\b(?:top|last|latest|recent|first|show|which)\s+"
        r"(one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|fifteen|twenty)\b",
        lowered,
    )
    if word_match:
        return words.get(word_match.group(1))
    return None
 def _all_null_measures(rows: list[dict[str, Any]], columns: list[str]) -> bool:
    if not rows or not columns:
        return False
    numeric_columns: list[str] = []
    for column in columns:
        saw_numeric = False
        all_null = True
        for row in rows[:20]:
            value = row.get(column)
            if value is not None:
                all_null = False
                if isinstance(value, (int, float)):
                    saw_numeric = True
        if saw_numeric:
            numeric_columns.append(column)
            if not all_null:
                return False
    if numeric_columns:
        return True
    return all(all(value is None for value in row.values()) for row in rows[:5])
 def _timestamps_are_stale(rows: list[dict[str, Any]], columns: list[str]) -> bool:
    timestamp_columns = [
        column for column in columns if any(token in column for token in ("_at", "date", "timestamp", "when", "time"))
    ]
    if not timestamp_columns or not rows:
        return False
    now = datetime.now(timezone.utc)
    checked = 0
    stale = 0
    for row in rows[:20]:
        for column in timestamp_columns:
            value = row.get(column)
            if value is None or not isinstance(value, str):
                continue
            try:
                parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
            except ValueError:
                continue
            if parsed.tzinfo is None:
                parsed = parsed.replace(tzinfo=timezone.utc)
            checked += 1
            if (now - parsed).days > _STALE_THRESHOLD_DAYS:
                stale += 1
    return checked > 0 and stale == checked
 class ExecutionProfiler:
    def profile(
        self,
        *,
        rows: list[dict[str, Any]],
        columns: list[str],
        sql: str,
        prompt: str,
        source_tables: list[str],
        row_limit: int,
    ) -> ProfileResult:
        del source_tables, row_limit
        issues: list[QualityIssue] = []
        sql_lower = sql.lower()
        if len(rows) == 0:
            issues.append(
                QualityIssue(
                    code="zero_rows",
                    description="Query returned zero rows.",
                    severity="blocking",
                    replan_hint=(
                        "The query returned zero rows. Use authoritative recency and business-semantic columns "
                        "from the semantic catalog. Avoid sparse or deprecated timestamp fields."
                    ),
                )
            )
        elif _all_null_measures(rows, columns):
            issues.append(
                QualityIssue(
                    code="all_null_measures",
                    description="Rows returned but numeric measure columns are null.",
                    severity="blocking",
                    replan_hint=(
                        "The query returned rows but numeric measures are null. "
                        "Check join keys and metric source columns."
                    ),
                )
            )
        requested_n = _extract_cardinality_from_prompt(prompt)
        if requested_n is not None and len(rows) > requested_n * 3:
            issues.append(
                QualityIssue(
                    code="cardinality_mismatch",
                    description=f"Prompt asked for about {requested_n} rows but query returned {len(rows)}.",
                    severity="warning",
                    replan_hint=f"Respect the requested result count and add LIMIT {requested_n}.",
                )
            )
        if rows and _timestamps_are_stale(rows, columns):
            issues.append(
                QualityIssue(
                    code="stale_timestamps",
                    description="Returned timestamps appear stale.",
                    severity="warning",
                    replan_hint="The result timestamps are stale. Use authoritative recency fields.",
                )
            )
        suggested_type: str | None = None
        if len(rows) == 1 and len(columns) <= 4:
            non_null_values = [value for value in rows[0].values() if value is not None]
            if non_null_values and all(isinstance(value, (int, float)) for value in non_null_values):
                suggested_type = "kpiTile"
                issues.append(
                    QualityIssue(
                        code="single_row_scalar",
                        description="Single scalar row is better rendered as KPI tile.",
                        severity="warning",
                        replan_hint="",
                    )
                )
        blocking = [issue for issue in issues if issue.severity == "blocking"]
        return ProfileResult(
            passed=len(blocking) == 0,
            row_count=len(rows),
            issues=issues,
            replan_hints=[issue.replan_hint for issue in issues if issue.replan_hint],
            suggested_component_type=suggested_type,
        )
 execution_profiler = ExecutionProfiler()
--- a/backend/oracle/natural_db_agent.py
+++ b/backend/oracle/natural_db_agent.py
@@ -1,9 +1,13 @@
 """
 Natural DB-first Oracle agent.
-The LLM can plan arbitrary analytical SELECT statements over the full public
+Pipeline:
-Velocity app schema. The executor enforces only a read-only SQL contract and a
+1. schema introspection
-UI row cap; write paths stay behind typed API endpoints.
+2. semantic SQL planning
 3. plan verification and optional repair
 4. SQL execution
 5. execution quality profiling and auto-replan
 6. visualization planning from actual result shape
 """
 from __future__ import annotations
@@ -17,6 +21,10 @@ from decimal import Decimal
 from typing import Any
 from backend.services.runtime_llm_service import runtime_llm_service
 from .execution_profiler import execution_profiler
 from .plan_verifier import plan_verifier
 from .semantic_catalog import CATALOG_VERSION, build_semantic_context_for_planner
 from .visualization_planner import VisualizationDecision, visualization_planner
 try:
    import asyncpg  # type: ignore
@@ -30,7 +38,7 @@ DESTRUCTIVE_SQL = re.compile(
    re.IGNORECASE,
 )
 TABLE_REF_RE = re.compile(r"\b(?:from|join)\s+([a-zA-Z_][\w.]*)(?:\s|$)", re.IGNORECASE)
-CTE_NAME_RE = re.compile(r"\b(?:with|,)\s*([a-zA-Z_][\w]*)\s+as\s*\(", re.IGNORECASE)
+_MAX_REPLAN_ATTEMPTS = 2
 def _json_safe(value: Any) -> Any:
@@ -39,9 +47,9 @@ def _json_safe(value: Any) -> Any:
    if isinstance(value, Decimal):
        return float(value)
    if isinstance(value, (list, tuple)):
-        return [_json_safe(v) for v in value]
+        return [_json_safe(item) for item in value]
    if isinstance(value, dict):
-        return {str(k): _json_safe(v) for k, v in value.items()}
+        return {str(key): _json_safe(item) for key, item in value.items()}
    return value
@@ -60,9 +68,11 @@ def db_ready() -> bool:
 async def connect_db() -> Any:
    if asyncpg is None:
        raise RuntimeError("asyncpg is not installed.")
    read_database_url = os.getenv("ORACLE_READ_DATABASE_URL", "")
    if read_database_url and not read_database_url.startswith("PLACEHOLDER"):
        return await asyncpg.connect(read_database_url)
    if all(os.getenv(name) for name in ("VELOCITY_DB_READ_NAME", "VELOCITY_DB_READ_USER", "VELOCITY_DB_READ_PASSWORD")):
        return await asyncpg.connect(
            host=os.getenv("VELOCITY_DB_READ_HOST", os.getenv("VELOCITY_DB_HOST", "127.0.0.1")),
@@ -71,9 +81,11 @@ async def connect_db() -> Any:
            user=os.environ["VELOCITY_DB_READ_USER"],
            password=os.environ["VELOCITY_DB_READ_PASSWORD"],
        )
    database_url = os.getenv("DATABASE_URL", "")
    if database_url and not database_url.startswith("PLACEHOLDER"):
        return await asyncpg.connect(database_url)
    return await asyncpg.connect(
        host=os.getenv("VELOCITY_DB_HOST", "127.0.0.1"),
        port=int(os.getenv("VELOCITY_DB_PORT", "5432")),
@@ -95,8 +107,12 @@ class NaturalQueryResult:
    source_tables: list[str]
    component_type: str
    warnings: list[str]
    visualization_decision: VisualizationDecision | None = None
    replan_count: int = 0
    semantic_catalog_version: str = CATALOG_VERSION
    def as_dict(self) -> dict[str, Any]:
        decision = self.visualization_decision
        return {
            "prompt": self.prompt,
            "sql": self.sql,
@@ -108,6 +124,23 @@ class NaturalQueryResult:
            "sourceTables": self.source_tables,
            "componentType": self.component_type,
            "warnings": self.warnings,
            "semanticCatalogVersion": self.semantic_catalog_version,
            "replanCount": self.replan_count,
            "visualizationDecision": {
                "xAxis": decision.x_axis,
                "yAxis": decision.y_axis,
                "dimensionCols": decision.dimension_cols,
                "measureCols": decision.measure_cols,
                "widthMode": decision.width_mode,
                "minHeightPx": decision.min_height_px,
                "skeletonVariant": decision.skeleton_variant,
                "vizParams": decision.viz_params,
                "dataBindings": decision.data_bindings,
                "confidence": decision.confidence,
                "reasoning": decision.reasoning,
            }
            if decision
            else {},
        }
@@ -118,48 +151,74 @@ def sanitize_sql(sql: str, row_limit: int) -> tuple[str, list[str], list[str]]:
        raise ValueError("Oracle SQL agent only accepts SELECT or WITH queries.")
    if DESTRUCTIVE_SQL.search(clean):
        raise ValueError("Oracle SQL agent blocked non-read SQL.")
-    tables = []
+
    tables: list[str] = []
    for match in TABLE_REF_RE.finditer(clean):
        table = match.group(1).split(".")[-1].strip('"').lower()
        if table in {"lateral", "select"}:
            continue
        if table and table not in tables:
            tables.append(table)
    if "limit" not in clean.lower():
        clean += f" LIMIT {row_limit}"
        warnings.append(f"Row cap {row_limit} auto-applied (query had no LIMIT).")
    return clean, tables, warnings
-def infer_component_type(prompt: str, columns: list[str], rows: list[dict[str, Any]]) -> str:
+def _detect_intents(prompt: str) -> list[str]:
-    lower = prompt.lower()
+    lowered = prompt.lower()
-    if any(term in lower for term in ("timeline", "conversation", "whatsapp", "message", "call", "email", "history")):
+    intents: list[str] = []
        return "activity_stream"
    if len(rows) == 1 and len(columns) <= 5 and any(isinstance(rows[0].get(c), (int, float)) for c in columns):
        return "kpi_tile"
    if any(c.endswith("_at") or c in {"date", "when", "timestamp", "happened_at"} for c in columns):
        if len(rows) > 1 and any(term in lower for term in ("trend", "over time", "timeseries")):
            return "line_chart"
        if any(term in lower for term in ("timeline", "activity", "last", "recent")):
            return "activity_stream"
    numeric_cols = [c for c in columns if rows and isinstance(rows[0].get(c), (int, float))]
    if numeric_cols and any(term in lower for term in ("count", "compare", "distribution", "most", "top", "by ")):
        return "bar_chart"
    return "table"
    if any(token in lowered for token in (
        "last contact", "last contacted", "recently contacted", "last call",
        "last message", "last whatsapp", "contacted us", "follow-up", "follow up",
        "days since", "no contact",
    )):
        intents.append("last_contacted")
-def _looks_like_property_rollup_prompt(prompt: str) -> bool:
+    if any(token in lowered for token in (
-    lower = prompt.lower()
+        "interested in", "shown interest", "interest in", "interested clients",
-    property_terms = ("property", "properties", "project", "projects")
+        "project interest", "property interest",
-    aggregate_terms = ("top", "most", "majority", "highest", "popular", "common")
+    )):
-    interest_terms = ("interest", "interested", "liked", "preference", "preferences")
+        intents.append("interested_clients")
-    return (
+
-        any(term in lower for term in property_terms)
+    if any(token in lowered for token in ("qd score", "qualification score", "desire score", "intent score", "qd")):
-        and any(term in lower for term in aggregate_terms)
+        intents.append("qd_score")
-        and any(term in lower for term in interest_terms)
+
-    )
+    if any(token in lowered for token in ("pipeline", "stage", "funnel", "kanban", "deal")):
        intents.append("pipeline")
    if any(token in lowered for token in ("site visit", "visited", "visit")):
        intents.append("site_visits")
    if any(token in lowered for token in ("call", "transcript", "whatsapp", "email", "message", "conversation", "interaction", "timeline", "activity")):
        intents.append("timeline")
    if any(token in lowered for token in ("objection", "concern", "complaint", "pushback")):
        intents.append("objections")
    if any(token in lowered for token in ("broker", "agent performance", "referral")):
        intents.append("broker_performance")
    if any(token in lowered for token in ("next action", "next step", "what should i do", "follow-up priority", "action queue")):
        intents.append("next_action")
    if any(token in lowered for token in ("project", "unit", "inventory", "available", "price", "configuration")):
        intents.append("inventory")
    if any(token in lowered for token in ("client 360", "dossier", "profile")):
        intents.append("client_360")
    if any(token in lowered for token in ("fact", "memory", "promise", "commitment", "budget", "preference")):
        intents.append("extracted_facts")
    return intents or ["last_contacted"]
 def title_from_prompt(prompt: str) -> str:
    words = re.sub(r"\s+", " ", prompt.strip()).strip(" ?.!")
-    return words[:1].upper() + words[1:80] if words else "Oracle Query Result"
+    return (words[:1].upper() + words[1:80]) if words else "Oracle Query Result"
 class NaturalDbAgent:
@@ -187,19 +246,22 @@ class NaturalDbAgent:
                ORDER BY c.table_name, c.ordinal_position
                """
            )
-            counts = {}
+            counts: dict[str, int | None] = {}
            for table in public_tables:
                exists = await conn.fetchval("SELECT to_regclass($1)", f"public.{table}")
                counts[table] = None if not exists else int(await conn.fetchval(f'SELECT COUNT(*) FROM "{table}"'))
            tables: dict[str, dict[str, Any]] = {}
            for row in rows:
                entry = tables.setdefault(row["table_name"], {"columns": [], "rowCount": counts.get(row["table_name"])})
-                entry["columns"].append({
+                entry["columns"].append(
-                    "name": row["column_name"],
+                    {
-                    "dataType": row["data_type"],
+                        "name": row["column_name"],
-                    "udtName": row["udt_name"],
+                        "dataType": row["data_type"],
-                    "nullable": row["is_nullable"] == "YES",
+                        "udtName": row["udt_name"],
-                })
+                        "nullable": row["is_nullable"] == "YES",
                    }
                )
            return {"available": True, "tables": tables, "allowedTables": public_tables}
        finally:
            if own_conn:
@@ -228,14 +290,19 @@ class NaturalDbAgent:
        return {
            "counts": counts,
            "expectedSyntheticV2Counts": expected,
-            "missingTables": [t for t, count in counts.items() if count is None],
+            "missingTables": [table for table, count in counts.items() if count is None],
-            "emptyTables": [t for t, count in counts.items() if count == 0],
+            "emptyTables": [table for table, count in counts.items() if count == 0],
-            "belowExpected": {t: {"expected": e, "actual": counts.get(t)} for t, e in expected.items() if (counts.get(t) or 0) < e},
+            "belowExpected": {
                table: {"expected": expected_count, "actual": counts.get(table)}
                for table, expected_count in expected.items()
                if (counts.get(table) or 0) < expected_count
            },
        }
    async def execute_prompt(self, prompt: str, *, row_limit: int = 100, conn: Any | None = None) -> NaturalQueryResult:
        if not prompt.strip():
            raise ValueError("Prompt is required.")
        own_conn = conn is None
        if conn is None:
            if not db_ready():
@@ -243,91 +310,249 @@ class NaturalDbAgent:
            conn = await connect_db()
        try:
            catalog = await self.schema_catalog(conn)
-            plan = await self._plan_sql(prompt, catalog, row_limit)
+            detected_intents = _detect_intents(prompt)
-            return await self._run_plan(conn, prompt, plan, row_limit)
+            return await self._pipeline(
                conn=conn,
                prompt=prompt,
                catalog=catalog,
                detected_intents=detected_intents,
                row_limit=row_limit,
                attempt=0,
                prior_feedback=None,
            )
        finally:
            if own_conn:
                await conn.close()
-    async def _run_plan(self, conn: Any, prompt: str, plan: dict[str, Any], row_limit: int) -> NaturalQueryResult:
+    async def _pipeline(
        self,
        *,
        conn: Any,
        prompt: str,
        catalog: dict[str, Any],
        detected_intents: list[str],
        row_limit: int,
        attempt: int,
        prior_feedback: str | None,
    ) -> NaturalQueryResult:
        warnings: list[str] = []
        plan = await self._plan_sql(
            prompt=prompt,
            catalog=catalog,
            detected_intents=detected_intents,
            row_limit=row_limit,
            prior_feedback=prior_feedback,
        )
        raw_sql = str(plan.get("sql") or "").strip()
        if not raw_sql:
            raise RuntimeError("Natural SQL planner returned no SQL.")
-        sql, tables, warnings = sanitize_sql(raw_sql, row_limit)
+
        verification = await plan_verifier.verify_and_repair(
            sql=raw_sql,
            prompt=prompt,
            detected_intents=detected_intents,
            row_limit=row_limit,
            llm_service=runtime_llm_service,
        )
        if verification.was_repaired:
            warnings.append(
                "Plan verifier repaired violations: "
                + ", ".join(violation.rule for violation in verification.violations if violation.severity == "blocking")
            )
        if not verification.passed and verification.repair_failed:
            warnings.append("Plan verifier found violations but repair failed. Proceeding with original SQL.")
        if verification.notes:
            warnings.extend(verification.notes)
        effective_sql, source_tables, sanitize_warnings = sanitize_sql(verification.sql, row_limit)
        warnings.extend(sanitize_warnings)
        try:
-            records = await conn.fetch(sql)
+            records = await conn.fetch(effective_sql)
        except Exception as exc:
            raise RuntimeError(f"Natural SQL execution failed: {exc}") from exc
        rows = [_json_safe(dict(record)) for record in records]
        columns = list(rows[0].keys()) if rows else []
-        component_type = infer_component_type(prompt, columns, rows)
+
        profile = execution_profiler.profile(
            rows=rows,
            columns=columns,
            sql=effective_sql,
            prompt=prompt,
            source_tables=source_tables,
            row_limit=row_limit,
        )
        if not profile.passed and attempt < _MAX_REPLAN_ATTEMPTS:
            feedback = " | ".join(profile.replan_hints)
            warnings.append(f"Auto-replan triggered (attempt {attempt + 1}): {feedback[:160]}")
            return await self._pipeline(
                conn=conn,
                prompt=prompt,
                catalog=catalog,
                detected_intents=detected_intents,
                row_limit=row_limit,
                attempt=attempt + 1,
                prior_feedback=feedback,
            )
        if not profile.passed:
            for issue in profile.issues:
                if issue.severity == "blocking":
                    warnings.append(f"Quality issue after {attempt} replans: [{issue.code}] {issue.description}")
        visualization_decision = visualization_planner.plan(
            rows=rows,
            columns=columns,
            prompt=prompt,
            source_tables=source_tables,
            profile_suggested_type=profile.suggested_component_type,
            title_from_planner=str(plan.get("title") or ""),
        )
        title = visualization_decision.title or str(plan.get("title") or title_from_prompt(prompt))
        summary = str(plan.get("rationale") or f"SQL-backed Oracle result from {', '.join(source_tables) or 'Velocity CRM'}.")
        return NaturalQueryResult(
            prompt=prompt,
-            sql=sql,
+            sql=effective_sql,
-            title=str(plan.get("title") or title_from_prompt(prompt)),
+            title=title,
-            summary=str(plan.get("rationale") or f"SQL-backed Oracle result from {', '.join(tables) or 'Velocity CRM'}."),
+            summary=summary,
            columns=columns,
            rows=rows,
            row_count=len(rows),
-            source_tables=tables,
+            source_tables=source_tables,
-            component_type=component_type,
+            component_type=visualization_decision.component_type,
            warnings=warnings,
            visualization_decision=visualization_decision,
            replan_count=attempt,
            semantic_catalog_version=CATALOG_VERSION,
        )
-    async def _plan_sql(self, prompt: str, catalog: dict[str, Any], row_limit: int) -> dict[str, Any]:
+    async def _plan_sql(
        self,
        *,
        prompt: str,
        catalog: dict[str, Any],
        detected_intents: list[str],
        row_limit: int,
        prior_feedback: str | None = None,
    ) -> dict[str, Any]:
        try:
            providers = runtime_llm_service._provider_catalog()
        except Exception:
            providers = {}
        if not providers:
-            raise RuntimeError("No runtime LLM providers are configured for Oracle natural planning.")
+            raise RuntimeError("No runtime LLM providers configured for Oracle natural planning.")
-        schema_brief = json.dumps(catalog.get("tables", {}), default=str)[:16000]
+
-        semantic_rules = """
+        schema_full = catalog.get("tables", {})
-Velocity SQL semantics:
+        relevant_tables = self._relevant_tables_for_intents(detected_intents)
- QD score means intel_qd_scores.current_value. Do not use crm_people.engagement_score, crm_leads.engagement_score, or intel_interactions.engagement_score as QD.
+        schema_brief_dict = {
- For project/property scoped prompts such as "in Atri Surya Toron", "interested in", "for project", or "for property", use crm_property_interests as the primary scoping table.
+            table: meta
- Prefer crm_property_interests.project_name for textual project matching. inventory_projects is optional for enrichment, not the primary client-to-project relationship.
+            for table, meta in schema_full.items()
- For client lists scoped to a project, join crm_people to crm_property_interests on person_id and filter project_name case-insensitively.
+            if table in relevant_tables or table in {"crm_people", "crm_leads", "inventory_projects", "inventory_units"}
- For lowest/highest/best/worst QD prompts, sort on intel_qd_scores.current_value ASC/DESC as requested.
+        }
- Respect the user-requested cardinality exactly when possible. If the prompt says five/top 5/lowest 5, return LIMIT 5.
+        schema_brief = json.dumps(schema_brief_dict, default=str)[:14000]
- When listing clients, include person identity fields from crm_people such as person_id, full_name, primary_phone, and primary_email.
+        semantic_context = build_semantic_context_for_planner(detected_intents, max_concepts=5)
- When aggregating top properties/projects, group by crm_property_interests.project_name and count DISTINCT person_id.
+
- You may use any table in the public schema that is relevant to the question.
+        replan_section = ""
- Use only read-only PostgreSQL SELECT/CTE queries.
+        if prior_feedback:
-"""
+            replan_section = (
-        system = (
+                f"\n\nPREVIOUS ATTEMPT FAILED - EXECUTION FEEDBACK:\n{prior_feedback}\n"
-            "You are Oracle's read-only PostgreSQL planner. Generate one useful SELECT or WITH query "
+                "You must address the feedback and change the query accordingly."
-            "for the user's CRM question. You have access to the full public schema. Return JSON with sql, title, rationale. "
+            )
-            "Never generate INSERT, UPDATE, DELETE, DDL, COPY, or permission statements."
+
-        )
+        response = await runtime_llm_service.chat(
-        try:
+            provider_id="sglang",
-            response = await runtime_llm_service.chat(
+            model=None,
-                provider_id="sglang",
+            system_prompt=(
-                model=None,
+                "You are Oracle's read-only PostgreSQL planner for Project Velocity CRM. "
-                system_prompt=system,
+                "Use the semantic catalog as the business source of truth, not raw column guessing. "
-                messages=[{
+                "Generate exactly one SELECT or WITH query. "
                "Return strict JSON with keys: sql, title, rationale. "
                "Never generate INSERT, UPDATE, DELETE, DDL, COPY, or permission statements."
            ),
            messages=[
                {
                    "role": "user",
                    "content": (
-                        f"Schema:\n{schema_brief}\n\n"
+                        f"SEMANTIC CATALOG:\n{semantic_context}\n\n"
-                        f"Semantic rules:\n{semantic_rules}\n\n"
+                        f"RAW SCHEMA:\n{schema_brief}\n\n"
-                        f"Question:\n{prompt}\n\n"
+                        f"DETECTED INTENTS: {', '.join(detected_intents)}\n\n"
-                        f"Row cap: {row_limit}\n\n"
+                        f"USER QUESTION:\n{prompt}\n\n"
-                        "Return strict JSON with keys: sql, title, rationale."
+                        f"ROW CAP: {row_limit}\n"
                        f"{replan_section}\n\n"
                        "Return strict JSON: {\"sql\": \"...\", \"title\": \"...\", \"rationale\": \"...\"}"
                    ),
-                }],
+                }
-                temperature=0.05,
+            ],
-                response_format="json",
+            temperature=0.05,
-                metadata={"agent": "oracle_natural_db_agent"},
+            response_format="json",
-            )
+            metadata={
-            message = response.get("message") or {}
+                "agent": "oracle_natural_db_agent_v2",
-            parsed = message.get("parsedJson")
+                "intents": detected_intents,
-            content = message.get("content") or "{}"
+                "catalog_version": CATALOG_VERSION,
-            if not isinstance(parsed, dict):
+            },
-                parsed = json.loads(content) if isinstance(content, str) else content
+        )
-            if isinstance(parsed, dict) and parsed.get("sql"):
+        message = response.get("message") or {}
-                return parsed
+        parsed = message.get("parsedJson")
-        except Exception as exc:
+        content = message.get("content") or "{}"
-            raise RuntimeError(f"Natural DB planner LLM failed: {exc}") from exc
+        if not isinstance(parsed, dict):
            parsed = json.loads(content) if isinstance(content, str) else content
        if isinstance(parsed, dict) and parsed.get("sql"):
            return parsed
        raise RuntimeError("Natural DB planner returned no valid SQL.")
    @staticmethod
    def _relevant_tables_for_intents(intents: list[str]) -> set[str]:
        intent_tables: dict[str, set[str]] = {
            "last_contacted": {
                "intel_interactions",
                "crm_people",
                "crm_leads",
                "read_last_contacted",
                "crm_last_contact_read_model",
            },
            "interested_clients": {
                "crm_property_interests",
                "crm_people",
                "inventory_projects",
                "intel_qd_scores",
            },
            "qd_score": {"intel_qd_scores", "crm_people"},
            "pipeline": {"crm_opportunities", "crm_leads", "crm_people", "inventory_projects"},
            "site_visits": {"intel_visits", "crm_people", "inventory_projects"},
            "timeline": {
                "intel_interactions",
                "intel_calls",
                "intel_whatsapp_threads",
                "intel_messages",
                "intel_emails",
                "intel_visits",
                "crm_people",
            },
            "objections": {"intel_call_objections", "crm_people", "inventory_projects"},
            "broker_performance": {"crm_leads", "crm_opportunities", "crm_people"},
            "next_action": {"read_next_best_action", "crm_people"},
            "inventory": {"inventory_projects", "inventory_units", "crm_property_interests"},
            "client_360": {
                "crm_people",
                "crm_leads",
                "intel_qd_scores",
                "crm_property_interests",
                "crm_opportunities",
                "intel_interactions",
                "read_last_contacted",
                "read_next_best_action",
            },
            "extracted_facts": {"intel_extracted_facts", "crm_people"},
        }
        tables: set[str] = set()
        for intent in intents:
            tables.update(intent_tables.get(intent, set()))
        return tables
 natural_db_agent = NaturalDbAgent()
--- a/backend/oracle/plan_verifier.py
+++ b/backend/oracle/plan_verifier.py
@@ -0,0 +1,235 @@
 """
 oracle/plan_verifier.py
 Verify planned SQL before execution and optionally repair common semantic errors.
 """
 from __future__ import annotations
 import json
 import logging
 import re
 from dataclasses import dataclass, field
 from typing import Any
 from .semantic_catalog import build_semantic_context_for_planner
 logger = logging.getLogger(__name__)
 _DESTRUCTIVE = re.compile(
    r"\b(insert|update|delete|drop|alter|truncate|copy|create|grant|revoke|call|execute|do|merge)\b",
    re.IGNORECASE,
 )
 _BAD_TIMESTAMP_PATTERNS: list[tuple[str, str]] = [
    ("edge_communication_events", "timestamp"),
    ("crm_property_interests", "last_discussed_at"),
    ("crm_property_interests", "last_interaction"),
 ]
 _BAD_SCORE_PATTERNS: list[tuple[str, str]] = [
    ("crm_people", "engagement_score"),
    ("crm_leads", "engagement_score"),
    ("intel_interactions", "engagement_score"),
    ("crm_people", "qd_score"),
    ("crm_leads", "qd_score"),
 ]
 _HALLUCINATED_COLUMNS: list[tuple[str, str]] = [
    ("intel_interactions", "broker_id"),
    ("intel_interactions", "sentiment"),
    ("crm_leads", "last_contacted_at"),
    ("crm_people", "last_contact"),
 ]
@dataclass
 class VerificationViolation:
    rule: str
    detail: str
    severity: str
@dataclass
 class VerificationResult:
    passed: bool
    sql: str
    original_sql: str
    violations: list[VerificationViolation] = field(default_factory=list)
    was_repaired: bool = False
    repair_attempted: bool = False
    repair_failed: bool = False
    notes: list[str] = field(default_factory=list)
 class PlanVerifier:
    def verify(self, sql: str, prompt: str, detected_intents: list[str], row_limit: int) -> VerificationResult:
        del prompt, detected_intents
        violations: list[VerificationViolation] = []
        sql_lower = sql.lower()
        if _DESTRUCTIVE.search(sql):
            violations.append(
                VerificationViolation(
                    rule="destructive_dml",
                    detail="SQL contains a write or DDL statement.",
                    severity="blocking",
                )
            )
        for table, column in _BAD_TIMESTAMP_PATTERNS:
            if table in sql_lower and column in sql_lower:
                violations.append(
                    VerificationViolation(
                        rule="deprecated_timestamp",
                        detail=(
                            f"SQL references {table}.{column}, which is sparse or deprecated. "
                            "Use intel_interactions.happened_at or read_last_contacted.last_contacted_at."
                        ),
                        severity="blocking",
                    )
                )
        for table, column in _BAD_SCORE_PATTERNS:
            if table in sql_lower and column in sql_lower:
                violations.append(
                    VerificationViolation(
                        rule="wrong_score_column",
                        detail=(
                            f"SQL references {table}.{column}, which is not the QD source of truth. "
                            "Use intel_qd_scores.current_value."
                        ),
                        severity="blocking",
                    )
                )
        for table, column in _HALLUCINATED_COLUMNS:
            if table in sql_lower and column in sql_lower:
                violations.append(
                    VerificationViolation(
                        rule="hallucinated_column",
                        detail=f"SQL references {table}.{column}, which does not exist in the live schema.",
                        severity="blocking",
                    )
                )
        if "limit" not in sql_lower:
            violations.append(
                VerificationViolation(
                    rule="missing_limit",
                    detail=f"SQL has no LIMIT clause; executor will enforce row cap {row_limit}.",
                    severity="warning",
                )
            )
        if re.search(r"\bselect\s+\*\b", sql_lower) and sql_lower.count("join") > 1:
            violations.append(
                VerificationViolation(
                    rule="select_star_join",
                    detail="SELECT * with multiple JOINs may create noisy wide rows.",
                    severity="warning",
                )
            )
        blocking = [violation for violation in violations if violation.severity == "blocking"]
        return VerificationResult(
            passed=len(blocking) == 0,
            sql=sql,
            original_sql=sql,
            violations=violations,
        )
    async def verify_and_repair(
        self,
        sql: str,
        prompt: str,
        detected_intents: list[str],
        row_limit: int,
        llm_service: Any | None = None,
    ) -> VerificationResult:
        result = self.verify(sql, prompt, detected_intents, row_limit)
        if result.passed:
            return result
        blocking = [violation for violation in result.violations if violation.severity == "blocking"]
        if not blocking:
            return result
        result.repair_attempted = True
        if llm_service is None:
            result.repair_failed = True
            result.notes.append("No LLM service available for SQL repair.")
            return result
        try:
            repaired_sql = await self._repair_sql(
                sql=sql,
                prompt=prompt,
                violations=blocking,
                detected_intents=detected_intents,
                row_limit=row_limit,
                llm_service=llm_service,
            )
        except Exception as exc:
            logger.warning("plan_verifier repair failed: %s", exc)
            result.repair_failed = True
            result.notes.append(f"Repair failed: {exc}")
            return result
        recheck = self.verify(repaired_sql, prompt, detected_intents, row_limit)
        recheck.original_sql = sql
        recheck.was_repaired = True
        recheck.repair_attempted = True
        recheck.notes.append(
            "Repaired violations: " + ", ".join(violation.rule for violation in blocking)
        )
        return recheck
    async def _repair_sql(
        self,
        *,
        sql: str,
        prompt: str,
        violations: list[VerificationViolation],
        detected_intents: list[str],
        row_limit: int,
        llm_service: Any,
    ) -> str:
        semantic_ctx = build_semantic_context_for_planner(detected_intents, max_concepts=4)
        violation_text = "\n".join(f"- [{violation.rule}] {violation.detail}" for violation in violations)
        response = await llm_service.chat(
            provider_id="sglang",
            model=None,
            system_prompt=(
                "You are Oracle's SQL repair agent. "
                "Fix only the listed violations. Return strict JSON with key 'sql'."
            ),
            messages=[
                {
                    "role": "user",
                    "content": (
                        f"Original prompt: {prompt}\n\n"
                        f"Semantic catalog:\n{semantic_ctx}\n\n"
                        f"Violations:\n{violation_text}\n\n"
                        f"Broken SQL:\n{sql}\n\n"
                        f"Row cap: {row_limit}\n\n"
                        "Return JSON: {\"sql\": \"<corrected SQL>\"}"
                    ),
                }
            ],
            temperature=0.0,
            response_format="json",
            metadata={"agent": "oracle_plan_verifier_repair"},
        )
        message = response.get("message") or {}
        parsed = message.get("parsedJson")
        if not isinstance(parsed, dict):
            content = message.get("content") or "{}"
            parsed = json.loads(content) if isinstance(content, str) else {}
        repaired = str(parsed.get("sql") or "").strip()
        if not repaired:
            raise ValueError("Repair LLM returned empty SQL.")
        return repaired
 plan_verifier = PlanVerifier()
--- a/backend/oracle/prompt_orchestrator.py
+++ b/backend/oracle/prompt_orchestrator.py
@@ -175,6 +175,27 @@ def _infer_chart_axes(rows: list[dict[str, Any]], columns: list[str]) -> tuple[s
    return x_axis, y_axis
 def _canonical_plan_type(plan_type: str) -> str:
    normalized = str(plan_type or "").strip()
    mapping = {
        "pipeline_board": "pipeline_board",
        "pipelineBoard": "pipeline_board",
        "bar_chart": "bar_chart",
        "barChart": "bar_chart",
        "geo_map": "geo_map",
        "geoMap": "geo_map",
        "table": "table",
        "line_chart": "line_chart",
        "lineChart": "line_chart",
        "kpi_tile": "kpi_tile",
        "kpiTile": "kpi_tile",
        "activity_stream": "activity_stream",
        "activityStream": "activity_stream",
        "timeline": "activity_stream",
    }
    return mapping.get(normalized, normalized or "table")
 _DATASET_MAP: dict[str, str] = {
    "pipeline_board": "crm_opportunity_pipeline",
    "bar_chart": "oracle_property_interest_rollup",
@@ -794,35 +815,35 @@ class PromptOrchestrator:
    ) -> dict[str, Any]:
        rows = result.get("rows") or []
        columns = result.get("columns") or (list(rows[0].keys()) if rows else [])
-        ctype = str(result.get("componentType") or "table")
+        ctype_raw = str(result.get("componentType") or "table")
-        mapped_type = self._map_type(ctype)
+        ctype = _canonical_plan_type(ctype_raw)
        mapped_type = self._map_type(ctype_raw)
        dataset = "oracle_natural_sql"
        component_id = str(uuid.uuid4())
        viz_decision = result.get("visualizationDecision") or {}
        x_axis, y_axis = _infer_chart_axes(rows, columns)
-        bindings = self._default_bindings(ctype)
+        bindings = dict(viz_decision.get("dataBindings") or self._default_bindings(ctype))
        viz_params = {
            **self._default_viz_params(ctype, dataset, rows),
            **dict(viz_decision.get("vizParams") or {}),
            "columns": columns,
            "sqlSummary": result.get("summary"),
            "sourceTables": result.get("sourceTables", []),
            "rowCount": result.get("rowCount", len(rows)),
        }
-        if ctype == "bar_chart":
+        if mapped_type in {"barChart", "lineChart"}:
-            if x_axis:
+            if not viz_params.get("xAxis") and x_axis:
                viz_params["xAxis"] = x_axis
-                bindings["dimensions"] = [x_axis]
+            if not viz_params.get("yAxis") and y_axis:
            if y_axis:
                viz_params["yAxis"] = y_axis
-                bindings["measures"] = [y_axis]
+            if not bindings.get("dimensions") and x_axis:
        elif ctype == "line_chart":
            if x_axis:
                bindings["dimensions"] = [x_axis]
-            if y_axis:
+            if not bindings.get("measures") and y_axis:
                bindings["measures"] = [y_axis]
        comp: dict[str, Any] = {
            "componentId": component_id,
            "type": mapped_type,
-            "title": result.get("title") or self._generate_title(prompt, ctype),
+            "title": result.get("title") or self._generate_title(prompt, ctype_raw),
            "description": f"SQL-backed Oracle result from: \"{prompt[:96]}\"",
            "dataSourceDescriptor": {
                "descriptorId": str(uuid.uuid4()),
@@ -849,12 +870,22 @@ class PromptOrchestrator:
                "sourceTables": result.get("sourceTables", []),
                "sqlSummary": result.get("summary"),
            },
-            "renderingHints": self._rendering_hints(ctype),
+            "renderingHints": {
                **self._rendering_hints(ctype),
                **(
                    {
                        "estimatedHeightPx": int(viz_decision.get("minHeightPx", 0) or 0),
                        "skeletonVariant": str(viz_decision.get("skeletonVariant") or ""),
                    }
                    if viz_decision
                    else {}
                ),
            },
            "layout": {
                "orderIndex": base_order + 100,
                "sectionId": section_id,
-                "widthMode": "full" if mapped_type in ("table", "pipelineBoard", "timeline", "activityStream") else "half",
+                "widthMode": str(viz_decision.get("widthMode") or ("full" if mapped_type in ("table", "pipelineBoard", "timeline", "activityStream") else "half")),
-                "minHeightPx": 320,
+                "minHeightPx": int(viz_decision.get("minHeightPx") or 320),
                "stickyHeader": False,
            },
            "accessControls": {
@@ -975,6 +1006,7 @@ class PromptOrchestrator:
    @staticmethod
    def _map_type(plan_type: str) -> str:
        plan_type = _canonical_plan_type(plan_type)
        mapping = {
            "pipeline_board": "pipelineBoard",
            "bar_chart": "barChart",
@@ -988,6 +1020,7 @@ class PromptOrchestrator:
    @staticmethod
    def _generate_title(prompt: str, comp_type: str) -> str:
        comp_type = _canonical_plan_type(comp_type)
        labels = {
            "pipeline_board": "Pipeline View",
            "bar_chart": "Comparative Analysis",
@@ -1001,6 +1034,7 @@ class PromptOrchestrator:
    @staticmethod
    def _default_viz_params(comp_type: str, dataset: str, rows: list[dict[str, Any]]) -> dict[str, Any]:
        comp_type = _canonical_plan_type(comp_type)
        first_row = rows[0] if rows else {}
        inferred_columns = [key for key in first_row.keys() if key not in {"avatar"}] or ["name", "status"]
        table_columns_by_dataset: dict[str, list[str]] = {
@@ -1038,10 +1072,12 @@ class PromptOrchestrator:
    @staticmethod
    def _default_bindings(comp_type: str) -> dict[str, Any]:
        del comp_type
        return {"dimensions": [], "measures": [], "series": [], "filters": []}
    @staticmethod
    def _rendering_hints(comp_type: str) -> dict[str, Any]:
        comp_type = _canonical_plan_type(comp_type)
        priority_map = {
            "pipeline_board": ("pipeline", 9), "bar_chart": ("chart", 8),
            "geo_map": ("map", 9), "table": ("table", 7),
--- a/backend/oracle/semantic_catalog.py
+++ b/backend/oracle/semantic_catalog.py
@@ -0,0 +1,360 @@
 """
 oracle/semantic_catalog.py
 Business-semantic layer for Oracle's natural DB planner.
 This sits between raw schema introspection and SQL generation. It defines:
 - authoritative tables and columns for business concepts
 - deprecated or sparse fields the planner should avoid
 - preferred join paths
 - compact semantic context for the planner prompt
 """
 from __future__ import annotations
 from dataclasses import dataclass, field
 from typing import Any
 class Confidence:
    RELIABLE = "reliable"
    PARTIAL = "partial"
    SPARSE = "sparse"
    DEPRECATED = "deprecated"
@dataclass(frozen=True)
 class FieldDescriptor:
    table: str
    column: str
    confidence: str
    description: str
    notes: str = ""
@dataclass(frozen=True)
 class JoinPath:
    from_table: str
    from_col: str
    to_table: str
    to_col: str
    join_type: str = "INNER"
    notes: str = ""
@dataclass
 class ConceptDescriptor:
    concept_id: str
    label: str
    description: str
    authoritative_fields: list[FieldDescriptor]
    deprecated_fields: list[FieldDescriptor] = field(default_factory=list)
    preferred_join_paths: list[JoinPath] = field(default_factory=list)
    usage_notes: str = ""
 CATALOG_VERSION = "velocity_semantic_v2026_04_25_01"
 CONCEPTS: list[ConceptDescriptor] = [
    ConceptDescriptor(
        concept_id="person_identity",
        label="Client Identity",
        description="Canonical identity record for a person in CRM.",
        authoritative_fields=[
            FieldDescriptor("crm_people", "person_id", Confidence.RELIABLE, "Primary key"),
            FieldDescriptor("crm_people", "full_name", Confidence.RELIABLE, "Display name"),
            FieldDescriptor("crm_people", "primary_email", Confidence.RELIABLE, "Email"),
            FieldDescriptor("crm_people", "primary_phone", Confidence.RELIABLE, "Phone"),
            FieldDescriptor("crm_people", "persona_labels", Confidence.PARTIAL, "Buyer persona labels"),
        ],
        usage_notes=(
            "Anchor client-level queries on crm_people.person_id. "
            "Treat crm_people as the identity source of truth."
        ),
    ),
    ConceptDescriptor(
        concept_id="lead_funnel",
        label="Lead Funnel",
        description="Lead ownership, stage, status, and urgency.",
        authoritative_fields=[
            FieldDescriptor("crm_leads", "lead_id", Confidence.RELIABLE, "Primary key"),
            FieldDescriptor("crm_leads", "person_id", Confidence.RELIABLE, "FK to crm_people"),
            FieldDescriptor("crm_leads", "stage", Confidence.RELIABLE, "Current funnel stage"),
            FieldDescriptor("crm_leads", "status", Confidence.RELIABLE, "Lead status"),
            FieldDescriptor("crm_leads", "assigned_user_id", Confidence.RELIABLE, "Owning user"),
            FieldDescriptor("crm_leads", "budget_band", Confidence.PARTIAL, "Budget band"),
            FieldDescriptor("crm_leads", "urgency", Confidence.PARTIAL, "Urgency tag"),
        ],
        preferred_join_paths=[
            JoinPath("crm_people", "person_id", "crm_leads", "person_id"),
        ],
    ),
    ConceptDescriptor(
        concept_id="qd_score",
        label="QD Score",
        description="Qualification / Desire score source of truth.",
        authoritative_fields=[
            FieldDescriptor("intel_qd_scores", "person_id", Confidence.RELIABLE, "FK to crm_people"),
            FieldDescriptor("intel_qd_scores", "current_value", Confidence.RELIABLE, "Authoritative QD score"),
            FieldDescriptor("intel_qd_scores", "score_type", Confidence.RELIABLE, "Score family"),
            FieldDescriptor("intel_qd_scores", "computed_at", Confidence.RELIABLE, "Score timestamp"),
        ],
        deprecated_fields=[
            FieldDescriptor("crm_people", "engagement_score", Confidence.DEPRECATED, "Not QD"),
            FieldDescriptor("crm_leads", "engagement_score", Confidence.DEPRECATED, "Not QD"),
            FieldDescriptor("intel_interactions", "engagement_score", Confidence.DEPRECATED, "Not QD"),
        ],
        usage_notes=(
            "When a prompt mentions QD, qualification, desire, or intent score, "
            "use intel_qd_scores.current_value. Do not substitute engagement_score."
        ),
    ),
    ConceptDescriptor(
        concept_id="communication_events",
        label="Communication Events",
        description="Authoritative recent-contact and interaction history source.",
        authoritative_fields=[
            FieldDescriptor("intel_interactions", "interaction_id", Confidence.RELIABLE, "Primary key"),
            FieldDescriptor("intel_interactions", "person_id", Confidence.RELIABLE, "FK to crm_people"),
            FieldDescriptor("intel_interactions", "channel", Confidence.RELIABLE, "Interaction channel"),
            FieldDescriptor("intel_interactions", "interaction_type", Confidence.RELIABLE, "Interaction type"),
            FieldDescriptor("intel_interactions", "happened_at", Confidence.RELIABLE, "Primary recency timestamp"),
            FieldDescriptor("intel_interactions", "summary", Confidence.RELIABLE, "Interaction summary"),
        ],
        deprecated_fields=[
            FieldDescriptor("edge_communication_events", "timestamp", Confidence.SPARSE, "Do not use for recency"),
            FieldDescriptor("crm_property_interests", "last_discussed_at", Confidence.SPARSE, "Do not use for recency"),
        ],
        preferred_join_paths=[
            JoinPath("crm_people", "person_id", "intel_interactions", "person_id", "LEFT"),
            JoinPath("intel_interactions", "interaction_id", "intel_calls", "interaction_id", "LEFT"),
            JoinPath("intel_interactions", "interaction_id", "intel_messages", "interaction_id", "LEFT"),
            JoinPath("intel_interactions", "interaction_id", "intel_emails", "interaction_id", "LEFT"),
        ],
        usage_notes=(
            "For recent contact, last contact, or contacted us, prefer intel_interactions.happened_at. "
            "Use read_last_contacted if available for precomputed summaries."
        ),
    ),
    ConceptDescriptor(
        concept_id="last_contact_read_model",
        label="Last Contact Read Model",
        description="Per-person last-contact summary materialization.",
        authoritative_fields=[
            FieldDescriptor("read_last_contacted", "person_id", Confidence.RELIABLE, "FK to crm_people"),
            FieldDescriptor("read_last_contacted", "last_contacted_at", Confidence.RELIABLE, "Last contact time"),
            FieldDescriptor("read_last_contacted", "last_channel", Confidence.RELIABLE, "Last contact channel"),
            FieldDescriptor("read_last_contacted", "days_since_last_contact", Confidence.RELIABLE, "Recency in days"),
            FieldDescriptor("read_last_contacted", "staleness_label", Confidence.RELIABLE, "Hot/warm/cold bucket"),
        ],
        deprecated_fields=[
            FieldDescriptor("crm_property_interests", "last_discussed_at", Confidence.DEPRECATED, "Stale field"),
        ],
        usage_notes=(
            "If this table exists, prefer it for last-contact prompts over rebuilding recency from raw interactions."
        ),
    ),
    ConceptDescriptor(
        concept_id="next_best_action",
        label="Next Best Action",
        description="Precomputed follow-up action recommendations.",
        authoritative_fields=[
            FieldDescriptor("read_next_best_action", "person_id", Confidence.RELIABLE, "FK to crm_people"),
            FieldDescriptor("read_next_best_action", "action_label", Confidence.RELIABLE, "Human-readable action"),
            FieldDescriptor("read_next_best_action", "urgency", Confidence.RELIABLE, "Urgency"),
            FieldDescriptor("read_next_best_action", "recommended_channel", Confidence.RELIABLE, "Suggested channel"),
            FieldDescriptor("read_next_best_action", "execute_within_hours", Confidence.RELIABLE, "Action SLA"),
        ],
    ),
    ConceptDescriptor(
        concept_id="property_interest",
        label="Property Interest",
        description="Client-level project or unit interest records.",
        authoritative_fields=[
            FieldDescriptor("crm_property_interests", "interest_id", Confidence.RELIABLE, "Primary key"),
            FieldDescriptor("crm_property_interests", "person_id", Confidence.RELIABLE, "FK to crm_people"),
            FieldDescriptor("crm_property_interests", "project_id", Confidence.PARTIAL, "FK to inventory_projects"),
            FieldDescriptor("crm_property_interests", "project_name", Confidence.RELIABLE, "Primary text project scope"),
            FieldDescriptor("crm_property_interests", "unit_id", Confidence.PARTIAL, "FK to inventory_units"),
            FieldDescriptor("crm_property_interests", "interest_level", Confidence.RELIABLE, "Interest strength"),
            FieldDescriptor("crm_property_interests", "configuration_preference", Confidence.PARTIAL, "Configuration"),
            FieldDescriptor("crm_property_interests", "budget_min", Confidence.PARTIAL, "Minimum budget"),
            FieldDescriptor("crm_property_interests", "budget_max", Confidence.PARTIAL, "Maximum budget"),
            FieldDescriptor("crm_property_interests", "financing_plan", Confidence.PARTIAL, "Financing plan"),
            FieldDescriptor("crm_property_interests", "notes", Confidence.PARTIAL, "Free-text notes"),
        ],
        deprecated_fields=[
            FieldDescriptor("crm_property_interests", "last_discussed_at", Confidence.DEPRECATED, "Do not use for recency"),
        ],
        preferred_join_paths=[
            JoinPath("crm_people", "person_id", "crm_property_interests", "person_id", "LEFT"),
            JoinPath("crm_property_interests", "project_id", "inventory_projects", "project_id", "LEFT"),
        ],
        usage_notes=(
            "For prompts scoped to a specific property or project, filter on crm_property_interests.project_name "
            "case-insensitively. For top properties, group by project_name and count distinct person_id."
        ),
    ),
    ConceptDescriptor(
        concept_id="opportunities",
        label="Opportunities",
        description="Deal pipeline records.",
        authoritative_fields=[
            FieldDescriptor("crm_opportunities", "opportunity_id", Confidence.RELIABLE, "Primary key"),
            FieldDescriptor("crm_opportunities", "lead_id", Confidence.RELIABLE, "FK to crm_leads"),
            FieldDescriptor("crm_opportunities", "project_id", Confidence.RELIABLE, "FK to inventory_projects"),
            FieldDescriptor("crm_opportunities", "stage", Confidence.RELIABLE, "Opportunity stage"),
            FieldDescriptor("crm_opportunities", "value", Confidence.RELIABLE, "Deal value"),
            FieldDescriptor("crm_opportunities", "probability", Confidence.PARTIAL, "Probability"),
            FieldDescriptor("crm_opportunities", "next_action", Confidence.RELIABLE, "Next action"),
        ],
        preferred_join_paths=[
            JoinPath("crm_people", "person_id", "crm_leads", "person_id"),
            JoinPath("crm_leads", "lead_id", "crm_opportunities", "lead_id", "LEFT"),
            JoinPath("crm_opportunities", "project_id", "inventory_projects", "project_id", "LEFT"),
        ],
    ),
    ConceptDescriptor(
        concept_id="site_visits",
        label="Site Visits",
        description="Physical visit records and outcomes.",
        authoritative_fields=[
            FieldDescriptor("intel_visits", "visit_id", Confidence.RELIABLE, "Primary key"),
            FieldDescriptor("intel_visits", "person_id", Confidence.RELIABLE, "FK to crm_people"),
            FieldDescriptor("intel_visits", "project_id", Confidence.PARTIAL, "FK to inventory_projects"),
            FieldDescriptor("intel_visits", "project_name", Confidence.PARTIAL, "Project name"),
            FieldDescriptor("intel_visits", "visited_at", Confidence.RELIABLE, "Visit timestamp"),
            FieldDescriptor("intel_visits", "visit_notes", Confidence.RELIABLE, "Visit notes"),
        ],
    ),
    ConceptDescriptor(
        concept_id="inventory",
        label="Inventory",
        description="Project and unit master data.",
        authoritative_fields=[
            FieldDescriptor("inventory_projects", "project_id", Confidence.RELIABLE, "Primary key"),
            FieldDescriptor("inventory_projects", "project_name", Confidence.RELIABLE, "Project name"),
            FieldDescriptor("inventory_projects", "developer_name", Confidence.RELIABLE, "Developer"),
            FieldDescriptor("inventory_projects", "micro_market", Confidence.RELIABLE, "Micro market"),
            FieldDescriptor("inventory_units", "unit_id", Confidence.RELIABLE, "Primary key"),
            FieldDescriptor("inventory_units", "project_id", Confidence.RELIABLE, "FK to inventory_projects"),
            FieldDescriptor("inventory_units", "configuration", Confidence.RELIABLE, "Configuration"),
            FieldDescriptor("inventory_units", "price_current", Confidence.RELIABLE, "Current price"),
            FieldDescriptor("inventory_units", "status", Confidence.RELIABLE, "Unit status"),
        ],
    ),
    ConceptDescriptor(
        concept_id="extracted_facts",
        label="Extracted Facts",
        description="AI-extracted CRM memory facts.",
        authoritative_fields=[
            FieldDescriptor("intel_extracted_facts", "fact_id", Confidence.RELIABLE, "Primary key"),
            FieldDescriptor("intel_extracted_facts", "person_id", Confidence.RELIABLE, "FK to crm_people"),
            FieldDescriptor("intel_extracted_facts", "fact_type", Confidence.RELIABLE, "Fact type"),
            FieldDescriptor("intel_extracted_facts", "fact_text", Confidence.RELIABLE, "Fact text"),
            FieldDescriptor("intel_extracted_facts", "confidence", Confidence.RELIABLE, "Extraction confidence"),
            FieldDescriptor("intel_extracted_facts", "effective_date", Confidence.PARTIAL, "Fact date"),
        ],
    ),
    ConceptDescriptor(
        concept_id="call_objections",
        label="Call Objections",
        description="Structured objections extracted from calls.",
        authoritative_fields=[
            FieldDescriptor("intel_call_objections", "objection_id", Confidence.RELIABLE, "Primary key"),
            FieldDescriptor("intel_call_objections", "person_id", Confidence.RELIABLE, "FK to crm_people"),
            FieldDescriptor("intel_call_objections", "objection_type", Confidence.RELIABLE, "Objection type"),
            FieldDescriptor("intel_call_objections", "objection_text", Confidence.RELIABLE, "Objection text"),
            FieldDescriptor("intel_call_objections", "intensity", Confidence.RELIABLE, "Intensity"),
            FieldDescriptor("intel_call_objections", "was_resolved", Confidence.RELIABLE, "Resolution flag"),
            FieldDescriptor("intel_call_objections", "raised_at", Confidence.RELIABLE, "Raised timestamp"),
        ],
    ),
 ]
 _CONCEPT_INDEX: dict[str, ConceptDescriptor] = {concept.concept_id: concept for concept in CONCEPTS}
 def get_concept(concept_id: str) -> ConceptDescriptor | None:
    return _CONCEPT_INDEX.get(concept_id)
 def all_concepts() -> list[ConceptDescriptor]:
    return CONCEPTS
 INTENT_CONCEPT_MAP: dict[str, list[str]] = {
    "last_contacted": ["last_contact_read_model", "communication_events", "person_identity"],
    "interested_clients": ["property_interest", "person_identity", "lead_funnel"],
    "qd_score": ["qd_score", "person_identity"],
    "pipeline": ["opportunities", "lead_funnel", "person_identity"],
    "site_visits": ["site_visits", "person_identity", "property_interest"],
    "timeline": ["communication_events", "person_identity"],
    "objections": ["call_objections", "communication_events", "person_identity"],
    "broker_performance": ["lead_funnel", "opportunities"],
    "next_action": ["next_best_action", "person_identity", "lead_funnel"],
    "inventory": ["inventory", "property_interest"],
    "extracted_facts": ["extracted_facts", "person_identity"],
    "client_360": [
        "person_identity",
        "lead_funnel",
        "qd_score",
        "communication_events",
        "property_interest",
        "opportunities",
        "next_best_action",
    ],
 }
 def concepts_for_intent(intent: str) -> list[ConceptDescriptor]:
    ids = INTENT_CONCEPT_MAP.get(intent, ["person_identity", "lead_funnel"])
    return [_CONCEPT_INDEX[concept_id] for concept_id in ids if concept_id in _CONCEPT_INDEX]
 def _field_to_dict(field: FieldDescriptor) -> dict[str, Any]:
    return {
        "table": field.table,
        "column": field.column,
        "confidence": field.confidence,
        "description": field.description,
        **({"notes": field.notes} if field.notes else {}),
    }
 def concept_to_dict(concept: ConceptDescriptor) -> dict[str, Any]:
    return {
        "concept_id": concept.concept_id,
        "label": concept.label,
        "description": concept.description,
        "authoritative_fields": [_field_to_dict(field) for field in concept.authoritative_fields],
        "deprecated_fields": [_field_to_dict(field) for field in concept.deprecated_fields],
        "preferred_join_paths": [
            {
                "from": f"{join.from_table}.{join.from_col}",
                "to": f"{join.to_table}.{join.to_col}",
                "join_type": join.join_type,
                **({"notes": join.notes} if join.notes else {}),
            }
            for join in concept.preferred_join_paths
        ],
        **({"usage_notes": concept.usage_notes} if concept.usage_notes else {}),
    }
 def build_semantic_context_for_planner(detected_intents: list[str], *, max_concepts: int = 5) -> str:
    import json
    seen: set[str] = set()
    ordered: list[ConceptDescriptor] = []
    for intent in detected_intents:
        for concept in concepts_for_intent(intent):
            if concept.concept_id not in seen:
                seen.add(concept.concept_id)
                ordered.append(concept)
    return json.dumps(
        {
            "catalog_version": CATALOG_VERSION,
            "concepts": [concept_to_dict(concept) for concept in ordered[:max_concepts]],
        },
        separators=(",", ":"),
    )
--- a/backend/oracle/visualization_planner.py
+++ b/backend/oracle/visualization_planner.py
@@ -0,0 +1,382 @@
 """
 oracle/visualization_planner.py
 Pick Oracle canvas renderer types from actual result shape.
 """
 from __future__ import annotations
 import re
 from dataclasses import dataclass
 from typing import Any
@dataclass
 class ColumnProfile:
    name: str
    is_numeric: bool
    is_string: bool
    is_datetime: bool
    is_boolean: bool
    null_rate: float
    sample_values: list[Any]
@dataclass
 class VisualizationDecision:
    component_type: str
    x_axis: str | None
    y_axis: str | None
    series_cols: list[str]
    dimension_cols: list[str]
    measure_cols: list[str]
    title: str
    width_mode: str
    min_height_px: int
    skeleton_variant: str
    viz_params: dict[str, Any]
    data_bindings: dict[str, Any]
    confidence: float
    reasoning: str
 def _looks_like_timestamp(value: str) -> bool:
    return bool(re.match(r"\d{4}-\d{2}-\d{2}", value))
 def _profile_columns(rows: list[dict[str, Any]], columns: list[str]) -> list[ColumnProfile]:
    if not rows:
        return [ColumnProfile(column, False, False, False, False, 1.0, []) for column in columns]
    sample_size = min(len(rows), 20)
    profiles: list[ColumnProfile] = []
    for column in columns:
        values = [rows[index].get(column) for index in range(sample_size)]
        non_null = [value for value in values if value is not None]
        null_rate = 1.0 - len(non_null) / sample_size if sample_size else 1.0
        profiles.append(
            ColumnProfile(
                name=column,
                is_numeric=any(isinstance(value, (int, float)) for value in non_null),
                is_string=any(isinstance(value, str) and not _looks_like_timestamp(value) for value in non_null[:5]),
                is_datetime=any(isinstance(value, str) and _looks_like_timestamp(value) for value in non_null[:5]),
                is_boolean=any(isinstance(value, bool) for value in non_null),
                null_rate=null_rate,
                sample_values=non_null[:3],
            )
        )
    return profiles
 _DIMENSION_HINTS = {
    "name", "full_name", "project_name", "developer_name", "agent_name",
    "broker_company", "category", "label", "stage", "channel", "type",
    "micro_market", "district", "status", "persona", "nationality",
 }
 _MEASURE_HINTS = {
    "count", "total", "sum", "avg", "average", "value", "score", "rate",
    "current_value", "qd_score", "probability", "interest_count", "visit_count",
    "interaction_count", "days", "amount", "revenue",
 }
 _TIMESTAMP_HINTS = {"at", "date", "time", "when", "timestamp"}
 _PREFERRED_X = [
    "project_name", "developer_name", "category", "stage", "channel",
    "micro_market", "broker_company", "agent_name", "name", "full_name",
    "label", "status", "type",
 ]
 _PREFERRED_Y = [
    "count", "total", "interested_clients", "interest_count", "client_count",
    "current_value", "qd_score", "value", "probability", "interaction_count",
    "visit_count", "days_since_last_contact",
 ]
 _TABLE_COLUMN_PRESETS: dict[str, list[str]] = {
    "crm_people": ["full_name", "primary_phone", "primary_email", "persona_labels"],
    "intel_qd_scores": ["full_name", "current_value", "score_type", "computed_at"],
    "crm_leads": ["full_name", "stage", "status", "budget_band", "urgency"],
    "intel_interactions": ["full_name", "channel", "interaction_type", "happened_at", "summary"],
    "read_last_contacted": ["full_name", "last_contacted_at", "last_channel", "days_since_last_contact", "staleness_label"],
    "crm_property_interests": ["full_name", "project_name", "interest_level", "configuration_preference"],
    "intel_call_objections": ["full_name", "objection_type", "intensity", "was_resolved", "raised_at"],
    "intel_extracted_facts": ["full_name", "fact_type", "fact_text", "confidence", "effective_date"],
    "read_next_best_action": ["full_name", "action_label", "urgency", "recommended_channel", "execute_within_hours"],
 }
 def _pick_axis(candidates: list[str], preferred: list[str]) -> str | None:
    for candidate in preferred:
        if candidate in candidates:
            return candidate
    return candidates[0] if candidates else None
 def _title_from_prompt(prompt: str) -> str:
    words = re.sub(r"\s+", " ", prompt.strip()).strip(" ?.!")[:72]
    return (words[:1].upper() + words[1:]) if words else "Oracle Query Result"
 class VisualizationPlanner:
    def plan(
        self,
        *,
        rows: list[dict[str, Any]],
        columns: list[str],
        prompt: str,
        source_tables: list[str],
        profile_suggested_type: str | None = None,
        title_from_planner: str | None = None,
    ) -> VisualizationDecision:
        profiles = _profile_columns(rows, columns)
        classifications = {profile.name: self._classify_column(profile) for profile in profiles}
        dimensions = [column for column, kind in classifications.items() if kind == "dimension"]
        measures = [column for column, kind in classifications.items() if kind == "measure"]
        timestamps = [column for column, kind in classifications.items() if kind == "timestamp"]
        row_count = len(rows)
        prompt_lower = prompt.lower()
        if profile_suggested_type:
            return self._build_decision(
                component_type=profile_suggested_type,
                dimensions=dimensions,
                measures=measures,
                timestamps=timestamps,
                columns=columns,
                rows=rows,
                row_count=row_count,
                prompt=prompt,
                source_tables=source_tables,
                title=title_from_planner,
                reasoning=f"Execution profiler suggested {profile_suggested_type}",
                confidence=0.9,
            )
        timeline_terms = ("timeline", "history", "activity", "message", "call log", "whatsapp", "email", "conversation", "transcript", "interaction")
        if any(term in prompt_lower for term in timeline_terms) and timestamps:
            return self._build_decision(
                component_type="activityStream",
                dimensions=dimensions,
                measures=measures,
                timestamps=timestamps,
                columns=columns,
                rows=rows,
                row_count=row_count,
                prompt=prompt,
                source_tables=source_tables,
                title=title_from_planner,
                reasoning="Activity-like prompt plus timestamped result.",
                confidence=0.88,
            )
        if row_count == 1 and measures and not dimensions:
            return self._build_decision(
                component_type="kpiTile",
                dimensions=dimensions,
                measures=measures,
                timestamps=timestamps,
                columns=columns,
                rows=rows,
                row_count=row_count,
                prompt=prompt,
                source_tables=source_tables,
                title=title_from_planner,
                reasoning="Single numeric row.",
                confidence=0.92,
            )
        if timestamps and measures and any(term in prompt_lower for term in ("trend", "over time", "monthly", "weekly", "growth", "timeseries")):
            return self._build_decision(
                component_type="lineChart",
                dimensions=dimensions,
                measures=measures,
                timestamps=timestamps,
                columns=columns,
                rows=rows,
                row_count=row_count,
                prompt=prompt,
                source_tables=source_tables,
                title=title_from_planner,
                reasoning="Temporal series plus measure.",
                confidence=0.87,
            )
        if ("stage" in columns or "pipeline" in prompt_lower) and any(term in prompt_lower for term in ("pipeline", "funnel", "stage", "kanban", "deal")):
            return self._build_decision(
                component_type="pipelineBoard",
                dimensions=dimensions,
                measures=measures,
                timestamps=timestamps,
                columns=columns,
                rows=rows,
                row_count=row_count,
                prompt=prompt,
                source_tables=source_tables,
                title=title_from_planner,
                reasoning="Pipeline-like prompt and stage-like data.",
                confidence=0.85,
            )
        if dimensions and measures and row_count <= 30 and not timestamps:
            return self._build_decision(
                component_type="barChart",
                dimensions=dimensions,
                measures=measures,
                timestamps=timestamps,
                columns=columns,
                rows=rows,
                row_count=row_count,
                prompt=prompt,
                source_tables=source_tables,
                title=title_from_planner,
                reasoning="Categorical dimension plus measure.",
                confidence=0.8,
            )
        return self._build_decision(
            component_type="table",
            dimensions=dimensions,
            measures=measures,
            timestamps=timestamps,
            columns=columns,
            rows=rows,
            row_count=row_count,
            prompt=prompt,
            source_tables=source_tables,
            title=title_from_planner,
            reasoning="Default structured table.",
            confidence=0.7,
        )
    @staticmethod
    def _classify_column(profile: ColumnProfile) -> str:
        lower = profile.name.lower()
        if lower.endswith("_id"):
            return "identity"
        if profile.is_datetime or any(token in lower for token in _TIMESTAMP_HINTS):
            return "timestamp"
        if lower in _DIMENSION_HINTS or (profile.is_string and not profile.is_numeric):
            return "dimension"
        if profile.is_numeric or any(token in lower for token in _MEASURE_HINTS):
            return "measure"
        return "other"
    def _build_decision(
        self,
        *,
        component_type: str,
        dimensions: list[str],
        measures: list[str],
        timestamps: list[str],
        columns: list[str],
        rows: list[dict[str, Any]],
        row_count: int,
        prompt: str,
        source_tables: list[str],
        title: str | None,
        reasoning: str,
        confidence: float,
    ) -> VisualizationDecision:
        x_axis = _pick_axis(dimensions + timestamps, _PREFERRED_X + list(timestamps))
        y_axis = _pick_axis(measures, _PREFERRED_Y)
        if component_type == "table":
            display_columns = self._table_columns(columns, source_tables)
        else:
            display_columns = columns
        viz_params = self._build_viz_params(
            component_type=component_type,
            x_axis=x_axis,
            y_axis=y_axis,
            display_columns=display_columns,
            row_count=row_count,
        )
        data_bindings = {
            "dimensions": dimensions[:2] if dimensions else (timestamps[:1] if timestamps else []),
            "measures": measures[:3],
            "series": [],
            "filters": [],
        }
        width_mode = "full" if component_type in {"table", "activityStream", "pipelineBoard"} else "half"
        height_map = {
            "kpiTile": 140,
            "barChart": 320,
            "lineChart": 320,
            "activityStream": 380,
            "table": 300,
            "pipelineBoard": 400,
        }
        skeleton_map = {
            "kpiTile": "kpi",
            "barChart": "chart",
            "lineChart": "chart",
            "activityStream": "table",
            "table": "table",
            "pipelineBoard": "pipeline",
        }
        return VisualizationDecision(
            component_type=component_type,
            x_axis=x_axis,
            y_axis=y_axis,
            series_cols=[],
            dimension_cols=dimensions,
            measure_cols=measures,
            title=title or _title_from_prompt(prompt),
            width_mode=width_mode,
            min_height_px=height_map.get(component_type, 300),
            skeleton_variant=skeleton_map.get(component_type, "generic"),
            viz_params=viz_params,
            data_bindings=data_bindings,
            confidence=confidence,
            reasoning=reasoning,
        )
    @staticmethod
    def _table_columns(all_columns: list[str], source_tables: list[str]) -> list[str]:
        for table in source_tables:
            preset = _TABLE_COLUMN_PRESETS.get(table)
            if preset:
                matched = [column for column in preset if column in all_columns]
                if matched:
                    return matched
        return [column for column in all_columns if not column.endswith("_id") or column == "person_id"][:8]
    @staticmethod
    def _build_viz_params(
        *,
        component_type: str,
        x_axis: str | None,
        y_axis: str | None,
        display_columns: list[str],
        row_count: int,
    ) -> dict[str, Any]:
        del row_count
        if component_type == "barChart":
            return {
                "xAxis": x_axis or "category",
                "yAxis": y_axis or "value",
                "sort": "desc",
                "showLabels": True,
                "legend": False,
            }
        if component_type == "lineChart":
            return {"showPoints": True, "smooth": True}
        if component_type == "kpiTile":
            return {"label": "Result", "trend": "", "comparisonLabel": ""}
        if component_type == "table":
            return {
                "columns": display_columns,
                "emptyStateTitle": "No matching records found",
                "emptyStateDescription": "The query ran successfully but returned no rows for this prompt.",
                "rankBy": y_axis,
                "showTopBadge": False,
            }
        if component_type == "activityStream":
            return {"showUrgencyIndicator": True}
        if component_type == "pipelineBoard":
            return {"showValue": True, "colorByStage": True}
        return {}
 visualization_planner = VisualizationPlanner()