fix: Oracle Canvas JSON Component Generation planning and orchestration logic

2026-04-24 05:14:11 +05:30
parent 9f27e6a017
commit cf602822b0
6 changed files with 1555 additions and 115 deletions
--- a/backend/oracle/visualization_planner.py
+++ b/backend/oracle/visualization_planner.py
@@ -0,0 +1,382 @@
+"""
+oracle/visualization_planner.py
+
+Pick Oracle canvas renderer types from actual result shape.
+"""
+from __future__ import annotations
+
+import re
+from dataclasses import dataclass
+from typing import Any
+
+
+@dataclass
+class ColumnProfile:
+    """Lightweight type/null summary of one result column.
+
+    Instances are produced by ``_profile_columns`` from a small sample of rows.
+    When there are no rows every flag is False, ``null_rate`` is 1.0 and
+    ``sample_values`` is empty.
+    """
+
+    # Column name exactly as supplied in the ``columns`` list.
+    name: str
+    # True when a sampled non-null value is numeric.
+    is_numeric: bool
+    # True when a sampled value is a string that does not look like a date.
+    is_string: bool
+    # True when a sampled value looks like a date/timestamp.
+    is_datetime: bool
+    # True when a sampled non-null value is a bool.
+    is_boolean: bool
+    # Fraction (0.0-1.0) of sampled rows where the column is None or missing.
+    null_rate: float
+    # Up to three non-null example values from the sample.
+    sample_values: list[Any]
+
+
+@dataclass
+class VisualizationDecision:
+    """Outcome of ``VisualizationPlanner.plan``: which component to render and how."""
+
+    # Renderer identifier, e.g. "table", "barChart", "lineChart", "kpiTile".
+    component_type: str
+    # Column chosen for the x axis (a dimension or timestamp), or None.
+    x_axis: str | None
+    # Column chosen for the y axis (a measure), or None.
+    y_axis: str | None
+    # Series columns; the planner currently always passes an empty list.
+    series_cols: list[str]
+    # Every column classified as a dimension.
+    dimension_cols: list[str]
+    # Every column classified as a measure.
+    measure_cols: list[str]
+    # Display title: the planner-supplied title, else one derived from the prompt.
+    title: str
+    # Layout width; the planner emits "full" or "half".
+    width_mode: str
+    # Minimum rendered height in pixels for the component.
+    min_height_px: int
+    # Loading-skeleton style to show while the component loads.
+    skeleton_variant: str
+    # Component-specific rendering options (keys vary by component_type).
+    viz_params: dict[str, Any]
+    # Column bindings with keys "dimensions", "measures", "series", "filters".
+    data_bindings: dict[str, Any]
+    # Rule confidence in the 0-1 range as assigned by the matching rule.
+    confidence: float
+    # Short human-readable explanation of why this component was chosen.
+    reasoning: str
+
+
+def _looks_like_timestamp(value: str) -> bool:
+    """Return True when *value* begins with a ``YYYY-MM-DD``-shaped prefix.
+
+    Only the leading ten characters are inspected; anything may follow, and
+    the digits are not validated as a real calendar date.
+    """
+    date_prefix = re.match(r"\d{4}-\d{2}-\d{2}", value)
+    return date_prefix is not None
+
+
+def _profile_columns(rows: list[dict[str, Any]], columns: list[str]) -> list[ColumnProfile]:
+    """Profile each requested column from a sample of the result rows.
+
+    Args:
+        rows: Result rows as dicts keyed by column name. A key missing from a
+            row is treated the same as a None value.
+        columns: Column names to profile, in output order.
+
+    Returns:
+        One ``ColumnProfile`` per entry in *columns*. With no rows, every
+        profile has all type flags False, ``null_rate`` 1.0 and no samples.
+    """
+    # Local import keeps this block self-contained; ``datetime`` subclasses ``date``.
+    from datetime import date
+
+    if not rows:
+        return [ColumnProfile(column, False, False, False, False, 1.0, []) for column in columns]
+
+    # Only the first 20 rows are inspected to keep profiling cheap.
+    sample = rows[:20]
+    sample_size = len(sample)
+    profiles: list[ColumnProfile] = []
+    for column in columns:
+        non_null = [value for value in (row.get(column) for row in sample) if value is not None]
+        # String-based checks look at the first five non-null values only.
+        head = non_null[:5]
+        profiles.append(
+            ColumnProfile(
+                name=column,
+                # bool is a subclass of int; exclude it so flag columns are not
+                # reported as numeric (and later charted as measures).
+                is_numeric=any(
+                    isinstance(value, (int, float)) and not isinstance(value, bool)
+                    for value in non_null
+                ),
+                is_string=any(isinstance(value, str) and not _looks_like_timestamp(value) for value in head),
+                # Accept native date/datetime objects as well as ISO-like strings.
+                is_datetime=any(
+                    isinstance(value, date)
+                    or (isinstance(value, str) and _looks_like_timestamp(value))
+                    for value in head
+                ),
+                is_boolean=any(isinstance(value, bool) for value in non_null),
+                null_rate=1.0 - len(non_null) / sample_size,
+                sample_values=non_null[:3],
+            )
+        )
+    return profiles
+
+
+# Lower-cased column names that VisualizationPlanner._classify_column treats as
+# categorical dimensions regardless of the sampled value types.
+_DIMENSION_HINTS = {
+    "name", "full_name", "project_name", "developer_name", "agent_name",
+    "broker_company", "category", "label", "stage", "channel", "type",
+    "micro_market", "district", "status", "persona", "nationality",
+}
+# Name hints that mark a column as a measure when its values are not numeric.
+_MEASURE_HINTS = {
+    "count", "total", "sum", "avg", "average", "value", "score", "rate",
+    "current_value", "qd_score", "probability", "interest_count", "visit_count",
+    "interaction_count", "days", "amount", "revenue",
+}
+# Name hints that mark a column as a timestamp.
+_TIMESTAMP_HINTS = {"at", "date", "time", "when", "timestamp"}
+
+# Priority order for the x axis: the first entry present among the candidate
+# columns wins (see _pick_axis).
+_PREFERRED_X = [
+    "project_name", "developer_name", "category", "stage", "channel",
+    "micro_market", "broker_company", "agent_name", "name", "full_name",
+    "label", "status", "type",
+]
+# Priority order for the y axis, applied the same way to measure columns.
+_PREFERRED_Y = [
+    "count", "total", "interested_clients", "interest_count", "client_count",
+    "current_value", "qd_score", "value", "probability", "interaction_count",
+    "visit_count", "days_since_last_contact",
+]
+
+# Source table name -> ordered columns to display when rendering a table.
+# Only preset columns actually present in the result are used.
+_TABLE_COLUMN_PRESETS: dict[str, list[str]] = {
+    "crm_people": ["full_name", "primary_phone", "primary_email", "persona_labels"],
+    "intel_qd_scores": ["full_name", "current_value", "score_type", "computed_at"],
+    "crm_leads": ["full_name", "stage", "status", "budget_band", "urgency"],
+    "intel_interactions": ["full_name", "channel", "interaction_type", "happened_at", "summary"],
+    "read_last_contacted": ["full_name", "last_contacted_at", "last_channel", "days_since_last_contact", "staleness_label"],
+    "crm_property_interests": ["full_name", "project_name", "interest_level", "configuration_preference"],
+    "intel_call_objections": ["full_name", "objection_type", "intensity", "was_resolved", "raised_at"],
+    "intel_extracted_facts": ["full_name", "fact_type", "fact_text", "confidence", "effective_date"],
+    "read_next_best_action": ["full_name", "action_label", "urgency", "recommended_channel", "execute_within_hours"],
+}
+
+
+def _pick_axis(candidates: list[str], preferred: list[str]) -> str | None:
+    """Choose an axis column from *candidates*, honouring *preferred* order.
+
+    Returns the earliest entry of *preferred* that is also a candidate. If
+    none match, the first candidate is returned, or None when there are no
+    candidates at all.
+    """
+    fallback = candidates[0] if candidates else None
+    ranked_matches = (name for name in preferred if name in candidates)
+    return next(ranked_matches, fallback)
+
+
+def _title_from_prompt(prompt: str) -> str:
+    """Derive a short display title from the user's prompt.
+
+    Whitespace runs are collapsed to single spaces, surrounding spaces and
+    ``?``/``.``/``!`` are trimmed, the text is cut to 72 characters and the
+    first character is upper-cased. An empty result yields a generic title.
+    """
+    collapsed = re.sub(r"\s+", " ", prompt.strip())
+    clipped = collapsed.strip(" ?.!")[:72]
+    if not clipped:
+        return "Oracle Query Result"
+    return clipped[:1].upper() + clipped[1:]
+
+
+class VisualizationPlanner:
+    """Choose a canvas component and its parameters from a query result.
+
+    ``plan`` profiles the result columns, classifies each one as identity,
+    timestamp, dimension, measure or other, and then applies an ordered list
+    of rules (first match wins) to select the component type.
+    """
+
+    # Prompt keywords that mark an activity/timeline style request.
+    _TIMELINE_TERMS = (
+        "timeline", "history", "activity", "message", "call log", "whatsapp",
+        "email", "conversation", "transcript", "interaction",
+    )
+    # Prompt keywords that mark a time-series request.
+    _TREND_TERMS = ("trend", "over time", "monthly", "weekly", "growth", "timeseries")
+    # Prompt keywords that mark a pipeline/funnel request.
+    _PIPELINE_TERMS = ("pipeline", "funnel", "stage", "kanban", "deal")
+    # Components rendered at full width; every other component is half width.
+    _FULL_WIDTH_TYPES = frozenset({"table", "activityStream", "pipelineBoard"})
+    # Minimum height per component; unknown types fall back to 300.
+    _MIN_HEIGHT_PX = {
+        "kpiTile": 140,
+        "barChart": 320,
+        "lineChart": 320,
+        "activityStream": 380,
+        "table": 300,
+        "pipelineBoard": 400,
+    }
+    # Loading skeleton per component; unknown types fall back to "generic".
+    _SKELETON_VARIANT = {
+        "kpiTile": "kpi",
+        "barChart": "chart",
+        "lineChart": "chart",
+        "activityStream": "table",
+        "table": "table",
+        "pipelineBoard": "pipeline",
+    }
+
+    def plan(
+        self,
+        *,
+        rows: list[dict[str, Any]],
+        columns: list[str],
+        prompt: str,
+        source_tables: list[str],
+        profile_suggested_type: str | None = None,
+        title_from_planner: str | None = None,
+    ) -> VisualizationDecision:
+        """Pick the component that best fits the result shape and the prompt.
+
+        Args:
+            rows: Result rows as dicts keyed by column name.
+            columns: Column names of the result, in display order.
+            prompt: The user's natural-language request.
+            source_tables: Tables the query read from; used for table presets.
+            profile_suggested_type: When given, this component type is used
+                as-is and the shape rules are skipped.
+            title_from_planner: Optional title; when empty, one is derived
+                from *prompt*.
+
+        Returns:
+            The decision for the first matching rule, or a "table" decision
+            when no rule matches.
+        """
+        profiles = _profile_columns(rows, columns)
+        kinds = {profile.name: self._classify_column(profile) for profile in profiles}
+
+        dimensions = [name for name, kind in kinds.items() if kind == "dimension"]
+        measures = [name for name, kind in kinds.items() if kind == "measure"]
+        timestamps = [name for name, kind in kinds.items() if kind == "timestamp"]
+        row_count = len(rows)
+        prompt_lower = prompt.lower()
+
+        def mentions(terms: tuple[str, ...]) -> bool:
+            """Return True when the prompt contains any of *terms*."""
+            return any(term in prompt_lower for term in terms)
+
+        def decide(component_type: str, reasoning: str, confidence: float) -> VisualizationDecision:
+            """Build a decision; only these three values differ between rules."""
+            return self._build_decision(
+                component_type=component_type,
+                dimensions=dimensions,
+                measures=measures,
+                timestamps=timestamps,
+                columns=columns,
+                rows=rows,
+                row_count=row_count,
+                prompt=prompt,
+                source_tables=source_tables,
+                title=title_from_planner,
+                reasoning=reasoning,
+                confidence=confidence,
+            )
+
+        # An explicit suggestion from the execution profiler always wins.
+        if profile_suggested_type:
+            return decide(
+                profile_suggested_type,
+                f"Execution profiler suggested {profile_suggested_type}",
+                0.9,
+            )
+
+        if timestamps and mentions(self._TIMELINE_TERMS):
+            return decide("activityStream", "Activity-like prompt plus timestamped result.", 0.88)
+
+        if row_count == 1 and measures and not dimensions:
+            return decide("kpiTile", "Single numeric row.", 0.92)
+
+        if timestamps and measures and mentions(self._TREND_TERMS):
+            return decide("lineChart", "Temporal series plus measure.", 0.87)
+
+        if ("stage" in columns or "pipeline" in prompt_lower) and mentions(self._PIPELINE_TERMS):
+            return decide("pipelineBoard", "Pipeline-like prompt and stage-like data.", 0.85)
+
+        # Bar charts are limited to small categorical results without a time axis.
+        if dimensions and measures and row_count <= 30 and not timestamps:
+            return decide("barChart", "Categorical dimension plus measure.", 0.8)
+
+        return decide("table", "Default structured table.", 0.7)
+
+    @staticmethod
+    def _name_tokens(name: str) -> set[str]:
+        """Split a column name into lower-cased word tokens.
+
+        Handles snake_case (``computed_at``) and camelCase (``computedAt``);
+        both yield ``{"computed", "at"}``.
+        """
+        snake = re.sub(r"(?<=[a-z0-9])(?=[A-Z])", "_", name).lower()
+        return {token for token in re.split(r"[^a-z0-9]+", snake) if token}
+
+    @staticmethod
+    def _classify_column(profile: ColumnProfile) -> str:
+        """Classify a column as identity, timestamp, dimension, measure or other.
+
+        Name hints are matched against whole name tokens rather than raw
+        substrings. Substring matching made the hint "at" fire on names such
+        as "category", "status" and "rate", turning them into timestamps and
+        keeping them out of the dimension/measure rules.
+        """
+        lower = profile.name.lower()
+        tokens = VisualizationPlanner._name_tokens(profile.name)
+        if lower.endswith("_id"):
+            return "identity"
+        if profile.is_datetime or not tokens.isdisjoint(_TIMESTAMP_HINTS):
+            return "timestamp"
+        if lower in _DIMENSION_HINTS or (profile.is_string and not profile.is_numeric):
+            return "dimension"
+        if profile.is_numeric or lower in _MEASURE_HINTS or not tokens.isdisjoint(_MEASURE_HINTS):
+            return "measure"
+        return "other"
+
+    def _build_decision(
+        self,
+        *,
+        component_type: str,
+        dimensions: list[str],
+        measures: list[str],
+        timestamps: list[str],
+        columns: list[str],
+        rows: list[dict[str, Any]],
+        row_count: int,
+        prompt: str,
+        source_tables: list[str],
+        title: str | None,
+        reasoning: str,
+        confidence: float,
+    ) -> VisualizationDecision:
+        """Assemble the full decision for an already chosen component type.
+
+        Picks the axes, the displayed columns (tables only), the
+        component-specific parameters, the data bindings and the layout hints.
+        ``rows`` is accepted for interface symmetry and is not read here.
+        """
+        # Known dimension names are preferred for x; timestamps rank after them.
+        x_axis = _pick_axis(dimensions + timestamps, _PREFERRED_X + list(timestamps))
+        y_axis = _pick_axis(measures, _PREFERRED_Y)
+
+        # Only tables trim their columns; every other component sees them all.
+        if component_type == "table":
+            display_columns = self._table_columns(columns, source_tables)
+        else:
+            display_columns = columns
+
+        viz_params = self._build_viz_params(
+            component_type=component_type,
+            x_axis=x_axis,
+            y_axis=y_axis,
+            display_columns=display_columns,
+            row_count=row_count,
+        )
+        data_bindings = {
+            # Up to two dimensions; with none, fall back to the first timestamp.
+            "dimensions": dimensions[:2] or timestamps[:1],
+            "measures": measures[:3],
+            "series": [],
+            "filters": [],
+        }
+        width_mode = "full" if component_type in self._FULL_WIDTH_TYPES else "half"
+
+        return VisualizationDecision(
+            component_type=component_type,
+            x_axis=x_axis,
+            y_axis=y_axis,
+            series_cols=[],
+            dimension_cols=dimensions,
+            measure_cols=measures,
+            title=title or _title_from_prompt(prompt),
+            width_mode=width_mode,
+            min_height_px=self._MIN_HEIGHT_PX.get(component_type, 300),
+            skeleton_variant=self._SKELETON_VARIANT.get(component_type, "generic"),
+            viz_params=viz_params,
+            data_bindings=data_bindings,
+            confidence=confidence,
+            reasoning=reasoning,
+        )
+
+    @staticmethod
+    def _table_columns(all_columns: list[str], source_tables: list[str]) -> list[str]:
+        """Return the columns a table component should display.
+
+        The first source table whose preset shares at least one column with
+        the result decides the list, in preset order. Otherwise the first
+        eight columns are used, skipping ``*_id`` columns except ``person_id``.
+        """
+        for table in source_tables:
+            preset = _TABLE_COLUMN_PRESETS.get(table)
+            if not preset:
+                continue
+            matched = [column for column in preset if column in all_columns]
+            if matched:
+                return matched
+        visible = [
+            column
+            for column in all_columns
+            if not column.endswith("_id") or column == "person_id"
+        ]
+        return visible[:8]
+
+    @staticmethod
+    def _build_viz_params(
+        *,
+        component_type: str,
+        x_axis: str | None,
+        y_axis: str | None,
+        display_columns: list[str],
+        row_count: int,
+    ) -> dict[str, Any]:
+        """Return the component-specific rendering options.
+
+        Unknown component types yield an empty dict.
+        """
+        # row_count is part of the keyword interface but is not used yet.
+        del row_count
+        if component_type == "barChart":
+            return {
+                # Placeholder axis names are used when no column was picked.
+                "xAxis": x_axis or "category",
+                "yAxis": y_axis or "value",
+                "sort": "desc",
+                "showLabels": True,
+                "legend": False,
+            }
+        if component_type == "lineChart":
+            return {"showPoints": True, "smooth": True}
+        if component_type == "kpiTile":
+            return {"label": "Result", "trend": "", "comparisonLabel": ""}
+        if component_type == "table":
+            return {
+                "columns": display_columns,
+                "emptyStateTitle": "No matching records found",
+                "emptyStateDescription": "The query ran successfully but returned no rows for this prompt.",
+                "rankBy": y_axis,
+                "showTopBadge": False,
+            }
+        if component_type == "activityStream":
+            return {"showUrgencyIndicator": True}
+        if component_type == "pipelineBoard":
+            return {"showValue": True, "colorByStage": True}
+        return {}
+
+
+# Module-level planner instance created at import time.
+visualization_planner = VisualizationPlanner()