"""
oracle/visualization_planner.py

Pick Oracle canvas renderer types from actual result shape.
"""

from __future__ import annotations

import re
from dataclasses import dataclass
from typing import Any


@dataclass
class ColumnProfile:
    """Type flags and a few sample values for one result column."""

    name: str
    is_numeric: bool  # at least one sampled value is an int/float (bools excluded)
    is_string: bool  # at least one leading sample is a non-date-looking string
    is_datetime: bool  # at least one leading sample is a date-looking string
    is_boolean: bool  # at least one sampled value is a bool
    null_rate: float  # share of sampled rows where the column is None/missing
    sample_values: list[Any]  # up to three non-null sampled values


@dataclass
class VisualizationDecision:
    """Renderer choice plus the layout and binding hints that go with it."""

    component_type: str
    x_axis: str | None
    y_axis: str | None
    series_cols: list[str]
    dimension_cols: list[str]
    measure_cols: list[str]
    title: str
    width_mode: str
    min_height_px: int
    skeleton_variant: str
    viz_params: dict[str, Any]
    data_bindings: dict[str, Any]
    confidence: float
    reasoning: str


# Compiled once; `match` anchors at the start of the string only.
_DATE_PREFIX_RE = re.compile(r"\d{4}-\d{2}-\d{2}")

# Splits snake_case, camelCase and ACRONYMCase identifiers into word tokens.
_NAME_TOKEN_RE = re.compile(r"[A-Z]?[a-z]+|[A-Z]+(?![a-z])|\d+")

# How many leading rows are inspected when profiling columns.
_PROFILE_SAMPLE_ROWS = 20


def _looks_like_timestamp(value: str) -> bool:
    """Return True when *value* starts with a ``YYYY-MM-DD`` date prefix."""
    return bool(_DATE_PREFIX_RE.match(value))


def _name_tokens(name: str) -> set[str]:
    """Return the lowercase word tokens of a column name.

    ``created_at`` and ``createdAt`` both yield ``{"created", "at"}``.
    """
    return {token.lower() for token in _NAME_TOKEN_RE.findall(name)}


def _is_number(value: Any) -> bool:
    """Return True for int/float values that are not bools.

    ``bool`` is a subclass of ``int``, so it has to be excluded explicitly;
    otherwise flag columns would be treated as chartable measures.
    """
    return isinstance(value, (int, float)) and not isinstance(value, bool)


def _profile_columns(rows: list[dict[str, Any]], columns: list[str]) -> list[ColumnProfile]:
    """Profile each column from the first rows of the result.

    With no rows, every column gets an all-False profile with a null rate of
    1.0. Otherwise up to the first 20 rows are sampled. Numeric and boolean
    flags look at every non-null sampled value; string and datetime flags look
    at the first five non-null values only.
    """
    if not rows:
        return [ColumnProfile(column, False, False, False, False, 1.0, []) for column in columns]

    sample = rows[:_PROFILE_SAMPLE_ROWS]
    sample_size = len(sample)
    profiles: list[ColumnProfile] = []
    for column in columns:
        non_null = [
            value for value in (row.get(column) for row in sample) if value is not None
        ]
        leading_strings = [value for value in non_null[:5] if isinstance(value, str)]
        profiles.append(
            ColumnProfile(
                name=column,
                is_numeric=any(_is_number(value) for value in non_null),
                is_string=any(not _looks_like_timestamp(value) for value in leading_strings),
                is_datetime=any(_looks_like_timestamp(value) for value in leading_strings),
                is_boolean=any(isinstance(value, bool) for value in non_null),
                null_rate=1.0 - len(non_null) / sample_size,
                sample_values=non_null[:3],
            )
        )
    return profiles


# Column names that are always treated as categorical dimensions.
_DIMENSION_HINTS = {
    "name",
    "full_name",
    "project_name",
    "developer_name",
    "agent_name",
    "broker_company",
    "category",
    "label",
    "stage",
    "channel",
    "type",
    "micro_market",
    "district",
    "status",
    "persona",
    "nationality",
}

# Name fragments that mark a column as a measure when value types are inconclusive.
_MEASURE_HINTS = {
    "count",
    "total",
    "sum",
    "avg",
    "average",
    "value",
    "score",
    "rate",
    "current_value",
    "qd_score",
    "probability",
    "interest_count",
    "visit_count",
    "interaction_count",
    "days",
    "amount",
    "revenue",
}

# Whole name tokens (not substrings) that mark a column as temporal.
_TIMESTAMP_HINTS = {"at", "date", "time", "when", "timestamp"}

# Axis preference order; the first entry present among the candidates wins.
_PREFERRED_X = [
    "project_name",
    "developer_name",
    "category",
    "stage",
    "channel",
    "micro_market",
    "broker_company",
    "agent_name",
    "name",
    "full_name",
    "label",
    "status",
    "type",
]

_PREFERRED_Y = [
    "count",
    "total",
    "interested_clients",
    "interest_count",
    "client_count",
    "current_value",
    "qd_score",
    "value",
    "probability",
    "interaction_count",
    "visit_count",
    "days_since_last_contact",
]

# Curated display columns per source table, in display order.
_TABLE_COLUMN_PRESETS: dict[str, list[str]] = {
    "crm_people": ["full_name", "primary_phone", "primary_email", "persona_labels"],
    "intel_qd_scores": ["full_name", "current_value", "score_type", "computed_at"],
    "crm_leads": ["full_name", "stage", "status", "budget_band", "urgency"],
    "intel_interactions": [
        "full_name",
        "channel",
        "interaction_type",
        "happened_at",
        "summary",
    ],
    "read_last_contacted": [
        "full_name",
        "last_contacted_at",
        "last_channel",
        "days_since_last_contact",
        "staleness_label",
    ],
    "crm_property_interests": [
        "full_name",
        "project_name",
        "interest_level",
        "configuration_preference",
    ],
    "intel_call_objections": [
        "full_name",
        "objection_type",
        "intensity",
        "was_resolved",
        "raised_at",
    ],
    "intel_extracted_facts": [
        "full_name",
        "fact_type",
        "fact_text",
        "confidence",
        "effective_date",
    ],
    "read_next_best_action": [
        "full_name",
        "action_label",
        "urgency",
        "recommended_channel",
        "execute_within_hours",
    ],
}


def _pick_axis(candidates: list[str], preferred: list[str]) -> str | None:
    """Return the first *preferred* name found in *candidates*.

    Falls back to the first candidate, or None when there are no candidates.
    """
    for candidate in preferred:
        if candidate in candidates:
            return candidate
    return candidates[0] if candidates else None


def _title_from_prompt(prompt: str) -> str:
    """Derive a display title from the user prompt.

    Whitespace runs are collapsed, surrounding spaces and ``?.!`` are stripped,
    the text is cut to 72 characters and the first letter is capitalised. An
    empty result falls back to ``"Oracle Query Result"``.
    """
    words = re.sub(r"\s+", " ", prompt.strip()).strip(" ?.!")[:72]
    return (words[:1].upper() + words[1:]) if words else "Oracle Query Result"


class VisualizationPlanner:
    """Choose a canvas component for a query result from its shape and prompt."""

    def plan(
        self,
        *,
        rows: list[dict[str, Any]],
        columns: list[str],
        prompt: str,
        source_tables: list[str],
        profile_suggested_type: str | None = None,
        title_from_planner: str | None = None,
    ) -> VisualizationDecision:
        """Return the visualization decision for one query result.

        Rules are evaluated in order and the first match wins:

        1. an explicit ``profile_suggested_type`` is used as-is;
        2. activity-like prompt with a timestamp column -> ``activityStream``;
        3. a single row with measures and no dimensions -> ``kpiTile``;
        4. timestamps, measures and a trend-like prompt -> ``lineChart``;
        5. pipeline-like prompt with stage data -> ``pipelineBoard``;
        6. dimensions and measures, at most 30 rows, no timestamps -> ``barChart``;
        7. otherwise ``table``.

        Args:
            rows: Result rows keyed by column name.
            columns: Column names in result order.
            prompt: The user prompt; matched case-insensitively against keywords.
            source_tables: Source table names, used to pick table column presets.
            profile_suggested_type: Component type that overrides all heuristics.
            title_from_planner: Title to use instead of one derived from the prompt.
        """
        profiles = _profile_columns(rows, columns)
        classifications = {profile.name: self._classify_column(profile) for profile in profiles}
        dimensions = [name for name, kind in classifications.items() if kind == "dimension"]
        measures = [name for name, kind in classifications.items() if kind == "measure"]
        timestamps = [name for name, kind in classifications.items() if kind == "timestamp"]
        row_count = len(rows)
        prompt_lower = prompt.lower()

        def decide(component_type: str, reasoning: str, confidence: float) -> VisualizationDecision:
            # Every rule shares the same context; only these three values vary.
            return self._build_decision(
                component_type=component_type,
                dimensions=dimensions,
                measures=measures,
                timestamps=timestamps,
                columns=columns,
                rows=rows,
                row_count=row_count,
                prompt=prompt,
                source_tables=source_tables,
                title=title_from_planner,
                reasoning=reasoning,
                confidence=confidence,
            )

        def prompt_mentions(terms: tuple[str, ...]) -> bool:
            return any(term in prompt_lower for term in terms)

        if profile_suggested_type:
            return decide(
                profile_suggested_type,
                f"Execution profiler suggested {profile_suggested_type}",
                0.9,
            )

        timeline_terms = (
            "timeline",
            "history",
            "activity",
            "message",
            "call log",
            "whatsapp",
            "email",
            "conversation",
            "transcript",
            "interaction",
        )
        if prompt_mentions(timeline_terms) and timestamps:
            return decide("activityStream", "Activity-like prompt plus timestamped result.", 0.88)

        if row_count == 1 and measures and not dimensions:
            return decide("kpiTile", "Single numeric row.", 0.92)

        trend_terms = ("trend", "over time", "monthly", "weekly", "growth", "timeseries")
        if timestamps and measures and prompt_mentions(trend_terms):
            return decide("lineChart", "Temporal series plus measure.", 0.87)

        pipeline_terms = ("pipeline", "funnel", "stage", "kanban", "deal")
        has_stage_signal = "stage" in columns or "pipeline" in prompt_lower
        if has_stage_signal and prompt_mentions(pipeline_terms):
            return decide("pipelineBoard", "Pipeline-like prompt and stage-like data.", 0.85)

        if dimensions and measures and row_count <= 30 and not timestamps:
            return decide("barChart", "Categorical dimension plus measure.", 0.8)

        return decide("table", "Default structured table.", 0.7)

    @staticmethod
    def _classify_column(profile: ColumnProfile) -> str:
        """Classify a column as identity, timestamp, dimension, measure or other.

        Timestamp hints are compared against whole name tokens rather than
        substrings. Substring matching classified ``status``, ``category``,
        ``rate`` and ``nationality`` as timestamps because each contains "at".
        Names that only embed a hint inside a longer word (e.g. ``updated``)
        now rely on the sampled values looking like dates.
        """
        lower = profile.name.lower()
        if lower.endswith("_id"):
            return "identity"
        if profile.is_datetime or not _TIMESTAMP_HINTS.isdisjoint(_name_tokens(profile.name)):
            return "timestamp"
        if lower in _DIMENSION_HINTS or (profile.is_string and not profile.is_numeric):
            return "dimension"
        if profile.is_numeric or any(token in lower for token in _MEASURE_HINTS):
            return "measure"
        return "other"

    def _build_decision(
        self,
        *,
        component_type: str,
        dimensions: list[str],
        measures: list[str],
        timestamps: list[str],
        columns: list[str],
        rows: list[dict[str, Any]],
        row_count: int,
        prompt: str,
        source_tables: list[str],
        title: str | None,
        reasoning: str,
        confidence: float,
    ) -> VisualizationDecision:
        """Assemble the decision for an already chosen component type.

        Picks the axes, the table display columns, renderer parameters, data
        bindings and layout hints. ``rows`` is accepted for interface
        compatibility and is not read here.
        """
        if component_type == "lineChart" and timestamps:
            # A line chart plots a measure over time, so the temporal column
            # must win over any categorical column that is also present.
            x_axis: str | None = timestamps[0]
        else:
            x_axis = _pick_axis(dimensions + timestamps, _PREFERRED_X + timestamps)
        y_axis = _pick_axis(measures, _PREFERRED_Y)

        if component_type == "table":
            display_columns = self._table_columns(columns, source_tables)
        else:
            display_columns = columns

        viz_params = self._build_viz_params(
            component_type=component_type,
            x_axis=x_axis,
            y_axis=y_axis,
            display_columns=display_columns,
            row_count=row_count,
        )

        # With no categorical dimension, bind the first timestamp instead.
        bound_dimensions = dimensions[:2] if dimensions else timestamps[:1]
        data_bindings = {
            "dimensions": bound_dimensions,
            "measures": measures[:3],
            "series": [],
            "filters": [],
        }

        full_width_components = {"table", "activityStream", "pipelineBoard"}
        width_mode = "full" if component_type in full_width_components else "half"
        height_map = {
            "kpiTile": 140,
            "barChart": 320,
            "lineChart": 320,
            "activityStream": 380,
            "table": 300,
            "pipelineBoard": 400,
        }
        skeleton_map = {
            "kpiTile": "kpi",
            "barChart": "chart",
            "lineChart": "chart",
            "activityStream": "table",
            "table": "table",
            "pipelineBoard": "pipeline",
        }

        return VisualizationDecision(
            component_type=component_type,
            x_axis=x_axis,
            y_axis=y_axis,
            series_cols=[],
            dimension_cols=dimensions,
            measure_cols=measures,
            title=title or _title_from_prompt(prompt),
            width_mode=width_mode,
            min_height_px=height_map.get(component_type, 300),
            skeleton_variant=skeleton_map.get(component_type, "generic"),
            viz_params=viz_params,
            data_bindings=data_bindings,
            confidence=confidence,
            reasoning=reasoning,
        )

    @staticmethod
    def _table_columns(all_columns: list[str], source_tables: list[str]) -> list[str]:
        """Return the columns a table should display.

        The first source table whose preset shares at least one column with the
        result supplies the list, in preset order. Otherwise the first eight
        result columns are used, dropping ``*_id`` columns except ``person_id``.
        """
        for table in source_tables:
            preset = _TABLE_COLUMN_PRESETS.get(table)
            if preset:
                matched = [column for column in preset if column in all_columns]
                if matched:
                    return matched
        visible = [
            column
            for column in all_columns
            if not column.endswith("_id") or column == "person_id"
        ]
        return visible[:8]

    @staticmethod
    def _build_viz_params(
        *,
        component_type: str,
        x_axis: str | None,
        y_axis: str | None,
        display_columns: list[str],
        row_count: int,
    ) -> dict[str, Any]:
        """Return the renderer parameters for *component_type*.

        Unknown component types get an empty dict. ``row_count`` is accepted
        for interface compatibility and is deliberately unused.
        """
        del row_count
        if component_type == "barChart":
            return {
                "xAxis": x_axis or "category",
                "yAxis": y_axis or "value",
                "sort": "desc",
                "showLabels": True,
                "legend": False,
            }
        if component_type == "lineChart":
            return {"showPoints": True, "smooth": True}
        if component_type == "kpiTile":
            return {"label": "Result", "trend": "", "comparisonLabel": ""}
        if component_type == "table":
            return {
                "columns": display_columns,
                "emptyStateTitle": "No matching records found",
                "emptyStateDescription": (
                    "The query ran successfully but returned no rows for this prompt."
                ),
                "rankBy": y_axis,
                "showTopBadge": False,
            }
        if component_type == "activityStream":
            return {"showUrgencyIndicator": True}
        if component_type == "pipelineBoard":
            return {"showValue": True, "colorByStage": True}
        return {}


# Shared module-level planner instance.
visualization_planner = VisualizationPlanner()