# Project_Velocity/backend/oracle/visualization_planner.py

"""
oracle/visualization_planner.py

Pick Oracle canvas renderer types from actual result shape.
"""
from __future__ import annotations

import re
from dataclasses import dataclass
from typing import Any


@dataclass
class ColumnProfile:
    """Summary of one result column, as produced by ``_profile_columns``."""

    name: str  # column name exactly as given in the ``columns`` list
    is_numeric: bool  # at least one sampled non-null value was numeric
    is_string: bool  # at least one sampled value was a string that is not date-like
    is_datetime: bool  # at least one sampled value looked like a date/timestamp
    is_boolean: bool  # at least one sampled non-null value was a bool
    null_rate: float  # share of sampled rows where the value was None or missing (1.0 when there are no rows)
    sample_values: list[Any]  # a few non-null values from the sample


@dataclass
class VisualizationDecision:
    """Renderer choice plus layout and binding details returned by the planner."""

    component_type: str  # renderer name, e.g. "table", "barChart", "kpiTile"
    x_axis: str | None  # column chosen for the x-axis, or None when no candidate exists
    y_axis: str | None  # measure column chosen for the y-axis, or None when no measure exists
    series_cols: list[str]  # series columns; the planner in this module always emits an empty list
    dimension_cols: list[str]  # every column classified as a dimension
    measure_cols: list[str]  # every column classified as a measure
    title: str  # caller-supplied title, or one derived from the prompt
    width_mode: str  # "full" or "half"
    min_height_px: int  # minimum render height in pixels for the component
    skeleton_variant: str  # loading-skeleton variant name for the component
    viz_params: dict[str, Any]  # component-specific options (see ``_build_viz_params``)
    data_bindings: dict[str, Any]  # keys: "dimensions", "measures", "series", "filters"
    confidence: float  # heuristic confidence attached to the rule that fired
    reasoning: str  # short human-readable explanation of the rule that fired


def _looks_like_timestamp(value: str) -> bool:
    return bool(re.match(r"\d{4}-\d{2}-\d{2}", value))


def _profile_columns(rows: list[dict[str, Any]], columns: list[str]) -> list[ColumnProfile]:
    """Build a ``ColumnProfile`` for each column from a sample of the result rows.

    Only the first 20 rows are inspected. String/date detection looks at the
    first five non-null values of the sample; numeric and boolean detection
    look at every non-null value of the sample.

    Args:
        rows: Result rows as dicts keyed by column name. Missing keys count as null.
        columns: Column names to profile, in output order.

    Returns:
        One profile per entry in ``columns``. With no rows, every profile has
        all type flags False, a null rate of 1.0 and no sample values.
    """
    if not rows:
        return [ColumnProfile(column, False, False, False, False, 1.0, []) for column in columns]

    sample = rows[:20]
    sample_size = len(sample)
    profiles: list[ColumnProfile] = []
    for column in columns:
        non_null = [value for value in (row.get(column) for row in sample) if value is not None]
        head = non_null[:5]
        profiles.append(
            ColumnProfile(
                name=column,
                # bool is a subclass of int; exclude it so True/False flag
                # columns are not reported as numeric.
                is_numeric=any(
                    isinstance(value, (int, float)) and not isinstance(value, bool)
                    for value in non_null
                ),
                is_string=any(isinstance(value, str) and not _looks_like_timestamp(value) for value in head),
                is_datetime=any(isinstance(value, str) and _looks_like_timestamp(value) for value in head),
                is_boolean=any(isinstance(value, bool) for value in non_null),
                null_rate=1.0 - len(non_null) / sample_size,
                sample_values=non_null[:3],
            )
        )
    return profiles


# Column names that VisualizationPlanner._classify_column treats as categorical
# dimensions (compared against the lower-cased column name).
_DIMENSION_HINTS = {
    "name", "full_name", "project_name", "developer_name", "agent_name",
    "broker_company", "category", "label", "stage", "channel", "type",
    "micro_market", "district", "status", "persona", "nationality",
}
# Name fragments that mark a column as a numeric measure even when its values
# were not detected as numeric.
_MEASURE_HINTS = {
    "count", "total", "sum", "avg", "average", "value", "score", "rate",
    "current_value", "qd_score", "probability", "interest_count", "visit_count",
    "interaction_count", "days", "amount", "revenue",
}
# Name fragments that mark a column as a timestamp
# (see VisualizationPlanner._classify_column).
_TIMESTAMP_HINTS = {"at", "date", "time", "when", "timestamp"}

# Priority order for the x-axis column; the earliest entry present among the
# candidates wins (see _pick_axis).
_PREFERRED_X = [
    "project_name", "developer_name", "category", "stage", "channel",
    "micro_market", "broker_company", "agent_name", "name", "full_name",
    "label", "status", "type",
]
# Priority order for the y-axis (measure) column, same rule as above.
_PREFERRED_Y = [
    "count", "total", "interested_clients", "interest_count", "client_count",
    "current_value", "qd_score", "value", "probability", "interaction_count",
    "visit_count", "days_since_last_contact",
]

# Columns to show in a table, keyed by source table name. Consulted by
# VisualizationPlanner._table_columns; only preset columns that are actually
# present in the result are used.
_TABLE_COLUMN_PRESETS: dict[str, list[str]] = {
    "crm_people": ["full_name", "primary_phone", "primary_email", "persona_labels"],
    "intel_qd_scores": ["full_name", "current_value", "score_type", "computed_at"],
    "crm_leads": ["full_name", "stage", "status", "budget_band", "urgency"],
    "intel_interactions": ["full_name", "channel", "interaction_type", "happened_at", "summary"],
    "read_last_contacted": ["full_name", "last_contacted_at", "last_channel", "days_since_last_contact", "staleness_label"],
    "crm_property_interests": ["full_name", "project_name", "interest_level", "configuration_preference"],
    "intel_call_objections": ["full_name", "objection_type", "intensity", "was_resolved", "raised_at"],
    "intel_extracted_facts": ["full_name", "fact_type", "fact_text", "confidence", "effective_date"],
    "read_next_best_action": ["full_name", "action_label", "urgency", "recommended_channel", "execute_within_hours"],
}


def _pick_axis(candidates: list[str], preferred: list[str]) -> str | None:
    for candidate in preferred:
        if candidate in candidates:
            return candidate
    return candidates[0] if candidates else None


def _title_from_prompt(prompt: str) -> str:
    words = re.sub(r"\s+", " ", prompt.strip()).strip(" ?.!")[:72]
    return (words[:1].upper() + words[1:]) if words else "Oracle Query Result"


class VisualizationPlanner:
    """Choose a canvas component and its parameters from a query result's shape.

    ``plan`` classifies every column (identity / timestamp / dimension /
    measure / other) and then applies an ordered list of heuristics; the first
    rule that matches determines the component type.
    """

    # Prompt keywords consulted by the heuristics in ``plan``.
    _TIMELINE_TERMS = ("timeline", "history", "activity", "message", "call log", "whatsapp", "email", "conversation", "transcript", "interaction")
    _TREND_TERMS = ("trend", "over time", "monthly", "weekly", "growth", "timeseries")
    _PIPELINE_TERMS = ("pipeline", "funnel", "stage", "kanban", "deal")

    # Layout lookups keyed by component type; unknown types use the fallbacks
    # applied in ``_build_decision``.
    _FULL_WIDTH_TYPES = frozenset({"table", "activityStream", "pipelineBoard"})
    _MIN_HEIGHT_PX = {
        "kpiTile": 140,
        "barChart": 320,
        "lineChart": 320,
        "activityStream": 380,
        "table": 300,
        "pipelineBoard": 400,
    }
    _SKELETON_VARIANTS = {
        "kpiTile": "kpi",
        "barChart": "chart",
        "lineChart": "chart",
        "activityStream": "table",
        "table": "table",
        "pipelineBoard": "pipeline",
    }

    def plan(
        self,
        *,
        rows: list[dict[str, Any]],
        columns: list[str],
        prompt: str,
        source_tables: list[str],
        profile_suggested_type: str | None = None,
        title_from_planner: str | None = None,
    ) -> VisualizationDecision:
        """Pick a visualization for a query result.

        Rules are evaluated in this order, first match wins:

        1. ``profile_suggested_type`` is given -> use it as-is.
        2. Timeline-like prompt and a timestamp column -> ``activityStream``.
        3. Exactly one row with measures and no dimensions -> ``kpiTile``.
        4. Timestamp + measure and a trend-like prompt -> ``lineChart``.
        5. Pipeline-like prompt (with a ``stage`` column or the word
           "pipeline" in the prompt) -> ``pipelineBoard``.
        6. Dimension + measure, at most 30 rows, no timestamps -> ``barChart``.
        7. Otherwise -> ``table``.

        Args:
            rows: Result rows as dicts keyed by column name.
            columns: Column names of the result.
            prompt: The user's prompt; matched case-insensitively for keywords
                and used to derive a title when none is supplied.
            source_tables: Source table names, used to choose table column presets.
            profile_suggested_type: Component type suggested upstream; when
                truthy it bypasses every heuristic.
            title_from_planner: Title to use instead of one derived from the prompt.

        Returns:
            The populated ``VisualizationDecision``.
        """
        profiles = _profile_columns(rows, columns)
        classifications = {profile.name: self._classify_column(profile) for profile in profiles}

        dimensions = [column for column, kind in classifications.items() if kind == "dimension"]
        measures = [column for column, kind in classifications.items() if kind == "measure"]
        timestamps = [column for column, kind in classifications.items() if kind == "timestamp"]
        row_count = len(rows)
        prompt_lower = prompt.lower()

        def decide(component_type: str, reasoning: str, confidence: float) -> VisualizationDecision:
            # Every rule builds its decision from the same inputs; only these
            # three values differ between rules.
            return self._build_decision(
                component_type=component_type,
                dimensions=dimensions,
                measures=measures,
                timestamps=timestamps,
                columns=columns,
                rows=rows,
                row_count=row_count,
                prompt=prompt,
                source_tables=source_tables,
                title=title_from_planner,
                reasoning=reasoning,
                confidence=confidence,
            )

        def mentions(terms: tuple[str, ...]) -> bool:
            return any(term in prompt_lower for term in terms)

        if profile_suggested_type:
            return decide(profile_suggested_type, f"Execution profiler suggested {profile_suggested_type}", 0.9)

        if mentions(self._TIMELINE_TERMS) and timestamps:
            return decide("activityStream", "Activity-like prompt plus timestamped result.", 0.88)

        if row_count == 1 and measures and not dimensions:
            return decide("kpiTile", "Single numeric row.", 0.92)

        if timestamps and measures and mentions(self._TREND_TERMS):
            return decide("lineChart", "Temporal series plus measure.", 0.87)

        if ("stage" in columns or "pipeline" in prompt_lower) and mentions(self._PIPELINE_TERMS):
            return decide("pipelineBoard", "Pipeline-like prompt and stage-like data.", 0.85)

        if dimensions and measures and row_count <= 30 and not timestamps:
            return decide("barChart", "Categorical dimension plus measure.", 0.8)

        return decide("table", "Default structured table.", 0.7)

    @staticmethod
    def _name_tokens(name: str) -> set[str]:
        """Split a column name into lower-case word tokens.

        Handles snake_case and camelCase, e.g. ``"computed_at"`` and
        ``"createdAt"`` both contain the token ``"at"``; ``"category"`` is a
        single token.
        """
        # Insert a separator at lower/digit -> upper boundaries so camelCase
        # splits the same way snake_case does.
        separated = re.sub(r"(?<=[a-z0-9])(?=[A-Z])", "_", name).lower()
        return {token for token in re.split(r"[^a-z0-9]+", separated) if token}

    @staticmethod
    def _classify_column(profile: ColumnProfile) -> str:
        """Classify a column as identity, timestamp, dimension, measure or other.

        Checks run in that order, so a column whose name ends in ``_id`` is
        always "identity" and a timestamp match takes precedence over the
        dimension and measure checks.
        """
        lower = profile.name.lower()
        if lower.endswith("_id"):
            return "identity"
        # Timestamp hints must match a whole word of the name. A substring
        # test would let the short hint "at" match names such as "category",
        # "status" or "rate" and misclassify them as timestamps.
        tokens = VisualizationPlanner._name_tokens(profile.name)
        if profile.is_datetime or any(token in _TIMESTAMP_HINTS for token in tokens):
            return "timestamp"
        if lower in _DIMENSION_HINTS or (profile.is_string and not profile.is_numeric):
            return "dimension"
        if profile.is_numeric or any(token in lower for token in _MEASURE_HINTS):
            return "measure"
        return "other"

    def _build_decision(
        self,
        *,
        component_type: str,
        dimensions: list[str],
        measures: list[str],
        timestamps: list[str],
        columns: list[str],
        rows: list[dict[str, Any]],
        row_count: int,
        prompt: str,
        source_tables: list[str],
        title: str | None,
        reasoning: str,
        confidence: float,
    ) -> VisualizationDecision:
        """Assemble the ``VisualizationDecision`` for an already-chosen component type.

        Picks the axes, the displayed columns, the component parameters, the
        data bindings and the layout hints. ``rows`` is accepted for interface
        symmetry with ``plan`` and is not read here.
        """
        # Known dimension names win; otherwise a timestamp column is preferred
        # over an arbitrary first candidate.
        x_axis = _pick_axis(dimensions + timestamps, _PREFERRED_X + list(timestamps))
        y_axis = _pick_axis(measures, _PREFERRED_Y)

        if component_type == "table":
            display_columns = self._table_columns(columns, source_tables)
        else:
            display_columns = columns

        viz_params = self._build_viz_params(
            component_type=component_type,
            x_axis=x_axis,
            y_axis=y_axis,
            display_columns=display_columns,
            row_count=row_count,
        )
        data_bindings = {
            # Fall back to a single timestamp column when there is no dimension.
            "dimensions": dimensions[:2] if dimensions else (timestamps[:1] if timestamps else []),
            "measures": measures[:3],
            "series": [],
            "filters": [],
        }
        width_mode = "full" if component_type in self._FULL_WIDTH_TYPES else "half"

        return VisualizationDecision(
            component_type=component_type,
            x_axis=x_axis,
            y_axis=y_axis,
            series_cols=[],
            dimension_cols=dimensions,
            measure_cols=measures,
            title=title or _title_from_prompt(prompt),
            width_mode=width_mode,
            min_height_px=self._MIN_HEIGHT_PX.get(component_type, 300),
            skeleton_variant=self._SKELETON_VARIANTS.get(component_type, "generic"),
            viz_params=viz_params,
            data_bindings=data_bindings,
            confidence=confidence,
            reasoning=reasoning,
        )

    @staticmethod
    def _table_columns(all_columns: list[str], source_tables: list[str]) -> list[str]:
        """Choose which columns a table should display.

        The first source table with a preset that shares at least one column
        with the result decides the list (in preset order). Otherwise the
        first eight result columns are used, skipping ``*_id`` columns except
        ``person_id``.
        """
        for table in source_tables:
            preset = _TABLE_COLUMN_PRESETS.get(table)
            if preset:
                matched = [column for column in preset if column in all_columns]
                if matched:
                    return matched
        return [column for column in all_columns if not column.endswith("_id") or column == "person_id"][:8]

    @staticmethod
    def _build_viz_params(
        *,
        component_type: str,
        x_axis: str | None,
        y_axis: str | None,
        display_columns: list[str],
        row_count: int,
    ) -> dict[str, Any]:
        """Return the component-specific options dict for ``component_type``.

        Unknown component types get an empty dict. ``row_count`` is part of the
        signature but currently unused.
        """
        del row_count
        if component_type == "barChart":
            return {
                "xAxis": x_axis or "category",
                "yAxis": y_axis or "value",
                "sort": "desc",
                "showLabels": True,
                "legend": False,
            }
        if component_type == "lineChart":
            return {"showPoints": True, "smooth": True}
        if component_type == "kpiTile":
            return {"label": "Result", "trend": "", "comparisonLabel": ""}
        if component_type == "table":
            return {
                "columns": display_columns,
                "emptyStateTitle": "No matching records found",
                "emptyStateDescription": "The query ran successfully but returned no rows for this prompt.",
                "rankBy": y_axis,
                "showTopBadge": False,
            }
        if component_type == "activityStream":
            return {"showUrgencyIndicator": True}
        if component_type == "pipelineBoard":
            return {"showValue": True, "colorByStage": True}
        return {}


# Module-level planner instance, created at import time.
visualization_planner = VisualizationPlanner()