383 lines
14 KiB
Python
383 lines
14 KiB
Python
"""
|
|
oracle/visualization_planner.py
|
|
|
|
Pick Oracle canvas renderer types from actual result shape.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
from dataclasses import dataclass
|
|
from typing import Any
|
|
|
|
|
|
@dataclass
|
|
class ColumnProfile:
|
|
name: str
|
|
is_numeric: bool
|
|
is_string: bool
|
|
is_datetime: bool
|
|
is_boolean: bool
|
|
null_rate: float
|
|
sample_values: list[Any]
|
|
|
|
|
|
@dataclass
|
|
class VisualizationDecision:
|
|
component_type: str
|
|
x_axis: str | None
|
|
y_axis: str | None
|
|
series_cols: list[str]
|
|
dimension_cols: list[str]
|
|
measure_cols: list[str]
|
|
title: str
|
|
width_mode: str
|
|
min_height_px: int
|
|
skeleton_variant: str
|
|
viz_params: dict[str, Any]
|
|
data_bindings: dict[str, Any]
|
|
confidence: float
|
|
reasoning: str
|
|
|
|
|
|
def _looks_like_timestamp(value: str) -> bool:
|
|
return bool(re.match(r"\d{4}-\d{2}-\d{2}", value))
|
|
|
|
|
|
def _profile_columns(rows: list[dict[str, Any]], columns: list[str]) -> list[ColumnProfile]:
|
|
if not rows:
|
|
return [ColumnProfile(column, False, False, False, False, 1.0, []) for column in columns]
|
|
|
|
sample_size = min(len(rows), 20)
|
|
profiles: list[ColumnProfile] = []
|
|
for column in columns:
|
|
values = [rows[index].get(column) for index in range(sample_size)]
|
|
non_null = [value for value in values if value is not None]
|
|
null_rate = 1.0 - len(non_null) / sample_size if sample_size else 1.0
|
|
profiles.append(
|
|
ColumnProfile(
|
|
name=column,
|
|
is_numeric=any(isinstance(value, (int, float)) for value in non_null),
|
|
is_string=any(isinstance(value, str) and not _looks_like_timestamp(value) for value in non_null[:5]),
|
|
is_datetime=any(isinstance(value, str) and _looks_like_timestamp(value) for value in non_null[:5]),
|
|
is_boolean=any(isinstance(value, bool) for value in non_null),
|
|
null_rate=null_rate,
|
|
sample_values=non_null[:3],
|
|
)
|
|
)
|
|
return profiles
|
|
|
|
|
|
_DIMENSION_HINTS = {
|
|
"name", "full_name", "project_name", "developer_name", "agent_name",
|
|
"broker_company", "category", "label", "stage", "channel", "type",
|
|
"micro_market", "district", "status", "persona", "nationality",
|
|
}
|
|
_MEASURE_HINTS = {
|
|
"count", "total", "sum", "avg", "average", "value", "score", "rate",
|
|
"current_value", "qd_score", "probability", "interest_count", "visit_count",
|
|
"interaction_count", "days", "amount", "revenue",
|
|
}
|
|
_TIMESTAMP_HINTS = {"at", "date", "time", "when", "timestamp"}
|
|
|
|
_PREFERRED_X = [
|
|
"project_name", "developer_name", "category", "stage", "channel",
|
|
"micro_market", "broker_company", "agent_name", "name", "full_name",
|
|
"label", "status", "type",
|
|
]
|
|
_PREFERRED_Y = [
|
|
"count", "total", "interested_clients", "interest_count", "client_count",
|
|
"current_value", "qd_score", "value", "probability", "interaction_count",
|
|
"visit_count", "days_since_last_contact",
|
|
]
|
|
|
|
_TABLE_COLUMN_PRESETS: dict[str, list[str]] = {
|
|
"crm_people": ["full_name", "primary_phone", "primary_email", "persona_labels"],
|
|
"intel_qd_scores": ["full_name", "current_value", "score_type", "computed_at"],
|
|
"crm_leads": ["full_name", "stage", "status", "budget_band", "urgency"],
|
|
"intel_interactions": ["full_name", "channel", "interaction_type", "happened_at", "summary"],
|
|
"read_last_contacted": ["full_name", "last_contacted_at", "last_channel", "days_since_last_contact", "staleness_label"],
|
|
"crm_property_interests": ["full_name", "project_name", "interest_level", "configuration_preference"],
|
|
"intel_call_objections": ["full_name", "objection_type", "intensity", "was_resolved", "raised_at"],
|
|
"intel_extracted_facts": ["full_name", "fact_type", "fact_text", "confidence", "effective_date"],
|
|
"read_next_best_action": ["full_name", "action_label", "urgency", "recommended_channel", "execute_within_hours"],
|
|
}
|
|
|
|
|
|
def _pick_axis(candidates: list[str], preferred: list[str]) -> str | None:
|
|
for candidate in preferred:
|
|
if candidate in candidates:
|
|
return candidate
|
|
return candidates[0] if candidates else None
|
|
|
|
|
|
def _title_from_prompt(prompt: str) -> str:
|
|
words = re.sub(r"\s+", " ", prompt.strip()).strip(" ?.!")[:72]
|
|
return (words[:1].upper() + words[1:]) if words else "Oracle Query Result"
|
|
|
|
|
|
class VisualizationPlanner:
|
|
def plan(
|
|
self,
|
|
*,
|
|
rows: list[dict[str, Any]],
|
|
columns: list[str],
|
|
prompt: str,
|
|
source_tables: list[str],
|
|
profile_suggested_type: str | None = None,
|
|
title_from_planner: str | None = None,
|
|
) -> VisualizationDecision:
|
|
profiles = _profile_columns(rows, columns)
|
|
classifications = {profile.name: self._classify_column(profile) for profile in profiles}
|
|
|
|
dimensions = [column for column, kind in classifications.items() if kind == "dimension"]
|
|
measures = [column for column, kind in classifications.items() if kind == "measure"]
|
|
timestamps = [column for column, kind in classifications.items() if kind == "timestamp"]
|
|
row_count = len(rows)
|
|
prompt_lower = prompt.lower()
|
|
|
|
if profile_suggested_type:
|
|
return self._build_decision(
|
|
component_type=profile_suggested_type,
|
|
dimensions=dimensions,
|
|
measures=measures,
|
|
timestamps=timestamps,
|
|
columns=columns,
|
|
rows=rows,
|
|
row_count=row_count,
|
|
prompt=prompt,
|
|
source_tables=source_tables,
|
|
title=title_from_planner,
|
|
reasoning=f"Execution profiler suggested {profile_suggested_type}",
|
|
confidence=0.9,
|
|
)
|
|
|
|
timeline_terms = ("timeline", "history", "activity", "message", "call log", "whatsapp", "email", "conversation", "transcript", "interaction")
|
|
if any(term in prompt_lower for term in timeline_terms) and timestamps:
|
|
return self._build_decision(
|
|
component_type="activityStream",
|
|
dimensions=dimensions,
|
|
measures=measures,
|
|
timestamps=timestamps,
|
|
columns=columns,
|
|
rows=rows,
|
|
row_count=row_count,
|
|
prompt=prompt,
|
|
source_tables=source_tables,
|
|
title=title_from_planner,
|
|
reasoning="Activity-like prompt plus timestamped result.",
|
|
confidence=0.88,
|
|
)
|
|
|
|
if row_count == 1 and measures and not dimensions:
|
|
return self._build_decision(
|
|
component_type="kpiTile",
|
|
dimensions=dimensions,
|
|
measures=measures,
|
|
timestamps=timestamps,
|
|
columns=columns,
|
|
rows=rows,
|
|
row_count=row_count,
|
|
prompt=prompt,
|
|
source_tables=source_tables,
|
|
title=title_from_planner,
|
|
reasoning="Single numeric row.",
|
|
confidence=0.92,
|
|
)
|
|
|
|
if timestamps and measures and any(term in prompt_lower for term in ("trend", "over time", "monthly", "weekly", "growth", "timeseries")):
|
|
return self._build_decision(
|
|
component_type="lineChart",
|
|
dimensions=dimensions,
|
|
measures=measures,
|
|
timestamps=timestamps,
|
|
columns=columns,
|
|
rows=rows,
|
|
row_count=row_count,
|
|
prompt=prompt,
|
|
source_tables=source_tables,
|
|
title=title_from_planner,
|
|
reasoning="Temporal series plus measure.",
|
|
confidence=0.87,
|
|
)
|
|
|
|
if ("stage" in columns or "pipeline" in prompt_lower) and any(term in prompt_lower for term in ("pipeline", "funnel", "stage", "kanban", "deal")):
|
|
return self._build_decision(
|
|
component_type="pipelineBoard",
|
|
dimensions=dimensions,
|
|
measures=measures,
|
|
timestamps=timestamps,
|
|
columns=columns,
|
|
rows=rows,
|
|
row_count=row_count,
|
|
prompt=prompt,
|
|
source_tables=source_tables,
|
|
title=title_from_planner,
|
|
reasoning="Pipeline-like prompt and stage-like data.",
|
|
confidence=0.85,
|
|
)
|
|
|
|
if dimensions and measures and row_count <= 30 and not timestamps:
|
|
return self._build_decision(
|
|
component_type="barChart",
|
|
dimensions=dimensions,
|
|
measures=measures,
|
|
timestamps=timestamps,
|
|
columns=columns,
|
|
rows=rows,
|
|
row_count=row_count,
|
|
prompt=prompt,
|
|
source_tables=source_tables,
|
|
title=title_from_planner,
|
|
reasoning="Categorical dimension plus measure.",
|
|
confidence=0.8,
|
|
)
|
|
|
|
return self._build_decision(
|
|
component_type="table",
|
|
dimensions=dimensions,
|
|
measures=measures,
|
|
timestamps=timestamps,
|
|
columns=columns,
|
|
rows=rows,
|
|
row_count=row_count,
|
|
prompt=prompt,
|
|
source_tables=source_tables,
|
|
title=title_from_planner,
|
|
reasoning="Default structured table.",
|
|
confidence=0.7,
|
|
)
|
|
|
|
@staticmethod
|
|
def _classify_column(profile: ColumnProfile) -> str:
|
|
lower = profile.name.lower()
|
|
if lower.endswith("_id"):
|
|
return "identity"
|
|
if profile.is_datetime or any(token in lower for token in _TIMESTAMP_HINTS):
|
|
return "timestamp"
|
|
if lower in _DIMENSION_HINTS or (profile.is_string and not profile.is_numeric):
|
|
return "dimension"
|
|
if profile.is_numeric or any(token in lower for token in _MEASURE_HINTS):
|
|
return "measure"
|
|
return "other"
|
|
|
|
def _build_decision(
|
|
self,
|
|
*,
|
|
component_type: str,
|
|
dimensions: list[str],
|
|
measures: list[str],
|
|
timestamps: list[str],
|
|
columns: list[str],
|
|
rows: list[dict[str, Any]],
|
|
row_count: int,
|
|
prompt: str,
|
|
source_tables: list[str],
|
|
title: str | None,
|
|
reasoning: str,
|
|
confidence: float,
|
|
) -> VisualizationDecision:
|
|
x_axis = _pick_axis(dimensions + timestamps, _PREFERRED_X + list(timestamps))
|
|
y_axis = _pick_axis(measures, _PREFERRED_Y)
|
|
|
|
if component_type == "table":
|
|
display_columns = self._table_columns(columns, source_tables)
|
|
else:
|
|
display_columns = columns
|
|
|
|
viz_params = self._build_viz_params(
|
|
component_type=component_type,
|
|
x_axis=x_axis,
|
|
y_axis=y_axis,
|
|
display_columns=display_columns,
|
|
row_count=row_count,
|
|
)
|
|
data_bindings = {
|
|
"dimensions": dimensions[:2] if dimensions else (timestamps[:1] if timestamps else []),
|
|
"measures": measures[:3],
|
|
"series": [],
|
|
"filters": [],
|
|
}
|
|
width_mode = "full" if component_type in {"table", "activityStream", "pipelineBoard"} else "half"
|
|
height_map = {
|
|
"kpiTile": 140,
|
|
"barChart": 320,
|
|
"lineChart": 320,
|
|
"activityStream": 380,
|
|
"table": 300,
|
|
"pipelineBoard": 400,
|
|
}
|
|
skeleton_map = {
|
|
"kpiTile": "kpi",
|
|
"barChart": "chart",
|
|
"lineChart": "chart",
|
|
"activityStream": "table",
|
|
"table": "table",
|
|
"pipelineBoard": "pipeline",
|
|
}
|
|
|
|
return VisualizationDecision(
|
|
component_type=component_type,
|
|
x_axis=x_axis,
|
|
y_axis=y_axis,
|
|
series_cols=[],
|
|
dimension_cols=dimensions,
|
|
measure_cols=measures,
|
|
title=title or _title_from_prompt(prompt),
|
|
width_mode=width_mode,
|
|
min_height_px=height_map.get(component_type, 300),
|
|
skeleton_variant=skeleton_map.get(component_type, "generic"),
|
|
viz_params=viz_params,
|
|
data_bindings=data_bindings,
|
|
confidence=confidence,
|
|
reasoning=reasoning,
|
|
)
|
|
|
|
@staticmethod
|
|
def _table_columns(all_columns: list[str], source_tables: list[str]) -> list[str]:
|
|
for table in source_tables:
|
|
preset = _TABLE_COLUMN_PRESETS.get(table)
|
|
if preset:
|
|
matched = [column for column in preset if column in all_columns]
|
|
if matched:
|
|
return matched
|
|
return [column for column in all_columns if not column.endswith("_id") or column == "person_id"][:8]
|
|
|
|
@staticmethod
|
|
def _build_viz_params(
|
|
*,
|
|
component_type: str,
|
|
x_axis: str | None,
|
|
y_axis: str | None,
|
|
display_columns: list[str],
|
|
row_count: int,
|
|
) -> dict[str, Any]:
|
|
del row_count
|
|
if component_type == "barChart":
|
|
return {
|
|
"xAxis": x_axis or "category",
|
|
"yAxis": y_axis or "value",
|
|
"sort": "desc",
|
|
"showLabels": True,
|
|
"legend": False,
|
|
}
|
|
if component_type == "lineChart":
|
|
return {"showPoints": True, "smooth": True}
|
|
if component_type == "kpiTile":
|
|
return {"label": "Result", "trend": "", "comparisonLabel": ""}
|
|
if component_type == "table":
|
|
return {
|
|
"columns": display_columns,
|
|
"emptyStateTitle": "No matching records found",
|
|
"emptyStateDescription": "The query ran successfully but returned no rows for this prompt.",
|
|
"rankBy": y_axis,
|
|
"showTopBadge": False,
|
|
}
|
|
if component_type == "activityStream":
|
|
return {"showUrgencyIndicator": True}
|
|
if component_type == "pipelineBoard":
|
|
return {"showValue": True, "colorByStage": True}
|
|
return {}
|
|
|
|
|
|
visualization_planner = VisualizationPlanner()
|