Files
Project_Velocity/backend/oracle/visualization_planner.py

383 lines
14 KiB
Python

"""
oracle/visualization_planner.py
Pick Oracle canvas renderer types from actual result shape.
"""
from __future__ import annotations
import re
from dataclasses import dataclass
from typing import Any
@dataclass
class ColumnProfile:
name: str
is_numeric: bool
is_string: bool
is_datetime: bool
is_boolean: bool
null_rate: float
sample_values: list[Any]
@dataclass
class VisualizationDecision:
component_type: str
x_axis: str | None
y_axis: str | None
series_cols: list[str]
dimension_cols: list[str]
measure_cols: list[str]
title: str
width_mode: str
min_height_px: int
skeleton_variant: str
viz_params: dict[str, Any]
data_bindings: dict[str, Any]
confidence: float
reasoning: str
def _looks_like_timestamp(value: str) -> bool:
return bool(re.match(r"\d{4}-\d{2}-\d{2}", value))
def _profile_columns(rows: list[dict[str, Any]], columns: list[str]) -> list[ColumnProfile]:
if not rows:
return [ColumnProfile(column, False, False, False, False, 1.0, []) for column in columns]
sample_size = min(len(rows), 20)
profiles: list[ColumnProfile] = []
for column in columns:
values = [rows[index].get(column) for index in range(sample_size)]
non_null = [value for value in values if value is not None]
null_rate = 1.0 - len(non_null) / sample_size if sample_size else 1.0
profiles.append(
ColumnProfile(
name=column,
is_numeric=any(isinstance(value, (int, float)) for value in non_null),
is_string=any(isinstance(value, str) and not _looks_like_timestamp(value) for value in non_null[:5]),
is_datetime=any(isinstance(value, str) and _looks_like_timestamp(value) for value in non_null[:5]),
is_boolean=any(isinstance(value, bool) for value in non_null),
null_rate=null_rate,
sample_values=non_null[:3],
)
)
return profiles
_DIMENSION_HINTS = {
"name", "full_name", "project_name", "developer_name", "agent_name",
"broker_company", "category", "label", "stage", "channel", "type",
"micro_market", "district", "status", "persona", "nationality",
}
_MEASURE_HINTS = {
"count", "total", "sum", "avg", "average", "value", "score", "rate",
"current_value", "qd_score", "probability", "interest_count", "visit_count",
"interaction_count", "days", "amount", "revenue",
}
_TIMESTAMP_HINTS = {"at", "date", "time", "when", "timestamp"}
_PREFERRED_X = [
"project_name", "developer_name", "category", "stage", "channel",
"micro_market", "broker_company", "agent_name", "name", "full_name",
"label", "status", "type",
]
_PREFERRED_Y = [
"count", "total", "interested_clients", "interest_count", "client_count",
"current_value", "qd_score", "value", "probability", "interaction_count",
"visit_count", "days_since_last_contact",
]
_TABLE_COLUMN_PRESETS: dict[str, list[str]] = {
"crm_people": ["full_name", "primary_phone", "primary_email", "persona_labels"],
"intel_qd_scores": ["full_name", "current_value", "score_type", "computed_at"],
"crm_leads": ["full_name", "stage", "status", "budget_band", "urgency"],
"intel_interactions": ["full_name", "channel", "interaction_type", "happened_at", "summary"],
"read_last_contacted": ["full_name", "last_contacted_at", "last_channel", "days_since_last_contact", "staleness_label"],
"crm_property_interests": ["full_name", "project_name", "interest_level", "configuration_preference"],
"intel_call_objections": ["full_name", "objection_type", "intensity", "was_resolved", "raised_at"],
"intel_extracted_facts": ["full_name", "fact_type", "fact_text", "confidence", "effective_date"],
"read_next_best_action": ["full_name", "action_label", "urgency", "recommended_channel", "execute_within_hours"],
}
def _pick_axis(candidates: list[str], preferred: list[str]) -> str | None:
for candidate in preferred:
if candidate in candidates:
return candidate
return candidates[0] if candidates else None
def _title_from_prompt(prompt: str) -> str:
words = re.sub(r"\s+", " ", prompt.strip()).strip(" ?.!")[:72]
return (words[:1].upper() + words[1:]) if words else "Oracle Query Result"
class VisualizationPlanner:
def plan(
self,
*,
rows: list[dict[str, Any]],
columns: list[str],
prompt: str,
source_tables: list[str],
profile_suggested_type: str | None = None,
title_from_planner: str | None = None,
) -> VisualizationDecision:
profiles = _profile_columns(rows, columns)
classifications = {profile.name: self._classify_column(profile) for profile in profiles}
dimensions = [column for column, kind in classifications.items() if kind == "dimension"]
measures = [column for column, kind in classifications.items() if kind == "measure"]
timestamps = [column for column, kind in classifications.items() if kind == "timestamp"]
row_count = len(rows)
prompt_lower = prompt.lower()
if profile_suggested_type:
return self._build_decision(
component_type=profile_suggested_type,
dimensions=dimensions,
measures=measures,
timestamps=timestamps,
columns=columns,
rows=rows,
row_count=row_count,
prompt=prompt,
source_tables=source_tables,
title=title_from_planner,
reasoning=f"Execution profiler suggested {profile_suggested_type}",
confidence=0.9,
)
timeline_terms = ("timeline", "history", "activity", "message", "call log", "whatsapp", "email", "conversation", "transcript", "interaction")
if any(term in prompt_lower for term in timeline_terms) and timestamps:
return self._build_decision(
component_type="activityStream",
dimensions=dimensions,
measures=measures,
timestamps=timestamps,
columns=columns,
rows=rows,
row_count=row_count,
prompt=prompt,
source_tables=source_tables,
title=title_from_planner,
reasoning="Activity-like prompt plus timestamped result.",
confidence=0.88,
)
if row_count == 1 and measures and not dimensions:
return self._build_decision(
component_type="kpiTile",
dimensions=dimensions,
measures=measures,
timestamps=timestamps,
columns=columns,
rows=rows,
row_count=row_count,
prompt=prompt,
source_tables=source_tables,
title=title_from_planner,
reasoning="Single numeric row.",
confidence=0.92,
)
if timestamps and measures and any(term in prompt_lower for term in ("trend", "over time", "monthly", "weekly", "growth", "timeseries")):
return self._build_decision(
component_type="lineChart",
dimensions=dimensions,
measures=measures,
timestamps=timestamps,
columns=columns,
rows=rows,
row_count=row_count,
prompt=prompt,
source_tables=source_tables,
title=title_from_planner,
reasoning="Temporal series plus measure.",
confidence=0.87,
)
if ("stage" in columns or "pipeline" in prompt_lower) and any(term in prompt_lower for term in ("pipeline", "funnel", "stage", "kanban", "deal")):
return self._build_decision(
component_type="pipelineBoard",
dimensions=dimensions,
measures=measures,
timestamps=timestamps,
columns=columns,
rows=rows,
row_count=row_count,
prompt=prompt,
source_tables=source_tables,
title=title_from_planner,
reasoning="Pipeline-like prompt and stage-like data.",
confidence=0.85,
)
if dimensions and measures and row_count <= 30 and not timestamps:
return self._build_decision(
component_type="barChart",
dimensions=dimensions,
measures=measures,
timestamps=timestamps,
columns=columns,
rows=rows,
row_count=row_count,
prompt=prompt,
source_tables=source_tables,
title=title_from_planner,
reasoning="Categorical dimension plus measure.",
confidence=0.8,
)
return self._build_decision(
component_type="table",
dimensions=dimensions,
measures=measures,
timestamps=timestamps,
columns=columns,
rows=rows,
row_count=row_count,
prompt=prompt,
source_tables=source_tables,
title=title_from_planner,
reasoning="Default structured table.",
confidence=0.7,
)
@staticmethod
def _classify_column(profile: ColumnProfile) -> str:
lower = profile.name.lower()
if lower.endswith("_id"):
return "identity"
if profile.is_datetime or any(token in lower for token in _TIMESTAMP_HINTS):
return "timestamp"
if lower in _DIMENSION_HINTS or (profile.is_string and not profile.is_numeric):
return "dimension"
if profile.is_numeric or any(token in lower for token in _MEASURE_HINTS):
return "measure"
return "other"
def _build_decision(
self,
*,
component_type: str,
dimensions: list[str],
measures: list[str],
timestamps: list[str],
columns: list[str],
rows: list[dict[str, Any]],
row_count: int,
prompt: str,
source_tables: list[str],
title: str | None,
reasoning: str,
confidence: float,
) -> VisualizationDecision:
x_axis = _pick_axis(dimensions + timestamps, _PREFERRED_X + list(timestamps))
y_axis = _pick_axis(measures, _PREFERRED_Y)
if component_type == "table":
display_columns = self._table_columns(columns, source_tables)
else:
display_columns = columns
viz_params = self._build_viz_params(
component_type=component_type,
x_axis=x_axis,
y_axis=y_axis,
display_columns=display_columns,
row_count=row_count,
)
data_bindings = {
"dimensions": dimensions[:2] if dimensions else (timestamps[:1] if timestamps else []),
"measures": measures[:3],
"series": [],
"filters": [],
}
width_mode = "full" if component_type in {"table", "activityStream", "pipelineBoard"} else "half"
height_map = {
"kpiTile": 140,
"barChart": 320,
"lineChart": 320,
"activityStream": 380,
"table": 300,
"pipelineBoard": 400,
}
skeleton_map = {
"kpiTile": "kpi",
"barChart": "chart",
"lineChart": "chart",
"activityStream": "table",
"table": "table",
"pipelineBoard": "pipeline",
}
return VisualizationDecision(
component_type=component_type,
x_axis=x_axis,
y_axis=y_axis,
series_cols=[],
dimension_cols=dimensions,
measure_cols=measures,
title=title or _title_from_prompt(prompt),
width_mode=width_mode,
min_height_px=height_map.get(component_type, 300),
skeleton_variant=skeleton_map.get(component_type, "generic"),
viz_params=viz_params,
data_bindings=data_bindings,
confidence=confidence,
reasoning=reasoning,
)
@staticmethod
def _table_columns(all_columns: list[str], source_tables: list[str]) -> list[str]:
for table in source_tables:
preset = _TABLE_COLUMN_PRESETS.get(table)
if preset:
matched = [column for column in preset if column in all_columns]
if matched:
return matched
return [column for column in all_columns if not column.endswith("_id") or column == "person_id"][:8]
@staticmethod
def _build_viz_params(
*,
component_type: str,
x_axis: str | None,
y_axis: str | None,
display_columns: list[str],
row_count: int,
) -> dict[str, Any]:
del row_count
if component_type == "barChart":
return {
"xAxis": x_axis or "category",
"yAxis": y_axis or "value",
"sort": "desc",
"showLabels": True,
"legend": False,
}
if component_type == "lineChart":
return {"showPoints": True, "smooth": True}
if component_type == "kpiTile":
return {"label": "Result", "trend": "", "comparisonLabel": ""}
if component_type == "table":
return {
"columns": display_columns,
"emptyStateTitle": "No matching records found",
"emptyStateDescription": "The query ran successfully but returned no rows for this prompt.",
"rankBy": y_axis,
"showTopBadge": False,
}
if component_type == "activityStream":
return {"showUrgencyIndicator": True}
if component_type == "pipelineBoard":
return {"showValue": True, "colorByStage": True}
return {}
visualization_planner = VisualizationPlanner()