fix: Oracle Canvas JSON Component Generation planning and orchestration logic

2026-04-24 05:14:11 +05:30
parent 9f27e6a017
commit cf602822b0
6 changed files with 1555 additions and 115 deletions
--- a/backend/oracle/execution_profiler.py
+++ b/backend/oracle/execution_profiler.py
@@ -0,0 +1,202 @@
+"""
+oracle/execution_profiler.py
+
+Post-execution quality checks for Oracle natural DB queries.
+"""
+from __future__ import annotations
+
+import re
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from typing import Any
+
+# Age, in whole days, beyond which a parsed result timestamp is counted as stale.
+_STALE_THRESHOLD_DAYS = 365
+
+
+@dataclass
+class QualityIssue:
+    """A single quality finding raised while profiling a query result."""
+
+    # Machine-readable identifier of the finding (for example "zero_rows").
+    code: str
+    # Human-readable summary of what was detected.
+    description: str
+    # Severity label; this module emits "blocking" and "warning".
+    severity: str
+    # Hint text for a re-plan; an empty string means there is no hint to pass on.
+    replan_hint: str
+
+
+@dataclass
+class ProfileResult:
+    """Aggregated outcome of profiling one query result."""
+
+    # As populated by ExecutionProfiler.profile: True when no issue is "blocking".
+    passed: bool
+    # Number of rows in the profiled result.
+    row_count: int
+    # Every finding that was raised, regardless of severity.
+    issues: list[QualityIssue] = field(default_factory=list)
+    # The non-empty replan_hint strings taken from the issues.
+    replan_hints: list[str] = field(default_factory=list)
+    # Component type suggested for rendering (for example "kpiTile"), or None.
+    suggested_component_type: str | None = None
+
+
+# Lead words that may introduce an explicit result count in a prompt.
+_CARDINALITY_LEAD = r"\b(?:top|last|latest|recent|first|show|which)\s+"
+
+# Spelled-out counts recognised after a lead word. The word pattern below is
+# derived from these keys so the table and the regex cannot drift apart.
+_NUMBER_WORDS = {
+    "one": 1,
+    "two": 2,
+    "three": 3,
+    "four": 4,
+    "five": 5,
+    "six": 6,
+    "seven": 7,
+    "eight": 8,
+    "nine": 9,
+    "ten": 10,
+    "eleven": 11,
+    "twelve": 12,
+    "fifteen": 15,
+    "twenty": 20,
+}
+
+_NUMERIC_CARDINALITY_RE = re.compile(_CARDINALITY_LEAD + r"(\d{1,4})\b")
+_WORD_CARDINALITY_RE = re.compile(_CARDINALITY_LEAD + "(" + "|".join(_NUMBER_WORDS) + r")\b")
+
+
+def _extract_cardinality_from_prompt(prompt: str) -> int | None:
+    """Return the result count a prompt explicitly asks for, if any.
+
+    The prompt is lower-cased and searched for a lead word ("top", "last",
+    "latest", "recent", "first", "show", "which") directly followed by either
+    a 1-4 digit number or one of the spelled-out numbers in ``_NUMBER_WORDS``.
+    A digit match anywhere in the prompt takes precedence over a word match.
+
+    Args:
+        prompt: Free-text user prompt.
+
+    Returns:
+        The requested positive count, or ``None`` when no count is found.
+        A literal zero ("top 0") is treated as "no count": it cannot be a
+        meaningful row expectation and would make every non-empty result
+        look like a cardinality mismatch.
+    """
+    lowered = prompt.lower()
+
+    numeric_match = _NUMERIC_CARDINALITY_RE.search(lowered)
+    if numeric_match:
+        requested = int(numeric_match.group(1))
+        return requested if requested > 0 else None
+
+    word_match = _WORD_CARDINALITY_RE.search(lowered)
+    if word_match:
+        return _NUMBER_WORDS[word_match.group(1)]
+    return None
+
+
+def _all_null_measures(rows: list[dict[str, Any]], columns: list[str]) -> bool:
+    """Return True when the sampled result rows carry no non-null value at all.
+
+    Without column type information a measure column that is entirely null
+    cannot be told apart from any other null column, so the check reports only
+    the unambiguous case: every value in the sampled rows is ``None``.
+
+    Args:
+        rows: Result rows keyed by column name.
+        columns: Column names of the result; an empty list disables the check.
+
+    Returns:
+        ``True`` if every value in the first 20 rows is ``None``; ``False`` if
+        ``rows`` or ``columns`` is empty or any sampled value is non-null.
+    """
+    if not rows or not columns:
+        return False
+
+    # A single sample window is used for the whole decision, so a late non-null
+    # value (row 6-20) is enough to clear the check. Any non-null value counts,
+    # not just int/float, because drivers may return other numeric types.
+    return all(value is None for row in rows[:20] for value in row.values())
+
+
+def _timestamps_are_stale(rows: list[dict[str, Any]], columns: list[str]) -> bool:
+    """Return True when every parseable timestamp in the sampled rows is stale.
+
+    Columns are treated as timestamp columns when their name contains one of
+    "_at", "date", "timestamp", "when" or "time"; the match is
+    case-insensitive so upper-case column names are recognised too. Only the
+    first 20 rows are inspected. Values may be ``datetime`` objects or
+    ISO-8601 strings (a trailing "Z" is accepted); anything else, including
+    unparseable strings, is ignored. Naive timestamps are interpreted as UTC.
+
+    Args:
+        rows: Result rows keyed by column name.
+        columns: Column names of the result.
+
+    Returns:
+        ``True`` if at least one timestamp was read and all of them are older
+        than ``_STALE_THRESHOLD_DAYS`` days; ``False`` otherwise.
+    """
+    if not rows:
+        return False
+
+    name_tokens = ("_at", "date", "timestamp", "when", "time")
+    timestamp_columns = [
+        column for column in columns if any(token in column.lower() for token in name_tokens)
+    ]
+    if not timestamp_columns:
+        return False
+
+    now = datetime.now(timezone.utc)
+    checked = 0
+    for row in rows[:20]:
+        for column in timestamp_columns:
+            value = row.get(column)
+            if isinstance(value, datetime):
+                parsed = value
+            elif isinstance(value, str):
+                try:
+                    parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
+                except ValueError:
+                    continue
+            else:
+                continue
+            if parsed.tzinfo is None:
+                # Naive values are assumed to be UTC so they can be compared with `now`.
+                parsed = parsed.replace(tzinfo=timezone.utc)
+            if (now - parsed).days <= _STALE_THRESHOLD_DAYS:
+                # One fresh timestamp is enough: the result is not uniformly stale.
+                return False
+            checked += 1
+    return checked > 0
+
+
+class ExecutionProfiler:
+    """Runs post-execution quality checks over the rows returned by a query."""
+
+    def profile(
+        self,
+        *,
+        rows: list[dict[str, Any]],
+        columns: list[str],
+        sql: str,
+        prompt: str,
+        source_tables: list[str],
+        row_limit: int,
+    ) -> ProfileResult:
+        """Inspect a query result and report quality issues.
+
+        Checks performed, in order:
+          * zero rows (blocking), otherwise all-null result values (blocking);
+          * more than three times the row count requested in the prompt (warning);
+          * every parseable timestamp being stale (warning);
+          * a single all-numeric row of at most four columns, for which a
+            "kpiTile" component is suggested (warning, no replan hint).
+
+        Args:
+            rows: Result rows keyed by column name.
+            columns: Column names of the result.
+            sql: The executed SQL; accepted but not used by the current checks.
+            prompt: The user prompt, searched for a requested row count.
+            source_tables: Accepted but not used by the current checks.
+            row_limit: Accepted but not used by the current checks.
+
+        Returns:
+            A ProfileResult whose ``passed`` flag is False only when a
+            blocking issue was found.
+        """
+        # Part of the keyword interface, but no current check reads them.
+        del sql, source_tables, row_limit
+        issues: list[QualityIssue] = []
+
+        if not rows:
+            issues.append(
+                QualityIssue(
+                    code="zero_rows",
+                    description="Query returned zero rows.",
+                    severity="blocking",
+                    replan_hint=(
+                        "The query returned zero rows. Use authoritative recency and business-semantic columns "
+                        "from the semantic catalog. Avoid sparse or deprecated timestamp fields."
+                    ),
+                )
+            )
+        elif _all_null_measures(rows, columns):
+            issues.append(
+                QualityIssue(
+                    code="all_null_measures",
+                    description="Rows returned but numeric measure columns are null.",
+                    severity="blocking",
+                    replan_hint=(
+                        "The query returned rows but numeric measures are null. "
+                        "Check join keys and metric source columns."
+                    ),
+                )
+            )
+
+        # Tolerate up to 3x the requested count before flagging a mismatch.
+        requested_n = _extract_cardinality_from_prompt(prompt)
+        if requested_n is not None and len(rows) > requested_n * 3:
+            issues.append(
+                QualityIssue(
+                    code="cardinality_mismatch",
+                    description=f"Prompt asked for about {requested_n} rows but query returned {len(rows)}.",
+                    severity="warning",
+                    replan_hint=f"Respect the requested result count and add LIMIT {requested_n}.",
+                )
+            )
+
+        if rows and _timestamps_are_stale(rows, columns):
+            issues.append(
+                QualityIssue(
+                    code="stale_timestamps",
+                    description="Returned timestamps appear stale.",
+                    severity="warning",
+                    replan_hint="The result timestamps are stale. Use authoritative recency fields.",
+                )
+            )
+
+        suggested_type: str | None = None
+        if len(rows) == 1 and len(columns) <= 4:
+            non_null_values = [value for value in rows[0].values() if value is not None]
+            if non_null_values and all(isinstance(value, (int, float)) for value in non_null_values):
+                suggested_type = "kpiTile"
+                # Empty replan_hint: this is a rendering suggestion, so nothing
+                # is added to the replan hints below.
+                issues.append(
+                    QualityIssue(
+                        code="single_row_scalar",
+                        description="Single scalar row is better rendered as KPI tile.",
+                        severity="warning",
+                        replan_hint="",
+                    )
+                )
+
+        return ProfileResult(
+            passed=not any(issue.severity == "blocking" for issue in issues),
+            row_count=len(rows),
+            issues=issues,
+            replan_hints=[issue.replan_hint for issue in issues if issue.replan_hint],
+            suggested_component_type=suggested_type,
+        )
+
+
+# Module-level ExecutionProfiler instance, created once when this module is imported.
+execution_profiler = ExecutionProfiler()