fix: Oracle Canvas Metadata and deterministic semantic repair

2026-04-24 15:44:00 +05:30
parent 8d41ba5549
commit 61258978e1
4 changed files with 568 additions and 14 deletions
--- a/backend/oracle/natural_db_agent.py
+++ b/backend/oracle/natural_db_agent.py
@@ -360,8 +360,13 @@ class NaturalDbAgent:
                "Plan verifier repaired violations: "
                + ", ".join(violation.rule for violation in verification.violations if violation.severity == "blocking")
            )
-        if not verification.passed and verification.repair_failed:
-            warnings.append("Plan verifier found violations but repair failed. Proceeding with original SQL.")
+        if not verification.passed:
+            details = "; ".join(
+                f"{violation.rule}: {violation.detail}"
+                for violation in verification.violations
+                if violation.severity == "blocking"
+            )
+            raise RuntimeError(f"Oracle SQL plan failed verification: {details}")
        if verification.notes:
            warnings.extend(verification.notes)

@@ -463,6 +468,25 @@ class NaturalDbAgent:
                f"\n\nPREVIOUS ATTEMPT FAILED - EXECUTION FEEDBACK:\n{prior_feedback}\n"
                "You must address the feedback and change the query accordingly."
            )
+        example_section = (
+            "CANONICAL SQL PATTERNS:\n"
+            "Generic top QD clients:\n"
+            "SELECT p.full_name, p.primary_email, p.primary_phone, q.current_value AS qd_score, q.score_type, q.computed_at "
+            "FROM intel_qd_scores q JOIN crm_people p ON p.person_id = q.person_id "
+            "WHERE q.score_type = 'overall' ORDER BY q.current_value DESC LIMIT 8;\n"
+            "Property-scoped lowest QD clients:\n"
+            "SELECT p.full_name, p.primary_email, pi.project_name, q.current_value AS qd_score "
+            "FROM crm_property_interests pi JOIN crm_people p ON p.person_id = pi.person_id "
+            "JOIN intel_qd_scores q ON q.person_id = p.person_id "
+            "WHERE q.score_type = 'overall' AND pi.project_name ILIKE '%Atri Surya Toron%' "
+            "ORDER BY q.current_value ASC LIMIT 5;\n"
+            "Recently contacted high-interest clients:\n"
+            "SELECT p.full_name, p.primary_email, lc.last_contact_at, lc.last_channel, q.current_value AS qd_score "
+            "FROM read_last_contacted lc JOIN crm_people p ON p.person_id = lc.person_id "
+            "LEFT JOIN intel_qd_scores q ON q.person_id = p.person_id AND q.score_type = 'overall' "
+            "WHERE lc.last_contact_at >= NOW() - INTERVAL '3 months' "
+            "ORDER BY q.current_value DESC NULLS LAST LIMIT 10;"
+        )

        response = await runtime_llm_service.chat(
            provider_id="sglang",
@@ -472,7 +496,8 @@ class NaturalDbAgent:
                "Use the semantic catalog as the business source of truth, not raw column guessing. "
                "Generate exactly one SELECT or WITH query. "
                "Return strict JSON with keys: sql, title, rationale. "
-                "Never generate INSERT, UPDATE, DELETE, DDL, COPY, or permission statements."
+                "Never generate INSERT, UPDATE, DELETE, DDL, COPY, or permission statements. "
+                "Never use columns that are not present in the raw schema."
            ),
            messages=[
                {
@@ -480,6 +505,14 @@ class NaturalDbAgent:
                    "content": (
                        f"SEMANTIC CATALOG:\n{semantic_context}\n\n"
                        f"RAW SCHEMA:\n{schema_brief}\n\n"
+                        "NON-NEGOTIABLE DATA RULES:\n"
+                        "- crm_people is identity only; it does not own QD scores.\n"
+                        "- For QD score prompts, join intel_qd_scores.person_id to crm_people.person_id and use intel_qd_scores.current_value.\n"
+                        "- Valid intel_qd_scores.score_type values are: overall, intent, engagement, urgency, financial_qualification.\n"
+                        "- Never filter intel_qd_scores.score_type = 'QD'. For generic QD prompts use score_type = 'overall'.\n"
+                        "- For contact recency, use read_last_contacted.last_contact_at or intel_interactions.happened_at.\n"
+                        "- Do not use edge_communication_events.timestamp or crm_property_interests.last_discussed_at for contact recency.\n\n"
+                        f"{example_section}\n\n"
                        f"DETECTED INTENTS: {', '.join(detected_intents)}\n\n"
                        f"USER QUESTION:\n{prompt}\n\n"
                        f"ROW CAP: {row_limit}\n"