Files
Project_Velocity/backend/oracle/data_access_gateway.py

485 lines
24 KiB
Python

"""
oracle/data_access_gateway.py
Read-only, policy-aware PostgreSQL query executor for Oracle datasets.
Nemoclaw/LLM is treated strictly as a planner. The gateway executes only
whitelisted read models and always applies policy before touching data.
"""
from __future__ import annotations
import logging
import os
from dataclasses import dataclass
from typing import Any
try:
import asyncpg # type: ignore
except Exception: # pragma: no cover
asyncpg = None # type: ignore
from .policy_service import PolicyContext, PolicyService
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# True when ORACLE_ALLOW_IN_MEMORY_FALLBACK is "1", "true" or "yes" (case-insensitive).
# Read once, when the module is imported.
_ALLOW_IN_MEMORY = os.getenv("ORACLE_ALLOW_IN_MEMORY_FALLBACK", "").lower() in {"1", "true", "yes"}
# Maps "crm_*" dataset names to the "oracle_*" dataset names they are executed as.
_DATASET_ALIASES = {
"crm_last_interacted_clients": "oracle_last_contacted_clients",
"crm_top_interested_clients": "oracle_top_interested_clients",
"crm_interaction_timeline": "oracle_client_interaction_timeline",
"crm_property_interest_rollup": "oracle_property_interest_rollup",
}
@dataclass
class QueryExecutionResult:
    """Outcome of one gateway request: the rows produced plus any warnings."""

    # Result rows, each a mapping of column name to value.
    rows: list[dict[str, Any]]
    # Messages collected while handling the request.
    warnings: list[str]
def _db_ready() -> bool:
    """Report whether a database connection can be attempted.

    Returns:
        False when asyncpg could not be imported. Otherwise True when
        DATABASE_URL is set and does not start with "PLACEHOLDER", or when
        VELOCITY_DB_NAME, VELOCITY_DB_USER and VELOCITY_DB_PASSWORD are all
        set to non-empty values.
    """
    if asyncpg is None:
        return False

    url = os.getenv("DATABASE_URL", "")
    if url and not url.startswith("PLACEHOLDER"):
        return True

    # Without a usable URL, every discrete credential must be present.
    for env_name in ("VELOCITY_DB_NAME", "VELOCITY_DB_USER", "VELOCITY_DB_PASSWORD"):
        if not os.getenv(env_name):
            return False
    return True
async def _connect_db() -> Any:
    """Open a new asyncpg connection from environment configuration.

    DATABASE_URL is used when it is set and does not start with
    "PLACEHOLDER". Otherwise the connection is built from the VELOCITY_DB_*
    variables; host and port fall back to "localhost" and 5432 when unset
    or empty.

    Returns:
        The connection object returned by ``asyncpg.connect``.

    Raises:
        RuntimeError: If asyncpg could not be imported.
        KeyError: If VELOCITY_DB_NAME, VELOCITY_DB_USER or
            VELOCITY_DB_PASSWORD is missing when DATABASE_URL is not usable.
        ValueError: If VELOCITY_DB_PORT is not an integer.
    """
    if asyncpg is None:
        # Explicit raise rather than ``assert`` so the guard survives ``python -O``.
        raise RuntimeError("asyncpg is not installed; cannot open a PostgreSQL connection.")

    database_url = os.getenv("DATABASE_URL", "")
    if database_url and not database_url.startswith("PLACEHOLDER"):
        return await asyncpg.connect(database_url)

    # ``or`` (not a getenv default) so a variable that is set but empty
    # still falls back to the default instead of failing in int("").
    return await asyncpg.connect(
        host=os.getenv("VELOCITY_DB_HOST") or "localhost",
        port=int(os.getenv("VELOCITY_DB_PORT") or "5432"),
        database=os.environ["VELOCITY_DB_NAME"],
        user=os.environ["VELOCITY_DB_USER"],
        password=os.environ["VELOCITY_DB_PASSWORD"],
    )
class DataAccessGateway:
    """Policy-gated, read-only executor for a fixed set of dataset queries.

    A retrieval plan only selects which predefined query runs; SQL text is
    never taken from the plan or the prompt. Each request is validated and
    audited through ``PolicyService`` before any query is issued, and rows
    are passed through ``PolicyService.redact`` before being returned.
    """

    def __init__(self) -> None:
        self.policy_service = PolicyService()

    async def execute_component_plan(
        self,
        component_plan: dict[str, Any],
        ctx: PolicyContext,
        prompt: str,
    ) -> QueryExecutionResult:
        """Validate a retrieval plan, run its dataset query and redact the rows.

        Args:
            component_plan: Retrieval plan; its "dataset" entry names the
                dataset to query.
            ctx: Policy context used for validation, auditing and (for the
                tenant-scoped datasets) the ``tenant_id`` query parameter.
            prompt: User prompt; only used to choose between predefined
                query variants.

        Returns:
            A ``QueryExecutionResult``. Rows are empty when the dataset is
            missing, policy validation fails, the in-memory fallback is
            active, or the query raises; in those cases ``warnings``
            carries the reason where one is available.

        Raises:
            RuntimeError: If the database is not configured and neither the
                in-memory fallback flag nor PYTEST_CURRENT_TEST is set.
        """
        # ``or ""`` so an explicit None (JSON null from the planner) is
        # reported as missing rather than becoming the dataset name "None".
        dataset = str(component_plan.get("dataset") or "").strip()
        if not dataset:
            return QueryExecutionResult(rows=[], warnings=["Dataset missing from retrieval plan."])

        validation = self.policy_service.validate_retrieval_plan(component_plan, ctx)
        # The audit record is written whether or not validation passed.
        self.policy_service.audit_policy_check(ctx, dataset, validation)
        if not validation.passed:
            return QueryExecutionResult(rows=[], warnings=validation.errors)

        if not _db_ready():
            if _ALLOW_IN_MEMORY or "PYTEST_CURRENT_TEST" in os.environ:
                return QueryExecutionResult(rows=[], warnings=[])
            raise RuntimeError("Oracle requires DATABASE_URL and asyncpg for real-time data access.")

        try:
            rows = await self._query_dataset(
                dataset=_DATASET_ALIASES.get(dataset, dataset),
                row_limit=validation.effective_row_limit,
                ctx=ctx,
                prompt=prompt,
            )
        except Exception as exc:
            # Best-effort boundary: a failed query degrades to a warning.
            # exc_info keeps the traceback in the log for diagnosis.
            logger.warning(
                "DATA_GATEWAY query_failed dataset=%s error=%s", dataset, exc, exc_info=True
            )
            return QueryExecutionResult(rows=[], warnings=[f"{dataset}: {exc}"])

        redacted = self.policy_service.redact(rows, validation.redaction_policy)
        return QueryExecutionResult(rows=redacted, warnings=validation.warnings)

    async def _query_dataset(
        self,
        *,
        dataset: str,
        row_limit: int,
        ctx: PolicyContext,
        prompt: str,
    ) -> list[dict[str, Any]]:
        """Run the predefined query for *dataset* and return rows as dicts.

        The query is built before connecting, so an unknown dataset or an
        invalid row limit fails without opening a connection. A new
        connection is opened per call and closed even when the fetch raises.
        """
        sql, params = self._build_whitelisted_query(dataset, row_limit, ctx, prompt)
        conn = await _connect_db()
        try:
            records = await conn.fetch(sql, *params)
        finally:
            await conn.close()
        return [dict(record) for record in records]

    @staticmethod
    def _bounded_limit(row_limit: int) -> int:
        """Return *row_limit* unchanged once it is known to bound the query.

        A ``None`` bound to ``LIMIT $n`` is sent as NULL, which PostgreSQL
        treats as "no limit"; a negative value is rejected by the server.
        Both are refused here so the row cap cannot be silently dropped.

        Raises:
            ValueError: If *row_limit* is not an int or is negative.
        """
        if not isinstance(row_limit, int) or row_limit < 0:
            raise ValueError(f"row_limit must be a non-negative integer, got {row_limit!r}.")
        return row_limit

    def _build_whitelisted_query(
        self,
        dataset: str,
        row_limit: int,
        ctx: PolicyContext,
        prompt: str,
    ) -> tuple[str, list[Any]]:
        """Return the fixed SQL text and bind parameters for *dataset*.

        The prompt is never interpolated into SQL. It only selects the
        metric name for "oracle_aggregated_metric" and the query variant
        for "lead_activity_log".

        Args:
            dataset: Dataset name after alias resolution.
            row_limit: Maximum rows to return; bound to the LIMIT
                placeholder (unused by "oracle_aggregated_metric", which
                always returns at most one row).
            ctx: Supplies ``tenant_id`` for the tenant-scoped queries.
            prompt: User prompt, matched case-insensitively for keywords.

        Returns:
            ``(sql, params)`` suitable for ``conn.fetch(sql, *params)``.

        Raises:
            ValueError: If *dataset* has no predefined query, or if a query
                that uses *row_limit* is given a non-integer or negative
                limit.

        NOTE(review): "broker_performance" and every "crm_*" / "oracle_*"
        query below (except "oracle_aggregated_metric") has no tenant_id
        predicate, unlike the other queries. Confirm tenant isolation for
        those tables is enforced elsewhere (e.g. row-level security).
        """
        lower_prompt = prompt.lower()
        if dataset == "deals":
            sql = """
SELECT stage, COUNT(*)::int AS count, COALESCE(SUM(value), 0)::float AS value,
COALESCE(json_agg(json_build_object('id', lead_id, 'name', lead_name, 'company', company, 'value', value_label, 'avatar', avatar_url)
ORDER BY value DESC NULLS LAST) FILTER (WHERE lead_id IS NOT NULL), '[]'::json) AS leads
FROM deals
WHERE tenant_id = $1
GROUP BY stage
ORDER BY COALESCE(SUM(value), 0) DESC, stage ASC
LIMIT $2
"""
            return sql, [ctx.tenant_id, self._bounded_limit(row_limit)]
        if dataset == "lead_daily_snapshot":
            sql = """
SELECT source, COALESCE(SUM(qd_weighted_score), 0)::float AS qd_weighted_volume
FROM lead_daily_snapshot
WHERE tenant_id = $1
GROUP BY source
ORDER BY qd_weighted_volume DESC, source ASC
LIMIT $2
"""
            return sql, [ctx.tenant_id, self._bounded_limit(row_limit)]
        if dataset == "lead_geo_interest_rollup":
            sql = """
SELECT district, lat, lng, COALESCE(lead_count, 0)::int AS lead_count,
COALESCE(avg_qd_score, 0)::float AS avg_qd_score,
COALESCE(x, 0)::float AS x, COALESCE(y, 0)::float AS y
FROM lead_geo_interest_rollup
WHERE tenant_id = $1
ORDER BY lead_count DESC, district ASC
LIMIT $2
"""
            return sql, [ctx.tenant_id, self._bounded_limit(row_limit)]
        if dataset == "broker_performance":
            # NOTE(review): "rank" is numbered by distinct-lead count while the
            # rows are ordered by revenue_generated, so rank may not be
            # monotonic in the output — confirm this is intended.
            sql = """
SELECT ROW_NUMBER() OVER (ORDER BY COUNT(DISTINCT l.person_id) DESC, COALESCE(u.full_name, u.email, u.id::text) ASC)::int AS rank,
COALESCE(u.full_name, u.email, u.id::text) AS name,
COUNT(DISTINCT l.person_id)::int AS deals_closed,
COALESCE(SUM(o.value), 0)::float AS revenue_generated,
u.avatar_url AS avatar
FROM users_and_roles u
LEFT JOIN crm_leads l ON l.assigned_user_id = u.id
LEFT JOIN crm_opportunities o ON o.lead_id = l.lead_id
WHERE u.is_active = TRUE
GROUP BY u.id, u.full_name, u.email, u.avatar_url
HAVING COUNT(DISTINCT l.person_id) > 0 OR COALESCE(SUM(o.value), 0) > 0
ORDER BY revenue_generated DESC, name ASC
LIMIT $1
"""
            return sql, [self._bounded_limit(row_limit)]
        if dataset == "inventory_absorption":
            sql = """
SELECT period_label AS period, COALESCE(absorption_rate, 0)::float AS absorption_rate,
COALESCE(target_rate, 0)::float AS target_rate
FROM inventory_absorption
WHERE tenant_id = $1
ORDER BY period_start ASC
LIMIT $2
"""
            return sql, [ctx.tenant_id, self._bounded_limit(row_limit)]
        if dataset == "oracle_aggregated_metric":
            # Keyword match on the prompt picks the metric; default is total leads.
            metric_name = "total_leads"
            if "pipeline" in lower_prompt:
                metric_name = "total_pipeline_value"
            elif "quota" in lower_prompt or "attainment" in lower_prompt:
                metric_name = "quota_attainment"
            sql = """
SELECT metric_value, metric_label, trend_value, comparison_label
FROM oracle_aggregated_metric
WHERE tenant_id = $1 AND metric_name = $2
ORDER BY observed_at DESC
LIMIT 1
"""
            return sql, [ctx.tenant_id, metric_name]
        if dataset == "lead_activity_log":
            # Follow-up/queue prompts get the per-lead queue view; anything
            # else gets the chronological activity feed.
            if "follow-up" in lower_prompt or "queue" in lower_prompt:
                sql = """
SELECT lead_name AS name, assigned_broker,
COALESCE(last_contact_hours_ago, 0)::int AS last_contact_hours_ago,
COALESCE(qd_score, 0)::float AS qd_score, urgency, avatar_url AS avatar
FROM lead_activity_log
WHERE tenant_id = $1
ORDER BY last_contact_hours_ago DESC, qd_score DESC
LIMIT $2
"""
                return sql, [ctx.tenant_id, self._bounded_limit(row_limit)]
            sql = """
SELECT activity_type AS type, COALESCE(activity_title, activity_summary, activity_type) AS title,
activity_summary AS summary, actor_name AS actor,
TO_CHAR(activity_at, 'YYYY-MM-DD HH24:MI') AS date
FROM lead_activity_log
WHERE tenant_id = $1
ORDER BY activity_at DESC
LIMIT $2
"""
            return sql, [ctx.tenant_id, self._bounded_limit(row_limit)]
        if dataset == "crm_contacts_overview":
            sql = """
SELECT p.person_id::text AS id, p.full_name AS name,
COALESCE(p.primary_email, '') AS email,
COALESCE(p.primary_phone, '') AS phone,
COALESCE(p.city, '') AS city,
COALESCE(p.buyer_type, 'unclassified') AS buyer_type,
COALESCE(q.current_value, 0)::float AS qd_score
FROM crm_people p
LEFT JOIN LATERAL (
SELECT current_value
FROM intel_qd_scores q
WHERE q.person_id = p.person_id
ORDER BY CASE WHEN q.score_type = 'engagement_score' THEN 0 WHEN q.score_type = 'intent_score' THEN 1 WHEN q.score_type = 'urgency_score' THEN 2 ELSE 3 END,
q.computed_at DESC
LIMIT 1
) q ON TRUE
ORDER BY qd_score DESC, p.full_name ASC
LIMIT $1
"""
            return sql, [self._bounded_limit(row_limit)]
        if dataset == "crm_opportunity_pipeline":
            sql = """
SELECT o.stage::text AS stage, COUNT(*)::int AS count, COALESCE(SUM(o.value), 0)::float AS value,
COALESCE(json_agg(json_build_object('id', o.opportunity_id, 'name', p.full_name, 'company', COALESCE(a.account_name, ''),
'value', COALESCE(o.value, 0), 'nextAction', COALESCE(o.next_action, ''))
ORDER BY o.value DESC NULLS LAST) FILTER (WHERE o.opportunity_id IS NOT NULL), '[]'::json) AS leads
FROM crm_opportunities o
JOIN crm_leads l ON l.lead_id = o.lead_id
JOIN crm_people p ON p.person_id = l.person_id
LEFT JOIN crm_accounts a ON a.account_id = l.account_id
GROUP BY o.stage
ORDER BY COALESCE(SUM(o.value), 0) DESC, o.stage::text ASC
LIMIT $1
"""
            return sql, [self._bounded_limit(row_limit)]
        if dataset == "oracle_property_interest_rollup":
            sql = """
SELECT COALESCE(pi.project_name, ip.project_name, 'Unknown Project') AS category,
COUNT(*)::int AS value,
ROUND(AVG(COALESCE((pi.budget_min + pi.budget_max) / 2.0, pi.budget_max, pi.budget_min, 0)), 2)::float AS average_budget,
MAX(pi.created_at) AS latest_interest_at
FROM crm_property_interests pi
LEFT JOIN inventory_projects ip ON ip.project_id = pi.project_id
GROUP BY COALESCE(pi.project_name, ip.project_name, 'Unknown Project')
ORDER BY value DESC, category ASC
LIMIT $1
"""
            return sql, [self._bounded_limit(row_limit)]
        if dataset == "oracle_last_contacted_clients":
            # Per-channel "latest contact" CTEs are combined with GREATEST;
            # '-infinity' stands in for "never contacted on this channel" and
            # is filtered out / mapped back to NULL in the final SELECT.
            sql = """
WITH message_contacts AS (
SELECT i.person_id, MAX(m.delivered_at) AS contacted_at
FROM intel_messages m JOIN intel_interactions i ON i.interaction_id = m.interaction_id
GROUP BY i.person_id
), email_contacts AS (
SELECT i.person_id, MAX(e.sent_at) AS contacted_at
FROM intel_emails e JOIN intel_interactions i ON i.interaction_id = e.interaction_id
GROUP BY i.person_id
), call_contacts AS (
SELECT i.person_id, MAX(i.happened_at) AS contacted_at
FROM intel_calls c JOIN intel_interactions i ON i.interaction_id = c.interaction_id
GROUP BY i.person_id
), visit_contacts AS (
SELECT person_id, MAX(visited_at) AS contacted_at FROM intel_visits GROUP BY person_id
), thread_contacts AS (
SELECT person_id, MAX(last_message_at) AS contacted_at FROM intel_whatsapp_threads GROUP BY person_id
), interaction_contacts AS (
SELECT person_id, MAX(happened_at) AS contacted_at FROM intel_interactions GROUP BY person_id
), next_reminders AS (
SELECT DISTINCT ON (person_id) person_id, title AS next_action, due_at AS next_action_at
FROM intel_reminders
WHERE status IN ('pending', 'open', 'scheduled')
ORDER BY person_id, due_at ASC NULLS LAST
), contact_rollup AS (
SELECT p.person_id,
GREATEST(
COALESCE(mc.contacted_at, '-infinity'::timestamptz),
COALESCE(ec.contacted_at, '-infinity'::timestamptz),
COALESCE(cc.contacted_at, '-infinity'::timestamptz),
COALESCE(vc.contacted_at, '-infinity'::timestamptz),
COALESCE(tc.contacted_at, '-infinity'::timestamptz),
COALESCE(ic.contacted_at, '-infinity'::timestamptz)
) AS last_contacted_at,
mc.contacted_at AS last_message_at, ec.contacted_at AS last_email_at,
cc.contacted_at AS last_call_at, vc.contacted_at AS last_visit_at,
tc.contacted_at AS last_whatsapp_at, ic.contacted_at AS last_interaction_at
FROM crm_people p
LEFT JOIN message_contacts mc ON mc.person_id = p.person_id
LEFT JOIN email_contacts ec ON ec.person_id = p.person_id
LEFT JOIN call_contacts cc ON cc.person_id = p.person_id
LEFT JOIN visit_contacts vc ON vc.person_id = p.person_id
LEFT JOIN thread_contacts tc ON tc.person_id = p.person_id
LEFT JOIN interaction_contacts ic ON ic.person_id = p.person_id
)
SELECT p.person_id::text AS id, p.full_name AS name,
COALESCE(p.primary_email, '') AS email, COALESCE(p.primary_phone, '') AS phone,
NULLIF(cr.last_contacted_at, '-infinity'::timestamptz) AS last_contacted_at,
CASE
WHEN cr.last_contacted_at = cr.last_call_at THEN 'phone'
WHEN cr.last_contacted_at = cr.last_email_at THEN 'email'
WHEN cr.last_contacted_at = cr.last_visit_at THEN 'site_visit'
WHEN cr.last_contacted_at = cr.last_whatsapp_at THEN 'whatsapp'
WHEN cr.last_contacted_at = cr.last_message_at THEN 'message'
WHEN cr.last_contacted_at = cr.last_interaction_at THEN 'interaction'
ELSE 'unknown'
END AS last_contact_channel,
COALESCE(li.summary, nr.next_action, '') AS last_contact_summary,
COUNT(DISTINCT i.interaction_id)::int AS interaction_count,
COALESCE(q.current_value, 0)::float AS qd_score,
COALESCE(nr.next_action, '') AS next_action,
nr.next_action_at
FROM crm_people p
JOIN contact_rollup cr ON cr.person_id = p.person_id
LEFT JOIN intel_interactions i ON i.person_id = p.person_id
LEFT JOIN LATERAL (
SELECT summary
FROM intel_interactions li
WHERE li.person_id = p.person_id
ORDER BY li.happened_at DESC
LIMIT 1
) li ON TRUE
LEFT JOIN next_reminders nr ON nr.person_id = p.person_id
LEFT JOIN LATERAL (
SELECT current_value FROM intel_qd_scores q
WHERE q.person_id = p.person_id
ORDER BY q.computed_at DESC
LIMIT 1
) q ON TRUE
WHERE cr.last_contacted_at <> '-infinity'::timestamptz
GROUP BY p.person_id, p.full_name, p.primary_email, p.primary_phone, cr.last_contacted_at,
cr.last_message_at, cr.last_email_at, cr.last_call_at, cr.last_visit_at,
cr.last_whatsapp_at, cr.last_interaction_at, li.summary, nr.next_action,
nr.next_action_at, q.current_value
ORDER BY last_contacted_at DESC NULLS LAST, interaction_count DESC, p.full_name ASC
LIMIT $1
"""
            return sql, [self._bounded_limit(row_limit)]
        if dataset == "oracle_top_interested_clients":
            # Interest = explicit property-interest rows plus interactions or
            # messages whose text matches the keyword regex in the CTE.
            sql = """
WITH interest_mentions AS (
SELECT i.person_id, COUNT(*)::int AS mention_count, MAX(COALESCE(m.delivered_at, i.happened_at)) AS last_mention_at
FROM intel_interactions i
LEFT JOIN intel_messages m ON m.interaction_id = i.interaction_id
WHERE LOWER(COALESCE(i.summary, '') || ' ' || COALESCE(m.message_text, '')) ~
'(interested|interest|shortlist|visit|book|budget|configuration|bhk|project|property)'
GROUP BY i.person_id
)
SELECT p.person_id::text AS id, p.full_name AS name,
COALESCE(p.primary_email, '') AS email, COALESCE(p.primary_phone, '') AS phone,
COUNT(DISTINCT pi.interest_id)::int AS explicit_interest_count,
COALESCE(MAX(im.mention_count), 0)::int AS inferred_interest_count,
(COUNT(DISTINCT pi.interest_id) + COALESCE(MAX(im.mention_count), 0))::int AS interest_count,
STRING_AGG(DISTINCT COALESCE(pi.project_name, ip.project_name), ', ' ORDER BY COALESCE(pi.project_name, ip.project_name)) AS projects,
GREATEST(COALESCE(MAX(pi.created_at), '-infinity'::timestamptz),
COALESCE(MAX(im.last_mention_at), '-infinity'::timestamptz),
COALESCE(p.updated_at, p.created_at)) AS last_interest_at,
COALESCE(q.current_value, 0)::float AS qd_score,
COALESCE(MAX(pi.notes), '') AS latest_interest_note
FROM crm_people p
LEFT JOIN crm_property_interests pi ON pi.person_id = p.person_id
LEFT JOIN inventory_projects ip ON ip.project_id = pi.project_id
LEFT JOIN interest_mentions im ON im.person_id = p.person_id
LEFT JOIN LATERAL (
SELECT current_value FROM intel_qd_scores q
WHERE q.person_id = p.person_id
ORDER BY q.computed_at DESC
LIMIT 1
) q ON TRUE
GROUP BY p.person_id, p.full_name, p.primary_email, p.primary_phone, p.updated_at, p.created_at, q.current_value
HAVING COUNT(DISTINCT pi.interest_id) > 0 OR COALESCE(MAX(im.mention_count), 0) > 0
ORDER BY interest_count DESC, qd_score DESC, last_interest_at DESC NULLS LAST, p.full_name ASC
LIMIT $1
"""
            return sql, [self._bounded_limit(row_limit)]
        if dataset == "oracle_client_interaction_timeline":
            # One UNION ALL branch per event source, normalised to
            # (person_id, type, title, detail, event_at, source_type).
            sql = """
WITH timeline AS (
SELECT i.person_id, i.channel::text AS type, COALESCE(i.interaction_type, i.channel::text) AS title,
COALESCE(i.summary, '') AS detail, i.happened_at AS event_at, 'interaction' AS source_type
FROM intel_interactions i
UNION ALL
SELECT i.person_id, 'message', COALESCE(m.sender_role, 'message'), m.message_text, m.delivered_at, 'message'
FROM intel_messages m JOIN intel_interactions i ON i.interaction_id = m.interaction_id
UNION ALL
SELECT i.person_id, 'call', c.call_direction::text, COALESCE(t.full_text, c.call_outcome, 'Call record'), i.happened_at, 'call'
FROM intel_calls c
JOIN intel_interactions i ON i.interaction_id = c.interaction_id
LEFT JOIN intel_transcripts t ON t.call_id = c.call_id OR t.interaction_id = i.interaction_id
UNION ALL
SELECT i.person_id, 'email', COALESCE(e.subject, 'Email'), COALESCE(e.body_text, ''), e.sent_at, 'email'
FROM intel_emails e JOIN intel_interactions i ON i.interaction_id = e.interaction_id
UNION ALL
SELECT v.person_id, 'site_visit', COALESCE(v.project_name, 'Site visit'), COALESCE(v.visit_notes, ''), v.visited_at, 'visit'
FROM intel_visits v
UNION ALL
SELECT r.person_id, 'reminder', r.title, COALESCE(r.notes, r.status), COALESCE(r.due_at, r.created_at), 'reminder'
FROM intel_reminders r
UNION ALL
SELECT q.person_id, 'qd_score', q.score_type, COALESCE(q.reasoning, q.current_value::text), q.computed_at, 'qd_score'
FROM intel_qd_scores q
UNION ALL
SELECT qt.person_id, 'qd_timeseries', COALESCE(qt.signal_source, qt.score_type), qt.value::text, qt.timestamp, 'qd_timeseries'
FROM intel_qd_timeseries qt
)
SELECT t.type, t.title, CONCAT(p.full_name, ' - ', t.detail) AS summary,
p.full_name AS actor, TO_CHAR(t.event_at, 'YYYY-MM-DD HH24:MI') AS date,
t.source_type, t.event_at
FROM timeline t
JOIN crm_people p ON p.person_id = t.person_id
ORDER BY t.event_at DESC NULLS LAST
LIMIT $1
"""
            return sql, [self._bounded_limit(row_limit)]
        if dataset == "oracle_client_360_summary":
            sql = """
SELECT p.person_id::text AS id, p.full_name AS name,
COALESCE(p.primary_email, '') AS email, COALESCE(p.primary_phone, '') AS phone,
COALESCE(l.status::text, 'unknown') AS lead_status,
COALESCE(l.budget_band, '') AS budget_band,
COALESCE(l.urgency, '') AS urgency,
COALESCE(q.current_value, 0)::float AS qd_score,
COUNT(DISTINCT pi.interest_id)::int AS interest_count,
COUNT(DISTINCT i.interaction_id)::int AS interaction_count,
MAX(i.happened_at) AS last_interaction_at,
STRING_AGG(DISTINCT COALESCE(pi.project_name, ip.project_name), ', ' ORDER BY COALESCE(pi.project_name, ip.project_name)) AS projects
FROM crm_people p
LEFT JOIN crm_leads l ON l.person_id = p.person_id
LEFT JOIN crm_property_interests pi ON pi.person_id = p.person_id
LEFT JOIN inventory_projects ip ON ip.project_id = pi.project_id
LEFT JOIN intel_interactions i ON i.person_id = p.person_id
LEFT JOIN LATERAL (
SELECT current_value FROM intel_qd_scores q
WHERE q.person_id = p.person_id
ORDER BY q.computed_at DESC
LIMIT 1
) q ON TRUE
GROUP BY p.person_id, p.full_name, p.primary_email, p.primary_phone, l.status, l.budget_band, l.urgency, q.current_value
ORDER BY qd_score DESC, interaction_count DESC, interest_count DESC, name ASC
LIMIT $1
"""
            return sql, [self._bounded_limit(row_limit)]
        raise ValueError(f"Dataset '{dataset}' is not whitelisted for Oracle execution.")
# Module-level gateway instance, created when this module is imported.
data_access_gateway = DataAccessGateway()