Files
Project_Velocity/backend/oracle/data_access_gateway.py
2026-04-12 02:01:36 +05:30

243 lines
8.5 KiB
Python

"""
oracle/data_access_gateway.py
Read-only, policy-aware PostgreSQL query executor for Oracle datasets.
Nemoclaw is treated strictly as a planner. The gateway executes only
whitelisted dataset queries and always injects the actor's tenant scope.
"""
from __future__ import annotations
import logging
import os
from dataclasses import dataclass
from typing import Any
try:
import asyncpg # type: ignore
except Exception: # pragma: no cover
asyncpg = None # type: ignore
from .policy_service import PolicyContext, PolicyService
logger = logging.getLogger(__name__)
_DB_URL = os.getenv("DATABASE_URL", "")
_ALLOW_IN_MEMORY = os.getenv("ORACLE_ALLOW_IN_MEMORY_FALLBACK", "").lower() in {"1", "true", "yes"}
@dataclass
class QueryExecutionResult:
rows: list[dict[str, Any]]
warnings: list[str]
def _db_ready() -> bool:
return bool(_DB_URL and not _DB_URL.startswith("PLACEHOLDER") and asyncpg is not None)
class DataAccessGateway:
def __init__(self) -> None:
self.policy_service = PolicyService()
async def execute_component_plan(
self,
component_plan: dict[str, Any],
ctx: PolicyContext,
prompt: str,
) -> QueryExecutionResult:
dataset = str(component_plan.get("dataset", "")).strip()
if not dataset:
return QueryExecutionResult(rows=[], warnings=["Dataset missing from retrieval plan."])
validation = self.policy_service.validate_retrieval_plan(component_plan, ctx)
self.policy_service.audit_policy_check(ctx, dataset, validation)
if not validation.passed:
return QueryExecutionResult(rows=[], warnings=validation.errors)
if not _db_ready():
if _ALLOW_IN_MEMORY or "PYTEST_CURRENT_TEST" in os.environ:
return QueryExecutionResult(rows=[], warnings=[])
raise RuntimeError("Oracle requires DATABASE_URL and asyncpg for real-time data access.")
try:
rows = await self._query_dataset(
dataset=dataset,
row_limit=validation.effective_row_limit,
ctx=ctx,
prompt=prompt,
)
except Exception as exc:
logger.warning("DATA_GATEWAY query_failed dataset=%s error=%s", dataset, exc)
return QueryExecutionResult(rows=[], warnings=[f"{dataset}: {exc}"])
redacted = self.policy_service.redact(rows, validation.redaction_policy)
return QueryExecutionResult(rows=redacted, warnings=validation.warnings)
async def _query_dataset(
self,
*,
dataset: str,
row_limit: int,
ctx: PolicyContext,
prompt: str,
) -> list[dict[str, Any]]:
sql, params = self._build_whitelisted_query(dataset, row_limit, ctx, prompt)
assert asyncpg is not None
conn = await asyncpg.connect(_DB_URL)
try:
records = await conn.fetch(sql, *params)
finally:
await conn.close()
return [dict(record) for record in records]
def _build_whitelisted_query(
self,
dataset: str,
row_limit: int,
ctx: PolicyContext,
prompt: str,
) -> tuple[str, list[Any]]:
lower_prompt = prompt.lower()
if dataset == "deals":
sql = """
SELECT
stage,
COUNT(*)::int AS count,
COALESCE(SUM(value), 0)::float AS value,
COALESCE(
json_agg(
json_build_object(
'id', lead_id,
'name', lead_name,
'company', company,
'value', value_label,
'avatar', avatar_url
)
ORDER BY value DESC NULLS LAST
) FILTER (WHERE lead_id IS NOT NULL),
'[]'::json
) AS leads
FROM deals
WHERE tenant_id = $1
GROUP BY stage
ORDER BY COALESCE(SUM(value), 0) DESC, stage ASC
LIMIT $2
"""
return sql, [ctx.tenant_id, row_limit]
if dataset == "lead_daily_snapshot":
sql = """
SELECT
source,
COALESCE(SUM(qd_weighted_score), 0)::float AS qd_weighted_volume
FROM lead_daily_snapshot
WHERE tenant_id = $1
GROUP BY source
ORDER BY qd_weighted_volume DESC, source ASC
LIMIT $2
"""
return sql, [ctx.tenant_id, row_limit]
if dataset == "lead_geo_interest_rollup":
sql = """
SELECT
district,
lat,
lng,
COALESCE(lead_count, 0)::int AS lead_count,
COALESCE(avg_qd_score, 0)::float AS avg_qd_score,
COALESCE(x, 0)::float AS x,
COALESCE(y, 0)::float AS y
FROM lead_geo_interest_rollup
WHERE tenant_id = $1
ORDER BY lead_count DESC, district ASC
LIMIT $2
"""
return sql, [ctx.tenant_id, row_limit]
if dataset == "broker_performance":
sql = """
SELECT
ROW_NUMBER() OVER (ORDER BY COALESCE(revenue_generated, 0) DESC, broker_name ASC)::int AS rank,
broker_name AS name,
deals_closed::int AS deals_closed,
COALESCE(revenue_generated, 0)::float AS revenue_generated,
avatar_url AS avatar
FROM broker_performance
WHERE tenant_id = $1
ORDER BY revenue_generated DESC, broker_name ASC
LIMIT $2
"""
return sql, [ctx.tenant_id, row_limit]
if dataset == "inventory_absorption":
sql = """
SELECT
period_label AS period,
COALESCE(absorption_rate, 0)::float AS absorption_rate,
COALESCE(target_rate, 0)::float AS target_rate
FROM inventory_absorption
WHERE tenant_id = $1
ORDER BY period_start ASC
LIMIT $2
"""
return sql, [ctx.tenant_id, row_limit]
if dataset == "oracle_aggregated_metric":
metric_name = "total_leads"
if "pipeline" in lower_prompt:
metric_name = "total_pipeline_value"
elif "quota" in lower_prompt or "attainment" in lower_prompt:
metric_name = "quota_attainment"
sql = """
SELECT
metric_value,
metric_label,
trend_value,
comparison_label
FROM oracle_aggregated_metric
WHERE tenant_id = $1
AND metric_name = $2
ORDER BY observed_at DESC
LIMIT 1
"""
return sql, [ctx.tenant_id, metric_name]
if dataset == "lead_activity_log":
if "follow-up" in lower_prompt or "queue" in lower_prompt:
sql = """
SELECT
lead_name AS name,
assigned_broker,
COALESCE(last_contact_hours_ago, 0)::int AS last_contact_hours_ago,
COALESCE(qd_score, 0)::float AS qd_score,
urgency,
avatar_url AS avatar
FROM lead_activity_log
WHERE tenant_id = $1
ORDER BY last_contact_hours_ago DESC, qd_score DESC
LIMIT $2
"""
return sql, [ctx.tenant_id, row_limit]
sql = """
SELECT
activity_type AS type,
COALESCE(activity_title, activity_summary, activity_type) AS title,
activity_summary AS summary,
actor_name AS actor,
TO_CHAR(activity_at, 'YYYY-MM-DD HH24:MI') AS date
FROM lead_activity_log
WHERE tenant_id = $1
ORDER BY activity_at DESC
LIMIT $2
"""
return sql, [ctx.tenant_id, row_limit]
raise ValueError(f"Dataset '{dataset}' is not whitelisted for Oracle execution.")
data_access_gateway = DataAccessGateway()