Built the Oracle Tab (#14)

2026-04-11 19:35:45 +05:30
parent 8e1ffe0e43
commit fb656d1443
54 changed files with 10651 additions and 818 deletions
--- a/backend/oracle/policy_service.py
+++ b/backend/oracle/policy_service.py
@@ -0,0 +1,225 @@
+"""
+oracle/policy_service.py
+Enforces tenant isolation, role-based access, privacy-tier escalation,
+field-level redaction, and row limit guardrails for all Oracle data access.
+Section 11.3 of the Oracle Architecture Document.
+"""
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+# ── Constants ─────────────────────────────────────────────────────────────────
+
+# Largest row count each role may receive from a single retrieval.
+MAX_ROW_LIMITS: dict[str, int] = dict(
+    junior_broker=100,
+    senior_broker=500,
+    sales_director=2000,
+    marketing_operator=1000,
+    data_steward=5000,
+    compliance_reviewer=5000,
+    platform_admin=10000,
+)
+
+# Privacy tier -> roles allowed to read data classified at that tier.
+# A role that is absent from a tier's set is denied that tier.
+PRIVACY_TIER_ACCESS: dict[str, set[str]] = {
+    "standard": {
+        "junior_broker",
+        "senior_broker",
+        "sales_director",
+        "marketing_operator",
+        "data_steward",
+        "compliance_reviewer",
+        "platform_admin",
+    },
+    "restricted": {
+        "senior_broker",
+        "sales_director",
+        "data_steward",
+        "compliance_reviewer",
+        "platform_admin",
+    },
+    "sensitive": {
+        "data_steward",
+        "compliance_reviewer",
+        "platform_admin",
+    },
+}
+
+# Datasets that inherently span tenants; plans targeting them are rejected.
+CROSS_TENANT_RESTRICTED: set[str] = set(
+    (
+        "global_lead_market",
+        "competitor_pricing",
+        "cross_tenant_referrals",
+    )
+)
+
+
+@dataclass
+class PolicyContext:
+    """Identity of the caller on whose behalf a policy check is evaluated."""
+
+    # Tenant the actor belongs to; bound into query parameters and compared
+    # against the tenantId of each join in a retrieval plan.
+    tenant_id: str
+    # Identifier of the acting user; only appears in this module's log output.
+    actor_id: str
+    # Role name used as the key into MAX_ROW_LIMITS / PRIVACY_TIER_ACCESS and
+    # matched against a component's allowedRoles.
+    actor_role: str
+    # NOTE(review): not read anywhere in this module — presumably consumed by
+    # callers to select a policy profile; confirm before relying on it.
+    policy_profile_id: str = "policy_standard_v4"
+
+
+@dataclass
+class ValidationResult:
+    """Outcome of a policy check: verdict, messages, and resulting guardrails."""
+
+    passed: bool
+    errors: list[str]
+    warnings: list[str]
+    redaction_policy: str = "none"
+    effective_row_limit: int = 100
+
+    @classmethod
+    def ok(cls, row_limit: int, redaction: str = "none") -> "ValidationResult":
+        """Build a passing result with no messages.
+
+        Args:
+            row_limit: Value stored as ``effective_row_limit``.
+            redaction: Value stored as ``redaction_policy``.
+        """
+        # Positional order follows the field order declared above.
+        return cls(True, [], [], redaction, row_limit)
+
+    @classmethod
+    def denied(cls, reason: str) -> "ValidationResult":
+        """Build a failing result whose only error is ``reason``.
+
+        The redaction policy and row limit keep their field defaults.
+        """
+        return cls(False, [reason], [])
+
+
+class PolicyService:
+    """
+    Validates all Oracle data access against policy rules.
+
+    The rules applied here are the module-level tables MAX_ROW_LIMITS,
+    PRIVACY_TIER_ACCESS and CROSS_TENANT_RESTRICTED. Checks fail closed:
+    unknown roles, unknown privacy tiers, components without allowedRoles and
+    unrecognised redaction policies all result in denial / withheld data.
+
+    NOTE(review): the original description said configuration is loaded from
+    env / feature flags in production; no such loading is visible in this
+    class — confirm where that happens.
+    """
+
+    # Row limit assumed when a plan omits ``rowLimit`` or supplies an unusable
+    # value, and the ceiling applied to roles missing from MAX_ROW_LIMITS.
+    _DEFAULT_ROW_LIMIT = 100
+
+    # Columns that survive "aggregate_only" redaction.
+    _AGGREGATE_SAFE_FIELDS = frozenset(
+        {"count", "total", "average", "sum", "min", "max", "stage", "source", "district"}
+    )
+
+    def validate_retrieval_plan(
+        self,
+        plan: dict[str, Any],
+        ctx: PolicyContext,
+    ) -> ValidationResult:
+        """
+        Validates a structured retrieval plan (as produced by PromptOrchestrator).
+
+        Checks, in order: restricted cross-tenant datasets, joins that name a
+        different tenant, privacy-tier access for the actor's role, and the
+        per-role row limit. All checks run even after a failure so the result
+        carries every error at once.
+
+        Args:
+            plan: Mapping read for the keys ``dataset``, ``privacyTier``,
+                ``rowLimit`` and ``joins`` (each join is read for ``tenantId``).
+            ctx: Caller identity; ``tenant_id`` and ``actor_role`` drive the checks.
+
+        Returns:
+            ValidationResult with ``passed=True`` only when no error was
+            recorded. ``effective_row_limit`` is always a non-negative int no
+            larger than the role's ceiling, and ``redaction_policy`` is set
+            regardless of the verdict.
+        """
+        errors: list[str] = []
+        warnings: list[str] = []
+
+        dataset = plan.get("dataset", "")
+        privacy_tier = plan.get("privacyTier", "standard")
+        requested_row_limit = plan.get("rowLimit", PolicyService._DEFAULT_ROW_LIMIT)
+        # An explicit ``"joins": null`` is treated the same as a missing key.
+        joins = plan.get("joins") or []
+
+        # 1. Tenant isolation — datasets that inherently span tenants are
+        #    rejected for every role.
+        if dataset in CROSS_TENANT_RESTRICTED:
+            errors.append(
+                f"POLICY_CROSS_TENANT_JOIN_DENIED: Dataset '{dataset}' requires "
+                f"cross-tenant access which is not permitted for role '{ctx.actor_role}'."
+            )
+
+        # 2. Cross-tenant join detection — a join without a tenantId is allowed;
+        #    one naming any tenant other than the actor's is not.
+        for join in joins:
+            join_tenant = join.get("tenantId")
+            if join_tenant and join_tenant != ctx.tenant_id:
+                errors.append(
+                    f"POLICY_CROSS_TENANT_JOIN_DENIED: Join to tenant '{join_tenant}' "
+                    f"is not permitted."
+                )
+
+        # 3. Privacy tier access — an unknown tier maps to an empty role set,
+        #    so it is denied for everyone.
+        allowed_roles = PRIVACY_TIER_ACCESS.get(privacy_tier, set())
+        if ctx.actor_role not in allowed_roles:
+            errors.append(
+                f"POLICY_PRIVACY_TIER_ESCALATION: Role '{ctx.actor_role}' cannot access "
+                f"'{privacy_tier}' tier data in dataset '{dataset}'."
+            )
+
+        # 4. Row limit guardrail
+        max_limit = MAX_ROW_LIMITS.get(ctx.actor_role, PolicyService._DEFAULT_ROW_LIMIT)
+        limit_is_usable = (
+            isinstance(requested_row_limit, int)
+            # bool is a subclass of int; True/False is not a meaningful limit.
+            and not isinstance(requested_row_limit, bool)
+            and requested_row_limit >= 0
+        )
+        if limit_is_usable:
+            effective_limit = min(requested_row_limit, max_limit)
+            if requested_row_limit > max_limit:
+                warnings.append(
+                    f"ROW_LIMIT_CAPPED: Requested {requested_row_limit} rows; "
+                    f"capped to {effective_limit} for role '{ctx.actor_role}'."
+                )
+        else:
+            # A negative, null or non-integer limit must never reach the query
+            # layer as-is (min() would let a negative value through as the
+            # "effective" limit), so fall back to the default under the cap.
+            effective_limit = min(PolicyService._DEFAULT_ROW_LIMIT, max_limit)
+            warnings.append(
+                f"ROW_LIMIT_INVALID: Requested row limit {requested_row_limit!r} is not a "
+                f"non-negative integer; using {effective_limit} for role '{ctx.actor_role}'."
+            )
+
+        # 5. Determine redaction policy
+        redaction = "none"
+        if privacy_tier == "restricted" and ctx.actor_role == "senior_broker":
+            redaction = "aggregate_only"
+        elif privacy_tier == "sensitive":
+            redaction = "full_redact"
+
+        return ValidationResult(
+            passed=not errors,
+            errors=errors,
+            warnings=warnings,
+            redaction_policy=redaction,
+            effective_row_limit=effective_limit,
+        )
+
+    def enforce_tenant_predicate(
+        self,
+        query_parameters: dict[str, Any],
+        ctx: PolicyContext,
+    ) -> dict[str, Any]:
+        """
+        Ensures the ``tenant_id`` parameter is always bound to the actor's tenant.
+
+        Returns a shallow copy of ``query_parameters`` whose ``tenant_id`` entry
+        is set to ``ctx.tenant_id``, overriding any caller-supplied value. The
+        input mapping is left unmodified.
+        """
+        return {**query_parameters, "tenant_id": ctx.tenant_id}
+
+    def validate_component_access(
+        self,
+        component_access_controls: dict[str, Any],
+        ctx: PolicyContext,
+    ) -> bool:
+        """
+        Returns True if the actor's role is in the component's allowedRoles.
+
+        A component whose ``allowedRoles`` is missing or empty is denied and a
+        warning is logged.
+        """
+        allowed_roles = component_access_controls.get("allowedRoles", [])
+        if not allowed_roles:
+            # No roles configured: deny rather than treat the component as open.
+            logger.warning(
+                "POLICY_WARN: Component has no allowedRoles — defaulting to deny for tenant=%s actor=%s",
+                ctx.tenant_id,
+                ctx.actor_id,
+            )
+            return False
+        if isinstance(allowed_roles, str):
+            # With a bare string, ``in`` would be a substring test and role
+            # "broker" would match "senior_broker"; require an exact match.
+            allowed_roles = [allowed_roles]
+        return ctx.actor_role in allowed_roles
+
+    def redact(
+        self,
+        rows: list[dict[str, Any]],
+        redaction_policy: str,
+        sensitive_fields: list[str] | None = None,
+    ) -> list[dict[str, Any]]:
+        """
+        Applies field-level redaction to result rows.
+
+        Args:
+            rows: Result rows to filter.
+            redaction_policy: One of ``"none"``, ``"full_redact"``,
+                ``"aggregate_only"`` or ``"team_scope"``.
+            sensitive_fields: Accepted for interface compatibility; currently
+                not consulted by any policy.
+
+        Returns:
+            ``rows`` itself for ``"none"``, for empty input, and for
+            ``"team_scope"`` (not enforced here); a single summary row for
+            ``"full_redact"``; rows stripped to the aggregate-safe columns for
+            ``"aggregate_only"``. An unrecognised policy is treated like
+            ``"full_redact"`` so data is never released under a policy this
+            method does not understand.
+        """
+        if redaction_policy == "none" or not rows:
+            return rows
+        if redaction_policy == "full_redact":
+            return [{"__redacted__": True, "count": len(rows)}]
+        if redaction_policy == "aggregate_only":
+            # Keep only aggregate fields; drop individual identifiers
+            safe_fields = PolicyService._AGGREGATE_SAFE_FIELDS
+            return [{k: v for k, v in row.items() if k in safe_fields} for row in rows]
+        if redaction_policy == "team_scope":
+            # Simplified demo rule: full enforcement requires actor context
+            # per row, which this method does not receive.
+            return rows
+        # Unknown policy name: fail closed, consistent with the deny-by-default
+        # handling of components without allowedRoles.
+        logger.warning(
+            "POLICY_WARN: Unknown redaction policy %r — withholding %d row(s)",
+            redaction_policy,
+            len(rows),
+        )
+        return [{"__redacted__": True, "count": len(rows)}]
+
+    def audit_policy_check(
+        self,
+        ctx: PolicyContext,
+        dataset: str,
+        result: ValidationResult,
+    ) -> None:
+        """
+        Emit an audit log record for a policy check (passed or denied).
+
+        Denials are logged at WARNING with their errors; passes are logged at
+        DEBUG with the redaction policy and effective row limit.
+        """
+        if not result.passed:
+            logger.warning(
+                "POLICY_DENIED tenant=%s actor=%s dataset=%s errors=%s",
+                ctx.tenant_id,
+                ctx.actor_id,
+                dataset,
+                result.errors,
+            )
+        else:
+            logger.debug(
+                "POLICY_PASS tenant=%s actor=%s dataset=%s redaction=%s limit=%d",
+                ctx.tenant_id,
+                ctx.actor_id,
+                dataset,
+                result.redaction_policy,
+                result.effective_row_limit,
+            )