# Project_Velocity/backend/oracle/policy_service.py

"""
oracle/policy_service.py
Enforces tenant isolation, role-based access, privacy-tier escalation,
field-level redaction, and row limit guardrails for all Oracle data access.
Section 11.3 of the Oracle Architecture Document.
"""
from __future__ import annotations

import logging
from dataclasses import dataclass
from typing import Any

logger = logging.getLogger(__name__)

# ── Constants ─────────────────────────────────────────────────────────────────

# Per-role ceiling on the number of rows a single retrieval may return.
MAX_ROW_LIMITS: dict[str, int] = {
    "junior_broker": 100,
    "senior_broker": 500,
    "sales_director": 2_000,
    "marketing_operator": 1_000,
    "data_steward": 5_000,
    "compliance_reviewer": 5_000,
    "platform_admin": 10_000,
}

# Privacy tier -> roles permitted to read data classified at that tier.
PRIVACY_TIER_ACCESS: dict[str, set[str]] = {
    "standard": {
        "junior_broker",
        "senior_broker",
        "sales_director",
        "marketing_operator",
        "data_steward",
        "compliance_reviewer",
        "platform_admin",
    },
    "restricted": {
        "senior_broker",
        "sales_director",
        "data_steward",
        "compliance_reviewer",
        "platform_admin",
    },
    "sensitive": {
        "data_steward",
        "compliance_reviewer",
        "platform_admin",
    },
}

# Datasets with cross-tenant join restrictions.
CROSS_TENANT_RESTRICTED: set[str] = {
    "competitor_pricing",
    "cross_tenant_referrals",
    "global_lead_market",
}


@dataclass
class PolicyContext:
    """Identity of the caller on whose behalf a policy check is evaluated."""
    # Tenant the actor belongs to; joins are compared against it and query
    # parameters are pinned to it.
    tenant_id: str
    # Identifier of the acting user; in the code visible here it is only
    # written to log lines.
    actor_id: str
    # Role name used for the row-limit, privacy-tier and component checks.
    actor_role: str
    # Not read anywhere in this module — presumably selects a policy profile
    # for other consumers; confirm against callers.
    policy_profile_id: str = "policy_standard_v4"


@dataclass
class ValidationResult:
    """Outcome of a policy check: verdict, messages, and the limits to apply."""

    passed: bool
    errors: list[str]
    warnings: list[str]
    redaction_policy: str = "none"
    effective_row_limit: int = 100

    @classmethod
    def ok(cls, row_limit: int, redaction: str = "none") -> "ValidationResult":
        """Build a passing result with no messages, the given row limit and redaction."""
        return cls(
            effective_row_limit=row_limit,
            redaction_policy=redaction,
            warnings=[],
            errors=[],
            passed=True,
        )

    @classmethod
    def denied(cls, reason: str) -> "ValidationResult":
        """Build a failing result whose only error is *reason*; other fields keep defaults."""
        failure_messages = [reason]
        return cls(
            warnings=[],
            errors=failure_messages,
            passed=False,
        )


class PolicyService:
    """
    Validates all Oracle data access against policy rules.
    Configuration is loaded from env / feature flags in production;
    falls back to safe defaults for demo mode.
    """

    def validate_retrieval_plan(
        self,
        plan: dict[str, Any],
        ctx: PolicyContext,
    ) -> ValidationResult:
        """
        Validates a structured retrieval plan (as produced by PromptOrchestrator).
        Checks: tenant isolation, role access, privacy tier, row limits.
        Returns ValidationResult with passed=True if all checks pass.
        """
        errors: list[str] = []
        warnings: list[str] = []

        dataset = plan.get("dataset", "")
        privacy_tier = plan.get("privacyTier", "standard")
        requested_row_limit = plan.get("rowLimit", 100)
        joins = plan.get("joins", [])

        # 1. Tenant isolation — reject cross-tenant predicates
        if dataset in CROSS_TENANT_RESTRICTED:
            errors.append(
                f"POLICY_CROSS_TENANT_JOIN_DENIED: Dataset '{dataset}' requires "
                f"cross-tenant access which is not permitted for role '{ctx.actor_role}'."
            )

        # 2. Cross-tenant join detection
        for join in joins:
            if join.get("tenantId") and join["tenantId"] != ctx.tenant_id:
                errors.append(
                    f"POLICY_CROSS_TENANT_JOIN_DENIED: Join to tenant '{join['tenantId']}' "
                    f"is not permitted."
                )

        # 3. Privacy tier access
        allowed_roles = PRIVACY_TIER_ACCESS.get(privacy_tier, set())
        if ctx.actor_role not in allowed_roles:
            errors.append(
                f"POLICY_PRIVACY_TIER_ESCALATION: Role '{ctx.actor_role}' cannot access "
                f"'{privacy_tier}' tier data in dataset '{dataset}'."
            )

        # 4. Row limit guardrail
        max_limit = MAX_ROW_LIMITS.get(ctx.actor_role, 100)
        effective_limit = min(requested_row_limit, max_limit)
        if requested_row_limit > max_limit:
            warnings.append(
                f"ROW_LIMIT_CAPPED: Requested {requested_row_limit} rows; "
                f"capped to {effective_limit} for role '{ctx.actor_role}'."
            )

        # 5. Determine redaction policy
        redaction = "none"
        if privacy_tier == "restricted" and ctx.actor_role == "senior_broker":
            redaction = "aggregate_only"
        elif privacy_tier == "sensitive":
            redaction = "full_redact"

        if errors:
            return ValidationResult(
                passed=False,
                errors=errors,
                warnings=warnings,
                redaction_policy=redaction,
                effective_row_limit=effective_limit,
            )

        return ValidationResult(
            passed=True,
            errors=[],
            warnings=warnings,
            redaction_policy=redaction,
            effective_row_limit=effective_limit,
        )

    def enforce_tenant_predicate(
        self,
        query_parameters: dict[str, Any],
        ctx: PolicyContext,
    ) -> dict[str, Any]:
        """
        Ensures :tenant_id parameter is always bound to the actor's tenant.
        Overrides any attacker-supplied tenant_id parameter.
        """
        params = dict(query_parameters)
        params["tenant_id"] = ctx.tenant_id
        return params

    def validate_component_access(
        self,
        component_access_controls: dict[str, Any],
        ctx: PolicyContext,
    ) -> bool:
        """
        Returns True if the actor's role is in the component's allowedRoles.
        """
        allowed_roles: list[str] = component_access_controls.get("allowedRoles", [])
        if not allowed_roles:
            # Open access (shouldn't happen in production)
            logger.warning(
                "POLICY_WARN: Component has no allowedRoles — defaulting to deny for tenant=%s actor=%s",
                ctx.tenant_id,
                ctx.actor_id,
            )
            return False
        return ctx.actor_role in allowed_roles

    def redact(
        self,
        rows: list[dict[str, Any]],
        redaction_policy: str,
        sensitive_fields: list[str] | None = None,
    ) -> list[dict[str, Any]]:
        """
        Applies field-level redaction to result rows.
        """
        if redaction_policy == "none" or not rows:
            return rows
        if redaction_policy == "full_redact":
            return [{"__redacted__": True, "count": len(rows)}]
        if redaction_policy == "aggregate_only":
            # Keep only aggregate fields; drop individual identifiers
            safe_fields = {"count", "total", "average", "sum", "min", "max", "stage", "source", "district"}
            return [{k: v for k, v in row.items() if k in safe_fields} for row in rows]
        if redaction_policy == "team_scope":
            # Keep rows where assigned_broker matches actor (simplified demo rule)
            return rows  # Full enforcement requires actor context per row
        return rows

    def audit_policy_check(
        self,
        ctx: PolicyContext,
        dataset: str,
        result: ValidationResult,
    ) -> None:
        """Emit an audit event for every policy check (passed or denied)."""
        if not result.passed:
            logger.warning(
                "POLICY_DENIED tenant=%s actor=%s dataset=%s errors=%s",
                ctx.tenant_id,
                ctx.actor_id,
                dataset,
                result.errors,
            )
        else:
            logger.debug(
                "POLICY_PASS tenant=%s actor=%s dataset=%s redaction=%s limit=%d",
                ctx.tenant_id,
                ctx.actor_id,
                dataset,
                result.redaction_policy,
                result.effective_row_limit,
            )