""" oracle/policy_service.py Enforces tenant isolation, role-based access, privacy-tier escalation, field-level redaction, and row limit guardrails for all Oracle data access. Section 11.3 of the Oracle Architecture Document. """ from __future__ import annotations import logging from dataclasses import dataclass from typing import Any logger = logging.getLogger(__name__) # ── Constants ───────────────────────────────────────────────────────────────── MAX_ROW_LIMITS: dict[str, int] = { "junior_broker": 100, "senior_broker": 500, "sales_director": 2000, "marketing_operator": 1000, "data_steward": 5000, "compliance_reviewer": 5000, "platform_admin": 10000, } # Which roles can see which privacy tiers PRIVACY_TIER_ACCESS: dict[str, set[str]] = { "standard": {"junior_broker", "senior_broker", "sales_director", "marketing_operator", "data_steward", "compliance_reviewer", "platform_admin"}, "restricted": {"senior_broker", "sales_director", "data_steward", "compliance_reviewer", "platform_admin"}, "sensitive": {"data_steward", "compliance_reviewer", "platform_admin"}, } # Datasets with cross-tenant join restrictions CROSS_TENANT_RESTRICTED: set[str] = { "global_lead_market", "competitor_pricing", "cross_tenant_referrals", } @dataclass class PolicyContext: tenant_id: str actor_id: str actor_role: str policy_profile_id: str = "policy_standard_v4" @dataclass class ValidationResult: passed: bool errors: list[str] warnings: list[str] redaction_policy: str = "none" effective_row_limit: int = 100 @classmethod def ok(cls, row_limit: int, redaction: str = "none") -> "ValidationResult": return cls(passed=True, errors=[], warnings=[], redaction_policy=redaction, effective_row_limit=row_limit) @classmethod def denied(cls, reason: str) -> "ValidationResult": return cls(passed=False, errors=[reason], warnings=[]) class PolicyService: """ Validates all Oracle data access against policy rules. Configuration is loaded from env / feature flags in production; falls back to safe defaults for demo mode. """ def validate_retrieval_plan( self, plan: dict[str, Any], ctx: PolicyContext, ) -> ValidationResult: """ Validates a structured retrieval plan (as produced by PromptOrchestrator). Checks: tenant isolation, role access, privacy tier, row limits. Returns ValidationResult with passed=True if all checks pass. """ errors: list[str] = [] warnings: list[str] = [] dataset = plan.get("dataset", "") privacy_tier = plan.get("privacyTier", "standard") requested_row_limit = plan.get("rowLimit", 100) joins = plan.get("joins", []) # 1. Tenant isolation — reject cross-tenant predicates if dataset in CROSS_TENANT_RESTRICTED: errors.append( f"POLICY_CROSS_TENANT_JOIN_DENIED: Dataset '{dataset}' requires " f"cross-tenant access which is not permitted for role '{ctx.actor_role}'." ) # 2. Cross-tenant join detection for join in joins: if join.get("tenantId") and join["tenantId"] != ctx.tenant_id: errors.append( f"POLICY_CROSS_TENANT_JOIN_DENIED: Join to tenant '{join['tenantId']}' " f"is not permitted." ) # 3. Privacy tier access allowed_roles = PRIVACY_TIER_ACCESS.get(privacy_tier, set()) if ctx.actor_role not in allowed_roles: errors.append( f"POLICY_PRIVACY_TIER_ESCALATION: Role '{ctx.actor_role}' cannot access " f"'{privacy_tier}' tier data in dataset '{dataset}'." ) # 4. Row limit guardrail max_limit = MAX_ROW_LIMITS.get(ctx.actor_role, 100) effective_limit = min(requested_row_limit, max_limit) if requested_row_limit > max_limit: warnings.append( f"ROW_LIMIT_CAPPED: Requested {requested_row_limit} rows; " f"capped to {effective_limit} for role '{ctx.actor_role}'." ) # 5. Determine redaction policy redaction = "none" if privacy_tier == "restricted" and ctx.actor_role == "senior_broker": redaction = "aggregate_only" elif privacy_tier == "sensitive": redaction = "full_redact" if errors: return ValidationResult( passed=False, errors=errors, warnings=warnings, redaction_policy=redaction, effective_row_limit=effective_limit, ) return ValidationResult( passed=True, errors=[], warnings=warnings, redaction_policy=redaction, effective_row_limit=effective_limit, ) def enforce_tenant_predicate( self, query_parameters: dict[str, Any], ctx: PolicyContext, ) -> dict[str, Any]: """ Ensures :tenant_id parameter is always bound to the actor's tenant. Overrides any attacker-supplied tenant_id parameter. """ params = dict(query_parameters) params["tenant_id"] = ctx.tenant_id return params def validate_component_access( self, component_access_controls: dict[str, Any], ctx: PolicyContext, ) -> bool: """ Returns True if the actor's role is in the component's allowedRoles. """ allowed_roles: list[str] = component_access_controls.get("allowedRoles", []) if not allowed_roles: # Open access (shouldn't happen in production) logger.warning( "POLICY_WARN: Component has no allowedRoles — defaulting to deny for tenant=%s actor=%s", ctx.tenant_id, ctx.actor_id, ) return False return ctx.actor_role in allowed_roles def redact( self, rows: list[dict[str, Any]], redaction_policy: str, sensitive_fields: list[str] | None = None, ) -> list[dict[str, Any]]: """ Applies field-level redaction to result rows. """ if redaction_policy == "none" or not rows: return rows if redaction_policy == "full_redact": return [{"__redacted__": True, "count": len(rows)}] if redaction_policy == "aggregate_only": # Keep only aggregate fields; drop individual identifiers safe_fields = {"count", "total", "average", "sum", "min", "max", "stage", "source", "district"} return [{k: v for k, v in row.items() if k in safe_fields} for row in rows] if redaction_policy == "team_scope": # Keep rows where assigned_broker matches actor (simplified demo rule) return rows # Full enforcement requires actor context per row return rows def audit_policy_check( self, ctx: PolicyContext, dataset: str, result: ValidationResult, ) -> None: """Emit an audit event for every policy check (passed or denied).""" if not result.passed: logger.warning( "POLICY_DENIED tenant=%s actor=%s dataset=%s errors=%s", ctx.tenant_id, ctx.actor_id, dataset, result.errors, ) else: logger.debug( "POLICY_PASS tenant=%s actor=%s dataset=%s redaction=%s limit=%d", ctx.tenant_id, ctx.actor_id, dataset, result.redaction_policy, result.effective_row_limit, )