Initial commit: Velocity-OS migration

This commit is contained in:
2026-05-01 12:32:19 +05:30
commit 407af828d4
283 changed files with 207782 additions and 0 deletions

View File

@@ -0,0 +1 @@
# Oracle services package

View File

@@ -0,0 +1,361 @@
from __future__ import annotations
import json
import os
import uuid
from datetime import datetime, timezone
from typing import Any
from fastapi import HTTPException
try:
import asyncpg # type: ignore
except Exception: # pragma: no cover
asyncpg = None # type: ignore
# PostgreSQL connection string, read once at import time; empty when DATABASE_URL is unset.
_DB_URL = os.getenv("DATABASE_URL", "")
def _now() -> datetime:
return datetime.now(timezone.utc)
def _iso(value: datetime | None) -> str | None:
return value.isoformat() if value else None
def _coerce_datetime(value: datetime | str | None) -> datetime | None:
if value is None or isinstance(value, datetime):
return value
if isinstance(value, str) and value.strip():
try:
return datetime.fromisoformat(value)
except ValueError:
return None
return None
def _db_ready() -> bool:
    """Report whether PostgreSQL persistence can be used.

    True only when ``_DB_URL`` is non-empty, does not start with
    ``PLACEHOLDER``, and the ``asyncpg`` import succeeded.
    """
    if not _DB_URL:
        return False
    if _DB_URL.startswith("PLACEHOLDER"):
        return False
    return asyncpg is not None
class OracleActionService:
    """Stores Oracle actions in the ``oracle_actions`` table and applies lead writebacks."""
async def ensure_schema(self) -> None:
    """Create the ``oracle_actions`` table and its two indexes when missing.

    Returns immediately when ``_db_ready()`` is false. Every statement uses
    ``IF NOT EXISTS``, so calling this repeatedly is harmless.
    """
    if not _db_ready():
        return
    assert asyncpg is not None
    ddl_statements = (
        """
CREATE TABLE IF NOT EXISTS oracle_actions (
action_id UUID PRIMARY KEY,
execution_id UUID,
tenant_id TEXT NOT NULL,
page_id UUID,
branch_id TEXT,
actor_id TEXT NOT NULL,
target_entity_type TEXT NOT NULL,
target_entity_id TEXT,
action_type TEXT NOT NULL,
status TEXT NOT NULL DEFAULT 'planned',
prompt TEXT,
workflow_dispatch JSONB NOT NULL DEFAULT '{}'::jsonb,
component_ids JSONB NOT NULL DEFAULT '[]'::jsonb,
writeback_payload JSONB NOT NULL DEFAULT '{}'::jsonb,
result_payload JSONB NOT NULL DEFAULT '{}'::jsonb,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
)
""",
        "CREATE INDEX IF NOT EXISTS idx_oracle_actions_execution ON oracle_actions(execution_id, created_at DESC)",
        "CREATE INDEX IF NOT EXISTS idx_oracle_actions_target ON oracle_actions(target_entity_type, target_entity_id, created_at DESC)",
    )
    conn = await asyncpg.connect(_DB_URL)
    try:
        # Table first, then the indexes that depend on it.
        for statement in ddl_statements:
            await conn.execute(statement)
    finally:
        await conn.close()
async def create_from_execution(
    self,
    *,
    execution: dict[str, Any],
    target_entity_type: str = "canvas_page",
    target_entity_id: str | None = None,
    action_type: str = "oracle_canvas_generation",
    writeback_payload: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Build a ``planned`` action from an execution record and persist it.

    Args:
        execution: Execution payload; the keys ``executionId``, ``tenantId``,
            ``pageId``, ``branchId``, ``actorId``, ``prompt``,
            ``workflowDispatch`` and ``componentsCreated`` are read with
            ``.get`` and may be absent.
        target_entity_type: Kind of entity the action targets.
        target_entity_id: Target id; falls back to the execution's ``pageId``.
        action_type: Label stored with the action.
        writeback_payload: Optional writeback data; defaults to an empty dict.

    Returns:
        The action dict in camelCase form. ``createdAt`` and ``updatedAt`` are
        ``datetime`` objects, not strings.
    """
    # Take the clock once so a freshly created action has createdAt == updatedAt.
    created_at = _now()
    action = {
        "actionId": str(uuid.uuid4()),
        "executionId": execution.get("executionId"),
        "tenantId": execution.get("tenantId"),
        "pageId": execution.get("pageId"),
        "branchId": execution.get("branchId"),
        "actorId": execution.get("actorId"),
        "targetEntityType": target_entity_type,
        "targetEntityId": target_entity_id or execution.get("pageId"),
        "actionType": action_type,
        "status": "planned",
        "prompt": execution.get("prompt"),
        "workflowDispatch": execution.get("workflowDispatch") or {},
        "componentIds": execution.get("componentsCreated") or [],
        "writebackPayload": writeback_payload or {},
        "resultPayload": {},
        "createdAt": created_at,
        "updatedAt": created_at,
    }
    await self._persist_action(action)
    return action
async def get_action(self, action_id: str) -> dict[str, Any] | None:
    """Fetch one action by id.

    Returns the serialized action, or ``None`` when the database is not
    configured or no row matches ``action_id``.
    """
    if not _db_ready():
        return None
    assert asyncpg is not None
    query = """
SELECT action_id, execution_id, tenant_id, page_id, branch_id, actor_id,
target_entity_type, target_entity_id, action_type, status, prompt,
workflow_dispatch, component_ids, writeback_payload, result_payload,
created_at, updated_at
FROM oracle_actions
WHERE action_id = $1::uuid
"""
    conn = await asyncpg.connect(_DB_URL)
    try:
        record = await conn.fetchrow(query, action_id)
    finally:
        await conn.close()
    if not record:
        return None
    return self._serialize(record)
async def list_actions(self, *, status: str | None = None, limit: int = 50) -> list[dict[str, Any]]:
    """List actions newest first, optionally restricted to one status.

    Args:
        status: When truthy, only actions with this status are returned.
        limit: Maximum number of rows, passed straight to SQL ``LIMIT``.

    Returns:
        Serialized actions; an empty list when the database is not configured.
    """
    if not _db_ready():
        return []
    assert asyncpg is not None
    by_status_query = """
SELECT action_id, execution_id, tenant_id, page_id, branch_id, actor_id,
target_entity_type, target_entity_id, action_type, status, prompt,
workflow_dispatch, component_ids, writeback_payload, result_payload,
created_at, updated_at
FROM oracle_actions
WHERE status = $1
ORDER BY created_at DESC
LIMIT $2
"""
    all_query = """
SELECT action_id, execution_id, tenant_id, page_id, branch_id, actor_id,
target_entity_type, target_entity_id, action_type, status, prompt,
workflow_dispatch, component_ids, writeback_payload, result_payload,
created_at, updated_at
FROM oracle_actions
ORDER BY created_at DESC
LIMIT $1
"""
    # Pick the statement and its positional arguments up front.
    if status:
        query, params = by_status_query, (status, limit)
    else:
        query, params = all_query, (limit,)
    conn = await asyncpg.connect(_DB_URL)
    try:
        records = await conn.fetch(query, *params)
    finally:
        await conn.close()
    return [self._serialize(record) for record in records]
async def apply_writeback(self, payload: dict[str, Any]) -> dict[str, Any]:
    """Apply an Oracle writeback to a lead and record it as an ``applied`` action.

    The lead update, the optional ``chat_logs`` entry and the
    ``oracle_actions`` upsert run inside one transaction with the lead row
    locked (``FOR UPDATE``). A failure part-way through therefore leaves
    nothing half-applied, and concurrent writebacks cannot overwrite each
    other's score or notes.

    Args:
        payload: Must contain ``target_entity_type`` (only ``"lead"`` is
            accepted), ``target_entity_id``, ``action_id`` and
            ``writeback_payload``. ``tenant_id``, ``actor_id`` and
            ``action_type`` are optional and fall back to defaults.
            ``writeback_payload`` may carry ``metadata_patch``,
            ``score_delta``, ``notes_append``, ``kanban_status``,
            ``qualification`` and ``oracle_message``.

    Returns:
        A dict with ``actionId``, ``status``, ``targetEntityType``,
        ``targetEntityId`` and ``resultPayload`` (the updated lead fields).

    Raises:
        HTTPException: 503 when the database is not configured, 422 when the
            target is not a lead, 404 when the lead does not exist.
        KeyError: When a required payload key is missing.
    """
    if not _db_ready():
        raise HTTPException(status_code=503, detail="Oracle writeback store unavailable.")
    if payload["target_entity_type"] != "lead":
        raise HTTPException(status_code=422, detail="Only lead writebacks are supported in this pass.")
    assert asyncpg is not None
    await self.ensure_schema()
    conn = await asyncpg.connect(_DB_URL)
    try:
        target_lead_id = payload["target_entity_id"]
        action_id = payload["action_id"]
        writeback = payload["writeback_payload"]
        async with conn.transaction():
            # Lock the lead so the score/notes read-modify-write below cannot race.
            existing = await conn.fetchrow(
                "SELECT id, notes, metadata, kanban_status, qualification, score FROM leads WHERE id = $1 FOR UPDATE",
                target_lead_id,
            )
            if existing is None:
                raise HTTPException(status_code=404, detail=f"Lead '{target_lead_id}' not found for Oracle writeback.")
            # asyncpg returns json/jsonb columns as text unless a codec is
            # registered on the connection, so decode before treating the
            # value as a mapping.
            stored_metadata = existing["metadata"]
            if isinstance(stored_metadata, str):
                stored_metadata = json.loads(stored_metadata) if stored_metadata.strip() else None
            metadata = dict(stored_metadata) if isinstance(stored_metadata, dict) else {}
            metadata_patch = writeback.get("metadata_patch") or {}
            if isinstance(metadata_patch, dict):
                metadata.update(metadata_patch)
            score = int(existing["score"] or 0) + int(writeback.get("score_delta") or 0)
            updated_notes = (existing["notes"] or "").strip()
            notes_append = writeback.get("notes_append")
            if notes_append:
                separator = "\n\n" if updated_notes else ""
                updated_notes = f"{updated_notes}{separator}{notes_append}"
            updated = await conn.fetchrow(
                """
UPDATE leads
SET notes = $2,
metadata = $3::jsonb,
kanban_status = COALESCE($4, kanban_status),
qualification = COALESCE($5, qualification),
score = $6,
updated_at = NOW()
WHERE id = $1
RETURNING id, notes, metadata, kanban_status, qualification, score, updated_at
""",
                target_lead_id,
                updated_notes,
                json.dumps(metadata),
                writeback.get("kanban_status"),
                writeback.get("qualification"),
                max(score, 0),  # the stored score never goes below zero
            )
            oracle_message = writeback.get("oracle_message")
            if oracle_message:
                await conn.execute(
                    """
INSERT INTO chat_logs (id, lead_id, sender, channel, content, metadata, created_at)
VALUES ($1, $2, 'oracle', 'oracle', $3, $4::jsonb, NOW())
""",
                    str(uuid.uuid4()),
                    target_lead_id,
                    oracle_message,
                    json.dumps({"oracle_action_id": action_id, "writeback": True}),
                )
            result_payload = {
                "lead_id": updated["id"],
                "kanban_status": updated["kanban_status"],
                "qualification": updated["qualification"],
                "score": updated["score"],
                "updated_at": updated["updated_at"].isoformat() if updated["updated_at"] else None,
            }
            # Upsert: an action planned earlier under this id is flipped to
            # 'applied'; otherwise a new row is created.
            await conn.execute(
                """
INSERT INTO oracle_actions (
action_id, execution_id, tenant_id, page_id, branch_id, actor_id,
target_entity_type, target_entity_id, action_type, status, prompt,
workflow_dispatch, component_ids, writeback_payload, result_payload,
created_at, updated_at
)
VALUES (
$1::uuid, NULL, $2, NULL, NULL, $3,
$4, $5, $6, 'applied', NULL,
'{}'::jsonb, '[]'::jsonb, $7::jsonb, $8::jsonb,
NOW(), NOW()
)
ON CONFLICT (action_id)
DO UPDATE SET
status = 'applied',
writeback_payload = EXCLUDED.writeback_payload,
result_payload = EXCLUDED.result_payload,
updated_at = NOW()
""",
                action_id,
                payload.get("tenant_id", "tenant_velocity"),
                payload.get("actor_id", "oracle_operator"),
                payload["target_entity_type"],
                target_lead_id,
                payload.get("action_type", "lead_writeback"),
                json.dumps(writeback),
                json.dumps(result_payload),
            )
    finally:
        await conn.close()
    return {
        "actionId": action_id,
        "status": "applied",
        "targetEntityType": payload["target_entity_type"],
        "targetEntityId": payload["target_entity_id"],
        "resultPayload": result_payload,
    }
async def _persist_action(self, action: dict[str, Any]) -> None:
    """Insert ``action`` into ``oracle_actions``, updating the row on id conflict.

    Does nothing when the database is not configured. On conflict only the
    status, the four JSON payload columns and ``updated_at`` are overwritten.
    """
    if not _db_ready():
        return
    await self.ensure_schema()
    assert asyncpg is not None
    upsert_sql = """
INSERT INTO oracle_actions (
action_id, execution_id, tenant_id, page_id, branch_id, actor_id,
target_entity_type, target_entity_id, action_type, status, prompt,
workflow_dispatch, component_ids, writeback_payload, result_payload,
created_at, updated_at
)
VALUES (
$1::uuid, $2::uuid, $3, $4::uuid, $5, $6,
$7, $8, $9, $10, $11,
$12::jsonb, $13::jsonb, $14::jsonb, $15::jsonb,
$16::timestamptz, $17::timestamptz
)
ON CONFLICT (action_id)
DO UPDATE SET
status = EXCLUDED.status,
workflow_dispatch = EXCLUDED.workflow_dispatch,
component_ids = EXCLUDED.component_ids,
writeback_payload = EXCLUDED.writeback_payload,
result_payload = EXCLUDED.result_payload,
updated_at = EXCLUDED.updated_at
"""
    conn = await asyncpg.connect(_DB_URL)
    try:
        # Positional arguments in the order of the $1..$17 placeholders.
        params = (
            action["actionId"],
            action.get("executionId"),
            action["tenantId"],
            action.get("pageId"),
            action.get("branchId"),
            action["actorId"],
            action["targetEntityType"],
            action.get("targetEntityId"),
            action["actionType"],
            action["status"],
            action.get("prompt"),
            json.dumps(action.get("workflowDispatch") or {}),
            json.dumps(action.get("componentIds") or []),
            json.dumps(action.get("writebackPayload") or {}),
            json.dumps(action.get("resultPayload") or {}),
            _coerce_datetime(action["createdAt"]),
            _coerce_datetime(action["updatedAt"]),
        )
        await conn.execute(upsert_sql, *params)
    finally:
        await conn.close()
@staticmethod
def _serialize(row: Any) -> dict[str, Any]:
    """Convert an ``oracle_actions`` row into the camelCase API shape.

    UUID columns become strings, timestamps become ISO-8601 strings, and the
    JSONB columns are returned as decoded Python containers.

    Args:
        row: A mapping-like record with the ``oracle_actions`` columns.

    Returns:
        The serialized action dict.
    """

    def _decoded(column: str, empty: Any) -> Any:
        # asyncpg returns json/jsonb columns as text unless a codec is
        # registered on the connection; decode so callers receive objects
        # rather than JSON strings.
        raw = row[column]
        if isinstance(raw, str):
            raw = json.loads(raw) if raw.strip() else None
        return raw or empty

    return {
        "actionId": str(row["action_id"]),
        "executionId": str(row["execution_id"]) if row["execution_id"] else None,
        "tenantId": row["tenant_id"],
        "pageId": str(row["page_id"]) if row["page_id"] else None,
        "branchId": row["branch_id"],
        "actorId": row["actor_id"],
        "targetEntityType": row["target_entity_type"],
        "targetEntityId": row["target_entity_id"],
        "actionType": row["action_type"],
        "status": row["status"],
        "prompt": row["prompt"],
        "workflowDispatch": _decoded("workflow_dispatch", {}),
        "componentIds": _decoded("component_ids", []),
        "writebackPayload": _decoded("writeback_payload", {}),
        "resultPayload": _decoded("result_payload", {}),
        "createdAt": _iso(row["created_at"]),
        "updatedAt": _iso(row["updated_at"]),
    }
# Module-level instance created at import time.
oracle_action_service = OracleActionService()

View File

@@ -0,0 +1,780 @@
"""
oracle/canvas_service.py
Canvas persistence for Oracle pages, revisions, and current component projections.
"""
from __future__ import annotations
import json
import logging
import os
import uuid
from copy import deepcopy
from datetime import datetime, timezone
from typing import Any
try:
import asyncpg # type: ignore
except Exception: # pragma: no cover
asyncpg = None # type: ignore
logger = logging.getLogger(__name__)
# PostgreSQL connection string, read once at import time; empty when DATABASE_URL is unset.
_DB_URL = os.getenv("DATABASE_URL", "")
# In-memory stores used in demo mode, each keyed by page id.
_DEMO_PAGES: dict[str, dict[str, Any]] = {}
_DEMO_REVISIONS: dict[str, list[dict[str, Any]]] = {}
_DEMO_COMPONENTS: dict[str, list[dict[str, Any]]] = {}
def _now() -> str:
return datetime.now(timezone.utc).isoformat()
def _allow_in_memory() -> bool:
return (
os.getenv("ORACLE_ALLOW_IN_MEMORY_FALLBACK", "").lower() in {"1", "true", "yes"}
or "PYTEST_CURRENT_TEST" in os.environ
)
def _db_ready() -> bool:
    """Report whether PostgreSQL persistence can be used.

    True only when ``_DB_URL`` is non-empty, does not start with
    ``PLACEHOLDER``, and the ``asyncpg`` import succeeded.
    """
    if not _DB_URL:
        return False
    if _DB_URL.startswith("PLACEHOLDER"):
        return False
    return asyncpg is not None
def _is_demo() -> bool:
    """Return True when the in-memory stores stand in for an unavailable database."""
    if _db_ready():
        return False
    return _allow_in_memory()
def _ensure_ready() -> None:
    """Raise ``RuntimeError`` unless PostgreSQL or the in-memory fallback is usable.

    The message names the missing piece: the ``asyncpg`` driver when it could
    not be imported, otherwise the ``DATABASE_URL`` setting.
    """
    usable = _db_ready() or _is_demo()
    if not usable:
        if asyncpg is None:
            raise RuntimeError("Oracle backend requires asyncpg to connect to PostgreSQL.")
        raise RuntimeError("Oracle backend requires DATABASE_URL for production persistence.")
def _stringify(value: Any) -> str:
return str(value) if value is not None else ""
def _json_object(value: Any) -> dict[str, Any]:
if isinstance(value, dict):
return value
if isinstance(value, str) and value.strip():
try:
parsed = json.loads(value)
if isinstance(parsed, dict):
return parsed
except Exception:
logger.warning("canvas_service: failed to parse JSON object field; using empty object")
return {}
def _json_array(value: Any) -> list[Any]:
if isinstance(value, list):
return value
if isinstance(value, str) and value.strip():
try:
parsed = json.loads(value)
if isinstance(parsed, list):
return parsed
except Exception:
logger.warning("canvas_service: failed to parse JSON array field; using empty array")
return []
def _json_safe(value: Any) -> Any:
if isinstance(value, datetime):
return value.isoformat()
if isinstance(value, uuid.UUID):
return str(value)
if isinstance(value, dict):
return {str(key): _json_safe(val) for key, val in value.items()}
if isinstance(value, list):
return [_json_safe(item) for item in value]
if isinstance(value, tuple):
return [_json_safe(item) for item in value]
return value
def _normalize_component(component: dict[str, Any]) -> dict[str, Any]:
    """Return a deep copy of ``component`` with its structured fields coerced.

    ``componentId`` becomes a string, ``dataRows`` a list, and
    ``dataSourceDescriptor`` plus the eight object-valued fields become dicts.
    A ``descriptorId`` inside the descriptor is stringified when present.
    The input is not modified.
    """
    result = deepcopy(component)
    result["componentId"] = _stringify(result.get("componentId"))
    result["dataRows"] = _json_array(result.get("dataRows"))

    source_descriptor = _json_object(result.get("dataSourceDescriptor"))
    descriptor_id = source_descriptor.get("descriptorId")
    if descriptor_id is not None:
        source_descriptor["descriptorId"] = _stringify(descriptor_id)
    result["dataSourceDescriptor"] = source_descriptor

    object_fields = (
        "visualizationParameters",
        "dataBindings",
        "provenance",
        "renderingHints",
        "layout",
        "accessControls",
        "styleSignature",
        "validationState",
    )
    result.update({name: _json_object(result.get(name)) for name in object_fields})
    return result
def _deserialize_component_row(row: Any) -> dict[str, Any]:
    """Map an ``oracle_canvas_components`` row to a normalized camelCase component."""
    # (output key, column name) pairs copied across without conversion.
    passthrough = (
        ("type", "type"),
        ("title", "title"),
        ("description", "description"),
        ("version", "version"),
        ("lifecycleState", "lifecycle_state"),
        ("dataSourceDescriptor", "data_source_descriptor"),
        ("dataRows", "data_rows"),
        ("visualizationParameters", "visualization_parameters"),
        ("dataBindings", "data_bindings"),
        ("provenance", "provenance"),
        ("renderingHints", "rendering_hints"),
        ("layout", "layout"),
        ("accessControls", "access_controls"),
        ("styleSignature", "style_signature"),
        ("validationState", "validation_state"),
    )
    raw: dict[str, Any] = {"componentId": _stringify(row["component_id"])}
    for key, column in passthrough:
        raw[key] = row[column]
    raw["auditLog"] = list(row["audit_log"] or [])
    return _normalize_component(raw)
def _deserialize_page_row(row: Any, components: list[dict[str, Any]]) -> dict[str, Any]:
    """Map an ``oracle_canvas_pages`` row plus its components to the page payload.

    ``forks``, ``lineage``, ``audit`` and the presence counters are filled with
    fixed empty values. A missing or empty sharing policy is replaced by the
    private direct-fork default.
    """
    page_id = _stringify(row["page_id"])
    branch_id = _stringify(row["branch_id"])
    head_revision = int(row["head_revision"])

    page: dict[str, Any] = {
        "pageId": page_id,
        "tenantId": row["tenant_id"],
        "ownerId": row["owner_id"],
        "branchId": branch_id,
        "branchName": row["branch_name"],
        "pageType": row["page_type"],
        "title": row["title"],
        "isShared": bool(row["is_shared"]),
        "headRevision": head_revision,
        "baseRevision": int(row["base_revision"]),
    }

    policy = _json_object(row["sharing_policy"])
    if not policy:
        policy = {
            "shareMode": "direct_fork_only",
            "allowReshare": False,
            "defaultForkVisibility": "private",
        }
    page["sharingPolicy"] = policy

    page["forks"] = []
    page["lineage"] = []
    page["audit"] = {"lastAuditEventId": "", "eventCount": 0}
    last_updated = row["updated_at"].isoformat()
    page["presence"] = {"activeViewers": 0, "activeEditors": 0, "lastPresenceAt": last_updated}
    page["mainBranchPointer"] = {"pageId": page_id, "branchId": branch_id, "revision": head_revision}
    page["components"] = components
    page["createdAt"] = row["created_at"].isoformat()
    page["updatedAt"] = last_updated
    return page
class CanvasService:
    """Canvas pages, revisions and components, stored in PostgreSQL or in memory in demo mode."""
async def list_pages(
    self,
    *,
    tenant_id: str,
    owner_id: str,
    search: str | None = None,
    limit: int = 50,
) -> list[dict[str, Any]]:
    """List an owner's pages, most recently updated first.

    Args:
        tenant_id: Tenant the pages must belong to.
        owner_id: Owner the pages must belong to.
        search: Optional case-insensitive substring to look for in the title.
        limit: Maximum number of pages; clamped to the range 1..100.

    Returns:
        Page payloads, each including its current components.

    Raises:
        RuntimeError: When neither PostgreSQL nor the in-memory fallback is usable.
    """
    _ensure_ready()
    safe_limit = max(1, min(limit, 100))
    search_term = (search or "").strip().lower()
    if _is_demo():
        candidates = [
            page
            for page in _DEMO_PAGES.values()
            if page["tenantId"] == tenant_id and page["ownerId"] == owner_id
        ]
        if search_term:
            candidates = [page for page in candidates if search_term in page.get("title", "").lower()]
        candidates.sort(key=lambda page: page.get("updatedAt", ""), reverse=True)
        return [{**page, "components": deepcopy(_DEMO_COMPONENTS.get(page["pageId"], []))} for page in candidates[:safe_limit]]
    assert asyncpg is not None
    # Escape LIKE metacharacters so "%" and "_" typed by the user match
    # literally, the same way the in-memory substring search does. Backslash
    # is PostgreSQL's default LIKE escape character, so the SQL is unchanged.
    like_term = search_term.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
    conn = await asyncpg.connect(_DB_URL)
    try:
        rows = await conn.fetch(
            """
SELECT *
FROM oracle_canvas_pages
WHERE tenant_id = $1
AND owner_id = $2
AND ($3 = '' OR lower(title) LIKE '%' || $3 || '%')
ORDER BY updated_at DESC, created_at DESC
LIMIT $4
""",
            tenant_id,
            owner_id,
            like_term,
            safe_limit,
        )
        pages: list[dict[str, Any]] = []
        for row in rows:
            components = await self._pg_fetch_components(conn, _stringify(row["page_id"]), tenant_id)
            pages.append(_deserialize_page_row(row, components))
        return pages
    finally:
        await conn.close()
async def create_page(
    self,
    *,
    tenant_id: str,
    owner_id: str,
    title: str = "Untitled Canvas",
    page_type: str = "main",
    branch_name: str = "main",
    sharing_policy: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Create an empty canvas page for ``owner_id`` within ``tenant_id``.

    Args:
        tenant_id: Owning tenant.
        owner_id: Owning user.
        title: Page title.
        page_type: Page type label.
        branch_name: Name of the page's branch.
        sharing_policy: Optional policy; defaults to private direct-fork-only.

    Returns:
        The new page payload with no components.

    Raises:
        RuntimeError: When neither PostgreSQL nor the in-memory fallback is usable.
    """
    _ensure_ready()
    policy = sharing_policy or {"shareMode": "direct_fork_only", "allowReshare": False, "defaultForkVisibility": "private"}
    if _is_demo():
        page_id = str(uuid.uuid4())
        branch_id = str(uuid.uuid4())
        # Take the clock once so createdAt, updatedAt and lastPresenceAt agree.
        created_at = _now()
        page = {
            "pageId": page_id,
            "tenantId": tenant_id,
            "ownerId": owner_id,
            "branchId": branch_id,
            "branchName": branch_name,
            "pageType": page_type,
            "title": title,
            "isShared": False,
            "headRevision": 0,
            "baseRevision": 0,
            "sharingPolicy": policy,
            "forks": [],
            "lineage": [],
            "audit": {"lastAuditEventId": "", "eventCount": 0},
            "presence": {"activeViewers": 0, "activeEditors": 0, "lastPresenceAt": created_at},
            "mainBranchPointer": {"pageId": page_id, "branchId": branch_id, "revision": 0},
            "components": [],
            "createdAt": created_at,
            "updatedAt": created_at,
        }
        _DEMO_PAGES[page_id] = page
        _DEMO_REVISIONS[page_id] = []
        _DEMO_COMPONENTS[page_id] = []
        # Hand back a copy, as the other read paths do, so callers cannot
        # modify the stored page by accident.
        return {**page, "components": []}
    assert asyncpg is not None
    conn = await asyncpg.connect(_DB_URL)
    try:
        row = await conn.fetchrow(
            """
INSERT INTO oracle_canvas_pages (
tenant_id, owner_id, branch_id, branch_name, page_type, title, sharing_policy
)
VALUES ($1, $2, $3, $4, $5, $6, $7::jsonb)
RETURNING *
""",
            tenant_id,
            owner_id,
            str(uuid.uuid4()),
            branch_name,
            page_type,
            title,
            json.dumps(policy),
        )
        return _deserialize_page_row(row, [])
    finally:
        await conn.close()
async def ensure_default_page(
    self,
    *,
    tenant_id: str,
    owner_id: str,
    title: str = "Oracle Main Canvas",
) -> dict[str, Any]:
    """Return the owner's most recent page, creating one titled ``title`` if none exists."""
    existing = await self.get_first_page_for_owner(tenant_id=tenant_id, owner_id=owner_id)
    if not existing:
        return await self.create_page(tenant_id=tenant_id, owner_id=owner_id, title=title)
    return existing
async def get_first_page_for_owner(self, *, tenant_id: str, owner_id: str) -> dict[str, Any] | None:
    """Return the owner's most recently updated page, or ``None`` when they have none.

    Raises:
        RuntimeError: When neither PostgreSQL nor the in-memory fallback is usable.
    """
    _ensure_ready()
    if _is_demo():
        owned = [
            candidate
            for candidate in _DEMO_PAGES.values()
            if candidate["tenantId"] == tenant_id and candidate["ownerId"] == owner_id
        ]
        if not owned:
            return None
        # max() returns the first of equal maxima, matching a stable descending sort.
        latest = max(owned, key=lambda candidate: candidate.get("updatedAt", ""))
        return {**latest, "components": deepcopy(_DEMO_COMPONENTS.get(latest["pageId"], []))}
    assert asyncpg is not None
    query = """
SELECT *
FROM oracle_canvas_pages
WHERE tenant_id = $1 AND owner_id = $2
ORDER BY updated_at DESC, created_at DESC
LIMIT 1
"""
    conn = await asyncpg.connect(_DB_URL)
    try:
        record = await conn.fetchrow(query, tenant_id, owner_id)
        if not record:
            return None
        page_components = await self._pg_fetch_components(conn, _stringify(record["page_id"]), tenant_id)
        return _deserialize_page_row(record, page_components)
    finally:
        await conn.close()
async def get_page(self, page_id: str, tenant_id: str) -> dict[str, Any] | None:
    """Return the page with ``page_id`` if it belongs to ``tenant_id``, else ``None``.

    Raises:
        RuntimeError: When neither PostgreSQL nor the in-memory fallback is usable.
    """
    _ensure_ready()
    if _is_demo():
        stored = _DEMO_PAGES.get(page_id)
        if not stored or stored["tenantId"] != tenant_id:
            return None
        return {**stored, "components": deepcopy(_DEMO_COMPONENTS.get(page_id, []))}
    assert asyncpg is not None
    query = """
SELECT *
FROM oracle_canvas_pages
WHERE page_id = $1::uuid AND tenant_id = $2
"""
    conn = await asyncpg.connect(_DB_URL)
    try:
        record = await conn.fetchrow(query, page_id, tenant_id)
        if not record:
            return None
        page_components = await self._pg_fetch_components(conn, page_id, tenant_id)
        return _deserialize_page_row(record, page_components)
    finally:
        await conn.close()
async def update_page_title(
    self,
    *,
    page_id: str,
    tenant_id: str,
    owner_id: str,
    title: str,
) -> dict[str, Any]:
    """Rename a page owned by ``owner_id`` and return the updated page payload.

    A blank or empty ``title`` is replaced by ``"Untitled Canvas"``.

    Raises:
        ValueError: When no page matches the id, tenant and owner.
        RuntimeError: When neither PostgreSQL nor the in-memory fallback is usable.
    """
    _ensure_ready()
    clean_title = (title or "").strip() or "Untitled Canvas"
    if _is_demo():
        stored = _DEMO_PAGES.get(page_id)
        owned = bool(stored) and stored["tenantId"] == tenant_id and stored["ownerId"] == owner_id
        if not owned:
            raise ValueError(f"Page {page_id} not found for tenant {tenant_id}")
        stored["title"] = clean_title
        stored["updatedAt"] = _now()
        return {**stored, "components": deepcopy(_DEMO_COMPONENTS.get(page_id, []))}
    assert asyncpg is not None
    statement = """
UPDATE oracle_canvas_pages
SET title = $4, updated_at = NOW()
WHERE page_id = $1::uuid AND tenant_id = $2 AND owner_id = $3
RETURNING *
"""
    conn = await asyncpg.connect(_DB_URL)
    try:
        record = await conn.fetchrow(statement, page_id, tenant_id, owner_id, clean_title)
        if not record:
            raise ValueError(f"Page {page_id} not found for tenant {tenant_id}")
        page_components = await self._pg_fetch_components(conn, page_id, tenant_id)
        return _deserialize_page_row(record, page_components)
    finally:
        await conn.close()
async def delete_page(
    self,
    *,
    page_id: str,
    tenant_id: str,
    owner_id: str,
) -> None:
    """Delete a page owned by ``owner_id``.

    In demo mode the page's stored components and revisions are removed too.

    Raises:
        ValueError: When no page matches the id, tenant and owner.
        RuntimeError: When neither PostgreSQL nor the in-memory fallback is usable.
    """
    _ensure_ready()
    if _is_demo():
        page = _DEMO_PAGES.get(page_id)
        if not page or page["tenantId"] != tenant_id or page["ownerId"] != owner_id:
            raise ValueError(f"Page {page_id} not found for tenant {tenant_id}")
        del _DEMO_PAGES[page_id]
        _DEMO_COMPONENTS.pop(page_id, None)
        _DEMO_REVISIONS.pop(page_id, None)
        return
    assert asyncpg is not None
    conn = await asyncpg.connect(_DB_URL)
    try:
        result = await conn.execute(
            """
DELETE FROM oracle_canvas_pages
WHERE page_id = $1::uuid AND tenant_id = $2 AND owner_id = $3
""",
            page_id,
            tenant_id,
            owner_id,
        )
        # The status string looks like "DELETE <count>". Compare the whole
        # count: a suffix test would read "DELETE 10" as zero rows.
        if result.rpartition(" ")[2] == "0":
            raise ValueError(f"Page {page_id} not found for tenant {tenant_id}")
    finally:
        await conn.close()
async def commit_revision(
    self,
    *,
    page_id: str,
    tenant_id: str,
    actor_id: str,
    commit_kind: str,
    commit_summary: str,
    components: list[dict[str, Any]],
    execution_id: str | None = None,
    merge_request_id: str | None = None,
    idempotency_key: str | None = None,
) -> dict[str, Any]:
    """Record a new revision of a page and make ``components`` its current set.

    The head revision number is incremented, the normalized components are
    stored as the revision snapshot, and the page's current components are
    replaced. When ``idempotency_key`` matches a revision already recorded
    for this page, that revision is returned and nothing is written.

    Args:
        page_id: Page to commit to.
        tenant_id: Tenant the page must belong to.
        actor_id: Who made the commit.
        commit_kind: Kind label stored with the revision.
        commit_summary: Human-readable summary.
        components: Full component list for the new revision.
        execution_id: Optional originating execution id.
        merge_request_id: Optional originating merge request id.
        idempotency_key: Optional key that makes retries safe.

    Returns:
        The revision payload; ``componentsSnapshot`` is a JSON-encoded array.

    Raises:
        ValueError: When the page does not exist for the tenant.
        RuntimeError: When neither PostgreSQL nor the in-memory fallback is usable.
    """
    _ensure_ready()
    if _is_demo():
        page = _DEMO_PAGES.get(page_id)
        if not page or page["tenantId"] != tenant_id:
            raise ValueError(f"Page {page_id} not found for tenant {tenant_id}")
        if idempotency_key:
            existing = next((r for r in _DEMO_REVISIONS.get(page_id, []) if r.get("idempotencyKey") == idempotency_key), None)
            if existing:
                return existing
        new_revision_num = page["headRevision"] + 1
        revision = {
            "revisionId": str(uuid.uuid4()),
            "pageId": page_id,
            "tenantId": tenant_id,
            "revisionNumber": new_revision_num,
            "commitKind": commit_kind,
            "commitSummary": commit_summary,
            "actorId": actor_id,
            "executionId": execution_id,
            "mergeRequestId": merge_request_id,
            "componentsSnapshot": json.dumps(_json_safe(components)),
            "idempotencyKey": idempotency_key,
            "createdAt": _now(),
        }
        _DEMO_REVISIONS.setdefault(page_id, []).append(revision)
        _DEMO_COMPONENTS[page_id] = deepcopy([_normalize_component(component) for component in components])
        page["headRevision"] = new_revision_num
        page["mainBranchPointer"]["revision"] = new_revision_num
        page["updatedAt"] = _now()
        return revision
    assert asyncpg is not None

    def _revision_payload(record: Any) -> dict[str, Any]:
        # Shared row-to-payload mapping for both the replayed and the new revision.
        return {
            "revisionId": _stringify(record["revision_id"]),
            "pageId": _stringify(record["page_id"]),
            "tenantId": record["tenant_id"],
            "revisionNumber": int(record["revision_number"]),
            "commitKind": record["commit_kind"],
            "commitSummary": record["commit_summary"],
            "actorId": record["actor_id"],
            "executionId": _stringify(record["execution_id"]) if record["execution_id"] else None,
            "mergeRequestId": _stringify(record["merge_request_id"]) if record["merge_request_id"] else None,
            # asyncpg returns jsonb as text by default; decode first so the
            # snapshot is a JSON array (as in demo mode), not a doubly
            # encoded JSON string.
            "componentsSnapshot": json.dumps(_json_safe(_json_array(record["components_snapshot"]))),
            "idempotencyKey": record["idempotency_key"],
            "createdAt": record["created_at"].isoformat(),
        }

    normalized_components = [_normalize_component(component) for component in components]
    conn = await asyncpg.connect(_DB_URL)
    try:
        async with conn.transaction():
            # Lock the page row before the idempotency lookup so two
            # concurrent retries with the same key cannot both miss it.
            page = await conn.fetchrow(
                """
SELECT *
FROM oracle_canvas_pages
WHERE page_id = $1::uuid AND tenant_id = $2
FOR UPDATE
""",
                page_id,
                tenant_id,
            )
            if not page:
                raise ValueError(f"Page {page_id} not found for tenant {tenant_id}")
            if idempotency_key:
                # Scoped to this page and tenant, as in demo mode, so a key
                # reused elsewhere never returns another tenant's revision.
                existing = await conn.fetchrow(
                    """
SELECT *
FROM oracle_canvas_page_revisions
WHERE idempotency_key = $1 AND page_id = $2::uuid AND tenant_id = $3
""",
                    idempotency_key,
                    page_id,
                    tenant_id,
                )
                if existing:
                    return _revision_payload(existing)
            new_revision_number = int(page["head_revision"]) + 1
            revision = await conn.fetchrow(
                """
INSERT INTO oracle_canvas_page_revisions (
page_id, tenant_id, revision_number, commit_kind, commit_summary,
actor_id, execution_id, merge_request_id, components_snapshot, idempotency_key
)
VALUES (
$1::uuid, $2, $3, $4, $5,
$6, NULLIF($7, '')::uuid, NULLIF($8, '')::uuid, $9::jsonb, $10
)
RETURNING *
""",
                page_id,
                tenant_id,
                new_revision_number,
                commit_kind,
                commit_summary,
                actor_id,
                execution_id or "",  # empty string becomes NULL via NULLIF
                merge_request_id or "",
                json.dumps(_json_safe(normalized_components)),
                idempotency_key,
            )
            await conn.execute(
                """
UPDATE oracle_canvas_pages
SET head_revision = $3, updated_at = NOW()
WHERE page_id = $1::uuid AND tenant_id = $2
""",
                page_id,
                tenant_id,
                new_revision_number,
            )
            await self._pg_replace_components(conn, page_id=page_id, tenant_id=tenant_id, components=normalized_components)
            return _revision_payload(revision)
    finally:
        await conn.close()
async def rollback(
    self,
    *,
    page_id: str,
    tenant_id: str,
    actor_id: str,
    target_revision: int,
    idempotency_key: str,
) -> dict[str, Any]:
    """Restore a page to an earlier revision by committing that snapshot again.

    History is not rewritten: the components stored with ``target_revision``
    are committed as a new revision of kind ``"rollback"``.

    Args:
        page_id: Page to roll back.
        tenant_id: Tenant the page must belong to.
        actor_id: Who requested the rollback.
        target_revision: Revision number whose snapshot is restored.
        idempotency_key: Key passed on to ``commit_revision``.

    Returns:
        The payload of the newly committed rollback revision.

    Raises:
        ValueError: When the page or the target revision does not exist.
        RuntimeError: When neither PostgreSQL nor the in-memory fallback is usable.
    """
    _ensure_ready()
    if _is_demo():
        page = _DEMO_PAGES.get(page_id)
        # Check the tenant here as well, so another tenant's revisions are never read.
        if not page or page["tenantId"] != tenant_id:
            raise ValueError(f"Page {page_id} not found")
        revisions = _DEMO_REVISIONS.get(page_id, [])
        target_rev = next((r for r in revisions if r["revisionNumber"] == target_revision), None)
        if not target_rev:
            raise ValueError(f"Revision {target_revision} not found for page {page_id}")
        snapshot = json.loads(target_rev["componentsSnapshot"])
    else:
        assert asyncpg is not None
        conn = await asyncpg.connect(_DB_URL)
        try:
            revision = await conn.fetchrow(
                """
SELECT components_snapshot
FROM oracle_canvas_page_revisions
WHERE page_id = $1::uuid AND tenant_id = $2 AND revision_number = $3
""",
                page_id,
                tenant_id,
                target_revision,
            )
        finally:
            # Release this connection before committing: commit_revision
            # opens its own, and holding both would tie up two connections
            # per rollback.
            await conn.close()
        if not revision:
            raise ValueError(f"Revision {target_revision} not found for page {page_id}")
        snapshot = _json_array(revision["components_snapshot"])
    return await self.commit_revision(
        page_id=page_id,
        tenant_id=tenant_id,
        actor_id=actor_id,
        commit_kind="rollback",
        commit_summary=f"Rollback to revision {target_revision}",
        components=snapshot,
        idempotency_key=idempotency_key,
    )
async def list_revisions(self, page_id: str, tenant_id: str) -> list[dict[str, Any]]:
    """List a page's revisions, highest revision number first.

    In demo mode the stored revision dicts are returned as they are, and an
    unknown page or tenant mismatch yields an empty list. The PostgreSQL path
    selects metadata columns only, without the components snapshot.

    Raises:
        RuntimeError: When neither PostgreSQL nor the in-memory fallback is usable.
    """
    _ensure_ready()
    if _is_demo():
        stored = _DEMO_PAGES.get(page_id)
        if not stored or stored["tenantId"] != tenant_id:
            return []
        history = _DEMO_REVISIONS.get(page_id, [])
        return sorted(history, key=lambda r: r["revisionNumber"], reverse=True)
    assert asyncpg is not None
    query = """
SELECT revision_id, page_id, tenant_id, revision_number, commit_kind, commit_summary,
actor_id, execution_id, merge_request_id, created_at
FROM oracle_canvas_page_revisions
WHERE page_id = $1::uuid AND tenant_id = $2
ORDER BY revision_number DESC
"""
    conn = await asyncpg.connect(_DB_URL)
    try:
        records = await conn.fetch(query, page_id, tenant_id)
        revisions: list[dict[str, Any]] = []
        for record in records:
            execution_ref = _stringify(record["execution_id"]) if record["execution_id"] else None
            merge_ref = _stringify(record["merge_request_id"]) if record["merge_request_id"] else None
            revisions.append(
                {
                    "revisionId": _stringify(record["revision_id"]),
                    "pageId": _stringify(record["page_id"]),
                    "tenantId": record["tenant_id"],
                    "revisionNumber": int(record["revision_number"]),
                    "commitKind": record["commit_kind"],
                    "commitSummary": record["commit_summary"],
                    "actorId": record["actor_id"],
                    "executionId": execution_ref,
                    "mergeRequestId": merge_ref,
                    "createdAt": record["created_at"].isoformat(),
                }
            )
        return revisions
    finally:
        await conn.close()
async def upsert_component(
    self,
    *,
    page_id: str,
    tenant_id: str,
    component: dict[str, Any],
) -> dict[str, Any]:
    """Insert or replace one component on a page without creating a revision.

    Args:
        page_id: Page the component belongs to.
        tenant_id: Tenant stored with the component (not checked in demo mode).
        component: Component payload; it is normalized before being stored.

    Returns:
        The normalized component that was stored.

    Raises:
        RuntimeError: When neither PostgreSQL nor the in-memory fallback is usable.
    """
    _ensure_ready()
    # Normalize once: the same object is stored and returned, which avoids a
    # second deep copy of the component.
    normalized = _normalize_component(component)
    if _is_demo():
        comps = _DEMO_COMPONENTS.setdefault(page_id, [])
        existing_idx = next((i for i, c in enumerate(comps) if c.get("componentId") == normalized.get("componentId")), None)
        if existing_idx is not None:
            comps[existing_idx] = normalized
        else:
            comps.append(normalized)
        return normalized
    assert asyncpg is not None
    conn = await asyncpg.connect(_DB_URL)
    try:
        await self._pg_upsert_component(conn, page_id=page_id, tenant_id=tenant_id, component=normalized)
        return normalized
    finally:
        await conn.close()
async def _pg_fetch_components(self, conn: Any, page_id: str, tenant_id: str) -> list[dict[str, Any]]:
    """Load a page's components over ``conn``.

    Rows are ordered by ``layout.orderIndex`` (rows without one sort last),
    then by creation time.
    """
    query = """
SELECT *
FROM oracle_canvas_components
WHERE page_id = $1::uuid AND tenant_id = $2
ORDER BY COALESCE((layout->>'orderIndex')::int, 999999), created_at ASC
"""
    records = await conn.fetch(query, page_id, tenant_id)
    components: list[dict[str, Any]] = []
    for record in records:
        components.append(_deserialize_component_row(record))
    return components
async def _pg_replace_components(self, conn: Any, *, page_id: str, tenant_id: str, components: list[dict[str, Any]]) -> None:
await conn.execute(
"""
DELETE FROM oracle_canvas_components
WHERE page_id = $1::uuid AND tenant_id = $2
""",
page_id,
tenant_id,
)
for component in components:
await self._pg_upsert_component(conn, page_id=page_id, tenant_id=tenant_id, component=component)
async def _pg_upsert_component(self, conn: Any, *, page_id: str, tenant_id: str, component: dict[str, Any]) -> None:
    """Insert ``component`` into ``oracle_canvas_components``, updating on id conflict.

    Args:
        conn: Open database connection; this method does not close it.
        page_id: Page the component belongs to.
        tenant_id: Tenant stored with the component.
        component: Component payload; ``componentId``, ``type`` and ``title``
            are required, the remaining fields have defaults.

    Raises:
        KeyError: When ``componentId``, ``type`` or ``title`` is missing.
    """
    # NOTE(review): on conflict, `type`, `page_id` and `tenant_id` are left as
    # stored. Confirm this is intended, since an upsert with a changed type
    # keeps the old one.
    await conn.execute(
        """
INSERT INTO oracle_canvas_components (
component_id, page_id, tenant_id, type, title, description, version, lifecycle_state,
data_source_descriptor, data_rows, visualization_parameters, data_bindings, provenance,
rendering_hints, layout, access_controls, style_signature, validation_state, audit_log
)
VALUES (
$1::uuid, $2::uuid, $3, $4, $5, $6, $7, $8,
$9::jsonb, $10::jsonb, $11::jsonb, $12::jsonb, $13::jsonb,
$14::jsonb, $15::jsonb, $16::jsonb, $17::jsonb, $18::jsonb, $19::text[]
)
ON CONFLICT (component_id)
DO UPDATE SET
title = EXCLUDED.title,
description = EXCLUDED.description,
version = EXCLUDED.version,
lifecycle_state = EXCLUDED.lifecycle_state,
data_source_descriptor = EXCLUDED.data_source_descriptor,
data_rows = EXCLUDED.data_rows,
visualization_parameters = EXCLUDED.visualization_parameters,
data_bindings = EXCLUDED.data_bindings,
provenance = EXCLUDED.provenance,
rendering_hints = EXCLUDED.rendering_hints,
layout = EXCLUDED.layout,
access_controls = EXCLUDED.access_controls,
style_signature = EXCLUDED.style_signature,
validation_state = EXCLUDED.validation_state,
audit_log = EXCLUDED.audit_log,
updated_at = NOW()
""",
        component["componentId"],
        page_id,
        tenant_id,
        component["type"],
        component["title"],
        component.get("description"),
        int(component.get("version", 1)),
        component.get("lifecycleState", "active"),
        json.dumps(_json_safe(component.get("dataSourceDescriptor", {}))),
        json.dumps(_json_safe(component.get("dataRows", []))),
        json.dumps(_json_safe(component.get("visualizationParameters", {}))),
        json.dumps(_json_safe(component.get("dataBindings", {}))),
        json.dumps(_json_safe(component.get("provenance", {}))),
        json.dumps(_json_safe(component.get("renderingHints", {}))),
        json.dumps(_json_safe(component.get("layout", {}))),
        json.dumps(_json_safe(component.get("accessControls", {}))),
        json.dumps(_json_safe(component.get("styleSignature", {}))),
        json.dumps(_json_safe(component.get("validationState", {}))),
        # An explicit None audit log is stored as an empty array rather than
        # raising TypeError.
        list(component.get("auditLog") or []),
    )
# Module-level instance created at import time.
canvas_service = CanvasService()

View File

@@ -0,0 +1,353 @@
"""
oracle/codebook_service.py
Loads, normalizes, and retrieves Oracle Canvas codebook examples from the
expanded GPT and Claude seed packs delivered in Sprint 1.
The runtime treats the GPT pack as the primary normalized corpus and uses the
Claude pack as a supplement when it adds unique examples or metadata.
"""
from __future__ import annotations
import hashlib
import json
import logging
import re
from dataclasses import dataclass
from functools import lru_cache
from pathlib import Path
from typing import Any
# Module logger, named after this module.
logger = logging.getLogger(__name__)
# Matches runs of lowercase ASCII letters and digits; _tokenize applies it to
# already-lowercased text, so every other character acts as a separator.
_TOKEN_RE = re.compile(r"[a-z0-9]+")
# Tokens that _tokenize discards: filler words plus generic request words
# ("show", "chart", "client", ...), so they never become score terms.
_STOPWORDS = {
    "a", "an", "and", "as", "at", "build", "canvas", "chart", "client", "clients",
    "for", "from", "get", "give", "in", "into", "is", "list", "me", "of", "on",
    "or", "oracle", "please", "render", "show", "surface", "that", "the", "this",
    "to", "view", "with",
}
@dataclass(frozen=True)
class CodebookExample:
    """Immutable, normalized view of one codebook example.

    Instances are produced by ``_normalize_examples`` from a raw seed-pack
    entry; the comments below describe how that function fills each field.
    """

    # Raw "example_id", or a hash-derived id when the raw entry has none.
    example_id: str
    # Stripped chapter id taken from the raw entry.
    chapter_id: str
    # Chapter name looked up from the payload's chapter table; falls back to chapter_id.
    chapter_name: str
    # Stripped subchapter id taken from the raw entry.
    subchapter_id: str
    # Subchapter name looked up from the payload; falls back to subchapter_id.
    subchapter_name: str
    # Raw title, else the raw template name, else "Oracle Component".
    title: str
    # Raw template name, else the title.
    template_name: str
    # Raw component type, else "summary_card".
    component_type: str
    # Raw "accepted_shapes" as a tuple (empty when absent).
    accepted_shapes: tuple[str, ...]
    # Raw "example_json" object ({} when absent).
    example_json: dict[str, Any]
    # Raw "quality_notes" as text ("" when absent).
    quality_notes: str
    # Truthiness of the raw "is_canonical" flag.
    is_canonical: bool
    # Label of the source file this example was loaded from (e.g. "gpt_5_4").
    source_pack: str
    # Raw "surface_targets" as a tuple (empty when absent).
    surface_targets: tuple[str, ...]
    # Raw "policy_tags" as a tuple (empty when absent).
    policy_tags: tuple[str, ...]
    # Copy of the raw "backend_contract_hints" mapping ({} when absent).
    backend_contract_hints: dict[str, Any]
    # Search tokens derived from the title, names, component type, notes and tags.
    score_terms: tuple[str, ...]
def _repo_root() -> Path:
return Path(__file__).resolve().parents[2]
def _safe_load_json(path: Path) -> dict[str, Any]:
with path.open("r", encoding="utf-8") as handle:
return json.load(handle)
def _tokenize(value: str) -> list[str]:
    """Split *value* into lowercase alphanumeric tokens worth scoring.

    Single-character tokens and anything listed in ``_STOPWORDS`` are dropped;
    the remaining tokens keep their original order (duplicates included).
    """
    kept: list[str] = []
    for word in _TOKEN_RE.findall(value.lower()):
        if len(word) <= 1 or word in _STOPWORDS:
            continue
        kept.append(word)
    return kept
def _make_template_id(example: dict[str, Any]) -> str:
base = "|".join(
[
example.get("chapter_id", ""),
example.get("subchapter_id", ""),
example.get("template_name", ""),
example.get("component_type", ""),
]
)
return hashlib.sha1(base.encode("utf-8")).hexdigest()[:16]
def _chapter_maps(payload: dict[str, Any]) -> tuple[dict[str, str], dict[str, str]]:
chapters: dict[str, str] = {}
subchapters: dict[str, str] = {}
for chapter in payload.get("chapters", []):
chapter_id = str(chapter.get("chapter_id", "")).strip()
if chapter_id:
chapters[chapter_id] = str(chapter.get("name", "")).strip()
for subchapter in chapter.get("subchapters", []):
sub_id = str(subchapter.get("subchapter_id", "")).strip()
if sub_id:
subchapters[sub_id] = str(subchapter.get("name", "")).strip()
return chapters, subchapters
def _normalize_examples(payload: dict[str, Any], source_pack: str) -> list[CodebookExample]:
    """Convert the raw examples of one codebook document into ``CodebookExample`` records.

    Args:
        payload: Parsed codebook document. Examples are read from
            ``"seed_examples"`` or, when that is absent or empty, ``"examples"``.
        source_pack: Label recorded on every produced example.

    Returns:
        One ``CodebookExample`` per raw entry, in input order. Missing or null
        fields fall back to defaults: the title falls back to the template
        name and then ``"Oracle Component"``; the component type falls back to
        ``"summary_card"``; ids and notes fall back to ``""``.
    """

    def _clean(value: Any) -> str:
        # JSON null must not become the literal text "None".
        return "" if value is None else str(value).strip()

    chapter_names, subchapter_names = _chapter_maps(payload)
    raw_examples = payload.get("seed_examples") or payload.get("examples") or []
    normalized: list[CodebookExample] = []
    for raw in raw_examples:
        chapter_id = _clean(raw.get("chapter_id"))
        subchapter_id = _clean(raw.get("subchapter_id"))
        title = str(raw.get("title") or raw.get("template_name") or "Oracle Component").strip()
        template_name = str(raw.get("template_name") or title).strip()
        component_type = str(raw.get("component_type") or "summary_card").strip()
        # Computed once so the stored field and the search terms agree; a null
        # value previously leaked the token "none" into score_terms.
        quality_notes = str(raw.get("quality_notes") or "")
        policy_tags = tuple(raw.get("policy_tags") or [])
        terms = _tokenize(
            " ".join(
                [
                    title,
                    template_name,
                    component_type.replace("_", " "),
                    chapter_names.get(chapter_id, ""),
                    subchapter_names.get(subchapter_id, ""),
                    quality_notes,
                    # str() keeps a non-string tag from breaking the join.
                    " ".join(str(tag) for tag in policy_tags),
                ]
            )
        )
        normalized.append(
            CodebookExample(
                example_id=str(raw.get("example_id") or _make_template_id(raw)),
                chapter_id=chapter_id,
                chapter_name=chapter_names.get(chapter_id, chapter_id),
                subchapter_id=subchapter_id,
                subchapter_name=subchapter_names.get(subchapter_id, subchapter_id),
                title=title,
                template_name=template_name,
                component_type=component_type,
                accepted_shapes=tuple(raw.get("accepted_shapes") or []),
                example_json=raw.get("example_json") or {},
                quality_notes=quality_notes,
                is_canonical=bool(raw.get("is_canonical")),
                source_pack=source_pack,
                surface_targets=tuple(raw.get("surface_targets") or []),
                policy_tags=policy_tags,
                backend_contract_hints=dict(raw.get("backend_contract_hints") or {}),
                score_terms=tuple(terms),
            )
        )
    return normalized
class OracleCodebookService:
    """Load the Oracle Canvas codebook and answer template and example lookups.

    Candidate source files are resolved relative to the repository root in
    ``__init__``. ``load`` reads, normalizes and de-duplicates them; the other
    methods query the loaded examples.
    """

    def __init__(self) -> None:
        """Resolve the candidate codebook file locations (no file is read here)."""
        root = _repo_root()
        oracle_dir = root / "backend" / "oracle"
        delivery_pack = (
            root
            / ".Agent Context"
            / "Sprint 1"
            / "Sayan Multi-Surface and Oracle Delivery Pack"
            / "Sample JSON Schema"
        )
        self.runtime_merged_path = oracle_dir / "oracle_runtime_codebook_merged.json"
        self.primary_path = (
            delivery_pack
            / "GPT 5.4"
            / "oracle_canvas_json_expansion_pack"
            / "db"
            / "oracle_template_seed_db_expanded_v1.pretty.json"
        )
        self.secondary_path = (
            delivery_pack
            / "Claude Sonnet 4.6"
            / "oracle_template_expansion"
            / "oracle_template_seed_db_expanded.json"
        )
        self.fallback_path = oracle_dir / "oracle_template_seed_db.json"

    @staticmethod
    def _template_id_for(example: CodebookExample) -> str:
        """Return the template id derived from an example's four identity fields."""
        return _make_template_id(
            {
                "chapter_id": example.chapter_id,
                "subchapter_id": example.subchapter_id,
                "template_name": example.template_name,
                "component_type": example.component_type,
            }
        )

    # NOTE(review): lru_cache on a method keys the cache on ``self`` and keeps
    # the instance alive. Left in place so ``load.cache_clear()`` keeps working.
    @lru_cache(maxsize=1)
    def load(self) -> dict[str, Any]:
        """Read, normalize and de-duplicate the codebook sources (result is cached).

        When the runtime merged file exists, only it and the fallback seed are
        read; otherwise the primary, secondary and fallback packs are read.
        Files that do not exist are skipped.

        Returns:
            A dict with ``"examples"`` (de-duplicated ``CodebookExample`` list),
            ``"source_summary"`` (``"label:count"`` strings) and
            ``"template_count"`` (number of distinct chapter / subchapter /
            template name / component type combinations).
        """
        source_paths: list[tuple[Path, str]]
        if self.runtime_merged_path.exists():
            source_paths = [
                (self.runtime_merged_path, "runtime_merged"),
                (self.fallback_path, "runtime_seed_fallback"),
            ]
        else:
            source_paths = [
                (self.primary_path, "gpt_5_4"),
                (self.secondary_path, "claude_sonnet_4_6"),
                (self.fallback_path, "runtime_seed_fallback"),
            ]

        corpora: list[CodebookExample] = []
        sources_loaded: list[str] = []
        for path, label in source_paths:
            if not path.exists():
                continue
            examples = _normalize_examples(_safe_load_json(path), label)
            if examples:
                corpora.extend(examples)
                sources_loaded.append(f"{label}:{len(examples)}")

        deduped: dict[tuple[str, str, str], CodebookExample] = {}
        for example in corpora:
            key = (example.subchapter_id, example.template_name.lower(), example.title.lower())
            existing = deduped.get(key)
            if existing is None:
                deduped[key] = example
                continue
            # Prefer canonical GPT examples, then canonical examples, then richer source pack.
            if example.source_pack == "gpt_5_4" and existing.source_pack != "gpt_5_4":
                deduped[key] = example
            elif example.is_canonical and not existing.is_canonical:
                deduped[key] = example

        examples = list(deduped.values())
        logger.info("Oracle codebook loaded from %s", ", ".join(sources_loaded) or "no sources")
        return {
            "examples": examples,
            "source_summary": sources_loaded,
            "template_count": len({(e.chapter_id, e.subchapter_id, e.template_name, e.component_type) for e in examples}),
        }

    def stats(self) -> dict[str, Any]:
        """Return example/template counts and the per-source load summary."""
        data = self.load()
        examples: list[CodebookExample] = data["examples"]
        return {
            "example_count": len(examples),
            "template_count": data["template_count"],
            "source_summary": data["source_summary"],
        }

    def list_templates(
        self,
        *,
        category: str | None = None,
        status: str | None = None,
        search: str | None = None,
        limit: int = 50,
        offset: int = 0,
    ) -> dict[str, Any]:
        """List catalog templates derived from the loaded examples.

        Args:
            category: Optional case-insensitive chapter or subchapter name to match exactly.
            status: Accepted for interface compatibility and ignored.
            search: Optional free text; an example is kept when it shares at
                least one token with the tokenized search text.
            limit: Page size. Negative values are treated as 0.
            offset: Page start. Negative values are treated as 0.

        Returns:
            ``{"total": <matches before paging>, "templates": <one page>}``,
            sorted by category and then name.
        """
        del status  # runtime codebook templates are always active catalog entries
        wanted_category = category.lower() if category else None
        # Tokenize once instead of once per example.
        search_terms = set(_tokenize(search)) if search else None

        templates: dict[str, dict[str, Any]] = {}
        for example in self.load()["examples"]:
            if wanted_category and wanted_category not in {
                example.chapter_name.lower(),
                example.subchapter_name.lower(),
            }:
                continue
            if search_terms is not None and search_terms.isdisjoint(example.score_terms):
                continue
            template_id = self._template_id_for(example)
            if template_id in templates:
                continue  # first example seen for a template defines its record
            templates[template_id] = {
                "templateId": template_id,
                "tenantId": "_system",
                "name": example.template_name,
                "category": example.chapter_name,
                "status": "catalog_active",
                "origin": "premade",
                "version": "codebook-v2",
                "acceptedShapes": list(example.accepted_shapes),
                "description": f"{example.subchapter_name} · {example.title}",
                "chapterId": example.chapter_id,
                "subchapterId": example.subchapter_id,
                "componentType": example.component_type,
                "sourcePack": example.source_pack,
                "useCount": 0,
                "updatedAt": None,
                "createdAt": None,
            }

        ordered = sorted(templates.values(), key=lambda item: (item["category"], item["name"]))
        # A negative offset or limit would turn the slice into an
        # "index from the end" expression and return the wrong page.
        start = max(offset, 0)
        page_size = max(limit, 0)
        return {
            "total": len(ordered),
            "templates": ordered[start: start + page_size],
        }

    def search_examples(self, prompt: str, *, limit: int = 8) -> list[CodebookExample]:
        """Return up to *limit* examples ranked by relevance to *prompt*.

        Scoring adds 6 per shared token, bonuses when the prompt mentions the
        template / subchapter / chapter name or the component type, fixed
        bonuses for canonical and ``live_data_first`` examples, and topical
        bonuses for CRM, interaction and property prompts. Only examples with
        a positive score are kept, and at most one example per
        (subchapter id, template name) pair is returned.

        Args:
            prompt: Free-text user request.
            limit: Maximum number of results; values <= 0 yield an empty list.
        """
        if limit <= 0:
            # The selection loop below appends before it checks the limit, so
            # without this guard a limit of 0 would still return one example.
            return []

        prompt_terms = set(_tokenize(prompt))
        lowered_prompt = prompt.lower()

        def mentioned(phrase: str) -> bool:
            # "" is a substring of every string; a blank name must not earn a bonus.
            return bool(phrase) and phrase in lowered_prompt

        crm_prompt = any(term in lowered_prompt for term in ("client", "clients", "contact", "contacts", "crm", "lead", "account"))
        interaction_prompt = any(term in lowered_prompt for term in ("interaction", "timeline", "call", "message", "email", "whatsapp", "follow-up"))
        property_prompt = any(term in lowered_prompt for term in ("property", "properties", "project", "projects", "interest", "interested"))

        scored: list[tuple[int, CodebookExample]] = []
        for example in self.load()["examples"]:
            score = 6 * len(prompt_terms.intersection(example.score_terms))
            if mentioned(example.template_name.lower()):
                score += 24
            if mentioned(example.subchapter_name.lower()):
                score += 20
            if mentioned(example.chapter_name.lower()):
                score += 14
            if mentioned(example.component_type.replace("_", " ")):
                score += 12
            if example.is_canonical:
                score += 8
            if "live_data_first" in example.policy_tags:
                score += 4
            # Built once per example; the three topical checks share it.
            haystack = " ".join(
                (
                    example.chapter_name.lower(),
                    example.subchapter_name.lower(),
                    example.title.lower(),
                    example.template_name.lower(),
                )
            )
            if crm_prompt and any(term in haystack for term in ("lead", "client", "contact", "crm", "account", "pipeline")):
                score += 18
            if interaction_prompt and any(term in haystack for term in ("interaction", "timeline", "call", "message", "email", "whatsapp", "follow-up")):
                score += 16
            if property_prompt and any(term in haystack for term in ("property", "inventory", "interest", "project")):
                score += 16
            if score > 0:
                scored.append((score, example))

        scored.sort(key=lambda item: (-item[0], item[1].chapter_id, item[1].subchapter_id, item[1].title))
        selected: list[CodebookExample] = []
        seen: set[tuple[str, str]] = set()
        for _, example in scored:
            dedupe_key = (example.subchapter_id, example.template_name)
            if dedupe_key in seen:
                continue
            seen.add(dedupe_key)
            selected.append(example)
            if len(selected) >= limit:
                break
        return selected

    def synthesize_template(self, prompt: str, data_shapes: list[str] | None = None) -> dict[str, Any]:
        """Return the best-matching catalog template for *prompt*, or a draft stub.

        Args:
            prompt: Free-text user request.
            data_shapes: Shapes used for ``acceptedShapes`` when the match
                declares none, or when there is no match at all.

        Returns:
            A template record. With a match it describes the codebook template
            and includes its ``exampleJson``. Without one it is a
            ``tenant_draft`` whose id is derived from the prompt text.
        """
        match = next(iter(self.search_examples(prompt, limit=1)), None)
        shapes = data_shapes or []
        if match is None:
            return {
                "templateId": hashlib.sha1(prompt.encode("utf-8")).hexdigest()[:16],
                "tenantId": "_system",
                "name": "Oracle Synthesized Draft",
                "category": "Custom",
                "status": "tenant_draft",
                "origin": "synthesized",
                "version": "1.0.0",
                "acceptedShapes": shapes,
                "description": f"Draft synthesized from prompt: {prompt[:120]}",
            }
        return {
            "templateId": self._template_id_for(match),
            "tenantId": "_system",
            "name": match.template_name,
            "category": match.chapter_name,
            "status": "catalog_active",
            "origin": "premade",
            "version": "codebook-v2",
            "acceptedShapes": list(match.accepted_shapes or shapes),
            "description": f"Best codebook match · {match.subchapter_name}",
            "componentType": match.component_type,
            "chapterId": match.chapter_id,
            "subchapterId": match.subchapter_id,
            "sourcePack": match.source_pack,
            "exampleJson": match.example_json,
        }
# Shared module-level service instance. Constructing it only resolves paths;
# codebook files are read on the first load() call.
codebook_service = OracleCodebookService()

View File

@@ -0,0 +1,430 @@
"""
oracle/collaboration_service.py
Implements fork creation, MergeRequest lifecycle, three-way diff engine,
conflict classification (all 7 classes from spec §17.2), and merge commits.
"""
from __future__ import annotations
import copy
import logging
import uuid
from datetime import datetime, timezone
from typing import Any
from .canvas_service import canvas_service
logger = logging.getLogger(__name__)
# ── In-memory store (demo mode) ───────────────────────────────────────────────
# Process-local registries: forks keyed by forkId, merge requests keyed by
# mergeRequestId. Nothing in this module writes them anywhere else.
_DEMO_FORKS: dict[str, dict[str, Any]] = {}
_DEMO_MRS: dict[str, dict[str, Any]] = {}
def _now() -> str:
return datetime.now(timezone.utc).isoformat()
def _clone_components_for_fork(
components: list[dict[str, Any]],
*,
actor_id: str,
source_page_id: str,
source_branch_id: str,
source_revision: int,
) -> list[dict[str, Any]]:
cloned: list[dict[str, Any]] = []
for component in components:
forked = copy.deepcopy(component)
original_component_id = str(forked.get("componentId") or "")
forked["componentId"] = str(uuid.uuid4())
provenance = dict(forked.get("provenance") or {})
provenance["forkedAt"] = _now()
provenance["forkedBy"] = actor_id
provenance["sourcePageId"] = source_page_id
provenance["sourceBranchId"] = source_branch_id
provenance["sourceRevision"] = source_revision
if original_component_id:
provenance["sourceComponentId"] = original_component_id
forked["provenance"] = provenance
cloned.append(forked)
return cloned
# ── Three-way diff engine ─────────────────────────────────────────────────────
def _three_way_diff(
base_components: list[dict[str, Any]],
source_components: list[dict[str, Any]],
target_components: list[dict[str, Any]],
) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
"""
Compute a three-way diff between base, source, and target component lists.
Returns (merged_components, conflicts) per spec §17.2.
Conflict classes:
1. safe_append — added only in source, not in target
2. safe_reorder — order differs but content same
3. component_content_conflict — both changed same component fields
4. query_descriptor_conflict — data source descriptor changed in both
5. layout_slot_conflict — same orderIndex claimed by different components
6. access_policy_conflict — accessControls differ in both
7. delete_edit_conflict — deleted in one, edited in other
"""
base_map = {c["componentId"]: c for c in base_components}
source_map = {c["componentId"]: c for c in source_components}
target_map = {c["componentId"]: c for c in target_components}
all_ids = set(base_map) | set(source_map) | set(target_map)
merged: list[dict[str, Any]] = []
conflicts: list[dict[str, Any]] = []
def make_conflict(
conflict_class: str,
component_id: str,
field: str | None = None,
source_val: Any = None,
target_val: Any = None,
description: str = "",
) -> dict[str, Any]:
return {
"conflictId": str(uuid.uuid4()),
"conflictClass": conflict_class,
"componentId": component_id,
"field": field,
"sourceValue": source_val,
"targetValue": target_val,
"description": description,
}
for cid in all_ids:
in_base = cid in base_map
in_source = cid in source_map
in_target = cid in target_map
base_c = base_map.get(cid)
src_c = source_map.get(cid)
tgt_c = target_map.get(cid)
# Case 1: Exists nowhere → skip
if not in_source and not in_target:
continue
# Case 2: Deleted in both → skip
if not in_source and not in_target:
continue
# Case 3: Added only in source (safe_append)
if not in_base and in_source and not in_target:
conflicts.append(make_conflict(
"safe_append", cid,
description=f"Component '{cid}' added in source branch; will be appended."
))
merged.append(copy.deepcopy(src_c))
continue
# Case 4: Added only in target → keep target as-is
if not in_base and not in_source and in_target:
merged.append(copy.deepcopy(tgt_c))
continue
# Case 5: Added in both (both new, same id) → conflict
if not in_base and in_source and in_target:
if src_c == tgt_c:
merged.append(copy.deepcopy(tgt_c))
else:
conflicts.append(make_conflict(
"component_content_conflict", cid,
description="Component added in both branches with different content."
))
merged.append(copy.deepcopy(tgt_c)) # Default: keep target
continue
# Case 6: Deleted in source only
if in_base and not in_source and in_target:
src_equal_base = base_c == tgt_c
if src_equal_base:
# Target unchanged → deletion is safe
continue
else:
conflicts.append(make_conflict(
"delete_edit_conflict", cid,
description="Component deleted in source but edited in target."
))
merged.append(copy.deepcopy(tgt_c))
continue
# Case 7: Deleted in target only
if in_base and in_source and not in_target:
src_equal_base = base_c == src_c
if src_equal_base:
continue
else:
conflicts.append(make_conflict(
"delete_edit_conflict", cid,
description="Component deleted in target but edited in source."
))
merged.append(copy.deepcopy(src_c))
continue
# Case 8: Both present — check for edits
if src_c == tgt_c:
merged.append(copy.deepcopy(tgt_c))
continue
# Check individual field conflicts
has_conflict = False
# Data source descriptor conflict
if src_c.get("dataSourceDescriptor") != tgt_c.get("dataSourceDescriptor") \
and (base_c or {}).get("dataSourceDescriptor") not in (
src_c.get("dataSourceDescriptor"),
tgt_c.get("dataSourceDescriptor"),
):
conflicts.append(make_conflict(
"query_descriptor_conflict", cid,
field="dataSourceDescriptor",
description="Data source descriptor modified in both branches.",
))
has_conflict = True
# Access controls conflict
if src_c.get("accessControls") != tgt_c.get("accessControls") \
and (base_c or {}).get("accessControls") not in (
src_c.get("accessControls"),
tgt_c.get("accessControls"),
):
conflicts.append(make_conflict(
"access_policy_conflict", cid,
field="accessControls",
source_val=src_c.get("accessControls"),
target_val=tgt_c.get("accessControls"),
description="Access control policies diverge in both branches.",
))
has_conflict = True
# Layout orderIndex conflict
src_order = (src_c.get("layout") or {}).get("orderIndex")
tgt_order = (tgt_c.get("layout") or {}).get("orderIndex")
if src_order != tgt_order:
conflicts.append(make_conflict(
"layout_slot_conflict", cid,
field="layout.orderIndex",
source_val=src_order,
target_val=tgt_order,
description="Layout order index conflicts.",
))
# Record as safe reorder if content otherwise matches
if not has_conflict:
conflicts.append(make_conflict("safe_reorder", cid, description="Component reordered."))
# General content conflict
if not has_conflict and src_c != tgt_c:
conflicts.append(make_conflict(
"component_content_conflict", cid,
description="Component content diverges in both branches.",
))
# Merge: for all conflicts, default target wins
merged.append(copy.deepcopy(tgt_c))
# Normalize orderIndex
merged.sort(key=lambda c: (c.get("layout") or {}).get("orderIndex", 9999))
for i, comp in enumerate(merged):
comp.setdefault("layout", {})["orderIndex"] = (i + 1) * 100
return merged, conflicts
# ── CollaborationService ──────────────────────────────────────────────────────
class CollaborationService:
    """
    Manages fork creation and merge request lifecycle.
    Uses canvas_service for page creation and revision commits; fork and
    merge-request records live in the module-level in-memory stores.
    """

    # Reviewer decision → resulting merge-request status.
    _DECISION_STATUS = {
        "approve": "merged",
        "reject": "closed",
        "changes_requested": "changes_requested",
    }

    @staticmethod
    def _public_view(mr: dict[str, Any]) -> dict[str, Any]:
        """Return a shallow copy of *mr* without the internal ``_mergedComponents`` entry."""
        return {k: v for k, v in mr.items() if k != "_mergedComponents"}

    async def create_fork(
        self,
        *,
        source_page: dict[str, Any],
        recipient_user_id: str,
        created_by: str,
        visibility: str = "private",
        message: str = "",
    ) -> dict[str, Any]:
        """
        Creates a fork from the source_page snapshot at its current headRevision.

        A new page owned by the recipient is created, the source components
        are cloned onto it in a single revision, and the fork record is stored.

        Args:
            source_page: Page snapshot; reads ``tenantId``, ``title``,
                ``pageId``, ``branchId``, ``headRevision`` and ``components``.
            recipient_user_id: Owner of the new fork page.
            created_by: Acting user; must differ from the recipient.
            visibility: Default fork visibility; ``"team"`` also allows resharing.
            message: Free-text note stored on the fork record.

        Returns:
            The ForkRecord dict.

        Raises:
            ValueError: If the recipient is the creator.
        """
        if recipient_user_id == created_by:
            raise ValueError("You cannot share a canvas with your own account.")

        fork_id = str(uuid.uuid4())
        fork_page = await canvas_service.create_page(
            tenant_id=source_page["tenantId"],
            owner_id=recipient_user_id,
            title=f"{source_page['title']} Fork",
            page_type="fork",
            branch_name=f"fork-{str(fork_id)[:8]}",
            sharing_policy={
                "shareMode": "direct_fork_only",
                "allowReshare": visibility == "team",
                "defaultForkVisibility": visibility,
            },
        )
        fork_components = _clone_components_for_fork(
            source_page.get("components", []),
            actor_id=created_by,
            source_page_id=source_page["pageId"],
            source_branch_id=source_page["branchId"],
            source_revision=source_page["headRevision"],
        )
        await canvas_service.commit_revision(
            page_id=fork_page["pageId"],
            tenant_id=source_page["tenantId"],
            actor_id=created_by,
            commit_kind="merge",
            commit_summary=f"Forked from {source_page['title']} at rev.{source_page['headRevision']}",
            components=fork_components,
            execution_id=None,
            merge_request_id=None,
            idempotency_key=f"fork_{fork_id}",
        )
        fork = {
            "forkId": fork_id,
            "sourcePageId": source_page["pageId"],
            "sourceBranchId": source_page["branchId"],
            "sourceRevision": source_page["headRevision"],
            "forkPageId": fork_page["pageId"],
            "forkBranchId": fork_page["branchId"],
            "recipientUserId": recipient_user_id,
            "createdBy": created_by,
            "visibility": visibility,
            "message": message,
            "status": "active",
            "createdAt": _now(),
        }
        _DEMO_FORKS[fork_id] = fork
        logger.info(
            "COLLAB fork_created fork_id=%s source_page=%s revision=%d recipient=%s",
            fork_id, source_page["pageId"], source_page["headRevision"], recipient_user_id,
        )
        return fork

    async def open_merge_request(
        self,
        *,
        tenant_id: str,
        source_page_id: str,
        source_branch_id: str,
        source_head_revision: int,
        target_page_id: str,
        target_branch_id: str,
        target_base_revision: int,
        title: str,
        description: str = "",
        created_by: str,
        source_components: list[dict[str, Any]],
        target_components: list[dict[str, Any]],
        base_components: list[dict[str, Any]],
    ) -> dict[str, Any]:
        """
        Creates a MergeRequest with pre-computed conflicts via three-way diff.

        The diff summary counts conflict entries by class: ``safe_append`` as
        added, ``component_content_conflict`` as edited, ``safe_reorder`` and
        ``layout_slot_conflict`` as reordered, ``delete_edit_conflict`` as deleted.

        Returns:
            The stored merge request without its internal ``_mergedComponents``.
        """
        merged, conflicts = _three_way_diff(base_components, source_components, target_components)

        def count(*classes: str) -> int:
            return sum(1 for c in conflicts if c["conflictClass"] in classes)

        # One clock read so createdAt and updatedAt are identical on a new request.
        opened_at = _now()
        mr = {
            "mergeRequestId": str(uuid.uuid4()),
            "tenantId": tenant_id,
            "sourcePageId": source_page_id,
            "sourceBranchId": source_branch_id,
            "sourceHeadRevision": source_head_revision,
            "targetPageId": target_page_id,
            "targetBranchId": target_branch_id,
            "targetBaseRevision": target_base_revision,
            "title": title,
            "description": description,
            "status": "open",
            "conflicts": conflicts,
            "diffSummary": {
                "componentsAdded": count("safe_append"),
                "componentsEdited": count("component_content_conflict"),
                "componentsReordered": count("safe_reorder", "layout_slot_conflict"),
                "componentsDeleted": count("delete_edit_conflict"),
            },
            "_mergedComponents": merged,  # internal — used during merge
            "createdBy": created_by,
            "createdAt": opened_at,
            "updatedAt": opened_at,
        }
        _DEMO_MRS[mr["mergeRequestId"]] = mr
        logger.info(
            "COLLAB mr_opened mr_id=%s conflicts=%d source=%s → target=%s",
            mr["mergeRequestId"], len(conflicts), source_branch_id, target_branch_id,
        )
        return self._public_view(mr)

    async def review_merge_request(
        self,
        *,
        mr_id: str,
        decision: str,
        reviewer_id: str,
        comment: str = "",
        resolutions: list[dict[str, Any]] | None = None,
    ) -> dict[str, Any]:
        """
        Applies a reviewer decision: approve → status "merged"; reject → "closed";
        changes_requested → "changes_requested".

        Args:
            mr_id: Id of a stored merge request.
            decision: One of ``approve``, ``reject``, ``changes_requested``.
            reviewer_id: Recorded as ``reviewedBy``.
            comment: Recorded as ``reviewerComment``.
            resolutions: Accepted for interface compatibility; not used here.

        Returns:
            The updated merge request without ``_mergedComponents``.

        Raises:
            ValueError: If the merge request does not exist or the decision is
                not one of the supported values.
        """
        mr = _DEMO_MRS.get(mr_id)
        if not mr:
            raise ValueError(f"MergeRequest {mr_id} not found")

        # Validate before touching the record: an unknown decision used to
        # stamp reviewer, comment and timestamp without any state transition.
        new_status = self._DECISION_STATUS.get(decision)
        if new_status is None:
            raise ValueError(f"Unsupported review decision: {decision!r}")

        mr["reviewedBy"] = reviewer_id
        mr["reviewerComment"] = comment
        mr["updatedAt"] = _now()
        mr["status"] = new_status
        if decision == "approve":
            # NOTE(review): only the status changes here; the stored
            # _mergedComponents are not committed by this method.
            logger.info("COLLAB mr_merged mr_id=%s by=%s", mr_id, reviewer_id)
        return self._public_view(mr)

    async def get_merge_request(self, mr_id: str) -> dict[str, Any] | None:
        """Return the public view of merge request *mr_id*, or None when it is unknown."""
        mr = _DEMO_MRS.get(mr_id)
        if mr:
            return self._public_view(mr)
        return None

    async def list_merge_requests(self, target_page_id: str, status: str | None = None) -> list[dict[str, Any]]:
        """Return public views of the merge requests targeting *target_page_id*.

        When *status* is given (and non-empty) only requests in that status are returned.
        """
        results = [
            self._public_view(mr)
            for mr in _DEMO_MRS.values()
            if mr["targetPageId"] == target_page_id
        ]
        if status:
            results = [mr for mr in results if mr["status"] == status]
        return results
# ── Public three-way-diff (for testing) ───────────────────────────────────────
def three_way_diff(base, source, target):  # type: ignore[return]
    """Public entry point to ``_three_way_diff``; forwards the three component lists unchanged."""
    outcome = _three_way_diff(base, source, target)
    return outcome
# ── Singleton ─────────────────────────────────────────────────────────────────
# Shared module-level CollaborationService instance, created at import time.
collaboration_service = CollaborationService()

View File

@@ -0,0 +1,484 @@
"""
oracle/data_access_gateway.py
Read-only, policy-aware PostgreSQL query executor for Oracle datasets.
Nemoclaw/LLM is treated strictly as a planner. The gateway executes only
whitelisted read models and always applies policy before touching data.
"""
from __future__ import annotations
import logging
import os
from dataclasses import dataclass
from typing import Any
try:
import asyncpg # type: ignore
except Exception: # pragma: no cover
asyncpg = None # type: ignore
from .policy_service import PolicyContext, PolicyService
# Module logger, named after this module.
logger = logging.getLogger(__name__)
# Read once at import time: "1", "true" or "yes" (any case) in
# ORACLE_ALLOW_IN_MEMORY_FALLBACK lets the gateway return empty results
# instead of raising when no database is available.
_ALLOW_IN_MEMORY = os.getenv("ORACLE_ALLOW_IN_MEMORY_FALLBACK", "").lower() in {"1", "true", "yes"}
# Dataset names translated before the query is built; names that are not
# listed here are passed through unchanged.
_DATASET_ALIASES = {
    "crm_last_interacted_clients": "oracle_last_contacted_clients",
    "crm_top_interested_clients": "oracle_top_interested_clients",
    "crm_interaction_timeline": "oracle_client_interaction_timeline",
    "crm_property_interest_rollup": "oracle_property_interest_rollup",
}
@dataclass
class QueryExecutionResult:
    """Outcome of one gateway query: the returned rows plus any warnings."""

    # Result rows as plain dicts; empty when the query was skipped, denied or failed.
    rows: list[dict[str, Any]]
    # Human-readable messages: policy errors/warnings or a query failure description.
    warnings: list[str]
def _db_ready() -> bool:
    """Report whether a database connection can be attempted.

    Requires asyncpg to be importable, and either a ``DATABASE_URL`` that is
    non-empty and does not start with ``PLACEHOLDER``, or non-empty
    ``VELOCITY_DB_NAME``, ``VELOCITY_DB_USER`` and ``VELOCITY_DB_PASSWORD``.
    """
    if asyncpg is None:
        return False
    url = os.getenv("DATABASE_URL", "")
    has_real_url = bool(url) and not url.startswith("PLACEHOLDER")
    if has_real_url:
        return True
    for variable in ("VELOCITY_DB_NAME", "VELOCITY_DB_USER", "VELOCITY_DB_PASSWORD"):
        if not os.getenv(variable):
            return False
    return True
async def _connect_db() -> Any:
    """Open and return a new asyncpg connection.

    A ``DATABASE_URL`` that is non-empty and does not start with
    ``PLACEHOLDER`` is used as the DSN. Otherwise the connection is built from
    the ``VELOCITY_DB_*`` variables (host defaults to ``localhost``, port to
    ``5432``; name, user and password are required and raise ``KeyError``
    when unset).
    """
    assert asyncpg is not None
    dsn = os.getenv("DATABASE_URL", "")
    if not dsn or dsn.startswith("PLACEHOLDER"):
        settings = {
            "host": os.getenv("VELOCITY_DB_HOST", "localhost"),
            "port": int(os.getenv("VELOCITY_DB_PORT", "5432")),
            "database": os.environ["VELOCITY_DB_NAME"],
            "user": os.environ["VELOCITY_DB_USER"],
            "password": os.environ["VELOCITY_DB_PASSWORD"],
        }
        return await asyncpg.connect(**settings)
    return await asyncpg.connect(dsn)
class DataAccessGateway:
def __init__(self) -> None:
    """Create the gateway with its own PolicyService instance."""
    self.policy_service = PolicyService()
async def execute_component_plan(
    self,
    component_plan: dict[str, Any],
    ctx: PolicyContext,
    prompt: str,
) -> QueryExecutionResult:
    """Validate one component's retrieval plan, run its query, and redact the rows.

    Steps: read the plan's ``dataset``; validate the plan with the policy
    service and audit that check; stop with the validation errors when it did
    not pass; otherwise query the (alias-resolved) dataset and redact the rows.

    Returns:
        A ``QueryExecutionResult``. Rows are empty when the dataset is
        missing, validation fails, the in-memory fallback applies, or the
        query raises (the failure is logged and reported as a warning).

    Raises:
        RuntimeError: When no database is available and neither the in-memory
            fallback flag nor ``PYTEST_CURRENT_TEST`` is set.
    """
    requested = str(component_plan.get("dataset", "")).strip()
    if requested == "":
        return QueryExecutionResult(rows=[], warnings=["Dataset missing from retrieval plan."])

    verdict = self.policy_service.validate_retrieval_plan(component_plan, ctx)
    self.policy_service.audit_policy_check(ctx, requested, verdict)
    if not verdict.passed:
        return QueryExecutionResult(rows=[], warnings=verdict.errors)

    if not _db_ready():
        fallback_allowed = _ALLOW_IN_MEMORY or "PYTEST_CURRENT_TEST" in os.environ
        if not fallback_allowed:
            raise RuntimeError("Oracle requires DATABASE_URL and asyncpg for real-time data access.")
        return QueryExecutionResult(rows=[], warnings=[])

    try:
        fetched = await self._query_dataset(
            dataset=_DATASET_ALIASES.get(requested, requested),
            row_limit=verdict.effective_row_limit,
            ctx=ctx,
            prompt=prompt,
        )
    except Exception as exc:
        # Query failures are reported to the caller as a warning, not raised.
        logger.warning("DATA_GATEWAY query_failed dataset=%s error=%s", requested, exc)
        return QueryExecutionResult(rows=[], warnings=[f"{requested}: {exc}"])
    else:
        visible = self.policy_service.redact(fetched, verdict.redaction_policy)
        return QueryExecutionResult(rows=visible, warnings=verdict.warnings)
async def _query_dataset(
    self,
    *,
    dataset: str,
    row_limit: int,
    ctx: PolicyContext,
    prompt: str,
) -> list[dict[str, Any]]:
    """Build the whitelisted query for *dataset*, run it, and return the rows as dicts.

    A new connection is opened for the call and is always closed, including
    when the fetch raises.
    """
    statement, arguments = self._build_whitelisted_query(dataset, row_limit, ctx, prompt)
    connection = await _connect_db()
    try:
        fetched = await connection.fetch(statement, *arguments)
    finally:
        await connection.close()
    return list(map(dict, fetched))
def _build_whitelisted_query(
self,
dataset: str,
row_limit: int,
ctx: PolicyContext,
prompt: str,
) -> tuple[str, list[Any]]:
lower_prompt = prompt.lower()
if dataset == "deals":
sql = """
SELECT stage, COUNT(*)::int AS count, COALESCE(SUM(value), 0)::float AS value,
COALESCE(json_agg(json_build_object('id', lead_id, 'name', lead_name, 'company', company, 'value', value_label, 'avatar', avatar_url)
ORDER BY value DESC NULLS LAST) FILTER (WHERE lead_id IS NOT NULL), '[]'::json) AS leads
FROM deals
WHERE tenant_id = $1
GROUP BY stage
ORDER BY COALESCE(SUM(value), 0) DESC, stage ASC
LIMIT $2
"""
return sql, [ctx.tenant_id, row_limit]
if dataset == "lead_daily_snapshot":
sql = """
SELECT source, COALESCE(SUM(qd_weighted_score), 0)::float AS qd_weighted_volume
FROM lead_daily_snapshot
WHERE tenant_id = $1
GROUP BY source
ORDER BY qd_weighted_volume DESC, source ASC
LIMIT $2
"""
return sql, [ctx.tenant_id, row_limit]
if dataset == "lead_geo_interest_rollup":
sql = """
SELECT district, lat, lng, COALESCE(lead_count, 0)::int AS lead_count,
COALESCE(avg_qd_score, 0)::float AS avg_qd_score,
COALESCE(x, 0)::float AS x, COALESCE(y, 0)::float AS y
FROM lead_geo_interest_rollup
WHERE tenant_id = $1
ORDER BY lead_count DESC, district ASC
LIMIT $2
"""
return sql, [ctx.tenant_id, row_limit]
if dataset == "broker_performance":
sql = """
SELECT ROW_NUMBER() OVER (ORDER BY COUNT(DISTINCT l.person_id) DESC, COALESCE(u.full_name, u.email, u.id::text) ASC)::int AS rank,
COALESCE(u.full_name, u.email, u.id::text) AS name,
COUNT(DISTINCT l.person_id)::int AS deals_closed,
COALESCE(SUM(o.value), 0)::float AS revenue_generated,
u.avatar_url AS avatar
FROM users_and_roles u
LEFT JOIN crm_leads l ON l.assigned_user_id = u.id
LEFT JOIN crm_opportunities o ON o.lead_id = l.lead_id
WHERE u.is_active = TRUE
GROUP BY u.id, u.full_name, u.email, u.avatar_url
HAVING COUNT(DISTINCT l.person_id) > 0 OR COALESCE(SUM(o.value), 0) > 0
ORDER BY revenue_generated DESC, name ASC
LIMIT $1
"""
return sql, [row_limit]
if dataset == "inventory_absorption":
sql = """
SELECT period_label AS period, COALESCE(absorption_rate, 0)::float AS absorption_rate,
COALESCE(target_rate, 0)::float AS target_rate
FROM inventory_absorption
WHERE tenant_id = $1
ORDER BY period_start ASC
LIMIT $2
"""
return sql, [ctx.tenant_id, row_limit]
if dataset == "oracle_aggregated_metric":
metric_name = "total_leads"
if "pipeline" in lower_prompt:
metric_name = "total_pipeline_value"
elif "quota" in lower_prompt or "attainment" in lower_prompt:
metric_name = "quota_attainment"
sql = """
SELECT metric_value, metric_label, trend_value, comparison_label
FROM oracle_aggregated_metric
WHERE tenant_id = $1 AND metric_name = $2
ORDER BY observed_at DESC
LIMIT 1
"""
return sql, [ctx.tenant_id, metric_name]
if dataset == "lead_activity_log":
if "follow-up" in lower_prompt or "queue" in lower_prompt:
sql = """
SELECT lead_name AS name, assigned_broker,
COALESCE(last_contact_hours_ago, 0)::int AS last_contact_hours_ago,
COALESCE(qd_score, 0)::float AS qd_score, urgency, avatar_url AS avatar
FROM lead_activity_log
WHERE tenant_id = $1
ORDER BY last_contact_hours_ago DESC, qd_score DESC
LIMIT $2
"""
return sql, [ctx.tenant_id, row_limit]
sql = """
SELECT activity_type AS type, COALESCE(activity_title, activity_summary, activity_type) AS title,
activity_summary AS summary, actor_name AS actor,
TO_CHAR(activity_at, 'YYYY-MM-DD HH24:MI') AS date
FROM lead_activity_log
WHERE tenant_id = $1
ORDER BY activity_at DESC
LIMIT $2
"""
return sql, [ctx.tenant_id, row_limit]
if dataset == "crm_contacts_overview":
sql = """
SELECT p.person_id::text AS id, p.full_name AS name,
COALESCE(p.primary_email, '') AS email,
COALESCE(p.primary_phone, '') AS phone,
COALESCE(p.city, '') AS city,
COALESCE(p.buyer_type, 'unclassified') AS buyer_type,
COALESCE(q.current_value, 0)::float AS qd_score
FROM crm_people p
LEFT JOIN LATERAL (
SELECT current_value
FROM intel_qd_scores q
WHERE q.person_id = p.person_id
ORDER BY CASE WHEN q.score_type = 'engagement_score' THEN 0 WHEN q.score_type = 'intent_score' THEN 1 WHEN q.score_type = 'urgency_score' THEN 2 ELSE 3 END,
q.computed_at DESC
LIMIT 1
) q ON TRUE
ORDER BY qd_score DESC, p.full_name ASC
LIMIT $1
"""
return sql, [row_limit]
if dataset == "crm_opportunity_pipeline":
sql = """
SELECT o.stage::text AS stage, COUNT(*)::int AS count, COALESCE(SUM(o.value), 0)::float AS value,
COALESCE(json_agg(json_build_object('id', o.opportunity_id, 'name', p.full_name, 'company', COALESCE(a.account_name, ''),
'value', COALESCE(o.value, 0), 'nextAction', COALESCE(o.next_action, ''))
ORDER BY o.value DESC NULLS LAST) FILTER (WHERE o.opportunity_id IS NOT NULL), '[]'::json) AS leads
FROM crm_opportunities o
JOIN crm_leads l ON l.lead_id = o.lead_id
JOIN crm_people p ON p.person_id = l.person_id
LEFT JOIN crm_accounts a ON a.account_id = l.account_id
GROUP BY o.stage
ORDER BY COALESCE(SUM(o.value), 0) DESC, o.stage::text ASC
LIMIT $1
"""
return sql, [row_limit]
if dataset == "oracle_property_interest_rollup":
sql = """
SELECT COALESCE(pi.project_name, ip.project_name, 'Unknown Project') AS category,
COUNT(*)::int AS value,
ROUND(AVG(COALESCE((pi.budget_min + pi.budget_max) / 2.0, pi.budget_max, pi.budget_min, 0)), 2)::float AS average_budget,
MAX(pi.created_at) AS latest_interest_at
FROM crm_property_interests pi
LEFT JOIN inventory_projects ip ON ip.project_id = pi.project_id
GROUP BY COALESCE(pi.project_name, ip.project_name, 'Unknown Project')
ORDER BY value DESC, category ASC
LIMIT $1
"""
return sql, [row_limit]
if dataset == "oracle_last_contacted_clients":
sql = """
WITH message_contacts AS (
SELECT i.person_id, MAX(m.delivered_at) AS contacted_at
FROM intel_messages m JOIN intel_interactions i ON i.interaction_id = m.interaction_id
GROUP BY i.person_id
), email_contacts AS (
SELECT i.person_id, MAX(e.sent_at) AS contacted_at
FROM intel_emails e JOIN intel_interactions i ON i.interaction_id = e.interaction_id
GROUP BY i.person_id
), call_contacts AS (
SELECT i.person_id, MAX(i.happened_at) AS contacted_at
FROM intel_calls c JOIN intel_interactions i ON i.interaction_id = c.interaction_id
GROUP BY i.person_id
), visit_contacts AS (
SELECT person_id, MAX(visited_at) AS contacted_at FROM intel_visits GROUP BY person_id
), thread_contacts AS (
SELECT person_id, MAX(last_message_at) AS contacted_at FROM intel_whatsapp_threads GROUP BY person_id
), interaction_contacts AS (
SELECT person_id, MAX(happened_at) AS contacted_at FROM intel_interactions GROUP BY person_id
), next_reminders AS (
SELECT DISTINCT ON (person_id) person_id, title AS next_action, due_at AS next_action_at
FROM intel_reminders
WHERE status IN ('pending', 'open', 'scheduled')
ORDER BY person_id, due_at ASC NULLS LAST
), contact_rollup AS (
SELECT p.person_id,
GREATEST(
COALESCE(mc.contacted_at, '-infinity'::timestamptz),
COALESCE(ec.contacted_at, '-infinity'::timestamptz),
COALESCE(cc.contacted_at, '-infinity'::timestamptz),
COALESCE(vc.contacted_at, '-infinity'::timestamptz),
COALESCE(tc.contacted_at, '-infinity'::timestamptz),
COALESCE(ic.contacted_at, '-infinity'::timestamptz)
) AS last_contacted_at,
mc.contacted_at AS last_message_at, ec.contacted_at AS last_email_at,
cc.contacted_at AS last_call_at, vc.contacted_at AS last_visit_at,
tc.contacted_at AS last_whatsapp_at, ic.contacted_at AS last_interaction_at
FROM crm_people p
LEFT JOIN message_contacts mc ON mc.person_id = p.person_id
LEFT JOIN email_contacts ec ON ec.person_id = p.person_id
LEFT JOIN call_contacts cc ON cc.person_id = p.person_id
LEFT JOIN visit_contacts vc ON vc.person_id = p.person_id
LEFT JOIN thread_contacts tc ON tc.person_id = p.person_id
LEFT JOIN interaction_contacts ic ON ic.person_id = p.person_id
)
SELECT p.person_id::text AS id, p.full_name AS name,
COALESCE(p.primary_email, '') AS email, COALESCE(p.primary_phone, '') AS phone,
NULLIF(cr.last_contacted_at, '-infinity'::timestamptz) AS last_contacted_at,
CASE
WHEN cr.last_contacted_at = cr.last_call_at THEN 'phone'
WHEN cr.last_contacted_at = cr.last_email_at THEN 'email'
WHEN cr.last_contacted_at = cr.last_visit_at THEN 'site_visit'
WHEN cr.last_contacted_at = cr.last_whatsapp_at THEN 'whatsapp'
WHEN cr.last_contacted_at = cr.last_message_at THEN 'message'
WHEN cr.last_contacted_at = cr.last_interaction_at THEN 'interaction'
ELSE 'unknown'
END AS last_contact_channel,
COALESCE(li.summary, nr.next_action, '') AS last_contact_summary,
COUNT(DISTINCT i.interaction_id)::int AS interaction_count,
COALESCE(q.current_value, 0)::float AS qd_score,
COALESCE(nr.next_action, '') AS next_action,
nr.next_action_at
FROM crm_people p
JOIN contact_rollup cr ON cr.person_id = p.person_id
LEFT JOIN intel_interactions i ON i.person_id = p.person_id
LEFT JOIN LATERAL (
SELECT summary
FROM intel_interactions li
WHERE li.person_id = p.person_id
ORDER BY li.happened_at DESC
LIMIT 1
) li ON TRUE
LEFT JOIN next_reminders nr ON nr.person_id = p.person_id
LEFT JOIN LATERAL (
SELECT current_value FROM intel_qd_scores q
WHERE q.person_id = p.person_id
ORDER BY q.computed_at DESC
LIMIT 1
) q ON TRUE
WHERE cr.last_contacted_at <> '-infinity'::timestamptz
GROUP BY p.person_id, p.full_name, p.primary_email, p.primary_phone, cr.last_contacted_at,
cr.last_message_at, cr.last_email_at, cr.last_call_at, cr.last_visit_at,
cr.last_whatsapp_at, cr.last_interaction_at, li.summary, nr.next_action,
nr.next_action_at, q.current_value
ORDER BY last_contacted_at DESC NULLS LAST, interaction_count DESC, p.full_name ASC
LIMIT $1
"""
return sql, [row_limit]
if dataset == "oracle_top_interested_clients":
sql = """
WITH interest_mentions AS (
SELECT i.person_id, COUNT(*)::int AS mention_count, MAX(COALESCE(m.delivered_at, i.happened_at)) AS last_mention_at
FROM intel_interactions i
LEFT JOIN intel_messages m ON m.interaction_id = i.interaction_id
WHERE LOWER(COALESCE(i.summary, '') || ' ' || COALESCE(m.message_text, '')) ~
'(interested|interest|shortlist|visit|book|budget|configuration|bhk|project|property)'
GROUP BY i.person_id
)
SELECT p.person_id::text AS id, p.full_name AS name,
COALESCE(p.primary_email, '') AS email, COALESCE(p.primary_phone, '') AS phone,
COUNT(DISTINCT pi.interest_id)::int AS explicit_interest_count,
COALESCE(MAX(im.mention_count), 0)::int AS inferred_interest_count,
(COUNT(DISTINCT pi.interest_id) + COALESCE(MAX(im.mention_count), 0))::int AS interest_count,
STRING_AGG(DISTINCT COALESCE(pi.project_name, ip.project_name), ', ' ORDER BY COALESCE(pi.project_name, ip.project_name)) AS projects,
GREATEST(COALESCE(MAX(pi.created_at), '-infinity'::timestamptz),
COALESCE(MAX(im.last_mention_at), '-infinity'::timestamptz),
COALESCE(p.updated_at, p.created_at)) AS last_interest_at,
COALESCE(q.current_value, 0)::float AS qd_score,
COALESCE(MAX(pi.notes), '') AS latest_interest_note
FROM crm_people p
LEFT JOIN crm_property_interests pi ON pi.person_id = p.person_id
LEFT JOIN inventory_projects ip ON ip.project_id = pi.project_id
LEFT JOIN interest_mentions im ON im.person_id = p.person_id
LEFT JOIN LATERAL (
SELECT current_value FROM intel_qd_scores q
WHERE q.person_id = p.person_id
ORDER BY q.computed_at DESC
LIMIT 1
) q ON TRUE
GROUP BY p.person_id, p.full_name, p.primary_email, p.primary_phone, p.updated_at, p.created_at, q.current_value
HAVING COUNT(DISTINCT pi.interest_id) > 0 OR COALESCE(MAX(im.mention_count), 0) > 0
ORDER BY interest_count DESC, qd_score DESC, last_interest_at DESC NULLS LAST, p.full_name ASC
LIMIT $1
"""
return sql, [row_limit]
if dataset == "oracle_client_interaction_timeline":
sql = """
WITH timeline AS (
SELECT i.person_id, i.channel::text AS type, COALESCE(i.interaction_type, i.channel::text) AS title,
COALESCE(i.summary, '') AS detail, i.happened_at AS event_at, 'interaction' AS source_type
FROM intel_interactions i
UNION ALL
SELECT i.person_id, 'message', COALESCE(m.sender_role, 'message'), m.message_text, m.delivered_at, 'message'
FROM intel_messages m JOIN intel_interactions i ON i.interaction_id = m.interaction_id
UNION ALL
SELECT i.person_id, 'call', c.call_direction::text, COALESCE(t.full_text, c.call_outcome, 'Call record'), i.happened_at, 'call'
FROM intel_calls c
JOIN intel_interactions i ON i.interaction_id = c.interaction_id
LEFT JOIN intel_transcripts t ON t.call_id = c.call_id OR t.interaction_id = i.interaction_id
UNION ALL
SELECT i.person_id, 'email', COALESCE(e.subject, 'Email'), COALESCE(e.body_text, ''), e.sent_at, 'email'
FROM intel_emails e JOIN intel_interactions i ON i.interaction_id = e.interaction_id
UNION ALL
SELECT v.person_id, 'site_visit', COALESCE(v.project_name, 'Site visit'), COALESCE(v.visit_notes, ''), v.visited_at, 'visit'
FROM intel_visits v
UNION ALL
SELECT r.person_id, 'reminder', r.title, COALESCE(r.notes, r.status), COALESCE(r.due_at, r.created_at), 'reminder'
FROM intel_reminders r
UNION ALL
SELECT q.person_id, 'qd_score', q.score_type, COALESCE(q.reasoning, q.current_value::text), q.computed_at, 'qd_score'
FROM intel_qd_scores q
UNION ALL
SELECT qt.person_id, 'qd_timeseries', COALESCE(qt.signal_source, qt.score_type), qt.value::text, qt.timestamp, 'qd_timeseries'
FROM intel_qd_timeseries qt
)
SELECT t.type, t.title, CONCAT(p.full_name, ' - ', t.detail) AS summary,
p.full_name AS actor, TO_CHAR(t.event_at, 'YYYY-MM-DD HH24:MI') AS date,
t.source_type, t.event_at
FROM timeline t
JOIN crm_people p ON p.person_id = t.person_id
ORDER BY t.event_at DESC NULLS LAST
LIMIT $1
"""
return sql, [row_limit]
if dataset == "oracle_client_360_summary":
sql = """
SELECT p.person_id::text AS id, p.full_name AS name,
COALESCE(p.primary_email, '') AS email, COALESCE(p.primary_phone, '') AS phone,
COALESCE(l.status::text, 'unknown') AS lead_status,
COALESCE(l.budget_band, '') AS budget_band,
COALESCE(l.urgency, '') AS urgency,
COALESCE(q.current_value, 0)::float AS qd_score,
COUNT(DISTINCT pi.interest_id)::int AS interest_count,
COUNT(DISTINCT i.interaction_id)::int AS interaction_count,
MAX(i.happened_at) AS last_interaction_at,
STRING_AGG(DISTINCT COALESCE(pi.project_name, ip.project_name), ', ' ORDER BY COALESCE(pi.project_name, ip.project_name)) AS projects
FROM crm_people p
LEFT JOIN crm_leads l ON l.person_id = p.person_id
LEFT JOIN crm_property_interests pi ON pi.person_id = p.person_id
LEFT JOIN inventory_projects ip ON ip.project_id = pi.project_id
LEFT JOIN intel_interactions i ON i.person_id = p.person_id
LEFT JOIN LATERAL (
SELECT current_value FROM intel_qd_scores q
WHERE q.person_id = p.person_id
ORDER BY q.computed_at DESC
LIMIT 1
) q ON TRUE
GROUP BY p.person_id, p.full_name, p.primary_email, p.primary_phone, l.status, l.budget_band, l.urgency, q.current_value
ORDER BY qd_score DESC, interaction_count DESC, interest_count DESC, name ASC
LIMIT $1
"""
return sql, [row_limit]
raise ValueError(f"Dataset '{dataset}' is not whitelisted for Oracle execution.")
data_access_gateway = DataAccessGateway()

View File

@@ -0,0 +1,202 @@
"""
oracle/execution_profiler.py
Post-execution quality checks for Oracle natural DB queries.
"""
from __future__ import annotations
import re
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any
_STALE_THRESHOLD_DAYS = 365
@dataclass
class QualityIssue:
    """A single quality finding raised while profiling a query result."""

    # Machine-readable identifier such as "zero_rows" or "stale_timestamps".
    code: str
    # Human-readable explanation of the finding.
    description: str
    # "blocking" findings make the profile fail; "warning" findings do not.
    severity: str
    # Guidance for the next planning attempt; empty when there is none.
    replan_hint: str
@dataclass
class ProfileResult:
    """Outcome of profiling one executed query."""

    # True when none of the recorded issues has severity "blocking".
    passed: bool
    # Number of rows the query returned.
    row_count: int
    # Every issue found, blocking and warning alike.
    issues: list[QualityIssue] = field(default_factory=list)
    # Non-empty replan hints collected from the issues.
    replan_hints: list[str] = field(default_factory=list)
    # Component type recommended from the result shape, or None.
    suggested_component_type: str | None = None
def _extract_cardinality_from_prompt(prompt: str) -> int | None:
lowered = prompt.lower()
numeric_match = re.search(r"\b(?:top|last|latest|recent|first|show|which)\s+(\d{1,4})\b", lowered)
if numeric_match:
return int(numeric_match.group(1))
words = {
"one": 1,
"two": 2,
"three": 3,
"four": 4,
"five": 5,
"six": 6,
"seven": 7,
"eight": 8,
"nine": 9,
"ten": 10,
"eleven": 11,
"twelve": 12,
"fifteen": 15,
"twenty": 20,
}
word_match = re.search(
r"\b(?:top|last|latest|recent|first|show|which)\s+"
r"(one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|fifteen|twenty)\b",
lowered,
)
if word_match:
return words.get(word_match.group(1))
return None
def _all_null_measures(rows: list[dict[str, Any]], columns: list[str]) -> bool:
if not rows or not columns:
return False
numeric_columns: list[str] = []
for column in columns:
saw_numeric = False
all_null = True
for row in rows[:20]:
value = row.get(column)
if value is not None:
all_null = False
if isinstance(value, (int, float)):
saw_numeric = True
if saw_numeric:
numeric_columns.append(column)
if not all_null:
return False
if numeric_columns:
return True
return all(all(value is None for value in row.values()) for row in rows[:5])
def _timestamps_are_stale(rows: list[dict[str, Any]], columns: list[str]) -> bool:
timestamp_columns = [
column for column in columns if any(token in column for token in ("_at", "date", "timestamp", "when", "time"))
]
if not timestamp_columns or not rows:
return False
now = datetime.now(timezone.utc)
checked = 0
stale = 0
for row in rows[:20]:
for column in timestamp_columns:
value = row.get(column)
if value is None or not isinstance(value, str):
continue
try:
parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
except ValueError:
continue
if parsed.tzinfo is None:
parsed = parsed.replace(tzinfo=timezone.utc)
checked += 1
if (now - parsed).days > _STALE_THRESHOLD_DAYS:
stale += 1
return checked > 0 and stale == checked
class ExecutionProfiler:
    """Post-execution quality checks on the rows a generated query returned."""

    def profile(
        self,
        *,
        rows: list[dict[str, Any]],
        columns: list[str],
        sql: str,
        prompt: str,
        source_tables: list[str],
        row_limit: int,
    ) -> ProfileResult:
        """Inspect a query result and collect quality issues.

        Checks, in order: empty result or all-null measures (blocking), more
        than three times the row count the prompt asked for (warning), stale
        timestamps (warning), and a single all-numeric row of at most four
        columns, which is flagged for rendering as a KPI tile (warning).

        ``sql``, ``source_tables`` and ``row_limit`` are accepted for interface
        compatibility but are not used by any current check.

        Returns:
            A ``ProfileResult`` that passes when no blocking issue was found.
        """
        # Accepted for interface stability only; see the docstring.
        del sql, source_tables, row_limit
        issues: list[QualityIssue] = []

        if not rows:
            issues.append(
                QualityIssue(
                    code="zero_rows",
                    description="Query returned zero rows.",
                    severity="blocking",
                    replan_hint=(
                        "The query returned zero rows. Use authoritative recency and business-semantic columns "
                        "from the semantic catalog. Avoid sparse or deprecated timestamp fields."
                    ),
                )
            )
        elif _all_null_measures(rows, columns):
            issues.append(
                QualityIssue(
                    code="all_null_measures",
                    description="Rows returned but numeric measure columns are null.",
                    severity="blocking",
                    replan_hint=(
                        "The query returned rows but numeric measures are null. "
                        "Check join keys and metric source columns."
                    ),
                )
            )

        requested_n = _extract_cardinality_from_prompt(prompt)
        # Allow up to 3x the requested count before calling it a mismatch.
        if requested_n is not None and len(rows) > requested_n * 3:
            issues.append(
                QualityIssue(
                    code="cardinality_mismatch",
                    description=f"Prompt asked for about {requested_n} rows but query returned {len(rows)}.",
                    severity="warning",
                    replan_hint=f"Respect the requested result count and add LIMIT {requested_n}.",
                )
            )

        if rows and _timestamps_are_stale(rows, columns):
            issues.append(
                QualityIssue(
                    code="stale_timestamps",
                    description="Returned timestamps appear stale.",
                    severity="warning",
                    replan_hint="The result timestamps are stale. Use authoritative recency fields.",
                )
            )

        suggested_type: str | None = None
        if len(rows) == 1 and len(columns) <= 4:
            non_null_values = [value for value in rows[0].values() if value is not None]
            if non_null_values and all(isinstance(value, (int, float)) for value in non_null_values):
                suggested_type = "kpiTile"
                issues.append(
                    QualityIssue(
                        code="single_row_scalar",
                        description="Single scalar row is better rendered as KPI tile.",
                        severity="warning",
                        # Presentation advice only; nothing for the planner to change.
                        replan_hint="",
                    )
                )

        blocking = [issue for issue in issues if issue.severity == "blocking"]
        return ProfileResult(
            passed=not blocking,
            row_count=len(rows),
            issues=issues,
            replan_hints=[issue.replan_hint for issue in issues if issue.replan_hint],
            suggested_component_type=suggested_type,
        )
execution_profiler = ExecutionProfiler()

View File

@@ -0,0 +1,591 @@
"""
Natural DB-first Oracle agent.
Pipeline:
1. schema introspection
2. semantic SQL planning
3. plan verification and optional repair
4. SQL execution
5. execution quality profiling and auto-replan
6. visualization planning from actual result shape
"""
from __future__ import annotations
import json
import logging
import os
import re
import uuid
from dataclasses import dataclass
from datetime import date, datetime
from decimal import Decimal
from typing import Any
from backend.services.runtime_llm_service import runtime_llm_service
from .execution_profiler import execution_profiler
from .plan_verifier import plan_verifier
from .semantic_catalog import CATALOG_VERSION, build_semantic_context_for_planner
from .visualization_planner import VisualizationDecision, visualization_planner
try:
import asyncpg # type: ignore
except Exception: # pragma: no cover
asyncpg = None # type: ignore
logger = logging.getLogger(__name__)
DESTRUCTIVE_SQL = re.compile(
r"\b(insert|update|delete|drop|alter|truncate|copy|create|grant|revoke|call|execute|do|merge)\b",
re.IGNORECASE,
)
TABLE_REF_RE = re.compile(r"\b(?:from|join)\s+([a-zA-Z_][\w.]*)(?:\s|$)", re.IGNORECASE)
_MAX_REPLAN_ATTEMPTS = 2
def _json_safe(value: Any) -> Any:
if isinstance(value, (datetime, date)):
return value.isoformat()
if isinstance(value, Decimal):
return float(value)
if isinstance(value, (list, tuple)):
return [_json_safe(item) for item in value]
if isinstance(value, dict):
return {str(key): _json_safe(item) for key, item in value.items()}
return value
def db_ready() -> bool:
    """Return True when asyncpg is importable and a database is configured.

    A database counts as configured when either connection URL
    (``ORACLE_READ_DATABASE_URL`` or ``DATABASE_URL``) is set to something
    other than a ``PLACEHOLDER`` value, or when a complete set of discrete
    credentials is present. The read-replica credential set
    (``VELOCITY_DB_READ_*``) is checked as well, because ``connect_db`` can
    connect with it alone.
    """
    if asyncpg is None:
        return False

    for url_var in ("ORACLE_READ_DATABASE_URL", "DATABASE_URL"):
        url = os.getenv(url_var, "")
        if url and not url.startswith("PLACEHOLDER"):
            return True

    credential_sets = (
        ("VELOCITY_DB_READ_NAME", "VELOCITY_DB_READ_USER", "VELOCITY_DB_READ_PASSWORD"),
        ("VELOCITY_DB_NAME", "VELOCITY_DB_USER", "VELOCITY_DB_PASSWORD"),
    )
    return any(all(os.getenv(name) for name in names) for names in credential_sets)
async def connect_db() -> Any:
    """Open a new asyncpg connection, preferring read-only configuration.

    Sources are tried in this order: ``ORACLE_READ_DATABASE_URL``, the
    discrete ``VELOCITY_DB_READ_*`` credentials, ``DATABASE_URL``, and finally
    the discrete ``VELOCITY_DB_*`` credentials.

    Raises:
        RuntimeError: asyncpg is not installed.
        KeyError: the final fallback is reached without its credentials set.
    """
    if asyncpg is None:
        raise RuntimeError("asyncpg is not installed.")

    env = os.getenv

    def usable(url: str) -> bool:
        return bool(url) and not url.startswith("PLACEHOLDER")

    replica_url = env("ORACLE_READ_DATABASE_URL", "")
    if usable(replica_url):
        return await asyncpg.connect(replica_url)

    replica_credentials = ("VELOCITY_DB_READ_NAME", "VELOCITY_DB_READ_USER", "VELOCITY_DB_READ_PASSWORD")
    if all(env(name) for name in replica_credentials):
        # Host and port fall back to the primary's settings when unset.
        return await asyncpg.connect(
            host=env("VELOCITY_DB_READ_HOST", env("VELOCITY_DB_HOST", "127.0.0.1")),
            port=int(env("VELOCITY_DB_READ_PORT", env("VELOCITY_DB_PORT", "5432"))),
            database=os.environ["VELOCITY_DB_READ_NAME"],
            user=os.environ["VELOCITY_DB_READ_USER"],
            password=os.environ["VELOCITY_DB_READ_PASSWORD"],
        )

    primary_url = env("DATABASE_URL", "")
    if usable(primary_url):
        return await asyncpg.connect(primary_url)

    return await asyncpg.connect(
        host=env("VELOCITY_DB_HOST", "127.0.0.1"),
        port=int(env("VELOCITY_DB_PORT", "5432")),
        database=os.environ["VELOCITY_DB_NAME"],
        user=os.environ["VELOCITY_DB_USER"],
        password=os.environ["VELOCITY_DB_PASSWORD"],
    )
@dataclass
class NaturalQueryResult:
    """Everything produced by one natural-language Oracle query."""

    prompt: str
    sql: str
    title: str
    summary: str
    columns: list[str]
    rows: list[dict[str, Any]]
    row_count: int
    source_tables: list[str]
    component_type: str
    warnings: list[str]
    visualization_decision: VisualizationDecision | None = None
    replan_count: int = 0
    semantic_catalog_version: str = CATALOG_VERSION

    def as_dict(self) -> dict[str, Any]:
        """Return the result as a camelCase-keyed dict.

        ``visualizationDecision`` is an empty dict when no decision is set.
        """
        viz = self.visualization_decision
        viz_payload: dict[str, Any] = {}
        if viz:
            viz_payload = {
                "xAxis": viz.x_axis,
                "yAxis": viz.y_axis,
                "dimensionCols": viz.dimension_cols,
                "measureCols": viz.measure_cols,
                "widthMode": viz.width_mode,
                "minHeightPx": viz.min_height_px,
                "skeletonVariant": viz.skeleton_variant,
                "vizParams": viz.viz_params,
                "dataBindings": viz.data_bindings,
                "confidence": viz.confidence,
                "reasoning": viz.reasoning,
            }

        payload: dict[str, Any] = {
            "prompt": self.prompt,
            "sql": self.sql,
            "title": self.title,
            "summary": self.summary,
            "columns": self.columns,
            "rows": self.rows,
            "rowCount": self.row_count,
            "sourceTables": self.source_tables,
            "componentType": self.component_type,
            "warnings": self.warnings,
            "semanticCatalogVersion": self.semantic_catalog_version,
            "replanCount": self.replan_count,
        }
        payload["visualizationDecision"] = viz_payload
        return payload
def sanitize_sql(sql: str, row_limit: int) -> tuple[str, list[str], list[str]]:
    """Validate planner SQL as read-only and make sure it carries a row cap.

    Comments and a trailing semicolon are stripped, the statement must start
    with SELECT or WITH, and any keyword matched by ``DESTRUCTIVE_SQL``
    rejects it. Tables named after FROM/JOIN are collected (schema prefix and
    quotes removed, lower-cased, first occurrence order).

    A ``LIMIT`` is appended when the statement has none. The check matches
    LIMIT as a whole word, so identifiers that merely contain the letters
    (for example ``credit_limit``) no longer suppress the cap.

    Returns:
        ``(sql, source_tables, warnings)``.

    Raises:
        ValueError: the statement is not a plain read query.
    """
    warnings: list[str] = []
    clean = re.sub(r"--.*?$|/\*.*?\*/", "", sql.strip(), flags=re.MULTILINE | re.DOTALL).strip().rstrip(";")
    if not re.match(r"^(select|with)\b", clean, re.IGNORECASE):
        raise ValueError("Oracle SQL agent only accepts SELECT or WITH queries.")
    if DESTRUCTIVE_SQL.search(clean):
        raise ValueError("Oracle SQL agent blocked non-read SQL.")

    tables: list[str] = []
    for match in TABLE_REF_RE.finditer(clean):
        table = match.group(1).split(".")[-1].strip('"').lower()
        # "FROM LATERAL" / "JOIN LATERAL" and "FROM select" are not tables.
        if table in {"lateral", "select"}:
            continue
        if table and table not in tables:
            tables.append(table)

    # NOTE(review): a LIMIT inside a subquery or CTE still counts as present;
    # only the substring false-positive is addressed here.
    if not re.search(r"\blimit\b", clean, re.IGNORECASE):
        clean += f" LIMIT {row_limit}"
        warnings.append(f"Row cap {row_limit} auto-applied (query had no LIMIT).")
    return clean, tables, warnings
def _detect_intents(prompt: str) -> list[str]:
lowered = prompt.lower()
intents: list[str] = []
if any(token in lowered for token in (
"last contact", "last contacted", "recently contacted", "last call",
"last message", "last whatsapp", "contacted us", "follow-up", "follow up",
"days since", "no contact",
)):
intents.append("last_contacted")
if any(token in lowered for token in (
"interested in", "shown interest", "interest in", "interested clients",
"project interest", "property interest",
)):
intents.append("interested_clients")
if any(token in lowered for token in ("qd score", "qualification score", "desire score", "intent score", "qd")):
intents.append("qd_score")
if any(token in lowered for token in ("pipeline", "stage", "funnel", "kanban", "deal")):
intents.append("pipeline")
if any(token in lowered for token in ("site visit", "visited", "visit")):
intents.append("site_visits")
if any(token in lowered for token in ("call", "transcript", "whatsapp", "email", "message", "conversation", "interaction", "timeline", "activity")):
intents.append("timeline")
if any(token in lowered for token in ("objection", "concern", "complaint", "pushback")):
intents.append("objections")
if any(token in lowered for token in ("broker", "agent performance", "referral")):
intents.append("broker_performance")
if any(token in lowered for token in ("next action", "next step", "what should i do", "follow-up priority", "action queue")):
intents.append("next_action")
if any(token in lowered for token in ("project", "unit", "inventory", "available", "price", "configuration")):
intents.append("inventory")
if any(token in lowered for token in ("client 360", "dossier", "profile")):
intents.append("client_360")
if any(token in lowered for token in ("fact", "memory", "promise", "commitment", "budget", "preference")):
intents.append("extracted_facts")
return intents or ["last_contacted"]
def title_from_prompt(prompt: str) -> str:
    """Derive a display title from the prompt.

    Whitespace runs are collapsed, surrounding spaces and ``?.!`` are removed,
    the first character is upper-cased and the result is limited to the first
    80 characters of the cleaned prompt. An empty result falls back to a
    generic title.
    """
    collapsed = re.sub(r"\s+", " ", prompt.strip())
    trimmed = collapsed.strip(" ?.!")
    if not trimmed:
        return "Oracle Query Result"
    return trimmed[0].upper() + trimmed[1:80]
class NaturalDbAgent:
async def schema_catalog(self, conn: Any | None = None) -> dict[str, Any]:
    """Describe every base table in the ``public`` schema.

    Args:
        conn: Connection to reuse. When omitted, one is opened for the call
            and closed afterwards.

    Returns:
        ``{"available": True, "tables": {name: {"columns": [...],
        "rowCount": int | None}}, "allowedTables": [...]}``, or
        ``{"tables": [], "available": False}`` when no connection was given
        and the database is not configured.
    """
    own_conn = conn is None
    if conn is None:
        if not db_ready():
            return {"tables": [], "available": False}
        conn = await connect_db()
    try:
        table_names = await conn.fetch(
            """
SELECT table_name
FROM information_schema.tables
WHERE table_schema = 'public' AND table_type = 'BASE TABLE'
ORDER BY table_name
"""
        )
        public_tables = [row["table_name"] for row in table_names]
        rows = await conn.fetch(
            """
SELECT c.table_name, c.column_name, c.data_type, c.udt_name, c.is_nullable
FROM information_schema.columns c
WHERE c.table_schema = 'public'
ORDER BY c.table_name, c.ordinal_position
"""
        )
        counts: dict[str, int | None] = {}
        for table in public_tables:
            # Quote the identifier (doubling embedded quotes) so mixed-case or
            # unusual names resolve, and pin the schema instead of relying on
            # search_path.
            qualified = 'public."' + table.replace('"', '""') + '"'
            exists = await conn.fetchval("SELECT to_regclass($1)", qualified)
            counts[table] = None if not exists else int(await conn.fetchval(f"SELECT COUNT(*) FROM {qualified}"))
        tables: dict[str, dict[str, Any]] = {}
        for row in rows:
            entry = tables.setdefault(row["table_name"], {"columns": [], "rowCount": counts.get(row["table_name"])})
            entry["columns"].append(
                {
                    "name": row["column_name"],
                    "dataType": row["data_type"],
                    "udtName": row["udt_name"],
                    "nullable": row["is_nullable"] == "YES",
                }
            )
        return {"available": True, "tables": tables, "allowedTables": public_tables}
    finally:
        # Only close connections this call opened itself.
        if own_conn:
            await conn.close()
async def data_health(self, conn: Any | None = None) -> dict[str, Any]:
    """Compare live table row counts against the expected synthetic dataset.

    Returns a dict with the per-table ``counts``, the reference
    ``expectedSyntheticV2Counts``, tables whose count is unknown
    (``missingTables``) or zero (``emptyTables``), and every expected table
    whose count falls short (``belowExpected``; an unknown count is treated
    as zero for that comparison).
    """
    catalog = await self.schema_catalog(conn)
    expected = {
        "crm_people": 341,
        "crm_leads": 250,
        "crm_opportunities": 400,
        "crm_property_interests": 400,
        "intel_interactions": 1897,
        "intel_messages": 6944,
        "intel_calls": 478,
        "intel_transcripts": 231,
        "intel_emails": 149,
        "intel_visits": 305,
        "intel_reminders": 759,
        "intel_extracted_facts": 1686,
        "read_last_contacted": 250,
        "read_next_best_action": 250,
    }
    # An unavailable catalog reports "tables" as an empty list rather than a
    # mapping; normalise so the report is still produced instead of raising.
    tables = catalog.get("tables") or {}
    counts = {table: (meta or {}).get("rowCount") for table, meta in sorted(tables.items())}
    return {
        "counts": counts,
        "expectedSyntheticV2Counts": expected,
        "missingTables": [table for table, count in counts.items() if count is None],
        "emptyTables": [table for table, count in counts.items() if count == 0],
        "belowExpected": {
            table: {"expected": expected_count, "actual": counts.get(table)}
            for table, expected_count in expected.items()
            if (counts.get(table) or 0) < expected_count
        },
    }
async def execute_prompt(self, prompt: str, *, row_limit: int = 100, conn: Any | None = None) -> NaturalQueryResult:
    """Answer a natural-language prompt with a SQL-backed result.

    Args:
        prompt: The user's question; must contain non-whitespace text.
        row_limit: Row cap handed to planning and execution.
        conn: Connection to reuse. When omitted, one is opened for the call
            and closed afterwards.

    Raises:
        ValueError: the prompt is blank.
        RuntimeError: no connection was given and the database is unavailable.
    """
    if not prompt.strip():
        raise ValueError("Prompt is required.")

    borrowed = conn is not None
    if not borrowed:
        if not db_ready():
            raise RuntimeError("Database unavailable for Oracle natural query.")
        conn = await connect_db()

    try:
        schema = await self.schema_catalog(conn)
        return await self._pipeline(
            conn=conn,
            prompt=prompt,
            catalog=schema,
            detected_intents=_detect_intents(prompt),
            row_limit=row_limit,
            attempt=0,
            prior_feedback=None,
        )
    finally:
        # A caller-supplied connection stays open for the caller.
        if not borrowed:
            await conn.close()
async def _pipeline(
    self,
    *,
    conn: Any,
    prompt: str,
    catalog: dict[str, Any],
    detected_intents: list[str],
    row_limit: int,
    attempt: int,
    prior_feedback: str | None,
) -> NaturalQueryResult:
    """Plan, verify, execute and profile one query, replanning on failure.

    When the execution profile has a blocking issue and fewer than
    ``_MAX_REPLAN_ATTEMPTS`` retries have been used, the pipeline calls itself
    with the profiler's hints as feedback. The returned result then carries an
    "Auto-replan triggered" warning for each retry, in chronological order.

    Raises:
        RuntimeError: the planner returned no SQL, verification failed, or
            the SQL raised during execution.
        ValueError: propagated from ``sanitize_sql`` for non-read SQL.
    """
    warnings: list[str] = []
    plan = await self._plan_sql(
        prompt=prompt,
        catalog=catalog,
        detected_intents=detected_intents,
        row_limit=row_limit,
        prior_feedback=prior_feedback,
    )
    raw_sql = str(plan.get("sql") or "").strip()
    if not raw_sql:
        raise RuntimeError("Natural SQL planner returned no SQL.")

    verification = await plan_verifier.verify_and_repair(
        sql=raw_sql,
        prompt=prompt,
        detected_intents=detected_intents,
        row_limit=row_limit,
        llm_service=runtime_llm_service,
    )
    if verification.was_repaired:
        warnings.append(
            "Plan verifier repaired violations: "
            + ", ".join(violation.rule for violation in verification.violations if violation.severity == "blocking")
        )
    if not verification.passed:
        details = "; ".join(
            f"{violation.rule}: {violation.detail}"
            for violation in verification.violations
            if violation.severity == "blocking"
        )
        raise RuntimeError(f"Oracle SQL plan failed verification: {details}")
    if verification.notes:
        warnings.extend(verification.notes)

    effective_sql, source_tables, sanitize_warnings = sanitize_sql(verification.sql, row_limit)
    warnings.extend(sanitize_warnings)
    try:
        records = await conn.fetch(effective_sql)
    except Exception as exc:
        raise RuntimeError(f"Natural SQL execution failed: {exc}") from exc

    rows = [_json_safe(dict(record)) for record in records]
    columns = list(rows[0].keys()) if rows else []
    profile = execution_profiler.profile(
        rows=rows,
        columns=columns,
        sql=effective_sql,
        prompt=prompt,
        source_tables=source_tables,
        row_limit=row_limit,
    )

    if not profile.passed and attempt < _MAX_REPLAN_ATTEMPTS:
        feedback = " | ".join(profile.replan_hints)
        replan_notice = f"Auto-replan triggered (attempt {attempt + 1}): {feedback[:160]}"
        result = await self._pipeline(
            conn=conn,
            prompt=prompt,
            catalog=catalog,
            detected_intents=detected_intents,
            row_limit=row_limit,
            attempt=attempt + 1,
            prior_feedback=feedback,
        )
        # The retry builds its own warning list, so the notice has to be put
        # on its result; appending to this attempt's list would lose it. The
        # other warnings of this attempt describe discarded SQL and are
        # intentionally not carried over.
        result.warnings.insert(0, replan_notice)
        return result

    if not profile.passed:
        for issue in profile.issues:
            if issue.severity == "blocking":
                warnings.append(f"Quality issue after {attempt} replans: [{issue.code}] {issue.description}")

    visualization_decision = visualization_planner.plan(
        rows=rows,
        columns=columns,
        prompt=prompt,
        source_tables=source_tables,
        profile_suggested_type=profile.suggested_component_type,
        title_from_planner=str(plan.get("title") or ""),
    )
    title = visualization_decision.title or str(plan.get("title") or title_from_prompt(prompt))
    summary = str(plan.get("rationale") or f"SQL-backed Oracle result from {', '.join(source_tables) or 'Velocity CRM'}.")
    return NaturalQueryResult(
        prompt=prompt,
        sql=effective_sql,
        title=title,
        summary=summary,
        columns=columns,
        rows=rows,
        row_count=len(rows),
        source_tables=source_tables,
        component_type=visualization_decision.component_type,
        warnings=warnings,
        visualization_decision=visualization_decision,
        replan_count=attempt,
        semantic_catalog_version=CATALOG_VERSION,
    )
async def _plan_sql(
    self,
    *,
    prompt: str,
    catalog: dict[str, Any],
    detected_intents: list[str],
    row_limit: int,
    prior_feedback: str | None = None,
) -> dict[str, Any]:
    """Ask the runtime LLM for one read-only SQL plan that answers ``prompt``.

    Args:
        prompt: The user's natural-language question.
        catalog: Schema catalog; only its ``"tables"`` mapping is read here.
        detected_intents: Intent labels used to select the relevant tables and
            the semantic-catalog context sent to the planner.
        row_limit: Row cap communicated to the planner.
        prior_feedback: Feedback from a previous failed attempt; when given it
            is appended to the request so the model changes the query.

    Returns:
        The planner's JSON object. It always contains a truthy ``"sql"`` entry;
        ``title`` and ``rationale`` are requested but not enforced here.

    Raises:
        RuntimeError: If no LLM provider is configured, or the response holds
            no usable SQL (including content that is not valid JSON).
    """
    try:
        providers = runtime_llm_service._provider_catalog()
    except Exception:
        providers = {}
    if not providers:
        raise RuntimeError("No runtime LLM providers configured for Oracle natural planning.")

    # Only send the tables relevant to the detected intents (plus a fixed core
    # set) so the schema excerpt stays within the character budget below.
    schema_full = catalog.get("tables", {})
    relevant_tables = self._relevant_tables_for_intents(detected_intents)
    schema_brief_dict = {
        table: meta
        for table, meta in schema_full.items()
        if table in relevant_tables or table in {"crm_people", "crm_leads", "inventory_projects", "inventory_units"}
    }
    schema_brief = json.dumps(schema_brief_dict, default=str)[:14000]
    semantic_context = build_semantic_context_for_planner(detected_intents, max_concepts=5)

    replan_section = ""
    if prior_feedback:
        replan_section = (
            f"\n\nPREVIOUS ATTEMPT FAILED - EXECUTION FEEDBACK:\n{prior_feedback}\n"
            "You must address the feedback and change the query accordingly."
        )
    example_section = (
        "CANONICAL SQL PATTERNS:\n"
        "Generic top QD clients:\n"
        "SELECT p.full_name, p.primary_email, p.primary_phone, q.current_value AS qd_score, q.score_type, q.computed_at "
        "FROM intel_qd_scores q JOIN crm_people p ON p.person_id = q.person_id "
        "WHERE q.score_type = 'overall' ORDER BY q.current_value DESC LIMIT 8;\n"
        "Property-scoped lowest QD clients:\n"
        "SELECT p.full_name, p.primary_email, pi.project_name, q.current_value AS qd_score "
        "FROM crm_property_interests pi JOIN crm_people p ON p.person_id = pi.person_id "
        "JOIN intel_qd_scores q ON q.person_id = p.person_id "
        "WHERE q.score_type = 'overall' AND pi.project_name ILIKE '%Atri Surya Toron%' "
        "ORDER BY q.current_value ASC LIMIT 5;\n"
        "Recently contacted high-interest clients:\n"
        "SELECT p.full_name, p.primary_email, lc.last_contact_at, lc.last_channel, q.current_value AS qd_score "
        "FROM read_last_contacted lc JOIN crm_people p ON p.person_id = lc.person_id "
        "LEFT JOIN intel_qd_scores q ON q.person_id = p.person_id AND q.score_type = 'overall' "
        "WHERE lc.last_contact_at >= NOW() - INTERVAL '3 months' "
        "ORDER BY q.current_value DESC NULLS LAST LIMIT 10;"
    )
    response = await runtime_llm_service.chat(
        provider_id="sglang",
        model=None,
        system_prompt=(
            "You are Oracle's read-only PostgreSQL planner for Project Velocity CRM. "
            "Use the semantic catalog as the business source of truth, not raw column guessing. "
            "Generate exactly one SELECT or WITH query. "
            "Return strict JSON with keys: sql, title, rationale. "
            "Never generate INSERT, UPDATE, DELETE, DDL, COPY, or permission statements. "
            "Never use columns that are not present in the raw schema."
        ),
        messages=[
            {
                "role": "user",
                "content": (
                    f"SEMANTIC CATALOG:\n{semantic_context}\n\n"
                    f"RAW SCHEMA:\n{schema_brief}\n\n"
                    "NON-NEGOTIABLE DATA RULES:\n"
                    "- crm_people is identity only; it does not own QD scores.\n"
                    "- For QD score prompts, join intel_qd_scores.person_id to crm_people.person_id and use intel_qd_scores.current_value.\n"
                    "- Valid intel_qd_scores.score_type values are: overall, intent, engagement, urgency, financial_qualification.\n"
                    "- Never filter intel_qd_scores.score_type = 'QD'. For generic QD prompts use score_type = 'overall'.\n"
                    "- For contact recency, use read_last_contacted.last_contact_at or intel_interactions.happened_at.\n"
                    "- Do not use edge_communication_events.timestamp or crm_property_interests.last_discussed_at for contact recency.\n\n"
                    f"{example_section}\n\n"
                    f"DETECTED INTENTS: {', '.join(detected_intents)}\n\n"
                    f"USER QUESTION:\n{prompt}\n\n"
                    f"ROW CAP: {row_limit}\n"
                    f"{replan_section}\n\n"
                    "Return strict JSON: {\"sql\": \"...\", \"title\": \"...\", \"rationale\": \"...\"}"
                ),
            }
        ],
        temperature=0.05,
        response_format="json",
        metadata={
            "agent": "oracle_natural_db_agent_v2",
            "intents": detected_intents,
            "catalog_version": CATALOG_VERSION,
        },
    )

    message = response.get("message") or {}
    parsed = message.get("parsedJson")
    content = message.get("content") or "{}"
    if not isinstance(parsed, dict):
        if isinstance(content, str):
            try:
                parsed = json.loads(content)
            except json.JSONDecodeError as exc:
                # Report malformed planner output through the same error
                # channel as every other planner failure in this method.
                raise RuntimeError("Natural DB planner returned no valid SQL.") from exc
        else:
            parsed = content
    if isinstance(parsed, dict) and parsed.get("sql"):
        return parsed
    raise RuntimeError("Natural DB planner returned no valid SQL.")
@staticmethod
def _relevant_tables_for_intents(intents: list[str]) -> set[str]:
intent_tables: dict[str, set[str]] = {
"last_contacted": {
"intel_interactions",
"crm_people",
"crm_leads",
"read_last_contacted",
"crm_last_contact_read_model",
},
"interested_clients": {
"crm_property_interests",
"crm_people",
"inventory_projects",
"intel_qd_scores",
},
"qd_score": {"intel_qd_scores", "crm_people"},
"pipeline": {"crm_opportunities", "crm_leads", "crm_people", "inventory_projects"},
"site_visits": {"intel_visits", "crm_people", "inventory_projects"},
"timeline": {
"intel_interactions",
"intel_calls",
"intel_whatsapp_threads",
"intel_messages",
"intel_emails",
"intel_visits",
"crm_people",
},
"objections": {"intel_call_objections", "crm_people", "inventory_projects"},
"broker_performance": {"crm_leads", "crm_opportunities", "crm_people"},
"next_action": {"read_next_best_action", "crm_people"},
"inventory": {"inventory_projects", "inventory_units", "crm_property_interests"},
"client_360": {
"crm_people",
"crm_leads",
"intel_qd_scores",
"crm_property_interests",
"crm_opportunities",
"intel_interactions",
"read_last_contacted",
"read_next_best_action",
},
"extracted_facts": {"intel_extracted_facts", "crm_people"},
}
tables: set[str] = set()
for intent in intents:
tables.update(intent_tables.get(intent, set()))
return tables
# Module-level NaturalDbAgent instance, constructed once when this module is imported.
natural_db_agent = NaturalDbAgent()

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,497 @@
{
"_meta": {
"version": "1.0.0",
"created": "2026-04-18",
"description": "Oracle Template Seed Database — canonical chapter/subchapter taxonomy and seed JSON examples for the Project Velocity Oracle platform",
"total_chapters": 6,
"total_subchapters": 24,
"total_seed_examples": 36
},
"chapters": [
{
"chapter_id": "ch-001",
"name": "Market Intelligence",
"description": "Components for real estate market analysis, pricing trends, demand signals, and competitive landscape.",
"sort_order": 1,
"subchapters": [
{
"subchapter_id": "sub-001-01",
"name": "Pricing Trends",
"description": "Price per sqft trends, AED/m² benchmarks, quarterly movement charts.",
"sort_order": 1
},
{
"subchapter_id": "sub-001-02",
"name": "Demand Signals",
"description": "Search volume, inquiry rate, site visit frequency, and absorption rate components.",
"sort_order": 2
},
{
"subchapter_id": "sub-001-03",
"name": "Competitive Landscape",
"description": "Developer comparison, project pipeline mapping, competitive unit mix analysis.",
"sort_order": 3
},
{
"subchapter_id": "sub-001-04",
"name": "Location Index",
"description": "District-level scores, proximity analysis, infrastructure readiness.",
"sort_order": 4
}
]
},
{
"chapter_id": "ch-002",
"name": "Lead Intelligence",
"description": "Components for lead profiling, scoring, pipeline health, and behaviour tracking.",
"sort_order": 2,
"subchapters": [
{
"subchapter_id": "sub-002-01",
"name": "Lead Profile",
"description": "Buyer persona cards, nationality, budget bracket, preferred property type.",
"sort_order": 1
},
{
"subchapter_id": "sub-002-02",
"name": "QD Score",
"description": "Qualification-Desire score breakdown, historical trend, per-dimension scores.",
"sort_order": 2
},
{
"subchapter_id": "sub-002-03",
"name": "Pipeline Health",
"description": "Pipeline stage distribution, velocity, stall alerts, probability weighting.",
"sort_order": 3
},
{
"subchapter_id": "sub-002-04",
"name": "Engagement History",
"description": "Touchpoint timeline, dwell time heat maps, content interaction logs.",
"sort_order": 4
}
]
},
{
"chapter_id": "ch-003",
"name": "Communication Intelligence",
"description": "Components surfacing insights from calls, messages, transcripts, and follow-up commitments.",
"sort_order": 3,
"subchapters": [
{
"subchapter_id": "sub-003-01",
"name": "Call Summary",
"description": "Transcript summary, speaker diarization, key-phrase extraction.",
"sort_order": 1
},
{
"subchapter_id": "sub-003-02",
"name": "Promise Tracker",
"description": "Promises made during calls, follow-up dates, commitment confidence.",
"sort_order": 2
},
{
"subchapter_id": "sub-003-03",
"name": "WhatsApp Thread",
"description": "Business WhatsApp message thread summaries, sentiment per message.",
"sort_order": 3
},
{
"subchapter_id": "sub-003-04",
"name": "Reminder Surface",
"description": "Due follow-ups, overdue reminders, NemoClaw-suggested next actions.",
"sort_order": 4
}
]
},
{
"chapter_id": "ch-004",
"name": "Inventory Analytics",
"description": "Components for property inventory insight, availability, and absorption.",
"sort_order": 4,
"subchapters": [
{
"subchapter_id": "sub-004-01",
"name": "Property Card",
"description": "Single-property summary card with unit details, pricing, media reference.",
"sort_order": 1
},
{
"subchapter_id": "sub-004-02",
"name": "Availability Matrix",
"description": "Bed-type × availability grid with unit count and price band.",
"sort_order": 2
},
{
"subchapter_id": "sub-004-03",
"name": "Absorption Rate",
"description": "Sales velocity per project and developer over rolling windows.",
"sort_order": 3
},
{
"subchapter_id": "sub-004-04",
"name": "Inventory Comparison",
"description": "Side-by-side comparison of two or more properties on key metrics.",
"sort_order": 4
}
]
},
{
"chapter_id": "ch-005",
"name": "Operational Metrics",
"description": "System-level, team-level, and showroom-level operational performance components.",
"sort_order": 5,
"subchapters": [
{
"subchapter_id": "sub-005-01",
"name": "Showroom Traffic",
"description": "Visitor count, zone dwell time, peak hour distribution.",
"sort_order": 1
},
{
"subchapter_id": "sub-005-02",
"name": "Team Performance",
"description": "Agent-level QD scores, conversion rates, call volume, follow-up compliance.",
"sort_order": 2
},
{
"subchapter_id": "sub-005-03",
"name": "Campaign Metrics",
"description": "Catalyst campaign reach, engagement rate, cost-per-lead, ROAS.",
"sort_order": 3
},
{
"subchapter_id": "sub-005-04",
"name": "System Health",
"description": "Backend queue depth, GPU utilization, transcription job latency.",
"sort_order": 4
}
]
},
{
"chapter_id": "ch-006",
"name": "Calendar and Follow-Up",
"description": "Components for scheduling, action planning, and NemoClaw-derived follow-up surfaces.",
"sort_order": 6,
"subchapters": [
{
"subchapter_id": "sub-006-01",
"name": "Calendar View",
"description": "Personal calendar view with communication-derived events and reminders.",
"sort_order": 1
},
{
"subchapter_id": "sub-006-02",
"name": "Action Queue",
"description": "Prioritized action list for an agent, ordered by urgency and lead value.",
"sort_order": 2
},
{
"subchapter_id": "sub-006-03",
"name": "Follow-Up Plan",
"description": "Structured follow-up plan derived from call outcomes and NemoClaw insights.",
"sort_order": 3
},
{
"subchapter_id": "sub-006-04",
"name": "Reminder Cards",
"description": "Surface-agnostic reminder card applicable to tablet and phone edge.",
"sort_order": 4
}
]
}
],
"seed_examples": [
{
"example_id": "ex-001",
"chapter_id": "ch-001",
"subchapter_id": "sub-001-01",
"title": "Dubai Marina — Price Per Sqft Trend (12-Month)",
"quality_notes": "Canonical example. Use for pricing trend chart templates.",
"is_canonical": true,
"template_name": "Pricing Trend Chart",
"component_type": "line_chart",
"accepted_shapes": ["time_series"],
"example_json": {
"componentType": "line_chart",
"title": "Dubai Marina — AED/sqft Trend",
"subtitle": "12-Month Rolling Average",
"dataSource": {
"type": "inventory_aggregate",
"district": "Dubai Marina",
"metric": "avg_price_per_sqft",
"window": "12M"
},
"visualization": {
"xAxis": "month",
"yAxis": "aed_per_sqft",
"format": "currency_aed",
"annotations": [
{ "date": "2025-10", "label": "Off-plan surge", "type": "event" }
],
"trend_line": true,
"confidence_band": false
},
"style": {
"accentColor": "#2563EB",
"gridLines": "subtle"
}
}
},
{
"example_id": "ex-002",
"chapter_id": "ch-001",
"subchapter_id": "sub-001-02",
"title": "Inquiry Velocity — Downtown Dubai (30-Day)",
"quality_notes": "Use for demand signal bar charts.",
"is_canonical": true,
"template_name": "Demand Signal Bar",
"component_type": "bar_chart",
"accepted_shapes": ["categorical_count"],
"example_json": {
"componentType": "bar_chart",
"title": "Inquiry Volume — Downtown Dubai",
"subtitle": "Last 30 Days by Week",
"dataSource": {
"type": "crm_aggregate",
"district": "Downtown Dubai",
"metric": "inquiry_count",
"window": "30D",
"groupBy": "week"
},
"visualization": {
"xAxis": "week",
"yAxis": "inquiry_count",
"format": "integer",
"comparison": { "enabled": true, "label": "Prior 30D", "style": "ghost_bar" }
},
"style": {
"accentColor": "#10B981",
"barRadius": 4
}
}
},
{
"example_id": "ex-003",
"chapter_id": "ch-002",
"subchapter_id": "sub-002-02",
"title": "Lead QD Score Card — Mohammed Al-Rashid",
"quality_notes": "Canonical single-lead QD score breakdown card.",
"is_canonical": true,
"template_name": "QD Score Card",
"component_type": "metric_card_group",
"accepted_shapes": ["qd_score_breakdown"],
"example_json": {
"componentType": "metric_card_group",
"title": "QD Score",
"subtitle": "Qualification × Desire",
"dataSource": {
"type": "sentinel_qd",
"leadId": "{{lead_id}}"
},
"visualization": {
"layout": "2x2_grid",
"cards": [
{ "dimension": "overall", "label": "Overall QD", "format": "percentage" },
{ "dimension": "qualification", "label": "Qualification", "format": "percentage" },
{ "dimension": "desire", "label": "Desire", "format": "percentage" },
{ "dimension": "velocity", "label": "Engagement Velocity", "format": "trend_arrow" }
],
"threshold_colors": {
"high": "#10B981",
"medium": "#F59E0B",
"low": "#EF4444"
}
}
}
},
{
"example_id": "ex-004",
"chapter_id": "ch-003",
"subchapter_id": "sub-003-01",
"title": "Call Summary Card — Diarized Transcript with Key Phrases",
"quality_notes": "Canonical call summary. Use for communication intelligence panels.",
"is_canonical": true,
"template_name": "Call Summary Card",
"component_type": "communication_summary",
"accepted_shapes": ["transcript_summary"],
"example_json": {
"componentType": "communication_summary",
"title": "Call Summary",
"dataSource": {
"type": "edge_communication_event",
"eventId": "{{event_id}}",
"channel": "pstn"
},
"visualization": {
"layout": "timeline_with_phrases",
"show_speaker_labels": true,
"show_duration": true,
"show_sentiment": true,
"key_phrase_highlight": true,
"sections": ["summary", "promises", "key_phrases", "next_action"]
}
}
},
{
"example_id": "ex-005",
"chapter_id": "ch-003",
"subchapter_id": "sub-003-02",
"title": "Promise Tracker — Lead Follow-Up Commitments",
"quality_notes": "Canonical promise tracker. Use for follow-up reminder surfaces.",
"is_canonical": true,
"template_name": "Promise Tracker Table",
"component_type": "data_table",
"accepted_shapes": ["communication_facts"],
"example_json": {
"componentType": "data_table",
"title": "Promises and Commitments",
"dataSource": {
"type": "edge_memory_facts",
"leadId": "{{lead_id}}",
"factTypes": ["promise", "follow_up_date", "decision_maker_note"]
},
"visualization": {
"columns": [
{ "key": "fact_text", "label": "Commitment", "width": "flex" },
{ "key": "effective_date", "label": "Due", "format": "date_relative" },
{ "key": "confidence", "label": "Confidence", "format": "percentage" },
{ "key": "extracted_from", "label": "Source", "format": "badge" }
],
"row_actions": ["mark_done", "create_calendar_event"],
"sort": { "column": "effective_date", "direction": "asc" }
}
}
},
{
"example_id": "ex-006",
"chapter_id": "ch-004",
"subchapter_id": "sub-004-01",
"title": "Property Card — Sobha One Tower A",
"quality_notes": "Canonical property card. Use for inventory summaries.",
"is_canonical": true,
"template_name": "Property Summary Card",
"component_type": "property_card",
"accepted_shapes": ["inventory_property"],
"example_json": {
"componentType": "property_card",
"title": "Property Summary",
"dataSource": {
"type": "inventory_property",
"propertyId": "{{property_id}}"
},
"visualization": {
"layout": "hero_with_stats",
"sections": [
"project_name",
"developer_name",
"location_map_pin",
"price_bands",
"unit_mix_summary",
"amenity_chips",
"media_carousel"
],
"cta": { "label": "Schedule Viewing", "action": "create_calendar_event" }
}
}
},
{
"example_id": "ex-007",
"chapter_id": "ch-005",
"subchapter_id": "sub-005-01",
"title": "Showroom Traffic Heatmap",
"quality_notes": "Canonical traffic component. Use for operational dashboards.",
"is_canonical": true,
"template_name": "Showroom Traffic Heatmap",
"component_type": "heatmap",
"accepted_shapes": ["zone_time_matrix"],
"example_json": {
"componentType": "heatmap",
"title": "Showroom Zone Traffic",
"subtitle": "Today — Live",
"dataSource": {
"type": "sentinel_live",
"metric": "visitor_dwell_time",
"groupBy": ["zone", "hour"]
},
"visualization": {
"xAxis": "hour_of_day",
"yAxis": "zone_name",
"value": "avg_dwell_minutes",
"colorScale": { "low": "#EFF6FF", "high": "#1D4ED8" },
"annotations": true
}
}
},
{
"example_id": "ex-008",
"chapter_id": "ch-006",
"subchapter_id": "sub-006-04",
"title": "Phone Edge Reminder Card — Follow-Up Due",
"quality_notes": "Designed for narrow phone edge surfaces. Minimal data footprint.",
"is_canonical": true,
"template_name": "Reminder Card",
"component_type": "compact_alert_card",
"accepted_shapes": ["insight_recommendation"],
"example_json": {
"componentType": "compact_alert_card",
"title": "Follow-Up Reminder",
"dataSource": {
"type": "insight_recommendations",
"leadId": "{{lead_id}}",
"status": "pending",
"limit": 1
},
"visualization": {
"layout": "single_card_narrow",
"fields": ["summary", "suggested_action", "target_system"],
"actions": ["accept", "dismiss", "snooze_1h"],
"urgency_indicator": true,
"surface_target": ["iphone_edge", "android_phone_edge"]
}
}
}
],
"kimi_synthetic_plan": {
"description": "Downstream Kimi synthetic data expansion plan consuming this seed DB",
"expansion_targets": [
{
"chapter_id": "ch-001",
"subchapter_id": "sub-001-01",
"seed_example_ids": ["ex-001"],
"requested_count": 50,
"model": "kimi",
"diversity_axes": ["district", "property_type", "time_window"]
},
{
"chapter_id": "ch-002",
"subchapter_id": "sub-002-02",
"seed_example_ids": ["ex-003"],
"requested_count": 100,
"model": "kimi",
"diversity_axes": ["lead_nationality", "budget_bracket", "pipeline_stage"]
},
{
"chapter_id": "ch-003",
"subchapter_id": "sub-003-01",
"seed_example_ids": ["ex-004"],
"requested_count": 200,
"model": "kimi",
"diversity_axes": ["call_outcome", "property_type", "language"]
},
{
"chapter_id": "ch-004",
"subchapter_id": "sub-004-01",
"seed_example_ids": ["ex-006"],
"requested_count": 150,
"model": "kimi",
"diversity_axes": ["developer_name", "district", "bedrooms"]
}
],
"quality_gate": {
"min_acceptance_confidence": 0.8,
"human_review_required_for_canonical": true,
"auto_accept_below_count": 20
}
}
}

View File

@@ -0,0 +1,97 @@
from __future__ import annotations
import json
import re
from pathlib import Path
from typing import Any
# Prompt templates live in a "nemoclaw_prompts" directory one level above this module's directory.
_PROMPT_DIR = Path(__file__).resolve().parent.parent / "nemoclaw_prompts"
# Matches ``{name}`` placeholders whose name consists of word characters.
_PLACEHOLDER_PATTERN = re.compile(r"\{(\w+)\}")
# Substring of the lower-cased user prompt -> component template ids to recommend.
_TEMPLATE_HINTS = {
    "pipeline": ["tpl_pipeline_board_v2", "tpl_followup_queue_v1"],
    "kanban": ["tpl_pipeline_board_v2"],
    "map": ["tpl_geo_investor_heat_v2"],
    "geo": ["tpl_geo_investor_heat_v2"],
    "trend": ["tpl_absorption_trend_v1", "tpl_campaign_lead_line_v1"],
    "quota": ["tpl_quota_gauge_v1", "tpl_kpi_pipeline_health_v1"],
    "broker": ["tpl_broker_performance_v1"],
    "source": ["tpl_qd_source_compare_v1", "tpl_bar_source_quality_v3"],
    "follow": ["tpl_followup_queue_v1", "tpl_followup_gap_v1"],
    "campaign": ["tpl_campaign_lead_line_v1"],
}


class PersonaService:
    """Render prompt templates from disk and recommend component templates for a prompt."""

    def __init__(self) -> None:
        # Prompt name -> template file on disk.
        self.prompt_files = {
            "qd_calculator": _PROMPT_DIR / "qd_calculator.md",
            "lead_tagger": _PROMPT_DIR / "lead_tagger.md",
            "cctv_profiler": _PROMPT_DIR / "cctv_profiler.md",
        }

    @staticmethod
    def _prompt_is_loaded(path: Path) -> bool:
        """Return True when ``path`` can be read as UTF-8 and is not blank."""
        try:
            return path.read_text(encoding="utf-8").strip() != ""
        except (OSError, UnicodeDecodeError):
            # Missing, unreadable, directory or undecodable: report "not loaded"
            # instead of letting the health check itself raise.
            return False

    async def health(self) -> dict[str, Any]:
        """Report per-prompt availability.

        Returns:
            ``{"status": "healthy" | "degraded", "prompts": {name: bool}}``;
            the status is "healthy" only when every prompt file is readable
            and non-blank.
        """
        loaded = {key: self._prompt_is_loaded(path) for key, path in self.prompt_files.items()}
        return {
            "status": "healthy" if all(loaded.values()) else "degraded",
            "prompts": loaded,
        }

    async def render_prompt(
        self,
        *,
        prompt_name: str,
        variables: dict[str, Any],
    ) -> dict[str, Any]:
        """Fill the ``{key}`` placeholders of a named prompt template.

        Substitution is done in a single pass over the template, so text that
        arrives inside a variable value is never re-scanned for placeholders.
        Dict and list values are inserted as JSON; everything else via ``str``.

        Args:
            prompt_name: Key into ``self.prompt_files``.
            variables: Placeholder name -> value.

        Returns:
            A dict with ``promptName``, ``templatePath``, ``renderedPrompt`` and
            ``unresolvedVariables`` (sorted template placeholders for which no
            variable was supplied).

        Raises:
            FileNotFoundError: If the prompt name is unknown or its file is missing.
        """
        path = self.prompt_files.get(prompt_name)
        if path is None or not path.exists():
            raise FileNotFoundError(f"Unknown prompt '{prompt_name}'.")
        template = path.read_text(encoding="utf-8")

        replacements: dict[str, str] = {}
        for key, value in variables.items():
            text = json.dumps(value) if isinstance(value, (dict, list)) else str(value)
            # setdefault keeps the first value when two keys format to the same token.
            replacements.setdefault(f"{{{key}}}", text)

        rendered = template
        if replacements:
            # Longest token first so no token can be shadowed by a shorter one.
            token_pattern = re.compile(
                "|".join(re.escape(token) for token in sorted(replacements, key=len, reverse=True))
            )
            # A function replacement inserts the value literally (no backslash processing).
            rendered = token_pattern.sub(lambda match: replacements[match.group(0)], template)

        supplied = {str(key) for key in variables}
        unresolved = sorted(
            {name for name in _PLACEHOLDER_PATTERN.findall(template) if name not in supplied}
        )
        return {
            "promptName": prompt_name,
            "templatePath": str(path),
            "renderedPrompt": rendered,
            "unresolvedVariables": unresolved,
        }

    async def plan_for_prompt(
        self,
        *,
        prompt: str,
        tenant_id: str,
        actor_role: str,
    ) -> dict[str, Any]:
        """Recommend component templates for ``prompt`` based on keyword hints.

        Every ``_TEMPLATE_HINTS`` key found as a substring of the lower-cased
        prompt contributes its template ids; duplicates are dropped while the
        first-seen order is kept. With no hit, two default templates are used.
        """
        lower_prompt = prompt.lower()
        recommended: list[str] = []
        for token, template_ids in _TEMPLATE_HINTS.items():
            if token in lower_prompt:
                recommended.extend(template_ids)
        if not recommended:
            recommended = ["tpl_kpi_pipeline_health_v1", "tpl_qd_source_compare_v1"]
        # dict.fromkeys de-duplicates while preserving insertion order.
        recommended = list(dict.fromkeys(recommended))
        return {
            "tenantId": tenant_id,
            "actorRole": actor_role,
            "recommendedTemplates": recommended,
            "canvasBlocks": [
                {
                    "type": "textCanvas",
                    "widthMode": "full",
                    "minHeightPx": 180,
                    "content": (
                        "Oracle planned a mixed response: query the CRM, reuse matching component templates, "
                        "and synthesize missing visualization blocks if a direct template is unavailable."
                    ),
                }
            ],
            "workflowIntent": "comfy_oracle_canvas",
        }


# Module-level service instance, constructed once at import time.
persona_service = PersonaService()

View File

@@ -0,0 +1,436 @@
"""
oracle/plan_verifier.py
Verify planned SQL before execution and optionally repair common semantic errors.
"""
from __future__ import annotations
import json
import logging
import re
from dataclasses import dataclass, field
from typing import Any
from .semantic_catalog import VALID_QD_SCORE_TYPES, build_semantic_context_for_planner
logger = logging.getLogger(__name__)
# Whole-word, case-insensitive match for write/DDL/permission keywords.
# PlanVerifier.verify reports any hit as a blocking "destructive_dml" violation.
_DESTRUCTIVE = re.compile(
r"\b(insert|update|delete|drop|alter|truncate|copy|create|grant|revoke|call|execute|do|merge)\b",
re.IGNORECASE,
)
# (table, column) pairs reported as "deprecated_timestamp" when a contact
# intent is detected and the SQL references the column through the table or its alias.
_BAD_TIMESTAMP_PATTERNS: list[tuple[str, str]] = [
("edge_communication_events", "timestamp"),
("crm_property_interests", "last_discussed_at"),
("crm_property_interests", "last_interaction"),
]
# (table, column) pairs reported as "wrong_score_column"; the violation text
# points to intel_qd_scores.current_value as the QD source of truth.
_BAD_SCORE_PATTERNS: list[tuple[str, str]] = [
("crm_people", "engagement_score"),
("crm_leads", "engagement_score"),
("intel_interactions", "engagement_score"),
("crm_people", "qd_score"),
("crm_leads", "qd_score"),
]
# (table, column) pairs reported as "hallucinated_column", i.e. columns the
# verifier treats as absent from the live schema.
_HALLUCINATED_COLUMNS: list[tuple[str, str]] = [
("intel_interactions", "broker_id"),
("intel_interactions", "sentiment"),
("crm_leads", "last_contacted_at"),
("crm_people", "last_contact"),
("read_last_contacted", "last_contacted_at"),
("read_last_contacted", "days_since_last_contact"),
("read_last_contacted", "staleness_label"),
]
# Intents that enable the deprecated-timestamp check and the canonical
# recent-contact repair.
_CONTACT_INTENTS = {"last_contacted", "timeline"}
def _extract_limit_from_prompt(prompt: str, default: int) -> int:
lowered = prompt.lower()
numeric_match = re.search(r"\b(?:top|last|latest|recent|first|show|which|give me)\s+(\d{1,4})\b", lowered)
if numeric_match:
return max(1, min(int(numeric_match.group(1)), default))
words = {
"one": 1,
"two": 2,
"three": 3,
"four": 4,
"five": 5,
"six": 6,
"seven": 7,
"eight": 8,
"nine": 9,
"ten": 10,
"eleven": 11,
"twelve": 12,
"fifteen": 15,
"twenty": 20,
}
word_match = re.search(
r"\b(?:top|last|latest|recent|first|show|which|give me)\s+"
r"(one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|fifteen|twenty)\b",
lowered,
)
if word_match:
return max(1, min(words[word_match.group(1)], default))
return default
def _canonical_qd_sql(prompt: str, row_limit: int) -> str:
    """Build the canonical overall-QD ranking query for ``prompt``.

    The sort is ascending when the prompt contains "lowest", "least", "bottom"
    or "weakest", otherwise descending. A trailing "in <name>" phrase is used
    as a project filter unless it looks like a time window.

    Args:
        prompt: The user's natural-language question.
        row_limit: Row cap; also the upper bound for a limit found in the prompt.

    Returns:
        A single SELECT statement ending in a LIMIT clause.
    """
    limit = _extract_limit_from_prompt(prompt, row_limit)
    lowered = prompt.lower()
    direction = "ASC" if any(token in lowered for token in ("lowest", "least", "bottom", "weakest")) else "DESC"
    project_filter = ""
    project_join = ""
    project_match = re.search(r"\bin\s+([A-Za-z0-9][A-Za-z0-9 .&'-]{2,80})(?:\?|$)", prompt)
    if project_match:
        # The character class admits ".", so a sentence-final period would
        # otherwise become part of the ILIKE pattern and match no project.
        project_name = project_match.group(1).strip().rstrip(" .")
        # Phrases such as "in the last 3 months" are time windows, not project names.
        if not re.search(r"\b(last|month|months|week|weeks|day|days|year|years)\b", project_name, re.IGNORECASE):
            project_join = "JOIN crm_property_interests pi ON pi.person_id = p.person_id "
            # Double single quotes so the name stays inside the SQL string literal.
            escaped = project_name.replace("'", "''")
            project_filter = f"AND pi.project_name ILIKE '%{escaped}%' "
    return (
        "SELECT p.full_name, p.primary_email, p.primary_phone, "
        "q.current_value AS qd_score, q.score_type, q.computed_at "
        "FROM intel_qd_scores q "
        "JOIN crm_people p ON p.person_id = q.person_id "
        f"{project_join}"
        "WHERE q.score_type = 'overall' "
        f"{project_filter}"
        f"ORDER BY q.current_value {direction} "
        f"LIMIT {limit}"
    )
def _canonical_recent_contact_sql(prompt: str, row_limit: int) -> str:
    """Build the canonical "recently contacted" query for ``prompt``.

    The look-back window defaults to three months and is replaced by an
    explicit "last/past/recent <n> <unit>" phrase when the prompt has one.
    Rows are ordered by overall QD score (NULLs last), then by most recent
    contact.
    """
    limit = _extract_limit_from_prompt(prompt, row_limit)
    window = re.search(
        r"\b(?:last|past|recent)\s+(\d{1,3})\s+(day|days|week|weeks|month|months|year|years)\b",
        prompt.lower(),
    )
    # int() normalises the captured digits (e.g. drops leading zeros).
    interval = f"{int(window.group(1))} {window.group(2)}" if window else "3 months"
    clauses = [
        "SELECT p.full_name, p.primary_email, p.primary_phone,",
        "lc.last_contact_at, lc.last_channel, lc.days_since_contact,",
        "q.current_value AS qd_score",
        "FROM read_last_contacted lc",
        "JOIN crm_people p ON p.person_id = lc.person_id",
        "LEFT JOIN intel_qd_scores q ON q.person_id = p.person_id AND q.score_type = 'overall'",
        f"WHERE lc.last_contact_at >= NOW() - INTERVAL '{interval}'",
        "ORDER BY q.current_value DESC NULLS LAST, lc.last_contact_at DESC",
        f"LIMIT {limit}",
    ]
    return " ".join(clauses)
def _semantic_rule_repair(
    *,
    prompt: str,
    detected_intents: list[str],
    row_limit: int,
    violations: list[VerificationViolation],
) -> str | None:
    """Return deterministic replacement SQL for well-known violation patterns.

    QD-score violations under a ``qd_score`` intent yield the canonical QD
    ranking query; timestamp or hallucinated-column violations under a contact
    intent yield the canonical recent-contact query. Any other combination
    returns ``None``.
    """
    triggered = {item.rule for item in violations}
    score_rules = {"wrong_score_column", "impossible_score_type"}
    contact_rules = {"deprecated_timestamp", "hallucinated_column"}

    if "qd_score" in detected_intents and not triggered.isdisjoint(score_rules):
        return _canonical_qd_sql(prompt, row_limit)

    has_contact_intent = not _CONTACT_INTENTS.isdisjoint(detected_intents)
    if has_contact_intent and not triggered.isdisjoint(contact_rules):
        return _canonical_recent_contact_sql(prompt, row_limit)
    return None
def _extract_score_type_literals(sql: str) -> list[str]:
literals: list[str] = []
eq_pattern = re.compile(
r"(?:\b\w+\.)?score_type\s*=\s*'([^']+)'",
re.IGNORECASE,
)
in_pattern = re.compile(
r"(?:\b\w+\.)?score_type\s+in\s*\(([^)]*)\)",
re.IGNORECASE | re.DOTALL,
)
literals.extend(match.group(1) for match in eq_pattern.finditer(sql))
for match in in_pattern.finditer(sql):
literals.extend(re.findall(r"'([^']+)'", match.group(1)))
return literals
def _references_table(sql_lower: str, table: str) -> bool:
return bool(re.search(rf"\b(?:from|join)\s+(?:public\.)?{re.escape(table)}\b", sql_lower))
def _aliases_for_table(sql: str, table: str) -> set[str]:
aliases = {table}
pattern = re.compile(
rf"\b(?:from|join)\s+(?:public\.)?{re.escape(table)}(?:\s+(?:as\s+)?([a-zA-Z_][a-zA-Z0-9_]*))?",
re.IGNORECASE,
)
for match in pattern.finditer(sql):
alias = match.group(1)
if alias and alias.lower() not in {"on", "where", "join", "left", "right", "inner", "outer", "full", "cross"}:
aliases.add(alias)
return aliases
def _references_column(sql: str, sql_lower: str, table: str, column: str) -> bool:
    """Return True when ``sql`` uses ``column`` qualified by ``table`` or one of its aliases.

    Only qualified references (``alias.column``) are detected, and only when
    the table itself appears after FROM/JOIN.
    """
    if not _references_table(sql_lower, table):
        return False
    column_part = re.escape(column)
    return any(
        re.search(rf"\b{re.escape(name)}\.{column_part}\b", sql, re.IGNORECASE)
        for name in _aliases_for_table(sql, table)
    )
@dataclass
class VerificationViolation:
    """A single finding raised while checking a planned SQL statement."""

    # Rule identifier, e.g. "destructive_dml" or "missing_limit".
    rule: str
    # Human-readable explanation of what was found.
    detail: str
    # Either "blocking" or "warning" for the findings created in this module.
    severity: str
@dataclass
class VerificationResult:
    """Outcome of verifying, and possibly repairing, a planned SQL statement."""

    # True when no blocking violation was found.
    passed: bool
    # The SQL this result describes (the repaired text after a repair).
    sql: str
    # The SQL as first submitted for verification.
    original_sql: str
    # Every finding, blocking and warning alike.
    violations: list[VerificationViolation] = field(default_factory=list)
    # Set when ``sql`` came out of a repair step.
    was_repaired: bool = False
    # Set once a repair was tried, regardless of its outcome.
    repair_attempted: bool = False
    # Set when a repair could not be carried out.
    repair_failed: bool = False
    # Free-form messages collected along the way.
    notes: list[str] = field(default_factory=list)
class PlanVerifier:
def verify(self, sql: str, prompt: str, detected_intents: list[str], row_limit: int) -> VerificationResult:
    """Run the static rule checks against ``sql`` without modifying it.

    Blocking rules: write/DDL keywords, deprecated timestamp columns (only
    when a contact intent is detected), impossible ``score_type`` literals,
    non-authoritative score columns and hallucinated columns. Warning rules:
    a missing LIMIT clause and ``SELECT *`` combined with more than one JOIN.

    Args:
        sql: The planned SQL statement.
        prompt: Accepted for call compatibility; not inspected by these checks.
        detected_intents: Intent labels; gate the contact-timestamp rule.
        row_limit: Row cap, quoted in the missing-LIMIT warning.

    Returns:
        A result whose ``passed`` is True when no blocking violation was
        found; ``sql`` and ``original_sql`` both hold the input statement.
    """
    del prompt
    sql_lower = sql.lower()
    intent_set = set(detected_intents)
    violations: list[VerificationViolation] = []

    def flag(rule: str, detail: str, severity: str = "blocking") -> None:
        violations.append(VerificationViolation(rule=rule, detail=detail, severity=severity))

    if _DESTRUCTIVE.search(sql):
        flag("destructive_dml", "SQL contains a write or DDL statement.")

    for table, column in _BAD_TIMESTAMP_PATTERNS:
        if intent_set.intersection(_CONTACT_INTENTS) and _references_column(sql, sql_lower, table, column):
            flag(
                "deprecated_timestamp",
                f"SQL references {table}.{column}, which is sparse or deprecated. "
                "Use intel_interactions.happened_at or read_last_contacted.last_contact_at.",
            )

    valid_score_types = {value.lower() for value in VALID_QD_SCORE_TYPES}
    for literal in _extract_score_type_literals(sql):
        if literal.lower() not in valid_score_types:
            flag(
                "impossible_score_type",
                f"SQL filters intel_qd_scores.score_type with impossible value '{literal}'. "
                "Valid values are: " + ", ".join(VALID_QD_SCORE_TYPES) + ". "
                "For generic QD prompts, use score_type = 'overall'.",
            )

    for table, column in _BAD_SCORE_PATTERNS:
        if _references_column(sql, sql_lower, table, column):
            flag(
                "wrong_score_column",
                f"SQL references {table}.{column}, which is not the QD source of truth. "
                "Use intel_qd_scores.current_value.",
            )

    for table, column in _HALLUCINATED_COLUMNS:
        if _references_column(sql, sql_lower, table, column):
            flag(
                "hallucinated_column",
                f"SQL references {table}.{column}, which does not exist in the live schema.",
            )

    # Match LIMIT and JOIN as whole words so identifiers such as
    # "credit_limit" or "joined_at" neither hide a missing LIMIT nor inflate
    # the JOIN count.
    if not re.search(r"\blimit\b", sql_lower):
        flag(
            "missing_limit",
            f"SQL has no LIMIT clause; executor will enforce row cap {row_limit}.",
            severity="warning",
        )
    if re.search(r"\bselect\s+\*\b", sql_lower) and len(re.findall(r"\bjoin\b", sql_lower)) > 1:
        flag(
            "select_star_join",
            "SELECT * with multiple JOINs may create noisy wide rows.",
            severity="warning",
        )

    has_blocking = any(violation.severity == "blocking" for violation in violations)
    return VerificationResult(
        passed=not has_blocking,
        sql=sql,
        original_sql=sql,
        violations=violations,
    )
async def verify_and_repair(
    self,
    sql: str,
    prompt: str,
    detected_intents: list[str],
    row_limit: int,
    llm_service: Any | None = None,
) -> VerificationResult:
    """Verify ``sql`` and try to repair it when blocking violations are found.

    Repair order: the LLM repair is tried first; the deterministic semantic
    rule repair is used when the LLM output still fails verification, and
    also when no LLM service is available or the LLM call raises.

    Args:
        sql: The planned SQL statement.
        prompt: The user's natural-language question.
        detected_intents: Intent labels for the prompt.
        row_limit: Row cap for the query.
        llm_service: Object exposing the chat API used by ``_repair_sql``;
            ``None`` disables the LLM repair step.

    Returns:
        The verification result of the statement that should be used next.
        ``original_sql`` always holds the input; ``notes`` describes what was
        attempted. The result may still have ``passed`` False.
    """
    result = self.verify(sql, prompt, detected_intents, row_limit)
    if result.passed:
        return result
    blocking = [violation for violation in result.violations if violation.severity == "blocking"]
    if not blocking:
        return result
    result.repair_attempted = True
    blocking_rules = ", ".join(violation.rule for violation in blocking)

    def rule_based_repair(carried_notes: list[str]) -> VerificationResult | None:
        """Verify the canonical SQL for these violations, if a rule applies."""
        canonical_sql = _semantic_rule_repair(
            prompt=prompt,
            detected_intents=detected_intents,
            row_limit=row_limit,
            violations=blocking,
        )
        if not canonical_sql:
            return None
        rechecked = self.verify(canonical_sql, prompt, detected_intents, row_limit)
        rechecked.original_sql = sql
        rechecked.was_repaired = True
        rechecked.repair_attempted = True
        rechecked.notes.extend(carried_notes)
        rechecked.notes.append("Semantic rule repair applied: " + blocking_rules)
        return rechecked

    if llm_service is None:
        result.repair_failed = True
        result.notes.append("No LLM service available for SQL repair.")
        # The deterministic repair needs no LLM, so it is still worth trying.
        fallback = rule_based_repair(list(result.notes))
        return fallback if fallback is not None else result

    try:
        repaired_sql = await self._repair_sql(
            sql=sql,
            prompt=prompt,
            violations=blocking,
            detected_intents=detected_intents,
            row_limit=row_limit,
            llm_service=llm_service,
        )
    except Exception as exc:
        logger.warning("plan_verifier repair failed: %s", exc)
        result.repair_failed = True
        result.notes.append(f"Repair failed: {exc}")
        # An LLM outage should not block a repair the rules can do on their own.
        fallback = rule_based_repair(list(result.notes))
        return fallback if fallback is not None else result

    recheck = self.verify(repaired_sql, prompt, detected_intents, row_limit)
    recheck.original_sql = sql
    recheck.was_repaired = True
    recheck.repair_attempted = True
    recheck.notes.append("Repaired violations: " + blocking_rules)
    if not recheck.passed:
        fallback = rule_based_repair([])
        if fallback is not None:
            return fallback
    return recheck
async def _repair_sql(
    self,
    *,
    sql: str,
    prompt: str,
    violations: list[VerificationViolation],
    detected_intents: list[str],
    row_limit: int,
    llm_service: Any,
) -> str:
    """Ask the LLM to rewrite ``sql`` so that ``violations`` are resolved.

    A single user message is assembled from the original prompt, the
    semantic catalog for ``detected_intents``, fixed repair rules, two
    canonical example queries, the violation list, the broken SQL and the
    row cap. It is sent through ``llm_service.chat`` requesting JSON.

    Returns:
        The stripped ``sql`` value from the reply (``parsedJson`` when it is
        a dict, otherwise the JSON-decoded ``content``).

    Raises:
        ValueError: when the reply holds no non-empty ``sql`` value.
    """
    catalog = build_semantic_context_for_planner(detected_intents, max_concepts=4)
    violation_text = "\n".join(f"- [{item.rule}] {item.detail}" for item in violations)
    score_types = ", ".join(VALID_QD_SCORE_TYPES)
    hard_rules = (
        "Hard repair rules:\n"
        "- crm_people is identity only. It has no QD score source-of-truth column.\n"
        "- For QD score prompts, use intel_qd_scores.current_value and join crm_people on person_id.\n"
        f"- Valid intel_qd_scores.score_type values are: {score_types}.\n"
        "- Never use score_type = 'QD'. For generic QD prompts use score_type = 'overall'.\n"
        "- For recent contact prompts, use read_last_contacted.last_contact_at or intel_interactions.happened_at.\n"
        "- Never use edge_communication_events.timestamp or crm_property_interests.last_discussed_at for contact recency."
    )
    canonical_examples = (
        "Canonical repair examples:\n"
        "Generic QD ranking:\n"
        "SELECT p.full_name, p.primary_email, p.primary_phone, q.current_value AS qd_score, q.score_type, q.computed_at "
        "FROM intel_qd_scores q JOIN crm_people p ON p.person_id = q.person_id "
        "WHERE q.score_type = 'overall' ORDER BY q.current_value DESC LIMIT 8;\n"
        "Recent contact ranking:\n"
        "SELECT p.full_name, p.primary_email, lc.last_contact_at, lc.last_channel, q.current_value AS qd_score "
        "FROM read_last_contacted lc JOIN crm_people p ON p.person_id = lc.person_id "
        "LEFT JOIN intel_qd_scores q ON q.person_id = p.person_id AND q.score_type = 'overall' "
        "WHERE lc.last_contact_at >= NOW() - INTERVAL '3 months' "
        "ORDER BY q.current_value DESC NULLS LAST LIMIT 10;"
    )
    # Sections are separated by a blank line, in this fixed order.
    user_content = "\n\n".join(
        [
            f"Original prompt: {prompt}",
            f"Semantic catalog:\n{catalog}",
            hard_rules,
            canonical_examples,
            f"Violations:\n{violation_text}",
            f"Broken SQL:\n{sql}",
            f"Row cap: {row_limit}",
            "Return JSON: {\"sql\": \"<corrected SQL>\"}",
        ]
    )
    system_prompt = (
        "You are Oracle's SQL repair agent. "
        "Fix only the listed violations. Return strict JSON with key 'sql'."
    )
    response = await llm_service.chat(
        provider_id="sglang",
        model=None,
        system_prompt=system_prompt,
        messages=[{"role": "user", "content": user_content}],
        temperature=0.0,
        response_format="json",
        metadata={"agent": "oracle_plan_verifier_repair"},
    )

    reply = response.get("message") or {}
    payload = reply.get("parsedJson")
    if not isinstance(payload, dict):
        # No pre-parsed object in the reply: decode the raw text content instead.
        raw = reply.get("content") or "{}"
        payload = json.loads(raw) if isinstance(raw, str) else {}

    fixed_sql = str(payload.get("sql") or "").strip()
    if fixed_sql:
        return fixed_sql
    raise ValueError("Repair LLM returned empty SQL.")
# Module-level PlanVerifier instance, created once at import time.
plan_verifier = PlanVerifier()

View File

@@ -0,0 +1,225 @@
"""
oracle/policy_service.py
Enforces tenant isolation, role-based access, privacy-tier escalation,
field-level redaction, and row limit guardrails for all Oracle data access.
Section 11.3 of the Oracle Architecture Document.
"""
from __future__ import annotations
import logging
from dataclasses import dataclass
from typing import Any
logger = logging.getLogger(__name__)
# ── Constants ─────────────────────────────────────────────────────────────────
# Largest row count each role may request in a single retrieval plan.
MAX_ROW_LIMITS: dict[str, int] = {
    "junior_broker": 100,
    "senior_broker": 500,
    "sales_director": 2000,
    "marketing_operator": 1000,
    "data_steward": 5000,
    "compliance_reviewer": 5000,
    "platform_admin": 10000,
}

# Which roles can see which privacy tiers
PRIVACY_TIER_ACCESS: dict[str, set[str]] = {
    "standard": {
        "junior_broker",
        "senior_broker",
        "sales_director",
        "marketing_operator",
        "data_steward",
        "compliance_reviewer",
        "platform_admin",
    },
    "restricted": {
        "senior_broker",
        "sales_director",
        "data_steward",
        "compliance_reviewer",
        "platform_admin",
    },
    "sensitive": {
        "data_steward",
        "compliance_reviewer",
        "platform_admin",
    },
}

# Datasets with cross-tenant join restrictions
CROSS_TENANT_RESTRICTED: set[str] = {
    "global_lead_market",
    "competitor_pricing",
    "cross_tenant_referrals",
}
@dataclass
class PolicyContext:
    """Identity of the actor a policy decision is evaluated for."""

    # Tenant the actor belongs to; bound into queries by enforce_tenant_predicate.
    tenant_id: str
    # Identifier of the acting user (used in policy log lines).
    actor_id: str
    # Role name looked up in MAX_ROW_LIMITS and PRIVACY_TIER_ACCESS.
    actor_role: str
    # Name of the policy profile; not read by the code visible in this module.
    policy_profile_id: str = "policy_standard_v4"
@dataclass
class ValidationResult:
    """Outcome of a policy check: verdict, messages, redaction and row cap."""

    passed: bool
    errors: list[str]
    warnings: list[str]
    redaction_policy: str = "none"
    effective_row_limit: int = 100

    @classmethod
    def ok(cls, row_limit: int, redaction: str = "none") -> "ValidationResult":
        """Build a passing result with no errors or warnings."""
        return cls(
            True,
            [],
            [],
            redaction_policy=redaction,
            effective_row_limit=row_limit,
        )

    @classmethod
    def denied(cls, reason: str) -> "ValidationResult":
        """Build a failing result whose only error is ``reason``."""
        return cls(False, [reason], [])
class PolicyService:
    """
    Validates Oracle data access against the policy tables defined in this
    module (``MAX_ROW_LIMITS``, ``PRIVACY_TIER_ACCESS`` and
    ``CROSS_TENANT_RESTRICTED``) and applies redaction to result rows.
    """

    # Row limit used when a plan omits ``rowLimit`` or supplies an unusable
    # value, and the cap applied to roles missing from ``MAX_ROW_LIMITS``.
    DEFAULT_ROW_LIMIT = 100

    @staticmethod
    def _coerce_row_limit(requested: Any) -> int | None:
        """Return ``requested`` as a positive int, or ``None`` if unusable.

        Plans arrive as loosely-typed dicts, so the value may be ``None``, a
        string, a float or a negative number. Anything that cannot be
        converted to an integer >= 1 is rejected.
        """
        try:
            value = int(requested)
        except (TypeError, ValueError, OverflowError):
            return None
        return value if value >= 1 else None

    def validate_retrieval_plan(
        self,
        plan: dict[str, Any],
        ctx: PolicyContext,
    ) -> ValidationResult:
        """
        Validates a structured retrieval plan against the policy tables.

        Checks, in order: restricted datasets, cross-tenant joins, privacy
        tier access for the actor's role, and the row limit guardrail. The
        redaction policy is derived from the privacy tier and role.

        Returns a ValidationResult with ``passed=True`` only when no check
        produced an error. Row-limit problems produce warnings, not errors;
        the effective limit is always a positive integer capped at the
        role's maximum.
        """
        errors: list[str] = []
        warnings: list[str] = []
        dataset = plan.get("dataset", "")
        privacy_tier = plan.get("privacyTier", "standard")
        # ``"joins": null`` is treated like a missing key.
        joins = plan.get("joins") or []

        # 1. Tenant isolation — reject cross-tenant predicates
        if dataset in CROSS_TENANT_RESTRICTED:
            errors.append(
                f"POLICY_CROSS_TENANT_JOIN_DENIED: Dataset '{dataset}' requires "
                f"cross-tenant access which is not permitted for role '{ctx.actor_role}'."
            )

        # 2. Cross-tenant join detection
        for join in joins:
            if not isinstance(join, dict):
                # A malformed entry cannot be inspected for its tenant, so it
                # is denied explicitly instead of crashing or being skipped.
                errors.append(
                    f"POLICY_INVALID_JOIN: Join entries must be objects; "
                    f"got {type(join).__name__}."
                )
                continue
            if join.get("tenantId") and join["tenantId"] != ctx.tenant_id:
                errors.append(
                    f"POLICY_CROSS_TENANT_JOIN_DENIED: Join to tenant '{join['tenantId']}' "
                    f"is not permitted."
                )

        # 3. Privacy tier access (an unknown tier has no allowed roles -> denied)
        allowed_roles = PRIVACY_TIER_ACCESS.get(privacy_tier, set())
        if ctx.actor_role not in allowed_roles:
            errors.append(
                f"POLICY_PRIVACY_TIER_ESCALATION: Role '{ctx.actor_role}' cannot access "
                f"'{privacy_tier}' tier data in dataset '{dataset}'."
            )

        # 4. Row limit guardrail
        max_limit = MAX_ROW_LIMITS.get(ctx.actor_role, self.DEFAULT_ROW_LIMIT)
        raw_row_limit = plan.get("rowLimit", self.DEFAULT_ROW_LIMIT)
        requested_row_limit = self._coerce_row_limit(raw_row_limit)
        if requested_row_limit is None:
            requested_row_limit = self.DEFAULT_ROW_LIMIT
            warnings.append(
                f"ROW_LIMIT_INVALID: Requested row limit {raw_row_limit!r} is not a "
                f"positive integer; using default {requested_row_limit}."
            )
        effective_limit = min(requested_row_limit, max_limit)
        if requested_row_limit > max_limit:
            warnings.append(
                f"ROW_LIMIT_CAPPED: Requested {requested_row_limit} rows; "
                f"capped to {effective_limit} for role '{ctx.actor_role}'."
            )

        # 5. Determine redaction policy
        redaction = "none"
        if privacy_tier == "restricted" and ctx.actor_role == "senior_broker":
            redaction = "aggregate_only"
        elif privacy_tier == "sensitive":
            redaction = "full_redact"

        return ValidationResult(
            passed=not errors,
            errors=errors,
            warnings=warnings,
            redaction_policy=redaction,
            effective_row_limit=effective_limit,
        )

    def enforce_tenant_predicate(
        self,
        query_parameters: dict[str, Any],
        ctx: PolicyContext,
    ) -> dict[str, Any]:
        """
        Returns a copy of ``query_parameters`` whose ``tenant_id`` entry is
        always the actor's tenant, overriding any caller-supplied value.
        The input dict is not modified.
        """
        params = dict(query_parameters)
        params["tenant_id"] = ctx.tenant_id
        return params

    def validate_component_access(
        self,
        component_access_controls: dict[str, Any],
        ctx: PolicyContext,
    ) -> bool:
        """
        Returns True if the actor's role is in the component's allowedRoles.
        A component with no allowedRoles is denied and a warning is logged.
        """
        allowed_roles: list[str] = component_access_controls.get("allowedRoles", [])
        if not allowed_roles:
            # No roles configured: treat as a misconfiguration and deny.
            logger.warning(
                "POLICY_WARN: Component has no allowedRoles — defaulting to deny for tenant=%s actor=%s",
                ctx.tenant_id,
                ctx.actor_id,
            )
            return False
        return ctx.actor_role in allowed_roles

    def redact(
        self,
        rows: list[dict[str, Any]],
        redaction_policy: str,
        sensitive_fields: list[str] | None = None,
    ) -> list[dict[str, Any]]:
        """
        Applies field-level redaction to result rows.

        - ``none``: rows are returned as-is.
        - ``full_redact``: a single marker row carrying only the row count.
        - ``aggregate_only``: each row keeps only a fixed set of aggregate keys.
        - ``team_scope``: rows are returned as-is (not enforced here).
        - anything else: treated like ``full_redact`` so that an unknown or
          misspelled policy name can never expose unredacted rows.

        ``sensitive_fields`` is accepted for interface compatibility and is
        not consulted.
        """
        if redaction_policy == "none" or not rows:
            return rows
        if redaction_policy == "full_redact":
            return [{"__redacted__": True, "count": len(rows)}]
        if redaction_policy == "aggregate_only":
            # Keep only aggregate fields; drop individual identifiers
            safe_fields = {"count", "total", "average", "sum", "min", "max", "stage", "source", "district"}
            return [{k: v for k, v in row.items() if k in safe_fields} for row in rows]
        if redaction_policy == "team_scope":
            # NOTE(review): no filtering happens here; per-row enforcement
            # needs the actor context, which this method does not receive.
            return rows
        # Unknown policy: fail closed instead of returning raw rows.
        logger.warning(
            "POLICY_WARN: Unknown redaction policy %r — applying full redaction",
            redaction_policy,
        )
        return [{"__redacted__": True, "count": len(rows)}]

    def audit_policy_check(
        self,
        ctx: PolicyContext,
        dataset: str,
        result: ValidationResult,
    ) -> None:
        """Log a policy check: WARNING when denied, DEBUG when passed."""
        if not result.passed:
            logger.warning(
                "POLICY_DENIED tenant=%s actor=%s dataset=%s errors=%s",
                ctx.tenant_id,
                ctx.actor_id,
                dataset,
                result.errors,
            )
        else:
            logger.debug(
                "POLICY_PASS tenant=%s actor=%s dataset=%s redaction=%s limit=%d",
                ctx.tenant_id,
                ctx.actor_id,
                dataset,
                result.redaction_policy,
                result.effective_row_limit,
            )

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,554 @@
"""
oracle/router_v1.py
FastAPI router for all Oracle v1 endpoints.
Mounted at /api/oracle/v1 in main.py.
Endpoints (from spec §13.2):
GET /me
GET /canvas-pages/{pageId}
POST /canvas-pages/{pageId}/prompts
POST /canvas-pages/{pageId}/forks
POST /canvas-pages/{pageId}/rollback
GET /canvas-pages/{pageId}/revisions
GET /component-templates
POST /component-templates/synthesize (stub)
GET /merge-requests
POST /merge-requests
POST /merge-requests/{mrId}/review
WS /ws/oracle/canvas/{pageId}
"""
from __future__ import annotations
import json
import logging
import os
import uuid
from datetime import datetime, timezone
from typing import Any, Set
from fastapi import APIRouter, Depends, HTTPException, Request, WebSocket, WebSocketDisconnect, status
from pydantic import BaseModel, Field
from backend.auth.dependencies import UserPrincipal, get_current_user
from .canvas_service import canvas_service
from .collaboration_service import collaboration_service
from .action_service import oracle_action_service
from .persona_service import persona_service
from .prompt_orchestrator import prompt_orchestrator
from .policy_service import PolicyService, PolicyContext
from .codebook_service import codebook_service
# Module-level logger for this router.
logger = logging.getLogger(__name__)
# Router object the endpoint decorators below register against.
router = APIRouter()
# Shared PolicyService instance.
policy_svc = PolicyService()
# Tenant id stamped on every profile built in this module; read once at
# import time from ORACLE_DEFAULT_TENANT_ID.
_DEFAULT_TENANT_ID = os.getenv("ORACLE_DEFAULT_TENANT_ID", "tenant_velocity")
# ── Helpers ───────────────────────────────────────────────────────────────────
def _ok(data: Any, meta: dict | None = None) -> dict:
return {"status": "ok", "data": data, "meta": meta or {}}
def _now() -> str:
return datetime.now(timezone.utc).isoformat()
def _normalize_oracle_role(role: str) -> str:
mapping = {
"JUNIOR_BROKER": "junior_broker",
"SENIOR_BROKER": "senior_broker",
"SALES_DIRECTOR": "sales_director",
"ADMIN": "platform_admin",
}
return mapping.get(role.strip().upper(), "sales_director")
def _build_user_profile(
    *,
    user_id: str,
    email: str,
    display_name: str,
    role: str,
    avatar_url: str | None,
    default_page_id: str,
) -> dict[str, Any]:
    """Assemble the user profile dict returned by this router.

    Identity fields come from the arguments; tenant, timezone, locale,
    policy profile and creation time come from module/env defaults.
    ``updatedAt`` is always the current time.
    """
    canvas_preferences = {
        "defaultDensity": "comfortable",
        "defaultPlacementMode": "append_after_last_visible_component",
        "showLineageBadges": True,
    }
    profile: dict[str, Any] = {
        "userId": user_id,
        "tenantId": _DEFAULT_TENANT_ID,
        "email": email,
        "displayName": display_name,
        "role": _normalize_oracle_role(role),
        "avatarUrl": avatar_url,
    }
    profile["timezone"] = os.getenv("ORACLE_DEFAULT_TIMEZONE", "Asia/Dubai")
    profile["locale"] = os.getenv("ORACLE_DEFAULT_LOCALE", "en-AE")
    profile["defaultPageId"] = default_page_id
    profile["canvasPreferences"] = canvas_preferences
    profile["policyProfileId"] = os.getenv(
        "ORACLE_POLICY_PROFILE_ID", "policy_sales_director_standard_v4"
    )
    # Without ORACLE_PROFILE_CREATED_AT the creation time is simply "now".
    created_fallback = _now()
    profile["createdAt"] = os.getenv("ORACLE_PROFILE_CREATED_AT", created_fallback)
    profile["updatedAt"] = _now()
    return profile
async def _get_current_user_profile(request: Request, user: UserPrincipal) -> dict[str, Any]:
    """Build the Oracle profile for ``user``.

    Ensures the user's default canvas page exists, then looks up display
    name, email and avatar in ``users_and_roles``. When no row is found the
    user id is used for the display name and a ``@velocity.local`` address
    for the email.

    Raises:
        HTTPException: 503 when the app has no ``db_pool`` on its state.
    """
    default_page = await canvas_service.ensure_default_page(
        tenant_id=_DEFAULT_TENANT_ID,
        owner_id=user.user_id,
        title=os.getenv("ORACLE_DEFAULT_PAGE_TITLE", "Oracle Main Canvas"),
    )

    db_pool = getattr(request.app.state, "db_pool", None)
    if db_pool is None:
        raise HTTPException(status_code=503, detail="Database unavailable.")

    async with db_pool.acquire() as connection:
        record = await connection.fetchrow(
            """
SELECT
COALESCE(full_name, split_part(email, '@', 1), id::text) AS display_name,
COALESCE(email, id::text || '@velocity.local') AS email,
avatar_url
FROM users_and_roles
WHERE id = $1::uuid
""",
            user.user_id,
        )

    if record:
        email = record["email"]
        display_name = record["display_name"]
        avatar_url = record["avatar_url"]
    else:
        email = f"{user.user_id}@velocity.local"
        display_name = user.user_id
        avatar_url = None

    return _build_user_profile(
        user_id=user.user_id,
        email=email,
        display_name=display_name,
        role=user.role,
        avatar_url=avatar_url,
        default_page_id=default_page["pageId"],
    )
async def _ctx_from_request(request: Request, user: UserPrincipal) -> PolicyContext:
    """Derive the PolicyContext (tenant, actor id, role) from the caller's profile."""
    profile = await _get_current_user_profile(request, user)
    tenant_id = profile["tenantId"]
    actor_id = profile["userId"]
    actor_role = profile["role"]
    return PolicyContext(tenant_id=tenant_id, actor_id=actor_id, actor_role=actor_role)
async def _resolve_page_id(request: Request, user: UserPrincipal, page_id: str) -> str:
    """Return the concrete page id for ``page_id``.

    An empty value or the alias ``main`` (any case) resolves to the caller's
    default page; anything else is returned stripped of whitespace.
    """
    candidate = (page_id or "").strip()
    wants_default = not candidate or candidate.lower() == "main"
    if not wants_default:
        return candidate
    profile = await _get_current_user_profile(request, user)
    return str(profile["defaultPageId"])
# ── Pydantic Models ───────────────────────────────────────────────────────────
class PromptSubmitRequest(BaseModel):
    # Body of POST /canvas-pages/{page_id}/prompts.
    clientRequestId: str = Field(..., description="Client-generated idempotency key")
    # Branch the prompt is executed against.
    branchId: str
    # Prompt text, 1 to 4096 characters.
    prompt: str = Field(..., min_length=1, max_length=4096)
    # Prior conversation messages, passed through to the orchestrator.
    conversationContext: list[dict[str, str]] = Field(default_factory=list)
    placementMode: str = Field("append_after_last_visible_component")
    # When set, the recorded action targets this lead instead of the page.
    targetLeadId: str | None = None
    # Stored on the recorded action as its writeback payload.
    plannedWriteback: dict[str, Any] = Field(default_factory=dict)
class ForkCreateRequest(BaseModel):
    # Body of POST /canvas-pages/{page_id}/forks.
    recipientUserId: str
    # NOTE(review): not forwarded to the collaboration service by create_fork.
    sourceRevision: int
    # Must be "private" or "team".
    visibility: str = Field("private", pattern="^(private|team)$")
    message: str = ""
class RollbackRequest(BaseModel):
    # Body of POST /canvas-pages/{page_id}/rollback.
    # Revision to roll back to; must be >= 1.
    targetRevision: int = Field(..., ge=1)
    # Passed to the canvas service as the idempotency key.
    clientRequestId: str
class MergeRequestCreateRequest(BaseModel):
    # Body of POST /merge-requests: source and target page/branch plus text.
    sourcePageId: str
    sourceBranchId: str
    targetPageId: str
    targetBranchId: str
    # Title, 1 to 256 characters.
    title: str = Field(..., min_length=1, max_length=256)
    description: str = ""
class MergeReviewRequest(BaseModel):
    # Body of POST /merge-requests/{mr_id}/review.
    # One of "approve", "reject" or "changes_requested".
    decision: str = Field(..., pattern="^(approve|reject|changes_requested)$")
    comment: str = ""
    # Passed through unchanged to the collaboration service.
    resolutions: list[dict[str, Any]] = Field(default_factory=list)
class TemplateSynthesizeRequest(BaseModel):
    # Body of POST /component-templates/synthesize.
    prompt: str
    # Forwarded to the codebook service as ``data_shapes``.
    dataShape: list[str]
    # NOTE(review): not read by synthesize_template.
    styleSignatureRef: str | None = None
class PersonaRenderRequest(BaseModel):
    # Body of POST /persona/render.
    # One of "qd_calculator", "lead_tagger" or "cctv_profiler".
    promptName: str = Field(..., pattern="^(qd_calculator|lead_tagger|cctv_profiler)$")
    # Values handed to the persona service when rendering the prompt.
    variables: dict[str, Any] = Field(default_factory=dict)
class PageCreateRequest(BaseModel):
    # Body of POST /canvas-pages; title is optional, at most 256 characters.
    title: str = Field(default="Untitled Canvas", max_length=256)
class PageUpdateRequest(BaseModel):
    # Body of PATCH /canvas-pages/{page_id}; title is required, 1 to 256 characters.
    title: str = Field(..., min_length=1, max_length=256)
# ── Endpoints ─────────────────────────────────────────────────────────────────
@router.get("/me", summary="Get current user profile")
async def get_me(request: Request, user: UserPrincipal = Depends(get_current_user)) -> dict:
    # Resolve the caller's profile, then wrap it in the standard envelope.
    profile = await _get_current_user_profile(request, user)
    return _ok(profile)
@router.get("/canvas-pages", summary="List canvas pages for current user")
async def list_canvas_pages(
    request: Request,
    search: str | None = None,
    limit: int = 50,
    user: UserPrincipal = Depends(get_current_user),
) -> dict:
    # Pages are scoped to the caller's tenant and to the caller as owner.
    policy_ctx = await _ctx_from_request(request, user)
    listing = await canvas_service.list_pages(
        tenant_id=policy_ctx.tenant_id,
        owner_id=policy_ctx.actor_id,
        search=search,
        limit=limit,
    )
    page_count = len(listing)
    return _ok(listing, meta={"count": page_count})
@router.post("/canvas-pages", summary="Create a new canvas page")
async def create_canvas_page(
    payload: PageCreateRequest,
    request: Request,
    user: UserPrincipal = Depends(get_current_user),
) -> dict:
    policy_ctx = await _ctx_from_request(request, user)
    # A whitespace-only title falls back to the default name.
    clean_title = payload.title.strip() or "Untitled Canvas"
    created = await canvas_service.create_page(
        tenant_id=policy_ctx.tenant_id,
        owner_id=policy_ctx.actor_id,
        title=clean_title,
    )
    return _ok(created)
@router.get("/canvas-pages/{page_id}", summary="Get canvas page by ID")
async def get_canvas_page(page_id: str, request: Request, user: UserPrincipal = Depends(get_current_user)) -> dict:
    # "main" (or an empty id) resolves to the caller's default page.
    page_id = await _resolve_page_id(request, user, page_id)
    policy_ctx = await _ctx_from_request(request, user)
    found = await canvas_service.get_page(page_id, policy_ctx.tenant_id)
    if found:
        return _ok(found)
    raise HTTPException(status_code=404, detail=f"Canvas page '{page_id}' not found.")
@router.patch("/canvas-pages/{page_id}", summary="Rename a canvas page")
async def rename_canvas_page(
    page_id: str,
    payload: PageUpdateRequest,
    request: Request,
    user: UserPrincipal = Depends(get_current_user),
) -> dict:
    page_id = await _resolve_page_id(request, user, page_id)
    policy_ctx = await _ctx_from_request(request, user)
    try:
        renamed = await canvas_service.update_page_title(
            page_id=page_id,
            tenant_id=policy_ctx.tenant_id,
            owner_id=policy_ctx.actor_id,
            title=payload.title,
        )
    except ValueError as exc:
        # The service's ValueError is reported to the client as 404.
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    else:
        return _ok(renamed)
@router.delete("/canvas-pages/{page_id}", summary="Delete a canvas page")
async def delete_canvas_page(
    page_id: str,
    request: Request,
    user: UserPrincipal = Depends(get_current_user),
) -> dict:
    page_id = await _resolve_page_id(request, user, page_id)
    policy_ctx = await _ctx_from_request(request, user)
    try:
        await canvas_service.delete_page(
            page_id=page_id,
            tenant_id=policy_ctx.tenant_id,
            owner_id=policy_ctx.actor_id,
        )
    except ValueError as exc:
        # The service's ValueError is reported to the client as 404.
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    confirmation = {"pageId": page_id, "deleted": True}
    return _ok(confirmation)
@router.post("/canvas-pages/{page_id}/prompts", summary="Submit a prompt to generate canvas components")
async def submit_prompt(
    page_id: str,
    payload: PromptSubmitRequest,
    request: Request,
    user: UserPrincipal = Depends(get_current_user),
) -> dict:
    # Steps: run the prompt through the orchestrator, reload the page,
    # record an Oracle action for the execution, then return a summary.
    page_id = await _resolve_page_id(request, user, page_id)
    policy_ctx = await _ctx_from_request(request, user)

    execution = await prompt_orchestrator.execute(
        tenant_id=policy_ctx.tenant_id,
        page_id=page_id,
        branch_id=payload.branchId,
        actor_id=policy_ctx.actor_id,
        actor_role=policy_ctx.actor_role,
        prompt=payload.prompt,
        conversation_context=payload.conversationContext,
        client_request_id=payload.clientRequestId,
        placement_mode=payload.placementMode,
    )
    if execution["status"] == "failed":
        # The execution's warnings are surfaced to the client as the error list.
        failure_detail = {"errors": execution.get("warnings", [])}
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail=failure_detail,
        )

    page = await canvas_service.get_page(page_id, policy_ctx.tenant_id)

    # With a target lead the action is a writeback plan against that lead;
    # otherwise it is a canvas generation against the page itself.
    targets_lead = bool(payload.targetLeadId)
    action = await oracle_action_service.create_from_execution(
        execution=execution,
        target_entity_type="lead" if targets_lead else "canvas_page",
        target_entity_id=payload.targetLeadId or page_id,
        action_type="oracle_prompt_writeback_plan" if targets_lead else "oracle_canvas_generation",
        writeback_payload=payload.plannedWriteback,
    )

    if page:
        fallback_revision = page.get("headRevision", 0)
        components = page.get("components", [])
    else:
        fallback_revision = 0
        components = []

    summary = {
        "executionId": execution["executionId"],
        "actionId": action["actionId"],
        "status": execution["status"],
        "pageId": page_id,
        "branchId": payload.branchId,
        "headRevision": execution.get("headRevision", fallback_revision),
        "componentsCreated": execution.get("componentsCreated", []),
        "summary": execution.get("summary", ""),
        "warnings": execution.get("warnings", []),
        "components": components,
    }
    return _ok(summary)
@router.post("/canvas-pages/{page_id}/forks", summary="Create a fork (share) from a canvas page")
async def create_fork(
    page_id: str,
    payload: ForkCreateRequest,
    request: Request,
    user: UserPrincipal = Depends(get_current_user),
) -> dict:
    page_id = await _resolve_page_id(request, user, page_id)
    policy_ctx = await _ctx_from_request(request, user)

    source_page = await canvas_service.get_page(page_id, policy_ctx.tenant_id)
    if not source_page:
        raise HTTPException(status_code=404, detail="Source page not found.")

    try:
        forked = await collaboration_service.create_fork(
            source_page=source_page,
            recipient_user_id=payload.recipientUserId,
            created_by=policy_ctx.actor_id,
            visibility=payload.visibility,
            message=payload.message,
        )
    except ValueError as exc:
        # The service's ValueError is reported to the client as 400.
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    else:
        return _ok(forked)
@router.post("/canvas-pages/{page_id}/rollback", summary="Rollback canvas to a prior revision")
async def rollback_canvas(
    page_id: str,
    payload: RollbackRequest,
    request: Request,
    user: UserPrincipal = Depends(get_current_user),
) -> dict:
    page_id = await _resolve_page_id(request, user, page_id)
    policy_ctx = await _ctx_from_request(request, user)

    outcome = await canvas_service.rollback(
        page_id=page_id,
        tenant_id=policy_ctx.tenant_id,
        actor_id=policy_ctx.actor_id,
        target_revision=payload.targetRevision,
        idempotency_key=payload.clientRequestId,
    )

    # Reload the page so the response carries the post-rollback components.
    page = await canvas_service.get_page(page_id, policy_ctx.tenant_id)
    components = page.get("components", []) if page else []
    # Fall back to the requested revision when the service reports none.
    head_revision = outcome.get("revisionNumber", payload.targetRevision)
    return _ok(
        {
            "pageId": page_id,
            "headRevision": head_revision,
            "components": components,
        }
    )
@router.get("/canvas-pages/{page_id}/revisions", summary="List revision history for a canvas page")
async def list_revisions(page_id: str, request: Request, user: UserPrincipal = Depends(get_current_user)) -> dict:
    page_id = await _resolve_page_id(request, user, page_id)
    policy_ctx = await _ctx_from_request(request, user)
    history = await canvas_service.list_revisions(page_id, policy_ctx.tenant_id)
    revision_count = len(history)
    return _ok(history, meta={"count": revision_count})
@router.get("/component-templates", summary="List component templates")
async def list_templates(
    category: str | None = None,
    status: str | None = None,
    search: str | None = None,
    limit: int = 50,
    offset: int = 0,
) -> dict:
    # NOTE: the ``status`` query parameter shadows the imported fastapi
    # ``status`` module inside this function; it is only used as a filter.
    catalog = codebook_service.list_templates(
        category=category,
        status=status,
        search=search,
        limit=limit,
        offset=offset,
    )
    templates = catalog["templates"]
    paging = {"count": catalog["total"], "limit": limit, "offset": offset}
    return _ok(templates, meta=paging)
@router.post("/component-templates/synthesize", summary="Synthesize a new component template from a prompt")
async def synthesize_template(
    payload: TemplateSynthesizeRequest,
    request: Request,
    user: UserPrincipal = Depends(get_current_user),
) -> dict:
    profile = await _get_current_user_profile(request, user)
    synthesized = codebook_service.synthesize_template(
        prompt=payload.prompt,
        data_shapes=payload.dataShape,
    )
    # Stamp the caller's tenant; fill timestamps only if the service left them out.
    synthesized["tenantId"] = profile["tenantId"]
    synthesized.setdefault("createdAt", _now())
    synthesized.setdefault("updatedAt", _now())
    return _ok(synthesized)
@router.get("/persona/health", summary="Health check for Oracle persona prompt loading")
async def persona_health() -> dict:
    # Report whatever the persona service returns from its own health check.
    report = await persona_service.health()
    return _ok(report)
@router.post("/persona/render", summary="Render a subordinate Oracle persona prompt")
async def persona_render(payload: PersonaRenderRequest) -> dict:
    try:
        output = await persona_service.render_prompt(
            prompt_name=payload.promptName,
            variables=payload.variables,
        )
    except FileNotFoundError as exc:
        # A missing prompt file is reported to the client as 404.
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    else:
        return _ok(output)
@router.get("/merge-requests", summary="List merge requests for a target page")
async def list_merge_requests(
    targetPageId: str | None = None,
    status: str | None = None,
    *,
    request: Request,
    user: UserPrincipal = Depends(get_current_user),
) -> dict:
    # Lists merge requests whose target is ``targetPageId``, optionally
    # filtered by ``status``.
    #
    # Like the other data endpoints in this router, the caller must be
    # authenticated, and the target page must exist in the caller's tenant;
    # otherwise merge requests of arbitrary pages could be enumerated by id.
    #
    # Raises HTTPException 400 when ``targetPageId`` is missing and 404 when
    # the page is not visible to the caller's tenant.
    #
    # NOTE: the ``status`` query parameter shadows the imported fastapi
    # ``status`` module inside this function, so numeric codes are used.
    if not targetPageId:
        raise HTTPException(status_code=400, detail="targetPageId query param required")

    ctx = await _ctx_from_request(request, user)
    target_page = await canvas_service.get_page(targetPageId, ctx.tenant_id)
    if not target_page:
        raise HTTPException(status_code=404, detail=f"Canvas page '{targetPageId}' not found.")

    mrs = await collaboration_service.list_merge_requests(targetPageId, status)
    return _ok(mrs, meta={"count": len(mrs)})
@router.post("/merge-requests", summary="Open a merge request")
async def create_merge_request(
    payload: MergeRequestCreateRequest,
    request: Request,
    user: UserPrincipal = Depends(get_current_user),
) -> dict:
    policy_ctx = await _ctx_from_request(request, user)

    # Both pages must be visible within the caller's tenant.
    source_page = await canvas_service.get_page(payload.sourcePageId, policy_ctx.tenant_id)
    target_page = await canvas_service.get_page(payload.targetPageId, policy_ctx.tenant_id)
    if not (source_page and target_page):
        raise HTTPException(status_code=404, detail="Source or target page not found.")

    opened = await collaboration_service.open_merge_request(
        tenant_id=policy_ctx.tenant_id,
        source_page_id=payload.sourcePageId,
        source_branch_id=payload.sourceBranchId,
        source_head_revision=source_page.get("headRevision", 0),
        target_page_id=payload.targetPageId,
        target_branch_id=payload.targetBranchId,
        target_base_revision=target_page.get("headRevision", 0),
        title=payload.title,
        description=payload.description,
        created_by=policy_ctx.actor_id,
        source_components=source_page.get("components", []),
        target_components=target_page.get("components", []),
        # Simplified: empty base for demo
        base_components=[],
    )
    return _ok(opened)
@router.post("/merge-requests/{mr_id}/review", summary="Submit a merge request review")
async def review_merge_request(
    mr_id: str,
    payload: MergeReviewRequest,
    request: Request,
    user: UserPrincipal = Depends(get_current_user),
) -> dict:
    # The authenticated caller is recorded as the reviewer.
    policy_ctx = await _ctx_from_request(request, user)
    reviewed = await collaboration_service.review_merge_request(
        mr_id=mr_id,
        decision=payload.decision,
        reviewer_id=policy_ctx.actor_id,
        comment=payload.comment,
        resolutions=payload.resolutions,
    )
    return _ok(reviewed)
# ── WebSocket ─────────────────────────────────────────────────────────────────
class OracleConnectionManager:
    """In-process registry of open WebSockets, grouped by canvas page id."""

    def __init__(self) -> None:
        # page_id -> sockets currently subscribed to that page.
        self.active: dict[str, Set[WebSocket]] = {}

    async def connect(self, ws: WebSocket, page_id: str) -> None:
        """Accept ``ws`` and register it under ``page_id``."""
        await ws.accept()
        self.active.setdefault(page_id, set()).add(ws)

    def disconnect(self, ws: WebSocket, page_id: str) -> None:
        """Unregister ``ws``; unknown sockets and pages are ignored.

        The page entry is dropped once its last socket leaves, so the
        registry does not keep an empty set for every page ever opened.
        """
        page_connections = self.active.get(page_id)
        if page_connections is None:
            return
        page_connections.discard(ws)
        if not page_connections:
            self.active.pop(page_id, None)

    async def broadcast_page(self, page_id: str, payload: dict) -> None:
        """Send ``payload`` as JSON text to every socket on ``page_id``.

        Sockets whose send raises are unregistered. The payload is
        serialised once, before any send, so a payload that cannot be
        serialised raises ``TypeError`` to the caller instead of being
        mistaken for a dead connection.
        """
        page_connections = self.active.get(page_id)
        if not page_connections:
            return
        message = json.dumps(payload)
        dead: set[WebSocket] = set()
        # Iterate over a snapshot: each await yields to the event loop, where
        # other tasks may connect or disconnect and resize the live set.
        for ws in tuple(page_connections):
            try:
                await ws.send_text(message)
            except Exception:
                dead.add(ws)
        for ws in dead:
            self.disconnect(ws, page_id)
# Single connection registry used by the WebSocket endpoint below.
oracle_manager = OracleConnectionManager()
@router.websocket("/ws/oracle/canvas/{page_id}")
async def oracle_canvas_ws(ws: WebSocket, page_id: str) -> None:
    """
    WebSocket endpoint for real-time Oracle canvas collaboration.
    Event types: oracle.page.revision.committed, oracle.prompt.received, oracle.presence.updated

    Incoming frames are parsed as JSON; a ``{"type": "heartbeat"}`` object is
    answered with ``heartbeat.ack``. Frames that are not valid JSON, or not
    JSON objects, are ignored. The socket is always removed from the
    connection registry when the handler exits, whatever the reason.
    """
    # NOTE(review): the socket is accepted without any authentication or
    # tenant check on page_id — confirm this is enforced elsewhere.
    await oracle_manager.connect(ws, page_id)
    try:
        while True:
            data = await ws.receive_text()
            try:
                msg = json.loads(data)
            except json.JSONDecodeError:
                continue
            # Valid JSON need not be an object (e.g. "[]" or "1").
            if isinstance(msg, dict) and msg.get("type") == "heartbeat":
                # Reflect heartbeat
                await ws.send_text(json.dumps({"type": "heartbeat.ack", "timestamp": _now()}))
    except WebSocketDisconnect:
        # Normal client disconnect; cleanup happens in ``finally``.
        pass
    finally:
        oracle_manager.disconnect(ws, page_id)
# ── Pre-made templates seed ───────────────────────────────────────────────────

View File

@@ -0,0 +1,350 @@
-- ────────────────────────────────────────────────────────────────────────────
-- Oracle Schema Extension v2 — Multi-Surface Platform and Oracle Expansion
-- Date: 2026-04-18
-- Author: Velocity Platform Team
-- Depends on: schema_oracle.sql (must be applied first)
-- PostgreSQL 14+ required · UUID via pgcrypto already enabled
-- ────────────────────────────────────────────────────────────────────────────
-- ─── 1. Oracle Template Taxonomy ─────────────────────────────────────────────
-- Top level of the template taxonomy: one row per chapter, per tenant.
-- sort_order and is_active default to 0 and TRUE.
CREATE TABLE IF NOT EXISTS oracle_template_chapters (
    chapter_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    name TEXT NOT NULL,
    description TEXT,
    sort_order INTEGER NOT NULL DEFAULT 0,
    is_active BOOLEAN NOT NULL DEFAULT TRUE,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Second level of the taxonomy. Each subchapter belongs to exactly one
-- chapter and is deleted with it (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS oracle_template_subchapters (
    subchapter_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    chapter_id UUID NOT NULL REFERENCES oracle_template_chapters(chapter_id) ON DELETE CASCADE,
    tenant_id TEXT NOT NULL,
    name TEXT NOT NULL,
    description TEXT,
    sort_order INTEGER NOT NULL DEFAULT 0,
    is_active BOOLEAN NOT NULL DEFAULT TRUE,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Example payloads attached to a component template. Examples are deleted
-- with their template (ON DELETE CASCADE); the chapter and subchapter links
-- are optional. This table has no tenant_id column of its own.
CREATE TABLE IF NOT EXISTS oracle_template_seed_examples (
    example_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    template_id UUID NOT NULL REFERENCES oracle_component_templates(template_id) ON DELETE CASCADE,
    chapter_id UUID REFERENCES oracle_template_chapters(chapter_id),
    subchapter_id UUID REFERENCES oracle_template_subchapters(subchapter_id),
    title TEXT NOT NULL,
    example_json JSONB NOT NULL,
    quality_notes TEXT,
    is_canonical BOOLEAN NOT NULL DEFAULT FALSE,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Extend oracle_component_templates with chapter/subchapter linkage
-- (additive columns — does not alter existing rows)
-- All four columns are nullable and added with IF NOT EXISTS, so the
-- statement can be re-run safely.
ALTER TABLE oracle_component_templates
    ADD COLUMN IF NOT EXISTS chapter_id UUID REFERENCES oracle_template_chapters(chapter_id),
    ADD COLUMN IF NOT EXISTS subchapter_id UUID REFERENCES oracle_template_subchapters(subchapter_id),
    ADD COLUMN IF NOT EXISTS json_template JSONB,
    ADD COLUMN IF NOT EXISTS description TEXT;
-- ─── 2. Kimi Synthetic Data Jobs ─────────────────────────────────────────────
-- One row per synthetic-data generation job for a template.
-- status is restricted by the CHECK constraint and starts as 'pending';
-- requested_count defaults to 10 and accepted_count to 0.
CREATE TABLE IF NOT EXISTS oracle_synthetic_generation_jobs (
    job_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    template_id UUID NOT NULL REFERENCES oracle_component_templates(template_id),
    chapter_id UUID REFERENCES oracle_template_chapters(chapter_id),
    subchapter_id UUID REFERENCES oracle_template_subchapters(subchapter_id),
    model TEXT NOT NULL DEFAULT 'kimi',
    status TEXT NOT NULL DEFAULT 'pending'
        CHECK (status IN ('pending','running','completed','failed','cancelled')),
    requested_count INTEGER NOT NULL DEFAULT 10,
    accepted_count INTEGER NOT NULL DEFAULT 0,
    error_message TEXT,
    started_at TIMESTAMPTZ,
    completed_at TIMESTAMPTZ,
    created_by TEXT NOT NULL,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- ─── 3. Inventory Pipeline ───────────────────────────────────────────────────
-- One row per inventory import batch: where it came from, who submitted it,
-- its processing status, and row-level accept/reject counters.
CREATE TABLE IF NOT EXISTS inventory_import_batches (
    batch_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    source_type TEXT NOT NULL
        CHECK (source_type IN ('csv','json','api_push','manual')),
    submitted_by TEXT NOT NULL,
    status TEXT NOT NULL DEFAULT 'pending'
        CHECK (status IN ('pending','validating','processing','completed','failed','partial')),
    total_rows INTEGER NOT NULL DEFAULT 0,
    accepted_rows INTEGER NOT NULL DEFAULT 0,
    rejected_rows INTEGER NOT NULL DEFAULT 0,
    error_summary JSONB NOT NULL DEFAULT '[]'::JSONB,
    source_file_ref TEXT,
    started_at TIMESTAMPTZ,
    completed_at TIMESTAMPTZ,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Property records, optionally linked to the import batch that produced them.
-- Structured attributes (location, price bands, unit mix) are stored as JSONB.
CREATE TABLE IF NOT EXISTS inventory_properties (
    property_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    batch_id UUID REFERENCES inventory_import_batches(batch_id),
    source_id TEXT, -- external source identifier
    project_name TEXT NOT NULL,
    developer_name TEXT NOT NULL,
    location JSONB NOT NULL DEFAULT '{}'::JSONB, -- {city, district, lat, lng}
    property_type TEXT NOT NULL, -- apartment, villa, penthouse, plot, etc.
    price_bands JSONB NOT NULL DEFAULT '[]'::JSONB, -- [{minAED, maxAED, unitType}]
    unit_mix JSONB NOT NULL DEFAULT '[]'::JSONB, -- [{bedrooms, count, sizeSqft}]
    amenities TEXT[] NOT NULL DEFAULT '{}',
    status TEXT NOT NULL DEFAULT 'active'
        CHECK (status IN ('active','archived','draft','under_review')),
    validation_state JSONB NOT NULL DEFAULT '{}'::JSONB,
    ingested_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Media attached to a property; rows are removed when the property is deleted.
-- This table has created_at only (no updated_at column).
CREATE TABLE IF NOT EXISTS inventory_media_assets (
    media_asset_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    property_id UUID NOT NULL
        REFERENCES inventory_properties(property_id) ON DELETE CASCADE,
    tenant_id TEXT NOT NULL,
    media_type TEXT NOT NULL
        CHECK (media_type IN ('image','video','floorplan','brochure','360','vr')),
    url TEXT NOT NULL,
    thumbnail_url TEXT,
    sort_order INTEGER NOT NULL DEFAULT 0,
    metadata JSONB NOT NULL DEFAULT '{}'::JSONB,
    uploaded_by TEXT NOT NULL,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- ─── 4. Edge Communication Events ────────────────────────────────────────────
-- One row per communication event with a lead. channel, direction,
-- capture_mode and consent_state are each restricted to the enumerated
-- values in their CHECK constraints.
CREATE TABLE IF NOT EXISTS edge_communication_events (
    event_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    lead_id TEXT NOT NULL,
    channel TEXT NOT NULL
        CHECK (channel IN (
            'pstn','whatsapp_message','whatsapp_voice',
            'whatsapp_video','email','facebook_message',
            'instagram_message','in_app_voip','manual_note'
        )),
    direction TEXT NOT NULL
        CHECK (direction IN ('inbound','outbound')),
    provider TEXT, -- twilio, vonage, meta, etc.
    capture_mode TEXT NOT NULL
        CHECK (capture_mode IN ('direct_api','provider_routed','operator_import','operator_note')),
    consent_state TEXT NOT NULL DEFAULT 'unknown'
        CHECK (consent_state IN ('unknown','granted','denied','not_required')),
    timestamp TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    duration_seconds INTEGER,
    summary TEXT,
    raw_reference TEXT, -- provider message/call ID
    recording_ref TEXT, -- storage path or URL
    provider_metadata JSONB NOT NULL DEFAULT '{}'::JSONB,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Facts recorded about a lead, optionally tied to the communication event they
-- came from. confidence is constrained to the range [0, 1].
CREATE TABLE IF NOT EXISTS edge_communication_memory_facts (
    fact_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    lead_id TEXT NOT NULL,
    event_id UUID REFERENCES edge_communication_events(event_id),
    fact_type TEXT NOT NULL
        CHECK (fact_type IN (
            'promise','preference','follow_up_date',
            'objection','interest_signal','budget','timeline',
            'constraint','decision_maker_note','custom'
        )),
    fact_text TEXT NOT NULL,
    effective_date DATE,
    confidence NUMERIC(4,3) NOT NULL DEFAULT 1.0
        CHECK (confidence BETWEEN 0 AND 1),
    extracted_from TEXT NOT NULL
        CHECK (extracted_from IN ('transcript','message_thread','operator_note','import')),
    is_confirmed BOOLEAN NOT NULL DEFAULT FALSE,
    confirmed_by TEXT,
    confirmed_at TIMESTAMPTZ,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- ─── 5. Transcription Jobs and Segments ──────────────────────────────────────
-- Transcription job for the audio/video of one communication event; jobs are
-- deleted together with their event.
-- NOTE(review): consent_state here allows only unknown/granted/denied, while
-- edge_communication_events.consent_state also allows 'not_required' —
-- confirm the omission is intentional.
CREATE TABLE IF NOT EXISTS edge_transcription_jobs (
    transcription_job_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    event_id UUID NOT NULL
        REFERENCES edge_communication_events(event_id) ON DELETE CASCADE,
    media_type TEXT NOT NULL
        CHECK (media_type IN ('audio','video')),
    status TEXT NOT NULL DEFAULT 'pending'
        CHECK (status IN ('pending','queued','processing','completed','failed')),
    transcript_ref TEXT, -- storage path to diarized JSON
    provider TEXT NOT NULL DEFAULT 'nemoclaw',
    consent_state TEXT NOT NULL DEFAULT 'unknown'
        CHECK (consent_state IN ('unknown','granted','denied')),
    speaker_count INTEGER,
    word_count INTEGER,
    language TEXT NOT NULL DEFAULT 'en',
    error_message TEXT,
    started_at TIMESTAMPTZ,
    completed_at TIMESTAMPTZ,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Per-speaker transcript segments with millisecond offsets. Segments cascade
-- on deletion of their transcription job; the event_id reference has no
-- delete action of its own.
CREATE TABLE IF NOT EXISTS edge_transcript_segments (
    segment_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    transcription_job_id UUID NOT NULL
        REFERENCES edge_transcription_jobs(transcription_job_id) ON DELETE CASCADE,
    event_id UUID NOT NULL REFERENCES edge_communication_events(event_id),
    speaker_label TEXT NOT NULL, -- SPEAKER_00, SPEAKER_01, etc.
    start_ms INTEGER NOT NULL,
    end_ms INTEGER NOT NULL,
    text TEXT NOT NULL,
    confidence NUMERIC(4,3) NOT NULL DEFAULT 1.0,
    is_agent_turn BOOLEAN NOT NULL DEFAULT FALSE,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- ─── 6. User Calendar Events ─────────────────────────────────────────────────
-- Calendar entries owned by a user, optionally linked to a lead and to the
-- communication event they originated from. reminder_minutes defaults to a
-- single 15-minute reminder.
CREATE TABLE IF NOT EXISTS user_calendar_events (
    calendar_event_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    owner_user_id TEXT NOT NULL,
    lead_id TEXT,
    source_event_id UUID REFERENCES edge_communication_events(event_id),
    title TEXT NOT NULL,
    description TEXT,
    start_at TIMESTAMPTZ NOT NULL,
    end_at TIMESTAMPTZ NOT NULL,
    all_day BOOLEAN NOT NULL DEFAULT FALSE,
    status TEXT NOT NULL DEFAULT 'confirmed'
        CHECK (status IN ('tentative','confirmed','done','cancelled')),
    reminder_minutes INTEGER[] NOT NULL DEFAULT '{15}'::INTEGER[],
    created_by TEXT NOT NULL
        CHECK (created_by IN ('user','nemoclaw_suggested','operator_import')),
    is_nemoclaw_confirmed BOOLEAN NOT NULL DEFAULT FALSE,
    location TEXT,
    metadata JSONB NOT NULL DEFAULT '{}'::JSONB,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- ─── 7. Insight Recommendations ──────────────────────────────────────────────
-- Recommended actions for a lead, optionally linked to a source communication
-- event. Type, target system and status are restricted by CHECK constraints.
CREATE TABLE IF NOT EXISTS insight_recommendations (
    recommendation_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    lead_id TEXT NOT NULL,
    source_event_id UUID REFERENCES edge_communication_events(event_id),
    recommendation_type TEXT NOT NULL
        CHECK (recommendation_type IN (
            'follow_up_call','send_message',
            'schedule_meeting','update_crm',
            'update_qd_score','send_property_info',
            'escalate','custom'
        )),
    summary TEXT NOT NULL,
    suggested_action TEXT NOT NULL,
    target_system TEXT NOT NULL
        CHECK (target_system IN ('crm','calendar','qd_score','whatsapp','email','operator')),
    status TEXT NOT NULL DEFAULT 'pending'
        CHECK (status IN ('pending','accepted','dismissed','acted_upon')),
    confidence NUMERIC(4,3) NOT NULL DEFAULT 0.8,
    acted_by TEXT,
    acted_at TIMESTAMPTZ,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- ─── 8. Admin Action Events ───────────────────────────────────────────────────
-- Log of admin actions keyed by a client-supplied idempotency key.
-- NOTE(review): action_id is UNIQUE across the whole table, not per tenant, so
-- two tenants cannot reuse the same key — confirm this is intended.
CREATE TABLE IF NOT EXISTS admin_action_events (
    action_event_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    action_id TEXT NOT NULL UNIQUE, -- idempotency key from client
    action_type TEXT NOT NULL
        CHECK (action_type IN (
            'user_create','user_deactivate','user_role_change',
            'tenant_config_update','inventory_batch_approve',
            'inventory_batch_reject','template_publish','template_archive',
            'synthetic_job_trigger','synthetic_job_cancel',
            'system_health_check','queue_drain','debug_event_export',
            'install_register','install_deregister'
        )),
    target_type TEXT NOT NULL,
    target_id TEXT NOT NULL,
    requested_by TEXT NOT NULL,
    payload JSONB NOT NULL DEFAULT '{}'::JSONB,
    status TEXT NOT NULL DEFAULT 'pending'
        CHECK (status IN ('pending','processing','completed','failed','rejected')),
    result_message TEXT,
    result_artifacts JSONB NOT NULL DEFAULT '[]'::JSONB,
    executed_at TIMESTAMPTZ,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- ─── 9. Surface Sessions (cross-surface telemetry) ───────────────────────────
-- One row per user session on a client surface; surface_type is limited to the
-- five surfaces listed in the CHECK. ended_at stays NULL until it is set.
CREATE TABLE IF NOT EXISTS surface_sessions (
    session_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    user_id TEXT NOT NULL,
    surface_type TEXT NOT NULL
        CHECK (surface_type IN (
            'webos','ipad','android_tablet',
            'iphone_edge','android_phone_edge'
        )),
    app_version TEXT NOT NULL,
    started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    ended_at TIMESTAMPTZ,
    last_active_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    screen_sequence TEXT[] NOT NULL DEFAULT '{}',
    metadata JSONB NOT NULL DEFAULT '{}'::JSONB
);
-- ─── Indexes ──────────────────────────────────────────────────────────────────
-- Template taxonomy
CREATE INDEX IF NOT EXISTS idx_tmpl_chapters_tenant
    ON oracle_template_chapters(tenant_id, is_active);
CREATE INDEX IF NOT EXISTS idx_tmpl_subchapters_chapter
    ON oracle_template_subchapters(chapter_id, is_active);
CREATE INDEX IF NOT EXISTS idx_tmpl_seed_examples_template
    ON oracle_template_seed_examples(template_id);
CREATE INDEX IF NOT EXISTS idx_tmpl_seed_examples_chapter
    ON oracle_template_seed_examples(chapter_id);
-- Synthetic jobs
CREATE INDEX IF NOT EXISTS idx_synthetic_jobs_tenant
    ON oracle_synthetic_generation_jobs(tenant_id, status);
CREATE INDEX IF NOT EXISTS idx_synthetic_jobs_template
    ON oracle_synthetic_generation_jobs(template_id);
-- Inventory
CREATE INDEX IF NOT EXISTS idx_inv_batches_tenant
    ON inventory_import_batches(tenant_id, status);
CREATE INDEX IF NOT EXISTS idx_inv_props_tenant
    ON inventory_properties(tenant_id, status);
CREATE INDEX IF NOT EXISTS idx_inv_props_batch
    ON inventory_properties(batch_id);
CREATE INDEX IF NOT EXISTS idx_inv_media_property
    ON inventory_media_assets(property_id);
-- Edge communication
CREATE INDEX IF NOT EXISTS idx_edge_events_lead
    ON edge_communication_events(tenant_id, lead_id, timestamp DESC);
CREATE INDEX IF NOT EXISTS idx_edge_events_channel
    ON edge_communication_events(channel, timestamp DESC);
CREATE INDEX IF NOT EXISTS idx_edge_memory_lead
    ON edge_communication_memory_facts(tenant_id, lead_id, created_at DESC);
CREATE INDEX IF NOT EXISTS idx_edge_memory_event
    ON edge_communication_memory_facts(event_id);
-- Transcription
CREATE INDEX IF NOT EXISTS idx_transcription_jobs_event
    ON edge_transcription_jobs(event_id);
CREATE INDEX IF NOT EXISTS idx_transcription_jobs_status
    ON edge_transcription_jobs(tenant_id, status);
CREATE INDEX IF NOT EXISTS idx_transcript_segments_job
    ON edge_transcript_segments(transcription_job_id, start_ms);
-- Calendar
CREATE INDEX IF NOT EXISTS idx_calendar_events_owner
    ON user_calendar_events(tenant_id, owner_user_id, start_at);
CREATE INDEX IF NOT EXISTS idx_calendar_events_lead
    ON user_calendar_events(lead_id, start_at);
-- Insights
CREATE INDEX IF NOT EXISTS idx_insights_lead
    ON insight_recommendations(tenant_id, lead_id, created_at DESC);
CREATE INDEX IF NOT EXISTS idx_insights_status
    ON insight_recommendations(status, created_at DESC);
-- Admin
CREATE INDEX IF NOT EXISTS idx_admin_actions_tenant
    ON admin_action_events(tenant_id, created_at DESC);
CREATE INDEX IF NOT EXISTS idx_admin_actions_type
    ON admin_action_events(action_type, status);
-- Surface sessions
CREATE INDEX IF NOT EXISTS idx_surface_sessions_user
    ON surface_sessions(tenant_id, user_id, started_at DESC);
CREATE INDEX IF NOT EXISTS idx_surface_sessions_type
    ON surface_sessions(surface_type, started_at DESC);

View File

@@ -0,0 +1,236 @@
-- Oracle Canvas Schema — Section 16.4 of the Oracle Architecture Document v1.0
-- Run this against your PostgreSQL database to create the Oracle persistence layer.
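-- Requires: gen_random_uuid() (built in since PostgreSQL 13; otherwise provided by the pgcrypto extension enabled below) and JSONB support. Targets PostgreSQL 14+.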
-- ── Prerequisites ─────────────────────────────────────────────────────────────
-- pgcrypto supplies gen_random_uuid(), which the tables in this schema use as
-- the default for their UUID primary keys.
CREATE EXTENSION IF NOT EXISTS "pgcrypto";
-- ── Core tables ───────────────────────────────────────────────────────────────
-- Canvas pages. Each page is either a 'main' page or a 'fork' and tracks its
-- head/base revision numbers; sharing_policy defaults to direct-fork-only,
-- no reshare, private forks.
CREATE TABLE IF NOT EXISTS oracle_canvas_pages (
    page_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    owner_id TEXT NOT NULL,
    branch_id TEXT NOT NULL,
    branch_name TEXT NOT NULL DEFAULT 'main',
    page_type TEXT NOT NULL DEFAULT 'main'
        CHECK (page_type IN ('main', 'fork')),
    title TEXT NOT NULL DEFAULT 'Untitled Canvas',
    is_shared BOOLEAN NOT NULL DEFAULT FALSE,
    head_revision INTEGER NOT NULL DEFAULT 0,
    base_revision INTEGER NOT NULL DEFAULT 0,
    sharing_policy JSONB NOT NULL DEFAULT '{"shareMode":"direct_fork_only","allowReshare":false,"defaultForkVisibility":"private"}'::JSONB,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Revision history for a page. Revision numbers are unique within a page, and
-- each revision stores a JSONB snapshot of the page's components.
-- idempotency_key, when present, is unique across the whole table.
CREATE TABLE IF NOT EXISTS oracle_canvas_page_revisions (
    revision_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    page_id UUID NOT NULL
        REFERENCES oracle_canvas_pages(page_id) ON DELETE CASCADE,
    tenant_id TEXT NOT NULL,
    revision_number INTEGER NOT NULL,
    commit_kind TEXT NOT NULL
        CHECK (commit_kind IN ('prompt', 'merge', 'rollback', 'manual_edit')),
    commit_summary TEXT,
    actor_id TEXT NOT NULL,
    execution_id UUID,
    merge_request_id UUID,
    components_snapshot JSONB NOT NULL DEFAULT '[]'::JSONB,
    idempotency_key TEXT UNIQUE,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    UNIQUE (page_id, revision_number)
);
-- Components placed on a canvas page; removed with their page. Several JSONB
-- columns (data_source_descriptor, provenance, rendering_hints, layout,
-- access_controls) are required and have no default, so inserts must supply them.
CREATE TABLE IF NOT EXISTS oracle_canvas_components (
    component_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    page_id UUID NOT NULL
        REFERENCES oracle_canvas_pages(page_id) ON DELETE CASCADE,
    tenant_id TEXT NOT NULL,
    type TEXT NOT NULL,
    title TEXT NOT NULL,
    description TEXT,
    version INTEGER NOT NULL DEFAULT 1,
    lifecycle_state TEXT NOT NULL DEFAULT 'active'
        CHECK (lifecycle_state IN ('draft','active','superseded','archived','revoked')),
    data_source_descriptor JSONB NOT NULL,
    data_rows JSONB NOT NULL DEFAULT '[]'::JSONB,
    visualization_parameters JSONB NOT NULL DEFAULT '{}'::JSONB,
    data_bindings JSONB NOT NULL DEFAULT '{}'::JSONB,
    provenance JSONB NOT NULL,
    rendering_hints JSONB NOT NULL,
    layout JSONB NOT NULL,
    access_controls JSONB NOT NULL,
    style_signature JSONB NOT NULL DEFAULT '{}'::JSONB,
    validation_state JSONB NOT NULL DEFAULT '{}'::JSONB,
    audit_log TEXT[] NOT NULL DEFAULT '{}',
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Upgrade path for databases whose oracle_canvas_components table predates the
-- data_rows column: add the column, then backfill it from each page's latest
-- revision snapshot. The UPDATE touches only rows still holding the default
-- '[]', so re-running the script does not overwrite populated rows.
ALTER TABLE oracle_canvas_components
    ADD COLUMN IF NOT EXISTS data_rows JSONB NOT NULL DEFAULT '[]'::JSONB;

WITH latest_revisions AS (
    -- DISTINCT ON with revision_number DESC keeps the newest revision per page/tenant.
    SELECT DISTINCT ON (page_id, tenant_id)
        page_id,
        tenant_id,
        components_snapshot
    FROM oracle_canvas_page_revisions
    ORDER BY page_id, tenant_id, revision_number DESC
),
snapshot_components AS (
    SELECT
        latest_revisions.page_id,
        latest_revisions.tenant_id,
        component->>'componentId' AS component_id,
        COALESCE(component->'dataRows', '[]'::jsonb) AS data_rows
    FROM latest_revisions,
        -- jsonb_array_elements raises on a non-array value; substitute an empty
        -- array so one malformed snapshot cannot abort the whole schema script.
        jsonb_array_elements(
            CASE
                WHEN jsonb_typeof(latest_revisions.components_snapshot) = 'array'
                    THEN latest_revisions.components_snapshot
                ELSE '[]'::jsonb
            END
        ) AS component
)
UPDATE oracle_canvas_components occ
SET data_rows = snapshot_components.data_rows
FROM snapshot_components
WHERE occ.page_id = snapshot_components.page_id
    AND occ.tenant_id = snapshot_components.tenant_id
    AND occ.component_id::text = snapshot_components.component_id
    AND occ.data_rows = '[]'::jsonb
    -- Copy only real arrays: a JSON null or object under dataRows is not caught
    -- by COALESCE and would otherwise replace the '[]' default.
    AND jsonb_typeof(snapshot_components.data_rows) = 'array'
    AND snapshot_components.data_rows <> '[]'::jsonb;
-- One row per prompt execution against a page; removed with the page.
-- client_request_id, when present, is unique across the whole table.
CREATE TABLE IF NOT EXISTS oracle_prompt_executions (
    execution_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    page_id UUID NOT NULL
        REFERENCES oracle_canvas_pages(page_id) ON DELETE CASCADE,
    branch_id TEXT NOT NULL,
    actor_id TEXT NOT NULL,
    prompt TEXT NOT NULL,
    intent_class TEXT NOT NULL DEFAULT 'analytical',
    status TEXT NOT NULL DEFAULT 'received',
    model_runtime TEXT NOT NULL DEFAULT 'nemoclaw_hosted',
    semantic_model_version TEXT NOT NULL DEFAULT 'oracle_semantic_v1',
    retrieval_plan JSONB,
    visualization_plan JSONB,
    warnings TEXT[] NOT NULL DEFAULT '{}',
    summary TEXT,
    components_created TEXT[] NOT NULL DEFAULT '{}',
    client_request_id TEXT UNIQUE,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    completed_at TIMESTAMPTZ
);
-- Catalog of component templates per tenant. status, origin and version are
-- free-text columns with defaults (no CHECK constraints); use_count starts at 0.
CREATE TABLE IF NOT EXISTS oracle_component_templates (
    template_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    name TEXT NOT NULL,
    category TEXT NOT NULL,
    status TEXT NOT NULL DEFAULT 'catalog_active',
    origin TEXT NOT NULL DEFAULT 'premade',
    version TEXT NOT NULL DEFAULT '1.0.0',
    accepted_shapes TEXT[] NOT NULL DEFAULT '{}',
    style_signature JSONB DEFAULT NULL,
    validation_state JSONB DEFAULT NULL,
    provenance JSONB DEFAULT NULL,
    use_count INTEGER NOT NULL DEFAULT 0,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Links a fork page to the source page and revision it was forked from.
-- Neither page reference cascades on delete, and this table has no tenant_id column.
CREATE TABLE IF NOT EXISTS oracle_forks (
    fork_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    source_page_id UUID NOT NULL REFERENCES oracle_canvas_pages(page_id),
    source_branch_id TEXT NOT NULL,
    source_revision INTEGER NOT NULL,
    fork_page_id UUID NOT NULL REFERENCES oracle_canvas_pages(page_id),
    fork_branch_id TEXT NOT NULL,
    recipient_user_id TEXT NOT NULL,
    created_by TEXT NOT NULL,
    status TEXT NOT NULL DEFAULT 'active'
        CHECK (status IN ('active','merged','closed')),
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Merge requests from a source page/branch into a target page/branch, with the
-- revisions they were opened against, detected conflicts, and review outcome.
CREATE TABLE IF NOT EXISTS oracle_merge_requests (
    merge_request_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    source_page_id UUID NOT NULL REFERENCES oracle_canvas_pages(page_id),
    source_branch_id TEXT NOT NULL,
    source_head_revision INTEGER NOT NULL,
    target_page_id UUID NOT NULL REFERENCES oracle_canvas_pages(page_id),
    target_branch_id TEXT NOT NULL,
    target_base_revision INTEGER NOT NULL,
    title TEXT NOT NULL,
    description TEXT,
    status TEXT NOT NULL DEFAULT 'open'
        CHECK (status IN ('open','changes_requested','approved','merged','closed')),
    conflicts JSONB NOT NULL DEFAULT '[]'::JSONB,
    diff_summary JSONB DEFAULT NULL,
    resolutions JSONB DEFAULT NULL,
    created_by TEXT NOT NULL,
    reviewed_by TEXT,
    reviewer_comment TEXT,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Lineage edges: a source (kind + id) transformed into a produced artifact
-- (kind + id). Source and produced ids are plain TEXT with no foreign keys.
CREATE TABLE IF NOT EXISTS oracle_lineage_records (
    lineage_record_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    source_kind TEXT NOT NULL,
    source_id TEXT NOT NULL,
    transformation_type TEXT NOT NULL,
    transformation_spec_hash TEXT,
    produced_kind TEXT NOT NULL,
    produced_id TEXT NOT NULL,
    policy_snapshot_id TEXT,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Audit trail: which actor performed which action on which entity, tagged
-- with a required correlation_id and an optional execution_id.
CREATE TABLE IF NOT EXISTS oracle_audit_events (
    audit_event_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id TEXT NOT NULL,
    entity_type TEXT NOT NULL,
    entity_id TEXT NOT NULL,
    action TEXT NOT NULL,
    actor_id TEXT NOT NULL,
    actor_type TEXT NOT NULL DEFAULT 'user',
    correlation_id TEXT NOT NULL,
    execution_id UUID,
    details JSONB NOT NULL DEFAULT '{}'::JSONB,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- ── Indexes ───────────────────────────────────────────────────────────────────
-- Canvas pages: tenant lookup, branch lookup
CREATE INDEX IF NOT EXISTS idx_oracle_pages_tenant
    ON oracle_canvas_pages(tenant_id);
CREATE INDEX IF NOT EXISTS idx_oracle_pages_owner
    ON oracle_canvas_pages(owner_id);
CREATE INDEX IF NOT EXISTS idx_oracle_pages_branch
    ON oracle_canvas_pages(branch_id);
-- Revisions: page-scoped revision queries
CREATE INDEX IF NOT EXISTS idx_oracle_revisions_page
    ON oracle_canvas_page_revisions(page_id, revision_number DESC);
CREATE INDEX IF NOT EXISTS idx_oracle_revisions_tenant
    ON oracle_canvas_page_revisions(tenant_id);
CREATE INDEX IF NOT EXISTS idx_oracle_revisions_execution
    ON oracle_canvas_page_revisions(execution_id);
-- Components: page-scoped, lifecycle
CREATE INDEX IF NOT EXISTS idx_oracle_components_page
    ON oracle_canvas_components(page_id, lifecycle_state);
CREATE INDEX IF NOT EXISTS idx_oracle_components_tenant
    ON oracle_canvas_components(tenant_id);
-- Prompt executions: page/actor lookup
CREATE INDEX IF NOT EXISTS idx_oracle_executions_page
    ON oracle_prompt_executions(page_id, created_at DESC);
CREATE INDEX IF NOT EXISTS idx_oracle_executions_actor
    ON oracle_prompt_executions(actor_id, created_at DESC);
-- Templates: tenant + category + status
CREATE INDEX IF NOT EXISTS idx_oracle_templates_tenant_cat
    ON oracle_component_templates(tenant_id, category, status);
-- Forks: source and recipient lookup
CREATE INDEX IF NOT EXISTS idx_oracle_forks_source
    ON oracle_forks(source_page_id);
CREATE INDEX IF NOT EXISTS idx_oracle_forks_recipient
    ON oracle_forks(recipient_user_id);
-- Merge requests: target/source page, status
CREATE INDEX IF NOT EXISTS idx_oracle_mrs_target
    ON oracle_merge_requests(target_page_id, status);
CREATE INDEX IF NOT EXISTS idx_oracle_mrs_source
    ON oracle_merge_requests(source_page_id, status);
CREATE INDEX IF NOT EXISTS idx_oracle_mrs_tenant
    ON oracle_merge_requests(tenant_id, status);
-- Lineage: source/produced lookups
CREATE INDEX IF NOT EXISTS idx_oracle_lineage_source
    ON oracle_lineage_records(source_kind, source_id);
CREATE INDEX IF NOT EXISTS idx_oracle_lineage_produced
    ON oracle_lineage_records(produced_kind, produced_id);
CREATE INDEX IF NOT EXISTS idx_oracle_lineage_tenant
    ON oracle_lineage_records(tenant_id);
-- Audit: entity lookup, correlation lookup
CREATE INDEX IF NOT EXISTS idx_oracle_audit_entity
    ON oracle_audit_events(entity_type, entity_id, created_at DESC);
CREATE INDEX IF NOT EXISTS idx_oracle_audit_correlation
    ON oracle_audit_events(correlation_id);
CREATE INDEX IF NOT EXISTS idx_oracle_audit_tenant
    ON oracle_audit_events(tenant_id, created_at DESC);

View File

@@ -0,0 +1,512 @@
"""
oracle/semantic_catalog.py
Business-semantic layer for Oracle's natural DB planner.
This sits between raw schema introspection and SQL generation. It defines:
- authoritative tables and columns for business concepts
- deprecated or sparse fields the planner should avoid
- preferred join paths
- compact semantic context for the planner prompt
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any
class Confidence:
    """String labels grading how far the planner should trust a field."""

    RELIABLE = "reliable"
    PARTIAL = "partial"
    SPARSE = "sparse"
    DEPRECATED = "deprecated"
@dataclass(frozen=True)
class FieldDescriptor:
    """Immutable description of one ``table.column`` and its trust level.

    ``confidence`` holds one of the ``Confidence`` labels; ``notes``,
    ``valid_values`` and ``examples`` are optional extra planner context.
    """

    table: str
    column: str
    confidence: str
    description: str
    notes: str = ""
    valid_values: tuple[str, ...] = ()
    examples: tuple[str, ...] = ()
@dataclass(frozen=True)
class JoinPath:
    """Immutable join edge ``from_table.from_col -> to_table.to_col``.

    ``join_type`` defaults to ``"INNER"``; ``notes`` is optional free text.
    """

    from_table: str
    from_col: str
    to_table: str
    to_col: str
    join_type: str = "INNER"
    notes: str = ""
@dataclass
class ConceptDescriptor:
    """Mutable record tying a business concept to its schema fields.

    Lists the fields that are authoritative for the concept, the fields
    marked as deprecated for it, and the preferred join paths. The list
    attributes with defaults get a fresh empty list per instance.
    """

    concept_id: str
    label: str
    description: str
    authoritative_fields: list[FieldDescriptor]
    deprecated_fields: list[FieldDescriptor] = field(default_factory=list)
    preferred_join_paths: list[JoinPath] = field(default_factory=list)
    usage_notes: str = ""
# Version tag for this semantic catalog. The embedded date/sequence suffix
# suggests it is bumped whenever the catalog content changes (TODO confirm).
CATALOG_VERSION = "velocity_semantic_v2026_04_25_01"
@dataclass(frozen=True)
class ColumnMetadata:
    """Immutable planner guidance for a single ``table.column``.

    ``topic`` groups related columns, ``reliability`` holds one of the
    ``Confidence`` labels, and ``avoid`` marks columns the planner
    should not use.
    """

    table: str
    column: str
    topic: str
    meaning: str
    reliability: str
    valid_values: tuple[str, ...] = ()
    examples: tuple[str, ...] = ()
    usage: str = ""
    avoid: bool = False
# Permitted values of intel_qd_scores.score_type; "QD" is deliberately not one of them.
VALID_QD_SCORE_TYPES: tuple[str, ...] = (
    "overall",
    "intent",
    "engagement",
    "urgency",
    "financial_qualification",
)
# Per-column planner guidance, grouped by topic. Positional arguments follow
# the ColumnMetadata field order: table, column, topic, meaning, reliability.
COLUMN_METADATA: list[ColumnMetadata] = [
    # --- qd_score ---------------------------------------------------------
    ColumnMetadata(
        "intel_qd_scores",
        "score_type",
        "qd_score",
        "Score family/category. There is no score_type value named QD.",
        Confidence.RELIABLE,
        valid_values=VALID_QD_SCORE_TYPES,
        examples=("overall", "intent", "engagement"),
        usage=(
            "For generic QD score prompts, prefer score_type = 'overall'. "
            "For specific intent/engagement/urgency/financial prompts, use the matching valid value. "
            "Never filter score_type = 'QD'."
        ),
    ),
    ColumnMetadata(
        "intel_qd_scores",
        "current_value",
        "qd_score",
        "Authoritative numeric score value for the selected score_type.",
        Confidence.RELIABLE,
        examples=("98.0", "72.4"),
        usage="Rank, sort, average, or threshold QD-style scores with this column.",
    ),
    ColumnMetadata(
        "intel_qd_scores",
        "computed_at",
        "qd_score",
        "Timestamp when the score was computed.",
        Confidence.RELIABLE,
        # The trailing comma is required: without it this is a plain str, not
        # a one-element tuple, and iterating it would yield single characters.
        examples=("2026-04-18T00:00:00",),
        usage="Use for score freshness, not client contact recency.",
    ),
    # --- contact_recency --------------------------------------------------
    ColumnMetadata(
        "intel_interactions",
        "happened_at",
        "contact_recency",
        "Primary timestamp for client contact and interaction recency.",
        Confidence.RELIABLE,
        usage="Use for contacted, last contacted, recent contact, activity, and timeline prompts.",
    ),
    ColumnMetadata(
        "read_last_contacted",
        "last_contact_at",
        "contact_recency",
        "Precomputed per-client last contact timestamp.",
        Confidence.RELIABLE,
        usage="Prefer for client-level last-contact summaries when this read model is available.",
    ),
    ColumnMetadata(
        "edge_communication_events",
        "timestamp",
        "contact_recency",
        "Legacy/sparse event timestamp that is not reliable for Oracle CRM recency.",
        Confidence.SPARSE,
        usage="Do not use for contact prompts.",
        avoid=True,
    ),
    ColumnMetadata(
        "crm_property_interests",
        "last_discussed_at",
        "contact_recency",
        "Sparse legacy field; property interest does not prove recent contact.",
        Confidence.SPARSE,
        usage="Do not use as the primary recency filter.",
        avoid=True,
    ),
    # --- property_interest ------------------------------------------------
    ColumnMetadata(
        "crm_property_interests",
        "project_name",
        "property_interest",
        "Human-readable project/property name attached to a client's interest.",
        Confidence.RELIABLE,
        examples=("Atri Surya Toron", "Godrej Elevate"),
        usage="Use ILIKE filters for property/project scoped prompts.",
    ),
    ColumnMetadata(
        "crm_property_interests",
        "interest_level",
        "property_interest",
        "Interest strength label or score imported from CRM enrichment.",
        Confidence.RELIABLE,
        usage="Use with project_name and person_id to rank interested clients or properties.",
    ),
]
CONCEPTS: list[ConceptDescriptor] = [
ConceptDescriptor(
concept_id="person_identity",
label="Client Identity",
description="Canonical identity record for a person in CRM.",
authoritative_fields=[
FieldDescriptor("crm_people", "person_id", Confidence.RELIABLE, "Primary key"),
FieldDescriptor("crm_people", "full_name", Confidence.RELIABLE, "Display name"),
FieldDescriptor("crm_people", "primary_email", Confidence.RELIABLE, "Email"),
FieldDescriptor("crm_people", "primary_phone", Confidence.RELIABLE, "Phone"),
FieldDescriptor("crm_people", "persona_labels", Confidence.PARTIAL, "Buyer persona labels"),
],
usage_notes=(
"Anchor client-level queries on crm_people.person_id. "
"Treat crm_people as the identity source of truth."
),
),
ConceptDescriptor(
concept_id="lead_funnel",
label="Lead Funnel",
description="Lead ownership, stage, status, and urgency.",
authoritative_fields=[
FieldDescriptor("crm_leads", "lead_id", Confidence.RELIABLE, "Primary key"),
FieldDescriptor("crm_leads", "person_id", Confidence.RELIABLE, "FK to crm_people"),
FieldDescriptor("crm_leads", "stage", Confidence.RELIABLE, "Current funnel stage"),
FieldDescriptor("crm_leads", "status", Confidence.RELIABLE, "Lead status"),
FieldDescriptor("crm_leads", "assigned_user_id", Confidence.RELIABLE, "Owning user"),
FieldDescriptor("crm_leads", "budget_band", Confidence.PARTIAL, "Budget band"),
FieldDescriptor("crm_leads", "urgency", Confidence.PARTIAL, "Urgency tag"),
],
preferred_join_paths=[
JoinPath("crm_people", "person_id", "crm_leads", "person_id"),
],
),
ConceptDescriptor(
concept_id="qd_score",
label="QD Score",
description="Qualification / Desire score source of truth.",
authoritative_fields=[
FieldDescriptor("intel_qd_scores", "person_id", Confidence.RELIABLE, "FK to crm_people"),
FieldDescriptor("intel_qd_scores", "current_value", Confidence.RELIABLE, "Authoritative QD score"),
FieldDescriptor(
"intel_qd_scores",
"score_type",
Confidence.RELIABLE,
"Score family",
notes="Valid values are overall, intent, engagement, urgency, financial_qualification. There is no value named QD.",
valid_values=VALID_QD_SCORE_TYPES,
),
FieldDescriptor("intel_qd_scores", "computed_at", Confidence.RELIABLE, "Score timestamp"),
],
deprecated_fields=[
FieldDescriptor("crm_people", "engagement_score", Confidence.DEPRECATED, "Not QD"),
FieldDescriptor("crm_leads", "engagement_score", Confidence.DEPRECATED, "Not QD"),
FieldDescriptor("intel_interactions", "engagement_score", Confidence.DEPRECATED, "Not QD"),
],
usage_notes=(
"When a prompt mentions QD, qualification, desire, or intent score, "
"use intel_qd_scores.current_value. Do not substitute engagement_score. "
"Do not filter score_type = 'QD'. For generic QD prompts, use score_type = 'overall'. "
"Use intent, engagement, urgency, or financial_qualification only when the prompt asks for that specific family."
),
),
ConceptDescriptor(
concept_id="communication_events",
label="Communication Events",
description="Authoritative recent-contact and interaction history source.",
authoritative_fields=[
FieldDescriptor("intel_interactions", "interaction_id", Confidence.RELIABLE, "Primary key"),
FieldDescriptor("intel_interactions", "person_id", Confidence.RELIABLE, "FK to crm_people"),
FieldDescriptor("intel_interactions", "channel", Confidence.RELIABLE, "Interaction channel"),
FieldDescriptor("intel_interactions", "interaction_type", Confidence.RELIABLE, "Interaction type"),
FieldDescriptor("intel_interactions", "happened_at", Confidence.RELIABLE, "Primary recency timestamp"),
FieldDescriptor("intel_interactions", "summary", Confidence.RELIABLE, "Interaction summary"),
],
deprecated_fields=[
FieldDescriptor("edge_communication_events", "timestamp", Confidence.SPARSE, "Do not use for recency"),
FieldDescriptor("crm_property_interests", "last_discussed_at", Confidence.SPARSE, "Do not use for recency"),
],
preferred_join_paths=[
JoinPath("crm_people", "person_id", "intel_interactions", "person_id", "LEFT"),
JoinPath("intel_interactions", "interaction_id", "intel_calls", "interaction_id", "LEFT"),
JoinPath("intel_interactions", "interaction_id", "intel_messages", "interaction_id", "LEFT"),
JoinPath("intel_interactions", "interaction_id", "intel_emails", "interaction_id", "LEFT"),
],
usage_notes=(
"For recent contact, last contact, or contacted us, prefer intel_interactions.happened_at. "
"Use read_last_contacted if available for precomputed summaries."
),
),
ConceptDescriptor(
concept_id="last_contact_read_model",
label="Last Contact Read Model",
description="Per-person last-contact summary materialization.",
authoritative_fields=[
FieldDescriptor("read_last_contacted", "person_id", Confidence.RELIABLE, "FK to crm_people"),
FieldDescriptor("read_last_contacted", "last_contact_at", Confidence.RELIABLE, "Last contact time"),
FieldDescriptor("read_last_contacted", "last_channel", Confidence.RELIABLE, "Last contact channel"),
FieldDescriptor("read_last_contacted", "days_since_contact", Confidence.RELIABLE, "Recency in days"),
FieldDescriptor("read_last_contacted", "interactions_last_90d", Confidence.RELIABLE, "Recent interaction volume"),
],
deprecated_fields=[
FieldDescriptor("crm_property_interests", "last_discussed_at", Confidence.DEPRECATED, "Stale field"),
],
usage_notes=(
"If this table exists, prefer it for last-contact prompts over rebuilding recency from raw interactions."
),
),
ConceptDescriptor(
concept_id="next_best_action",
label="Next Best Action",
description="Precomputed follow-up action recommendations.",
authoritative_fields=[
FieldDescriptor("read_next_best_action", "person_id", Confidence.RELIABLE, "FK to crm_people"),
FieldDescriptor("read_next_best_action", "action_label", Confidence.RELIABLE, "Human-readable action"),
FieldDescriptor("read_next_best_action", "urgency", Confidence.RELIABLE, "Urgency"),
FieldDescriptor("read_next_best_action", "recommended_channel", Confidence.RELIABLE, "Suggested channel"),
FieldDescriptor("read_next_best_action", "execute_within_hours", Confidence.RELIABLE, "Action SLA"),
],
),
ConceptDescriptor(
concept_id="property_interest",
label="Property Interest",
description="Client-level project or unit interest records.",
authoritative_fields=[
FieldDescriptor("crm_property_interests", "interest_id", Confidence.RELIABLE, "Primary key"),
FieldDescriptor("crm_property_interests", "person_id", Confidence.RELIABLE, "FK to crm_people"),
FieldDescriptor("crm_property_interests", "project_id", Confidence.PARTIAL, "FK to inventory_projects"),
FieldDescriptor("crm_property_interests", "project_name", Confidence.RELIABLE, "Primary text project scope"),
FieldDescriptor("crm_property_interests", "unit_id", Confidence.PARTIAL, "FK to inventory_units"),
FieldDescriptor("crm_property_interests", "interest_level", Confidence.RELIABLE, "Interest strength"),
FieldDescriptor("crm_property_interests", "configuration_preference", Confidence.PARTIAL, "Configuration"),
FieldDescriptor("crm_property_interests", "budget_min", Confidence.PARTIAL, "Minimum budget"),
FieldDescriptor("crm_property_interests", "budget_max", Confidence.PARTIAL, "Maximum budget"),
FieldDescriptor("crm_property_interests", "financing_plan", Confidence.PARTIAL, "Financing plan"),
FieldDescriptor("crm_property_interests", "notes", Confidence.PARTIAL, "Free-text notes"),
],
deprecated_fields=[
FieldDescriptor("crm_property_interests", "last_discussed_at", Confidence.DEPRECATED, "Do not use for recency"),
],
preferred_join_paths=[
JoinPath("crm_people", "person_id", "crm_property_interests", "person_id", "LEFT"),
JoinPath("crm_property_interests", "project_id", "inventory_projects", "project_id", "LEFT"),
],
usage_notes=(
"For prompts scoped to a specific property or project, filter on crm_property_interests.project_name "
"case-insensitively. For top properties, group by project_name and count distinct person_id."
),
),
ConceptDescriptor(
concept_id="opportunities",
label="Opportunities",
description="Deal pipeline records.",
authoritative_fields=[
FieldDescriptor("crm_opportunities", "opportunity_id", Confidence.RELIABLE, "Primary key"),
FieldDescriptor("crm_opportunities", "lead_id", Confidence.RELIABLE, "FK to crm_leads"),
FieldDescriptor("crm_opportunities", "project_id", Confidence.RELIABLE, "FK to inventory_projects"),
FieldDescriptor("crm_opportunities", "stage", Confidence.RELIABLE, "Opportunity stage"),
FieldDescriptor("crm_opportunities", "value", Confidence.RELIABLE, "Deal value"),
FieldDescriptor("crm_opportunities", "probability", Confidence.PARTIAL, "Probability"),
FieldDescriptor("crm_opportunities", "next_action", Confidence.RELIABLE, "Next action"),
],
preferred_join_paths=[
JoinPath("crm_people", "person_id", "crm_leads", "person_id"),
JoinPath("crm_leads", "lead_id", "crm_opportunities", "lead_id", "LEFT"),
JoinPath("crm_opportunities", "project_id", "inventory_projects", "project_id", "LEFT"),
],
),
ConceptDescriptor(
concept_id="site_visits",
label="Site Visits",
description="Physical visit records and outcomes.",
authoritative_fields=[
FieldDescriptor("intel_visits", "visit_id", Confidence.RELIABLE, "Primary key"),
FieldDescriptor("intel_visits", "person_id", Confidence.RELIABLE, "FK to crm_people"),
FieldDescriptor("intel_visits", "project_id", Confidence.PARTIAL, "FK to inventory_projects"),
FieldDescriptor("intel_visits", "project_name", Confidence.PARTIAL, "Project name"),
FieldDescriptor("intel_visits", "visited_at", Confidence.RELIABLE, "Visit timestamp"),
FieldDescriptor("intel_visits", "visit_notes", Confidence.RELIABLE, "Visit notes"),
],
),
ConceptDescriptor(
concept_id="inventory",
label="Inventory",
description="Project and unit master data.",
authoritative_fields=[
FieldDescriptor("inventory_projects", "project_id", Confidence.RELIABLE, "Primary key"),
FieldDescriptor("inventory_projects", "project_name", Confidence.RELIABLE, "Project name"),
FieldDescriptor("inventory_projects", "developer_name", Confidence.RELIABLE, "Developer"),
FieldDescriptor("inventory_projects", "micro_market", Confidence.RELIABLE, "Micro market"),
FieldDescriptor("inventory_units", "unit_id", Confidence.RELIABLE, "Primary key"),
FieldDescriptor("inventory_units", "project_id", Confidence.RELIABLE, "FK to inventory_projects"),
FieldDescriptor("inventory_units", "configuration", Confidence.RELIABLE, "Configuration"),
FieldDescriptor("inventory_units", "price_current", Confidence.RELIABLE, "Current price"),
FieldDescriptor("inventory_units", "status", Confidence.RELIABLE, "Unit status"),
],
),
ConceptDescriptor(
concept_id="extracted_facts",
label="Extracted Facts",
description="AI-extracted CRM memory facts.",
authoritative_fields=[
FieldDescriptor("intel_extracted_facts", "fact_id", Confidence.RELIABLE, "Primary key"),
FieldDescriptor("intel_extracted_facts", "person_id", Confidence.RELIABLE, "FK to crm_people"),
FieldDescriptor("intel_extracted_facts", "fact_type", Confidence.RELIABLE, "Fact type"),
FieldDescriptor("intel_extracted_facts", "fact_text", Confidence.RELIABLE, "Fact text"),
FieldDescriptor("intel_extracted_facts", "confidence", Confidence.RELIABLE, "Extraction confidence"),
FieldDescriptor("intel_extracted_facts", "effective_date", Confidence.PARTIAL, "Fact date"),
],
),
ConceptDescriptor(
concept_id="call_objections",
label="Call Objections",
description="Structured objections extracted from calls.",
authoritative_fields=[
FieldDescriptor("intel_call_objections", "objection_id", Confidence.RELIABLE, "Primary key"),
FieldDescriptor("intel_call_objections", "person_id", Confidence.RELIABLE, "FK to crm_people"),
FieldDescriptor("intel_call_objections", "objection_type", Confidence.RELIABLE, "Objection type"),
FieldDescriptor("intel_call_objections", "objection_text", Confidence.RELIABLE, "Objection text"),
FieldDescriptor("intel_call_objections", "intensity", Confidence.RELIABLE, "Intensity"),
FieldDescriptor("intel_call_objections", "was_resolved", Confidence.RELIABLE, "Resolution flag"),
FieldDescriptor("intel_call_objections", "raised_at", Confidence.RELIABLE, "Raised timestamp"),
],
),
]
# Lookup table from concept_id to descriptor, built once from CONCEPTS when the
# module is imported. If two descriptors share an id, the later one wins.
_CONCEPT_INDEX: dict[str, ConceptDescriptor] = {
    descriptor.concept_id: descriptor for descriptor in CONCEPTS
}
def get_concept(concept_id: str) -> ConceptDescriptor | None:
    """Look up a concept descriptor by its identifier.

    Args:
        concept_id: Identifier to look up in the module-level concept index.

    Returns:
        The indexed descriptor, or ``None`` when the identifier is unknown.
    """
    try:
        return _CONCEPT_INDEX[concept_id]
    except KeyError:
        return None
def all_concepts() -> list[ConceptDescriptor]:
    """Return every concept descriptor in catalog order.

    Returns:
        A new list on every call. The descriptors themselves are shared, but
        the list is a copy, so a caller that sorts, filters, or extends the
        result cannot alter the module-level ``CONCEPTS`` catalog (and thereby
        leave ``_CONCEPT_INDEX`` out of sync with it).
    """
    return list(CONCEPTS)
# Maps a detected intent key to the ordered concept ids that should be offered
# for it. List order matters: concepts_for_intent() preserves it.
INTENT_CONCEPT_MAP: dict[str, list[str]] = {
    # Contact recency and communication history.
    "last_contacted": [
        "last_contact_read_model",
        "communication_events",
        "person_identity",
    ],
    "interested_clients": [
        "property_interest",
        "person_identity",
        "lead_funnel",
    ],
    "qd_score": [
        "qd_score",
        "person_identity",
    ],
    "pipeline": [
        "opportunities",
        "lead_funnel",
        "person_identity",
    ],
    "site_visits": [
        "site_visits",
        "person_identity",
        "property_interest",
    ],
    "timeline": [
        "communication_events",
        "person_identity",
    ],
    "objections": [
        "call_objections",
        "communication_events",
        "person_identity",
    ],
    "broker_performance": [
        "lead_funnel",
        "opportunities",
    ],
    "next_action": [
        "next_best_action",
        "person_identity",
        "lead_funnel",
    ],
    "inventory": [
        "inventory",
        "property_interest",
    ],
    "extracted_facts": [
        "extracted_facts",
        "person_identity",
    ],
    # Broad per-client view: the widest concept selection in the map.
    "client_360": [
        "person_identity",
        "lead_funnel",
        "qd_score",
        "communication_events",
        "property_interest",
        "opportunities",
        "next_best_action",
    ],
}
def concepts_for_intent(intent: str) -> list[ConceptDescriptor]:
    """Resolve an intent key to its concept descriptors.

    Args:
        intent: Intent key to look up in ``INTENT_CONCEPT_MAP``.

    Returns:
        Descriptors in the order the map lists them. Unknown intents fall back
        to the ``person_identity`` and ``lead_funnel`` concepts, and ids that
        are missing from the concept index are skipped.
    """
    fallback_ids = ["person_identity", "lead_funnel"]
    resolved: list[ConceptDescriptor] = []
    for concept_id in INTENT_CONCEPT_MAP.get(intent, fallback_ids):
        if concept_id in _CONCEPT_INDEX:
            resolved.append(_CONCEPT_INDEX[concept_id])
    return resolved
def _field_to_dict(field: FieldDescriptor) -> dict[str, Any]:
    """Convert a field descriptor into a plain dictionary.

    Args:
        field: Descriptor whose attributes are copied into the result.

    Returns:
        A dict that always holds ``table``, ``column``, ``confidence`` and
        ``description``. ``notes``, ``valid_values`` and ``examples`` are
        added only when truthy; the latter two are copied into lists.
    """
    payload: dict[str, Any] = {
        "table": field.table,
        "column": field.column,
        "confidence": field.confidence,
        "description": field.description,
    }
    if field.notes:
        payload["notes"] = field.notes
    if field.valid_values:
        payload["valid_values"] = list(field.valid_values)
    if field.examples:
        payload["examples"] = list(field.examples)
    return payload
def concept_to_dict(concept: ConceptDescriptor) -> dict[str, Any]:
    """Convert a concept descriptor into a plain dictionary.

    Args:
        concept: Descriptor to convert.

    Returns:
        A dict with the concept id, label, description, the authoritative and
        deprecated fields (each converted by ``_field_to_dict``) and the
        preferred join paths rendered as ``table.column`` endpoints.
        ``notes`` on a join and ``usage_notes`` on the concept are included
        only when truthy.
    """
    join_entries: list[dict[str, Any]] = []
    for join in concept.preferred_join_paths:
        entry: dict[str, Any] = {
            "from": f"{join.from_table}.{join.from_col}",
            "to": f"{join.to_table}.{join.to_col}",
            "join_type": join.join_type,
        }
        if join.notes:
            entry["notes"] = join.notes
        join_entries.append(entry)

    result: dict[str, Any] = {
        "concept_id": concept.concept_id,
        "label": concept.label,
        "description": concept.description,
        "authoritative_fields": [_field_to_dict(item) for item in concept.authoritative_fields],
        "deprecated_fields": [_field_to_dict(item) for item in concept.deprecated_fields],
        "preferred_join_paths": join_entries,
    }
    if concept.usage_notes:
        result["usage_notes"] = concept.usage_notes
    return result
def build_semantic_context_for_planner(detected_intents: list[str], *, max_concepts: int = 5) -> str:
    """Serialize the catalog slice relevant to the detected intents as compact JSON.

    Args:
        detected_intents: Intent keys in priority order. Concepts are gathered
            intent by intent and de-duplicated, keeping the first occurrence.
        max_concepts: Upper bound on the number of concepts emitted. Values
            below zero are treated as zero, so a negative limit cannot
            silently slice concepts off the end of the list.

    Returns:
        A JSON object string (no whitespace between tokens) with the keys
        ``catalog_version``, ``concepts``, ``column_metadata`` and
        ``global_rules``.
    """
    import json

    # A dict keeps insertion order, which gives first-seen de-duplication.
    selected: dict[str, ConceptDescriptor] = {}
    for intent in detected_intents:
        for concept in concepts_for_intent(intent):
            selected.setdefault(concept.concept_id, concept)
    concepts = list(selected.values())[: max(max_concepts, 0)]

    # Column metadata is keyed by topic rather than by intent, so intents are
    # widened with the topics they imply. An intent whose name already equals
    # its topic (e.g. "qd_score") needs no alias.
    relevant_topics = set(detected_intents)
    if relevant_topics & {"last_contacted", "timeline"}:
        relevant_topics.add("contact_recency")
    if relevant_topics & {"interested_clients", "inventory"}:
        relevant_topics.add("property_interest")

    column_metadata = [
        {
            "table": item.table,
            "column": item.column,
            "topic": item.topic,
            "meaning": item.meaning,
            "reliability": item.reliability,
            **({"valid_values": list(item.valid_values)} if item.valid_values else {}),
            **({"examples": list(item.examples)} if item.examples else {}),
            **({"usage": item.usage} if item.usage else {}),
            **({"avoid": item.avoid} if item.avoid else {}),
        }
        for item in COLUMN_METADATA
        # Entries flagged "avoid" are always emitted, whatever their topic.
        if item.topic in relevant_topics or item.avoid
    ]

    return json.dumps(
        {
            "catalog_version": CATALOG_VERSION,
            "concepts": [concept_to_dict(concept) for concept in concepts],
            "column_metadata": column_metadata,
            "global_rules": [
                "Do not invent enum values. Use only valid_values from column_metadata when filtering enum-like columns.",
                "Queries that return zero rows because of impossible enum filters are invalid plans.",
                "For contact recency, use read_last_contacted.last_contact_at or intel_interactions.happened_at.",
                "Do not use fields marked avoid=true for the main business filter.",
            ],
        },
        separators=(",", ":"),
    )

View File

@@ -0,0 +1,382 @@
"""
oracle/visualization_planner.py
Pick Oracle canvas renderer types from actual result shape.
"""
from __future__ import annotations
import re
from dataclasses import dataclass
from typing import Any
@dataclass
class ColumnProfile:
    """Type and sparsity summary of one result column.

    Instances are produced by ``_profile_columns`` from a sample of the
    result rows. Field order is part of the interface: the profiler also
    constructs instances positionally.
    """

    # Column name as it appears in the result set.
    name: str
    # Type flags derived from the sampled non-null values.
    is_numeric: bool
    is_string: bool
    is_datetime: bool
    is_boolean: bool
    # Share of sampled values that were None (1.0 when there were no rows).
    null_rate: float
    # A few leading non-null sampled values.
    sample_values: list[Any]
@dataclass
class VisualizationDecision:
    """Outcome of visualization planning for one query result.

    Built by ``VisualizationPlanner._build_decision``. Field order is kept
    stable so positional construction keeps working.
    """

    # Renderer to use, e.g. "table", "barChart", "kpiTile".
    component_type: str
    # Columns chosen for the axes; None when no candidate column exists.
    x_axis: str | None
    y_axis: str | None
    series_cols: list[str]
    # Columns classified as dimensions / measures by the planner.
    dimension_cols: list[str]
    measure_cols: list[str]
    # Planner-supplied title, or one derived from the prompt.
    title: str
    # Layout hints for the canvas.
    width_mode: str
    min_height_px: int
    skeleton_variant: str
    # Renderer-specific parameters and column bindings.
    viz_params: dict[str, Any]
    data_bindings: dict[str, Any]
    # Confidence score and short explanation of the rule that fired.
    confidence: float
    reasoning: str
def _looks_like_timestamp(value: str) -> bool:
    """Report whether *value* starts with a ``YYYY-MM-DD``-shaped digit group.

    Only the leading ten characters are inspected. The digits are not
    range-checked and anything may follow them.
    """
    return re.match(r"\d{4}-\d{2}-\d{2}", value) is not None
def _profile_columns(rows: list[dict[str, Any]], columns: list[str]) -> list[ColumnProfile]:
    """Summarize the value types and sparsity of each requested column.

    At most the first 20 rows are sampled. Numeric and boolean flags consider
    every sampled non-null value; string and datetime flags consider only the
    first five non-null values.

    Args:
        rows: Result rows keyed by column name. A column missing from a row
            counts as null.
        columns: Column names to profile, in output order.

    Returns:
        One ``ColumnProfile`` per entry in *columns*. With no rows, every
        profile has all type flags false, ``null_rate`` 1.0 and no samples.
    """
    from datetime import date
    from decimal import Decimal

    if not rows:
        return [ColumnProfile(column, False, False, False, False, 1.0, []) for column in columns]

    sample = rows[:20]
    sample_size = len(sample)
    profiles: list[ColumnProfile] = []
    for column in columns:
        non_null = [value for value in (row.get(column) for row in sample) if value is not None]
        leading = non_null[:5]
        profiles.append(
            ColumnProfile(
                name=column,
                # bool is a subclass of int, so it must be excluded explicitly;
                # otherwise flag columns are reported as numeric.
                is_numeric=any(
                    isinstance(value, (int, float, Decimal)) and not isinstance(value, bool)
                    for value in non_null
                ),
                is_string=any(
                    isinstance(value, str) and not _looks_like_timestamp(value) for value in leading
                ),
                # Accept native date/datetime objects (datetime subclasses date)
                # as well as ISO-looking strings.
                is_datetime=any(
                    isinstance(value, date)
                    or (isinstance(value, str) and _looks_like_timestamp(value))
                    for value in leading
                ),
                is_boolean=any(isinstance(value, bool) for value in non_null),
                null_rate=1.0 - len(non_null) / sample_size,
                sample_values=non_null[:3],
            )
        )
    return profiles
# Column-name hints consulted by VisualizationPlanner._classify_column.

# Names treated as categorical dimensions.
_DIMENSION_HINTS = {
    # Person / organisation names.
    "name",
    "full_name",
    "agent_name",
    "developer_name",
    "broker_company",
    # Project and geography.
    "project_name",
    "micro_market",
    "district",
    # Generic categorical labels.
    "category",
    "label",
    "stage",
    "channel",
    "type",
    "status",
    "persona",
    "nationality",
}

# Names (or name fragments) that suggest a numeric measure.
_MEASURE_HINTS = {
    # Aggregate vocabulary.
    "count",
    "total",
    "sum",
    "avg",
    "average",
    "value",
    "score",
    "rate",
    "days",
    "amount",
    "revenue",
    # Specific measure columns.
    "current_value",
    "qd_score",
    "probability",
    "interest_count",
    "visit_count",
    "interaction_count",
}

# Name fragments that suggest a point in time.
_TIMESTAMP_HINTS = {"at", "date", "time", "when", "timestamp"}
# Axis preferences, highest priority first. _pick_axis() returns the first
# entry that is present among the candidate columns.
_PREFERRED_X = [
    "project_name", "developer_name", "category", "stage", "channel",
    "micro_market", "broker_company", "agent_name", "name", "full_name",
    "label", "status", "type",
]
_PREFERRED_Y = [
    "count", "total", "interested_clients", "interest_count", "client_count",
    "current_value", "qd_score", "value", "probability", "interaction_count",
    "visit_count", "days_since_last_contact",
    # Spelling used by the semantic catalog for read_last_contacted; appended
    # last so existing priorities are unchanged.
    "days_since_contact",
]

# Curated display columns per source table. Only preset columns that are
# actually present in a result are shown, so listing alternative spellings of
# the same field is harmless.
_TABLE_COLUMN_PRESETS: dict[str, list[str]] = {
    "crm_people": ["full_name", "primary_phone", "primary_email", "persona_labels"],
    "intel_qd_scores": ["full_name", "current_value", "score_type", "computed_at"],
    "crm_leads": ["full_name", "stage", "status", "budget_band", "urgency"],
    "intel_interactions": ["full_name", "channel", "interaction_type", "happened_at", "summary"],
    # NOTE(review): the semantic catalog names these fields last_contact_at /
    # days_since_contact, while this preset used last_contacted_at /
    # days_since_last_contact. Both spellings are listed until the real
    # read_last_contacted schema is confirmed.
    "read_last_contacted": [
        "full_name",
        "last_contact_at",
        "last_contacted_at",
        "last_channel",
        "days_since_contact",
        "days_since_last_contact",
        "staleness_label",
    ],
    "crm_property_interests": ["full_name", "project_name", "interest_level", "configuration_preference"],
    "intel_call_objections": ["full_name", "objection_type", "intensity", "was_resolved", "raised_at"],
    "intel_extracted_facts": ["full_name", "fact_type", "fact_text", "confidence", "effective_date"],
    "read_next_best_action": ["full_name", "action_label", "urgency", "recommended_channel", "execute_within_hours"],
}
def _pick_axis(candidates: list[str], preferred: list[str]) -> str | None:
    """Choose an axis column from *candidates*.

    Args:
        candidates: Columns available for the axis.
        preferred: Column names in descending priority.

    Returns:
        The highest-priority preferred name that is a candidate; otherwise the
        first candidate; ``None`` when there are no candidates.
    """
    if not candidates:
        return None
    ranked = [name for name in preferred if name in candidates]
    return ranked[0] if ranked else candidates[0]
def _title_from_prompt(prompt: str) -> str:
    """Derive a display title from the user's prompt.

    Whitespace runs are collapsed to single spaces, surrounding spaces and
    ``?``, ``.``, ``!`` characters are removed, the text is cut to 72
    characters and its first character is upper-cased.

    Returns:
        The derived title, or ``"Oracle Query Result"`` when nothing is left.
    """
    collapsed = re.sub(r"\s+", " ", prompt.strip())
    snippet = collapsed.strip(" ?.!")[:72]
    if not snippet:
        return "Oracle Query Result"
    return snippet[:1].upper() + snippet[1:]
class VisualizationPlanner:
    """Pick a canvas component and its parameters for a query result.

    The choice is driven by how the result columns are classified (identity,
    timestamp, dimension, measure, other), by the row count, and by keywords
    found in the prompt.
    """

    # Prompt keywords per rule; matched as substrings of the lower-cased prompt.
    _TIMELINE_TERMS = (
        "timeline", "history", "activity", "message", "call log", "whatsapp",
        "email", "conversation", "transcript", "interaction",
    )
    _TREND_TERMS = ("trend", "over time", "monthly", "weekly", "growth", "timeseries")
    _PIPELINE_TERMS = ("pipeline", "funnel", "stage", "kanban", "deal")

    # Layout defaults per component type.
    _FULL_WIDTH_TYPES = frozenset({"table", "activityStream", "pipelineBoard"})
    _MIN_HEIGHT_PX = {
        "kpiTile": 140,
        "barChart": 320,
        "lineChart": 320,
        "activityStream": 380,
        "table": 300,
        "pipelineBoard": 400,
    }
    _SKELETON_VARIANT = {
        "kpiTile": "kpi",
        "barChart": "chart",
        "lineChart": "chart",
        "activityStream": "table",
        "table": "table",
        "pipelineBoard": "pipeline",
    }

    def plan(
        self,
        *,
        rows: list[dict[str, Any]],
        columns: list[str],
        prompt: str,
        source_tables: list[str],
        profile_suggested_type: str | None = None,
        title_from_planner: str | None = None,
    ) -> VisualizationDecision:
        """Decide how a query result should be rendered.

        Args:
            rows: Result rows keyed by column name.
            columns: Result column names in display order.
            prompt: The user's prompt; scanned for intent keywords and used as
                the fallback title.
            source_tables: Tables the query read from; used to choose table
                display columns.
            profile_suggested_type: Component type suggested upstream. When
                truthy it is used as-is and the heuristic rules are skipped.
            title_from_planner: Title to use instead of one derived from the
                prompt.

        Returns:
            The populated ``VisualizationDecision``.
        """
        kinds = {
            profile.name: self._classify_column(profile)
            for profile in _profile_columns(rows, columns)
        }
        dimensions = [name for name, kind in kinds.items() if kind == "dimension"]
        measures = [name for name, kind in kinds.items() if kind == "measure"]
        timestamps = [name for name, kind in kinds.items() if kind == "timestamp"]
        row_count = len(rows)

        component_type, reasoning, confidence = self._select_component(
            columns=columns,
            dimensions=dimensions,
            measures=measures,
            timestamps=timestamps,
            row_count=row_count,
            prompt_lower=prompt.lower(),
            profile_suggested_type=profile_suggested_type,
        )
        return self._build_decision(
            component_type=component_type,
            dimensions=dimensions,
            measures=measures,
            timestamps=timestamps,
            columns=columns,
            rows=rows,
            row_count=row_count,
            prompt=prompt,
            source_tables=source_tables,
            title=title_from_planner,
            reasoning=reasoning,
            confidence=confidence,
        )

    @classmethod
    def _select_component(
        cls,
        *,
        columns: list[str],
        dimensions: list[str],
        measures: list[str],
        timestamps: list[str],
        row_count: int,
        prompt_lower: str,
        profile_suggested_type: str | None,
    ) -> tuple[str, str, float]:
        """Return ``(component_type, reasoning, confidence)`` for the first matching rule.

        Rules are evaluated in priority order; the table is the fallback.
        """

        def mentions(terms: tuple[str, ...]) -> bool:
            return any(term in prompt_lower for term in terms)

        if profile_suggested_type:
            return (
                profile_suggested_type,
                f"Execution profiler suggested {profile_suggested_type}",
                0.9,
            )
        if timestamps and mentions(cls._TIMELINE_TERMS):
            return "activityStream", "Activity-like prompt plus timestamped result.", 0.88
        if row_count == 1 and measures and not dimensions:
            return "kpiTile", "Single numeric row.", 0.92
        if timestamps and measures and mentions(cls._TREND_TERMS):
            return "lineChart", "Temporal series plus measure.", 0.87
        if ("stage" in columns or "pipeline" in prompt_lower) and mentions(cls._PIPELINE_TERMS):
            return "pipelineBoard", "Pipeline-like prompt and stage-like data.", 0.85
        if dimensions and measures and row_count <= 30 and not timestamps:
            return "barChart", "Categorical dimension plus measure.", 0.8
        return "table", "Default structured table.", 0.7

    @staticmethod
    def _name_tokens(name: str) -> list[str]:
        """Split a column name into lower-case word tokens.

        Handles snake_case and camelCase, e.g. ``happened_at`` and
        ``createdAt`` both yield a trailing ``"at"`` token.
        """
        spaced = re.sub(r"(?<=[a-z0-9])(?=[A-Z])", "_", name)
        return [token for token in re.split(r"[\W_]+", spaced.lower()) if token]

    @staticmethod
    def _classify_column(profile: ColumnProfile) -> str:
        """Classify a column as identity, timestamp, dimension, measure or other.

        Name hints are matched against whole name tokens rather than as raw
        substrings. Substring matching classified every name containing "at"
        (``status``, ``category``, ``nationality``, ``conversion_rate``) as a
        timestamp before the dimension and measure checks could run.
        """
        lower = profile.name.lower()
        tokens = VisualizationPlanner._name_tokens(profile.name)

        # Covers *_id, camelCase ...Id and a bare "id" column.
        if tokens and tokens[-1] == "id":
            return "identity"
        if profile.is_datetime or not _TIMESTAMP_HINTS.isdisjoint(tokens):
            return "timestamp"
        if lower in _DIMENSION_HINTS or (profile.is_string and not profile.is_numeric):
            return "dimension"
        if (
            profile.is_numeric
            or lower in _MEASURE_HINTS  # multi-word hints such as "qd_score"
            or not _MEASURE_HINTS.isdisjoint(tokens)
        ):
            return "measure"
        return "other"

    def _build_decision(
        self,
        *,
        component_type: str,
        dimensions: list[str],
        measures: list[str],
        timestamps: list[str],
        columns: list[str],
        rows: list[dict[str, Any]],
        row_count: int,
        prompt: str,
        source_tables: list[str],
        title: str | None,
        reasoning: str,
        confidence: float,
    ) -> VisualizationDecision:
        """Assemble the decision for an already-chosen component type.

        Picks the axes, the display columns (curated only for tables), the
        renderer parameters, the data bindings and the layout hints. ``rows``
        is accepted for interface stability and is not read here.
        """
        x_axis = _pick_axis(dimensions + timestamps, _PREFERRED_X + list(timestamps))
        y_axis = _pick_axis(measures, _PREFERRED_Y)

        display_columns = (
            self._table_columns(columns, source_tables) if component_type == "table" else columns
        )
        viz_params = self._build_viz_params(
            component_type=component_type,
            x_axis=x_axis,
            y_axis=y_axis,
            display_columns=display_columns,
            row_count=row_count,
        )
        data_bindings = {
            # Fall back to the first timestamp when there is no dimension.
            "dimensions": dimensions[:2] if dimensions else timestamps[:1],
            "measures": measures[:3],
            "series": [],
            "filters": [],
        }

        return VisualizationDecision(
            component_type=component_type,
            x_axis=x_axis,
            y_axis=y_axis,
            series_cols=[],
            dimension_cols=dimensions,
            measure_cols=measures,
            title=title or _title_from_prompt(prompt),
            width_mode="full" if component_type in self._FULL_WIDTH_TYPES else "half",
            min_height_px=self._MIN_HEIGHT_PX.get(component_type, 300),
            skeleton_variant=self._SKELETON_VARIANT.get(component_type, "generic"),
            viz_params=viz_params,
            data_bindings=data_bindings,
            confidence=confidence,
            reasoning=reasoning,
        )

    @staticmethod
    def _table_columns(all_columns: list[str], source_tables: list[str]) -> list[str]:
        """Choose which columns a table component should display.

        The first source table whose preset overlaps the result wins, and only
        its matching columns are shown. Without any match, up to eight columns
        are shown, dropping ``*_id`` columns except ``person_id``.
        """
        for table in source_tables:
            preset = _TABLE_COLUMN_PRESETS.get(table)
            if not preset:
                continue
            matched = [column for column in preset if column in all_columns]
            if matched:
                return matched
        visible = [
            column
            for column in all_columns
            if not column.endswith("_id") or column == "person_id"
        ]
        return visible[:8]

    @staticmethod
    def _build_viz_params(
        *,
        component_type: str,
        x_axis: str | None,
        y_axis: str | None,
        display_columns: list[str],
        row_count: int,
    ) -> dict[str, Any]:
        """Return the renderer-specific parameter dict for *component_type*.

        Unknown component types get an empty dict. ``row_count`` is part of
        the signature but is currently unused.
        """
        del row_count
        if component_type == "barChart":
            return {
                "xAxis": x_axis or "category",
                "yAxis": y_axis or "value",
                "sort": "desc",
                "showLabels": True,
                "legend": False,
            }
        if component_type == "lineChart":
            return {"showPoints": True, "smooth": True}
        if component_type == "kpiTile":
            return {"label": "Result", "trend": "", "comparisonLabel": ""}
        if component_type == "table":
            return {
                "columns": display_columns,
                "emptyStateTitle": "No matching records found",
                "emptyStateDescription": "The query ran successfully but returned no rows for this prompt.",
                "rankBy": y_axis,
                "showTopBadge": False,
            }
        if component_type == "activityStream":
            return {"showUrgencyIndicator": True}
        if component_type == "pipelineBoard":
            return {"showValue": True, "colorByStage": True}
        return {}
# Module-level VisualizationPlanner instance, created when the module is imported.
visualization_planner = VisualizationPlanner()