Feat: CRM v2, Richer synthetic data, Canvas JSON Components

This commit is contained in:
Sagnik
2026-04-23 22:00:44 +05:30
parent 6cdc366718
commit f04571bd7b
54 changed files with 89916 additions and 578 deletions

View File

@@ -10,6 +10,7 @@ import logging
import os
import uuid
import json
import re
from datetime import datetime, timezone
from typing import Any
@@ -18,6 +19,7 @@ from .canvas_service import canvas_service
from .data_access_gateway import data_access_gateway
from .persona_service import persona_service
from .codebook_service import codebook_service, CodebookExample
from .natural_db_agent import natural_db_agent
from backend.services.runtime_llm_service import runtime_llm_service
from backend.services.nemoclaw_runtime import nemoclaw_runtime
@@ -107,7 +109,7 @@ def _build_demo_retrieval_plan(
Produces a valid retrieval plan that passes policy validation.
"""
component_types = _detect_component_types(prompt)
row_limit = 50 if actor_role in ("senior_broker", "junior_broker") else 200
row_limit = _parse_prompt_row_limit(prompt, actor_role)
return {
"planId": str(uuid.uuid4()),
@@ -130,12 +132,12 @@ def _build_demo_retrieval_plan(
_DATASET_MAP: dict[str, str] = {
"pipeline_board": "crm_opportunity_pipeline",
"bar_chart": "crm_property_interest_rollup",
"bar_chart": "oracle_property_interest_rollup",
"geo_map": "lead_geo_interest_rollup",
"table": "crm_contacts_overview",
"line_chart": "crm_property_interest_rollup",
"line_chart": "oracle_property_interest_rollup",
"kpi_tile": "oracle_aggregated_metric",
"activity_stream": "crm_interaction_timeline",
"activity_stream": "oracle_client_interaction_timeline",
}
_CODEBOOK_COMPONENT_MAP: dict[str, str] = {
@@ -164,34 +166,85 @@ def _component_plan_type_from_codebook(example: CodebookExample) -> str:
return _CODEBOOK_COMPONENT_MAP.get(example.component_type, "table")
def _parse_prompt_row_limit(prompt: str, actor_role: str) -> int:
default_limit = 50 if actor_role in ("senior_broker", "junior_broker") else 200
match = re.search(r"\b(?:top|last|latest|recent|first|show|name of the last)\s+(\d{1,4})\b", prompt.lower())
if not match:
return default_limit
requested = max(1, int(match.group(1)))
return min(requested, default_limit)
def _prompt_data_intent(prompt: str) -> str | None:
lowered = prompt.lower()
contact_terms = (
"last contacted", "last contact", "last contacted us", "recently contacted",
"recent contacts", "last call", "last called", "last message", "last messaged",
"last whatsapp", "who contacted us", "contacted us", "contacted clients",
"client contacted", "clients contacted", "follow-up", "follow up",
)
interest_terms = (
"shown interest", "showed interest", "interested clients", "interested client",
"property interest", "project interest", "interested in any", "interest in any",
"interested in our properties", "interested in properties",
)
timeline_terms = (
"conversation", "timeline", "whatsapp", "messages", "message history",
"call history", "transcript", "email", "visit history", "interaction history",
)
client_360_terms = ("client 360", "client dossier", "highest intent buyer", "client profile")
if any(term in lowered for term in contact_terms) or re.search(r"\blast\s+\d+\s+contacted\b", lowered):
return "last_contacted"
if any(term in lowered for term in interest_terms) or (
any(term in lowered for term in ("interest", "interested", "project", "property", "properties"))
and any(term in lowered for term in ("client", "clients", "contact", "contacts"))
):
return "interested_clients"
if any(term in lowered for term in client_360_terms):
return "client_360"
if any(term in lowered for term in timeline_terms):
return "timeline"
return None
def _dataset_for_codebook(example: CodebookExample, prompt: str, component_plan_type: str | None = None) -> str:
chapter = example.chapter_name.lower()
subchapter = example.subchapter_name.lower()
component_plan_type = component_plan_type or _component_plan_type_from_codebook(example)
lowered_prompt = prompt.lower()
data_intent = _prompt_data_intent(prompt)
if data_intent == "last_contacted":
return "oracle_last_contacted_clients" if component_plan_type != "activity_stream" else "oracle_client_interaction_timeline"
if data_intent == "interested_clients":
return "oracle_top_interested_clients" if component_plan_type == "table" else "oracle_property_interest_rollup"
if data_intent == "client_360":
return "oracle_client_360_summary"
if data_intent == "timeline":
return "oracle_client_interaction_timeline"
if component_plan_type == "activity_stream":
return "crm_interaction_timeline"
return "oracle_client_interaction_timeline"
if component_plan_type == "pipeline_board":
return "crm_opportunity_pipeline"
if component_plan_type == "table" and any(term in lowered_prompt for term in ("last interacted", "last interaction", "recently contacted", "recent interaction")):
return "crm_last_interacted_clients"
return "oracle_last_contacted_clients"
if component_plan_type == "table" and any(term in lowered_prompt for term in ("interest", "interested", "project", "property", "properties")) and any(term in lowered_prompt for term in ("client", "clients", "contact", "contacts")):
return "crm_top_interested_clients"
return "oracle_top_interested_clients"
if component_plan_type == "line_chart" and any(term in lowered_prompt for term in ("trend", "time", "history", "growth")):
return "crm_property_interest_rollup"
return "oracle_property_interest_rollup"
if any(term in lowered_prompt for term in ("contact", "client 360", "crm", "account", "lead")):
if "timeline" in lowered_prompt or "message" in lowered_prompt or "call" in lowered_prompt or "email" in lowered_prompt:
return "crm_interaction_timeline"
return "oracle_client_interaction_timeline"
if "pipeline" in lowered_prompt or "opportunit" in lowered_prompt:
return "crm_opportunity_pipeline"
if ("interest" in lowered_prompt or "project" in lowered_prompt or "property" in lowered_prompt) and ("client" in lowered_prompt or "contact" in lowered_prompt):
return "crm_top_interested_clients"
return "oracle_top_interested_clients"
if "interest" in lowered_prompt or "project" in lowered_prompt or "property" in lowered_prompt:
return "crm_property_interest_rollup"
return "oracle_property_interest_rollup"
if "last interacted" in lowered_prompt or "recently contacted" in lowered_prompt or "recent interaction" in lowered_prompt:
return "crm_last_interacted_clients"
return "oracle_last_contacted_clients"
return "crm_contacts_overview"
if "client" in chapter or "client" in subchapter or "contact" in subchapter:
@@ -199,9 +252,9 @@ def _dataset_for_codebook(example: CodebookExample, prompt: str, component_plan_
if "opportun" in chapter or "pipeline" in subchapter:
return "crm_opportunity_pipeline"
if "interaction" in chapter or "communication" in chapter or "timeline" in subchapter:
return "crm_interaction_timeline"
return "oracle_client_interaction_timeline"
if "property" in chapter or "inventory" in chapter or "interest" in subchapter:
return "crm_property_interest_rollup"
return "oracle_property_interest_rollup"
return _DATASET_MAP.get(component_plan_type, "oracle_aggregated_metric")
@@ -211,7 +264,7 @@ def _build_codebook_retrieval_plan(
actor_role: str,
matches: list[CodebookExample],
) -> dict[str, Any]:
row_limit = 50 if actor_role in ("senior_broker", "junior_broker") else 200
row_limit = _parse_prompt_row_limit(prompt, actor_role)
desired_types = _detect_component_types(prompt)
if not desired_types:
desired_types = [_component_plan_type_from_codebook(matches[0])] if matches else ["table"]
@@ -265,12 +318,17 @@ def _title_for_dataset(dataset: str, component_plan_type: str, prompt: str) -> s
"crm_interaction_timeline": "Client Interaction Timeline",
"crm_last_interacted_clients": "Last Interacted Clients",
"crm_top_interested_clients": "Top Interested Clients",
"oracle_property_interest_rollup": "Property Interest Rollup",
"oracle_client_interaction_timeline": "Client Interaction Timeline",
"oracle_last_contacted_clients": "Last Contacted Clients",
"oracle_top_interested_clients": "Top Interested Clients",
"oracle_client_360_summary": "Client 360 Summary",
"broker_performance": "Broker Performance",
}
if dataset == "crm_top_interested_clients" and "top" in lowered_prompt:
if dataset in {"crm_top_interested_clients", "oracle_top_interested_clients"} and "top" in lowered_prompt:
return "Top Interested Clients"
if dataset == "crm_last_interacted_clients" and ("top" in lowered_prompt or "last" in lowered_prompt):
return "Last Interacted Clients"
if dataset in {"crm_last_interacted_clients", "oracle_last_contacted_clients"} and ("top" in lowered_prompt or "last" in lowered_prompt):
return "Last Contacted Clients"
return dataset_titles.get(dataset)
@@ -288,6 +346,11 @@ _RUNTIME_ALLOWED_DATASETS = {
"crm_interaction_timeline",
"crm_last_interacted_clients",
"crm_top_interested_clients",
"oracle_property_interest_rollup",
"oracle_client_interaction_timeline",
"oracle_last_contacted_clients",
"oracle_top_interested_clients",
"oracle_client_360_summary",
}
@@ -348,6 +411,64 @@ class PromptOrchestrator:
await self._persist_execution(execution)
# ── Step 1: Build retrieval plan ──────────────────────────────────────
page = await canvas_service.get_page(page_id, tenant_id)
existing_comps = page.get("components", []) if page else []
next_order_base = self._next_order_base(existing_comps)
section_id = f"sec_prompt_generated_{execution_id.replace('-', '')[:12]}"
natural_result = None
try:
natural_result = await natural_db_agent.execute_prompt(
prompt,
row_limit=_parse_prompt_row_limit(prompt, actor_role),
)
except Exception as exc:
logger.warning("ORCH natural DB agent unavailable, falling back to component planner: %s", exc)
warnings.append(f"Natural DB agent unavailable ({exc}); using component planner fallback.")
if natural_result is not None:
execution["status"] = "executing"
execution["retrievalPlan"] = {
"planId": str(uuid.uuid4()),
"planner": "oracle_natural_db_agent",
"sql": natural_result.sql,
"sourceTables": natural_result.source_tables,
"rowCount": natural_result.row_count,
}
viz_plan = self._build_natural_visualization_plan(
result=natural_result.as_dict(),
prompt=prompt,
execution_id=execution_id,
actor_id=actor_id,
branch_id=branch_id,
base_order=next_order_base,
section_id=section_id,
)
execution["visualizationPlan"] = viz_plan
execution["componentsCreated"] = [c["componentId"] for c in viz_plan.get("components", [])]
try:
if page:
revision = await canvas_service.commit_revision(
page_id=page_id,
tenant_id=tenant_id,
actor_id=actor_id,
commit_kind="prompt",
commit_summary=f"Oracle: {prompt[:80]}",
components=existing_comps + viz_plan.get("components", []),
execution_id=execution_id,
idempotency_key=client_request_id,
)
execution["headRevision"] = revision["revisionNumber"]
except Exception as exc:
logger.warning("ORCH natural revision_commit failed (non-fatal): %s", exc)
warnings.append("Revision commit deferred; will retry on next sync.")
execution["status"] = "completed"
execution["summary"] = self._generate_summary(prompt, viz_plan)
execution["completedAt"] = _now()
execution["warnings"] = warnings + natural_result.warnings
await self._persist_execution(execution)
return execution
codebook_matches = codebook_service.search_examples(prompt, limit=4)
execution["codebookMatches"] = [
{
@@ -580,6 +701,92 @@ class PromptOrchestrator:
return {"components": components}
def _build_natural_visualization_plan(
    self,
    *,
    result: dict[str, Any],
    prompt: str,
    execution_id: str,
    actor_id: str,
    branch_id: str,
    base_order: int,
    section_id: str,
) -> dict[str, Any]:
    """Wrap one natural-DB-agent result in a single-component plan.

    Reads ``rows``, ``columns``, ``componentType``, ``title``, ``sql``,
    ``summary``, ``sourceTables`` and ``rowCount`` from ``result`` and
    returns ``{"components": [component]}``. The component embeds the rows
    under ``dataRows`` and is placed at ``base_order + 100`` inside
    ``section_id``.
    """
    rows = result.get("rows") or []
    # Fall back to the first row's keys when the result names no columns.
    # NOTE(review): assumes each row is a mapping — confirm against the agent.
    columns = result.get("columns")
    if not columns:
        columns = list(rows[0].keys()) if rows else []

    ctype = str(result.get("componentType") or "table")
    mapped_type = self._map_type(ctype)
    dataset = "oracle_natural_sql"
    component_id = str(uuid.uuid4())

    title = result.get("title") or self._generate_title(prompt, ctype)

    data_source = {
        "descriptorId": str(uuid.uuid4()),
        "sourceType": "postgres",
        "connectorId": "velocity-core-postgres",
        "dataset": dataset,
        "authContextRef": f"authctx_{actor_id}_scope",
        "queryTemplate": result.get("sql", ""),
        "queryParameters": {},
        "rowLimit": len(rows),
        "privacyTier": "standard",
        "cachePolicy": {"mode": "revision_scoped"},
    }

    # Type defaults first, then the SQL-specific keys so they take precedence.
    viz_params = dict(self._default_viz_params(ctype, dataset, rows))
    viz_params["columns"] = columns
    viz_params["sqlSummary"] = result.get("summary")
    viz_params["sourceTables"] = result.get("sourceTables", [])
    viz_params["rowCount"] = result.get("rowCount", len(rows))

    bindings = self._default_bindings(ctype)

    provenance = {
        "originType": "prompt_generated",
        "promptExecutionId": execution_id,
        "sourceBranchId": branch_id,
        "createdBy": actor_id,
        "createdAt": _iso(_now()),
        "sourceTables": result.get("sourceTables", []),
        "sqlSummary": result.get("summary"),
    }

    hints = self._rendering_hints(ctype)

    # Row-oriented component types span the full width; the rest take half.
    if mapped_type in ("table", "pipelineBoard", "timeline", "activityStream"):
        width_mode = "full"
    else:
        width_mode = "half"
    layout = {
        "orderIndex": base_order + 100,
        "sectionId": section_id,
        "widthMode": width_mode,
        "minHeightPx": 320,
        "stickyHeader": False,
    }

    access_controls = {
        "visibilityScope": "private",
        "allowedRoles": ["senior_broker", "sales_director", "marketing_operator", "data_steward", "compliance_reviewer", "platform_admin"],
        "redactionPolicy": "none",
    }
    style_signature = {
        "theme": "velocity_glass",
        "paletteToken": "ocean_signal",
        "motionProfile": "calm_reveal",
        "density": "comfortable",
        "radiusScale": "lg",
        "typographyScale": "balanced",
    }
    validation_state = {
        "schema": "pass",
        "policy": "pass",
        "a11y": "pass",
        "performance": "pass",
        "status": "validated",
    }

    comp: dict[str, Any] = {
        "componentId": component_id,
        "type": mapped_type,
        "title": title,
        "description": f"SQL-backed Oracle result from: \"{prompt[:96]}\"",
        "dataSourceDescriptor": data_source,
        "visualizationParameters": viz_params,
        "dataBindings": bindings,
        "version": 1,
        "lifecycleState": "active",
        "provenance": provenance,
        "renderingHints": hints,
        "layout": layout,
        "accessControls": access_controls,
        "styleSignature": style_signature,
        "validationState": validation_state,
        "auditLog": [f"aud_{execution_id}_natural_sql"],
        "dataRows": rows,
    }
    return {"components": [comp]}
@staticmethod
def _next_order_base(existing_components: list[dict[str, Any]]) -> int:
max_existing = 0
@@ -706,6 +913,9 @@ class PromptOrchestrator:
"crm_contacts_overview": ["name", "email", "phone", "city", "buyer_type", "qd_score"],
"crm_last_interacted_clients": ["name", "email", "phone", "last_interaction_at", "interaction_count", "qd_score"],
"crm_top_interested_clients": ["name", "email", "phone", "interest_count", "projects", "qd_score"],
"oracle_last_contacted_clients": ["name", "phone", "last_contacted_at", "last_contact_channel", "last_contact_summary", "interaction_count", "qd_score", "next_action"],
"oracle_top_interested_clients": ["name", "phone", "interest_count", "projects", "last_interest_at", "qd_score"],
"oracle_client_360_summary": ["name", "phone", "lead_status", "budget_band", "urgency", "qd_score", "interest_count", "interaction_count", "projects"],
}
defaults: dict[str, dict[str, Any]] = {
"bar_chart": {"xAxis": "category", "yAxis": "value", "sort": "desc", "showLabels": True, "legend": False},
@@ -847,7 +1057,7 @@ class PromptOrchestrator:
Uses the shared runtime LLM service to propose a retrieval plan.
Raises on malformed output so the orchestrator can fall back safely.
"""
row_limit = 50 if ctx.actor_role in ("senior_broker", "junior_broker") else 200
row_limit = _parse_prompt_row_limit(prompt, ctx.actor_role)
system_prompt = (
"You are the Oracle planner for Project Velocity. "
"Return JSON only. "
@@ -855,7 +1065,9 @@ class PromptOrchestrator:
"Allowed component types: pipeline_board, bar_chart, geo_map, table, line_chart, kpi_tile, activity_stream. "
"Allowed datasets: deals, lead_daily_snapshot, lead_geo_interest_rollup, broker_performance, inventory_absorption, "
"oracle_aggregated_metric, lead_activity_log, crm_contacts_overview, crm_opportunity_pipeline, "
"crm_property_interest_rollup, crm_interaction_timeline. "
"crm_property_interest_rollup, crm_interaction_timeline, crm_last_interacted_clients, crm_top_interested_clients, "
"oracle_property_interest_rollup, oracle_client_interaction_timeline, oracle_last_contacted_clients, "
"oracle_top_interested_clients, oracle_client_360_summary. "
"Return an object with keys semanticModelVersion, intentClass, components. "
"Each component must include suggestedType, dataset, and titleHint. "
"Do not emit SQL. Do not invent datasets outside the allowlist."