feat(crm): canonical crm and imported routes implementation

This commit is contained in:
Sagnik
2026-04-18 21:32:54 +05:30
parent 37c06de749
commit 954618c3ef
52 changed files with 80656 additions and 1 deletions

View File

@@ -0,0 +1,3 @@
"""
backend/services/client_graph/__init__.py
"""

View File

@@ -0,0 +1,369 @@
"""
backend/services/client_graph/aggregation_service.py
Client 360 Aggregation Service
Produces Client360Snapshot read models by joining across
crm_people, crm_leads, crm_opportunities, intel_interactions,
intel_reminders, intel_qd_scores, crm_property_interests.
This is a derived read model — never the sole source of truth.
As specified in Doc 07 (Client360Snapshot contract) and Doc 08 (Adapter Spec).
"""
from __future__ import annotations
import logging
from typing import Any
logger = logging.getLogger("velocity.client_graph.aggregation")
def _serialize_person(row: Any) -> dict[str, Any]:
return {
"person_id": str(row["person_id"]),
"full_name": row["full_name"],
"primary_email": row["primary_email"],
"primary_phone": row["primary_phone"],
"buyer_type": row["buyer_type"],
"persona_labels": row["persona_labels"] or [],
"source_confidence": float(row["source_confidence"] or 0.0),
"created_at": row["created_at"].isoformat() if row["created_at"] else None,
}
def _serialize_lead(row: Any) -> dict[str, Any]:
return {
"lead_id": str(row["lead_id"]),
"status": row["status"],
"budget_band": row["budget_band"],
"urgency": row["urgency"],
"financing_posture": row["financing_posture"],
"timeline_to_decision": row["timeline_to_decision"],
"objections": row["objections"] or [],
"motivations": row["motivations"] or [],
"created_at": row["created_at"].isoformat() if row["created_at"] else None,
}
def _serialize_opportunity(row: Any) -> dict[str, Any]:
return {
"opportunity_id": str(row["opportunity_id"]),
"stage": row["stage"],
"value": float(row["value"]) if row["value"] else None,
"probability": row["probability"],
"expected_close_date": row["expected_close_date"].isoformat() if row["expected_close_date"] else None,
"next_action": row["next_action"],
"project_id": str(row["project_id"]) if row["project_id"] else None,
"unit_id": str(row["unit_id"]) if row["unit_id"] else None,
}
def _serialize_interaction(row: Any) -> dict[str, Any]:
return {
"interaction_id": str(row["interaction_id"]),
"channel": row["channel"],
"interaction_type": row["interaction_type"],
"happened_at": row["happened_at"].isoformat() if row["happened_at"] else None,
"summary": row["summary"],
}
def _serialize_reminder(row: Any) -> dict[str, Any]:
return {
"reminder_id": str(row["reminder_id"]),
"reminder_type": row["reminder_type"],
"title": row["title"],
"due_at": row["due_at"].isoformat() if row["due_at"] else None,
"status": row["status"],
"priority": row["priority"],
}
def _serialize_qd_score(row: Any) -> dict[str, Any]:
return {
"score_type": row["score_type"],
"current_value": float(row["current_value"]),
"computed_at": row["computed_at"].isoformat() if row["computed_at"] else None,
"reasoning": row["reasoning"],
}
def _serialize_property_interest(row: Any) -> dict[str, Any]:
return {
"interest_id": str(row["interest_id"]),
"project_name": row["project_name"],
"unit_preference": row["unit_preference"],
"configuration": row["configuration"],
"budget_min": float(row["budget_min"]) if row["budget_min"] else None,
"budget_max": float(row["budget_max"]) if row["budget_max"] else None,
"priority": row["priority"],
}
async def get_client_360(conn: Any, person_id: str) -> dict[str, Any] | None:
"""
Aggregate a full Client360Snapshot for a given person_id.
This is a read model — derived from canonical tables, never primary truth.
"""
# 1. Core identity
person_row = await conn.fetchrow(
"""
SELECT person_id, full_name, primary_email, primary_phone,
buyer_type, persona_labels, source_confidence, created_at
FROM crm_people
WHERE person_id = $1::uuid
""",
person_id,
)
if not person_row:
return None
identity = _serialize_person(person_row)
# 2. Account links
account_rows = await conn.fetch(
"""
SELECT ca.account_id, ca.account_name, ca.account_type, ca.industry
FROM crm_accounts ca
INNER JOIN crm_leads cl ON cl.account_id = ca.account_id
WHERE cl.person_id = $1::uuid
LIMIT 5
""",
person_id,
)
account_links = [
{
"account_id": str(r["account_id"]),
"account_name": r["account_name"],
"account_type": r["account_type"],
"industry": r["industry"],
}
for r in account_rows
]
# 3. Active lead
lead_row = await conn.fetchrow(
"""
SELECT lead_id, status, budget_band, urgency, financing_posture,
timeline_to_decision, objections, motivations, created_at
FROM crm_leads
WHERE person_id = $1::uuid
ORDER BY created_at DESC
LIMIT 1
""",
person_id,
)
lead = _serialize_lead(lead_row) if lead_row else None
# 4. Active opportunities (top 5)
opp_rows = await conn.fetch(
"""
SELECT co.opportunity_id, co.stage, co.value, co.probability,
co.expected_close_date, co.next_action, co.project_id, co.unit_id
FROM crm_opportunities co
INNER JOIN crm_leads cl ON cl.lead_id = co.lead_id
WHERE cl.person_id = $1::uuid
ORDER BY co.updated_at DESC
LIMIT 5
""",
person_id,
)
active_opportunities = [_serialize_opportunity(r) for r in opp_rows]
# 5. Recent interactions (last 10)
interaction_rows = await conn.fetch(
"""
SELECT interaction_id, channel, interaction_type, happened_at, summary
FROM intel_interactions
WHERE person_id = $1::uuid
ORDER BY happened_at DESC
LIMIT 10
""",
person_id,
)
recent_interactions = [_serialize_interaction(r) for r in interaction_rows]
# 6. Property interests
interest_rows = await conn.fetch(
"""
SELECT interest_id, project_name, unit_preference, configuration,
budget_min, budget_max, priority
FROM crm_property_interests
WHERE person_id = $1::uuid
ORDER BY priority ASC, interest_id ASC
LIMIT 10
""",
person_id,
)
property_interests = [_serialize_property_interest(r) for r in interest_rows]
# 7. Pending tasks / reminders
task_rows = await conn.fetch(
"""
SELECT reminder_id, reminder_type, title, due_at, status, priority
FROM intel_reminders
WHERE person_id = $1::uuid
AND status IN ('pending', 'snoozed')
ORDER BY due_at ASC NULLS LAST
LIMIT 10
""",
person_id,
)
tasks = [_serialize_reminder(r) for r in task_rows]
# 8. QD overview (all score types)
qd_rows = await conn.fetch(
"""
SELECT score_type, current_value, computed_at, reasoning
FROM intel_qd_scores
WHERE person_id = $1::uuid
""",
person_id,
)
qd_overview = {r["score_type"]: _serialize_qd_score(r) for r in qd_rows}
# 9. Risk flags — heuristic derivation
risk_flags: list[str] = []
if lead and lead.get("urgency") in ("high", "critical") and not active_opportunities:
risk_flags.append("high_urgency_without_active_opportunity")
if not recent_interactions:
risk_flags.append("no_recent_interactions")
if qd_overview.get("intent_score", {}).get("current_value", 1.0) < 0.3:
risk_flags.append("low_intent_score")
if not property_interests:
risk_flags.append("no_property_interests_recorded")
# 10. Recommended next actions — simple heuristic
recommended_next_actions: list[str] = []
if tasks:
overdue = [t for t in tasks if t.get("status") == "pending"]
if overdue:
recommended_next_actions.append(f"Complete pending task: {overdue[0]['title']}")
if lead and lead.get("urgency") in ("high", "critical"):
recommended_next_actions.append("High-urgency client — prioritize callback within 24h")
if not recent_interactions and lead:
recommended_next_actions.append("No recent interactions — schedule follow-up")
return {
"client_ref": person_id,
"snapshot_type": "client_360",
"identity": identity,
"account_links": account_links,
"current_lead": lead,
"active_opportunities": active_opportunities,
"recent_interactions": recent_interactions,
"property_interests": property_interests,
"tasks": tasks,
"qd_overview": qd_overview,
"risk_flags": risk_flags,
"recommended_next_actions": recommended_next_actions,
"note": "Derived read model. Not primary truth. Refresh from canonical tables.",
}
async def get_contact_list(
conn: Any,
search: str | None = None,
buyer_type: str | None = None,
status: str | None = None,
limit: int = 50,
offset: int = 0,
) -> dict[str, Any]:
"""
Paginated contact list with lead status and QD summary.
Implements the 'summary query' pattern from Doc 09.
"""
clauses: list[str] = ["1=1"]
params: list[Any] = []
if search:
params.append(f"%{search}%")
clauses.append(
f"(p.full_name ILIKE ${len(params)} OR p.primary_email ILIKE ${len(params)} OR p.primary_phone ILIKE ${len(params)})"
)
if buyer_type:
params.append(buyer_type)
clauses.append(f"p.buyer_type = ${len(params)}")
if status:
params.append(status)
clauses.append(f"cl.status = ${len(params)}::crm_lead_status")
where = "WHERE " + " AND ".join(clauses)
params_for_count = params.copy()
params.append(limit)
params.append(offset)
query = f"""
SELECT
p.person_id,
p.full_name,
p.primary_email,
p.primary_phone,
p.buyer_type,
p.created_at,
cl.lead_id,
cl.status AS lead_status,
cl.budget_band,
cl.urgency,
COALESCE(qs.intent_value, 0.0) AS intent_score,
COALESCE(qs.urgency_value, 0.0) AS urgency_score,
(SELECT COUNT(*) FROM intel_interactions ii WHERE ii.person_id = p.person_id) AS interaction_count,
(SELECT MAX(happened_at) FROM intel_interactions ii WHERE ii.person_id = p.person_id) AS last_interaction_at,
(SELECT COUNT(*) FROM intel_reminders ir WHERE ir.person_id = p.person_id AND ir.status = 'pending') AS pending_tasks
FROM crm_people p
LEFT JOIN LATERAL (
SELECT lead_id, status, budget_band, urgency
FROM crm_leads
WHERE person_id = p.person_id
ORDER BY created_at DESC
LIMIT 1
) cl ON TRUE
LEFT JOIN LATERAL (
SELECT
MAX(CASE WHEN score_type = 'intent_score' THEN current_value END) AS intent_value,
MAX(CASE WHEN score_type = 'urgency_score' THEN current_value END) AS urgency_value
FROM intel_qd_scores
WHERE person_id = p.person_id
) qs ON TRUE
{where}
ORDER BY last_interaction_at DESC NULLS LAST, p.created_at DESC
LIMIT ${len(params) - 1} OFFSET ${len(params)}
"""
count_query = f"""
SELECT COUNT(*)
FROM crm_people p
LEFT JOIN crm_leads cl ON cl.person_id = p.person_id
{where}
"""
rows = await conn.fetch(query, *params)
total_row = await conn.fetchrow(count_query, *params_for_count)
total = int(total_row[0]) if total_row else 0
contacts = []
for r in rows:
contacts.append({
"person_id": str(r["person_id"]),
"full_name": r["full_name"],
"primary_email": r["primary_email"],
"primary_phone": r["primary_phone"],
"buyer_type": r["buyer_type"],
"lead_id": str(r["lead_id"]) if r["lead_id"] else None,
"lead_status": r["lead_status"],
"budget_band": r["budget_band"],
"urgency": r["urgency"],
"intent_score": float(r["intent_score"]),
"urgency_score": float(r["urgency_score"]),
"interaction_count": int(r["interaction_count"]),
"last_interaction_at": r["last_interaction_at"].isoformat() if r["last_interaction_at"] else None,
"pending_tasks": int(r["pending_tasks"]),
"created_at": r["created_at"].isoformat() if r["created_at"] else None,
})
return {
"contacts": contacts,
"total": total,
"limit": limit,
"offset": offset,
}

View File

@@ -0,0 +1,3 @@
"""
backend/services/imports/__init__.py
"""

View File

@@ -0,0 +1,282 @@
"""
backend/services/imports/ingest_service.py
CRM Import Ingestion Service
Implements the RawImportBatch → ImportMappingManifest → NormalizedEntityProposal pipeline
as specified in Doc 08 (Adapter Spec) and Doc 07 (Contracts and Schema Blueprint).
Flow:
1. receive CSV upload, store raw batch record
2. parse headers and infer column mapping
3. validate row structure, detect unresolved columns
4. create NormalizedEntityProposal records for review
5. queue for human approval before canonical commit
"""
from __future__ import annotations
import csv
import io
import json
import logging
import uuid
from datetime import datetime, timezone
from typing import Any
logger = logging.getLogger("velocity.imports.ingest")
# ── Column mapping heuristics ─────────────────────────────────────────────────
# Maps common source column names → canonical crm_people / crm_leads fields.
CANONICAL_COLUMN_MAP: dict[str, str] = {
# Identity
"name": "full_name",
"full name": "full_name",
"client name": "full_name",
"contact name": "full_name",
"first name": "full_name",
"customer name": "full_name",
# Email
"email": "primary_email",
"email address": "primary_email",
"e-mail": "primary_email",
# Phone
"phone": "primary_phone",
"mobile": "primary_phone",
"contact number": "primary_phone",
"mobile number": "primary_phone",
"phone number": "primary_phone",
# Budget
"budget": "budget_band",
"budget range": "budget_band",
"investment budget": "budget_band",
# Project interest
"project": "project_name",
"project name": "project_name",
"interested in": "project_name",
"property interest": "project_name",
# Source
"source": "source_system",
"lead source": "source_system",
"channel": "source_system",
# Status / Stage
"status": "status",
"lead status": "status",
"stage": "status",
"funnel stage": "status",
# Notes
"notes": "notes",
"remarks": "notes",
"comment": "notes",
"comments": "notes",
# Buyer type
"type": "buyer_type",
"client type": "buyer_type",
"category": "buyer_type",
}
REQUIRED_CANONICAL_FIELDS = {"full_name"}
HIGH_RISK_FIELDS = {"primary_email", "primary_phone"}
def _normalize_header(h: str) -> str:
return h.strip().lower().replace("_", " ")
def infer_column_mapping(headers: list[str]) -> dict[str, Any]:
"""
Produce an ImportMappingManifest-compatible mapping dict.
Returns: {
mapped: {source_col → canonical_field},
unmapped: [source_col, ...],
confidence: 0.0-1.0
}
"""
mapped: dict[str, str] = {}
unmapped: list[str] = []
for h in headers:
normalized = _normalize_header(h)
canonical = CANONICAL_COLUMN_MAP.get(normalized)
if canonical:
mapped[h] = canonical
else:
unmapped.append(h)
mapped_count = len(mapped)
total = len(headers)
confidence = mapped_count / total if total > 0 else 0.0
return {
"mapped": mapped,
"unmapped": unmapped,
"mapped_count": mapped_count,
"unmapped_count": len(unmapped),
"confidence": round(confidence, 3),
}
def parse_csv_content(content: str) -> dict[str, Any]:
"""
Parse CSV content, detect headers, and extract rows.
Returns: {headers, rows, row_count, parse_errors}
"""
reader = csv.DictReader(io.StringIO(content))
headers = reader.fieldnames or []
rows: list[dict[str, Any]] = []
parse_errors: list[str] = []
for i, row in enumerate(reader):
try:
rows.append(dict(row))
except Exception as e:
parse_errors.append(f"Row {i + 2}: {str(e)}")
return {
"headers": list(headers),
"rows": rows,
"row_count": len(rows),
"parse_errors": parse_errors,
}
def build_normalized_proposals(
rows: list[dict[str, Any]],
mapping: dict[str, str],
batch_id: str,
source_system: str = "csv_upload",
) -> list[dict[str, Any]]:
"""
Convert raw CSV rows to NormalizedEntityProposal payloads.
One proposal per row — each must be approved before canonical commit.
"""
proposals: list[dict[str, Any]] = []
now = datetime.now(timezone.utc).isoformat()
for i, row in enumerate(rows):
canonical: dict[str, Any] = {}
unresolved: list[str] = []
confidence = 1.0
for src_col, canonical_field in mapping.items():
val = row.get(src_col, "").strip()
if val:
canonical[canonical_field] = val
else:
unresolved.append(src_col)
# Validate required fields
review_required = False
missing_required = [f for f in REQUIRED_CANONICAL_FIELDS if not canonical.get(f)]
if missing_required:
review_required = True
confidence = max(0.0, confidence - 0.4)
# Flag high-risk fields (email/phone) if empty
missing_high_risk = [f for f in HIGH_RISK_FIELDS if not canonical.get(f)]
if missing_high_risk:
confidence = max(0.0, confidence - 0.1 * len(missing_high_risk))
proposal: dict[str, Any] = {
"proposal_id": str(uuid.uuid4()),
"batch_id": batch_id,
"row_number": i + 2,
"entity_type": "crm_person_with_lead",
"canonical_payload": canonical,
"raw_row": row,
"unresolved_fields": unresolved,
"missing_required": missing_required,
"confidence": round(confidence, 3),
"review_required": review_required,
"status": "proposed",
"created_at": now,
"source_system": source_system,
}
proposals.append(proposal)
return proposals
def create_import_batch_record(
filename: str,
row_count: int,
mapping_manifest: dict[str, Any],
source_system: str = "csv_upload",
uploaded_by_id: str | None = None,
) -> dict[str, Any]:
"""
Build the workflow_import_batches record payload.
"""
now = datetime.now(timezone.utc).isoformat()
return {
"batch_id": str(uuid.uuid4()),
"source_system": source_system,
"uploaded_filename": filename,
"mime_type": "text/csv",
"row_count": row_count,
"mapped_count": mapping_manifest.get("mapped_count", 0),
"unresolved_count": mapping_manifest.get("unmapped_count", 0),
"uploaded_by": uploaded_by_id,
"lifecycle": "parsed",
"mapping_manifest": mapping_manifest,
"created_at": now,
"updated_at": now,
}
async def persist_import_batch(conn: Any, batch: dict[str, Any]) -> str:
"""
Insert a workflow_import_batches row and return batch_id.
"""
await conn.execute(
"""
INSERT INTO workflow_import_batches (
batch_id, source_system, uploaded_filename, mime_type, row_count,
mapped_count, unresolved_count, uploaded_by, lifecycle, mapping_manifest,
created_at, updated_at
) VALUES (
$1::uuid, $2, $3, $4, $5, $6, $7,
$8::uuid, $9::import_lifecycle, $10::jsonb, NOW(), NOW()
)
""",
batch["batch_id"],
batch["source_system"],
batch.get("uploaded_filename", "unknown.csv"),
batch.get("mime_type", "text/csv"),
batch.get("row_count", 0),
batch.get("mapped_count", 0),
batch.get("unresolved_count", 0),
batch.get("uploaded_by"),
batch.get("lifecycle", "parsed"),
json.dumps(batch.get("mapping_manifest", {})),
)
return batch["batch_id"]
async def persist_proposals_as_workflow_actions(
conn: Any, proposals: list[dict[str, Any]]
) -> int:
"""
Insert proposals into workflow_actions table for human review.
Returns inserted count.
"""
inserted = 0
for p in proposals:
await conn.execute(
"""
INSERT INTO workflow_actions (
action_id, action_type, target_domain, proposal_payload,
reasoning_summary, confidence, status, approval_required,
created_by_agent, created_at, updated_at
) VALUES (
$1::uuid, 'import_proposal', 'crm', $2::jsonb,
$3, $4, 'pending'::wf_status, $5, 'ingest_service', NOW(), NOW()
)
""",
p["proposal_id"],
json.dumps(p),
f"Import row {p['row_number']}: {p['canonical_payload'].get('full_name', 'unknown')}",
p["confidence"],
p["review_required"],
)
inserted += 1
return inserted