feat(crm): canonical crm and imported routes implementation

2026-04-18 21:32:54 +05:30
parent 37c06de749
commit 954618c3ef
52 changed files with 80656 additions and 1 deletions
--- a/backend/services/client_graph/init.py
+++ b/backend/services/client_graph/init.py
@@ -0,0 +1,3 @@
+"""
+backend/services/client_graph/__init__.py
+"""
--- a/backend/services/client_graph/aggregation_service.py
+++ b/backend/services/client_graph/aggregation_service.py
@@ -0,0 +1,369 @@
+"""
+backend/services/client_graph/aggregation_service.py
+Client 360 Aggregation Service
+
+Produces Client360Snapshot read models by joining across
+crm_people, crm_leads, crm_opportunities, intel_interactions,
+intel_reminders, intel_qd_scores, crm_property_interests.
+
+This is a derived read model — never the sole source of truth.
+As specified in Doc 07 (Client360Snapshot contract) and Doc 08 (Adapter Spec).
+"""
+from __future__ import annotations
+
+import logging
+from typing import Any
+
+logger = logging.getLogger("velocity.client_graph.aggregation")
+
+
+def _serialize_person(row: Any) -> dict[str, Any]:
+    return {
+        "person_id": str(row["person_id"]),
+        "full_name": row["full_name"],
+        "primary_email": row["primary_email"],
+        "primary_phone": row["primary_phone"],
+        "buyer_type": row["buyer_type"],
+        "persona_labels": row["persona_labels"] or [],
+        "source_confidence": float(row["source_confidence"] or 0.0),
+        "created_at": row["created_at"].isoformat() if row["created_at"] else None,
+    }
+
+
+def _serialize_lead(row: Any) -> dict[str, Any]:
+    return {
+        "lead_id": str(row["lead_id"]),
+        "status": row["status"],
+        "budget_band": row["budget_band"],
+        "urgency": row["urgency"],
+        "financing_posture": row["financing_posture"],
+        "timeline_to_decision": row["timeline_to_decision"],
+        "objections": row["objections"] or [],
+        "motivations": row["motivations"] or [],
+        "created_at": row["created_at"].isoformat() if row["created_at"] else None,
+    }
+
+
+def _serialize_opportunity(row: Any) -> dict[str, Any]:
+    return {
+        "opportunity_id": str(row["opportunity_id"]),
+        "stage": row["stage"],
+        "value": float(row["value"]) if row["value"] else None,
+        "probability": row["probability"],
+        "expected_close_date": row["expected_close_date"].isoformat() if row["expected_close_date"] else None,
+        "next_action": row["next_action"],
+        "project_id": str(row["project_id"]) if row["project_id"] else None,
+        "unit_id": str(row["unit_id"]) if row["unit_id"] else None,
+    }
+
+
+def _serialize_interaction(row: Any) -> dict[str, Any]:
+    return {
+        "interaction_id": str(row["interaction_id"]),
+        "channel": row["channel"],
+        "interaction_type": row["interaction_type"],
+        "happened_at": row["happened_at"].isoformat() if row["happened_at"] else None,
+        "summary": row["summary"],
+    }
+
+
+def _serialize_reminder(row: Any) -> dict[str, Any]:
+    return {
+        "reminder_id": str(row["reminder_id"]),
+        "reminder_type": row["reminder_type"],
+        "title": row["title"],
+        "due_at": row["due_at"].isoformat() if row["due_at"] else None,
+        "status": row["status"],
+        "priority": row["priority"],
+    }
+
+
+def _serialize_qd_score(row: Any) -> dict[str, Any]:
+    return {
+        "score_type": row["score_type"],
+        "current_value": float(row["current_value"]),
+        "computed_at": row["computed_at"].isoformat() if row["computed_at"] else None,
+        "reasoning": row["reasoning"],
+    }
+
+
+def _serialize_property_interest(row: Any) -> dict[str, Any]:
+    return {
+        "interest_id": str(row["interest_id"]),
+        "project_name": row["project_name"],
+        "unit_preference": row["unit_preference"],
+        "configuration": row["configuration"],
+        "budget_min": float(row["budget_min"]) if row["budget_min"] else None,
+        "budget_max": float(row["budget_max"]) if row["budget_max"] else None,
+        "priority": row["priority"],
+    }
+
+
+async def get_client_360(conn: Any, person_id: str) -> dict[str, Any] | None:
+    """
+    Aggregate a full Client360Snapshot for a given person_id.
+    This is a read model — derived from canonical tables, never primary truth.
+    """
+    # 1. Core identity
+    person_row = await conn.fetchrow(
+        """
+        SELECT person_id, full_name, primary_email, primary_phone,
+               buyer_type, persona_labels, source_confidence, created_at
+        FROM crm_people
+        WHERE person_id = $1::uuid
+        """,
+        person_id,
+    )
+    if not person_row:
+        return None
+
+    identity = _serialize_person(person_row)
+
+    # 2. Account links
+    account_rows = await conn.fetch(
+        """
+        SELECT ca.account_id, ca.account_name, ca.account_type, ca.industry
+        FROM crm_accounts ca
+        INNER JOIN crm_leads cl ON cl.account_id = ca.account_id
+        WHERE cl.person_id = $1::uuid
+        LIMIT 5
+        """,
+        person_id,
+    )
+    account_links = [
+        {
+            "account_id": str(r["account_id"]),
+            "account_name": r["account_name"],
+            "account_type": r["account_type"],
+            "industry": r["industry"],
+        }
+        for r in account_rows
+    ]
+
+    # 3. Active lead
+    lead_row = await conn.fetchrow(
+        """
+        SELECT lead_id, status, budget_band, urgency, financing_posture,
+               timeline_to_decision, objections, motivations, created_at
+        FROM crm_leads
+        WHERE person_id = $1::uuid
+        ORDER BY created_at DESC
+        LIMIT 1
+        """,
+        person_id,
+    )
+    lead = _serialize_lead(lead_row) if lead_row else None
+
+    # 4. Active opportunities (top 5)
+    opp_rows = await conn.fetch(
+        """
+        SELECT co.opportunity_id, co.stage, co.value, co.probability,
+               co.expected_close_date, co.next_action, co.project_id, co.unit_id
+        FROM crm_opportunities co
+        INNER JOIN crm_leads cl ON cl.lead_id = co.lead_id
+        WHERE cl.person_id = $1::uuid
+        ORDER BY co.updated_at DESC
+        LIMIT 5
+        """,
+        person_id,
+    )
+    active_opportunities = [_serialize_opportunity(r) for r in opp_rows]
+
+    # 5. Recent interactions (last 10)
+    interaction_rows = await conn.fetch(
+        """
+        SELECT interaction_id, channel, interaction_type, happened_at, summary
+        FROM intel_interactions
+        WHERE person_id = $1::uuid
+        ORDER BY happened_at DESC
+        LIMIT 10
+        """,
+        person_id,
+    )
+    recent_interactions = [_serialize_interaction(r) for r in interaction_rows]
+
+    # 6. Property interests
+    interest_rows = await conn.fetch(
+        """
+        SELECT interest_id, project_name, unit_preference, configuration,
+               budget_min, budget_max, priority
+        FROM crm_property_interests
+        WHERE person_id = $1::uuid
+        ORDER BY priority ASC, interest_id ASC
+        LIMIT 10
+        """,
+        person_id,
+    )
+    property_interests = [_serialize_property_interest(r) for r in interest_rows]
+
+    # 7. Pending tasks / reminders
+    task_rows = await conn.fetch(
+        """
+        SELECT reminder_id, reminder_type, title, due_at, status, priority
+        FROM intel_reminders
+        WHERE person_id = $1::uuid
+          AND status IN ('pending', 'snoozed')
+        ORDER BY due_at ASC NULLS LAST
+        LIMIT 10
+        """,
+        person_id,
+    )
+    tasks = [_serialize_reminder(r) for r in task_rows]
+
+    # 8. QD overview (all score types)
+    qd_rows = await conn.fetch(
+        """
+        SELECT score_type, current_value, computed_at, reasoning
+        FROM intel_qd_scores
+        WHERE person_id = $1::uuid
+        """,
+        person_id,
+    )
+    qd_overview = {r["score_type"]: _serialize_qd_score(r) for r in qd_rows}
+
+    # 9. Risk flags — heuristic derivation
+    risk_flags: list[str] = []
+    if lead and lead.get("urgency") in ("high", "critical") and not active_opportunities:
+        risk_flags.append("high_urgency_without_active_opportunity")
+    if not recent_interactions:
+        risk_flags.append("no_recent_interactions")
+    if qd_overview.get("intent_score", {}).get("current_value", 1.0) < 0.3:
+        risk_flags.append("low_intent_score")
+    if not property_interests:
+        risk_flags.append("no_property_interests_recorded")
+
+    # 10. Recommended next actions — simple heuristic
+    recommended_next_actions: list[str] = []
+    if tasks:
+        overdue = [t for t in tasks if t.get("status") == "pending"]
+        if overdue:
+            recommended_next_actions.append(f"Complete pending task: {overdue[0]['title']}")
+    if lead and lead.get("urgency") in ("high", "critical"):
+        recommended_next_actions.append("High-urgency client — prioritize callback within 24h")
+    if not recent_interactions and lead:
+        recommended_next_actions.append("No recent interactions — schedule follow-up")
+
+    return {
+        "client_ref": person_id,
+        "snapshot_type": "client_360",
+        "identity": identity,
+        "account_links": account_links,
+        "current_lead": lead,
+        "active_opportunities": active_opportunities,
+        "recent_interactions": recent_interactions,
+        "property_interests": property_interests,
+        "tasks": tasks,
+        "qd_overview": qd_overview,
+        "risk_flags": risk_flags,
+        "recommended_next_actions": recommended_next_actions,
+        "note": "Derived read model. Not primary truth. Refresh from canonical tables.",
+    }
+
+
+async def get_contact_list(
+    conn: Any,
+    search: str | None = None,
+    buyer_type: str | None = None,
+    status: str | None = None,
+    limit: int = 50,
+    offset: int = 0,
+) -> dict[str, Any]:
+    """
+    Paginated contact list with lead status and QD summary.
+    Implements the 'summary query' pattern from Doc 09.
+    """
+    clauses: list[str] = ["1=1"]
+    params: list[Any] = []
+
+    if search:
+        params.append(f"%{search}%")
+        clauses.append(
+            f"(p.full_name ILIKE ${len(params)} OR p.primary_email ILIKE ${len(params)} OR p.primary_phone ILIKE ${len(params)})"
+        )
+    if buyer_type:
+        params.append(buyer_type)
+        clauses.append(f"p.buyer_type = ${len(params)}")
+    if status:
+        params.append(status)
+        clauses.append(f"cl.status = ${len(params)}::crm_lead_status")
+
+    where = "WHERE " + " AND ".join(clauses)
+    params_for_count = params.copy()
+
+    params.append(limit)
+    params.append(offset)
+
+    query = f"""
+        SELECT
+            p.person_id,
+            p.full_name,
+            p.primary_email,
+            p.primary_phone,
+            p.buyer_type,
+            p.created_at,
+            cl.lead_id,
+            cl.status AS lead_status,
+            cl.budget_band,
+            cl.urgency,
+            COALESCE(qs.intent_value, 0.0) AS intent_score,
+            COALESCE(qs.urgency_value, 0.0) AS urgency_score,
+            (SELECT COUNT(*) FROM intel_interactions ii WHERE ii.person_id = p.person_id) AS interaction_count,
+            (SELECT MAX(happened_at) FROM intel_interactions ii WHERE ii.person_id = p.person_id) AS last_interaction_at,
+            (SELECT COUNT(*) FROM intel_reminders ir WHERE ir.person_id = p.person_id AND ir.status = 'pending') AS pending_tasks
+        FROM crm_people p
+        LEFT JOIN LATERAL (
+            SELECT lead_id, status, budget_band, urgency
+            FROM crm_leads
+            WHERE person_id = p.person_id
+            ORDER BY created_at DESC
+            LIMIT 1
+        ) cl ON TRUE
+        LEFT JOIN LATERAL (
+            SELECT
+                MAX(CASE WHEN score_type = 'intent_score' THEN current_value END) AS intent_value,
+                MAX(CASE WHEN score_type = 'urgency_score' THEN current_value END) AS urgency_value
+            FROM intel_qd_scores
+            WHERE person_id = p.person_id
+        ) qs ON TRUE
+        {where}
+        ORDER BY last_interaction_at DESC NULLS LAST, p.created_at DESC
+        LIMIT ${len(params) - 1} OFFSET ${len(params)}
+    """
+
+    count_query = f"""
+        SELECT COUNT(*)
+        FROM crm_people p
+        LEFT JOIN crm_leads cl ON cl.person_id = p.person_id
+        {where}
+    """
+
+    rows = await conn.fetch(query, *params)
+    total_row = await conn.fetchrow(count_query, *params_for_count)
+    total = int(total_row[0]) if total_row else 0
+
+    contacts = []
+    for r in rows:
+        contacts.append({
+            "person_id": str(r["person_id"]),
+            "full_name": r["full_name"],
+            "primary_email": r["primary_email"],
+            "primary_phone": r["primary_phone"],
+            "buyer_type": r["buyer_type"],
+            "lead_id": str(r["lead_id"]) if r["lead_id"] else None,
+            "lead_status": r["lead_status"],
+            "budget_band": r["budget_band"],
+            "urgency": r["urgency"],
+            "intent_score": float(r["intent_score"]),
+            "urgency_score": float(r["urgency_score"]),
+            "interaction_count": int(r["interaction_count"]),
+            "last_interaction_at": r["last_interaction_at"].isoformat() if r["last_interaction_at"] else None,
+            "pending_tasks": int(r["pending_tasks"]),
+            "created_at": r["created_at"].isoformat() if r["created_at"] else None,
+        })
+
+    return {
+        "contacts": contacts,
+        "total": total,
+        "limit": limit,
+        "offset": offset,
+    }
--- a/backend/services/imports/init.py
+++ b/backend/services/imports/init.py
@@ -0,0 +1,3 @@
+"""
+backend/services/imports/__init__.py
+"""
--- a/backend/services/imports/ingest_service.py
+++ b/backend/services/imports/ingest_service.py
@@ -0,0 +1,282 @@
+"""
+backend/services/imports/ingest_service.py
+CRM Import Ingestion Service
+
+Implements the RawImportBatch → ImportMappingManifest → NormalizedEntityProposal pipeline
+as specified in Doc 08 (Adapter Spec) and Doc 07 (Contracts and Schema Blueprint).
+
+Flow:
+  1. receive CSV upload, store raw batch record
+  2. parse headers and infer column mapping
+  3. validate row structure, detect unresolved columns
+  4. create NormalizedEntityProposal records for review
+  5. queue for human approval before canonical commit
+"""
+from __future__ import annotations
+
+import csv
+import io
+import json
+import logging
+import uuid
+from datetime import datetime, timezone
+from typing import Any
+
+logger = logging.getLogger("velocity.imports.ingest")
+
+# ── Column mapping heuristics ─────────────────────────────────────────────────
+# Maps common source column names → canonical crm_people / crm_leads fields.
+
+CANONICAL_COLUMN_MAP: dict[str, str] = {
+    # Identity
+    "name": "full_name",
+    "full name": "full_name",
+    "client name": "full_name",
+    "contact name": "full_name",
+    "first name": "full_name",
+    "customer name": "full_name",
+    # Email
+    "email": "primary_email",
+    "email address": "primary_email",
+    "e-mail": "primary_email",
+    # Phone
+    "phone": "primary_phone",
+    "mobile": "primary_phone",
+    "contact number": "primary_phone",
+    "mobile number": "primary_phone",
+    "phone number": "primary_phone",
+    # Budget
+    "budget": "budget_band",
+    "budget range": "budget_band",
+    "investment budget": "budget_band",
+    # Project interest
+    "project": "project_name",
+    "project name": "project_name",
+    "interested in": "project_name",
+    "property interest": "project_name",
+    # Source
+    "source": "source_system",
+    "lead source": "source_system",
+    "channel": "source_system",
+    # Status / Stage
+    "status": "status",
+    "lead status": "status",
+    "stage": "status",
+    "funnel stage": "status",
+    # Notes
+    "notes": "notes",
+    "remarks": "notes",
+    "comment": "notes",
+    "comments": "notes",
+    # Buyer type
+    "type": "buyer_type",
+    "client type": "buyer_type",
+    "category": "buyer_type",
+}
+
+REQUIRED_CANONICAL_FIELDS = {"full_name"}
+HIGH_RISK_FIELDS = {"primary_email", "primary_phone"}
+
+
+def _normalize_header(h: str) -> str:
+    return h.strip().lower().replace("_", " ")
+
+
+def infer_column_mapping(headers: list[str]) -> dict[str, Any]:
+    """
+    Produce an ImportMappingManifest-compatible mapping dict.
+    Returns: {
+        mapped: {source_col → canonical_field},
+        unmapped: [source_col, ...],
+        confidence: 0.0-1.0
+    }
+    """
+    mapped: dict[str, str] = {}
+    unmapped: list[str] = []
+
+    for h in headers:
+        normalized = _normalize_header(h)
+        canonical = CANONICAL_COLUMN_MAP.get(normalized)
+        if canonical:
+            mapped[h] = canonical
+        else:
+            unmapped.append(h)
+
+    mapped_count = len(mapped)
+    total = len(headers)
+    confidence = mapped_count / total if total > 0 else 0.0
+
+    return {
+        "mapped": mapped,
+        "unmapped": unmapped,
+        "mapped_count": mapped_count,
+        "unmapped_count": len(unmapped),
+        "confidence": round(confidence, 3),
+    }
+
+
+def parse_csv_content(content: str) -> dict[str, Any]:
+    """
+    Parse CSV content, detect headers, and extract rows.
+    Returns: {headers, rows, row_count, parse_errors}
+    """
+    reader = csv.DictReader(io.StringIO(content))
+    headers = reader.fieldnames or []
+    rows: list[dict[str, Any]] = []
+    parse_errors: list[str] = []
+
+    for i, row in enumerate(reader):
+        try:
+            rows.append(dict(row))
+        except Exception as e:
+            parse_errors.append(f"Row {i + 2}: {str(e)}")
+
+    return {
+        "headers": list(headers),
+        "rows": rows,
+        "row_count": len(rows),
+        "parse_errors": parse_errors,
+    }
+
+
+def build_normalized_proposals(
+    rows: list[dict[str, Any]],
+    mapping: dict[str, str],
+    batch_id: str,
+    source_system: str = "csv_upload",
+) -> list[dict[str, Any]]:
+    """
+    Convert raw CSV rows to NormalizedEntityProposal payloads.
+    One proposal per row — each must be approved before canonical commit.
+    """
+    proposals: list[dict[str, Any]] = []
+    now = datetime.now(timezone.utc).isoformat()
+
+    for i, row in enumerate(rows):
+        canonical: dict[str, Any] = {}
+        unresolved: list[str] = []
+        confidence = 1.0
+
+        for src_col, canonical_field in mapping.items():
+            val = row.get(src_col, "").strip()
+            if val:
+                canonical[canonical_field] = val
+            else:
+                unresolved.append(src_col)
+
+        # Validate required fields
+        review_required = False
+        missing_required = [f for f in REQUIRED_CANONICAL_FIELDS if not canonical.get(f)]
+        if missing_required:
+            review_required = True
+            confidence = max(0.0, confidence - 0.4)
+
+        # Flag high-risk fields (email/phone) if empty
+        missing_high_risk = [f for f in HIGH_RISK_FIELDS if not canonical.get(f)]
+        if missing_high_risk:
+            confidence = max(0.0, confidence - 0.1 * len(missing_high_risk))
+
+        proposal: dict[str, Any] = {
+            "proposal_id": str(uuid.uuid4()),
+            "batch_id": batch_id,
+            "row_number": i + 2,
+            "entity_type": "crm_person_with_lead",
+            "canonical_payload": canonical,
+            "raw_row": row,
+            "unresolved_fields": unresolved,
+            "missing_required": missing_required,
+            "confidence": round(confidence, 3),
+            "review_required": review_required,
+            "status": "proposed",
+            "created_at": now,
+            "source_system": source_system,
+        }
+        proposals.append(proposal)
+
+    return proposals
+
+
+def create_import_batch_record(
+    filename: str,
+    row_count: int,
+    mapping_manifest: dict[str, Any],
+    source_system: str = "csv_upload",
+    uploaded_by_id: str | None = None,
+) -> dict[str, Any]:
+    """
+    Build the workflow_import_batches record payload.
+    """
+    now = datetime.now(timezone.utc).isoformat()
+    return {
+        "batch_id": str(uuid.uuid4()),
+        "source_system": source_system,
+        "uploaded_filename": filename,
+        "mime_type": "text/csv",
+        "row_count": row_count,
+        "mapped_count": mapping_manifest.get("mapped_count", 0),
+        "unresolved_count": mapping_manifest.get("unmapped_count", 0),
+        "uploaded_by": uploaded_by_id,
+        "lifecycle": "parsed",
+        "mapping_manifest": mapping_manifest,
+        "created_at": now,
+        "updated_at": now,
+    }
+
+
+async def persist_import_batch(conn: Any, batch: dict[str, Any]) -> str:
+    """
+    Insert a workflow_import_batches row and return batch_id.
+    """
+    await conn.execute(
+        """
+        INSERT INTO workflow_import_batches (
+            batch_id, source_system, uploaded_filename, mime_type, row_count,
+            mapped_count, unresolved_count, uploaded_by, lifecycle, mapping_manifest,
+            created_at, updated_at
+        ) VALUES (
+            $1::uuid, $2, $3, $4, $5, $6, $7,
+            $8::uuid, $9::import_lifecycle, $10::jsonb, NOW(), NOW()
+        )
+        """,
+        batch["batch_id"],
+        batch["source_system"],
+        batch.get("uploaded_filename", "unknown.csv"),
+        batch.get("mime_type", "text/csv"),
+        batch.get("row_count", 0),
+        batch.get("mapped_count", 0),
+        batch.get("unresolved_count", 0),
+        batch.get("uploaded_by"),
+        batch.get("lifecycle", "parsed"),
+        json.dumps(batch.get("mapping_manifest", {})),
+    )
+    return batch["batch_id"]
+
+
+async def persist_proposals_as_workflow_actions(
+    conn: Any, proposals: list[dict[str, Any]]
+) -> int:
+    """
+    Insert proposals into workflow_actions table for human review.
+    Returns inserted count.
+    """
+    inserted = 0
+    for p in proposals:
+        await conn.execute(
+            """
+            INSERT INTO workflow_actions (
+                action_id, action_type, target_domain, proposal_payload,
+                reasoning_summary, confidence, status, approval_required,
+                created_by_agent, created_at, updated_at
+            ) VALUES (
+                $1::uuid, 'import_proposal', 'crm', $2::jsonb,
+                $3, $4, 'pending'::wf_status, $5, 'ingest_service', NOW(), NOW()
+            )
+            """,
+            p["proposal_id"],
+            json.dumps(p),
+            f"Import row {p['row_number']}: {p['canonical_payload'].get('full_name', 'unknown')}",
+            p["confidence"],
+            p["review_required"],
+        )
+        inserted += 1
+    return inserted