feat(crm): canonical crm and imported routes implementation

2026-04-18 21:32:54 +05:30
parent 37c06de749
commit 954618c3ef
52 changed files with 80656 additions and 1 deletions
--- a/backend/services/imports/ingest_service.py
+++ b/backend/services/imports/ingest_service.py
@@ -0,0 +1,282 @@
+"""
+backend/services/imports/ingest_service.py
+CRM Import Ingestion Service
+
+Implements the RawImportBatch → ImportMappingManifest → NormalizedEntityProposal pipeline
+as specified in Doc 08 (Adapter Spec) and Doc 07 (Contracts and Schema Blueprint).
+
+Flow:
+  1. receive CSV upload, store raw batch record
+  2. parse headers and infer column mapping
+  3. validate row structure, detect unresolved columns
+  4. create NormalizedEntityProposal records for review
+  5. queue for human approval before canonical commit
+"""
+from __future__ import annotations
+
+import csv
+import io
+import json
+import logging
+import uuid
+from datetime import datetime, timezone
+from typing import Any
+
+logger = logging.getLogger("velocity.imports.ingest")
+
+# ── Column mapping heuristics ─────────────────────────────────────────────────
+# Maps common source column names → canonical crm_people / crm_leads fields.
+
+CANONICAL_COLUMN_MAP: dict[str, str] = {
+    # Identity
+    "name": "full_name",
+    "full name": "full_name",
+    "client name": "full_name",
+    "contact name": "full_name",
+    "first name": "full_name",
+    "customer name": "full_name",
+    # Email
+    "email": "primary_email",
+    "email address": "primary_email",
+    "e-mail": "primary_email",
+    # Phone
+    "phone": "primary_phone",
+    "mobile": "primary_phone",
+    "contact number": "primary_phone",
+    "mobile number": "primary_phone",
+    "phone number": "primary_phone",
+    # Budget
+    "budget": "budget_band",
+    "budget range": "budget_band",
+    "investment budget": "budget_band",
+    # Project interest
+    "project": "project_name",
+    "project name": "project_name",
+    "interested in": "project_name",
+    "property interest": "project_name",
+    # Source
+    "source": "source_system",
+    "lead source": "source_system",
+    "channel": "source_system",
+    # Status / Stage
+    "status": "status",
+    "lead status": "status",
+    "stage": "status",
+    "funnel stage": "status",
+    # Notes
+    "notes": "notes",
+    "remarks": "notes",
+    "comment": "notes",
+    "comments": "notes",
+    # Buyer type
+    "type": "buyer_type",
+    "client type": "buyer_type",
+    "category": "buyer_type",
+}
+
+REQUIRED_CANONICAL_FIELDS = {"full_name"}
+HIGH_RISK_FIELDS = {"primary_email", "primary_phone"}
+
+
+def _normalize_header(h: str) -> str:
+    return h.strip().lower().replace("_", " ")
+
+
+def infer_column_mapping(headers: list[str]) -> dict[str, Any]:
+    """
+    Produce an ImportMappingManifest-compatible mapping dict.
+    Returns: {
+        mapped: {source_col → canonical_field},
+        unmapped: [source_col, ...],
+        confidence: 0.0-1.0
+    }
+    """
+    mapped: dict[str, str] = {}
+    unmapped: list[str] = []
+
+    for h in headers:
+        normalized = _normalize_header(h)
+        canonical = CANONICAL_COLUMN_MAP.get(normalized)
+        if canonical:
+            mapped[h] = canonical
+        else:
+            unmapped.append(h)
+
+    mapped_count = len(mapped)
+    total = len(headers)
+    confidence = mapped_count / total if total > 0 else 0.0
+
+    return {
+        "mapped": mapped,
+        "unmapped": unmapped,
+        "mapped_count": mapped_count,
+        "unmapped_count": len(unmapped),
+        "confidence": round(confidence, 3),
+    }
+
+
+def parse_csv_content(content: str) -> dict[str, Any]:
+    """
+    Parse CSV content, detect headers, and extract rows.
+    Returns: {headers, rows, row_count, parse_errors}
+    """
+    reader = csv.DictReader(io.StringIO(content))
+    headers = reader.fieldnames or []
+    rows: list[dict[str, Any]] = []
+    parse_errors: list[str] = []
+
+    for i, row in enumerate(reader):
+        try:
+            rows.append(dict(row))
+        except Exception as e:
+            parse_errors.append(f"Row {i + 2}: {str(e)}")
+
+    return {
+        "headers": list(headers),
+        "rows": rows,
+        "row_count": len(rows),
+        "parse_errors": parse_errors,
+    }
+
+
+def build_normalized_proposals(
+    rows: list[dict[str, Any]],
+    mapping: dict[str, str],
+    batch_id: str,
+    source_system: str = "csv_upload",
+) -> list[dict[str, Any]]:
+    """
+    Convert raw CSV rows to NormalizedEntityProposal payloads.
+    One proposal per row — each must be approved before canonical commit.
+    """
+    proposals: list[dict[str, Any]] = []
+    now = datetime.now(timezone.utc).isoformat()
+
+    for i, row in enumerate(rows):
+        canonical: dict[str, Any] = {}
+        unresolved: list[str] = []
+        confidence = 1.0
+
+        for src_col, canonical_field in mapping.items():
+            val = row.get(src_col, "").strip()
+            if val:
+                canonical[canonical_field] = val
+            else:
+                unresolved.append(src_col)
+
+        # Validate required fields
+        review_required = False
+        missing_required = [f for f in REQUIRED_CANONICAL_FIELDS if not canonical.get(f)]
+        if missing_required:
+            review_required = True
+            confidence = max(0.0, confidence - 0.4)
+
+        # Flag high-risk fields (email/phone) if empty
+        missing_high_risk = [f for f in HIGH_RISK_FIELDS if not canonical.get(f)]
+        if missing_high_risk:
+            confidence = max(0.0, confidence - 0.1 * len(missing_high_risk))
+
+        proposal: dict[str, Any] = {
+            "proposal_id": str(uuid.uuid4()),
+            "batch_id": batch_id,
+            "row_number": i + 2,
+            "entity_type": "crm_person_with_lead",
+            "canonical_payload": canonical,
+            "raw_row": row,
+            "unresolved_fields": unresolved,
+            "missing_required": missing_required,
+            "confidence": round(confidence, 3),
+            "review_required": review_required,
+            "status": "proposed",
+            "created_at": now,
+            "source_system": source_system,
+        }
+        proposals.append(proposal)
+
+    return proposals
+
+
+def create_import_batch_record(
+    filename: str,
+    row_count: int,
+    mapping_manifest: dict[str, Any],
+    source_system: str = "csv_upload",
+    uploaded_by_id: str | None = None,
+) -> dict[str, Any]:
+    """
+    Build the workflow_import_batches record payload.
+    """
+    now = datetime.now(timezone.utc).isoformat()
+    return {
+        "batch_id": str(uuid.uuid4()),
+        "source_system": source_system,
+        "uploaded_filename": filename,
+        "mime_type": "text/csv",
+        "row_count": row_count,
+        "mapped_count": mapping_manifest.get("mapped_count", 0),
+        "unresolved_count": mapping_manifest.get("unmapped_count", 0),
+        "uploaded_by": uploaded_by_id,
+        "lifecycle": "parsed",
+        "mapping_manifest": mapping_manifest,
+        "created_at": now,
+        "updated_at": now,
+    }
+
+
+async def persist_import_batch(conn: Any, batch: dict[str, Any]) -> str:
+    """
+    Insert a workflow_import_batches row and return batch_id.
+    """
+    await conn.execute(
+        """
+        INSERT INTO workflow_import_batches (
+            batch_id, source_system, uploaded_filename, mime_type, row_count,
+            mapped_count, unresolved_count, uploaded_by, lifecycle, mapping_manifest,
+            created_at, updated_at
+        ) VALUES (
+            $1::uuid, $2, $3, $4, $5, $6, $7,
+            $8::uuid, $9::import_lifecycle, $10::jsonb, NOW(), NOW()
+        )
+        """,
+        batch["batch_id"],
+        batch["source_system"],
+        batch.get("uploaded_filename", "unknown.csv"),
+        batch.get("mime_type", "text/csv"),
+        batch.get("row_count", 0),
+        batch.get("mapped_count", 0),
+        batch.get("unresolved_count", 0),
+        batch.get("uploaded_by"),
+        batch.get("lifecycle", "parsed"),
+        json.dumps(batch.get("mapping_manifest", {})),
+    )
+    return batch["batch_id"]
+
+
+async def persist_proposals_as_workflow_actions(
+    conn: Any, proposals: list[dict[str, Any]]
+) -> int:
+    """
+    Insert proposals into workflow_actions table for human review.
+    Returns inserted count.
+    """
+    inserted = 0
+    for p in proposals:
+        await conn.execute(
+            """
+            INSERT INTO workflow_actions (
+                action_id, action_type, target_domain, proposal_payload,
+                reasoning_summary, confidence, status, approval_required,
+                created_by_agent, created_at, updated_at
+            ) VALUES (
+                $1::uuid, 'import_proposal', 'crm', $2::jsonb,
+                $3, $4, 'pending'::wf_status, $5, 'ingest_service', NOW(), NOW()
+            )
+            """,
+            p["proposal_id"],
+            json.dumps(p),
+            f"Import row {p['row_number']}: {p['canonical_payload'].get('full_name', 'unknown')}",
+            p["confidence"],
+            p["review_required"],
+        )
+        inserted += 1
+    return inserted