feat: iPad app production readiness, Colony orchestration, Social posting (#44)

#38 iPad app production readiness, Colony orchestration, Social posting

Co-authored-by: Sayan Datta <sayan@Sayans-MacBook-Air.local>
Reviewed-on: #44
2026-05-03 18:30:38 +05:30
parent 59d398abc3
commit eeb684b46c
86 changed files with 20349 additions and 1655 deletions
--- a/backend/services/comms_ingest.py
+++ b/backend/services/comms_ingest.py
@@ -6,9 +6,12 @@ import json
 import os
 import re
 from datetime import UTC, datetime
+from pathlib import Path
 from typing import Any
 from uuid import UUID

+import httpx
+
 PHONEUTILS_AVAILABLE = False
 try:
    import phonenumbers
@@ -22,6 +25,130 @@ except ImportError:
 # Default region handed to normalize_phone(); read from the
 # COMMS_DEFAULT_COUNTRY_CODE environment variable, falling back to "91".
 DEFAULT_COUNTRY = os.getenv("COMMS_DEFAULT_COUNTRY_CODE", "91")


+class TranscriptionError(RuntimeError):
+    """Signal that no transcript text could be obtained for a recording.
+
+    Used for missing or unsupported provider configuration, provider
+    responses that carry no text, and failed provider HTTP requests.
+    """
+
+
+async def _read_recording_bytes(recording_url: str) -> tuple[bytes, str, str]:
+    """Load a recording and return ``(audio_bytes, filename, content_type)``.
+
+    Resolution order:
+
+    1. ``file://`` URLs are read from the local filesystem (``~`` is expanded).
+    2. Values without an ``http``/``https`` scheme are tried as a local path
+       and read if that path exists.
+    3. Everything else is fetched with an HTTP GET that follows redirects.
+
+    Local reads always report ``application/octet-stream``; remote reads use
+    the response ``Content-Type`` header when present. The filename falls back
+    to ``recording.audio`` when none can be derived.
+
+    Raises:
+        TranscriptionError: if ``recording_url`` is empty.
+        OSError: if a local file cannot be read (e.g. a missing ``file://`` target).
+        httpx.HTTPError: if the download fails or returns an error status.
+    """
+    # Function-scope imports keep this block self-contained.
+    import asyncio
+    from urllib.parse import urlsplit
+
+    if not recording_url:
+        raise TranscriptionError("recording_url is required.")
+
+    fallback_name = "recording.audio"
+    octet_stream = "application/octet-stream"
+
+    # NOTE(review): if recording_url can be influenced from outside the service,
+    # the local-file branches can read arbitrary files and the HTTP branch can
+    # reach internal hosts. Confirm the value is validated upstream.
+    try:
+        parts = urlsplit(recording_url)
+        scheme, url_path = parts.scheme.lower(), parts.path
+    except ValueError:
+        # Unparseable as a URL: treat it like a plain string and let the
+        # local-path probe / HTTP client decide, as before.
+        scheme, url_path = "", recording_url
+
+    local_path: Path | None = None
+    if recording_url.startswith("file://"):
+        local_path = Path(recording_url.removeprefix("file://")).expanduser()
+    elif scheme not in {"http", "https"}:
+        # Only probe the filesystem for non-HTTP values. Stat-ing a long signed
+        # URL can raise OSError (ENAMETOOLONG) before any download is attempted.
+        candidate = Path(recording_url).expanduser()
+        try:
+            if candidate.exists():
+                local_path = candidate
+        except OSError:
+            # Not usable as a local path; fall through to the HTTP client.
+            local_path = None
+
+    if local_path is not None:
+        # read_bytes() blocks, so run it in a worker thread to keep the event loop free.
+        audio = await asyncio.to_thread(local_path.read_bytes)
+        return audio, local_path.name or fallback_name, octet_stream
+
+    async with httpx.AsyncClient(timeout=60.0, follow_redirects=True) as client:
+        response = await client.get(recording_url)
+        response.raise_for_status()
+        content_type = response.headers.get("content-type", octet_stream)
+        # Use only the URL path so query strings and fragments (e.g. signed-URL
+        # parameters) do not end up in the filename.
+        filename = url_path.rstrip("/").split("/")[-1] or fallback_name
+        return response.content, filename, content_type
+
+
+async def _transcribe_openai(recording_url: str) -> dict[str, Any]:
+    """Transcribe a recording via OpenAI's audio transcription endpoint.
+
+    Reads ``OPENAI_API_KEY`` (required) and ``COMMS_OPENAI_TRANSCRIPTION_MODEL``
+    (default ``whisper-1``), uploads the audio as a multipart form and asks for
+    ``verbose_json`` output.
+
+    Returns:
+        A dict with ``text``, ``provider``, ``language``, ``segments`` and the
+        unmodified response under ``raw``.
+
+    Raises:
+        TranscriptionError: if the API key is missing or the response has no text.
+    """
+    token = os.getenv("OPENAI_API_KEY", "").strip()
+    if not token:
+        raise TranscriptionError("OPENAI_API_KEY is required for COMMS_TRANSCRIPTION_PROVIDER=openai.")
+
+    recording, upload_name, mime_type = await _read_recording_bytes(recording_url)
+    form_fields = {
+        "model": os.getenv("COMMS_OPENAI_TRANSCRIPTION_MODEL", "whisper-1"),
+        "response_format": "verbose_json",
+    }
+    auth_header = {"Authorization": f"Bearer {token}"}
+
+    async with httpx.AsyncClient(timeout=120.0) as http:
+        reply = await http.post(
+            "https://api.openai.com/v1/audio/transcriptions",
+            headers=auth_header,
+            data=form_fields,
+            files={"file": (upload_name, recording, mime_type)},
+        )
+        reply.raise_for_status()
+
+    body = reply.json()
+    transcript = (body.get("text") or "").strip()
+    if not transcript:
+        raise TranscriptionError("OpenAI transcription response did not include text.")
+
+    # Missing or empty optional fields collapse to neutral defaults.
+    detected_language = body.get("language") or "unknown"
+    segment_list = body.get("segments") or []
+    return {
+        "text": transcript,
+        "provider": "openai",
+        "language": detected_language,
+        "segments": segment_list,
+        "raw": body,
+    }
+
+
+async def _transcribe_deepgram(recording_url: str) -> dict[str, Any]:
+    """Transcribe a recording via Deepgram's ``/v1/listen`` endpoint.
+
+    Reads ``DEEPGRAM_API_KEY`` (required), ``COMMS_DEEPGRAM_MODEL`` (default
+    ``nova-2``) and ``COMMS_TRANSCRIPTION_LANGUAGE`` (default ``en``). The raw
+    audio bytes are posted as the request body with diarization and smart
+    formatting enabled.
+
+    Returns:
+        A dict with ``text``, ``provider``, ``language`` (the requested language,
+        not a detected one), ``segments`` (the word list of the first
+        alternative) and the unmodified response under ``raw``.
+
+    Raises:
+        TranscriptionError: if the API key is missing, or the response carries
+            no transcript (including structurally empty responses).
+    """
+
+    def _first_dict(value: Any) -> dict[str, Any]:
+        """Return ``value[0]`` when *value* is a non-empty list headed by a dict, else ``{}``."""
+        if isinstance(value, list) and value and isinstance(value[0], dict):
+            return value[0]
+        return {}
+
+    api_key = os.getenv("DEEPGRAM_API_KEY", "").strip()
+    if not api_key:
+        raise TranscriptionError("DEEPGRAM_API_KEY is required for COMMS_TRANSCRIPTION_PROVIDER=deepgram.")
+
+    audio, _, content_type = await _read_recording_bytes(recording_url)
+    model = os.getenv("COMMS_DEEPGRAM_MODEL", "nova-2")
+    language = os.getenv("COMMS_TRANSCRIPTION_LANGUAGE", "en")
+    async with httpx.AsyncClient(timeout=120.0) as client:
+        response = await client.post(
+            "https://api.deepgram.com/v1/listen",
+            # Let httpx encode the query string so env-supplied values cannot
+            # inject extra parameters or break the URL.
+            params={
+                "model": model,
+                "language": language,
+                "diarize": "true",
+                "smart_format": "true",
+            },
+            headers={"Authorization": f"Token {api_key}", "Content-Type": content_type},
+            content=audio,
+        )
+        response.raise_for_status()
+    payload = response.json()
+
+    # Walk results -> channels[0] -> alternatives[0] defensively: empty lists or
+    # null nodes should surface as "no text", not as IndexError/AttributeError.
+    results = payload.get("results") if isinstance(payload, dict) else None
+    channel = _first_dict(results.get("channels") if isinstance(results, dict) else None)
+    alternative = _first_dict(channel.get("alternatives"))
+
+    text = (alternative.get("transcript") or "").strip()
+    if not text:
+        raise TranscriptionError("Deepgram transcription response did not include text.")
+    words = alternative.get("words") or []
+    return {
+        "text": text,
+        "provider": "deepgram",
+        "language": language,
+        "segments": words,
+        "raw": payload,
+    }
+
+
+async def _transcribe_http_endpoint(recording_url: str) -> dict[str, Any]:
+    """Delegate transcription to a custom HTTP endpoint.
+
+    Posts ``{"recording_url": ...}`` as JSON to ``COMMS_TRANSCRIPTION_ENDPOINT``
+    (required). When ``COMMS_TRANSCRIPTION_ENDPOINT_TOKEN`` is set it is sent as
+    a bearer token. The transcript is taken from the response's ``text`` field,
+    falling back to ``transcript``.
+
+    Returns:
+        A dict with ``text``, ``provider``, ``language``, ``segments`` and the
+        unmodified response under ``raw``.
+
+    Raises:
+        TranscriptionError: if the endpoint is not configured or the response
+            has no transcript text.
+    """
+    target = os.getenv("COMMS_TRANSCRIPTION_ENDPOINT", "").strip()
+    if not target:
+        raise TranscriptionError("COMMS_TRANSCRIPTION_ENDPOINT is required for COMMS_TRANSCRIPTION_PROVIDER=http.")
+
+    # The Authorization header is only attached when a token is configured.
+    bearer = os.getenv("COMMS_TRANSCRIPTION_ENDPOINT_TOKEN", "").strip()
+    request_headers: dict[str, str] = {}
+    if bearer:
+        request_headers["Authorization"] = f"Bearer {bearer}"
+
+    async with httpx.AsyncClient(timeout=120.0) as http:
+        reply = await http.post(target, json={"recording_url": recording_url}, headers=request_headers)
+        reply.raise_for_status()
+
+    body = reply.json()
+    transcript = (body.get("text") or body.get("transcript") or "").strip()
+    if not transcript:
+        raise TranscriptionError("HTTP transcription endpoint response did not include text.")
+
+    reported_language = body.get("language") or "unknown"
+    segment_list = body.get("segments") or []
+    return {
+        "text": transcript,
+        "provider": "http",
+        "language": reported_language,
+        "segments": segment_list,
+        "raw": body,
+    }
+
+
+async def transcribe_recording(recording_url: str, provider: str | None = None) -> dict[str, Any]:
+    """Transcribe a recording with the requested or configured provider.
+
+    The provider is taken from ``provider`` when given, otherwise from
+    ``COMMS_TRANSCRIPTION_PROVIDER`` (default ``none``); it is stripped and
+    lower-cased before matching. Accepted names are ``openai``/``whisper``,
+    ``deepgram`` and ``http``/``endpoint``/``custom``.
+
+    Returns:
+        The dict produced by the selected provider function.
+
+    Raises:
+        TranscriptionError: if no provider is configured, the provider name is
+            unsupported, the provider itself raises it, or an ``httpx.HTTPError``
+            occurs (chained as the cause).
+    """
+    choice = (provider or os.getenv("COMMS_TRANSCRIPTION_PROVIDER", "none")).strip().lower()
+
+    # Resolve the backend up front; configuration problems never touch the network.
+    if choice in ("", "none", "disabled"):
+        raise TranscriptionError("COMMS_TRANSCRIPTION_PROVIDER is not configured.")
+    if choice in ("openai", "whisper"):
+        backend = _transcribe_openai
+    elif choice == "deepgram":
+        backend = _transcribe_deepgram
+    elif choice in ("http", "endpoint", "custom"):
+        backend = _transcribe_http_endpoint
+    else:
+        raise TranscriptionError(f"Unsupported COMMS_TRANSCRIPTION_PROVIDER '{choice}'.")
+
+    try:
+        return await backend(recording_url)
+    except httpx.HTTPError as exc:
+        # Normalise transport and status failures into the module's own error type.
+        raise TranscriptionError(f"{choice} transcription request failed: {exc}") from exc
+
+
 def normalize_phone(phone: str, default_region: str = DEFAULT_COUNTRY) -> str | None:
    """Return an E.164-like phone number suitable for provider and CRM matching."""
    if not phone: