feat: Oracle Canvas, Revision History and Canvas Sharing (#33)

Co-authored-by: Sagnik <sagnik7896@gmail.com>
Reviewed-on: sagnik/Project_Velocity#33
This commit is contained in:
2026-04-23 01:20:21 +05:30
parent e519339cc9
commit 6cdc366718
58 changed files with 3187 additions and 705 deletions

View File

@@ -32,6 +32,20 @@ SUPABASE_SERVICE_ROLE_KEY=PLACEHOLDER_your_supabase_service_role_key
# Base URL of ComfyUI server running locally or on GPU node
COMFY_BASE_URL=http://localhost:8188
# —— Shared Desineuron coding / Oracle / NemoClaw runtime —————————————————————
# Stable OpenAI-compatible SGLang route rendered through ingress.
LLM_BASE_URL=https://llm.desineuron.in
SGLANG_BASE_URL=https://llm.desineuron.in
SGLANG_CHAT_URL=https://llm.desineuron.in/v1/chat/completions
SGLANG_MODELS_URL=https://llm.desineuron.in/v1/models
SGLANG_MODEL=qwen3.6:35b-a3b
SGLANG_API_TOKEN=
# NemoClaw follows the same routed SGLang runtime.
NEMOCLAW_BASE_URL=https://llm.desineuron.in
NEMOCLAW_MODEL=qwen3.6:35b-a3b
NEMOCLAW_API_TOKEN=
# ── Backend ───────────────────────────────────────────────────────────────────
# CORS origins — comma-separated list of allowed frontend origins
CORS_ORIGINS=http://localhost:5173,http://localhost:3000

View File

@@ -70,6 +70,31 @@ def _json_object(value: Any) -> dict[str, Any]:
return {}
def _json_array(value: Any) -> list[Any]:
if isinstance(value, list):
return value
if isinstance(value, str) and value.strip():
try:
parsed = json.loads(value)
if isinstance(parsed, list):
return parsed
except Exception:
logger.warning("canvas_service: failed to parse JSON array field; using empty array")
return []
def _json_safe(value: Any) -> Any:
if isinstance(value, datetime):
return value.isoformat()
if isinstance(value, dict):
return {str(key): _json_safe(val) for key, val in value.items()}
if isinstance(value, list):
return [_json_safe(item) for item in value]
if isinstance(value, tuple):
return [_json_safe(item) for item in value]
return value
def _normalize_component(component: dict[str, Any]) -> dict[str, Any]:
normalized = deepcopy(component)
normalized["componentId"] = _stringify(normalized.get("componentId"))
@@ -224,9 +249,15 @@ class CanvasService:
async def get_first_page_for_owner(self, *, tenant_id: str, owner_id: str) -> dict[str, Any] | None:
_ensure_ready()
if _is_demo():
for page in _DEMO_PAGES.values():
if page["tenantId"] == tenant_id and page["ownerId"] == owner_id:
return {**page, "components": deepcopy(_DEMO_COMPONENTS.get(page["pageId"], []))}
candidates = [
page
for page in _DEMO_PAGES.values()
if page["tenantId"] == tenant_id and page["ownerId"] == owner_id
]
if candidates:
candidates.sort(key=lambda page: page.get("updatedAt", ""), reverse=True)
page = candidates[0]
return {**page, "components": deepcopy(_DEMO_COMPONENTS.get(page["pageId"], []))}
return None
assert asyncpg is not None
@@ -237,7 +268,7 @@ class CanvasService:
SELECT *
FROM oracle_canvas_pages
WHERE tenant_id = $1 AND owner_id = $2
ORDER BY created_at ASC
ORDER BY updated_at DESC, created_at DESC
LIMIT 1
""",
tenant_id,
@@ -310,7 +341,7 @@ class CanvasService:
"actorId": actor_id,
"executionId": execution_id,
"mergeRequestId": merge_request_id,
"componentsSnapshot": json.dumps(components),
"componentsSnapshot": json.dumps(_json_safe(components)),
"idempotencyKey": idempotency_key,
"createdAt": _now(),
}
@@ -346,7 +377,7 @@ class CanvasService:
"actorId": existing["actor_id"],
"executionId": _stringify(existing["execution_id"]) if existing["execution_id"] else None,
"mergeRequestId": _stringify(existing["merge_request_id"]) if existing["merge_request_id"] else None,
"componentsSnapshot": json.dumps(existing["components_snapshot"]),
"componentsSnapshot": json.dumps(_json_safe(existing["components_snapshot"])),
"idempotencyKey": existing["idempotency_key"],
"createdAt": existing["created_at"].isoformat(),
}
@@ -385,7 +416,7 @@ class CanvasService:
actor_id,
execution_id or "",
merge_request_id or "",
json.dumps(normalized_components),
json.dumps(_json_safe(normalized_components)),
idempotency_key,
)
@@ -411,7 +442,7 @@ class CanvasService:
"actorId": revision["actor_id"],
"executionId": _stringify(revision["execution_id"]) if revision["execution_id"] else None,
"mergeRequestId": _stringify(revision["merge_request_id"]) if revision["merge_request_id"] else None,
"componentsSnapshot": json.dumps(revision["components_snapshot"]),
"componentsSnapshot": json.dumps(_json_safe(revision["components_snapshot"])),
"idempotencyKey": revision["idempotency_key"],
"createdAt": revision["created_at"].isoformat(),
}
@@ -462,13 +493,14 @@ class CanvasService:
)
if not revision:
raise ValueError(f"Revision {target_revision} not found for page {page_id}")
snapshot = _json_array(revision["components_snapshot"])
return await self.commit_revision(
page_id=page_id,
tenant_id=tenant_id,
actor_id=actor_id,
commit_kind="rollback",
commit_summary=f"Rollback to revision {target_revision}",
components=list(revision["components_snapshot"]),
components=snapshot,
idempotency_key=idempotency_key,
)
finally:
@@ -604,15 +636,15 @@ class CanvasService:
component.get("description"),
int(component.get("version", 1)),
component.get("lifecycleState", "active"),
json.dumps(component.get("dataSourceDescriptor", {})),
json.dumps(component.get("visualizationParameters", {})),
json.dumps(component.get("dataBindings", {})),
json.dumps(component.get("provenance", {})),
json.dumps(component.get("renderingHints", {})),
json.dumps(component.get("layout", {})),
json.dumps(component.get("accessControls", {})),
json.dumps(component.get("styleSignature", {})),
json.dumps(component.get("validationState", {})),
json.dumps(_json_safe(component.get("dataSourceDescriptor", {}))),
json.dumps(_json_safe(component.get("visualizationParameters", {}))),
json.dumps(_json_safe(component.get("dataBindings", {}))),
json.dumps(_json_safe(component.get("provenance", {}))),
json.dumps(_json_safe(component.get("renderingHints", {}))),
json.dumps(_json_safe(component.get("layout", {}))),
json.dumps(_json_safe(component.get("accessControls", {}))),
json.dumps(_json_safe(component.get("styleSignature", {}))),
json.dumps(_json_safe(component.get("validationState", {}))),
list(component.get("auditLog", [])),
)

View File

@@ -261,13 +261,17 @@ class OracleCodebookService:
if not prompt_terms:
prompt_terms = set(_tokenize(prompt.replace("_", " ")))
lowered_prompt = prompt.lower()
crm_prompt = any(term in lowered_prompt for term in ("client", "clients", "contact", "contacts", "crm", "lead", "account"))
interaction_prompt = any(term in lowered_prompt for term in ("interaction", "timeline", "call", "message", "email", "whatsapp", "follow-up"))
property_prompt = any(term in lowered_prompt for term in ("property", "properties", "project", "projects", "interest", "interested"))
scored: list[tuple[int, CodebookExample]] = []
for example in self.load()["examples"]:
score = 0
term_set = set(example.score_terms)
overlap = prompt_terms.intersection(term_set)
score += len(overlap) * 6
lowered_prompt = prompt.lower()
if example.template_name.lower() in lowered_prompt:
score += 24
if example.subchapter_name.lower() in lowered_prompt:
@@ -280,6 +284,15 @@ class OracleCodebookService:
score += 8
if "live_data_first" in example.policy_tags:
score += 4
chapter = example.chapter_name.lower()
subchapter = example.subchapter_name.lower()
title = example.title.lower()
if crm_prompt and any(term in " ".join((chapter, subchapter, title, example.template_name.lower())) for term in ("lead", "client", "contact", "crm", "account", "pipeline")):
score += 18
if interaction_prompt and any(term in " ".join((chapter, subchapter, title, example.template_name.lower())) for term in ("interaction", "timeline", "call", "message", "email", "whatsapp", "follow-up")):
score += 16
if property_prompt and any(term in " ".join((chapter, subchapter, title, example.template_name.lower())) for term in ("property", "inventory", "interest", "project")):
score += 16
if score > 0:
scored.append((score, example))

View File

@@ -11,6 +11,8 @@ import uuid
from datetime import datetime, timezone
from typing import Any
from .canvas_service import canvas_service
logger = logging.getLogger(__name__)
# ── In-memory store (demo mode) ───────────────────────────────────────────────
@@ -23,6 +25,32 @@ def _now() -> str:
return datetime.now(timezone.utc).isoformat()
def _clone_components_for_fork(
    components: list[dict[str, Any]],
    *,
    actor_id: str,
    source_page_id: str,
    source_branch_id: str,
    source_revision: int,
) -> list[dict[str, Any]]:
    """Deep-copy *components* for a fork, re-keying each copy and stamping provenance.

    Every returned component is an independent deep copy with a freshly
    generated ``componentId``.  Its ``provenance`` mapping keeps whatever keys
    the source component already had and gains ``forkedAt``, ``forkedBy``,
    ``sourcePageId``, ``sourceBranchId`` and ``sourceRevision``; when the
    source component had a non-empty ``componentId`` it is recorded as
    ``sourceComponentId``.  The input list and its dicts are not modified.

    Args:
        components: Component descriptors of the page being forked.
        actor_id: Identifier stored as ``forkedBy``.
        source_page_id: Identifier stored as ``sourcePageId``.
        source_branch_id: Identifier stored as ``sourceBranchId``.
        source_revision: Revision number stored as ``sourceRevision``.

    Returns:
        A new list of cloned component dicts, in the same order as the input.
    """
    # One timestamp for the whole fork, so all components agree on ``forkedAt``.
    forked_at = _now()
    cloned: list[dict[str, Any]] = []
    for component in components:
        forked = copy.deepcopy(component)
        original_component_id = str(forked.get("componentId") or "")
        forked["componentId"] = str(uuid.uuid4())

        # A missing or malformed (non-dict) provenance starts from empty
        # instead of making ``dict(...)`` raise.
        existing = forked.get("provenance")
        provenance = dict(existing) if isinstance(existing, dict) else {}
        provenance["forkedAt"] = forked_at
        provenance["forkedBy"] = actor_id
        provenance["sourcePageId"] = source_page_id
        provenance["sourceBranchId"] = source_branch_id
        provenance["sourceRevision"] = source_revision
        if original_component_id:
            provenance["sourceComponentId"] = original_component_id
        forked["provenance"] = provenance
        cloned.append(forked)
    return cloned
# ── Three-way diff engine ─────────────────────────────────────────────────────
def _three_way_diff(
@@ -228,17 +256,50 @@ class CollaborationService:
Creates a fork from the source_page snapshot at its current headRevision.
Returns ForkRecord.
"""
if recipient_user_id == created_by:
raise ValueError("You cannot share a canvas with your own account.")
fork_id = str(uuid.uuid4())
fork_page_id = str(uuid.uuid4())
fork_branch_id = str(uuid.uuid4())
fork_page = await canvas_service.create_page(
tenant_id=source_page["tenantId"],
owner_id=recipient_user_id,
title=f"{source_page['title']} Fork",
page_type="fork",
branch_name=f"fork-{str(fork_id)[:8]}",
sharing_policy={
"shareMode": "direct_fork_only",
"allowReshare": visibility == "team",
"defaultForkVisibility": visibility,
},
)
fork_components = _clone_components_for_fork(
source_page.get("components", []),
actor_id=created_by,
source_page_id=source_page["pageId"],
source_branch_id=source_page["branchId"],
source_revision=source_page["headRevision"],
)
await canvas_service.commit_revision(
page_id=fork_page["pageId"],
tenant_id=source_page["tenantId"],
actor_id=created_by,
commit_kind="merge",
commit_summary=f"Forked from {source_page['title']} at rev.{source_page['headRevision']}",
components=fork_components,
execution_id=None,
merge_request_id=None,
idempotency_key=f"fork_{fork_id}",
)
fork = {
"forkId": fork_id,
"sourcePageId": source_page["pageId"],
"sourceBranchId": source_page["branchId"],
"sourceRevision": source_page["headRevision"],
"forkPageId": fork_page_id,
"forkBranchId": fork_branch_id,
"forkPageId": fork_page["pageId"],
"forkBranchId": fork_page["branchId"],
"recipientUserId": recipient_user_id,
"createdBy": created_by,
"visibility": visibility,

View File

@@ -159,14 +159,20 @@ class DataAccessGateway:
if dataset == "broker_performance":
sql = """
SELECT
ROW_NUMBER() OVER (ORDER BY COALESCE(revenue_generated, 0) DESC, broker_name ASC)::int AS rank,
broker_name AS name,
deals_closed::int AS deals_closed,
COALESCE(revenue_generated, 0)::float AS revenue_generated,
avatar_url AS avatar
FROM broker_performance
WHERE tenant_id = $1
ORDER BY revenue_generated DESC, broker_name ASC
ROW_NUMBER() OVER (
ORDER BY COUNT(DISTINCT l.person_id) DESC, COALESCE(u.full_name, u.email, u.id::text) ASC
)::int AS rank,
COALESCE(u.full_name, u.email, u.id::text) AS name,
COUNT(DISTINCT l.person_id)::int AS deals_closed,
COALESCE(SUM(o.value), 0)::float AS revenue_generated,
u.avatar_url AS avatar
FROM users_and_roles u
LEFT JOIN crm_leads l ON l.assigned_user_id = u.id
LEFT JOIN crm_opportunities o ON o.lead_id = l.lead_id
WHERE u.is_active = TRUE
GROUP BY u.id, u.full_name, u.email, u.avatar_url
HAVING COUNT(DISTINCT l.person_id) > 0 OR COALESCE(SUM(o.value), 0) > 0
ORDER BY revenue_generated DESC, name ASC
LIMIT $2
"""
return sql, [ctx.tenant_id, row_limit]
@@ -245,13 +251,20 @@ class DataAccessGateway:
COALESCE(p.primary_phone, '') AS phone,
COALESCE(p.city, '') AS city,
COALESCE(p.buyer_type, 'unclassified') AS buyer_type,
COALESCE(q.qd_score, 0)::float AS qd_score
COALESCE(q.current_value, 0)::float AS qd_score
FROM crm_people p
LEFT JOIN LATERAL (
SELECT qd_score
SELECT current_value
FROM intel_qd_scores q
WHERE q.person_id = p.person_id
ORDER BY q.scored_at DESC
ORDER BY
CASE
WHEN q.score_type = 'engagement_score' THEN 0
WHEN q.score_type = 'intent_score' THEN 1
WHEN q.score_type = 'urgency_score' THEN 2
ELSE 3
END,
q.computed_at DESC
LIMIT 1
) q ON TRUE
ORDER BY qd_score DESC, p.full_name ASC
@@ -301,6 +314,71 @@ class DataAccessGateway:
"""
return sql, [row_limit]
if dataset == "crm_last_interacted_clients":
sql = """
SELECT
p.person_id::text AS id,
p.full_name AS name,
COALESCE(p.primary_email, '') AS email,
COALESCE(p.primary_phone, '') AS phone,
COALESCE(MAX(i.happened_at), p.updated_at, p.created_at) AS last_interaction_at,
COUNT(i.interaction_id)::int AS interaction_count,
COALESCE(q.current_value, 0)::float AS qd_score
FROM crm_people p
LEFT JOIN intel_interactions i ON i.person_id = p.person_id
LEFT JOIN LATERAL (
SELECT current_value
FROM intel_qd_scores q
WHERE q.person_id = p.person_id
ORDER BY
CASE
WHEN q.score_type = 'engagement_score' THEN 0
WHEN q.score_type = 'intent_score' THEN 1
WHEN q.score_type = 'urgency_score' THEN 2
ELSE 3
END,
q.computed_at DESC
LIMIT 1
) q ON TRUE
GROUP BY p.person_id, p.full_name, p.primary_email, p.primary_phone, p.updated_at, p.created_at, q.current_value
ORDER BY last_interaction_at DESC NULLS LAST, interaction_count DESC, p.full_name ASC
LIMIT $1
"""
return sql, [row_limit]
if dataset == "crm_top_interested_clients":
sql = """
SELECT
p.person_id::text AS id,
p.full_name AS name,
COALESCE(p.primary_email, '') AS email,
COALESCE(p.primary_phone, '') AS phone,
COUNT(pi.interest_id)::int AS interest_count,
STRING_AGG(DISTINCT pi.project_name, ', ' ORDER BY pi.project_name) AS projects,
COALESCE(MAX(pi.created_at), p.updated_at, p.created_at) AS last_interest_at,
COALESCE(q.current_value, 0)::float AS qd_score
FROM crm_people p
INNER JOIN crm_property_interests pi ON pi.person_id = p.person_id
LEFT JOIN LATERAL (
SELECT current_value
FROM intel_qd_scores q
WHERE q.person_id = p.person_id
ORDER BY
CASE
WHEN q.score_type = 'engagement_score' THEN 0
WHEN q.score_type = 'intent_score' THEN 1
WHEN q.score_type = 'urgency_score' THEN 2
ELSE 3
END,
q.computed_at DESC
LIMIT 1
) q ON TRUE
GROUP BY p.person_id, p.full_name, p.primary_email, p.primary_phone, p.updated_at, p.created_at, q.current_value
ORDER BY interest_count DESC, qd_score DESC, last_interest_at DESC NULLS LAST, p.full_name ASC
LIMIT $1
"""
return sql, [row_limit]
if dataset == "crm_interaction_timeline":
sql = """
SELECT

View File

@@ -56,6 +56,18 @@ def _coerce_datetime(value: datetime | str | None) -> datetime | None:
# ── Execution store ───────────────────────────────────────────────────────────
def _json_safe(value: Any) -> Any:
if isinstance(value, datetime):
return value.isoformat()
if isinstance(value, dict):
return {str(key): _json_safe(val) for key, val in value.items()}
if isinstance(value, list):
return [_json_safe(item) for item in value]
if isinstance(value, tuple):
return [_json_safe(item) for item in value]
return value
_DEMO_EXECUTIONS: dict[str, dict[str, Any]] = {}
@@ -117,13 +129,13 @@ def _build_demo_retrieval_plan(
_DATASET_MAP: dict[str, str] = {
"pipeline_board": "deals",
"bar_chart": "lead_daily_snapshot",
"pipeline_board": "crm_opportunity_pipeline",
"bar_chart": "crm_property_interest_rollup",
"geo_map": "lead_geo_interest_rollup",
"table": "broker_performance",
"line_chart": "inventory_absorption",
"table": "crm_contacts_overview",
"line_chart": "crm_property_interest_rollup",
"kpi_tile": "oracle_aggregated_metric",
"activity_stream": "lead_activity_log",
"activity_stream": "crm_interaction_timeline",
}
_CODEBOOK_COMPONENT_MAP: dict[str, str] = {
@@ -162,6 +174,10 @@ def _dataset_for_codebook(example: CodebookExample, prompt: str, component_plan_
return "crm_interaction_timeline"
if component_plan_type == "pipeline_board":
return "crm_opportunity_pipeline"
if component_plan_type == "table" and any(term in lowered_prompt for term in ("last interacted", "last interaction", "recently contacted", "recent interaction")):
return "crm_last_interacted_clients"
if component_plan_type == "table" and any(term in lowered_prompt for term in ("interest", "interested", "project", "property", "properties")) and any(term in lowered_prompt for term in ("client", "clients", "contact", "contacts")):
return "crm_top_interested_clients"
if component_plan_type == "line_chart" and any(term in lowered_prompt for term in ("trend", "time", "history", "growth")):
return "crm_property_interest_rollup"
@@ -170,8 +186,12 @@ def _dataset_for_codebook(example: CodebookExample, prompt: str, component_plan_
return "crm_interaction_timeline"
if "pipeline" in lowered_prompt or "opportunit" in lowered_prompt:
return "crm_opportunity_pipeline"
if ("interest" in lowered_prompt or "project" in lowered_prompt or "property" in lowered_prompt) and ("client" in lowered_prompt or "contact" in lowered_prompt):
return "crm_top_interested_clients"
if "interest" in lowered_prompt or "project" in lowered_prompt or "property" in lowered_prompt:
return "crm_property_interest_rollup"
if "last interacted" in lowered_prompt or "recently contacted" in lowered_prompt or "recent interaction" in lowered_prompt:
return "crm_last_interacted_clients"
return "crm_contacts_overview"
if "client" in chapter or "client" in subchapter or "contact" in subchapter:
@@ -205,6 +225,7 @@ def _build_codebook_retrieval_plan(
exemplar = matches[0]
for component_plan_type in desired_types[:4]:
dataset = _dataset_for_codebook(exemplar, prompt, component_plan_type)
title_hint = _title_for_dataset(dataset, component_plan_type, prompt) or title_hints.get(component_plan_type, exemplar.title)
components.append(
{
"suggestedType": component_plan_type,
@@ -222,7 +243,7 @@ def _build_codebook_retrieval_plan(
"subchapterName": exemplar.subchapter_name,
"sourcePack": exemplar.source_pack,
},
"titleHint": title_hints.get(component_plan_type, exemplar.title),
"titleHint": title_hint,
}
)
@@ -235,6 +256,24 @@ def _build_codebook_retrieval_plan(
}
def _title_for_dataset(dataset: str, component_plan_type: str, prompt: str) -> str | None:
lowered_prompt = prompt.lower()
dataset_titles = {
"crm_contacts_overview": "CRM Contacts Overview",
"crm_opportunity_pipeline": "Opportunity Pipeline",
"crm_property_interest_rollup": "Property Interest Rollup",
"crm_interaction_timeline": "Client Interaction Timeline",
"crm_last_interacted_clients": "Last Interacted Clients",
"crm_top_interested_clients": "Top Interested Clients",
"broker_performance": "Broker Performance",
}
if dataset == "crm_top_interested_clients" and "top" in lowered_prompt:
return "Top Interested Clients"
if dataset == "crm_last_interacted_clients" and ("top" in lowered_prompt or "last" in lowered_prompt):
return "Last Interacted Clients"
return dataset_titles.get(dataset)
_RUNTIME_ALLOWED_DATASETS = {
"deals",
"lead_daily_snapshot",
@@ -247,6 +286,8 @@ _RUNTIME_ALLOWED_DATASETS = {
"crm_opportunity_pipeline",
"crm_property_interest_rollup",
"crm_interaction_timeline",
"crm_last_interacted_clients",
"crm_top_interested_clients",
}
@@ -371,6 +412,11 @@ class PromptOrchestrator:
execution["status"] = "executing"
await self._persist_execution(execution)
page = await canvas_service.get_page(page_id, tenant_id)
existing_comps = page.get("components", []) if page else []
next_order_base = self._next_order_base(existing_comps)
section_id = f"sec_prompt_generated_{execution_id.replace('-', '')[:12]}"
# ── Step 3: Build visualization plan (component descriptors) ──────────
viz_plan = await self._build_visualization_plan(
retrieval_plan=retrieval_plan,
@@ -382,6 +428,8 @@ class PromptOrchestrator:
placement_mode=placement_mode,
ctx=ctx,
persona_plan=persona_plan,
base_order=next_order_base,
section_id=section_id,
)
execution["visualizationPlan"] = viz_plan
@@ -391,9 +439,7 @@ class PromptOrchestrator:
# Commit a revision bump with the new components
try:
page = await canvas_service.get_page(page_id, tenant_id)
if page:
existing_comps = page.get("components", [])
new_comps = existing_comps + viz_plan.get("components", [])
revision = await canvas_service.commit_revision(
page_id=page_id,
@@ -429,6 +475,8 @@ class PromptOrchestrator:
placement_mode: str,
ctx: PolicyContext,
persona_plan: dict[str, Any],
base_order: int,
section_id: str,
) -> dict[str, Any]:
"""Converts a retrieval plan into a list of CanvasComponent descriptors."""
components = [
@@ -438,9 +486,10 @@ class PromptOrchestrator:
branch_id=branch_id,
prompt=prompt,
persona_plan=persona_plan,
order_index=base_order + 100,
section_id=section_id,
)
]
base_order = 900 # Append after existing components
component_plans = retrieval_plan.get("components", [])
for i, plan in enumerate(component_plans):
@@ -469,7 +518,7 @@ class PromptOrchestrator:
"privacyTier": plan.get("privacyTier", "standard"),
"cachePolicy": {"mode": "ttl", "ttlSeconds": 120},
},
"visualizationParameters": self._default_viz_params(ctype, data_rows),
"visualizationParameters": self._default_viz_params(ctype, dataset, data_rows),
"dataBindings": self._default_bindings(ctype),
"version": 1,
"lifecycleState": "active",
@@ -483,7 +532,7 @@ class PromptOrchestrator:
"renderingHints": self._rendering_hints(ctype),
"layout": {
"orderIndex": base_order + (i + 1) * 100,
"sectionId": "sec_prompt_generated",
"sectionId": section_id,
"widthMode": "full" if ctype in ("pipeline_board", "table", "geo_map") else "half",
"minHeightPx": 300,
"stickyHeader": False,
@@ -520,11 +569,29 @@ class PromptOrchestrator:
dataset=dataset,
warnings=component_warnings,
order_index=base_order + (i + 1) * 100,
section_id=section_id,
)
components.append(comp)
if len(components) > 1:
planning_component = components.pop(0)
planning_component["layout"]["orderIndex"] = base_order + (len(component_plans) + 1) * 100
components.append(planning_component)
return {"components": components}
@staticmethod
def _next_order_base(existing_components: list[dict[str, Any]]) -> int:
max_existing = 0
for component in existing_components:
try:
order_index = int((component.get("layout") or {}).get("orderIndex", 0))
except (TypeError, ValueError):
order_index = 0
if order_index > max_existing:
max_existing = order_index
return ((max_existing // 100) + 1) * 100
@staticmethod
def _persona_text_canvas(
*,
@@ -533,13 +600,13 @@ class PromptOrchestrator:
branch_id: str,
prompt: str,
persona_plan: dict[str, Any],
order_index: int,
section_id: str,
) -> dict[str, Any]:
recommended = ", ".join(persona_plan.get("recommendedTemplates", [])) or "no direct template matches"
content = (
f"Oracle received: {prompt}\n\n"
f"Reusable templates: {recommended}\n\n"
"Execution policy: query live CRM data first, reuse matching templates, "
"synthesize missing UI blocks, then dispatch the required ComfyUI-backed workflow."
"Execution policy: query live CRM data first, pick the strongest-fitting canvas components, "
"and synthesize any missing UI blocks before rendering the result."
)
return {
"componentId": str(uuid.uuid4()),
@@ -574,8 +641,8 @@ class PromptOrchestrator:
},
"renderingHints": {"estimatedHeightPx": 180, "skeletonVariant": "text", "virtualizationPriority": 4},
"layout": {
"orderIndex": 910,
"sectionId": "sec_prompt_generated",
"orderIndex": order_index,
"sectionId": section_id,
"widthMode": "full",
"minHeightPx": 180,
"stickyHeader": False,
@@ -631,17 +698,34 @@ class PromptOrchestrator:
return labels.get(comp_type, "Oracle Canvas Component")
@staticmethod
def _default_viz_params(comp_type: str, rows: list[dict[str, Any]]) -> dict[str, Any]:
def _default_viz_params(comp_type: str, dataset: str, rows: list[dict[str, Any]]) -> dict[str, Any]:
first_row = rows[0] if rows else {}
inferred_columns = [key for key in first_row.keys() if key not in {"avatar"}] or ["name", "status"]
table_columns_by_dataset: dict[str, list[str]] = {
"broker_performance": ["name", "deals_closed", "revenue_generated"],
"crm_contacts_overview": ["name", "email", "phone", "city", "buyer_type", "qd_score"],
"crm_last_interacted_clients": ["name", "email", "phone", "last_interaction_at", "interaction_count", "qd_score"],
"crm_top_interested_clients": ["name", "email", "phone", "interest_count", "projects", "qd_score"],
}
defaults: dict[str, dict[str, Any]] = {
"bar_chart": {"xAxis": "category", "yAxis": "value", "sort": "desc", "showLabels": True, "legend": False},
"line_chart": {"showPoints": True, "smooth": True},
"kpi_tile": {
"label": rows[0].get("metric_label", "Result") if rows else "Result",
"trend": str(rows[0].get("trend_value", "")) if rows else "",
"comparisonLabel": rows[0].get("comparison_label", "") if rows else "",
"label": first_row.get("metric_label", "Result"),
"trend": str(first_row.get("trend_value", "")),
"comparisonLabel": first_row.get("comparison_label", ""),
},
"geo_map": {"mapStyle": "dubai_district_heat", "intensityField": "lead_count", "interactive": True, "tooltipFields": ["district", "lead_count", "avg_qd_score"]},
"table": {"rankBy": "revenue_generated", "showTopBadge": True, "columns": ["name", "deals_closed", "revenue_generated"]},
"table": {
"rankBy": "revenue_generated",
"showTopBadge": True,
"columns": table_columns_by_dataset.get(
dataset,
inferred_columns,
),
"emptyStateTitle": "No matching records found",
"emptyStateDescription": "The query ran successfully but returned no rows for this prompt.",
},
"pipeline_board": {"showValue": True, "colorByStage": True},
"activity_stream": {"showUrgencyIndicator": True},
}
@@ -674,7 +758,8 @@ class PromptOrchestrator:
def _generate_summary(prompt: str, viz_plan: dict[str, Any]) -> str:
count = len(viz_plan.get("components", []))
short_prompt = prompt[:60] + ("" if len(prompt) > 60 else "")
return f'Generated {count} component{"s" if count != 1 else ""} for: "{short_prompt}"'
data_component_count = max(count - 1, 0)
return f'Generated {data_component_count} component{"s" if data_component_count != 1 else ""} for: "{short_prompt}"'
@staticmethod
def _error_component(
@@ -686,6 +771,7 @@ class PromptOrchestrator:
dataset: str,
warnings: list[str],
order_index: int,
section_id: str,
) -> dict[str, Any]:
return {
"componentId": component_id,
@@ -722,7 +808,7 @@ class PromptOrchestrator:
"renderingHints": {"estimatedHeightPx": 140, "skeletonVariant": "generic", "virtualizationPriority": 5},
"layout": {
"orderIndex": order_index,
"sectionId": "sec_prompt_generated",
"sectionId": section_id,
"widthMode": "full",
"minHeightPx": 140,
"stickyHeader": False,
@@ -875,8 +961,8 @@ class PromptOrchestrator:
execution["status"],
execution["modelRuntime"],
execution["semanticModelVersion"],
json.dumps(execution.get("retrievalPlan") or {}),
json.dumps(execution.get("visualizationPlan") or {}),
json.dumps(_json_safe(execution.get("retrievalPlan") or {})),
json.dumps(_json_safe(execution.get("visualizationPlan") or {})),
execution.get("warnings", []),
execution.get("summary"),
execution.get("componentsCreated", []),

View File

@@ -257,13 +257,16 @@ async def create_fork(
page = await canvas_service.get_page(page_id, ctx.tenant_id)
if not page:
raise HTTPException(status_code=404, detail="Source page not found.")
fork = await collaboration_service.create_fork(
source_page=page,
recipient_user_id=payload.recipientUserId,
created_by=ctx.actor_id,
visibility=payload.visibility,
message=payload.message,
)
try:
fork = await collaboration_service.create_fork(
source_page=page,
recipient_user_id=payload.recipientUserId,
created_by=ctx.actor_id,
visibility=payload.visibility,
message=payload.message,
)
except ValueError as exc:
raise HTTPException(status_code=400, detail=str(exc)) from exc
return _ok(fork)

View File

@@ -1,394 +1,95 @@
#!/usr/bin/env bash
# =============================================================================
# nemoclaw_deploy.sh
# Deploys NemoClaw on the AWS G6.12xlarge instance.
# - All data/install paths on NVMe (/opt/dlami/nvme/)
# - Configures OpenShell to use existing Ollama (qwen3.5:27b, port 11434)
# - GPUs 0+1 are Ollama's. Do NOT reassign them.
# - ComfyUI owns GPUs 2+3. Do NOT touch.
# - Creates a systemd service for the NemoClaw gateway.
# =============================================================================
set -euo pipefail
NVME="/opt/dlami/nvme"
AGENT_NAME="velocity-sentinel"
OLLAMA_URL="http://127.0.0.1:11434"
OLLAMA_MODEL="qwen3.5:27b"
OPENCLAW_PORT=8080 # Port our FastAPI backend targets
echo "================================================================"
echo " Project Velocity — NemoClaw + OpenShell Deploy Script"
echo " Instance: G6.12xlarge | NVMe: $NVME"
echo "================================================================"
# NemoClaw deployment helper for the Desineuron SGLang runtime.
# This script intentionally avoids Ollama-era assumptions and configures
# NemoClaw/OpenShell to talk to the shared OpenAI-compatible SGLang endpoint.
# ──────────────────────────────────────────────────────────────────
# 0. Safety checks
# ──────────────────────────────────────────────────────────────────
if [ "$(id -u)" -ne 0 ]; then
echo "[ERROR] Run as root or with sudo"; exit 1
NVME_ROOT="${NVME_ROOT:-/opt/dlami/nvme/nemoclaw}"
SGLANG_BASE_URL="${SGLANG_BASE_URL:-https://llm.desineuron.in}"
SGLANG_MODEL="${SGLANG_MODEL:-qwen3.6:35b-a3b}"
SGLANG_API_TOKEN="${SGLANG_API_TOKEN:-}"
OPENSHELL_PORT="${OPENSHELL_PORT:-8080}"
AGENT_NAME="${AGENT_NAME:-velocity-sentinel}"
if [[ "${EUID}" -ne 0 ]]; then
echo "Run this script with sudo or as root."
exit 1
fi
if ! mountpoint -q "$NVME" 2>/dev/null && [ ! -d "$NVME" ]; then
echo "[WARN] NVMe not mounted at $NVME — using /home/ubuntu/nvme as fallback"
NVME="/home/ubuntu/nvme"
mkdir -p "$NVME"
fi
echo "==> Desineuron NemoClaw deploy"
echo "NVME root : ${NVME_ROOT}"
echo "SGLang base URL: ${SGLANG_BASE_URL}"
echo "Model : ${SGLANG_MODEL}"
echo "Agent : ${AGENT_NAME}"
echo "[✓] NVMe target: $NVME"
mkdir -p "${NVME_ROOT}"/{logs,state,home}
# Confirm Ollama is alive before proceeding
if ! curl -sf "$OLLAMA_URL/api/tags" | grep -q "qwen"; then
echo "[WARN] Ollama at $OLLAMA_URL doesn't show qwen3.5:27b yet — proceeding anyway"
else
echo "[✓] Ollama confirmed running with qwen3.5:27b"
fi
# ──────────────────────────────────────────────────────────────────
# 1. Node.js 22 (NemoClaw requirement: >=22.16)
# ──────────────────────────────────────────────────────────────────
echo ""
echo "[1/7] Installing Node.js 22..."
NODE_VERSION=$(node --version 2>/dev/null | sed 's/v//' | cut -d. -f1 || echo "0")
if [ "$NODE_VERSION" -ge 22 ]; then
echo "[✓] Node.js $(node --version) already installed"
else
if ! command -v node >/dev/null 2>&1; then
curl -fsSL https://deb.nodesource.com/setup_22.x | bash -
apt-get update -y
apt-get install -y nodejs
echo "[✓] Node.js $(node --version) installed"
fi
npm --version
echo "[✓] npm $(npm --version)"
# ──────────────────────────────────────────────────────────────────
# 2. Docker (required for OpenShell container runtime)
# ──────────────────────────────────────────────────────────────────
echo ""
echo "[2/7] Ensuring Docker is installed..."
if command -v docker &>/dev/null && docker info &>/dev/null; then
echo "[✓] Docker $(docker --version | awk '{print $3}') already running"
else
echo " Installing Docker..."
apt-get install -y ca-certificates curl gnupg lsb-release
install -m 0755 -d /etc/apt/keyrings
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg
chmod a+r /etc/apt/keyrings/docker.gpg
echo \
"deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] \
https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" \
| tee /etc/apt/sources.list.d/docker.list > /dev/null
apt-get update -q
apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
systemctl enable docker
systemctl start docker
echo "[✓] Docker installed"
if ! command -v docker >/dev/null 2>&1; then
apt-get update -y
apt-get install -y docker.io
systemctl enable --now docker
fi
# Move Docker data root to NVMe so images don't fill root disk
DOCKER_DAEMON_JSON="/etc/docker/daemon.json"
if ! grep -q "nvme" "$DOCKER_DAEMON_JSON" 2>/dev/null; then
echo " Moving Docker data-root → $NVME/docker"
mkdir -p "$NVME/docker"
# Preserve existing config if any
EXISTING=$(cat "$DOCKER_DAEMON_JSON" 2>/dev/null || echo "{}")
python3 -c "
import json, sys
cfg = json.loads('''$EXISTING''')
cfg['data-root'] = '$NVME/docker'
print(json.dumps(cfg, indent=2))
" > "$DOCKER_DAEMON_JSON"
systemctl restart docker
echo "[✓] Docker data-root → $NVME/docker"
if ! command -v openshell >/dev/null 2>&1; then
npm install -g @nvidia/openshell || true
fi
# ──────────────────────────────────────────────────────────────────
# 3. Install NemoClaw (headless via env vars)
# ──────────────────────────────────────────────────────────────────
echo ""
echo "[3/7] Installing NemoClaw..."
# Point NemoClaw/OpenShell at NVMe-backed directories (HOME itself is not changed; HOME_OVERRIDE is only exported)
export NEMOCLAW_HOME="$NVME/nemoclaw"
export OPENSHELL_HOME="$NVME/openshell"
export HOME_OVERRIDE="$NVME/home"
mkdir -p "$NEMOCLAW_HOME" "$OPENSHELL_HOME" "$HOME_OVERRIDE"
# Link ~/.nemoclaw and ~/.openshell to NVMe
ln -sfn "$NEMOCLAW_HOME" /root/.nemoclaw 2>/dev/null || true
ln -sfn "$NEMOCLAW_HOME" /home/ubuntu/.nemoclaw 2>/dev/null || true
ln -sfn "$OPENSHELL_HOME" /root/.openshell 2>/dev/null || true
ln -sfn "$OPENSHELL_HOME" /home/ubuntu/.openshell 2>/dev/null || true
if command -v nemoclaw &>/dev/null; then
echo "[✓] nemoclaw already installed: $(nemoclaw --version 2>/dev/null || echo 'version unknown')"
else
echo " Downloading NemoClaw installer..."
INSTALLER_SCRIPT="$NVME/nemoclaw_install.sh"
curl -fsSL https://www.nvidia.com/nemoclaw.sh -o "$INSTALLER_SCRIPT"
chmod +x "$INSTALLER_SCRIPT"
# Run the installer non-interactively
# NEMOCLAW_SKIP_ONBOARD=1 bypasses the interactive wizard (undocumented but standard pattern)
# We'll do manual onboarding after install using CLI flags
NEMOCLAW_SKIP_ONBOARD=1 \
NEMOCLAW_HOME="$NEMOCLAW_HOME" \
bash "$INSTALLER_SCRIPT" || true
# Reload PATH
export PATH="$PATH:/usr/local/bin:/root/.local/bin"
source ~/.bashrc 2>/dev/null || true
if ! command -v nemoclaw &>/dev/null; then
echo "[WARN] nemoclaw not in PATH yet — checking common locations..."
for p in /usr/local/bin/nemoclaw /root/.local/bin/nemoclaw "$NVME/bin/nemoclaw"; do
if [ -f "$p" ]; then
ln -sfn "$p" /usr/local/bin/nemoclaw
echo "[✓] Linked nemoclaw from $p"
break
fi
done
fi
echo "[✓] nemoclaw installed"
if ! command -v nemoclaw >/dev/null 2>&1; then
npm install -g @nvidia/nemoclaw || true
fi
# ──────────────────────────────────────────────────────────────────
# 4. Onboard the Velocity Sentinel agent sandbox
# ──────────────────────────────────────────────────────────────────
echo ""
echo "[4/7] Onboarding '$AGENT_NAME' NemoClaw sandbox..."
cat >/etc/default/desineuron-nemoclaw <<EOF
SGLANG_BASE_URL=${SGLANG_BASE_URL}
SGLANG_MODEL=${SGLANG_MODEL}
SGLANG_API_TOKEN=${SGLANG_API_TOKEN}
NEMOCLAW_BASE_URL=${SGLANG_BASE_URL}
NEMOCLAW_MODEL=${SGLANG_MODEL}
NEMOCLAW_API_TOKEN=${SGLANG_API_TOKEN}
EOF
chmod 600 /etc/default/desineuron-nemoclaw
# Check if sandbox already exists
if nemoclaw "$AGENT_NAME" status &>/dev/null; then
echo "[✓] Sandbox '$AGENT_NAME' already exists — skipping creation"
else
echo " Running nemoclaw onboard (this may take a few minutes)..."
# --provider compatible-endpoint: use our local Ollama instead of NVIDIA cloud
# --yes: skip confirmation prompts
nemoclaw onboard \
--name "$AGENT_NAME" \
if command -v openshell >/dev/null 2>&1; then
openshell inference set \
--provider compatible-endpoint \
--endpoint "$OLLAMA_URL/v1" \
--model "$OLLAMA_MODEL" \
--yes \
--no-messaging-bridge \
--no-skills || {
echo "[WARN] Structured onboard failed — trying minimal onboard..."
# Fallback: let it run with defaults if flags are not supported in this alpha version
yes "" | nemoclaw onboard --name "$AGENT_NAME" 2>&1 | head -60 || true
}
echo "[✓] Sandbox onboarded"
--base-url "${SGLANG_BASE_URL}/v1" \
--api-key "${SGLANG_API_TOKEN:-desineuron}" \
--model "${SGLANG_MODEL}" \
--context-window 8192 \
--max-tokens 4096 || true
fi
# ──────────────────────────────────────────────────────────────────
# 5. Configure OpenShell to use Ollama (compatible endpoint)
# ──────────────────────────────────────────────────────────────────
echo ""
echo "[5/7] Configuring OpenShell inference → Ollama (qwen3.5:27b)..."
# Set inference route to our local Ollama
openshell inference set \
--provider compatible-endpoint \
--base-url "$OLLAMA_URL/v1" \
--api-key "ollama" \
--model "$OLLAMA_MODEL" \
--context-window 32768 \
--max-tokens 4096 || {
echo "[WARN] openshell inference set failed — trying alternate syntax..."
openshell inference set \
--provider compatible-endpoint \
--model "$OLLAMA_MODEL" || true
}
# Also set the context window on the Ollama model side
echo " Setting Ollama num_ctx=32768..."
curl -s -X POST "$OLLAMA_URL/api/generate" \
-H "Content-Type: application/json" \
-d "{\"model\":\"$OLLAMA_MODEL\",\"prompt\":\"\",\"options\":{\"num_ctx\":32768},\"stream\":false}" \
> /dev/null 2>&1 || true
echo "[✓] OpenShell inference configured → $OLLAMA_URL ($OLLAMA_MODEL)"
# ──────────────────────────────────────────────────────────────────
# 6. Write OpenShell network policy (allow Velocity backend egress)
# ──────────────────────────────────────────────────────────────────
echo ""
echo "[6/7] Writing OpenShell network policy..."
POLICY_DIR="$OPENSHELL_HOME/policy"
mkdir -p "$POLICY_DIR"
cat > "$POLICY_DIR/velocity_egress.yaml" << 'POLICY'
# OpenShell Network Egress Policy — Project Velocity Sentinel
# Applied to the velocity-sentinel sandbox.
# All non-listed hosts are blocked by default.
version: "1"
sandbox: velocity-sentinel
egress:
# Local Ollama inference (Qwen 3.5 27B)
- host: "127.0.0.1"
ports: [11434]
description: "Ollama LLM inference"
action: allow
# OpenShell gateway itself (loopback)
- host: "127.0.0.1"
ports: [8080, 8081, 8082, 8083, 8084, 8085]
description: "OpenShell gateway ports"
action: allow
# Velocity FastAPI backend (same host)
- host: "127.0.0.1"
ports: [8000, 8001, 8288]
description: "Velocity FastAPI backend"
action: allow
# PostgreSQL (same host)
- host: "127.0.0.1"
ports: [5432]
description: "PostgreSQL DB"
action: allow
# Block everything else
- host: "*"
action: deny
description: "Default deny — data sovereignty (India/Abu Dhabi)"
POLICY
# Apply the policy if openshell supports it
openshell policy apply "$POLICY_DIR/velocity_egress.yaml" 2>/dev/null || \
echo "[WARN] Policy apply not supported yet in this alpha — YAML written for future use"
echo "[✓] Network policy written → $POLICY_DIR/velocity_egress.yaml"
# ──────────────────────────────────────────────────────────────────
# 7. Write NemoClaw systemd service
# ──────────────────────────────────────────────────────────────────
echo ""
echo "[7/7] Installing systemd service: nemoclaw-velocity.service..."
NEMOCLAW_BIN=$(command -v nemoclaw || echo "/usr/local/bin/nemoclaw")
OPENSHELL_BIN=$(command -v openshell || echo "/usr/local/bin/openshell")
cat > /etc/systemd/system/nemoclaw-velocity.service << SERVICE
cat >/etc/systemd/system/desineuron-nemoclaw-gateway.service <<EOF
[Unit]
Description=NemoClaw Velocity Sentinel Gateway
Documentation=https://github.com/NVIDIA/NemoClaw
After=network.target ollama.service docker.service
Wants=ollama.service docker.service
Description=Desineuron NemoClaw Gateway
After=network-online.target
Wants=network-online.target
[Service]
Type=simple
User=ubuntu
Group=ubuntu
WorkingDirectory=$NVME/nemoclaw
# GPU constraint: NemoClaw itself is CPU-bound (inference goes to Ollama)
# Ollama already owns GPUs 0,1. ComfyUI owns GPUs 2,3.
Environment=CUDA_VISIBLE_DEVICES=""
Environment=NEMOCLAW_HOME=$NVME/nemoclaw
Environment=OPENSHELL_HOME=$NVME/openshell
Environment=OLLAMA_BASE_URL=http://127.0.0.1:11434
Environment=VELOCITY_NEMO_MODEL=qwen3.5:27b
Environment=GATEWAY_PORT=$OPENCLAW_PORT
ExecStart=$NEMOCLAW_BIN $AGENT_NAME connect --gateway-port $OPENCLAW_PORT
ExecReload=/bin/kill -HUP \$MAINPID
EnvironmentFile=/etc/default/desineuron-nemoclaw
WorkingDirectory=${NVME_ROOT}
Environment=HOME=${NVME_ROOT}/home
ExecStart=/usr/bin/env bash -lc 'nemoclaw serve --name ${AGENT_NAME} --port ${OPENSHELL_PORT}'
Restart=always
RestartSec=10
StandardOutput=append:$NVME/logs/nemoclaw-velocity.log
StandardError=append:$NVME/logs/nemoclaw-velocity.log
# Limits
LimitNOFILE=65536
TimeoutStopSec=30
RestartSec=5
[Install]
WantedBy=multi-user.target
SERVICE
EOF
mkdir -p "$NVME/logs"
systemctl daemon-reload
systemctl enable nemoclaw-velocity.service
systemctl start nemoclaw-velocity.service || true # May fail on first boot if onboard not done
systemctl enable --now desineuron-nemoclaw-gateway.service
systemctl --no-pager --full status desineuron-nemoclaw-gateway.service
echo "[✓] nemoclaw-velocity.service enabled and started"
# ──────────────────────────────────────────────────────────────────
# Finalize: Detect gateway port & write env file
# ──────────────────────────────────────────────────────────────────
echo ""
echo "================================================================"
echo " Writing Velocity backend environment file..."
echo "================================================================"
VELOCITY_ENV="$NVME/velocity/env"
mkdir -p "$(dirname "$VELOCITY_ENV")"
# Build the gateway URL from the configured $OPENCLAW_PORT (nothing is probed or detected here)
GATEWAY_URL="http://127.0.0.1:$OPENCLAW_PORT"
GATEWAY_CHAT_URL="$GATEWAY_URL/v1/chat/completions"
# Quick connectivity test (will succeed once nemoclaw starts)
echo " Testing gateway at $GATEWAY_CHAT_URL ..."
sleep 5
HTTP_CODE=$(curl -sf -o /dev/null -w "%{http_code}" \
-X POST "$GATEWAY_CHAT_URL" \
-H "Content-Type: application/json" \
-d '{"model":"qwen3.5:27b","messages":[{"role":"user","content":"ping"}],"max_tokens":5}' \
2>/dev/null || echo "000")
if [ "$HTTP_CODE" = "200" ] || [ "$HTTP_CODE" = "201" ]; then
echo "[✓] Gateway responding at $GATEWAY_CHAT_URL (HTTP $HTTP_CODE)"
else
echo "[WARN] Gateway not yet responding (HTTP $HTTP_CODE) — it may still be starting up"
fi
cat > "$VELOCITY_ENV" << ENV
# Project Velocity — Backend Environment
# Generated by nemoclaw_deploy.sh
# Loaded by: source $VELOCITY_ENV
# ── NemoClaw / OpenShell Gateway ──────────────────────────────────
NEMOCLAW_BASE_URL=$GATEWAY_URL
NEMOCLAW_CHAT_URL=$GATEWAY_CHAT_URL
NEMOCLAW_MODEL=qwen3.5:27b
NEMOCLAW_TIMEOUT_S=30.0
NEMOCLAW_TEMPERATURE=0.2
# ── Ollama (direct fallback if OpenShell gateway not up) ──────────
OLLAMA_BASE_URL=http://127.0.0.1:11434
# ── NemoClaw Prompts ──────────────────────────────────────────────
NEMOCLAW_PROMPT_DIR=$NVME/nemoclaw/prompts
# ── JWT / Auth ────────────────────────────────────────────────────
# VELOCITY_JWT_SECRET=<SET_THIS>
# ── PostgreSQL ────────────────────────────────────────────────────
# VELOCITY_DB_DSN=postgresql://velocity_app:<PW>@127.0.0.1:5432/velocity
ENV
echo "[✓] Environment file written → $VELOCITY_ENV"
echo ""
echo "================================================================"
echo " DONE. Summary:"
echo ""
echo " Agent name : $AGENT_NAME"
echo " Gateway URL : $GATEWAY_URL"
echo " Chat endpoint: $GATEWAY_CHAT_URL"
echo " Model : $OLLAMA_MODEL (via Ollama on port 11434)"
echo " GPUs 0,1 : Ollama (unchanged)"
echo " GPUs 2,3 : ComfyUI (unchanged)"
echo " Env file : $VELOCITY_ENV"
echo " Service log : $NVME/logs/nemoclaw-velocity.log"
echo ""
echo " Next commands to verify:"
echo " nemoclaw $AGENT_NAME status"
echo " nemoclaw $AGENT_NAME logs --follow"
echo " curl $GATEWAY_CHAT_URL (POST with messages[])"
echo "================================================================"
echo
echo "NemoClaw deployment complete."
echo "Gateway port : ${OPENSHELL_PORT}"
echo "Model : ${SGLANG_MODEL}"
echo "Runtime : ${SGLANG_BASE_URL}/v1"

View File

@@ -1,10 +1,13 @@
"""
backend/services/nemoclaw_client.py - NemoClaw inference client.
Primary path:
1. NVIDIA-hosted OpenAI-compatible chat completions.
2. Optional compatible endpoint via NEMOCLAW_BASE_URL.
3. Optional local Ollama fallback only when ALLOW_LOCAL_FALLBACK=true.
Production path:
1. Shared SGLang / OpenAI-compatible coding runtime.
Compatibility:
- Legacy NEMOCLAW_* env names are still honored.
- Legacy OLLAMA_BASE_URL can still seed the base URL, but Ollama is no longer
a production fallback path.
"""
from __future__ import annotations
@@ -24,28 +27,23 @@ logger = logging.getLogger("velocity.nemoclaw")
NEMOCLAW_TIMEOUT = float(os.getenv("NEMOCLAW_TIMEOUT_S", "45.0"))
NEMOCLAW_TEMPERATURE = float(os.getenv("NEMOCLAW_TEMPERATURE", "0.2"))
NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY", "")
NVIDIA_BASE_URL = os.getenv("NVIDIA_BASE_URL", "https://integrate.api.nvidia.com/v1")
NVIDIA_CHAT_URL = os.getenv("NVIDIA_CHAT_URL", f"{NVIDIA_BASE_URL}/chat/completions")
NVIDIA_MODEL = os.getenv("NVIDIA_MODEL", "nvidia/nemotron-3-super-120b-a12b")
NVIDIA_FALLBACK_MODEL = os.getenv(
"NVIDIA_FALLBACK_MODEL",
"nvidia/llama-3.3-nemotron-super-49b-v1",
SGLANG_BASE_URL = os.getenv(
"SGLANG_BASE_URL",
os.getenv(
"NEMOCLAW_BASE_URL",
os.getenv("LLM_BASE_URL", os.getenv("OLLAMA_BASE_URL", "https://llm.desineuron.in")),
),
).rstrip("/")
SGLANG_CHAT_URL = os.getenv(
"SGLANG_CHAT_URL",
os.getenv("NEMOCLAW_CHAT_URL", f"{SGLANG_BASE_URL}/v1/chat/completions"),
)
NEMOCLAW_BASE_URL = os.getenv("NEMOCLAW_BASE_URL", "")
NEMOCLAW_CHAT_URL = (
os.getenv("NEMOCLAW_CHAT_URL") or f"{NEMOCLAW_BASE_URL}/v1/chat/completions"
if NEMOCLAW_BASE_URL
else ""
SGLANG_MODELS_URL = os.getenv("SGLANG_MODELS_URL", f"{SGLANG_BASE_URL}/v1/models")
SGLANG_MODEL = os.getenv(
"SGLANG_MODEL",
os.getenv("NEMOCLAW_MODEL", os.getenv("OLLAMA_MODEL", "qwen3.6:35b-a3b")),
)
NEMOCLAW_MODEL = os.getenv("NEMOCLAW_MODEL", NVIDIA_MODEL)
NEMOCLAW_API_TOKEN = os.getenv("NEMOCLAW_API_TOKEN", "")
ALLOW_LOCAL_FALLBACK = os.getenv("ALLOW_LOCAL_FALLBACK", "false").lower() == "true"
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://127.0.0.1:11434")
OLLAMA_CHAT_URL = f"{OLLAMA_BASE_URL}/v1/chat/completions"
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "qwen3.5:27b")
SGLANG_API_TOKEN = os.getenv("SGLANG_API_TOKEN", os.getenv("NEMOCLAW_API_TOKEN", ""))
_PROMPT_DIR = os.getenv("NEMOCLAW_PROMPT_DIR", "/opt/dlami/nvme/nemoclaw/prompts")
@@ -201,83 +199,40 @@ async def _nemoclaw_chat(
user_content: str,
timeout: float = NEMOCLAW_TIMEOUT,
) -> dict:
endpoints: list[tuple[str, str, str, dict[str, str]]] = []
if NVIDIA_API_KEY:
endpoints.append(
(
"nvidia_primary",
NVIDIA_CHAT_URL,
NVIDIA_MODEL,
{
"Authorization": f"Bearer {NVIDIA_API_KEY}",
"Content-Type": "application/json",
},
)
)
if NVIDIA_FALLBACK_MODEL and NVIDIA_FALLBACK_MODEL != NVIDIA_MODEL:
endpoints.append(
(
"nvidia_fallback",
NVIDIA_CHAT_URL,
NVIDIA_FALLBACK_MODEL,
{
"Authorization": f"Bearer {NVIDIA_API_KEY}",
"Content-Type": "application/json",
},
)
)
if NEMOCLAW_CHAT_URL:
headers = {"Content-Type": "application/json"}
if NEMOCLAW_API_TOKEN:
headers["Authorization"] = f"Bearer {NEMOCLAW_API_TOKEN}"
endpoints.append(("compatible_endpoint", NEMOCLAW_CHAT_URL, NEMOCLAW_MODEL, headers))
if ALLOW_LOCAL_FALLBACK:
endpoints.append(
("ollama_fallback", OLLAMA_CHAT_URL, OLLAMA_MODEL, {"Content-Type": "application/json"})
if not SGLANG_CHAT_URL:
raise RuntimeError(
"No NemoClaw inference endpoint is configured. Set SGLANG_BASE_URL or NEMOCLAW_BASE_URL."
)
if not endpoints:
raise RuntimeError(
"No NemoClaw inference endpoint is configured. "
"Set NVIDIA_API_KEY or NEMOCLAW_BASE_URL."
)
headers = {"Content-Type": "application/json"}
if SGLANG_API_TOKEN:
headers["Authorization"] = f"Bearer {SGLANG_API_TOKEN}"
t_start = time.monotonic()
last_error: Exception | None = None
for label, url, model, headers in endpoints:
try:
result = await _attempt_chat(
label=label,
url=url,
model=model,
system_content=system_content,
user_content=user_content,
timeout=timeout,
headers=headers,
)
logger.info(
"NemoClaw inference via %s model=%s elapsed=%.2fs",
label,
model,
time.monotonic() - t_start,
)
return result
except (httpx.ConnectError, httpx.TimeoutException) as exc:
logger.warning("NemoClaw %s unreachable (%s), trying next endpoint", label, exc)
last_error = exc
except httpx.HTTPStatusError as exc:
logger.error(
"NemoClaw %s HTTP %s: %s",
label,
exc.response.status_code,
exc.response.text[:300],
)
last_error = exc
except (KeyError, IndexError, TypeError, json.JSONDecodeError) as exc:
logger.error("NemoClaw %s returned invalid JSON: %s", label, exc)
last_error = exc
raise RuntimeError(f"All NemoClaw endpoints failed. Last error: {last_error}")
try:
result = await _attempt_chat(
label="sglang",
url=SGLANG_CHAT_URL,
model=SGLANG_MODEL,
system_content=system_content,
user_content=user_content,
timeout=timeout,
headers=headers,
)
logger.info(
"NemoClaw inference via sglang model=%s elapsed=%.2fs",
SGLANG_MODEL,
time.monotonic() - t_start,
)
return result
except (httpx.ConnectError, httpx.TimeoutException) as exc:
raise RuntimeError(f"NemoClaw SGLang endpoint unreachable: {exc}") from exc
except httpx.HTTPStatusError as exc:
raise RuntimeError(
f"NemoClaw SGLang HTTP {exc.response.status_code}: {exc.response.text[:300]}"
) from exc
except (KeyError, IndexError, TypeError, json.JSONDecodeError) as exc:
raise RuntimeError(f"NemoClaw SGLang returned invalid JSON: {exc}") from exc
async def score_qd(
@@ -368,46 +323,32 @@ async def profile_cctv_visitor(
# Health probe for the NemoClaw inference runtime.
# NOTE(review): this span is a rendered diff with the +/- markers and the
# indentation stripped, so lines from the pre-change implementation (an
# `endpoints` list built from NVIDIA_* / NEMOCLAW_* / OLLAMA_* settings and
# probed in a loop) are interleaved with the post-change implementation (one
# SGLang probe). It is not runnable as shown; the comments below mark which
# lines belong to which version.
async def health_check() -> dict:
results: dict[str, str] = {}
# Pre-change lines: start of the multi-endpoint catalogue.
endpoints: list[tuple[str, str, str, dict[str, str]]] = []
if NVIDIA_API_KEY:
endpoints.append(
(
"nvidia_primary",
NVIDIA_CHAT_URL,
NVIDIA_MODEL,
{
"Authorization": f"Bearer {NVIDIA_API_KEY}",
"Content-Type": "application/json",
# Post-change lines: shared request headers; the bearer token is attached only
# when SGLANG_API_TOKEN is non-empty.
headers = {"Content-Type": "application/json"}
if SGLANG_API_TOKEN:
headers["Authorization"] = f"Bearer {SGLANG_API_TOKEN}"
# The returned dict always carries the configured model and both probed URLs.
results: dict[str, str] = {
"model": SGLANG_MODEL,
"primary_url": SGLANG_CHAT_URL,
"models_url": SGLANG_MODELS_URL,
}
try:
# Two probes under a 5-second client timeout: GET the models URL, then POST
# a 5-token "ping" chat completion. raise_for_status() turns any 4xx/5xx
# response into an exception.
async with httpx.AsyncClient(timeout=5.0) as client:
models_response = await client.get(SGLANG_MODELS_URL, headers=headers)
models_response.raise_for_status()
chat_response = await client.post(
SGLANG_CHAT_URL,
json={
"model": SGLANG_MODEL,
"messages": [{"role": "user", "content": "ping"}],
"max_tokens": 5,
},
headers=headers,
)
)
# Pre-change lines: optional compatible endpoint and local Ollama fallback.
if NEMOCLAW_CHAT_URL:
headers = {"Content-Type": "application/json"}
if NEMOCLAW_API_TOKEN:
headers["Authorization"] = f"Bearer {NEMOCLAW_API_TOKEN}"
endpoints.append(("compatible_endpoint", NEMOCLAW_CHAT_URL, NEMOCLAW_MODEL, headers))
if ALLOW_LOCAL_FALLBACK:
endpoints.append(
("ollama_fallback", OLLAMA_CHAT_URL, OLLAMA_MODEL, {"Content-Type": "application/json"})
)
# Post-change lines: "sglang" is "ok" only when both probes succeed; any
# exception is reported as an "error: ..." string rather than raised.
chat_response.raise_for_status()
results["sglang"] = "ok"
except Exception as exc:
results["sglang"] = f"error: {exc}"
# Pre-change lines: per-endpoint probe loop. Unlike the SGLang probe above, it
# reported "ok" for any status below 500 instead of calling raise_for_status().
for name, url, model, headers in endpoints:
try:
async with httpx.AsyncClient(timeout=5.0) as client:
response = await client.post(
url,
json={
"model": model,
"messages": [{"role": "user", "content": "ping"}],
"max_tokens": 5,
},
headers=headers,
)
results[name] = "ok" if response.status_code < 500 else f"http_{response.status_code}"
except Exception as exc:
results[name] = f"error: {exc}"
results["model"] = NVIDIA_MODEL if NVIDIA_API_KEY else NEMOCLAW_MODEL
results["primary_url"] = NVIDIA_CHAT_URL if NVIDIA_API_KEY else (NEMOCLAW_CHAT_URL or OLLAMA_CHAT_URL)
return results

View File

@@ -13,15 +13,17 @@ import httpx
logger = logging.getLogger("velocity.runtime_llm")
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://127.0.0.1:11434").rstrip("/")
OLLAMA_CHAT_URL = os.getenv("OLLAMA_CHAT_URL", f"{OLLAMA_BASE_URL}/v1/chat/completions")
OLLAMA_TAGS_URL = os.getenv("OLLAMA_TAGS_URL", f"{OLLAMA_BASE_URL}/api/tags")
OLLAMA_DEFAULT_MODEL = os.getenv("OLLAMA_MODEL", "qwen3.5:27b")
NEMOCLAW_BASE_URL = os.getenv("NEMOCLAW_BASE_URL", "").rstrip("/")
NEMOCLAW_CHAT_URL = (os.getenv("NEMOCLAW_CHAT_URL") or f"{NEMOCLAW_BASE_URL}/v1/chat/completions").rstrip("/") if NEMOCLAW_BASE_URL else ""
NEMOCLAW_DEFAULT_MODEL = os.getenv("NEMOCLAW_MODEL", "nvidia/nemotron-3-super-120b-a12b")
NEMOCLAW_API_TOKEN = os.getenv("NEMOCLAW_API_TOKEN", "")
# Shared SGLang / OpenAI-compatible runtime settings.
#
# Resolution order mirrors backend/services/nemoclaw_client.py so both modules
# land on the same endpoint, model and credentials for a given environment:
#   SGLANG_*  ->  legacy NEMOCLAW_*  ->  LLM_BASE_URL / legacy OLLAMA_*  ->  default.
# Without the NEMOCLAW_* step, a deployment configured only through the legacy
# names would hit the default URL unauthenticated from this module, even
# though the "nemoclaw" provider id is still accepted as an alias for "sglang".
SGLANG_BASE_URL = os.getenv(
    "SGLANG_BASE_URL",
    os.getenv(
        "NEMOCLAW_BASE_URL",
        os.getenv("LLM_BASE_URL", os.getenv("OLLAMA_BASE_URL", "https://llm.desineuron.in")),
    ),
).rstrip("/")  # strip trailing slashes so the f-strings below never produce "//v1/..."
SGLANG_CHAT_URL = os.getenv(
    "SGLANG_CHAT_URL",
    os.getenv("NEMOCLAW_CHAT_URL", f"{SGLANG_BASE_URL}/v1/chat/completions"),
)
SGLANG_MODELS_URL = os.getenv("SGLANG_MODELS_URL", f"{SGLANG_BASE_URL}/v1/models")
SGLANG_DEFAULT_MODEL = os.getenv(
    "SGLANG_MODEL",
    os.getenv("NEMOCLAW_MODEL", os.getenv("OLLAMA_MODEL", "qwen3.6:35b-a3b")),
)
# An empty token means "send no Authorization header" to consumers of this constant.
SGLANG_API_TOKEN = os.getenv("SGLANG_API_TOKEN", os.getenv("NEMOCLAW_API_TOKEN", ""))
RUNTIME_LLM_TIMEOUT_S = float(os.getenv("RUNTIME_LLM_TIMEOUT_S", "90.0"))
RUNTIME_LLM_CONCURRENCY = int(os.getenv("RUNTIME_LLM_BATCH_CONCURRENCY", "2"))
@@ -57,40 +59,30 @@ class RuntimeLLMService:
self._jobs: dict[str, dict[str, Any]] = {}
# Build the list of runtime providers this service can route to.
# NOTE(review): rendered diff with markers and indentation stripped. The
# pre-change body (separate "ollama" and "nemoclaw" entries appended to
# `providers`) is interleaved with the post-change body (a single "sglang"
# entry); the span is not runnable as shown.
def _provider_catalog(self) -> list[RuntimeProvider]:
# Pre-change lines: optional Ollama provider.
providers: list[RuntimeProvider] = []
if OLLAMA_CHAT_URL:
providers.append(
RuntimeProvider(
provider_id="ollama",
base_url=OLLAMA_BASE_URL,
chat_url=OLLAMA_CHAT_URL,
default_model=OLLAMA_DEFAULT_MODEL,
)
# Post-change lines: return an empty catalogue when SGLANG_CHAT_URL is falsy,
# otherwise exactly one "sglang" provider. An empty SGLANG_API_TOKEN becomes
# auth_token=None.
if not SGLANG_CHAT_URL:
return []
return [
RuntimeProvider(
provider_id="sglang",
base_url=SGLANG_BASE_URL,
chat_url=SGLANG_CHAT_URL,
default_model=SGLANG_DEFAULT_MODEL,
auth_token=SGLANG_API_TOKEN or None,
)
# Pre-change lines: optional NemoClaw provider and the old return.
if NEMOCLAW_CHAT_URL:
providers.append(
RuntimeProvider(
provider_id="nemoclaw",
base_url=NEMOCLAW_BASE_URL,
chat_url=NEMOCLAW_CHAT_URL,
default_model=NEMOCLAW_DEFAULT_MODEL,
auth_token=NEMOCLAW_API_TOKEN or None,
)
)
return providers
]
# Resolve a provider id to a RuntimeProvider from the catalogue.
# Raises ValueError when the requested id is not in the catalogue, or when no
# id is given and no default provider is available.
# NOTE(review): rendered diff with markers and indentation stripped; the
# pre-change default selection ("nemoclaw" first, then "ollama") is
# interleaved with the post-change default ("sglang").
def get_provider(self, provider_id: str | None) -> RuntimeProvider:
providers = {provider.provider_id: provider for provider in self._provider_catalog()}
# Post-change lines: the legacy ids "ollama" and "nemoclaw" are rewritten to
# "sglang" before lookup, so callers that still pass them keep resolving.
if provider_id in {"ollama", "nemoclaw"}:
provider_id = "sglang"
# An explicit id must exist in the catalogue; there is no silent fallback.
if provider_id:
provider = providers.get(provider_id)
if provider is None:
raise ValueError(f"Unknown provider '{provider_id}'.")
return provider
# Pre-change default selection.
if "nemoclaw" in providers:
return providers["nemoclaw"]
if "ollama" in providers:
return providers["ollama"]
# Post-change default selection.
if "sglang" in providers:
return providers["sglang"]
raise ValueError("No runtime LLM providers are configured.")
async def list_providers(self) -> list[dict[str, Any]]:
@@ -101,28 +93,18 @@ class RuntimeLLMService:
error: str | None = None
try:
if provider.provider_id == "ollama":
async with httpx.AsyncClient(timeout=10.0) as client:
response = await client.get(OLLAMA_TAGS_URL)
response.raise_for_status()
payload = response.json()
models = [str(item.get("name", "")).strip() for item in payload.get("models", []) if item.get("name")]
if provider.default_model not in models:
models.insert(0, provider.default_model)
status = "online"
else:
async with httpx.AsyncClient(timeout=10.0) as client:
response = await client.post(
provider.chat_url,
json={
"model": provider.default_model,
"messages": [{"role": "user", "content": "ping"}],
"max_tokens": 4,
},
headers=provider.headers,
)
response.raise_for_status()
status = "online"
async with httpx.AsyncClient(timeout=10.0) as client:
response = await client.get(SGLANG_MODELS_URL, headers=provider.headers)
response.raise_for_status()
payload = response.json()
models = [
str(item.get("id", "")).strip()
for item in payload.get("data", [])
if item.get("id")
]
if provider.default_model not in models:
models.insert(0, provider.default_model)
status = "online"
except Exception as exc: # pragma: no cover - network/runtime dependent
error = str(exc)