forked from sagnik/Project_Velocity
feat: Oracle Canvas, Revision History and Canvas Sharing (#33)
Co-authored-by: Sagnik <sagnik7896@gmail.com> Reviewed-on: sagnik/Project_Velocity#33
This commit is contained in:
@@ -32,6 +32,20 @@ SUPABASE_SERVICE_ROLE_KEY=PLACEHOLDER_your_supabase_service_role_key
|
||||
# Base URL of ComfyUI server running locally or on GPU node
|
||||
COMFY_BASE_URL=http://localhost:8188
|
||||
|
||||
# —— Shared Desineuron coding / Oracle / NemoClaw runtime —————————————————————
|
||||
# Stable OpenAI-compatible SGLang route rendered through ingress.
|
||||
LLM_BASE_URL=https://llm.desineuron.in
|
||||
SGLANG_BASE_URL=https://llm.desineuron.in
|
||||
SGLANG_CHAT_URL=https://llm.desineuron.in/v1/chat/completions
|
||||
SGLANG_MODELS_URL=https://llm.desineuron.in/v1/models
|
||||
SGLANG_MODEL=qwen3.6:35b-a3b
|
||||
SGLANG_API_TOKEN=
|
||||
|
||||
# NemoClaw follows the same routed SGLang runtime.
|
||||
NEMOCLAW_BASE_URL=https://llm.desineuron.in
|
||||
NEMOCLAW_MODEL=qwen3.6:35b-a3b
|
||||
NEMOCLAW_API_TOKEN=
|
||||
|
||||
# ── Backend ───────────────────────────────────────────────────────────────────
|
||||
# CORS origins — comma-separated list of allowed frontend origins
|
||||
CORS_ORIGINS=http://localhost:5173,http://localhost:3000
|
||||
|
||||
@@ -70,6 +70,31 @@ def _json_object(value: Any) -> dict[str, Any]:
|
||||
return {}
|
||||
|
||||
|
||||
def _json_array(value: Any) -> list[Any]:
    """Coerce a JSON-array field into a Python list.

    Args:
        value: A list, a JSON-encoded string, or anything else.

    Returns:
        ``value`` itself when it is already a list (the same object, not a
        copy); the decoded list when ``value`` is a non-blank string holding
        a JSON array; otherwise a new empty list.
    """
    if isinstance(value, list):
        return value
    if not isinstance(value, str) or not value.strip():
        return []
    try:
        decoded = json.loads(value)
    except Exception:
        # Best effort: an unparseable field degrades to "no items" rather
        # than propagating the error to the caller.
        logger.warning("canvas_service: failed to parse JSON array field; using empty array")
        return []
    # Valid JSON that is not an array (object, number, ...) is also discarded.
    return decoded if isinstance(decoded, list) else []
|
||||
|
||||
|
||||
def _json_safe(value: Any) -> Any:
    """Recursively replace ``datetime`` values with ISO-8601 strings.

    Dicts are rebuilt with their keys passed through ``str()``; lists and
    tuples are both rebuilt as lists. Any other value is returned unchanged
    and is not checked for JSON compatibility.
    """
    if isinstance(value, datetime):
        return value.isoformat()
    if isinstance(value, dict):
        return {str(k): _json_safe(v) for k, v in value.items()}
    if isinstance(value, (list, tuple)):
        return [_json_safe(element) for element in value]
    return value
|
||||
|
||||
|
||||
def _normalize_component(component: dict[str, Any]) -> dict[str, Any]:
|
||||
normalized = deepcopy(component)
|
||||
normalized["componentId"] = _stringify(normalized.get("componentId"))
|
||||
@@ -224,9 +249,15 @@ class CanvasService:
|
||||
async def get_first_page_for_owner(self, *, tenant_id: str, owner_id: str) -> dict[str, Any] | None:
|
||||
_ensure_ready()
|
||||
if _is_demo():
|
||||
for page in _DEMO_PAGES.values():
|
||||
if page["tenantId"] == tenant_id and page["ownerId"] == owner_id:
|
||||
return {**page, "components": deepcopy(_DEMO_COMPONENTS.get(page["pageId"], []))}
|
||||
candidates = [
|
||||
page
|
||||
for page in _DEMO_PAGES.values()
|
||||
if page["tenantId"] == tenant_id and page["ownerId"] == owner_id
|
||||
]
|
||||
if candidates:
|
||||
candidates.sort(key=lambda page: page.get("updatedAt", ""), reverse=True)
|
||||
page = candidates[0]
|
||||
return {**page, "components": deepcopy(_DEMO_COMPONENTS.get(page["pageId"], []))}
|
||||
return None
|
||||
|
||||
assert asyncpg is not None
|
||||
@@ -237,7 +268,7 @@ class CanvasService:
|
||||
SELECT *
|
||||
FROM oracle_canvas_pages
|
||||
WHERE tenant_id = $1 AND owner_id = $2
|
||||
ORDER BY created_at ASC
|
||||
ORDER BY updated_at DESC, created_at DESC
|
||||
LIMIT 1
|
||||
""",
|
||||
tenant_id,
|
||||
@@ -310,7 +341,7 @@ class CanvasService:
|
||||
"actorId": actor_id,
|
||||
"executionId": execution_id,
|
||||
"mergeRequestId": merge_request_id,
|
||||
"componentsSnapshot": json.dumps(components),
|
||||
"componentsSnapshot": json.dumps(_json_safe(components)),
|
||||
"idempotencyKey": idempotency_key,
|
||||
"createdAt": _now(),
|
||||
}
|
||||
@@ -346,7 +377,7 @@ class CanvasService:
|
||||
"actorId": existing["actor_id"],
|
||||
"executionId": _stringify(existing["execution_id"]) if existing["execution_id"] else None,
|
||||
"mergeRequestId": _stringify(existing["merge_request_id"]) if existing["merge_request_id"] else None,
|
||||
"componentsSnapshot": json.dumps(existing["components_snapshot"]),
|
||||
"componentsSnapshot": json.dumps(_json_safe(existing["components_snapshot"])),
|
||||
"idempotencyKey": existing["idempotency_key"],
|
||||
"createdAt": existing["created_at"].isoformat(),
|
||||
}
|
||||
@@ -385,7 +416,7 @@ class CanvasService:
|
||||
actor_id,
|
||||
execution_id or "",
|
||||
merge_request_id or "",
|
||||
json.dumps(normalized_components),
|
||||
json.dumps(_json_safe(normalized_components)),
|
||||
idempotency_key,
|
||||
)
|
||||
|
||||
@@ -411,7 +442,7 @@ class CanvasService:
|
||||
"actorId": revision["actor_id"],
|
||||
"executionId": _stringify(revision["execution_id"]) if revision["execution_id"] else None,
|
||||
"mergeRequestId": _stringify(revision["merge_request_id"]) if revision["merge_request_id"] else None,
|
||||
"componentsSnapshot": json.dumps(revision["components_snapshot"]),
|
||||
"componentsSnapshot": json.dumps(_json_safe(revision["components_snapshot"])),
|
||||
"idempotencyKey": revision["idempotency_key"],
|
||||
"createdAt": revision["created_at"].isoformat(),
|
||||
}
|
||||
@@ -462,13 +493,14 @@ class CanvasService:
|
||||
)
|
||||
if not revision:
|
||||
raise ValueError(f"Revision {target_revision} not found for page {page_id}")
|
||||
snapshot = _json_array(revision["components_snapshot"])
|
||||
return await self.commit_revision(
|
||||
page_id=page_id,
|
||||
tenant_id=tenant_id,
|
||||
actor_id=actor_id,
|
||||
commit_kind="rollback",
|
||||
commit_summary=f"Rollback to revision {target_revision}",
|
||||
components=list(revision["components_snapshot"]),
|
||||
components=snapshot,
|
||||
idempotency_key=idempotency_key,
|
||||
)
|
||||
finally:
|
||||
@@ -604,15 +636,15 @@ class CanvasService:
|
||||
component.get("description"),
|
||||
int(component.get("version", 1)),
|
||||
component.get("lifecycleState", "active"),
|
||||
json.dumps(component.get("dataSourceDescriptor", {})),
|
||||
json.dumps(component.get("visualizationParameters", {})),
|
||||
json.dumps(component.get("dataBindings", {})),
|
||||
json.dumps(component.get("provenance", {})),
|
||||
json.dumps(component.get("renderingHints", {})),
|
||||
json.dumps(component.get("layout", {})),
|
||||
json.dumps(component.get("accessControls", {})),
|
||||
json.dumps(component.get("styleSignature", {})),
|
||||
json.dumps(component.get("validationState", {})),
|
||||
json.dumps(_json_safe(component.get("dataSourceDescriptor", {}))),
|
||||
json.dumps(_json_safe(component.get("visualizationParameters", {}))),
|
||||
json.dumps(_json_safe(component.get("dataBindings", {}))),
|
||||
json.dumps(_json_safe(component.get("provenance", {}))),
|
||||
json.dumps(_json_safe(component.get("renderingHints", {}))),
|
||||
json.dumps(_json_safe(component.get("layout", {}))),
|
||||
json.dumps(_json_safe(component.get("accessControls", {}))),
|
||||
json.dumps(_json_safe(component.get("styleSignature", {}))),
|
||||
json.dumps(_json_safe(component.get("validationState", {}))),
|
||||
list(component.get("auditLog", [])),
|
||||
)
|
||||
|
||||
|
||||
@@ -261,13 +261,17 @@ class OracleCodebookService:
|
||||
if not prompt_terms:
|
||||
prompt_terms = set(_tokenize(prompt.replace("_", " ")))
|
||||
|
||||
lowered_prompt = prompt.lower()
|
||||
crm_prompt = any(term in lowered_prompt for term in ("client", "clients", "contact", "contacts", "crm", "lead", "account"))
|
||||
interaction_prompt = any(term in lowered_prompt for term in ("interaction", "timeline", "call", "message", "email", "whatsapp", "follow-up"))
|
||||
property_prompt = any(term in lowered_prompt for term in ("property", "properties", "project", "projects", "interest", "interested"))
|
||||
|
||||
scored: list[tuple[int, CodebookExample]] = []
|
||||
for example in self.load()["examples"]:
|
||||
score = 0
|
||||
term_set = set(example.score_terms)
|
||||
overlap = prompt_terms.intersection(term_set)
|
||||
score += len(overlap) * 6
|
||||
lowered_prompt = prompt.lower()
|
||||
if example.template_name.lower() in lowered_prompt:
|
||||
score += 24
|
||||
if example.subchapter_name.lower() in lowered_prompt:
|
||||
@@ -280,6 +284,15 @@ class OracleCodebookService:
|
||||
score += 8
|
||||
if "live_data_first" in example.policy_tags:
|
||||
score += 4
|
||||
chapter = example.chapter_name.lower()
|
||||
subchapter = example.subchapter_name.lower()
|
||||
title = example.title.lower()
|
||||
if crm_prompt and any(term in " ".join((chapter, subchapter, title, example.template_name.lower())) for term in ("lead", "client", "contact", "crm", "account", "pipeline")):
|
||||
score += 18
|
||||
if interaction_prompt and any(term in " ".join((chapter, subchapter, title, example.template_name.lower())) for term in ("interaction", "timeline", "call", "message", "email", "whatsapp", "follow-up")):
|
||||
score += 16
|
||||
if property_prompt and any(term in " ".join((chapter, subchapter, title, example.template_name.lower())) for term in ("property", "inventory", "interest", "project")):
|
||||
score += 16
|
||||
if score > 0:
|
||||
scored.append((score, example))
|
||||
|
||||
|
||||
@@ -11,6 +11,8 @@ import uuid
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from .canvas_service import canvas_service
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ── In-memory store (demo mode) ───────────────────────────────────────────────
|
||||
@@ -23,6 +25,32 @@ def _now() -> str:
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
|
||||
|
||||
def _clone_components_for_fork(
    components: list[dict[str, Any]],
    *,
    actor_id: str,
    source_page_id: str,
    source_branch_id: str,
    source_revision: int,
) -> list[dict[str, Any]]:
    """Deep-copy components for a fork, re-identifying and stamping each one.

    Every returned component is an independent deep copy carrying a freshly
    generated ``componentId``. Its ``provenance`` dict (created when missing
    or falsy) gains ``forkedAt``, ``forkedBy``, ``sourcePageId``,
    ``sourceBranchId`` and ``sourceRevision``; ``sourceComponentId`` is added
    only when the original had a non-empty id. The input list and its
    components are not modified.
    """

    def _fork_one(original: dict[str, Any]) -> dict[str, Any]:
        duplicate = copy.deepcopy(original)
        previous_id = str(duplicate.get("componentId") or "")
        duplicate["componentId"] = str(uuid.uuid4())

        lineage = dict(duplicate.get("provenance") or {})
        lineage["forkedAt"] = _now()
        lineage["forkedBy"] = actor_id
        lineage["sourcePageId"] = source_page_id
        lineage["sourceBranchId"] = source_branch_id
        lineage["sourceRevision"] = source_revision
        if previous_id:
            # Keep a back-reference to the component this copy was made from.
            lineage["sourceComponentId"] = previous_id

        duplicate["provenance"] = lineage
        return duplicate

    return [_fork_one(component) for component in components]
|
||||
|
||||
|
||||
# ── Three-way diff engine ─────────────────────────────────────────────────────
|
||||
|
||||
def _three_way_diff(
|
||||
@@ -228,17 +256,50 @@ class CollaborationService:
|
||||
Creates a fork from the source_page snapshot at its current headRevision.
|
||||
Returns ForkRecord.
|
||||
"""
|
||||
if recipient_user_id == created_by:
|
||||
raise ValueError("You cannot share a canvas with your own account.")
|
||||
|
||||
fork_id = str(uuid.uuid4())
|
||||
fork_page_id = str(uuid.uuid4())
|
||||
fork_branch_id = str(uuid.uuid4())
|
||||
fork_page = await canvas_service.create_page(
|
||||
tenant_id=source_page["tenantId"],
|
||||
owner_id=recipient_user_id,
|
||||
title=f"{source_page['title']} Fork",
|
||||
page_type="fork",
|
||||
branch_name=f"fork-{str(fork_id)[:8]}",
|
||||
sharing_policy={
|
||||
"shareMode": "direct_fork_only",
|
||||
"allowReshare": visibility == "team",
|
||||
"defaultForkVisibility": visibility,
|
||||
},
|
||||
)
|
||||
|
||||
fork_components = _clone_components_for_fork(
|
||||
source_page.get("components", []),
|
||||
actor_id=created_by,
|
||||
source_page_id=source_page["pageId"],
|
||||
source_branch_id=source_page["branchId"],
|
||||
source_revision=source_page["headRevision"],
|
||||
)
|
||||
|
||||
await canvas_service.commit_revision(
|
||||
page_id=fork_page["pageId"],
|
||||
tenant_id=source_page["tenantId"],
|
||||
actor_id=created_by,
|
||||
commit_kind="merge",
|
||||
commit_summary=f"Forked from {source_page['title']} at rev.{source_page['headRevision']}",
|
||||
components=fork_components,
|
||||
execution_id=None,
|
||||
merge_request_id=None,
|
||||
idempotency_key=f"fork_{fork_id}",
|
||||
)
|
||||
|
||||
fork = {
|
||||
"forkId": fork_id,
|
||||
"sourcePageId": source_page["pageId"],
|
||||
"sourceBranchId": source_page["branchId"],
|
||||
"sourceRevision": source_page["headRevision"],
|
||||
"forkPageId": fork_page_id,
|
||||
"forkBranchId": fork_branch_id,
|
||||
"forkPageId": fork_page["pageId"],
|
||||
"forkBranchId": fork_page["branchId"],
|
||||
"recipientUserId": recipient_user_id,
|
||||
"createdBy": created_by,
|
||||
"visibility": visibility,
|
||||
|
||||
@@ -159,14 +159,20 @@ class DataAccessGateway:
|
||||
if dataset == "broker_performance":
|
||||
sql = """
|
||||
SELECT
|
||||
ROW_NUMBER() OVER (ORDER BY COALESCE(revenue_generated, 0) DESC, broker_name ASC)::int AS rank,
|
||||
broker_name AS name,
|
||||
deals_closed::int AS deals_closed,
|
||||
COALESCE(revenue_generated, 0)::float AS revenue_generated,
|
||||
avatar_url AS avatar
|
||||
FROM broker_performance
|
||||
WHERE tenant_id = $1
|
||||
ORDER BY revenue_generated DESC, broker_name ASC
|
||||
ROW_NUMBER() OVER (
|
||||
ORDER BY COUNT(DISTINCT l.person_id) DESC, COALESCE(u.full_name, u.email, u.id::text) ASC
|
||||
)::int AS rank,
|
||||
COALESCE(u.full_name, u.email, u.id::text) AS name,
|
||||
COUNT(DISTINCT l.person_id)::int AS deals_closed,
|
||||
COALESCE(SUM(o.value), 0)::float AS revenue_generated,
|
||||
u.avatar_url AS avatar
|
||||
FROM users_and_roles u
|
||||
LEFT JOIN crm_leads l ON l.assigned_user_id = u.id
|
||||
LEFT JOIN crm_opportunities o ON o.lead_id = l.lead_id
|
||||
WHERE u.is_active = TRUE
|
||||
GROUP BY u.id, u.full_name, u.email, u.avatar_url
|
||||
HAVING COUNT(DISTINCT l.person_id) > 0 OR COALESCE(SUM(o.value), 0) > 0
|
||||
ORDER BY revenue_generated DESC, name ASC
|
||||
LIMIT $2
|
||||
"""
|
||||
return sql, [ctx.tenant_id, row_limit]
|
||||
@@ -245,13 +251,20 @@ class DataAccessGateway:
|
||||
COALESCE(p.primary_phone, '') AS phone,
|
||||
COALESCE(p.city, '') AS city,
|
||||
COALESCE(p.buyer_type, 'unclassified') AS buyer_type,
|
||||
COALESCE(q.qd_score, 0)::float AS qd_score
|
||||
COALESCE(q.current_value, 0)::float AS qd_score
|
||||
FROM crm_people p
|
||||
LEFT JOIN LATERAL (
|
||||
SELECT qd_score
|
||||
SELECT current_value
|
||||
FROM intel_qd_scores q
|
||||
WHERE q.person_id = p.person_id
|
||||
ORDER BY q.scored_at DESC
|
||||
ORDER BY
|
||||
CASE
|
||||
WHEN q.score_type = 'engagement_score' THEN 0
|
||||
WHEN q.score_type = 'intent_score' THEN 1
|
||||
WHEN q.score_type = 'urgency_score' THEN 2
|
||||
ELSE 3
|
||||
END,
|
||||
q.computed_at DESC
|
||||
LIMIT 1
|
||||
) q ON TRUE
|
||||
ORDER BY qd_score DESC, p.full_name ASC
|
||||
@@ -301,6 +314,71 @@ class DataAccessGateway:
|
||||
"""
|
||||
return sql, [row_limit]
|
||||
|
||||
if dataset == "crm_last_interacted_clients":
|
||||
sql = """
|
||||
SELECT
|
||||
p.person_id::text AS id,
|
||||
p.full_name AS name,
|
||||
COALESCE(p.primary_email, '') AS email,
|
||||
COALESCE(p.primary_phone, '') AS phone,
|
||||
COALESCE(MAX(i.happened_at), p.updated_at, p.created_at) AS last_interaction_at,
|
||||
COUNT(i.interaction_id)::int AS interaction_count,
|
||||
COALESCE(q.current_value, 0)::float AS qd_score
|
||||
FROM crm_people p
|
||||
LEFT JOIN intel_interactions i ON i.person_id = p.person_id
|
||||
LEFT JOIN LATERAL (
|
||||
SELECT current_value
|
||||
FROM intel_qd_scores q
|
||||
WHERE q.person_id = p.person_id
|
||||
ORDER BY
|
||||
CASE
|
||||
WHEN q.score_type = 'engagement_score' THEN 0
|
||||
WHEN q.score_type = 'intent_score' THEN 1
|
||||
WHEN q.score_type = 'urgency_score' THEN 2
|
||||
ELSE 3
|
||||
END,
|
||||
q.computed_at DESC
|
||||
LIMIT 1
|
||||
) q ON TRUE
|
||||
GROUP BY p.person_id, p.full_name, p.primary_email, p.primary_phone, p.updated_at, p.created_at, q.current_value
|
||||
ORDER BY last_interaction_at DESC NULLS LAST, interaction_count DESC, p.full_name ASC
|
||||
LIMIT $1
|
||||
"""
|
||||
return sql, [row_limit]
|
||||
|
||||
if dataset == "crm_top_interested_clients":
|
||||
sql = """
|
||||
SELECT
|
||||
p.person_id::text AS id,
|
||||
p.full_name AS name,
|
||||
COALESCE(p.primary_email, '') AS email,
|
||||
COALESCE(p.primary_phone, '') AS phone,
|
||||
COUNT(pi.interest_id)::int AS interest_count,
|
||||
STRING_AGG(DISTINCT pi.project_name, ', ' ORDER BY pi.project_name) AS projects,
|
||||
COALESCE(MAX(pi.created_at), p.updated_at, p.created_at) AS last_interest_at,
|
||||
COALESCE(q.current_value, 0)::float AS qd_score
|
||||
FROM crm_people p
|
||||
INNER JOIN crm_property_interests pi ON pi.person_id = p.person_id
|
||||
LEFT JOIN LATERAL (
|
||||
SELECT current_value
|
||||
FROM intel_qd_scores q
|
||||
WHERE q.person_id = p.person_id
|
||||
ORDER BY
|
||||
CASE
|
||||
WHEN q.score_type = 'engagement_score' THEN 0
|
||||
WHEN q.score_type = 'intent_score' THEN 1
|
||||
WHEN q.score_type = 'urgency_score' THEN 2
|
||||
ELSE 3
|
||||
END,
|
||||
q.computed_at DESC
|
||||
LIMIT 1
|
||||
) q ON TRUE
|
||||
GROUP BY p.person_id, p.full_name, p.primary_email, p.primary_phone, p.updated_at, p.created_at, q.current_value
|
||||
ORDER BY interest_count DESC, qd_score DESC, last_interest_at DESC NULLS LAST, p.full_name ASC
|
||||
LIMIT $1
|
||||
"""
|
||||
return sql, [row_limit]
|
||||
|
||||
if dataset == "crm_interaction_timeline":
|
||||
sql = """
|
||||
SELECT
|
||||
|
||||
@@ -56,6 +56,18 @@ def _coerce_datetime(value: datetime | str | None) -> datetime | None:
|
||||
|
||||
# ── Execution store ───────────────────────────────────────────────────────────
|
||||
|
||||
def _json_safe(value: Any) -> Any:
    """Return ``value`` with nested ``datetime`` objects turned into ISO strings.

    Containers are rebuilt rather than mutated: dicts get ``str()``-converted
    keys, and both lists and tuples come back as lists. Scalars and any other
    objects pass through untouched.
    """
    if isinstance(value, dict):
        converted: dict[str, Any] = {}
        for key, item in value.items():
            converted[str(key)] = _json_safe(item)
        return converted
    if isinstance(value, (list, tuple)):
        return [_json_safe(entry) for entry in value]
    return value.isoformat() if isinstance(value, datetime) else value
|
||||
|
||||
|
||||
_DEMO_EXECUTIONS: dict[str, dict[str, Any]] = {}
|
||||
|
||||
|
||||
@@ -117,13 +129,13 @@ def _build_demo_retrieval_plan(
|
||||
|
||||
|
||||
# Component type -> dataset name.
# Each key is listed exactly once: a dict literal silently keeps only the
# last value for a repeated key, so duplicate entries would be dead code.
_DATASET_MAP: dict[str, str] = {
    "pipeline_board": "crm_opportunity_pipeline",
    "bar_chart": "crm_property_interest_rollup",
    "geo_map": "lead_geo_interest_rollup",
    "table": "crm_contacts_overview",
    "line_chart": "crm_property_interest_rollup",
    "kpi_tile": "oracle_aggregated_metric",
    "activity_stream": "crm_interaction_timeline",
}
|
||||
|
||||
_CODEBOOK_COMPONENT_MAP: dict[str, str] = {
|
||||
@@ -162,6 +174,10 @@ def _dataset_for_codebook(example: CodebookExample, prompt: str, component_plan_
|
||||
return "crm_interaction_timeline"
|
||||
if component_plan_type == "pipeline_board":
|
||||
return "crm_opportunity_pipeline"
|
||||
if component_plan_type == "table" and any(term in lowered_prompt for term in ("last interacted", "last interaction", "recently contacted", "recent interaction")):
|
||||
return "crm_last_interacted_clients"
|
||||
if component_plan_type == "table" and any(term in lowered_prompt for term in ("interest", "interested", "project", "property", "properties")) and any(term in lowered_prompt for term in ("client", "clients", "contact", "contacts")):
|
||||
return "crm_top_interested_clients"
|
||||
if component_plan_type == "line_chart" and any(term in lowered_prompt for term in ("trend", "time", "history", "growth")):
|
||||
return "crm_property_interest_rollup"
|
||||
|
||||
@@ -170,8 +186,12 @@ def _dataset_for_codebook(example: CodebookExample, prompt: str, component_plan_
|
||||
return "crm_interaction_timeline"
|
||||
if "pipeline" in lowered_prompt or "opportunit" in lowered_prompt:
|
||||
return "crm_opportunity_pipeline"
|
||||
if ("interest" in lowered_prompt or "project" in lowered_prompt or "property" in lowered_prompt) and ("client" in lowered_prompt or "contact" in lowered_prompt):
|
||||
return "crm_top_interested_clients"
|
||||
if "interest" in lowered_prompt or "project" in lowered_prompt or "property" in lowered_prompt:
|
||||
return "crm_property_interest_rollup"
|
||||
if "last interacted" in lowered_prompt or "recently contacted" in lowered_prompt or "recent interaction" in lowered_prompt:
|
||||
return "crm_last_interacted_clients"
|
||||
return "crm_contacts_overview"
|
||||
|
||||
if "client" in chapter or "client" in subchapter or "contact" in subchapter:
|
||||
@@ -205,6 +225,7 @@ def _build_codebook_retrieval_plan(
|
||||
exemplar = matches[0]
|
||||
for component_plan_type in desired_types[:4]:
|
||||
dataset = _dataset_for_codebook(exemplar, prompt, component_plan_type)
|
||||
title_hint = _title_for_dataset(dataset, component_plan_type, prompt) or title_hints.get(component_plan_type, exemplar.title)
|
||||
components.append(
|
||||
{
|
||||
"suggestedType": component_plan_type,
|
||||
@@ -222,7 +243,7 @@ def _build_codebook_retrieval_plan(
|
||||
"subchapterName": exemplar.subchapter_name,
|
||||
"sourcePack": exemplar.source_pack,
|
||||
},
|
||||
"titleHint": title_hints.get(component_plan_type, exemplar.title),
|
||||
"titleHint": title_hint,
|
||||
}
|
||||
)
|
||||
|
||||
@@ -235,6 +256,24 @@ def _build_codebook_retrieval_plan(
|
||||
}
|
||||
|
||||
|
||||
def _title_for_dataset(dataset: str, component_plan_type: str, prompt: str) -> str | None:
|
||||
lowered_prompt = prompt.lower()
|
||||
dataset_titles = {
|
||||
"crm_contacts_overview": "CRM Contacts Overview",
|
||||
"crm_opportunity_pipeline": "Opportunity Pipeline",
|
||||
"crm_property_interest_rollup": "Property Interest Rollup",
|
||||
"crm_interaction_timeline": "Client Interaction Timeline",
|
||||
"crm_last_interacted_clients": "Last Interacted Clients",
|
||||
"crm_top_interested_clients": "Top Interested Clients",
|
||||
"broker_performance": "Broker Performance",
|
||||
}
|
||||
if dataset == "crm_top_interested_clients" and "top" in lowered_prompt:
|
||||
return "Top Interested Clients"
|
||||
if dataset == "crm_last_interacted_clients" and ("top" in lowered_prompt or "last" in lowered_prompt):
|
||||
return "Last Interacted Clients"
|
||||
return dataset_titles.get(dataset)
|
||||
|
||||
|
||||
_RUNTIME_ALLOWED_DATASETS = {
|
||||
"deals",
|
||||
"lead_daily_snapshot",
|
||||
@@ -247,6 +286,8 @@ _RUNTIME_ALLOWED_DATASETS = {
|
||||
"crm_opportunity_pipeline",
|
||||
"crm_property_interest_rollup",
|
||||
"crm_interaction_timeline",
|
||||
"crm_last_interacted_clients",
|
||||
"crm_top_interested_clients",
|
||||
}
|
||||
|
||||
|
||||
@@ -371,6 +412,11 @@ class PromptOrchestrator:
|
||||
execution["status"] = "executing"
|
||||
await self._persist_execution(execution)
|
||||
|
||||
page = await canvas_service.get_page(page_id, tenant_id)
|
||||
existing_comps = page.get("components", []) if page else []
|
||||
next_order_base = self._next_order_base(existing_comps)
|
||||
section_id = f"sec_prompt_generated_{execution_id.replace('-', '')[:12]}"
|
||||
|
||||
# ── Step 3: Build visualization plan (component descriptors) ──────────
|
||||
viz_plan = await self._build_visualization_plan(
|
||||
retrieval_plan=retrieval_plan,
|
||||
@@ -382,6 +428,8 @@ class PromptOrchestrator:
|
||||
placement_mode=placement_mode,
|
||||
ctx=ctx,
|
||||
persona_plan=persona_plan,
|
||||
base_order=next_order_base,
|
||||
section_id=section_id,
|
||||
)
|
||||
execution["visualizationPlan"] = viz_plan
|
||||
|
||||
@@ -391,9 +439,7 @@ class PromptOrchestrator:
|
||||
|
||||
# Commit a revision bump with the new components
|
||||
try:
|
||||
page = await canvas_service.get_page(page_id, tenant_id)
|
||||
if page:
|
||||
existing_comps = page.get("components", [])
|
||||
new_comps = existing_comps + viz_plan.get("components", [])
|
||||
revision = await canvas_service.commit_revision(
|
||||
page_id=page_id,
|
||||
@@ -429,6 +475,8 @@ class PromptOrchestrator:
|
||||
placement_mode: str,
|
||||
ctx: PolicyContext,
|
||||
persona_plan: dict[str, Any],
|
||||
base_order: int,
|
||||
section_id: str,
|
||||
) -> dict[str, Any]:
|
||||
"""Converts a retrieval plan into a list of CanvasComponent descriptors."""
|
||||
components = [
|
||||
@@ -438,9 +486,10 @@ class PromptOrchestrator:
|
||||
branch_id=branch_id,
|
||||
prompt=prompt,
|
||||
persona_plan=persona_plan,
|
||||
order_index=base_order + 100,
|
||||
section_id=section_id,
|
||||
)
|
||||
]
|
||||
base_order = 900 # Append after existing components
|
||||
|
||||
component_plans = retrieval_plan.get("components", [])
|
||||
for i, plan in enumerate(component_plans):
|
||||
@@ -469,7 +518,7 @@ class PromptOrchestrator:
|
||||
"privacyTier": plan.get("privacyTier", "standard"),
|
||||
"cachePolicy": {"mode": "ttl", "ttlSeconds": 120},
|
||||
},
|
||||
"visualizationParameters": self._default_viz_params(ctype, data_rows),
|
||||
"visualizationParameters": self._default_viz_params(ctype, dataset, data_rows),
|
||||
"dataBindings": self._default_bindings(ctype),
|
||||
"version": 1,
|
||||
"lifecycleState": "active",
|
||||
@@ -483,7 +532,7 @@ class PromptOrchestrator:
|
||||
"renderingHints": self._rendering_hints(ctype),
|
||||
"layout": {
|
||||
"orderIndex": base_order + (i + 1) * 100,
|
||||
"sectionId": "sec_prompt_generated",
|
||||
"sectionId": section_id,
|
||||
"widthMode": "full" if ctype in ("pipeline_board", "table", "geo_map") else "half",
|
||||
"minHeightPx": 300,
|
||||
"stickyHeader": False,
|
||||
@@ -520,11 +569,29 @@ class PromptOrchestrator:
|
||||
dataset=dataset,
|
||||
warnings=component_warnings,
|
||||
order_index=base_order + (i + 1) * 100,
|
||||
section_id=section_id,
|
||||
)
|
||||
components.append(comp)
|
||||
|
||||
if len(components) > 1:
|
||||
planning_component = components.pop(0)
|
||||
planning_component["layout"]["orderIndex"] = base_order + (len(component_plans) + 1) * 100
|
||||
components.append(planning_component)
|
||||
|
||||
return {"components": components}
|
||||
|
||||
@staticmethod
|
||||
def _next_order_base(existing_components: list[dict[str, Any]]) -> int:
|
||||
max_existing = 0
|
||||
for component in existing_components:
|
||||
try:
|
||||
order_index = int((component.get("layout") or {}).get("orderIndex", 0))
|
||||
except (TypeError, ValueError):
|
||||
order_index = 0
|
||||
if order_index > max_existing:
|
||||
max_existing = order_index
|
||||
return ((max_existing // 100) + 1) * 100
|
||||
|
||||
@staticmethod
|
||||
def _persona_text_canvas(
|
||||
*,
|
||||
@@ -533,13 +600,13 @@ class PromptOrchestrator:
|
||||
branch_id: str,
|
||||
prompt: str,
|
||||
persona_plan: dict[str, Any],
|
||||
order_index: int,
|
||||
section_id: str,
|
||||
) -> dict[str, Any]:
|
||||
recommended = ", ".join(persona_plan.get("recommendedTemplates", [])) or "no direct template matches"
|
||||
content = (
|
||||
f"Oracle received: {prompt}\n\n"
|
||||
f"Reusable templates: {recommended}\n\n"
|
||||
"Execution policy: query live CRM data first, reuse matching templates, "
|
||||
"synthesize missing UI blocks, then dispatch the required ComfyUI-backed workflow."
|
||||
"Execution policy: query live CRM data first, pick the strongest-fitting canvas components, "
|
||||
"and synthesize any missing UI blocks before rendering the result."
|
||||
)
|
||||
return {
|
||||
"componentId": str(uuid.uuid4()),
|
||||
@@ -574,8 +641,8 @@ class PromptOrchestrator:
|
||||
},
|
||||
"renderingHints": {"estimatedHeightPx": 180, "skeletonVariant": "text", "virtualizationPriority": 4},
|
||||
"layout": {
|
||||
"orderIndex": 910,
|
||||
"sectionId": "sec_prompt_generated",
|
||||
"orderIndex": order_index,
|
||||
"sectionId": section_id,
|
||||
"widthMode": "full",
|
||||
"minHeightPx": 180,
|
||||
"stickyHeader": False,
|
||||
@@ -631,17 +698,34 @@ class PromptOrchestrator:
|
||||
return labels.get(comp_type, "Oracle Canvas Component")
|
||||
|
||||
@staticmethod
|
||||
def _default_viz_params(comp_type: str, rows: list[dict[str, Any]]) -> dict[str, Any]:
|
||||
def _default_viz_params(comp_type: str, dataset: str, rows: list[dict[str, Any]]) -> dict[str, Any]:
|
||||
first_row = rows[0] if rows else {}
|
||||
inferred_columns = [key for key in first_row.keys() if key not in {"avatar"}] or ["name", "status"]
|
||||
table_columns_by_dataset: dict[str, list[str]] = {
|
||||
"broker_performance": ["name", "deals_closed", "revenue_generated"],
|
||||
"crm_contacts_overview": ["name", "email", "phone", "city", "buyer_type", "qd_score"],
|
||||
"crm_last_interacted_clients": ["name", "email", "phone", "last_interaction_at", "interaction_count", "qd_score"],
|
||||
"crm_top_interested_clients": ["name", "email", "phone", "interest_count", "projects", "qd_score"],
|
||||
}
|
||||
defaults: dict[str, dict[str, Any]] = {
|
||||
"bar_chart": {"xAxis": "category", "yAxis": "value", "sort": "desc", "showLabels": True, "legend": False},
|
||||
"line_chart": {"showPoints": True, "smooth": True},
|
||||
"kpi_tile": {
|
||||
"label": rows[0].get("metric_label", "Result") if rows else "Result",
|
||||
"trend": str(rows[0].get("trend_value", "")) if rows else "",
|
||||
"comparisonLabel": rows[0].get("comparison_label", "") if rows else "",
|
||||
"label": first_row.get("metric_label", "Result"),
|
||||
"trend": str(first_row.get("trend_value", "")),
|
||||
"comparisonLabel": first_row.get("comparison_label", ""),
|
||||
},
|
||||
"geo_map": {"mapStyle": "dubai_district_heat", "intensityField": "lead_count", "interactive": True, "tooltipFields": ["district", "lead_count", "avg_qd_score"]},
|
||||
"table": {"rankBy": "revenue_generated", "showTopBadge": True, "columns": ["name", "deals_closed", "revenue_generated"]},
|
||||
"table": {
|
||||
"rankBy": "revenue_generated",
|
||||
"showTopBadge": True,
|
||||
"columns": table_columns_by_dataset.get(
|
||||
dataset,
|
||||
inferred_columns,
|
||||
),
|
||||
"emptyStateTitle": "No matching records found",
|
||||
"emptyStateDescription": "The query ran successfully but returned no rows for this prompt.",
|
||||
},
|
||||
"pipeline_board": {"showValue": True, "colorByStage": True},
|
||||
"activity_stream": {"showUrgencyIndicator": True},
|
||||
}
|
||||
@@ -674,7 +758,8 @@ class PromptOrchestrator:
|
||||
def _generate_summary(prompt: str, viz_plan: dict[str, Any]) -> str:
    """Build the one-line summary shown for a completed prompt execution.

    Args:
        prompt: The user's prompt; only its first 60 characters are quoted,
            followed by an ellipsis when it was truncated.
        viz_plan: Visualization plan; its ``"components"`` list is counted.

    Returns:
        A string of the form ``Generated N component(s) for: "<prompt>"``,
        where ``N`` is the number of plan components minus one, never below
        zero.
    """
    count = len(viz_plan.get("components", []))
    short_prompt = prompt[:60] + ("…" if len(prompt) > 60 else "")
    # One plan entry is the planning/persona text block rather than a data
    # component, so it is excluded from the reported total.
    data_component_count = max(count - 1, 0)
    return f'Generated {data_component_count} component{"s" if data_component_count != 1 else ""} for: "{short_prompt}"'
|
||||
|
||||
@staticmethod
|
||||
def _error_component(
|
||||
@@ -686,6 +771,7 @@ class PromptOrchestrator:
|
||||
dataset: str,
|
||||
warnings: list[str],
|
||||
order_index: int,
|
||||
section_id: str,
|
||||
) -> dict[str, Any]:
|
||||
return {
|
||||
"componentId": component_id,
|
||||
@@ -722,7 +808,7 @@ class PromptOrchestrator:
|
||||
"renderingHints": {"estimatedHeightPx": 140, "skeletonVariant": "generic", "virtualizationPriority": 5},
|
||||
"layout": {
|
||||
"orderIndex": order_index,
|
||||
"sectionId": "sec_prompt_generated",
|
||||
"sectionId": section_id,
|
||||
"widthMode": "full",
|
||||
"minHeightPx": 140,
|
||||
"stickyHeader": False,
|
||||
@@ -875,8 +961,8 @@ class PromptOrchestrator:
|
||||
execution["status"],
|
||||
execution["modelRuntime"],
|
||||
execution["semanticModelVersion"],
|
||||
json.dumps(execution.get("retrievalPlan") or {}),
|
||||
json.dumps(execution.get("visualizationPlan") or {}),
|
||||
json.dumps(_json_safe(execution.get("retrievalPlan") or {})),
|
||||
json.dumps(_json_safe(execution.get("visualizationPlan") or {})),
|
||||
execution.get("warnings", []),
|
||||
execution.get("summary"),
|
||||
execution.get("componentsCreated", []),
|
||||
|
||||
@@ -257,13 +257,16 @@ async def create_fork(
|
||||
page = await canvas_service.get_page(page_id, ctx.tenant_id)
|
||||
if not page:
|
||||
raise HTTPException(status_code=404, detail="Source page not found.")
|
||||
fork = await collaboration_service.create_fork(
|
||||
source_page=page,
|
||||
recipient_user_id=payload.recipientUserId,
|
||||
created_by=ctx.actor_id,
|
||||
visibility=payload.visibility,
|
||||
message=payload.message,
|
||||
)
|
||||
try:
|
||||
fork = await collaboration_service.create_fork(
|
||||
source_page=page,
|
||||
recipient_user_id=payload.recipientUserId,
|
||||
created_by=ctx.actor_id,
|
||||
visibility=payload.visibility,
|
||||
message=payload.message,
|
||||
)
|
||||
except ValueError as exc:
|
||||
raise HTTPException(status_code=400, detail=str(exc)) from exc
|
||||
return _ok(fork)
|
||||
|
||||
|
||||
|
||||
@@ -1,394 +1,95 @@
|
||||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# nemoclaw_deploy.sh
|
||||
# Deploys NemoClaw on the AWS G6.12xlarge instance.
|
||||
# - All data/install paths on NVMe (/opt/dlami/nvme/)
|
||||
# - Configures OpenShell to use existing Ollama (qwen3.5:27b, port 11434)
|
||||
# - GPUs 0+1 are Ollama's. Do NOT reassign them.
|
||||
# - ComfyUI owns GPUs 2+3. Do NOT touch.
|
||||
# - Creates a systemd service for the NemoClaw gateway.
|
||||
# =============================================================================
|
||||
|
||||
set -euo pipefail
|
||||
NVME="/opt/dlami/nvme"
|
||||
AGENT_NAME="velocity-sentinel"
|
||||
OLLAMA_URL="http://127.0.0.1:11434"
|
||||
OLLAMA_MODEL="qwen3.5:27b"
|
||||
OPENCLAW_PORT=8080 # Port our FastAPI backend targets
|
||||
|
||||
echo "================================================================"
|
||||
echo " Project Velocity — NemoClaw + OpenShell Deploy Script"
|
||||
echo " Instance: G6.12xlarge | NVMe: $NVME"
|
||||
echo "================================================================"
|
||||
# NemoClaw deployment helper for the Desineuron SGLang runtime.
|
||||
# This script intentionally avoids Ollama-era assumptions and configures
|
||||
# NemoClaw/OpenShell to talk to the shared OpenAI-compatible SGLang endpoint.
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────
|
||||
# 0. Safety checks
|
||||
# ──────────────────────────────────────────────────────────────────
|
||||
if [ "$(id -u)" -ne 0 ]; then
|
||||
echo "[ERROR] Run as root or with sudo"; exit 1
|
||||
NVME_ROOT="${NVME_ROOT:-/opt/dlami/nvme/nemoclaw}"
|
||||
SGLANG_BASE_URL="${SGLANG_BASE_URL:-https://llm.desineuron.in}"
|
||||
SGLANG_MODEL="${SGLANG_MODEL:-qwen3.6:35b-a3b}"
|
||||
SGLANG_API_TOKEN="${SGLANG_API_TOKEN:-}"
|
||||
OPENSHELL_PORT="${OPENSHELL_PORT:-8080}"
|
||||
AGENT_NAME="${AGENT_NAME:-velocity-sentinel}"
|
||||
|
||||
if [[ "${EUID}" -ne 0 ]]; then
|
||||
echo "Run this script with sudo or as root."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if ! mountpoint -q "$NVME" 2>/dev/null && [ ! -d "$NVME" ]; then
|
||||
echo "[WARN] NVMe not mounted at $NVME — using /home/ubuntu/nvme as fallback"
|
||||
NVME="/home/ubuntu/nvme"
|
||||
mkdir -p "$NVME"
|
||||
fi
|
||||
echo "==> Desineuron NemoClaw deploy"
|
||||
echo "NVME root : ${NVME_ROOT}"
|
||||
echo "SGLang base URL: ${SGLANG_BASE_URL}"
|
||||
echo "Model : ${SGLANG_MODEL}"
|
||||
echo "Agent : ${AGENT_NAME}"
|
||||
|
||||
echo "[✓] NVMe target: $NVME"
|
||||
mkdir -p "${NVME_ROOT}"/{logs,state,home}
|
||||
|
||||
# Confirm Ollama is alive before proceeding
|
||||
if ! curl -sf "$OLLAMA_URL/api/tags" | grep -q "qwen"; then
|
||||
echo "[WARN] Ollama at $OLLAMA_URL doesn't show qwen3.5:27b yet — proceeding anyway"
|
||||
else
|
||||
echo "[✓] Ollama confirmed running with qwen3.5:27b"
|
||||
fi
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────
|
||||
# 1. Node.js 22 (NemoClaw requirement: >=22.16)
|
||||
# ──────────────────────────────────────────────────────────────────
|
||||
echo ""
|
||||
echo "[1/7] Installing Node.js 22..."
|
||||
|
||||
NODE_VERSION=$(node --version 2>/dev/null | sed 's/v//' | cut -d. -f1 || echo "0")
|
||||
if [ "$NODE_VERSION" -ge 22 ]; then
|
||||
echo "[✓] Node.js $(node --version) already installed"
|
||||
else
|
||||
if ! command -v node >/dev/null 2>&1; then
|
||||
curl -fsSL https://deb.nodesource.com/setup_22.x | bash -
|
||||
apt-get update -y
|
||||
apt-get install -y nodejs
|
||||
echo "[✓] Node.js $(node --version) installed"
|
||||
fi
|
||||
|
||||
npm --version
|
||||
echo "[✓] npm $(npm --version)"
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────
|
||||
# 2. Docker (required for OpenShell container runtime)
|
||||
# ──────────────────────────────────────────────────────────────────
|
||||
echo ""
|
||||
echo "[2/7] Ensuring Docker is installed..."
|
||||
|
||||
if command -v docker &>/dev/null && docker info &>/dev/null; then
|
||||
echo "[✓] Docker $(docker --version | awk '{print $3}') already running"
|
||||
else
|
||||
echo " Installing Docker..."
|
||||
apt-get install -y ca-certificates curl gnupg lsb-release
|
||||
install -m 0755 -d /etc/apt/keyrings
|
||||
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg
|
||||
chmod a+r /etc/apt/keyrings/docker.gpg
|
||||
echo \
|
||||
"deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] \
|
||||
https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" \
|
||||
| tee /etc/apt/sources.list.d/docker.list > /dev/null
|
||||
apt-get update -q
|
||||
apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
|
||||
systemctl enable docker
|
||||
systemctl start docker
|
||||
echo "[✓] Docker installed"
|
||||
if ! command -v docker >/dev/null 2>&1; then
|
||||
apt-get update -y
|
||||
apt-get install -y docker.io
|
||||
systemctl enable --now docker
|
||||
fi
|
||||
|
||||
# Move Docker data root to NVMe so images don't fill root disk
|
||||
DOCKER_DAEMON_JSON="/etc/docker/daemon.json"
|
||||
if ! grep -q "nvme" "$DOCKER_DAEMON_JSON" 2>/dev/null; then
|
||||
echo " Moving Docker data-root → $NVME/docker"
|
||||
mkdir -p "$NVME/docker"
|
||||
# Preserve existing config if any
|
||||
EXISTING=$(cat "$DOCKER_DAEMON_JSON" 2>/dev/null || echo "{}")
|
||||
python3 -c "
|
||||
import json, sys
|
||||
cfg = json.loads('''$EXISTING''')
|
||||
cfg['data-root'] = '$NVME/docker'
|
||||
print(json.dumps(cfg, indent=2))
|
||||
" > "$DOCKER_DAEMON_JSON"
|
||||
systemctl restart docker
|
||||
echo "[✓] Docker data-root → $NVME/docker"
|
||||
if ! command -v openshell >/dev/null 2>&1; then
|
||||
npm install -g @nvidia/openshell || true
|
||||
fi
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────
|
||||
# 3. Install NemoClaw (headless via env vars)
|
||||
# ──────────────────────────────────────────────────────────────────
|
||||
echo ""
|
||||
echo "[3/7] Installing NemoClaw..."
|
||||
|
||||
# Set HOME so NemoClaw installs to NVMe-backed location
|
||||
export NEMOCLAW_HOME="$NVME/nemoclaw"
|
||||
export OPENSHELL_HOME="$NVME/openshell"
|
||||
export HOME_OVERRIDE="$NVME/home"
|
||||
mkdir -p "$NEMOCLAW_HOME" "$OPENSHELL_HOME" "$HOME_OVERRIDE"
|
||||
|
||||
# Link ~/.nemoclaw and ~/.openshell to NVMe
|
||||
ln -sfn "$NEMOCLAW_HOME" /root/.nemoclaw 2>/dev/null || true
|
||||
ln -sfn "$NEMOCLAW_HOME" /home/ubuntu/.nemoclaw 2>/dev/null || true
|
||||
ln -sfn "$OPENSHELL_HOME" /root/.openshell 2>/dev/null || true
|
||||
ln -sfn "$OPENSHELL_HOME" /home/ubuntu/.openshell 2>/dev/null || true
|
||||
|
||||
if command -v nemoclaw &>/dev/null; then
|
||||
echo "[✓] nemoclaw already installed: $(nemoclaw --version 2>/dev/null || echo 'version unknown')"
|
||||
else
|
||||
echo " Downloading NemoClaw installer..."
|
||||
INSTALLER_SCRIPT="$NVME/nemoclaw_install.sh"
|
||||
curl -fsSL https://www.nvidia.com/nemoclaw.sh -o "$INSTALLER_SCRIPT"
|
||||
chmod +x "$INSTALLER_SCRIPT"
|
||||
|
||||
# Run the installer non-interactively
|
||||
# NEMOCLAW_SKIP_ONBOARD=1 bypasses the interactive wizard (undocumented but standard pattern)
|
||||
# We'll do manual onboarding after install using CLI flags
|
||||
NEMOCLAW_SKIP_ONBOARD=1 \
|
||||
NEMOCLAW_HOME="$NEMOCLAW_HOME" \
|
||||
bash "$INSTALLER_SCRIPT" || true
|
||||
|
||||
# Reload PATH
|
||||
export PATH="$PATH:/usr/local/bin:/root/.local/bin"
|
||||
source ~/.bashrc 2>/dev/null || true
|
||||
|
||||
if ! command -v nemoclaw &>/dev/null; then
|
||||
echo "[WARN] nemoclaw not in PATH yet — checking common locations..."
|
||||
for p in /usr/local/bin/nemoclaw /root/.local/bin/nemoclaw "$NVME/bin/nemoclaw"; do
|
||||
if [ -f "$p" ]; then
|
||||
ln -sfn "$p" /usr/local/bin/nemoclaw
|
||||
echo "[✓] Linked nemoclaw from $p"
|
||||
break
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
echo "[✓] nemoclaw installed"
|
||||
if ! command -v nemoclaw >/dev/null 2>&1; then
|
||||
npm install -g @nvidia/nemoclaw || true
|
||||
fi
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────
|
||||
# 4. Onboard the Velocity Sentinel agent sandbox
|
||||
# ──────────────────────────────────────────────────────────────────
|
||||
echo ""
|
||||
echo "[4/7] Onboarding '$AGENT_NAME' NemoClaw sandbox..."
|
||||
cat >/etc/default/desineuron-nemoclaw <<EOF
|
||||
SGLANG_BASE_URL=${SGLANG_BASE_URL}
|
||||
SGLANG_MODEL=${SGLANG_MODEL}
|
||||
SGLANG_API_TOKEN=${SGLANG_API_TOKEN}
|
||||
NEMOCLAW_BASE_URL=${SGLANG_BASE_URL}
|
||||
NEMOCLAW_MODEL=${SGLANG_MODEL}
|
||||
NEMOCLAW_API_TOKEN=${SGLANG_API_TOKEN}
|
||||
EOF
|
||||
chmod 600 /etc/default/desineuron-nemoclaw
|
||||
|
||||
# Check if sandbox already exists
|
||||
if nemoclaw "$AGENT_NAME" status &>/dev/null; then
|
||||
echo "[✓] Sandbox '$AGENT_NAME' already exists — skipping creation"
|
||||
else
|
||||
echo " Running nemoclaw onboard (this may take a few minutes)..."
|
||||
# --provider compatible-endpoint: use our local Ollama instead of NVIDIA cloud
|
||||
# --yes: skip confirmation prompts
|
||||
nemoclaw onboard \
|
||||
--name "$AGENT_NAME" \
|
||||
if command -v openshell >/dev/null 2>&1; then
|
||||
openshell inference set \
|
||||
--provider compatible-endpoint \
|
||||
--endpoint "$OLLAMA_URL/v1" \
|
||||
--model "$OLLAMA_MODEL" \
|
||||
--yes \
|
||||
--no-messaging-bridge \
|
||||
--no-skills || {
|
||||
echo "[WARN] Structured onboard failed — trying minimal onboard..."
|
||||
# Fallback: let it run with defaults if flags are not supported in this alpha version
|
||||
yes "" | nemoclaw onboard --name "$AGENT_NAME" 2>&1 | head -60 || true
|
||||
}
|
||||
echo "[✓] Sandbox onboarded"
|
||||
--base-url "${SGLANG_BASE_URL}/v1" \
|
||||
--api-key "${SGLANG_API_TOKEN:-desineuron}" \
|
||||
--model "${SGLANG_MODEL}" \
|
||||
--context-window 8192 \
|
||||
--max-tokens 4096 || true
|
||||
fi
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────
|
||||
# 5. Configure OpenShell to use Ollama (compatible endpoint)
|
||||
# ──────────────────────────────────────────────────────────────────
|
||||
echo ""
|
||||
echo "[5/7] Configuring OpenShell inference → Ollama (qwen3.5:27b)..."
|
||||
|
||||
# Set inference route to our local Ollama
|
||||
openshell inference set \
|
||||
--provider compatible-endpoint \
|
||||
--base-url "$OLLAMA_URL/v1" \
|
||||
--api-key "ollama" \
|
||||
--model "$OLLAMA_MODEL" \
|
||||
--context-window 32768 \
|
||||
--max-tokens 4096 || {
|
||||
echo "[WARN] openshell inference set failed — trying alternate syntax..."
|
||||
openshell inference set \
|
||||
--provider compatible-endpoint \
|
||||
--model "$OLLAMA_MODEL" || true
|
||||
}
|
||||
|
||||
# Also set the context window on the Ollama model side
|
||||
echo " Setting Ollama num_ctx=32768..."
|
||||
curl -s -X POST "$OLLAMA_URL/api/generate" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"model\":\"$OLLAMA_MODEL\",\"prompt\":\"\",\"options\":{\"num_ctx\":32768},\"stream\":false}" \
|
||||
> /dev/null 2>&1 || true
|
||||
|
||||
echo "[✓] OpenShell inference configured → $OLLAMA_URL ($OLLAMA_MODEL)"
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────
|
||||
# 6. Write OpenShell network policy (allow Velocity backend egress)
|
||||
# ──────────────────────────────────────────────────────────────────
|
||||
echo ""
|
||||
echo "[6/7] Writing OpenShell network policy..."
|
||||
|
||||
POLICY_DIR="$OPENSHELL_HOME/policy"
|
||||
mkdir -p "$POLICY_DIR"
|
||||
|
||||
cat > "$POLICY_DIR/velocity_egress.yaml" << 'POLICY'
|
||||
# OpenShell Network Egress Policy — Project Velocity Sentinel
|
||||
# Applied to the velocity-sentinel sandbox.
|
||||
# All non-listed hosts are blocked by default.
|
||||
|
||||
version: "1"
|
||||
sandbox: velocity-sentinel
|
||||
|
||||
egress:
|
||||
# Local Ollama inference (Qwen 3.5 27B)
|
||||
- host: "127.0.0.1"
|
||||
ports: [11434]
|
||||
description: "Ollama LLM inference"
|
||||
action: allow
|
||||
|
||||
# OpenShell gateway itself (loopback)
|
||||
- host: "127.0.0.1"
|
||||
ports: [8080, 8081, 8082, 8083, 8084, 8085]
|
||||
description: "OpenShell gateway ports"
|
||||
action: allow
|
||||
|
||||
# Velocity FastAPI backend (same host)
|
||||
- host: "127.0.0.1"
|
||||
ports: [8000, 8001, 8288]
|
||||
description: "Velocity FastAPI backend"
|
||||
action: allow
|
||||
|
||||
# PostgreSQL (same host)
|
||||
- host: "127.0.0.1"
|
||||
ports: [5432]
|
||||
description: "PostgreSQL DB"
|
||||
action: allow
|
||||
|
||||
# Block everything else
|
||||
- host: "*"
|
||||
action: deny
|
||||
description: "Default deny — data sovereignty (India/Abu Dhabi)"
|
||||
POLICY
|
||||
|
||||
# Apply the policy if openshell supports it
|
||||
openshell policy apply "$POLICY_DIR/velocity_egress.yaml" 2>/dev/null || \
|
||||
echo "[WARN] Policy apply not supported yet in this alpha — YAML written for future use"
|
||||
|
||||
echo "[✓] Network policy written → $POLICY_DIR/velocity_egress.yaml"
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────
|
||||
# 7. Write NemoClaw systemd service
|
||||
# ──────────────────────────────────────────────────────────────────
|
||||
echo ""
|
||||
echo "[7/7] Installing systemd service: nemoclaw-velocity.service..."
|
||||
|
||||
NEMOCLAW_BIN=$(command -v nemoclaw || echo "/usr/local/bin/nemoclaw")
|
||||
OPENSHELL_BIN=$(command -v openshell || echo "/usr/local/bin/openshell")
|
||||
|
||||
cat > /etc/systemd/system/nemoclaw-velocity.service << SERVICE
|
||||
cat >/etc/systemd/system/desineuron-nemoclaw-gateway.service <<EOF
|
||||
[Unit]
|
||||
Description=NemoClaw Velocity Sentinel Gateway
|
||||
Documentation=https://github.com/NVIDIA/NemoClaw
|
||||
After=network.target ollama.service docker.service
|
||||
Wants=ollama.service docker.service
|
||||
Description=Desineuron NemoClaw Gateway
|
||||
After=network-online.target
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=ubuntu
|
||||
Group=ubuntu
|
||||
WorkingDirectory=$NVME/nemoclaw
|
||||
|
||||
# GPU constraint: NemoClaw itself is CPU-bound (inference goes to Ollama)
|
||||
# Ollama already owns GPUs 0,1. ComfyUI owns GPUs 2,3.
|
||||
Environment=CUDA_VISIBLE_DEVICES=""
|
||||
Environment=NEMOCLAW_HOME=$NVME/nemoclaw
|
||||
Environment=OPENSHELL_HOME=$NVME/openshell
|
||||
Environment=OLLAMA_BASE_URL=http://127.0.0.1:11434
|
||||
Environment=VELOCITY_NEMO_MODEL=qwen3.5:27b
|
||||
Environment=GATEWAY_PORT=$OPENCLAW_PORT
|
||||
|
||||
ExecStart=$NEMOCLAW_BIN $AGENT_NAME connect --gateway-port $OPENCLAW_PORT
|
||||
ExecReload=/bin/kill -HUP \$MAINPID
|
||||
EnvironmentFile=/etc/default/desineuron-nemoclaw
|
||||
WorkingDirectory=${NVME_ROOT}
|
||||
Environment=HOME=${NVME_ROOT}/home
|
||||
ExecStart=/usr/bin/env bash -lc 'nemoclaw serve --name ${AGENT_NAME} --port ${OPENSHELL_PORT}'
|
||||
Restart=always
|
||||
RestartSec=10
|
||||
StandardOutput=append:$NVME/logs/nemoclaw-velocity.log
|
||||
StandardError=append:$NVME/logs/nemoclaw-velocity.log
|
||||
|
||||
# Limits
|
||||
LimitNOFILE=65536
|
||||
TimeoutStopSec=30
|
||||
RestartSec=5
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
SERVICE
|
||||
EOF
|
||||
|
||||
mkdir -p "$NVME/logs"
|
||||
systemctl daemon-reload
|
||||
systemctl enable nemoclaw-velocity.service
|
||||
systemctl start nemoclaw-velocity.service || true # May fail on first boot if onboard not done
|
||||
systemctl enable --now desineuron-nemoclaw-gateway.service
|
||||
systemctl --no-pager --full status desineuron-nemoclaw-gateway.service
|
||||
|
||||
echo "[✓] nemoclaw-velocity.service enabled and started"
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────
|
||||
# Finalize: Detect gateway port & write env file
|
||||
# ──────────────────────────────────────────────────────────────────
|
||||
echo ""
|
||||
echo "================================================================"
|
||||
echo " Writing Velocity backend environment file..."
|
||||
echo "================================================================"
|
||||
|
||||
VELOCITY_ENV="$NVME/velocity/env"
|
||||
mkdir -p "$(dirname "$VELOCITY_ENV")"
|
||||
|
||||
# Detect actual OpenShell gateway URL
|
||||
GATEWAY_URL="http://127.0.0.1:$OPENCLAW_PORT"
|
||||
GATEWAY_CHAT_URL="$GATEWAY_URL/v1/chat/completions"
|
||||
|
||||
# Quick connectivity test (will succeed once nemoclaw starts)
|
||||
echo " Testing gateway at $GATEWAY_CHAT_URL ..."
|
||||
sleep 5
|
||||
HTTP_CODE=$(curl -sf -o /dev/null -w "%{http_code}" \
|
||||
-X POST "$GATEWAY_CHAT_URL" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"model":"qwen3.5:27b","messages":[{"role":"user","content":"ping"}],"max_tokens":5}' \
|
||||
2>/dev/null || echo "000")
|
||||
|
||||
if [ "$HTTP_CODE" = "200" ] || [ "$HTTP_CODE" = "201" ]; then
|
||||
echo "[✓] Gateway responding at $GATEWAY_CHAT_URL (HTTP $HTTP_CODE)"
|
||||
else
|
||||
echo "[WARN] Gateway not yet responding (HTTP $HTTP_CODE) — it may still be starting up"
|
||||
fi
|
||||
|
||||
cat > "$VELOCITY_ENV" << ENV
|
||||
# Project Velocity — Backend Environment
|
||||
# Generated by nemoclaw_deploy.sh
|
||||
# Loaded by: source $VELOCITY_ENV
|
||||
|
||||
# ── NemoClaw / OpenShell Gateway ──────────────────────────────────
|
||||
NEMOCLAW_BASE_URL=$GATEWAY_URL
|
||||
NEMOCLAW_CHAT_URL=$GATEWAY_CHAT_URL
|
||||
NEMOCLAW_MODEL=qwen3.5:27b
|
||||
NEMOCLAW_TIMEOUT_S=30.0
|
||||
NEMOCLAW_TEMPERATURE=0.2
|
||||
|
||||
# ── Ollama (direct fallback if OpenShell gateway not up) ──────────
|
||||
OLLAMA_BASE_URL=http://127.0.0.1:11434
|
||||
|
||||
# ── NemoClaw Prompts ──────────────────────────────────────────────
|
||||
NEMOCLAW_PROMPT_DIR=$NVME/nemoclaw/prompts
|
||||
|
||||
# ── JWT / Auth ────────────────────────────────────────────────────
|
||||
# VELOCITY_JWT_SECRET=<SET_THIS>
|
||||
|
||||
# ── PostgreSQL ────────────────────────────────────────────────────
|
||||
# VELOCITY_DB_DSN=postgresql://velocity_app:<PW>@127.0.0.1:5432/velocity
|
||||
ENV
|
||||
|
||||
echo "[✓] Environment file written → $VELOCITY_ENV"
|
||||
echo ""
|
||||
echo "================================================================"
|
||||
echo " DONE. Summary:"
|
||||
echo ""
|
||||
echo " Agent name : $AGENT_NAME"
|
||||
echo " Gateway URL : $GATEWAY_URL"
|
||||
echo " Chat endpoint: $GATEWAY_CHAT_URL"
|
||||
echo " Model : $OLLAMA_MODEL (via Ollama on port 11434)"
|
||||
echo " GPUs 0,1 : Ollama (unchanged)"
|
||||
echo " GPUs 2,3 : ComfyUI (unchanged)"
|
||||
echo " Env file : $VELOCITY_ENV"
|
||||
echo " Service log : $NVME/logs/nemoclaw-velocity.log"
|
||||
echo ""
|
||||
echo " Next commands to verify:"
|
||||
echo " nemoclaw $AGENT_NAME status"
|
||||
echo " nemoclaw $AGENT_NAME logs --follow"
|
||||
echo " curl $GATEWAY_CHAT_URL (POST with messages[])"
|
||||
echo "================================================================"
|
||||
echo
|
||||
echo "NemoClaw deployment complete."
|
||||
echo "Gateway port : ${OPENSHELL_PORT}"
|
||||
echo "Model : ${SGLANG_MODEL}"
|
||||
echo "Runtime : ${SGLANG_BASE_URL}/v1"
|
||||
|
||||
@@ -1,10 +1,13 @@
|
||||
"""
|
||||
backend/services/nemoclaw_client.py - NemoClaw inference client.
|
||||
|
||||
Primary path:
|
||||
1. NVIDIA-hosted OpenAI-compatible chat completions.
|
||||
2. Optional compatible endpoint via NEMOCLAW_BASE_URL.
|
||||
3. Optional local Ollama fallback only when ALLOW_LOCAL_FALLBACK=true.
|
||||
Production path:
|
||||
1. Shared SGLang / OpenAI-compatible coding runtime.
|
||||
|
||||
Compatibility:
|
||||
- Legacy NEMOCLAW_* env names are still honored.
|
||||
- Legacy OLLAMA_BASE_URL can still seed the base URL, but Ollama is no longer
|
||||
a production fallback path.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -24,28 +27,23 @@ logger = logging.getLogger("velocity.nemoclaw")
|
||||
NEMOCLAW_TIMEOUT = float(os.getenv("NEMOCLAW_TIMEOUT_S", "45.0"))
|
||||
NEMOCLAW_TEMPERATURE = float(os.getenv("NEMOCLAW_TEMPERATURE", "0.2"))
|
||||
|
||||
NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY", "")
|
||||
NVIDIA_BASE_URL = os.getenv("NVIDIA_BASE_URL", "https://integrate.api.nvidia.com/v1")
|
||||
NVIDIA_CHAT_URL = os.getenv("NVIDIA_CHAT_URL", f"{NVIDIA_BASE_URL}/chat/completions")
|
||||
NVIDIA_MODEL = os.getenv("NVIDIA_MODEL", "nvidia/nemotron-3-super-120b-a12b")
|
||||
NVIDIA_FALLBACK_MODEL = os.getenv(
|
||||
"NVIDIA_FALLBACK_MODEL",
|
||||
"nvidia/llama-3.3-nemotron-super-49b-v1",
|
||||
SGLANG_BASE_URL = os.getenv(
|
||||
"SGLANG_BASE_URL",
|
||||
os.getenv(
|
||||
"NEMOCLAW_BASE_URL",
|
||||
os.getenv("LLM_BASE_URL", os.getenv("OLLAMA_BASE_URL", "https://llm.desineuron.in")),
|
||||
),
|
||||
).rstrip("/")
|
||||
SGLANG_CHAT_URL = os.getenv(
|
||||
"SGLANG_CHAT_URL",
|
||||
os.getenv("NEMOCLAW_CHAT_URL", f"{SGLANG_BASE_URL}/v1/chat/completions"),
|
||||
)
|
||||
|
||||
NEMOCLAW_BASE_URL = os.getenv("NEMOCLAW_BASE_URL", "")
|
||||
NEMOCLAW_CHAT_URL = (
|
||||
os.getenv("NEMOCLAW_CHAT_URL") or f"{NEMOCLAW_BASE_URL}/v1/chat/completions"
|
||||
if NEMOCLAW_BASE_URL
|
||||
else ""
|
||||
SGLANG_MODELS_URL = os.getenv("SGLANG_MODELS_URL", f"{SGLANG_BASE_URL}/v1/models")
|
||||
SGLANG_MODEL = os.getenv(
|
||||
"SGLANG_MODEL",
|
||||
os.getenv("NEMOCLAW_MODEL", os.getenv("OLLAMA_MODEL", "qwen3.6:35b-a3b")),
|
||||
)
|
||||
NEMOCLAW_MODEL = os.getenv("NEMOCLAW_MODEL", NVIDIA_MODEL)
|
||||
NEMOCLAW_API_TOKEN = os.getenv("NEMOCLAW_API_TOKEN", "")
|
||||
|
||||
ALLOW_LOCAL_FALLBACK = os.getenv("ALLOW_LOCAL_FALLBACK", "false").lower() == "true"
|
||||
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://127.0.0.1:11434")
|
||||
OLLAMA_CHAT_URL = f"{OLLAMA_BASE_URL}/v1/chat/completions"
|
||||
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "qwen3.5:27b")
|
||||
SGLANG_API_TOKEN = os.getenv("SGLANG_API_TOKEN", os.getenv("NEMOCLAW_API_TOKEN", ""))
|
||||
|
||||
_PROMPT_DIR = os.getenv("NEMOCLAW_PROMPT_DIR", "/opt/dlami/nvme/nemoclaw/prompts")
|
||||
|
||||
@@ -201,83 +199,40 @@ async def _nemoclaw_chat(
|
||||
user_content: str,
|
||||
timeout: float = NEMOCLAW_TIMEOUT,
|
||||
) -> dict:
|
||||
endpoints: list[tuple[str, str, str, dict[str, str]]] = []
|
||||
if NVIDIA_API_KEY:
|
||||
endpoints.append(
|
||||
(
|
||||
"nvidia_primary",
|
||||
NVIDIA_CHAT_URL,
|
||||
NVIDIA_MODEL,
|
||||
{
|
||||
"Authorization": f"Bearer {NVIDIA_API_KEY}",
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
)
|
||||
)
|
||||
if NVIDIA_FALLBACK_MODEL and NVIDIA_FALLBACK_MODEL != NVIDIA_MODEL:
|
||||
endpoints.append(
|
||||
(
|
||||
"nvidia_fallback",
|
||||
NVIDIA_CHAT_URL,
|
||||
NVIDIA_FALLBACK_MODEL,
|
||||
{
|
||||
"Authorization": f"Bearer {NVIDIA_API_KEY}",
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
)
|
||||
)
|
||||
if NEMOCLAW_CHAT_URL:
|
||||
headers = {"Content-Type": "application/json"}
|
||||
if NEMOCLAW_API_TOKEN:
|
||||
headers["Authorization"] = f"Bearer {NEMOCLAW_API_TOKEN}"
|
||||
endpoints.append(("compatible_endpoint", NEMOCLAW_CHAT_URL, NEMOCLAW_MODEL, headers))
|
||||
if ALLOW_LOCAL_FALLBACK:
|
||||
endpoints.append(
|
||||
("ollama_fallback", OLLAMA_CHAT_URL, OLLAMA_MODEL, {"Content-Type": "application/json"})
|
||||
if not SGLANG_CHAT_URL:
|
||||
raise RuntimeError(
|
||||
"No NemoClaw inference endpoint is configured. Set SGLANG_BASE_URL or NEMOCLAW_BASE_URL."
|
||||
)
|
||||
|
||||
if not endpoints:
|
||||
raise RuntimeError(
|
||||
"No NemoClaw inference endpoint is configured. "
|
||||
"Set NVIDIA_API_KEY or NEMOCLAW_BASE_URL."
|
||||
)
|
||||
headers = {"Content-Type": "application/json"}
|
||||
if SGLANG_API_TOKEN:
|
||||
headers["Authorization"] = f"Bearer {SGLANG_API_TOKEN}"
|
||||
|
||||
t_start = time.monotonic()
|
||||
last_error: Exception | None = None
|
||||
for label, url, model, headers in endpoints:
|
||||
try:
|
||||
result = await _attempt_chat(
|
||||
label=label,
|
||||
url=url,
|
||||
model=model,
|
||||
system_content=system_content,
|
||||
user_content=user_content,
|
||||
timeout=timeout,
|
||||
headers=headers,
|
||||
)
|
||||
logger.info(
|
||||
"NemoClaw inference via %s model=%s elapsed=%.2fs",
|
||||
label,
|
||||
model,
|
||||
time.monotonic() - t_start,
|
||||
)
|
||||
return result
|
||||
except (httpx.ConnectError, httpx.TimeoutException) as exc:
|
||||
logger.warning("NemoClaw %s unreachable (%s), trying next endpoint", label, exc)
|
||||
last_error = exc
|
||||
except httpx.HTTPStatusError as exc:
|
||||
logger.error(
|
||||
"NemoClaw %s HTTP %s: %s",
|
||||
label,
|
||||
exc.response.status_code,
|
||||
exc.response.text[:300],
|
||||
)
|
||||
last_error = exc
|
||||
except (KeyError, IndexError, TypeError, json.JSONDecodeError) as exc:
|
||||
logger.error("NemoClaw %s returned invalid JSON: %s", label, exc)
|
||||
last_error = exc
|
||||
|
||||
raise RuntimeError(f"All NemoClaw endpoints failed. Last error: {last_error}")
|
||||
try:
|
||||
result = await _attempt_chat(
|
||||
label="sglang",
|
||||
url=SGLANG_CHAT_URL,
|
||||
model=SGLANG_MODEL,
|
||||
system_content=system_content,
|
||||
user_content=user_content,
|
||||
timeout=timeout,
|
||||
headers=headers,
|
||||
)
|
||||
logger.info(
|
||||
"NemoClaw inference via sglang model=%s elapsed=%.2fs",
|
||||
SGLANG_MODEL,
|
||||
time.monotonic() - t_start,
|
||||
)
|
||||
return result
|
||||
except (httpx.ConnectError, httpx.TimeoutException) as exc:
|
||||
raise RuntimeError(f"NemoClaw SGLang endpoint unreachable: {exc}") from exc
|
||||
except httpx.HTTPStatusError as exc:
|
||||
raise RuntimeError(
|
||||
f"NemoClaw SGLang HTTP {exc.response.status_code}: {exc.response.text[:300]}"
|
||||
) from exc
|
||||
except (KeyError, IndexError, TypeError, json.JSONDecodeError) as exc:
|
||||
raise RuntimeError(f"NemoClaw SGLang returned invalid JSON: {exc}") from exc
|
||||
|
||||
|
||||
async def score_qd(
|
||||
@@ -368,46 +323,32 @@ async def profile_cctv_visitor(
|
||||
|
||||
|
||||
async def health_check() -> dict:
|
||||
results: dict[str, str] = {}
|
||||
endpoints: list[tuple[str, str, str, dict[str, str]]] = []
|
||||
if NVIDIA_API_KEY:
|
||||
endpoints.append(
|
||||
(
|
||||
"nvidia_primary",
|
||||
NVIDIA_CHAT_URL,
|
||||
NVIDIA_MODEL,
|
||||
{
|
||||
"Authorization": f"Bearer {NVIDIA_API_KEY}",
|
||||
"Content-Type": "application/json",
|
||||
headers = {"Content-Type": "application/json"}
|
||||
if SGLANG_API_TOKEN:
|
||||
headers["Authorization"] = f"Bearer {SGLANG_API_TOKEN}"
|
||||
|
||||
results: dict[str, str] = {
|
||||
"model": SGLANG_MODEL,
|
||||
"primary_url": SGLANG_CHAT_URL,
|
||||
"models_url": SGLANG_MODELS_URL,
|
||||
}
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=5.0) as client:
|
||||
models_response = await client.get(SGLANG_MODELS_URL, headers=headers)
|
||||
models_response.raise_for_status()
|
||||
chat_response = await client.post(
|
||||
SGLANG_CHAT_URL,
|
||||
json={
|
||||
"model": SGLANG_MODEL,
|
||||
"messages": [{"role": "user", "content": "ping"}],
|
||||
"max_tokens": 5,
|
||||
},
|
||||
headers=headers,
|
||||
)
|
||||
)
|
||||
if NEMOCLAW_CHAT_URL:
|
||||
headers = {"Content-Type": "application/json"}
|
||||
if NEMOCLAW_API_TOKEN:
|
||||
headers["Authorization"] = f"Bearer {NEMOCLAW_API_TOKEN}"
|
||||
endpoints.append(("compatible_endpoint", NEMOCLAW_CHAT_URL, NEMOCLAW_MODEL, headers))
|
||||
if ALLOW_LOCAL_FALLBACK:
|
||||
endpoints.append(
|
||||
("ollama_fallback", OLLAMA_CHAT_URL, OLLAMA_MODEL, {"Content-Type": "application/json"})
|
||||
)
|
||||
chat_response.raise_for_status()
|
||||
results["sglang"] = "ok"
|
||||
except Exception as exc:
|
||||
results["sglang"] = f"error: {exc}"
|
||||
|
||||
for name, url, model, headers in endpoints:
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=5.0) as client:
|
||||
response = await client.post(
|
||||
url,
|
||||
json={
|
||||
"model": model,
|
||||
"messages": [{"role": "user", "content": "ping"}],
|
||||
"max_tokens": 5,
|
||||
},
|
||||
headers=headers,
|
||||
)
|
||||
results[name] = "ok" if response.status_code < 500 else f"http_{response.status_code}"
|
||||
except Exception as exc:
|
||||
results[name] = f"error: {exc}"
|
||||
|
||||
results["model"] = NVIDIA_MODEL if NVIDIA_API_KEY else NEMOCLAW_MODEL
|
||||
results["primary_url"] = NVIDIA_CHAT_URL if NVIDIA_API_KEY else (NEMOCLAW_CHAT_URL or OLLAMA_CHAT_URL)
|
||||
return results
|
||||
|
||||
@@ -13,15 +13,17 @@ import httpx
|
||||
|
||||
logger = logging.getLogger("velocity.runtime_llm")
|
||||
|
||||
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://127.0.0.1:11434").rstrip("/")
|
||||
OLLAMA_CHAT_URL = os.getenv("OLLAMA_CHAT_URL", f"{OLLAMA_BASE_URL}/v1/chat/completions")
|
||||
OLLAMA_TAGS_URL = os.getenv("OLLAMA_TAGS_URL", f"{OLLAMA_BASE_URL}/api/tags")
|
||||
OLLAMA_DEFAULT_MODEL = os.getenv("OLLAMA_MODEL", "qwen3.5:27b")
|
||||
|
||||
NEMOCLAW_BASE_URL = os.getenv("NEMOCLAW_BASE_URL", "").rstrip("/")
|
||||
NEMOCLAW_CHAT_URL = (os.getenv("NEMOCLAW_CHAT_URL") or f"{NEMOCLAW_BASE_URL}/v1/chat/completions").rstrip("/") if NEMOCLAW_BASE_URL else ""
|
||||
NEMOCLAW_DEFAULT_MODEL = os.getenv("NEMOCLAW_MODEL", "nvidia/nemotron-3-super-120b-a12b")
|
||||
NEMOCLAW_API_TOKEN = os.getenv("NEMOCLAW_API_TOKEN", "")
|
||||
SGLANG_BASE_URL = os.getenv(
|
||||
"SGLANG_BASE_URL",
|
||||
os.getenv("LLM_BASE_URL", os.getenv("OLLAMA_BASE_URL", "https://llm.desineuron.in")),
|
||||
).rstrip("/")
|
||||
SGLANG_CHAT_URL = os.getenv("SGLANG_CHAT_URL", f"{SGLANG_BASE_URL}/v1/chat/completions")
|
||||
SGLANG_MODELS_URL = os.getenv("SGLANG_MODELS_URL", f"{SGLANG_BASE_URL}/v1/models")
|
||||
SGLANG_DEFAULT_MODEL = os.getenv(
|
||||
"SGLANG_MODEL",
|
||||
os.getenv("OLLAMA_MODEL", "qwen3.6:35b-a3b"),
|
||||
)
|
||||
SGLANG_API_TOKEN = os.getenv("SGLANG_API_TOKEN", "")
|
||||
|
||||
RUNTIME_LLM_TIMEOUT_S = float(os.getenv("RUNTIME_LLM_TIMEOUT_S", "90.0"))
|
||||
RUNTIME_LLM_CONCURRENCY = int(os.getenv("RUNTIME_LLM_BATCH_CONCURRENCY", "2"))
|
||||
@@ -57,40 +59,30 @@ class RuntimeLLMService:
|
||||
self._jobs: dict[str, dict[str, Any]] = {}
|
||||
|
||||
def _provider_catalog(self) -> list[RuntimeProvider]:
    """Return the runtime LLM providers that are currently configured.

    Only the routed SGLang runtime is registered, under the provider id
    ``"sglang"``. The earlier ``providers.append(...)`` entries for "ollama"
    and "nemoclaw" are dropped: interleaved with the list-literal return they
    left the function with unbalanced brackets.

    Returns:
        A one-element list holding the "sglang" provider, or an empty list
        when ``SGLANG_CHAT_URL`` is blank.
    """
    # A blank chat URL means there is nothing to talk to.
    if not SGLANG_CHAT_URL:
        return []
    return [
        RuntimeProvider(
            provider_id="sglang",
            base_url=SGLANG_BASE_URL,
            chat_url=SGLANG_CHAT_URL,
            default_model=SGLANG_DEFAULT_MODEL,
            # An empty token is passed as None rather than "".
            auth_token=SGLANG_API_TOKEN or None,
        )
    ]
|
||||
|
||||
def get_provider(self, provider_id: str | None) -> RuntimeProvider:
    """Resolve a provider id to a configured runtime provider.

    The legacy ids ``"ollama"`` and ``"nemoclaw"`` are treated as aliases of
    ``"sglang"``. When no id is given, the ``"sglang"`` provider is the
    default; the default never selects a provider that an explicit request
    for the same id would be redirected away from.

    Args:
        provider_id: Requested provider id, or ``None``/empty for the default.

    Returns:
        The matching provider from ``self._provider_catalog()``.

    Raises:
        ValueError: If the requested id is not in the catalog, or if no id was
            given and no "sglang" provider is configured.
    """
    providers = {provider.provider_id: provider for provider in self._provider_catalog()}

    # Legacy ids are redirected to the routed SGLang runtime.
    if provider_id in {"ollama", "nemoclaw"}:
        provider_id = "sglang"

    if provider_id:
        provider = providers.get(provider_id)
        if provider is None:
            raise ValueError(f"Unknown provider '{provider_id}'.")
        return provider

    # No explicit request: fall back to the same provider the aliases map to.
    if "sglang" in providers:
        return providers["sglang"]
    raise ValueError("No runtime LLM providers are configured.")
|
||||
|
||||
async def list_providers(self) -> list[dict[str, Any]]:
|
||||
@@ -101,28 +93,18 @@ class RuntimeLLMService:
|
||||
error: str | None = None
|
||||
|
||||
try:
|
||||
if provider.provider_id == "ollama":
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
response = await client.get(OLLAMA_TAGS_URL)
|
||||
response.raise_for_status()
|
||||
payload = response.json()
|
||||
models = [str(item.get("name", "")).strip() for item in payload.get("models", []) if item.get("name")]
|
||||
if provider.default_model not in models:
|
||||
models.insert(0, provider.default_model)
|
||||
status = "online"
|
||||
else:
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
response = await client.post(
|
||||
provider.chat_url,
|
||||
json={
|
||||
"model": provider.default_model,
|
||||
"messages": [{"role": "user", "content": "ping"}],
|
||||
"max_tokens": 4,
|
||||
},
|
||||
headers=provider.headers,
|
||||
)
|
||||
response.raise_for_status()
|
||||
status = "online"
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
response = await client.get(SGLANG_MODELS_URL, headers=provider.headers)
|
||||
response.raise_for_status()
|
||||
payload = response.json()
|
||||
models = [
|
||||
str(item.get("id", "")).strip()
|
||||
for item in payload.get("data", [])
|
||||
if item.get("id")
|
||||
]
|
||||
if provider.default_model not in models:
|
||||
models.insert(0, provider.default_model)
|
||||
status = "online"
|
||||
except Exception as exc: # pragma: no cover - network/runtime dependent
|
||||
error = str(exc)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user