feat: Oracle Canvas, Revision History and Canvas Sharing (#33)
Co-authored-by: Sagnik <sagnik7896@gmail.com>
Reviewed-on: #33
This commit was merged in pull request #33.
This commit is contained in:
@@ -13,15 +13,17 @@ import httpx
|
||||
|
||||
logger = logging.getLogger("velocity.runtime_llm")
|
||||
|
||||
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://127.0.0.1:11434").rstrip("/")
|
||||
OLLAMA_CHAT_URL = os.getenv("OLLAMA_CHAT_URL", f"{OLLAMA_BASE_URL}/v1/chat/completions")
|
||||
OLLAMA_TAGS_URL = os.getenv("OLLAMA_TAGS_URL", f"{OLLAMA_BASE_URL}/api/tags")
|
||||
OLLAMA_DEFAULT_MODEL = os.getenv("OLLAMA_MODEL", "qwen3.5:27b")
|
||||
|
||||
NEMOCLAW_BASE_URL = os.getenv("NEMOCLAW_BASE_URL", "").rstrip("/")
|
||||
NEMOCLAW_CHAT_URL = (os.getenv("NEMOCLAW_CHAT_URL") or f"{NEMOCLAW_BASE_URL}/v1/chat/completions").rstrip("/") if NEMOCLAW_BASE_URL else ""
|
||||
NEMOCLAW_DEFAULT_MODEL = os.getenv("NEMOCLAW_MODEL", "nvidia/nemotron-3-super-120b-a12b")
|
||||
NEMOCLAW_API_TOKEN = os.getenv("NEMOCLAW_API_TOKEN", "")
|
||||
SGLANG_BASE_URL = os.getenv(
|
||||
"SGLANG_BASE_URL",
|
||||
os.getenv("LLM_BASE_URL", os.getenv("OLLAMA_BASE_URL", "https://llm.desineuron.in")),
|
||||
).rstrip("/")
|
||||
SGLANG_CHAT_URL = os.getenv("SGLANG_CHAT_URL", f"{SGLANG_BASE_URL}/v1/chat/completions")
|
||||
SGLANG_MODELS_URL = os.getenv("SGLANG_MODELS_URL", f"{SGLANG_BASE_URL}/v1/models")
|
||||
SGLANG_DEFAULT_MODEL = os.getenv(
|
||||
"SGLANG_MODEL",
|
||||
os.getenv("OLLAMA_MODEL", "qwen3.6:35b-a3b"),
|
||||
)
|
||||
SGLANG_API_TOKEN = os.getenv("SGLANG_API_TOKEN", "")
|
||||
|
||||
RUNTIME_LLM_TIMEOUT_S = float(os.getenv("RUNTIME_LLM_TIMEOUT_S", "90.0"))
|
||||
RUNTIME_LLM_CONCURRENCY = int(os.getenv("RUNTIME_LLM_BATCH_CONCURRENCY", "2"))
|
||||
@@ -57,40 +59,30 @@ class RuntimeLLMService:
|
||||
self._jobs: dict[str, dict[str, Any]] = {}
|
||||
|
||||
def _provider_catalog(self) -> list[RuntimeProvider]:
|
||||
providers: list[RuntimeProvider] = []
|
||||
if OLLAMA_CHAT_URL:
|
||||
providers.append(
|
||||
RuntimeProvider(
|
||||
provider_id="ollama",
|
||||
base_url=OLLAMA_BASE_URL,
|
||||
chat_url=OLLAMA_CHAT_URL,
|
||||
default_model=OLLAMA_DEFAULT_MODEL,
|
||||
)
|
||||
if not SGLANG_CHAT_URL:
|
||||
return []
|
||||
return [
|
||||
RuntimeProvider(
|
||||
provider_id="sglang",
|
||||
base_url=SGLANG_BASE_URL,
|
||||
chat_url=SGLANG_CHAT_URL,
|
||||
default_model=SGLANG_DEFAULT_MODEL,
|
||||
auth_token=SGLANG_API_TOKEN or None,
|
||||
)
|
||||
if NEMOCLAW_CHAT_URL:
|
||||
providers.append(
|
||||
RuntimeProvider(
|
||||
provider_id="nemoclaw",
|
||||
base_url=NEMOCLAW_BASE_URL,
|
||||
chat_url=NEMOCLAW_CHAT_URL,
|
||||
default_model=NEMOCLAW_DEFAULT_MODEL,
|
||||
auth_token=NEMOCLAW_API_TOKEN or None,
|
||||
)
|
||||
)
|
||||
return providers
|
||||
]
|
||||
|
||||
def get_provider(self, provider_id: str | None) -> RuntimeProvider:
|
||||
providers = {provider.provider_id: provider for provider in self._provider_catalog()}
|
||||
if provider_id in {"ollama", "nemoclaw"}:
|
||||
provider_id = "sglang"
|
||||
if provider_id:
|
||||
provider = providers.get(provider_id)
|
||||
if provider is None:
|
||||
raise ValueError(f"Unknown provider '{provider_id}'.")
|
||||
return provider
|
||||
|
||||
if "nemoclaw" in providers:
|
||||
return providers["nemoclaw"]
|
||||
if "ollama" in providers:
|
||||
return providers["ollama"]
|
||||
if "sglang" in providers:
|
||||
return providers["sglang"]
|
||||
raise ValueError("No runtime LLM providers are configured.")
|
||||
|
||||
async def list_providers(self) -> list[dict[str, Any]]:
|
||||
@@ -101,28 +93,18 @@ class RuntimeLLMService:
|
||||
error: str | None = None
|
||||
|
||||
try:
|
||||
if provider.provider_id == "ollama":
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
response = await client.get(OLLAMA_TAGS_URL)
|
||||
response.raise_for_status()
|
||||
payload = response.json()
|
||||
models = [str(item.get("name", "")).strip() for item in payload.get("models", []) if item.get("name")]
|
||||
if provider.default_model not in models:
|
||||
models.insert(0, provider.default_model)
|
||||
status = "online"
|
||||
else:
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
response = await client.post(
|
||||
provider.chat_url,
|
||||
json={
|
||||
"model": provider.default_model,
|
||||
"messages": [{"role": "user", "content": "ping"}],
|
||||
"max_tokens": 4,
|
||||
},
|
||||
headers=provider.headers,
|
||||
)
|
||||
response.raise_for_status()
|
||||
status = "online"
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
response = await client.get(SGLANG_MODELS_URL, headers=provider.headers)
|
||||
response.raise_for_status()
|
||||
payload = response.json()
|
||||
models = [
|
||||
str(item.get("id", "")).strip()
|
||||
for item in payload.get("data", [])
|
||||
if item.get("id")
|
||||
]
|
||||
if provider.default_model not in models:
|
||||
models.insert(0, provider.default_model)
|
||||
status = "online"
|
||||
except Exception as exc: # pragma: no cover - network/runtime dependent
|
||||
error = str(exc)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user