Initial commit: Velocity-OS migration
This commit is contained in:
147
infrastructure/ci/.gitlab-ci.yml
Normal file
147
infrastructure/ci/.gitlab-ci.yml
Normal file
@@ -0,0 +1,147 @@
|
||||
# ============================================================
|
||||
# Velocity-OS — GitLab CI/CD Pipeline
|
||||
# Build → Sign → Push to ECR → Notify Ingress Box
|
||||
# ============================================================
|
||||
# Pipeline stages, executed in the order listed.
stages: [lint, build, sign, notify]

# Variables shared by every job in this pipeline.
variables:
  DOCKER_DRIVER: "overlay2"
  DOCKER_BUILDKIT: "1"
  AWS_REGION: ap-south-1
  # AWS_ACCOUNT_ID is not defined in this file; it has to come from the CI/CD settings.
  ECR_REGISTRY: "${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com"
  # Images are tagged with the short commit SHA.
  IMAGE_TAG: "${CI_COMMIT_SHORT_SHA}"
|
||||
|
||||
# ── Lint + Type Check ─────────────────────────────────────────
|
||||
# Type-check and lint the webos package; only runs when files under webos/ change.
lint:webos:
  stage: lint
  image: node:20-alpine
  cache:
    key: "${CI_COMMIT_REF_SLUG}-node"
    # `npm ci` removes node_modules before installing, so caching that directory
    # never helps. Cache npm's download cache instead and point `npm ci` at it.
    paths:
      - webos/.npm/
  script:
    - cd webos && npm ci --cache .npm --prefer-offline && npm run type-check && npm run lint
  rules:
    - changes:
        - webos/**/*
|
||||
|
||||
# Run ruff and mypy over core/; only runs when files under core/ change.
lint:core:
  stage: lint
  image: python:3.11-slim
  rules:
    - changes:
        - core/**/*
  script:
    - pip install ruff mypy -q
    - cd core && ruff check . && mypy . --ignore-missing-imports
|
||||
|
||||
# ── Build + Push Images ───────────────────────────────────────
|
||||
# Shared body of the build:* jobs. Each job sets SERVICE, which is used both as
# the build-context directory (./SERVICE) and as the ECR repository suffix.
.build_template: &build_template
  stage: build
  image: docker:24-dind
  services:
    - docker:24-dind
  before_script:
    # The docker image does not bundle the AWS CLI; install it when it is missing
    # so the ECR login below can run.
    - command -v aws >/dev/null 2>&1 || apk add --no-cache aws-cli
    - aws ecr get-login-password --region "$AWS_REGION" | docker login --username AWS --password-stdin "$ECR_REGISTRY"
  script:
    - |
      # Build with the previous :latest as a cache source, tag the result with
      # both the commit tag and :latest, then push both tags.
      docker build \
        --cache-from "${ECR_REGISTRY}/velocity-os/${SERVICE}:latest" \
        --build-arg BUILDKIT_INLINE_CACHE=1 \
        --label "git.sha=${CI_COMMIT_SHA}" \
        --label "git.ref=${CI_COMMIT_REF_NAME}" \
        -t "${ECR_REGISTRY}/velocity-os/${SERVICE}:${IMAGE_TAG}" \
        -t "${ECR_REGISTRY}/velocity-os/${SERVICE}:latest" \
        "./${SERVICE}"
      docker push "${ECR_REGISTRY}/velocity-os/${SERVICE}:${IMAGE_TAG}"
      docker push "${ECR_REGISTRY}/velocity-os/${SERVICE}:latest"
|
||||
|
||||
# One build job per service. Each reuses the shared build template, sets SERVICE,
# and only runs when files under that service's directory change.
build:core:
  <<: *build_template
  rules:
    - changes:
        - core/**/*
  variables:
    SERVICE: core

build:webos:
  <<: *build_template
  rules:
    - changes:
        - webos/**/*
  variables:
    SERVICE: webos

build:media-engine:
  <<: *build_template
  rules:
    - changes:
        - media-engine/**/*
  variables:
    SERVICE: media-engine

build:agents:
  <<: *build_template
  rules:
    - changes:
        - agents/**/*
  variables:
    SERVICE: agents
|
||||
|
||||
# ── Sign Images with cosign ───────────────────────────────────
|
||||
# Shared body of the sign:* jobs: resolve the pushed image's digest and sign
# that digest with cosign. Each job sets SERVICE.
.sign_template: &sign_template
  stage: sign
  # NOTE(review): this script needs a shell plus the `aws` (and optionally
  # `docker`) CLIs. Confirm this image provides them and that the image path
  # is correct; the job cannot run otherwise.
  image: ghcr.io/sigstore/cosign:v2.4.0
  script:
    - |
      IMAGE="${ECR_REGISTRY}/velocity-os/${SERVICE}:${IMAGE_TAG}"
      # Prefer the local daemon's record; fall back to asking ECR.
      DIGEST=$(docker inspect --format='{{index .RepoDigests 0}}' "${IMAGE}" 2>/dev/null || \
        aws ecr describe-images \
          --repository-name "velocity-os/${SERVICE}" \
          --image-ids "imageTag=${IMAGE_TAG}" \
          --region "${AWS_REGION}" \
          --query 'imageDetails[0].imageDigest' --output text)
      # RepoDigests entries have the form "<repo>@sha256:<hex>", whereas ECR
      # returns the bare "sha256:<hex>". Keep only the part after the last "@"
      # so the reference built below is never "<repo>@<repo>@sha256:...".
      DIGEST="${DIGEST##*@}"
      case "${DIGEST}" in
        sha256:*) ;;
        *)
          echo "Could not resolve a digest for ${IMAGE} (got '${DIGEST}')" >&2
          exit 1
          ;;
      esac
      cosign sign --yes "${ECR_REGISTRY}/velocity-os/${SERVICE}@${DIGEST}"
|
||||
|
||||
# One sign job per service. Each carries the same `rules: changes` filter as
# the build job it needs. Without it the sign job would be added to every
# pipeline while its build job is filtered out, and GitLab rejects a pipeline
# in which a job needs a job that is not part of it.
sign:core:
  <<: *sign_template
  variables:
    SERVICE: core
  needs:
    - build:core
  rules:
    - changes:
        - core/**/*

sign:webos:
  <<: *sign_template
  variables:
    SERVICE: webos
  needs:
    - build:webos
  rules:
    - changes:
        - webos/**/*

sign:media-engine:
  <<: *sign_template
  variables:
    SERVICE: media-engine
  needs:
    - build:media-engine
  rules:
    - changes:
        - media-engine/**/*

sign:agents:
  <<: *sign_template
  variables:
    SERVICE: agents
  needs:
    - build:agents
  rules:
    - changes:
        - agents/**/*
|
||||
|
||||
# ── Notify Ingress Box ────────────────────────────────────────
|
||||
# On main, SSH to the ingress box and start its polling service.
notify:ingress-box:
  stage: notify
  image: alpine:latest
  before_script:
    - apk add --no-cache curl openssh-client
  script:
    # Trigger the poll on the ingress box via SSH.
    # INGRESS_BOX_IP and INGRESS_SSH_KEY_FILE are set in GitLab CI/CD variables.
    # INGRESS_KNOWN_HOSTS_FILE is optional: when set, the host key is verified
    # against it; when unset, the previous unverified behaviour is kept.
    - |
      # ssh refuses private keys that are readable by group/others.
      chmod 600 "${INGRESS_SSH_KEY_FILE}"
      if [ -n "${INGRESS_KNOWN_HOSTS_FILE:-}" ]; then
        set -- -o StrictHostKeyChecking=yes -o "UserKnownHostsFile=${INGRESS_KNOWN_HOSTS_FILE}"
      else
        echo "WARNING: INGRESS_KNOWN_HOSTS_FILE is not set; ingress host key is NOT verified" >&2
        set -- -o StrictHostKeyChecking=no
      fi
      ssh -i "${INGRESS_SSH_KEY_FILE}" "$@" \
        "ubuntu@${INGRESS_BOX_IP}" \
        "sudo systemctl start velocity-ingress-poll.service"
  # The sign jobs are only part of a pipeline when their service changed, so
  # each dependency is optional; a hard `needs` on an absent job makes
  # pipeline creation fail.
  needs:
    - job: sign:core
      optional: true
    - job: sign:webos
      optional: true
    - job: sign:media-engine
      optional: true
    - job: sign:agents
      optional: true
  rules:
    - if: '$CI_COMMIT_BRANCH == "main"'
|
||||
91
infrastructure/ecr/build_push_sign.sh
Normal file
91
infrastructure/ecr/build_push_sign.sh
Normal file
@@ -0,0 +1,91 @@
|
||||
#!/usr/bin/env bash
|
||||
# ============================================================
|
||||
# Velocity-OS — ECR Registry Provisioner + Image Push Script
|
||||
# Assumes: aws cli v2, docker, cosign installed on build host
|
||||
# Run from the Velocity-OS repo root in CI or locally.
|
||||
# ============================================================
|
||||
set -euo pipefail

# ── Configuration ────────────────────────────────────────────
# AWS_REGION falls back to ap-south-1; AWS_ACCOUNT_ID is mandatory.
: "${AWS_REGION:=ap-south-1}"
: "${AWS_ACCOUNT_ID:?Must set AWS_ACCOUNT_ID}"
ECR_REGISTRY="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com"
REGISTRY_PREFIX="velocity-os"

# Image tag: the first git tag pointing at HEAD if there is one, otherwise the
# short commit SHA.
GIT_SHA="$(git rev-parse --short HEAD)"
GIT_TAG="$(git tag --points-at HEAD | head -n1 || echo "")"
IMAGE_TAG="${GIT_TAG:-$GIT_SHA}"

# Each entry is both a build-context directory and an ECR repository suffix.
SERVICES=(core webos media-engine agents)
|
||||
|
||||
# ── Step 1: Provision ECR repositories (idempotent) ──────────
|
||||
echo "=== Provisioning ECR repositories ==="
for svc in "${SERVICES[@]}"; do
  REPO_NAME="${REGISTRY_PREFIX}/${svc}"
  echo "  Ensuring repo: ${REPO_NAME}"

  # Nothing to do when the repository already exists.
  if aws ecr describe-repositories \
      --repository-names "${REPO_NAME}" \
      --region "${AWS_REGION}" \
      --no-cli-pager \
      > /dev/null 2>&1; then
    continue
  fi

  # Tag mutability must be MUTABLE: this script re-pushes the moving `latest`
  # tag on every run, and an IMMUTABLE repository rejects the second push of
  # an existing tag.
  aws ecr create-repository \
    --repository-name "${REPO_NAME}" \
    --region "${AWS_REGION}" \
    --image-scanning-configuration scanOnPush=true \
    --image-tag-mutability MUTABLE \
    --encryption-configuration encryptionType=AES256 \
    --no-cli-pager
done
|
||||
|
||||
# ── Step 2: ECR Login ─────────────────────────────────────────
|
||||
echo "=== Authenticating to ECR ==="
aws ecr get-login-password --region "${AWS_REGION}" | \
  docker login --username AWS --password-stdin "${ECR_REGISTRY}"

# ── Step 3: Build + Push + Sign each image ───────────────────
echo "=== Building, pushing, and signing images ==="
for svc in "${SERVICES[@]}"; do
  LOCAL_IMAGE="velocity-os/${svc}:${IMAGE_TAG}"
  REMOTE_IMAGE="${ECR_REGISTRY}/${REGISTRY_PREFIX}/${svc}:${IMAGE_TAG}"
  REMOTE_LATEST="${ECR_REGISTRY}/${REGISTRY_PREFIX}/${svc}:latest"

  echo ""
  echo "--- Service: ${svc} ---"

  # Build, using the previously pushed :latest as a layer-cache source.
  echo "  Building ${LOCAL_IMAGE}..."
  docker build \
    --cache-from "${REMOTE_LATEST}" \
    --build-arg BUILDKIT_INLINE_CACHE=1 \
    --label "git.sha=${GIT_SHA}" \
    --label "git.tag=${GIT_TAG}" \
    --label "build.date=$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    -t "${LOCAL_IMAGE}" \
    -t "${REMOTE_IMAGE}" \
    -t "${REMOTE_LATEST}" \
    "./${svc}"

  # Push the commit/tag-specific tag first, then the moving `latest` tag.
  echo "  Pushing ${REMOTE_IMAGE}..."
  docker push "${REMOTE_IMAGE}"
  docker push "${REMOTE_LATEST}"

  # Sign with cosign (Sigstore keyless or KMS key)
  echo "  Signing ${REMOTE_IMAGE} with cosign..."
  IMAGE_DIGEST=$(docker inspect --format='{{index .RepoDigests 0}}' "${REMOTE_IMAGE}" 2>/dev/null || \
    aws ecr describe-images \
      --repository-name "${REGISTRY_PREFIX}/${svc}" \
      --image-ids imageTag="${IMAGE_TAG}" \
      --region "${AWS_REGION}" \
      --query 'imageDetails[0].imageDigest' \
      --output text)
  # `docker inspect` reports RepoDigests as "<repo>@sha256:<hex>", while ECR
  # returns the bare "sha256:<hex>". Keep only the digest so the reference
  # below is never "<repo>@<repo>@sha256:...".
  IMAGE_DIGEST="${IMAGE_DIGEST##*@}"
  if [[ "${IMAGE_DIGEST}" != sha256:* ]]; then
    echo "  ERROR: could not resolve digest for ${REMOTE_IMAGE} (got '${IMAGE_DIGEST}')" >&2
    exit 1
  fi
  cosign sign --yes "${ECR_REGISTRY}/${REGISTRY_PREFIX}/${svc}@${IMAGE_DIGEST}"

  echo "  ✓ ${svc} pushed and signed: ${REMOTE_IMAGE}"
done

echo ""
echo "=== All images built, pushed, and signed. ==="
echo "ECR Registry: ${ECR_REGISTRY}"
echo "Image tag:    ${IMAGE_TAG}"
|
||||
118
infrastructure/ingress-box/poll_and_transfer.sh
Normal file
118
infrastructure/ingress-box/poll_and_transfer.sh
Normal file
@@ -0,0 +1,118 @@
|
||||
#!/usr/bin/env bash
|
||||
# ============================================================
|
||||
# Velocity-OS — Ingress Box: Air-Gap Transfer Agent
|
||||
# Runs on a LAN-connected node (Raspberry Pi / VM).
|
||||
# Polls ECR every 5 minutes for new signed images.
|
||||
# Verifies cosign signature. Transfers to air-gapped workstation.
|
||||
# Triggers K3s rolling restart on new image.
|
||||
#
|
||||
# Install as systemd service:
|
||||
# sudo cp poll_and_transfer.service /etc/systemd/system/
|
||||
# sudo systemctl enable --now poll_and_transfer
|
||||
# ============================================================
|
||||
set -euo pipefail

# ── Configuration ────────────────────────────────────────────
# AWS_REGION falls back to ap-south-1; AWS_ACCOUNT_ID is mandatory.
: "${AWS_REGION:=ap-south-1}"
: "${AWS_ACCOUNT_ID:?Must set AWS_ACCOUNT_ID}"
ECR_REGISTRY="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com"
REGISTRY_PREFIX="velocity-os"
SERVICES=(core webos media-engine agents)

# Air-gapped workstation (LAN only — no internet); each value can be overridden
# from the environment.
: "${WORKSTATION_IP:=192.168.1.100}"
: "${WORKSTATION_USER:=ubuntu}"
: "${WORKSTATION_SSH_KEY:=/home/ingress/.ssh/velocity_workstation_ed25519}"

# State directory: one "<service>.last_digest" file per service.
STATE_DIR="/var/lib/velocity-ingress"
mkdir -p "${STATE_DIR}"

# Scratch directory for the image tarballs.
TRANSFER_DIR="/tmp/velocity-transfer"
mkdir -p "${TRANSFER_DIR}"
|
||||
|
||||
# ── Functions ─────────────────────────────────────────────────
|
||||
# Print all arguments on one line, prefixed with the current UTC timestamp
# in square brackets.
log() {
  local stamp
  stamp="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
  printf '[%s] %s\n' "${stamp}" "$*"
}
|
||||
|
||||
#######################################
# Look up the digest that the `latest` tag of a service's ECR repo points at.
# Globals:   REGISTRY_PREFIX, AWS_REGION (read)
# Arguments: $1 - service name (repository is "${REGISTRY_PREFIX}/$1")
# Outputs:   the digest on stdout, or the literal NONE when the lookup fails
#            or yields no digest
# Returns:   0 always
#######################################
get_latest_digest() {
  local repo="${REGISTRY_PREFIX}/$1"
  local digest

  # stderr is dropped on purpose: any failure is reported as NONE.
  if ! digest=$(aws ecr describe-images \
      --repository-name "${repo}" \
      --image-ids imageTag=latest \
      --region "${AWS_REGION}" \
      --query 'imageDetails[0].imageDigest' \
      --output text 2>/dev/null); then
    digest=""
  fi

  # With text output the CLI prints "None" for a null query result; treat
  # that, and an empty result, like a failed lookup.
  case "${digest}" in
    "" | None | null) echo "NONE" ;;
    *) printf '%s\n' "${digest}" ;;
  esac
}
|
||||
|
||||
#######################################
# Pull one image by digest, verify its cosign signature, copy it to the
# workstation, import it into K3s and restart the matching deployment.
# Globals:   ECR_REGISTRY, REGISTRY_PREFIX, TRANSFER_DIR, STATE_DIR,
#            WORKSTATION_SSH_KEY, WORKSTATION_USER, WORKSTATION_IP (read)
#            COSIGN_CERT_IDENTITY_REGEXP, COSIGN_CERT_OIDC_ISSUER_REGEXP
#            (read, optional; both default to ".*")
# Arguments: $1 - service name, $2 - image digest to transfer
# Outputs:   progress lines via log; writes "${STATE_DIR}/<svc>.last_digest"
# Returns:   0 on success, 1 as soon as any step fails
#######################################
transfer_image() {
  local svc="$1"
  local digest="$2"
  local repo="${ECR_REGISTRY}/${REGISTRY_PREFIX}/${svc}"
  local full_image="${repo}@${digest}"
  local latest_image="${repo}:latest"
  local tar_file="${TRANSFER_DIR}/${svc}.tar"
  local remote="${WORKSTATION_USER}@${WORKSTATION_IP}"

  # Every step is checked explicitly: the caller invokes this function on the
  # left-hand side of `||`, and bash ignores `set -e` in that context, so an
  # unchecked failure would fall through and still record the digest below.

  # Pull by digest, not by tag, so the bytes that get shipped are exactly the
  # ones whose signature is verified, even if `latest` moves meanwhile.
  log "  [${svc}] Pulling from ECR..."
  docker pull "${full_image}" || return 1

  # NOTE(review): the default ".*" patterns accept a signature from any
  # identity and issuer; set the two COSIGN_CERT_* variables to pin the signer.
  log "  [${svc}] Verifying cosign signature..."
  if ! cosign verify \
      --certificate-identity-regexp "${COSIGN_CERT_IDENTITY_REGEXP:-.*}" \
      --certificate-oidc-issuer-regexp "${COSIGN_CERT_OIDC_ISSUER_REGEXP:-.*}" \
      "${full_image}"; then
    log "  [${svc}] ERROR: Signature verification FAILED. Refusing transfer."
    return 1
  fi

  # Attach the `latest` tag locally to the verified image so the tarball
  # carries the same image name as before.
  docker tag "${full_image}" "${latest_image}" || return 1

  log "  [${svc}] Saving image to tarball..."
  if ! docker save "${latest_image}" -o "${tar_file}"; then
    rm -f -- "${tar_file}"
    return 1
  fi

  log "  [${svc}] Transferring to workstation via SCP..."
  if ! scp -i "${WORKSTATION_SSH_KEY}" \
      -o StrictHostKeyChecking=yes \
      "${tar_file}" \
      "${remote}:/tmp/${svc}.tar"; then
    rm -f -- "${tar_file}"
    return 1
  fi

  log "  [${svc}] Importing into K3s containerd + rolling restart..."
  if ! ssh -i "${WORKSTATION_SSH_KEY}" \
      -o StrictHostKeyChecking=yes \
      "${remote}" \
      "sudo k3s ctr images import /tmp/${svc}.tar && \
sudo kubectl rollout restart deployment/${svc} -n velocity-os && \
rm /tmp/${svc}.tar"; then
    rm -f -- "${tar_file}"
    return 1
  fi

  # Record the digest only after every step above succeeded.
  echo "${digest}" > "${STATE_DIR}/${svc}.last_digest" || return 1
  log "  [${svc}] ✓ Transfer complete. Digest: ${digest}"
  rm -f -- "${tar_file}"
}
|
||||
|
||||
# ── Main poll loop ────────────────────────────────────────────
|
||||
log "=== Velocity-OS Ingress Box polling ECR ==="

# Authenticate docker against ECR at the start of every run.
aws ecr get-login-password --region "${AWS_REGION}" \
  | docker login --username AWS --password-stdin "${ECR_REGISTRY}"

# For each service: compare the digest behind `latest` in ECR with the digest
# recorded by the last successful transfer, and transfer when they differ.
for svc in "${SERVICES[@]}"; do
  log "[${svc}] Checking for updates..."

  CURRENT_DIGEST=$(get_latest_digest "${svc}")
  # A missing or unreadable state file counts as "nothing transferred yet".
  LAST_DIGEST=$(cat "${STATE_DIR}/${svc}.last_digest" 2>/dev/null || echo "NONE")

  case "${CURRENT_DIGEST}" in
    NONE)
      log "  [${svc}] No image found in ECR. Skipping."
      continue
      ;;
    "${LAST_DIGEST}")
      log "  [${svc}] Up to date. No transfer needed."
      continue
      ;;
  esac

  log "  [${svc}] New digest detected: ${CURRENT_DIGEST}"
  # A failed transfer is logged but does not stop the remaining services.
  if ! transfer_image "${svc}" "${CURRENT_DIGEST}"; then
    log "  [${svc}] Transfer FAILED. Will retry next cycle."
  fi
done

log "=== Poll cycle complete ==="
|
||||
142
infrastructure/ingress-box/sync_comfy_route.py
Normal file
142
infrastructure/ingress-box/sync_comfy_route.py
Normal file
@@ -0,0 +1,142 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import boto3
|
||||
|
||||
|
||||
def load_env_file(path: Path) -> dict[str, str]:
    """Parse a ``KEY=VALUE`` env file into a dict.

    Blank lines, ``#`` comment lines and lines without ``=`` are skipped.
    A leading ``export `` is dropped, and one pair of matching single or
    double quotes around a value is removed, so ``KEY="value"`` yields
    ``value`` rather than ``"value"``.

    Args:
        path: Location of the env file.

    Returns:
        Mapping of keys to values; empty when ``path`` does not exist.
    """
    data: dict[str, str] = {}
    if not path.exists():
        return data
    for raw_line in path.read_text(encoding="utf-8").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        if line.startswith("export "):
            line = line[len("export "):].lstrip()
        key, value = line.split("=", 1)
        value = value.strip()
        # Strip one layer of matching surrounding quotes.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in ("'", '"'):
            value = value[1:-1]
        data[key.strip()] = value
    return data
|
||||
|
||||
|
||||
def env(name: str, default: str = "") -> str:
    """Return environment variable ``name``, or ``default`` when it is unset."""
    value = os.environ.get(name)
    return default if value is None else value
|
||||
|
||||
|
||||
def resolve_target_instance(ec2) -> dict | None:
    """Pick the EC2 instance the comfy route should point at.

    When ``COMFY_INSTANCE_ID`` is set, only that instance is considered and it
    is returned only if it is running. Otherwise running instances carrying
    the tag ``COMFY_INSTANCE_TAG_KEY`` = ``COMFY_INSTANCE_TAG_VALUE`` are
    looked up and the most recently launched one is returned.

    Args:
        ec2: Object exposing ``describe_instances`` (a boto3 EC2 client in ``main``).

    Returns:
        The instance description dict, or ``None`` when nothing matches.
    """
    pinned_id = env("COMFY_INSTANCE_ID")
    if pinned_id:
        response = ec2.describe_instances(InstanceIds=[pinned_id])
        described = (
            inst
            for reservation in response["Reservations"]
            for inst in reservation["Instances"]
        )
        return next((inst for inst in described if inst["State"]["Name"] == "running"), None)

    tag_key = env("COMFY_INSTANCE_TAG_KEY", "DesineuronRole")
    tag_value = env("COMFY_INSTANCE_TAG_VALUE", "comfyui")
    response = ec2.describe_instances(
        Filters=[
            {"Name": "instance-state-name", "Values": ["running"]},
            {"Name": f"tag:{tag_key}", "Values": [tag_value]},
        ]
    )
    running = [
        inst
        for reservation in response["Reservations"]
        for inst in reservation["Instances"]
    ]
    if not running:
        return None
    # Newest launch wins; on a tie the first one listed is kept.
    return max(running, key=lambda row: row["LaunchTime"])
|
||||
|
||||
|
||||
def upsert_route(hostname: str, private_ip: str, port: int) -> subprocess.CompletedProcess[str]:
    """Upsert one route on the ingress host over SSH, then validate and reload Caddy.

    Connection settings come from the ``INGRESS_SSH_HOST``, ``INGRESS_SSH_USER``,
    ``INGRESS_SSH_PORT`` and ``INGRESS_SSH_KEY_PATH`` environment variables; the
    remote helper is ``INGRESS_ROUTE_HELPER``.

    Args:
        hostname: Public hostname of the route.
        private_ip: Target host the route forwards to.
        port: Target port the route forwards to.

    Returns:
        The completed ``ssh`` process with captured text output. A non-zero
        exit does not raise; the caller inspects ``returncode``.
    """
    import shlex  # local import so the file's import header needs no change

    ingress_host = env("INGRESS_SSH_HOST")
    ingress_user = env("INGRESS_SSH_USER", "ec2-user")
    ingress_port = env("INGRESS_SSH_PORT", "22")
    ingress_key = env("INGRESS_SSH_KEY_PATH")
    helper = env("INGRESS_ROUTE_HELPER", "/usr/local/bin/manage_desineuron_routes.py")
    payload = json.dumps(
        {
            "hostname": hostname,
            "scheme": "http",
            "target_host": private_ip,
            "target_port": port,
        }
    )
    # The command string is interpreted by the remote shell, so the payload is
    # shell-quoted; a hostname containing a quote can no longer break out of
    # the argument.
    command = (
        f"sudo {helper} upsert {shlex.quote(payload)}"
        " && sudo caddy validate --config /etc/caddy/Caddyfile"
        " && sudo systemctl reload caddy"
    )
    # NOTE(review): host-key checking is switched off and known hosts are
    # discarded, so the ingress host's identity is never verified.
    return subprocess.run(
        [
            "ssh",
            "-o",
            "StrictHostKeyChecking=no",
            "-o",
            "UserKnownHostsFile=/dev/null",
            "-i",
            ingress_key,
            "-p",
            ingress_port,
            f"{ingress_user}@{ingress_host}",
            command,
        ],
        capture_output=True,
        text=True,
        check=False,
    )
|
||||
|
||||
|
||||
def main() -> int:
    """Point the comfy route at the current target instance.

    Fills in AWS and ingress settings from the ops env file without overriding
    variables already present in the process environment, resolves the target
    instance, and upserts the route when the recorded target differs.

    Returns:
        0 when the route is already current or was updated, 1 when no usable
        instance is found, otherwise the exit code of the failed remote command.
    """
    ops_env = load_env_file(Path(env("OPS_ENV_FILE", "/opt/desineuron-ops-control-plane/.env")))
    for key in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_DEFAULT_REGION"]:
        if key not in os.environ and key in ops_env:
            os.environ[key] = ops_env[key]
    os.environ.setdefault("AWS_DEFAULT_REGION", ops_env.get("OPS_DEFAULT_REGION", "us-east-1"))
    os.environ.setdefault("INGRESS_SSH_HOST", ops_env.get("OPS_INGRESS_SSH_HOST", ""))
    os.environ.setdefault("INGRESS_SSH_USER", ops_env.get("OPS_INGRESS_SSH_USER", "ec2-user"))
    os.environ.setdefault("INGRESS_SSH_PORT", ops_env.get("OPS_INGRESS_SSH_PORT", "22"))
    normalized_key_path = ops_env.get("OPS_SSH_KEY_PATH", "/opt/desineuron-ops-control-plane/state/desineuron-l4-node.pem")
    # Key paths written as /app/state/... are rewritten to the /opt/... state directory.
    if normalized_key_path.startswith("/app/state/"):
        normalized_key_path = normalized_key_path.replace("/app/state/", "/opt/desineuron-ops-control-plane/state/")
    os.environ.setdefault("INGRESS_SSH_KEY_PATH", normalized_key_path)
    os.environ.setdefault("INGRESS_ROUTE_HELPER", ops_env.get("OPS_INGRESS_ROUTE_HELPER", "/usr/local/bin/manage_desineuron_routes.py"))

    region = os.environ["AWS_DEFAULT_REGION"]
    hostname = env("COMFY_ROUTE_HOSTNAME", "comfy.desineuron.in")
    port = int(env("COMFY_ROUTE_PORT", "8188"))
    state_file = Path(env("COMFY_ROUTE_STATE_FILE", "/var/lib/desineuron-comfy-route-sync/current_target.txt"))

    ec2 = boto3.client("ec2", region_name=region)
    instance = resolve_target_instance(ec2)
    if not instance:
        print("No running comfyui target instance found", file=sys.stderr)
        return 1

    private_ip = instance.get("PrivateIpAddress")
    if not private_ip:
        print("Target instance has no private IP", file=sys.stderr)
        return 1

    # Track "ip:port" rather than the bare IP so that changing
    # COMFY_ROUTE_PORT alone also triggers an update. A state file written in
    # the old bare-IP form simply causes one extra upsert on the next run.
    desired_state = f"{private_ip}:{port}"
    current = state_file.read_text(encoding="utf-8").strip() if state_file.exists() else ""
    if current == desired_state:
        print(json.dumps({"status": "noop", "hostname": hostname, "target_host": private_ip}))
        return 0

    result = upsert_route(hostname, private_ip, port)
    if result.returncode != 0:
        print(result.stdout)
        print(result.stderr, file=sys.stderr)
        return result.returncode

    # Persist the new target only after the remote update succeeded.
    state_file.parent.mkdir(parents=True, exist_ok=True)
    state_file.write_text(desired_state, encoding="utf-8")
    print(json.dumps({"status": "updated", "hostname": hostname, "target_host": private_ip}))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
||||
152
infrastructure/ingress-box/sync_llm_route.py
Normal file
152
infrastructure/ingress-box/sync_llm_route.py
Normal file
@@ -0,0 +1,152 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import boto3
|
||||
|
||||
|
||||
def load_env_file(path: Path) -> dict[str, str]:
    """Parse a ``KEY=VALUE`` env file into a dict.

    Blank lines, ``#`` comment lines and lines without ``=`` are skipped.
    A leading ``export `` is dropped, and one pair of matching single or
    double quotes around a value is removed, so ``KEY="value"`` yields
    ``value`` rather than ``"value"``.

    Args:
        path: Location of the env file.

    Returns:
        Mapping of keys to values; empty when ``path`` does not exist.
    """
    data: dict[str, str] = {}
    if not path.exists():
        return data
    for raw_line in path.read_text(encoding="utf-8").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        if line.startswith("export "):
            line = line[len("export "):].lstrip()
        key, value = line.split("=", 1)
        value = value.strip()
        # Strip one layer of matching surrounding quotes.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in ("'", '"'):
            value = value[1:-1]
        data[key.strip()] = value
    return data
|
||||
|
||||
|
||||
def env(name: str, default: str = "") -> str:
    """Return environment variable ``name``, or ``default`` when it is unset."""
    value = os.environ.get(name)
    return default if value is None else value
|
||||
|
||||
|
||||
def resolve_target_instance(ec2) -> dict | None:
    """Pick the EC2 instance the LLM route should point at.

    When ``LLM_INSTANCE_ID`` is set, only that instance is considered and it is
    returned only if it is running. Otherwise running instances carrying the
    tag ``LLM_INSTANCE_TAG_KEY`` = ``LLM_INSTANCE_TAG_VALUE`` are looked up and
    the most recently launched one is returned.

    Args:
        ec2: Object exposing ``describe_instances`` (a boto3 EC2 client in ``main``).

    Returns:
        The instance description dict, or ``None`` when nothing matches.
    """
    pinned_id = env("LLM_INSTANCE_ID")
    if pinned_id:
        response = ec2.describe_instances(InstanceIds=[pinned_id])
        described = (
            inst
            for reservation in response["Reservations"]
            for inst in reservation["Instances"]
        )
        return next((inst for inst in described if inst["State"]["Name"] == "running"), None)

    # The default tag value is "comfyui": the LLM runtime is assumed to run on
    # the same GPU instance as comfyui initially.
    tag_key = env("LLM_INSTANCE_TAG_KEY", "DesineuronRole")
    tag_value = env("LLM_INSTANCE_TAG_VALUE", "comfyui")
    response = ec2.describe_instances(
        Filters=[
            {"Name": "instance-state-name", "Values": ["running"]},
            {"Name": f"tag:{tag_key}", "Values": [tag_value]},
        ]
    )
    running = [
        inst
        for reservation in response["Reservations"]
        for inst in reservation["Instances"]
    ]
    if not running:
        return None
    # Newest launch wins; on a tie the first one listed is kept.
    return max(running, key=lambda row: row["LaunchTime"])
|
||||
|
||||
|
||||
def upsert_route(hostname: str, private_ip: str, port: int) -> subprocess.CompletedProcess[str]:
    """Upsert one route on the ingress host over SSH, then validate and reload Caddy.

    Connection settings come from the ``INGRESS_SSH_HOST``, ``INGRESS_SSH_USER``,
    ``INGRESS_SSH_PORT`` and ``INGRESS_SSH_KEY_PATH`` environment variables; the
    remote helper is ``INGRESS_ROUTE_HELPER``.

    Args:
        hostname: Public hostname of the route.
        private_ip: Target host the route forwards to.
        port: Target port the route forwards to.

    Returns:
        The completed ``ssh`` process with captured text output. A non-zero
        exit does not raise; the caller inspects ``returncode``.
    """
    import shlex  # local import so the file's import header needs no change

    ingress_host = env("INGRESS_SSH_HOST")
    ingress_user = env("INGRESS_SSH_USER", "ec2-user")
    ingress_port = env("INGRESS_SSH_PORT", "22")
    ingress_key = env("INGRESS_SSH_KEY_PATH")
    helper = env("INGRESS_ROUTE_HELPER", "/usr/local/bin/manage_desineuron_routes.py")
    payload = json.dumps(
        {
            "hostname": hostname,
            "scheme": "http",
            "target_host": private_ip,
            "target_port": port,
        }
    )
    # The command string is interpreted by the remote shell, so the payload is
    # shell-quoted; a hostname containing a quote can no longer break out of
    # the argument.
    command = (
        f"sudo {helper} upsert {shlex.quote(payload)}"
        " && sudo caddy validate --config /etc/caddy/Caddyfile"
        " && sudo systemctl reload caddy"
    )
    # NOTE(review): host-key checking is switched off and known hosts are
    # discarded, so the ingress host's identity is never verified.
    return subprocess.run(
        [
            "ssh",
            "-o",
            "StrictHostKeyChecking=no",
            "-o",
            "UserKnownHostsFile=/dev/null",
            "-i",
            ingress_key,
            "-p",
            ingress_port,
            f"{ingress_user}@{ingress_host}",
            command,
        ],
        capture_output=True,
        text=True,
        check=False,
    )
|
||||
|
||||
|
||||
def main() -> int:
    """Point the LLM route at the current target instance.

    Fills in AWS and ingress settings from the ops env file without overriding
    variables already present in the process environment, resolves the target
    instance, and upserts the route when the recorded ``ip:port`` differs.

    Returns:
        0 when the route is already current or was updated, 1 when no usable
        instance is found, otherwise the exit code of the failed remote command.
    """
    ops_env = load_env_file(Path(env("OPS_ENV_FILE", "/opt/desineuron-ops-control-plane/.env")))

    # AWS credentials/region: copy from the env file only when not already set.
    for aws_key in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_DEFAULT_REGION"):
        if aws_key in os.environ:
            continue
        if aws_key in ops_env:
            os.environ[aws_key] = ops_env[aws_key]

    # Key paths written as /app/state/... are rewritten to the /opt/... state directory.
    key_path = ops_env.get("OPS_SSH_KEY_PATH", "/opt/desineuron-ops-control-plane/state/desineuron-l4-node.pem")
    if key_path.startswith("/app/state/"):
        key_path = key_path.replace("/app/state/", "/opt/desineuron-ops-control-plane/state/")

    fallbacks = {
        "AWS_DEFAULT_REGION": ops_env.get("OPS_DEFAULT_REGION", "us-east-1"),
        "INGRESS_SSH_HOST": ops_env.get("OPS_INGRESS_SSH_HOST", ""),
        "INGRESS_SSH_USER": ops_env.get("OPS_INGRESS_SSH_USER", "ec2-user"),
        "INGRESS_SSH_PORT": ops_env.get("OPS_INGRESS_SSH_PORT", "22"),
        "INGRESS_SSH_KEY_PATH": key_path,
        "INGRESS_ROUTE_HELPER": ops_env.get("OPS_INGRESS_ROUTE_HELPER", "/usr/local/bin/manage_desineuron_routes.py"),
    }
    for name, fallback in fallbacks.items():
        os.environ.setdefault(name, fallback)

    region = os.environ["AWS_DEFAULT_REGION"]
    hostname = env("LLM_ROUTE_HOSTNAME", "llm.desineuron.in")
    port = int(env("LLM_ROUTE_PORT", "11434"))
    state_file = Path(env("LLM_ROUTE_STATE_FILE", "/var/lib/desineuron-llm-route-sync/current_target.txt"))

    instance = resolve_target_instance(boto3.client("ec2", region_name=region))
    if not instance:
        print("No running LLM target instance found", file=sys.stderr)
        return 1

    private_ip = instance.get("PrivateIpAddress")
    if not private_ip:
        print("Target instance has no private IP", file=sys.stderr)
        return 1

    # Fields shared by both JSON status reports below.
    report = {"hostname": hostname, "target_host": private_ip, "target_port": port}

    desired_state = f"{private_ip}:{port}"
    if state_file.exists():
        current = state_file.read_text(encoding="utf-8").strip()
    else:
        current = ""
    if current == desired_state:
        print(json.dumps({"status": "noop", **report}))
        return 0

    result = upsert_route(hostname, private_ip, port)
    if result.returncode != 0:
        print(result.stdout)
        print(result.stderr, file=sys.stderr)
        return result.returncode

    # Persist the new target only after the remote update succeeded.
    state_file.parent.mkdir(parents=True, exist_ok=True)
    state_file.write_text(desired_state, encoding="utf-8")
    print(json.dumps({"status": "updated", **report}))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
||||
142
infrastructure/ingress-box/sync_velocity_route.py
Normal file
142
infrastructure/ingress-box/sync_velocity_route.py
Normal file
@@ -0,0 +1,142 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import boto3
|
||||
|
||||
|
||||
def load_env_file(path: Path) -> dict[str, str]:
    """Parse a ``KEY=VALUE`` env file into a dict.

    Blank lines, ``#`` comment lines and lines without ``=`` are skipped.
    A leading ``export `` is dropped, and one pair of matching single or
    double quotes around a value is removed, so ``KEY="value"`` yields
    ``value`` rather than ``"value"``.

    Args:
        path: Location of the env file.

    Returns:
        Mapping of keys to values; empty when ``path`` does not exist.
    """
    data: dict[str, str] = {}
    if not path.exists():
        return data
    for raw_line in path.read_text(encoding="utf-8").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        if line.startswith("export "):
            line = line[len("export "):].lstrip()
        key, value = line.split("=", 1)
        value = value.strip()
        # Strip one layer of matching surrounding quotes.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in ("'", '"'):
            value = value[1:-1]
        data[key.strip()] = value
    return data
|
||||
|
||||
|
||||
def env(name: str, default: str = "") -> str:
    """Return environment variable ``name``, or ``default`` when it is unset."""
    value = os.environ.get(name)
    return default if value is None else value
|
||||
|
||||
|
||||
def resolve_target_instance(ec2) -> dict | None:
    """Pick the EC2 instance the velocity route should point at.

    When ``VELOCITY_INSTANCE_ID`` is set, only that instance is considered and
    it is returned only if it is running. Otherwise running instances carrying
    the tag ``VELOCITY_INSTANCE_TAG_KEY`` = ``VELOCITY_INSTANCE_TAG_VALUE`` are
    looked up and the most recently launched one is returned.

    Args:
        ec2: Object exposing ``describe_instances`` (a boto3 EC2 client in ``main``).

    Returns:
        The instance description dict, or ``None`` when nothing matches.
    """
    pinned_id = env("VELOCITY_INSTANCE_ID")
    if pinned_id:
        response = ec2.describe_instances(InstanceIds=[pinned_id])
        described = (
            inst
            for reservation in response["Reservations"]
            for inst in reservation["Instances"]
        )
        return next((inst for inst in described if inst["State"]["Name"] == "running"), None)

    tag_key = env("VELOCITY_INSTANCE_TAG_KEY", "DesineuronRole")
    tag_value = env("VELOCITY_INSTANCE_TAG_VALUE", "velocity-backend")
    response = ec2.describe_instances(
        Filters=[
            {"Name": "instance-state-name", "Values": ["running"]},
            {"Name": f"tag:{tag_key}", "Values": [tag_value]},
        ]
    )
    running = [
        inst
        for reservation in response["Reservations"]
        for inst in reservation["Instances"]
    ]
    if not running:
        return None
    # Newest launch wins; on a tie the first one listed is kept.
    return max(running, key=lambda row: row["LaunchTime"])
|
||||
|
||||
|
||||
def upsert_route(hostname: str, private_ip: str, port: int) -> subprocess.CompletedProcess[str]:
    """Upsert one route on the ingress host over SSH, then validate and reload Caddy.

    Connection settings come from the ``INGRESS_SSH_HOST``, ``INGRESS_SSH_USER``,
    ``INGRESS_SSH_PORT`` and ``INGRESS_SSH_KEY_PATH`` environment variables; the
    remote helper is ``INGRESS_ROUTE_HELPER``.

    Args:
        hostname: Public hostname of the route.
        private_ip: Target host the route forwards to.
        port: Target port the route forwards to.

    Returns:
        The completed ``ssh`` process with captured text output. A non-zero
        exit does not raise; the caller inspects ``returncode``.
    """
    import shlex  # local import so the file's import header needs no change

    ingress_host = env("INGRESS_SSH_HOST")
    ingress_user = env("INGRESS_SSH_USER", "ec2-user")
    ingress_port = env("INGRESS_SSH_PORT", "22")
    ingress_key = env("INGRESS_SSH_KEY_PATH")
    helper = env("INGRESS_ROUTE_HELPER", "/usr/local/bin/manage_desineuron_routes.py")
    payload = json.dumps(
        {
            "hostname": hostname,
            "scheme": "http",
            "target_host": private_ip,
            "target_port": port,
        }
    )
    # The command string is interpreted by the remote shell, so the payload is
    # shell-quoted; a hostname containing a quote can no longer break out of
    # the argument.
    command = (
        f"sudo {helper} upsert {shlex.quote(payload)}"
        " && sudo caddy validate --config /etc/caddy/Caddyfile"
        " && sudo systemctl reload caddy"
    )
    # NOTE(review): host-key checking is switched off and known hosts are
    # discarded, so the ingress host's identity is never verified.
    return subprocess.run(
        [
            "ssh",
            "-o",
            "StrictHostKeyChecking=no",
            "-o",
            "UserKnownHostsFile=/dev/null",
            "-i",
            ingress_key,
            "-p",
            ingress_port,
            f"{ingress_user}@{ingress_host}",
            command,
        ],
        capture_output=True,
        text=True,
        check=False,
    )
|
||||
|
||||
|
||||
def main() -> int:
    """Point the velocity API route at the current target instance.

    Fills in AWS and ingress settings from the ops env file without overriding
    variables already present in the process environment, resolves the target
    instance, and upserts the route when the recorded target differs.

    Returns:
        0 when the route is already current or was updated, 1 when no usable
        instance is found, otherwise the exit code of the failed remote command.
    """
    ops_env = load_env_file(Path(env("OPS_ENV_FILE", "/opt/desineuron-ops-control-plane/.env")))
    for key in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_DEFAULT_REGION"]:
        if key not in os.environ and key in ops_env:
            os.environ[key] = ops_env[key]
    os.environ.setdefault("AWS_DEFAULT_REGION", ops_env.get("OPS_DEFAULT_REGION", "us-east-1"))
    os.environ.setdefault("INGRESS_SSH_HOST", ops_env.get("OPS_INGRESS_SSH_HOST", ""))
    os.environ.setdefault("INGRESS_SSH_USER", ops_env.get("OPS_INGRESS_SSH_USER", "ec2-user"))
    os.environ.setdefault("INGRESS_SSH_PORT", ops_env.get("OPS_INGRESS_SSH_PORT", "22"))
    normalized_key_path = ops_env.get("OPS_SSH_KEY_PATH", "/opt/desineuron-ops-control-plane/state/desineuron-l4-node.pem")
    # Key paths written as /app/state/... are rewritten to the /opt/... state directory.
    if normalized_key_path.startswith("/app/state/"):
        normalized_key_path = normalized_key_path.replace("/app/state/", "/opt/desineuron-ops-control-plane/state/")
    os.environ.setdefault("INGRESS_SSH_KEY_PATH", normalized_key_path)
    os.environ.setdefault("INGRESS_ROUTE_HELPER", ops_env.get("OPS_INGRESS_ROUTE_HELPER", "/usr/local/bin/manage_desineuron_routes.py"))

    region = os.environ["AWS_DEFAULT_REGION"]
    hostname = env("VELOCITY_ROUTE_HOSTNAME", "api.desineuron.in")
    port = int(env("VELOCITY_ROUTE_PORT", "8001"))
    state_file = Path(env("VELOCITY_ROUTE_STATE_FILE", "/var/lib/desineuron-velocity-route-sync/current_target.txt"))

    ec2 = boto3.client("ec2", region_name=region)
    instance = resolve_target_instance(ec2)
    if not instance:
        print("No running velocity-backend target instance found", file=sys.stderr)
        return 1

    private_ip = instance.get("PrivateIpAddress")
    if not private_ip:
        print("Target instance has no private IP", file=sys.stderr)
        return 1

    # Track "ip:port" rather than the bare IP so that changing
    # VELOCITY_ROUTE_PORT alone also triggers an update. A state file written
    # in the old bare-IP form simply causes one extra upsert on the next run.
    desired_state = f"{private_ip}:{port}"
    current = state_file.read_text(encoding="utf-8").strip() if state_file.exists() else ""
    if current == desired_state:
        print(json.dumps({"status": "noop", "hostname": hostname, "target_host": private_ip}))
        return 0

    result = upsert_route(hostname, private_ip, port)
    if result.returncode != 0:
        print(result.stdout)
        print(result.stderr, file=sys.stderr)
        return result.returncode

    # Persist the new target only after the remote update succeeded.
    state_file.parent.mkdir(parents=True, exist_ok=True)
    state_file.write_text(desired_state, encoding="utf-8")
    print(json.dumps({"status": "updated", "hostname": hostname, "target_host": private_ip}))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
||||
342
infrastructure/k3s/deployments/deployments.yaml
Normal file
342
infrastructure/k3s/deployments/deployments.yaml
Normal file
@@ -0,0 +1,342 @@
|
||||
# ============================================================
|
||||
# Velocity-OS — K3s Deployments
|
||||
# All services in velocity-os namespace.
|
||||
# GPU: RTX 6000 Blackwell 96GB VRAM — MIG partitioned.
|
||||
# MIG slice 0 (48GB): SGLang LLM inference (core-api)
|
||||
# MIG slice 1 (48GB): ComfyUI media generation (media-engine)
|
||||
# ============================================================
|
||||
|
||||
---
|
||||
# ── PostgreSQL (StatefulSet for stable identity) ─────────────
|
||||
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: postgres
  namespace: velocity-os
  labels:
    app: postgres
    tier: database
spec:
  serviceName: postgres
  replicas: 1
  selector:
    matchLabels:
      app: postgres
  template:
    metadata:
      labels:
        app: postgres
        tier: database
    spec:
      containers:
        - name: postgres
          # ${ECR_REGISTRY} is a placeholder that must be substituted before
          # this manifest is applied.
          image: ${ECR_REGISTRY}/postgres:15-alpine
          ports:
            - containerPort: 5432
          env:
            # Database name, user and password all come from the velocity-secrets Secret.
            - name: POSTGRES_DB
              valueFrom:
                secretKeyRef:
                  name: velocity-secrets
                  key: POSTGRES_DB
            - name: POSTGRES_USER
              valueFrom:
                secretKeyRef:
                  name: velocity-secrets
                  key: POSTGRES_USER
            - name: POSTGRES_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: velocity-secrets
                  key: POSTGRES_PASSWORD
            # Keep the cluster in a subdirectory of the mounted volume.
            - name: PGDATA
              value: /var/lib/postgresql/data/pgdata
          resources:
            requests:
              memory: "1Gi"
              cpu: "500m"
            limits:
              memory: "2Gi"
              cpu: "1000m"
          volumeMounts:
            - name: postgres-data
              mountPath: /var/lib/postgresql/data
          # The probes read the user and database from the container's own
          # environment (populated from the Secret above) rather than
          # hard-coding a user name, so they keep matching whatever the Secret
          # holds. `sh -c` is needed because exec probes do not expand variables.
          livenessProbe:
            exec:
              command:
                - sh
                - -c
                - pg_isready -U "$POSTGRES_USER" -d "$POSTGRES_DB"
            initialDelaySeconds: 30
            periodSeconds: 10
          readinessProbe:
            exec:
              command:
                - sh
                - -c
                - pg_isready -U "$POSTGRES_USER" -d "$POSTGRES_DB"
            initialDelaySeconds: 5
            periodSeconds: 5
      volumes:
        - name: postgres-data
          persistentVolumeClaim:
            claimName: pvc-postgres-data
|
||||
|
||||
---
|
||||
# ── Redis (session cache, future queue) ──────────────────────
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: redis
|
||||
namespace: velocity-os
|
||||
labels:
|
||||
app: redis
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: redis
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: redis
|
||||
spec:
|
||||
containers:
|
||||
- name: redis
|
||||
image: ${ECR_REGISTRY}/redis:7-alpine
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
resources:
|
||||
requests:
|
||||
memory: "256Mi"
|
||||
cpu: "100m"
|
||||
limits:
|
||||
memory: "512Mi"
|
||||
cpu: "250m"
|
||||
args: ["--maxmemory", "400mb", "--maxmemory-policy", "allkeys-lru"]
|
||||
|
||||
---
|
||||
# ── Core API (FastAPI) ────────────────────────────────────────
|
||||
apiVersion: apps/v1
kind: Deployment
metadata:
  name: core-api
  namespace: velocity-os
  labels:
    app: core-api
    tier: backend
spec:
  # Exactly one replica: each pod claims a whole nvidia.com/mig-3g.48gb slice
  # and the workstation exposes only two (one is needed by media-engine).
  # A second replica would either stay Pending or starve media-engine.
  replicas: 1
  selector:
    matchLabels:
      app: core-api
  # Recreate instead of RollingUpdate: a surge pod cannot be scheduled while
  # the old pod still holds the MIG slice, so a zero-downtime rollout would
  # hang forever waiting for a GPU that never frees up.
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: core-api
        tier: backend
    spec:
      # MIG slice 0: SGLang LLM inference
      # The core-api pod requests its MIG slice via the resource limit below.
      runtimeClassName: nvidia
      containers:
        - name: core-api
          image: ${ECR_REGISTRY}/velocity-os/core:latest
          ports:
            - containerPort: 8443
          envFrom:
            - secretRef:
                name: velocity-secrets
            - configMapRef:
                name: velocity-config
          resources:
            requests:
              memory: "1Gi"
              cpu: "500m"
              # RTX 6000 Blackwell MIG 3g.48gb (SGLang slice)
              nvidia.com/mig-3g.48gb: "1"
            limits:
              memory: "2Gi"
              cpu: "1000m"
              # Extended resources must have request == limit.
              nvidia.com/mig-3g.48gb: "1"
          volumeMounts:
            - name: asset-store
              mountPath: /opt/assets
            - name: model-cache
              mountPath: /opt/models
              readOnly: true
          livenessProbe:
            httpGet:
              path: /health
              port: 8443
            initialDelaySeconds: 20
            periodSeconds: 15
          readinessProbe:
            httpGet:
              path: /health
              port: 8443
            initialDelaySeconds: 10
            periodSeconds: 5
      volumes:
        - name: asset-store
          persistentVolumeClaim:
            claimName: pvc-asset-store
        - name: model-cache
          persistentVolumeClaim:
            claimName: pvc-model-cache
|
||||
|
||||
---
|
||||
# ── WebOS (Nginx static + React) ─────────────────────────────
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: webos
|
||||
namespace: velocity-os
|
||||
labels:
|
||||
app: webos
|
||||
tier: frontend
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: webos
|
||||
strategy:
|
||||
type: RollingUpdate
|
||||
rollingUpdate:
|
||||
maxUnavailable: 0
|
||||
maxSurge: 1
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: webos
|
||||
tier: frontend
|
||||
spec:
|
||||
containers:
|
||||
- name: webos
|
||||
image: ${ECR_REGISTRY}/velocity-os/webos:latest
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
requests:
|
||||
memory: "128Mi"
|
||||
cpu: "100m"
|
||||
limits:
|
||||
memory: "256Mi"
|
||||
cpu: "250m"
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /health.txt
|
||||
port: 80
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 10
|
||||
|
||||
---
|
||||
# ── Media Engine (Dream Weaver Gateway) ──────────────────────
|
||||
apiVersion: apps/v1
kind: Deployment
metadata:
  name: media-engine
  namespace: velocity-os
  labels:
    app: media-engine
    tier: ai
spec:
  replicas: 1
  selector:
    matchLabels:
      app: media-engine
  # Recreate instead of the default RollingUpdate: with one replica the
  # default surges a second pod first, and that pod cannot be scheduled while
  # the running pod still holds the only MIG slice reserved for this service.
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: media-engine
        tier: ai
    spec:
      # MIG slice 1: ComfyUI media generation
      runtimeClassName: nvidia
      containers:
        - name: media-engine
          image: ${ECR_REGISTRY}/velocity-os/media-engine:latest
          ports:
            - containerPort: 8290
          envFrom:
            - secretRef:
                name: velocity-secrets
            - configMapRef:
                name: velocity-config
          resources:
            requests:
              memory: "2Gi"
              cpu: "1000m"
              # RTX 6000 Blackwell MIG 3g.48gb (ComfyUI slice)
              nvidia.com/mig-3g.48gb: "1"
            limits:
              memory: "4Gi"
              cpu: "2000m"
              # Extended resources must have request == limit.
              nvidia.com/mig-3g.48gb: "1"
          volumeMounts:
            - name: model-cache
              mountPath: /opt/models
              readOnly: true
            - name: asset-store
              mountPath: /opt/assets
          livenessProbe:
            httpGet:
              path: /health
              port: 8290
            initialDelaySeconds: 30
            periodSeconds: 30
      volumes:
        - name: model-cache
          persistentVolumeClaim:
            claimName: pvc-model-cache
        - name: asset-store
          persistentVolumeClaim:
            claimName: pvc-asset-store
|
||||
|
||||
---
|
||||
# ── DB Init Job (runs once: schema apply + seed) ─────────────
|
||||
apiVersion: batch/v1
kind: Job
metadata:
  name: db-init
  namespace: velocity-os
  labels:
    app: db-init
spec:
  # Never auto-restart; operator re-runs manually if needed
  backoffLimit: 0
  template:
    metadata:
      labels:
        app: db-init
    spec:
      restartPolicy: Never
      initContainers:
        # Wait for postgres to be ready before running init
        - name: wait-for-postgres
          image: ${ECR_REGISTRY}/postgres:15-alpine
          command: [sh, -c, "until pg_isready -h postgres -U $(POSTGRES_USER); do echo waiting...; sleep 2; done"]
          envFrom:
            - secretRef:
                name: velocity-secrets
      containers:
        - name: db-init
          image: ${ECR_REGISTRY}/velocity-os/core:latest
          command:
            - sh
            - -c
            - |
              # Abort on the first failing command or unset variable so a
              # broken schema marks the Job as Failed instead of being hidden
              # by the final echo's exit status.
              set -eu
              echo "=== Applying schemas ==="
              # Order matters: later files extend objects created earlier.
              for schema in \
                /app/db/schema.sql \
                /app/db/schema_addendum.sql \
                /app/db/schema_comms.sql \
                /app/db/schema_crm_canonical.sql \
                /app/oracle/schema_oracle.sql \
                /app/oracle/schema_extension_v2.sql
              do
                # ON_ERROR_STOP makes psql exit non-zero on the first SQL
                # error; without it psql keeps going and returns 0.
                psql -v ON_ERROR_STOP=1 "$DATABASE_URL" -f "$schema"
              done
              echo "=== Seeding synthetic CRM v2 ==="
              python /app/scripts/seed_synthetic_crm.py
              echo "=== DB init complete ==="
          envFrom:
            - secretRef:
                name: velocity-secrets
            - configMapRef:
                name: velocity-config
|
||||
107
infrastructure/k3s/deployments/gpu-mig-config.yaml
Normal file
107
infrastructure/k3s/deployments/gpu-mig-config.yaml
Normal file
@@ -0,0 +1,107 @@
|
||||
# ============================================================
|
||||
# Velocity-OS — NVIDIA MIG Configuration for K3s
|
||||
# Target GPU: NVIDIA RTX 6000 Blackwell (96GB VRAM)
|
||||
#
|
||||
# MIG Strategy: Partition 96GB into two equal 48GB slices:
|
||||
# MIG slice 0 (3g.48gb): SGLang LLM inference (core-api)
|
||||
# MIG slice 1 (3g.48gb): ComfyUI media generation (media-engine)
|
||||
#
|
||||
# Result: Concurrent zero-contention GPU execution.
|
||||
# No operator toggle required.
|
||||
#
|
||||
# Prerequisites on workstation:
|
||||
# - nvidia-driver >= 550
|
||||
# - CUDA >= 12.4
|
||||
# - k3s with nvidia-container-toolkit
|
||||
# - NVIDIA device plugin with MIG support
|
||||
# ============================================================
|
||||
|
||||
# ── Step 1: Enable MIG mode on the GPU ───────────────────────
|
||||
# Run on workstation (one-time, survives reboot via service):
|
||||
# sudo nvidia-smi -i 0 --mig-mode=ENABLE
|
||||
# sudo reboot
|
||||
|
||||
# ── Step 2: Create MIG instances ─────────────────────────────
|
||||
# Run after reboot:
|
||||
# sudo nvidia-smi mig -cgi "3g.48gb,3g.48gb" -C
|
||||
# This creates:
|
||||
# GPU instance 0: 3g.48gb (48GB) → MIG device 0
|
||||
# GPU instance 1: 3g.48gb (48GB) → MIG device 1
|
||||
# Verify: nvidia-smi -L
|
||||
|
||||
---
|
||||
# ── K3s: NVIDIA Device Plugin with MIG strategy ──────────────
|
||||
apiVersion: apps/v1
|
||||
kind: DaemonSet
|
||||
metadata:
|
||||
name: nvidia-device-plugin
|
||||
namespace: kube-system
|
||||
labels:
|
||||
app: nvidia-device-plugin
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app: nvidia-device-plugin
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: nvidia-device-plugin
|
||||
spec:
|
||||
tolerations:
|
||||
- key: nvidia.com/gpu
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
containers:
|
||||
- name: nvidia-device-plugin
|
||||
image: nvcr.io/nvidia/k8s-device-plugin:v0.15.0
|
||||
env:
|
||||
# "mixed" strategy: expose both full GPU and MIG devices
|
||||
- name: MIG_STRATEGY
|
||||
value: "mixed"
|
||||
- name: FAIL_ON_INIT_ERROR
|
||||
value: "false"
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop: [ALL]
|
||||
volumeMounts:
|
||||
- name: device-plugin
|
||||
mountPath: /var/lib/kubelet/device-plugins
|
||||
volumes:
|
||||
- name: device-plugin
|
||||
hostPath:
|
||||
path: /var/lib/kubelet/device-plugins
|
||||
|
||||
---
|
||||
# ── Node label: MIG-capable workstation ──────────────────────
# Apply once: kubectl label node velocity-workstation nvidia.com/mig.strategy=mixed
# The label only marks the node. GPU pods land on it because they request
# nvidia.com/mig-3g.48gb, which is advertised solely by the node running the
# device plugin against the MIG-partitioned GPU.
apiVersion: v1
kind: ConfigMap
metadata:
  name: mig-setup-instructions
  namespace: velocity-os
data:
  README: |
    RTX 6000 Blackwell MIG Setup (run on workstation before deploying):

    1. Enable MIG mode:
       sudo nvidia-smi -i 0 --mig-mode=ENABLE && sudo reboot

    2. Create two 3g.48gb instances (post-reboot):
       sudo nvidia-smi mig -cgi "3g.48gb,3g.48gb" -C

    3. Label K3s node:
       kubectl label node velocity-workstation \
         nvidia.com/mig.strategy=mixed

       (kubernetes.io/hostname is set by the kubelet already; passing it to
       "kubectl label" without --overwrite makes the whole command fail.)

    4. Verify resource availability:
       kubectl describe node velocity-workstation | grep nvidia

    Expected output:
      nvidia.com/mig-3g.48gb: 2    (2 slices available)

    Deployment assignments:
      core-api     → nvidia.com/mig-3g.48gb: 1  (SGLang, slice 0)
      media-engine → nvidia.com/mig-3g.48gb: 1  (ComfyUI, slice 1)
|
||||
64
infrastructure/k3s/ingress/caddyfile-base.conf
Normal file
64
infrastructure/k3s/ingress/caddyfile-base.conf
Normal file
@@ -0,0 +1,64 @@
|
||||
{
|
||||
email admin@desineuron.in
|
||||
log {
|
||||
output file /var/log/caddy/admin.log
|
||||
format json
|
||||
}
|
||||
}
|
||||
|
||||
office.desineuron.in, git.desineuron.in, cloud.desineuron.in, projects.desineuron.in, talk.desineuron.in, vpn.desineuron.in {
|
||||
tls /etc/caddy/tls/fullchain.pem /etc/caddy/tls/privkey.pem
|
||||
|
||||
log {
|
||||
output file /var/log/caddy/access.log
|
||||
format json
|
||||
}
|
||||
|
||||
reverse_proxy https://127.0.0.1:8443 {
|
||||
header_up Host {host}
|
||||
header_up X-Forwarded-Host {host}
|
||||
header_up X-Forwarded-Proto {scheme}
|
||||
header_up X-Forwarded-For {remote_host}
|
||||
transport http {
|
||||
tls_insecure_skip_verify
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
velocity.desineuron.in {
|
||||
log {
|
||||
output file /var/log/caddy/access.log
|
||||
format json
|
||||
}
|
||||
|
||||
import /etc/caddy/managed/llm_upstream.caddy_inc
|
||||
|
||||
reverse_proxy https://127.0.0.1:8443 {
|
||||
header_up Host {host}
|
||||
header_up X-Forwarded-Host {host}
|
||||
header_up X-Forwarded-Proto {scheme}
|
||||
header_up X-Forwarded-For {remote_host}
|
||||
transport http {
|
||||
tls_insecure_skip_verify
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ops.desineuron.in {
|
||||
log {
|
||||
output file /var/log/caddy/access.log
|
||||
format json
|
||||
}
|
||||
|
||||
reverse_proxy https://127.0.0.1:8443 {
|
||||
header_up Host {host}
|
||||
header_up X-Forwarded-Host {host}
|
||||
header_up X-Forwarded-Proto {scheme}
|
||||
header_up X-Forwarded-For {remote_host}
|
||||
transport http {
|
||||
tls_insecure_skip_verify
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
import /etc/caddy/managed/*.caddy
|
||||
158
infrastructure/k3s/ingress/ingress.yaml
Normal file
158
infrastructure/k3s/ingress/ingress.yaml
Normal file
@@ -0,0 +1,158 @@
|
||||
# ============================================================
|
||||
# Velocity-OS — K3s Traefik Ingress
|
||||
# Domain: velocity.local | TLS: self-signed via cert-manager
|
||||
# ============================================================
|
||||
|
||||
# ── cert-manager ClusterIssuer (self-signed for velocity.local) ──
|
||||
---
|
||||
apiVersion: cert-manager.io/v1
|
||||
kind: ClusterIssuer
|
||||
metadata:
|
||||
name: velocity-selfsigned-issuer
|
||||
spec:
|
||||
selfSigned: {}
|
||||
|
||||
---
|
||||
# Self-signed CA Certificate
# The resulting secret is consumed by the velocity-ca-issuer ClusterIssuer.
# A ClusterIssuer looks up its CA secret in cert-manager's "cluster resource
# namespace" (cert-manager unless the controller was started with a different
# --cluster-resource-namespace), so the Certificate must live there; a secret
# in any other namespace is never found and the issuer stays NotReady.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: velocity-local-ca
  namespace: cert-manager
spec:
  isCA: true
  commonName: velocity-local-ca
  secretName: velocity-local-ca-secret
  privateKey:
    algorithm: ECDSA
    size: 256
  issuerRef:
    name: velocity-selfsigned-issuer
    kind: ClusterIssuer
    group: cert-manager.io
|
||||
|
||||
---
|
||||
# CA-backed ClusterIssuer for velocity.local
|
||||
apiVersion: cert-manager.io/v1
|
||||
kind: ClusterIssuer
|
||||
metadata:
|
||||
name: velocity-ca-issuer
|
||||
spec:
|
||||
ca:
|
||||
secretName: velocity-local-ca-secret
|
||||
|
||||
---
|
||||
# TLS Certificate for velocity.local
|
||||
apiVersion: cert-manager.io/v1
|
||||
kind: Certificate
|
||||
metadata:
|
||||
name: velocity-local-tls
|
||||
namespace: velocity-os
|
||||
spec:
|
||||
secretName: velocity-local-tls-secret
|
||||
duration: 8760h # 1 year
|
||||
renewBefore: 720h # renew 30 days before expiry
|
||||
subject:
|
||||
organizations: [Desineuron]
|
||||
commonName: velocity.local
|
||||
dnsNames:
|
||||
- velocity.local
|
||||
- "*.velocity.local"
|
||||
issuerRef:
|
||||
name: velocity-ca-issuer
|
||||
kind: ClusterIssuer
|
||||
group: cert-manager.io
|
||||
|
||||
---
|
||||
# ── Main Ingress ─────────────────────────────────────────────
|
||||
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: velocity-os-ingress
  namespace: velocity-os
  annotations:
    # Traefik (K3s built-in)
    kubernetes.io/ingress.class: traefik
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
    traefik.ingress.kubernetes.io/router.tls: "true"
    # No WebSocket middleware on purpose: Traefik forwards Upgrade requests
    # (Sentinel, Oracle canvas, Catalyst) without extra configuration.
    # Attaching a middleware that forces "Connection: Upgrade" /
    # "Upgrade: websocket" here would rewrite every request on this router,
    # including plain HTTP calls to /api and the static SPA.
    # To enable the security-headers Middleware from this namespace, add:
    #   traefik.ingress.kubernetes.io/router.middlewares: velocity-os-security-headers@kubernetescrd
spec:
  tls:
    - hosts:
        - velocity.local
      secretName: velocity-local-tls-secret
  rules:
    - host: velocity.local
      http:
        paths:
          # Prefix rules are matched longest-path-first, so the order of the
          # entries below is for readability only.
          # API (FastAPI backend)
          - path: /api
            pathType: Prefix
            backend:
              service:
                name: core-api
                port:
                  number: 8443
          # WebSockets (served by the same FastAPI backend)
          - path: /ws
            pathType: Prefix
            backend:
              service:
                name: core-api
                port:
                  number: 8443
          # Dream Weaver gateway
          - path: /dream-weaver
            pathType: Prefix
            backend:
              service:
                name: media-engine
                port:
                  number: 8290
          # Vault public links (no auth)
          - path: /vault
            pathType: Prefix
            backend:
              service:
                name: core-api
                port:
                  number: 8443
          # WebOS (React SPA — catch-all)
          - path: /
            pathType: Prefix
            backend:
              service:
                name: webos
                port:
                  number: 80
|
||||
|
||||
---
|
||||
# ── Traefik Middleware: WebSocket upgrade headers ─────────────
|
||||
# Overwrites the Connection and Upgrade request headers on every request that
# passes through a router referencing this middleware, whether or not the
# client asked for a WebSocket upgrade.
# NOTE(review): Traefik proxies WebSocket upgrades without extra
# configuration — confirm this middleware is actually needed before attaching
# it to a router that also serves plain HTTP.
apiVersion: traefik.containo.us/v1alpha1
kind: Middleware
metadata:
  name: ws-headers
  namespace: velocity-os
spec:
  headers:
    customRequestHeaders:
      Connection: "Upgrade"
      Upgrade: "websocket"
|
||||
|
||||
---
|
||||
# ── Traefik Middleware: Security headers ─────────────────────
|
||||
apiVersion: traefik.containo.us/v1alpha1
|
||||
kind: Middleware
|
||||
metadata:
|
||||
name: security-headers
|
||||
namespace: velocity-os
|
||||
spec:
|
||||
headers:
|
||||
stsSeconds: 31536000
|
||||
stsIncludeSubdomains: true
|
||||
forceSTSHeader: true
|
||||
contentTypeNosniff: true
|
||||
browserXssFilter: true
|
||||
referrerPolicy: strict-origin-when-cross-origin
|
||||
frameDeny: true
|
||||
27
infrastructure/k3s/namespaces/namespaces.yaml
Normal file
27
infrastructure/k3s/namespaces/namespaces.yaml
Normal file
@@ -0,0 +1,27 @@
|
||||
# ============================================================
|
||||
# Velocity-OS — K3s Namespaces
|
||||
# ============================================================
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: velocity-os
|
||||
labels:
|
||||
app.kubernetes.io/managed-by: velocity-os
|
||||
environment: production
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: velocity-agents
|
||||
labels:
|
||||
app.kubernetes.io/managed-by: velocity-os
|
||||
environment: production
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: velocity-infra
|
||||
labels:
|
||||
app.kubernetes.io/managed-by: velocity-os
|
||||
environment: production
|
||||
82
infrastructure/k3s/services/services.yaml
Normal file
82
infrastructure/k3s/services/services.yaml
Normal file
@@ -0,0 +1,82 @@
|
||||
# ============================================================
|
||||
# Velocity-OS — K3s Services
|
||||
# ClusterIP for internal, none for headless StatefulSet
|
||||
# ============================================================
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: postgres
|
||||
namespace: velocity-os
|
||||
labels:
|
||||
app: postgres
|
||||
spec:
|
||||
clusterIP: None # Headless for StatefulSet stable DNS
|
||||
selector:
|
||||
app: postgres
|
||||
ports:
|
||||
- port: 5432
|
||||
targetPort: 5432
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: redis
|
||||
namespace: velocity-os
|
||||
labels:
|
||||
app: redis
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: redis
|
||||
ports:
|
||||
- port: 6379
|
||||
targetPort: 6379
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: core-api
|
||||
namespace: velocity-os
|
||||
labels:
|
||||
app: core-api
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: core-api
|
||||
ports:
|
||||
- name: http
|
||||
port: 8443
|
||||
targetPort: 8443
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: webos
|
||||
namespace: velocity-os
|
||||
labels:
|
||||
app: webos
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: webos
|
||||
ports:
|
||||
- name: http
|
||||
port: 80
|
||||
targetPort: 80
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: media-engine
|
||||
namespace: velocity-os
|
||||
labels:
|
||||
app: media-engine
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: media-engine
|
||||
ports:
|
||||
- name: http
|
||||
port: 8290
|
||||
targetPort: 8290
|
||||
132
infrastructure/k3s/volumes/persistent-volumes.yaml
Normal file
132
infrastructure/k3s/volumes/persistent-volumes.yaml
Normal file
@@ -0,0 +1,132 @@
|
||||
# ============================================================
|
||||
# Velocity-OS — K3s StorageClasses + PersistentVolumes
|
||||
# Target: RTX 6000 Blackwell workstation NVMe drive
|
||||
# ============================================================
|
||||
---
|
||||
# StorageClass: local-nvme (no provisioner — manually bound PVs)
|
||||
apiVersion: storage.k8s.io/v1
|
||||
kind: StorageClass
|
||||
metadata:
|
||||
name: local-nvme
|
||||
provisioner: kubernetes.io/no-provisioner
|
||||
volumeBindingMode: WaitForFirstConsumer
|
||||
reclaimPolicy: Retain
|
||||
|
||||
---
|
||||
# PV: PostgreSQL data (50Gi on NVMe)
|
||||
apiVersion: v1
|
||||
kind: PersistentVolume
|
||||
metadata:
|
||||
name: pv-postgres-data
|
||||
labels:
|
||||
app: postgres
|
||||
spec:
|
||||
capacity:
|
||||
storage: 50Gi
|
||||
accessModes: [ReadWriteOnce]
|
||||
persistentVolumeReclaimPolicy: Retain
|
||||
storageClassName: local-nvme
|
||||
local:
|
||||
path: /opt/dlami/nvme/data/postgres
|
||||
nodeAffinity:
|
||||
required:
|
||||
nodeSelectorTerms:
|
||||
- matchExpressions:
|
||||
- key: kubernetes.io/hostname
|
||||
operator: In
|
||||
values: [velocity-workstation]
|
||||
|
||||
---
|
||||
# PV: AI model cache (500Gi — Wan 2.2, Qwen-Image, Qwen3.6)
|
||||
apiVersion: v1
|
||||
kind: PersistentVolume
|
||||
metadata:
|
||||
name: pv-model-cache
|
||||
labels:
|
||||
app: model-cache
|
||||
spec:
|
||||
capacity:
|
||||
storage: 500Gi
|
||||
accessModes: [ReadOnlyMany]
|
||||
persistentVolumeReclaimPolicy: Retain
|
||||
storageClassName: local-nvme
|
||||
local:
|
||||
path: /opt/dlami/nvme/models
|
||||
nodeAffinity:
|
||||
required:
|
||||
nodeSelectorTerms:
|
||||
- matchExpressions:
|
||||
- key: kubernetes.io/hostname
|
||||
operator: In
|
||||
values: [velocity-workstation]
|
||||
|
||||
---
|
||||
# PV: Generated asset store (200Gi)
|
||||
apiVersion: v1
|
||||
kind: PersistentVolume
|
||||
metadata:
|
||||
name: pv-asset-store
|
||||
labels:
|
||||
app: asset-store
|
||||
spec:
|
||||
capacity:
|
||||
storage: 200Gi
|
||||
accessModes: [ReadWriteMany]
|
||||
persistentVolumeReclaimPolicy: Retain
|
||||
storageClassName: local-nvme
|
||||
local:
|
||||
path: /opt/dlami/nvme/assets
|
||||
nodeAffinity:
|
||||
required:
|
||||
nodeSelectorTerms:
|
||||
- matchExpressions:
|
||||
- key: kubernetes.io/hostname
|
||||
operator: In
|
||||
values: [velocity-workstation]
|
||||
|
||||
---
|
||||
# PVCs
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: pvc-postgres-data
|
||||
namespace: velocity-os
|
||||
spec:
|
||||
accessModes: [ReadWriteOnce]
|
||||
storageClassName: local-nvme
|
||||
resources:
|
||||
requests:
|
||||
storage: 50Gi
|
||||
selector:
|
||||
matchLabels:
|
||||
app: postgres
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: pvc-model-cache
|
||||
namespace: velocity-os
|
||||
spec:
|
||||
accessModes: [ReadOnlyMany]
|
||||
storageClassName: local-nvme
|
||||
resources:
|
||||
requests:
|
||||
storage: 500Gi
|
||||
selector:
|
||||
matchLabels:
|
||||
app: model-cache
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: pvc-asset-store
|
||||
namespace: velocity-os
|
||||
spec:
|
||||
accessModes: [ReadWriteMany]
|
||||
storageClassName: local-nvme
|
||||
resources:
|
||||
requests:
|
||||
storage: 200Gi
|
||||
selector:
|
||||
matchLabels:
|
||||
app: asset-store
|
||||
164
infrastructure/model-hydration/hydrate_gpu_comfy_models.py
Normal file
164
infrastructure/model-hydration/hydrate_gpu_comfy_models.py
Normal file
@@ -0,0 +1,164 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import boto3
|
||||
|
||||
|
||||
DEFAULT_CHECKPOINTS = {
|
||||
"realvisxlV50_v50LightningBakedvae.safetensors": (
|
||||
"s3://project-velocity/models/realvisxlV50_v50LightningBakedvae.safetensors"
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
def load_env_file(path: Path) -> dict[str, str]:
    """Parse a dotenv-style ``KEY=VALUE`` file into a dictionary.

    Blank lines, ``#`` comment lines and lines without ``=`` are skipped.
    Only the first ``=`` separates key from value, so values may contain
    ``=`` themselves. A leading ``export `` on the key and one pair of
    matching surrounding quotes on the value are removed, so that
    ``export KEY="value"`` yields ``{"KEY": "value"}`` rather than a value
    polluted with literal quote characters (which would corrupt credentials
    copied into ``os.environ``).

    Args:
        path: Location of the env file.

    Returns:
        Mapping of variable names to values; empty when ``path`` does not
        exist.
    """
    data: dict[str, str] = {}
    if not path.exists():
        return data
    for line in path.read_text(encoding="utf-8").splitlines():
        line = line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, value = line.split("=", 1)
        key = key.strip()
        if key.startswith("export "):
            # Tolerate shell-sourceable files ("export KEY=value").
            key = key[len("export "):].strip()
        value = value.strip()
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            # Drop one pair of matching quotes, as shells and compose do.
            value = value[1:-1]
        data[key] = value
    return data
|
||||
|
||||
|
||||
def env(name: str, default: str = "") -> str:
    """Look up ``name`` in the process environment.

    Returns the variable's value when it is set (even if empty), otherwise
    ``default``.
    """
    value = os.environ.get(name)
    if value is None:
        return default
    return value
|
||||
|
||||
|
||||
def resolve_target_instance(ec2) -> dict | None:
    """Pick the EC2 instance that should receive the model checkpoints.

    When ``COMFY_INSTANCE_ID`` is set, that instance is looked up directly.
    Otherwise running instances are searched by tag, using
    ``COMFY_INSTANCE_TAG_KEY`` (default ``DesineuronRole``) and
    ``COMFY_INSTANCE_TAG_VALUE`` (default ``comfyui``).

    Args:
        ec2: Client object exposing ``describe_instances``.

    Returns:
        The most recently launched instance in the ``running`` state, or
        ``None`` when there is none.
    """
    instance_id = env("COMFY_INSTANCE_ID")
    if instance_id:
        response = ec2.describe_instances(InstanceIds=[instance_id])
    else:
        tag_key = env("COMFY_INSTANCE_TAG_KEY", "DesineuronRole")
        tag_value = env("COMFY_INSTANCE_TAG_VALUE", "comfyui")
        response = ec2.describe_instances(
            Filters=[
                {"Name": "instance-state-name", "Values": ["running"]},
                {"Name": f"tag:{tag_key}", "Values": [tag_value]},
            ]
        )

    # Re-check the state locally: a lookup by explicit ID is not filtered.
    running = []
    for reservation in response["Reservations"]:
        for candidate in reservation["Instances"]:
            if candidate["State"]["Name"] == "running":
                running.append(candidate)

    if not running:
        return None
    # Newest launch wins; on ties the first one encountered is kept.
    return max(running, key=lambda row: row["LaunchTime"])
|
||||
|
||||
|
||||
def parse_checkpoints() -> dict[str, str]:
    """Return the checkpoint filename -> source URI mapping to hydrate.

    Reads ``COMFY_CHECKPOINTS_JSON``; when it is unset or empty a copy of
    ``DEFAULT_CHECKPOINTS`` is returned. Keys and values of the decoded
    object are coerced to ``str``.

    Raises:
        ValueError: If the variable holds JSON that is not an object
            (``json.loads`` errors for malformed JSON propagate as well).
    """
    raw = env("COMFY_CHECKPOINTS_JSON")
    if raw:
        decoded = json.loads(raw)
        if isinstance(decoded, dict):
            return {str(filename): str(uri) for filename, uri in decoded.items()}
        raise ValueError("COMFY_CHECKPOINTS_JSON must be a JSON object of filename to source URI")
    return dict(DEFAULT_CHECKPOINTS)
|
||||
|
||||
|
||||
def remote_hydration_script(checkpoints: dict[str, str]) -> str:
    """Render the bash script that hydrates checkpoints on the GPU host.

    The script is meant to be piped to ``bash -s`` on the remote machine. It
    downloads every checkpoint whose target file is missing or empty with
    ``aws s3 cp`` (via a ``.part`` file that is renamed on success),
    restarts the ``comfyui`` service when anything was downloaded, and
    finally queries the local ComfyUI checkpoint listing.

    Compared with a naive ordering, the NVMe mountpoint is verified *before*
    any directory is created, so a missing mount can never leave a shadow
    directory tree on the root filesystem, and the manifest is written to a
    ``mktemp`` file that is removed by an EXIT trap instead of a predictable
    path under /tmp.

    Args:
        checkpoints: Mapping of checkpoint filename to source URI.

    Returns:
        The complete script text, starting with the shebang line.
    """
    # Embedded as a Python string literal inside a quoted heredoc, so the
    # remote shell never interprets any character of the payload.
    payload = json.dumps(checkpoints)
    return f"""#!/usr/bin/env bash
set -euo pipefail
CHECKPOINT_DIR="${{COMFY_CHECKPOINT_DIR:-/opt/dlami/nvme/ComfyUI/models/checkpoints}}"
# Refuse to write anything unless the NVMe volume is really mounted.
if ! mountpoint -q /opt/dlami/nvme; then
  echo "GPU NVMe mount /opt/dlami/nvme is not mounted" >&2
  exit 2
fi
mkdir -p "$CHECKPOINT_DIR"
# Private manifest file, removed on every exit path.
manifest="$(mktemp)"
trap 'rm -f -- "$manifest"' EXIT
changed=0
python3 - <<'PY' > "$manifest"
import json
for name, source in json.loads({payload!r}).items():
    print(f"{{name}}\\t{{source}}")
PY
while IFS=$'\\t' read -r filename source; do
  target="$CHECKPOINT_DIR/$filename"
  if [ ! -s "$target" ]; then
    tmp="$target.part"
    rm -f "$tmp"
    aws s3 cp "$source" "$tmp" --no-progress
    mv "$tmp" "$target"
    chmod 0644 "$target"
    changed=1
  fi
done < "$manifest"
if [ "$changed" = "1" ]; then
  sudo systemctl restart comfyui
fi
sleep 3
curl -fsS http://127.0.0.1:8188/models/checkpoints
"""
|
||||
|
||||
|
||||
def main() -> int:
    """Hydrate ComfyUI checkpoints on the running GPU instance over SSH.

    Loads AWS settings from the ops env file (without overriding variables
    already present in the environment), resolves the target EC2 instance,
    and pipes the generated hydration script to ``bash -s`` on that host.

    Returns:
        0 on success, 1 when no reachable running instance is found,
        otherwise the exit status of the ssh command.
    """
    ops_env = load_env_file(Path(env("OPS_ENV_FILE", "/opt/desineuron-ops-control-plane/.env")))
    # Values already in the process environment take precedence over the file.
    for key in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_DEFAULT_REGION"]:
        if key not in os.environ and key in ops_env:
            os.environ[key] = ops_env[key]
    # Last-resort region: OPS_DEFAULT_REGION from the file, else us-east-1.
    os.environ.setdefault("AWS_DEFAULT_REGION", ops_env.get("OPS_DEFAULT_REGION", "us-east-1"))

    key_path = env(
        "GPU_SSH_KEY_PATH",
        ops_env.get("OPS_SSH_KEY_PATH", "/opt/desineuron-ops-control-plane/state/desineuron-l4-node.pem"),
    )
    # Rewrite a /app/state/ key path to the ops-control-plane state directory.
    # NOTE(review): presumably /app/state/ is the in-container path of that
    # directory — confirm against the control plane's deployment.
    if key_path.startswith("/app/state/"):
        key_path = key_path.replace("/app/state/", "/opt/desineuron-ops-control-plane/state/")
    ssh_user = env("GPU_SSH_USER", "ubuntu")

    ec2 = boto3.client("ec2", region_name=os.environ["AWS_DEFAULT_REGION"])
    instance = resolve_target_instance(ec2)
    if not instance:
        print("No running ComfyUI GPU instance found", file=sys.stderr)
        return 1
    # Prefer the public address; fall back to the private one.
    target_host = instance.get("PublicIpAddress") or instance.get("PrivateIpAddress")
    if not target_host:
        print("Target GPU instance has no reachable IP", file=sys.stderr)
        return 1

    # May raise ValueError on malformed COMFY_CHECKPOINTS_JSON; not caught here.
    checkpoints = parse_checkpoints()
    # NOTE(review): StrictHostKeyChecking=no disables host-key verification —
    # confirm this is acceptable for the network path to the GPU host.
    command = [
        "sudo",
        "ssh",
        "-o",
        "StrictHostKeyChecking=no",
        "-o",
        "ConnectTimeout=15",
        "-i",
        key_path,
        f"{ssh_user}@{target_host}",
        "bash -s",
    ]
    # The script travels on stdin, so nothing is written to the remote disk
    # before it runs.
    result = subprocess.run(
        command,
        input=remote_hydration_script(checkpoints),
        text=True,
        capture_output=True,
        check=False,
    )
    if result.stdout:
        print(result.stdout.strip())
    # Remote stderr is surfaced only when the command failed.
    if result.returncode != 0:
        if result.stderr:
            print(result.stderr.strip(), file=sys.stderr)
        return result.returncode
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
83
infrastructure/model-hydration/hydrate_models.sh
Normal file
83
infrastructure/model-hydration/hydrate_models.sh
Normal file
@@ -0,0 +1,83 @@
|
||||
#!/usr/bin/env bash
# ============================================================
# Velocity-OS — Model Hydration Script
# Uses s5cmd for high-throughput parallel S3 → NVMe sync.
# Run once at initial install; safe to re-run for updates.
#
# Models synced (all stored on NVMe, never in Docker images):
#   - Wan 2.2             (ComfyUI video/image model)
#   - Qwen-Image 2512     (ComfyUI poster/image model)
#   - Qwen3.6 35B A3B     (SGLang LLM — MIG slice 0)
#
# Environment:
#   S3_MODEL_BUCKET     Source bucket URL (default: s3://desineuron-models)
#   S5CMD_CONCURRENCY   s5cmd worker count (default: 32)
#
# Exit status:
#   0  every sync succeeded and every present *.sha256 manifest verified
#   1  s5cmd is missing, or at least one checksum manifest failed
#   *  status of a failing s5cmd/mkdir call (propagated by `set -e`)
#
# Requires: s5cmd, AWS credentials with S3 read access
# ============================================================
set -euo pipefail

# ── Configuration ────────────────────────────────────────────
S3_BUCKET="${S3_MODEL_BUCKET:-s3://desineuron-models}"
LOCAL_BASE="/opt/dlami/nvme/models"
S5CMD_CONCURRENCY="${S5CMD_CONCURRENCY:-32}"  # Tune to NVMe write IOPS
CREDENTIALS_FILE="/etc/velocity/aws-credentials"

# Local destination directories, one per model. Used both to create the
# directory tree and to drive the checksum verification pass.
MODEL_DIRS=(
  "${LOCAL_BASE}/comfy/wan2.2"
  "${LOCAL_BASE}/comfy/qwen-image-2512"
  "${LOCAL_BASE}/llm/qwen3.6-35b-a3b"
)

#######################################
# Sync one model prefix from S3 to its local directory.
# Globals:   S3_BUCKET, LOCAL_BASE, S5CMD_CONCURRENCY, CREDENTIALS_FILE (read)
# Arguments: $1 - step number shown in the progress banner
#            $2 - title printed in the "Syncing ..." line
#            $3 - short name printed in the "synced." line
#            $4 - key prefix inside the bucket
#            $5 - destination directory relative to LOCAL_BASE
# Returns:   non-zero (aborting the script under `set -e`) if s5cmd fails
#######################################
sync_model() {
  local step=$1
  local title=$2
  local short_name=$3
  local s3_prefix=$4
  local local_subdir=$5

  echo "[${step}/3] Syncing ${title}..."
  s5cmd \
    --numworkers "${S5CMD_CONCURRENCY}" \
    --credentials-file "${CREDENTIALS_FILE}" \
    sync \
    "${S3_BUCKET}/${s3_prefix}/*" \
    "${LOCAL_BASE}/${local_subdir}/"
  echo " ✓ ${short_name} synced."
}

# ── Ensure directories exist ─────────────────────────────────
mkdir -p "${MODEL_DIRS[@]}"

# ── Check s5cmd installed ────────────────────────────────────
if ! command -v s5cmd &> /dev/null; then
  # Diagnostics go to stderr so they are not mistaken for normal output.
  echo "ERROR: s5cmd not found. Install from https://github.com/peak/s5cmd" >&2
  echo " curl -L https://github.com/peak/s5cmd/releases/latest/download/s5cmd_Linux_x86_64.tar.gz | tar xz -C /usr/local/bin" >&2
  exit 1
fi

echo "=== Velocity-OS Model Hydration ==="
echo "Source: ${S3_BUCKET}"
echo "Target: ${LOCAL_BASE}"
echo "s5cmd workers: ${S5CMD_CONCURRENCY}"
echo ""

# ── Wan 2.2 (ComfyUI — MIG slice 1) ──────────────────────────
sync_model 1 "Wan 2.2" "Wan 2.2" "wan2.2" "comfy/wan2.2"

# ── Qwen-Image 2512 (ComfyUI — MIG slice 1) ──────────────────
sync_model 2 "Qwen-Image 2512" "Qwen-Image 2512" "qwen-image-2512" "comfy/qwen-image-2512"

# ── Qwen3.6 35B A3B (SGLang — MIG slice 0) ───────────────────
sync_model 3 "Qwen3.6 35B A3B (LLM — ~70GB, be patient)" "Qwen3.6 35B" \
  "qwen3.6-35b-a3b" "llm/qwen3.6-35b-a3b"

# ── Verify checksums (optional — if .sha256 files exist in S3) ─
# Verification is skipped for a directory that has no *.sha256 manifest,
# but a manifest that is present and does not verify is a hard failure:
# a failing command on the left of `&&` does not trigger `set -e`, so the
# result is checked explicitly here.
echo ""
echo "=== Verifying checksums ==="
checksum_failures=0
for dir in "${MODEL_DIRS[@]}"; do
  manifests=("${dir}"/*.sha256)
  # With no match the glob stays literal, so test that the first entry exists.
  [[ -e "${manifests[0]}" ]] || continue

  echo " Checking ${dir}..."
  if (cd "${dir}" && sha256sum --quiet -c ./*.sha256); then
    echo " ✓ ${dir} checksums OK"
  else
    echo "ERROR: checksum verification failed in ${dir}" >&2
    checksum_failures=$((checksum_failures + 1))
  fi
done

if (( checksum_failures > 0 )); then
  echo "ERROR: ${checksum_failures} model directory(ies) failed checksum verification" >&2
  exit 1
fi

echo ""
echo "=== Model hydration complete ==="
echo "NVMe usage:"
# Best-effort summary only; a missing directory must not fail the run.
du -sh "${LOCAL_BASE}"/*/* 2>/dev/null || true
|
||||
@@ -0,0 +1,40 @@
|
||||
#!/usr/bin/env bash
# Replace the single comfyui.service with a pool of templated
# comfyui-worker@N.service instances and smoke-test each worker.
#
# Environment:
#   COMFY_WORKER_COUNT           Number of worker instances (default: 4)
#   STOP_SGLANG_FOR_COMFY_POOL   Set to 1 to stop desineuron-sglang.service
#                                before starting the pool (default: 0)
#
# Expects `desineuron-start-comfy-worker` and `comfyui-worker@.service`
# to sit next to this script. Exits non-zero if ComfyUI is missing, the
# NVMe volume is not mounted, or any worker fails its HTTP check.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
STOP_SGLANG_FOR_COMFY_POOL="${STOP_SGLANG_FOR_COMFY_POOL:-0}"
WORKERS="${COMFY_WORKER_COUNT:-4}"

# Fail with a clear message instead of an arithmetic error further down.
if [[ ! "${WORKERS}" =~ ^(0|[1-9][0-9]*)$ ]]; then
  echo "COMFY_WORKER_COUNT must be a non-negative integer, got '${WORKERS}'" >&2
  exit 1
fi

if [[ ! -d /opt/dlami/nvme/ComfyUI ]]; then
  echo "Missing ComfyUI at /opt/dlami/nvme/ComfyUI" >&2
  exit 1
fi

if ! mountpoint -q /opt/dlami/nvme; then
  echo "/opt/dlami/nvme is not mounted; refusing to run model workers on root disk" >&2
  exit 1
fi

if [[ "$STOP_SGLANG_FOR_COMFY_POOL" == "1" ]]; then
  # Best-effort: the unit may not exist or may already be stopped.
  sudo systemctl stop desineuron-sglang.service || true
fi

# Retire the standalone service; ignore errors if it was never installed.
sudo systemctl stop comfyui.service || true
sudo systemctl disable comfyui.service || true

sudo install -m 0755 "$SCRIPT_DIR/desineuron-start-comfy-worker" /usr/local/bin/desineuron-start-comfy-worker
sudo install -m 0644 "$SCRIPT_DIR/comfyui-worker@.service" /etc/systemd/system/comfyui-worker@.service
sudo systemctl daemon-reload

for ((index = 0; index < WORKERS; index++)); do
  # `restart` also starts a stopped unit, so enable + restart brings every
  # worker up on the freshly installed files with a single start cycle.
  sudo systemctl enable "comfyui-worker@${index}.service"
  sudo systemctl restart "comfyui-worker@${index}.service"
done

sleep 5

for ((index = 0; index < WORKERS; index++)); do
  port=$((8188 + index))
  echo "worker ${index} http://127.0.0.1:${port}"
  # Capture the body first and truncate in the shell. Piping curl into
  # `head -c 500` under `pipefail` aborts the script whenever the response
  # is longer than 500 bytes, because curl then fails with a write error.
  response="$(curl -fsS "http://127.0.0.1:${port}/models/checkpoints")"
  printf '%s\n' "${response:0:500}"
done
|
||||
104
infrastructure/model-hydration/install_gpu_sglang_runtime.sh
Normal file
104
infrastructure/model-hydration/install_gpu_sglang_runtime.sh
Normal file
@@ -0,0 +1,104 @@
|
||||
#!/usr/bin/env bash
# Install the Desineuron SGLang runtime on a GPU host:
#   1. resolve the address SGLang should bind to,
#   2. create a Python venv with sglang, flashinfer and huggingface_hub,
#   3. write /etc/default/desineuron-sglang (runtime configuration),
#   4. write /usr/local/bin/desineuron-sglang-launch.sh,
#   5. write, enable and start desineuron-sglang.service.
#
# Every setting below can be overridden through the environment variable
# named in its default expansion (NVME_ROOT, RUNTIME_ROOT, SGLANG_PORT, ...).
set -euo pipefail

NVME_ROOT="${NVME_ROOT:-/opt/dlami/nvme/sglang}"
RUNTIME_ROOT="${RUNTIME_ROOT:-/opt/desineuron-sglang}"
VENV_PATH="${RUNTIME_ROOT}/.venv"
PORT="${SGLANG_PORT:-30100}"
HOST="${SGLANG_HOST:-}"
MODEL_ID="${SGLANG_MODEL_ID:-qwen3.6-35b-a3b}"
MODEL_PATH="${SGLANG_MODEL_PATH:-/opt/dlami/nvme/models/Qwen-Qwen3.6-35B-A3B-FP8}"
TP_SIZE="${SGLANG_TP_SIZE:-4}"
CONTEXT_LENGTH="${SGLANG_CONTEXT_LENGTH:-131072}"
MEM_FRACTION_STATIC="${SGLANG_MEM_FRACTION_STATIC:-0.88}"
ATTENTION_BACKEND="${SGLANG_ATTENTION_BACKEND:-flashinfer}"
DIST_INIT_ADDR="${SGLANG_DIST_INIT_ADDR:-127.0.0.1:50000}"

# ── Resolve bind address ─────────────────────────────────────
# Preference order: SGLANG_HOST, EC2 instance metadata (IMDSv2), then the
# first address reported by `hostname -I`. Each lookup is best-effort; the
# timeouts keep the script from hanging when the metadata endpoint is
# unreachable.
if [[ -z "${HOST}" ]]; then
  IMDS_TOKEN="$(curl -fsS --max-time 5 -X PUT http://169.254.169.254/latest/api/token -H 'X-aws-ec2-metadata-token-ttl-seconds: 21600' || true)"
  if [[ -n "${IMDS_TOKEN}" ]]; then
    HOST="$(curl -fsS --max-time 5 -H "X-aws-ec2-metadata-token: ${IMDS_TOKEN}" http://169.254.169.254/latest/meta-data/local-ipv4 || true)"
  fi
fi

if [[ -z "${HOST}" ]]; then
  # `|| true` so a failing pipeline reaches the explicit error below rather
  # than aborting silently under `set -e -o pipefail`.
  HOST="$(hostname -I | awk '{print $1}' || true)"
fi

if [[ -z "${HOST}" ]]; then
  echo "Unable to resolve GPU private IP for SGLang host binding" >&2
  exit 1
fi

# ── Directories and Python environment ───────────────────────
sudo mkdir -p "${NVME_ROOT}"/{cache,logs,state} "${RUNTIME_ROOT}"

# The directory was just created via sudo, but the venv is built as the
# invoking user; hand the directory over when it is not writable so venv
# creation does not fail with "permission denied".
if [[ ! -w "${RUNTIME_ROOT}" ]]; then
  sudo chown "$(id -u):$(id -g)" "${RUNTIME_ROOT}"
fi

python3 -m venv "${VENV_PATH}"
"${VENV_PATH}/bin/pip" install --upgrade pip wheel setuptools
"${VENV_PATH}/bin/pip" install "sglang[all]>=0.5.3" flashinfer-python huggingface_hub

# ── Runtime configuration ────────────────────────────────────
# Unquoted heredoc: values are expanded now and stored literally.
# SGLANG_VENV_PATH records where the venv was installed so the launch
# script follows a RUNTIME_ROOT override.
sudo tee /etc/default/desineuron-sglang >/dev/null <<EOF
SGLANG_HOST=${HOST}
SGLANG_PORT=${PORT}
SGLANG_MODEL_ID=${MODEL_ID}
SGLANG_MODEL_PATH=${MODEL_PATH}
SGLANG_TP_SIZE=${TP_SIZE}
SGLANG_CONTEXT_LENGTH=${CONTEXT_LENGTH}
SGLANG_MEM_FRACTION_STATIC=${MEM_FRACTION_STATIC}
SGLANG_ATTENTION_BACKEND=${ATTENTION_BACKEND}
SGLANG_DIST_INIT_ADDR=${DIST_INIT_ADDR}
SGLANG_CACHE_DIR=${NVME_ROOT}/cache
SGLANG_LOG_DIR=${NVME_ROOT}/logs
SGLANG_STATE_DIR=${NVME_ROOT}/state
SGLANG_USE_FLASHINFER=1
SGLANG_ENABLE_PREFIX_CACHE=1
SGLANG_SERVED_MODEL_NAME=${MODEL_ID}
SGLANG_VENV_PATH=${VENV_PATH}
SGLANG_EXTRA_ARGS=
EOF
sudo chmod 600 /etc/default/desineuron-sglang

# ── Launch script ────────────────────────────────────────────
# Quoted heredoc: nothing is expanded here; the variables are resolved at
# service start from /etc/default/desineuron-sglang.
sudo tee /usr/local/bin/desineuron-sglang-launch.sh >/dev/null <<'EOF'
#!/usr/bin/env bash
set -euo pipefail
source /etc/default/desineuron-sglang
# Fall back to the historical location for config files written before
# SGLANG_VENV_PATH existed.
SGLANG_VENV_PATH="${SGLANG_VENV_PATH:-/opt/desineuron-sglang/.venv}"
export HF_HOME="${SGLANG_CACHE_DIR}/hf"
export HUGGINGFACE_HUB_CACHE="${SGLANG_CACHE_DIR}/hf"
export CUDA_DEVICE_MAX_CONNECTIONS=1
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
export SGLANG_USE_FLASHINFER="${SGLANG_USE_FLASHINFER}"
# SGLANG_EXTRA_ARGS is intentionally unquoted so it splits into separate
# command-line arguments.
exec "${SGLANG_VENV_PATH}/bin/sglang" serve \
  --host "${SGLANG_HOST}" \
  --port "${SGLANG_PORT}" \
  --model-path "${SGLANG_MODEL_PATH}" \
  --served-model-name "${SGLANG_SERVED_MODEL_NAME}" \
  --tp-size "${SGLANG_TP_SIZE}" \
  --context-length "${SGLANG_CONTEXT_LENGTH}" \
  --mem-fraction-static "${SGLANG_MEM_FRACTION_STATIC}" \
  --attention-backend "${SGLANG_ATTENTION_BACKEND}" \
  --dist-init-addr "${SGLANG_DIST_INIT_ADDR}" \
  --enable-metrics \
  --skip-server-warmup \
  ${SGLANG_EXTRA_ARGS}
EOF
sudo chmod 0755 /usr/local/bin/desineuron-sglang-launch.sh

# ── systemd unit ─────────────────────────────────────────────
sudo tee /etc/systemd/system/desineuron-sglang.service >/dev/null <<EOF
[Unit]
Description=Desineuron SGLang Runtime
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
EnvironmentFile=/etc/default/desineuron-sglang
WorkingDirectory=${RUNTIME_ROOT}
ExecStart=/usr/local/bin/desineuron-sglang-launch.sh
Restart=always
RestartSec=5
LimitNOFILE=1048576

[Install]
WantedBy=multi-user.target
EOF

sudo systemctl daemon-reload
sudo systemctl enable --now desineuron-sglang.service
# `status` exits non-zero when the unit is not active, so under `set -e`
# the installer's exit code reflects whether the service came up.
sudo systemctl --no-pager --full status desineuron-sglang.service
|
||||
@@ -0,0 +1,85 @@
|
||||
#!/usr/bin/env bash
# Install the Desineuron SGLang watchdog:
#   - /usr/local/bin/desineuron-sglang-watchdog.sh  health check + restart
#   - /usr/local/bin/desineuron-sglang-hydrate.sh   model download helper
#   - desineuron-sglang-watchdog.service / .timer   runs the check every 5 min
# Both generated scripts are written through quoted heredocs, so nothing in
# them is expanded at install time.
set -euo pipefail

sudo tee /usr/local/bin/desineuron-sglang-watchdog.sh >/dev/null <<'EOF'
#!/usr/bin/env bash
# Watchdog for desineuron-sglang.service.
# Prints "startup_grace" (exit 0) while the service is younger than the
# grace period, "healthy" (exit 0) when the HTTP check passes, and exits
# non-zero when the service is still unhealthy after one restart attempt.
set -euo pipefail
source /etc/default/desineuron-sglang

HEALTH_URL="http://127.0.0.1:${SGLANG_PORT}/v1/models"
HYDRATE_HELPER="/usr/local/bin/desineuron-sglang-hydrate.sh"
STARTUP_GRACE_SECONDS="${SGLANG_STARTUP_GRACE_SECONDS:-900}"
HEALTH_TIMEOUT_SECONDS="${SGLANG_HEALTH_TIMEOUT_SECONDS:-60}"
# SGLANG_MODEL_ID is also used as the served model name; set
# SGLANG_HF_REPO_ID when the Hugging Face repository id differs from it.
HF_REPO_ID="${SGLANG_HF_REPO_ID:-${SGLANG_MODEL_ID}}"

if [[ ! -d "${SGLANG_MODEL_PATH}" ]]; then
  "${HYDRATE_HELPER}" "${HF_REPO_ID}" "${SGLANG_MODEL_PATH}"
fi

if ! systemctl is-active --quiet desineuron-sglang.service; then
  systemctl restart desineuron-sglang.service
  sleep 10
fi

main_pid="$(systemctl show -p MainPID --value desineuron-sglang.service || true)"
if [[ -n "${main_pid}" && "${main_pid}" != "0" ]]; then
  # Elapsed run time in seconds as reported by ps. The timestamps on
  # /proc/<pid> are set when the procfs inode is instantiated rather than
  # when the process starts, so they can understate the age.
  runtime_age="$(ps -o etimes= -p "${main_pid}" 2>/dev/null | tr -d '[:space:]' || true)"
  # If the age cannot be read, treat the process as freshly started and
  # defer the health check to the next timer run.
  if [[ ! "${runtime_age}" =~ ^[0-9]+$ ]]; then
    runtime_age=0
  fi
  if (( runtime_age < STARTUP_GRACE_SECONDS )); then
    echo "startup_grace"
    exit 0
  fi
fi

if ! curl --max-time "${HEALTH_TIMEOUT_SECONDS}" -fsS "${HEALTH_URL}" >/dev/null; then
  systemctl restart desineuron-sglang.service
  sleep 20
fi

# Final verdict: a failure here fails the oneshot unit so it shows up in
# `systemctl status` and the journal.
curl --max-time "${HEALTH_TIMEOUT_SECONDS}" -fsS "${HEALTH_URL}" >/dev/null
echo "healthy"
EOF
sudo chmod 0755 /usr/local/bin/desineuron-sglang-watchdog.sh

sudo tee /usr/local/bin/desineuron-sglang-hydrate.sh >/dev/null <<'EOF'
#!/usr/bin/env bash
# Download a Hugging Face model snapshot.
# Arguments: $1 - Hugging Face repo id
#            $2 - target directory for the snapshot
# Tool lookup order: `hf` on PATH, `hf` in the runtime venv, then
# huggingface_hub via the venv's python, then the system python3.
set -euo pipefail
MODEL_ID="${1:?model id required}"
TARGET_PATH="${2:?target path required}"
VENV_BIN="${SGLANG_VENV_PATH:-/opt/desineuron-sglang/.venv}/bin"
mkdir -p "$(dirname "${TARGET_PATH}")"

hf_cli=""
if command -v hf >/dev/null 2>&1; then
  hf_cli="hf"
elif [[ -x "${VENV_BIN}/hf" ]]; then
  hf_cli="${VENV_BIN}/hf"
fi

if [[ -n "${hf_cli}" ]]; then
  "${hf_cli}" download "${MODEL_ID}" --local-dir "${TARGET_PATH}" --max-workers 8
else
  python_bin="python3"
  if [[ -x "${VENV_BIN}/python" ]]; then
    python_bin="${VENV_BIN}/python"
  fi
  # Values are passed as argv rather than interpolated into the Python
  # source, so quotes or backslashes in them cannot break the program.
  "${python_bin}" - "${MODEL_ID}" "${TARGET_PATH}" <<'PY'
import sys

from huggingface_hub import snapshot_download

snapshot_download(repo_id=sys.argv[1], local_dir=sys.argv[2], max_workers=8)
PY
fi
EOF
sudo chmod 0755 /usr/local/bin/desineuron-sglang-hydrate.sh

sudo tee /etc/systemd/system/desineuron-sglang-watchdog.service >/dev/null <<'EOF'
[Unit]
Description=Desineuron SGLang Runtime Watchdog
After=network-online.target

[Service]
Type=oneshot
ExecStart=/usr/local/bin/desineuron-sglang-watchdog.sh
EOF

sudo tee /etc/systemd/system/desineuron-sglang-watchdog.timer >/dev/null <<'EOF'
[Unit]
Description=Run the Desineuron SGLang watchdog every 5 minutes

[Timer]
OnBootSec=2min
OnUnitActiveSec=5min
Unit=desineuron-sglang-watchdog.service

[Install]
WantedBy=timers.target
EOF

sudo systemctl daemon-reload
sudo systemctl enable --now desineuron-sglang-watchdog.timer
# Run one check immediately; under `set -e` a failing check aborts here.
sudo systemctl start desineuron-sglang-watchdog.service
sudo systemctl --no-pager --full status desineuron-sglang-watchdog.timer
|
||||
Reference in New Issue
Block a user