feat: Oracle Canvas, Revision History and Canvas Sharing (#33)

Co-authored-by: Sagnik <sagnik7896@gmail.com>
Reviewed-on: sagnik/Project_Velocity#33
This commit is contained in:
2026-04-23 01:20:21 +05:30
parent e519339cc9
commit 6cdc366718
58 changed files with 3187 additions and 705 deletions

View File

@@ -25,6 +25,25 @@ office.desineuron.in, git.desineuron.in, cloud.desineuron.in, projects.desineuro
}
}
# Site block for velocity.desineuron.in.
velocity.desineuron.in {
# JSON access log written to the shared Caddy access log file.
log {
output file /var/log/caddy/access.log
format json
}
# Managed include written by manage_desineuron_routes.py: it holds a
# `handle_path /llm/*` proxy block when an llm.desineuron.in route is registered,
# and is empty otherwise.
# NOTE(review): presumably the file must exist (even empty) for Caddy to load — confirm.
import /etc/caddy/managed/llm_upstream.caddy_inc
# Everything else is proxied to a local HTTPS upstream on port 8443.
reverse_proxy https://127.0.0.1:8443 {
header_up Host {host}
header_up X-Forwarded-Host {host}
header_up X-Forwarded-Proto {scheme}
# NOTE(review): this replaces any incoming X-Forwarded-For chain with the direct peer — confirm that is intended.
header_up X-Forwarded-For {remote_host}
transport http {
# Upstream certificate verification is disabled for this loopback hop.
tls_insecure_skip_verify
}
}
}
ops.desineuron.in {
log {
output file /var/log/caddy/access.log

View File

@@ -0,0 +1,20 @@
#!/usr/bin/env bash
# Stage a Hugging Face model snapshot into a local directory.
#
# Environment overrides:
#   TARGET_PATH  directory that receives the snapshot
#   MODEL_REPO   Hugging Face repository id to download
#
# Uses the `hf` CLI when it is on PATH, otherwise falls back to
# huggingface_hub.snapshot_download through python3.
set -euo pipefail

TARGET_PATH="${TARGET_PATH:-/opt/dlami/nvme/models/cyankiwi-Qwen3.5-122B-A10B-AWQ-4bit}"
MODEL_REPO="${MODEL_REPO:-cyankiwi/Qwen3.5-122B-A10B-AWQ-4bit}"

mkdir -p "${TARGET_PATH}"

if command -v hf >/dev/null 2>&1; then
  hf download "${MODEL_REPO}" --local-dir "${TARGET_PATH}" --max-workers 8
else
  # The heredoc is quoted and the values travel through the environment, so a
  # quote or backslash in MODEL_REPO/TARGET_PATH can never be spliced into the
  # Python source.
  MODEL_REPO="${MODEL_REPO}" TARGET_PATH="${TARGET_PATH}" python3 - <<'PY'
import os

from huggingface_hub import snapshot_download

snapshot_download(
    repo_id=os.environ["MODEL_REPO"],
    local_dir=os.environ["TARGET_PATH"],
    max_workers=8,
)
PY
fi

echo "Staged ${MODEL_REPO} under ${TARGET_PATH}"
echo "This is an acquisition/staging path only. The live L4 runtime remains qwen3.6:35b-a3b unless explicitly cut over."
echo "Use MODEL_REPO=txn545/Qwen3.5-122B-A10B-NVFP4 only on hardware validated for NVFP4."

View File

@@ -0,0 +1,17 @@
#!/bin/bash
# Push the route helper and Caddyfile to the ingress proxy, trigger a route sync
# from this host, then validate and reload Caddy on the proxy.
#
# Environment overrides (defaults match the previously hard-coded values):
#   INGRESS_HOST, INGRESS_USER, SSH_KEY, ROUTE_STATE_FILE
set -ex

INGRESS_HOST="${INGRESS_HOST:-98.87.120.120}"
INGRESS_USER="${INGRESS_USER:-ec2-user}"
SSH_KEY="${SSH_KEY:-/opt/desineuron-ops-control-plane/state/desineuron-l4-node.pem}"
ROUTE_STATE_FILE="${ROUTE_STATE_FILE:-/var/lib/desineuron-llm-route-sync/current_target.txt}"

REMOTE="${INGRESS_USER}@${INGRESS_HOST}"
# NOTE(review): StrictHostKeyChecking=no is kept from the original; pin the host key if possible.
SSH_OPTS=(-o StrictHostKeyChecking=no -i "${SSH_KEY}")

# Copy latest config files
sudo scp "${SSH_OPTS[@]}" /tmp/manage_desineuron_routes.py "${REMOTE}:/tmp/manage_desineuron_routes.py"
sudo scp "${SSH_OPTS[@]}" /tmp/Caddyfile "${REMOTE}:/tmp/Caddyfile"

# Bootstrap on the proxy target. The managed directory is created first so the
# `touch` of the include file cannot fail on a fresh proxy.
sudo ssh "${SSH_OPTS[@]}" "${REMOTE}" \
  "sudo cp /tmp/manage_desineuron_routes.py /usr/local/bin/manage_desineuron_routes.py \
   && sudo chmod +x /usr/local/bin/manage_desineuron_routes.py \
   && sudo mkdir -p /etc/caddy/managed \
   && sudo touch /etc/caddy/managed/llm_upstream.caddy_inc \
   && sudo cp /tmp/Caddyfile /etc/caddy/Caddyfile"

# Invoke immediate synchronization pulse to populate llm_upstream.caddy_inc.
# The sync script returns "noop" when its cached target matches, so the cache is
# dropped first to force a real push to the freshly bootstrapped proxy.
sudo rm -f "${ROUTE_STATE_FILE}"
# The unit is Type=oneshot, so `start` blocks until the sync has finished.
sudo systemctl start desineuron-llm-route-sync.service

# Safely initiate proxy reload: only reload a configuration that validates.
sudo ssh "${SSH_OPTS[@]}" "${REMOTE}" \
  "sudo caddy validate --config /etc/caddy/Caddyfile && sudo systemctl reload caddy"

View File

@@ -0,0 +1,9 @@
# Oneshot unit that runs the LLM route sync wrapper once per activation.
# It has no [Install] section; desineuron-llm-route-sync.timer names it as its Unit=.
[Unit]
Description=Sync llm.desineuron.in managed route to current GPU private IP
After=network-online.target
Wants=network-online.target
[Service]
Type=oneshot
# Settings (hostname, port, instance tag, state file, SSH key path) are read from this file.
EnvironmentFile=/etc/desineuron-llm-route-sync.env
ExecStart=/usr/local/bin/run_llm_route_sync.sh

View File

@@ -0,0 +1,10 @@
# Timer that activates desineuron-llm-route-sync.service one minute after boot
# and then two minutes after each activation of that service.
[Unit]
Description=Run LLM route sync on boot and every 2 minutes
[Timer]
OnBootSec=1min
OnUnitActiveSec=2min
Unit=desineuron-llm-route-sync.service
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,108 @@
#!/usr/bin/env bash
# Installs the Ollama NVMe storage override, a hydrate helper and a watchdog
# service/timer for a single model.
set -euo pipefail

MODEL_NAME="qwen3.6:35b-a3b"
NVME_ROOT="/opt/dlami/nvme/ollama"
OLLAMA_OVERRIDE_DIR="/etc/systemd/system/ollama.service.d"

# 1. Configure Ollama to use NVME
sudo mkdir -p "${NVME_ROOT}/models" "${NVME_ROOT}/state" "${NVME_ROOT}/logs"
# The watchdog installed below chowns this tree to ollama:ollama; use the same
# owner here when that account exists so the two never fight over ownership.
if id -u ollama >/dev/null 2>&1; then
  sudo chown -R ollama:ollama "${NVME_ROOT}"
else
  sudo chown -R root:root "${NVME_ROOT}"
fi

echo "Configuring Ollama to use NVME storage at ${NVME_ROOT}/models..."
sudo mkdir -p "${OLLAMA_OVERRIDE_DIR}"
# NOTE(review): OLLAMA_HOST=0.0.0.0 listens on every interface; access control
# then depends entirely on network-level rules.
sudo tee "${OLLAMA_OVERRIDE_DIR}/override.conf" >/dev/null <<EOF
[Service]
Environment="OLLAMA_MODELS=${NVME_ROOT}/models"
Environment="OLLAMA_HOST=0.0.0.0"
EOF

sudo systemctl daemon-reload
sudo systemctl enable ollama.service
# `enable --now` leaves an already-running service untouched, so the new
# environment would not be picked up; restart explicitly.
sudo systemctl restart ollama.service
# 2. Write the Hydrate Helper
# The heredoc is unquoted: ${NVME_ROOT} is expanded now (install time), while
# every \$-escaped expression is evaluated when the helper itself runs.
HYDRATE_HELPER="/usr/local/bin/desineuron-hydrate-qwen36.sh"
echo "Creating Hydrate Helper map at $HYDRATE_HELPER"
sudo tee "$HYDRATE_HELPER" >/dev/null <<EOF
#!/usr/bin/env bash
set -euo pipefail
# Usage: desineuron-hydrate-qwen36.sh <model-tag>
# Fail with a usage message instead of an "unbound variable" error under set -u.
MODEL_TAG="\${1:?usage: \$0 <model-tag>}"
echo "(\$(date)) Hydrating \${MODEL_TAG} model using ollama pull..." | sudo tee -a "${NVME_ROOT}/logs/qwen36_hydrate.log"
# This requires outward access or an Ollama compatible registry proxy
# Note: For S3-based private GGUFs, this would use s5cmd
ollama pull "\${MODEL_TAG}"
echo "(\$(date)) Hydration complete" | sudo tee -a "${NVME_ROOT}/logs/qwen36_hydrate.log"
EOF
sudo chmod 0755 "$HYDRATE_HELPER"
# 3. Write Watchdog Script
# The heredoc is unquoted: ${MODEL_NAME} and ${NVME_ROOT} are baked in at
# install time, \$-escaped expressions are evaluated when the watchdog runs.
WATCHDOG_SCRIPT="/usr/local/bin/desineuron-ollama-watchdog.sh"
echo "Creating Watchdog Script map at $WATCHDOG_SCRIPT"
sudo tee "$WATCHDOG_SCRIPT" >/dev/null <<EOF
#!/usr/bin/env bash
set -euo pipefail
MODEL_NAME="${MODEL_NAME}"
OLLAMA_URL="http://127.0.0.1:11434"

# Succeeds when the /api/tags response mentions MODEL_NAME.
# The response is captured first so an early grep exit cannot fail curl with a
# broken pipe under pipefail, and -F matches the name literally (the dots in a
# tag are not regex wildcards).
model_present() {
  local tags
  tags="\$(curl -fsS "\$OLLAMA_URL/api/tags")" || return 1
  grep -qF -- "\$MODEL_NAME" <<<"\$tags"
}

if ! systemctl is-active --quiet ollama; then
  systemctl restart ollama
  sleep 5
fi

# Try asking Ollama if the tag exists
if ! model_present; then
  echo "Expected model \$MODEL_NAME missing. Initiating hydration..."
  # Ensure wiped ephemeral NVMe disks are scaffolded pre-hydration
  sudo mkdir -p "${NVME_ROOT}/logs" "${NVME_ROOT}/models" "${NVME_ROOT}/state"
  sudo chown -R ollama:ollama "${NVME_ROOT}"
  /usr/local/bin/desineuron-hydrate-qwen36.sh "\$MODEL_NAME"
  sleep 5
fi

# Verify final state
if model_present; then
  echo "healthy"
  exit 0
else
  echo "unhealthy: Model \$MODEL_NAME failed to register" >&2
  exit 1
fi
EOF
sudo chmod 0755 "$WATCHDOG_SCRIPT"
# 4. Write Watchdog Systemd Service & Timer
# After= only orders the unit; Wants= is what actually pulls network-online.target
# into the transaction, matching the other units shipped alongside this script.
sudo tee "/etc/systemd/system/desineuron-ollama-watchdog.service" >/dev/null <<EOF
[Unit]
Description=Desineuron GPU Ollama Watchdog for Model $MODEL_NAME
After=network-online.target
Wants=network-online.target
[Service]
Type=oneshot
Environment="HOME=/root"
ExecStart=$WATCHDOG_SCRIPT
EOF

# Timer: first run two minutes after boot, then five minutes after each run.
sudo tee "/etc/systemd/system/desineuron-ollama-watchdog.timer" >/dev/null <<EOF
[Unit]
Description=Watchdog run for Ollama Model $MODEL_NAME every 5 mins
[Timer]
OnBootSec=2min
OnUnitActiveSec=5min
Unit=desineuron-ollama-watchdog.service
[Install]
WantedBy=timers.target
EOF

sudo systemctl daemon-reload
sudo systemctl enable --now desineuron-ollama-watchdog.timer
# Run the watchdog once right away; as a oneshot unit this blocks until it finishes.
sudo systemctl start desineuron-ollama-watchdog.service
echo "Ollama Watchdog installed and model $MODEL_NAME setup initiated."
sudo systemctl --no-pager status desineuron-ollama-watchdog.timer

View File

@@ -0,0 +1,104 @@
#!/usr/bin/env bash
# Installs an SGLang runtime: resolves settings, builds a venv, writes the env
# file, the launch wrapper and the systemd service.
set -euo pipefail

# Every setting can be overridden from the environment; defaults follow.
NVME_ROOT="${NVME_ROOT:-/opt/dlami/nvme/sglang}"
RUNTIME_ROOT="${RUNTIME_ROOT:-/opt/desineuron-sglang}"
VENV_PATH="${RUNTIME_ROOT}/.venv"
PORT="${SGLANG_PORT:-30100}"
HOST="${SGLANG_HOST:-}"
MODEL_ID="${SGLANG_MODEL_ID:-qwen3.6-35b-a3b}"
MODEL_PATH="${SGLANG_MODEL_PATH:-/opt/dlami/nvme/models/Qwen-Qwen3.6-35B-A3B-FP8}"
TP_SIZE="${SGLANG_TP_SIZE:-4}"
CONTEXT_LENGTH="${SGLANG_CONTEXT_LENGTH:-131072}"
MEM_FRACTION_STATIC="${SGLANG_MEM_FRACTION_STATIC:-0.88}"
ATTENTION_BACKEND="${SGLANG_ATTENTION_BACKEND:-flashinfer}"
DIST_INIT_ADDR="${SGLANG_DIST_INIT_ADDR:-127.0.0.1:50000}"

# Host resolution order: explicit SGLANG_HOST, then the EC2 metadata service
# (token-based), then the first address reported by `hostname -I`.
# The metadata calls are time-bounded so the script does not hang where the
# link-local endpoint is unreachable.
IMDS_CURL=(curl -fsS --connect-timeout 2 --max-time 5)
if [[ -z "${HOST}" ]]; then
  IMDS_TOKEN="$("${IMDS_CURL[@]}" -X PUT http://169.254.169.254/latest/api/token -H 'X-aws-ec2-metadata-token-ttl-seconds: 21600' || true)"
  if [[ -n "${IMDS_TOKEN}" ]]; then
    HOST="$("${IMDS_CURL[@]}" -H "X-aws-ec2-metadata-token: ${IMDS_TOKEN}" http://169.254.169.254/latest/meta-data/local-ipv4 || true)"
  fi
fi
if [[ -z "${HOST}" ]]; then
  HOST="$(hostname -I | awk '{print $1}')"
fi
if [[ -z "${HOST}" ]]; then
  echo "Unable to resolve GPU private IP for SGLang host binding" >&2
  exit 1
fi
# Create storage and runtime directories, build the venv and write the env file.
sudo mkdir -p "${NVME_ROOT}"/{cache,logs,state} "${RUNTIME_ROOT}"

# RUNTIME_ROOT was just created through sudo, so an unprivileged caller cannot
# write the venv into it. Run venv/pip through sudo too, resolving python3 from
# the caller's PATH so sudo's own PATH cannot select a different interpreter.
PYTHON_BIN="$(command -v python3)"
sudo "${PYTHON_BIN}" -m venv "${VENV_PATH}"
sudo -H "${VENV_PATH}/bin/pip" install --upgrade pip wheel setuptools
sudo -H "${VENV_PATH}/bin/pip" install "sglang[all]>=0.5.3" flashinfer-python huggingface_hub

# Settings file sourced by the launch wrapper and the watchdog, and referenced
# as EnvironmentFile= by the service unit. Values are expanded at install time.
sudo tee /etc/default/desineuron-sglang >/dev/null <<EOF
SGLANG_HOST=${HOST}
SGLANG_PORT=${PORT}
SGLANG_MODEL_ID=${MODEL_ID}
SGLANG_MODEL_PATH=${MODEL_PATH}
SGLANG_TP_SIZE=${TP_SIZE}
SGLANG_CONTEXT_LENGTH=${CONTEXT_LENGTH}
SGLANG_MEM_FRACTION_STATIC=${MEM_FRACTION_STATIC}
SGLANG_ATTENTION_BACKEND=${ATTENTION_BACKEND}
SGLANG_DIST_INIT_ADDR=${DIST_INIT_ADDR}
SGLANG_CACHE_DIR=${NVME_ROOT}/cache
SGLANG_LOG_DIR=${NVME_ROOT}/logs
SGLANG_STATE_DIR=${NVME_ROOT}/state
SGLANG_USE_FLASHINFER=1
SGLANG_ENABLE_PREFIX_CACHE=1
SGLANG_SERVED_MODEL_NAME=${MODEL_ID}
SGLANG_EXTRA_ARGS=
EOF
sudo chmod 600 /etc/default/desineuron-sglang
# Write the launch wrapper. The venv location is baked in from VENV_PATH so an
# overridden RUNTIME_ROOT is honoured (it used to be hard-coded to
# /opt/desineuron-sglang/.venv). Everything inside the quoted heredoc is
# evaluated when the wrapper runs, not now.
{
  printf '%s\n' '#!/usr/bin/env bash' 'set -euo pipefail'
  printf 'SGLANG_VENV_PATH=%q\n' "${VENV_PATH}"
  cat <<'EOF'
source /etc/default/desineuron-sglang
export HF_HOME="${SGLANG_CACHE_DIR}/hf"
export HUGGINGFACE_HUB_CACHE="${SGLANG_CACHE_DIR}/hf"
export CUDA_DEVICE_MAX_CONNECTIONS=1
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
export SGLANG_USE_FLASHINFER="${SGLANG_USE_FLASHINFER}"
# SGLANG_EXTRA_ARGS is intentionally unquoted so it splits into separate
# arguments; the :- default keeps set -u happy if the variable is removed.
exec "${SGLANG_VENV_PATH}/bin/sglang" serve \
  --host "${SGLANG_HOST}" \
  --port "${SGLANG_PORT}" \
  --model-path "${SGLANG_MODEL_PATH}" \
  --served-model-name "${SGLANG_SERVED_MODEL_NAME}" \
  --tp-size "${SGLANG_TP_SIZE}" \
  --context-length "${SGLANG_CONTEXT_LENGTH}" \
  --mem-fraction-static "${SGLANG_MEM_FRACTION_STATIC}" \
  --attention-backend "${SGLANG_ATTENTION_BACKEND}" \
  --dist-init-addr "${SGLANG_DIST_INIT_ADDR}" \
  --enable-metrics \
  --skip-server-warmup \
  ${SGLANG_EXTRA_ARGS:-}
EOF
} | sudo tee /usr/local/bin/desineuron-sglang-launch.sh >/dev/null
sudo chmod 0755 /usr/local/bin/desineuron-sglang-launch.sh
# Write the systemd unit for the SGLang runtime. The heredoc is unquoted, so
# ${RUNTIME_ROOT} is expanded now and the unit contains the literal path.
sudo tee /etc/systemd/system/desineuron-sglang.service >/dev/null <<EOF
[Unit]
Description=Desineuron SGLang Runtime
After=network-online.target
Wants=network-online.target
[Service]
Type=simple
EnvironmentFile=/etc/default/desineuron-sglang
WorkingDirectory=${RUNTIME_ROOT}
ExecStart=/usr/local/bin/desineuron-sglang-launch.sh
Restart=always
RestartSec=5
LimitNOFILE=1048576
[Install]
WantedBy=multi-user.target
EOF
# Pick up the new unit, enable it at boot and start it immediately.
sudo systemctl daemon-reload
sudo systemctl enable --now desineuron-sglang.service
# Print the current unit state; under `set -e` a non-zero status exit ends the script.
sudo systemctl --no-pager --full status desineuron-sglang.service

View File

@@ -0,0 +1,85 @@
#!/usr/bin/env bash
# Installs the SGLang watchdog script, its hydrate helper and a systemd timer.
set -euo pipefail

# The heredoc is quoted: everything below is evaluated when the watchdog runs.
sudo tee /usr/local/bin/desineuron-sglang-watchdog.sh >/dev/null <<'EOF'
#!/usr/bin/env bash
# Watchdog for desineuron-sglang.service:
#   1. download the model directory if it is missing,
#   2. restart the service if it is not active,
#   3. skip health checks while the process is younger than the grace period,
#   4. otherwise probe /v1/models and restart once on failure.
set -euo pipefail
source /etc/default/desineuron-sglang

HEALTH_URL="http://127.0.0.1:${SGLANG_PORT}/v1/models"
HYDRATE_HELPER="/usr/local/bin/desineuron-sglang-hydrate.sh"
STARTUP_GRACE_SECONDS="${SGLANG_STARTUP_GRACE_SECONDS:-900}"
HEALTH_TIMEOUT_SECONDS="${SGLANG_HEALTH_TIMEOUT_SECONDS:-60}"
# SGLANG_MODEL_ID is also the served-model alias, which need not be a valid
# Hugging Face repository id. SGLANG_MODEL_REPO, when set, names the repository
# to download; the previous behaviour (use SGLANG_MODEL_ID) is the fallback.
MODEL_REPO="${SGLANG_MODEL_REPO:-${SGLANG_MODEL_ID}}"

if [[ ! -d "${SGLANG_MODEL_PATH}" ]]; then
  "${HYDRATE_HELPER}" "${MODEL_REPO}" "${SGLANG_MODEL_PATH}"
fi

if ! systemctl is-active --quiet desineuron-sglang.service; then
  systemctl restart desineuron-sglang.service
  sleep 10
fi

main_pid="$(systemctl show -p MainPID --value desineuron-sglang.service || true)"
if [[ -n "${main_pid}" && "${main_pid}" != "0" ]]; then
  # Process age is approximated from the mtime of /proc/<pid>; if that cannot
  # be read the age evaluates to 0 and the run counts as inside the grace window.
  runtime_age="$(( $(date +%s) - $(stat -c %Y "/proc/${main_pid}" 2>/dev/null || date +%s) ))"
  if (( runtime_age < STARTUP_GRACE_SECONDS )); then
    echo "startup_grace"
    exit 0
  fi
fi

if ! curl --max-time "${HEALTH_TIMEOUT_SECONDS}" -fsS "${HEALTH_URL}" >/dev/null; then
  systemctl restart desineuron-sglang.service
  sleep 20
fi

# Final probe: a failure here exits non-zero through set -e.
curl --max-time "${HEALTH_TIMEOUT_SECONDS}" -fsS "${HEALTH_URL}" >/dev/null
echo "healthy"
EOF
sudo chmod 0755 /usr/local/bin/desineuron-sglang-watchdog.sh
# Write the hydrate helper: downloads a Hugging Face repository into a target
# directory, using the `hf` CLI when available and huggingface_hub otherwise.
sudo tee /usr/local/bin/desineuron-sglang-hydrate.sh >/dev/null <<'EOF'
#!/usr/bin/env bash
# Usage: desineuron-sglang-hydrate.sh <model id> <target path>
set -euo pipefail
MODEL_ID="${1:?model id required}"
TARGET_PATH="${2:?target path required}"
mkdir -p "$(dirname "${TARGET_PATH}")"
if command -v hf >/dev/null 2>&1; then
  hf download "${MODEL_ID}" --local-dir "${TARGET_PATH}" --max-workers 8
else
  # Values go through the environment and the inner heredoc is quoted, so they
  # are never interpolated into the Python source text.
  MODEL_ID="${MODEL_ID}" TARGET_PATH="${TARGET_PATH}" python3 - <<'PY'
import os

from huggingface_hub import snapshot_download

snapshot_download(
    repo_id=os.environ["MODEL_ID"],
    local_dir=os.environ["TARGET_PATH"],
    max_workers=8,
)
PY
fi
EOF
sudo chmod 0755 /usr/local/bin/desineuron-sglang-hydrate.sh
# Watchdog service: a oneshot that runs the watchdog script.
# Wants= is added next to After= so network-online.target is actually pulled
# in, not merely ordered against.
sudo tee /etc/systemd/system/desineuron-sglang-watchdog.service >/dev/null <<EOF
[Unit]
Description=Desineuron SGLang Runtime Watchdog
After=network-online.target
Wants=network-online.target
[Service]
Type=oneshot
ExecStart=/usr/local/bin/desineuron-sglang-watchdog.sh
EOF

# Watchdog timer: two minutes after boot, then five minutes after each run.
sudo tee /etc/systemd/system/desineuron-sglang-watchdog.timer >/dev/null <<EOF
[Unit]
Description=Run the Desineuron SGLang watchdog every 5 minutes
[Timer]
OnBootSec=2min
OnUnitActiveSec=5min
Unit=desineuron-sglang-watchdog.service
[Install]
WantedBy=timers.target
EOF

sudo systemctl daemon-reload
sudo systemctl enable --now desineuron-sglang-watchdog.timer
# Run one watchdog pass immediately instead of waiting for the timer.
sudo systemctl start desineuron-sglang-watchdog.service
sudo systemctl --no-pager --full status desineuron-sglang-watchdog.timer

View File

@@ -0,0 +1,35 @@
#!/usr/bin/env bash
# Installs the LLM route sync: venv with boto3, the sync script and wrapper,
# the systemd service/timer and the environment file they read.
# Expects the four payload files to be staged under /tmp/desineuron_ingress.
set -euo pipefail

APP_ROOT=/opt/desineuron-llm-route-sync
VENV_PATH="$APP_ROOT/.venv"
ENV_FILE=/etc/desineuron-llm-route-sync.env
SCRIPT_PATH=/usr/local/bin/sync_llm_route.py
WRAPPER_PATH=/usr/local/bin/run_llm_route_sync.sh
SERVICE_FILE=/etc/systemd/system/desineuron-llm-route-sync.service
TIMER_FILE=/etc/systemd/system/desineuron-llm-route-sync.timer
STAGING_DIR=/tmp/desineuron_ingress

sudo mkdir -p "$APP_ROOT" /var/lib/desineuron-llm-route-sync

# APP_ROOT was created through sudo, so the venv must be built with the same
# privileges or an unprivileged caller gets "Permission denied". python3 is
# resolved from the caller's PATH before handing it to sudo.
PYTHON_BIN="$(command -v python3)"
sudo "$PYTHON_BIN" -m venv "$VENV_PATH"
sudo -H "$VENV_PATH/bin/pip" install --upgrade pip boto3

sudo install -m 0755 "$STAGING_DIR/sync_llm_route.py" "$SCRIPT_PATH"
sudo install -m 0755 "$STAGING_DIR/run_llm_route_sync.sh" "$WRAPPER_PATH"
sudo install -m 0644 "$STAGING_DIR/desineuron-llm-route-sync.service" "$SERVICE_FILE"
sudo install -m 0644 "$STAGING_DIR/desineuron-llm-route-sync.timer" "$TIMER_FILE"

# Environment consumed by the service unit (EnvironmentFile=).
sudo tee "$ENV_FILE" >/dev/null <<EOF
OPS_ENV_FILE=/opt/desineuron-ops-control-plane/.env
LLM_ROUTE_HOSTNAME=llm.desineuron.in
LLM_ROUTE_PORT=30100
LLM_INSTANCE_TAG_KEY=DesineuronRole
LLM_INSTANCE_TAG_VALUE=comfyui
LLM_ROUTE_STATE_FILE=/var/lib/desineuron-llm-route-sync/current_target.txt
INGRESS_SSH_KEY_PATH=/opt/desineuron-ops-control-plane/state/desineuron-l4-node.pem
EOF
sudo chmod 600 "$ENV_FILE"

sudo systemctl daemon-reload
sudo systemctl enable --now desineuron-llm-route-sync.timer
# Run one sync immediately instead of waiting for the timer.
sudo systemctl start desineuron-llm-route-sync.service
sudo systemctl --no-pager --full status desineuron-llm-route-sync.service desineuron-llm-route-sync.timer

View File

@@ -0,0 +1,94 @@
#!/usr/bin/env python3
from __future__ import annotations
import json
import sys
from pathlib import Path
# JSON document holding every managed route, keyed by hostname.
STATE_FILE = Path("/etc/caddy/managed/desineuron-routes.json")
# Caddy snippet rendered from STATE_FILE, one site block per route.
SNIPPET_FILE = Path("/etc/caddy/managed/desineuron-routes.caddy")
def load_routes() -> dict[str, dict]:
    """Load the managed routes from ``STATE_FILE``.

    Returns:
        The hostname -> route mapping, or an empty dict when the state file is
        missing or contains only whitespace (for example a file that was
        created empty), so a blank file does not crash every command.

    Raises:
        json.JSONDecodeError: if the file has content that is not valid JSON.
    """
    if not STATE_FILE.exists():
        return {}
    raw = STATE_FILE.read_text(encoding="utf-8")
    if not raw.strip():
        return {}
    return json.loads(raw)
def save_routes(routes: dict[str, dict]) -> None:
    """Persist ``routes`` to ``STATE_FILE`` as indented JSON.

    The document is written to a sibling temporary file and then renamed over
    the target, so an interrupted write cannot leave a truncated state file
    behind for the next invocation to choke on.

    Args:
        routes: hostname -> route mapping to store.
    """
    STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
    staging = STATE_FILE.with_name(STATE_FILE.name + ".tmp")
    staging.write_text(json.dumps(routes, indent=2), encoding="utf-8")
    staging.replace(STATE_FILE)
def _site_block(hostname: str, route: dict) -> list[str]:
    """Return the Caddy site-block lines (plus a trailing blank line) for one route."""
    upstream = f"{route['scheme']}://{route['target_host']}:{route['target_port']}"
    return [
        f"{hostname} {{",
        "\ttls /etc/caddy/tls/fullchain.pem /etc/caddy/tls/privkey.pem",
        "\tlog {",
        "\t\toutput file /var/log/caddy/access.log",
        "\t\tformat json",
        "\t}",
        f"\treverse_proxy {upstream} {{",
        "\t\theader_up Host {host}",
        "\t\theader_up X-Forwarded-Host {host}",
        "\t\theader_up X-Forwarded-Proto {scheme}",
        "\t\theader_up X-Forwarded-For {remote_host}",
        "\t}",
        "}",
        "",
    ]


def _llm_include(route: dict) -> str:
    """Return the ``handle_path /llm/*`` include that proxies to the LLM route's upstream."""
    upstream = f"{route['scheme']}://{route['target_host']}:{route['target_port']}"
    return (
        "handle_path /llm/* {\n"
        f"\treverse_proxy {upstream} {{\n"
        "\t\theader_up Host {host}\n"
        "\t\theader_up X-Forwarded-For {remote_host}\n"
        "\t\tflush_interval -1\n"
        "\t\theader_down X-Accel-Buffering no\n"
        "\t}\n"
        "}\n"
    )


def render_routes(routes: dict[str, dict]) -> None:
    """Render the Caddy configuration derived from ``routes``.

    Writes two files in the directory of ``SNIPPET_FILE``:

    * ``SNIPPET_FILE`` itself: one site block per route, sorted by hostname.
    * ``llm_upstream.caddy_inc``: a ``handle_path /llm/*`` proxy block when a
      route for ``llm.desineuron.in`` exists, otherwise an empty file.

    Args:
        routes: hostname -> route mapping; every route must provide
            ``scheme``, ``target_host`` and ``target_port``.

    Raises:
        KeyError: if a route lacks one of the required keys.
    """
    # Build both documents before touching the disk, so a malformed route
    # raises without leaving one file updated and the other stale.
    lines: list[str] = []
    for hostname, route in sorted(routes.items()):
        lines.extend(_site_block(hostname, route))
    snippet_text = "\n".join(lines).rstrip() + "\n"

    llm_route = routes.get("llm.desineuron.in")
    include_text = _llm_include(llm_route) if "llm.desineuron.in" in routes else ""

    SNIPPET_FILE.parent.mkdir(parents=True, exist_ok=True)
    SNIPPET_FILE.write_text(snippet_text, encoding="utf-8")
    # Generate a dedicated upstream include exclusively for velocity.desineuron.in/llm
    # (with the default SNIPPET_FILE this is /etc/caddy/managed/llm_upstream.caddy_inc).
    llm_inc = SNIPPET_FILE.with_name("llm_upstream.caddy_inc")
    llm_inc.write_text(include_text, encoding="utf-8")
def main() -> int:
    """Command-line entry point: ``upsert <json>``, ``delete <hostname>`` or ``list``.

    All arguments are validated before the state file is read or written, so a
    missing argument or a malformed payload is reported as an error instead of
    raising, and an incomplete route can never be persisted (which would make
    every later render fail).

    Returns:
        0 on success, 1 on a usage error or an invalid payload.
    """
    usage = "usage: manage_desineuron_routes.py <upsert|delete|list> [payload|hostname]"
    if len(sys.argv) < 2:
        print(usage)
        return 1

    command = sys.argv[1]
    if command not in ("upsert", "delete", "list"):
        print(f"unknown command: {command}")
        return 1
    if command != "list" and len(sys.argv) < 3:
        print(usage)
        return 1

    if command == "upsert":
        try:
            payload = json.loads(sys.argv[2])
        except json.JSONDecodeError as exc:
            print(f"invalid upsert payload: {exc}", file=sys.stderr)
            return 1
        required = ("hostname", "scheme", "target_host", "target_port")
        if not isinstance(payload, dict) or any(key not in payload for key in required):
            print(f"upsert payload must be a JSON object with keys: {', '.join(required)}", file=sys.stderr)
            return 1
        hostname = payload["hostname"]
        # The hostname becomes a Caddy site address; whitespace or braces would
        # let it alter the structure of the rendered configuration.
        if not isinstance(hostname, str) or not hostname or any(ch.isspace() or ch in "{}" for ch in hostname):
            print("upsert payload has an invalid hostname", file=sys.stderr)
            return 1
        routes = load_routes()
        routes[hostname] = payload
        save_routes(routes)
        render_routes(routes)
        print(json.dumps({"status": "ok", "action": "upsert", "hostname": hostname}))
        return 0

    if command == "delete":
        hostname = sys.argv[2]
        routes = load_routes()
        routes.pop(hostname, None)  # deleting an unknown hostname is not an error
        save_routes(routes)
        render_routes(routes)
        print(json.dumps({"status": "ok", "action": "delete", "hostname": hostname}))
        return 0

    # command == "list"
    print(json.dumps(load_routes(), indent=2))
    return 0
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -0,0 +1,34 @@
# Restricts access to the Ollama port (11434) on the GPU security groups:
#   1. allow the ingress security group to reach the first GPU group,
#   2. revoke the world-open (0.0.0.0/0) rule from every GPU group,
#   3. print the resulting ingress rules as JSON.
$ErrorActionPreference = "Stop"

$gpuGroups = @(
    "sg-0b144c17b1b89f4c6",
    "sg-05e4de3fe94ad6558"
)
$ingressGroup = "sg-0721b8b48e12c531d"
$ollamaPorts = @(11434)

# Runs one best-effort `aws` call. A native command does not raise a PowerShell
# error on a non-zero exit, so the exit code is checked explicitly. Failures
# (e.g. the rule already exists / does not exist) are reported as warnings and
# the script carries on, as before - but no longer silently.
function Invoke-AwsBestEffort {
    param(
        [string]$Description,
        [string[]]$Arguments
    )
    try {
        & aws @Arguments | Out-Null
        if ($LASTEXITCODE -ne 0) {
            Write-Warning "$Description failed (aws exit code $LASTEXITCODE); continuing."
        }
    } catch {
        Write-Warning "$Description could not be run: $($_.Exception.Message)"
    }
}

# Only the first GPU group receives the source-group rule, as in the original script.
# NOTE(review): the second group loses 0.0.0.0/0 without gaining this rule - confirm that is intended.
Invoke-AwsBestEffort -Description "authorize $ingressGroup -> sg-0b144c17b1b89f4c6:11434" -Arguments @(
    "ec2", "authorize-security-group-ingress",
    "--group-id", "sg-0b144c17b1b89f4c6",
    "--protocol", "tcp", "--port", "11434",
    "--source-group", $ingressGroup
)

foreach ($group in $gpuGroups) {
    foreach ($port in $ollamaPorts) {
        Invoke-AwsBestEffort -Description "revoke 0.0.0.0/0 on ${group}:$port" -Arguments @(
            "ec2", "revoke-security-group-ingress",
            "--group-id", $group,
            "--protocol", "tcp",
            "--port", "$port",
            "--cidr", "0.0.0.0/0"
        )
    }
}

aws ec2 describe-security-groups `
    --group-ids $gpuGroups `
    --query "SecurityGroups[].{GroupId:GroupId,GroupName:GroupName,Ingress:IpPermissions}" `
    --output json

View File

@@ -0,0 +1,13 @@
#!/usr/bin/env bash
set -euo pipefail
APP_ROOT=/opt/desineuron-llm-route-sync
SCRIPT_PATH=/usr/local/bin/sync_llm_route.py
VENV_PYTHON="$APP_ROOT/.venv/bin/python"
if [[ ! -x "$VENV_PYTHON" ]]; then
echo "Missing route-sync venv python at $VENV_PYTHON" >&2
exit 1
fi
exec "$VENV_PYTHON" "$SCRIPT_PATH"

View File

@@ -0,0 +1,42 @@
import boto3, os, time
from pathlib import Path
# Parse KEY=VALUE pairs from the ops control-plane .env file.
# Blank lines and comment lines (also indented ones) are skipped.
d = {}
for l in Path('/opt/desineuron-ops-control-plane/.env').read_text().splitlines():
    l = l.strip()
    if '=' in l and not l.startswith('#'):
        k, v = l.split('=', 1)
        d[k.strip()] = v.strip()

# Export the AWS credentials only when the file provides them; previously a
# missing key replaced an already-exported credential with an empty string.
for _cred in ('AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'):
    if d.get(_cred):
        os.environ[_cred] = d[_cred]

ec2 = boto3.client('ec2', region_name='us-east-1')
def get_gpu():
    """Return the EC2 instance tagged ``Name=desineuron-comfy-gpu``, or ``None``.

    The lookup is filtered server-side and paginated. Instances that are
    shutting down or terminated are excluded: they can never reach ``running``,
    so returning one would make the caller poll forever.
    """
    filters = [
        {'Name': 'tag:Name', 'Values': ['desineuron-comfy-gpu']},
        {'Name': 'instance-state-name', 'Values': ['pending', 'running', 'stopping', 'stopped']},
    ]
    paginator = ec2.get_paginator('describe_instances')
    for page in paginator.paginate(Filters=filters):
        for r in page['Reservations']:
            for i in r['Instances']:
                return i
    return None
def main(timeout_seconds=900.0):
    """Poll the GPU instance until it is running, starting it when it is stopped.

    Args:
        timeout_seconds: upper bound on the total wait. The loop used to be
            unbounded; it now gives up with a message once this much time has
            passed without the instance reaching ``running``.
    """
    deadline = time.monotonic() + timeout_seconds
    while True:
        i = get_gpu()
        if not i:
            print('Not found')
            break
        state = i['State']['Name']
        print(f"Instance {i['InstanceId']} is {state}")
        if state == 'running':
            print('Instance successfully running payload on IP:', i.get('PrivateIpAddress'))
            break
        if time.monotonic() >= deadline:
            print(f'Timed out after {timeout_seconds:.0f}s waiting for the instance to reach running')
            break
        if state == 'stopped':
            print('Starting instance...')
            ec2.start_instances(InstanceIds=[i['InstanceId']])
            time.sleep(5)
        elif state == 'stopping':
            # A stopping instance cannot be started yet; wait for it to settle.
            print('Waiting for extremely aggressive stop sequence gracefully...')
            time.sleep(10)
        else:
            print('Waiting eagerly...')
            time.sleep(10)
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,152 @@
#!/usr/bin/env python3
from __future__ import annotations
import json
import os
import subprocess
import sys
from pathlib import Path
import boto3
def load_env_file(path: Path) -> dict[str, str]:
    """Parse a ``KEY=VALUE`` env file into a dict.

    Blank lines, ``#`` comment lines and lines without ``=`` are ignored. A
    leading ``export `` is accepted, and one pair of matching single or double
    quotes around a value is removed, so ``KEY="value"`` yields ``value``
    rather than a string that still carries the quotes.

    Args:
        path: file to read; a missing file yields an empty dict.

    Returns:
        Mapping of stripped keys to stripped (and unquoted) values.
    """
    data: dict[str, str] = {}
    if not path.exists():
        return data
    for raw_line in path.read_text(encoding="utf-8").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        if line.startswith("export "):
            line = line[len("export "):].lstrip()
        key, value = line.split("=", 1)
        value = value.strip()
        if len(value) >= 2 and value[0] == value[-1] and value[0] in ("'", '"'):
            value = value[1:-1]
        data[key.strip()] = value
    return data
def env(name: str, default: str = "") -> str:
    """Return environment variable ``name``, or ``default`` when it is not set."""
    value = os.environ.get(name)
    return default if value is None else value
def resolve_target_instance(ec2) -> dict | None:
    """Pick the EC2 instance the LLM route should point at.

    When ``LLM_INSTANCE_ID`` is set, only that instance is considered and it is
    returned only if it is running. Otherwise running instances carrying the
    tag ``LLM_INSTANCE_TAG_KEY=LLM_INSTANCE_TAG_VALUE`` are queried and the
    most recently launched one wins.

    Args:
        ec2: client object exposing ``describe_instances``.

    Returns:
        The instance description dict, or ``None`` when nothing matches.
    """
    pinned_id = env("LLM_INSTANCE_ID")
    if pinned_id:
        response = ec2.describe_instances(InstanceIds=[pinned_id])
        running = (
            candidate
            for group in response["Reservations"]
            for candidate in group["Instances"]
            if candidate["State"]["Name"] == "running"
        )
        return next(running, None)

    # We assume the LLM runtime runs on the same GPU instance as comfyui initially
    key = env("LLM_INSTANCE_TAG_KEY", "DesineuronRole")
    value = env("LLM_INSTANCE_TAG_VALUE", "comfyui")
    response = ec2.describe_instances(
        Filters=[
            {"Name": "instance-state-name", "Values": ["running"]},
            {"Name": f"tag:{key}", "Values": [value]},
        ]
    )
    candidates = [
        candidate
        for group in response["Reservations"]
        for candidate in group["Instances"]
    ]
    if not candidates:
        return None
    # max() keeps the first of equally-new instances, like a stable descending sort.
    return max(candidates, key=lambda row: row["LaunchTime"])
def upsert_route(hostname: str, private_ip: str, port: int) -> subprocess.CompletedProcess[str]:
    """Register ``hostname -> http://private_ip:port`` on the ingress proxy over SSH.

    The remote command runs the route helper's ``upsert``, validates the Caddy
    configuration and reloads Caddy, chained with ``&&`` so a failing step
    stops the chain.

    Connection settings are read from ``INGRESS_SSH_HOST``, ``INGRESS_SSH_USER``,
    ``INGRESS_SSH_PORT``, ``INGRESS_SSH_KEY_PATH`` and ``INGRESS_ROUTE_HELPER``.

    Args:
        hostname: public hostname of the route.
        private_ip: upstream address.
        port: upstream port.

    Returns:
        The completed ``ssh`` process with captured stdout/stderr; the exit
        code is not checked here.
    """
    import shlex  # local import: only this function needs shell quoting

    ingress_host = env("INGRESS_SSH_HOST")
    ingress_user = env("INGRESS_SSH_USER", "ec2-user")
    ingress_port = env("INGRESS_SSH_PORT", "22")
    ingress_key = env("INGRESS_SSH_KEY_PATH")
    helper = env("INGRESS_ROUTE_HELPER", "/usr/local/bin/manage_desineuron_routes.py")
    payload = json.dumps(
        {
            "hostname": hostname,
            "scheme": "http",
            "target_host": private_ip,
            "target_port": port,
        }
    )
    # shlex.quote keeps the payload a single remote argument even if a value
    # contains a single quote or other shell metacharacters.
    command = (
        f"sudo {shlex.quote(helper)} upsert {shlex.quote(payload)}"
        " && sudo caddy validate --config /etc/caddy/Caddyfile"
        " && sudo systemctl reload caddy"
    )
    return subprocess.run(
        [
            "ssh",
            # Non-interactive and time-bounded: this runs from a timer, so it
            # must fail rather than wait for a prompt or an unreachable host.
            "-o",
            "BatchMode=yes",
            "-o",
            "ConnectTimeout=15",
            # NOTE(review): host key checking is disabled, as in the original.
            "-o",
            "StrictHostKeyChecking=no",
            "-o",
            "UserKnownHostsFile=/dev/null",
            "-i",
            ingress_key,
            "-p",
            ingress_port,
            f"{ingress_user}@{ingress_host}",
            command,
        ],
        capture_output=True,
        text=True,
        check=False,
    )
def main() -> int:
    """Point the LLM route at the current target instance when it has changed.

    Steps:
      1. Fill unset AWS/ingress settings from the ops env file.
      2. Resolve the running target instance and its private IP.
      3. Compare ``ip:port`` with the cached state file; report ``noop`` when
         equal, unless ``LLM_ROUTE_FORCE`` is ``1``/``true``/``yes``.
      4. Otherwise push the route to the ingress proxy and update the cache.

    Returns:
        0 on ``noop``/``updated``; 1 when no usable instance is found or the
        ingress host is not configured; the ssh exit code when the push fails.
    """
    ops_env = load_env_file(Path(env("OPS_ENV_FILE", "/opt/desineuron-ops-control-plane/.env")))
    # Values already present in the process environment take precedence.
    for key in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_DEFAULT_REGION"]:
        if key not in os.environ and key in ops_env:
            os.environ[key] = ops_env[key]
    os.environ.setdefault("AWS_DEFAULT_REGION", ops_env.get("OPS_DEFAULT_REGION", "us-east-1"))
    os.environ.setdefault("INGRESS_SSH_HOST", ops_env.get("OPS_INGRESS_SSH_HOST", ""))
    os.environ.setdefault("INGRESS_SSH_USER", ops_env.get("OPS_INGRESS_SSH_USER", "ec2-user"))
    os.environ.setdefault("INGRESS_SSH_PORT", ops_env.get("OPS_INGRESS_SSH_PORT", "22"))
    normalized_key_path = ops_env.get("OPS_SSH_KEY_PATH", "/opt/desineuron-ops-control-plane/state/desineuron-l4-node.pem")
    # Rewrite a key path under /app/state/ to the ops control-plane state directory.
    if normalized_key_path.startswith("/app/state/"):
        normalized_key_path = normalized_key_path.replace("/app/state/", "/opt/desineuron-ops-control-plane/state/")
    os.environ.setdefault("INGRESS_SSH_KEY_PATH", normalized_key_path)
    os.environ.setdefault("INGRESS_ROUTE_HELPER", ops_env.get("OPS_INGRESS_ROUTE_HELPER", "/usr/local/bin/manage_desineuron_routes.py"))

    region = os.environ["AWS_DEFAULT_REGION"]
    hostname = env("LLM_ROUTE_HOSTNAME", "llm.desineuron.in")
    port = int(env("LLM_ROUTE_PORT", "11434"))
    state_file = Path(env("LLM_ROUTE_STATE_FILE", "/var/lib/desineuron-llm-route-sync/current_target.txt"))
    # Lets an operator re-push the route even though the cached target matches,
    # e.g. after the proxy configuration was replaced.
    force = env("LLM_ROUTE_FORCE").strip().lower() in {"1", "true", "yes"}

    ec2 = boto3.client("ec2", region_name=region)
    instance = resolve_target_instance(ec2)
    if not instance:
        print("No running LLM target instance found", file=sys.stderr)
        return 1
    private_ip = instance.get("PrivateIpAddress")
    if not private_ip:
        print("Target instance has no private IP", file=sys.stderr)
        return 1

    desired_state = f"{private_ip}:{port}"
    current = state_file.read_text(encoding="utf-8").strip() if state_file.exists() else ""
    if current == desired_state and not force:
        print(
            json.dumps(
                {"status": "noop", "hostname": hostname, "target_host": private_ip, "target_port": port}
            )
        )
        return 0

    # Fail with a clear message instead of letting ssh choke on "user@".
    if not env("INGRESS_SSH_HOST"):
        print("INGRESS_SSH_HOST is not configured (set it or OPS_INGRESS_SSH_HOST)", file=sys.stderr)
        return 1

    result = upsert_route(hostname, private_ip, port)
    if result.returncode != 0:
        print(result.stdout)
        print(result.stderr, file=sys.stderr)
        return result.returncode

    # Only record the new target after the proxy accepted it.
    state_file.parent.mkdir(parents=True, exist_ok=True)
    state_file.write_text(desired_state, encoding="utf-8")
    print(
        json.dumps(
            {"status": "updated", "hostname": hostname, "target_host": private_ip, "target_port": port}
        )
    )
    return 0
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -0,0 +1,21 @@
#!/bin/bash
# Push the Caddyfile to the ingress proxy, renew the wildcard certificate with
# certbot's Cloudflare DNS plugin, then install the certificate on the proxy
# and reload Caddy.
#
# Environment:
#   CLOUDFLARE_API_TOKEN  Cloudflare DNS API token. Optional when the
#                         credentials file already exists on this host.
#   INGRESS_HOST, INGRESS_USER, SSH_KEY  override the previous hard-coded values.
#
# SECURITY: the token used to be hard-coded in this script. A token that has
# been committed to version control must be treated as leaked and rotated.
set -ex

INGRESS_HOST="${INGRESS_HOST:-98.87.120.120}"
INGRESS_USER="${INGRESS_USER:-ec2-user}"
SSH_KEY="${SSH_KEY:-/opt/desineuron-ops-control-plane/state/desineuron-l4-node.pem}"
CLOUDFLARE_INI=/etc/letsencrypt/.secrets/cloudflare.ini

REMOTE="${INGRESS_USER}@${INGRESS_HOST}"
SSH_OPTS=(-o StrictHostKeyChecking=no -i "${SSH_KEY}")

# Push the Caddyfile configuration
sudo scp "${SSH_OPTS[@]}" /tmp/Caddyfile "${REMOTE}:/tmp/Caddyfile"
sudo ssh "${SSH_OPTS[@]}" "${REMOTE}" 'sudo cp /tmp/Caddyfile /etc/caddy/Caddyfile'

# Fix cloudflare token
sudo mkdir -p /etc/letsencrypt/.secrets/
# Tracing is switched off while the token is handled so `set -x` cannot echo it.
set +x
if [[ -n "${CLOUDFLARE_API_TOKEN:-}" ]]; then
  # Create the file with mode 600 before writing, so the secret is never readable by others.
  sudo install -m 600 /dev/null "${CLOUDFLARE_INI}"
  printf 'dns_cloudflare_api_token = %s\n' "${CLOUDFLARE_API_TOKEN}" | sudo tee "${CLOUDFLARE_INI}" >/dev/null
elif ! sudo test -s "${CLOUDFLARE_INI}"; then
  echo "CLOUDFLARE_API_TOKEN is not set and ${CLOUDFLARE_INI} does not exist" >&2
  exit 1
fi
set -x
sudo chmod 600 "${CLOUDFLARE_INI}"

# Renew and expand Let's Encrypt certificates locally on velocity-linux utilizing cloudflare dns
sudo certbot certonly --cert-name desineuron-infra --dns-cloudflare --dns-cloudflare-credentials "${CLOUDFLARE_INI}" -d '*.desineuron.in' -d desineuron.in --expand --non-interactive --agree-tos

# Copy the fresh certs directly to the proxy substrate
sudo scp "${SSH_OPTS[@]}" /etc/letsencrypt/live/desineuron-infra/fullchain.pem "${REMOTE}:/tmp/fullchain.pem"
sudo scp "${SSH_OPTS[@]}" /etc/letsencrypt/live/desineuron-infra/privkey.pem "${REMOTE}:/tmp/privkey.pem"

# Apply to Caddy: install, remove the staged copies (the private key must not
# linger in /tmp), validate, then reload. `cp` is kept so the destination
# files retain their existing owner and mode.
sudo ssh "${SSH_OPTS[@]}" "${REMOTE}" \
  'sudo cp /tmp/fullchain.pem /etc/caddy/tls/fullchain.pem \
   && sudo cp /tmp/privkey.pem /etc/caddy/tls/privkey.pem \
   && rm -f /tmp/fullchain.pem /tmp/privkey.pem \
   && sudo caddy validate --config /etc/caddy/Caddyfile \
   && sudo systemctl reload caddy'

View File

@@ -11,6 +11,17 @@ server {
access_log /var/log/nginx/velocity.desineuron.in.access.log;
error_log /var/log/nginx/velocity.desineuron.in.error.log;
# Requests under /api/ are proxied to the local backend on port 8001.
location /api/ {
proxy_pass http://127.0.0.1:8001;
# HTTP/1.1 plus the Upgrade/Connection headers allow protocol upgrades through the proxy.
# NOTE(review): Connection is set to "upgrade" for every request, not only upgrade requests - confirm this is intended.
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
# Pass the original host, client address and scheme on to the backend.
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
# Everything else: serve the file or directory if it exists, otherwise fall back to /index.html.
location / {
try_files $uri $uri/ /index.html;
}