forked from sagnik/Velocity-OS
Initial commit: Velocity-OS migration
This commit is contained in:
164
infrastructure/model-hydration/hydrate_gpu_comfy_models.py
Normal file
164
infrastructure/model-hydration/hydrate_gpu_comfy_models.py
Normal file
@@ -0,0 +1,164 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import boto3
|
||||
|
||||
|
||||
# Checkpoint filename -> source URI hydrated when COMFY_CHECKPOINTS_JSON is not set.
DEFAULT_CHECKPOINTS: dict[str, str] = {
    "realvisxlV50_v50LightningBakedvae.safetensors":
        "s3://project-velocity/models/realvisxlV50_v50LightningBakedvae.safetensors",
}
|
||||
|
||||
|
||||
def load_env_file(path: Path) -> dict[str, str]:
    """Parse a dotenv-style ``KEY=VALUE`` file into a dictionary.

    Blank lines, ``#`` comment lines and lines without ``=`` are skipped.
    A leading ``export `` is dropped from the key, and one pair of matching
    single or double quotes around the value is removed, so a line such as
    ``export KEY="value"`` yields ``{"KEY": "value"}``.

    Args:
        path: Location of the env file.

    Returns:
        The parsed key/value pairs; empty when *path* is not a regular file.
    """
    data: dict[str, str] = {}
    # is_file() (rather than exists()) also covers a directory at this path,
    # which would otherwise make read_text() raise.
    if not path.is_file():
        return data
    for raw_line in path.read_text(encoding="utf-8").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, value = line.split("=", 1)
        key = key.strip()
        if key.startswith("export "):
            key = key[len("export "):].strip()
        value = value.strip()
        # Strip one layer of matching quotes so quoted secrets are not used
        # with the quote characters embedded.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            value = value[1:-1]
        if key:
            data[key] = value
    return data
|
||||
|
||||
|
||||
def env(name: str, default: str = "") -> str:
    """Return environment variable *name*, or *default* when it is not set."""
    return os.getenv(name, default)
|
||||
|
||||
|
||||
def resolve_target_instance(ec2) -> dict | None:
    """Select the running EC2 instance to hydrate.

    When ``COMFY_INSTANCE_ID`` is set, only that instance is queried.
    Otherwise running instances are looked up by tag, using
    ``COMFY_INSTANCE_TAG_KEY`` (default ``DesineuronRole``) and
    ``COMFY_INSTANCE_TAG_VALUE`` (default ``comfyui``).

    Args:
        ec2: Client exposing ``describe_instances`` (a boto3 EC2 client in
            this script).

    Returns:
        The instance description with the latest ``LaunchTime`` among those
        in the ``running`` state, or ``None`` when there is none.
    """
    instance_id = env("COMFY_INSTANCE_ID")
    if instance_id:
        response = ec2.describe_instances(InstanceIds=[instance_id])
    else:
        tag_key = env("COMFY_INSTANCE_TAG_KEY", "DesineuronRole")
        tag_value = env("COMFY_INSTANCE_TAG_VALUE", "comfyui")
        state_filter = {"Name": "instance-state-name", "Values": ["running"]}
        tag_filter = {"Name": f"tag:{tag_key}", "Values": [tag_value]}
        response = ec2.describe_instances(Filters=[state_filter, tag_filter])

    # The explicit-id lookup is not state-filtered server side, so the
    # running check is applied to both branches here.
    running = []
    for reservation in response["Reservations"]:
        for candidate in reservation["Instances"]:
            if candidate["State"]["Name"] == "running":
                running.append(candidate)

    if not running:
        return None
    # max() returns the first of equally recent instances, matching a stable
    # descending sort followed by taking element 0.
    return max(running, key=lambda row: row["LaunchTime"])
|
||||
|
||||
|
||||
def parse_checkpoints() -> dict[str, str]:
    """Return the checkpoint filename -> source URI mapping to hydrate.

    Reads ``COMFY_CHECKPOINTS_JSON``; when it is unset or empty a copy of
    ``DEFAULT_CHECKPOINTS`` is returned.

    Returns:
        Mapping with both keys and values coerced to ``str``.

    Raises:
        ValueError: If the variable holds invalid JSON
            (``json.JSONDecodeError``) or JSON that is not an object.
    """
    raw = env("COMFY_CHECKPOINTS_JSON")
    if raw:
        decoded = json.loads(raw)
        if isinstance(decoded, dict):
            return {str(filename): str(uri) for filename, uri in decoded.items()}
        raise ValueError("COMFY_CHECKPOINTS_JSON must be a JSON object of filename to source URI")
    return dict(DEFAULT_CHECKPOINTS)
|
||||
|
||||
|
||||
def remote_hydration_script(checkpoints: dict[str, str]) -> str:
    """Render the bash script that hydrates ComfyUI checkpoints on the GPU host.

    The generated script:

    1. refuses to run (exit status 2) unless ``/opt/dlami/nvme`` is a mount
       point, and only then creates the checkpoint directory;
    2. writes a tab-separated manifest of *checkpoints* to a ``mktemp`` file
       that is removed on exit, including on failure;
    3. downloads every checkpoint whose target file is missing or empty via
       ``aws s3 cp`` into a ``.part`` file that is then moved into place;
    4. restarts the ``comfyui`` service if anything was downloaded;
    5. queries the local ``/models/checkpoints`` endpoint, retrying while the
       service is still starting.

    Args:
        checkpoints: Mapping of checkpoint filename to source URI.

    Returns:
        The script text, intended to be piped to ``bash -s``.
    """
    payload = json.dumps(checkpoints)
    # NOTE: doubled braces are literal braces in the output; "\\t" emits a
    # backslash-t that the embedded Python and bash's $'..' turn into a tab.
    return f"""#!/usr/bin/env bash
set -euo pipefail
CHECKPOINT_DIR="${{COMFY_CHECKPOINT_DIR:-/opt/dlami/nvme/ComfyUI/models/checkpoints}}"
# Check the mount before creating anything: mkdir on an unmounted path would
# create the directory on the root disk underneath the mount point.
if ! mountpoint -q /opt/dlami/nvme; then
  echo "GPU NVMe mount /opt/dlami/nvme is not mounted" >&2
  exit 2
fi
mkdir -p "$CHECKPOINT_DIR"
manifest="$(mktemp)"
trap 'rm -f "$manifest"' EXIT
changed=0
python3 - <<'PY' > "$manifest"
import json
for name, source in json.loads({payload!r}).items():
    print(f"{{name}}\\t{{source}}")
PY
while IFS=$'\\t' read -r filename source; do
  target="$CHECKPOINT_DIR/$filename"
  if [ ! -s "$target" ]; then
    tmp="$target.part"
    rm -f "$tmp"
    aws s3 cp "$source" "$tmp" --no-progress
    mv "$tmp" "$target"
    chmod 0644 "$target"
    changed=1
  fi
done < "$manifest"
if [ "$changed" = "1" ]; then
  sudo systemctl restart comfyui
fi
sleep 3
# The service may still be starting right after a restart; retry instead of
# failing the whole run on the first refused connection.
curl -fsS --retry 10 --retry-delay 3 --retry-connrefused http://127.0.0.1:8188/models/checkpoints
"""
|
||||
|
||||
|
||||
def main() -> int:
    """Hydrate ComfyUI checkpoints on the running GPU instance over SSH.

    AWS credentials and region missing from the process environment are
    filled in from the ops env file (``OPS_ENV_FILE``), the target instance
    is resolved through EC2, and the rendered hydration script is piped to
    ``bash -s`` on that host.

    Returns:
        ``0`` on success; ``1`` when no target instance or IP is found or the
        checkpoint configuration is invalid; otherwise the non-zero exit
        status of the ssh command.
    """
    ops_env = load_env_file(Path(env("OPS_ENV_FILE", "/opt/desineuron-ops-control-plane/.env")))
    # Values already present in the process environment win over the env file.
    for key in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_DEFAULT_REGION"):
        if key not in os.environ and key in ops_env:
            os.environ[key] = ops_env[key]
    os.environ.setdefault("AWS_DEFAULT_REGION", ops_env.get("OPS_DEFAULT_REGION", "us-east-1"))

    key_path = env(
        "GPU_SSH_KEY_PATH",
        ops_env.get("OPS_SSH_KEY_PATH", "/opt/desineuron-ops-control-plane/state/desineuron-l4-node.pem"),
    )
    # Remap only the leading "/app/state/" (presumably the path as seen from
    # inside the ops container — confirm) to the host state directory;
    # str.replace would also rewrite later occurrences in the path.
    container_prefix = "/app/state/"
    if key_path.startswith(container_prefix):
        key_path = "/opt/desineuron-ops-control-plane/state/" + key_path[len(container_prefix):]
    ssh_user = env("GPU_SSH_USER", "ubuntu")

    ec2 = boto3.client("ec2", region_name=os.environ["AWS_DEFAULT_REGION"])
    instance = resolve_target_instance(ec2)
    if not instance:
        print("No running ComfyUI GPU instance found", file=sys.stderr)
        return 1
    target_host = instance.get("PublicIpAddress") or instance.get("PrivateIpAddress")
    if not target_host:
        print("Target GPU instance has no reachable IP", file=sys.stderr)
        return 1

    try:
        checkpoints = parse_checkpoints()
    except ValueError as exc:
        # Covers json.JSONDecodeError too; report it instead of a traceback.
        print(f"Invalid COMFY_CHECKPOINTS_JSON: {exc}", file=sys.stderr)
        return 1

    # NOTE(review): StrictHostKeyChecking=no disables SSH host-key
    # verification for this connection.
    command = [
        "sudo",
        "ssh",
        "-o",
        "StrictHostKeyChecking=no",
        "-o",
        "ConnectTimeout=15",
        "-i",
        key_path,
        f"{ssh_user}@{target_host}",
        "bash -s",
    ]
    result = subprocess.run(
        command,
        input=remote_hydration_script(checkpoints),
        text=True,
        capture_output=True,
        check=False,
    )
    if result.stdout:
        print(result.stdout.strip())
    if result.returncode != 0:
        if result.stderr:
            print(result.stderr.strip(), file=sys.stderr)
        return result.returncode
    return 0
|
||||
|
||||
|
||||
# Script entry point: the return value of main() becomes the exit status.
if __name__ == "__main__":
    sys.exit(main())
|
||||
83
infrastructure/model-hydration/hydrate_models.sh
Normal file
83
infrastructure/model-hydration/hydrate_models.sh
Normal file
@@ -0,0 +1,83 @@
|
||||
#!/usr/bin/env bash
# ============================================================
# Velocity-OS — Model Hydration Script
# Uses s5cmd for high-throughput parallel S3 → NVMe sync.
# Run once at initial install; safe to re-run for updates.
#
# Models synced (all stored on NVMe, never in Docker images):
# - Wan 2.2 (ComfyUI video/image model)
# - Qwen-Image 2512 (ComfyUI poster/image model)
# - Qwen3.6 35B A3B (SGLang LLM — MIG slice 0)
#
# Requires: s5cmd, AWS credentials with S3 read access
#
# Environment overrides:
#   S3_MODEL_BUCKET         source bucket URI (default s3://desineuron-models)
#   S5CMD_CONCURRENCY       s5cmd worker count (default 32)
#   S5CMD_CREDENTIALS_FILE  credentials file (default /etc/velocity/aws-credentials)
#
# Exits non-zero if a sync fails or any checksum does not verify.
# ============================================================
set -euo pipefail

# ── Configuration ────────────────────────────────────────────
S3_BUCKET="${S3_MODEL_BUCKET:-s3://desineuron-models}"
LOCAL_BASE="/opt/dlami/nvme/models"
S5CMD_CONCURRENCY="${S5CMD_CONCURRENCY:-32}" # Tune to NVMe write IOPS
S5CMD_CREDENTIALS_FILE="${S5CMD_CREDENTIALS_FILE:-/etc/velocity/aws-credentials}"

# Sync every object under S3 prefix $1 into local directory $2.
sync_prefix() {
  local source_prefix="$1" target_dir="$2"
  s5cmd \
    --numworkers "${S5CMD_CONCURRENCY}" \
    --credentials-file "${S5CMD_CREDENTIALS_FILE}" \
    sync \
    "${S3_BUCKET}/${source_prefix}/*" \
    "${target_dir}/"
}

# ── Ensure directories exist ─────────────────────────────────
mkdir -p \
  "${LOCAL_BASE}/comfy/wan2.2" \
  "${LOCAL_BASE}/comfy/qwen-image-2512" \
  "${LOCAL_BASE}/llm/qwen3.6-35b-a3b"

# ── Check s5cmd installed ────────────────────────────────────
if ! command -v s5cmd &> /dev/null; then
  echo "ERROR: s5cmd not found. Install from https://github.com/peak/s5cmd" >&2
  echo " curl -L https://github.com/peak/s5cmd/releases/latest/download/s5cmd_Linux_x86_64.tar.gz | tar xz -C /usr/local/bin" >&2
  exit 1
fi

echo "=== Velocity-OS Model Hydration ==="
echo "Source: ${S3_BUCKET}"
echo "Target: ${LOCAL_BASE}"
echo "s5cmd workers: ${S5CMD_CONCURRENCY}"
echo ""

# ── Wan 2.2 (ComfyUI — MIG slice 1) ──────────────────────────
echo "[1/3] Syncing Wan 2.2..."
sync_prefix "wan2.2" "${LOCAL_BASE}/comfy/wan2.2"
echo " ✓ Wan 2.2 synced."

# ── Qwen-Image 2512 (ComfyUI — MIG slice 1) ──────────────────
echo "[2/3] Syncing Qwen-Image 2512..."
sync_prefix "qwen-image-2512" "${LOCAL_BASE}/comfy/qwen-image-2512"
echo " ✓ Qwen-Image 2512 synced."

# ── Qwen3.6 35B A3B (SGLang — MIG slice 0) ───────────────────
echo "[3/3] Syncing Qwen3.6 35B A3B (LLM — ~70GB, be patient)..."
sync_prefix "qwen3.6-35b-a3b" "${LOCAL_BASE}/llm/qwen3.6-35b-a3b"
echo " ✓ Qwen3.6 35B synced."

# ── Verify checksums (optional — if .sha256 files exist in S3) ─
echo ""
echo "=== Verifying checksums ==="
checksum_failures=0
for dir in "${LOCAL_BASE}/comfy/wan2.2" "${LOCAL_BASE}/comfy/qwen-image-2512" "${LOCAL_BASE}/llm/qwen3.6-35b-a3b"; do
  # Only verify directories that actually contain .sha256 manifests.
  if compgen -G "${dir}/*.sha256" > /dev/null; then
    echo " Checking ${dir}..."
    # Test the result explicitly: a failure on the left of an `&&` list is
    # ignored by `set -e`, which would let a corrupt download pass silently.
    if (cd "${dir}" && sha256sum -c --quiet ./*.sha256); then
      echo " ✓ ${dir} checksums OK"
    else
      echo "ERROR: checksum verification failed in ${dir}" >&2
      checksum_failures=$((checksum_failures + 1))
    fi
  fi
done

if (( checksum_failures > 0 )); then
  echo "ERROR: ${checksum_failures} model directories failed checksum verification" >&2
  exit 1
fi

echo ""
echo "=== Model hydration complete ==="
echo "NVMe usage:"
du -sh "${LOCAL_BASE}"/*/* 2>/dev/null || true
|
||||
@@ -0,0 +1,40 @@
|
||||
#!/usr/bin/env bash
# Replace the single comfyui.service with a pool of comfyui-worker@N units.
#
# Installs the worker launcher and the systemd template that sit next to this
# script, (re)starts COMFY_WORKER_COUNT workers (default 4), then prints the
# first 500 characters of each worker's /models/checkpoints response.
# Worker N is probed on port 8188 + N.
#
# Environment:
#   COMFY_WORKER_COUNT           number of workers to run (default 4)
#   STOP_SGLANG_FOR_COMFY_POOL   set to 1 to stop desineuron-sglang.service first
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
STOP_SGLANG_FOR_COMFY_POOL="${STOP_SGLANG_FOR_COMFY_POOL:-0}"
WORKERS="${COMFY_WORKER_COUNT:-4}"

# Reject anything that is not a plain non-negative integer before it reaches
# the arithmetic expansions below.
if ! [[ "$WORKERS" =~ ^(0|[1-9][0-9]*)$ ]]; then
  echo "COMFY_WORKER_COUNT must be a non-negative integer, got '${WORKERS}'" >&2
  exit 1
fi

if [[ ! -d /opt/dlami/nvme/ComfyUI ]]; then
  echo "Missing ComfyUI at /opt/dlami/nvme/ComfyUI" >&2
  exit 1
fi

if ! mountpoint -q /opt/dlami/nvme; then
  echo "/opt/dlami/nvme is not mounted; refusing to run model workers on root disk" >&2
  exit 1
fi

if [[ "$STOP_SGLANG_FOR_COMFY_POOL" == "1" ]]; then
  sudo systemctl stop desineuron-sglang.service || true
fi

# The legacy single-instance unit may not exist; ignore failures here.
sudo systemctl stop comfyui.service || true
sudo systemctl disable comfyui.service || true

sudo install -m 0755 "$SCRIPT_DIR/desineuron-start-comfy-worker" /usr/local/bin/desineuron-start-comfy-worker
sudo install -m 0644 "$SCRIPT_DIR/comfyui-worker@.service" /etc/systemd/system/comfyui-worker@.service
sudo systemctl daemon-reload

for index in $(seq 0 "$((WORKERS - 1))"); do
  # `restart` also starts a stopped unit, so `enable --now` followed by
  # `restart` would only start each worker twice in a row.
  sudo systemctl enable "comfyui-worker@${index}.service"
  sudo systemctl restart "comfyui-worker@${index}.service"
done

sleep 5
for index in $(seq 0 "$((WORKERS - 1))"); do
  port=$((8188 + index))
  echo "worker ${index} http://127.0.0.1:${port}"
  # Capture first, truncate second: piping curl into `head -c` can fail the
  # pipeline under pipefail when head closes the pipe early. The retries
  # cover workers that are still starting.
  response="$(curl -fsS --retry 10 --retry-delay 3 --retry-connrefused "http://127.0.0.1:${port}/models/checkpoints")"
  printf '%s' "${response:0:500}"
  echo
done
|
||||
104
infrastructure/model-hydration/install_gpu_sglang_runtime.sh
Normal file
104
infrastructure/model-hydration/install_gpu_sglang_runtime.sh
Normal file
@@ -0,0 +1,104 @@
|
||||
#!/usr/bin/env bash
# Install the Desineuron SGLang runtime and run it as a systemd service.
#
# Steps: resolve the bind address (SGLANG_HOST, else EC2 instance metadata,
# else the first address from `hostname -I`), create a virtualenv under
# RUNTIME_ROOT with sglang installed, write /etc/default/desineuron-sglang,
# the launch wrapper and the unit file, then enable and start the service.
#
# Every setting below can be overridden through the environment variable
# named in its default expansion.
set -euo pipefail

NVME_ROOT="${NVME_ROOT:-/opt/dlami/nvme/sglang}"
RUNTIME_ROOT="${RUNTIME_ROOT:-/opt/desineuron-sglang}"
VENV_PATH="${RUNTIME_ROOT}/.venv"
PORT="${SGLANG_PORT:-30100}"
HOST="${SGLANG_HOST:-}"
MODEL_ID="${SGLANG_MODEL_ID:-qwen3.6-35b-a3b}"
MODEL_PATH="${SGLANG_MODEL_PATH:-/opt/dlami/nvme/models/Qwen-Qwen3.6-35B-A3B-FP8}"
TP_SIZE="${SGLANG_TP_SIZE:-4}"
CONTEXT_LENGTH="${SGLANG_CONTEXT_LENGTH:-131072}"
MEM_FRACTION_STATIC="${SGLANG_MEM_FRACTION_STATIC:-0.88}"
ATTENTION_BACKEND="${SGLANG_ATTENTION_BACKEND:-flashinfer}"
DIST_INIT_ADDR="${SGLANG_DIST_INIT_ADDR:-127.0.0.1:50000}"

# Ask the instance metadata service (token-based) for the private IPv4.
# The timeouts keep the script from hanging where the endpoint is unreachable.
if [[ -z "${HOST}" ]]; then
  IMDS_TOKEN="$(curl -fsS --connect-timeout 2 --max-time 5 -X PUT http://169.254.169.254/latest/api/token -H 'X-aws-ec2-metadata-token-ttl-seconds: 21600' || true)"
  if [[ -n "${IMDS_TOKEN}" ]]; then
    HOST="$(curl -fsS --connect-timeout 2 --max-time 5 -H "X-aws-ec2-metadata-token: ${IMDS_TOKEN}" http://169.254.169.254/latest/meta-data/local-ipv4 || true)"
  fi
fi

# Fall back to the first address reported by the host itself.
if [[ -z "${HOST}" ]]; then
  HOST="$(hostname -I | awk '{print $1}')"
fi

if [[ -z "${HOST}" ]]; then
  echo "Unable to resolve GPU private IP for SGLang host binding" >&2
  exit 1
fi

sudo mkdir -p "${NVME_ROOT}"/{cache,logs,state} "${RUNTIME_ROOT}"
# The directory above may have just been created root-owned, while the venv
# below is built without sudo; hand it to the invoking user when needed.
if [[ ! -w "${RUNTIME_ROOT}" ]]; then
  sudo chown "$(id -u):$(id -g)" "${RUNTIME_ROOT}"
fi
python3 -m venv "${VENV_PATH}"
"${VENV_PATH}/bin/pip" install --upgrade pip wheel setuptools
"${VENV_PATH}/bin/pip" install "sglang[all]>=0.5.3" flashinfer-python huggingface_hub

# Runtime configuration: sourced by the launch wrapper and loaded by systemd
# as the unit's EnvironmentFile.
sudo tee /etc/default/desineuron-sglang >/dev/null <<EOF
SGLANG_RUNTIME_ROOT=${RUNTIME_ROOT}
SGLANG_HOST=${HOST}
SGLANG_PORT=${PORT}
SGLANG_MODEL_ID=${MODEL_ID}
SGLANG_MODEL_PATH=${MODEL_PATH}
SGLANG_TP_SIZE=${TP_SIZE}
SGLANG_CONTEXT_LENGTH=${CONTEXT_LENGTH}
SGLANG_MEM_FRACTION_STATIC=${MEM_FRACTION_STATIC}
SGLANG_ATTENTION_BACKEND=${ATTENTION_BACKEND}
SGLANG_DIST_INIT_ADDR=${DIST_INIT_ADDR}
SGLANG_CACHE_DIR=${NVME_ROOT}/cache
SGLANG_LOG_DIR=${NVME_ROOT}/logs
SGLANG_STATE_DIR=${NVME_ROOT}/state
SGLANG_USE_FLASHINFER=1
SGLANG_ENABLE_PREFIX_CACHE=1
SGLANG_SERVED_MODEL_NAME=${MODEL_ID}
SGLANG_EXTRA_ARGS=
EOF
sudo chmod 600 /etc/default/desineuron-sglang

# Launch wrapper. The heredoc is quoted, so every variable below is expanded
# when the wrapper runs, not while this installer runs.
sudo tee /usr/local/bin/desineuron-sglang-launch.sh >/dev/null <<'EOF'
#!/usr/bin/env bash
set -euo pipefail
source /etc/default/desineuron-sglang
export HF_HOME="${SGLANG_CACHE_DIR}/hf"
export HUGGINGFACE_HUB_CACHE="${SGLANG_CACHE_DIR}/hf"
export CUDA_DEVICE_MAX_CONNECTIONS=1
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
export SGLANG_USE_FLASHINFER="${SGLANG_USE_FLASHINFER}"
# Follow the configured runtime root instead of a hard-coded path; the
# default covers env files that lack SGLANG_RUNTIME_ROOT.
# SGLANG_EXTRA_ARGS is left unquoted so it splits into separate arguments.
exec "${SGLANG_RUNTIME_ROOT:-/opt/desineuron-sglang}/.venv/bin/sglang" serve \
  --host "${SGLANG_HOST}" \
  --port "${SGLANG_PORT}" \
  --model-path "${SGLANG_MODEL_PATH}" \
  --served-model-name "${SGLANG_SERVED_MODEL_NAME}" \
  --tp-size "${SGLANG_TP_SIZE}" \
  --context-length "${SGLANG_CONTEXT_LENGTH}" \
  --mem-fraction-static "${SGLANG_MEM_FRACTION_STATIC}" \
  --attention-backend "${SGLANG_ATTENTION_BACKEND}" \
  --dist-init-addr "${SGLANG_DIST_INIT_ADDR}" \
  --enable-metrics \
  --skip-server-warmup \
  ${SGLANG_EXTRA_ARGS}
EOF
sudo chmod 0755 /usr/local/bin/desineuron-sglang-launch.sh

# Unit file. This heredoc is unquoted so RUNTIME_ROOT is expanded now.
sudo tee /etc/systemd/system/desineuron-sglang.service >/dev/null <<EOF
[Unit]
Description=Desineuron SGLang Runtime
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
EnvironmentFile=/etc/default/desineuron-sglang
WorkingDirectory=${RUNTIME_ROOT}
ExecStart=/usr/local/bin/desineuron-sglang-launch.sh
Restart=always
RestartSec=5
LimitNOFILE=1048576

[Install]
WantedBy=multi-user.target
EOF

sudo systemctl daemon-reload
sudo systemctl enable --now desineuron-sglang.service
sudo systemctl --no-pager --full status desineuron-sglang.service
|
||||
@@ -0,0 +1,85 @@
|
||||
#!/usr/bin/env bash
# Install the Desineuron SGLang watchdog: a health-check script, a model
# hydration helper, and a systemd oneshot service driven by a 5-minute timer.
set -euo pipefail

# Watchdog script. The quoted heredoc defers all expansion to run time.
sudo tee /usr/local/bin/desineuron-sglang-watchdog.sh >/dev/null <<'EOF'
#!/usr/bin/env bash
set -euo pipefail
source /etc/default/desineuron-sglang

HEALTH_URL="http://127.0.0.1:${SGLANG_PORT}/v1/models"
HYDRATE_HELPER="/usr/local/bin/desineuron-sglang-hydrate.sh"
STARTUP_GRACE_SECONDS="${SGLANG_STARTUP_GRACE_SECONDS:-900}"
HEALTH_TIMEOUT_SECONDS="${SGLANG_HEALTH_TIMEOUT_SECONDS:-60}"

# Download the model when its directory is missing.
# NOTE(review): SGLANG_MODEL_ID is handed to the helper as the Hugging Face
# repo id; confirm the configured value is a valid repo id.
if [[ ! -d "${SGLANG_MODEL_PATH}" ]]; then
  "${HYDRATE_HELPER}" "${SGLANG_MODEL_ID}" "${SGLANG_MODEL_PATH}"
fi

if ! systemctl is-active --quiet desineuron-sglang.service; then
  systemctl restart desineuron-sglang.service
  sleep 10
fi

# Skip the health check while the main process is younger than the grace
# period. Age is derived from the mtime of /proc/<pid>; if that cannot be
# read the age evaluates to 0 and the run is treated as within the grace period.
main_pid="$(systemctl show -p MainPID --value desineuron-sglang.service || true)"
if [[ -n "${main_pid}" && "${main_pid}" != "0" ]]; then
  runtime_age="$(( $(date +%s) - $(stat -c %Y "/proc/${main_pid}" 2>/dev/null || date +%s) ))"
  if (( runtime_age < STARTUP_GRACE_SECONDS )); then
    echo "startup_grace"
    exit 0
  fi
fi

if ! curl --max-time "${HEALTH_TIMEOUT_SECONDS}" -fsS "${HEALTH_URL}" >/dev/null; then
  systemctl restart desineuron-sglang.service
  sleep 20
fi

# Final probe: a failure here makes the script, and so the oneshot unit, fail.
curl --max-time "${HEALTH_TIMEOUT_SECONDS}" -fsS "${HEALTH_URL}" >/dev/null
echo "healthy"
EOF
sudo chmod 0755 /usr/local/bin/desineuron-sglang-watchdog.sh

# Hydration helper: download model <id> into <target path>.
sudo tee /usr/local/bin/desineuron-sglang-hydrate.sh >/dev/null <<'EOF'
#!/usr/bin/env bash
set -euo pipefail
MODEL_ID="${1:?model id required}"
TARGET_PATH="${2:?target path required}"
# Prefer the tools from the runtime virtualenv when it exists, so that `hf`
# and `python3` resolve there before the system PATH is consulted.
VENV_BIN="${SGLANG_VENV_BIN:-/opt/desineuron-sglang/.venv/bin}"
if [[ -d "${VENV_BIN}" ]]; then
  PATH="${VENV_BIN}:${PATH}"
fi
mkdir -p "$(dirname "${TARGET_PATH}")"
if command -v hf >/dev/null 2>&1; then
  hf download "${MODEL_ID}" --local-dir "${TARGET_PATH}" --max-workers 8
else
  # Pass the values as arguments through a quoted heredoc instead of
  # interpolating them into Python source, so quotes or backslashes in them
  # cannot break or alter the program.
  python3 - "${MODEL_ID}" "${TARGET_PATH}" <<'PY'
import sys

from huggingface_hub import snapshot_download

snapshot_download(repo_id=sys.argv[1], local_dir=sys.argv[2], max_workers=8)
PY
fi
EOF
sudo chmod 0755 /usr/local/bin/desineuron-sglang-hydrate.sh

sudo tee /etc/systemd/system/desineuron-sglang-watchdog.service >/dev/null <<'EOF'
[Unit]
Description=Desineuron SGLang Runtime Watchdog
After=network-online.target

[Service]
Type=oneshot
ExecStart=/usr/local/bin/desineuron-sglang-watchdog.sh
EOF

sudo tee /etc/systemd/system/desineuron-sglang-watchdog.timer >/dev/null <<'EOF'
[Unit]
Description=Run the Desineuron SGLang watchdog every 5 minutes

[Timer]
OnBootSec=2min
OnUnitActiveSec=5min
Unit=desineuron-sglang-watchdog.service

[Install]
WantedBy=timers.target
EOF

sudo systemctl daemon-reload
sudo systemctl enable --now desineuron-sglang-watchdog.timer
# Run one check immediately; as a oneshot unit this blocks until it finishes.
sudo systemctl start desineuron-sglang-watchdog.service
sudo systemctl --no-pager --full status desineuron-sglang-watchdog.timer
|
||||
Reference in New Issue
Block a user