forked from sagnik/Project_Velocity
Co-authored-by: Sagnik <sagnik7896@gmail.com> Reviewed-on: sagnik/Project_Velocity#33
86 lines
2.5 KiB
Bash
86 lines
2.5 KiB
Bash
#!/usr/bin/env bash
#
# Installer for the Desineuron SGLang watchdog.
#
# Writes two helper scripts into /usr/local/bin (the watchdog and the model
# hydrate helper), writes a systemd service + timer pair that runs the watchdog,
# then reloads systemd, enables the timer and runs the watchdog once.
#
# Requires sudo for every write and every systemctl call.

# Strict mode: abort on command failure, on unset variables, and on a failure
# in any stage of a pipeline.
set -euo pipefail

# Install the watchdog script that the systemd watchdog service executes.
# The quoted 'EOF' delimiter writes the body verbatim, so every ${...} below is
# expanded when the watchdog runs rather than at install time.
sudo tee /usr/local/bin/desineuron-sglang-watchdog.sh >/dev/null <<'EOF'
#!/usr/bin/env bash
# Health watchdog for desineuron-sglang.service.
#
# Prints "startup_grace" or "healthy" on success. Exits non-zero when the
# health endpoint is still failing after a restart (the final curl fails
# under `set -e`).
set -euo pipefail

# Expected to define SGLANG_PORT, SGLANG_MODEL_ID and SGLANG_MODEL_PATH; it may
# also set the two optional timing overrides read below.
# shellcheck source=/dev/null
source /etc/default/desineuron-sglang

HEALTH_URL="http://127.0.0.1:${SGLANG_PORT}/v1/models"
HYDRATE_HELPER="/usr/local/bin/desineuron-sglang-hydrate.sh"
STARTUP_GRACE_SECONDS="${SGLANG_STARTUP_GRACE_SECONDS:-900}"
HEALTH_TIMEOUT_SECONDS="${SGLANG_HEALTH_TIMEOUT_SECONDS:-60}"

# Fetch the model when its directory does not exist yet.
if [[ ! -d "${SGLANG_MODEL_PATH}" ]]; then
  "${HYDRATE_HELPER}" "${SGLANG_MODEL_ID}" "${SGLANG_MODEL_PATH}"
fi

# Bring the service back if systemd does not consider it active.
if ! systemctl is-active --quiet desineuron-sglang.service; then
  systemctl restart desineuron-sglang.service
  sleep 10
fi

# Skip the health probe while the main process is younger than the grace
# period, so a server that is still starting is not restarted.
main_pid="$(systemctl show -p MainPID --value desineuron-sglang.service || true)"
if [[ -n "${main_pid}" && "${main_pid}" != "0" ]]; then
  # Ask ps for the elapsed seconds since process start. The mtime of
  # /proc/<pid> is not used because it reflects when the procfs inode was
  # instantiated, not when the process started.
  elapsed="$(ps -o etimes= -p "${main_pid}" 2>/dev/null || true)"
  runtime_age="${elapsed//[[:space:]]/}"
  # If the PID vanished between the two calls, treat the age as 0 so the
  # service is left alone until the next watchdog run.
  runtime_age="${runtime_age:-0}"
  if (( runtime_age < STARTUP_GRACE_SECONDS )); then
    echo "startup_grace"
    exit 0
  fi
fi

# First probe: on failure restart the service and give it a moment.
if ! curl --max-time "${HEALTH_TIMEOUT_SECONDS}" -fsS "${HEALTH_URL}" >/dev/null; then
  systemctl restart desineuron-sglang.service
  sleep 20
fi

# Final probe: a failure here aborts the script with curl's exit status.
curl --max-time "${HEALTH_TIMEOUT_SECONDS}" -fsS "${HEALTH_URL}" >/dev/null
echo "healthy"
EOF
sudo chmod 0755 /usr/local/bin/desineuron-sglang-watchdog.sh

# Install the model hydrate helper. The quoted 'EOF' delimiter writes the body
# verbatim, so nothing inside is expanded at install time.
sudo tee /usr/local/bin/desineuron-sglang-hydrate.sh >/dev/null <<'EOF'
#!/usr/bin/env bash
# Download a Hugging Face model into a local directory.
#
# Arguments:
#   $1 - model (repository) id
#   $2 - target directory for the download
#
# Uses the `hf` CLI when it is on PATH, otherwise falls back to
# huggingface_hub.snapshot_download through python3.
set -euo pipefail

MODEL_ID="${1:?model id required}"
TARGET_PATH="${2:?target path required}"

mkdir -p "$(dirname "${TARGET_PATH}")"

if command -v hf >/dev/null 2>&1; then
  hf download "${MODEL_ID}" --local-dir "${TARGET_PATH}" --max-workers 8
else
  # Hand the values over as argv instead of splicing them into the Python
  # source, so quotes or backslashes in either value cannot break or alter the
  # program. The quoted 'PY' delimiter keeps the Python text literal.
  python3 - "${MODEL_ID}" "${TARGET_PATH}" <<'PY'
import sys

from huggingface_hub import snapshot_download

repo_id, local_dir = sys.argv[1:3]
snapshot_download(repo_id=repo_id, local_dir=local_dir, max_workers=8)
PY
fi
EOF
sudo chmod 0755 /usr/local/bin/desineuron-sglang-hydrate.sh

# Write the oneshot service unit that runs the watchdog script once per
# activation. The delimiter is quoted so the unit text is written literally.
sudo tee /etc/systemd/system/desineuron-sglang-watchdog.service >/dev/null <<'EOF'
[Unit]
Description=Desineuron SGLang Runtime Watchdog
After=network-online.target

[Service]
Type=oneshot
ExecStart=/usr/local/bin/desineuron-sglang-watchdog.sh
EOF

# Write the timer unit that triggers the watchdog service: first run 2 minutes
# after boot, then 5 minutes after each activation of the service. The
# delimiter is quoted so the unit text is written literally.
sudo tee /etc/systemd/system/desineuron-sglang-watchdog.timer >/dev/null <<'EOF'
[Unit]
Description=Run the Desineuron SGLang watchdog every 5 minutes

[Timer]
OnBootSec=2min
OnUnitActiveSec=5min
Unit=desineuron-sglang-watchdog.service

[Install]
WantedBy=timers.target
EOF

# Make systemd pick up the unit files written above.
sudo systemctl daemon-reload

# Enable the timer for future boots and start it immediately.
sudo systemctl enable --now desineuron-sglang-watchdog.timer

# Run the watchdog once right away. Under `set -e` a failing watchdog run
# aborts the installer here, before the status output below.
sudo systemctl start desineuron-sglang-watchdog.service

# Show the resulting timer state without a pager and without truncation.
sudo systemctl --no-pager --full status desineuron-sglang-watchdog.timer