feat: Oracle Canvas, Revision History and Canvas Sharing (#33)
Co-authored-by: Sagnik <sagnik7896@gmail.com> Reviewed-on: #33
This commit was merged in pull request #33.
This commit is contained in:
104
infrastructure/desineuron_ingress/install_gpu_sglang_runtime.sh
Normal file
104
infrastructure/desineuron_ingress/install_gpu_sglang_runtime.sh
Normal file
@@ -0,0 +1,104 @@
|
||||
#!/usr/bin/env bash
#
# Installs the Desineuron SGLang runtime on a GPU host: builds a Python venv,
# installs sglang into it, writes /etc/default/desineuron-sglang, a launcher
# script and a systemd unit, then starts the service.
#
# Every setting below may be overridden through the environment variable named
# inside its expansion; the text after `:-` is the default used otherwise.
set -euo pipefail

# Filesystem layout: scratch data (cache/logs/state) and the venv location.
NVME_ROOT="${NVME_ROOT:-/opt/dlami/nvme/sglang}"
RUNTIME_ROOT="${RUNTIME_ROOT:-/opt/desineuron-sglang}"
VENV_PATH="${RUNTIME_ROOT}/.venv"

# Network binding. HOST is left empty on purpose when SGLANG_HOST is not given;
# it is resolved further down in this script.
PORT="${SGLANG_PORT:-30100}"
HOST="${SGLANG_HOST:-}"

# Model selection: advertised model name and on-disk weights directory.
MODEL_ID="${SGLANG_MODEL_ID:-qwen3.6-35b-a3b}"
MODEL_PATH="${SGLANG_MODEL_PATH:-/opt/dlami/nvme/models/Qwen-Qwen3.6-35B-A3B-FP8}"

# Server tuning values forwarded verbatim to the `sglang serve` flags.
TP_SIZE="${SGLANG_TP_SIZE:-4}"
CONTEXT_LENGTH="${SGLANG_CONTEXT_LENGTH:-131072}"
MEM_FRACTION_STATIC="${SGLANG_MEM_FRACTION_STATIC:-0.88}"
ATTENTION_BACKEND="${SGLANG_ATTENTION_BACKEND:-flashinfer}"
DIST_INIT_ADDR="${SGLANG_DIST_INIT_ADDR:-127.0.0.1:50000}"

# Resolve the bind address when SGLANG_HOST was not supplied.
# Order of preference: EC2 IMDSv2 `local-ipv4`, then the first address printed
# by `hostname -I`. The script aborts if both come back empty.
if [[ -z "${HOST}" ]]; then
  # Bounded timeouts: when the link-local metadata endpoint is unreachable,
  # curl would otherwise wait for the OS connect timeout. A failure here is
  # tolerated (`|| true`) so the hostname fallback below can take over.
  IMDS_TOKEN="$(curl -fsS --connect-timeout 2 --max-time 5 -X PUT \
    http://169.254.169.254/latest/api/token \
    -H 'X-aws-ec2-metadata-token-ttl-seconds: 21600' || true)"
  if [[ -n "${IMDS_TOKEN}" ]]; then
    HOST="$(curl -fsS --connect-timeout 2 --max-time 5 \
      -H "X-aws-ec2-metadata-token: ${IMDS_TOKEN}" \
      http://169.254.169.254/latest/meta-data/local-ipv4 || true)"
  fi
fi

if [[ -z "${HOST}" ]]; then
  # `|| true` stops `set -e` + `pipefail` from killing the script on a failing
  # `hostname -I`, so the explicit diagnostic below is what the operator sees.
  HOST="$(hostname -I | awk '{print $1}' || true)"
fi

if [[ -z "${HOST}" ]]; then
  echo "Unable to resolve GPU private IP for SGLang host binding" >&2
  exit 1
fi

# Create the data directories and the runtime root, then build the venv and
# install the serving stack into it.
sudo mkdir -p "${NVME_ROOT}"/{cache,logs,state} "${RUNTIME_ROOT}"
# The directory above is created by root, but the venv/pip steps below run as
# the invoking user; hand the runtime root to that user so they can write to it.
# When the script itself runs as root this is a no-op.
sudo chown "$(id -u):$(id -g)" "${RUNTIME_ROOT}"
python3 -m venv "${VENV_PATH}"
"${VENV_PATH}/bin/pip" install --upgrade pip wheel setuptools
"${VENV_PATH}/bin/pip" install "sglang[all]>=0.5.3" flashinfer-python huggingface_hub

# Write the runtime configuration consumed by both systemd (EnvironmentFile=)
# and the launcher script (which `source`s it as shell).
#
# The file is pre-created with mode 0600 so it is never readable by other users,
# not even between creation and a later chmod.
sudo install -m 0600 /dev/null /etc/default/desineuron-sglang
# Values are double-quoted: the launcher sources this file with bash, where an
# unquoted value containing whitespace would be parsed as `VAR=word command`.
# systemd's EnvironmentFile parser strips the surrounding quotes as well.
sudo tee /etc/default/desineuron-sglang >/dev/null <<EOF
SGLANG_HOST="${HOST}"
SGLANG_PORT="${PORT}"
SGLANG_MODEL_ID="${MODEL_ID}"
SGLANG_MODEL_PATH="${MODEL_PATH}"
SGLANG_TP_SIZE="${TP_SIZE}"
SGLANG_CONTEXT_LENGTH="${CONTEXT_LENGTH}"
SGLANG_MEM_FRACTION_STATIC="${MEM_FRACTION_STATIC}"
SGLANG_ATTENTION_BACKEND="${ATTENTION_BACKEND}"
SGLANG_DIST_INIT_ADDR="${DIST_INIT_ADDR}"
SGLANG_CACHE_DIR="${NVME_ROOT}/cache"
SGLANG_LOG_DIR="${NVME_ROOT}/logs"
SGLANG_STATE_DIR="${NVME_ROOT}/state"
SGLANG_USE_FLASHINFER="1"
SGLANG_ENABLE_PREFIX_CACHE="1"
SGLANG_SERVED_MODEL_NAME="${MODEL_ID}"
SGLANG_EXTRA_ARGS=""
EOF

# Generate the launcher that the systemd unit executes.
#
# The sglang binary path is baked in from VENV_PATH at install time, so a
# RUNTIME_ROOT override is honoured instead of assuming the default location.
# Everything inside the quoted here-doc is written literally and only expands
# when the launcher itself runs.
{
  printf '%s\n' '#!/usr/bin/env bash' 'set -euo pipefail'
  # %q shell-quotes the path so it survives spaces or other special characters.
  printf 'SGLANG_BIN=%q\n' "${VENV_PATH}/bin/sglang"
  cat <<'EOF'
source /etc/default/desineuron-sglang
export HF_HOME="${SGLANG_CACHE_DIR}/hf"
export HUGGINGFACE_HUB_CACHE="${SGLANG_CACHE_DIR}/hf"
export CUDA_DEVICE_MAX_CONNECTIONS=1
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
export SGLANG_USE_FLASHINFER="${SGLANG_USE_FLASHINFER}"

# Split the optional extra arguments on whitespace into an array; unlike a bare
# unquoted expansion this does not glob-expand characters such as `*`.
extra_args=()
if [[ -n "${SGLANG_EXTRA_ARGS:-}" ]]; then
  read -r -a extra_args <<<"${SGLANG_EXTRA_ARGS}"
fi

# `${arr[@]+...}` keeps an empty array from tripping `set -u` on older bash.
exec "${SGLANG_BIN}" serve \
  --host "${SGLANG_HOST}" \
  --port "${SGLANG_PORT}" \
  --model-path "${SGLANG_MODEL_PATH}" \
  --served-model-name "${SGLANG_SERVED_MODEL_NAME}" \
  --tp-size "${SGLANG_TP_SIZE}" \
  --context-length "${SGLANG_CONTEXT_LENGTH}" \
  --mem-fraction-static "${SGLANG_MEM_FRACTION_STATIC}" \
  --attention-backend "${SGLANG_ATTENTION_BACKEND}" \
  --dist-init-addr "${SGLANG_DIST_INIT_ADDR}" \
  --enable-metrics \
  --skip-server-warmup \
  ${extra_args[@]+"${extra_args[@]}"}
EOF
} | sudo tee /usr/local/bin/desineuron-sglang-launch.sh >/dev/null
sudo chmod 0755 /usr/local/bin/desineuron-sglang-launch.sh

# Register the runtime with systemd. The here-doc delimiter is unquoted, so
# ${RUNTIME_ROOT} is substituted now, at install time; the unit loads its
# settings from /etc/default/desineuron-sglang and delegates to the launcher.
unit_file=/etc/systemd/system/desineuron-sglang.service
cat <<UNIT | sudo tee "${unit_file}" >/dev/null
[Unit]
Description=Desineuron SGLang Runtime
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
EnvironmentFile=/etc/default/desineuron-sglang
WorkingDirectory=${RUNTIME_ROOT}
ExecStart=/usr/local/bin/desineuron-sglang-launch.sh
Restart=always
RestartSec=5
LimitNOFILE=1048576

[Install]
WantedBy=multi-user.target
UNIT

# Pick up the unit file, make the service start on boot, and (re)start it.
# `restart` is used rather than `enable --now` because the latter leaves an
# already-running instance untouched, so a re-run of this installer would not
# apply a rewritten configuration or launcher. `restart` also starts a stopped
# unit, so first-time installs behave the same.
sudo systemctl daemon-reload
sudo systemctl enable desineuron-sglang.service
sudo systemctl restart desineuron-sglang.service
# Show the resulting state; its exit status becomes the installer's exit status.
sudo systemctl --no-pager --full status desineuron-sglang.service
Reference in New Issue
Block a user