feat: Oracle Canvas, Revision History and Canvas Sharing (#33)
Co-authored-by: Sagnik <sagnik7896@gmail.com> Reviewed-on: #33
This commit was merged in pull request #33.
This commit is contained in:
108
infrastructure/desineuron_ingress/install_gpu_ollama_watchdog.sh
Normal file
108
infrastructure/desineuron_ingress/install_gpu_ollama_watchdog.sh
Normal file
@@ -0,0 +1,108 @@
|
||||
#!/usr/bin/env bash
#
# Installs a systemd-driven watchdog that keeps an Ollama model available:
#   1. points ollama.service at NVME_ROOT through a systemd drop-in,
#   2. installs a hydrate helper that fetches a model with `ollama pull`,
#   3. installs a watchdog script that re-hydrates the model when it is missing,
#   4. installs and enables a systemd service + timer that run the watchdog.
#
# Privileged steps are executed through sudo.
set -euo pipefail

# Model tag the watchdog keeps registered in Ollama.
readonly MODEL_NAME="qwen3.6:35b-a3b"
# Root directory holding the models/, state/ and logs/ subdirectories.
readonly NVME_ROOT="/opt/dlami/nvme/ollama"
# systemd drop-in directory for ollama.service overrides.
readonly OLLAMA_OVERRIDE_DIR="/etc/systemd/system/ollama.service.d"
# 1. Configure Ollama to use NVME
sudo mkdir -p "${NVME_ROOT}/models" "${NVME_ROOT}/state" "${NVME_ROOT}/logs"
# Hand the tree to the ollama account, the same owner the generated watchdog
# applies before hydrating, so the service can write into its models directory.
# NOTE(review): assumes the ollama user/group already exist on this host.
sudo chown -R ollama:ollama "${NVME_ROOT}"

echo "Configuring Ollama to use NVME storage at ${NVME_ROOT}/models..."
sudo mkdir -p "${OLLAMA_OVERRIDE_DIR}"
# NOTE(review): OLLAMA_HOST=0.0.0.0 makes the API listen on every interface;
# confirm that network-level access control is in place.
sudo tee "${OLLAMA_OVERRIDE_DIR}/override.conf" >/dev/null <<EOF
[Service]
Environment="OLLAMA_MODELS=${NVME_ROOT}/models"
Environment="OLLAMA_HOST=0.0.0.0"
EOF

sudo systemctl daemon-reload
sudo systemctl enable ollama.service
# `enable --now` leaves an already-running unit untouched, so the drop-in above
# would not be applied; an explicit restart covers both the running and the
# stopped case.
sudo systemctl restart ollama.service
# 2. Write the Hydrate Helper
HYDRATE_HELPER="/usr/local/bin/desineuron-hydrate-qwen36.sh"
echo "Creating Hydrate Helper map at $HYDRATE_HELPER"
# The heredoc delimiter is unquoted on purpose: ${NVME_ROOT} is expanded now,
# at install time, while every \$-escaped expansion is left for the helper to
# evaluate when it runs.
sudo tee "$HYDRATE_HELPER" >/dev/null <<EOF
#!/usr/bin/env bash
# Usage: desineuron-hydrate-qwen36.sh <model>
# Pulls <model> with ollama and appends start/finish lines to the hydrate log.
set -euo pipefail

# Fail with a usage message instead of a bare "unbound variable" error.
MODEL="\${1:?usage: \$0 <model>}"
LOG_DIR="${NVME_ROOT}/logs"
LOG_FILE="\${LOG_DIR}/qwen36_hydrate.log"

# The log directory may not exist yet when the helper is run on its own.
sudo mkdir -p "\$LOG_DIR"

echo "(\$(date)) Hydrating \$MODEL model using ollama pull..." | sudo tee -a "\$LOG_FILE"
# This requires outward access or an Ollama compatible registry proxy
# Note: For S3-based private GGUFs, this would use s5cmd
ollama pull "\$MODEL"
echo "(\$(date)) Hydration complete" | sudo tee -a "\$LOG_FILE"
EOF
sudo chmod 0755 "$HYDRATE_HELPER"
# 3. Write Watchdog Script
WATCHDOG_SCRIPT="/usr/local/bin/desineuron-ollama-watchdog.sh"
echo "Creating Watchdog Script map at $WATCHDOG_SCRIPT"
# Unquoted heredoc: ${MODEL_NAME} and ${NVME_ROOT} are baked in at install
# time; \$-escaped expansions are evaluated when the watchdog itself runs.
sudo tee "$WATCHDOG_SCRIPT" >/dev/null <<EOF
#!/usr/bin/env bash
# Ensures the ollama service is running and that MODEL_NAME is listed by the
# Ollama API, hydrating the model when it is missing.
# Prints "healthy" and exits 0 when the model is listed; otherwise prints
# "unhealthy: ..." to stderr and exits 1.
set -euo pipefail

MODEL_NAME="${MODEL_NAME}"
OLLAMA_URL="http://127.0.0.1:11434"

# Succeeds when the /api/tags response contains MODEL_NAME.
# The response is captured first so that grep exiting early cannot turn into a
# pipeline failure under pipefail, and -F matches the tag literally (the "."
# in the tag is not treated as a regex wildcard).
model_present() {
  local tags
  tags="\$(curl -fsS "\$OLLAMA_URL/api/tags")" || return 1
  grep -qF -- "\$MODEL_NAME" <<<"\$tags"
}

# Polls the API once per second, for at most 30 attempts, until it answers.
wait_for_api() {
  local attempt
  for ((attempt = 0; attempt < 30; attempt++)); do
    if curl -fsS -o /dev/null "\$OLLAMA_URL/api/tags" 2>/dev/null; then
      return 0
    fi
    sleep 1
  done
  return 1
}

if ! systemctl is-active --quiet ollama; then
  systemctl restart ollama
  # Best effort: keep going even if the API stays down so that the directory
  # scaffolding below still runs.
  wait_for_api || echo "warning: Ollama API not reachable after restart" >&2
fi

# Try asking Ollama if the tag exists
if ! model_present; then
  echo "Expected model \$MODEL_NAME missing. Initiating hydration..."

  # Ensure wiped ephemeral NVMe disks are scaffolded pre-hydration
  sudo mkdir -p "${NVME_ROOT}/logs" "${NVME_ROOT}/models" "${NVME_ROOT}/state"
  sudo chown -R ollama:ollama "${NVME_ROOT}"

  /usr/local/bin/desineuron-hydrate-qwen36.sh "\$MODEL_NAME"
  sleep 5
fi

# Verify final state
if model_present; then
  echo "healthy"
  exit 0
else
  echo "unhealthy: Model \$MODEL_NAME failed to register" >&2
  exit 1
fi
EOF
sudo chmod 0755 "$WATCHDOG_SCRIPT"
# 4. Write Watchdog Systemd Service & Timer
# Oneshot service that runs the watchdog script once per activation.
# Wants= accompanies After= because After= alone only orders the units and
# does not pull network-online.target into the transaction.
sudo tee "/etc/systemd/system/desineuron-ollama-watchdog.service" >/dev/null <<EOF
[Unit]
Description=Desineuron GPU Ollama Watchdog for Model $MODEL_NAME
Wants=network-online.target
After=network-online.target

[Service]
Type=oneshot
Environment="HOME=/root"
ExecStart=$WATCHDOG_SCRIPT
EOF

# Timer: first run 2 minutes after boot, then 5 minutes after each activation
# of the watchdog service.
sudo tee "/etc/systemd/system/desineuron-ollama-watchdog.timer" >/dev/null <<EOF
[Unit]
Description=Watchdog run for Ollama Model $MODEL_NAME every 5 mins

[Timer]
OnBootSec=2min
OnUnitActiveSec=5min
Unit=desineuron-ollama-watchdog.service

[Install]
WantedBy=timers.target
EOF

sudo systemctl daemon-reload
sudo systemctl enable --now desineuron-ollama-watchdog.timer
# The service is Type=oneshot, so a plain `start` would block until the whole
# model pull finishes and, under `set -e`, abort this installer if that first
# run fails. --no-block only queues the run, which is what "initiated" in the
# message below promises; the timer retries afterwards.
sudo systemctl start --no-block desineuron-ollama-watchdog.service

echo "Ollama Watchdog installed and model $MODEL_NAME setup initiated."
sudo systemctl --no-pager status desineuron-ollama-watchdog.timer
Reference in New Issue
Block a user