Initial commit: Velocity-OS migration

This commit is contained in:
2026-05-01 12:32:19 +05:30
commit 407af828d4
283 changed files with 207782 additions and 0 deletions

View File

@@ -0,0 +1,118 @@
#!/usr/bin/env bash
# ============================================================
# Velocity-OS — Ingress Box: Air-Gap Transfer Agent
# Runs on a LAN-connected node (Raspberry Pi / VM).
# Polls ECR every 5 minutes for new signed images.
# Verifies cosign signature. Transfers to air-gapped workstation.
# Triggers K3s rolling restart on new image.
#
# Install as systemd service:
# sudo cp poll_and_transfer.service /etc/systemd/system/
# sudo systemctl enable --now poll_and_transfer
# ============================================================
set -euo pipefail
# ── Configuration ────────────────────────────────────────────
AWS_REGION="${AWS_REGION:-ap-south-1}"
AWS_ACCOUNT_ID="${AWS_ACCOUNT_ID:?Must set AWS_ACCOUNT_ID}"
ECR_REGISTRY="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com"
REGISTRY_PREFIX="velocity-os"
SERVICES=("core" "webos" "media-engine" "agents")
# Air-gapped workstation (LAN only — no internet)
WORKSTATION_IP="${WORKSTATION_IP:-192.168.1.100}"
WORKSTATION_USER="${WORKSTATION_USER:-ubuntu}"
WORKSTATION_SSH_KEY="${WORKSTATION_SSH_KEY:-/home/ingress/.ssh/velocity_workstation_ed25519}"
# State file: tracks last-transferred digest per service
STATE_DIR="/var/lib/velocity-ingress"
mkdir -p "${STATE_DIR}"
# Temp dir for image tarballs
TRANSFER_DIR="/tmp/velocity-transfer"
mkdir -p "${TRANSFER_DIR}"
# ── Functions ─────────────────────────────────────────────────
log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%SZ)] $*"; }
get_latest_digest() {
local repo="${REGISTRY_PREFIX}/$1"
aws ecr describe-images \
--repository-name "${repo}" \
--image-ids imageTag=latest \
--region "${AWS_REGION}" \
--query 'imageDetails[0].imageDigest' \
--output text 2>/dev/null || echo "NONE"
}
transfer_image() {
local svc="$1"
local digest="$2"
local full_image="${ECR_REGISTRY}/${REGISTRY_PREFIX}/${svc}@${digest}"
local tar_file="${TRANSFER_DIR}/${svc}.tar"
log " [${svc}] Pulling from ECR..."
docker pull "${ECR_REGISTRY}/${REGISTRY_PREFIX}/${svc}:latest"
log " [${svc}] Verifying cosign signature..."
cosign verify \
--certificate-identity-regexp ".*" \
--certificate-oidc-issuer-regexp ".*" \
"${full_image}" || {
log " [${svc}] ERROR: Signature verification FAILED. Refusing transfer."
return 1
}
log " [${svc}] Saving image to tarball..."
docker save "${ECR_REGISTRY}/${REGISTRY_PREFIX}/${svc}:latest" \
-o "${tar_file}"
log " [${svc}] Transferring to workstation via SCP..."
scp -i "${WORKSTATION_SSH_KEY}" \
-o StrictHostKeyChecking=yes \
"${tar_file}" \
"${WORKSTATION_USER}@${WORKSTATION_IP}:/tmp/${svc}.tar"
log " [${svc}] Importing into K3s containerd + rolling restart..."
ssh -i "${WORKSTATION_SSH_KEY}" \
-o StrictHostKeyChecking=yes \
"${WORKSTATION_USER}@${WORKSTATION_IP}" \
"sudo k3s ctr images import /tmp/${svc}.tar && \
sudo kubectl rollout restart deployment/${svc} -n velocity-os && \
rm /tmp/${svc}.tar"
# Record transferred digest
echo "${digest}" > "${STATE_DIR}/${svc}.last_digest"
log " [${svc}] ✓ Transfer complete. Digest: ${digest}"
rm -f "${tar_file}"
}
# ── Main poll loop ────────────────────────────────────────────
log "=== Velocity-OS Ingress Box polling ECR ==="
# Login to ECR (token expires every 12h; cron re-runs this)
aws ecr get-login-password --region "${AWS_REGION}" | \
docker login --username AWS --password-stdin "${ECR_REGISTRY}"
for svc in "${SERVICES[@]}"; do
log "[${svc}] Checking for updates..."
CURRENT_DIGEST=$(get_latest_digest "${svc}")
LAST_DIGEST=$(cat "${STATE_DIR}/${svc}.last_digest" 2>/dev/null || echo "NONE")
if [[ "${CURRENT_DIGEST}" == "NONE" ]]; then
log " [${svc}] No image found in ECR. Skipping."
continue
fi
if [[ "${CURRENT_DIGEST}" == "${LAST_DIGEST}" ]]; then
log " [${svc}] Up to date. No transfer needed."
continue
fi
log " [${svc}] New digest detected: ${CURRENT_DIGEST}"
transfer_image "${svc}" "${CURRENT_DIGEST}" || \
log " [${svc}] Transfer FAILED. Will retry next cycle."
done
log "=== Poll cycle complete ==="

View File

@@ -0,0 +1,142 @@
#!/usr/bin/env python3
from __future__ import annotations
import json
import os
import subprocess
import sys
from pathlib import Path
import boto3
def load_env_file(path: Path) -> dict[str, str]:
data: dict[str, str] = {}
if not path.exists():
return data
for line in path.read_text(encoding="utf-8").splitlines():
line = line.strip()
if not line or line.startswith("#") or "=" not in line:
continue
key, value = line.split("=", 1)
data[key.strip()] = value.strip()
return data
def env(name: str, default: str = "") -> str:
return os.environ.get(name, default)
def resolve_target_instance(ec2) -> dict | None:
explicit_instance_id = env("COMFY_INSTANCE_ID")
if explicit_instance_id:
reservations = ec2.describe_instances(InstanceIds=[explicit_instance_id])["Reservations"]
for reservation in reservations:
for instance in reservation["Instances"]:
if instance["State"]["Name"] == "running":
return instance
return None
tag_key = env("COMFY_INSTANCE_TAG_KEY", "DesineuronRole")
tag_value = env("COMFY_INSTANCE_TAG_VALUE", "comfyui")
filters = [
{"Name": "instance-state-name", "Values": ["running"]},
{"Name": f"tag:{tag_key}", "Values": [tag_value]},
]
reservations = ec2.describe_instances(Filters=filters)["Reservations"]
instances = [instance for reservation in reservations for instance in reservation["Instances"]]
if not instances:
return None
instances.sort(key=lambda row: row["LaunchTime"], reverse=True)
return instances[0]
def upsert_route(hostname: str, private_ip: str, port: int) -> subprocess.CompletedProcess[str]:
ingress_host = env("INGRESS_SSH_HOST")
ingress_user = env("INGRESS_SSH_USER", "ec2-user")
ingress_port = env("INGRESS_SSH_PORT", "22")
ingress_key = env("INGRESS_SSH_KEY_PATH")
helper = env("INGRESS_ROUTE_HELPER", "/usr/local/bin/manage_desineuron_routes.py")
payload = json.dumps(
{
"hostname": hostname,
"scheme": "http",
"target_host": private_ip,
"target_port": port,
}
)
command = (
f"sudo {helper} upsert '{payload}'"
" && sudo caddy validate --config /etc/caddy/Caddyfile"
" && sudo systemctl reload caddy"
)
return subprocess.run(
[
"ssh",
"-o",
"StrictHostKeyChecking=no",
"-o",
"UserKnownHostsFile=/dev/null",
"-i",
ingress_key,
"-p",
ingress_port,
f"{ingress_user}@{ingress_host}",
command,
],
capture_output=True,
text=True,
check=False,
)
def main() -> int:
ops_env = load_env_file(Path(env("OPS_ENV_FILE", "/opt/desineuron-ops-control-plane/.env")))
for key in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_DEFAULT_REGION"]:
if key not in os.environ and key in ops_env:
os.environ[key] = ops_env[key]
os.environ.setdefault("AWS_DEFAULT_REGION", ops_env.get("OPS_DEFAULT_REGION", "us-east-1"))
os.environ.setdefault("INGRESS_SSH_HOST", ops_env.get("OPS_INGRESS_SSH_HOST", ""))
os.environ.setdefault("INGRESS_SSH_USER", ops_env.get("OPS_INGRESS_SSH_USER", "ec2-user"))
os.environ.setdefault("INGRESS_SSH_PORT", ops_env.get("OPS_INGRESS_SSH_PORT", "22"))
normalized_key_path = ops_env.get("OPS_SSH_KEY_PATH", "/opt/desineuron-ops-control-plane/state/desineuron-l4-node.pem")
if normalized_key_path.startswith("/app/state/"):
normalized_key_path = normalized_key_path.replace("/app/state/", "/opt/desineuron-ops-control-plane/state/")
os.environ.setdefault("INGRESS_SSH_KEY_PATH", normalized_key_path)
os.environ.setdefault("INGRESS_ROUTE_HELPER", ops_env.get("OPS_INGRESS_ROUTE_HELPER", "/usr/local/bin/manage_desineuron_routes.py"))
region = os.environ["AWS_DEFAULT_REGION"]
hostname = env("COMFY_ROUTE_HOSTNAME", "comfy.desineuron.in")
port = int(env("COMFY_ROUTE_PORT", "8188"))
state_file = Path(env("COMFY_ROUTE_STATE_FILE", "/var/lib/desineuron-comfy-route-sync/current_target.txt"))
ec2 = boto3.client("ec2", region_name=region)
instance = resolve_target_instance(ec2)
if not instance:
print("No running comfyui target instance found", file=sys.stderr)
return 1
private_ip = instance.get("PrivateIpAddress")
if not private_ip:
print("Target instance has no private IP", file=sys.stderr)
return 1
current = state_file.read_text(encoding="utf-8").strip() if state_file.exists() else ""
if current == private_ip:
print(json.dumps({"status": "noop", "hostname": hostname, "target_host": private_ip}))
return 0
result = upsert_route(hostname, private_ip, port)
if result.returncode != 0:
print(result.stdout)
print(result.stderr, file=sys.stderr)
return result.returncode
state_file.parent.mkdir(parents=True, exist_ok=True)
state_file.write_text(private_ip, encoding="utf-8")
print(json.dumps({"status": "updated", "hostname": hostname, "target_host": private_ip}))
return 0
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -0,0 +1,152 @@
#!/usr/bin/env python3
from __future__ import annotations
import json
import os
import subprocess
import sys
from pathlib import Path
import boto3
def load_env_file(path: Path) -> dict[str, str]:
data: dict[str, str] = {}
if not path.exists():
return data
for line in path.read_text(encoding="utf-8").splitlines():
line = line.strip()
if not line or line.startswith("#") or "=" not in line:
continue
key, value = line.split("=", 1)
data[key.strip()] = value.strip()
return data
def env(name: str, default: str = "") -> str:
return os.environ.get(name, default)
def resolve_target_instance(ec2) -> dict | None:
explicit_instance_id = env("LLM_INSTANCE_ID")
if explicit_instance_id:
reservations = ec2.describe_instances(InstanceIds=[explicit_instance_id])["Reservations"]
for reservation in reservations:
for instance in reservation["Instances"]:
if instance["State"]["Name"] == "running":
return instance
return None
# We assume the LLM runtime runs on the same GPU instance as comfyui initially
tag_key = env("LLM_INSTANCE_TAG_KEY", "DesineuronRole")
tag_value = env("LLM_INSTANCE_TAG_VALUE", "comfyui")
filters = [
{"Name": "instance-state-name", "Values": ["running"]},
{"Name": f"tag:{tag_key}", "Values": [tag_value]},
]
reservations = ec2.describe_instances(Filters=filters)["Reservations"]
instances = [instance for reservation in reservations for instance in reservation["Instances"]]
if not instances:
return None
instances.sort(key=lambda row: row["LaunchTime"], reverse=True)
return instances[0]
def upsert_route(hostname: str, private_ip: str, port: int) -> subprocess.CompletedProcess[str]:
ingress_host = env("INGRESS_SSH_HOST")
ingress_user = env("INGRESS_SSH_USER", "ec2-user")
ingress_port = env("INGRESS_SSH_PORT", "22")
ingress_key = env("INGRESS_SSH_KEY_PATH")
helper = env("INGRESS_ROUTE_HELPER", "/usr/local/bin/manage_desineuron_routes.py")
payload = json.dumps(
{
"hostname": hostname,
"scheme": "http",
"target_host": private_ip,
"target_port": port,
}
)
command = (
f"sudo {helper} upsert '{payload}'"
" && sudo caddy validate --config /etc/caddy/Caddyfile"
" && sudo systemctl reload caddy"
)
return subprocess.run(
[
"ssh",
"-o",
"StrictHostKeyChecking=no",
"-o",
"UserKnownHostsFile=/dev/null",
"-i",
ingress_key,
"-p",
ingress_port,
f"{ingress_user}@{ingress_host}",
command,
],
capture_output=True,
text=True,
check=False,
)
def main() -> int:
ops_env = load_env_file(Path(env("OPS_ENV_FILE", "/opt/desineuron-ops-control-plane/.env")))
for key in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_DEFAULT_REGION"]:
if key not in os.environ and key in ops_env:
os.environ[key] = ops_env[key]
os.environ.setdefault("AWS_DEFAULT_REGION", ops_env.get("OPS_DEFAULT_REGION", "us-east-1"))
os.environ.setdefault("INGRESS_SSH_HOST", ops_env.get("OPS_INGRESS_SSH_HOST", ""))
os.environ.setdefault("INGRESS_SSH_USER", ops_env.get("OPS_INGRESS_SSH_USER", "ec2-user"))
os.environ.setdefault("INGRESS_SSH_PORT", ops_env.get("OPS_INGRESS_SSH_PORT", "22"))
normalized_key_path = ops_env.get("OPS_SSH_KEY_PATH", "/opt/desineuron-ops-control-plane/state/desineuron-l4-node.pem")
if normalized_key_path.startswith("/app/state/"):
normalized_key_path = normalized_key_path.replace("/app/state/", "/opt/desineuron-ops-control-plane/state/")
os.environ.setdefault("INGRESS_SSH_KEY_PATH", normalized_key_path)
os.environ.setdefault("INGRESS_ROUTE_HELPER", ops_env.get("OPS_INGRESS_ROUTE_HELPER", "/usr/local/bin/manage_desineuron_routes.py"))
region = os.environ["AWS_DEFAULT_REGION"]
hostname = env("LLM_ROUTE_HOSTNAME", "llm.desineuron.in")
port = int(env("LLM_ROUTE_PORT", "11434"))
state_file = Path(env("LLM_ROUTE_STATE_FILE", "/var/lib/desineuron-llm-route-sync/current_target.txt"))
ec2 = boto3.client("ec2", region_name=region)
instance = resolve_target_instance(ec2)
if not instance:
print("No running LLM target instance found", file=sys.stderr)
return 1
private_ip = instance.get("PrivateIpAddress")
if not private_ip:
print("Target instance has no private IP", file=sys.stderr)
return 1
desired_state = f"{private_ip}:{port}"
current = state_file.read_text(encoding="utf-8").strip() if state_file.exists() else ""
if current == desired_state:
print(
json.dumps(
{"status": "noop", "hostname": hostname, "target_host": private_ip, "target_port": port}
)
)
return 0
result = upsert_route(hostname, private_ip, port)
if result.returncode != 0:
print(result.stdout)
print(result.stderr, file=sys.stderr)
return result.returncode
state_file.parent.mkdir(parents=True, exist_ok=True)
state_file.write_text(desired_state, encoding="utf-8")
print(
json.dumps(
{"status": "updated", "hostname": hostname, "target_host": private_ip, "target_port": port}
)
)
return 0
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -0,0 +1,142 @@
#!/usr/bin/env python3
from __future__ import annotations
import json
import os
import subprocess
import sys
from pathlib import Path
import boto3
def load_env_file(path: Path) -> dict[str, str]:
data: dict[str, str] = {}
if not path.exists():
return data
for line in path.read_text(encoding="utf-8").splitlines():
line = line.strip()
if not line or line.startswith("#") or "=" not in line:
continue
key, value = line.split("=", 1)
data[key.strip()] = value.strip()
return data
def env(name: str, default: str = "") -> str:
return os.environ.get(name, default)
def resolve_target_instance(ec2) -> dict | None:
explicit_instance_id = env("VELOCITY_INSTANCE_ID")
if explicit_instance_id:
reservations = ec2.describe_instances(InstanceIds=[explicit_instance_id])["Reservations"]
for reservation in reservations:
for instance in reservation["Instances"]:
if instance["State"]["Name"] == "running":
return instance
return None
tag_key = env("VELOCITY_INSTANCE_TAG_KEY", "DesineuronRole")
tag_value = env("VELOCITY_INSTANCE_TAG_VALUE", "velocity-backend")
filters = [
{"Name": "instance-state-name", "Values": ["running"]},
{"Name": f"tag:{tag_key}", "Values": [tag_value]},
]
reservations = ec2.describe_instances(Filters=filters)["Reservations"]
instances = [instance for reservation in reservations for instance in reservation["Instances"]]
if not instances:
return None
instances.sort(key=lambda row: row["LaunchTime"], reverse=True)
return instances[0]
def upsert_route(hostname: str, private_ip: str, port: int) -> subprocess.CompletedProcess[str]:
ingress_host = env("INGRESS_SSH_HOST")
ingress_user = env("INGRESS_SSH_USER", "ec2-user")
ingress_port = env("INGRESS_SSH_PORT", "22")
ingress_key = env("INGRESS_SSH_KEY_PATH")
helper = env("INGRESS_ROUTE_HELPER", "/usr/local/bin/manage_desineuron_routes.py")
payload = json.dumps(
{
"hostname": hostname,
"scheme": "http",
"target_host": private_ip,
"target_port": port,
}
)
command = (
f"sudo {helper} upsert '{payload}'"
" && sudo caddy validate --config /etc/caddy/Caddyfile"
" && sudo systemctl reload caddy"
)
return subprocess.run(
[
"ssh",
"-o",
"StrictHostKeyChecking=no",
"-o",
"UserKnownHostsFile=/dev/null",
"-i",
ingress_key,
"-p",
ingress_port,
f"{ingress_user}@{ingress_host}",
command,
],
capture_output=True,
text=True,
check=False,
)
def main() -> int:
ops_env = load_env_file(Path(env("OPS_ENV_FILE", "/opt/desineuron-ops-control-plane/.env")))
for key in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_DEFAULT_REGION"]:
if key not in os.environ and key in ops_env:
os.environ[key] = ops_env[key]
os.environ.setdefault("AWS_DEFAULT_REGION", ops_env.get("OPS_DEFAULT_REGION", "us-east-1"))
os.environ.setdefault("INGRESS_SSH_HOST", ops_env.get("OPS_INGRESS_SSH_HOST", ""))
os.environ.setdefault("INGRESS_SSH_USER", ops_env.get("OPS_INGRESS_SSH_USER", "ec2-user"))
os.environ.setdefault("INGRESS_SSH_PORT", ops_env.get("OPS_INGRESS_SSH_PORT", "22"))
normalized_key_path = ops_env.get("OPS_SSH_KEY_PATH", "/opt/desineuron-ops-control-plane/state/desineuron-l4-node.pem")
if normalized_key_path.startswith("/app/state/"):
normalized_key_path = normalized_key_path.replace("/app/state/", "/opt/desineuron-ops-control-plane/state/")
os.environ.setdefault("INGRESS_SSH_KEY_PATH", normalized_key_path)
os.environ.setdefault("INGRESS_ROUTE_HELPER", ops_env.get("OPS_INGRESS_ROUTE_HELPER", "/usr/local/bin/manage_desineuron_routes.py"))
region = os.environ["AWS_DEFAULT_REGION"]
hostname = env("VELOCITY_ROUTE_HOSTNAME", "api.desineuron.in")
port = int(env("VELOCITY_ROUTE_PORT", "8001"))
state_file = Path(env("VELOCITY_ROUTE_STATE_FILE", "/var/lib/desineuron-velocity-route-sync/current_target.txt"))
ec2 = boto3.client("ec2", region_name=region)
instance = resolve_target_instance(ec2)
if not instance:
print("No running velocity-backend target instance found", file=sys.stderr)
return 1
private_ip = instance.get("PrivateIpAddress")
if not private_ip:
print("Target instance has no private IP", file=sys.stderr)
return 1
current = state_file.read_text(encoding="utf-8").strip() if state_file.exists() else ""
if current == private_ip:
print(json.dumps({"status": "noop", "hostname": hostname, "target_host": private_ip}))
return 0
result = upsert_route(hostname, private_ip, port)
if result.returncode != 0:
print(result.stdout)
print(result.stderr, file=sys.stderr)
return result.returncode
state_file.parent.mkdir(parents=True, exist_ok=True)
state_file.write_text(private_ip, encoding="utf-8")
print(json.dumps({"status": "updated", "hostname": hostname, "target_host": private_ip}))
return 0
if __name__ == "__main__":
raise SystemExit(main())