Files
Project_Velocity/infrastructure/desineuron_ingress/sync_llm_route.py

153 lines
5.4 KiB
Python

#!/usr/bin/env python3
from __future__ import annotations

import json
import os
import shlex
import subprocess
import sys
from pathlib import Path

import boto3
def load_env_file(path: Path) -> dict[str, str]:
    """Parse a ``KEY=VALUE`` style env file into a dictionary.

    Each line is stripped of surrounding whitespace. Blank lines, lines
    starting with ``#`` and lines without an ``=`` are ignored. Only the
    first ``=`` separates the key from the value, and both sides are
    stripped again. When a key appears more than once the last value wins.

    Args:
        path: Location of the env file (read as UTF-8).

    Returns:
        The parsed key/value pairs; an empty dict when *path* does not exist.
    """
    if not path.exists():
        return {}
    stripped_lines = (
        raw.strip() for raw in path.read_text(encoding="utf-8").splitlines()
    )
    # partition("=") splits on the first "=" only, so values may contain "=".
    pairs = (
        entry.partition("=")
        for entry in stripped_lines
        if entry and not entry.startswith("#") and "=" in entry
    )
    return {name.strip(): value.strip() for name, _sep, value in pairs}
def env(name: str, default: str = "") -> str:
    """Return environment variable *name*, or *default* when it is not set."""
    return os.getenv(name, default)
def resolve_target_instance(ec2) -> dict | None:
    """Pick the EC2 instance the LLM route should point at.

    When ``LLM_INSTANCE_ID`` is set, only that instance is considered and it
    is returned only if its state is ``running``; there is no fallback to
    the tag lookup in that case. Otherwise running instances carrying the tag
    ``LLM_INSTANCE_TAG_KEY`` = ``LLM_INSTANCE_TAG_VALUE`` (defaults
    ``DesineuronRole`` / ``comfyui``) are queried and the one with the
    latest ``LaunchTime`` is chosen.

    Args:
        ec2: Object exposing ``describe_instances`` (``main`` passes a boto3
            EC2 client).

    Returns:
        The instance description dict, or ``None`` when nothing matches.
    """
    pinned_id = env("LLM_INSTANCE_ID")
    if pinned_id:
        response = ec2.describe_instances(InstanceIds=[pinned_id])
        running = (
            candidate
            for group in response["Reservations"]
            for candidate in group["Instances"]
            if candidate["State"]["Name"] == "running"
        )
        return next(running, None)

    # We assume the LLM runtime runs on the same GPU instance as comfyui initially
    role_tag = env("LLM_INSTANCE_TAG_KEY", "DesineuronRole")
    role_value = env("LLM_INSTANCE_TAG_VALUE", "comfyui")
    response = ec2.describe_instances(
        Filters=[
            {"Name": "instance-state-name", "Values": ["running"]},
            {"Name": f"tag:{role_tag}", "Values": [role_value]},
        ]
    )
    candidates = [
        candidate
        for group in response["Reservations"]
        for candidate in group["Instances"]
    ]
    # Most recently launched instance wins; ties keep the first one returned.
    return max(candidates, key=lambda item: item["LaunchTime"], default=None)
def upsert_route(hostname: str, private_ip: str, port: int) -> subprocess.CompletedProcess[str]:
    """Upsert the ``hostname -> private_ip:port`` route on the ingress host.

    Connects to the ingress host with ``ssh`` and runs, chained with ``&&``
    so a failing step stops the rest: the route helper's ``upsert`` command
    with a JSON payload, ``caddy validate`` against ``/etc/caddy/Caddyfile``,
    and ``systemctl reload caddy``.

    Connection settings come from the environment: ``INGRESS_SSH_HOST``,
    ``INGRESS_SSH_USER`` (default ``ec2-user``), ``INGRESS_SSH_PORT``
    (default ``22``), ``INGRESS_SSH_KEY_PATH`` and ``INGRESS_ROUTE_HELPER``
    (default ``/usr/local/bin/manage_desineuron_routes.py``).

    Args:
        hostname: Public hostname the route is registered under.
        private_ip: Address placed in the payload's ``target_host``.
        port: Port placed in the payload's ``target_port``.

    Returns:
        The completed ``ssh`` process with stdout/stderr captured as text.
        A non-zero exit status is not raised (``check=False``); callers must
        inspect ``returncode``.
    """
    ingress_host = env("INGRESS_SSH_HOST")
    ingress_user = env("INGRESS_SSH_USER", "ec2-user")
    ingress_port = env("INGRESS_SSH_PORT", "22")
    ingress_key = env("INGRESS_SSH_KEY_PATH")
    helper = env("INGRESS_ROUTE_HELPER", "/usr/local/bin/manage_desineuron_routes.py")
    payload = json.dumps(
        {
            "hostname": hostname,
            "scheme": "http",
            "target_host": private_ip,
            "target_port": port,
        }
    )
    # The command string is interpreted by the remote shell. shlex.quote
    # keeps the payload a single argument even if a value contains a single
    # quote, which hand-written '...' wrapping would not survive.
    # The helper path is inserted verbatim, exactly as configured.
    command = (
        f"sudo {helper} upsert {shlex.quote(payload)}"
        " && sudo caddy validate --config /etc/caddy/Caddyfile"
        " && sudo systemctl reload caddy"
    )
    # NOTE(review): host key verification is disabled below, so the identity
    # of the ingress host is not checked -- confirm this is acceptable.
    return subprocess.run(
        [
            "ssh",
            "-o",
            "StrictHostKeyChecking=no",
            "-o",
            "UserKnownHostsFile=/dev/null",
            "-i",
            ingress_key,
            "-p",
            ingress_port,
            f"{ingress_user}@{ingress_host}",
            command,
        ],
        capture_output=True,
        text=True,
        check=False,
    )
def main() -> int:
    """Sync the ingress route for the LLM hostname with the current target.

    Steps:
      1. Load the ops env file (``OPS_ENV_FILE``) and use it to fill in AWS
         and ingress SSH settings that are not already in the environment.
      2. Resolve the target instance and its private IP.
      3. Compare ``<private_ip>:<port>`` with the contents of the state file
         (``LLM_ROUTE_STATE_FILE``); when equal, report ``noop``.
      4. Otherwise upsert the route on the ingress host and, on success,
         record the new target in the state file.

    A JSON status line is printed to stdout for the ``noop`` and ``updated``
    outcomes; errors go to stderr.

    Returns:
        Process exit code: 0 for ``noop``/``updated``; 1 when the port is
        invalid, no running target exists or it has no private IP; otherwise
        the non-zero return code of the failed ``ssh`` command.
    """
    ops_env = load_env_file(Path(env("OPS_ENV_FILE", "/opt/desineuron-ops-control-plane/.env")))

    # Variables already present in the real environment take precedence;
    # the ops env file only fills the gaps.
    for key in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_DEFAULT_REGION"):
        if key not in os.environ and key in ops_env:
            os.environ[key] = ops_env[key]
    os.environ.setdefault("AWS_DEFAULT_REGION", ops_env.get("OPS_DEFAULT_REGION", "us-east-1"))
    os.environ.setdefault("INGRESS_SSH_HOST", ops_env.get("OPS_INGRESS_SSH_HOST", ""))
    os.environ.setdefault("INGRESS_SSH_USER", ops_env.get("OPS_INGRESS_SSH_USER", "ec2-user"))
    os.environ.setdefault("INGRESS_SSH_PORT", ops_env.get("OPS_INGRESS_SSH_PORT", "22"))

    normalized_key_path = ops_env.get(
        "OPS_SSH_KEY_PATH",
        "/opt/desineuron-ops-control-plane/state/desineuron-l4-node.pem",
    )
    # Rewrite only the leading "/app/state/" prefix (presumably the path as
    # seen from inside a container -- confirm) to the host-side directory.
    # Slicing, unlike str.replace, leaves later occurrences untouched.
    app_state_prefix = "/app/state/"
    if normalized_key_path.startswith(app_state_prefix):
        normalized_key_path = (
            "/opt/desineuron-ops-control-plane/state/"
            + normalized_key_path[len(app_state_prefix):]
        )
    os.environ.setdefault("INGRESS_SSH_KEY_PATH", normalized_key_path)
    os.environ.setdefault(
        "INGRESS_ROUTE_HELPER",
        ops_env.get("OPS_INGRESS_ROUTE_HELPER", "/usr/local/bin/manage_desineuron_routes.py"),
    )

    region = os.environ["AWS_DEFAULT_REGION"]
    hostname = env("LLM_ROUTE_HOSTNAME", "llm.desineuron.in")

    # Fail with a clear message instead of a traceback (or a bogus route)
    # when the configured port is not a usable TCP port number.
    raw_port = env("LLM_ROUTE_PORT", "11434")
    try:
        port = int(raw_port)
    except ValueError:
        port = -1
    if not 0 < port < 65536:
        print(f"Invalid LLM_ROUTE_PORT: {raw_port!r}", file=sys.stderr)
        return 1

    state_file = Path(env("LLM_ROUTE_STATE_FILE", "/var/lib/desineuron-llm-route-sync/current_target.txt"))

    ec2 = boto3.client("ec2", region_name=region)
    instance = resolve_target_instance(ec2)
    if not instance:
        print("No running LLM target instance found", file=sys.stderr)
        return 1
    private_ip = instance.get("PrivateIpAddress")
    if not private_ip:
        print("Target instance has no private IP", file=sys.stderr)
        return 1

    # The state file caches the last target that was pushed successfully so
    # that unchanged runs skip the SSH round trip.
    desired_state = f"{private_ip}:{port}"
    current = state_file.read_text(encoding="utf-8").strip() if state_file.exists() else ""
    if current == desired_state:
        print(
            json.dumps(
                {"status": "noop", "hostname": hostname, "target_host": private_ip, "target_port": port}
            )
        )
        return 0

    result = upsert_route(hostname, private_ip, port)
    if result.returncode != 0:
        print(result.stdout)
        print(result.stderr, file=sys.stderr)
        return result.returncode

    # Persist only after the ingress accepted the change, so a failed run is
    # retried on the next invocation.
    state_file.parent.mkdir(parents=True, exist_ok=True)
    state_file.write_text(desired_state, encoding="utf-8")
    print(
        json.dumps(
            {"status": "updated", "hostname": hostname, "target_host": private_ip, "target_port": port}
        )
    )
    return 0
if __name__ == "__main__":
    # Run only when executed as a script; main()'s return value becomes the
    # process exit status.
    sys.exit(main())