153 lines
5.4 KiB
Python
153 lines
5.4 KiB
Python
#!/usr/bin/env python3
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
import subprocess
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
import boto3
|
|
|
|
|
|
def load_env_file(path: Path) -> dict[str, str]:
    """Parse a dotenv-style file into a ``{key: value}`` mapping.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    skipped. Every other line is split on its first ``=`` and both sides are
    stripped of surrounding whitespace. Two common dotenv conventions are
    normalised so such files yield usable values:

    * a leading ``export`` keyword in front of the key is dropped;
    * one pair of matching single or double quotes around the value is
      removed.

    Args:
        path: Location of the env file.

    Returns:
        The parsed mapping. It is empty when ``path`` does not exist; when a
        key appears more than once the last occurrence wins.
    """
    data: dict[str, str] = {}
    if not path.exists():
        return data

    for raw_line in path.read_text(encoding="utf-8").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue

        key, value = line.split("=", 1)
        key = key.strip()
        value = value.strip()

        # Accept shell-sourceable files: "export KEY=value" -> "KEY".
        keyword, _, remainder = key.partition(" ")
        if keyword == "export" and remainder.strip():
            key = remainder.strip()

        # Drop one pair of matching quotes; otherwise the quote characters
        # would become part of the value.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in ("'", '"'):
            value = value[1:-1]

        data[key] = value
    return data
|
|
|
|
|
|
def env(name: str, default: str = "") -> str:
    """Return the environment variable *name*, or *default* when it is unset."""
    return os.getenv(name, default)
|
|
|
|
|
|
def resolve_target_instance(ec2) -> dict | None:
    """Choose the EC2 instance the LLM route should point at.

    When ``LLM_INSTANCE_ID`` is set, only that instance is looked up and it
    is returned if its state is ``running``; otherwise the result is ``None``.

    Without an explicit id, running instances are searched by tag
    (``LLM_INSTANCE_TAG_KEY`` / ``LLM_INSTANCE_TAG_VALUE``, defaulting to
    ``DesineuronRole`` / ``comfyui``) and the one with the greatest
    ``LaunchTime`` is returned.

    Args:
        ec2: Client object exposing ``describe_instances``.

    Returns:
        The instance description dict, or ``None`` when nothing matches.
    """
    pinned_id = env("LLM_INSTANCE_ID")
    if pinned_id:
        response = ec2.describe_instances(InstanceIds=[pinned_id])
        candidates = (
            candidate
            for group in response["Reservations"]
            for candidate in group["Instances"]
        )
        return next(
            (c for c in candidates if c["State"]["Name"] == "running"),
            None,
        )

    # We assume the LLM runtime runs on the same GPU instance as comfyui initially
    role_key = env("LLM_INSTANCE_TAG_KEY", "DesineuronRole")
    role_value = env("LLM_INSTANCE_TAG_VALUE", "comfyui")
    response = ec2.describe_instances(
        Filters=[
            {"Name": "instance-state-name", "Values": ["running"]},
            {"Name": f"tag:{role_key}", "Values": [role_value]},
        ]
    )

    running = []
    for group in response["Reservations"]:
        running.extend(group["Instances"])
    if not running:
        return None

    # Most recently launched instance wins; on equal launch times the first
    # one in response order is kept.
    return max(running, key=lambda row: row["LaunchTime"])
|
|
|
|
|
|
def upsert_route(hostname: str, private_ip: str, port: int) -> subprocess.CompletedProcess[str]:
    """Upsert a reverse-proxy route on the ingress host over SSH.

    Builds a JSON payload describing the route (``hostname`` ->
    ``http://private_ip:port``) and runs, on the ingress host, the route
    helper's ``upsert`` sub-command followed by ``caddy validate`` and
    ``systemctl reload caddy``. The three commands are chained with ``&&`` so
    a failing step stops the chain.

    Connection settings are read from the environment:
    ``INGRESS_SSH_HOST``, ``INGRESS_SSH_USER`` (default ``ec2-user``),
    ``INGRESS_SSH_PORT`` (default ``22``), ``INGRESS_SSH_KEY_PATH`` and
    ``INGRESS_ROUTE_HELPER`` (default
    ``/usr/local/bin/manage_desineuron_routes.py``).

    Args:
        hostname: Public hostname of the route.
        private_ip: Upstream host the route should forward to.
        port: Upstream port the route should forward to.

    Returns:
        The completed ``ssh`` process with captured text ``stdout`` and
        ``stderr``. A non-zero exit status is reported through
        ``returncode``; no exception is raised for it (``check=False``).
    """
    import shlex  # local import: not part of the module-level imports

    ingress_host = env("INGRESS_SSH_HOST")
    ingress_user = env("INGRESS_SSH_USER", "ec2-user")
    ingress_port = env("INGRESS_SSH_PORT", "22")
    ingress_key = env("INGRESS_SSH_KEY_PATH")
    helper = env("INGRESS_ROUTE_HELPER", "/usr/local/bin/manage_desineuron_routes.py")

    payload = json.dumps(
        {
            "hostname": hostname,
            "scheme": "http",
            "target_host": private_ip,
            "target_port": port,
        }
    )

    # The command string is interpreted by the remote shell, so the payload
    # must be shell-quoted: hand-written '...' breaks (or allows command
    # injection) as soon as a value contains a single quote.
    # NOTE(review): `helper` is left unquoted on purpose in case it is
    # configured as a multi-word command — confirm.
    command = (
        f"sudo {helper} upsert {shlex.quote(payload)}"
        " && sudo caddy validate --config /etc/caddy/Caddyfile"
        " && sudo systemctl reload caddy"
    )

    ssh_argv = [
        "ssh",
        # Host key verification is disabled and no known_hosts file is kept.
        "-o",
        "StrictHostKeyChecking=no",
        "-o",
        "UserKnownHostsFile=/dev/null",
        "-i",
        ingress_key,
        "-p",
        ingress_port,
        f"{ingress_user}@{ingress_host}",
        command,
    ]
    return subprocess.run(
        ssh_argv,
        capture_output=True,
        text=True,
        check=False,
    )
|
|
|
|
|
|
def _apply_ops_env_defaults(ops_env: dict[str, str]) -> None:
    """Fill unset process environment variables from the parsed ops env file."""
    for key in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_DEFAULT_REGION"):
        if key not in os.environ and key in ops_env:
            os.environ[key] = ops_env[key]

    os.environ.setdefault("AWS_DEFAULT_REGION", ops_env.get("OPS_DEFAULT_REGION", "us-east-1"))
    os.environ.setdefault("INGRESS_SSH_HOST", ops_env.get("OPS_INGRESS_SSH_HOST", ""))
    os.environ.setdefault("INGRESS_SSH_USER", ops_env.get("OPS_INGRESS_SSH_USER", "ec2-user"))
    os.environ.setdefault("INGRESS_SSH_PORT", ops_env.get("OPS_INGRESS_SSH_PORT", "22"))

    key_path = ops_env.get(
        "OPS_SSH_KEY_PATH",
        "/opt/desineuron-ops-control-plane/state/desineuron-l4-node.pem",
    )
    # NOTE(review): "/app/state/" is presumably the path as seen from inside
    # a container — confirm. Only the leading prefix is rewritten; a later
    # occurrence of the same text inside the path is left untouched.
    container_prefix = "/app/state/"
    if key_path.startswith(container_prefix):
        key_path = "/opt/desineuron-ops-control-plane/state/" + key_path[len(container_prefix):]
    os.environ.setdefault("INGRESS_SSH_KEY_PATH", key_path)

    os.environ.setdefault(
        "INGRESS_ROUTE_HELPER",
        ops_env.get("OPS_INGRESS_ROUTE_HELPER", "/usr/local/bin/manage_desineuron_routes.py"),
    )


def main() -> int:
    """Point the LLM hostname at the currently running target instance.

    Steps:
      1. Load the ops env file (``OPS_ENV_FILE``) and use it to fill any
         environment variables that are not already set.
      2. Resolve the target EC2 instance and its private IP.
      3. Compare ``"<ip>:<port>"`` with the contents of the state file
         (``LLM_ROUTE_STATE_FILE``); when equal, report ``noop``.
      4. Otherwise upsert the route on the ingress host and, on success,
         record the new target in the state file and report ``updated``.

    Status reports are JSON objects printed to stdout; errors go to stderr.

    Returns:
        ``0`` on ``noop``/``updated``; ``1`` when the port setting is
        invalid, no running instance is found or it has no private IP; the
        ssh exit status when the route update fails.
    """
    ops_env = load_env_file(Path(env("OPS_ENV_FILE", "/opt/desineuron-ops-control-plane/.env")))
    _apply_ops_env_defaults(ops_env)

    region = os.environ["AWS_DEFAULT_REGION"]
    hostname = env("LLM_ROUTE_HOSTNAME", "llm.desineuron.in")

    # Reject a malformed port with a clear message instead of a traceback.
    raw_port = env("LLM_ROUTE_PORT", "11434")
    try:
        port = int(raw_port)
    except ValueError:
        port = -1
    if not 1 <= port <= 65535:
        print(f"Invalid LLM_ROUTE_PORT: {raw_port!r}", file=sys.stderr)
        return 1

    state_file = Path(env("LLM_ROUTE_STATE_FILE", "/var/lib/desineuron-llm-route-sync/current_target.txt"))

    ec2 = boto3.client("ec2", region_name=region)
    instance = resolve_target_instance(ec2)
    if not instance:
        print("No running LLM target instance found", file=sys.stderr)
        return 1

    private_ip = instance.get("PrivateIpAddress")
    if not private_ip:
        print("Target instance has no private IP", file=sys.stderr)
        return 1

    report = {"hostname": hostname, "target_host": private_ip, "target_port": port}

    desired_state = f"{private_ip}:{port}"
    current = state_file.read_text(encoding="utf-8").strip() if state_file.exists() else ""
    if current == desired_state:
        # The route already points at this target; skip the SSH round-trip.
        print(json.dumps({"status": "noop", **report}))
        return 0

    result = upsert_route(hostname, private_ip, port)
    if result.returncode != 0:
        print(result.stdout)
        print(result.stderr, file=sys.stderr)
        return result.returncode

    # Persist the target only after the remote update succeeded, so a failed
    # run is retried on the next invocation.
    state_file.parent.mkdir(parents=True, exist_ok=True)
    state_file.write_text(desired_state, encoding="utf-8")
    print(json.dumps({"status": "updated", **report}))
    return 0
|
|
|
|
|
|
# Script entry point: the integer returned by main() becomes the exit status.
if __name__ == "__main__":
    sys.exit(main())
|