feat: Added the ComfyUI engine (#12)

#11 Added the complete ComfyUI engine. Co-authored-by: Sayan Datta <sayan@Sayans-MacBook-Air.local> Reviewed-on: #12
2026-03-27 22:48:34 +05:30
parent 5478f2815e
commit 8e1ffe0e43
74 changed files with 9390 additions and 7119 deletions
--- a/comfy_engine/scripts/prompt_expander.py
+++ b/comfy_engine/scripts/prompt_expander.py
@@ -96,7 +96,7 @@ Generate JSON containing:
 1. "positive_prompt" (rich, photorealistic, 80-120 words)
 2. "negative_prompt" (preventing artifacts, 30-50 words)
 3. "cfg" (float 6.0-9.0)
-4. "denoise" (float 0.5-0.85)
+4. "denoise" (float 0.45-0.65) - CRITICAL: Must be kept low to preserve input image structure
 5. "steps" (int 25-40)

 RULES FOR POSITIVE PROMPT:
@@ -144,7 +144,8 @@ def _call_ollama(user_message: str) -> str:
        timeout=180 # Large models take time
    )
    r.raise_for_status()
-    return r.json()["response"]
+    resp_json = r.json()
+    return resp_json["response"]


 def expand_prompt(keywords: list[str], room_type: str = "living_room", additional_notes: str = "") -> ExpandedPrompt:
@@ -166,38 +167,68 @@ AVOID: {ctx['avoid']}
        logger.info("Calling local Ollama LLM...")
        raw = _call_ollama(user_message).strip()
        
-        json_match = re.search(r'\{[\s\S]*\}', raw)
-        if json_match:
-            raw_json = json_match.group(0)
+        # Log the raw response for debugging
+        logger.info(f"Raw Ollama response length: {len(raw)}")
+        
+        # Handle empty response
+        if not raw:
+            logger.error("Empty response from Ollama")
+            raise ValueError("Ollama returned an empty response")
+
+        # Strip ASCII control characters (tab, LF and CR are kept); text around the JSON object is trimmed below
+        raw_cleaned = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]', '', raw)
+        
+        # More robust JSON block extraction
+        # Try finding the first '{' and last '}'
+        start_idx = raw_cleaned.find('{')
+        end_idx = raw_cleaned.rfind('}')
+        
+        if start_idx != -1 and end_idx != -1 and end_idx > start_idx:
+            raw_json = raw_cleaned[start_idx:end_idx+1]
        else:
-            raw_json = raw
-            
-        data = json.loads(raw_json)
+            raw_json = raw_cleaned
+
+        try:
+            data = json.loads(raw_json)
+        except json.JSONDecodeError as je:
+            logger.error(f"JSON Decode failed. Raw tail: {raw_json[:100]}...")
+            # Emergency fallback: if we can't parse, try to create a basic structure from keywords
+            return ExpandedPrompt(
+                style_name="fallback_" + (keywords[0] if keywords else "custom"),
+                positive_prompt=", ".join(keywords) + f", photorealistic, high quality, {room_type}",
+                negative_prompt="blurry, distorted, low quality",
+                cfg=7.5,
+                denoise=0.55,
+                steps=30,
+                reasoning="Fallback due to LLM parsing error",
+                source="fallback"
+            )
        
        return ExpandedPrompt(
            style_name=data.get("style_name", "custom_local"),
-            positive_prompt=data["positive_prompt"],
-            negative_prompt=data["negative_prompt"],
+            positive_prompt=data.get("positive_prompt", ", ".join(keywords)),
+            negative_prompt=data.get("negative_prompt", "blurry, distorted, low quality"),
            cfg=float(data.get("cfg", 7.5)),
-            denoise=float(data.get("denoise", 0.72)),
+            denoise=float(data.get("denoise", 0.55)),
            steps=int(data.get("steps", 30)),
            reasoning=data.get("reasoning", ""),
            source="ollama_local"
        )
    except Exception as e:
-        logger.warning(f"Ollama failed, using sync fallback: {e}")
-        return expand_prompt_simple(keywords, room_type)
-
-
-def expand_prompt_simple(keywords: list[str], room_type: str = "living_room") -> ExpandedPrompt:
-    ctx = ROOM_CONTEXTS.get(room_type.replace(" ", "_"), ROOM_CONTEXTS["living_room"])
-    kw_str = ", ".join(keywords)
-    positive = f"{kw_str} interior design, {', '.join(ctx['key_elements'][:4])}, photorealistic {room_type.replace('_', ' ')} interior, architectural photography, 8k resolution, photorealistic"
-    negative = "(worst quality, low quality, illustration, 3d render, 2d, painting, cartoon, sketch), blurry, distorted, extra windows, unrealistic lighting, structural changes"
-    return ExpandedPrompt(
-        style_name="fallback", positive_prompt=positive, negative_prompt=negative, 
-        cfg=7.5, denoise=0.72, steps=30, reasoning="No LLM", source="fallback"
-    )
+        logger.error(f"Ollama LLM expansion failed: {e}")
+        import traceback
+        traceback.print_exc()
+        # Full fallback if anything goes wrong
+        return ExpandedPrompt(
+            style_name="emergency_fallback",
+            positive_prompt=", ".join(keywords) + f", photorealistic, {room_type}",
+            negative_prompt="blurry, distorted",
+            cfg=7.5,
+            denoise=0.55,
+            steps=30,
+            reasoning=f"Emergency fallback due to: {str(e)}",
+            source="emergency"
+        )

 if __name__ == "__main__":
    import sys