Spaces:

rkihacker
/

R2OAI

Paused

App Files Files Community

rkihacker committed on Oct 21

Commit

3a333bb

verified ·

1 Parent(s): 54de3fd

Update main.py

Browse files

Files changed (1) hide show

main.py +27 -25

main.py CHANGED Viewed

@@ -16,7 +16,7 @@ if not REPLICATE_API_TOKEN:
     raise ValueError("REPLICATE_API_TOKEN environment variable not set.")
 # FastAPI Init
-app = FastAPI(title="Replicate to OpenAI Compatibility Layer", version="7.1.0 (Streaming Space Fix)")
 # --- Pydantic Models ---
 class ModelCard(BaseModel):
@@ -40,8 +40,7 @@ SUPPORTED_MODELS = {
 def prepare_replicate_input(request: OpenAIChatCompletionRequest) -> Dict[str, Any]:
     """
     Formats the input for Replicate's API, flattening the message history into a
-    single 'prompt' string and handling images separately. This is the required
-    format for all their current chat/vision models.
     """
     payload = {}
     prompt_parts = []
@@ -81,7 +80,7 @@ def prepare_replicate_input(request: OpenAIChatCompletionRequest) -> Dict[str, A
     return payload
 async def stream_replicate_sse(replicate_model_id: str, input_payload: dict):
-    """Handles the full streaming lifecycle with robust token parsing."""
     url = f"https://api.replicate.com/v1/models/{replicate_model_id}/predictions"
     headers = {"Authorization": f"Bearer {REPLICATE_API_TOKEN}", "Content-Type": "application/json"}
@@ -105,37 +104,40 @@ async def stream_replicate_sse(replicate_model_id: str, input_payload: dict):
             return
         try:
-            async with client.stream("GET", stream_url, headers={"Accept": "text/event-stream"}, timeout=None) as sse:
                 current_event = None
                 async for line in sse.aiter_lines():
                     if line.startswith("event:"):
                         current_event = line[len("event:"):].strip()
                     elif line.startswith("data:"):
-                        data = line[len("data:"):].strip()
                         if current_event == "output":
-                            # --- START OF STREAMING FIX ---
-                            # Replicate streams tokens that can be plain text or JSON-encoded strings.
-                            # We need to robustly parse them to preserve spaces correctly.
                             content_token = ""
                             try:
-                                # Attempt to parse data as JSON. This handles tokens like "\" Hello\""
-                                decoded_data = json.loads(data)
-                                if isinstance(decoded_data, str):
-                                    content_token = decoded_data
-                                else:
-                                    # It's some other JSON type, convert to string
-                                    content_token = str(decoded_data)
                             except json.JSONDecodeError:
-                                # It's not valid JSON, so it's a plain text token.
                                 content_token = data
-                            if content_token:
-                                chunk = {
-                                    "id": prediction_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": replicate_model_id,
-                                    "choices": [{"index": 0, "delta": {"content": content_token}, "finish_reason": None}]
-                                }
-                                yield json.dumps(chunk)
-                            # --- END OF STREAMING FIX ---
                         elif current_event == "done":
                             break
         except httpx.ReadTimeout:

     raise ValueError("REPLICATE_API_TOKEN environment variable not set.")
 # FastAPI Init
+app = FastAPI(title="Replicate to OpenAI Compatibility Layer", version="8.0.0 (Definitive Spacing Fix)")
 # --- Pydantic Models ---
 class ModelCard(BaseModel):
 def prepare_replicate_input(request: OpenAIChatCompletionRequest) -> Dict[str, Any]:
     """
     Formats the input for Replicate's API, flattening the message history into a
+    single 'prompt' string and handling images separately.
     """
     payload = {}
     prompt_parts = []
     return payload
 async def stream_replicate_sse(replicate_model_id: str, input_payload: dict):
+    """Handles the full streaming lifecycle with correct whitespace preservation."""
     url = f"https://api.replicate.com/v1/models/{replicate_model_id}/predictions"
     headers = {"Authorization": f"Bearer {REPLICATE_API_TOKEN}", "Content-Type": "application/json"}
             return
         try:
+            async with client.stream("GET", stream_url, headers={"Accept": "text-event-stream"}, timeout=None) as sse:
                 current_event = None
                 async for line in sse.aiter_lines():
                     if line.startswith("event:"):
                         current_event = line[len("event:"):].strip()
                     elif line.startswith("data:"):
+                        # --- START OF DEFINITIVE SPACING FIX ---
+                        # The .strip() method was the bug. It removed crucial whitespace.
+                        # This new logic correctly implements the SSE spec.
+                        raw_data = line[len("data:"):]
+                        if raw_data.startswith(" "):
+                            # Remove only the single, optional leading space
+                            data = raw_data[1:]
+                        else:
+                            data = raw_data
                         if current_event == "output":
+                            # The data is now guaranteed to have its whitespace preserved.
+                            # Replicate sometimes sends tokens as JSON strings (e.g., "\" a\""),
+                            # so we still need to decode them.
                             content_token = ""
                             try:
+                                content_token = json.loads(data)
                             except json.JSONDecodeError:
+                                # Not a JSON string, use the raw data
                                 content_token = data
+                            # We must send content_token even if it's just a space
+                            chunk = {
+                                "id": prediction_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": replicate_model_id,
+                                "choices": [{"index": 0, "delta": {"content": content_token}, "finish_reason": None}]
+                            }
+                            yield json.dumps(chunk)
+                        # --- END OF DEFINITIVE SPACING FIX ---
                         elif current_event == "done":
                             break
         except httpx.ReadTimeout: