adalflow/adalflow/components/model_client/ollama_client.py (6 additions, 5 deletions)
@@ -345,14 +345,15 @@ def parse_chat_completion(
         # Check for async generator (async streaming)
         if hasattr(completion, '__aiter__'):
             log.debug("Async streaming response detected")
-            # For streaming, return GeneratorOutput with the generator in raw_response
-            # This matches the OpenAI client pattern
-            return GeneratorOutput(data=None, raw_response=completion, api_response=completion)
+            # For streaming, return the raw async generator directly.
+            # The Generator component will iterate over this and wrap each chunk.
+            return completion
         # Check for sync generator (sync streaming)
         elif isinstance(completion, GeneratorType):
             log.debug("Sync streaming response detected")
-            # For streaming, return GeneratorOutput with the generator in raw_response
-            return GeneratorOutput(data=None, raw_response=completion, api_response=completion)
+            # For streaming, return the raw sync generator directly.
+            # The Generator component will iterate over this and wrap each chunk.
+            return completion
         # Non-streaming generate API
         elif self.generate:
             return parse_generate_response(completion)
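With this change, whatever sits downstream of parse_chat_completion receives the raw generator and iterates it itself. A minimal consumption sketch, assuming Ollama-style chunks shaped like {"message": {"content": ...}} (the shape mirrors the tests below; the helper name is illustrative, not part of adalflow):

# Minimal sketch: collect streamed content from the raw sync generator.
def collect_stream(raw_stream):
    parts = []
    for chunk in raw_stream:
        message = chunk.get("message", {})
        parts.append(message.get("content", ""))
    return "".join(parts)

# Usage with any iterable of Ollama-style chunks:
# collect_stream(iter([{"message": {"content": "Hi"}}])) -> "Hi"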
adalflow/adalflow/core/generator.py (10 additions, 2 deletions)
@@ -1266,8 +1266,12 @@ def call(
             output = self._post_call(completion)
         except Exception as e:
             log.error(f"Error processing the output: {e}")
+            # Check if completion is a generator to avoid placing generator object in raw_response
+            from typing import Generator as GeneratorType
+            from collections.abc import AsyncGenerator as AsyncGeneratorABC
+            raw_response = None if isinstance(completion, (GeneratorType, AsyncGeneratorABC)) else str(completion)
             output = GeneratorOutput(
-                raw_response=str(completion), error=str(e), id=id, input=prompt_str
+                raw_response=raw_response, error=str(e), id=id, input=prompt_str
             )
 
         # User only need to use one of them, no need to use them all.

Review comment (Member), on the added generator check: we almost never change the generator output; raw_response is for streaming, data is for the final parsed result.
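The guard exists because calling str() on a live generator produces only its repr, for example "<generator object ... at 0x...>", which is useless as a raw_response. A standalone sketch of the same check, using the collections.abc ABCs directly (equivalent to the typing aliases imported in the diff; the function name is illustrative):

from collections.abc import AsyncGenerator, Generator

def safe_raw_response(completion):
    # A streaming completion cannot be meaningfully stringified,
    # so store None instead of its repr.
    if isinstance(completion, (Generator, AsyncGenerator)):
        return None
    return str(completion)

def demo():
    yield "chunk"

assert safe_raw_response(demo()) is None
assert safe_raw_response("final text") == "final text"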
@@ -1356,8 +1360,12 @@ async def acall(
             output = await self._async_post_call(completion)
         except Exception as e:
             log.error(f"Error processing the output: {e}")
+            # Check if completion is a generator to avoid placing generator object in raw_response
+            from typing import Generator as GeneratorType
+            from collections.abc import AsyncGenerator as AsyncGeneratorABC
+            raw_response = None if isinstance(completion, (GeneratorType, AsyncGeneratorABC)) else str(completion)
             output = GeneratorOutput(
-                raw_response=str(completion), error=str(e), id=id, input=prompt_str
+                raw_response=raw_response, error=str(e), id=id, input=prompt_str
             )
 
         # User only need to use one of them, no need to use them all.
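The async path mirrors the sync fix, except that chunks are consumed with async for. A sketch of collecting an async stream, under the same assumed chunk shape as above (both function names are illustrative):

import asyncio

async def collect_async_stream(raw_stream):
    # Concatenate content from an async stream of Ollama-style chunks.
    parts = []
    async for chunk in raw_stream:
        parts.append(chunk.get("message", {}).get("content", ""))
    return "".join(parts)

async def demo_stream():
    for piece in ("Hel", "lo"):
        yield {"message": {"content": piece}}

print(asyncio.run(collect_async_stream(demo_stream())))  # -> Hello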
adalflow/tests/test_ollama_client.py (10 additions, 12 deletions)
@@ -125,14 +125,13 @@ def mock_stream_generator():
         # Parse the result
         parsed = ollama_client.parse_chat_completion(result)
 
-        # For streaming, the parsed result should be a GeneratorOutput with raw_response containing the generator
-        self.assertIsInstance(parsed, GeneratorOutput)
-        self.assertIsNotNone(parsed.raw_response)
-        self.assertEqual(parsed.api_response, result)
+        # For streaming, the parsed result should be the raw generator directly
+        self.assertTrue(hasattr(parsed, '__iter__'))
+        self.assertNotIsInstance(parsed, GeneratorOutput)
 
-        # Verify we can iterate through the raw_response
+        # Verify we can iterate through the raw generator
         content_parts = []
-        for chunk in parsed.raw_response:
+        for chunk in parsed:
             if "message" in chunk:
                 content_parts.append(chunk["message"]["content"])
@@ -173,14 +172,13 @@ async def mock_async_stream_generator():
         # Parse the result
         parsed = ollama_client.parse_chat_completion(result)
 
-        # For streaming, the parsed result should be a GeneratorOutput with raw_response containing the async generator
-        self.assertIsInstance(parsed, GeneratorOutput)
-        self.assertIsNotNone(parsed.raw_response)
-        self.assertEqual(parsed.api_response, result)
+        # For streaming, the parsed result should be the raw async generator directly
+        self.assertTrue(hasattr(parsed, '__aiter__'))
+        self.assertNotIsInstance(parsed, GeneratorOutput)
 
-        # Verify we can iterate through the raw_response asynchronously
+        # Verify we can iterate through the raw async generator
         content_parts = []
-        async for chunk in parsed.raw_response:
+        async for chunk in parsed:
             if "message" in chunk:
                 content_parts.append(chunk["message"]["content"])
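The fixtures named in the hunk headers (mock_stream_generator, mock_async_stream_generator) are elided above. One plausible shape that would satisfy these assertions, purely illustrative and not the repository's actual fixtures:

# Illustrative stand-ins; the real fixtures may differ in fields and chunk count.
def mock_stream_generator():
    for piece in ("Hello", " world"):
        yield {"message": {"content": piece}}

async def mock_async_stream_generator():
    for piece in ("Hello", " world"):
        yield {"message": {"content": piece}}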