19
19
import json
20
20
import logging
21
21
import mimetypes
22
- from contextlib import contextmanager
23
22
from dataclasses import dataclass
24
23
from pathlib import Path
25
24
from typing import (
26
25
TYPE_CHECKING ,
27
26
Any ,
28
27
AsyncIterable ,
29
28
BinaryIO ,
30
- ContextManager ,
31
29
Dict ,
32
- Generator ,
33
30
Iterable ,
34
31
List ,
35
32
Literal ,
61
58
# TYPES
62
59
UrlT = str
63
60
PathT = Union [str , Path ]
64
- BinaryT = Union [bytes , BinaryIO ]
65
- ContentT = Union [BinaryT , PathT , UrlT , "Image" ]
61
+ ContentT = Union [bytes , BinaryIO , PathT , UrlT , "Image" , bytearray , memoryview ]
66
62
67
63
# Used to set an `Accept: image/png` header
68
64
TASKS_EXPECTING_IMAGES = {"text-to-image" , "image-to-image" }
@@ -76,10 +72,35 @@ class RequestParameters:
76
72
task : str
77
73
model : Optional [str ]
78
74
json : Optional [Union [str , Dict , List ]]
79
- data : Optional [ContentT ]
75
+ data : Optional [bytes ]
80
76
headers : Dict [str , Any ]
81
77
82
78
79
class MimeBytes(bytes):
    """
    Immutable `bytes` payload that also remembers its MIME type.

    Instances behave exactly like `bytes`; the only addition is the
    `mime_type` attribute, which is `None` when the type is unknown.
    Wrapping an existing `MimeBytes` without giving a new `mime_type`
    carries the wrapped object's MIME type over.

    Example:
    ```python
    >>> b = MimeBytes(b"hello", "text/plain")
    >>> isinstance(b, bytes)
    True
    >>> b.mime_type
    'text/plain'
    ```
    """

    mime_type: Optional[str]

    def __new__(cls, data: bytes, mime_type: Optional[str] = None):
        # Resolve the effective MIME type first: an explicit argument wins,
        # otherwise inherit it from `data` when `data` is itself a MimeBytes.
        inherited = data.mime_type if isinstance(data, MimeBytes) else None
        resolved = inherited if mime_type is None else mime_type

        # `bytes` is immutable, so the payload must be set in `__new__`.
        instance = super().__new__(cls, data)
        instance.mime_type = resolved
        return instance
102
+
103
+
83
104
## IMPORT UTILS
84
105
85
106
@@ -117,31 +138,49 @@ def _import_pil_image():
117
138
118
139
119
140
@overload
120
- def _open_as_binary (
121
- content : ContentT ,
122
- ) -> ContextManager [BinaryT ]: ... # means "if input is not None, output is not None"
141
+ def _open_as_mime_bytes (content : ContentT ) -> MimeBytes : ... # means "if input is not None, output is not None"
123
142
124
143
125
144
@overload
126
- def _open_as_binary (
127
- content : Literal [None ],
128
- ) -> ContextManager [Literal [None ]]: ... # means "if input is None, output is None"
145
+ def _open_as_mime_bytes (content : Literal [None ]) -> Literal [None ]: ... # means "if input is None, output is None"
129
146
130
147
131
- @contextmanager # type: ignore
132
- def _open_as_binary (content : Optional [ContentT ]) -> Generator [Optional [BinaryT ], None , None ]:
148
+ def _open_as_mime_bytes (content : Optional [ContentT ]) -> Optional [MimeBytes ]:
133
149
"""Open `content` as a binary file, either from a URL, a local path, raw bytes, or a PIL Image.
134
150
135
151
Do nothing if `content` is None.
136
-
137
- TODO: handle base64 as input
138
152
"""
153
+ # If content is None, return None
154
+ if content is None :
155
+ return None
156
+
157
+ # If content is bytes, return it
158
+ if isinstance (content , bytes ):
159
+ return MimeBytes (content )
160
+
161
+ # If content is raw binary data (bytearray, memoryview)
162
+ if isinstance (content , (bytearray , memoryview )):
163
+ return MimeBytes (bytes (content ))
164
+
165
+ # If content is a binary file-like object
166
+ if hasattr (content , "read" ): # duck-typing instead of isinstance(content, BinaryIO)
167
+ logger .debug ("Reading content from BinaryIO" )
168
+ data = content .read ()
169
+ mime_type = mimetypes .guess_type (content .name )[0 ] if hasattr (content , "name" ) else None
170
+ if isinstance (data , str ):
171
+ raise TypeError ("Expected binary stream (bytes), but got text stream" )
172
+ return MimeBytes (data , mime_type = mime_type )
173
+
139
174
# If content is a string => must be either a URL or a path
140
175
if isinstance (content , str ):
141
176
if content .startswith ("https://" ) or content .startswith ("http://" ):
142
177
logger .debug (f"Downloading content from { content } " )
143
- yield get_session ().get (content ).content # TODO: retrieve as stream and pipe to post request ?
144
- return
178
+ response = get_session ().get (content )
179
+ mime_type = response .headers .get ("Content-Type" )
180
+ if mime_type is None :
181
+ mime_type = mimetypes .guess_type (content )[0 ]
182
+ return MimeBytes (response .content , mime_type = mime_type )
183
+
145
184
content = Path (content )
146
185
if not content .exists ():
147
186
raise FileNotFoundError (
@@ -152,9 +191,7 @@ def _open_as_binary(content: Optional[ContentT]) -> Generator[Optional[BinaryT],
152
191
# If content is a Path => open it
153
192
if isinstance (content , Path ):
154
193
logger .debug (f"Opening content from { content } " )
155
- with content .open ("rb" ) as f :
156
- yield f
157
- return
194
+ return MimeBytes (content .read_bytes (), mime_type = mimetypes .guess_type (content )[0 ])
158
195
159
196
# If content is a PIL Image => convert to bytes
160
197
if is_pillow_available ():
@@ -163,38 +200,37 @@ def _open_as_binary(content: Optional[ContentT]) -> Generator[Optional[BinaryT],
163
200
if isinstance (content , Image .Image ):
164
201
logger .debug ("Converting PIL Image to bytes" )
165
202
buffer = io .BytesIO ()
166
- content . save ( buffer , format = content .format or "PNG" )
167
- yield buffer . getvalue ( )
168
- return
203
+ format = content .format or "PNG"
204
+ content . save ( buffer , format = format )
205
+ return MimeBytes ( buffer . getvalue (), mime_type = f"image/ { format . lower () } " )
169
206
170
- # Otherwise: already a file-like object or None
171
- yield content # type: ignore
207
+ # If nothing matched, raise error
208
+ raise TypeError (
209
+ f"Unsupported content type: { type (content )} . "
210
+ "Expected one of: bytes, bytearray, BinaryIO, memoryview, Path, str (URL or file path), or PIL.Image.Image."
211
+ )
172
212
173
213
174
214
def _b64_encode(content: ContentT) -> str:
    """Return `content` encoded as a base64 string.

    `content` is first loaded into memory with `_open_as_mime_bytes`, so it
    may be anything that helper accepts (raw bytes, an opened binary file,
    a path or a URL).
    """
    payload = _open_as_mime_bytes(content)
    encoded = base64.b64encode(payload)
    return encoded.decode()
179
218
180
219
181
220
def _as_url(content: ContentT, default_mime_type: str) -> str:
    """Return a URL for `content`, building a base64 `data:` URL if needed.

    Args:
        content: The input to expose as a URL. A string that already starts
            with `http://`, `https://` or `data:` is returned unchanged.
            Anything else is loaded with `_open_as_mime_bytes`.
        default_mime_type: MIME type used in the data URL when the loaded
            bytes carry no MIME type of their own.

    Returns:
        Either the original URL string or a
        `data:<mime type>;base64,<payload>` URL.
    """
    url_prefixes = ("http://", "https://", "data:")
    if isinstance(content, str) and content.startswith(url_prefixes):
        # Already usable as a URL: pass it through untouched.
        return content

    # Load the payload; it carries the detected MIME type, if any.
    payload = _open_as_mime_bytes(content)
    mime_type = payload.mime_type if payload.mime_type else default_mime_type

    encoded_data = base64.b64encode(payload).decode()
    return f"data:{mime_type};base64,{encoded_data}"
199
235
200
236
@@ -239,9 +275,6 @@ def _as_dict(response: Union[bytes, Dict]) -> Dict:
239
275
return json .loads (response ) if isinstance (response , bytes ) else response
240
276
241
277
242
- ## PAYLOAD UTILS
243
-
244
-
245
278
## STREAMING UTILS
246
279
247
280
0 commit comments