@@ -947,8 +947,8 @@ def document_question_answering(
947
947
Answer questions on document images.
948
948
949
949
Args:
950
- image (`Union[str, Path, bytes, BinaryIO]`):
951
- The input image for the context. It can be raw bytes, an image file, or a URL to an online image.
950
+ image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
951
+ The input image for the context. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
952
952
question (`str`):
953
953
Question to be answered.
954
954
model (`str`, *optional*):
@@ -1156,8 +1156,8 @@ def image_classification(
1156
1156
Perform image classification on the given image using the specified model.
1157
1157
1158
1158
Args:
1159
- image (`Union[str, Path, bytes, BinaryIO]`):
1160
- The image to classify. It can be raw bytes, an image file, or a URL to an online image.
1159
+ image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
1160
+ The image to classify. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
1161
1161
model (`str`, *optional*):
1162
1162
The model to use for image classification. Can be a model ID hosted on the Hugging Face Hub or a URL to a
1163
1163
deployed Inference Endpoint. If not provided, the default recommended model for image classification will be used.
@@ -1214,8 +1214,8 @@ def image_segmentation(
1214
1214
</Tip>
1215
1215
1216
1216
Args:
1217
- image (`Union[str, Path, bytes, BinaryIO]`):
1218
- The image to segment. It can be raw bytes, an image file, or a URL to an online image.
1217
+ image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
1218
+ The image to segment. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
1219
1219
model (`str`, *optional*):
1220
1220
The model to use for image segmentation. Can be a model ID hosted on the Hugging Face Hub or a URL to a
1221
1221
deployed Inference Endpoint. If not provided, the default recommended model for image segmentation will be used.
@@ -1286,8 +1286,8 @@ def image_to_image(
1286
1286
</Tip>
1287
1287
1288
1288
Args:
1289
- image (`Union[str, Path, bytes, BinaryIO]`):
1290
- The input image for translation. It can be raw bytes, an image file, or a URL to an online image.
1289
+ image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
1290
+ The input image for translation. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
1291
1291
prompt (`str`, *optional*):
1292
1292
The text prompt to guide the image generation.
1293
1293
negative_prompt (`str`, *optional*):
@@ -1348,8 +1348,8 @@ def image_to_text(self, image: ContentT, *, model: Optional[str] = None) -> Imag
1348
1348
(OCR), Pix2Struct, etc). Please have a look to the model card to learn more about a model's specificities.
1349
1349
1350
1350
Args:
1351
- image (`Union[str, Path, bytes, BinaryIO]`):
1352
- The input image to caption. It can be raw bytes, an image file, or a URL to an online image. .
1351
+ image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
1352
+ The input image to caption. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
1353
1353
model (`str`, *optional*):
1354
1354
The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
1355
1355
Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.
@@ -1399,8 +1399,8 @@ def object_detection(
1399
1399
</Tip>
1400
1400
1401
1401
Args:
1402
- image (`Union[str, Path, bytes, BinaryIO]`):
1403
- The image to detect objects on. It can be raw bytes, an image file, or a URL to an online image.
1402
+ image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
1403
+ The image to detect objects on. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
1404
1404
model (`str`, *optional*):
1405
1405
The model to use for object detection. Can be a model ID hosted on the Hugging Face Hub or a URL to a
1406
1406
deployed Inference Endpoint. If not provided, the default recommended model for object detection (DETR) will be used.
@@ -2974,8 +2974,8 @@ def visual_question_answering(
2974
2974
Answering open-ended questions based on an image.
2975
2975
2976
2976
Args:
2977
- image (`Union[str, Path, bytes, BinaryIO]`):
2978
- The input image for the context. It can be raw bytes, an image file, or a URL to an online image.
2977
+ image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
2978
+ The input image for the context. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
2979
2979
question (`str`):
2980
2980
Question to be answered.
2981
2981
model (`str`, *optional*):
@@ -3141,8 +3141,8 @@ def zero_shot_image_classification(
3141
3141
Provide input image and text labels to predict text labels for the image.
3142
3142
3143
3143
Args:
3144
- image (`Union[str, Path, bytes, BinaryIO]`):
3145
- The input image to caption. It can be raw bytes, an image file, or a URL to an online image.
3144
+ image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
3145
+ The input image to caption. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
3146
3146
candidate_labels (`List[str]`):
3147
3147
The candidate labels for this image
3148
3148
labels (`List[str]`, *optional*):
0 commit comments