Skip to content

Commit 503d353

Browse files
HuggingFaceInfra, Wauplin, hanouticelina
authored
[Bot] Update inference types (#2664)
* Update inference types (automated commit)
* fix quality after merging main
* another fix
* fix tests
* Update inference types (automated commit)
* Update inference types (automated commit)
* fix quality
* Update inference types (automated commit)
* Update inference types (automated commit)

---------

Co-authored-by: Wauplin <11801849+Wauplin@users.noreply.github.com>
Co-authored-by: Celina Hanouti <hanouticelina@gmail.com>
1 parent 446e9c1 commit 503d353

34 files changed

+138
-222
lines changed

docs/source/en/package_reference/inference_types.md

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -369,8 +369,6 @@ This part of the lib is still under development and will be improved in future r
369369

370370
[[autodoc]] huggingface_hub.ZeroShotClassificationInput
371371

372-
[[autodoc]] huggingface_hub.ZeroShotClassificationInputData
373-
374372
[[autodoc]] huggingface_hub.ZeroShotClassificationOutputElement
375373

376374
[[autodoc]] huggingface_hub.ZeroShotClassificationParameters
@@ -381,8 +379,6 @@ This part of the lib is still under development and will be improved in future r
381379

382380
[[autodoc]] huggingface_hub.ZeroShotImageClassificationInput
383381

384-
[[autodoc]] huggingface_hub.ZeroShotImageClassificationInputData
385-
386382
[[autodoc]] huggingface_hub.ZeroShotImageClassificationOutputElement
387383

388384
[[autodoc]] huggingface_hub.ZeroShotImageClassificationParameters
@@ -395,6 +391,6 @@ This part of the lib is still under development and will be improved in future r
395391

396392
[[autodoc]] huggingface_hub.ZeroShotObjectDetectionInput
397393

398-
[[autodoc]] huggingface_hub.ZeroShotObjectDetectionInputData
399-
400394
[[autodoc]] huggingface_hub.ZeroShotObjectDetectionOutputElement
395+
396+
[[autodoc]] huggingface_hub.ZeroShotObjectDetectionParameters

docs/source/ko/package_reference/inference_types.md

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -368,8 +368,6 @@ rendered properly in your Markdown viewer.
368368

369369
[[autodoc]] huggingface_hub.ZeroShotClassificationInput
370370

371-
[[autodoc]] huggingface_hub.ZeroShotClassificationInputData
372-
373371
[[autodoc]] huggingface_hub.ZeroShotClassificationOutputElement
374372

375373
[[autodoc]] huggingface_hub.ZeroShotClassificationParameters
@@ -380,8 +378,6 @@ rendered properly in your Markdown viewer.
380378

381379
[[autodoc]] huggingface_hub.ZeroShotImageClassificationInput
382380

383-
[[autodoc]] huggingface_hub.ZeroShotImageClassificationInputData
384-
385381
[[autodoc]] huggingface_hub.ZeroShotImageClassificationOutputElement
386382

387383
[[autodoc]] huggingface_hub.ZeroShotImageClassificationParameters
@@ -394,6 +390,6 @@ rendered properly in your Markdown viewer.
394390

395391
[[autodoc]] huggingface_hub.ZeroShotObjectDetectionInput
396392

397-
[[autodoc]] huggingface_hub.ZeroShotObjectDetectionInputData
398-
399393
[[autodoc]] huggingface_hub.ZeroShotObjectDetectionOutputElement
394+
395+
[[autodoc]] huggingface_hub.ZeroShotObjectDetectionParameters

src/huggingface_hub/__init__.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -416,17 +416,15 @@
416416
"VisualQuestionAnsweringOutputElement",
417417
"VisualQuestionAnsweringParameters",
418418
"ZeroShotClassificationInput",
419-
"ZeroShotClassificationInputData",
420419
"ZeroShotClassificationOutputElement",
421420
"ZeroShotClassificationParameters",
422421
"ZeroShotImageClassificationInput",
423-
"ZeroShotImageClassificationInputData",
424422
"ZeroShotImageClassificationOutputElement",
425423
"ZeroShotImageClassificationParameters",
426424
"ZeroShotObjectDetectionBoundingBox",
427425
"ZeroShotObjectDetectionInput",
428-
"ZeroShotObjectDetectionInputData",
429426
"ZeroShotObjectDetectionOutputElement",
427+
"ZeroShotObjectDetectionParameters",
430428
],
431429
"inference_api": [
432430
"InferenceApi",
@@ -947,17 +945,15 @@ def __dir__():
947945
VisualQuestionAnsweringOutputElement, # noqa: F401
948946
VisualQuestionAnsweringParameters, # noqa: F401
949947
ZeroShotClassificationInput, # noqa: F401
950-
ZeroShotClassificationInputData, # noqa: F401
951948
ZeroShotClassificationOutputElement, # noqa: F401
952949
ZeroShotClassificationParameters, # noqa: F401
953950
ZeroShotImageClassificationInput, # noqa: F401
954-
ZeroShotImageClassificationInputData, # noqa: F401
955951
ZeroShotImageClassificationOutputElement, # noqa: F401
956952
ZeroShotImageClassificationParameters, # noqa: F401
957953
ZeroShotObjectDetectionBoundingBox, # noqa: F401
958954
ZeroShotObjectDetectionInput, # noqa: F401
959-
ZeroShotObjectDetectionInputData, # noqa: F401
960955
ZeroShotObjectDetectionOutputElement, # noqa: F401
956+
ZeroShotObjectDetectionParameters, # noqa: F401
961957
)
962958
from .inference_api import InferenceApi # noqa: F401
963959
from .keras_mixin import (

src/huggingface_hub/inference/_client.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -350,7 +350,7 @@ def audio_classification(
350350
top_k (`int`, *optional*):
351351
When specified, limits the output to the top K most probable classes.
352352
function_to_apply (`"AudioClassificationOutputTransform"`, *optional*):
353-
The function to apply to the output.
353+
The function to apply to the model outputs in order to retrieve the scores.
354354
355355
Returns:
356356
`List[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.
@@ -982,7 +982,7 @@ def document_question_answering(
982982
>>> from huggingface_hub import InferenceClient
983983
>>> client = InferenceClient()
984984
>>> client.document_question_answering(image="https://huggingface.co/spaces/impira/docquery/resolve/2359223c1837a7587402bda0f2643382a6eefeab/invoice.png", question="What is the invoice number?")
985-
[DocumentQuestionAnsweringOutputElement(answer='us-001', end=16, score=0.9999666213989258, start=16, words=None)]
985+
[DocumentQuestionAnsweringOutputElement(answer='us-001', end=16, score=0.9999666213989258, start=16)]
986986
```
987987
"""
988988
inputs: Dict[str, Any] = {"question": question, "image": _b64_encode(image)}
@@ -1133,7 +1133,7 @@ def image_classification(
11331133
The model to use for image classification. Can be a model ID hosted on the Hugging Face Hub or a URL to a
11341134
deployed Inference Endpoint. If not provided, the default recommended model for image classification will be used.
11351135
function_to_apply (`"ImageClassificationOutputTransform"`, *optional*):
1136-
The function to apply to the output.
1136+
The function to apply to the model outputs in order to retrieve the scores.
11371137
top_k (`int`, *optional*):
11381138
When specified, limits the output to the top K most probable classes.
11391139
Returns:
@@ -1812,7 +1812,7 @@ def text_classification(
18121812
top_k (`int`, *optional*):
18131813
When specified, limits the output to the top K most probable classes.
18141814
function_to_apply (`"TextClassificationOutputTransform"`, *optional*):
1815-
The function to apply to the output.
1815+
The function to apply to the model outputs in order to retrieve the scores.
18161816
18171817
Returns:
18181818
`List[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.
@@ -2484,11 +2484,11 @@ def text_to_speech(
24842484
max_length (`int`, *optional*):
24852485
The maximum length (in tokens) of the generated text, including the input.
24862486
max_new_tokens (`int`, *optional*):
2487-
The maximum number of tokens to generate. Takes precedence over maxLength.
2487+
The maximum number of tokens to generate. Takes precedence over max_length.
24882488
min_length (`int`, *optional*):
24892489
The minimum length (in tokens) of the generated text, including the input.
24902490
min_new_tokens (`int`, *optional*):
2491-
The minimum number of tokens to generate. Takes precedence over maxLength.
2491+
The minimum number of tokens to generate. Takes precedence over min_length.
24922492
num_beam_groups (`int`, *optional*):
24932493
Number of groups to divide num_beams into in order to ensure diversity among different groups of beams.
24942494
See [this paper](https://hf.co/papers/1610.02424) for more details.
@@ -2791,12 +2791,13 @@ def zero_shot_classification(
27912791
the label likelihoods for each sequence is 1. If true, the labels are considered independent and
27922792
probabilities are normalized for each candidate.
27932793
hypothesis_template (`str`, *optional*):
2794-
The sentence used in conjunction with candidateLabels to attempt the text classification by replacing
2795-
the placeholder with the candidate labels.
2794+
The sentence used in conjunction with `candidate_labels` to attempt the text classification by
2795+
replacing the placeholder with the candidate labels.
27962796
model (`str`, *optional*):
27972797
The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
27982798
Inference Endpoint. This parameter overrides the model defined at the instance level. If not provided, the default recommended zero-shot classification model will be used.
27992799
2800+
28002801
Returns:
28012802
`List[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.
28022803
@@ -2887,12 +2888,12 @@ def zero_shot_image_classification(
28872888
self,
28882889
image: ContentT,
28892890
# temporarily keeping it optional for backward compatibility.
2890-
candidate_labels: Optional[List[str]] = None,
2891+
candidate_labels: List[str] = None, # type: ignore
28912892
*,
28922893
model: Optional[str] = None,
28932894
hypothesis_template: Optional[str] = None,
28942895
# deprecated argument
2895-
labels: Optional[List[str]] = None, # type: ignore
2896+
labels: List[str] = None, # type: ignore
28962897
) -> List[ZeroShotImageClassificationOutputElement]:
28972898
"""
28982899
Provide input image and text labels to predict text labels for the image.
@@ -2908,8 +2909,8 @@ def zero_shot_image_classification(
29082909
The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
29092910
Inference Endpoint. This parameter overrides the model defined at the instance level. If not provided, the default recommended zero-shot image classification model will be used.
29102911
hypothesis_template (`str`, *optional*):
2911-
The sentence used in conjunction with candidateLabels to attempt the text classification by replacing
2912-
the placeholder with the candidate labels.
2912+
The sentence used in conjunction with `candidate_labels` to attempt the image classification by
2913+
replacing the placeholder with the candidate labels.
29132914
29142915
Returns:
29152916
`List[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.

src/huggingface_hub/inference/_generated/_async_client.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -383,7 +383,7 @@ async def audio_classification(
383383
top_k (`int`, *optional*):
384384
When specified, limits the output to the top K most probable classes.
385385
function_to_apply (`"AudioClassificationOutputTransform"`, *optional*):
386-
The function to apply to the output.
386+
The function to apply to the model outputs in order to retrieve the scores.
387387
388388
Returns:
389389
`List[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.
@@ -1025,7 +1025,7 @@ async def document_question_answering(
10251025
>>> from huggingface_hub import AsyncInferenceClient
10261026
>>> client = AsyncInferenceClient()
10271027
>>> await client.document_question_answering(image="https://huggingface.co/spaces/impira/docquery/resolve/2359223c1837a7587402bda0f2643382a6eefeab/invoice.png", question="What is the invoice number?")
1028-
[DocumentQuestionAnsweringOutputElement(answer='us-001', end=16, score=0.9999666213989258, start=16, words=None)]
1028+
[DocumentQuestionAnsweringOutputElement(answer='us-001', end=16, score=0.9999666213989258, start=16)]
10291029
```
10301030
"""
10311031
inputs: Dict[str, Any] = {"question": question, "image": _b64_encode(image)}
@@ -1178,7 +1178,7 @@ async def image_classification(
11781178
The model to use for image classification. Can be a model ID hosted on the Hugging Face Hub or a URL to a
11791179
deployed Inference Endpoint. If not provided, the default recommended model for image classification will be used.
11801180
function_to_apply (`"ImageClassificationOutputTransform"`, *optional*):
1181-
The function to apply to the output.
1181+
The function to apply to the model outputs in order to retrieve the scores.
11821182
top_k (`int`, *optional*):
11831183
When specified, limits the output to the top K most probable classes.
11841184
Returns:
@@ -1874,7 +1874,7 @@ async def text_classification(
18741874
top_k (`int`, *optional*):
18751875
When specified, limits the output to the top K most probable classes.
18761876
function_to_apply (`"TextClassificationOutputTransform"`, *optional*):
1877-
The function to apply to the output.
1877+
The function to apply to the model outputs in order to retrieve the scores.
18781878
18791879
Returns:
18801880
`List[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.
@@ -2549,11 +2549,11 @@ async def text_to_speech(
25492549
max_length (`int`, *optional*):
25502550
The maximum length (in tokens) of the generated text, including the input.
25512551
max_new_tokens (`int`, *optional*):
2552-
The maximum number of tokens to generate. Takes precedence over maxLength.
2552+
The maximum number of tokens to generate. Takes precedence over max_length.
25532553
min_length (`int`, *optional*):
25542554
The minimum length (in tokens) of the generated text, including the input.
25552555
min_new_tokens (`int`, *optional*):
2556-
The minimum number of tokens to generate. Takes precedence over maxLength.
2556+
The minimum number of tokens to generate. Takes precedence over min_length.
25572557
num_beam_groups (`int`, *optional*):
25582558
Number of groups to divide num_beams into in order to ensure diversity among different groups of beams.
25592559
See [this paper](https://hf.co/papers/1610.02424) for more details.
@@ -2860,12 +2860,13 @@ async def zero_shot_classification(
28602860
the label likelihoods for each sequence is 1. If true, the labels are considered independent and
28612861
probabilities are normalized for each candidate.
28622862
hypothesis_template (`str`, *optional*):
2863-
The sentence used in conjunction with candidateLabels to attempt the text classification by replacing
2864-
the placeholder with the candidate labels.
2863+
The sentence used in conjunction with `candidate_labels` to attempt the text classification by
2864+
replacing the placeholder with the candidate labels.
28652865
model (`str`, *optional*):
28662866
The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
28672867
Inference Endpoint. This parameter overrides the model defined at the instance level. If not provided, the default recommended zero-shot classification model will be used.
28682868
2869+
28692870
Returns:
28702871
`List[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.
28712872
@@ -2958,12 +2959,12 @@ async def zero_shot_image_classification(
29582959
self,
29592960
image: ContentT,
29602961
# temporarily keeping it optional for backward compatibility.
2961-
candidate_labels: Optional[List[str]] = None,
2962+
candidate_labels: List[str] = None, # type: ignore
29622963
*,
29632964
model: Optional[str] = None,
29642965
hypothesis_template: Optional[str] = None,
29652966
# deprecated argument
2966-
labels: Optional[List[str]] = None, # type: ignore
2967+
labels: List[str] = None, # type: ignore
29672968
) -> List[ZeroShotImageClassificationOutputElement]:
29682969
"""
29692970
Provide input image and text labels to predict text labels for the image.
@@ -2979,8 +2980,8 @@ async def zero_shot_image_classification(
29792980
The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
29802981
Inference Endpoint. This parameter overrides the model defined at the instance level. If not provided, the default recommended zero-shot image classification model will be used.
29812982
hypothesis_template (`str`, *optional*):
2982-
The sentence used in conjunction with candidateLabels to attempt the text classification by replacing
2983-
the placeholder with the candidate labels.
2983+
The sentence used in conjunction with `candidate_labels` to attempt the image classification by
2984+
replacing the placeholder with the candidate labels.
29842985
29852986
Returns:
29862987
`List[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.

src/huggingface_hub/inference/_generated/types/__init__.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -168,19 +168,17 @@
168168
)
169169
from .zero_shot_classification import (
170170
ZeroShotClassificationInput,
171-
ZeroShotClassificationInputData,
172171
ZeroShotClassificationOutputElement,
173172
ZeroShotClassificationParameters,
174173
)
175174
from .zero_shot_image_classification import (
176175
ZeroShotImageClassificationInput,
177-
ZeroShotImageClassificationInputData,
178176
ZeroShotImageClassificationOutputElement,
179177
ZeroShotImageClassificationParameters,
180178
)
181179
from .zero_shot_object_detection import (
182180
ZeroShotObjectDetectionBoundingBox,
183181
ZeroShotObjectDetectionInput,
184-
ZeroShotObjectDetectionInputData,
185182
ZeroShotObjectDetectionOutputElement,
183+
ZeroShotObjectDetectionParameters,
186184
)

src/huggingface_hub/inference/_generated/types/audio_classification.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,10 @@
1414

1515
@dataclass
1616
class AudioClassificationParameters(BaseInferenceType):
17-
"""Additional inference parameters
18-
Additional inference parameters for Audio Classification
19-
"""
17+
"""Additional inference parameters for Audio Classification"""
2018

2119
function_to_apply: Optional["AudioClassificationOutputTransform"] = None
22-
"""The function to apply to the output."""
20+
"""The function to apply to the model outputs in order to retrieve the scores."""
2321
top_k: Optional[int] = None
2422
"""When specified, limits the output to the top K most probable classes."""
2523

@@ -33,7 +31,7 @@ class AudioClassificationInput(BaseInferenceType):
3331
also provide the audio data as a raw bytes payload.
3432
"""
3533
parameters: Optional[AudioClassificationParameters] = None
36-
"""Additional inference parameters"""
34+
"""Additional inference parameters for Audio Classification"""
3735

3836

3937
@dataclass

src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,7 @@
1414

1515
@dataclass
1616
class AutomaticSpeechRecognitionGenerationParameters(BaseInferenceType):
17-
"""Parametrization of the text generation process
18-
Ad-hoc parametrization of the text generation process
19-
"""
17+
"""Parametrization of the text generation process"""
2018

2119
do_sample: Optional[bool] = None
2220
"""Whether to use sampling instead of greedy decoding when generating new tokens."""
@@ -76,11 +74,9 @@ class AutomaticSpeechRecognitionGenerationParameters(BaseInferenceType):
7674

7775
@dataclass
7876
class AutomaticSpeechRecognitionParameters(BaseInferenceType):
79-
"""Additional inference parameters
80-
Additional inference parameters for Automatic Speech Recognition
81-
"""
77+
"""Additional inference parameters for Automatic Speech Recognition"""
8278

83-
generate: Optional[AutomaticSpeechRecognitionGenerationParameters] = None
79+
generation_parameters: Optional[AutomaticSpeechRecognitionGenerationParameters] = None
8480
"""Parametrization of the text generation process"""
8581
return_timestamps: Optional[bool] = None
8682
"""Whether to output corresponding timestamps with the generated text"""
@@ -95,7 +91,7 @@ class AutomaticSpeechRecognitionInput(BaseInferenceType):
9591
also provide the audio data as a raw bytes payload.
9692
"""
9793
parameters: Optional[AutomaticSpeechRecognitionParameters] = None
98-
"""Additional inference parameters"""
94+
"""Additional inference parameters for Automatic Speech Recognition"""
9995

10096

10197
@dataclass

src/huggingface_hub/inference/_generated/types/depth_estimation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ class DepthEstimationInput(BaseInferenceType):
1616
inputs: Any
1717
"""The input image data"""
1818
parameters: Optional[Dict[str, Any]] = None
19-
"""Additional inference parameters"""
19+
"""Additional inference parameters for Depth Estimation"""
2020

2121

2222
@dataclass

0 commit comments

Comments
 (0)