@@ -10,8 +10,16 @@ export class Models extends APIResource {
1010 * The results will be sorted by descending order of relevance. For each document,
1111 * the index and the score will be returned. The index is relative to the documents
1212 * array that was passed in. The score is the query-document relevancy determined
13- * by the reranker model. The value will be returned in descending order to
13+ * by the reranker model. The results will be returned in descending order of
1414 * relevance.
15+ *
16+ * Organizations will, by default, have a rate limit of `2,500,000`
17+ * bytes-per-minute. If this is exceeded, requests will be throttled into
18+ * `latency: "slow"` mode, up to `10,000,000` bytes-per-minute. If even this is
19+ * exceeded, you will get a `429` error. To request higher rate limits, please
20+ * contact [founders@zeroentropy.dev](mailto:founders@zeroentropy.dev) or message
21+ * us on [Discord](https://go.zeroentropy.dev/discord) or
22+ * [Slack](https://go.zeroentropy.dev/slack)!
1523 */
1624 rerank ( body : ModelRerankParams , options ?: Core . RequestOptions ) : Core . APIPromise < ModelRerankResponse > {
1725 return this . _client . post ( '/models/rerank' , { body, ...options } ) ;
@@ -36,8 +44,8 @@ export namespace ModelRerankResponse {
3644 /**
3745 * The relevance score between this document and the query. This number will range
3846 * between 0.0 and 1.0. This score is dependent on only the query and the scored
39- * document; other documents do not affect this score. This value is deterministic,
40- * but may vary slightly due to floating point error.
47+ * document; other documents do not affect this score. This value is intended to be
48+ * deterministic, but it may vary slightly due to floating point error.
4149 */
4250 relevance_score : number ;
4351 }
@@ -50,19 +58,29 @@ export interface ModelRerankParams {
5058 documents : Array < string > ;
5159
5260 /**
53- * The query to rerank the documents by. Results will be in descending order of
54- * relevance.
61+ * The model ID to use for reranking. Options are: ["zerank-2", "zerank-1",
62+ * "zerank-1-small"]
63+ */
64+ model : string ;
65+
66+ /**
67+ * The query to rerank the documents by.
5568 */
5669 query : string ;
5770
5871 /**
59- * The model ID to use for reranking. Options are: ["zerank-1-large"]
72+ * Whether the call will be run as a "fast" or "slow" inference. Rate limits for slow API
73+ * calls are orders of magnitude higher, but you can expect >10 second latency.
74+ * Fast inferences are guaranteed subsecond, but rate limits are lower. If not
75+ * specified, first a "fast" call will be attempted, but if you have exceeded your
76+ * fast rate limit, then a slow call will be executed. If explicitly set to "fast",
77+ * then 429 will be returned if it cannot be executed fast.
6078 */
61- model ?: string ;
79+ latency ?: 'fast' | 'slow' | null ;
6280
6381 /**
6482 * If provided, then only the top `n` documents will be returned in the results
65- * array.
83+ * array. Otherwise, `n` will be the length of the provided documents array.
6684 */
6785 top_n ?: number | null ;
6886}
0 commit comments