Skip to content

Commit 70861ee

Browse files
Add gp-llm-v2 model ID and inference endpoint (#138045)
* Add gp-llm-v2 model ID and pre-configured inference endpoint
* Rename inference endpoint ID
* Autoformat
* Rename variables for gp-llm-v2 model ID and inference endpoint ID
* Autoformat
* Remove new model from Mock auth server
* Rename completion service settings for new model
* Rename completion settings
* Add gp-llm-v2 to mock response
* Rename gp-llm-v2 inference endpoint ID
* Add support for several inference endpoints for gp-llm-v2 model ID
1 parent 928e584 commit 70861ee

File tree

4 files changed

+43
-2
lines changed

4 files changed

+43
-2
lines changed

x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetModelsWithElasticInferenceServiceIT.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,14 +42,15 @@ public void testGetDefaultEndpoints() throws IOException {
4242
var allModels = getAllModels();
4343
var chatCompletionModels = getModels("_all", TaskType.CHAT_COMPLETION);
4444

45-
assertThat(allModels, hasSize(7));
46-
assertThat(chatCompletionModels, hasSize(1));
45+
assertThat(allModels, hasSize(8));
46+
assertThat(chatCompletionModels, hasSize(2));
4747

4848
for (var model : chatCompletionModels) {
4949
assertEquals("chat_completion", model.get("task_type"));
5050
}
5151

5252
assertInferenceIdTaskType(allModels, ".rainbow-sprinkles-elastic", TaskType.CHAT_COMPLETION);
53+
assertInferenceIdTaskType(allModels, ".gp-llm-v2-chat_completion", TaskType.CHAT_COMPLETION);
5354
assertInferenceIdTaskType(allModels, ".elser-2-elastic", TaskType.SPARSE_EMBEDDING);
5455
assertInferenceIdTaskType(allModels, ".jina-embeddings-v3", TaskType.TEXT_EMBEDDING);
5556
assertInferenceIdTaskType(allModels, ".elastic-rerank-v1", TaskType.RERANK);

x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/MockElasticInferenceServiceAuthorizationServer.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@ public void enqueueAuthorizeAllModelsResponse() {
3838
"model_name": "rainbow-sprinkles",
3939
"task_types": ["chat"]
4040
},
41+
{
42+
"model_name": "gp-llm-v2",
43+
"task_types": ["chat"]
44+
},
4145
{
4246
"model_name": "elser_model_2",
4347
"task_types": ["embed/text/sparse"]

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,10 @@ public class InternalPreconfiguredEndpoints {
3333
public static final String DEFAULT_CHAT_COMPLETION_MODEL_ID_V1 = "rainbow-sprinkles";
3434
public static final String DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V1 = ".rainbow-sprinkles-elastic";
3535

36+
// gp-llm-v2
37+
public static final String GP_LLM_V2_MODEL_ID = "gp-llm-v2";
38+
public static final String GP_LLM_V2_CHAT_COMPLETION_ENDPOINT_ID = ".gp-llm-v2-chat_completion";
39+
3640
// elser-2
3741
public static final String DEFAULT_ELSER_2_MODEL_ID = "elser_model_2";
3842
public static final String DEFAULT_ELSER_ENDPOINT_ID_V2 = ".elser-2-elastic";
@@ -53,6 +57,8 @@ public record MinimalModel(
5357

5458
private static final ElasticInferenceServiceCompletionServiceSettings COMPLETION_SERVICE_SETTINGS =
5559
new ElasticInferenceServiceCompletionServiceSettings(DEFAULT_CHAT_COMPLETION_MODEL_ID_V1);
60+
private static final ElasticInferenceServiceCompletionServiceSettings GP_LLM_V2_COMPLETION_SERVICE_SETTINGS =
61+
new ElasticInferenceServiceCompletionServiceSettings(GP_LLM_V2_MODEL_ID);
5662
private static final ElasticInferenceServiceSparseEmbeddingsServiceSettings SPARSE_EMBEDDINGS_SERVICE_SETTINGS =
5763
new ElasticInferenceServiceSparseEmbeddingsServiceSettings(DEFAULT_ELSER_2_MODEL_ID, null);
5864
private static final ElasticInferenceServiceDenseTextEmbeddingsServiceSettings DENSE_TEXT_EMBEDDINGS_SERVICE_SETTINGS =
@@ -80,6 +86,19 @@ public record MinimalModel(
8086
COMPLETION_SERVICE_SETTINGS
8187
)
8288
),
89+
GP_LLM_V2_MODEL_ID,
90+
List.of(
91+
new MinimalModel(
92+
new ModelConfigurations(
93+
GP_LLM_V2_CHAT_COMPLETION_ENDPOINT_ID,
94+
TaskType.CHAT_COMPLETION,
95+
ElasticInferenceService.NAME,
96+
GP_LLM_V2_COMPLETION_SERVICE_SETTINGS,
97+
ChunkingSettingsBuilder.DEFAULT_SETTINGS
98+
),
99+
GP_LLM_V2_COMPLETION_SERVICE_SETTINGS
100+
)
101+
),
83102
DEFAULT_ELSER_2_MODEL_ID,
84103
List.of(
85104
new MinimalModel(

x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/authorization/PreconfiguredEndpointModelAdapterTests.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@
3434
import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DEFAULT_RERANK_ENDPOINT_ID_V1;
3535
import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DEFAULT_RERANK_MODEL_ID_V1;
3636
import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DENSE_TEXT_EMBEDDINGS_DIMENSIONS;
37+
import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.GP_LLM_V2_CHAT_COMPLETION_ENDPOINT_ID;
38+
import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.GP_LLM_V2_MODEL_ID;
3739
import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.defaultDenseTextEmbeddingsSimilarity;
3840
import static org.hamcrest.Matchers.containsInAnyOrder;
3941
import static org.hamcrest.Matchers.hasSize;
@@ -45,6 +47,8 @@ public class PreconfiguredEndpointModelAdapterTests extends ESTestCase {
4547
new ElasticInferenceServiceSparseEmbeddingsServiceSettings(DEFAULT_ELSER_2_MODEL_ID, null);
4648
private static final ElasticInferenceServiceCompletionServiceSettings COMPLETION_SETTINGS =
4749
new ElasticInferenceServiceCompletionServiceSettings(DEFAULT_CHAT_COMPLETION_MODEL_ID_V1);
50+
private static final ElasticInferenceServiceCompletionServiceSettings GP_LLM_V2_COMPLETION_SETTINGS =
51+
new ElasticInferenceServiceCompletionServiceSettings(GP_LLM_V2_MODEL_ID);
4852
private static final ElasticInferenceServiceDenseTextEmbeddingsServiceSettings DENSE_SETTINGS =
4953
new ElasticInferenceServiceDenseTextEmbeddingsServiceSettings(
5054
DEFAULT_MULTILINGUAL_EMBED_MODEL_ID,
@@ -60,6 +64,7 @@ public class PreconfiguredEndpointModelAdapterTests extends ESTestCase {
6064
public void testGetModelsWithValidId() {
6165
var endpointIds = Set.of(
6266
DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V1,
67+
GP_LLM_V2_CHAT_COMPLETION_ENDPOINT_ID,
6368
DEFAULT_ELSER_ENDPOINT_ID_V2,
6469
DEFAULT_RERANK_ENDPOINT_ID_V1,
6570
DEFAULT_MULTILINGUAL_EMBED_ENDPOINT_ID
@@ -94,6 +99,18 @@ public void testGetModelsWithValidId() {
9499
COMPLETION_SETTINGS,
95100
EIS_COMPONENTS
96101
),
102+
new ElasticInferenceServiceModel(
103+
new ModelConfigurations(
104+
GP_LLM_V2_CHAT_COMPLETION_ENDPOINT_ID,
105+
TaskType.CHAT_COMPLETION,
106+
ElasticInferenceService.NAME,
107+
GP_LLM_V2_COMPLETION_SETTINGS,
108+
ChunkingSettingsBuilder.DEFAULT_SETTINGS
109+
),
110+
new ModelSecrets(EmptySecretSettings.INSTANCE),
111+
GP_LLM_V2_COMPLETION_SETTINGS,
112+
EIS_COMPONENTS
113+
),
97114
new ElasticInferenceServiceModel(
98115
new ModelConfigurations(
99116
DEFAULT_MULTILINGUAL_EMBED_ENDPOINT_ID,

0 commit comments

Comments
 (0)