Add gp-llm-v2 model ID and inference endpoint (#138045)

AntMoraisElastic · web-flow · commit 70861eecd39f · 2025-11-14T16:40:32.000Z
* Add gp-llm-v2 model ID and pre-configured inference endpoint

* Rename inference endpoint ID

* Autoformat

* Rename variables for gp-llm-v2 model ID and inference endpoint ID

* Autoformat

* Remove new model from Mock auth server

* Rename completion service settings for new model

* Rename completion settings

* Add gp-llm-v2 to mock response

* Rename gp-llm-v2 inference endpoint ID

* Add support for several inference endpoints for gp-llm-v2 model ID
diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetModelsWithElasticInferenceServiceIT.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetModelsWithElasticInferenceServiceIT.java
@@ -42,14 +42,15 @@ public void testGetDefaultEndpoints() throws IOException {
         var allModels = getAllModels();
         var chatCompletionModels = getModels("_all", TaskType.CHAT_COMPLETION);
 
-        assertThat(allModels, hasSize(7));
-        assertThat(chatCompletionModels, hasSize(1));
+        assertThat(allModels, hasSize(8));
+        assertThat(chatCompletionModels, hasSize(2));
 
         for (var model : chatCompletionModels) {
             assertEquals("chat_completion", model.get("task_type"));
         }
 
         assertInferenceIdTaskType(allModels, ".rainbow-sprinkles-elastic", TaskType.CHAT_COMPLETION);
+        assertInferenceIdTaskType(allModels, ".gp-llm-v2-chat_completion", TaskType.CHAT_COMPLETION);
         assertInferenceIdTaskType(allModels, ".elser-2-elastic", TaskType.SPARSE_EMBEDDING);
         assertInferenceIdTaskType(allModels, ".jina-embeddings-v3", TaskType.TEXT_EMBEDDING);
         assertInferenceIdTaskType(allModels, ".elastic-rerank-v1", TaskType.RERANK);
diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/MockElasticInferenceServiceAuthorizationServer.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/MockElasticInferenceServiceAuthorizationServer.java
@@ -38,6 +38,10 @@ public void enqueueAuthorizeAllModelsResponse() {
                       "model_name": "rainbow-sprinkles",
                       "task_types": ["chat"]
                     },
+                    {
+                      "model_name": "gp-llm-v2",
+                      "task_types": ["chat"]
+                    },
                     {
                       "model_name": "elser_model_2",
                       "task_types": ["embed/text/sparse"]
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java
@@ -33,6 +33,10 @@ public class InternalPreconfiguredEndpoints {
     public static final String DEFAULT_CHAT_COMPLETION_MODEL_ID_V1 = "rainbow-sprinkles";
     public static final String DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V1 = ".rainbow-sprinkles-elastic";
 
+    // gp-llm-v2
+    public static final String GP_LLM_V2_MODEL_ID = "gp-llm-v2";
+    public static final String GP_LLM_V2_CHAT_COMPLETION_ENDPOINT_ID = ".gp-llm-v2-chat_completion";
+
     // elser-2
     public static final String DEFAULT_ELSER_2_MODEL_ID = "elser_model_2";
     public static final String DEFAULT_ELSER_ENDPOINT_ID_V2 = ".elser-2-elastic";
@@ -53,6 +57,8 @@ public record MinimalModel(
 
     private static final ElasticInferenceServiceCompletionServiceSettings COMPLETION_SERVICE_SETTINGS =
         new ElasticInferenceServiceCompletionServiceSettings(DEFAULT_CHAT_COMPLETION_MODEL_ID_V1);
+    private static final ElasticInferenceServiceCompletionServiceSettings GP_LLM_V2_COMPLETION_SERVICE_SETTINGS =
+        new ElasticInferenceServiceCompletionServiceSettings(GP_LLM_V2_MODEL_ID);
     private static final ElasticInferenceServiceSparseEmbeddingsServiceSettings SPARSE_EMBEDDINGS_SERVICE_SETTINGS =
         new ElasticInferenceServiceSparseEmbeddingsServiceSettings(DEFAULT_ELSER_2_MODEL_ID, null);
     private static final ElasticInferenceServiceDenseTextEmbeddingsServiceSettings DENSE_TEXT_EMBEDDINGS_SERVICE_SETTINGS =
@@ -80,6 +86,19 @@ public record MinimalModel(
                 COMPLETION_SERVICE_SETTINGS
             )
         ),
+        GP_LLM_V2_MODEL_ID,
+        List.of(
+            new MinimalModel(
+                new ModelConfigurations(
+                    GP_LLM_V2_CHAT_COMPLETION_ENDPOINT_ID,
+                    TaskType.CHAT_COMPLETION,
+                    ElasticInferenceService.NAME,
+                    GP_LLM_V2_COMPLETION_SERVICE_SETTINGS,
+                    ChunkingSettingsBuilder.DEFAULT_SETTINGS
+                ),
+                GP_LLM_V2_COMPLETION_SERVICE_SETTINGS
+            )
+        ),
         DEFAULT_ELSER_2_MODEL_ID,
         List.of(
             new MinimalModel(
diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/authorization/PreconfiguredEndpointModelAdapterTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/authorization/PreconfiguredEndpointModelAdapterTests.java
@@ -34,6 +34,8 @@
 import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DEFAULT_RERANK_ENDPOINT_ID_V1;
 import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DEFAULT_RERANK_MODEL_ID_V1;
 import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.DENSE_TEXT_EMBEDDINGS_DIMENSIONS;
+import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.GP_LLM_V2_CHAT_COMPLETION_ENDPOINT_ID;
+import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.GP_LLM_V2_MODEL_ID;
 import static org.elasticsearch.xpack.inference.services.elastic.InternalPreconfiguredEndpoints.defaultDenseTextEmbeddingsSimilarity;
 import static org.hamcrest.Matchers.containsInAnyOrder;
 import static org.hamcrest.Matchers.hasSize;
@@ -45,6 +47,8 @@ public class PreconfiguredEndpointModelAdapterTests extends ESTestCase {
         new ElasticInferenceServiceSparseEmbeddingsServiceSettings(DEFAULT_ELSER_2_MODEL_ID, null);
     private static final ElasticInferenceServiceCompletionServiceSettings COMPLETION_SETTINGS =
         new ElasticInferenceServiceCompletionServiceSettings(DEFAULT_CHAT_COMPLETION_MODEL_ID_V1);
+    private static final ElasticInferenceServiceCompletionServiceSettings GP_LLM_V2_COMPLETION_SETTINGS =
+        new ElasticInferenceServiceCompletionServiceSettings(GP_LLM_V2_MODEL_ID);
     private static final ElasticInferenceServiceDenseTextEmbeddingsServiceSettings DENSE_SETTINGS =
         new ElasticInferenceServiceDenseTextEmbeddingsServiceSettings(
             DEFAULT_MULTILINGUAL_EMBED_MODEL_ID,
@@ -60,6 +64,7 @@ public class PreconfiguredEndpointModelAdapterTests extends ESTestCase {
     public void testGetModelsWithValidId() {
         var endpointIds = Set.of(
             DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V1,
+            GP_LLM_V2_CHAT_COMPLETION_ENDPOINT_ID,
             DEFAULT_ELSER_ENDPOINT_ID_V2,
             DEFAULT_RERANK_ENDPOINT_ID_V1,
             DEFAULT_MULTILINGUAL_EMBED_ENDPOINT_ID
@@ -94,6 +99,18 @@ public void testGetModelsWithValidId() {
                     COMPLETION_SETTINGS,
                     EIS_COMPONENTS
                 ),
+                new ElasticInferenceServiceModel(
+                    new ModelConfigurations(
+                        GP_LLM_V2_CHAT_COMPLETION_ENDPOINT_ID,
+                        TaskType.CHAT_COMPLETION,
+                        ElasticInferenceService.NAME,
+                        GP_LLM_V2_COMPLETION_SETTINGS,
+                        ChunkingSettingsBuilder.DEFAULT_SETTINGS
+                    ),
+                    new ModelSecrets(EmptySecretSettings.INSTANCE),
+                    GP_LLM_V2_COMPLETION_SETTINGS,
+                    EIS_COMPONENTS
+                ),
                 new ElasticInferenceServiceModel(
                     new ModelConfigurations(
                         DEFAULT_MULTILINGUAL_EMBED_ENDPOINT_ID,