@@ -13,12 +13,33 @@ import (
 	"github.com/openai/openai-go"
 	"github.com/openai/openai-go/shared/constant"
 
+	"github.com/NexaAI/nexa-sdk/runner/internal/config"
 	"github.com/NexaAI/nexa-sdk/runner/internal/store"
 	"github.com/NexaAI/nexa-sdk/runner/internal/types"
 	nexa_sdk "github.com/NexaAI/nexa-sdk/runner/nexa-sdk"
 	"github.com/NexaAI/nexa-sdk/runner/server/service"
 )
 
+type BaseParams struct {
+	// stream: if false the response will be returned as a single response object, rather than a stream of objects
+	Stream bool `json:"stream" default:"false"`
+	// keep_alive: controls how long the model will stay loaded into memory following the request (default: 5m)
+	KeepAlive *int64 `json:"keep_alive" default:"300"`
+}
+
+// getKeepAliveValue extracts the keepAlive value from BaseParams, using default if not set
+func getKeepAliveValue(param BaseParams) int64 {
+	if param.KeepAlive != nil {
+		return *param.KeepAlive
+	}
+	return config.Get().KeepAlive
+}
+
+type CompletionRequest struct {
+	BaseParams
+	openai.CompletionNewParams
+}
+
 // @Router		/completions [post]
 // @Summary		completion
 // @Description	Legacy completion endpoint for text generation. It is recommended to use the Chat Completions endpoint for new applications.
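One note on the keep_alive field introduced above: KeepAlive is declared as *int64 so the handler can tell an omitted field apart from an explicit 0, and only truly absent values fall back to the configured default. Below is a minimal, self-contained sketch of that distinction under standard encoding/json semantics; the params struct and resolveKeepAlive helper are illustrative stand-ins for BaseParams and getKeepAliveValue, not code from this change.

package main

import (
	"encoding/json"
	"fmt"
)

// params mirrors the BaseParams fields for illustration only.
type params struct {
	Stream    bool   `json:"stream"`
	KeepAlive *int64 `json:"keep_alive"`
}

// resolveKeepAlive mimics getKeepAliveValue: honor the client value when present,
// otherwise fall back to a default (300s assumed here in place of config.Get().KeepAlive).
func resolveKeepAlive(p params, defaultSecs int64) int64 {
	if p.KeepAlive != nil {
		return *p.KeepAlive
	}
	return defaultSecs
}

func main() {
	var omitted, explicitZero params
	_ = json.Unmarshal([]byte(`{"stream":true}`), &omitted)
	_ = json.Unmarshal([]byte(`{"stream":true,"keep_alive":0}`), &explicitZero)

	fmt.Println(resolveKeepAlive(omitted, 300))      // 300: field absent, default applies
	fmt.Println(resolveKeepAlive(explicitZero, 300)) // 0: client explicitly requested immediate unload
}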
@@ -27,16 +48,18 @@ import (
 // @Produce		json
 // @Success		200	{object}	openai.Completion
 func Completions(c *gin.Context) {
-	param := openai.CompletionNewParams{}
+	param := CompletionRequest{}
 	if err := c.ShouldBindJSON(&param); err != nil {
 		c.JSON(http.StatusBadRequest, map[string]any{"error": err.Error()})
 		return
 	}
+	slog.Debug("param", "param", param)
 
 	p, err := service.KeepAliveGet[nexa_sdk.LLM](
 		string(param.Model),
 		types.ModelParam{NCtx: 4096},
 		c.GetHeader("Nexa-KeepCache") != "true",
+		getKeepAliveValue(param.BaseParams),
 	)
 	if err != nil {
 		c.JSON(http.StatusInternalServerError, map[string]any{"error": err.Error()})
@@ -58,19 +81,18 @@ func Completions(c *gin.Context) {
 	}
 }
 
-type ChatCompletionNewParams openai.ChatCompletionNewParams
-
 // ChatCompletionRequest defines the request body for the chat completions API.
 // example: { "model": "nexaml/nexaml-models", "messages": [ { "role": "user", "content": "why is the sky blue?" } ] }
 type ChatCompletionRequest struct {
-	Stream      bool `json:"stream" default:"false"`
 	EnableThink bool `json:"enable_think" default:"true"`
-
-	ChatCompletionNewParams
+	BaseParams
+	openai.ChatCompletionNewParams
 }
 
 var toolCallRegex = regexp.MustCompile(`<tool_call>([\s\S]+)<\/tool_call>` + "|" + "```json([\\s\\S]+)```")
 
+
+
 // @Router		/chat/completions [post]
 // @Summary		Creates a model response for the given chat conversation.
 // @Description	This endpoint generates a model response for a given conversation, which can include text and images. It supports both single-turn and multi-turn conversations and can be used for various tasks like question answering, code generation, and function calling.
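For readers following the tool-call path: the toolCallRegex shown in the hunk above accepts a tool call either wrapped in <tool_call>…</tool_call> tags or inside a ```json fenced block. A small standalone check of the first alternation follows; the sample model output string is made up for illustration.

package main

import (
	"fmt"
	"regexp"
)

// Same pattern as toolCallRegex above: capture group 1 matches <tool_call>…</tool_call>,
// capture group 2 matches a ```json fenced block.
var toolCallRegex = regexp.MustCompile(`<tool_call>([\s\S]+)<\/tool_call>` + "|" + "```json([\\s\\S]+)```")

func main() {
	// Hypothetical model output; the payload is illustrative, not taken from the repo.
	out := `<tool_call>{"name":"get_weather","arguments":{"city":"Paris"}}</tool_call>`

	if m := toolCallRegex.FindStringSubmatch(out); m != nil {
		fmt.Println(m[1]) // JSON between the tags; m[2] would be set for the ```json form instead
	}
}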
@@ -85,6 +107,8 @@ func ChatCompletions(c *gin.Context) {
 		return
 	}
 
+	slog.Debug("param", "param", param)
+
 	s := store.Get()
 	manifest, err := s.GetManifest(param.Model)
 	if err != nil {
@@ -109,6 +133,7 @@ func chatCompletionsLLM(c *gin.Context, param ChatCompletionRequest) {
 		string(param.Model),
 		types.ModelParam{NCtx: 4096},
 		c.GetHeader("Nexa-KeepCache") != "true",
+		getKeepAliveValue(param.BaseParams),
 	)
 	if errors.Is(err, os.ErrNotExist) {
 		c.JSON(http.StatusNotFound, map[string]any{"error": "model not found"})
@@ -276,6 +301,7 @@ func chatCompletionsVLM(c *gin.Context, param ChatCompletionRequest) {
 		string(param.Model),
 		types.ModelParam{NCtx: 4096},
 		c.GetHeader("Nexa-KeepCache") != "true",
+		getKeepAliveValue(param.BaseParams),
 	)
 	if errors.Is(err, os.ErrNotExist) {
 		c.JSON(http.StatusNotFound, map[string]any{"error": "model not found"})
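End to end, a client only needs to add the keep_alive key to the usual chat-completions body; omitting it keeps the server-side default. The value is presumably in seconds, given the default tag of 300 alongside the "default: 5m" comment above. Here is a hedged sketch with net/http: the host, port, and route prefix are assumptions about the local deployment, and the path follows the @Router /chat/completions annotation.

package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Model name and message reuse the example from the ChatCompletionRequest doc comment;
	// keep_alive asks the server to keep the model loaded for 60 seconds after this request.
	body := []byte(`{
		"model": "nexaml/nexaml-models",
		"messages": [{"role": "user", "content": "why is the sky blue?"}],
		"stream": false,
		"keep_alive": 60
	}`)

	// The host/port here are placeholders; adjust to wherever the runner is listening.
	resp, err := http.Post("http://localhost:8080/chat/completions", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	out, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status, string(out))
}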