diff --git a/charts/deepgram-self-hosted/CHANGELOG.md b/charts/deepgram-self-hosted/CHANGELOG.md index e08b4ef..cba2b88 100644 --- a/charts/deepgram-self-hosted/CHANGELOG.md +++ b/charts/deepgram-self-hosted/CHANGELOG.md @@ -6,6 +6,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), ## Unreleased +### Added + +- Exposed the ability to add custom TOML sections in api.toml and engine.toml via `customToml` + ## [0.19.0] - 2025-09-12 ### Added diff --git a/charts/deepgram-self-hosted/README.md b/charts/deepgram-self-hosted/README.md index 35591a1..b4d28bf 100644 --- a/charts/deepgram-self-hosted/README.md +++ b/charts/deepgram-self-hosted/README.md @@ -199,6 +199,7 @@ If you encounter issues while deploying or using Deepgram, consider the followin | api.additionalAnnotations | object | `nil` | Additional annotations to add to the API deployment | | api.additionalLabels | object | `{}` | Additional labels to add to API resources | | api.affinity | object | `{}` | [Affinity and anti-affinity](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#affinity-and-anti-affinity) to apply for API pods. | +| api.customToml | string | `nil` | Custom TOML sections can be added to extend api.toml | | api.driverPool | object | `` | driverPool configures the backend pool of speech engines (generically referred to as "drivers" here). The API will load-balance among drivers in the standard pool; if one standard driver fails, the next one will be tried. | | api.driverPool.standard | object | `` | standard is the main driver pool to use. | | api.driverPool.standard.maxResponseSize | string | `"1073741824"` | Maximum response to deserialize from Driver (in bytes). Default is 1GB, expressed in bytes. 
| @@ -252,6 +253,7 @@ If you encounter issues while deploying or using Deepgram, consider the followin | engine.chunking.speechToText.streaming.minDuration | float | `nil` | minDuration is the minimum audio duration for a STT chunk size for a streaming request | | engine.chunking.speechToText.streaming.step | float | `1` | step defines how often to return interim results, in seconds. This value may be lowered to increase the frequency of interim results. However, this also causes a significant decrease in the number of concurrent streams supported by a single GPU. Please contact your Deepgram Account representative for more details. | | engine.concurrencyLimit.activeRequests | int | `nil` | activeRequests limits the number of active requests handled by a single Engine container. If additional requests beyond the limit are sent, the API container forming the request will try a different Engine pod. If no Engine pods are able to accept the request, the API will return a 429 HTTP response to the client. The `nil` default means no limit will be set. | +| engine.customToml | string | `nil` | Custom TOML sections can be added to extend engine.toml | | engine.features.streamingNer | bool | `true` | Enables format entity tags on streaming audio *if* a valid NER model is available. | | engine.halfPrecision.state | string | `"auto"` | Engine will automatically enable half precision operations if your GPU supports them. You can explicitly enable or disable this behavior with the state parameter which supports `"enable"`, `"disabled"`, and `"auto"`. | | engine.image.path | string | `"quay.io/deepgram/self-hosted-engine"` | path configures the image path to use for creating Engine containers. You may change this from the public Quay image path if you have imported Deepgram images into a private container registry. 
| diff --git a/charts/deepgram-self-hosted/samples/01-basic-setup-aws.values.yaml b/charts/deepgram-self-hosted/samples/01-basic-setup-aws.values.yaml index c82f7f9..1ca7a3f 100644 --- a/charts/deepgram-self-hosted/samples/01-basic-setup-aws.values.yaml +++ b/charts/deepgram-self-hosted/samples/01-basic-setup-aws.values.yaml @@ -70,6 +70,10 @@ api: memory: "8Gi" cpu: "4000m" + # -- Custom TOML sections can be added here to extend api.toml + # customToml: | + # [custom_section] + engine: affinity: nodeAffinity: @@ -95,6 +99,16 @@ engine: concurrencyLimit: activeRequests: + # -- Custom TOML sections can be added here to extend engine.toml + # customToml: | + # # Preload models on engine startup for faster initial requests + # # See https://deepgram.gitbook.io/help-center/how-can-i-pre-load-models-to-reduce-cold-start-latency + # [preload_models] + # models = [ + # # Example model preload configuration: + # # { model = "general-nova-3", version = "2025-09-05.12808", language = "multi", format = false } + # ] + modelManager: volumes: aws: diff --git a/charts/deepgram-self-hosted/templates/api/api.config.yaml b/charts/deepgram-self-hosted/templates/api/api.config.yaml index 7c968d1..4f9f5e1 100644 --- a/charts/deepgram-self-hosted/templates/api/api.config.yaml +++ b/charts/deepgram-self-hosted/templates/api/api.config.yaml @@ -108,3 +108,7 @@ data: {{- end }} {{- end }} {{- end }} + + {{- if .Values.api.customToml }} + {{- .Values.api.customToml | nindent 4 }} + {{- end }} diff --git a/charts/deepgram-self-hosted/templates/engine/engine.config.yaml b/charts/deepgram-self-hosted/templates/engine/engine.config.yaml index 178ba9b..7b94481 100644 --- a/charts/deepgram-self-hosted/templates/engine/engine.config.yaml +++ b/charts/deepgram-self-hosted/templates/engine/engine.config.yaml @@ -67,3 +67,6 @@ data: [half_precision] state = "{{ .Values.engine.halfPrecision.state }}" + {{- if .Values.engine.customToml }} + {{- .Values.engine.customToml | nindent 4 }} + {{- end }} diff
--git a/charts/deepgram-self-hosted/values.yaml b/charts/deepgram-self-hosted/values.yaml index a68af5c..a83af9b 100644 --- a/charts/deepgram-self-hosted/values.yaml +++ b/charts/deepgram-self-hosted/values.yaml @@ -373,6 +373,9 @@ api: # Default is 1GB, expressed in bytes. maxResponseSize: "1073741824" + # -- Custom TOML sections can be added to extend api.toml + customToml: + engine: # -- namePrefix is the prefix to apply to the name of all K8s objects # associated with the Deepgram Engine containers. @@ -482,6 +485,9 @@ engine: # to the client. The `nil` default means no limit will be set. activeRequests: + # -- Custom TOML sections can be added to extend engine.toml + customToml: + # -- Configure Engine containers to listen for requests from API containers. # @default -- `` server: