From 31aa5205118b588d6ffa03ab9b751b8fd1084fd7 Mon Sep 17 00:00:00 2001 From: TJ Rhynard Date: Sat, 9 Aug 2025 15:49:54 -0400 Subject: [PATCH 1/7] Fix securityContext template references for API and Engine deployments API and Engine deployments were incorrectly referencing licenseProxy.securityContext instead of their respective api.securityContext and engine.securityContext values. --- charts/deepgram-self-hosted/templates/api/api.deployment.yaml | 2 +- .../templates/engine/engine.deployment.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/deepgram-self-hosted/templates/api/api.deployment.yaml b/charts/deepgram-self-hosted/templates/api/api.deployment.yaml index 4d25df7..0edc896 100644 --- a/charts/deepgram-self-hosted/templates/api/api.deployment.yaml +++ b/charts/deepgram-self-hosted/templates/api/api.deployment.yaml @@ -42,7 +42,7 @@ spec: tolerations: {{- toYaml .Values.api.tolerations | nindent 8 }} securityContext: - {{- toYaml .Values.licenseProxy.securityContext | nindent 8 }} + {{- toYaml .Values.api.securityContext | nindent 8 }} {{- if or .Values.api.serviceAccount.create .Values.api.serviceAccount.name }} serviceAccountName: {{ default (printf "%s-sa" .Values.api.namePrefix) .Values.api.serviceAccount.name }} {{- end }} diff --git a/charts/deepgram-self-hosted/templates/engine/engine.deployment.yaml b/charts/deepgram-self-hosted/templates/engine/engine.deployment.yaml index 1ce8f52..3408eb7 100644 --- a/charts/deepgram-self-hosted/templates/engine/engine.deployment.yaml +++ b/charts/deepgram-self-hosted/templates/engine/engine.deployment.yaml @@ -42,7 +42,7 @@ spec: tolerations: {{- toYaml .Values.engine.tolerations | nindent 8 }} securityContext: - {{- toYaml .Values.licenseProxy.securityContext | nindent 8 }} + {{- toYaml .Values.engine.securityContext | nindent 8 }} {{- if or .Values.engine.serviceAccount.create .Values.engine.serviceAccount.name }} serviceAccountName: {{ default (printf "%s-sa" .Values.engine.namePrefix) .Values.engine.serviceAccount.name }} {{- end }} From 02583f9b4ec32b62673705cc715bad092c7851e6 Mon Sep 17 00:00:00 2001 From: TJ Rhynard Date: Sun, 10 Aug 2025 08:25:57 -0400 Subject: [PATCH 2/7] Fix misleading securityContext documentation comments Comments for Engine and License Proxy securityContext fields incorrectly stated 'for API pods' instead of their respective component names. --- charts/deepgram-self-hosted/values.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/deepgram-self-hosted/values.yaml b/charts/deepgram-self-hosted/values.yaml index 685c8c6..f164dde 100644 --- a/charts/deepgram-self-hosted/values.yaml +++ b/charts/deepgram-self-hosted/values.yaml @@ -354,7 +354,7 @@ engine: # to apply to Engine pods. tolerations: [] - # -- [Security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/) for API pods. + # -- [Security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/) for Engine pods. securityContext: {} serviceAccount: @@ -591,7 +591,7 @@ licenseProxy: # to apply to License Proxy pods. tolerations: [] - # -- [Security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/) for API pods. + # -- [Security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/) for License Proxy pods. 
securityContext: {} serviceAccount: From e38e28d91628db863a1c24e99ebe79486560ac8b Mon Sep 17 00:00:00 2001 From: TJ Rhynard Date: Thu, 21 Aug 2025 09:22:57 -0400 Subject: [PATCH 3/7] Add container-level security context support to Helm templates - Add containerSecurityContext field to API, Engine, and License Proxy components - Add proper template conditionals for security context rendering --- .../templates/api/api.deployment.yaml | 5 ++ .../templates/engine/engine.deployment.yaml | 6 ++- .../license-proxy.deployment.yaml | 8 ++- .../volumes/aws/efs-model-download.job.yaml | 8 +++ charts/deepgram-self-hosted/values.yaml | 54 +++++++++++++++++-- 5 files changed, 76 insertions(+), 5 deletions(-) diff --git a/charts/deepgram-self-hosted/templates/api/api.deployment.yaml b/charts/deepgram-self-hosted/templates/api/api.deployment.yaml index 0edc896..6af0acf 100644 --- a/charts/deepgram-self-hosted/templates/api/api.deployment.yaml +++ b/charts/deepgram-self-hosted/templates/api/api.deployment.yaml @@ -41,6 +41,7 @@ spec: {{- toYaml .Values.api.affinity | nindent 8 }} tolerations: {{- toYaml .Values.api.tolerations | nindent 8 }} + {{- with .Values.api.securityContext }} securityContext: {{- toYaml .Values.api.securityContext | nindent 8 }} {{- if or .Values.api.serviceAccount.create .Values.api.serviceAccount.name }} @@ -48,6 +49,10 @@ spec: {{- end }} containers: - name: {{ .Values.api.namePrefix }} + {{- with .Values.api.containerSecurityContext }} + securityContext: + {{- toYaml . | nindent 10 }} + {{- end }} image: {{ .Values.api.image.path }}:{{ .Values.api.image.tag }} imagePullPolicy: {{ .Values.api.image.pullPolicy }} envFrom: diff --git a/charts/deepgram-self-hosted/templates/engine/engine.deployment.yaml b/charts/deepgram-self-hosted/templates/engine/engine.deployment.yaml index 3408eb7..f2a6d42 100644 --- a/charts/deepgram-self-hosted/templates/engine/engine.deployment.yaml +++ b/charts/deepgram-self-hosted/templates/engine/engine.deployment.yaml @@ -41,6 +41,7 @@ spec: {{- toYaml .Values.engine.affinity | nindent 8 }} tolerations: {{- toYaml .Values.engine.tolerations | nindent 8 }} + {{- with .Values.engine.securityContext }} securityContext: {{- toYaml .Values.engine.securityContext | nindent 8 }} {{- if or .Values.engine.serviceAccount.create .Values.engine.serviceAccount.name }} @@ -48,6 +49,10 @@ spec: {{- end }} containers: - name: {{ .Values.engine.namePrefix }} + {{- with .Values.engine.containerSecurityContext }} + securityContext: + {{- toYaml . | nindent 10 }} + {{- end }} image: {{ .Values.engine.image.path }}:{{ .Values.engine.image.tag }} imagePullPolicy: {{ .Values.engine.image.pullPolicy }} envFrom: @@ -153,4 +158,3 @@ spec: {{- else if $gcpGpdEnabled }} claimName: {{ .Values.engine.modelManager.volumes.gcp.gpd.namePrefix }}-gcp-gpd-pvc {{- end }} - diff --git a/charts/deepgram-self-hosted/templates/license-proxy/license-proxy.deployment.yaml b/charts/deepgram-self-hosted/templates/license-proxy/license-proxy.deployment.yaml index 254d97f..ceb1730 100644 --- a/charts/deepgram-self-hosted/templates/license-proxy/license-proxy.deployment.yaml +++ b/charts/deepgram-self-hosted/templates/license-proxy/license-proxy.deployment.yaml @@ -41,13 +41,19 @@ spec: {{- toYaml .Values.licenseProxy.affinity | nindent 8 }} tolerations: {{- toYaml .Values.licenseProxy.tolerations | nindent 8 }} + {{- with .Values.licenseProxy.securityContext }} securityContext: - {{- toYaml .Values.licenseProxy.securityContext | nindent 8 }} + {{- toYaml . 
| nindent 8 }} + {{- end }} {{- if or .Values.licenseProxy.serviceAccount.create .Values.licenseProxy.serviceAccount.name }} serviceAccountName: {{ default (printf "%s-sa" .Values.licenseProxy.namePrefix) .Values.licenseProxy.serviceAccount.name }} {{- end }} containers: - name: {{ .Values.licenseProxy.namePrefix }} + {{- with .Values.licenseProxy.containerSecurityContext }} + securityContext: + {{- toYaml . | nindent 10 }} + {{- end }} image: {{ .Values.licenseProxy.image.path }}:{{ .Values.licenseProxy.image.tag }} imagePullPolicy: {{ .Values.licenseProxy.image.pullPolicy }} envFrom: diff --git a/charts/deepgram-self-hosted/templates/volumes/aws/efs-model-download.job.yaml b/charts/deepgram-self-hosted/templates/volumes/aws/efs-model-download.job.yaml index 71ec6e6..427fcb7 100644 --- a/charts/deepgram-self-hosted/templates/volumes/aws/efs-model-download.job.yaml +++ b/charts/deepgram-self-hosted/templates/volumes/aws/efs-model-download.job.yaml @@ -16,8 +16,16 @@ spec: {{- toYaml .Values.engine.affinity | nindent 8 }} tolerations: {{- toYaml .Values.engine.tolerations | nindent 8 }} + {{- with .Values.engine.securityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} containers: - name: model-management + {{- with .Values.engine.containerSecurityContext }} + securityContext: + {{- toYaml . | nindent 10 }} + {{- end }} image: alpine command: - /bin/sh diff --git a/charts/deepgram-self-hosted/values.yaml b/charts/deepgram-self-hosted/values.yaml index f164dde..5fae398 100644 --- a/charts/deepgram-self-hosted/values.yaml +++ b/charts/deepgram-self-hosted/values.yaml @@ -165,8 +165,24 @@ api: # to apply to API pods. tolerations: [] - # -- [Security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/) for API pods. + # -- [Pod-level security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod) for API pods. securityContext: {} + # runAsNonRoot: true + # runAsUser: 1000 + # runAsGroup: 3000 + # fsGroup: 2000 + # seccompProfile: + # type: RuntimeDefault + + # -- [Container-level security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container) for API containers. + containerSecurityContext: {} + # allowPrivilegeEscalation: false + # capabilities: + # drop: + # - ALL + # readOnlyRootFilesystem: true + # runAsNonRoot: true + # runAsUser: 1000 serviceAccount: # -- Specifies whether to create a default service account for the Deepgram API Deployment. @@ -354,8 +370,24 @@ engine: # to apply to Engine pods. tolerations: [] - # -- [Security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/) for Engine pods. + # -- [Pod-level security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod) for Engine pods. securityContext: {} + # runAsNonRoot: true + # runAsUser: 1000 + # runAsGroup: 3000 + # fsGroup: 2000 + # seccompProfile: + # type: RuntimeDefault + + # -- [Container-level security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container) for Engine containers. + containerSecurityContext: {} + # allowPrivilegeEscalation: false + # capabilities: + # drop: + # - ALL + # readOnlyRootFilesystem: true + # runAsNonRoot: true + # runAsUser: 1000 serviceAccount: # -- Specifies whether to create a default service account for the Deepgram Engine Deployment. 
@@ -591,8 +623,24 @@ licenseProxy: # to apply to License Proxy pods. tolerations: [] - # -- [Security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/) for License Proxy pods. + # -- [Pod-level security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod) for License Proxy pods. securityContext: {} + # runAsNonRoot: true + # runAsUser: 1000 + # runAsGroup: 3000 + # fsGroup: 2000 + # seccompProfile: + # type: RuntimeDefault + + # -- [Container-level security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container) for License Proxy containers. + containerSecurityContext: {} + # allowPrivilegeEscalation: false + # capabilities: + # drop: + # - ALL + # readOnlyRootFilesystem: true + # runAsNonRoot: true + # runAsUser: 1000 serviceAccount: # -- Specifies whether to create a default service account for the Deepgram License Proxy Deployment. From fe91b102de310c7371a39dfa09b599128f645533 Mon Sep 17 00:00:00 2001 From: TJ Rhynard Date: Thu, 21 Aug 2025 10:14:07 -0400 Subject: [PATCH 4/7] Clean up comments --- charts/deepgram-self-hosted/values.yaml | 39 ------------------------- 1 file changed, 39 deletions(-) diff --git a/charts/deepgram-self-hosted/values.yaml b/charts/deepgram-self-hosted/values.yaml index 5fae398..b0f9534 100644 --- a/charts/deepgram-self-hosted/values.yaml +++ b/charts/deepgram-self-hosted/values.yaml @@ -167,22 +167,9 @@ api: # -- [Pod-level security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod) for API pods. securityContext: {} - # runAsNonRoot: true - # runAsUser: 1000 - # runAsGroup: 3000 - # fsGroup: 2000 - # seccompProfile: - # type: RuntimeDefault # -- [Container-level security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container) for API containers. containerSecurityContext: {} - # allowPrivilegeEscalation: false - # capabilities: - # drop: - # - ALL - # readOnlyRootFilesystem: true - # runAsNonRoot: true - # runAsUser: 1000 serviceAccount: # -- Specifies whether to create a default service account for the Deepgram API Deployment. @@ -372,22 +359,9 @@ engine: # -- [Pod-level security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod) for Engine pods. securityContext: {} - # runAsNonRoot: true - # runAsUser: 1000 - # runAsGroup: 3000 - # fsGroup: 2000 - # seccompProfile: - # type: RuntimeDefault # -- [Container-level security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container) for Engine containers. containerSecurityContext: {} - # allowPrivilegeEscalation: false - # capabilities: - # drop: - # - ALL - # readOnlyRootFilesystem: true - # runAsNonRoot: true - # runAsUser: 1000 serviceAccount: # -- Specifies whether to create a default service account for the Deepgram Engine Deployment. @@ -625,22 +599,9 @@ licenseProxy: # -- [Pod-level security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod) for License Proxy pods. 
securityContext: {} - # runAsNonRoot: true - # runAsUser: 1000 - # runAsGroup: 3000 - # fsGroup: 2000 - # seccompProfile: - # type: RuntimeDefault # -- [Container-level security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container) for License Proxy containers. containerSecurityContext: {} - # allowPrivilegeEscalation: false - # capabilities: - # drop: - # - ALL - # readOnlyRootFilesystem: true - # runAsNonRoot: true - # runAsUser: 1000 serviceAccount: # -- Specifies whether to create a default service account for the Deepgram License Proxy Deployment. From edf92ace974a2928bd334ce3414a78c0e293bbfd Mon Sep 17 00:00:00 2001 From: TJ Rhynard Date: Mon, 25 Aug 2025 21:58:18 -0400 Subject: [PATCH 5/7] helm-docs --- charts/deepgram-self-hosted/README.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/charts/deepgram-self-hosted/README.md b/charts/deepgram-self-hosted/README.md index a521f37..6129aaf 100644 --- a/charts/deepgram-self-hosted/README.md +++ b/charts/deepgram-self-hosted/README.md @@ -180,6 +180,7 @@ If you encounter issues while deploying or using Deepgram, consider the followin | api.additionalAnnotations | object | `nil` | Additional annotations to add to the API deployment | | api.additionalLabels | object | `{}` | Additional labels to add to API resources | | api.affinity | object | `{}` | [Affinity and anti-affinity](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#affinity-and-anti-affinity) to apply for API pods. | +| api.containerSecurityContext | object | `{}` | [Container-level security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container) for API containers. | | api.driverPool | object | `` | driverPool configures the backend pool of speech engines (generically referred to as "drivers" here). The API will load-balance among drivers in the standard pool; if one standard driver fails, the next one will be tried. | | api.driverPool.standard | object | `` | standard is the main driver pool to use. | | api.driverPool.standard.maxResponseSize | string | `"1073741824"` | Maximum response to deserialize from Driver (in bytes). Default is 1GB, expressed in bytes. | @@ -201,7 +202,7 @@ If you encounter issues while deploying or using Deepgram, consider the followin | api.resolver.maxTTL | int | `nil` | maxTTL sets the DNS TTL value if specifying a custom DNS nameserver. | | api.resolver.nameservers | list | `[]` | nameservers allows for specifying custom domain name server(s). A valid list item's format is "{IP} {PORT} {PROTOCOL (tcp or udp)}", e.g. `"127.0.0.1 53 udp"`. | | api.resources | object | `` | Configure resource limits per API container. See [Deepgram's documentation](https://developers.deepgram.com/docs/self-hosted-deployment-environments#api) for more details. | -| api.securityContext | object | `{}` | [Security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/) for API pods. | +| api.securityContext | object | `{}` | [Pod-level security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod) for API pods. | | api.server | object | `` | Configure how the API will listen for your requests | | api.server.callbackConnTimeout | string | `"1s"` | callbackConnTimeout configures how long to wait for a connection to a callback URL. 
See [Deepgram's callback documentation](https://developers.deepgram.com/docs/callback) for more details. The value should be a humantime duration. | | api.server.callbackTimeout | string | `"10s"` | callbackTimeout configures how long to wait for a response from a callback URL. See [Deepgram's callback documentation](https://developers.deepgram.com/docs/callback) for more details. The value should be a humantime duration. | @@ -233,6 +234,7 @@ If you encounter issues while deploying or using Deepgram, consider the followin | engine.chunking.speechToText.streaming.minDuration | float | `nil` | minDuration is the minimum audio duration for a STT chunk size for a streaming request | | engine.chunking.speechToText.streaming.step | float | `1` | step defines how often to return interim results, in seconds. This value may be lowered to increase the frequency of interim results. However, this also causes a significant decrease in the number of concurrent streams supported by a single GPU. Please contact your Deepgram Account representative for more details. | | engine.concurrencyLimit.activeRequests | int | `nil` | activeRequests limits the number of active requests handled by a single Engine container. If additional requests beyond the limit are sent, the API container forming the request will try a different Engine pod. If no Engine pods are able to accept the request, the API will return a 429 HTTP response to the client. The `nil` default means no limit will be set. | +| engine.containerSecurityContext | object | `{}` | [Container-level security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container) for Engine containers. | | engine.features.streamingNer | bool | `false` | Enables format entity tags on streaming audio *if* a valid NER model is available. | | engine.halfPrecision.state | string | `"auto"` | Engine will automatically enable half precision operations if your GPU supports them. You can explicitly enable or disable this behavior with the state parameter which supports `"enable"`, `"disabled"`, and `"auto"`. | | engine.image.path | string | `"quay.io/deepgram/self-hosted-engine"` | path configures the image path to use for creating Engine containers. You may change this from the public Quay image path if you have imported Deepgram images into a private container registry. | @@ -263,7 +265,7 @@ If you encounter issues while deploying or using Deepgram, consider the followin | engine.resources | object | `` | Configure resource limits per Engine container. See [Deepgram's documentation](https://developers.deepgram.com/docs/self-hosted-deployment-environments#engine) for more details. | | engine.resources.limits.gpu | int | `1` | gpu maps to the nvidia.com/gpu resource parameter | | engine.resources.requests.gpu | int | `1` | gpu maps to the nvidia.com/gpu resource parameter | -| engine.securityContext | object | `{}` | [Security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/) for API pods. | +| engine.securityContext | object | `{}` | [Pod-level security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod) for Engine pods. | | engine.server | object | `` | Configure Engine containers to listen for requests from API containers. | | engine.server.host | string | `"0.0.0.0"` | host is the IP address to listen on for inference requests. 
You will want to listen on all interfaces to interact with other pods in the cluster. | | engine.server.port | int | `8080` | port to listen on for inference requests | @@ -291,6 +293,7 @@ If you encounter issues while deploying or using Deepgram, consider the followin | licenseProxy.additionalAnnotations | object | `nil` | Additional annotations to add to the LicenseProxy deployment | | licenseProxy.additionalLabels | object | `{}` | Additional labels to add to License Proxy resources | | licenseProxy.affinity | object | `{}` | [Affinity and anti-affinity](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#affinity-and-anti-affinity) to apply for License Proxy pods. | +| licenseProxy.containerSecurityContext | object | `{}` | [Container-level security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container) for License Proxy containers. | | licenseProxy.deploySecondReplica | bool | `false` | If the License Proxy is deployed, one replica should be sufficient to support many API/Engine pods. Highly available environments may wish to deploy a second replica to ensure uptime, which can be toggled with this option. | | licenseProxy.enabled | bool | `false` | The License Proxy is optional, but highly recommended to be deployed in production to enable highly available environments. | | licenseProxy.image.path | string | `"quay.io/deepgram/self-hosted-license-proxy"` | path configures the image path to use for creating License Proxy containers. You may change this from the public Quay image path if you have imported Deepgram images into a private container registry. | @@ -301,7 +304,7 @@ If you encounter issues while deploying or using Deepgram, consider the followin | licenseProxy.namePrefix | string | `"deepgram-license-proxy"` | namePrefix is the prefix to apply to the name of all K8s objects associated with the Deepgram License Proxy containers. | | licenseProxy.readinessProbe | object | `` | Readiness probe customization for License Proxy pods. | | licenseProxy.resources | object | `` | Configure resource limits per License Proxy container. See [Deepgram's documentation](https://developers.deepgram.com/docs/license-proxy#system-requirements) for more details. | -| licenseProxy.securityContext | object | `{}` | [Security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/) for API pods. | +| licenseProxy.securityContext | object | `{}` | [Pod-level security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod) for License Proxy pods. | | licenseProxy.server | object | `` | Configure how the license proxy will listen for licensing requests. | | licenseProxy.server.baseUrl | string | `"/"` | baseUrl is the prefix for incoming license verification requests. | | licenseProxy.server.host | string | `"0.0.0.0"` | host is the IP address to listen on. You will want to listen on all interfaces to interact with other pods in the cluster. 
| From 291d12a26c793911d9916ee2a76ae924257b956b Mon Sep 17 00:00:00 2001 From: TJ Rhynard Date: Tue, 26 Aug 2025 15:52:29 -0400 Subject: [PATCH 6/7] Changelog updates --- charts/deepgram-self-hosted/CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/charts/deepgram-self-hosted/CHANGELOG.md b/charts/deepgram-self-hosted/CHANGELOG.md index d735011..38fe795 100644 --- a/charts/deepgram-self-hosted/CHANGELOG.md +++ b/charts/deepgram-self-hosted/CHANGELOG.md @@ -6,6 +6,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), ## Unreleased +### Added + +- Container-level security context support to Helm templates + ## [0.17.0] - 2025-08-14 ### Added From 3f8f782df7efb4914df1a836ac22448032e4572f Mon Sep 17 00:00:00 2001 From: jkroll-deepgram Date: Mon, 15 Sep 2025 14:10:56 -0500 Subject: [PATCH 7/7] Resolve merge conflicts --- charts/deepgram-self-hosted/CHANGELOG.md | 37 ++++- charts/deepgram-self-hosted/Chart.yaml | 4 +- charts/deepgram-self-hosted/README.md | 37 ++++- .../samples/01-basic-setup-aws.values.yaml | 19 +++ .../samples/02-basic-setup-gcp.yaml | 3 + .../samples/04-aura-2-setup.values.yaml | 6 +- charts/deepgram-self-hosted/samples/README.md | 1 + .../deepgram-self-hosted/templates/NOTES.txt | 2 +- .../templates/api/api.config.yaml | 50 ++++++ .../templates/api/api.deployment.yaml | 61 ++++++- .../templates/engine/engine.config.yaml | 3 + .../templates/engine/engine.deployment.yaml | 156 ++++++++++-------- .../templates/engine/engine.hpa.yaml | 6 +- .../templates/engine/engine.service.yaml | 49 ++++-- .../license-proxy.deployment.yaml | 8 +- .../volumes/aws/efs-model-download.job.yaml | 2 + charts/deepgram-self-hosted/values.yaml | 134 ++++++++++++++- .../engine.aura-2-en.toml | 11 +- .../engine.aura-2-es.toml | 11 +- common/license_proxy_deploy/engine.toml | 11 +- common/standard_deploy/api.toml | 2 +- common/standard_deploy/engine.aura-2-en.toml | 11 +- common/standard_deploy/engine.aura-2-es.toml | 11 +- common/standard_deploy/engine.toml | 11 +- docker/docker-compose.aura-2.yml | 10 +- docker/docker-compose.license-proxy.yml | 6 +- docker/docker-compose.standard.yml | 4 +- podman/podman-compose.aura-2.yml | 10 +- podman/podman-compose.license-proxy.yml | 6 +- podman/podman-compose.standard.yml | 4 +- 30 files changed, 552 insertions(+), 134 deletions(-) diff --git a/charts/deepgram-self-hosted/CHANGELOG.md b/charts/deepgram-self-hosted/CHANGELOG.md index 38fe795..3317c26 100644 --- a/charts/deepgram-self-hosted/CHANGELOG.md +++ b/charts/deepgram-self-hosted/CHANGELOG.md @@ -8,8 +8,39 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), ### Added +- Exposed the ability to add custom TOML sections in api.toml and engine.toml via `customToml` +- Added `nodeSelector` support for all components (API, Engine, License Proxy) to allow scheduling pods on specific nodes. - Container-level security context support to Helm templates +## [0.19.0] - 2025-09-12 + +### Added + +- Changes the defaults of `.Values.api.features.formatEntityTags` and `.Values.engine.features.streamingNer` to `true`, so that NER formatting is enabled by default. This formatting is required with Nova-3 models. See our [self-hosted NER guide](https://deepgram.gitbook.io/help-center/self-hosted/how-can-i-enable-ner-formatting-in-my-self-hosted-deployment) for further details. +- Updated default container tags to September 2025 release (`release-250912`). 
Refer to the [main Deepgram changelog](https://developers.deepgram.com/changelog/self-hosted-changelog#deepgram-self-hosted-september-2025-release-250912) for additional details. + +## [0.18.1] - 2025-09-03 + +### Added + +- Defined `allowNonpublicEndpoints` Voice Agent flag for use with custom LLM endpoints + +### Fixed + +- Fixed HPA replica conflicts in API and Engine deployments by conditionally removing hardcoded replicas when autoscaling is enabled + +## [0.18.0] - 2025-08-28 + +### Added + +- Added built-in support for Voice Agent. +- Updated default container tags to August 2025 release (`release-250828`). Refer to the [main Deepgram changelog](https://developers.deepgram.com/changelog/self-hosted-changelog#deepgram-self-hosted-august-2025-release-250828) for additional details. + +### Fixed + +- Fixed securityContext template references for API and Engine deployments +- Fixed securityContext documentation comments + ## [0.17.0] - 2025-08-14 ### Added @@ -47,7 +78,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), - Updated default container tags to March 2025 release. Refer to the [main Deepgram changelog](https://deepgram.com/changelog/deepgram-self-hosted-march-2025-release-250331) for additional details. - ## [0.11.1] - 2025-03-28 ### Added @@ -213,7 +243,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), - Initial implementation of the Helm chart. -[unreleased]: https://github.com/deepgram/self-hosted-resources/compare/deepgram-self-hosted-0.17.0...HEAD +[unreleased]: https://github.com/deepgram/self-hosted-resources/compare/deepgram-self-hosted-0.19.0...HEAD +[0.19.0]: https://github.com/deepgram/self-hosted-resources/compare/deepgram-self-hosted-0.18.1...0.19.0 +[0.18.1]: https://github.com/deepgram/self-hosted-resources/compare/deepgram-self-hosted-0.18.0...0.18.1 +[0.18.0]: https://github.com/deepgram/self-hosted-resources/compare/deepgram-self-hosted-0.17.0...0.18.0 [0.17.0]: https://github.com/deepgram/self-hosted-resources/compare/deepgram-self-hosted-0.16.0...0.17.0 [0.16.0]: https://github.com/deepgram/self-hosted-resources/compare/deepgram-self-hosted-0.15.0...0.16.0 [0.15.0]: https://github.com/deepgram/self-hosted-resources/compare/deepgram-self-hosted-0.14.0...0.15.0 diff --git a/charts/deepgram-self-hosted/Chart.yaml b/charts/deepgram-self-hosted/Chart.yaml index 1b0dbcb..6ed59e2 100644 --- a/charts/deepgram-self-hosted/Chart.yaml +++ b/charts/deepgram-self-hosted/Chart.yaml @@ -1,8 +1,8 @@ apiVersion: v2 name: deepgram-self-hosted type: application -version: 0.17.0 -appVersion: "release-250814" +version: 0.19.0 +appVersion: "release-250912" description: A Helm chart for running Deepgram services in a self-hosted environment home: "https://developers.deepgram.com/docs/self-hosted-introduction" sources: ["https://github.com/deepgram/self-hosted-resources"] diff --git a/charts/deepgram-self-hosted/README.md b/charts/deepgram-self-hosted/README.md index 6129aaf..ac2a093 100644 --- a/charts/deepgram-self-hosted/README.md +++ b/charts/deepgram-self-hosted/README.md @@ -1,6 +1,6 @@ # deepgram-self-hosted -![Version: 0.17.0](https://img.shields.io/badge/Version-0.17.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: release-250814](https://img.shields.io/badge/AppVersion-release--250814-informational?style=flat-square) [![Artifact 
Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/deepgram-self-hosted)](https://artifacthub.io/packages/search?repo=deepgram-self-hosted) +![Version: 0.19.0](https://img.shields.io/badge/Version-0.19.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: release-250912](https://img.shields.io/badge/AppVersion-release--250912-informational?style=flat-square) [![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/deepgram-self-hosted)](https://artifacthub.io/packages/search?repo=deepgram-self-hosted) A Helm chart for running Deepgram services in a self-hosted environment @@ -177,10 +177,30 @@ If you encounter issues while deploying or using Deepgram, consider the followin | Key | Type | Default | Description | |-----|------|---------|-------------| +| agent.allowNonpublicEndpoints | bool | `false` | Whether to allow non-public URLs (such as `localhost`) in custom endpoints. Disabled by default | +| agent.enabled | bool | `false` | Whether to enable voice agent. Disabled by default | +| agent.eotTimeoutMs | int | `3500` | Timeout in milliseconds for end-of-turn detection | +| agent.llmProviders | object | `` | Configuration for LLM providers and their available models | +| agent.llmProviders.anthropic | object | `` | Anthropic provider configuration | +| agent.llmProviders.anthropic.models | object | `` | Available Anthropic models and their configurations | +| agent.llmProviders.deepgram | object | `` | Deepgram provider configuration | +| agent.llmProviders.deepgram.models | object | `` | Available Deepgram models and their configurations | +| agent.llmProviders.groq | object | `` | Groq provider configuration | +| agent.llmProviders.groq.models | object | `` | Available Groq models and their configurations | +| agent.llmProviders.open_ai | object | `` | OpenAI provider configuration | +| agent.llmProviders.open_ai.models | object | `` | Available OpenAI models and their configurations | +| agent.llmProviders.open_ai.models.gpt-4o-mini.name | string | `"GPT-4o mini"` | Display name for the GPT-4o mini model | +| agent.llmProviders.open_ai.models.gpt-4o-mini.public | bool | `true` | Whether this model is publicly available | +| agent.llmProviders.open_ai.models.gpt-4o-mini.tier | string | `"standard"` | Service tier for this model (standard or advanced) | +| agent.llmProviders.open_ai.name | string | `"OpenAI"` | Display name for the OpenAI provider | +| agent.llmProviders.x_ai | object | `` | xAI provider configuration | +| agent.llmProviders.x_ai.models | object | `` | Available xAI models and their configurations | +| agent.maxConversationChars | int | `15000` | Maximum number of characters allowed in a conversation history | | api.additionalAnnotations | object | `nil` | Additional annotations to add to the API deployment | | api.additionalLabels | object | `{}` | Additional labels to add to API resources | | api.affinity | object | `{}` | [Affinity and anti-affinity](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#affinity-and-anti-affinity) to apply for API pods. | | api.containerSecurityContext | object | `{}` | [Container-level security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container) for API containers. 
| +| api.customToml | string | `nil` | Custom TOML sections can be added to extend api.toml | | api.driverPool | object | `` | driverPool configures the backend pool of speech engines (generically referred to as "drivers" here). The API will load-balance among drivers in the standard pool; if one standard driver fails, the next one will be tried. | | api.driverPool.standard | object | `` | standard is the main driver pool to use. | | api.driverPool.standard.maxResponseSize | string | `"1073741824"` | Maximum response to deserialize from Driver (in bytes). Default is 1GB, expressed in bytes. | @@ -191,12 +211,13 @@ If you encounter issues while deploying or using Deepgram, consider the followin | api.features.diskBufferPath | string | `nil` | If API is receiving requests faster than Engine can process them, a request queue will form. By default, this queue is stored in memory. Under high load, the queue may grow too large and cause Out-Of-Memory errors. To avoid this, set a diskBufferPath to buffer the overflow on the request queue to disk. WARN: This is only to temporarily buffer requests during high load. If there is not enough Engine capacity to process the queued requests over time, the queue (and response time) will grow indefinitely. | | api.features.entityDetection | bool | `false` | Enables entity detection on pre-recorded audio *if* a valid entity detection model is available. | | api.features.entityRedaction | bool | `false` | Enables entity-based redaction on pre-recorded audio *if* a valid entity detection model is available. | -| api.features.formatEntityTags | bool | `false` | Enables format entity tags on pre-recorded audio *if* a valid NER model is available. | +| api.features.formatEntityTags | bool | `true` | Enables format entity tags on pre-recorded audio *if* a valid NER model is available. | | api.image.path | string | `"quay.io/deepgram/self-hosted-api"` | path configures the image path to use for creating API containers. You may change this from the public Quay image path if you have imported Deepgram images into a private container registry. | | api.image.pullPolicy | string | `"IfNotPresent"` | pullPolicy configures how the Kubelet attempts to pull the Deepgram API image | -| api.image.tag | string | `"release-250814"` | tag defines which Deepgram release to use for API containers | +| api.image.tag | string | `"release-250912"` | tag defines which Deepgram release to use for API containers | | api.livenessProbe | object | `` | Liveness probe customization for API pods. | | api.namePrefix | string | `"deepgram-api"` | namePrefix is the prefix to apply to the name of all K8s objects associated with the Deepgram API containers. | +| api.nodeSelector | object | `{}` | [Node selector](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector) to apply to API pods. | | api.readinessProbe | object | `` | Readiness probe customization for API pods. | | api.resolver | object | `` | Specify custom DNS resolution options. | | api.resolver.maxTTL | int | `nil` | maxTTL sets the DNS TTL value if specifying a custom DNS nameserver. | @@ -235,11 +256,12 @@ If you encounter issues while deploying or using Deepgram, consider the followin | engine.chunking.speechToText.streaming.step | float | `1` | step defines how often to return interim results, in seconds. This value may be lowered to increase the frequency of interim results. However, this also causes a significant decrease in the number of concurrent streams supported by a single GPU. 
Please contact your Deepgram Account representative for more details. | | engine.concurrencyLimit.activeRequests | int | `nil` | activeRequests limits the number of active requests handled by a single Engine container. If additional requests beyond the limit are sent, the API container forming the request will try a different Engine pod. If no Engine pods are able to accept the request, the API will return a 429 HTTP response to the client. The `nil` default means no limit will be set. | | engine.containerSecurityContext | object | `{}` | [Container-level security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container) for Engine containers. | -| engine.features.streamingNer | bool | `false` | Enables format entity tags on streaming audio *if* a valid NER model is available. | +| engine.customToml | string | `nil` | Custom TOML sections can be added to extend engine.toml | +| engine.features.streamingNer | bool | `true` | Enables format entity tags on streaming audio *if* a valid NER model is available. | | engine.halfPrecision.state | string | `"auto"` | Engine will automatically enable half precision operations if your GPU supports them. You can explicitly enable or disable this behavior with the state parameter which supports `"enable"`, `"disabled"`, and `"auto"`. | | engine.image.path | string | `"quay.io/deepgram/self-hosted-engine"` | path configures the image path to use for creating Engine containers. You may change this from the public Quay image path if you have imported Deepgram images into a private container registry. | | engine.image.pullPolicy | string | `"IfNotPresent"` | pullPolicy configures how the Kubelet attempts to pull the Deepgram Engine image | -| engine.image.tag | string | `"release-250814"` | tag defines which Deepgram release to use for Engine containers | +| engine.image.tag | string | `"release-250912"` | tag defines which Deepgram release to use for Engine containers | | engine.livenessProbe | object | `` | Liveness probe customization for Engine pods. | | engine.metricsServer | object | `` | metricsServer exposes an endpoint on each Engine container for reporting inference-specific system metrics. See https://developers.deepgram.com/docs/metrics-guide#deepgram-engine for more details. | | engine.metricsServer.host | string | `"0.0.0.0"` | host is the IP address to listen on for metrics requests. You will want to listen on all interfaces to interact with other pods in the cluster. | @@ -261,6 +283,7 @@ If you encounter issues while deploying or using Deepgram, consider the followin | engine.modelManager.volumes.gcp.gpd.storageClassName | string | `"standard-rwo"` | The storageClassName of the existing persistent disk. | | engine.modelManager.volumes.gcp.gpd.volumeHandle | string | `""` | The identifier of your pre-existing persistent disk. The format is projects/{project_id}/zones/{zone_name}/disks/{disk_name} for Zonal persistent disks, or projects/{project_id}/regions/{region_name}/disks/{disk_name} for Regional persistent disks. | | engine.namePrefix | string | `"deepgram-engine"` | namePrefix is the prefix to apply to the name of all K8s objects associated with the Deepgram Engine containers. | +| engine.nodeSelector | object | `{}` | [Node selector](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector) to apply to Engine pods. | | engine.readinessProbe | object | `` | Readiness probe customization for Engine pods. 
| | engine.resources | object | `` | Configure resource limits per Engine container. See [Deepgram's documentation](https://developers.deepgram.com/docs/self-hosted-deployment-environments#engine) for more details. | | engine.resources.limits.gpu | int | `1` | gpu maps to the nvidia.com/gpu resource parameter | @@ -298,10 +321,11 @@ If you encounter issues while deploying or using Deepgram, consider the followin | licenseProxy.enabled | bool | `false` | The License Proxy is optional, but highly recommended to be deployed in production to enable highly available environments. | | licenseProxy.image.path | string | `"quay.io/deepgram/self-hosted-license-proxy"` | path configures the image path to use for creating License Proxy containers. You may change this from the public Quay image path if you have imported Deepgram images into a private container registry. | | licenseProxy.image.pullPolicy | string | `"IfNotPresent"` | pullPolicy configures how the Kubelet attempts to pull the Deepgram License Proxy image | -| licenseProxy.image.tag | string | `"release-250814"` | tag defines which Deepgram release to use for License Proxy containers | +| licenseProxy.image.tag | string | `"release-250912"` | tag defines which Deepgram release to use for License Proxy containers | | licenseProxy.keepUpstreamServerAsBackup | bool | `true` | Even with a License Proxy deployed, API and Engine pods can be configured to keep the upstream `license.deepgram.com` license server as a fallback licensing option if the License Proxy is unavailable. Disable this option if you are restricting API/Engine Pod network access for security reasons, and only the License Proxy should send egress traffic to the upstream license server. | | licenseProxy.livenessProbe | object | `` | Liveness probe customization for Proxy pods. | | licenseProxy.namePrefix | string | `"deepgram-license-proxy"` | namePrefix is the prefix to apply to the name of all K8s objects associated with the Deepgram License Proxy containers. | +| licenseProxy.nodeSelector | object | `{}` | [Node selector](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector) to apply to License Proxy pods. | | licenseProxy.readinessProbe | object | `` | Readiness probe customization for License Proxy pods. | | licenseProxy.resources | object | `` | Configure resource limits per License Proxy container. See [Deepgram's documentation](https://developers.deepgram.com/docs/license-proxy#system-requirements) for more details. | | licenseProxy.securityContext | object | `{}` | [Pod-level security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod) for License Proxy pods. | @@ -330,6 +354,7 @@ If you encounter issues while deploying or using Deepgram, consider the followin | scaling.auto.engine.metrics.textToSpeech.batch.requestsPerPod | int | `nil` | Scale the Engine pods based on a static desired number of text-to-speech batch requests per pod | | scaling.auto.engine.minReplicas | int | `1` | Minimum number of Engine replicas. | | scaling.replicas | object | `` | Number of replicas to set during initial installation. | +| scaling.replicas.engine | int | `1` | Engine replicas can be specified either as a single number for one engine type, or as individual counts for each engine type when Voice Agent is enabled. 
| ## Maintainers diff --git a/charts/deepgram-self-hosted/samples/01-basic-setup-aws.values.yaml b/charts/deepgram-self-hosted/samples/01-basic-setup-aws.values.yaml index 475ebbd..1ca7a3f 100644 --- a/charts/deepgram-self-hosted/samples/01-basic-setup-aws.values.yaml +++ b/charts/deepgram-self-hosted/samples/01-basic-setup-aws.values.yaml @@ -48,6 +48,10 @@ scaling: # Discuss a reasoanble value with your Deepgram Account Representative # Must also set engine.concurrencyLimit.activeRequests if using request ratio for autoscaling requestCapacityRatio: + +agent: + enabled: false + api: affinity: nodeAffinity: @@ -66,6 +70,10 @@ api: memory: "8Gi" cpu: "4000m" + # -- Custom TOML sections can be added here to extend api.toml + # customToml: | + # [custom_section] + engine: affinity: nodeAffinity: @@ -91,12 +99,23 @@ engine: concurrencyLimit: activeRequests: + # -- Custom TOML sections can be added here to extend engine.toml + # customToml: | + # # Preload models on engine startup for faster initial requests + # # See https://deepgram.gitbook.io/help-center/how-can-i-pre-load-models-to-reduce-cold-start-latency + # [preload_models] + # models = [ + # # Example model preload configuration: + # # { model = "general-nova-3", version = "2025-09-05.12808", language = "multi", format = false } + # ] + modelManager: volumes: aws: efs: enabled: true fileSystemId: fs-xxxxxxxxxxxxxxxx # Replace with your EFS ID + namePrefix: dg-models models: add: - https://link-to-model-1.dg # Replace these links with those provided to you diff --git a/charts/deepgram-self-hosted/samples/02-basic-setup-gcp.yaml b/charts/deepgram-self-hosted/samples/02-basic-setup-gcp.yaml index bfc0cd4..e9ac02e 100644 --- a/charts/deepgram-self-hosted/samples/02-basic-setup-gcp.yaml +++ b/charts/deepgram-self-hosted/samples/02-basic-setup-gcp.yaml @@ -49,6 +49,9 @@ scaling: # Must also set engine.concurrencyLimit.activeRequests if using request ratio for autoscaling requestCapacityRatio: +agent: + enabled: false + api: affinity: nodeAffinity: diff --git a/charts/deepgram-self-hosted/samples/04-aura-2-setup.values.yaml b/charts/deepgram-self-hosted/samples/04-aura-2-setup.values.yaml index a13c8bf..82a9b50 100644 --- a/charts/deepgram-self-hosted/samples/04-aura-2-setup.values.yaml +++ b/charts/deepgram-self-hosted/samples/04-aura-2-setup.values.yaml @@ -22,7 +22,7 @@ scaling: # API configuration for English Aura-2 api: image: - tag: release-250814 + tag: release-250912 # Enable Aura-2 specific features features: @@ -38,7 +38,7 @@ api: # Engine configuration for Aura-2 engine: image: - tag: release-250814 + tag: release-250912 # Aura-2 requires more resources than standard models resources: @@ -89,7 +89,7 @@ licenseProxy: keepUpstreamServerAsBackup: true image: - tag: release-250814 + tag: release-250912 # Monitoring configuration for Aura-2 # Enable Prometheus stack for metrics collection diff --git a/charts/deepgram-self-hosted/samples/README.md b/charts/deepgram-self-hosted/samples/README.md index defce23..cb8ab17 100644 --- a/charts/deepgram-self-hosted/samples/README.md +++ b/charts/deepgram-self-hosted/samples/README.md @@ -8,6 +8,7 @@ This directory contains examples of how to use the Deepgram Helm chart in variou - **02-basic-setup-gcp.yaml** - Basic GCP GKE deployment configuration - **03-basic-setup-onprem.yaml** - On-premises deployment configuration - **04-aura-2-setup.yaml** - Aura-2 model deployment with English and Spanish language support +- **05-voice-agent-aws.values.yaml** - AWS EKS Voice Agent deployment configuration 
## AWS EKS Samples See the [Deepgram AWS EKS guide](https://developers.deepgram.com/docs/aws-k8s) for detailed instructions on how to deploy Deepgram services in a managed Kubernetes cluster in AWS. diff --git a/charts/deepgram-self-hosted/templates/NOTES.txt b/charts/deepgram-self-hosted/templates/NOTES.txt index 44632c5..e8abd2f 100644 --- a/charts/deepgram-self-hosted/templates/NOTES.txt +++ b/charts/deepgram-self-hosted/templates/NOTES.txt @@ -1,4 +1,4 @@ -This is an beta Helm chart for Deepgram self-hosted deployments. +This is a Helm chart for Deepgram self-hosted deployments. Please refer to Deepgram's self-hosted documentation for further details: * https://developers.deepgram.com/docs/self-hosted-introduction diff --git a/charts/deepgram-self-hosted/templates/api/api.config.yaml b/charts/deepgram-self-hosted/templates/api/api.config.yaml index 5db6518..4f9f5e1 100644 --- a/charts/deepgram-self-hosted/templates/api/api.config.yaml +++ b/charts/deepgram-self-hosted/templates/api/api.config.yaml @@ -54,11 +54,61 @@ data: disk_buffer_path = "{{ .Values.api.features.diskBufferPath }}" {{- end }} + {{- if .Values.agent.enabled }} + [[driver_pool.standard]] + url = "https://{{ .Values.engine.namePrefix }}-agent-speech-to-text-internal:{{ .Values.engine.server.port}}/v2" + streaming_stt = true + tts = false + eot = false + timeout_backoff = {{ .Values.api.driverPool.standard.timeoutBackoff }} + retry_sleep = "{{ .Values.api.driverPool.standard.retrySleep }}" + retry_backoff = {{ .Values.api.driverPool.standard.retryBackoff }} + max_response_size = {{ int .Values.api.driverPool.standard.maxResponseSize }} + + [[driver_pool.standard]] + url = "https://{{ .Values.engine.namePrefix }}-agent-text-to-speech-internal:{{ .Values.engine.server.port}}/v2" + streaming_stt = false + tts = true + eot = false + timeout_backoff = {{ .Values.api.driverPool.standard.timeoutBackoff }} + retry_sleep = "{{ .Values.api.driverPool.standard.retrySleep }}" + retry_backoff = {{ .Values.api.driverPool.standard.retryBackoff }} + max_response_size = {{ int .Values.api.driverPool.standard.maxResponseSize }} + + [[driver_pool.standard]] + url = "https://{{ .Values.engine.namePrefix }}-agent-end-of-turn-internal:{{ .Values.engine.server.port}}/v2" + streaming_stt = false + tts = false + eot = true + timeout_backoff = {{ .Values.api.driverPool.standard.timeoutBackoff }} + retry_sleep = "{{ .Values.api.driverPool.standard.retrySleep }}" + retry_backoff = {{ .Values.api.driverPool.standard.retryBackoff }} + max_response_size = {{ int .Values.api.driverPool.standard.maxResponseSize }} + + {{- else }} [[driver_pool.standard]] url = "https://{{ .Values.engine.namePrefix }}-internal:{{ .Values.engine.server.port}}/v2" timeout_backoff = {{ .Values.api.driverPool.standard.timeoutBackoff }} retry_sleep = "{{ .Values.api.driverPool.standard.retrySleep }}" retry_backoff = {{ .Values.api.driverPool.standard.retryBackoff }} max_response_size = {{ int .Values.api.driverPool.standard.maxResponseSize }} + {{- end }} + [voice_agent] + {{- if .Values.agent.enabled }} + eot_timeout_ms = {{ .Values.agent.eotTimeoutMs }} + max_conversation_chars_sent_to_llm = {{ .Values.agent.maxConversationChars }} + allow_nonpublic_endpoints = {{ .Values.agent.allowNonpublicEndpoints }} + {{- range $provider, $config := .Values.agent.llmProviders }} + [voice_agent.llm_providers.{{ $provider }}] + name = {{ $config.name | quote }} + [voice_agent.llm_providers.{{ $provider }}.models] + {{- range $modelId, $model := $config.models }} + {{ $modelId }} = { 
name = {{ $model.name | quote }}, tier = {{ $model.tier | quote }}, public = {{ $model.public }} } + {{- end }} + {{- end }} + {{- end }} + {{- if .Values.api.customToml }} + {{ .Values.api.customToml | nindent 4 }} + {{- end }} diff --git a/charts/deepgram-self-hosted/templates/api/api.deployment.yaml b/charts/deepgram-self-hosted/templates/api/api.deployment.yaml index 6af0acf..d14b5d7 100644 --- a/charts/deepgram-self-hosted/templates/api/api.deployment.yaml +++ b/charts/deepgram-self-hosted/templates/api/api.deployment.yaml @@ -17,7 +17,9 @@ spec: matchLabels: app: deepgram-api {{ include "deepgram-self-hosted.selectorLabels" . }} + {{- if not .Values.scaling.auto.enabled }} replicas: {{ .Values.scaling.replicas.api }} + {{- end }} strategy: type: RollingUpdate rollingUpdate: @@ -28,7 +30,7 @@ spec: labels: *labels annotations: checksum/config: {{ include (print $.Template.BasePath "/api/api.config.yaml") . | sha256sum }} - {{- with .Values.api.additionalAnnotations }} + {{- with $.Values.api.additionalAnnotations }} {{- toYaml . | nindent 8 }} {{- end }} spec: @@ -41,7 +43,8 @@ spec: {{- toYaml .Values.api.affinity | nindent 8 }} tolerations: {{- toYaml .Values.api.tolerations | nindent 8 }} - {{- with .Values.api.securityContext }} + nodeSelector: + {{- toYaml .Values.api.nodeSelector | nindent 8 }} securityContext: {{- toYaml .Values.api.securityContext | nindent 8 }} {{- if or .Values.api.serviceAccount.create .Values.api.serviceAccount.name }} @@ -61,6 +64,60 @@ spec: env: - name: DEEPGRAM_DEPLOYMENT_ORCHESTRATOR value: helm-{{ include "deepgram-self-hosted.chart" . }} + - name: OPENAI_API_KEY + {{- if and .Values.global.thirdPartyCredentials .Values.global.thirdPartyCredentials.openAiSecretRef }} + valueFrom: + secretKeyRef: + name: {{ .Values.global.thirdPartyCredentials.openAiSecretRef }} + key: OPENAI_API_KEY + {{- else }} + value: "" + {{- end }} + - name: XAI_API_KEY + {{- if and .Values.global.thirdPartyCredentials .Values.global.thirdPartyCredentials.xaiSecretRef }} + valueFrom: + secretKeyRef: + name: {{ .Values.global.thirdPartyCredentials.xaiSecretRef }} + key: XAI_API_KEY + {{- else }} + value: "" + {{- end }} + - name: ANTHROPIC_API_KEY + {{- if and .Values.global.thirdPartyCredentials .Values.global.thirdPartyCredentials.anthropicSecretRef }} + valueFrom: + secretKeyRef: + name: {{ .Values.global.thirdPartyCredentials.anthropicSecretRef }} + key: ANTHROPIC_API_KEY + {{- else }} + value: "" + {{- end }} + - name: GROQ_API_KEY + {{- if and .Values.global.thirdPartyCredentials .Values.global.thirdPartyCredentials.groqSecretRef }} + valueFrom: + secretKeyRef: + name: {{ .Values.global.thirdPartyCredentials.groqSecretRef }} + key: GROQ_API_KEY + {{- else }} + value: "" + {{- end }} + - name: ELEVENLABS_API_KEY + {{- if and .Values.global.thirdPartyCredentials .Values.global.thirdPartyCredentials.elevenLabsSecretRef }} + valueFrom: + secretKeyRef: + name: {{ .Values.global.thirdPartyCredentials.elevenLabsSecretRef }} + key: ELEVENLABS_API_KEY + {{- else }} + value: "" + {{- end }} + - name: CARTESIA_API_KEY + {{- if and .Values.global.thirdPartyCredentials .Values.global.thirdPartyCredentials.cartesiaSecretRef }} + valueFrom: + secretKeyRef: + name: {{ .Values.global.thirdPartyCredentials.cartesiaSecretRef }} + key: CARTESIA_API_KEY + {{- else }} + value: "" + {{- end }} command: [ "stem" ] args: ["-v", "serve", "/etc/config/api.toml"] resources: diff --git a/charts/deepgram-self-hosted/templates/engine/engine.config.yaml 
b/charts/deepgram-self-hosted/templates/engine/engine.config.yaml index 178ba9b..7b94481 100644 --- a/charts/deepgram-self-hosted/templates/engine/engine.config.yaml +++ b/charts/deepgram-self-hosted/templates/engine/engine.config.yaml @@ -67,3 +67,6 @@ data: [half_precision] state = "{{ .Values.engine.halfPrecision.state }}" + {{- if .Values.engine.customToml }} + {{ .Values.engine.customToml | nindent 4 }} + {{- end }} diff --git a/charts/deepgram-self-hosted/templates/engine/engine.deployment.yaml b/charts/deepgram-self-hosted/templates/engine/engine.deployment.yaml index f2a6d42..5c16dfa 100644 --- a/charts/deepgram-self-hosted/templates/engine/engine.deployment.yaml +++ b/charts/deepgram-self-hosted/templates/engine/engine.deployment.yaml @@ -1,14 +1,25 @@ +{{- $engineTypes := list }} +{{- if .Values.agent.enabled }} +{{- $engineTypes = list "agent-speech-to-text" "agent-text-to-speech" "agent-end-of-turn" }} +{{- else }} +{{- $engineTypes = list "" }} +{{- end }} +{{- range $type := $engineTypes }} +--- apiVersion: apps/v1 kind: Deployment metadata: - name: {{ .Values.engine.namePrefix }} + name: {{ $.Values.engine.namePrefix }}{{- if $type }}-{{ $type }}{{- end }} labels: &labels -{{ include "deepgram-self-hosted.labels" . | indent 4}} +{{ include "deepgram-self-hosted.labels" $ | indent 4}} app: deepgram-engine - {{- range $key, $val := .Values.engine.additionalLabels }} + {{- if $type }} + engine-type: {{ $type }} + {{- end }} + {{- range $key, $val := $.Values.engine.additionalLabels }} {{ $key }}: {{ $val | quote }} {{- end}} - {{- with .Values.engine.additionalAnnotations }} + {{- with $.Values.engine.additionalAnnotations }} annotations: {{- toYaml . | nindent 4 }} {{- end }} @@ -16,90 +27,100 @@ spec: selector: matchLabels: app: deepgram-engine - {{ include "deepgram-self-hosted.selectorLabels" . }} - replicas: {{ .Values.scaling.replicas.engine }} + {{- if $type }} + engine-type: {{ $type }} + {{- end }} + {{ include "deepgram-self-hosted.selectorLabels" $ }} + {{- if not $.Values.scaling.auto.enabled }} + {{- if $.Values.agent.enabled }} + replicas: {{ index $.Values.scaling.replicas.engine $type | default 1 }} + {{- else }} + replicas: {{ $.Values.scaling.replicas.engine }} + {{- end }} + {{- end }} strategy: type: RollingUpdate rollingUpdate: - maxUnavailable: {{ .Values.engine.updateStrategy.rollingUpdate.maxUnavailable }} - maxSurge: {{ .Values.engine.updateStrategy.rollingUpdate.maxSurge }} + maxUnavailable: {{ $.Values.engine.updateStrategy.rollingUpdate.maxUnavailable }} + maxSurge: {{ $.Values.engine.updateStrategy.rollingUpdate.maxSurge }} template: metadata: labels: *labels annotations: - checksum/config: {{ include (print $.Template.BasePath "/engine/engine.config.yaml") . | sha256sum }} - {{- with .Values.engine.additionalAnnotations }} + checksum/config: {{ include (print $.Template.BasePath "/engine/engine.config.yaml") $ | sha256sum }} + {{- with $.Values.engine.additionalAnnotations }} {{- toYaml . 
| nindent 8 }} {{- end }} spec: - terminationGracePeriodSeconds: {{ .Values.global.outstandingRequestGracePeriod }} - {{- if .Values.global.pullSecretRef }} + terminationGracePeriodSeconds: {{ $.Values.global.outstandingRequestGracePeriod }} + {{- if $.Values.global.pullSecretRef }} imagePullSecrets: - - name: {{ .Values.global.pullSecretRef }} + - name: {{ $.Values.global.pullSecretRef }} {{- end }} affinity: - {{- toYaml .Values.engine.affinity | nindent 8 }} + {{- toYaml $.Values.engine.affinity | nindent 8 }} tolerations: - {{- toYaml .Values.engine.tolerations | nindent 8 }} - {{- with .Values.engine.securityContext }} + {{- toYaml $.Values.engine.tolerations | nindent 8 }} + {{- with $.Values.engine.securityContext }} securityContext: - {{- toYaml .Values.engine.securityContext | nindent 8 }} - {{- if or .Values.engine.serviceAccount.create .Values.engine.serviceAccount.name }} - serviceAccountName: {{ default (printf "%s-sa" .Values.engine.namePrefix) .Values.engine.serviceAccount.name }} + {{- toYaml . | nindent 8 }} + {{- end}} + {{- if or $.Values.engine.serviceAccount.create $.Values.engine.serviceAccount.name }} + serviceAccountName: {{ default (printf "%s-sa" $.Values.engine.namePrefix) $.Values.engine.serviceAccount.name }} {{- end }} containers: - - name: {{ .Values.engine.namePrefix }} - {{- with .Values.engine.containerSecurityContext }} + - name: {{ $.Values.engine.namePrefix }} + {{- with $.Values.engine.containerSecurityContext }} securityContext: {{- toYaml . | nindent 10 }} {{- end }} - image: {{ .Values.engine.image.path }}:{{ .Values.engine.image.tag }} - imagePullPolicy: {{ .Values.engine.image.pullPolicy }} + image: {{ $.Values.engine.image.path }}:{{ $.Values.engine.image.tag }} + imagePullPolicy: {{ $.Values.engine.image.pullPolicy }} envFrom: - secretRef: - name: {{ required "Missing Deepgram self-hosted API key - see `global.deepgramSecretRef`" .Values.global.deepgramSecretRef }} + name: {{ required "Missing Deepgram self-hosted API key - see `global.deepgramSecretRef`" $.Values.global.deepgramSecretRef }} env: - name: DEEPGRAM_DEPLOYMENT_ORCHESTRATOR - value: helm-{{ include "deepgram-self-hosted.chart" . 
}} - {{- if le (int .Values.engine.resources.requests.gpu) 0 }} + value: helm-{{ include "deepgram-self-hosted.chart" $ }} + {{- if le (int $.Values.engine.resources.requests.gpu) 0 }} - name: NVIDIA_VISIBLE_DEVICES value: "void" {{- end }} - {{- if .Values.aura2.enabled }} - {{- if .Values.aura2.english.enabled }} + {{- if $.Values.aura2.enabled }} + {{- if $.Values.aura2.english.enabled }} - name: IMPELLER_AURA2_MAX_BATCH_SIZE - value: "{{ .Values.aura2.english.maxBatchSize }}" + value: "{{ $.Values.aura2.english.maxBatchSize }}" - name: IMPELLER_AURA2_T2C_UUID - value: "{{ .Values.aura2.english.t2cUuid }}" + value: "{{ $.Values.aura2.english.t2cUuid }}" - name: IMPELLER_AURA2_C2A_UUID - value: "{{ .Values.aura2.english.c2aUuid }}" + value: "{{ $.Values.aura2.english.c2aUuid }}" - name: CUDA_VISIBLE_DEVICES - value: "{{ .Values.aura2.english.cudaVisibleDevices }}" - {{- else if .Values.aura2.spanish.enabled }} + value: "{{ $.Values.aura2.english.cudaVisibleDevices }}" + {{- else if $.Values.aura2.spanish.enabled }} - name: IMPELLER_AURA2_MAX_BATCH_SIZE - value: "{{ .Values.aura2.spanish.maxBatchSize }}" + value: "{{ $.Values.aura2.spanish.maxBatchSize }}" - name: IMPELLER_AURA2_T2C_UUID - value: "{{ .Values.aura2.spanish.t2cUuid }}" + value: "{{ $.Values.aura2.spanish.t2cUuid }}" - name: IMPELLER_AURA2_C2A_UUID - value: "{{ .Values.aura2.spanish.c2aUuid }}" + value: "{{ $.Values.aura2.spanish.c2aUuid }}" - name: CUDA_VISIBLE_DEVICES - value: "{{ .Values.aura2.spanish.cudaVisibleDevices }}" + value: "{{ $.Values.aura2.spanish.cudaVisibleDevices }}" {{- end }} {{- end }} command: [ "impeller" ] args: ["-v", "serve", "/etc/config/engine.toml"] resources: requests: - memory: "{{ .Values.engine.resources.requests.memory }}" - cpu: "{{ .Values.engine.resources.requests.cpu }}" - {{- if gt (int .Values.engine.resources.requests.gpu) 0 }} - nvidia.com/gpu: {{ .Values.engine.resources.requests.gpu }} + memory: "{{ $.Values.engine.resources.requests.memory }}" + cpu: "{{ $.Values.engine.resources.requests.cpu }}" + {{- if gt (int $.Values.engine.resources.requests.gpu) 0 }} + nvidia.com/gpu: {{ $.Values.engine.resources.requests.gpu }} {{- end }} limits: - memory: "{{ .Values.engine.resources.limits.memory }}" - cpu: "{{ .Values.engine.resources.limits.cpu }}" - {{- if gt (int .Values.engine.resources.limits.gpu) 0 }} - nvidia.com/gpu: {{ .Values.engine.resources.limits.gpu }} + memory: "{{ $.Values.engine.resources.limits.memory }}" + cpu: "{{ $.Values.engine.resources.limits.cpu }}" + {{- if gt (int $.Values.engine.resources.limits.gpu) 0 }} + nvidia.com/gpu: {{ $.Values.engine.resources.limits.gpu }} {{- end }} volumeMounts: - name: engine-config-volume @@ -108,45 +129,45 @@ spec: mountPath: /models ports: - name: primary - containerPort: {{ .Values.engine.server.port }} + containerPort: {{ $.Values.engine.server.port }} - name: metrics - containerPort: {{ .Values.engine.metricsServer.port }} + containerPort: {{ $.Values.engine.metricsServer.port }} startupProbe: tcpSocket: - port: {{ .Values.engine.server.port }} - periodSeconds: {{ .Values.engine.startupProbe.periodSeconds }} - failureThreshold: {{ .Values.engine.startupProbe.failureThreshold }} + port: {{ $.Values.engine.server.port }} + periodSeconds: {{ $.Values.engine.startupProbe.periodSeconds }} + failureThreshold: {{ $.Values.engine.startupProbe.failureThreshold }} livenessProbe: tcpSocket: - port: {{ .Values.engine.server.port }} - initialDelaySeconds: {{ .Values.engine.livenessProbe.initialDelaySeconds }} - periodSeconds: {{ 
.Values.engine.livenessProbe.periodSeconds }} - failureThreshold: {{ .Values.engine.livenessProbe.failureThreshold }} + port: {{ $.Values.engine.server.port }} + initialDelaySeconds: {{ $.Values.engine.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ $.Values.engine.livenessProbe.periodSeconds }} + failureThreshold: {{ $.Values.engine.livenessProbe.failureThreshold }} readinessProbe: tcpSocket: - port: {{ .Values.engine.server.port }} - initialDelaySeconds: {{ .Values.engine.readinessProbe.initialDelaySeconds }} - periodSeconds: {{ .Values.engine.readinessProbe.periodSeconds }} - failureThreshold: {{ .Values.engine.readinessProbe.failureThreshold }} + port: {{ $.Values.engine.server.port }} + initialDelaySeconds: {{ $.Values.engine.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ $.Values.engine.readinessProbe.periodSeconds }} + failureThreshold: {{ $.Values.engine.readinessProbe.failureThreshold }} volumes: - name: engine-config-volume configMap: - name: {{ .Values.engine.namePrefix }}-config + name: {{ $.Values.engine.namePrefix }}-config - name: models-volume persistentVolumeClaim: - {{- $customClaimEnabled := .Values.engine.modelManager.volumes.customVolumeClaim.enabled }} - {{- $customClaimName := .Values.engine.modelManager.volumes.customVolumeClaim.name }} - {{- $awsEfsEnabled := .Values.engine.modelManager.volumes.aws.efs.enabled }} - {{- $gcpGpdEnabled := .Values.engine.modelManager.volumes.gcp.gpd.enabled }} + {{- $customClaimEnabled := $.Values.engine.modelManager.volumes.customVolumeClaim.enabled }} + {{- $customClaimName := $.Values.engine.modelManager.volumes.customVolumeClaim.name }} + {{- $awsEfsEnabled := $.Values.engine.modelManager.volumes.aws.efs.enabled }} + {{- $gcpGpdEnabled := $.Values.engine.modelManager.volumes.gcp.gpd.enabled }} {{- $enabledCount := (int $customClaimEnabled) | add (int $awsEfsEnabled) | add (int $gcpGpdEnabled) }} - + {{- if eq $enabledCount 0 }} {{- fail "Error: At least one of customVolumeClaim.enabled, aws.efs.enabled, or gcp.gpd.enabled must be set to true." }} {{- else if gt $enabledCount 1 }} {{- fail "Error: Only one of customVolumeClaim.enabled, aws.efs.enabled, or gcp.gpd.enabled can be set to true." }} {{- end }} - + {{- if $customClaimEnabled }} {{- if not $customClaimName }} {{- fail "Error: customVolumeClaim.name must be set when customVolumeClaim.enabled is true." 
}} @@ -154,7 +175,8 @@ spec: claimName: {{ $customClaimName }} {{- end }} {{- else if $awsEfsEnabled }} - claimName: {{ .Values.engine.modelManager.volumes.aws.efs.namePrefix }}-aws-efs-pvc + claimName: {{ $.Values.engine.modelManager.volumes.aws.efs.namePrefix }}-aws-efs-pvc {{- else if $gcpGpdEnabled }} - claimName: {{ .Values.engine.modelManager.volumes.gcp.gpd.namePrefix }}-gcp-gpd-pvc + claimName: {{ $.Values.engine.modelManager.volumes.gcp.gpd.namePrefix }}-gcp-gpd-pvc {{- end }} +{{- end }} diff --git a/charts/deepgram-self-hosted/templates/engine/engine.hpa.yaml b/charts/deepgram-self-hosted/templates/engine/engine.hpa.yaml index 4a2c2db..1970cf1 100644 --- a/charts/deepgram-self-hosted/templates/engine/engine.hpa.yaml +++ b/charts/deepgram-self-hosted/templates/engine/engine.hpa.yaml @@ -1,4 +1,8 @@ -{{- if .Values.scaling.auto.enabled -}} +{{- if and .Values.scaling.auto.enabled .Values.agent.enabled }} +{{- fail "Error: Autoscaling is not yet supported for voice agent" }} +{{- end }} +{{- if .Values.scaling.auto.enabled }} +--- apiVersion: autoscaling/v2 kind: HorizontalPodAutoscaler metadata: diff --git a/charts/deepgram-self-hosted/templates/engine/engine.service.yaml b/charts/deepgram-self-hosted/templates/engine/engine.service.yaml index 6442e57..9ea3987 100644 --- a/charts/deepgram-self-hosted/templates/engine/engine.service.yaml +++ b/charts/deepgram-self-hosted/templates/engine/engine.service.yaml @@ -1,39 +1,60 @@ +{{- $engineTypes := list }} +{{- if .Values.agent.enabled }} +{{- $engineTypes = list "agent-speech-to-text" "agent-text-to-speech" "agent-end-of-turn" }} +{{- else }} +{{- $engineTypes = list "" }} +{{- end }} +{{- range $type := $engineTypes }} +--- apiVersion: v1 kind: Service metadata: - name: {{ .Values.engine.namePrefix }}-metrics + name: {{ $.Values.engine.namePrefix }}{{- if $type }}-{{ $type }}{{- end }}-metrics labels: -{{ include "deepgram-self-hosted.labels" . | indent 4}} - {{- range $key, $val := .Values.engine.additionalLabels }} +{{ include "deepgram-self-hosted.labels" $ | indent 4}} + app: deepgram-engine + {{- if $type }} + engine-type: {{ $type }} + {{- end }} + {{- range $key, $val := $.Values.engine.additionalLabels }} {{ $key }}: {{ $val | quote }} {{- end}} spec: selector: app: deepgram-engine - {{ include "deepgram-self-hosted.selectorLabels" . }} + {{- if $type }} + engine-type: {{ $type }} + {{- end }} + {{ include "deepgram-self-hosted.selectorLabels" $ }} type: NodePort ports: - name: "metrics" - port: {{ .Values.engine.metricsServer.port }} - targetPort: {{ .Values.engine.metricsServer.port }} + port: {{ $.Values.engine.metricsServer.port }} + targetPort: {{ $.Values.engine.metricsServer.port }} --- - apiVersion: v1 kind: Service metadata: - name: {{ .Values.engine.namePrefix }}-internal + name: {{ $.Values.engine.namePrefix }}{{- if $type }}-{{ $type }}{{- end }}-internal labels: -{{ include "deepgram-self-hosted.labels" . | indent 4}} - {{- range $key, $val := .Values.engine.additionalLabels }} +{{ include "deepgram-self-hosted.labels" $ | indent 4}} + app: deepgram-engine + {{- if $type }} + engine-type: {{ $type }} + {{- end }} + {{- range $key, $val := $.Values.engine.additionalLabels }} {{ $key }}: {{ $val | quote }} {{- end}} spec: selector: app: deepgram-engine - {{ include "deepgram-self-hosted.selectorLabels" . 
}} + {{- if $type }} + engine-type: {{ $type }} + {{- end }} + {{ include "deepgram-self-hosted.selectorLabels" $ }} ports: - name: "primary" - port: {{ .Values.engine.server.port }} - targetPort: {{ .Values.engine.server.port }} - + port: {{ $.Values.engine.server.port }} + targetPort: {{ $.Values.engine.server.port }} +{{- end }} diff --git a/charts/deepgram-self-hosted/templates/license-proxy/license-proxy.deployment.yaml b/charts/deepgram-self-hosted/templates/license-proxy/license-proxy.deployment.yaml index ceb1730..fcb8cea 100644 --- a/charts/deepgram-self-hosted/templates/license-proxy/license-proxy.deployment.yaml +++ b/charts/deepgram-self-hosted/templates/license-proxy/license-proxy.deployment.yaml @@ -29,7 +29,7 @@ spec: labels: *labels annotations: checksum/config: {{ include (print $.Template.BasePath "/license-proxy/license-proxy.config.yaml") . | sha256sum }} - {{- with .Values.licenseProxy.additionalAnnotations }} + {{- with $.Values.licenseProxy.additionalAnnotations }} {{- toYaml . | nindent 8 }} {{- end }} spec: @@ -41,10 +41,10 @@ spec: {{- toYaml .Values.licenseProxy.affinity | nindent 8 }} tolerations: {{- toYaml .Values.licenseProxy.tolerations | nindent 8 }} - {{- with .Values.licenseProxy.securityContext }} + nodeSelector: + {{- toYaml .Values.licenseProxy.nodeSelector | nindent 8 }} securityContext: - {{- toYaml . | nindent 8 }} - {{- end }} + {{- toYaml .Values.licenseProxy.securityContext | nindent 8 }} {{- if or .Values.licenseProxy.serviceAccount.create .Values.licenseProxy.serviceAccount.name }} serviceAccountName: {{ default (printf "%s-sa" .Values.licenseProxy.namePrefix) .Values.licenseProxy.serviceAccount.name }} {{- end }} diff --git a/charts/deepgram-self-hosted/templates/volumes/aws/efs-model-download.job.yaml b/charts/deepgram-self-hosted/templates/volumes/aws/efs-model-download.job.yaml index 427fcb7..1e32e1e 100644 --- a/charts/deepgram-self-hosted/templates/volumes/aws/efs-model-download.job.yaml +++ b/charts/deepgram-self-hosted/templates/volumes/aws/efs-model-download.job.yaml @@ -20,6 +20,8 @@ spec: securityContext: {{- toYaml . | nindent 8 }} {{- end }} + nodeSelector: + {{- toYaml .Values.engine.nodeSelector | nindent 8 }} containers: - name: model-management {{- with .Values.engine.containerSecurityContext }} diff --git a/charts/deepgram-self-hosted/values.yaml b/charts/deepgram-self-hosted/values.yaml index b0f9534..a144cdf 100644 --- a/charts/deepgram-self-hosted/values.yaml +++ b/charts/deepgram-self-hosted/values.yaml @@ -31,6 +31,8 @@ scaling: # @default -- `` replicas: api: 1 + # -- Engine replicas can be specified either as a single number for one engine type, + # or as individual counts for each engine type when Voice Agent is enabled. engine: 1 # -- Enable pod autoscaling based on system load/traffic. @@ -99,6 +101,110 @@ scaling: value: 25 periodSeconds: 60 +agent: + # -- (bool) Whether to enable voice agent. Disabled by default + enabled: false + # -- (int) Timeout in milliseconds for end-of-turn detection + eotTimeoutMs: 3500 + # -- (int) Maximum number of characters allowed in a conversation history + maxConversationChars: 15000 + # -- (bool) Whether to allow non-public URLs (such as `localhost`) in custom endpoints. 
Disabled by default + allowNonpublicEndpoints: false + # -- Configuration for LLM providers and their available models + # @default -- `` + llmProviders: + # -- OpenAI provider configuration + # @default -- `` + open_ai: + # -- Display name for the OpenAI provider + name: "OpenAI" + # -- Available OpenAI models and their configurations + # @default -- `` + models: + gpt-4o-mini: + # -- Display name for the GPT-4o mini model + name: "GPT-4o mini" + # -- Service tier for this model (standard or advanced) + tier: "standard" + # -- Whether this model is publicly available + public: true + gpt-3-5-turbo: + name: "GPT-3.5 Turbo" + tier: "standard" + public: false + gpt-4o: + name: "GPT-4o" + tier: "advanced" + public: false + # -- Anthropic provider configuration + # @default -- `` + anthropic: + name: "Anthropic" + # -- Available Anthropic models and their configurations + # @default -- `` + models: + claude-3-haiku-20240307: + name: "Claude 3 Haiku" + tier: "standard" + public: true + claude-3-opus-20240229: + name: "Claude 3 Opus" + tier: "advanced" + public: false + claude-3-5-haiku-20241022: + name: "Claude 3.5 Haiku" + tier: "standard" + public: false + claude-3-5-sonnet-20240620: + name: "Claude 3.5 Sonnet" + tier: "advanced" + public: false + claude-3-5-sonnet-latest: + name: "Claude 3.5 Sonnet" + tier: "advanced" + public: false + # -- Groq provider configuration + # @default -- `` + groq: + name: "Groq" + # -- Available Groq models and their configurations + # @default -- `` + models: + mixtral-8x7b-32768: + name: "Mixtral 8x7B" + tier: "standard" + public: false + llama3-8b-8192: + name: "Llama 3 8B" + tier: "standard" + public: false + llama3-70b-8192: + name: "Llama 3 70B" + tier: "advanced" + public: false + # -- Deepgram provider configuration + # @default -- `` + deepgram: + name: "Deepgram" + # -- Available Deepgram models and their configurations + # @default -- `` + models: + llama-3-1-8b-instruct: + name: "Llama 3.1 8B Instruct" + tier: "standard" + public: false + # -- xAI provider configuration + # @default -- `` + x_ai: + name: "xAI" + # -- Available xAI models and their configurations + # @default -- `` + models: + grok-2-latest: + name: "Grok 2 Latest" + tier: "standard" + public: false + api: # -- namePrefix is the prefix to apply to the name of all K8s objects # associated with the Deepgram API containers. @@ -112,7 +218,7 @@ api: # -- pullPolicy configures how the Kubelet attempts to pull the Deepgram API image pullPolicy: IfNotPresent # -- tag defines which Deepgram release to use for API containers - tag: release-250814 + tag: release-250912 # -- Additional labels to add to API resources additionalLabels: {} @@ -165,6 +271,10 @@ api: # to apply to API pods. tolerations: [] + # -- [Node selector](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector) + # to apply to API pods. + nodeSelector: {} + # -- [Pod-level security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod) for API pods. securityContext: {} @@ -235,7 +345,7 @@ api: # -- Enables format entity tags on pre-recorded audio # *if* a valid NER model is available. - formatEntityTags: false + formatEntityTags: true # -- If API is receiving requests faster than Engine can process them, a request # queue will form. By default, this queue is stored in memory. Under high load, @@ -270,6 +380,9 @@ api: # Default is 1GB, expressed in bytes. 
maxResponseSize: "1073741824" + # -- Custom TOML sections can be added to extend api.toml + customToml: + engine: # -- namePrefix is the prefix to apply to the name of all K8s objects # associated with the Deepgram Engine containers. @@ -283,7 +396,7 @@ engine: # -- pullPolicy configures how the Kubelet attempts to pull the Deepgram Engine image pullPolicy: IfNotPresent # -- tag defines which Deepgram release to use for Engine containers - tag: release-250814 + tag: release-250912 # -- Additional labels to add to Engine resources additionalLabels: {} @@ -357,6 +470,10 @@ engine: # to apply to Engine pods. tolerations: [] + # -- [Node selector](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector) + # to apply to Engine pods. + nodeSelector: {} + # -- [Pod-level security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod) for Engine pods. securityContext: {} @@ -382,6 +499,9 @@ engine: # to the client. The `nil` default means no limit will be set. activeRequests: + # -- Custom TOML sections can be added to extend engine.toml + customToml: + # -- Configure Engine containers to listen for requests from API containers. # @default -- `` server: @@ -479,7 +599,7 @@ engine: features: # -- Enables format entity tags on streaming audio # *if* a valid NER model is available. - streamingNer: false + streamingNer: true # -- chunking defines the size of audio chunks to process in seconds. # Adjusting these values will affect both inference performance and accuracy @@ -542,7 +662,7 @@ licenseProxy: # Deepgram images into a private container registry. path: quay.io/deepgram/self-hosted-license-proxy # -- tag defines which Deepgram release to use for License Proxy containers - tag: release-250814 + tag: release-250912 # -- pullPolicy configures how the Kubelet attempts to pull the Deepgram # License Proxy image pullPolicy: IfNotPresent @@ -597,6 +717,10 @@ licenseProxy: # to apply to License Proxy pods. tolerations: [] + # -- [Node selector](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector) + # to apply to License Proxy pods. + nodeSelector: {} + # -- [Pod-level security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod) for License Proxy pods. securityContext: {} diff --git a/common/license_proxy_deploy/engine.aura-2-en.toml b/common/license_proxy_deploy/engine.aura-2-en.toml index 214c733..856e415 100644 --- a/common/license_proxy_deploy/engine.aura-2-en.toml +++ b/common/license_proxy_deploy/engine.aura-2-en.toml @@ -53,7 +53,7 @@ multichannel = true # or false ### Enables language detection *if* a valid language detection model is available language_detection = true # or false ### Enables streaming entity formatting *if* a valid NER model is available -streaming_ner = false # or true +streaming_ner = true # or false ### Size of audio chunks to process in seconds. [chunking.batch] @@ -77,3 +77,12 @@ streaming_ner = false # or true ### which supports enabled, disabled, and auto (the default). [half_precision] # state = "disabled" # or "enabled" or "auto" + +[health] +### Controls whether Engine fails on startup if no GPU is detected +### Default: false +### +### While Engine can run without a GPU, production deployments require one for +### acceptable performance. Set to true to fail fast if no GPU is available, +### rather than running with severely degraded performance. 
+# gpu_required = true # or false diff --git a/common/license_proxy_deploy/engine.aura-2-es.toml b/common/license_proxy_deploy/engine.aura-2-es.toml index 4c553c5..90a778f 100644 --- a/common/license_proxy_deploy/engine.aura-2-es.toml +++ b/common/license_proxy_deploy/engine.aura-2-es.toml @@ -53,7 +53,7 @@ multichannel = true # or false ### Enables language detection *if* a valid language detection model is available language_detection = true # or false ### Enables streaming entity formatting *if* a valid NER model is available -streaming_ner = false # or true +streaming_ner = true # or false ### Size of audio chunks to process in seconds. [chunking.batch] @@ -77,3 +77,12 @@ streaming_ner = false # or true ### which supports enabled, disabled, and auto (the default). [half_precision] # state = "disabled" # or "enabled" or "auto" + +[health] +### Controls whether Engine fails on startup if no GPU is detected +### Default: false +### +### While Engine can run without a GPU, production deployments require one for +### acceptable performance. Set to true to fail fast if no GPU is available, +### rather than running with severely degraded performance. +# gpu_required = true # or false diff --git a/common/license_proxy_deploy/engine.toml b/common/license_proxy_deploy/engine.toml index 214c733..856e415 100644 --- a/common/license_proxy_deploy/engine.toml +++ b/common/license_proxy_deploy/engine.toml @@ -53,7 +53,7 @@ multichannel = true # or false ### Enables language detection *if* a valid language detection model is available language_detection = true # or false ### Enables streaming entity formatting *if* a valid NER model is available -streaming_ner = false # or true +streaming_ner = true # or false ### Size of audio chunks to process in seconds. [chunking.batch] @@ -77,3 +77,12 @@ streaming_ner = false # or true ### which supports enabled, disabled, and auto (the default). [half_precision] # state = "disabled" # or "enabled" or "auto" + +[health] +### Controls whether Engine fails on startup if no GPU is detected +### Default: false +### +### While Engine can run without a GPU, production deployments require one for +### acceptable performance. Set to true to fail fast if no GPU is available, +### rather than running with severely degraded performance. +# gpu_required = true # or false diff --git a/common/standard_deploy/api.toml b/common/standard_deploy/api.toml index 98d7407..1294771 100644 --- a/common/standard_deploy/api.toml +++ b/common/standard_deploy/api.toml @@ -71,7 +71,7 @@ entity_detection = false # or true entity_redaction = false # or true ### Enables pre-recorded entity formatting *if* a valid NER model is available -format_entity_tags = false # or true +format_entity_tags = true # or false ### If API is receiving requests faster than Engine can process them, a request ### queue will form. By default, this queue is stored in memory. Under high load, diff --git a/common/standard_deploy/engine.aura-2-en.toml b/common/standard_deploy/engine.aura-2-en.toml index 214c733..856e415 100644 --- a/common/standard_deploy/engine.aura-2-en.toml +++ b/common/standard_deploy/engine.aura-2-en.toml @@ -53,7 +53,7 @@ multichannel = true # or false ### Enables language detection *if* a valid language detection model is available language_detection = true # or false ### Enables streaming entity formatting *if* a valid NER model is available -streaming_ner = false # or true +streaming_ner = true # or false ### Size of audio chunks to process in seconds. 
[chunking.batch] @@ -77,3 +77,12 @@ streaming_ner = false # or true ### which supports enabled, disabled, and auto (the default). [half_precision] # state = "disabled" # or "enabled" or "auto" + +[health] +### Controls whether Engine fails on startup if no GPU is detected +### Default: false +### +### While Engine can run without a GPU, production deployments require one for +### acceptable performance. Set to true to fail fast if no GPU is available, +### rather than running with severely degraded performance. +# gpu_required = true # or false diff --git a/common/standard_deploy/engine.aura-2-es.toml b/common/standard_deploy/engine.aura-2-es.toml index 4c553c5..90a778f 100644 --- a/common/standard_deploy/engine.aura-2-es.toml +++ b/common/standard_deploy/engine.aura-2-es.toml @@ -53,7 +53,7 @@ multichannel = true # or false ### Enables language detection *if* a valid language detection model is available language_detection = true # or false ### Enables streaming entity formatting *if* a valid NER model is available -streaming_ner = false # or true +streaming_ner = true # or false ### Size of audio chunks to process in seconds. [chunking.batch] @@ -77,3 +77,12 @@ streaming_ner = false # or true ### which supports enabled, disabled, and auto (the default). [half_precision] # state = "disabled" # or "enabled" or "auto" + +[health] +### Controls whether Engine fails on startup if no GPU is detected +### Default: false +### +### While Engine can run without a GPU, production deployments require one for +### acceptable performance. Set to true to fail fast if no GPU is available, +### rather than running with severely degraded performance. +# gpu_required = true # or false diff --git a/common/standard_deploy/engine.toml b/common/standard_deploy/engine.toml index 9309856..579d5bc 100644 --- a/common/standard_deploy/engine.toml +++ b/common/standard_deploy/engine.toml @@ -51,7 +51,7 @@ multichannel = true # or false ### Enables language detection *if* a valid language detection model is available language_detection = true # or false ### Enables streaming entity formatting *if* a valid NER model is available -streaming_ner = false # or true +streaming_ner = true # or false ### Size of audio chunks to process in seconds. [chunking.batch] @@ -75,3 +75,12 @@ streaming_ner = false # or true ### which supports enabled, disabled, and auto (the default). [half_precision] # state = "disabled" # or "enabled" or "auto" + +[health] +### Controls whether Engine fails on startup if no GPU is detected +### Default: false +### +### While Engine can run without a GPU, production deployments require one for +### acceptable performance. Set to true to fail fast if no GPU is available, +### rather than running with severely degraded performance. +# gpu_required = true # or false diff --git a/docker/docker-compose.aura-2.yml b/docker/docker-compose.aura-2.yml index ecb5607..870ccad 100644 --- a/docker/docker-compose.aura-2.yml +++ b/docker/docker-compose.aura-2.yml @@ -10,7 +10,7 @@ services: # The speech API service. # English Language Aura-2 api-en: - image: quay.io/deepgram/self-hosted-api:release-250814 + image: quay.io/deepgram/self-hosted-api:release-250912 restart: always # Here we expose the API port to the host machine. The container port @@ -37,7 +37,7 @@ services: # Spanish Language Aura-2 api-es: - image: quay.io/deepgram/self-hosted-api:release-250814 + image: quay.io/deepgram/self-hosted-api:release-250912 restart: always # Here we expose the API port to the host machine. 
The container port @@ -65,7 +65,7 @@ services: # The speech engine service. # English Language Aura-2 Driver engine-en: - image: quay.io/deepgram/self-hosted-engine:release-250814 + image: quay.io/deepgram/self-hosted-engine:release-250912 restart: always # Utilize a GPU, if available. @@ -98,7 +98,7 @@ services: # Spanish Language Aura-2 Driver engine-es: - image: quay.io/deepgram/self-hosted-engine:release-250814 + image: quay.io/deepgram/self-hosted-engine:release-250912 restart: always # Utilize a GPU, if available. @@ -131,7 +131,7 @@ services: # The service to validate your Deepgram license license-proxy: - image: quay.io/deepgram/self-hosted-license-proxy:release-250814 + image: quay.io/deepgram/self-hosted-license-proxy:release-250912 restart: always # Here we expose the License Proxy status port to the host machine. The container port diff --git a/docker/docker-compose.license-proxy.yml b/docker/docker-compose.license-proxy.yml index 5431d75..f35c04b 100644 --- a/docker/docker-compose.license-proxy.yml +++ b/docker/docker-compose.license-proxy.yml @@ -9,7 +9,7 @@ x-env: &env services: # The speech API service. api: - image: quay.io/deepgram/self-hosted-api:release-250814 + image: quay.io/deepgram/self-hosted-api:release-250912 restart: always # Here we expose the API port to the host machine. The container port @@ -35,7 +35,7 @@ services: # The speech engine service. engine: - image: quay.io/deepgram/self-hosted-engine:release-250814 + image: quay.io/deepgram/self-hosted-engine:release-250912 restart: always # Utilize a GPU, if available. @@ -63,7 +63,7 @@ services: # The service to validate your Deepgram license license-proxy: - image: quay.io/deepgram/self-hosted-license-proxy:release-250814 + image: quay.io/deepgram/self-hosted-license-proxy:release-250912 restart: always # Here we expose the License Proxy status port to the host machine. The container port diff --git a/docker/docker-compose.standard.yml b/docker/docker-compose.standard.yml index 92d43ee..bb1d23a 100644 --- a/docker/docker-compose.standard.yml +++ b/docker/docker-compose.standard.yml @@ -9,7 +9,7 @@ x-env: &env services: # The speech API service. api: - image: quay.io/deepgram/self-hosted-api:release-250814 + image: quay.io/deepgram/self-hosted-api:release-250912 restart: always # Here we expose the API port to the host machine. The container port @@ -31,7 +31,7 @@ services: # The speech engine service. engine: - image: quay.io/deepgram/self-hosted-engine:release-250814 + image: quay.io/deepgram/self-hosted-engine:release-250912 restart: always # Utilize a GPU, if available. diff --git a/podman/podman-compose.aura-2.yml b/podman/podman-compose.aura-2.yml index 9586ec7..006cf8f 100644 --- a/podman/podman-compose.aura-2.yml +++ b/podman/podman-compose.aura-2.yml @@ -4,7 +4,7 @@ services: # The speech API service. # English Language Aura-2 api-en: - image: quay.io/deepgram/self-hosted-api:release-250814 + image: quay.io/deepgram/self-hosted-api:release-250912 restart: always # Here we expose the API port to the host machine. The container port @@ -33,7 +33,7 @@ services: # Spanish Language Aura-2 api-es: - image: quay.io/deepgram/self-hosted-api:release-250814 + image: quay.io/deepgram/self-hosted-api:release-250912 restart: always # Here we expose the API port to the host machine. The container port @@ -63,7 +63,7 @@ services: # The speech engine service. 
# English Language Aura-2 Driver engine-en: - image: quay.io/deepgram/self-hosted-engine:release-250814 + image: quay.io/deepgram/self-hosted-engine:release-250912 restart: always # Utilize a GPU, if available. @@ -99,7 +99,7 @@ services: # Spanish Language Aura-2 Driver engine-es: - image: quay.io/deepgram/self-hosted-engine:release-250814 + image: quay.io/deepgram/self-hosted-engine:release-250912 restart: always # Utilize a GPU, if available. @@ -135,7 +135,7 @@ services: # The service to validate your Deepgram license license-proxy: - image: quay.io/deepgram/self-hosted-license-proxy:release-250814 + image: quay.io/deepgram/self-hosted-license-proxy:release-250912 restart: always # Here we expose the License Proxy status port to the host machine. The container port diff --git a/podman/podman-compose.license-proxy.yml b/podman/podman-compose.license-proxy.yml index 8159731..654425e 100644 --- a/podman/podman-compose.license-proxy.yml +++ b/podman/podman-compose.license-proxy.yml @@ -3,7 +3,7 @@ services: # The speech API service. api: - image: quay.io/deepgram/self-hosted-api:release-250814 + image: quay.io/deepgram/self-hosted-api:release-250912 restart: always # Here we expose the API port to the host machine. The container port @@ -32,7 +32,7 @@ services: # The speech engine service. engine: - image: quay.io/deepgram/self-hosted-engine:release-250814 + image: quay.io/deepgram/self-hosted-engine:release-250912 restart: always # Utilize a GPU, if available. @@ -64,7 +64,7 @@ services: # The service to validate your Deepgram license license-proxy: - image: quay.io/deepgram/self-hosted-license-proxy:release-250814 + image: quay.io/deepgram/self-hosted-license-proxy:release-250912 restart: always # Here we expose the License Proxy status port to the host machine. The container port diff --git a/podman/podman-compose.standard.yml b/podman/podman-compose.standard.yml index fff23a8..91b74be 100644 --- a/podman/podman-compose.standard.yml +++ b/podman/podman-compose.standard.yml @@ -3,7 +3,7 @@ services: # The speech API service. api: - image: quay.io/deepgram/self-hosted-api:release-250814 + image: quay.io/deepgram/self-hosted-api:release-250912 restart: always # Here we expose the API port to the host machine. The container port @@ -28,7 +28,7 @@ services: # The speech engine service. engine: - image: quay.io/deepgram/self-hosted-engine:release-250814 + image: quay.io/deepgram/self-hosted-engine:release-250912 restart: always # Utilize a GPU, if available.
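
Taken together, the hunks above introduce per-engine-type replica counts, the `agent` block, `global.thirdPartyCredentials` secret references, `nodeSelector` fields, and `customToml` passthrough for the rendered TOML configs. A minimal values override exercising them might look like the sketch below; the Secret names, node label, and TOML section are illustrative assumptions, not chart defaults.

# values.override.yaml -- illustrative sketch only
agent:
  enabled: true                  # renders one Engine Deployment/Service per agent engine type
scaling:
  auto:
    enabled: false               # the chart fails fast if autoscaling is combined with the voice agent
  replicas:
    engine:                      # per-type counts are looked up with `index` when agent.enabled is true
      agent-speech-to-text: 2
      agent-text-to-speech: 1
      agent-end-of-turn: 1
global:
  thirdPartyCredentials:
    openAiSecretRef: openai-credentials         # placeholder Secret name; must carry key OPENAI_API_KEY
    anthropicSecretRef: anthropic-credentials   # placeholder Secret name; must carry key ANTHROPIC_API_KEY
api:
  nodeSelector:
    kubernetes.io/arch: amd64    # example node label, not a chart default
  customToml: |                  # appended verbatim to the rendered api.toml
    [custom_section]
    example_key = "example_value"

With `agent.enabled: false` (the default), the templates behave as before: a single Engine Deployment and Service are rendered, and `scaling.replicas.engine` is read as a scalar count.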