Skip to content

Commit 6db0eaf

Browse files
domsolutions and lc525 authored
feat(pipeline-gw): health probes (#6728)
* wip
* startup and liveness and readiness probes
* wip: refactor
* tests
* fix typo
* add startup probe for gRPC svc
* lint
* add kafka health checks
* support TLS
* lint
* Update scheduler/cmd/pipelinegateway/main.go

Co-authored-by: Lucian Carata <lc525@users.noreply.github.com>

---------

Co-authored-by: Lucian Carata <lc525@users.noreply.github.com>
1 parent f267792 commit 6db0eaf

File tree

16 files changed: +1041 -59 lines changed

k8s/helm-charts/seldon-core-v2-setup/templates/_components-deployments.tpl

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -667,6 +667,7 @@ spec:
667667
- --kafka-config-path=/mnt/kafka/kafka.json
668668
- --tracing-config-path=/mnt/tracing/tracing.json
669669
- --log-level=$(LOG_LEVEL)
670+
- --health-probe-port=$(HEALTH_PROBE_PORT)
670671
command:
671672
- /bin/pipelinegateway
672673
env:
@@ -753,6 +754,8 @@ spec:
753754
valueFrom:
754755
fieldRef:
755756
fieldPath: metadata.name
757+
- name: HEALTH_PROBE_PORT
758+
value: "9999"
756759
- name: POD_IP
757760
valueFrom:
758761
fieldRef:
@@ -764,6 +767,12 @@ spec:
764767
image: '{{ .Values.pipelinegateway.image.registry }}/{{ .Values.pipelinegateway.image.repository
765768
}}:{{ .Values.pipelinegateway.image.tag }}'
766769
imagePullPolicy: '{{ .Values.pipelinegateway.image.pullPolicy }}'
770+
livenessProbe:
771+
failureThreshold: 3
772+
httpGet:
773+
path: /live
774+
port: health
775+
periodSeconds: 5
767776
name: pipelinegateway
768777
ports:
769778
- containerPort: 9010
@@ -775,12 +784,28 @@ spec:
775784
- containerPort: 9006
776785
name: metrics
777786
protocol: TCP
787+
- containerPort: 9999
788+
name: health
789+
protocol: TCP
790+
readinessProbe:
791+
failureThreshold: 3
792+
httpGet:
793+
path: /ready
794+
port: health
795+
periodSeconds: 5
778796
resources:
779797
limits:
780798
memory: '{{ .Values.pipelinegateway.resources.memory }}'
781799
requests:
782800
cpu: '{{ .Values.pipelinegateway.resources.cpu }}'
783801
memory: '{{ .Values.pipelinegateway.resources.memory }}'
802+
startupProbe:
803+
failureThreshold: 3
804+
httpGet:
805+
path: /startup
806+
port: health
807+
initialDelaySeconds: 3
808+
periodSeconds: 5
784809
volumeMounts:
785810
- mountPath: /mnt/kafka
786811
name: kafka-config-volume

k8s/helm-charts/seldon-core-v2-setup/templates/_components-statefulsets.tpl

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -667,6 +667,7 @@ spec:
667667
- --kafka-config-path=/mnt/kafka/kafka.json
668668
- --tracing-config-path=/mnt/tracing/tracing.json
669669
- --log-level=$(LOG_LEVEL)
670+
- --health-probe-port=$(HEALTH_PROBE_PORT)
670671
command:
671672
- /bin/pipelinegateway
672673
env:
@@ -753,6 +754,8 @@ spec:
753754
valueFrom:
754755
fieldRef:
755756
fieldPath: metadata.name
757+
- name: HEALTH_PROBE_PORT
758+
value: "9999"
756759
- name: POD_IP
757760
valueFrom:
758761
fieldRef:
@@ -764,6 +767,12 @@ spec:
764767
image: '{{ .Values.pipelinegateway.image.registry }}/{{ .Values.pipelinegateway.image.repository
765768
}}:{{ .Values.pipelinegateway.image.tag }}'
766769
imagePullPolicy: '{{ .Values.pipelinegateway.image.pullPolicy }}'
770+
livenessProbe:
771+
failureThreshold: 3
772+
httpGet:
773+
path: /live
774+
port: health
775+
periodSeconds: 5
767776
name: pipelinegateway
768777
ports:
769778
- containerPort: 9010
@@ -775,12 +784,28 @@ spec:
775784
- containerPort: 9006
776785
name: metrics
777786
protocol: TCP
787+
- containerPort: 9999
788+
name: health
789+
protocol: TCP
790+
readinessProbe:
791+
failureThreshold: 3
792+
httpGet:
793+
path: /ready
794+
port: health
795+
periodSeconds: 5
778796
resources:
779797
limits:
780798
memory: '{{ .Values.pipelinegateway.resources.memory }}'
781799
requests:
782800
cpu: '{{ .Values.pipelinegateway.resources.cpu }}'
783801
memory: '{{ .Values.pipelinegateway.resources.memory }}'
802+
startupProbe:
803+
failureThreshold: 3
804+
httpGet:
805+
path: /startup
806+
port: health
807+
initialDelaySeconds: 3
808+
periodSeconds: 5
784809
volumeMounts:
785810
- mountPath: /mnt/kafka
786811
name: kafka-config-volume

k8s/yaml/components.yaml

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -508,6 +508,7 @@ spec:
508508
- --kafka-config-path=/mnt/kafka/kafka.json
509509
- --tracing-config-path=/mnt/tracing/tracing.json
510510
- --log-level=$(LOG_LEVEL)
511+
- --health-probe-port=$(HEALTH_PROBE_PORT)
511512
command:
512513
- /bin/pipelinegateway
513514
env:
@@ -589,6 +590,8 @@ spec:
589590
valueFrom:
590591
fieldRef:
591592
fieldPath: metadata.name
593+
- name: HEALTH_PROBE_PORT
594+
value: "9999"
592595
- name: POD_IP
593596
valueFrom:
594597
fieldRef:
@@ -599,6 +602,12 @@ spec:
599602
fieldPath: metadata.namespace
600603
image: 'docker.io/seldonio/seldon-pipelinegateway:latest'
601604
imagePullPolicy: 'IfNotPresent'
605+
livenessProbe:
606+
failureThreshold: 3
607+
httpGet:
608+
path: /live
609+
port: health
610+
periodSeconds: 5
602611
name: pipelinegateway
603612
ports:
604613
- containerPort: 9010
@@ -610,12 +619,28 @@ spec:
610619
- containerPort: 9006
611620
name: metrics
612621
protocol: TCP
622+
- containerPort: 9999
623+
name: health
624+
protocol: TCP
625+
readinessProbe:
626+
failureThreshold: 3
627+
httpGet:
628+
path: /ready
629+
port: health
630+
periodSeconds: 5
613631
resources:
614632
limits:
615633
memory: '1G'
616634
requests:
617635
cpu: '100m'
618636
memory: '1G'
637+
startupProbe:
638+
failureThreshold: 3
639+
httpGet:
640+
path: /startup
641+
port: health
642+
initialDelaySeconds: 3
643+
periodSeconds: 5
619644
volumeMounts:
620645
- mountPath: /mnt/kafka
621646
name: kafka-config-volume

operator/config/seldonconfigs/default.yaml

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -172,6 +172,7 @@ spec:
172172
- --kafka-config-path=/mnt/kafka/kafka.json
173173
- --tracing-config-path=/mnt/tracing/tracing.json
174174
- --log-level=$(LOG_LEVEL)
175+
- --health-probe-port=$(HEALTH_PROBE_PORT)
175176
command:
176177
- /bin/pipelinegateway
177178
env:
@@ -185,6 +186,8 @@ spec:
185186
valueFrom:
186187
fieldRef:
187188
fieldPath: metadata.name
189+
- name: HEALTH_PROBE_PORT
190+
value: "9999"
188191
- name: POD_IP
189192
valueFrom:
190193
fieldRef:
@@ -206,12 +209,34 @@ spec:
206209
- containerPort: 9006
207210
name: metrics
208211
protocol: TCP
212+
- containerPort: 9999
213+
name: health
214+
protocol: TCP
209215
resources:
210216
limits:
211217
memory: 1G
212218
requests:
213219
cpu: 100m
214220
memory: 1G
221+
startupProbe:
222+
httpGet:
223+
path: /startup
224+
port: health
225+
initialDelaySeconds: 3
226+
periodSeconds: 5
227+
failureThreshold: 3
228+
readinessProbe:
229+
httpGet:
230+
path: /ready
231+
port: health
232+
periodSeconds: 5
233+
failureThreshold: 3
234+
livenessProbe:
235+
httpGet:
236+
path: /live
237+
port: health
238+
periodSeconds: 5
239+
failureThreshold: 3
215240
volumeMounts:
216241
- mountPath: /mnt/kafka
217242
name: kafka-config-volume

0 commit comments

Comments
 (0)