Skip to content

Commit 9c4cd51

Browse files
authored
feat(scheduler): optionally enable pprof (#6899)
* pprof on scheduler * test new server * simplify * log warn
1 parent 24950eb commit 9c4cd51

File tree

5 files changed

+83
-1
lines changed

5 files changed

+83
-1
lines changed

k8s/helm-charts/seldon-core-v2-setup/templates/_components-deployments.tpl

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -522,6 +522,10 @@ spec:
522522
- --enable-server-autoscaling=$(ENABLE_SERVER_AUTOSCALING)
523523
- --log-level=$(LOG_LEVEL)
524524
- --health-probe-port=$(HEALTH_PROBE_PORT)
525+
- --enable-pprof=$(ENABLE_PPROF)
526+
- --pprof-port=$(PPROF_PORT)
527+
- --pprof-block-rate=$(PPROF_BLOCK_RATE)
528+
- --pprof-mutex-rate=$(PPROF_MUTEX_RATE)
525529
command:
526530
- /bin/scheduler
527531
env:
@@ -599,6 +603,14 @@ spec:
599603
fieldPath: metadata.namespace
600604
- name: HEALTH_PROBE_PORT
601605
value: "9999"
606+
- name: ENABLE_PPROF
607+
value: "false"
608+
- name: PPROF_PORT
609+
value: "6060"
610+
- name: PPROF_BLOCK_RATE
611+
value: "0"
612+
- name: PPROF_MUTEX_RATE
613+
value: "0"
602614
image: '{{ .Values.scheduler.image.registry }}/{{ .Values.scheduler.image.repository
603615
}}:{{ .Values.scheduler.image.tag }}'
604616
imagePullPolicy: '{{ .Values.scheduler.image.pullPolicy }}'

k8s/helm-charts/seldon-core-v2-setup/templates/_components-statefulsets.tpl

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -522,6 +522,10 @@ spec:
522522
- --enable-server-autoscaling=$(ENABLE_SERVER_AUTOSCALING)
523523
- --log-level=$(LOG_LEVEL)
524524
- --health-probe-port=$(HEALTH_PROBE_PORT)
525+
- --enable-pprof=$(ENABLE_PPROF)
526+
- --pprof-port=$(PPROF_PORT)
527+
- --pprof-block-rate=$(PPROF_BLOCK_RATE)
528+
- --pprof-mutex-rate=$(PPROF_MUTEX_RATE)
525529
command:
526530
- /bin/scheduler
527531
env:
@@ -599,6 +603,14 @@ spec:
599603
fieldPath: metadata.namespace
600604
- name: HEALTH_PROBE_PORT
601605
value: "9999"
606+
- name: ENABLE_PPROF
607+
value: "false"
608+
- name: PPROF_PORT
609+
value: "6060"
610+
- name: PPROF_BLOCK_RATE
611+
value: "0"
612+
- name: PPROF_MUTEX_RATE
613+
value: "0"
602614
image: '{{ .Values.scheduler.image.registry }}/{{ .Values.scheduler.image.repository
603615
}}:{{ .Values.scheduler.image.tag }}'
604616
imagePullPolicy: '{{ .Values.scheduler.image.pullPolicy }}'

k8s/yaml/components.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,10 @@ spec:
369369
- --enable-server-autoscaling=$(ENABLE_SERVER_AUTOSCALING)
370370
- --log-level=$(LOG_LEVEL)
371371
- --health-probe-port=$(HEALTH_PROBE_PORT)
372+
- --enable-pprof=$(ENABLE_PPROF)
373+
- --pprof-port=$(PPROF_PORT)
374+
- --pprof-block-rate=$(PPROF_BLOCK_RATE)
375+
- --pprof-mutex-rate=$(PPROF_MUTEX_RATE)
372376
command:
373377
- /bin/scheduler
374378
env:
@@ -442,6 +446,14 @@ spec:
442446
fieldPath: metadata.namespace
443447
- name: HEALTH_PROBE_PORT
444448
value: "9999"
449+
- name: ENABLE_PPROF
450+
value: "false"
451+
- name: PPROF_PORT
452+
value: "6060"
453+
- name: PPROF_BLOCK_RATE
454+
value: "0"
455+
- name: PPROF_MUTEX_RATE
456+
value: "0"
445457
image: 'docker.io/seldonio/seldon-scheduler:latest'
446458
imagePullPolicy: 'IfNotPresent'
447459
livenessProbe:

operator/config/seldonconfigs/default.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,10 @@ spec:
343343
- --enable-server-autoscaling=$(ENABLE_SERVER_AUTOSCALING)
344344
- --log-level=$(LOG_LEVEL)
345345
- --health-probe-port=$(HEALTH_PROBE_PORT)
346+
- --enable-pprof=$(ENABLE_PPROF)
347+
- --pprof-port=$(PPROF_PORT)
348+
- --pprof-block-rate=$(PPROF_BLOCK_RATE)
349+
- --pprof-mutex-rate=$(PPROF_MUTEX_RATE)
346350
command:
347351
- /bin/scheduler
348352
env:
@@ -374,6 +378,14 @@ spec:
374378
fieldPath: metadata.namespace
375379
- name: HEALTH_PROBE_PORT
376380
value: "9999"
381+
- name: ENABLE_PPROF
382+
value: "false"
383+
- name: PPROF_PORT
384+
value: "6060"
385+
- name: PPROF_BLOCK_RATE
386+
value: "0"
387+
- name: PPROF_MUTEX_RATE
388+
value: "0"
377389
image: seldonio/seldon-scheduler:latest
378390
imagePullPolicy: Always
379391
name: scheduler

scheduler/cmd/scheduler/main.go

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,15 @@ package main
1111

1212
import (
1313
"context"
14+
"errors"
1415
"flag"
1516
"fmt"
1617
"math/rand"
18+
"net/http"
19+
_ "net/http/pprof"
1720
"os"
1821
"os/signal"
22+
"runtime"
1923
"syscall"
2024
"time"
2125

@@ -74,6 +78,10 @@ var (
7478
accessLogPath string
7579
enableAccessLog bool
7680
includeSuccessfulRequests bool
81+
enablePprof bool
82+
pprofPort int
83+
pprofMutexRate int
84+
pprofBlockRate int
7785
)
7886

7987
const (
@@ -160,6 +168,10 @@ func init() {
160168
flag.StringVar(&accessLogPath, "envoy-accesslog-path", "/tmp/envoy-accesslog.txt", "Envoy access log path")
161169
flag.BoolVar(&enableAccessLog, "enable-envoy-accesslog", true, "Enable Envoy access log")
162170
flag.BoolVar(&includeSuccessfulRequests, "include-successful-requests-envoy-accesslog", false, "Include successful requests in Envoy access log")
171+
flag.BoolVar(&enablePprof, "enable-pprof", false, "Enables pprof on localhost - do not use in production, will affect performance")
172+
flag.IntVar(&pprofPort, "pprof-port", 6060, "pprof HTTP server port")
173+
flag.IntVar(&pprofBlockRate, "pprof-block-rate", 0, "pprof block rate")
174+
flag.IntVar(&pprofMutexRate, "pprof-mutex-rate", 0, "pprof mutex rate")
163175
}
164176

165177
func getNamespace() string {
@@ -234,7 +246,12 @@ func main() {
234246

235247
httpServer, err := initHealthProbe(tlsOptions, logger, probesConfig, int(healthProbePort))
236248
if err != nil {
237-
log.WithError(err).Fatal("Failed to start health server")
249+
logger.WithError(err).Fatal("Failed to start health server")
250+
}
251+
252+
if enablePprof {
253+
logger.Info("Starting pprof server")
254+
startPprofServer(pprofPort, pprofBlockRate, pprofMutexRate, logger)
238255
}
239256

240257
logger.WithField("port", healthProbePort).Info("Started HTTP health server")
@@ -410,6 +427,23 @@ func main() {
410427
log.Info("All services have shut down cleanly")
411428
}
412429

430+
func startPprofServer(port int, blockRate, mutexRate int, log *log.Logger) {
431+
if blockRate > 0 {
432+
log.Warn("Block rate > 0 - performance will be affected")
433+
}
434+
if mutexRate > 0 {
435+
log.Warn("Mutex rate > 0 - performance will be affected")
436+
}
437+
runtime.SetBlockProfileRate(blockRate)
438+
runtime.SetMutexProfileFraction(mutexRate)
439+
440+
go func() {
441+
if err := http.ListenAndServe(fmt.Sprintf("localhost:%d", port), nil); err != nil && !errors.Is(err, http.ErrServerClosed) {
442+
log.WithError(err).Error("Failed running pprof server")
443+
}
444+
}()
445+
}
446+
413447
type probe struct {
414448
port int
415449
plaintText bool

0 commit comments

Comments
 (0)