From 40ba51b9bb900985d2e239d036099b9a9b0b94e1 Mon Sep 17 00:00:00 2001
From: Mahendra Paipuri
Date: Thu, 21 Aug 2025 17:04:52 +0200
Subject: [PATCH] style: Bump golangci-lint to 2.4

* Disable deprecated linters and update linter configs

* Fix all the lint errors from the new version

Signed-off-by: Mahendra Paipuri
---
 .github/workflows/step_tests-lint.yml | 2 +-
 .golangci.yml | 5 +
 cmd/ceems_api_server/main.go | 3 +-
 cmd/ceems_api_server/main_test.go | 9 +-
 cmd/ceems_exporter/main.go | 3 +-
 cmd/ceems_exporter/main_test.go | 27 ++-
 .../base/scheme.go | 15 ++-
 .../http/handlers.go | 6 +-
 .../http/server.go | 6 +-
 .../http/server_test.go | 1 +
 cmd/ceems_k8s_admission_controller/main.go | 6 +-
 cmd/ceems_lb/main.go | 3 +-
 cmd/ceems_lb/main_test.go | 9 +-
 cmd/ceems_tool/config.go | 26 ++--
 cmd/ceems_tool/main.go | 18 ++-
 cmd/ceems_tool/updater.go | 4 +-
 cmd/redfish_proxy/main.go | 22 ++--
 cmd/redfish_proxy/main_test.go | 6 +-
 cmd/redfish_proxy/reverseproxy.go | 7 +-
 cmd/redfish_proxy/server.go | 6 +-
 cmd/redfish_proxy/server_test.go | 4 +
 .../cmd/mock_ceems_exporter/main.go | 3 +-
 .../cmd/mock_ceems_server/main.go | 3 +-
 .../pkg/resource/mock_manager.go | 3 +-
 .../cmd/mock_ceems_server/main.go | 3 +-
 internal/common/helpers.go | 3 +-
 internal/common/helpers_test.go | 5 +-
 internal/osexec/osexec.go | 6 +-
 internal/security/exec.go | 32 +++--
 internal/security/manager.go | 51 +++++---
 internal/security/manager_test.go | 15 ++-
 pkg/api/cli/cli.go | 55 +++++---
 pkg/api/cli/cli_test.go | 7 +-
 pkg/api/db/db.go | 122 +++++++++++------
 pkg/api/db/db_test.go | 73 +++++++----
 pkg/api/db/helpers.go | 21 +--
 pkg/api/db/helpers_test.go | 4 +-
 pkg/api/db/migrator/migrate.go | 6 +-
 pkg/api/helper/helper.go | 3 +-
 pkg/api/http/cors_test.go | 2 +
 pkg/api/http/error.go | 4 +-
 pkg/api/http/querier.go | 19 ++-
 pkg/api/http/querier_test.go | 7 +
 pkg/api/http/server.go | 96 +++++++++-----
 pkg/api/http/validation_test.go | 9 +-
 pkg/api/models/types.go | 52 ++++----
 pkg/api/resource/k8s/manager.go | 19 ++-
 pkg/api/resource/k8s/manager_test.go | 9 +-
 pkg/api/resource/manager.go | 11 +-
 pkg/api/resource/openstack/compute.go | 11 +-
 pkg/api/resource/openstack/identity.go | 8 +-
 pkg/api/resource/openstack/manager.go | 22 +++-
 pkg/api/resource/openstack/manager_test.go | 19 ++-
 pkg/api/resource/openstack/request.go | 4 +-
 pkg/api/resource/openstack/types.go | 6 +-
 pkg/api/resource/slurm/cli.go | 27 +++-
 pkg/api/resource/slurm/manager.go | 9 +-
 pkg/api/updater/tsdb/tsdb.go | 23 +++-
 pkg/api/updater/tsdb/tsdb_test.go | 6 +-
 pkg/collector/cgroup.go | 68 +++++-----
 pkg/collector/cli.go | 18 ++-
 pkg/collector/cli_test.go | 3 +-
 pkg/collector/collector.go | 6 +-
 pkg/collector/cpu.go | 10 ++
 pkg/collector/cray_pm_counters.go | 12 +-
 pkg/collector/ebpf.go | 67 +++++++---
 pkg/collector/emissions.go | 3 +-
 pkg/collector/gpu.go | 88 ++++++++-----
 pkg/collector/helper.go | 16 +--
 pkg/collector/helper_test.go | 3 +-
 pkg/collector/hwmon.go | 18 ++-
 pkg/collector/infiniband.go | 1 +
 pkg/collector/ipmi.go | 41 ++++--
 pkg/collector/k8s.go | 30 +++--
 pkg/collector/k8s_test.go | 6 +-
 pkg/collector/kernel.go | 5 +-
 pkg/collector/libvirt.go | 38 ++++--
 pkg/collector/libvirt_test.go | 2 -
 pkg/collector/meminfo.go | 1 +
 pkg/collector/meminfo_test.go | 1 +
 pkg/collector/perf.go | 84 ++++++++----
 pkg/collector/profiling.go | 27 ++--
 pkg/collector/profiling_config.go | 15 ++-
 pkg/collector/profiling_test.go | 1 +
 pkg/collector/rapl.go | 26 ++--
 pkg/collector/rdma.go | 21 ++-
 pkg/collector/redfish.go | 23 +++-
 pkg/collector/redfish_test.go | 18 ++-
 pkg/collector/server.go | 12 +-
 pkg/collector/server_test.go | 1 +
 pkg/collector/slurm.go | 59 ++++++---
 pkg/collector/targets.go | 6 +-
 pkg/emissions/emaps.go | 6 +
 pkg/emissions/emaps_test.go | 16 ++-
 pkg/emissions/helpers.go | 3 +-
 pkg/emissions/owid.go | 3 +-
 pkg/emissions/provider.go | 7 +-
 pkg/emissions/rte.go | 5 +
 pkg/emissions/rte_test.go | 6 +-
 pkg/emissions/watttime.go | 10 +-
 pkg/emissions/watttime_test.go | 24 +++-
 pkg/grafana/grafana.go | 11 +-
 pkg/grafana/grafana_test.go | 6 +-
 pkg/ipmi/client.go | 16 ++-
 pkg/ipmi/sensors.go | 4 +-
 pkg/k8s/client.go | 21 ++-
 pkg/k8s/mock.go | 20 ++-
 pkg/lb/backend/pyro.go | 1 +
 pkg/lb/backend/response.go | 28 ++--
 pkg/lb/backend/response_test.go | 13 +-
 pkg/lb/backend/tsdb.go | 7 +-
 pkg/lb/backend/tsdb_test.go | 11 +-
 pkg/lb/cli/cli.go | 31 +++--
 pkg/lb/cli/cli_test.go | 9 +-
 pkg/lb/frontend/frontend.go | 18 ++-
 pkg/lb/frontend/frontend_test.go | 25 ++--
 pkg/lb/frontend/helpers.go | 7 +-
 pkg/lb/frontend/middleware.go | 32 +++--
 pkg/lb/frontend/middleware_test.go | 20 +--
 pkg/lb/frontend/parse.go | 21 ++-
 pkg/lb/serverpool/leastconn_test.go | 1 +
 pkg/lb/serverpool/roundrobin.go | 1 +
 pkg/sqlite3/sqlite3.go | 44 ++++---
 pkg/sqlite3/sqlite3_test.go | 22 ++--
 pkg/tsdb/client.go | 23 ++--
 pkg/tsdb/client_test.go | 30 +++--
 scripts/e2e-test.sh | 2 +-
 scripts/mock_servers/main.go | 108 +++++++++++----
 scripts/pyro_requestor/main.go | 4 +-
 129 files changed, 1568 insertions(+), 744 deletions(-)

diff --git a/.github/workflows/step_tests-lint.yml b/.github/workflows/step_tests-lint.yml
index 90eadd5c..a2b450d1 100644
--- a/.github/workflows/step_tests-lint.yml
+++ b/.github/workflows/step_tests-lint.yml
@@ -28,5 +28,5 @@ jobs:
       - name: Lint
         uses: golangci/golangci-lint-action@v7
         with:
-          version: v2.1.1
+          version: v2.4.0
           args: --timeout=5m
diff --git a/.golangci.yml b/.golangci.yml
index 624e8fe6..38fc1d64 100644
--- a/.golangci.yml
+++ b/.golangci.yml
@@ -26,7 +26,12 @@ linters:
     - testpackage
    - varnamelen
     - wrapcheck
+    - wsl
   settings:
+    wsl_v5:
+      allow-first-in-block: true
+      allow-whole-block: false
+      branch-max-lines: 2
     errcheck:
       exclude-functions:
         # Used in HTTP handlers, any error is handled by the server itself.
diff --git a/cmd/ceems_api_server/main.go b/cmd/ceems_api_server/main.go
index 9905e0bb..4c3f490a 100644
--- a/cmd/ceems_api_server/main.go
+++ b/cmd/ceems_api_server/main.go
@@ -24,7 +24,8 @@ func main() {
 	}
 
 	// Main entrypoint of the app
-	if err := CEEMSServer.Main(); err != nil {
+	err = CEEMSServer.Main()
+	if err != nil {
 		log.Println(err)
 		os.Exit(1)
 	}
diff --git a/cmd/ceems_api_server/main_test.go b/cmd/ceems_api_server/main_test.go
index c366fd37..8d4cc714 100644
--- a/cmd/ceems_api_server/main_test.go
+++ b/cmd/ceems_api_server/main_test.go
@@ -18,7 +18,8 @@ const (
 )
 
 func TestBatchjobStatsExecutable(t *testing.T) {
-	if _, err := os.Stat(binary); err != nil {
+	_, err := os.Stat(binary)
+	if err != nil {
 		t.Skipf("ceems_api_server binary not available, try to run `make build` first: %s", err)
 	}
 
@@ -31,7 +32,8 @@ func TestBatchjobStatsExecutable(t *testing.T) {
 	err = os.Link(sacctPath, tmpSacctPath)
 	require.NoError(t, err)
 
-	usagestats := exec.Command(
+	usagestats := exec.CommandContext(
+		t.Context(),
 		binary,
 		"--web.listen-address", address,
 		"--no-security.drop-privileges",
@@ -40,7 +42,8 @@ func TestBatchjobStatsExecutable(t *testing.T) {
 }
 
 func runCommandAndTests(cmd *exec.Cmd) error {
-	if err := cmd.Start(); err != nil {
+	err := cmd.Start()
+	if err != nil {
 		return fmt.Errorf("failed to start command: %w", err)
 	}
diff --git a/cmd/ceems_exporter/main.go b/cmd/ceems_exporter/main.go
index cade7dda..97c8771a 100644
--- a/cmd/ceems_exporter/main.go
+++ b/cmd/ceems_exporter/main.go
@@ -16,7 +16,8 @@ func main() {
 	}
 
 	// Main entrypoint of the app
-	if err := ceemsExporterApp.Main(); err != nil {
+	err = ceemsExporterApp.Main()
+	if err != nil {
 		log.Println(err)
 		os.Exit(1)
 	}
diff --git a/cmd/ceems_exporter/main_test.go b/cmd/ceems_exporter/main_test.go
index f402a6b2..75dc23c5 100644
--- a/cmd/ceems_exporter/main_test.go
+++ b/cmd/ceems_exporter/main_test.go
@@ -21,7 +21,8 @@ const (
 )
 
 func TestFileDescriptorLeak(t *testing.T) {
-	if _, err := os.Stat(binary); err != nil {
+	_, err := os.Stat(binary)
+	if err != nil {
 		t.Skipf("ceems_exporter binary not available, try to run `make build` first: %s", err)
 	}
 
@@ -33,7 +34,8 @@ func TestFileDescriptorLeak(t *testing.T) {
 		)
 	}
 
-	if _, err := fs.Stat(); err != nil {
+	_, err = fs.Stat()
+	if err != nil {
 		t.Errorf("unable to read process stats: %s", err)
 	}
 
@@ -43,7 +45,8 @@ func TestFileDescriptorLeak(t *testing.T) {
 	procfsPath, err := filepath.Abs("../../pkg/collector/testdata/proc")
 	require.NoError(t, err)
 
-	exporter := exec.Command(
+	exporter := exec.CommandContext(
+		t.Context(),
 		binary,
 		"--web.listen-address", address,
 		"--path.cgroupfs", sysfsPath,
@@ -52,7 +55,8 @@ func TestFileDescriptorLeak(t *testing.T) {
 		// "--no-security.drop-privileges",
 	)
 	test := func(pid int) error {
-		if err := queryExporter(address); err != nil {
+		err := queryExporter(address)
+		if err != nil {
 			return err
 		}
 
@@ -67,7 +71,8 @@ func TestFileDescriptorLeak(t *testing.T) {
 	}
 
 	for range 5 {
-		if err := queryExporter(address); err != nil {
+		err := queryExporter(address)
+		if err != nil {
 			return err
 		}
 	}
@@ -102,7 +107,8 @@ func queryExporter(address string) error {
 		return err
 	}
 
-	if err := resp.Body.Close(); err != nil {
+	err = resp.Body.Close()
+	if err != nil {
 		return err
 	}
 
@@ -114,14 +120,16 @@ func queryExporter(address string) error {
 }
 
 func runCommandAndTests(cmd *exec.Cmd, address string, fn func(pid int) error) error {
-	if err := cmd.Start(); err != nil {
+	err := cmd.Start()
+	if err != nil {
 		return fmt.Errorf("failed to start command: %w", err)
 	}
 	time.Sleep(50 * time.Millisecond)
 
 	for i := range 10 {
-		if err := queryExporter(address); err == nil {
+		err := queryExporter(address)
+		if err == nil {
 			break
 		}
 
@@ -133,11 +141,12 @@ func runCommandAndTests(cmd *exec.Cmd, address string, fn func(pid int) error) e
 	}
 
 	errc := make(chan error)
+
 	go func(pid int) {
 		errc <- fn(pid)
 	}(cmd.Process.Pid)
 
-	err := <-errc
+	err = <-errc
 
 	if cmd.Process != nil {
 		cmd.Process.Kill()
diff --git a/cmd/ceems_k8s_admission_controller/base/scheme.go b/cmd/ceems_k8s_admission_controller/base/scheme.go
index 43830ca8..dba6e327 100644
--- a/cmd/ceems_k8s_admission_controller/base/scheme.go
+++ b/cmd/ceems_k8s_admission_controller/base/scheme.go
@@ -20,23 +20,28 @@ func NewRuntimeScheme() (*runtime.Scheme, error) {
 	runtimeScheme := runtime.NewScheme()
 
 	// Add resources to runtime scheme
-	if err := corev1.AddToScheme(runtimeScheme); err != nil {
+	err := corev1.AddToScheme(runtimeScheme)
+	if err != nil {
 		return nil, fmt.Errorf("failed to add core resources to runtime scheme: %w", err)
 	}
 
-	if err := appsv1.AddToScheme(runtimeScheme); err != nil {
+	err = appsv1.AddToScheme(runtimeScheme)
+	if err != nil {
 		return nil, fmt.Errorf("failed to add apps resources to runtime scheme: %w", err)
 	}
 
-	if err := batchv1.AddToScheme(runtimeScheme); err != nil {
+	err = batchv1.AddToScheme(runtimeScheme)
+	if err != nil {
 		return nil, fmt.Errorf("failed to add batch resources to runtime scheme: %w", err)
 	}
 
-	if err := admissionv1beta1.AddToScheme(runtimeScheme); err != nil {
+	err = admissionv1beta1.AddToScheme(runtimeScheme)
+	if err != nil {
 		return nil, fmt.Errorf("failed to add v1beta1 admission resources to runtime scheme: %w", err)
 	}
 
-	if err := admissionv1.AddToScheme(runtimeScheme); err != nil {
+	err = admissionv1.AddToScheme(runtimeScheme)
+	if err != nil {
 		return nil, fmt.Errorf("failed to add v1 admission resources to runtime scheme: %w", err)
 	}
diff --git a/cmd/ceems_k8s_admission_controller/http/handlers.go b/cmd/ceems_k8s_admission_controller/http/handlers.go
index aa1ae9c2..1bfd8969 100644
--- a/cmd/ceems_k8s_admission_controller/http/handlers.go
+++ b/cmd/ceems_k8s_admission_controller/http/handlers.go
@@ -73,7 +73,8 @@ func (h *admissionHandler) Serve(hook base.Hook) http.HandlerFunc {
 
 		var obj runtime.Object
 
-		if obj, gvk, err = h.decoder.Decode(body, nil, nil); err != nil {
+		obj, gvk, err = h.decoder.Decode(body, nil, nil)
+		if err != nil {
 			h.logger.Error("Failed to decode body into admission review", "err", err)
 			http.Error(w, fmt.Sprintf("could not deserialize request: %v", err), http.StatusBadRequest)
 
@@ -229,7 +230,8 @@ func (h *admissionHandler) Serve(hook base.Hook) http.HandlerFunc {
 		// Write response
 		w.WriteHeader(http.StatusOK)
 
-		if err = json.NewEncoder(w).Encode(&responseObj); err != nil {
+		err = json.NewEncoder(w).Encode(&responseObj)
+		if err != nil {
 			h.logger.Error("Failed to encode response", "path", r.URL.Path, "version", version, "uid", uid, "err", err)
 			http.Error(w, fmt.Sprintf("could not marshal JSON patch: %v", err), http.StatusInternalServerError)
 		}
diff --git a/cmd/ceems_k8s_admission_controller/http/server.go b/cmd/ceems_k8s_admission_controller/http/server.go
index 670a9f5b..13225573 100644
--- a/cmd/ceems_k8s_admission_controller/http/server.go
+++ b/cmd/ceems_k8s_admission_controller/http/server.go
@@ -76,7 +76,8 @@ func NewAdmissionControllerServer(c *base.Config) (*AdmissionControllerServer, e
 func (s *AdmissionControllerServer) Start() error {
 	s.logger.Info("Starting " + base.AppName)
 
-	if err := web.ListenAndServe(s.server, s.webConfig, s.logger); err != nil && !errors.Is(err, http.ErrServerClosed) {
+	err := web.ListenAndServe(s.server, s.webConfig, s.logger)
+	if err != nil && !errors.Is(err, http.ErrServerClosed) {
 		s.logger.Error("Failed to Listen and Serve HTTP server", "err", err)
 
 		return err
@@ -91,7 +92,8 @@ func (s *AdmissionControllerServer) Shutdown(ctx context.Context) error {
 
 	// First shutdown HTTP server to avoid accepting any incoming
 	// connections
-	if err := s.server.Shutdown(ctx); err != nil {
+	err := s.server.Shutdown(ctx)
+	if err != nil {
 		s.logger.Error("Failed to stop exporter's HTTP server")
 
 		return err
diff --git a/cmd/ceems_k8s_admission_controller/http/server_test.go b/cmd/ceems_k8s_admission_controller/http/server_test.go
index 4be87afd..6121930e 100644
--- a/cmd/ceems_k8s_admission_controller/http/server_test.go
+++ b/cmd/ceems_k8s_admission_controller/http/server_test.go
@@ -71,6 +71,7 @@ func TestNewAdmissionControllerServer(t *testing.T) {
 		require.NoError(t, err, test.name)
 
 		defer resp.Body.Close()
+
 		assert.Equal(t, test.code, resp.StatusCode, test.name)
 	}
diff --git a/cmd/ceems_k8s_admission_controller/main.go b/cmd/ceems_k8s_admission_controller/main.go
index 7cb295e3..f8c710b7 100644
--- a/cmd/ceems_k8s_admission_controller/main.go
+++ b/cmd/ceems_k8s_admission_controller/main.go
@@ -100,7 +100,8 @@ func main() {
 	// Initializing the server in a goroutine so that
 	// it won't block the graceful shutdown handling below.
 	go func() {
-		if err := server.Start(); err != nil {
+		err := server.Start()
+		if err != nil {
 			logger.Error("Failed to start server", "err", err)
 		}
 	}()
@@ -117,7 +118,8 @@ func main() {
 	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 	defer cancel()
 
-	if err := server.Shutdown(ctx); err != nil {
+	err = server.Shutdown(ctx)
+	if err != nil {
 		logger.Error("Failed to gracefully shutdown server", "err", err)
 	}
diff --git a/cmd/ceems_lb/main.go b/cmd/ceems_lb/main.go
index b48be74d..5b96b015 100644
--- a/cmd/ceems_lb/main.go
+++ b/cmd/ceems_lb/main.go
@@ -19,7 +19,8 @@ func main() {
 	}
 
 	// Main entrypoint of the app
-	if err := CEEMSLoadBalancer.Main(); err != nil {
+	err = CEEMSLoadBalancer.Main()
+	if err != nil {
 		log.Println(err)
 		os.Exit(1)
 	}
diff --git a/cmd/ceems_lb/main_test.go b/cmd/ceems_lb/main_test.go
index 09a2cedf..df049267 100644
--- a/cmd/ceems_lb/main_test.go
+++ b/cmd/ceems_lb/main_test.go
@@ -18,7 +18,8 @@ const (
 )
 
 func TestCEEMSLBExecutable(t *testing.T) {
-	if _, err := os.Stat(binary); err != nil {
+	_, err := os.Stat(binary)
+	if err != nil {
 		t.Skipf("ceems_lb binary not available, try to run `make build` first: %s", err)
 	}
 
@@ -31,7 +32,8 @@ func TestCEEMSLBExecutable(t *testing.T) {
 	err = os.Link(configPath, tmpConfigPath)
 	require.NoError(t, err)
 
-	lb := exec.Command(
+	lb := exec.CommandContext(
+		t.Context(),
 		binary,
 		"--path.data", tmpDir,
 		"--config.path", tmpConfigPath,
 		"--web.listen-address", address,
@@ -41,7 +43,8 @@ func TestCEEMSLBExecutable(t *testing.T) {
 }
 
 func runCommandAndTests(cmd *exec.Cmd) error {
-	if err := cmd.Start(); err != nil {
+	err := cmd.Start()
+	if err != nil {
 		return fmt.Errorf("failed to start command: %w", err)
 	}
diff --git a/cmd/ceems_tool/config.go b/cmd/ceems_tool/config.go
index 924d40ae..549a452f 100644
--- a/cmd/ceems_tool/config.go
+++ b/cmd/ceems_tool/config.go
@@ -81,10 +81,12 @@ func GenerateWebConfig(basicAuth bool, tls bool, hosts []string, validity time.D
 
 	// Encode to YAML with indent set to 2
 	var b bytes.Buffer
+
 	yamlEncoder := yaml.NewEncoder(&b)
 	yamlEncoder.SetIndent(2)
 
-	if err := yamlEncoder.Encode(&config); err != nil {
+	err = yamlEncoder.Encode(&config)
+	if err != nil {
 		fmt.Fprintln(os.Stderr, "error encoding web config", err)
 
 		return err
@@ -92,7 +94,9 @@ func GenerateWebConfig(basicAuth bool, tls bool, hosts []string, validity time.D
 
 	// Write to disk
 	configFile := filepath.Join(outDir, "web-config.yml")
-	if err := os.WriteFile(configFile, b.Bytes(), 0o600); err != nil {
+
+	err = os.WriteFile(configFile, b.Bytes(), 0o600)
+	if err != nil {
 		fmt.Fprintln(os.Stderr, "failed to write web config file:", err)
 
 		return err
@@ -108,7 +112,8 @@ func GenerateWebConfig(basicAuth bool, tls bool, hosts []string, validity time.D
 // tlsConfig returns a TLS config based on self signed TLS certificates.
 func tlsConfig(hosts []string, validity time.Duration, outDir string) (TLSConfig, error) {
 	// Make directory to store certificate files
-	if err := os.MkdirAll(outDir, 0o700); err != nil {
+	err := os.MkdirAll(outDir, 0o700)
+	if err != nil {
 		fmt.Fprintln(os.Stderr, "error creating output directory:", err)
 
 		return TLSConfig{}, err
@@ -116,7 +121,8 @@ func tlsConfig(hosts []string, validity time.Duration, outDir string) (TLSConfig
 
 	// Generate self signed certificates
 	// Nicked from https://go.dev/src/crypto/tls/generate_cert.go
-	if err := selfSignedTLS(hosts, validity, outDir); err != nil {
+	err = selfSignedTLS(hosts, validity, outDir)
+	if err != nil {
 		fmt.Fprintln(os.Stderr, "error generating self signed TLS certificate", err)
 
 		return TLSConfig{}, err
@@ -209,11 +215,13 @@ func selfSignedTLS(hosts []string, validity time.Duration, outDir string) error
 		return err
 	}
 
-	if err := pem.Encode(certOut, &pem.Block{Type: "CERTIFICATE", Bytes: derBytes}); err != nil {
+	err = pem.Encode(certOut, &pem.Block{Type: "CERTIFICATE", Bytes: derBytes})
+	if err != nil {
 		return err
 	}
 
-	if err := certOut.Close(); err != nil {
+	err = certOut.Close()
+	if err != nil {
 		return err
 	}
 
@@ -227,11 +235,13 @@ func selfSignedTLS(hosts []string, validity time.Duration, outDir string) error
 		return err
 	}
 
-	if err := pem.Encode(keyOut, &pem.Block{Type: "PRIVATE KEY", Bytes: privBytes}); err != nil {
+	err = pem.Encode(keyOut, &pem.Block{Type: "PRIVATE KEY", Bytes: privBytes})
+	if err != nil {
 		return err
 	}
 
-	if err := keyOut.Close(); err != nil {
+	err = keyOut.Close()
+	if err != nil {
 		return err
 	}
diff --git a/cmd/ceems_tool/main.go b/cmd/ceems_tool/main.go
index e31514a2..dbc1853e 100644
--- a/cmd/ceems_tool/main.go
+++ b/cmd/ceems_tool/main.go
@@ -204,6 +204,7 @@ func main() {
 	case tsdbUpdaterConfigCmd.FullCommand():
 		ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
 		defer cancel()
+
 		os.Exit(checkErr(GenerateTSDBUpdaterConfig(ctx, promServerURL, start, end, httpRoundTripper)))
 
 	case tsdbRecRulesCmd.FullCommand():
@@ -214,11 +215,13 @@ func main() {
 
 		ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
 		defer cancel()
+
 		os.Exit(checkErr(CreatePromRecordingRules(ctx, promServerURL, start, end, pueValue, emissionFactorValue, countryCode, evalInterval, outDir, disableProviders, httpRoundTripper)))
 
 	case tsdbRelabelConfigCmd.FullCommand():
 		ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
 		defer cancel()
+
 		os.Exit(checkErr(CreatePromRelabelConfig(ctx, promServerURL, start, end, httpRoundTripper)))
 	}
 }
@@ -270,7 +273,8 @@ func CheckWebConfig(files ...string) int {
 	failed := false
 
 	for _, f := range files {
-		if err := web.Validate(f); err != nil {
+		err := web.Validate(f)
+		if err != nil {
 			fmt.Fprintln(os.Stderr, f, "FAILED:", err)
 
 			failed = true
@@ -330,7 +334,9 @@ func config(ctx context.Context, api v1.API) (*Config, error) {
 
 	// Unmarshall config
 	var config Config
-	if err := yaml.Unmarshal([]byte(c.YAML), &config); err != nil {
+
+	err = yaml.Unmarshal([]byte(c.YAML), &config)
+	if err != nil {
 		return nil, err
 	}
 
@@ -394,14 +400,16 @@ func parseTimes(start, end string) (time.Time, time.Time, error) {
 }
 
 func parseTime(s string) (time.Time, error) {
-	if t, err := strconv.ParseFloat(s, 64); err == nil {
+	t, err := strconv.ParseFloat(s, 64)
+	if err == nil {
 		s, ns := math.Modf(t)
 
 		return time.Unix(int64(s), int64(ns*float64(time.Second))).UTC(), nil
 	}
 
-	if t, err := time.Parse(time.RFC3339Nano, s); err == nil {
-		return t, nil
+	tt, err := time.Parse(time.RFC3339Nano, s)
+	if err == nil {
+		return tt, nil
 	}
 
 	return time.Time{}, fmt.Errorf("cannot parse %q to a valid timestamp", s)
diff --git a/cmd/ceems_tool/updater.go b/cmd/ceems_tool/updater.go
index 00bfe00d..2c7238fd 100644
--- a/cmd/ceems_tool/updater.go
+++ b/cmd/ceems_tool/updater.go
@@ -181,7 +181,9 @@ func GenerateTSDBUpdaterConfig(ctx context.Context, serverURL *url.URL, start st
 
 	// Render the CPU rules template
 	buf := &bytes.Buffer{}
-	if err := tmpl.ExecuteTemplate(buf, "queries.yml", tmplData); err != nil {
+
+	err = tmpl.ExecuteTemplate(buf, "queries.yml", tmplData)
+	if err != nil {
 		return err
 	}
diff --git a/cmd/redfish_proxy/main.go b/cmd/redfish_proxy/main.go
index 5ef2e13e..6f53c289 100644
--- a/cmd/redfish_proxy/main.go
+++ b/cmd/redfish_proxy/main.go
@@ -57,7 +57,8 @@ func (t *Target) UnmarshalYAML(unmarshal func(any) error) error {
 		URL string `yaml:"url"`
 	}
 
-	if err := unmarshal(&tmp); err != nil {
+	err := unmarshal(&tmp)
+	if err != nil {
 		return err
 	}
 
@@ -108,7 +109,8 @@ func (r *ProxyConfig) UnmarshalYAML(unmarshal func(any) error) error {
 
 	type plain ProxyConfig
 
-	if err := unmarshal((*plain)(r)); err != nil {
+	err := unmarshal((*plain)(r))
+	if err != nil {
 		return err
 	}
 
@@ -119,8 +121,6 @@ func (r *ProxyConfig) UnmarshalYAML(unmarshal func(any) error) error {
 		}
 	}
 
-	var err error
-
 	// Compile regex
 	r.allowedAPIResourcesRegexp, err = regexp.Compile(strings.Join(r.AllowedAPIResources, "|"))
 	if err != nil {
@@ -142,7 +142,8 @@ type Redfish struct {
 func (r *Redfish) UnmarshalYAML(unmarshal func(any) error) error {
 	type plain Redfish
 
-	if err := unmarshal((*plain)(r)); err != nil {
+	err := unmarshal((*plain)(r))
+	if err != nil {
 		return err
 	}
 
@@ -221,7 +222,8 @@ func main() {
 	app.UsageWriter(os.Stdout)
 	app.HelpFlag.Short('h')
 
-	if _, err := app.Parse(os.Args[1:]); err != nil {
+	_, err := app.Parse(os.Args[1:])
+	if err != nil {
 		panic(err)
 	}
 
@@ -267,8 +269,6 @@ func main() {
 	// If webConfigFile is set, get absolute path
 	var webConfigFilePath string
-
-	var err error
 	if webConfigFile != "" {
 		webConfigFilePath, err = filepath.Abs(webConfigFile)
 		if err != nil {
@@ -303,7 +303,8 @@ func main() {
 	// Initializing the server in a goroutine so that
 	// it won't block the graceful shutdown handling below.
 	go func() {
-		if err := server.Start(); err != nil {
+		err := server.Start()
+		if err != nil {
 			logger.Error("Failed to start server", "err", err)
 		}
 	}()
@@ -320,7 +321,8 @@ func main() {
 	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 	defer cancel()
 
-	if err := server.Shutdown(ctx); err != nil {
+	err = server.Shutdown(ctx)
+	if err != nil {
 		logger.Error("Failed to gracefully shutdown server", "err", err)
 	}
diff --git a/cmd/redfish_proxy/main_test.go b/cmd/redfish_proxy/main_test.go
index 7638d074..c77a9ea8 100644
--- a/cmd/redfish_proxy/main_test.go
+++ b/cmd/redfish_proxy/main_test.go
@@ -35,7 +35,8 @@ func queryServer(address, port string) error {
 		return err
 	}
 
-	if err := resp.Body.Close(); err != nil {
+	err = resp.Body.Close()
+	if err != nil {
 		return err
 	}
 
@@ -162,7 +163,8 @@ redfish_config:
 
 	// Query exporter
 	for i := range 10 {
-		if err := queryServer("localhost:5000", port); err == nil {
+		err := queryServer("localhost:5000", port)
+		if err == nil {
 			break
 		}
diff --git a/cmd/redfish_proxy/reverseproxy.go b/cmd/redfish_proxy/reverseproxy.go
index 6481adbb..cd322bc1 100644
--- a/cmd/redfish_proxy/reverseproxy.go
+++ b/cmd/redfish_proxy/reverseproxy.go
@@ -85,12 +85,14 @@ func rewriteRequestURL(logger *slog.Logger, req *http.Request, targets map[strin
 	remoteIPs = req.Header[http.CanonicalHeaderKey(realIPHeaderName)]
 
 	// Add remoteAddr only when not on testing
-	if ip, _, err := net.SplitHostPort(req.RemoteAddr); err == nil && os.Getenv("__IS_TESTING") == "" {
+	ip, _, err := net.SplitHostPort(req.RemoteAddr)
+	if err == nil && os.Getenv("__IS_TESTING") == "" {
 		remoteIPs = append(remoteIPs, ip)
 	}
 
 	// Check if target is already in map
 	targetsMapMu.RLock()
+
 	for _, ip := range remoteIPs {
 		if target, ok = targets[ip]; ok {
 			// Unlock map and go to rewrite_req
@@ -99,6 +101,7 @@ func rewriteRequestURL(logger *slog.Logger, req *http.Request, targets map[strin
 			goto rewrite_req
 		}
 	}
+
 	targetsMapMu.RUnlock()
 
 	// If target is not found in map, check header
@@ -114,9 +117,11 @@ func rewriteRequestURL(logger *slog.Logger, req *http.Request, targets map[strin
 
 	// Add this to targets map
 	targetsMapMu.Lock()
+
 	for _, ip := range remoteIPs {
 		targets[ip] = target
 	}
+
 	targetsMapMu.Unlock()
 
 	goto rewrite_req
diff --git a/cmd/redfish_proxy/server.go b/cmd/redfish_proxy/server.go
index 47d78302..f3ea7674 100644
--- a/cmd/redfish_proxy/server.go
+++ b/cmd/redfish_proxy/server.go
@@ -63,7 +63,8 @@ func NewRedfishProxyServer(c *Config) (*RedfishProxyServer, error) {
 func (s *RedfishProxyServer) Start() error {
 	s.logger.Info("Starting " + appName)
 
-	if err := web.ListenAndServe(s.server, s.webConfig, s.logger); err != nil && !errors.Is(err, http.ErrServerClosed) {
+	err := web.ListenAndServe(s.server, s.webConfig, s.logger)
+	if err != nil && !errors.Is(err, http.ErrServerClosed) {
 		s.logger.Error("Failed to Listen and Serve HTTP server", "err", err)
 
 		return err
@@ -80,7 +81,8 @@ func (s *RedfishProxyServer) Shutdown(ctx context.Context) error {
 	// connections
 	// Do not return error here as we SHOULD ENSURE to close collectors
 	// that might release any system resources
-	if err := s.server.Shutdown(ctx); err != nil {
+	err := s.server.Shutdown(ctx)
+	if err != nil {
 		s.logger.Error("Failed to stop exporter's HTTP server")
 
 		return err
diff --git a/cmd/redfish_proxy/server_test.go b/cmd/redfish_proxy/server_test.go
index 810eadb8..b9e9ee8b 100644
--- a/cmd/redfish_proxy/server_test.go
+++ b/cmd/redfish_proxy/server_test.go
@@ -97,6 +97,7 @@ func TestNewRedfishProxyServerWithTargets(t *testing.T) {
 	resp, err := client.Do(req)
 	require.NoError(t, err)
+
 	defer resp.Body.Close()
 
 	bodyBytes, err := io.ReadAll(resp.Body)
@@ -174,6 +175,7 @@ func TestNewRedfishProxyServerWithWebConfig(t *testing.T) {
 	resp, err := client.Do(req)
 	require.NoError(t, err)
+
 	defer resp.Body.Close()
 
 	bodyBytes, err := io.ReadAll(resp.Body)
@@ -191,6 +193,7 @@ func TestNewRedfishProxyServerWithWebConfig(t *testing.T) {
 	resp, err := client.Do(req)
 	require.NoError(t, err)
+
 	defer resp.Body.Close()
 
 	// Check the body if it has same IP set
@@ -221,6 +224,7 @@ func TestNewRedfishProxyServerWithWebConfig(t *testing.T) {
 	resp, err := client.Do(req)
 	errs <- err
+
 	defer resp.Body.Close()
 
 	bodyBytes, err := io.ReadAll(resp.Body)
diff --git a/examples/mock_collector/cmd/mock_ceems_exporter/main.go b/examples/mock_collector/cmd/mock_ceems_exporter/main.go
index 41d65dc6..00e47165 100644
--- a/examples/mock_collector/cmd/mock_ceems_exporter/main.go
+++ b/examples/mock_collector/cmd/mock_ceems_exporter/main.go
@@ -19,7 +19,8 @@ func main() {
 	}
 
 	// Main entrypoint of the app
-	if err := ceemsExporterApp.Main(); err != nil {
+	err = ceemsExporterApp.Main()
+	if err != nil {
 		fmt.Fprintf(os.Stderr, "error: %s\n", err)
 		os.Exit(1)
 	}
diff --git a/examples/mock_resource_manager/cmd/mock_ceems_server/main.go b/examples/mock_resource_manager/cmd/mock_ceems_server/main.go
index 0fa30cd4..fcf69014 100644
--- a/examples/mock_resource_manager/cmd/mock_ceems_server/main.go
+++ b/examples/mock_resource_manager/cmd/mock_ceems_server/main.go
@@ -24,7 +24,8 @@ func main() {
 	}
 
 	// Main entrypoint of the app
-	if err := usageStatsServerApp.Main(); err != nil {
+	err = usageStatsServerApp.Main()
+	if err != nil {
 		fmt.Fprintf(os.Stderr, "error: %s\n", err)
 		os.Exit(1)
 	}
diff --git a/examples/mock_resource_manager/pkg/resource/mock_manager.go b/examples/mock_resource_manager/pkg/resource/mock_manager.go
index b393175e..072170a3 100644
--- a/examples/mock_resource_manager/pkg/resource/mock_manager.go
+++ b/examples/mock_resource_manager/pkg/resource/mock_manager.go
@@ -31,7 +31,8 @@ func init() {
 
 // Do all basic checks here.
 func preflightChecks(logger *slog.Logger) error {
-	if _, err := os.Stat(*macctPath); err != nil {
+	_, err := os.Stat(*macctPath)
+	if err != nil {
 		logger.Error("Failed to open executable", "path", *macctPath, "err", err)
 
 		return err
diff --git a/examples/mock_updater/cmd/mock_ceems_server/main.go b/examples/mock_updater/cmd/mock_ceems_server/main.go
index b9e52bed..10d17377 100644
--- a/examples/mock_updater/cmd/mock_ceems_server/main.go
+++ b/examples/mock_updater/cmd/mock_ceems_server/main.go
@@ -25,7 +25,8 @@ func main() {
 	}
 
 	// Main entrypoint of the app
-	if err := usageStatsServerApp.Main(); err != nil {
+	err = usageStatsServerApp.Main()
+	if err != nil {
 		fmt.Fprintf(os.Stderr, "error: %s\n", err)
 		os.Exit(1)
 	}
diff --git a/internal/common/helpers.go b/internal/common/helpers.go
index 25848489..8d579361 100644
--- a/internal/common/helpers.go
+++ b/internal/common/helpers.go
@@ -316,7 +316,8 @@ func NewGrafanaClient(config *GrafanaWebConfig, logger *slog.Logger) (*grafana.G
 	}
 
 	if grafanaClient.Available() {
-		if err := grafanaClient.Ping(); err != nil {
+		err := grafanaClient.Ping()
+		if err != nil {
 			//lint:ignore ST1005 Grafana is a noun and need to capitalize!
 			return nil, fmt.Errorf( //nolint:staticcheck
 				"Grafana at %s is unreachable: %w",
diff --git a/internal/common/helpers_test.go b/internal/common/helpers_test.go
index 0d68760b..f5946e85 100644
--- a/internal/common/helpers_test.go
+++ b/internal/common/helpers_test.go
@@ -444,7 +444,9 @@ func TestGrafanaClient(t *testing.T) {
 				Login: r.Header.Get("Authorization"),
 			},
 		}
-		if err := json.NewEncoder(w).Encode(&teamMembers); err != nil {
+
+		err := json.NewEncoder(w).Encode(&teamMembers)
+		if err != nil {
 			w.Write([]byte("KO"))
 		}
 	}))
@@ -465,6 +467,7 @@ func TestGrafanaClient(t *testing.T) {
 	var client *grafana.Grafana
 
 	var err error
+
 	client, err = NewGrafanaClient(config, slog.New(slog.DiscardHandler))
 	require.NoError(t, err, "failed to create Grafana client")
diff --git a/internal/osexec/osexec.go b/internal/osexec/osexec.go
index 5b5ada56..f77a7272 100644
--- a/internal/osexec/osexec.go
+++ b/internal/osexec/osexec.go
@@ -23,7 +23,7 @@ var (
 
 // Execute command and return stdout/stderr.
 func Execute(cmd string, args []string, env []string) ([]byte, error) {
-	execCmd := exec.Command(cmd, args...)
+	execCmd := exec.Command(cmd, args...) //nolint:noctx
 
 	// If env is not nil pointer, add env vars into subprocess cmd
 	if env != nil {
@@ -49,7 +49,7 @@ func Execute(cmd string, args []string, env []string) ([]byte, error) {
 
 // ExecuteAs executes a command as a given UID and GID and return stdout/stderr.
 func ExecuteAs(cmd string, args []string, uid int, gid int, env []string) ([]byte, error) {
-	execCmd := exec.Command(cmd, args...)
+	execCmd := exec.Command(cmd, args...) //nolint:noctx
 
 	// Check bounds on uid and gid before converting into int32
 	uidInt32, err := convertToUint(uid)
@@ -165,6 +165,7 @@ func ExecuteWithTimeout(cmd string, args []string, timeout int, env []string) ([
 	if timeout > 0 {
 		var cancel context.CancelFunc
+
 		ctx, cancel = context.WithTimeout(context.Background(), time.Duration(timeout)*time.Second)
 		defer cancel()
 	}
@@ -209,6 +210,7 @@ func ExecuteAsWithTimeout(
 	if timeout > 0 {
 		var cancel context.CancelFunc
+
 		ctx, cancel = context.WithTimeout(context.Background(), time.Duration(timeout)*time.Second)
 		defer cancel()
 	}
diff --git a/internal/security/exec.go b/internal/security/exec.go
index db77a20a..8101453e 100644
--- a/internal/security/exec.go
+++ b/internal/security/exec.go
@@ -67,7 +67,8 @@ func (s *SecurityContext) Exec(data any) error {
 		return s.f(data)
 	}
 
-	if _, err := s.launcher.Launch(data); err != nil {
+	_, err := s.launcher.Launch(data)
+	if err != nil {
 		return err
 	}
 
@@ -81,15 +82,18 @@ func (s *SecurityContext) raiseCaps() error {
 		return nil
 	}
 
-	if err := s.capSet.SetFlag(cap.Permitted, true, s.caps...); err != nil {
+	err := s.capSet.SetFlag(cap.Permitted, true, s.caps...)
+	if err != nil {
 		return fmt.Errorf("raising: error setting permitted capabilities: %w", err)
 	}
 
-	if err := s.capSet.SetFlag(cap.Effective, true, s.caps...); err != nil {
+	err = s.capSet.SetFlag(cap.Effective, true, s.caps...)
+	if err != nil {
 		return fmt.Errorf("raising: error setting effective capabilities: %w", err)
 	}
 
-	if err := s.capSet.SetProc(); err != nil {
+	err = s.capSet.SetProc()
+	if err != nil {
 		return fmt.Errorf("raising: error setting capabilities: %w", err)
 	}
 
@@ -103,11 +107,13 @@ func (s *SecurityContext) dropCaps() error {
 		return nil
 	}
 
-	if err := s.capSet.SetFlag(cap.Effective, false, s.caps...); err != nil {
+	err := s.capSet.SetFlag(cap.Effective, false, s.caps...)
+	if err != nil {
 		return fmt.Errorf("dropping: error setting effective capabilities: %w", err)
 	}
 
-	if err := s.capSet.SetProc(); err != nil {
+	err = s.capSet.SetProc()
+	if err != nil {
 		return fmt.Errorf("dropping: error setting capabilities: %w", err)
 	}
 
@@ -123,17 +129,20 @@ func (s *SecurityContext) targetFunc(data any) error {
 	// the main function.
 	// Log an error so that operators will be aware that the reason
 	// for the error is lack of privileges.
-	if err := s.raiseCaps(); err != nil {
+	err := s.raiseCaps()
+	if err != nil {
 		s.logger.Error("Failed to raise capabilities", "name", s.Name, "caps", cap.GetProc().String(), "err", err)
 	}
 
 	s.logger.Debug("Executing in security context", "name", s.Name, "caps", cap.GetProc().String())
 
 	// Execute function
-	if err := s.f(data); err != nil {
+	err = s.f(data)
+	if err != nil {
 		// Attempt to drop capabilities and ignore any errors
-		if err := s.dropCaps(); err != nil {
-			s.logger.Warn("Failed to drop capabilities", "name", s.Name, "caps", cap.GetProc().String(), "err", err)
+		capsErr := s.dropCaps()
+		if capsErr != nil {
+			s.logger.Warn("Failed to drop capabilities", "name", s.Name, "caps", cap.GetProc().String(), "err", capsErr)
 		}
 
 		return err
@@ -142,7 +151,8 @@ func (s *SecurityContext) targetFunc(data any) error {
 	// Drop capabilities. This is not really needed as thread will be
 	// destroyed. But just in case...
 	// Ignore any errors
-	if err := s.dropCaps(); err != nil {
+	err = s.dropCaps()
+	if err != nil {
 		s.logger.Warn("Failed to drop capabilities", "name", s.Name, "caps", cap.GetProc().String(), "err", err)
 	}
diff --git a/internal/security/manager.go b/internal/security/manager.go
index a2e4c821..42ab3567 100644
--- a/internal/security/manager.go
+++ b/internal/security/manager.go
@@ -69,7 +69,8 @@ func NewManager(c *Config, logger *slog.Logger) (*Manager, error) {
 	if err != nil {
 		errs = errors.Join(errs, err)
 
-		if manager.runAsUser, err = user.LookupId(c.RunAsUser); err != nil {
+		manager.runAsUser, err = user.LookupId(c.RunAsUser)
+		if err != nil {
 			errs = errors.Join(errs, err)
 
 			return nil, fmt.Errorf("could not lookup %s: %w", c.RunAsUser, errs)
@@ -201,7 +202,8 @@ func (m *Manager) DropPrivileges(enableEffective bool) error {
 	// Get if the current process has any capabilities at all
 	// by comparing against a new capability set
 	// If no capabilities found, nothing to do, return
-	if isPriv, err := existing.Cf(cap.NewSet()); err == nil && isPriv == 0 {
+	isPriv, err := existing.Cf(cap.NewSet())
+	if err == nil && isPriv == 0 {
 		return nil
 	}
 
@@ -212,19 +214,22 @@ func (m *Manager) DropPrivileges(enableEffective bool) error {
 	// Here we set a bunch of linux specific security stuff.
 
 	// Add ACL entries to all relevant paths
-	if err := m.addACLEntries(); err != nil {
+	err := m.addACLEntries()
+	if err != nil {
 		return err
 	}
 
 	// Now change the user from root to runAsUser
-	if err := m.changeUser(); err != nil {
+	err = m.changeUser()
+	if err != nil {
 		return err
 	}
 
 	// Ensure ReadPaths and ReadWritePaths are accessible for runAsUser.
 	// This can happen when any of the parent directories do not have rx
 	// on others which might prevent runAsUser to access these paths.
-	if err := m.pathsReachable(); err != nil {
+	err = m.pathsReachable()
+	if err != nil {
 		return err
 	}
 
@@ -254,7 +259,8 @@ func (m *Manager) DeleteACLEntries() error {
 	}
 
 	if securityCtx, ok := m.securityContexts[deleteACLCtx]; ok {
-		if err := securityCtx.Exec(dataPtr); err != nil {
+		err := securityCtx.Exec(dataPtr)
+		if err != nil {
 			return fmt.Errorf("failed to remove ACLs in a security context: %w", err)
 		}
 	} else {
@@ -271,17 +277,20 @@ func (m *Manager) addACLEntries() error {
 		a := &acls.ACL{}
 
 		// Load the existing ACL entries of the PosixACLAccess type
-		if err := a.Load(acl.path, acls.PosixACLAccess); err != nil {
+		err := a.Load(acl.path, acls.PosixACLAccess)
+		if err != nil {
 			return fmt.Errorf("failed to load acl entries: %w", err)
 		}
 
 		// Add entry to new ACL
-		if err := a.AddEntry(acl.entry); err != nil {
+		err = a.AddEntry(acl.entry)
+		if err != nil {
 			return fmt.Errorf("failed to add acl entry %s err: %w", acl.entry, err)
 		}
 
 		// Apply entry to new ACL
-		if err := a.Apply(acl.path, acls.PosixACLAccess); err != nil {
+		err = a.Apply(acl.path, acls.PosixACLAccess)
+		if err != nil {
 			return fmt.Errorf("failed to apply acl entries %s to path %s err: %w", a, acl.path, err)
 		}
 
@@ -331,7 +340,8 @@ func (m *Manager) changeUser() error {
 func (m *Manager) pathsReachable() error {
 	// Stat path to check if they are reachable
 	for _, a := range m.acls {
-		if _, err := os.Stat(a.path); err != nil {
+		_, err := os.Stat(a.path)
+		if err != nil {
 			return fmt.Errorf("could not reach path %s after changing user to %s", a.path, m.runAsUser.Username)
 		}
 	}
@@ -349,7 +359,8 @@ func GetDefaultRunAsUser() (string, error) {
 	if syscall.Geteuid() == 0 {
 		return "nobody", nil
 	} else {
-		if currentUser, err := user.Current(); err != nil {
+		currentUser, err := user.Current()
+		if err != nil {
 			return "", fmt.Errorf("failed to get current user: %w", err)
 		} else {
 			return currentUser.Username, nil
@@ -367,23 +378,27 @@ func setCapabilities(caps []cap.Value, enableEffective bool) error {
 
 	// Permitted makes the permission possible to get, effective makes it 'active'
 	for _, c := range caps {
-		if err := newcaps.SetFlag(cap.Permitted, true, c); err != nil {
+		err := newcaps.SetFlag(cap.Permitted, true, c)
+		if err != nil {
 			return fmt.Errorf("error setting permitted setcap: %w", err)
 		}
 
 		// Only enable effective set before performing a privileged operation
-		if err := newcaps.SetFlag(cap.Effective, enableEffective, c); err != nil {
+		err = newcaps.SetFlag(cap.Effective, enableEffective, c)
+		if err != nil {
 			return fmt.Errorf("error setting effective setcap: %w", err)
 		}
 
		// We do not want these capabilities to be inherited by subprocesses
-		if err := newcaps.SetFlag(cap.Inheritable, false, c); err != nil {
+		err = newcaps.SetFlag(cap.Inheritable, false, c)
+		if err != nil {
 			return fmt.Errorf("error setting inheritable setcap: %w", err)
 		}
 	}
 
 	// Apply the new capabilities to the current process (incl. all threads)
-	if err := newcaps.SetProc(); err != nil {
+	err := newcaps.SetProc()
+	if err != nil {
 		return fmt.Errorf("error setting new process capabilities %s via setcap: %w", newcaps.String(), err)
 	}
 
@@ -466,7 +481,8 @@ func deleteACLEntries(data any) error {
 		a := &acls.ACL{}
 
 		// Load ACL entries from a given path object
-		if err := a.Load(acl.path, acls.PosixACLAccess); err != nil {
+		err := a.Load(acl.path, acls.PosixACLAccess)
+		if err != nil {
 			return err
 		}
 
@@ -474,7 +490,8 @@ func deleteACLEntries(data any) error {
 		a.DeleteEntry(acl.entry)
 
 		// Apply entry to new ACL
-		if err := a.Apply(acl.path, acls.PosixACLAccess); err != nil {
+		err = a.Apply(acl.path, acls.PosixACLAccess)
+		if err != nil {
 			return err
 		}
 	}
diff --git a/internal/security/manager_test.go b/internal/security/manager_test.go
index dd923795..a14fb14b 100644
--- a/internal/security/manager_test.go
+++ b/internal/security/manager_test.go
@@ -27,23 +27,30 @@ func skipUnprivileged(t *testing.T) {
 
 func testConfig(tmpDir string) (*Config, error) {
 	// Make test directories
 	testDir := filepath.Join(tmpDir, "l1", "l2", "l3")
-	if err := os.MkdirAll(testDir, 0o700); err != nil {
+
+	err := os.MkdirAll(testDir, 0o700)
+	if err != nil {
 		return nil, err
 	}
 
 	// Add rx on tmpDir/l1/l2
-	if err := os.Chmod(filepath.Join(tmpDir, "l1", "l2"), 0o705); err != nil {
+	err = os.Chmod(filepath.Join(tmpDir, "l1", "l2"), 0o705)
+	if err != nil {
 		return nil, err
 	}
 
 	// Create a file in testDir
 	testReadFile := filepath.Join(testDir, "testRead")
-	if err := os.WriteFile(testReadFile, []byte("hello"), 0o700); err != nil { //nolint:gosec
+
+	err = os.WriteFile(testReadFile, []byte("hello"), 0o700) //nolint:gosec
+	if err != nil {
 		return nil, err
 	}
 
 	testWriteFile := filepath.Join(testDir, "testWrite")
-	if err := os.WriteFile(testWriteFile, []byte("hello"), 0o700); err != nil { //nolint:gosec
+
+	err = os.WriteFile(testWriteFile, []byte("hello"), 0o700) //nolint:gosec
+	if err != nil {
 		return nil, err
 	}
diff --git a/pkg/api/cli/cli.go b/pkg/api/cli/cli.go
index 7186d732..c096db7c 100644
--- a/pkg/api/cli/cli.go
+++ b/pkg/api/cli/cli.go
@@ -48,12 +48,14 @@ func (c *CEEMSAPIAppConfig) SetDirectory(dir string) {
 // Validate validates the config.
 func (c *CEEMSAPIAppConfig) Validate() error {
 	// Validate Data config
-	if err := c.Server.Data.Validate(); err != nil {
+	err := c.Server.Data.Validate()
+	if err != nil {
 		return err
 	}
 
 	// Validate Admin config
-	if err := c.Server.Admin.Validate(); err != nil {
+	err = c.Server.Admin.Validate()
+	if err != nil {
 		return err
 	}
 
@@ -242,12 +244,14 @@ func (b *CEEMSServer) Main() error {
 	config.Server.Data.SkipDeleteOldUnits = skipDeleteOldUnits
 
 	// Return error if backup interval of less than 1 day is used
-	if err := config.Validate(); err != nil && !disableChecks {
+	err = config.Validate()
+	if err != nil && !disableChecks {
 		return err
 	}
 
 	// Setup data directories
-	if config, err = createDirs(config); err != nil {
+	config, err = createDirs(config)
+	if err != nil {
 		return err
 	}
 
@@ -263,7 +267,8 @@ func (b *CEEMSServer) Main() error {
 	runtime.GOMAXPROCS(maxProcs)
 	logger.Debug("Go MAXPROCS", "procs", runtime.GOMAXPROCS(0))
 
-	if user, err := user.Current(); err == nil && user.Uid == "0" {
+	user, err := user.Current()
+	if err == nil && user.Uid == "0" {
 		logger.Info("CEEMS API server is running as root user. Privileges will be dropped and process will be run as unprivileged user")
 	}
 
@@ -306,7 +311,9 @@ func (b *CEEMSServer) Main() error {
 	// If there is already a DB file, we should add it to ReadWritePaths
 	dbFile := filepath.Join(config.Server.Data.Path, base.CEEMSDBName)
-	if _, err := os.Stat(dbFile); err == nil {
+
+	_, err = os.Stat(dbFile)
+	if err == nil {
 		securityCfg.ReadWritePaths = append(securityCfg.ReadWritePaths, dbFile)
 	}
 
@@ -320,7 +327,8 @@ func (b *CEEMSServer) Main() error {
 	// Drop all unnecessary privileges
 	if dropPrivs {
-		if err := securityManager.DropPrivileges(disableCapAwareness); err != nil {
+		err := securityManager.DropPrivileges(disableCapAwareness)
+		if err != nil {
 			logger.Error("Failed to drop privileges", "err", err)
 
 			return err
@@ -410,7 +418,8 @@ func (b *CEEMSServer) Main() error {
 	// starts instead of waiting for ticker to tick.
 	logger.Info("Updating CEEMS DB", "interval", config.Server.Data.UpdateInterval)
 
-	if err := collector.Collect(ctx); err != nil {
+	err := collector.Collect(ctx)
+	if err != nil {
 		logger.Error("Failed to fetch data", "err", err)
 	}
 
@@ -443,7 +452,8 @@ func (b *CEEMSServer) Main() error {
 			// first tick to run it.
 			logger.Info("Backing up CEEMS DB", "interval", config.Server.Data.BackupInterval)
 
-			if err := collector.Backup(ctx); err != nil {
+			err := collector.Backup(ctx)
+			if err != nil {
 				logger.Error("Failed to backup DB", "err", err)
 			}
 		case <-ctx.Done():
@@ -458,7 +468,8 @@ func (b *CEEMSServer) Main() error {
 	// Initializing the server in a goroutine so that
 	// it won't block the graceful shutdown handling below.
 	go func() {
-		if err := apiServer.Start(ctx); err != nil {
+		err := apiServer.Start(ctx)
+		if err != nil {
 			logger.Error("Failed to start server", "err", err)
 		}
 	}()
@@ -477,7 +488,8 @@ func (b *CEEMSServer) Main() error {
 	wg.Wait()
 
 	// Close DB only after all DB go routines are done.
-	if err := collector.Stop(); err != nil {
+	err = collector.Stop()
+	if err != nil {
 		logger.Error("Failed to close DB connection", "err", err)
 	}
 
@@ -490,12 +502,14 @@ func (b *CEEMSServer) Main() error {
 	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 	defer cancel()
 
-	if err := apiServer.Shutdown(ctx); err != nil {
+	err = apiServer.Shutdown(ctx)
+	if err != nil {
 		logger.Error("Failed to gracefully shutdown server", "err", err)
 	}
 
 	// Restore file permissions by removing any ACLs added
-	if err := securityManager.DeleteACLEntries(); err != nil {
+	err = securityManager.DeleteACLEntries()
+	if err != nil {
 		logger.Error("Failed to remove ACL entries", "err", err)
 	}
 
@@ -515,7 +529,8 @@ func createDirs(config *CEEMSAPIAppConfig) (*CEEMSAPIAppConfig, error) {
 	}
 
 	if config.Server.Data.BackupPath != "" {
-		if config.Server.Data.BackupPath, err = filepath.Abs(config.Server.Data.BackupPath); err != nil {
+		config.Server.Data.BackupPath, err = filepath.Abs(config.Server.Data.BackupPath)
+		if err != nil {
 			return nil, fmt.Errorf(
 				"failed to get absolute path for data.backup_path=%s: %w",
 				config.Server.Data.BackupPath,
@@ -525,15 +540,19 @@ func createDirs(config *CEEMSAPIAppConfig) (*CEEMSAPIAppConfig, error) {
 	}
 
 	// Check if config.Data.Path/config.Data.BackupPath exists and create one if it does not.
-	if _, err := os.Stat(config.Server.Data.Path); os.IsNotExist(err) {
-		if err := os.MkdirAll(config.Server.Data.Path, 0o750); err != nil {
+	_, err = os.Stat(config.Server.Data.Path)
+	if os.IsNotExist(err) {
+		err := os.MkdirAll(config.Server.Data.Path, 0o750)
+		if err != nil {
 			return nil, fmt.Errorf("failed to create data directory: %w", err)
 		}
 	}
 
 	if config.Server.Data.BackupPath != "" {
-		if _, err := os.Stat(config.Server.Data.BackupPath); os.IsNotExist(err) {
-			if err := os.MkdirAll(config.Server.Data.BackupPath, 0o750); err != nil {
+		_, err := os.Stat(config.Server.Data.BackupPath)
+		if os.IsNotExist(err) {
+			err := os.MkdirAll(config.Server.Data.BackupPath, 0o750)
+			if err != nil {
 				return nil, fmt.Errorf("failed to create backup data directory: %w", err)
 			}
 		}
diff --git a/pkg/api/cli/cli_test.go b/pkg/api/cli/cli_test.go
index 6b674bc0..f65ad30c 100644
--- a/pkg/api/cli/cli_test.go
+++ b/pkg/api/cli/cli_test.go
@@ -39,7 +39,8 @@ func queryServer(address string) error {
 		return err
 	}
 
-	if err := resp.Body.Close(); err != nil {
+	err = resp.Body.Close()
+	if err != nil {
 		return err
 	}
 
@@ -73,6 +74,7 @@ func TestCEEMSConfigNestedDataDirs(t *testing.T) {
 	// Setup data directories
 	var err error
+
 	config, err = createDirs(config)
 	require.NoError(t, err, "failed to create data directories")
 
@@ -141,7 +143,8 @@ ceems_api_server:
 
 	// Query exporter
 	for i := range 10 {
-		if err := queryServer("localhost:9020"); err == nil {
+		err := queryServer("localhost:9020")
+		if err == nil {
 			break
 		}
diff --git a/pkg/api/db/db.go b/pkg/api/db/db.go
index d7ecdd13..ebf76dc5 100644
--- a/pkg/api/db/db.go
+++ b/pkg/api/db/db.go
@@ -80,9 +80,8 @@ type DateTime struct {
 func (t *DateTime) UnmarshalYAML(unmarshal func(any) error) error {
 	var tmp string
 
-	var err error
-
-	if err = unmarshal(&tmp); err != nil {
+	err := unmarshal(&tmp)
+	if err != nil {
 		return err
 	}
 
@@ -95,17 +94,20 @@ func (t *DateTime) UnmarshalYAML(unmarshal func(any) error) error {
 		tt, _ = time.Parse("2006-01-02", time.Now().Format("2006-01-02"))
 	} else {
 		// First attempt to parse as YYYY-MM-DD
-		if tt, err = time.Parse("2006-01-02", tmp); err == nil {
+		tt, err = time.Parse("2006-01-02", tmp)
+		if err == nil {
 			goto outside
 		}
 
 		// Second attempt to parse as YYYY-MM-DDTHH:MM
-		if tt, err = time.Parse("2006-01-02T15:04", tmp); err == nil {
+		tt, err = time.Parse("2006-01-02T15:04", tmp)
+		if err == nil {
 			goto outside
 		}
 
 		// Final attempt to parse as YYYY-MM-DDTHH:MM:SS
-		if tt, err = time.Parse("2006-01-02T15:04:05", tmp); err == nil {
+		tt, err = time.Parse("2006-01-02T15:04:05", tmp)
+		if err == nil {
 			goto outside
 		}
 
@@ -137,7 +139,8 @@ func (c *AdminConfig) UnmarshalYAML(unmarshal func(any) error) error {
 
 	type plain AdminConfig
 
-	if err := unmarshal((*plain)(c)); err != nil {
+	err := unmarshal((*plain)(c))
+	if err != nil {
 		return err
 	}
 
@@ -190,7 +193,8 @@ func (c *DataConfig) UnmarshalYAML(unmarshal func(any) error) error {
 
 	type plain DataConfig
 
-	if err := unmarshal((*plain)(c)); err != nil {
+	err := unmarshal((*plain)(c))
+	if err != nil {
 		return err
 	}
 
@@ -306,8 +310,12 @@ func New(c *Config) (*stats, error) {
 	// Get file paths
 	dbPath := filepath.Join(c.Data.Path, base.CEEMSDBName)
 
+	// Make a timeout context
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
 	// Setup DB
-	db, dbConn, err := setupDB(dbPath)
+	db, dbConn, err := setupDB(ctx, dbPath)
 	if err != nil {
 		c.Logger.Error("DB setup failed", "err", err)
 
@@ -321,7 +329,8 @@ func New(c *Config) (*stats, error) {
 	}
 
 	// Perform DB migrations
-	if err = migrator.ApplyMigrations(db); err != nil {
+	err = migrator.ApplyMigrations(db)
+	if err != nil {
 		return nil, err
 	}
 
@@ -331,7 +340,8 @@ func New(c *Config) (*stats, error) {
 
 	var emptyDB bool
 
-	if err = db.QueryRow("SELECT MAX(last_updated_at) FROM " + base.UsageDBTableName).Scan(&lastUpdatedAt); err == nil {
+	err = db.QueryRow("SELECT MAX(last_updated_at) FROM " + base.UsageDBTableName).Scan(&lastUpdatedAt) //nolint:noctx
+	if err == nil {
 		// Parse date time string
 		c.Data.LastUpdate.Time, err = time.Parse(base.DatetimeLayout, lastUpdatedAt)
 		if err != nil {
@@ -436,7 +446,8 @@ func (s *stats) Collect(ctx context.Context) error {
 		if nextUpdateTime.Compare(currentTime) == -1 {
 			s.logger.Debug("Incremental DB update step", "from", s.storage.lastUpdateTime, "to", nextUpdateTime)
 
-			if err := s.collect(ctx, s.storage.lastUpdateTime, nextUpdateTime); err != nil {
+			err := s.collect(ctx, s.storage.lastUpdateTime, nextUpdateTime)
+			if err != nil {
 				s.logger.Error("Failed incremental update", "from", s.storage.lastUpdateTime, "to", nextUpdateTime, "err", err)
 
 				return err
@@ -510,7 +521,8 @@ func (s *stats) collect(ctx context.Context, startTime, endTime time.Time) error
 	units = s.updater.Update(ctx, startTime, endTime, units)
 
 	// Update admin users list from Grafana
-	if err := s.updateAdminUsers(ctx); err != nil {
+	err = s.updateAdminUsers(ctx)
+	if err != nil {
 		s.logger.Error("Failed to update admin users from Grafana", "err", err)
 	}
 
@@ -525,7 +537,8 @@ func (s *stats) collect(ctx context.Context, startTime, endTime time.Time) error
 	if !s.storage.skipDeleteOldUnits {
 		s.logger.Debug("Cleaning up old entries in DB")
 
-		if err = s.purgeExpiredUnits(ctx, tx); err != nil {
+		err = s.purgeExpiredUnits(ctx, tx)
+		if err != nil {
 			s.logger.Error("Failed to clean up old entries", "err", err)
 		} else {
 			s.logger.Debug("Cleaned up old entries in DB")
@@ -535,7 +548,8 @@ func (s *stats) collect(ctx context.Context, startTime, endTime time.Time) error
 	// Insert data into DB
 	s.logger.Debug("Executing SQL statements")
 
-	if err := s.execStatements(ctx, tx, startTime, endTime, units, users, projects); err != nil {
+	err = s.execStatements(ctx, tx, startTime, endTime, units, users, projects)
+	if err != nil {
 		s.logger.Debug("Failed to execute SQL statements", "err", err)
 
 		return fmt.Errorf("failed to execute SQL statements: %w", err)
@@ -544,7 +558,8 @@ func (s *stats) collect(ctx context.Context, startTime, endTime time.Time) error
 	}
 
 	// Commit changes
-	if err = tx.Commit(); err != nil {
+	err = tx.Commit()
+	if err != nil {
 		return fmt.Errorf("failed to commit SQL transcation: %w", err)
 	}
 
@@ -571,13 +586,17 @@ func (s *stats) purgeExpiredUnits(ctx context.Context, tx *sql.Tx) error {
 		base.UnitsDBTableName,
 		int(s.storage.retentionPeriod.Hours()/24),
 	) // #nosec
-	if _, err := tx.ExecContext(ctx, deleteUnitsQuery); err != nil {
+
+	_, err := tx.ExecContext(ctx, deleteUnitsQuery)
+	if err != nil {
 		return err
 	}
 
 	// Get changes
 	var unitsDeleted int
-	if err := tx.QueryRowContext(ctx, "SELECT changes()").Scan(&unitsDeleted); err == nil {
+
+	err = tx.QueryRowContext(ctx, "SELECT changes()").Scan(&unitsDeleted)
+	if err == nil {
 		s.logger.Debug("DB update", "units_deleted", unitsDeleted)
 	}
 
@@ -587,13 +606,17 @@ func (s *stats) purgeExpiredUnits(ctx context.Context, tx *sql.Tx) error {
 		base.UsageDBTableName,
 		int(s.storage.retentionPeriod.Hours()/24),
 	) // #nosec
-	if _, err := tx.ExecContext(ctx, deleteUsageQuery); err != nil {
+
+	_, err = tx.ExecContext(ctx, deleteUsageQuery)
+	if err != nil {
 		return err
 	}
 
 	// Get changes
 	var usageDeleted int
-	if err := tx.QueryRowContext(ctx, "SELECT changes()").Scan(&usageDeleted); err == nil {
+
+	err = tx.QueryRowContext(ctx, "SELECT changes()").Scan(&usageDeleted)
+	if err == nil {
 		s.logger.Debug("DB update", "usage_deleted", usageDeleted)
 	}
 
@@ -641,7 +664,7 @@ func (s *stats) execStatements(
 			// s.logger.Debug("Inserting unit", "id", unit.Jobid)
 
 			// Use named parameters to not to repeat the values
-			if _, err = stmts[base.UnitsDBTableName].ExecContext(
+			_, err = stmts[base.UnitsDBTableName].ExecContext(
 				ctx,
 				sql.Named(base.UnitsDBTableStructFieldColNameMap["ResourceManager"], unit.ResourceManager),
 				sql.Named(base.UnitsDBTableStructFieldColNameMap["ClusterID"], cluster.Cluster.ID),
@@ -676,7 +699,8 @@ func (s *stats) execStatements(
 				sql.Named(base.UnitsDBTableStructFieldColNameMap["Ignore"], unit.Ignore),
 				sql.Named(base.UnitsDBTableStructFieldColNameMap["NumUpdates"], 1),
 				sql.Named(base.UnitsDBTableStructFieldColNameMap["LastUpdatedAt"], currentTime.Format(base.DatetimeLayout)),
-			); err != nil {
+			)
+			if err != nil {
 				s.logger.Error("Failed to insert unit in DB", "cluster_id", cluster.Cluster.ID, "uuid", unit.UUID, "err", err)
 			}
 
@@ -689,7 +713,7 @@ func (s *stats) execStatements(
 
 			// Update Usage table
 			// Use named parameters to not to repeat the values
-			if _, err = stmts[base.UsageDBTableName].ExecContext(
+			_, err = stmts[base.UsageDBTableName].ExecContext(
 				ctx,
 				sql.Named(base.UsageDBTableStructFieldColNameMap["ResourceManager"], unit.ResourceManager),
 				sql.Named(base.UsageDBTableStructFieldColNameMap["ClusterID"], cluster.Cluster.ID),
@@ -712,13 +736,14 @@ func (s *stats) execStatements(
 				sql.Named(base.UsageDBTableStructFieldColNameMap["TotalIngressStats"], unit.TotalIngressStats),
 				sql.Named(base.UsageDBTableStructFieldColNameMap["TotalEgressStats"], unit.TotalEgressStats),
 				sql.Named(base.UsageDBTableStructFieldColNameMap["NumUpdates"], 1),
-			); err != nil {
+			)
+			if err != nil {
 				s.logger.Error("Failed to update usage table in DB", "cluster_id", cluster.Cluster.ID, "uuid", unit.UUID, "err", err)
 			}
 
 			// Update DailyUsage table
 			// Use named parameters to not to repeat the values
-			if _, err = stmts[base.DailyUsageDBTableName].ExecContext(
+			_, err = stmts[base.DailyUsageDBTableName].ExecContext(
 				ctx,
 				sql.Named(base.UsageDBTableStructFieldColNameMap["ResourceManager"], unit.ResourceManager),
 				sql.Named(base.UsageDBTableStructFieldColNameMap["ClusterID"], cluster.Cluster.ID),
@@ -741,7 +766,8 @@ func (s *stats) execStatements(
 				sql.Named(base.UsageDBTableStructFieldColNameMap["TotalIngressStats"], unit.TotalIngressStats),
 				sql.Named(base.UsageDBTableStructFieldColNameMap["TotalEgressStats"], unit.TotalEgressStats),
 				sql.Named(base.UsageDBTableStructFieldColNameMap["NumUpdates"], 1),
-			); err != nil {
+			)
+			if err != nil {
 				s.logger.Error("Failed to update daily_usage table in DB", "cluster_id", cluster.Cluster.ID, "uuid", unit.UUID, "err", err)
 			}
 		}
@@ -750,7 +776,7 @@ func (s *stats) execStatements(
 	// Update users
 	for _, cluster := range clusterUsers {
 		for _, user := range cluster.Users {
-			if _, err = stmts[base.UsersDBTableName].ExecContext(
+			_, err = stmts[base.UsersDBTableName].ExecContext(
 				ctx,
 				sql.Named(base.UsersDBTableStructFieldColNameMap["ClusterID"], cluster.Cluster.ID),
 				sql.Named(base.UsersDBTableStructFieldColNameMap["ResourceManager"], cluster.Cluster.Manager),
@@ -759,7 +785,8 @@ func (s *stats) execStatements(
 				sql.Named(base.UsersDBTableStructFieldColNameMap["Projects"], user.Projects),
 				sql.Named(base.UsersDBTableStructFieldColNameMap["Tags"], user.Tags),
 				sql.Named(base.UsersDBTableStructFieldColNameMap["LastUpdatedAt"], user.LastUpdatedAt),
-			); err != nil {
+			)
+			if err != nil {
 				s.logger.Error("Failed to insert user in DB", "cluster_id", cluster.Cluster.ID, "user", user.Name, "err", err)
 			}
 		}
@@ -768,7 +795,7 @@ func (s *stats) execStatements(
 	// Update projects
 	for _, cluster := range clusterProjects {
 		for _, project := range cluster.Projects {
-			if _, err = stmts[base.ProjectsDBTableName].ExecContext(
+			_, err = stmts[base.ProjectsDBTableName].ExecContext(
 				ctx,
 				sql.Named(base.ProjectsDBTableStructFieldColNameMap["ClusterID"], cluster.Cluster.ID),
 				sql.Named(base.ProjectsDBTableStructFieldColNameMap["ResourceManager"], cluster.Cluster.Manager),
@@ -777,7 +804,8 @@ func (s *stats) execStatements(
 				sql.Named(base.ProjectsDBTableStructFieldColNameMap["Users"], project.Users),
 				sql.Named(base.ProjectsDBTableStructFieldColNameMap["Tags"], project.Tags),
 				sql.Named(base.ProjectsDBTableStructFieldColNameMap["LastUpdatedAt"], project.LastUpdatedAt),
-			); err != nil {
+			)
+			if err != nil {
 				s.logger.Error("Failed to insert project in DB", "cluster_id", cluster.Cluster.ID, "project", project.Name, "err", err)
 			}
 		}
@@ -786,7 +814,7 @@ func (s *stats) execStatements(
 	// Update admin users table
 	for _, source := range AdminUsersSources {
 		for _, user := range s.admin.users[source] {
-			if _, err = stmts[base.AdminUsersDBTableName].ExecContext(
+			_, err = stmts[base.AdminUsersDBTableName].ExecContext(
 				ctx,
 				sql.Named(base.AdminUsersDBTableStructFieldColNameMap["ClusterID"], "all"),
 				sql.Named(base.AdminUsersDBTableStructFieldColNameMap["ResourceManager"], ""),
@@ -795,7 +823,8 @@ func (s *stats) execStatements(
 				sql.Named(base.AdminUsersDBTableStructFieldColNameMap["Projects"], models.List{}),
 				sql.Named(base.AdminUsersDBTableStructFieldColNameMap["Tags"], models.List{source}),
 				sql.Named(base.AdminUsersDBTableStructFieldColNameMap["LastUpdatedAt"], currentTime.Format(base.DatetimeLayout)),
-			); err != nil {
+			)
+			if err != nil {
 				s.logger.Error("Failed to update admin_users table in DB", "source", source, "err", err)
 			}
 		}
@@ -813,18 +842,16 @@ func (s *stats) execStatements(
 // Based on https://gist.github.com/bbengfort/452a9d5e74a63d88e5a34a580d6cb6d3
 // Ref: https://github.com/rotationalio/ensign/pull/529/files
 func (s *stats) backup(ctx context.Context, backupDBPath string) error {
-	var backupDBFile *os.File
-
-	var err error
 	// Create a backup DB file
-	if backupDBFile, err = os.Create(backupDBPath); err != nil {
+	backupDBFile, err := os.Create(backupDBPath)
+	if err != nil {
 		return err
 	}
 
 	backupDBFile.Close()
 
 	// Open a second sqlite3 database at the backup location
-	destDB, destConn, err := openDBConnection(backupDBPath)
+	destDB, destConn, err := openDBConnection(ctx, backupDBPath)
 	if err != nil {
 		return err
 	}
@@ -837,7 +864,8 @@ func (s *stats) backup(ctx context.Context, backupDBPath string) error {
 	// NOTE: backup.Finish() MUST be called to prevent panics.
 	var backup *sqlite3.SQLiteBackup
 
-	if backup, err = destConn.Backup(sqlite3Main, s.dbConn, sqlite3Main); err != nil {
+	backup, err = destConn.Backup(sqlite3Main, s.dbConn, sqlite3Main)
+	if err != nil {
 		return err
 	}
 
@@ -855,8 +883,10 @@ func (s *stats) backup(ctx context.Context, backupDBPath string) error {
 		default:
 			// Backing up a smaller number of pages per step is the most effective way of
 			// doing online backups and also allow write transactions to make progress.
-			if isDone, err = backup.Step(pagesPerStep); err != nil {
-				if finishErr := backup.Finish(); finishErr != nil {
+			isDone, err = backup.Step(pagesPerStep)
+			if err != nil {
+				finishErr := backup.Finish()
+				if finishErr != nil {
 					return fmt.Errorf("errors: %w, %w", err, finishErr)
 				}
 
@@ -877,7 +907,8 @@ func (s *stats) backup(ctx context.Context, backupDBPath string) error {
 func (s *stats) vacuum(ctx context.Context) error {
 	s.logger.Debug("Starting to vacuum DB")
 
-	if _, err := s.db.ExecContext(ctx, "VACUUM"); err != nil {
+	_, err := s.db.ExecContext(ctx, "VACUUM")
+	if err != nil {
 		return err
 	}
 
@@ -890,7 +921,8 @@ func (s *stats) createBackup(ctx context.Context) error {
 	defer common.TimeTrack(time.Now(), "DB backup", s.logger)
 
 	// First vacuum DB to reduce size
-	if err := s.vacuum(ctx); err != nil {
+	err := s.vacuum(ctx)
+	if err != nil {
 		s.logger.Warn("Failed to vacuum DB", "err", err)
 	} else {
 		s.logger.Debug("DB vacuumed")
@@ -905,12 +937,14 @@ func (s *stats) createBackup(ctx context.Context) error {
 	)
 	backupDBFilePath := filepath.Join(filepath.Dir(s.storage.dbPath), backupDBFileName)
 
-	if err := s.backup(ctx, backupDBFilePath); err != nil {
+
+	err = s.backup(ctx, backupDBFilePath)
+	if err != nil {
 		return err
 	}
 
 	// If backup is successful, move it to dbBackupPath
-	err := os.Rename(backupDBFilePath, filepath.Join(s.storage.dbBackupPath, backupDBFileName))
+	err = os.Rename(backupDBFilePath, filepath.Join(s.storage.dbBackupPath, backupDBFileName))
 	if err != nil {
 		return fmt.Errorf("failed to move backup DB file: %w", err)
 	}
diff --git a/pkg/api/db/db_test.go b/pkg/api/db/db_test.go
index 05fb8152..5354e6ff 100644
--- a/pkg/api/db/db_test.go
+++ b/pkg/api/db/db_test.go
@@ -548,11 +548,13 @@ func prepareMockConfig(tmpDir string) (*Config, error) {
 	dataBackupDir := filepath.Join(tmpDir, "data-backup")
 
 	// Create data directory
-	if err := os.Mkdir(dataDir, 0o750); err != nil {
+	err := os.Mkdir(dataDir, 0o750)
+	if err != nil {
 		return nil, fmt.Errorf("Failed to create data directory: %w", err)
 	}
 
-	if err := os.Mkdir(dataBackupDir, 0o750); err != nil {
+	err = os.Mkdir(dataBackupDir, 0o750)
+	if err != nil {
 		return nil, fmt.Errorf("Failed to create data backup directory: %w", err)
 	}
 
@@ -583,7 +585,7 @@ func prepareMockConfig(tmpDir string) (*Config, error) {
 }
 
 func populateDBWithMockData(ctx context.Context, s *stats) error {
-	tx, err := s.db.Begin()
+	tx, err := s.db.BeginTx(ctx, nil)
 	if err != nil {
 		return err
 	}
@@ -615,7 +617,7 @@ func TestNewUnitStatsDB(t *testing.T) {
 	require.NoError(t, err, "DB file not created")
 
 	// Insert a dummy entry into DB
-	_, err = s.db.Exec(`INSERT INTO usage(last_updated_at) VALUES ("2023-12-20T00:00:00")`)
+	_, err = s.db.ExecContext(t.Context(), `INSERT INTO usage(last_updated_at) VALUES ("2023-12-20T00:00:00")`)
 	require.NoError(t, err, "failed to insert dummy entry into DB")
 
 	s.Stop()
@@ -647,6 +649,7 @@ func TestUnitStatsDBEntries(t *testing.T) {
 	// Fetch units
 	var expectedUnits []models.ClusterUnits
+
 	expectedUnits = append(expectedUnits, mockUnitsOne...)
 	expectedUnits = append(expectedUnits, mockUnitsTwo...)
fetchedUnits, err := s.manager.FetchUnits(ctx, time.Now(), time.Now()) @@ -658,12 +661,14 @@ func TestUnitStatsDBEntries(t *testing.T) { require.NoError(t, err, "failed to collect units data") // Make units query - rows, err := s.db.Query( + rows, err := s.db.QueryContext( + t.Context(), "SELECT uuid,username,project,total_time_seconds,avg_cpu_usage,avg_cpu_mem_usage,total_cpu_energy_usage_kwh,total_cpu_emissions_gms,avg_gpu_usage,avg_gpu_mem_usage,total_gpu_energy_usage_kwh,total_gpu_emissions_gms FROM units ORDER BY uuid", ) require.NoError(t, err, "failed to make DB query") defer rows.Close() + require.NoError(t, rows.Err()) var units []models.Unit @@ -671,12 +676,13 @@ func TestUnitStatsDBEntries(t *testing.T) { for rows.Next() { var unit models.Unit - if err = rows.Scan( + err = rows.Scan( &unit.UUID, &unit.User, &unit.Project, &unit.TotalTime, &unit.AveCPUUsage, &unit.AveCPUMemUsage, &unit.TotalCPUEnergyUsage, &unit.TotalCPUEmissions, &unit.AveGPUUsage, &unit.AveGPUMemUsage, - &unit.TotalGPUEnergyUsage, &unit.TotalGPUEmissions); err != nil { + &unit.TotalGPUEnergyUsage, &unit.TotalGPUEmissions) + if err != nil { t.Errorf("failed to scan row: %s", err) } @@ -684,6 +690,7 @@ func TestUnitStatsDBEntries(t *testing.T) { } var mockUpdatedUnits []models.ClusterUnits + mockUpdatedUnits = append(mockUpdatedUnits, mockUpdatedUnitsSlurm01...) mockUpdatedUnits = append(mockUpdatedUnits, mockUpdatedUnitsSlurm1...) mockUpdatedUnits = append(mockUpdatedUnits, mockUpdatedUnitsOS0...) @@ -698,12 +705,14 @@ func TestUnitStatsDBEntries(t *testing.T) { assert.ElementsMatch(t, units, expectedUpdatedUnits, "expected and got updated cluster units differ") // Make usage query - rows, err = s.db.Query( + rows, err = s.db.QueryContext( + t.Context(), "SELECT avg_cpu_usage,num_updates FROM usage WHERE username = 'foo1' AND cluster_id = 'slurm-0'", ) require.NoError(t, err, "failed to make DB query") defer rows.Close() + require.NoError(t, rows.Err()) // // For debugging @@ -718,7 +727,8 @@ func TestUnitStatsDBEntries(t *testing.T) { var numUpdates int64 for rows.Next() { - if err = rows.Scan(&cpuUsage, &numUpdates); err != nil { + err = rows.Scan(&cpuUsage, &numUpdates) + if err != nil { t.Errorf("failed to scan row: %s", err) } } @@ -727,17 +737,20 @@ func TestUnitStatsDBEntries(t *testing.T) { assert.InEpsilon(t, 15, float64(cpuUsage["usage"]), 0, "expected cpuUsage = 15") // Make projects query - rows, err = s.db.Query( + rows, err = s.db.QueryContext( + t.Context(), "SELECT users FROM projects WHERE name = 'fooprj' AND cluster_id = 'slurm-0'", ) require.NoError(t, err, "Failed to make DB query") defer rows.Close() + require.NoError(t, rows.Err()) var users models.List for rows.Next() { - if err = rows.Scan(&users); err != nil { + err = rows.Scan(&users) + if err != nil { t.Errorf("failed to scan row: %s", err) } } @@ -780,6 +793,7 @@ func TestUnitStatsDBEntriesHistorical(t *testing.T) { // Fetch units var expectedUnits []models.ClusterUnits + expectedUnits = append(expectedUnits, mockUnitsOne...) expectedUnits = append(expectedUnits, mockUnitsTwo...) 
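The test hunks on either side of this point all converge on the same database/sql shape: a context-bound query, a scan loop, and an explicit rows.Err() check. A generic sketch of that shape, with placeholder table and column names rather than CEEMS code:

package sketch

import (
	"context"
	"database/sql"
)

// unitUUIDs shows the context-bound query, scan loop and rows.Err() check
// that the linted tests adopt. Table and column names are placeholders.
func unitUUIDs(ctx context.Context, db *sql.DB) ([]string, error) {
	rows, err := db.QueryContext(ctx, "SELECT uuid FROM units ORDER BY uuid")
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var uuids []string

	for rows.Next() {
		var uuid string

		err = rows.Scan(&uuid)
		if err != nil {
			return nil, err
		}

		uuids = append(uuids, uuid)
	}

	// Errors that terminated iteration only surface here
	return uuids, rows.Err()
}

database/sql reports errors that end iteration only through rows.Err() once Next() returns false, so the check after the loop is the conventional placement.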
 	fetchedUnits, err := s.manager.FetchUnits(ctx, time.Now(), time.Now())
@@ -791,12 +805,14 @@ func TestUnitStatsDBEntriesHistorical(t *testing.T) {
 	require.NoError(t, err, "Failed to collect units data")
 
 	// Make units query
-	rows, err := s.db.Query(
+	rows, err := s.db.QueryContext(
+		t.Context(),
 		"SELECT uuid,username,project,total_time_seconds,avg_cpu_usage,avg_cpu_mem_usage,total_cpu_energy_usage_kwh,total_cpu_emissions_gms,avg_gpu_usage,avg_gpu_mem_usage,total_gpu_energy_usage_kwh,total_gpu_emissions_gms FROM units ORDER BY uuid",
 	)
 	require.NoError(t, err, "Failed to make DB query")
 
 	defer rows.Close()
+	require.NoError(t, rows.Err())
 
 	var units []models.Unit
 
@@ -804,12 +820,13 @@ func TestUnitStatsDBEntriesHistorical(t *testing.T) {
 	for rows.Next() {
 		var unit models.Unit
-		if err = rows.Scan(
+		err = rows.Scan(
 			&unit.UUID, &unit.User, &unit.Project, &unit.TotalTime,
 			&unit.AveCPUUsage, &unit.AveCPUMemUsage, &unit.TotalCPUEnergyUsage,
 			&unit.TotalCPUEmissions, &unit.AveGPUUsage, &unit.AveGPUMemUsage,
-			&unit.TotalGPUEnergyUsage, &unit.TotalGPUEmissions); err != nil {
+			&unit.TotalGPUEnergyUsage, &unit.TotalGPUEmissions)
+		if err != nil {
 			t.Errorf("failed to scan row: %s", err)
 		}
 
@@ -817,6 +834,7 @@ func TestUnitStatsDBEntriesHistorical(t *testing.T) {
 	}
 
 	var mockUpdatedUnits []models.ClusterUnits
+
 	mockUpdatedUnits = append(mockUpdatedUnits, mockUpdatedUnitsSlurm01...)
 	mockUpdatedUnits = append(mockUpdatedUnits, mockUpdatedUnitsSlurm1...)
 	mockUpdatedUnits = append(mockUpdatedUnits, mockUpdatedUnitsOS0...)
@@ -842,16 +860,17 @@ func TestUnitStatsDBLock(t *testing.T) {
 	// Make new stats DB
 	s, err := New(c)
 	defer s.Stop()
+
 	require.NoError(t, err, "Failed to create new stats")
 
 	// Begin an exclusive transaction to lock DB
-	_, err = s.db.Exec("BEGIN EXCLUSIVE")
+	_, err = s.db.ExecContext(t.Context(), "BEGIN EXCLUSIVE")
 	require.NoError(t, err)
 
 	// Try to insert data. It should fail
 	err = s.Collect(t.Context())
 	require.Error(t, err, "expected error due to DB lock")
 
-	s.db.Exec("COMMIT")
+	s.db.ExecContext(t.Context(), "COMMIT")
 }
 
 func TestUnitStatsDBVacuum(t *testing.T) {
@@ -862,6 +881,7 @@ func TestUnitStatsDBVacuum(t *testing.T) {
 	// Make new stats DB
 	s, err := New(c)
 	defer s.Stop()
+
 	require.NoError(t, err, "Failed to create new stats")
 
 	// Populate DB with data
@@ -888,6 +908,7 @@ func TestUnitStatsDBBackup(t *testing.T) {
 	// Make new stats DB
 	s, err := New(c)
 	defer s.Stop()
+
 	require.NoError(t, err, "Failed to create new stats")
 
 	// Populate DB with data
@@ -913,15 +934,16 @@ func TestUnitStatsDBBackup(t *testing.T) {
 	// Check contents of backed up DB
 	var numRows int
 
-	db, _, err := openDBConnection(expectedBackupFile)
+	db, _, err := openDBConnection(t.Context(), expectedBackupFile)
 	if err != nil {
 		t.Errorf("Failed to create DB connection to backup DB: %s", err)
 	}
 
-	rows, err := db.Query("SELECT * FROM " + base.UnitsDBTableName) //nolint:gosec
+	rows, err := db.QueryContext(t.Context(), "SELECT * FROM "+base.UnitsDBTableName) //nolint:gosec
 	require.NoError(t, err)
 
 	defer rows.Close()
+	require.NoError(t, rows.Err())
 
 	for rows.Next() {
@@ -940,7 +962,8 @@ func TestAdminUsersDBUpdate(t *testing.T) {
 	t.Setenv("GRAFANA_API_TOKEN", "foo")
 
 	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		if err := json.NewEncoder(w).Encode(&expected); err != nil {
+		err := json.NewEncoder(w).Encode(&expected)
+		if err != nil {
 			w.Write([]byte("KO"))
 		}
 	}))
@@ -953,6 +976,7 @@ func TestAdminUsersDBUpdate(t *testing.T) {
 	// Make new stats DB
 	s, err := New(c)
 	defer s.Stop()
+
 	require.NoError(t, err, "failed to create new stats")
 
 	// Make backup dir non-existent
@@ -985,6 +1009,7 @@ func TestStatsDBBackup(t *testing.T) {
 	// Make new stats DB
 	s, err := New(c)
 	defer s.Stop()
+
 	require.NoError(t, err, "failed to create new stats")
 
 	// Make backup dir non-existent
@@ -1008,6 +1033,7 @@ func TestUnitStatsDeleteOldUnits(t *testing.T) {
 	// Make new stats DB
 	s, err := New(c)
 	defer s.Stop()
+
 	require.NoError(t, err, "failed to create new stats")
 
 	// Add new row that should be deleted
@@ -1025,7 +1051,7 @@ func TestUnitStatsDeleteOldUnits(t *testing.T) {
 		},
 	}
 	ctx := t.Context()
-	tx, err := s.db.Begin()
+	tx, err := s.db.BeginTx(ctx, nil)
 	require.NoError(t, err)
 	// stmtMap, err := s.prepareStatements(ctx, tx)
 	// require.NoError(t, err)
@@ -1038,14 +1064,17 @@ func TestUnitStatsDeleteOldUnits(t *testing.T) {
 	tx.Commit()
 
 	// Query for deleted unit
-	result, err := s.db.Prepare(
+	result, err := s.db.PrepareContext(
+		t.Context(),
 		fmt.Sprintf("SELECT COUNT(uuid) FROM %s WHERE uuid = ?;", base.UnitsDBTableName),
 	)
 	require.NoError(t, err)
+	defer result.Close()
 
 	var numRows int
-	err = result.QueryRow(unitID).Scan(&numRows)
+
+	err = result.QueryRowContext(t.Context(), unitID).Scan(&numRows)
 	require.NoError(t, err, "failed to query DB")
 	assert.Equal(t, 0, numRows, "expected 0 rows after deletion")
 }
diff --git a/pkg/api/db/helpers.go b/pkg/api/db/helpers.go
index dce52994..50f6ff0f 100644
--- a/pkg/api/db/helpers.go
+++ b/pkg/api/db/helpers.go
@@ -4,6 +4,7 @@ package db
 
 import (
+	"context"
 	"database/sql"
 	"fmt"
 	"os"
@@ -40,7 +41,7 @@ func makeDSN(filePath string, opts map[string]string) string {
 }
 
 // Open DB connection and return connection pointer.
-func openDBConnection(dbFilePath string) (*sql.DB, *ceems_sqlite3.Conn, error) { +func openDBConnection(ctx context.Context, dbFilePath string) (*sql.DB, *ceems_sqlite3.Conn, error) { var db *sql.DB var dbConn *ceems_sqlite3.Conn @@ -49,11 +50,13 @@ func openDBConnection(dbFilePath string) (*sql.DB, *ceems_sqlite3.Conn, error) { var ok bool - if db, err = sql.Open(ceems_sqlite3.DriverName, makeDSN(dbFilePath, defaultOpts)); err != nil { + db, err = sql.Open(ceems_sqlite3.DriverName, makeDSN(dbFilePath, defaultOpts)) + if err != nil { return nil, nil, err } - if err = db.Ping(); err != nil { + err = db.PingContext(ctx) + if err != nil { return nil, nil, err } @@ -65,10 +68,11 @@ func openDBConnection(dbFilePath string) (*sql.DB, *ceems_sqlite3.Conn, error) { } // Setup DB and create table. -func setupDB(dbFilePath string) (*sql.DB, *ceems_sqlite3.Conn, error) { - if _, err := os.Stat(dbFilePath); err == nil { +func setupDB(ctx context.Context, dbFilePath string) (*sql.DB, *ceems_sqlite3.Conn, error) { + _, err := os.Stat(dbFilePath) + if err == nil { // Open the created SQLite File - db, dbConn, err := openDBConnection(dbFilePath) + db, dbConn, err := openDBConnection(ctx, dbFilePath) if err != nil { return nil, nil, fmt.Errorf("failed to open DB file: %w", err) } @@ -85,12 +89,13 @@ func setupDB(dbFilePath string) (*sql.DB, *ceems_sqlite3.Conn, error) { file.Close() // Set strict permissions - if err := os.Chmod(dbFilePath, 0o640); err != nil { + err = os.Chmod(dbFilePath, 0o640) + if err != nil { return nil, nil, fmt.Errorf("failed to harden permissions on DB file: %w", err) } // Open the created SQLite File - db, dbConn, err := openDBConnection(dbFilePath) + db, dbConn, err := openDBConnection(ctx, dbFilePath) if err != nil { return nil, nil, fmt.Errorf("failed to open DB connection: %w", err) } diff --git a/pkg/api/db/helpers_test.go b/pkg/api/db/helpers_test.go index d5f37535..4d2021e2 100644 --- a/pkg/api/db/helpers_test.go +++ b/pkg/api/db/helpers_test.go @@ -17,12 +17,12 @@ func TestJobStatsDBPreparation(t *testing.T) { statDBPath := filepath.Join(tmpDir, "stats.db") // Test setupDB function - _, _, err := setupDB(statDBPath) + _, _, err := setupDB(t.Context(), statDBPath) require.NoError(t, err) require.FileExists(t, statDBPath, "DB file not found") // Call setupDB again. 
This should return with db conn - _, _, err = setupDB(statDBPath) + _, _, err = setupDB(t.Context(), statDBPath) require.NoError(t, err, "failed to setup DB on already setup DB") // Check DB file exists diff --git a/pkg/api/db/migrator/migrate.go b/pkg/api/db/migrator/migrate.go index 681f3589..a841b794 100644 --- a/pkg/api/db/migrator/migrate.go +++ b/pkg/api/db/migrator/migrate.go @@ -47,11 +47,13 @@ func (m *Migrator) ApplyMigrations(db *sql.DB) error { m.logger.Info("Applying DB migrations") - if err = migrator.Up(); err != nil && !errors.Is(err, migrate.ErrNoChange) { + err = migrator.Up() + if err != nil && !errors.Is(err, migrate.ErrNoChange) { return fmt.Errorf("unable to apply migrations %w", err) } - if version, dirty, err := migrator.Version(); err != nil { + version, dirty, err := migrator.Version() + if err != nil { m.logger.Error("Failed to get DB migration version", "err", err) } else { m.logger.Debug("Current DB migration version", "version", version, "dirty", dirty) diff --git a/pkg/api/helper/helper.go b/pkg/api/helper/helper.go index 43639bc0..6d11cba5 100644 --- a/pkg/api/helper/helper.go +++ b/pkg/api/helper/helper.go @@ -7,7 +7,8 @@ import ( // TimeToTimestamp converts a date in a given layout to unix timestamp of the date. func TimeToTimestamp(layout string, date string) int64 { - if t, err := time.Parse(layout, date); err == nil { + t, err := time.Parse(layout, date) + if err == nil { return t.UnixMilli() } diff --git a/pkg/api/http/cors_test.go b/pkg/api/http/cors_test.go index a258dce1..2892c720 100644 --- a/pkg/api/http/cors_test.go +++ b/pkg/api/http/cors_test.go @@ -43,6 +43,7 @@ func TestCORSHandler(t *testing.T) { resp, err := client.Do(req) require.NoError(t, err, "client get failed with unexpected error") + defer resp.Body.Close() AccessControlAllowOrigin := resp.Header.Get("Access-Control-Allow-Origin") @@ -57,6 +58,7 @@ func TestCORSHandler(t *testing.T) { resp, err = client.Do(req) require.NoError(t, err, "client get failed with unexpected error") + defer resp.Body.Close() AccessControlAllowOrigin = resp.Header.Get("Access-Control-Allow-Origin") diff --git a/pkg/api/http/error.go b/pkg/api/http/error.go index b2f63977..a3eddbd0 100644 --- a/pkg/api/http/error.go +++ b/pkg/api/http/error.go @@ -96,7 +96,9 @@ func errorResponse[T any](w http.ResponseWriter, apiErr *apiError, logger *slog. 
 		Error: apiErr.err.Error(),
 		Data: data,
 	}
-	if err := json.NewEncoder(w).Encode(&response); err != nil {
+
+	err := json.NewEncoder(w).Encode(&response)
+	if err != nil {
 		logger.Error("Failed to encode response", "err", err)
 		w.Write([]byte("KO"))
 	}
diff --git a/pkg/api/http/querier.go b/pkg/api/http/querier.go
index a755c8c4..52eda941 100644
--- a/pkg/api/http/querier.go
+++ b/pkg/api/http/querier.go
@@ -93,13 +93,15 @@ func scanRows[T any](rows *sql.Rows, numRows int) ([]T, error) {
 	indexes := structset.CachedFieldIndexes(reflect.TypeOf(&value).Elem())
 
 	// Get columns
-	if columns, err = rows.Columns(); err != nil {
+	columns, err = rows.Columns()
+	if err != nil {
 		return nil, fmt.Errorf("cannot fetch columns: %w", err)
 	}
 
 	// Scan each row
 	for rows.Next() {
-		if err := structset.ScanRow(rows, columns, indexes, &value); err != nil {
+		err := structset.ScanRow(rows, columns, indexes, &value)
+		if err != nil {
 			scanErrs++
 		}
 
@@ -120,7 +122,8 @@ func scanRows[T any](rows *sql.Rows, numRows int) ([]T, error) {
 	// Ref: http://go-database-sql.org/errors.html
 	// Get all the errors during iteration
-	if errRows := rows.Err(); errRows != nil {
+	errRows := rows.Err()
+	if errRows != nil {
 		err = errors.Join(err, errRows)
 	}
 
@@ -136,7 +139,7 @@ func countRows(ctx context.Context, dbConn *sql.DB, query Query) (int, error) {
 	// Prepare SQL statements
 	countQuery := queryRegexp.ReplaceAllString(queryString, "SELECT COUNT(*) FROM $2")
 
-	countStmt, err := dbConn.Prepare(countQuery)
+	countStmt, err := dbConn.PrepareContext(ctx, countQuery)
 	if err != nil {
 		return 0, err
 	}
@@ -166,7 +169,8 @@ func countRows(ctx context.Context, dbConn *sql.DB, query Query) (int, error) {
 	for countRows.Next() {
 		irow++
 
-		if err := countRows.Scan(&numRows); err != nil {
+		err := countRows.Scan(&numRows)
+		if err != nil {
 			continue
 		}
 	}
@@ -183,7 +187,8 @@ func Querier[T any](ctx context.Context, dbConn *sql.DB, query Query, logger *sl
 	// If requested model is units, get number of rows
 	switch any(*new(T)).(type) {
 	case models.Unit:
-		if numRows, err = countRows(ctx, dbConn, query); err != nil {
+		numRows, err = countRows(ctx, dbConn, query)
+		if err != nil {
 			logger.Error("Failed to get rows count", "err", err)
 
 			return nil, err
@@ -195,7 +200,7 @@ func Querier[T any](ctx context.Context, dbConn *sql.DB, query Query, logger *sl
 	// Get query string and params
 	queryString, queryParams := query.get()
 
-	queryStmt, err := dbConn.Prepare(queryString)
+	queryStmt, err := dbConn.PrepareContext(ctx, queryString)
 	if err != nil {
 		logger.Error("Failed to prepare query statement",
 			"query", queryString, "queryParams", strings.Join(queryParams, ","), "err", err,
diff --git a/pkg/api/http/querier_test.go b/pkg/api/http/querier_test.go
index 4baaf4ab..dfdfe5d4 100644
--- a/pkg/api/http/querier_test.go
+++ b/pkg/api/http/querier_test.go
@@ -37,6 +37,7 @@ func TestUnitsQuerier(t *testing.T) {
 	db, err := setupTestDB()
 	require.NoError(t, err, "failed to setup test DB")
+
 	defer db.Close()
 
 	// Query
@@ -174,6 +175,7 @@ func TestUnitsQuerier(t *testing.T) {
 func TestUsageQuerier(t *testing.T) {
 	db, err := setupTestDB()
 	require.NoError(t, err, "failed to setup test DB")
+
 	defer db.Close()
 
 	// Query
@@ -225,6 +227,7 @@ func TestUsageQuerier(t *testing.T) {
 func TestProjectQuerier(t *testing.T) {
 	db, err := setupTestDB()
 	require.NoError(t, err, "failed to setup test DB")
+
 	defer db.Close()
 
 	// Query
@@ -254,6 +257,7 @@ func TestProjectQuerier(t *testing.T) {
 func TestUserQuerier(t *testing.T) {
 	db, err := setupTestDB()
 	require.NoError(t, err, "failed to setup test DB")
+
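The querier changes above apply one rule throughout: every Prepare, Query and Exec call takes the caller's context. A minimal sketch of the prepared-statement variant, with hypothetical names; as with the //nolint:gosec query above, the table argument must come from a trusted constant:

package sketch

import (
	"context"
	"database/sql"
	"fmt"
)

// countUnits prepares the statement against the caller's context, closes it
// when done, and runs the row query with the same context.
func countUnits(ctx context.Context, db *sql.DB, table string) (int, error) {
	stmt, err := db.PrepareContext(ctx, fmt.Sprintf("SELECT COUNT(*) FROM %s", table))
	if err != nil {
		return 0, err
	}
	defer stmt.Close()

	var count int

	err = stmt.QueryRowContext(ctx).Scan(&count)
	if err != nil {
		return 0, err
	}

	return count, nil
}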
defer db.Close() // Query @@ -283,6 +287,7 @@ func TestUserQuerier(t *testing.T) { func TestClusterQuerier(t *testing.T) { db, err := setupTestDB() require.NoError(t, err, "failed to setup test DB") + defer db.Close() // Query @@ -307,6 +312,7 @@ func TestClusterQuerier(t *testing.T) { func TestStatsQuerier(t *testing.T) { db, err := setupTestDB() require.NoError(t, err, "failed to setup test DB") + defer db.Close() // Query @@ -332,6 +338,7 @@ func TestStatsQuerier(t *testing.T) { func TestKeysQuerier(t *testing.T) { db, err := setupTestDB() require.NoError(t, err, "failed to setup test DB") + defer db.Close() // Query diff --git a/pkg/api/http/server.go b/pkg/api/http/server.go index b4706710..07352872 100644 --- a/pkg/api/http/server.go +++ b/pkg/api/http/server.go @@ -82,7 +82,8 @@ func (c *WebConfig) UnmarshalYAML(unmarshal func(any) error) error { type plain WebConfig - if err := unmarshal((*plain)(c)); err != nil { + err := unmarshal((*plain)(c)) + if err != nil { return err } @@ -127,7 +128,7 @@ type CEEMSServer struct { maxQueryPeriod time.Duration queriers queriers usageCache *ttlcache.Cache[uint64, []models.Usage] // Cache that stores usage query results - healthCheck func(*sql.DB, *slog.Logger) bool + healthCheck func(context.Context, *sql.DB, *slog.Logger) bool } // Response defines the response model of CEEMSAPIServer. @@ -174,8 +175,9 @@ func init() { } // Ping DB for connection test. -func getDBStatus(dbConn *sql.DB, logger *slog.Logger) bool { - if err := dbConn.Ping(); err != nil { +func getDBStatus(ctx context.Context, dbConn *sql.DB, logger *slog.Logger) bool { + err := dbConn.PingContext(ctx) + if err != nil { logger.Error("DB Ping failed", "err", err) return false @@ -285,7 +287,9 @@ func New(c *Config) (*CEEMSServer, error) { filepath.Join(c.DB.Data.Path, base.CEEMSDBName), "_mutex=no&mode=ro&_busy_timeout=5000", ) - if server.db, err = sql.Open(sqlite3.DriverName, dsn); err != nil { + + server.db, err = sql.Open(sqlite3.DriverName, dsn) + if err != nil { return nil, fmt.Errorf("failed to open DB: %w", err) } @@ -389,13 +393,15 @@ func (s *CEEMSServer) Start(_ context.Context) error { } // If externalURL is not set, ensure the server address is of good format. - if host, port, err := net.SplitHostPort(docs.SwaggerInfo.Host); err == nil && host == "" { + host, port, err := net.SplitHostPort(docs.SwaggerInfo.Host) + if err == nil && host == "" { docs.SwaggerInfo.Host = "localhost:" + port } s.logger.Info("Starting " + base.CEEMSServerAppName) - if err := web.ListenAndServe(s.server, s.webConfig, s.logger); err != nil && !errors.Is(err, http.ErrServerClosed) { + err = web.ListenAndServe(s.server, s.webConfig, s.logger) + if err != nil && !errors.Is(err, http.ErrServerClosed) { s.logger.Error("Failed to Listen and Serve HTTP server", "err", err) return err @@ -407,14 +413,16 @@ func (s *CEEMSServer) Start(_ context.Context) error { // Shutdown server. 
func (s *CEEMSServer) Shutdown(ctx context.Context) error { // Close DB connection - if err := s.db.Close(); err != nil { + err := s.db.Close() + if err != nil { s.logger.Error("Failed to close DB connection", "err", err) return err } // Shutdown the server - if err := s.server.Shutdown(ctx); err != nil { + err = s.server.Shutdown(ctx) + if err != nil { s.logger.Error("Failed to shutdown HTTP server", "err", err) return err @@ -453,14 +461,15 @@ func (s *CEEMSServer) setWriteDeadline(deadline time.Duration, w http.ResponseWr rc := http.NewResponseController(w) // Set write deadline to this request - if err := rc.SetWriteDeadline(time.Now().Add(deadline)); err != nil { + err := rc.SetWriteDeadline(time.Now().Add(deadline)) + if err != nil { s.logger.Error("Failed to set write deadline", "err", err) } } // Check status of server. func (s *CEEMSServer) health(w http.ResponseWriter, r *http.Request) { - if !s.healthCheck(s.db, s.logger) { + if !s.healthCheck(r.Context(), s.db, s.logger) { w.Header().Set("X-Content-Type-Options", "nosniff") w.WriteHeader(http.StatusServiceUnavailable) w.Write([]byte("KO")) @@ -513,7 +522,8 @@ func (s *CEEMSServer) timeLocation(l string) *time.Location { if l == "" { return s.dbConfig.Data.Timezone.Location } else { - if loc, err := time.LoadLocation(l); err != nil { + loc, err := time.LoadLocation(l) + if err != nil { return s.dbConfig.Data.Timezone.Location } else { return loc @@ -530,7 +540,8 @@ func (s *CEEMSServer) getQueryWindow(r *http.Request, column string, running boo // Get to and from query parameters and do checks on them if f := q.Get("from"); f != "" { // Return error response if from is not a timestamp - if ts, err := strconv.ParseInt(f, 10, 64); err != nil { + ts, err := strconv.ParseInt(f, 10, 64) + if err != nil { s.logger.Error("Failed to parse from timestamp", "from", f, "err", err) return Query{}, fmt.Errorf("query parameter 'from': %w", ErrMalformedTimeStamp) @@ -541,7 +552,8 @@ func (s *CEEMSServer) getQueryWindow(r *http.Request, column string, running boo if t := q.Get("to"); t != "" { // Return error response if to is not a timestamp - if ts, err := strconv.ParseInt(t, 10, 64); err != nil { + ts, err := strconv.ParseInt(t, 10, 64) + if err != nil { s.logger.Error("Failed to parse to timestamp", "to", t, "err", err) return Query{}, fmt.Errorf("query parameter 'to': %w", ErrMalformedTimeStamp) @@ -622,7 +634,8 @@ func (s *CEEMSServer) roundQueryWindow(r *http.Request) error { ) } else { // Return error response if from is not a timestamp - if ts, err := strconv.ParseInt(f, 10, 64); err != nil { + ts, err := strconv.ParseInt(f, 10, 64) + if err != nil { s.logger.Error("Failed to parse from timestamp", "from", f, "err", err) return fmt.Errorf("query parameter 'from': %w", ErrMalformedTimeStamp) @@ -644,7 +657,8 @@ func (s *CEEMSServer) roundQueryWindow(r *http.Request) error { ) } else { // Return error response if from is not a timestamp - if ts, err := strconv.ParseInt(t, 10, 64); err != nil { + ts, err := strconv.ParseInt(t, 10, 64) + if err != nil { s.logger.Error("Failed to parse from timestamp", "to", t, "err", err) return fmt.Errorf("query parameter 'to': %w", ErrMalformedTimeStamp) @@ -790,7 +804,8 @@ queryUnits: response.Warnings = append(response.Warnings, err.Error()) } - if err = json.NewEncoder(w).Encode(&response); err != nil { + err = json.NewEncoder(w).Encode(&response) + if err != nil { s.logger.Error("Failed to encode response", "err", err) w.Write([]byte("KO")) } @@ -974,7 +989,9 @@ func (s *CEEMSServer) 
verifyUnitsOwnership(w http.ResponseWriter, r *http.Reques response := Response[string]{ Status: "success", } - if err := json.NewEncoder(w).Encode(&response); err != nil { + + err := json.NewEncoder(w).Encode(&response) + if err != nil { s.logger.Error("Failed to encode response", "err", err) w.Write([]byte("KO")) } @@ -1044,7 +1061,8 @@ func (s *CEEMSServer) clustersAdmin(w http.ResponseWriter, r *http.Request) { clusterIDsResponse.Warnings = append(clusterIDsResponse.Warnings, err.Error()) } - if err = json.NewEncoder(w).Encode(&clusterIDsResponse); err != nil { + err = json.NewEncoder(w).Encode(&clusterIDsResponse) + if err != nil { s.logger.Error("Failed to encode response", "err", err) w.Write([]byte("KO")) } @@ -1079,7 +1097,8 @@ func (s *CEEMSServer) adminUsersQuerier(w http.ResponseWriter, r *http.Request) usersResponse.Warnings = append(usersResponse.Warnings, err.Error()) } - if err = json.NewEncoder(w).Encode(&usersResponse); err != nil { + err = json.NewEncoder(w).Encode(&usersResponse) + if err != nil { s.logger.Error("Failed to encode response", "err", err) w.Write([]byte("KO")) } @@ -1131,7 +1150,8 @@ func (s *CEEMSServer) usersQuerier(users []string, w http.ResponseWriter, r *htt usersResponse.Warnings = append(usersResponse.Warnings, err.Error()) } - if err = json.NewEncoder(w).Encode(&usersResponse); err != nil { + err = json.NewEncoder(w).Encode(&usersResponse) + if err != nil { s.logger.Error("Failed to encode response", "err", err) w.Write([]byte("KO")) } @@ -1277,7 +1297,8 @@ func (s *CEEMSServer) projectsQuerier(users []string, w http.ResponseWriter, r * projectsResponse.Warnings = append(projectsResponse.Warnings, err.Error()) } - if err = json.NewEncoder(w).Encode(&projectsResponse); err != nil { + err = json.NewEncoder(w).Encode(&projectsResponse) + if err != nil { s.logger.Error("Failed to encode response", "err", err) w.Write([]byte("KO")) } @@ -1396,7 +1417,8 @@ func (s *CEEMSServer) aggQueryBuilder( tmpl := template.Must(template.New(metric).Parse(aggUsageQueries[metric])) query := &bytes.Buffer{} - if err := tmpl.Execute(query, data); err != nil { + err = tmpl.Execute(query, data) + if err != nil { s.logger.Error("Failed to execute query template", "metric", metric, "err", err) return "" @@ -1427,7 +1449,8 @@ func (s *CEEMSServer) currentUsage(users []string, fields []string, w http.Respo var err, qErrs error // Round `to` and `from` query parameters to cacheTTL - if err := s.roundQueryWindow(r); err != nil { + err = s.roundQueryWindow(r) + if err != nil { errorResponse[any](w, &apiError{errorBadData, err}, s.logger, nil) return @@ -1478,7 +1501,9 @@ func (s *CEEMSServer) currentUsage(users []string, fields []string, w http.Respo queryParts[i] = query } else { mu.Lock() + qErrs = errors.Join(fmt.Errorf("failed to build query for %s", field), qErrs) + mu.Unlock() } }(iField, field) @@ -1602,7 +1627,8 @@ writer: usageResponse.Warnings = append(usageResponse.Warnings, err.Error()) } - if err = json.NewEncoder(w).Encode(&usageResponse); err != nil { + err = json.NewEncoder(w).Encode(&usageResponse) + if err != nil { s.logger.Error("Failed to encode response", "err", err) w.Write([]byte("KO")) } @@ -1648,7 +1674,8 @@ func (s *CEEMSServer) globalUsage(users []string, queriedFields []string, w http usageResponse.Warnings = append(usageResponse.Warnings, err.Error()) } - if err = json.NewEncoder(w).Encode(&usageResponse); err != nil { + err = json.NewEncoder(w).Encode(&usageResponse) + if err != nil { s.logger.Error("Failed to encode response", "err", err) 
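The encode-and-log block recurs in every handler in this file; it could plausibly be folded into a single helper along the following lines. This is a sketch, not part of the patch; the "KO" fallback mirrors the handlers above.

package sketch

import (
	"encoding/json"
	"log/slog"
	"net/http"
)

// writeJSON folds the repeated encode-and-log block into one place.
func writeJSON(w http.ResponseWriter, response any, logger *slog.Logger) {
	err := json.NewEncoder(w).Encode(response)
	if err != nil {
		logger.Error("Failed to encode response", "err", err)
		w.Write([]byte("KO"))
	}
}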
w.Write([]byte("KO")) } @@ -1917,7 +1944,8 @@ func (s *CEEMSServer) currentStats(users []string, w http.ResponseWriter, r *htt projectsResponse.Warnings = append(projectsResponse.Warnings, err.Error()) } - if err = json.NewEncoder(w).Encode(&projectsResponse); err != nil { + err = json.NewEncoder(w).Encode(&projectsResponse) + if err != nil { s.logger.Error("Failed to encode response", "err", err) w.Write([]byte("KO")) } @@ -1971,7 +1999,8 @@ func (s *CEEMSServer) globalStats(users []string, w http.ResponseWriter, r *http projectsResponse.Warnings = append(projectsResponse.Warnings, err.Error()) } - if err = json.NewEncoder(w).Encode(&projectsResponse); err != nil { + err = json.NewEncoder(w).Encode(&projectsResponse) + if err != nil { s.logger.Error("Failed to encode response", "err", err) w.Write([]byte("KO")) } @@ -2099,7 +2128,9 @@ func (s *CEEMSServer) demo(w http.ResponseWriter, r *http.Request) { Status: "success", Data: units, } - if err := json.NewEncoder(w).Encode(&unitsResponse); err != nil { + + err := json.NewEncoder(w).Encode(&unitsResponse) + if err != nil { s.logger.Error("Failed to encode response", "err", err) w.Write([]byte("KO")) } @@ -2115,7 +2146,9 @@ func (s *CEEMSServer) demo(w http.ResponseWriter, r *http.Request) { Status: "success", Data: usage, } - if err := json.NewEncoder(w).Encode(&usageResponse); err != nil { + + err := json.NewEncoder(w).Encode(&usageResponse) + if err != nil { s.logger.Error("Failed to encode response", "err", err) w.Write([]byte("KO")) } @@ -2124,7 +2157,8 @@ func (s *CEEMSServer) demo(w http.ResponseWriter, r *http.Request) { // convertTimeLocation converts time from source location to target location. func convertTimeLocation(sourceLoc *time.Location, targetLoc *time.Location, val string) string { - if t, err := time.ParseInLocation(base.DatetimezoneLayout, val, sourceLoc); err == nil { + t, err := time.ParseInLocation(base.DatetimezoneLayout, val, sourceLoc) + if err == nil { return t.In(targetLoc).Format(base.DatetimezoneLayout) } diff --git a/pkg/api/http/validation_test.go b/pkg/api/http/validation_test.go index c5658f59..2fbdba96 100644 --- a/pkg/api/http/validation_test.go +++ b/pkg/api/http/validation_test.go @@ -1,6 +1,7 @@ package http import ( + "context" "database/sql" "fmt" "path/filepath" @@ -11,7 +12,7 @@ import ( ) // Same as the one in lb/frontend/middleware_test.go. 
-func setupMockDB(d string) (*sql.DB, error) { +func setupMockDB(ctx context.Context, d string) (*sql.DB, error) { dbPath := filepath.Join(d, "test.db") db, err := sql.Open("sqlite3", dbPath) @@ -92,7 +93,7 @@ INSERT INTO admin_users VALUES(5, 'all', 'adm5', '["grafana"]'); INSERT INTO admin_users VALUES(6, 'all', 'adm6', '["grafana"]'); COMMIT;` - _, err = db.Exec(stmts) + _, err = db.ExecContext(ctx, stmts) if err != nil { return nil, fmt.Errorf("failed to insert mock data into DB: %w", err) } @@ -101,7 +102,7 @@ COMMIT;` } func TestVerifyOwnership(t *testing.T) { - db, err := setupMockDB(t.TempDir()) + db, err := setupMockDB(t.Context(), t.TempDir()) require.NoError(t, err, "failed to setup test DB") tests := []struct { @@ -197,7 +198,7 @@ func TestVerifyOwnership(t *testing.T) { } func TestAdminUsers(t *testing.T) { - db, err := setupMockDB(t.TempDir()) + db, err := setupMockDB(t.Context(), t.TempDir()) require.NoError(t, err, "failed to setup test DB") // Expected users diff --git a/pkg/api/models/types.go b/pkg/api/models/types.go index f3eedf66..7f499072 100644 --- a/pkg/api/models/types.go +++ b/pkg/api/models/types.go @@ -34,10 +34,8 @@ type Generic map[string]any // Value implements Valuer interface. func (g Generic) Value() (driver.Value, error) { - var generic []byte - - var err error - if generic, err = json.Marshal(g); err != nil { + generic, err := json.Marshal(g) + if err != nil { return nil, err } @@ -68,7 +66,8 @@ func (g *Generic) Scan(v any) error { d.UseNumber() - if err := d.Decode(&tmp); err != nil { + err := d.Decode(&tmp) + if err != nil { return err } @@ -76,7 +75,8 @@ func (g *Generic) Scan(v any) error { for k := range tmp { switch tmpt := tmp[k].(type) { case json.Number: - if i, err := tmpt.Int64(); err == nil { + i, err := tmpt.Int64() + if err == nil { tmp[k] = i } } @@ -154,10 +154,8 @@ func (m MetricMap) Values(format string) []any { // Value implements Valuer interface. func (m MetricMap) Value() (driver.Value, error) { - var generic []byte - - var err error - if generic, err = json.Marshal(m); err != nil { + generic, err := json.Marshal(m) + if err != nil { return nil, err } @@ -185,7 +183,9 @@ func (m *MetricMap) Scan(v any) error { // Ref: Improvable, see https://groups.google.com/g/golang-nuts/c/TDuGDJAIuVM?pli=1 // Decode into a tmp var var tmp map[string]JSONFloat - if err := d.Decode(&tmp); err != nil { + + err := d.Decode(&tmp) + if err != nil { return err } @@ -199,10 +199,8 @@ type JSONFloat float64 // Value implements Valuer interface. func (j JSONFloat) Value() (driver.Value, error) { - var generic []byte - - var err error - if generic, err = json.Marshal(j); err != nil { + generic, err := json.Marshal(j) + if err != nil { return nil, err } @@ -240,7 +238,9 @@ func (j *JSONFloat) Scan(v any) error { // Ref: Improvable, see https://groups.google.com/g/golang-nuts/c/TDuGDJAIuVM?pli=1 // Decode into a tmp var var tmp JSONFloat - if err := d.Decode(&tmp); err != nil { + + err := d.Decode(&tmp) + if err != nil { return err } @@ -279,7 +279,9 @@ func (j *JSONFloat) UnmarshalJSON(v []byte) error { } // just a regular float value var fv float64 - if err := json.Unmarshal(v, &fv); err != nil { + + err := json.Unmarshal(v, &fv) + if err != nil { return err } @@ -294,10 +296,8 @@ type List []any // Value implements Valuer interface. 
func (l List) Value() (driver.Value, error) { - var list []byte - - var err error - if list, err = json.Marshal(l); err != nil { + list, err := json.Marshal(l) + if err != nil { return nil, err } @@ -328,7 +328,8 @@ func (l *List) Scan(v any) error { d.UseNumber() - if err := d.Decode(&tmp); err != nil { + err := d.Decode(&tmp) + if err != nil { return err } @@ -336,7 +337,8 @@ func (l *List) Scan(v any) error { for k := range tmp { switch tmpt := tmp[k].(type) { case json.Number: - if i, err := tmpt.Int64(); err == nil { + i, err := tmpt.Int64() + if err == nil { tmp[k] = i } } @@ -365,7 +367,9 @@ func (c *WebConfig) UnmarshalYAML(unmarshal func(any) error) error { *c = WebConfig{ HTTPClientConfig: config.DefaultHTTPClientConfig, } - if err := unmarshal((*plain)(c)); err != nil { + + err := unmarshal((*plain)(c)) + if err != nil { return err } // The UnmarshalYAML method of HTTPClientConfig is not being called because it's not a pointer. diff --git a/pkg/api/resource/k8s/manager.go b/pkg/api/resource/k8s/manager.go index a90bd129..b489b8f8 100644 --- a/pkg/api/resource/k8s/manager.go +++ b/pkg/api/resource/k8s/manager.go @@ -94,7 +94,9 @@ func init() { func New(cluster models.Cluster, logger *slog.Logger) (resource.Fetcher, error) { // Fetch any provided from extra_config var c k8sConfig - if err := cluster.Extra.Decode(&c); err != nil { + + err := cluster.Extra.Decode(&c) + if err != nil { logger.Error("Failed to decode extra_config for k8s cluster", "id", cluster.ID, "err", err) return nil, err @@ -120,14 +122,16 @@ func New(cluster models.Cluster, logger *slog.Logger) (resource.Fetcher, error) mainConfig.SetDirectory(filepath.Dir(base.ConfigFilePath)) // Create a new pod informer - if err := client.NewPodInformer(time.Duration(mainConfig.Server.Data.UpdateInterval)); err != nil { + err = client.NewPodInformer(time.Duration(mainConfig.Server.Data.UpdateInterval)) + if err != nil { logger.Error("Failed to create k8s pod informer", "id", cluster.ID, "err", err) return nil, err } // Start pod informer - if err := client.StartInformer(); err != nil { + err = client.StartInformer() + if err != nil { logger.Error("Failed to start k8s pod informer", "id", cluster.ID, "err", err) return nil, err @@ -426,12 +430,14 @@ func (k *k8sManager) fetchUserNSs(ctx context.Context, current time.Time) ([]mod currentTime := current.Format(base.DatetimezoneLayout) // Check if the configmap is available to fetch users - if content, err := os.ReadFile(k.config.NSUsersListFile); err == nil { + content, err := os.ReadFile(k.config.NSUsersListFile) + if err == nil { var usersDB struct { NSUsers map[string][]string `yaml:"users"` } - if err := yaml.Unmarshal(content, &usersDB); err == nil { + err := yaml.Unmarshal(content, &usersDB) + if err == nil { nsUsers = usersDB.NSUsers for ns, users := range usersDB.NSUsers { @@ -443,7 +449,8 @@ func (k *k8sManager) fetchUserNSs(ctx context.Context, current time.Time) ([]mod } // Merge users and namespaces from RBAC - if rbacUsers, err := k.client.ListUsers(ctx, ""); err == nil { + rbacUsers, err := k.client.ListUsers(ctx, "") + if err == nil { for ns, users := range rbacUsers { for _, user := range users { usersNSs[user] = append(usersNSs[user], ns) diff --git a/pkg/api/resource/k8s/manager_test.go b/pkg/api/resource/k8s/manager_test.go index c918dcd5..08e91853 100644 --- a/pkg/api/resource/k8s/manager_test.go +++ b/pkg/api/resource/k8s/manager_test.go @@ -153,7 +153,8 @@ func mockK8sAPIServer() *httptest.Server { // Start test server server := 
httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { if strings.HasSuffix(r.URL.Path, "pods") { - if data, err := os.ReadFile("../../../collector/testdata/k8s/pods-metadata.json"); err == nil { + data, err := os.ReadFile("../../../collector/testdata/k8s/pods-metadata.json") + if err == nil { w.Header().Add("Content-Type", "application/json") w.Header().Add("Content-Type", "application/vnd.kubernetes.protobuf") w.Write(data) @@ -161,7 +162,8 @@ func mockK8sAPIServer() *httptest.Server { return } } else if strings.HasSuffix(r.URL.Path, "rolebindings") { - if data, err := os.ReadFile("../../../collector/testdata/k8s/rolebindings.json"); err == nil { + data, err := os.ReadFile("../../../collector/testdata/k8s/rolebindings.json") + if err == nil { w.Header().Add("Content-Type", "application/json") w.Header().Add("Content-Type", "application/vnd.kubernetes.protobuf") w.Write(data) @@ -249,7 +251,8 @@ project_annotations: var extraConfig yaml.Node - if err := yaml.Unmarshal([]byte(cfg), &extraConfig); err == nil { + err = yaml.Unmarshal([]byte(cfg), &extraConfig) + if err == nil { return extraConfig, mainConfigFile, nil } else { return yaml.Node{}, mainConfigFile, err diff --git a/pkg/api/resource/manager.go b/pkg/api/resource/manager.go index 5747f7e9..02bcece5 100644 --- a/pkg/api/resource/manager.go +++ b/pkg/api/resource/manager.go @@ -181,7 +181,8 @@ func New(logger *slog.Logger) (*Manager, error) { // If we dont need to keep any privileges, drop any existing capabilities if dropPrivs { - if err := security.DropCapabilities(); err != nil { + err := security.DropCapabilities() + if err != nil { logger.Warn("Failed to drop capabilities", "err", err) } } @@ -207,7 +208,9 @@ func (b Manager) FetchUnits(ctx context.Context, start time.Time, end time.Time) units, err := f.FetchUnits(ctx, start, end) if err != nil { unitFetcherLock.Lock() + errs = errors.Join(errs, err) + unitFetcherLock.Unlock() wg.Done() @@ -215,7 +218,9 @@ func (b Manager) FetchUnits(ctx context.Context, start time.Time, end time.Time) } unitFetcherLock.Lock() + clusterUnits = append(clusterUnits, units...) + unitFetcherLock.Unlock() wg.Done() }(fetcher) @@ -249,7 +254,9 @@ func (b Manager) FetchUsersProjects( users, projects, err := f.FetchUsersProjects(ctx, currentTime) if err != nil { userFetcherLock.Lock() + errs = errors.Join(errs, err) + userFetcherLock.Unlock() wg.Done() @@ -257,8 +264,10 @@ func (b Manager) FetchUsersProjects( } userFetcherLock.Lock() + clusterUsers = append(clusterUsers, users...) clusterProjects = append(clusterProjects, projects...) + userFetcherLock.Unlock() wg.Done() }(fetcher) diff --git a/pkg/api/resource/openstack/compute.go b/pkg/api/resource/openstack/compute.go index ecdc02bc..7e4ca0ad 100644 --- a/pkg/api/resource/openstack/compute.go +++ b/pkg/api/resource/openstack/compute.go @@ -35,7 +35,8 @@ var ( func (o *openstackManager) activeInstances(ctx context.Context, start time.Time, end time.Time) ([]models.Unit, error) { // Check if service is online - if err := o.ping("compute"); err != nil { + err := o.ping("compute") + if err != nil { return nil, err } @@ -60,14 +61,18 @@ func (o *openstackManager) activeInstances(ctx context.Context, start time.Time, servers, err := o.fetchInstances(ctx, start, end, false) if err != nil { errsLock.Lock() + allErrs = errors.Join(allErrs, fmt.Errorf("failed to fetch active instances: %w", err)) + errsLock.Unlock() return } serversLock.Lock() + allServers = append(allServers, servers...) 
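The paired goroutines here follow the same fan-out shape: append results or join errors under a mutex, then wait on the group. A generic sketch of that shape, illustrative only and not the CEEMS implementation:

package sketch

import (
	"errors"
	"sync"
)

// fanOut runs each fetch concurrently, collecting results and joining
// errors under a shared mutex before the caller waits on the group.
func fanOut(fetches []func() ([]string, error)) ([]string, error) {
	var (
		wg      sync.WaitGroup
		mu      sync.Mutex
		all     []string
		allErrs error
	)

	for _, fetch := range fetches {
		wg.Add(1)

		go func(f func() ([]string, error)) {
			defer wg.Done()

			out, err := f()
			if err != nil {
				mu.Lock()
				allErrs = errors.Join(allErrs, err)
				mu.Unlock()

				return
			}

			mu.Lock()
			all = append(all, out...)
			mu.Unlock()
		}(fetch)
	}

	wg.Wait()

	return all, allErrs
}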
+ serversLock.Unlock() }() @@ -79,14 +84,18 @@ func (o *openstackManager) activeInstances(ctx context.Context, start time.Time, servers, err := o.fetchInstances(ctx, start, end, true) if err != nil { errsLock.Lock() + allErrs = errors.Join(allErrs, fmt.Errorf("failed to fetch active instances: %w", err)) + errsLock.Unlock() return } serversLock.Lock() + allServers = append(allServers, servers...) + serversLock.Unlock() }() diff --git a/pkg/api/resource/openstack/identity.go b/pkg/api/resource/openstack/identity.go index 790155f3..867c2791 100644 --- a/pkg/api/resource/openstack/identity.go +++ b/pkg/api/resource/openstack/identity.go @@ -48,7 +48,8 @@ func (o *openstackManager) rotateToken(ctx context.Context) error { // updateUsersProjects updates users and projects of a given Openstack cluster. func (o *openstackManager) updateUsersProjects(ctx context.Context, current time.Time) error { // Fetch current users and projects - if userProjectsCache, err := o.usersProjectsAssoc(ctx, current); err != nil { + userProjectsCache, err := o.usersProjectsAssoc(ctx, current) + if err != nil { return err } else { o.userProjectsCache = userProjectsCache @@ -117,7 +118,8 @@ func (o *openstackManager) fetchUserProjects(ctx context.Context, userID string) // fetchUsers fetches a list of users or specific user from Openstack cluster. func (o *openstackManager) usersProjectsAssoc(ctx context.Context, current time.Time) (userProjectsCache, error) { // Check if service is online - if err := o.ping("identity"); err != nil { + err := o.ping("identity") + if err != nil { return userProjectsCache{}, err } @@ -160,8 +162,10 @@ func (o *openstackManager) usersProjectsAssoc(ctx context.Context, current time. projects, err := o.fetchUserProjects(ctx, id) projectLock.Lock() + userProjects[id] = projects allErrs = errors.Join(allErrs, err) + projectLock.Unlock() }(userID) } diff --git a/pkg/api/resource/openstack/manager.go b/pkg/api/resource/openstack/manager.go index d971904e..2c7667af 100644 --- a/pkg/api/resource/openstack/manager.go +++ b/pkg/api/resource/openstack/manager.go @@ -112,7 +112,8 @@ func New(cluster models.Cluster, logger *slog.Logger) (resource.Fetcher, error) } // Make a HTTP client for Openstack from client config - if openstackManager.client, err = config_util.NewClientFromConfig(cluster.Web.HTTPClientConfig, "openstack"); err != nil { + openstackManager.client, err = config_util.NewClientFromConfig(cluster.Web.HTTPClientConfig, "openstack") + if err != nil { logger.Error("Failed to create HTTP client for Openstack cluster", "id", cluster.ID, "err", err) return nil, err @@ -120,7 +121,9 @@ func New(cluster models.Cluster, logger *slog.Logger) (resource.Fetcher, error) // Fetch compute and identity API URLs and auth config from extra_config osConfig := &openstackConfig{} - if err := cluster.Extra.Decode(osConfig); err != nil { + + err = cluster.Extra.Decode(osConfig) + if err != nil { logger.Error("Failed to decode extra_config for Openstack cluster", "id", cluster.ID, "err", err) return nil, err @@ -145,21 +148,24 @@ func New(cluster models.Cluster, logger *slog.Logger) (resource.Fetcher, error) // Convert auth to bytes to embed into requests later osConfig.addAuthKey() - if openstackManager.auth, err = json.Marshal(common.ConvertMapI2MapS(osConfig.AuthConfig)); err != nil { + openstackManager.auth, err = json.Marshal(common.ConvertMapI2MapS(osConfig.AuthConfig)) + if err != nil { logger.Error("Failed to marshal auth object for Openstack cluster", "id", cluster.ID, "err", err) return nil, 
errors.Unwrap(err) } // Request first API token from keystone - if err := openstackManager.rotateToken(context.Background()); err != nil { + err = openstackManager.rotateToken(context.Background()) + if err != nil { logger.Error("Failed to request API token for Openstack cluster", "id", cluster.ID, "err", err) return nil, errors.Unwrap(err) } // Get initial users and projects - if err = openstackManager.updateUsersProjects(context.Background(), time.Now()); err != nil { + err = openstackManager.updateUsersProjects(context.Background(), time.Now()) + if err != nil { logger.Error("Failed to update users and projects for Openstack cluster", "id", cluster.ID, "err", err) return nil, err @@ -197,7 +203,8 @@ func (o *openstackManager) FetchUsersProjects( if time.Since(o.userProjectsLastUpdateTime) > o.userProjectsCacheTTL { o.logger.Debug("Updating users and projects for Openstack cluster", "id", o.cluster.ID) - if err := o.updateUsersProjects(ctx, current); err != nil { + err := o.updateUsersProjects(ctx, current) + if err != nil { o.logger.Error("Failed to update users and projects data for Openstack cluster", "id", o.cluster.ID, "err", err) return nil, nil, err @@ -235,7 +242,8 @@ func (o *openstackManager) userProjects(id string) *url.URL { func (o *openstackManager) addTokenHeader(ctx context.Context, req *http.Request) (*http.Request, error) { // Check if token is still valid. If not rotate token if time.Now().After(o.apiTokenExpiry) { - if err := o.rotateToken(ctx); err != nil { + err := o.rotateToken(ctx) + if err != nil { return nil, err } } diff --git a/pkg/api/resource/openstack/manager_test.go b/pkg/api/resource/openstack/manager_test.go index 66675270..22e2e782 100644 --- a/pkg/api/resource/openstack/manager_test.go +++ b/pkg/api/resource/openstack/manager_test.go @@ -190,13 +190,15 @@ func mockOSComputeAPIServer() *httptest.Server { fileName = "servers" } - if data, err := os.ReadFile(fmt.Sprintf("../../testdata/openstack/compute/%s.json", fileName)); err == nil { + data, err := os.ReadFile(fmt.Sprintf("../../testdata/openstack/compute/%s.json", fileName)) + if err == nil { w.Write(data) return } } else if strings.Contains(r.URL.Path, "flavors") { - if data, err := os.ReadFile("../../testdata/openstack/compute/flavors.json"); err == nil { + data, err := os.ReadFile("../../testdata/openstack/compute/flavors.json") + if err == nil { w.Write(data) return @@ -219,7 +221,8 @@ func mockOSIdentityAPIServer() *httptest.Server { return } - if data, err := os.ReadFile("../../testdata/openstack/identity/users.json"); err == nil { + data, err := os.ReadFile("../../testdata/openstack/identity/users.json") + if err == nil { w.Write(data) return @@ -234,7 +237,9 @@ func mockOSIdentityAPIServer() *httptest.Server { pathParts := strings.Split(r.URL.Path, "/") userID := pathParts[len(pathParts)-2] - if data, err := os.ReadFile(fmt.Sprintf("../../testdata/openstack/identity/%s.json", userID)); err == nil { + + data, err := os.ReadFile(fmt.Sprintf("../../testdata/openstack/identity/%s.json", userID)) + if err == nil { w.Write(data) return @@ -244,7 +249,8 @@ func mockOSIdentityAPIServer() *httptest.Server { var t map[string]any - if err := decoder.Decode(&t); err != nil { + err := decoder.Decode(&t) + if err != nil { w.Write([]byte("KO")) return @@ -281,7 +287,8 @@ auth: var extraConfig yaml.Node - if err := yaml.Unmarshal([]byte(cfg), &extraConfig); err == nil { + err := yaml.Unmarshal([]byte(cfg), &extraConfig) + if err == nil { return extraConfig, nil } else { return yaml.Node{}, err diff --git 
a/pkg/api/resource/openstack/request.go b/pkg/api/resource/openstack/request.go index b3cd950e..20e933fa 100644 --- a/pkg/api/resource/openstack/request.go +++ b/pkg/api/resource/openstack/request.go @@ -33,7 +33,9 @@ func apiRequest[T any](req *http.Request, client *http.Client) (T, error) { // Unpack into data var data T - if err = json.Unmarshal(body, &data); err != nil { + + err = json.Unmarshal(body, &data) + if err != nil { return *new(T), err } diff --git a/pkg/api/resource/openstack/types.go b/pkg/api/resource/openstack/types.go index 7e3698c5..f6ca4321 100644 --- a/pkg/api/resource/openstack/types.go +++ b/pkg/api/resource/openstack/types.go @@ -12,7 +12,9 @@ type JSONRFC3339MilliNoZ time.Time func (jt *JSONRFC3339MilliNoZ) UnmarshalJSON(data []byte) error { var s string - if err := json.Unmarshal(data, &s); err != nil { + + err := json.Unmarshal(data, &s) + if err != nil { return err } @@ -129,6 +131,7 @@ func (r *Server) UnmarshalJSON(b []byte) error { var s struct { tmp + LaunchedAt JSONRFC3339MilliNoZ `json:"OS-SRV-USG:launched_at"` TerminatedAt JSONRFC3339MilliNoZ `json:"OS-SRV-USG:terminated_at"` } @@ -266,6 +269,7 @@ func (r *Flavor) UnmarshalJSON(b []byte) error { var s struct { tmp + Swap any `json:"swap"` } diff --git a/pkg/api/resource/slurm/cli.go b/pkg/api/resource/slurm/cli.go index 4113f7d9..d06dafd0 100644 --- a/pkg/api/resource/slurm/cli.go +++ b/pkg/api/resource/slurm/cli.go @@ -62,7 +62,8 @@ func preflightsCLI(slurm *slurmScheduler) error { slurm.cluster.CLI.Path = filepath.Dir(path) } else { // Check if slurm binary directory exists at the given path - if _, err := os.Stat(slurm.cluster.CLI.Path); err != nil { + _, err := os.Stat(slurm.cluster.CLI.Path) + if err != nil { slurm.logger.Error("Failed to open SLURM bin dir", "path", slurm.cluster.CLI.Path, "err", err) return err @@ -82,7 +83,8 @@ func preflightsCLI(slurm *slurmScheduler) error { } // If current user is root or if current process has necessary caps setup security context - if currentUser, err := user.Current(); err == nil && currentUser.Uid == "0" || haveCaps { + currentUser, err := user.Current() + if err == nil && currentUser.Uid == "0" || haveCaps { slurm.cmdExecMode = capabilityMode slurm.logger.Info("Current user/process have enough privileges to execute SLURM commands", "user", currentUser.Username) @@ -124,7 +126,8 @@ func preflightsCLI(slurm *slurmScheduler) error { sacctPath := filepath.Join(slurm.cluster.CLI.Path, "sacct") // Last attempt to run sacct with sudo - if _, err := internal_osexec.ExecuteWithTimeout("sudo", []string{sacctPath, "--help"}, 5, nil); err == nil { + _, err = internal_osexec.ExecuteWithTimeout("sudo", []string{sacctPath, "--help"}, 5, nil) + if err == nil { slurm.cmdExecMode = sudoMode slurm.logger.Info("sudo will be used to execute SLURM commands") @@ -186,6 +189,7 @@ func parseSacctCmdOutput(sacctOutput string, start time.Time, end time.Time) ([] // Attempt to convert strings to int and ignore any errors in conversion var gidInt, uidInt int64 + gidInt, _ = strconv.ParseInt(components[sacctFieldMap["gid"]], 10, 64) uidInt, _ = strconv.ParseInt(components[sacctFieldMap["uid"]], 10, 64) // elapsedSeconds, _ = strconv.ParseInt(components[sacctFieldMap["elapsedraw"]], 10, 64) @@ -194,7 +198,8 @@ func parseSacctCmdOutput(sacctOutput string, start time.Time, end time.Time) ([] eventTS := make(map[string]int64, 3) for _, c := range []string{"submit", "start", "end"} { - if t, err := time.Parse(base.DatetimezoneLayout, components[sacctFieldMap[c]]); err == nil { + t, err := 
time.Parse(base.DatetimezoneLayout, components[sacctFieldMap[c]]) + if err == nil { components[sacctFieldMap[c]] = t.In(loc).Format(base.DatetimezoneLayout) } @@ -213,7 +218,8 @@ func parseSacctCmdOutput(sacctOutput string, start time.Time, end time.Time) ([] matches := gresRegex.FindStringSubmatch(elem) if len(matches) == 2 { - if val, err := strconv.ParseInt(matches[1], 10, 64); err == nil { + val, err := strconv.ParseInt(matches[1], 10, 64) + if err == nil { ngpus = val } } @@ -245,7 +251,8 @@ func parseSacctCmdOutput(sacctOutput string, start time.Time, end time.Time) ([] var mem int64 if len(matches) >= 2 { - if memFloat, err := strconv.ParseFloat(matches[1], 64); err == nil { + memFloat, err := strconv.ParseFloat(matches[1], 64) + if err == nil { if len(matches) == 3 { if unitConv, ok := toBytes[matches[2]]; ok { mem = int64(memFloat) * unitConv @@ -297,6 +304,7 @@ func parseSacctCmdOutput(sacctOutput string, start time.Time, end time.Time) ([] // Get cpuSeconds and gpuSeconds of the current interval var cpuSeconds, gpuSeconds int64 + cpuSeconds = ncpus * elapsedSeconds gpuSeconds = ngpus * elapsedSeconds @@ -370,8 +378,10 @@ func parseSacctCmdOutput(sacctOutput string, start time.Time, end time.Time) ([] } jobLock.Lock() + jobs[i] = jobStat numJobs += 1 + jobLock.Unlock() wg.Done() }(iline, line) @@ -425,10 +435,12 @@ func parseSacctMgrCmdOutput(sacctMgrOutput string, currentTime string) ([]models // Add user project association to map assocLock.Lock() + userProjectMap[components[1]] = append(userProjectMap[components[1]], components[0]) projectUserMap[components[0]] = append(projectUserMap[components[0]], components[1]) users = append(users, components[1]) projects = append(projects, components[0]) + assocLock.Unlock() wg.Done() }(line) @@ -606,7 +618,8 @@ func executeInSecurityContext( dataPtr *security.ExecSecurityCtxData, ) ([]byte, error) { // Read stdOut of command into data - if err := securityCtx.Exec(dataPtr); err != nil { + err := securityCtx.Exec(dataPtr) + if err != nil { return nil, err } diff --git a/pkg/api/resource/slurm/manager.go b/pkg/api/resource/slurm/manager.go index 57d46150..f0d7c89f 100644 --- a/pkg/api/resource/slurm/manager.go +++ b/pkg/api/resource/slurm/manager.go @@ -76,7 +76,8 @@ func New(cluster models.Cluster, logger *slog.Logger) (resource.Fetcher, error) securityContexts: make(map[string]*security.SecurityContext), } - if err := preflightChecks(&slurmScheduler); err != nil { + err := preflightChecks(&slurmScheduler) + if err != nil { return nil, err } @@ -96,7 +97,8 @@ func (s *slurmScheduler) FetchUnits( var err error if s.fetchMode == cliMode { - if jobs, err = s.fetchFromSacct(ctx, start, end); err != nil { + jobs, err = s.fetchFromSacct(ctx, start, end) + if err != nil { s.logger.Error("Failed to execute SLURM sacct command", "cluster_id", s.cluster.ID, "err", err) return nil, err @@ -120,7 +122,8 @@ func (s *slurmScheduler) FetchUsersProjects( var err error if s.fetchMode == cliMode { - if users, projects, err = s.fetchFromSacctMgr(ctx, current); err != nil { + users, projects, err = s.fetchFromSacctMgr(ctx, current) + if err != nil { s.logger.Error("Failed to execute SLURM sacctmgr command", "cluster_id", s.cluster.ID, "err", err) return nil, nil, err diff --git a/pkg/api/updater/tsdb/tsdb.go b/pkg/api/updater/tsdb/tsdb.go index e3bf922d..55869cc3 100644 --- a/pkg/api/updater/tsdb/tsdb.go +++ b/pkg/api/updater/tsdb/tsdb.go @@ -139,8 +139,9 @@ func (c *tsdbConfig) validate() error { // Embed TSDB struct into our TSDBUpdater struct. 
type tsdbUpdater struct { - config *tsdbConfig *tsdb.Client + + config *tsdbConfig } // Mutex lock. @@ -159,7 +160,9 @@ func init() { func New(instance updater.Instance, logger *slog.Logger) (updater.Updater, error) { // Make TSDB config from instances extra config var c tsdbConfig - if err := instance.Extra.Decode(&c); err != nil { + + err := instance.Extra.Decode(&c) + if err != nil { logger.Error("Failed to setup TSDB updater", "id", instance.ID, "err", err) return nil, err @@ -169,7 +172,8 @@ func New(instance updater.Instance, logger *slog.Logger) (updater.Updater, error config := c.defaults() // Validate config - if err := config.validate(); err != nil { + err = config.validate() + if err != nil { logger.Error("Failed to validate TSDB updater config", "instance_id", instance.ID, "err", err) return nil, err @@ -190,8 +194,8 @@ func New(instance updater.Instance, logger *slog.Logger) (updater.Updater, error logger.Info("TSDB updater setup successful", "id", instance.ID) return &tsdbUpdater{ - config, tsdb, + config, }, nil } @@ -214,7 +218,8 @@ func (t *tsdbUpdater) queryBuilder(name string, queryTemplate string, data map[s tmpl := template.Must(template.New(name).Parse(queryTemplate)) builder := &strings.Builder{} - if err := tmpl.Execute(builder, data); err != nil { + err := tmpl.Execute(builder, data) + if err != nil { return "", err } @@ -282,7 +287,8 @@ func (t *tsdbUpdater) fetchAggMetrics( return } - if aggMetric, err = t.Query(ctx, tsdbQuery, queryTime); err != nil { + aggMetric, err = t.Query(ctx, tsdbQuery, queryTime) + if err != nil { t.Logger.Error( "Failed to fetch metrics from TSDB", "metric", n, "duration", duration, "scrape_int", settings.ScrapeInterval, @@ -290,11 +296,13 @@ func (t *tsdbUpdater) fetchAggMetrics( ) } else { metricLock.Lock() + if aggMetrics[n] == nil { aggMetrics[n] = make(map[string]tsdb.Metric) } aggMetrics[n][sn] = aggMetric + metricLock.Unlock() } }(metricName, subMetricName, query) @@ -561,7 +569,8 @@ func (t *tsdbUpdater) update( } // Finally delete time series - if err := t.deleteTimeSeries(ctx, startTime, endTime, uuidsToDelete); err != nil { + err := t.deleteTimeSeries(ctx, startTime, endTime, uuidsToDelete) + if err != nil { t.Logger.Error("Failed to delete time series in TSDB", "err", err) } diff --git a/pkg/api/updater/tsdb/tsdb_test.go b/pkg/api/updater/tsdb/tsdb_test.go index e43b3fdb..fba8f7fc 100644 --- a/pkg/api/updater/tsdb/tsdb_test.go +++ b/pkg/api/updater/tsdb/tsdb_test.go @@ -46,7 +46,8 @@ func mockTSDBServer() *httptest.Server { }, } server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if err := json.NewEncoder(w).Encode(&expected); err != nil { + err := json.NewEncoder(w).Encode(&expected) + if err != nil { w.Write([]byte("KO")) } })) @@ -94,7 +95,8 @@ queries: var extraConfig yaml.Node - if err := yaml.Unmarshal([]byte(config), &extraConfig); err != nil { + err := yaml.Unmarshal([]byte(config), &extraConfig) + if err != nil { return updater.Instance{}, fmt.Errorf("failed to unmarshall config: %w\n", err) } diff --git a/pkg/collector/cgroup.go b/pkg/collector/cgroup.go index 2ee5560d..a8c06e62 100644 --- a/pkg/collector/cgroup.go +++ b/pkg/collector/cgroup.go @@ -348,7 +348,8 @@ func NewCgroupManager(name manager, logger *slog.Logger) (*cgroupManager, error) } for _, slice := range []string{"machine", "machine.slice"} { - if _, err := os.Stat(filepath.Join(slicesPrefix, slice)); err == nil { + _, err := os.Stat(filepath.Join(slicesPrefix, slice)) + if err == nil { manager.slices = 
append(manager.slices, slice) if slice == "machine" { @@ -441,7 +442,8 @@ func NewCgroupManager(name manager, logger *slog.Logger) (*cgroupManager, error) // Discover all cgroup slices depending on the driver used for _, slice := range []string{"kubepods", "kubepods.slice"} { - if _, err := os.Stat(filepath.Join(*cgroupfsPath, activeSubsystem, slice)); err == nil { + _, err := os.Stat(filepath.Join(*cgroupfsPath, activeSubsystem, slice)) + if err == nil { manager.slices = append(manager.slices, slice) } } @@ -457,7 +459,8 @@ func NewCgroupManager(name manager, logger *slog.Logger) (*cgroupManager, error) // In this scenario, verify if there are cgroups formed under `/system.slice`. This // happens when cgroup driver on containerd does not match with the one of kubelet. - if matches, err := filepath.Glob(filepath.Join(*cgroupfsPath, activeSubsystem, "/system.slice/kubepods*")); err == nil && len(matches) > 0 { + matches, err := filepath.Glob(filepath.Join(*cgroupfsPath, activeSubsystem, "/system.slice/kubepods*")) + if err == nil && len(matches) > 0 { logger.Warn( "Containerd creating container cgroups in /system.slice instead of /kubepods.slice. " + "This happens when cgroup driver of containerd does not match with that of kubelet and this can have " + @@ -593,17 +596,15 @@ func (c *cgroupManager) discover() ([]cgroup, error) { // Walk through all cgroups and get cgroup paths // https://goplay.tools/snippet/coVDkIozuhg for _, mountPoint := range c.mountPoints { - if err := filepath.WalkDir(mountPoint, func(p string, info fs.DirEntry, err error) error { + err := filepath.WalkDir(mountPoint, func(p string, info fs.DirEntry, err error) error { if err != nil { return err } - // Ignore paths that are not directories if !info.IsDir() { return nil } - // Get relative path of cgroup rel, err := filepath.Rel(c.root, p) if err != nil { c.logger.Error("Failed to resolve relative path for cgroup", "path", p, "err", err) @@ -611,10 +612,8 @@ func (c *cgroupManager) discover() ([]cgroup, error) { return nil } - // Add leading slash to relative path rel = "/" + rel - // Unescape UTF-8 characters in cgroup path sanitizedPath, err := unescapeString(p) if err != nil { c.logger.Error("Failed to sanitize cgroup path", "path", p, "err", err) @@ -622,21 +621,19 @@ func (c *cgroupManager) discover() ([]cgroup, error) { return nil } - // Find all matches of regex matches := c.idRegex.FindStringSubmatch(sanitizedPath) if len(matches) < 2 { return nil } - // Get capture group maps and map values to names captureGrps := make(map[string]string) + for i, name := range c.idRegex.SubexpNames() { if i != 0 && name != "" { captureGrps[name] = matches[i] } } - // Get cgroup ID which is instance ID id := strings.TrimSpace(captureGrps["id"]) if id == "" { c.logger.Error("Empty cgroup ID", "path", p) @@ -644,37 +641,30 @@ func (c *cgroupManager) discover() ([]cgroup, error) { return nil } - // For k8s when systemd is used, there will be "_" in the - // id. 
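The discover() hunks above pair filepath.WalkDir with a regex whose named capture groups pull the cgroup ID out of each path. A compact sketch of that pairing; the regex and the root path here are assumptions, not the collector's real ones:

package main

import (
    "fmt"
    "io/fs"
    "path/filepath"
    "regexp"
)

// discoverIDs walks a cgroup root and collects the ID captured by the
// named group, much like cgroupManager.discover.
func discoverIDs(root string) ([]string, error) {
    idRegex := regexp.MustCompile(`job_(?P<id>\d+)$`)

    var ids []string

    err := filepath.WalkDir(root, func(p string, d fs.DirEntry, err error) error {
        if err != nil {
            return err
        }

        if !d.IsDir() {
            return nil
        }

        matches := idRegex.FindStringSubmatch(p)
        if len(matches) < 2 {
            return nil
        }

        // Map capture-group names to values, as the collector does.
        for i, name := range idRegex.SubexpNames() {
            if i != 0 && name == "id" {
                ids = append(ids, matches[i])
            }
        }

        return nil
    })
    if err != nil {
        return nil, err
    }

    return ids, nil
}

func main() {
    ids, err := discoverIDs("/sys/fs/cgroup/system.slice")
    fmt.Println(ids, err)
}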
We need to replace them with "-" - // Ref: https://github.com/kubernetes/kubernetes/blob/f007012f5fe49e40ae0596cf463a8e7b247b3357/pkg/kubelet/stats/cri_stats_provider.go#L952-L967 id = strings.ReplaceAll(id, "_", "-") - // Optionally we get "virtual" hostname as well if it is in - // cgroup path (for SLURM only) vhost := strings.TrimSpace(captureGrps["host"]) - // Find procs in this cgroup - if data, err := os.ReadFile(filepath.Join(p, "cgroup.procs")); err == nil { + data, err := os.ReadFile(filepath.Join(p, "cgroup.procs")) + if err == nil { scanner := bufio.NewScanner(bytes.NewReader(data)) for scanner.Scan() { - if pid, err := strconv.ParseInt(scanner.Text(), 10, 0); err == nil { - if proc, err := c.fs.Proc(int(pid)); err == nil { + pid, err := strconv.ParseInt(scanner.Text(), 10, 0) + if err == nil { + proc, err := c.fs.Proc(int(pid)) + if err == nil { cgroupProcs[id] = append(cgroupProcs[id], proc) } } } } - // Ignore child cgroups. We are only interested in root cgroup if c.isChild(p) { cgroupChildren[id] = append(cgroupChildren[id], cgroupPath{abs: sanitizedPath, rel: rel}) return nil } - // By default set id and uuid to same cgroup ID and if the resource - // manager has two representations, override it in corresponding - // collector. For instance, it applies only to libvirt cgrp := cgroup{ id: id, uuid: id, @@ -686,7 +676,8 @@ func (c *cgroupManager) discover() ([]cgroup, error) { cgroupChildren[id] = append(cgroupChildren[id], cgroupPath{abs: sanitizedPath, rel: rel}) return nil - }); err != nil { + }) + if err != nil { c.logger.Error("Error walking cgroup subsystem", "path", mountPoint, "err", err) return nil, err @@ -779,7 +770,8 @@ func NewCgroupCollector(logger *slog.Logger, cgManager *cgroupManager, opts cgro file, err := os.Open(procFilePath("meminfo")) if err == nil { - if memInfo, err := parseMemInfo(file); err == nil { + memInfo, err := parseMemInfo(file) + if err == nil { hostMemInfo = memInfo } } else { @@ -792,8 +784,10 @@ func NewCgroupCollector(logger *slog.Logger, cgManager *cgroupManager, opts cgro // We construct a map from major:minor to device name using this info blockDevices := make(map[string]string) - if blockdevice, err := blockdevice.NewFS(*procfsPath, *sysPath); err == nil { - if stats, err := blockdevice.ProcDiskstats(); err == nil { + blockdevice, err := blockdevice.NewFS(*procfsPath, *sysPath) + if err == nil { + stats, err := blockdevice.ProcDiskstats() + if err == nil { for _, s := range stats { blockDevices[fmt.Sprintf("%d:%d", s.MajorNumber, s.MinorNumber)] = s.DeviceName } @@ -962,20 +956,28 @@ func (c *cgroupCollector) Update(ch chan<- prometheus.Metric, cgroups []cgroup) // CPU stats ch <- prometheus.MustNewConstMetric(c.cgCPUUser, prometheus.CounterValue, m.cpuUser, c.cgroupManager.name, c.hostname, m.cgroup.hostname, m.cgroup.uuid) + ch <- prometheus.MustNewConstMetric(c.cgCPUSystem, prometheus.CounterValue, m.cpuSystem, c.cgroupManager.name, c.hostname, m.cgroup.hostname, m.cgroup.uuid) + ch <- prometheus.MustNewConstMetric(c.cgCPUs, prometheus.GaugeValue, float64(m.cpus)/milliCPUtoCPU, c.cgroupManager.name, c.hostname, m.cgroup.hostname, m.cgroup.uuid) // Memory stats ch <- prometheus.MustNewConstMetric(c.cgMemoryRSS, prometheus.GaugeValue, m.memoryRSS, c.cgroupManager.name, c.hostname, m.cgroup.hostname, m.cgroup.uuid) + ch <- prometheus.MustNewConstMetric(c.cgMemoryCache, prometheus.GaugeValue, m.memoryCache, c.cgroupManager.name, c.hostname, m.cgroup.hostname, m.cgroup.uuid) + ch <- prometheus.MustNewConstMetric(c.cgMemoryUsed, 
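The NewCgroupCollector hunk just below builds a major:minor to device-name map from diskstats. Roughly, and assuming the default /proc and /sys mount points, that map can be built like this with the same prometheus/procfs blockdevice package:

package main

import (
    "fmt"

    "github.com/prometheus/procfs/blockdevice"
)

// blockDeviceNames maps "major:minor" strings to device names, mirroring
// the lookup table the cgroup collector constructs.
func blockDeviceNames() (map[string]string, error) {
    fs, err := blockdevice.NewFS("/proc", "/sys")
    if err != nil {
        return nil, err
    }

    stats, err := fs.ProcDiskstats()
    if err != nil {
        return nil, err
    }

    names := make(map[string]string, len(stats))
    for _, s := range stats {
        names[fmt.Sprintf("%d:%d", s.MajorNumber, s.MinorNumber)] = s.DeviceName
    }

    return names, nil
}

func main() {
    names, err := blockDeviceNames()
    fmt.Println(names, err)
}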
prometheus.GaugeValue, m.memoryUsed, c.cgroupManager.name, c.hostname, m.cgroup.hostname, m.cgroup.uuid) + ch <- prometheus.MustNewConstMetric(c.cgMemoryTotal, prometheus.GaugeValue, m.memoryTotal, c.cgroupManager.name, c.hostname, m.cgroup.hostname, m.cgroup.uuid) + ch <- prometheus.MustNewConstMetric(c.cgMemoryFailCount, prometheus.GaugeValue, m.memoryFailCount, c.cgroupManager.name, c.hostname, m.cgroup.hostname, m.cgroup.uuid) // Memory swap stats if c.opts.collectSwapMemStats { ch <- prometheus.MustNewConstMetric(c.cgMemswUsed, prometheus.GaugeValue, m.memswUsed, c.cgroupManager.name, c.hostname, m.cgroup.hostname, m.cgroup.uuid) + ch <- prometheus.MustNewConstMetric(c.cgMemswTotal, prometheus.GaugeValue, m.memswTotal, c.cgroupManager.name, c.hostname, m.cgroup.hostname, m.cgroup.uuid) + ch <- prometheus.MustNewConstMetric(c.cgMemswFailCount, prometheus.GaugeValue, m.memswFailCount, c.cgroupManager.name, c.hostname, m.cgroup.hostname, m.cgroup.uuid) } @@ -1003,6 +1005,7 @@ func (c *cgroupCollector) Update(ch chan<- prometheus.Metric, cgroups []cgroup) // PSI stats if c.opts.collectPSIStats { ch <- prometheus.MustNewConstMetric(c.cgCPUPressure, prometheus.GaugeValue, m.cpuPressure, c.cgroupManager.name, c.hostname, m.cgroup.hostname, m.cgroup.uuid) + ch <- prometheus.MustNewConstMetric(c.cgMemoryPressure, prometheus.GaugeValue, m.memoryPressure, c.cgroupManager.name, c.hostname, m.cgroup.hostname, m.cgroup.uuid) } @@ -1239,7 +1242,8 @@ func (c *cgroupCollector) statsV1(cgrp cgroup) cgMetric { } } - if ncpus, err := c.getCPUs(path); err == nil { + ncpus, err := c.getCPUs(path) + if err == nil { metric.cpus = ncpus } @@ -1372,7 +1376,8 @@ func (c *cgroupCollector) statsV2(cgrp cgroup) cgMetric { } } - if ncpus, err := c.getCPUs(path); err == nil { + ncpus, err := c.getCPUs(path) + if err == nil { metric.cpus = ncpus } @@ -1504,7 +1509,8 @@ func parseCgroupSubSysIds() ([]cgroupController, error) { ) } - if id, err := strconv.ParseUint(fields[1], 10, 32); err == nil { + id, err := strconv.ParseUint(fields[1], 10, 32) + if err == nil { cgroupControllers = append(cgroupControllers, cgroupController{ id: id, idx: idx, diff --git a/pkg/collector/cli.go b/pkg/collector/cli.go index 0ddd76f1..b1b9b5a3 100644 --- a/pkg/collector/cli.go +++ b/pkg/collector/cli.go @@ -297,7 +297,8 @@ func (b *CEEMSExporter) Main() error { return err } - if user, err := user.Current(); err == nil && user.Uid == "0" { + user, err := user.Current() + if err == nil && user.Uid == "0" { logger.Info("CEEMS Exporter is running as root user. Privileges will be dropped and process will be run as unprivileged user") } @@ -325,7 +326,8 @@ func (b *CEEMSExporter) Main() error { // Drop all unnecessary privileges if dropPrivs { - if err := securityManager.DropPrivileges(disableCapAwareness); err != nil { + err := securityManager.DropPrivileges(disableCapAwareness) + if err != nil { logger.Error("Failed to drop privileges", "err", err) return err @@ -368,7 +370,8 @@ func (b *CEEMSExporter) Main() error { // Start profiling session if enabled if profiler.Enabled() { go func() { - if err := profiler.Start(ctx); err != nil { + err := profiler.Start(ctx) + if err != nil { logger.Error("Failed to start ebpf profiler", "err", err) } }() @@ -385,7 +388,8 @@ func (b *CEEMSExporter) Main() error { // Initializing the server in a goroutine so that // it won't block the graceful shutdown handling below. 
go func() { - if err := server.Start(); err != nil { + err := server.Start() + if err != nil { logger.Error("Failed to start server", "err", err) } }() @@ -407,14 +411,16 @@ func (b *CEEMSExporter) Main() error { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() - if err := server.Shutdown(ctx); err != nil { + err = server.Shutdown(ctx) + if err != nil { logger.Error("Failed to gracefully shutdown server", "err", err) } // Restore file permissions by removing any ACLs added // When dropPrivs is false, this is noop, so it is fine to leave it // here - if err := securityManager.DeleteACLEntries(); err != nil { + err = securityManager.DeleteACLEntries() + if err != nil { logger.Error("Failed to remove ACL entries", "err", err) } diff --git a/pkg/collector/cli_test.go b/pkg/collector/cli_test.go index 9c20ccb2..ef33a549 100644 --- a/pkg/collector/cli_test.go +++ b/pkg/collector/cli_test.go @@ -60,7 +60,8 @@ func TestCEEMSExporterMain(t *testing.T) { // Query exporter for i := range 10 { - if err := queryExporter("localhost:9010"); err == nil { + err := queryExporter("localhost:9010") + if err == nil { break } diff --git a/pkg/collector/collector.go b/pkg/collector/collector.go index 53d7ae1b..dbe321be 100644 --- a/pkg/collector/collector.go +++ b/pkg/collector/collector.go @@ -164,6 +164,7 @@ func NewCEEMSCollector(logger *slog.Logger) (*CEEMSCollector, error) { // Describe implements the prometheus.Collector interface. func (n CEEMSCollector) Describe(ch chan<- *prometheus.Desc) { ch <- scrapeDurationDesc + ch <- scrapeSuccessDesc } @@ -187,7 +188,8 @@ func (n CEEMSCollector) Close(ctx context.Context) error { var errs error for _, c := range n.Collectors { - if err := c.Stop(ctx); err != nil { + err := c.Stop(ctx) + if err != nil { errs = errors.Join(errs, err) } } @@ -216,7 +218,9 @@ func execute(name string, c Collector, ch chan<- prometheus.Metric, logger *slog success = 1 } + ch <- prometheus.MustNewConstMetric(scrapeDurationDesc, prometheus.GaugeValue, duration.Seconds(), name, "") + ch <- prometheus.MustNewConstMetric(scrapeSuccessDesc, prometheus.GaugeValue, success, name) } diff --git a/pkg/collector/cpu.go b/pkg/collector/cpu.go index 52323ff4..71086ff1 100644 --- a/pkg/collector/cpu.go +++ b/pkg/collector/cpu.go @@ -123,15 +123,25 @@ func (c *cpuCollector) Update(ch chan<- prometheus.Metric) error { // Acquire a lock to read the stats. 
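The cli.go hunks around server start and shutdown follow the standard Go graceful-shutdown pattern. A minimal sketch of that pattern with the same 5-second drain window; the listen address and signal set are illustrative, not CEEMS defaults:

package main

import (
    "context"
    "errors"
    "log/slog"
    "net/http"
    "os"
    "os/signal"
    "syscall"
    "time"
)

func main() {
    server := &http.Server{Addr: ":9010", ReadHeaderTimeout: 5 * time.Second}

    // Start the server in a goroutine so it does not block the
    // graceful-shutdown handling below.
    go func() {
        err := server.ListenAndServe()
        if err != nil && !errors.Is(err, http.ErrServerClosed) {
            slog.Error("Failed to start server", "err", err)
        }
    }()

    // Wait for SIGINT/SIGTERM, then give in-flight requests 5s to drain.
    stop := make(chan os.Signal, 1)
    signal.Notify(stop, os.Interrupt, syscall.SIGTERM)
    <-stop

    ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
    defer cancel()

    err := server.Shutdown(ctx)
    if err != nil {
        slog.Error("Failed to gracefully shutdown server", "err", err)
    }
}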
c.cpuStatsMutex.Lock() defer c.cpuStatsMutex.Unlock() + ch <- prometheus.MustNewConstMetric(c.ncpus, prometheus.GaugeValue, float64(ncpus), c.hostname) + ch <- prometheus.MustNewConstMetric(c.ncpusPerCore, prometheus.GaugeValue, float64(c.cpusPerCore), c.hostname) + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, c.cpuStats.User, c.hostname, "user") + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, c.cpuStats.Nice, c.hostname, "nice") + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, c.cpuStats.System, c.hostname, "system") + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, c.cpuStats.Idle, c.hostname, "idle") + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, c.cpuStats.Iowait, c.hostname, "iowait") + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, c.cpuStats.IRQ, c.hostname, "irq") + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, c.cpuStats.SoftIRQ, c.hostname, "softirq") + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, c.cpuStats.Steal, c.hostname, "steal") return nil diff --git a/pkg/collector/cray_pm_counters.go b/pkg/collector/cray_pm_counters.go index e7bb4922..eb2b53d1 100644 --- a/pkg/collector/cray_pm_counters.go +++ b/pkg/collector/cray_pm_counters.go @@ -115,19 +115,23 @@ func (c *crayPMCCollector) Update(ch chan<- prometheus.Metric) error { // Update metrics for _, domain := range domains { - if val, err := domain.GetEnergyJoules(); err == nil && val > 0 { + val, err := domain.GetEnergyJoules() + if err == nil && val > 0 { ch <- prometheus.MustNewConstMetric(c.joulesMetricDesc, prometheus.GaugeValue, float64(val), c.hostname, domain.Name) } - if val, err := domain.GetPowerWatts(); err == nil && val > 0 { + val, err = domain.GetPowerWatts() + if err == nil && val > 0 { ch <- prometheus.MustNewConstMetric(c.wattsMetricDesc, prometheus.GaugeValue, float64(val), c.hostname, domain.Name) } - if val, err := domain.GetPowerLimitWatts(); err == nil && val > 0 { + val, err = domain.GetPowerLimitWatts() + if err == nil && val > 0 { ch <- prometheus.MustNewConstMetric(c.wattsLimitMetricDesc, prometheus.GaugeValue, float64(val), c.hostname, domain.Name) } - if val, err := domain.GetTempCelsius(); err == nil && val > 0 { + val, err = domain.GetTempCelsius() + if err == nil && val > 0 { ch <- prometheus.MustNewConstMetric(c.tempMetricDesc, prometheus.GaugeValue, float64(val), c.hostname, domain.Name) } } diff --git a/pkg/collector/ebpf.go b/pkg/collector/ebpf.go index 250a925f..226f6228 100644 --- a/pkg/collector/ebpf.go +++ b/pkg/collector/ebpf.go @@ -209,7 +209,8 @@ func NewEbpfCollector(logger *slog.Logger, cgManager *cgroupManager) (*ebpfColle } // Remove resource limits for kernels <5.11. 
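Both the cpu and cray_pm_counters collectors above emit point-in-time values with prometheus.MustNewConstMetric rather than keeping long-lived registered metric objects. A toy collector showing that emit-on-collect style; the metric name and numbers are invented:

package main

import (
    "net/http"

    "github.com/prometheus/client_golang/prometheus"
    "github.com/prometheus/client_golang/prometheus/promhttp"
)

// cpuModeCollector emits one counter per CPU mode on every scrape.
type cpuModeCollector struct {
    cpu      *prometheus.Desc
    hostname string
}

func (c *cpuModeCollector) Describe(ch chan<- *prometheus.Desc) {
    ch <- c.cpu
}

func (c *cpuModeCollector) Collect(ch chan<- prometheus.Metric) {
    for mode, seconds := range map[string]float64{"user": 100, "system": 25, "idle": 900} {
        ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, seconds, c.hostname, mode)
    }
}

func main() {
    c := &cpuModeCollector{
        cpu: prometheus.NewDesc("sketch_cpu_seconds_total", "CPU time per mode",
            []string{"hostname", "mode"}, nil),
        hostname: "node0",
    }
    prometheus.MustRegister(c)

    http.Handle("/metrics", promhttp.Handler())

    err := http.ListenAndServe(":9100", nil)
    if err != nil {
        panic(err)
    }
}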
- if err := rlimit.RemoveMemlock(); err != nil { + err = rlimit.RemoveMemlock() + if err != nil { return nil, fmt.Errorf("error removing memlock: %w", err) } @@ -285,7 +286,8 @@ func NewEbpfCollector(logger *slog.Logger, cgManager *cgroupManager) (*ebpfColle } } - if err := configMap.Update(uint32(0), config, ebpf.UpdateAny); err != nil { + err = configMap.Update(uint32(0), config, ebpf.UpdateAny) + if err != nil { return nil, fmt.Errorf("failed to update bpf config: %w", err) } @@ -309,7 +311,8 @@ func NewEbpfCollector(logger *slog.Logger, cgManager *cgroupManager) (*ebpfColle continue } - if links[kernFuncName], err = link.Kprobe(kernFuncName, prog, nil); err != nil { + links[kernFuncName], err = link.Kprobe(kernFuncName, prog, nil) + if err != nil { logger.Error("Failed to open kprobe", "func", kernFuncName, "err", err) } @@ -327,7 +330,8 @@ func NewEbpfCollector(logger *slog.Logger, cgManager *cgroupManager) (*ebpfColle continue } - if links[kernFuncName], err = link.Kretprobe(kernFuncName, prog, nil); err != nil { + links[kernFuncName], err = link.Kretprobe(kernFuncName, prog, nil) + if err != nil { logger.Error("Failed to open kretprobe", "func", kernFuncName, "err", err) } @@ -338,10 +342,12 @@ func NewEbpfCollector(logger *slog.Logger, cgManager *cgroupManager) (*ebpfColle // fentry/* programs if strings.HasPrefix(name, "fentry") { kernFuncName := strings.TrimPrefix(name, "fentry_") - if links[kernFuncName], err = link.AttachTracing(link.TracingOptions{ + + links[kernFuncName], err = link.AttachTracing(link.TracingOptions{ Program: prog, AttachType: ebpf.AttachTraceFEntry, - }); err != nil { + }) + if err != nil { logger.Error("Failed to open fentry", "func", kernFuncName, "err", err) } @@ -351,10 +357,12 @@ func NewEbpfCollector(logger *slog.Logger, cgManager *cgroupManager) (*ebpfColle // fexit/* programs if strings.HasPrefix(name, "fexit") { kernFuncName := strings.TrimPrefix(name, "fexit_") - if links[kernFuncName], err = link.AttachTracing(link.TracingOptions{ + + links[kernFuncName], err = link.AttachTracing(link.TracingOptions{ Program: prog, AttachType: ebpf.AttachTraceFExit, - }); err != nil { + }) + if err != nil { logger.Error("Failed to open fexit", "func", kernFuncName, "err", err) } @@ -545,7 +553,8 @@ func (c *ebpfCollector) Update(ch chan<- prometheus.Metric, cgroups []cgroup, ma go func() { defer wg.Done() - if err := c.updateVFSWrite(ch, aggMetrics); err != nil { + err := c.updateVFSWrite(ch, aggMetrics) + if err != nil { c.logger.Error("Failed to update VFS write stats", "err", err) } }() @@ -553,7 +562,8 @@ func (c *ebpfCollector) Update(ch chan<- prometheus.Metric, cgroups []cgroup, ma go func() { defer wg.Done() - if err := c.updateVFSRead(ch, aggMetrics); err != nil { + err := c.updateVFSRead(ch, aggMetrics) + if err != nil { c.logger.Error("Failed to update VFS read stats", "err", err) } }() @@ -561,7 +571,8 @@ func (c *ebpfCollector) Update(ch chan<- prometheus.Metric, cgroups []cgroup, ma go func() { defer wg.Done() - if err := c.updateVFSOpen(ch, aggMetrics); err != nil { + err := c.updateVFSOpen(ch, aggMetrics) + if err != nil { c.logger.Error("Failed to update VFS open stats", "err", err) } }() @@ -569,7 +580,8 @@ func (c *ebpfCollector) Update(ch chan<- prometheus.Metric, cgroups []cgroup, ma go func() { defer wg.Done() - if err := c.updateVFSCreate(ch, aggMetrics); err != nil { + err := c.updateVFSCreate(ch, aggMetrics) + if err != nil { c.logger.Error("Failed to update VFS create stats", "err", err) } }() @@ -577,7 +589,8 @@ func (c *ebpfCollector) 
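For context on the ebpf.go hunks: the collector first lifts RLIMIT_MEMLOCK (needed on kernels <5.11) and then attaches programs by kernel symbol. A sketch of the kprobe half using the same cilium/ebpf packages; loading the object file is deliberately elided, and the assumption that the program name equals the kernel symbol is the sketch's, not the collector's:

package main

import (
    "log"

    "github.com/cilium/ebpf"
    "github.com/cilium/ebpf/link"
    "github.com/cilium/ebpf/rlimit"
)

// attachKprobes attaches every program in an already-loaded collection to
// the kernel function of the same name, logging and skipping failures.
func attachKprobes(coll *ebpf.Collection) map[string]link.Link {
    links := make(map[string]link.Link)

    for name, prog := range coll.Programs {
        kernFuncName := name

        l, err := link.Kprobe(kernFuncName, prog, nil)
        if err != nil {
            log.Printf("Failed to open kprobe func=%s err=%v", kernFuncName, err)

            continue
        }

        links[kernFuncName] = l
    }

    return links
}

func main() {
    // On kernels <5.11 locked memory for eBPF maps is limited, so lift
    // RLIMIT_MEMLOCK first, exactly as the collector does.
    err := rlimit.RemoveMemlock()
    if err != nil {
        log.Fatalf("error removing memlock: %v", err)
    }

    // Loading a real object file is out of scope for this sketch:
    // spec, _ := ebpf.LoadCollectionSpec("probe.o")
    // coll, _ := ebpf.NewCollection(spec)
}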
Update(ch chan<- prometheus.Metric, cgroups []cgroup, ma go func() { defer wg.Done() - if err := c.updateVFSUnlink(ch, aggMetrics); err != nil { + err := c.updateVFSUnlink(ch, aggMetrics) + if err != nil { c.logger.Error("Failed to update VFS unlink stats", "err", err) } }() @@ -589,7 +602,8 @@ func (c *ebpfCollector) Update(ch chan<- prometheus.Metric, cgroups []cgroup, ma go func() { defer wg.Done() - if err := c.updateNetEgress(ch, aggMetrics); err != nil { + err := c.updateNetEgress(ch, aggMetrics) + if err != nil { c.logger.Error("Failed to update network egress stats", "err", err) } }() @@ -597,7 +611,8 @@ func (c *ebpfCollector) Update(ch chan<- prometheus.Metric, cgroups []cgroup, ma go func() { defer wg.Done() - if err := c.updateNetIngress(ch, aggMetrics); err != nil { + err := c.updateNetIngress(ch, aggMetrics) + if err != nil { c.logger.Error("Failed to update network ingress stats", "err", err) } }() @@ -605,7 +620,8 @@ func (c *ebpfCollector) Update(ch chan<- prometheus.Metric, cgroups []cgroup, ma go func() { defer wg.Done() - if err := c.updateNetRetrans(ch, aggMetrics); err != nil { + err := c.updateNetRetrans(ch, aggMetrics) + if err != nil { c.logger.Error("Failed to update network retransmission stats", "err", err) } }() @@ -623,7 +639,8 @@ func (c *ebpfCollector) Stop(_ context.Context) error { // Close all probes for name, link := range c.links { - if err := link.Close(); err != nil { + err := link.Close() + if err != nil { c.logger.Error("Failed to close link", "func", name, "err", err) } } @@ -658,7 +675,9 @@ func (c *ebpfCollector) updateVFSWrite(ch chan<- prometheus.Metric, aggMetrics * // Update metrics to the channel for key, value := range aggMetric { ch <- prometheus.MustNewConstMetric(c.vfsWriteRequests, prometheus.CounterValue, float64(value.Calls), c.cgroupManager.name, c.hostname, key.UUID, key.Mount) + ch <- prometheus.MustNewConstMetric(c.vfsWriteBytes, prometheus.CounterValue, float64(value.Bytes), c.cgroupManager.name, c.hostname, key.UUID, key.Mount) + ch <- prometheus.MustNewConstMetric(c.vfsWriteErrors, prometheus.CounterValue, float64(value.Errors), c.cgroupManager.name, c.hostname, key.UUID, key.Mount) } @@ -682,7 +701,9 @@ func (c *ebpfCollector) updateVFSRead(ch chan<- prometheus.Metric, aggMetrics *a // Update metrics to the channel for key, value := range aggMetric { ch <- prometheus.MustNewConstMetric(c.vfsReadRequests, prometheus.CounterValue, float64(value.Calls), c.cgroupManager.name, c.hostname, key.UUID, key.Mount) + ch <- prometheus.MustNewConstMetric(c.vfsReadBytes, prometheus.CounterValue, float64(value.Bytes), c.cgroupManager.name, c.hostname, key.UUID, key.Mount) + ch <- prometheus.MustNewConstMetric(c.vfsReadErrors, prometheus.CounterValue, float64(value.Errors), c.cgroupManager.name, c.hostname, key.UUID, key.Mount) } @@ -706,6 +727,7 @@ func (c *ebpfCollector) updateVFSOpen(ch chan<- prometheus.Metric, aggMetrics *a // Update metrics to the channel for uuid, value := range aggMetric { ch <- prometheus.MustNewConstMetric(c.vfsOpenRequests, prometheus.CounterValue, float64(value.Calls), c.cgroupManager.name, c.hostname, uuid) + ch <- prometheus.MustNewConstMetric(c.vfsOpenErrors, prometheus.CounterValue, float64(value.Errors), c.cgroupManager.name, c.hostname, uuid) } @@ -729,6 +751,7 @@ func (c *ebpfCollector) updateVFSCreate(ch chan<- prometheus.Metric, aggMetrics // Update metrics to the channel for uuid, value := range aggMetric { ch <- prometheus.MustNewConstMetric(c.vfsCreateRequests, prometheus.CounterValue, 
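ebpfCollector.Update fans each independent updater out into its own goroutine and only logs failures instead of aborting the scrape. The shape, reduced to its essentials (the updater names are placeholders):

package main

import (
    "fmt"
    "sync"
)

// updateAll runs each updater concurrently and waits for all of them.
func updateAll(updaters map[string]func() error) {
    wg := sync.WaitGroup{}

    for name, update := range updaters {
        wg.Add(1)

        go func(name string, update func() error) {
            defer wg.Done()

            err := update()
            if err != nil {
                fmt.Printf("Failed to update %s stats: %v\n", name, err)
            }
        }(name, update)
    }

    wg.Wait()
}

func main() {
    updateAll(map[string]func() error{
        "VFS write":   func() error { return nil },
        "net ingress": func() error { return fmt.Errorf("map read failed") },
    })
}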
float64(value.Calls), c.cgroupManager.name, c.hostname, uuid) + ch <- prometheus.MustNewConstMetric(c.vfsCreateErrors, prometheus.CounterValue, float64(value.Errors), c.cgroupManager.name, c.hostname, uuid) } @@ -752,6 +775,7 @@ func (c *ebpfCollector) updateVFSUnlink(ch chan<- prometheus.Metric, aggMetrics // Update metrics to the channel for uuid, value := range aggMetric { ch <- prometheus.MustNewConstMetric(c.vfsUnlinkRequests, prometheus.CounterValue, float64(value.Calls), c.cgroupManager.name, c.hostname, uuid) + ch <- prometheus.MustNewConstMetric(c.vfsUnlinkErrors, prometheus.CounterValue, float64(value.Errors), c.cgroupManager.name, c.hostname, uuid) } @@ -775,6 +799,7 @@ func (c *ebpfCollector) updateNetIngress(ch chan<- prometheus.Metric, aggMetrics // Update metrics to the channel for key, value := range aggMetric { ch <- prometheus.MustNewConstMetric(c.netIngressPackets, prometheus.CounterValue, float64(value.Packets), c.cgroupManager.name, c.hostname, key.UUID, key.Proto, key.Family) + ch <- prometheus.MustNewConstMetric(c.netIngressBytes, prometheus.CounterValue, float64(value.Bytes), c.cgroupManager.name, c.hostname, key.UUID, key.Proto, key.Family) } @@ -798,6 +823,7 @@ func (c *ebpfCollector) updateNetEgress(ch chan<- prometheus.Metric, aggMetrics // Update metrics to the channel for key, value := range aggMetric { ch <- prometheus.MustNewConstMetric(c.netEgressPackets, prometheus.CounterValue, float64(value.Packets), c.cgroupManager.name, c.hostname, key.UUID, key.Proto, key.Family) + ch <- prometheus.MustNewConstMetric(c.netEgressBytes, prometheus.CounterValue, float64(value.Bytes), c.cgroupManager.name, c.hostname, key.UUID, key.Proto, key.Family) } @@ -821,6 +847,7 @@ func (c *ebpfCollector) updateNetRetrans(ch chan<- prometheus.Metric, aggMetrics // Update metrics to the channel for key, value := range aggMetric { ch <- prometheus.MustNewConstMetric(c.netRetransPackets, prometheus.CounterValue, float64(value.Packets), c.cgroupManager.name, c.hostname, key.UUID, key.Proto, key.Family) + ch <- prometheus.MustNewConstMetric(c.netRetransBytes, prometheus.CounterValue, float64(value.Bytes), c.cgroupManager.name, c.hostname, key.UUID, key.Proto, key.Family) } @@ -839,7 +866,8 @@ func (c *ebpfCollector) readMaps() (*aggMetrics, error) { // Start new profilers within security context if securityCtx, ok := c.securityContexts[ebpfReadBPFMapsCtx]; ok { - if err := securityCtx.Exec(dataPtr); err == nil { + err := securityCtx.Exec(dataPtr) + if err == nil { return dataPtr.aggMetrics, nil } else { return nil, err @@ -868,7 +896,8 @@ func (c *ebpfCollector) discoverCgroups(cgroups []cgroup) { // Get inode of the cgroup path if not already present in the cache if _, ok := c.cgroupPathIDCache[path]; !ok { - if inode, err := inode(path); err == nil { + inode, err := inode(path) + if err == nil { c.cgroupPathIDCache[path] = inode c.cgroupIDUUIDCache[inode] = uuid } diff --git a/pkg/collector/emissions.go b/pkg/collector/emissions.go index 80f3b167..afde89d1 100644 --- a/pkg/collector/emissions.go +++ b/pkg/collector/emissions.go @@ -87,7 +87,8 @@ func (c *emissionsCollector) Stop(_ context.Context) error { c.logger.Debug("Stopping", "collector", emissionsCollectorSubsystem) // Stop all providers to release any system resources that are being used - if err := c.emissionFactorProviders.Stop(); err != nil { + err := c.emissionFactorProviders.Stop() + if err != nil { c.logger.Error("Failed to stop emission factor providers", "err", err) return err diff --git a/pkg/collector/gpu.go 
b/pkg/collector/gpu.go index 48dabc86..a608af77 100644 --- a/pkg/collector/gpu.go +++ b/pkg/collector/gpu.go @@ -202,30 +202,33 @@ func (p *DeviceAttrsShared) UnmarshalXML(d *xml.Decoder, start xml.StartElement) DecCount string `xml:"decoder_count"` } - if err := d.DecodeElement(&tmp, &start); err != nil { + err := d.DecodeElement(&tmp, &start) + if err != nil { return err } - var err error - p.XMLName = tmp.XMLName // In case of errors set count to 1. This is especially important for // SMCount as we compute SMFrac with it and if we set it zero, fractions // will be NaN. - if p.SMCount, err = strconv.ParseUint(tmp.SMCount, 10, 64); err != nil { + p.SMCount, err = strconv.ParseUint(tmp.SMCount, 10, 64) + if err != nil { p.SMCount = 1 } - if p.CECount, err = strconv.ParseUint(tmp.CECount, 10, 64); err != nil { + p.CECount, err = strconv.ParseUint(tmp.CECount, 10, 64) + if err != nil { p.CECount = 1 } - if p.EncCount, err = strconv.ParseUint(tmp.EncCount, 10, 64); err != nil { + p.EncCount, err = strconv.ParseUint(tmp.EncCount, 10, 64) + if err != nil { p.EncCount = 1 } - if p.DecCount, err = strconv.ParseUint(tmp.DecCount, 10, 64); err != nil { + p.DecCount, err = strconv.ParseUint(tmp.DecCount, 10, 64) + if err != nil { p.DecCount = 1 } @@ -266,22 +269,24 @@ func (p *MIGDevice) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { Bar1Memory Memory `xml:"bar1_memory_usage"` } - if err := d.DecodeElement(&tmp, &start); err != nil { + err := d.DecodeElement(&tmp, &start) + if err != nil { return err } - var err error - // In case of errors return more intuitive error message. - if p.Index, err = strconv.ParseUint(tmp.Index, 10, 64); err != nil { + p.Index, err = strconv.ParseUint(tmp.Index, 10, 64) + if err != nil { return fmt.Errorf("invalid mig index %s: %w", tmp.Index, err) } - if p.GPUInstID, err = strconv.ParseUint(tmp.GPUInstID, 10, 64); err != nil { + p.GPUInstID, err = strconv.ParseUint(tmp.GPUInstID, 10, 64) + if err != nil { return fmt.Errorf("invalid mig gpu instance id %s: %w", tmp.GPUInstID, err) } - if p.ComputeInstID, err = strconv.ParseUint(tmp.ComputeInstID, 10, 64); err != nil { + p.ComputeInstID, err = strconv.ParseUint(tmp.ComputeInstID, 10, 64) + if err != nil { return fmt.Errorf("invalid mig compute instance id %s: %w", tmp.ComputeInstID, err) } @@ -584,7 +589,8 @@ func NewGPUSMI(k8sClient *ceems_k8s.Client, logger *slog.Logger) (*GPUSMI, error switch v.id { case nvidia: // Look up nvidia-smi command - if smiCmd, err := lookupSmiCmd(*nvidiaSmiPath, nvidiaSMIQueryCmd[0]); err == nil { + smiCmd, err := lookupSmiCmd(*nvidiaSmiPath, nvidiaSMIQueryCmd[0]) + if err == nil { vendors[iv].smiCmd = smiCmd vendors[iv].smiQueryCmd = nvidiaSMIQueryCmd } @@ -594,11 +600,13 @@ func NewGPUSMI(k8sClient *ceems_k8s.Client, logger *slog.Logger) (*GPUSMI, error // Always prefer amd-smi to rocm-smi. 
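The UnmarshalXML hunks just below decode numeric fields as strings first and default them to 1 when parsing fails, so the SM fractions computed later never hit a zero divisor. A self-contained sketch of that defaulting trick with invented struct, field, and element names:

package main

import (
    "encoding/xml"
    "fmt"
    "strconv"
)

type SharedAttrs struct {
    SMCount uint64
}

// UnmarshalXML decodes into a string-typed shadow struct, then parses,
// falling back to 1 on failure instead of propagating the error.
func (p *SharedAttrs) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
    var tmp struct {
        SMCount string `xml:"multiprocessor_count"`
    }

    err := d.DecodeElement(&tmp, &start)
    if err != nil {
        return err
    }

    p.SMCount, err = strconv.ParseUint(tmp.SMCount, 10, 64)
    if err != nil {
        p.SMCount = 1
    }

    return nil
}

func main() {
    var a SharedAttrs

    err := xml.Unmarshal([]byte("<shared><multiprocessor_count>N/A</multiprocessor_count></shared>"), &a)
    fmt.Println(a.SMCount, err) // 1 <nil>
}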
This is the preferred way // to query for AMD GPUs // Ref: https://rocm.blogs.amd.com/software-tools-optimization/amd-smi-overview/README.html#transitioning-from-rocm-smi - if smiCmd, err := lookupSmiCmd(*amdSmiPath, amdSMIQueryCmd[0]); err == nil { + smiCmd, err := lookupSmiCmd(*amdSmiPath, amdSMIQueryCmd[0]) + if err == nil { vendors[iv].smiCmd = smiCmd vendors[iv].smiQueryCmd = amdSMIQueryCmd } else { - if smiCmd, err := lookupSmiCmd(*rocmSmiPath, rocmSMIQueryCmd[0]); err == nil { + smiCmd, err := lookupSmiCmd(*rocmSmiPath, rocmSMIQueryCmd[0]) + if err == nil { vendors[iv].smiCmd = smiCmd vendors[iv].smiQueryCmd = rocmSMIQueryCmd } @@ -631,7 +639,8 @@ func NewGPUSMI(k8sClient *ceems_k8s.Client, logger *slog.Logger) (*GPUSMI, error smiQueryCmd = amdSMIQueryCmd } - if pods, err := k8sClient.ListPods(ctx, "", opts); err == nil && len(pods) > 0 { + pods, err := k8sClient.ListPods(ctx, "", opts) + if err == nil && len(pods) > 0 { vendors[iv].k8sNS = pods[0].Namespace vendors[iv].k8sPod = pods[0].Name @@ -931,7 +940,8 @@ func (g *GPUSMI) amdGPUDevices(vendor vendor) ([]Device, error) { // be a good starting point. // A playground that is useful: https://goplay.tools/snippet/gvpJ3B5Resq // This is taken from unit tests of upstream!! - if deviceProperties, err := parseAMDDevPropertiesFromPCIDevices(); err == nil { + deviceProperties, err := parseAMDDevPropertiesFromPCIDevices() + if err == nil { for idev, dev := range devices { // Ensure that we have device properties corresponding to physical bus ID if devProperties, ok := deviceProperties[strings.ToLower(dev.BusID.pathName)]; ok { @@ -970,7 +980,8 @@ func (g *GPUSMI) execute(vendor vendor, cmd []string) ([]byte, error) { // If smi command is found, always prefer to execute it natively if vendor.smiCmd != "" { - if cmdOut, err := osexec.ExecuteContext(ctx, vendor.smiCmd, cmd[1:], nil); err == nil { + cmdOut, err := osexec.ExecuteContext(ctx, vendor.smiCmd, cmd[1:], nil) + if err == nil { g.logger.Debug("GPU query command executed natively", "vendor", vendor.name, "cmd", strings.Join(cmd, " ")) return cmdOut, nil @@ -979,7 +990,8 @@ func (g *GPUSMI) execute(vendor vendor, cmd []string) ([]byte, error) { // If k8sclient is available, attempt to execute command inside container if g.k8sClient != nil { - if stdout, _, err := g.k8sClient.Exec(ctx, vendor.k8sNS, vendor.k8sPod, vendor.k8sContainer, cmd); err == nil { + stdout, _, err := g.k8sClient.Exec(ctx, vendor.k8sNS, vendor.k8sPod, vendor.k8sContainer, cmd) + if err == nil { g.logger.Debug("GPU query command executed inside pod", "vendor", vendor.name, "cmd", strings.Join(cmd, " "), "pod", fmt.Sprintf("%s/%s/%s", vendor.k8sNS, vendor.k8sPod, vendor.k8sContainer)) return stdout, nil @@ -1006,17 +1018,20 @@ func detectVendors() ([]vendor, error) { // PCI class 0x12 is for processing accelerators. MI300X GPUs seem to have this class ID.
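lookupSmiCmd, as used above, prefers an explicitly configured binary path and only then searches PATH; NewGPUSMI chains amd-smi before rocm-smi. A sketch of both pieces; the command names here are examples:

package main

import (
    "fmt"
    "os"
    "os/exec"
)

// lookupCmd returns the custom path if it exists, otherwise the fallback
// command if it is on PATH.
func lookupCmd(customCmd, fallbackCmd string) (string, error) {
    if customCmd != "" {
        _, err := os.Stat(customCmd)
        if err != nil {
            return "", err
        }

        return customCmd, nil
    }

    _, err := exec.LookPath(fallbackCmd)
    if err != nil {
        return "", err
    }

    return fallbackCmd, nil
}

func main() {
    // Prefer amd-smi and fall back to rocm-smi, as NewGPUSMI does.
    cmd, err := lookupCmd("", "amd-smi")
    if err != nil {
        cmd, err = lookupCmd("", "rocm-smi")
    }

    fmt.Println(cmd, err)
}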
// Check if class starts with "0x03" and if it does, it means it is a GPU device // Ref: https://pcisig.com/sites/default/files/files/PCI_Code-ID_r_1_11__v24_Jan_2019.pdf - if classBytes, err := os.ReadFile(filepath.Join(devPath, "class")); err == nil { + classBytes, err := os.ReadFile(filepath.Join(devPath, "class")) + if err == nil { if class := strings.TrimSpace(strings.Trim(string(classBytes), "\n")); !strings.HasPrefix(class, "0x03") && !strings.HasPrefix(class, "0x12") { continue } - if idBytes, err := os.ReadFile(filepath.Join(devPath, "vendor")); err == nil { + idBytes, err := os.ReadFile(filepath.Join(devPath, "vendor")) + if err == nil { // Strip new lines and spaces idString := strings.TrimSpace(strings.Trim(string(idBytes), "\n")) // Let Go pick the base as vendor IDs will be prefixed by "0x" - if id, err := strconv.ParseUint(idString, 0, 16); err == nil { + id, err := strconv.ParseUint(idString, 0, 16) + if err == nil { vendorIDs = append(vendorIDs, id) } } @@ -1047,13 +1062,15 @@ func detectVendors() ([]vendor, error) { // to `nvidia-smi`/`rocm-smi` command on host. func lookupSmiCmd(customCmd string, fallbackCmd string) (string, error) { if customCmd != "" { - if _, err := os.Stat(customCmd); err != nil { + _, err := os.Stat(customCmd) + if err != nil { return "", err } return customCmd, nil } else { - if _, err := exec.LookPath(fallbackCmd); err != nil { + _, err := exec.LookPath(fallbackCmd) + if err != nil { return "", err } else { return fallbackCmd, nil @@ -1070,7 +1087,9 @@ func parseNvidiaSmiOutput(cmdOutput []byte) ([]Device, error) { // Read XML byte array into gpu var nvidiaSMILog NVIDIASMILog - if err := xml.Unmarshal(cmdOutput, &nvidiaSMILog); err != nil { + + err := xml.Unmarshal(cmdOutput, &nvidiaSMILog) + if err != nil { return nil, fmt.Errorf("failed to parse nvidia-smi xml log %w", err) } @@ -1248,7 +1267,9 @@ func parseRocmSmioutput(cmdOutput []byte) ([]Device, error) { // Unmarshall output into AMDSMILog struct amdDevs := make(map[string]ROCMSMI) - if err := json.Unmarshal(cmdOutput, &amdDevs); err != nil { + + err := json.Unmarshal(cmdOutput, &amdDevs) + if err != nil { return nil, fmt.Errorf("failed to parse ROCM SMI output: %w", err) } @@ -1260,8 +1281,10 @@ func parseRocmSmioutput(cmdOutput []byte) ([]Device, error) { // Sort cards slices based on index slices.SortFunc(cardIDs, func(a, b string) int { - if aIndx, err := strconv.ParseInt(strings.TrimPrefix(a, "card"), 10, 64); err == nil { - if bIndx, err := strconv.ParseInt(strings.TrimPrefix(b, "card"), 10, 64); err == nil { + aIndx, err := strconv.ParseInt(strings.TrimPrefix(a, "card"), 10, 64) + if err == nil { + bIndx, err := strconv.ParseInt(strings.TrimPrefix(b, "card"), 10, 64) + if err == nil { return cmp.Compare(aIndx, bIndx) } } @@ -1393,7 +1416,9 @@ func parseAmdSmioutput(cmdOutput []byte) ([]Device, error) { // Unmarshall output into AMDSMILog struct var amdDevs []AMDGPU - if err := json.Unmarshal(cmdOutput, &amdDevs); err != nil { + + err := json.Unmarshal(cmdOutput, &amdDevs) + if err != nil { return nil, fmt.Errorf("failed to parse AMD SMI output: %w", err) } @@ -1563,7 +1588,8 @@ func parseAMDDevPropertiesFromPCIDevices() (map[string][]AMDNodeProperties, erro } // Convert render ID to uint64 - if rID, err := strconv.ParseUint(id, 10, 64); err == nil { + rID, err := strconv.ParseUint(id, 10, 64) + if err == nil { if val, ok := renderDevIDs[rID]; ok { bdf := filepath.Base(path) devicePluginDevIDs[bdf] = val diff --git a/pkg/collector/helper.go b/pkg/collector/helper.go index 
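The parseRocmSmioutput hunk above sorts card names numerically, since a plain string sort puts "card10" before "card2". The technique in isolation:

package main

import (
    "cmp"
    "fmt"
    "slices"
    "strconv"
    "strings"
)

func main() {
    cardIDs := []string{"card10", "card2", "card0", "card1"}

    // Compare the numeric suffixes; on a parse failure, treat the pair as
    // equal, matching the defensive behaviour of the source.
    slices.SortFunc(cardIDs, func(a, b string) int {
        aIdx, err := strconv.ParseInt(strings.TrimPrefix(a, "card"), 10, 64)
        if err != nil {
            return 0
        }

        bIdx, err := strconv.ParseInt(strings.TrimPrefix(b, "card"), 10, 64)
        if err != nil {
            return 0
        }

        return cmp.Compare(aIdx, bIdx)
    })

    fmt.Println(cardIDs) // [card0 card1 card2 card10]
}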
f6113d8d..a9bac8c4 100644 --- a/pkg/collector/helper.go +++ b/pkg/collector/helper.go @@ -378,25 +378,19 @@ func lookupCgroupRoots(rootDir string, name string) ([]string, error) { var foundCgroupRoots []string // Walk through all cgroups and get cgroup paths - if err := filepath.WalkDir(rootDir, func(p string, info fs.DirEntry, err error) error { + err := filepath.WalkDir(rootDir, func(p string, info fs.DirEntry, err error) error { if err != nil { return err } - // Ignore paths that are not directories if !info.IsDir() { return nil } - // Check if name is in path - // Once we add the directory to foundCgroupRoots, we need to - // skip all the sub directories of this directory. - // We are lookin only for leaf folders if strings.Contains(p, name) { - // Get relative path of cgroup rel, err := filepath.Rel(rootDir, p) if err != nil { - return nil //nolint:nilerr + return err } foundCgroupRoots = append(foundCgroupRoots, rel) @@ -405,7 +399,8 @@ func lookupCgroupRoots(rootDir string, name string) ([]string, error) { } return nil - }); err != nil { + }) + if err != nil { return nil, err } @@ -442,7 +437,8 @@ func perfEventsAvailable() error { // // Even with paranoid set to -1, we still need CAP_PERFMON to be // able to open perf events for ANY process on the host. - if paranoid, err := fs.SysctlInts("kernel.perf_event_paranoid"); err == nil { + paranoid, err := fs.SysctlInts("kernel.perf_event_paranoid") + if err == nil { if len(paranoid) == 1 && paranoid[0] > 2 { return fmt.Errorf( "perf_event_open syscall is not possible with perf_event_paranoid=%d. Set it to value 2", diff --git a/pkg/collector/helper_test.go b/pkg/collector/helper_test.go index 6cad3c90..994d4ac3 100644 --- a/pkg/collector/helper_test.go +++ b/pkg/collector/helper_test.go @@ -74,6 +74,7 @@ func TestGokitLogger(t *testing.T) { require.NoError(t, err) var got logLine + err = json.Unmarshal(buf.Bytes(), &got) require.NoError(t, err) @@ -87,7 +88,7 @@ func TestGokitLogger(t *testing.T) { err = json.Unmarshal(buf.Bytes(), &got) require.NoError(t, err) - assert.Equal(t, logLine{strings.ToUpper(lvl), "message", "helper_test.go:85", 123, "first"}, got) + assert.Equal(t, logLine{strings.ToUpper(lvl), "message", "helper_test.go:86", 123, "first"}, got) } } diff --git a/pkg/collector/hwmon.go b/pkg/collector/hwmon.go index 381a9659..dce7cbdc 100644 --- a/pkg/collector/hwmon.go +++ b/pkg/collector/hwmon.go @@ -90,7 +90,8 @@ func NewHwmonCollector(logger *slog.Logger) (Collector, error) { } // Discover monitors - if err := hwmonCollector.discoverMonitors(); err != nil { + err := hwmonCollector.discoverMonitors() + if err != nil { logger.Error("Failed to discover power and/or energy hwmon", "err", err) return nil, err @@ -172,7 +173,8 @@ func (c *hwmonCollector) discoverMonitors() error { continue } - if mon, err := getHwmon(hwmonXPathName); err == nil && mon != nil { + mon, err := getHwmon(hwmonXPathName) + if err == nil && mon != nil { c.monitors = append(c.monitors, mon) } } @@ -197,7 +199,8 @@ func getHwmon(dir string) (*hwmon, error) { return nil, err } - if _, err := os.Stat(filepath.Join(dir, "device")); err == nil { + _, err = os.Stat(filepath.Join(dir, "device")) + if err == nil { s, err := collectSensors(filepath.Join(dir, "device")) if err != nil { return nil, err @@ -213,7 +216,8 @@ func getHwmon(dir string) (*hwmon, error) { hwmon := &hwmon{dir: dir, name: hwmonName, sensors: sensors} - if hwmonChipName, err := hwmonHumanReadableChipName(dir); err == nil { + hwmonChipName, err := hwmonHumanReadableChipName(dir) + if err == 
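perfEventsAvailable, touched above, gates the collector on kernel.perf_event_paranoid. A sketch of that check using the same prometheus/procfs SysctlInts call, assuming /proc is mounted at its default location:

package main

import (
    "fmt"

    "github.com/prometheus/procfs"
)

// perfParanoidOK returns an error when perf_event_paranoid is above 2,
// where perf_event_open is off-limits without CAP_PERFMON.
func perfParanoidOK() error {
    fs, err := procfs.NewFS("/proc")
    if err != nil {
        return err
    }

    paranoid, err := fs.SysctlInts("kernel.perf_event_paranoid")
    if err != nil {
        return err
    }

    if len(paranoid) == 1 && paranoid[0] > 2 {
        return fmt.Errorf("perf_event_open not possible with perf_event_paranoid=%d", paranoid[0])
    }

    return nil
}

func main() {
    fmt.Println(perfParanoidOK())
}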
nil { hwmon.chipName = hwmonChipName } @@ -320,7 +324,8 @@ func parseSensorFilename(filename string) (bool, string, int, string) { } if match == "id" && len(matches[i]) > 0 { - if num, err := strconv.Atoi(matches[i]); err == nil { + num, err := strconv.Atoi(matches[i]) + if err == nil { sensorNum = num } else { return false, sensorType, sensorNum, sensorProperty @@ -385,7 +390,8 @@ func readSensorValue(file string) float64 { return 0 } - if parsedValue, err := strconv.ParseFloat(strings.Trim(string(raw), "\n"), 64); err == nil { + parsedValue, err := strconv.ParseFloat(strings.Trim(string(raw), "\n"), 64) + if err == nil { return parsedValue } diff --git a/pkg/collector/infiniband.go b/pkg/collector/infiniband.go index c4f34813..d74bfc74 100644 --- a/pkg/collector/infiniband.go +++ b/pkg/collector/infiniband.go @@ -113,6 +113,7 @@ func (c *infinibandCollector) Update(ch chan<- prometheus.Metric) error { []string{"device", "board_id", "firmware_version", "hca_type"}, nil, ) + infoValue := 1.0 ch <- prometheus.MustNewConstMetric(infoDesc, prometheus.GaugeValue, infoValue, device.Name, device.BoardID, device.FirmwareVersion, device.HCAType) diff --git a/pkg/collector/ipmi.go b/pkg/collector/ipmi.go index 0c16072a..5e8707dd 100644 --- a/pkg/collector/ipmi.go +++ b/pkg/collector/ipmi.go @@ -268,7 +268,8 @@ func NewIPMICollector(logger *slog.Logger) (Collector, error) { } if *ipmiDcmiCmd == "" { - if cmdSlice, err = findIPMICmd(); err != nil { + cmdSlice, err = findIPMICmd() + if err != nil { logger.Info("None of ipmitool,ipmiutil,ipmi-dcmi commands found. Using native implementation using OpenIPMI interface") execMode = nativeMode @@ -301,7 +302,8 @@ func NewIPMICollector(logger *slog.Logger) (Collector, error) { // Eventually we drop all the privileges and use cap_setuid and cap_setgid to // execute ipmi command in subprocess as root. // So we set execMode as capabilityMode here too. - if _, err := osexec.Execute(cmdSlice[0], cmdSlice[1:], nil); err == nil { + _, err = osexec.Execute(cmdSlice[0], cmdSlice[1:], nil) + if err == nil { execMode = capabilityMode goto outside @@ -310,7 +312,8 @@ func NewIPMICollector(logger *slog.Logger) (Collector, error) { // If ipmiDcmiCmd failed to run and if sudo is not already present in command, // add sudo to command and execute. If current user has sudo rights it will be a success if cmdSlice[0] != sudoMode { - if _, err := osexec.ExecuteWithTimeout(sudoMode, cmdSlice, 1, nil); err == nil { + _, err := osexec.ExecuteWithTimeout(sudoMode, cmdSlice, 1, nil) + if err == nil { execMode = sudoMode goto outside @@ -319,7 +322,8 @@ func NewIPMICollector(logger *slog.Logger) (Collector, error) { // As last attempt, run the command as root user by forking subprocess // as root. 
If there is setuid cap on the process, it will be a success - if _, err := osexec.ExecuteAs(cmdSlice[0], cmdSlice[1:], 0, 0, nil); err == nil { + _, err = osexec.ExecuteAs(cmdSlice[0], cmdSlice[1:], 0, 0, nil) + if err == nil { execMode = capabilityMode goto outside @@ -496,7 +500,8 @@ func (c *impiCollector) Stop(_ context.Context) error { // Close fd when native mode is being used if c.execMode == nativeMode { - if err := c.client.Close(); err != nil { + err := c.client.Close() + if err != nil { c.logger.Debug("Failed to close OpenIPMI device fd", "err", err) return err @@ -587,7 +592,9 @@ func (c *impiCollector) getIPMIReadings() (*ipmiReadings, error) { func (c *impiCollector) parseCapmcOutput(stdOut []byte) (map[string]float64, error) { // Unmarshal JSON output var data map[string]any - if err := json.Unmarshal(stdOut, &data); err != nil { + + err := json.Unmarshal(stdOut, &data) + if err != nil { return nil, fmt.Errorf("%s Power readings command failed", crayPowerCap) } @@ -631,8 +638,10 @@ func (c *impiCollector) parseIPMIOutput(stdOut []byte) (map[string]float64, erro if value == "active" || value == "Active" || value == "activated" { // Get power readings for rType, regex := range ipmiDCMIPowerReadingRegexMap { - if reading, err := getValue(stdOut, regex); err == nil { - if readingValue, err := strconv.ParseFloat(reading, 64); err == nil { + reading, err := getValue(stdOut, regex) + if err == nil { + readingValue, err := strconv.ParseFloat(reading, 64) + if err == nil { powerReadings["dcmi_"+rType] = readingValue } } @@ -682,11 +691,13 @@ func (c *impiCollector) executeCmdInSecurityContext() ([]byte, error) { // Read stdOut of command into data if securityCtx, ok := c.securityContexts[ipmiExecCmdCtx]; ok { - if err := securityCtx.Exec(dataPtr); err != nil { + err := securityCtx.Exec(dataPtr) + if err != nil { return nil, err } } else { - if err := security.ExecAsUser(dataPtr); err != nil { + err := security.ExecAsUser(dataPtr) + if err != nil { return nil, err } } @@ -706,7 +717,8 @@ func (c *impiCollector) doRequestInSecurityContext() (*ipmiReadings, error) { if securityCtx, ok := c.securityContexts[openIPMICtx]; ok { // Always return readings as we might have partial result // in readings - if err := securityCtx.Exec(dataPtr); err != nil { + err := securityCtx.Exec(dataPtr) + if err != nil { return dataPtr.readings, err } } else { @@ -758,12 +770,15 @@ func doIPMIRequests(data any) error { func findIPMICmd() ([]string, error) { for _, cmd := range ipmiDcmiCmds { cmdSlice := strings.Split(cmd, " ") - if _, err := exec.LookPath(cmdSlice[0]); err == nil { + + _, err := exec.LookPath(cmdSlice[0]) + if err == nil { return cmdSlice, nil } // Check if binary exists in /sbin or /usr/sbin - if _, err := lookPath(cmdSlice[0]); err == nil { + _, err = lookPath(cmdSlice[0]) + if err == nil { return cmdSlice, nil } } diff --git a/pkg/collector/k8s.go b/pkg/collector/k8s.go index 5201a6fe..58ac96a0 100644 --- a/pkg/collector/k8s.go +++ b/pkg/collector/k8s.go @@ -161,7 +161,8 @@ func NewK8sCollector(logger *slog.Logger) (Collector, error) { } // Attempt to get GPU devices - if err := gpuSMI.Discover(); err != nil { + err = gpuSMI.Discover() + if err != nil { // If we failed to fetch GPUs that are from supported // vendor, return with error logger.Error("Error fetching GPU devices", "err", err) @@ -283,7 +284,8 @@ func (c *k8sCollector) Update(ch chan<- prometheus.Metric) error { defer wg.Done() // Update cgroup metrics - if err := c.cgroupCollector.Update(ch, cgroups); err != nil { + err := 
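findIPMICmd probes a list of candidate DCMI commands and returns the first binary found on PATH; NewIPMICollector then escalates (plain execution, sudo, setuid fork) until one mode works. A sketch of the probing half; the candidate command strings are illustrative, not the collector's exact list:

package main

import (
    "fmt"
    "os/exec"
    "strings"
)

// findPowerCmd returns the first candidate whose binary is on PATH, split
// into argv form ready for execution.
func findPowerCmd() ([]string, error) {
    candidates := []string{
        "ipmitool dcmi power reading",
        "ipmi-dcmi --get-system-power-statistics",
        "ipmiutil dcmi power",
    }

    for _, cmd := range candidates {
        cmdSlice := strings.Split(cmd, " ")

        _, err := exec.LookPath(cmdSlice[0])
        if err == nil {
            return cmdSlice, nil
        }
    }

    return nil, fmt.Errorf("no IPMI DCMI command found")
}

func main() {
    cmd, err := findPowerCmd()
    fmt.Println(cmd, err)
}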
c.cgroupCollector.Update(ch, cgroups) + if err != nil { c.logger.Error("Failed to update cgroup stats", "err", err) } @@ -300,7 +302,8 @@ func (c *k8sCollector) Update(ch chan<- prometheus.Metric) error { defer wg.Done() // Update perf metrics - if err := c.perfCollector.Update(ch, cgroups, k8sCollectorSubsystem); err != nil { + err := c.perfCollector.Update(ch, cgroups, k8sCollectorSubsystem) + if err != nil { c.logger.Error("Failed to update perf stats", "err", err) } }() @@ -313,7 +316,8 @@ func (c *k8sCollector) Update(ch chan<- prometheus.Metric) error { defer wg.Done() // Update ebpf metrics - if err := c.ebpfCollector.Update(ch, cgroups, k8sCollectorSubsystem); err != nil { + err := c.ebpfCollector.Update(ch, cgroups, k8sCollectorSubsystem) + if err != nil { c.logger.Error("Failed to update IO and/or network stats", "err", err) } }() @@ -326,7 +330,8 @@ func (c *k8sCollector) Update(ch chan<- prometheus.Metric) error { defer wg.Done() // Update RDMA metrics - if err := c.rdmaCollector.Update(ch, cgroups, k8sCollectorSubsystem); err != nil { + err := c.rdmaCollector.Update(ch, cgroups, k8sCollectorSubsystem) + if err != nil { c.logger.Error("Failed to update RDMA stats", "err", err) } }() @@ -343,33 +348,38 @@ func (c *k8sCollector) Stop(ctx context.Context) error { c.logger.Debug("Stopping", "collector", k8sCollectorSubsystem) // Stop k8s client - if err := c.k8sClient.Close(); err != nil { + err := c.k8sClient.Close() + if err != nil { c.logger.Error("Failed to stop k8s client", "err", err) } // Stop all sub collectors // Stop cgroupCollector - if err := c.cgroupCollector.Stop(ctx); err != nil { + err = c.cgroupCollector.Stop(ctx) + if err != nil { c.logger.Error("Failed to stop cgroup collector", "err", err) } // Stop perfCollector if perfCollectorEnabled() { - if err := c.perfCollector.Stop(ctx); err != nil { + err := c.perfCollector.Stop(ctx) + if err != nil { c.logger.Error("Failed to stop perf collector", "err", err) } } // Stop ebpfCollector if ebpfCollectorEnabled() { - if err := c.ebpfCollector.Stop(ctx); err != nil { + err := c.ebpfCollector.Stop(ctx) + if err != nil { c.logger.Error("Failed to stop ebpf collector", "err", err) } } // Stop rdmaCollector if rdmaCollectorEnabled() { - if err := c.rdmaCollector.Stop(ctx); err != nil { + err := c.rdmaCollector.Stop(ctx) + if err != nil { c.logger.Error("Failed to stop RDMA collector", "err", err) } } diff --git a/pkg/collector/k8s_test.go b/pkg/collector/k8s_test.go index aeb2cd8d..1ae49514 100644 --- a/pkg/collector/k8s_test.go +++ b/pkg/collector/k8s_test.go @@ -27,7 +27,8 @@ func TestNewK8sCollector(t *testing.T) { // Test k8s API server server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if data, err := os.ReadFile("testdata/k8s/pods-metadata.json"); err == nil { + data, err := os.ReadFile("testdata/k8s/pods-metadata.json") + if err == nil { w.Header().Add("Content-Type", "application/json") w.Header().Add("Content-Type", "application/vnd.kubernetes.protobuf") w.Write(data) @@ -70,6 +71,7 @@ users: require.NoError(t, err) var podResourcesResp podresourcesapi.ListPodResourcesResponse + err = json.Unmarshal(podResourceContent, &podResourcesResp) require.NoError(t, err) @@ -135,6 +137,7 @@ func TestK8sPodDevices(t *testing.T) { require.NoError(t, err) var podResourcesResp podresourcesapi.ListPodResourcesResponse + err = json.Unmarshal(content, &podResourcesResp) require.NoError(t, err) @@ -143,6 +146,7 @@ func TestK8sPodDevices(t *testing.T) { require.NoError(t, err) var podsMetadata 
v1.PodList + err = json.Unmarshal(content, &podsMetadata) require.NoError(t, err) diff --git a/pkg/collector/kernel.go b/pkg/collector/kernel.go index 708dfd77..64ea7ef0 100644 --- a/pkg/collector/kernel.go +++ b/pkg/collector/kernel.go @@ -198,7 +198,8 @@ func KernelStringToNumeric(ver string) int64 { func KernelVersion() (int64, error) { var versionStrings []string - if versionSig, err := os.ReadFile(procFilePath("version_signature")); err == nil { + versionSig, err := os.ReadFile(procFilePath("version_signature")) + if err == nil { versionStrings = strings.Fields(string(versionSig)) } @@ -208,7 +209,7 @@ func KernelVersion() (int64, error) { var uname unix.Utsname - err := unix.Uname(&uname) + err = unix.Uname(&uname) if err != nil { return 0, err } diff --git a/pkg/collector/libvirt.go b/pkg/collector/libvirt.go index c9d67112..f7dc618d 100644 --- a/pkg/collector/libvirt.go +++ b/pkg/collector/libvirt.go @@ -210,7 +210,8 @@ func NewLibvirtCollector(logger *slog.Logger) (Collector, error) { } // Attempt to get GPU devices - if err := gpuSMI.Discover(); err != nil { + err = gpuSMI.Discover() + if err != nil { // If we failed to fetch GPUs that are from supported // vendor, return with error logger.Error("Error fetching GPU devices", "err", err) @@ -320,7 +321,8 @@ func (c *libvirtCollector) Update(ch chan<- prometheus.Metric) error { defer wg.Done() // Update cgroup metrics - if err := c.cgroupCollector.Update(ch, cgroups); err != nil { + err := c.cgroupCollector.Update(ch, cgroups) + if err != nil { c.logger.Error("Failed to update cgroup stats", "err", err) } @@ -337,7 +339,8 @@ func (c *libvirtCollector) Update(ch chan<- prometheus.Metric) error { defer wg.Done() // Update ebpf metrics - if err := c.ebpfCollector.Update(ch, cgroups, libvirtCollectorSubsystem); err != nil { + err := c.ebpfCollector.Update(ch, cgroups, libvirtCollectorSubsystem) + if err != nil { c.logger.Error("Failed to update IO and/or network stats", "err", err) } }() @@ -350,7 +353,8 @@ func (c *libvirtCollector) Update(ch chan<- prometheus.Metric) error { defer wg.Done() // Update RDMA metrics - if err := c.rdmaCollector.Update(ch, cgroups, libvirtCollectorSubsystem); err != nil { + err := c.rdmaCollector.Update(ch, cgroups, libvirtCollectorSubsystem) + if err != nil { c.logger.Error("Failed to update RDMA stats", "err", err) } }() @@ -368,27 +372,31 @@ func (c *libvirtCollector) Stop(ctx context.Context) error { // Stop all sub collectors // Stop cgroupCollector - if err := c.cgroupCollector.Stop(ctx); err != nil { + err := c.cgroupCollector.Stop(ctx) + if err != nil { c.logger.Error("Failed to stop cgroup collector", "err", err) } // Stop perfCollector if perfCollectorEnabled() { - if err := c.perfCollector.Stop(ctx); err != nil { + err := c.perfCollector.Stop(ctx) + if err != nil { c.logger.Error("Failed to stop perf collector", "err", err) } } // Stop ebpfCollector if ebpfCollectorEnabled() { - if err := c.ebpfCollector.Stop(ctx); err != nil { + err := c.ebpfCollector.Stop(ctx) + if err != nil { c.logger.Error("Failed to stop ebpf collector", "err", err) } } // Stop rdmaCollector if rdmaCollectorEnabled() { - if err := c.rdmaCollector.Stop(ctx); err != nil { + err := c.rdmaCollector.Stop(ctx) + if err != nil { c.logger.Error("Failed to stop RDMA collector", "err", err) } } @@ -495,7 +503,8 @@ func (c *libvirtCollector) instanceProperties(instanceID string) *instanceProper } if securityCtx, ok := c.securityContexts[libvirtReadXMLCtx]; ok { - if err := securityCtx.Exec(dataPtr); err != nil { + err := 
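KernelVersion and KernelStringToNumeric, touched above, reduce a version string to a single comparable integer. One plausible encoding is the kernel's own KERNEL_VERSION(a,b,c) macro; this sketch assumes that encoding and is not necessarily what CEEMS uses:

package main

import (
    "fmt"
    "regexp"
    "strconv"
)

// versionToNumeric turns "5.15.0-91-generic" into (5<<16)+(15<<8)+0 so
// versions can be compared with ordinary integer operators.
func versionToNumeric(ver string) int64 {
    m := regexp.MustCompile(`^(\d+)\.(\d+)(?:\.(\d+))?`).FindStringSubmatch(ver)
    if m == nil {
        return 0
    }

    var parts [3]int64

    for i := range 3 {
        if m[i+1] != "" {
            parts[i], _ = strconv.ParseInt(m[i+1], 10, 64)
        }
    }

    return parts[0]<<16 + parts[1]<<8 + parts[2]
}

func main() {
    fmt.Println(versionToNumeric("5.15.0-91-generic") >= versionToNumeric("5.11"))
}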
securityCtx.Exec(dataPtr) + if err != nil { c.logger.Error( "Failed to run inside security context", "instance_id", instanceID, "err", err, ) @@ -549,7 +558,8 @@ func (c *libvirtCollector) updateDeviceInstances(cgroups []cgroup) { // If vGPU is activated on at least one GPU, update mdevs if c.vGPUActivated { - if err := c.gpuSMI.UpdateGPUMdevs(); err != nil { + err := c.gpuSMI.UpdateGPUMdevs() + if err != nil { c.logger.Error("Failed to update GPU mdevs", "err", err) } } @@ -631,7 +641,8 @@ func readLibvirtXMLFile(data any) error { xmlFilePath := filepath.Join(d.xmlPath, d.instanceID+".xml") // If file does not exist return error - if _, err := os.Stat(xmlFilePath); err != nil { + _, err := os.Stat(xmlFilePath) + if err != nil { return err } @@ -643,7 +654,9 @@ func readLibvirtXMLFile(data any) error { // Read XML byte array into domain var domain Domain - if err := xml.Unmarshal(xmlByteArray, &domain); err != nil { + + err = xml.Unmarshal(xmlByteArray, &domain) + if err != nil { return err } @@ -694,6 +707,7 @@ func readLibvirtXMLFile(data any) error { } } } + outer_loop: } diff --git a/pkg/collector/libvirt_test.go b/pkg/collector/libvirt_test.go index 7950fb12..91134d67 100644 --- a/pkg/collector/libvirt_test.go +++ b/pkg/collector/libvirt_test.go @@ -206,8 +206,6 @@ func TestInstancePropsCaching(t *testing.T) { }, } - noOpLogger := noOpLogger - // Instantiate a new instance of gpuSMI struct gpu, err := NewGPUSMI(nil, noOpLogger) require.NoError(t, err) diff --git a/pkg/collector/meminfo.go b/pkg/collector/meminfo.go index 1d03b51a..726e74fc 100644 --- a/pkg/collector/meminfo.go +++ b/pkg/collector/meminfo.go @@ -82,6 +82,7 @@ func (c *meminfoCollector) Update(ch chan<- prometheus.Metric) error { } else { metricType = prometheus.GaugeValue } + ch <- prometheus.MustNewConstMetric( prometheus.NewDesc( prometheus.BuildFQName(Namespace, memInfoSubsystem, k), diff --git a/pkg/collector/meminfo_test.go b/pkg/collector/meminfo_test.go index 6da74603..acb45d41 100644 --- a/pkg/collector/meminfo_test.go +++ b/pkg/collector/meminfo_test.go @@ -42,6 +42,7 @@ func TestMeminfoCollector(t *testing.T) { func TestMemInfo(t *testing.T) { file, err := os.Open("testdata/proc/meminfo") require.NoError(t, err) + defer file.Close() memInfo, err := parseMemInfo(file) diff --git a/pkg/collector/perf.go b/pkg/collector/perf.go index deb2f66a..4daa8bdc 100644 --- a/pkg/collector/perf.go +++ b/pkg/collector/perf.go @@ -169,7 +169,8 @@ func NewPerfCollector(logger *slog.Logger, cgManager *cgroupManager) (*perfColle } // Check if perf_event_paranoid allows to read perf events - if err := perfEventsAvailable(); err != nil { + err := perfEventsAvailable() + if err != nil { logger.Error("Perf events are not available", "err", err) return nil, err @@ -564,7 +565,8 @@ func (c *perfCollector) Update(ch chan<- prometheus.Metric, cgroups []cgroup, ma // Remove all profilers that have already finished // Ignore all errors - if err := c.closeProfilers(activePIDs); err != nil { + err = c.closeProfilers(activePIDs) + if err != nil { c.logger.Error("failed to close profilers counters", "err", err) } @@ -576,15 +578,18 @@ func (c *perfCollector) Update(ch chan<- prometheus.Metric, cgroups []cgroup, ma for _, cgroup := range cgroups { uuid := cgroup.uuid - if err := c.updateHardwareCounters(uuid, cgroup.procs, ch); err != nil { + err := c.updateHardwareCounters(uuid, cgroup.procs, ch) + if err != nil { c.logger.Error("failed to update hardware counters", "uuid", uuid, "err", err) } - if err := c.updateSoftwareCounters(uuid,
cgroup.procs, ch); err != nil { + err = c.updateSoftwareCounters(uuid, cgroup.procs, ch) + if err != nil { c.logger.Error("failed to update software counters", "uuid", uuid, "err", err) } - if err := c.updateCacheCounters(uuid, cgroup.procs, ch); err != nil { + err = c.updateCacheCounters(uuid, cgroup.procs, ch) + if err != nil { c.logger.Error("failed to update cache counters", "uuid", uuid, "err", err) } } @@ -597,7 +602,8 @@ func (c *perfCollector) Stop(_ context.Context) error { c.logger.Debug("Stopping", "sub_collector", perfCollectorSubsystem) // Close all profilers - if err := c.closeProfilers([]int{}); err != nil { + err := c.closeProfilers([]int{}) + if err != nil { c.logger.Error("failed to close profilers counters", "err", err) } @@ -737,7 +743,9 @@ func (c *perfCollector) updateHardwareCounters( if hwProfiler, ok := c.perfHwProfilers[pid]; ok { hwProfile := &perf.HardwareProfile{} - if err := (*hwProfiler).Profile(hwProfile); err != nil { + + err := (*hwProfiler).Profile(hwProfile) + if err != nil { errs = errors.Join(errs, fmt.Errorf("%w: %d", err, pid)) continue @@ -827,7 +835,9 @@ func (c *perfCollector) updateSoftwareCounters( if swProfiler, ok := c.perfSwProfilers[pid]; ok { swProfile := &perf.SoftwareProfile{} - if err := (*swProfiler).Profile(swProfile); err != nil { + + err := (*swProfiler).Profile(swProfile) + if err != nil { errs = errors.Join(errs, fmt.Errorf("%w: %d", err, pid)) continue @@ -944,7 +954,9 @@ func (c *perfCollector) updateCacheCounters(cgroupID string, procs []procfs.Proc if cacheProfiler, ok := c.perfCacheProfilers[pid]; ok { cacheProfile := &perf.CacheProfile{} - if err := (*cacheProfiler).Profile(cacheProfile); err != nil { + + err := (*cacheProfiler).Profile(cacheProfile) + if err != nil { errs = errors.Join(errs, fmt.Errorf("%w: %d", err, pid)) continue @@ -982,7 +994,8 @@ func (c *perfCollector) filterProcs(cgroups []cgroup) ([]cgroup, error) { // Use security context as reading procs env vars is a privileged action if securityCtx, ok := c.securityContexts[perfProcFilterCtx]; ok { - if err := securityCtx.Exec(dataPtr); err != nil { + err := securityCtx.Exec(dataPtr) + if err != nil { return nil, err } } else { @@ -1016,7 +1029,8 @@ func (c *perfCollector) newProfilers(cgroups []cgroup) []int { // Start new profilers within security context if securityCtx, ok := c.securityContexts[perfOpenProfilersCtx]; ok { - if err := securityCtx.Exec(dataPtr); err == nil { + err := securityCtx.Exec(dataPtr) + if err == nil { return dataPtr.activePIDs } } @@ -1042,7 +1056,8 @@ func (c *perfCollector) closeProfilers(activePIDs []int) error { // Start new profilers within security context if securityCtx, ok := c.securityContexts[perfCloseProfilersCtx]; ok { - if err := securityCtx.Exec(dataPtr); err != nil { + err := securityCtx.Exec(dataPtr) + if err != nil { return err } } @@ -1076,7 +1091,8 @@ func openProfilers(data any) error { if d.perfHwProfilersEnabled { if _, ok := d.perfHwProfilers[pid]; !ok { - if hwProfiler, err := newHwProfiler(pid, d.perfHwProfilerTypes); err != nil { + hwProfiler, err := newHwProfiler(pid, d.perfHwProfilerTypes) + if err != nil { d.logger.Error("failed to start hardware profiler", "pid", pid, "cmd", strings.Join(cmdLine, " "), "err", err) } else { d.perfHwProfilers[pid] = hwProfiler @@ -1086,7 +1102,8 @@ func openProfilers(data any) error { if d.perfSwProfilersEnabled { if _, ok := d.perfSwProfilers[pid]; !ok { - if swProfiler, err := newSwProfiler(pid, d.perfSwProfilerTypes); err != nil { + swProfiler, err := newSwProfiler(pid, 
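closeProfilers, reworked above, keeps only profilers whose PIDs are still active and closes and deletes the rest. The map-pruning shape, independent of the perf library (closeFn stands in for the real Stop plus Close pair):

package main

import (
    "fmt"
    "slices"
)

// closeInactive closes and removes every per-PID resource whose PID is no
// longer in activePIDs. Deleting from a map during range is safe in Go.
func closeInactive(profilers map[int]string, activePIDs []int, closeFn func(string) error) {
    for pid, prof := range profilers {
        if slices.Contains(activePIDs, pid) {
            continue
        }

        err := closeFn(prof)
        if err != nil {
            fmt.Printf("failed to shutdown profiler pid=%d err=%v\n", pid, err)
        }

        delete(profilers, pid)
    }
}

func main() {
    profilers := map[int]string{101: "hw", 202: "sw", 303: "cache"}

    closeInactive(profilers, []int{202}, func(string) error { return nil })
    fmt.Println(profilers) // map[202:sw]
}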
d.perfSwProfilerTypes) + if err != nil { d.logger.Error("failed to start software profiler", "pid", pid, "cmd", strings.Join(cmdLine, " "), "err", err) } else { d.perfSwProfilers[pid] = swProfiler @@ -1096,7 +1113,8 @@ func openProfilers(data any) error { if d.perfCacheProfilersEnabled { if _, ok := d.perfCacheProfilers[pid]; !ok { - if cacheProfiler, err := newCacheProfiler(pid, d.perfCacheProfilerTypes); err != nil { + cacheProfiler, err := newCacheProfiler(pid, d.perfCacheProfilerTypes) + if err != nil { d.logger.Error("failed to start cache profiler", "pid", pid, "cmd", strings.Join(cmdLine, " "), "err", err) } else { d.perfCacheProfilers[pid] = cacheProfiler @@ -1123,7 +1141,8 @@ func newHwProfiler(pid int, profilerTypes perf.HardwareProfilerType) (*perf.Hard return nil, err } - if err := hwProf.Start(); err != nil { + err = hwProf.Start() + if err != nil { return nil, err } @@ -1141,7 +1160,8 @@ func newSwProfiler(pid int, profilerTypes perf.SoftwareProfilerType) (*perf.Soft return nil, err } - if err := swProf.Start(); err != nil { + err = swProf.Start() + if err != nil { return nil, err } @@ -1159,7 +1179,8 @@ func newCacheProfiler(pid int, profilerTypes perf.CacheProfilerType) (*perf.Cach return nil, err } - if err := cacheProf.Start(); err != nil { + err = cacheProf.Start() + if err != nil { return nil, err } @@ -1180,7 +1201,8 @@ func closeProfilers(data any) error { if d.perfHwProfilersEnabled { for pid, hwProfiler := range d.perfHwProfilers { if !slices.Contains(d.activePIDs, pid) { - if err := closeHwProfiler(hwProfiler); err != nil { + err := closeHwProfiler(hwProfiler) + if err != nil { d.logger.Error("failed to shutdown hardware profiler", "err", err) } @@ -1193,7 +1215,8 @@ func closeProfilers(data any) error { if d.perfSwProfilersEnabled { for pid, swProfiler := range d.perfSwProfilers { if !slices.Contains(d.activePIDs, pid) { - if err := closeSwProfiler(swProfiler); err != nil { + err := closeSwProfiler(swProfiler) + if err != nil { d.logger.Error("failed to shutdown software profiler", "err", err) } @@ -1206,7 +1229,8 @@ func closeProfilers(data any) error { if d.perfCacheProfilersEnabled { for pid, cacheProfiler := range d.perfCacheProfilers { if !slices.Contains(d.activePIDs, pid) { - if err := closeCacheProfiler(cacheProfiler); err != nil { + err := closeCacheProfiler(cacheProfiler) + if err != nil { d.logger.Error("failed to shutdown cache profiler", "err", err) } @@ -1221,11 +1245,13 @@ func closeProfilers(data any) error { // closeHwProfiler stops and closes a hardware profiler. func closeHwProfiler(profiler *perf.HardwareProfiler) error { - if err := (*profiler).Stop(); err != nil { + err := (*profiler).Stop() + if err != nil { return err } - if err := (*profiler).Close(); err != nil { + err = (*profiler).Close() + if err != nil { return err } @@ -1234,11 +1260,13 @@ func closeHwProfiler(profiler *perf.HardwareProfiler) error { // closeSwProfiler stops and closes a software profiler. func closeSwProfiler(profiler *perf.SoftwareProfiler) error { - if err := (*profiler).Stop(); err != nil { + err := (*profiler).Stop() + if err != nil { return err } - if err := (*profiler).Close(); err != nil { + err = (*profiler).Close() + if err != nil { return err } @@ -1247,11 +1275,13 @@ func closeSwProfiler(profiler *perf.SoftwareProfiler) error { // closeCacheProfiler stops and closes a cache profiler. 
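The hunks above and below all apply one mechanical rewrite: golangci-lint 2.4 flags inline error assignments inside if statements, presumably via a rule such as noinlineerr (the linter config itself is not part of this excerpt, so the rule name is an assumption), and every `if err := f(); err != nil` becomes a separate assignment followed by a plain check. A minimal before/after sketch, with a hypothetical profiler value named prof:

	// Before: err is scoped to the if statement.
	if err := prof.Stop(); err != nil {
		return err
	}

	// After: assignment and check are split. err now lives in the
	// enclosing scope, so the next check assigns with = instead of
	// redeclaring with :=, exactly as in the closeHwProfiler hunk above.
	err := prof.Stop()
	if err != nil {
		return err
	}

	err = prof.Close()
	if err != nil {
		return err
	}

The closeCacheProfiler hunk continues below.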
func closeCacheProfiler(profiler *perf.CacheProfiler) error { - if err := (*profiler).Stop(); err != nil { + err := (*profiler).Stop() + if err != nil { return err } - if err := (*profiler).Close(); err != nil { + err = (*profiler).Close() + if err != nil { return err } diff --git a/pkg/collector/profiling.go b/pkg/collector/profiling.go index 18f177b9..c01cdaa9 100644 --- a/pkg/collector/profiling.go +++ b/pkg/collector/profiling.go @@ -48,7 +48,8 @@ func NewProfiler(c *profilerConfig) (Profiler, error) { } // Check if perf_event_paranoid allows to read perf events - if err := perfEventsAvailable(); err != nil { + err = perfEventsAvailable() + if err != nil { c.logger.Error("Perf events are not available", "err", err) return nil, err @@ -164,7 +165,8 @@ func NewProfiler(c *profilerConfig) (Profiler, error) { "cap_sys_resource", } - if _, err = setupAppCaps(capabilities); err != nil { + _, err = setupAppCaps(capabilities) + if err != nil { c.logger.Warn("Failed to parse capability name(s)", "err", err) } @@ -189,7 +191,8 @@ func (p *eBPFProfiler) Start(ctx context.Context) error { p.logger.Debug("Starting profiling session") // Start a new profiling session - if err := p.session.Start(); err != nil { + err := p.session.Start() + if err != nil { p.logger.Error("Failed to start a profiling session", "err", err) return err @@ -197,8 +200,10 @@ func (p *eBPFProfiler) Start(ctx context.Context) error { // Ingest profiles in a separate go routine profiles := make(chan *pushv1.PushRequest, 512) + go func() { - if err := p.ingest(ctx, profiles); err != nil { + err := p.ingest(ctx, profiles) + if err != nil { p.logger.Error("Failed to setup profiles ingest", "err", err) } }() @@ -213,7 +218,8 @@ func (p *eBPFProfiler) Start(ctx context.Context) error { case <-discoverTicker.C: p.session.UpdateTargets(p.convertTargetOptions()) case <-collectTicker.C: - if err := p.collectProfiles(ctx, profiles); err != nil { + err := p.collectProfiles(ctx, profiles) + if err != nil { p.logger.Error("Failed to collect profiles", "err", err) } case <-ctx.Done(): @@ -244,7 +250,9 @@ func (p *eBPFProfiler) collectProfiles(ctx context.Context, profiles chan *pushv SampleRate: int64(p.sessionOptions.SampleRate), PerPIDProfile: true, }) - if err := pprof.Collect(builders, p.session); err != nil { + + err := pprof.Collect(builders, p.session) + if err != nil { return err } @@ -269,7 +277,9 @@ func (p *eBPFProfiler) collectProfiles(ctx context.Context, profiles chan *pushv // Read profile sample into buffer buf := bytes.NewBuffer(nil) - if _, err := builder.Write(buf); err != nil { + + _, err := builder.Write(buf) + if err != nil { p.logger.Error("Failed to write profile data into buffer. 
Dropping sample", "target", builder.Labels.String(), "err", err) continue @@ -310,7 +320,8 @@ func (p *eBPFProfiler) ingest(ctx context.Context, profiles chan *pushv1.PushReq for { it := <-profiles - if _, err := client.Push(ctx, connect.NewRequest(it)); err != nil { + _, err := client.Push(ctx, connect.NewRequest(it)) + if err != nil { p.logger.Error("Failed to push profile sample", "err", err) } } diff --git a/pkg/collector/profiling_config.go b/pkg/collector/profiling_config.go index 92501ef0..cdd57cba 100644 --- a/pkg/collector/profiling_config.go +++ b/pkg/collector/profiling_config.go @@ -64,7 +64,8 @@ func (c *SessionConfig) UnmarshalYAML(unmarshal func(any) error) error { type plain SessionConfig - if err := unmarshal((*plain)(c)); err != nil { + err := unmarshal((*plain)(c)) + if err != nil { return err } @@ -72,7 +73,8 @@ func (c *SessionConfig) UnmarshalYAML(unmarshal func(any) error) error { c.Demangle = strings.TrimSpace(strings.ToLower(c.Demangle)) // Validate config - if err := c.Validate(); err != nil { + err = c.Validate() + if err != nil { return err } @@ -105,12 +107,14 @@ func (c *PyroscopeConfig) UnmarshalYAML(unmarshal func(any) error) error { type plain PyroscopeConfig - if err := unmarshal((*plain)(c)); err != nil { + err := unmarshal((*plain)(c)) + if err != nil { return err } // Validate config - if err := c.Validate(); err != nil { + err = c.Validate() + if err != nil { return err } @@ -120,7 +124,8 @@ func (c *PyroscopeConfig) UnmarshalYAML(unmarshal func(any) error) error { // Validate validates the config. func (c *PyroscopeConfig) Validate() error { // Check if URL is valid - if _, err := url.Parse(c.URL); err != nil { + _, err := url.Parse(c.URL) + if err != nil { return fmt.Errorf("invalid pyroscope URL: %w", err) } diff --git a/pkg/collector/profiling_test.go b/pkg/collector/profiling_test.go index 453807d3..992a3386 100644 --- a/pkg/collector/profiling_test.go +++ b/pkg/collector/profiling_test.go @@ -188,6 +188,7 @@ func (m *mockSession) Stop() { func (m *mockSession) Update(options ebpfspy.SessionOptions) error { m.mtx.Lock() defer m.mtx.Unlock() + m.options = options return nil diff --git a/pkg/collector/rapl.go b/pkg/collector/rapl.go index 906584b9..2cc2803f 100644 --- a/pkg/collector/rapl.go +++ b/pkg/collector/rapl.go @@ -73,7 +73,8 @@ func NewRaplCollector(logger *slog.Logger) (Collector, error) { // Get kernel version securityContexts := make(map[string]*security.SecurityContext) - if currentKernelVer, err := KernelVersion(); err == nil { + currentKernelVer, err := KernelVersion() + if err == nil { // Startin from kernel 5.10, RAPL counters are read only by root. // So we need CAP_DAC_READ_SEARCH capability to read them. 
if currentKernelVer >= KernelStringToNumeric("5.10") { @@ -154,7 +155,8 @@ func (c *raplCollector) Update(ch chan<- prometheus.Metric) error { go func() { defer wg.Done() - if err := c.updateLimits(zones, ch); err != nil { + err := c.updateLimits(zones, ch) + if err != nil { c.logger.Error("Failed to update RAPL power limits", "err", err) } }() @@ -164,7 +166,8 @@ func (c *raplCollector) Update(ch chan<- prometheus.Metric) error { go func() { defer wg.Done() - if err := c.updateEnergy(zones, ch); err != nil { + err := c.updateEnergy(zones, ch) + if err != nil { c.logger.Error("Failed to update RAPL energy counters", "err", err) } }() @@ -213,12 +216,14 @@ func (c *raplCollector) updateEnergy(zones []sysfs.RaplZone, ch chan<- prometheu if len(c.securityContexts) > 0 { // Start new profilers within security context if securityCtx, ok := c.securityContexts[raplReadEnergyCounter]; ok { - if err := securityCtx.Exec(dataPtr); err != nil { + err := securityCtx.Exec(dataPtr) + if err != nil { return err } } } else { - if err := readCounters(dataPtr); err != nil { + err := readCounters(dataPtr) + if err != nil { return err } } @@ -327,12 +332,15 @@ func readPowerLimits(zones []sysfs.RaplZone) (map[sysfs.RaplZone]uint64, error) for c := range 2 { timeWindowFile := filepath.Join(rz.Path, fmt.Sprintf("constraint_%d_time_window_us", c)) - if _, err := os.Stat(timeWindowFile); err != nil { + + _, err := os.Stat(timeWindowFile) + if err != nil { continue } // Read time window in micro seconds - if constTimeWindow, err := readUintFromFile(timeWindowFile); err == nil { + constTimeWindow, err := readUintFromFile(timeWindowFile) + if err == nil { if constTimeWindow > timeWindow { timeWindow = constTimeWindow longtermConstraint = c @@ -342,7 +350,9 @@ func readPowerLimits(zones []sysfs.RaplZone) (map[sysfs.RaplZone]uint64, error) // Now read power_limit_uw for the selected constraint. Value is in micro watts. powerLimitFile := filepath.Join(rz.Path, fmt.Sprintf("constraint_%d_power_limit_uw", longtermConstraint)) - if powerLimit, err := readUintFromFile(powerLimitFile); err == nil { + + powerLimit, err := readUintFromFile(powerLimitFile) + if err == nil { powerLimits[rz] = powerLimit } } diff --git a/pkg/collector/rdma.go b/pkg/collector/rdma.go index b23d5ffe..c5024ff7 100644 --- a/pkg/collector/rdma.go +++ b/pkg/collector/rdma.go @@ -95,7 +95,8 @@ func NewRDMACollector(logger *slog.Logger, cgManager *cgroupManager) (*rdmaColle if *rdmaCmd != "" { rdmaCmdPath = *rdmaCmd } else { - if rdmaCmdPath, err = exec.LookPath("rdma"); err != nil { + rdmaCmdPath, err = exec.LookPath("rdma") + if err != nil { logger.Error("rdma command not found. 
Not all RDMA metrics will be reported.", "err", err) } } @@ -245,7 +246,8 @@ func (c *rdmaCollector) Update(ch chan<- prometheus.Metric, cgroups []cgroup, ma } // Check QP modes and attempt to enable PID if not already done - if err := c.perPIDCounters(true); err != nil { + err := c.perPIDCounters(true) + if err != nil { c.logger.Error("Failed to enable Per-PID QP stats", "err", err) } @@ -296,7 +298,8 @@ func (c *rdmaCollector) perPIDCounters(enable bool) error { } // If command didn't return error, we successfully enabled/disabled mode - if err := securityCtx.Exec(dataPtr); err != nil { + err := securityCtx.Exec(dataPtr) + if err != nil { allErrs = errors.Join(allErrs, err) } else { c.qpModes[link] = enable @@ -334,6 +337,7 @@ func (c *rdmaCollector) update(ch chan<- prometheus.Metric, cgroups []cgroup) { for uuid, mr := range mrs { ch <- prometheus.MustNewConstMetric(c.metricDescs["mrs_active"], prometheus.GaugeValue, float64(mr.num), c.cgroupManager.name, c.hostname, mr.dev, "", uuid) + ch <- prometheus.MustNewConstMetric(c.metricDescs["mrs_len_active"], prometheus.GaugeValue, float64(mr.len), c.cgroupManager.name, c.hostname, mr.dev, "", uuid) } }(procCgroup) @@ -353,6 +357,7 @@ func (c *rdmaCollector) update(ch chan<- prometheus.Metric, cgroups []cgroup) { for uuid, cq := range cqs { ch <- prometheus.MustNewConstMetric(c.metricDescs["cqs_active"], prometheus.GaugeValue, float64(cq.num), c.cgroupManager.name, c.hostname, cq.dev, "", uuid) + ch <- prometheus.MustNewConstMetric(c.metricDescs["cqe_len_active"], prometheus.GaugeValue, float64(cq.len), c.cgroupManager.name, c.hostname, cq.dev, "", uuid) } }(procCgroup) @@ -408,6 +413,7 @@ func (c *rdmaCollector) update(ch chan<- prometheus.Metric, cgroups []cgroup) { } else { vType = prometheus.CounterValue } + ch <- prometheus.MustNewConstMetric(c.metricDescs[n], vType, float64(v), c.cgroupManager.name, c.hostname, device, port) } } @@ -459,7 +465,8 @@ func (c *rdmaCollector) devMR(procCgroup map[string]string) (map[string]*mr, err if pidMatch := pidRegex.FindStringSubmatch(line); len(pidMatch) > 1 { if uuid, ok := procCgroup[pidMatch[1]]; ok { if mrLenMatch := mrlenRegex.FindStringSubmatch(line); len(mrLenMatch) > 1 { - if l, err := strconv.ParseUint(mrLenMatch[1], 10, 64); err == nil { + l, err := strconv.ParseUint(mrLenMatch[1], 10, 64) + if err == nil { if _, ok := mrs[uuid]; ok { mrs[uuid].num++ mrs[uuid].len += l @@ -500,7 +507,8 @@ func (c *rdmaCollector) devCQ(procCgroup map[string]string) (map[string]*cq, err if pidMatch := pidRegex.FindStringSubmatch(line); len(pidMatch) > 1 { if uuid, ok := procCgroup[pidMatch[1]]; ok { if cqeMatch := cqeRegex.FindStringSubmatch(line); len(cqeMatch) > 1 { - if l, err := strconv.ParseUint(cqeMatch[1], 10, 64); err == nil { + l, err := strconv.ParseUint(cqeMatch[1], 10, 64) + if err == nil { if _, ok := cqs[uuid]; ok { cqs[uuid].num++ cqs[uuid].len += l @@ -572,7 +580,8 @@ func (c *rdmaCollector) linkQP(procCgroup map[string]string) (map[string]*qp, er if uuid, ok := procCgroup[pidMatch[1]]; ok { counterRegex := regexp.MustCompile(fmt.Sprintf(`.+?%s\s*([\d]+)`, hwCounter)) if counterMatch := counterRegex.FindStringSubmatch(line); len(counterMatch) > 1 { - if v, err := strconv.ParseUint(counterMatch[1], 10, 64); err == nil { + v, err := strconv.ParseUint(counterMatch[1], 10, 64) + if err == nil { if _, ok := qps[uuid]; !ok { link := strings.Split(linkMatch[1], "/") qps[uuid] = &qp{1, link[0], link[1], make(map[string]uint64)} diff --git a/pkg/collector/redfish.go b/pkg/collector/redfish.go index
46f31b1f..5a91f099 100644 --- a/pkg/collector/redfish.go +++ b/pkg/collector/redfish.go @@ -73,12 +73,11 @@ func (c *redfishClientConfig) UnmarshalYAML(unmarshal func(any) error) error { type plain redfishClientConfig - if err := unmarshal((*plain)(c)); err != nil { + err := unmarshal((*plain)(c)) + if err != nil { return err } - var err error - // If BMC Hostname is not provided, attempt to discover it using OpenIPMI interface if c.Hostname == "" { // Make a new IPMI client @@ -94,8 +93,15 @@ func (c *redfishClientConfig) UnmarshalYAML(unmarshal func(any) error) error { return fmt.Errorf("failed to get BMC LAN IP: %w", err) } + // Make a timeout context + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + // Attempt to get BMC hostname from IP - if hostname, err := net.LookupAddr(*bmcIP); err == nil { + var ns *net.Resolver + + hostname, err := ns.LookupAddr(ctx, *bmcIP) + if err == nil { c.Hostname = hostname[0] } else { c.Hostname = *bmcIP @@ -142,7 +148,8 @@ type redfishConfig struct { func (c *redfishConfig) UnmarshalYAML(unmarshal func(any) error) error { type plain redfishConfig - if err := unmarshal((*plain)(c)); err != nil { + err := unmarshal((*plain)(c)) + if err != nil { return err } @@ -274,7 +281,8 @@ func NewRedfishCollector(logger *slog.Logger) (Collector, error) { } // Connect to Redfish server - if err := collector.connect(); err != nil { + err = collector.connect() + if err != nil { logger.Error("Failed to connect to Redfish server", "err", err) return nil, err @@ -366,7 +374,8 @@ func (c *redfishCollector) powerReadings() map[string]map[string]float64 { // When this happens this scrape is lost and it will return cached values // but the next scrape should be good as we created new client - if err := c.connect(); err != nil { + err := c.connect() + if err != nil { c.logger.Error("Failed to create new redfish client", "err", err) } } diff --git a/pkg/collector/redfish_test.go b/pkg/collector/redfish_test.go index 99f44b14..5ca13c16 100644 --- a/pkg/collector/redfish_test.go +++ b/pkg/collector/redfish_test.go @@ -49,7 +49,8 @@ func testRedfishServer() *httptest.Server { // Test redfish server server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { if r.URL.Path == "/redfish/v1/" { - if data, err := os.ReadFile("testdata/redfish/service_root.json"); err == nil { + data, err := os.ReadFile("testdata/redfish/service_root.json") + if err == nil { w.Write(data) return @@ -73,7 +74,8 @@ func testRedfishServer() *httptest.Server { return } - if data, err := os.ReadFile("testdata/redfish/chassis_collection.json"); err == nil { + data, err := os.ReadFile("testdata/redfish/chassis_collection.json") + if err == nil { w.Write(data) return @@ -85,7 +87,8 @@ func testRedfishServer() *httptest.Server { return } - if data, err := os.ReadFile("testdata/redfish/chassis_1.json"); err == nil { + data, err := os.ReadFile("testdata/redfish/chassis_1.json") + if err == nil { w.Write(data) return @@ -97,7 +100,8 @@ func testRedfishServer() *httptest.Server { return } - if data, err := os.ReadFile("testdata/redfish/chassis_1_power.json"); err == nil { + data, err := os.ReadFile("testdata/redfish/chassis_1_power.json") + if err == nil { w.Write(data) return @@ -109,7 +113,8 @@ func testRedfishServer() *httptest.Server { return } - if data, err := os.ReadFile("testdata/redfish/chassis_2.json"); err == nil { + data, err := os.ReadFile("testdata/redfish/chassis_2.json") + if err == nil { w.Write(data) return @@ -121,7 +126,8 
@@ func testRedfishServer() *httptest.Server { return } - if data, err := os.ReadFile("testdata/redfish/chassis_2_power.json"); err == nil { + data, err := os.ReadFile("testdata/redfish/chassis_2_power.json") + if err == nil { w.Write(data) return diff --git a/pkg/collector/server.go b/pkg/collector/server.go index ebced6a3..c672fc28 100644 --- a/pkg/collector/server.go +++ b/pkg/collector/server.go @@ -117,7 +117,8 @@ func NewCEEMSExporterServer(c *Config) (*CEEMSExporterServer, error) { // Register metrics collector with Prometheus server.metricsHandler.metricsRegistry.MustRegister(version.NewCollector(CEEMSExporterAppName)) - if err := server.metricsHandler.metricsRegistry.Register(server.collector); err != nil { + err := server.metricsHandler.metricsRegistry.Register(server.collector) + if err != nil { return nil, fmt.Errorf("couldn't register compute resource collector: %w", err) } @@ -159,7 +160,8 @@ func NewCEEMSExporterServer(c *Config) (*CEEMSExporterServer, error) { func (s *CEEMSExporterServer) Start() error { s.logger.Info("Starting " + CEEMSExporterAppName) - if err := web.ListenAndServe(s.server, s.webConfig, s.logger); err != nil && !errors.Is(err, http.ErrServerClosed) { + err := web.ListenAndServe(s.server, s.webConfig, s.logger) + if err != nil && !errors.Is(err, http.ErrServerClosed) { s.logger.Error("Failed to Listen and Serve HTTP server", "err", err) return err @@ -178,14 +180,16 @@ func (s *CEEMSExporterServer) Shutdown(ctx context.Context) error { // connections // Do not return error here as we SHOULD ENSURE to close collectors // that might release any system resources - if err := s.server.Shutdown(ctx); err != nil { + err := s.server.Shutdown(ctx) + if err != nil { s.logger.Error("Failed to stop exporter's HTTP server") errs = errors.Join(errs, err) } // Now close all collectors that release any system resources - if err := s.collector.Close(ctx); err != nil { + err = s.collector.Close(ctx) + if err != nil { s.logger.Error("Failed to stop collector(s)") return errors.Join(errs, err) diff --git a/pkg/collector/server_test.go b/pkg/collector/server_test.go index bca97e27..d4cf9782 100644 --- a/pkg/collector/server_test.go +++ b/pkg/collector/server_test.go @@ -134,6 +134,7 @@ func TestCEEMSExporterServer(t *testing.T) { // Make request resp, err := http.Get(fmt.Sprintf("http://localhost:%d%s", p, req.path)) //nolint:noctx require.NoError(t, err) + defer resp.Body.Close() assert.Equal(t, req.respCode, resp.StatusCode, "name: %s path: %s", test.name, req.path) diff --git a/pkg/collector/slurm.go b/pkg/collector/slurm.go index 55fe82f3..aef30a6f 100644 --- a/pkg/collector/slurm.go +++ b/pkg/collector/slurm.go @@ -195,7 +195,8 @@ func NewSlurmCollector(logger *slog.Logger) (Collector, error) { } // Attempt to get GPU devices - if err := gpuSMI.Discover(); err != nil { + err = gpuSMI.Discover() + if err != nil { // If we failed to fetch GPUs that are from supported // vendor, return with error logger.Error("Error fetching GPU devices", "err", err) @@ -223,9 +224,11 @@ func NewSlurmCollector(logger *slog.Logger) (Collector, error) { var mpsEnabled bool - if _, err := os.Stat(*slurmGresConfigFile); err == nil { + _, err = os.Stat(*slurmGresConfigFile) + if err == nil { // Read gres.conf file and split file by lines - if out, err := os.ReadFile(*slurmGresConfigFile); err == nil { + out, err := os.ReadFile(*slurmGresConfigFile) + if err == nil { // If Name=shard is in the line, sharding is enabled gpuSMI.Devices, shardEnabled = updateGPUAvailableShares(string(out), "shard", 
hostname, gpuSMI.Devices) if shardEnabled { @@ -331,7 +334,8 @@ func (c *slurmCollector) Update(ch chan<- prometheus.Metric) error { defer wg.Done() // Update cgroup metrics - if err := c.cgroupCollector.Update(ch, cgroups); err != nil { + err := c.cgroupCollector.Update(ch, cgroups) + if err != nil { c.logger.Error("Failed to update cgroup stats", "err", err) } @@ -348,7 +352,8 @@ func (c *slurmCollector) Update(ch chan<- prometheus.Metric) error { defer wg.Done() // Update perf metrics - if err := c.perfCollector.Update(ch, cgroups, slurmCollectorSubsystem); err != nil { + err := c.perfCollector.Update(ch, cgroups, slurmCollectorSubsystem) + if err != nil { c.logger.Error("Failed to update perf stats", "err", err) } }() @@ -361,7 +366,8 @@ func (c *slurmCollector) Update(ch chan<- prometheus.Metric) error { defer wg.Done() // Update ebpf metrics - if err := c.ebpfCollector.Update(ch, cgroups, slurmCollectorSubsystem); err != nil { + err := c.ebpfCollector.Update(ch, cgroups, slurmCollectorSubsystem) + if err != nil { c.logger.Error("Failed to update IO and/or network stats", "err", err) } }() @@ -374,7 +380,8 @@ func (c *slurmCollector) Update(ch chan<- prometheus.Metric) error { defer wg.Done() // Update RDMA metrics - if err := c.rdmaCollector.Update(ch, cgroups, slurmCollectorSubsystem); err != nil { + err := c.rdmaCollector.Update(ch, cgroups, slurmCollectorSubsystem) + if err != nil { c.logger.Error("Failed to update RDMA stats", "err", err) } }() @@ -392,27 +399,31 @@ func (c *slurmCollector) Stop(ctx context.Context) error { // Stop all sub collectors // Stop cgroupCollector - if err := c.cgroupCollector.Stop(ctx); err != nil { + err := c.cgroupCollector.Stop(ctx) + if err != nil { c.logger.Error("Failed to stop cgroup collector", "err", err) } // Stop perfCollector if perfCollectorEnabled() { - if err := c.perfCollector.Stop(ctx); err != nil { + err := c.perfCollector.Stop(ctx) + if err != nil { c.logger.Error("Failed to stop perf collector", "err", err) } } // Stop ebpfCollector if ebpfCollectorEnabled() { - if err := c.ebpfCollector.Stop(ctx); err != nil { + err := c.ebpfCollector.Stop(ctx) + if err != nil { c.logger.Error("Failed to stop ebpf collector", "err", err) } } // Stop rdmaCollector if rdmaCollectorEnabled() { - if err := c.rdmaCollector.Stop(ctx); err != nil { + err := c.rdmaCollector.Stop(ctx) + if err != nil { c.logger.Error("Failed to stop RDMA collector", "err", err) } } @@ -706,7 +717,8 @@ func (c *slurmCollector) jobGRESResources(uuid string, procs []procfs.Proc) *gre } if securityCtx, ok := c.securityContexts[slurmReadProcCtx]; ok { - if err := securityCtx.Exec(dataPtr); err != nil { + err := securityCtx.Exec(dataPtr) + if err != nil { c.logger.Error( "Failed to run inside security context", "jobid", uuid, "err", err, ) @@ -822,7 +834,8 @@ func readProcEnvirons(data any) error { } // Convert numShares to uint64 - if val, err := strconv.ParseUint(numShares, 10, 64); err == nil { + val, err := strconv.ParseUint(numShares, 10, 64) + if err == nil { d.gres.numShares = val } @@ -888,13 +901,17 @@ func updateGPUAvailableShares(content string, gresType string, hostname string, case strings.Contains(dp, "dev/nvidia") && !strings.Contains(dp, "dev/nvidia-caps"): // For physical GPUs, it will be /dev/nvidia0, /dev/nvidia[0-3], etc minorString := strings.Split(dp, "dev/nvidia")[1] - if val, err := parseRange(strings.TrimSuffix(strings.TrimPrefix(minorString, "["), "]")); err == nil { + + val, err := parseRange(strings.TrimSuffix(strings.TrimPrefix(minorString, "["), "]"))
+ if err == nil { minors = val } case strings.Contains(dp, "dev/nvidia-caps/nvidia-cap"): // For MIG backed shards, it will be File=/dev/nvidia-caps/nvidia-cap21 migMinorString := strings.Split(dp, "dev/nvidia-caps/nvidia-cap")[1] - if val, err := parseRange(strings.TrimSuffix(strings.TrimPrefix(migMinorString, "["), "]")); err == nil { + + val, err := parseRange(strings.TrimSuffix(strings.TrimPrefix(migMinorString, "["), "]")) + if err == nil { for _, v := range val { minorString, gpuInstID, computeInstID := migInstanceIDFromDevMinor(v) minors = append(minors, minorString) @@ -910,7 +927,8 @@ func updateGPUAvailableShares(content string, gresType string, hostname string, // Get num shards for this device if strings.Contains(d, "count") { if p := strings.Split(d, "="); len(p) >= 2 { - if v, err := strconv.ParseUint(p[1], 10, 64); err == nil { + v, err := strconv.ParseUint(p[1], 10, 64) + if err == nil { count = v } } @@ -972,7 +990,8 @@ func migInstanceIDFromDevMinor(migMinor string) (string, uint64, uint64) { var computeInstID uint64 - if b, err := os.ReadFile(procFilePath("driver/nvidia-caps/mig-minors")); err == nil { + b, err := os.ReadFile(procFilePath("driver/nvidia-caps/mig-minors")) + if err == nil { for line := range strings.SplitSeq(string(b), "\n") { if path := strings.Split(line, " "); len(path) >= 2 && path[1] == migMinor { for p := range strings.SplitSeq(path[0], "/") { @@ -981,13 +1000,15 @@ func migInstanceIDFromDevMinor(migMinor string) (string, uint64, uint64) { } if strings.Contains(p, "gi") { - if v, err := strconv.ParseUint(strings.Split(p, "gi")[1], 10, 64); err == nil { + v, err := strconv.ParseUint(strings.Split(p, "gi")[1], 10, 64) + if err == nil { gpuInstID = v } } if strings.Contains(p, "ci") { - if v, err := strconv.ParseUint(strings.Split(p, "ci")[1], 10, 64); err == nil { + v, err := strconv.ParseUint(strings.Split(p, "ci")[1], 10, 64) + if err == nil { computeInstID = v } } diff --git a/pkg/collector/targets.go b/pkg/collector/targets.go index 1d78c5c1..1ebbc5d4 100644 --- a/pkg/collector/targets.go +++ b/pkg/collector/targets.go @@ -182,7 +182,8 @@ func (d *targetDiscoverer) discover() ([]Target, error) { // else execute function natively if len(d.targetEnvVars) > 0 { if securityCtx, ok := d.securityContexts[profilingTargetFilterCtx]; ok { - if err := securityCtx.Exec(dataPtr); err != nil { + err := securityCtx.Exec(dataPtr) + if err != nil { return nil, err } } else { @@ -306,7 +307,8 @@ func TargetsHandlerFor(discoverer Discoverer, opts promhttp.HandlerOpts) http.Ha // httpEncode encodes response to http.ResponseWriter. 
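The httpEncode function below, like the many httptest handlers later in this patch, encodes a value straight into the http.ResponseWriter and falls back to a literal "KO" body when encoding fails. A self-contained sketch of that shape (writeJSON is our name, not the repository's):

	package example

	import (
		"encoding/json"
		"net/http"
	)

	// writeJSON encodes v into w and, because the status line has
	// already been sent by this point, signals failure with a
	// sentinel body instead of an HTTP error code.
	func writeJSON(w http.ResponseWriter, v any) {
		err := json.NewEncoder(w).Encode(v)
		if err != nil {
			w.Write([]byte("KO"))
		}
	}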
func httpEncode(rsp http.ResponseWriter, response []Target) { - if err := json.NewEncoder(rsp).Encode(&response); err != nil { + err := json.NewEncoder(rsp).Encode(&response) + if err != nil { rsp.Write([]byte("KO")) } } diff --git a/pkg/emissions/emaps.go b/pkg/emissions/emaps.go index 5fffe8fe..1f196bec 100644 --- a/pkg/emissions/emaps.go +++ b/pkg/emissions/emaps.go @@ -152,7 +152,9 @@ func (s *emapsProvider) update() { s.logger.Error("Failed to retrieve emission factor from Electricity maps provider", "err", err) } else { emapsFactorMu.Lock() + s.lastEmissionFactor = currentEmissionFactor + emapsFactorMu.Unlock() } @@ -170,7 +172,9 @@ func (s *emapsProvider) update() { func (s *emapsProvider) emissionFactors() EmissionFactors { emapsFactorMu.RLock() + emissionFactors := s.lastEmissionFactor + emapsFactorMu.RUnlock() return emissionFactors @@ -210,7 +214,9 @@ func makeEMapsAPIRequest( // Set emission factor only when returned value is non zero if response.CarbonIntensity > 0 { emapsZoneFactorMu.Lock() + emissionFactors[z] = EmissionFactor{n, float64(response.CarbonIntensity)} + emapsZoneFactorMu.Unlock() } diff --git a/pkg/emissions/emaps_test.go b/pkg/emissions/emaps_test.go index 066851cd..70587430 100644 --- a/pkg/emissions/emaps_test.go +++ b/pkg/emissions/emaps_test.go @@ -109,7 +109,8 @@ func TestNewEMapsProvider(t *testing.T) { expected := eMapsZonesResponse{"FR": map[string]string{"zoneName": "France"}} server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if err := json.NewEncoder(w).Encode(&expected); err != nil { + err := json.NewEncoder(w).Encode(&expected) + if err != nil { w.Write([]byte("KO")) } })) @@ -130,7 +131,8 @@ func TestNewEMapsProviderFail(t *testing.T) { expected := dummyResponse server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if err := json.NewEncoder(w).Encode(&expected); err != nil { + err := json.NewEncoder(w).Encode(&expected) + if err != nil { w.Write([]byte("KO")) } })) @@ -150,7 +152,8 @@ func TestEMapsAPIRequest(t *testing.T) { expected := eMapsCarbonIntensityResponse{CarbonIntensity: expectedFactor} server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if err := json.NewEncoder(w).Encode(&expected); err != nil { + err := json.NewEncoder(w).Encode(&expected) + if err != nil { w.Write([]byte("KO")) } })) @@ -167,7 +170,8 @@ func TestEMapsAPIRequestFail(t *testing.T) { expected := dummyResponse server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if err := json.NewEncoder(w).Encode(&expected); err != nil { + err := json.NewEncoder(w).Encode(&expected) + if err != nil { w.Write([]byte("KO")) } })) @@ -189,7 +193,9 @@ func TestEMapsAPIRequestZones(t *testing.T) { zone := r.URL.Query()["zone"][0] expected := eMapsCarbonIntensityResponse{CarbonIntensity: int(expectedFactors[zone].Factor)} - if err := json.NewEncoder(w).Encode(&expected); err != nil { + + err := json.NewEncoder(w).Encode(&expected) + if err != nil { w.Write([]byte("KO")) } })) diff --git a/pkg/emissions/helpers.go b/pkg/emissions/helpers.go index e6a2741f..e78fe06b 100644 --- a/pkg/emissions/helpers.go +++ b/pkg/emissions/helpers.go @@ -12,7 +12,8 @@ func init() { } // Unmarshal JSON file into struct - if err := json.Unmarshal(countryCodesContents, &CountryCodes); err != nil { + err = json.Unmarshal(countryCodesContents, &CountryCodes) + if err != nil { return } } diff --git a/pkg/emissions/owid.go b/pkg/emissions/owid.go 
index 09c89473..d78a9d3a 100644 --- a/pkg/emissions/owid.go +++ b/pkg/emissions/owid.go @@ -68,7 +68,8 @@ func readOWIDData(contents []byte) (EmissionFactors, error) { } // Populate emissionFactors map - if val, err := strconv.ParseFloat(record[3], 64); err == nil { + val, err := strconv.ParseFloat(record[3], 64) + if err == nil { emissionFactors[countryCode] = EmissionFactor{record[0], val} } } diff --git a/pkg/emissions/provider.go b/pkg/emissions/provider.go index 7a67f8c5..f3d0e72a 100644 --- a/pkg/emissions/provider.go +++ b/pkg/emissions/provider.go @@ -85,7 +85,9 @@ func (e FactorProviders) Collect() map[string]PayLoad { } emissionsMu.Lock() + emissionFactors[name] = PayLoad{Factor: factor, Name: e.ProviderNames[name]} + emissionsMu.Unlock() wg.Done() }(name, s) @@ -107,11 +109,14 @@ func (e FactorProviders) Stop() error { go func(name string, s Provider) { defer wg.Done() - if err := s.Stop(); err != nil { + err := s.Stop() + if err != nil { e.logger.Error("Failed to stop emission factor updater", "provider", name, "err", err) errorsMu.Lock() + errs = errors.Join(errs, err) + errorsMu.Unlock() return diff --git a/pkg/emissions/rte.go b/pkg/emissions/rte.go index 5bd65c72..dc48f0b3 100644 --- a/pkg/emissions/rte.go +++ b/pkg/emissions/rte.go @@ -96,7 +96,9 @@ func (s *rteProvider) update() { s.logger.Error("Failed to retrieve emission factor from RTE provider", "err", err) } else { rteFactorMu.Lock() + s.lastEmissionFactor = currentEmissionFactor + rteFactorMu.Unlock() } @@ -114,7 +116,9 @@ func (s *rteProvider) update() { func (s *rteProvider) emissionFactors() EmissionFactors { rteFactorMu.RLock() + emissionFactors := s.lastEmissionFactor + rteFactorMu.RUnlock() return emissionFactors @@ -175,6 +179,7 @@ func makeRTEAPIRequest(url string) (EmissionFactors, error) { } var fields []nationalRealTimeFieldsV2 + fields = append(fields, data.Results...) 
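The emissions hunks in this region change no behaviour at all; they only add blank lines so that a mutex operation and the statement it guards sit in separate visual blocks, which matches the cuddling rules of a whitespace linter such as wsl (an assumption, since the .golangci.yml changes are not shown in this excerpt). The getter shape that results, sketched with our own names:

	package example

	import "sync"

	var (
		factorMu   sync.RWMutex
		lastFactor float64
	)

	// emissionFactor reads the shared value under the read lock and
	// returns the copy, mirroring the provider getters in this patch.
	func emissionFactor() float64 {
		factorMu.RLock()

		factor := lastFactor

		factorMu.RUnlock()

		return factor
	}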
// Check size of fields as it can be zero sometimes if len(fields) >= 1 { diff --git a/pkg/emissions/rte_test.go b/pkg/emissions/rte_test.go index 8bcc9fec..0105f05e 100644 --- a/pkg/emissions/rte_test.go +++ b/pkg/emissions/rte_test.go @@ -112,7 +112,8 @@ func TestRTEAPIRequest(t *testing.T) { expected := nationalRealTimeResponseV2{1, []nationalRealTimeFieldsV2{{TauxCo2: expectedFactor}}} server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if err := json.NewEncoder(w).Encode(&expected); err != nil { + err := json.NewEncoder(w).Encode(&expected) + if err != nil { w.Write([]byte("KO")) } })) @@ -129,7 +130,8 @@ func TestRTEAPIRequestFail(t *testing.T) { expected := dummyResponse server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if err := json.NewEncoder(w).Encode(&expected); err != nil { + err := json.NewEncoder(w).Encode(&expected) + if err != nil { w.Write([]byte("KO")) } })) diff --git a/pkg/emissions/watttime.go b/pkg/emissions/watttime.go index df5ecb5f..2078da95 100644 --- a/pkg/emissions/watttime.go +++ b/pkg/emissions/watttime.go @@ -95,7 +95,8 @@ func NewWattTimeProvider(logger *slog.Logger) (Provider, error) { // Try few times before giving up for range 5 { - if err = updateToken(url, w.auth); err == nil { + err = updateToken(url, w.auth) + if err == nil { break } @@ -154,7 +155,9 @@ func (s *wtProvider) update() { s.logger.Error("Failed to retrieve emission factor from Watt Time provider", "err", err) } else { wtFactorMu.Lock() + s.lastEmissionFactor = currentEmissionFactor + wtFactorMu.Unlock() } @@ -172,7 +175,9 @@ func (s *wtProvider) update() { func (s *wtProvider) emissionFactors() EmissionFactors { wtFactorMu.RLock() + emissionFactors := s.lastEmissionFactor + wtFactorMu.RUnlock() return emissionFactors @@ -181,7 +186,8 @@ func (s *wtProvider) emissionFactors() EmissionFactors { // fetchWTEmissionFactor makes request to Watt time API to fetch factor for the given region. 
func fetchWTEmissionFactor(baseURL string, auth *auth, region string) (EmissionFactors, error) { // Update token if necessary - if err := updateToken(baseURL, auth); err != nil { + err := updateToken(baseURL, auth) + if err != nil { return nil, fmt.Errorf("failed to update api token of watt time provider: %w", err) } diff --git a/pkg/emissions/watttime_test.go b/pkg/emissions/watttime_test.go index c45b364b..535175b1 100644 --- a/pkg/emissions/watttime_test.go +++ b/pkg/emissions/watttime_test.go @@ -103,7 +103,9 @@ func TestNewWTProvider(t *testing.T) { expected := wtTokenResponse{ Token: "token", } - if err := json.NewEncoder(w).Encode(&expected); err != nil { + + err := json.NewEncoder(w).Encode(&expected) + if err != nil { w.Write([]byte("KO")) } } else { @@ -118,7 +120,8 @@ func TestNewWTProvider(t *testing.T) { }, } - if err := json.NewEncoder(w).Encode(&expected); err != nil { + err := json.NewEncoder(w).Encode(&expected) + if err != nil { w.Write([]byte("KO")) } } @@ -142,7 +145,8 @@ func TestNewWTProviderFail(t *testing.T) { expected := dummyResponse server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if err := json.NewEncoder(w).Encode(&expected); err != nil { + err := json.NewEncoder(w).Encode(&expected) + if err != nil { w.Write([]byte("KO")) } })) @@ -174,7 +178,8 @@ func TestWTAPIRequest(t *testing.T) { }, } - if err := json.NewEncoder(w).Encode(&expected); err != nil { + err := json.NewEncoder(w).Encode(&expected) + if err != nil { w.Write([]byte("KO")) } })) @@ -196,7 +201,8 @@ func TestWTAPIRequestFail(t *testing.T) { expected := dummyResponse server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if err := json.NewEncoder(w).Encode(&expected); err != nil { + err := json.NewEncoder(w).Encode(&expected) + if err != nil { w.Write([]byte("KO")) } })) @@ -218,7 +224,9 @@ func TestWTAPILogin(t *testing.T) { expected := wtTokenResponse{ Token: "token", } - if err := json.NewEncoder(w).Encode(&expected); err != nil { + + err := json.NewEncoder(w).Encode(&expected) + if err != nil { w.Write([]byte("KO")) } })) @@ -244,7 +252,9 @@ func TestWTTokenUpdate(t *testing.T) { expected := wtTokenResponse{ Token: fmt.Sprintf("token-%d", reqIdx), } - if err := json.NewEncoder(w).Encode(&expected); err != nil { + + err := json.NewEncoder(w).Encode(&expected) + if err != nil { w.Write([]byte("KO")) } diff --git a/pkg/grafana/grafana.go b/pkg/grafana/grafana.go index a0b2aaa8..a1254bf2 100644 --- a/pkg/grafana/grafana.go +++ b/pkg/grafana/grafana.go @@ -55,17 +55,14 @@ func New(webURL string, config config_util.HTTPClientConfig, logger *slog.Logger } // Parse Grafana web Url - var grafanaURL *url.URL - - var grafanaClient *http.Client - - var err error - if grafanaURL, err = url.Parse(webURL); err != nil { + grafanaURL, err := url.Parse(webURL) + if err != nil { return nil, errors.Unwrap(err) } // If skip verify is set to true for TSDB add it to client - if grafanaClient, err = config_util.NewClientFromConfig(config, "grafana"); err != nil { + grafanaClient, err := config_util.NewClientFromConfig(config, "grafana") + if err != nil { return nil, err } diff --git a/pkg/grafana/grafana_test.go b/pkg/grafana/grafana_test.go index 99bafffc..9ea0b666 100644 --- a/pkg/grafana/grafana_test.go +++ b/pkg/grafana/grafana_test.go @@ -48,7 +48,8 @@ func TestGrafanaTeamMembersQuerySuccess(t *testing.T) { t.Setenv("GRAFANA_API_TOKEN", "foo") server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r 
*http.Request) { - if err := json.NewEncoder(w).Encode(&expected); err != nil { + err := json.NewEncoder(w).Encode(&expected) + if err != nil { w.Write([]byte("KO")) } })) @@ -72,7 +73,8 @@ func TestGrafanaTeamMembersQueryFailNoTeamID(t *testing.T) { t.Setenv("GRAFANA_API_TOKEN", "foo") server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if err := json.NewEncoder(w).Encode(&expected); err != nil { + err := json.NewEncoder(w).Encode(&expected) + if err != nil { w.Write([]byte("KO")) } })) diff --git a/pkg/ipmi/client.go b/pkg/ipmi/client.go index 7adbb788..4e6d1dcb 100644 --- a/pkg/ipmi/client.go +++ b/pkg/ipmi/client.go @@ -65,7 +65,8 @@ func NewClient(c *Config) (Client, error) { var devFile *os.File for _, d := range ipmiDevs { - if f, err := os.Open(fmt.Sprintf(d, c.DevNum)); err == nil { + f, err := os.Open(fmt.Sprintf(d, c.DevNum)) + if err == nil { c.Logger.Debug("IPMI device found", "device", fmt.Sprintf(d, c.DevNum)) devFile = f @@ -81,7 +82,9 @@ func NewClient(c *Config) (Client, error) { // Setup event receiver recvEvents := 1 - if _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, devFile.Fd(), IPMICTL_SET_GETS_EVENTS_CMD, uintptr(unsafe.Pointer(&recvEvents))); errno != 0 { + + _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, devFile.Fd(), IPMICTL_SET_GETS_EVENTS_CMD, uintptr(unsafe.Pointer(&recvEvents))) + if errno != 0 { return nil, fmt.Errorf("failed to enable IPMI event receiver: %w", errno) } @@ -111,7 +114,8 @@ func (i *ipmiClient) Do(req *Request) (*Response, error) { fd := i.devFile.Fd() // Send request - if _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, fd, IPMICTL_SEND_COMMAND, uintptr(unsafe.Pointer(req))); errno != 0 { + _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, fd, IPMICTL_SEND_COMMAND, uintptr(unsafe.Pointer(req))) + if errno != 0 { i.logger.Error("Failed to send IPMI request", "err", errno) return nil, fmt.Errorf("failed to send IPMI request: %w", errno) @@ -158,7 +162,8 @@ func (i *ipmiClient) Do(req *Request) (*Response, error) { } // Read data into recv struct - if _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, fd, IPMICTL_RECEIVE_MSG_TRUNC, uintptr(unsafe.Pointer(&recv))); errno != 0 { + _, _, errno = syscall.Syscall(syscall.SYS_IOCTL, fd, IPMICTL_RECEIVE_MSG_TRUNC, uintptr(unsafe.Pointer(&recv))) + if errno != 0 { i.logger.Error("Failed to read response from IPMI device interface", "err", errno) return nil, fmt.Errorf("failed to read response from IPMI device interface: %w", errno) @@ -176,7 +181,8 @@ func (i *ipmiClient) Do(req *Request) (*Response, error) { // i.logger.Debug("IPMI response data", "data", resp.Data[0:resp.DataLen]) // Check completion code - if err := binary.Read(bytes.NewReader(resp.Data[0:1]), binary.BigEndian, &resp.Ccode); err == nil && resp.Ccode != 0 { + err = binary.Read(bytes.NewReader(resp.Data[0:1]), binary.BigEndian, &resp.Ccode) + if err == nil && resp.Ccode != 0 { return nil, errors.New("received non zero completion code in IPMI response") } diff --git a/pkg/ipmi/sensors.go b/pkg/ipmi/sensors.go index 4b419635..de62edbf 100644 --- a/pkg/ipmi/sensors.go +++ b/pkg/ipmi/sensors.go @@ -49,7 +49,9 @@ func (i *ipmiClient) SensorRecords() ([]*FullSensorRecord, error) { } sensorRecord := &FullSensorRecord{} - if err := sensorRecord.DecodeFromBytes(resp.Data[:]); err != nil { + + err = sensorRecord.DecodeFromBytes(resp.Data[:]) + if err != nil { errs = errors.Join(errs, fmt.Errorf("failed to decode sensor record %d: %w", recordID, err)) continue diff --git a/pkg/k8s/client.go 
b/pkg/k8s/client.go index a16c7c0f..9815284a 100644 --- a/pkg/k8s/client.go +++ b/pkg/k8s/client.go @@ -120,7 +120,8 @@ func New(kubeconfigPath string, kubeletSocket string, logger *slog.Logger) (*Cli } // If kubelet socket is mounted, create a pod resource client - if _, err := os.Stat(kubeletSocket); err == nil { + _, err = os.Stat(kubeletSocket) + if err == nil { conn, err := ConnectToServer(kubeletSocket) if err != nil { return nil, err @@ -161,7 +162,8 @@ func (c *Client) NewPodInformer(resyncPeriod time.Duration) error { // Create a new instance of pod informer c.PodInformer = c.informerFactory.Core().V1().Pods() - if _, err := c.PodInformer.Informer().AddEventHandler( + + _, err := c.PodInformer.Informer().AddEventHandler( // Your custom resource event handlers. cache.ResourceEventHandlerFuncs{ // Called on creation @@ -171,7 +173,8 @@ func (c *Client) NewPodInformer(resyncPeriod time.Duration) error { // Called on resource deletion. DeleteFunc: c.podDelete, }, - ); err != nil { + ) + if err != nil { return err } @@ -201,12 +204,14 @@ func (c *Client) Pods() []*v1.Pod { var pods []*v1.Pod podMu.Lock() + for _, pod := range c.pods { pods = append(pods, pod) } // Reset pods map c.pods = make(map[string]*v1.Pod) + podMu.Unlock() return pods @@ -303,7 +308,9 @@ func (c *Client) Exec(ctx context.Context, ns string, pod string, container stri } scheme := runtime.NewScheme() - if err := v1.AddToScheme(scheme); err != nil { + + err := v1.AddToScheme(scheme) + if err != nil { return nil, nil, fmt.Errorf("failed to add to scheme: %w", err) } @@ -345,7 +352,9 @@ func (c *Client) ConfigMap(ctx context.Context, ns string, name string) (map[str func (c *Client) podAdd(obj any) { if pod, ok := obj.(*v1.Pod); ok { podMu.Lock() + c.pods[string(pod.UID)] = pod + podMu.Unlock() } } @@ -354,7 +363,9 @@ func (c *Client) podAdd(obj any) { func (c *Client) podUpdate(_, newObj any) { if pod, ok := newObj.(*v1.Pod); ok { podMu.Lock() + c.pods[string(pod.UID)] = pod + podMu.Unlock() } } @@ -363,7 +374,9 @@ func (c *Client) podUpdate(_, newObj any) { func (c *Client) podDelete(obj any) { if pod, ok := obj.(*v1.Pod); ok { podMu.Lock() + c.pods[string(pod.UID)] = pod + podMu.Unlock() } } diff --git a/pkg/k8s/mock.go b/pkg/k8s/mock.go index e1ed7caf..caad702a 100644 --- a/pkg/k8s/mock.go +++ b/pkg/k8s/mock.go @@ -32,6 +32,7 @@ type streamContext struct { type streamAndReply struct { httpstream.Stream + replySent <-chan struct{} } @@ -85,6 +86,7 @@ func CreateHTTPStreams(w http.ResponseWriter, req *http.Request, opts *remotecom if opts.Stderr != nil { expectedStreams++ } + WaitForStreams: for { select { @@ -93,15 +95,19 @@ WaitForStreams: switch streamType { case v1.StreamTypeError: replyChan <- struct{}{} + ctx.writeStatus = v4WriteStatusFunc(stream) case v1.StreamTypeStdout: replyChan <- struct{}{} + ctx.stdoutStream = stream case v1.StreamTypeStdin: replyChan <- struct{}{} + ctx.stdinStream = stream case v1.StreamTypeStderr: replyChan <- struct{}{} + ctx.stderrStream = stream default: // add other stream ... 
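The other recurring theme here is the noctx family of fixes: calls gain a context-aware variant (db.ExecContext in the frontend tests, net.Resolver.LookupAddr in the redfish.go hunk earlier), and the one call without such a variant, net.Listen in the CreateListener hunk just below, is suppressed with //nolint:noctx instead. A sketch of the context-bounded reverse lookup, using only the standard library (reverseLookup is our name):

	package example

	import (
		"context"
		"net"
		"time"
	)

	// reverseLookup bounds a reverse DNS query with a timeout. A nil
	// *net.Resolver behaves like the zero Resolver per the net docs.
	func reverseLookup(ip string) (string, error) {
		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
		defer cancel()

		var r *net.Resolver

		names, err := r.LookupAddr(ctx, ip)
		if err != nil || len(names) == 0 {
			// Fall back to the raw IP, as the redfish collector does.
			return ip, err
		}

		return names[0], nil
	}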
@@ -146,7 +152,8 @@ func CreateListener(addr string) (net.Listener, error) { return nil, fmt.Errorf("failed to unlink socket file %q: %w", addr, err) } - if err := os.MkdirAll(filepath.Dir(addr), 0o750); err != nil { + err = os.MkdirAll(filepath.Dir(addr), 0o750) + if err != nil { return nil, fmt.Errorf("error creating socket directory %q: %w", filepath.Dir(addr), err) } @@ -156,16 +163,18 @@ func CreateListener(addr string) (net.Listener, error) { return nil, fmt.Errorf("failed to create temporary file: %w", err) } - if err := os.Remove(file.Name()); err != nil { + err = os.Remove(file.Name()) + if err != nil { return nil, fmt.Errorf("failed to remove temporary file: %w", err) } - l, err := net.Listen("unix", file.Name()) + l, err := net.Listen("unix", file.Name()) //nolint:noctx if err != nil { return nil, err } - if err = os.Rename(file.Name(), addr); err != nil { + err = os.Rename(file.Name(), addr) + if err != nil { return nil, fmt.Errorf("failed to move temporary file to addr %q: %w", addr, err) } @@ -175,7 +184,8 @@ func CreateListener(addr string) (net.Listener, error) { // FakeKubeletServer returns a mock API resource server. func FakeKubeletServer(socketDir string, listResp *podresourcesapi.ListPodResourcesResponse, getAllocatableResourcesResp *podresourcesapi.AllocatableResourcesResponse) (*FakeResourceServer, error) { // Ensure socket directory exists - if err := os.MkdirAll(socketDir, os.ModeDir); err != nil { + err := os.MkdirAll(socketDir, os.ModeDir) + if err != nil { return nil, err } diff --git a/pkg/lb/backend/pyro.go b/pkg/lb/backend/pyro.go index 9d9f4190..302cc5a9 100644 --- a/pkg/lb/backend/pyro.go +++ b/pkg/lb/backend/pyro.go @@ -81,6 +81,7 @@ func (b *pyroServer) SetAlive(alive bool) { // IsAlive returns if backend Pyroscope server is alive. 
func (b *pyroServer) IsAlive() bool { b.mux.RLock() + alive := b.alive defer b.mux.RUnlock() diff --git a/pkg/lb/backend/response.go b/pkg/lb/backend/response.go index 0a33f18b..5d127ca0 100644 --- a/pkg/lb/backend/response.go +++ b/pkg/lb/backend/response.go @@ -38,7 +38,9 @@ func PromResponseModifier(labelsToFilter []string) func(r *http.Response) error case strings.HasSuffix(r.Request.URL.Path, "query") || strings.HasSuffix(r.Request.URL.Path, "query_range"): // Read response bytes into TSDB response var tsdbResp tsdb.Response[tsdb.Data] - if err = json.Unmarshal(b, &tsdbResp); err != nil { + + err = json.Unmarshal(b, &tsdbResp) + if err != nil { return err } @@ -57,13 +59,16 @@ func PromResponseModifier(labelsToFilter []string) func(r *http.Response) error } // Marshal into newBody - if newBody, err = json.Marshal(tsdbResp); err != nil { + newBody, err = json.Marshal(tsdbResp) + if err != nil { return err } case strings.HasSuffix(r.Request.URL.Path, "series"): // Read response bytes into TSDB response var tsdbResp tsdb.Response[[]map[string]string] - if err = json.Unmarshal(b, &tsdbResp); err != nil { + + err = json.Unmarshal(b, &tsdbResp) + if err != nil { return err } @@ -82,13 +87,16 @@ func PromResponseModifier(labelsToFilter []string) func(r *http.Response) error } // Marshal into newBody - if newBody, err = json.Marshal(tsdbResp); err != nil { + newBody, err = json.Marshal(tsdbResp) + if err != nil { return err } case strings.HasSuffix(r.Request.URL.Path, "labels"): // Read response bytes into TSDB response var tsdbResp tsdb.Response[[]string] - if err = json.Unmarshal(b, &tsdbResp); err != nil { + + err = json.Unmarshal(b, &tsdbResp) + if err != nil { return err } @@ -112,7 +120,8 @@ func PromResponseModifier(labelsToFilter []string) func(r *http.Response) error tsdbResp.Data = newData // Marshal into newBody - if newBody, err = json.Marshal(tsdbResp); err != nil { + newBody, err = json.Marshal(tsdbResp) + if err != nil { return err } case strings.HasSuffix(r.Request.URL.Path, "values"): @@ -134,7 +143,9 @@ func PromResponseModifier(labelsToFilter []string) func(r *http.Response) error // Read response bytes into TSDB response var tsdbResp tsdb.Response[[]string] - if err = json.Unmarshal(b, &tsdbResp); err != nil { + + err = json.Unmarshal(b, &tsdbResp) + if err != nil { return err } @@ -142,7 +153,8 @@ func PromResponseModifier(labelsToFilter []string) func(r *http.Response) error tsdbResp.Data = nil // Marshal into newBody - if newBody, err = json.Marshal(tsdbResp); err != nil { + newBody, err = json.Marshal(tsdbResp) + if err != nil { return err } } diff --git a/pkg/lb/backend/response_test.go b/pkg/lb/backend/response_test.go index 0bd65a60..01c6161c 100644 --- a/pkg/lb/backend/response_test.go +++ b/pkg/lb/backend/response_test.go @@ -35,7 +35,8 @@ func TestPromReverseProxyModifyResponse(t *testing.T) { w.WriteHeader(http.StatusOK) - if err := json.NewEncoder(w).Encode(&resp); err != nil { + err := json.NewEncoder(w).Encode(&resp) + if err != nil { w.Write([]byte("KO")) } } else if strings.HasSuffix(r.URL.Path, "series") { @@ -58,7 +59,8 @@ func TestPromReverseProxyModifyResponse(t *testing.T) { w.WriteHeader(http.StatusOK) - if err := json.NewEncoder(w).Encode(&resp); err != nil { + err := json.NewEncoder(w).Encode(&resp) + if err != nil { w.Write([]byte("KO")) } } else if strings.HasSuffix(r.URL.Path, "labels") { @@ -70,7 +72,8 @@ func TestPromReverseProxyModifyResponse(t *testing.T) { w.WriteHeader(http.StatusOK) - if err := json.NewEncoder(w).Encode(&resp); err != nil 
{ + err := json.NewEncoder(w).Encode(&resp) + if err != nil { w.Write([]byte("KO")) } } else if strings.HasSuffix(r.URL.Path, "values") { @@ -82,7 +85,8 @@ func TestPromReverseProxyModifyResponse(t *testing.T) { w.WriteHeader(http.StatusOK) - if err := json.NewEncoder(w).Encode(&resp); err != nil { + err := json.NewEncoder(w).Encode(&resp) + if err != nil { w.Write([]byte("KO")) } } @@ -116,6 +120,7 @@ func TestPromReverseProxyModifyResponse(t *testing.T) { // Read response body b, err := io.ReadAll(resp.Body) require.NoError(t, err) + defer resp.Body.Close() for _, label := range labelsToFilter { diff --git a/pkg/lb/backend/tsdb.go b/pkg/lb/backend/tsdb.go index ecae864e..9d8d9cb0 100644 --- a/pkg/lb/backend/tsdb.go +++ b/pkg/lb/backend/tsdb.go @@ -128,6 +128,7 @@ func (b *tsdbServer) SetAlive(alive bool) { // IsAlive returns if backend TSDB server is alive. func (b *tsdbServer) IsAlive() bool { b.mux.RLock() + alive := b.alive defer b.mux.RUnlock() @@ -208,13 +209,15 @@ func (b *tsdbServer) fetchRetentionPeriod() (time.Duration, error) { // Make a range query query := fmt.Sprintf(`up{instance="%s:%s"}`, b.url.Hostname(), b.url.Port()) - if results, err := b.client.RangeQuery( + + results, err := b.client.RangeQuery( ctx, query, time.Now().Add(-queryPeriod).UTC(), time.Now().UTC(), queryPeriod/5000, - ); err == nil { + ) + if err == nil { for _, result := range results { if n, ok := result.Metric["__name__"]; ok && n == "up" { // We are updating retention period only at a frequency set by diff --git a/pkg/lb/backend/tsdb_test.go b/pkg/lb/backend/tsdb_test.go index d92d21a5..0a25321f 100644 --- a/pkg/lb/backend/tsdb_test.go +++ b/pkg/lb/backend/tsdb_test.go @@ -54,7 +54,9 @@ func testTSDBServer(storageRetention string, emptyResponse bool, basicAuth bool) if emptyResponse { expected := "dummy" - if err := json.NewEncoder(w).Encode(&expected); err != nil { + + err := json.NewEncoder(w).Encode(&expected) + if err != nil { w.Write([]byte("KO")) } @@ -62,11 +64,13 @@ func testTSDBServer(storageRetention string, emptyResponse bool, basicAuth bool) } if strings.HasSuffix(r.URL.Path, "runtimeinfo") { - if err := json.NewEncoder(w).Encode(&expectedRuntime); err != nil { + err := json.NewEncoder(w).Encode(&expectedRuntime) + if err != nil { w.Write([]byte("KO")) } } else { - if err := json.NewEncoder(w).Encode(&expectedRange); err != nil { + err := json.NewEncoder(w).Encode(&expectedRange) + if err != nil { w.Write([]byte("KO")) } } @@ -178,6 +182,7 @@ func TestTSDBQueryWithLabelFilter(t *testing.T) { require.NoError(t, err) var tsdbResp tsdb.Response[tsdb.Data] + err = json.Unmarshal(body, &tsdbResp) require.NoError(t, err) diff --git a/pkg/lb/cli/cli.go b/pkg/lb/cli/cli.go index ce51ba4b..e72cfe39 100644 --- a/pkg/lb/cli/cli.go +++ b/pkg/lb/cli/cli.go @@ -107,19 +107,22 @@ func (c *CEEMSLBAppConfig) UnmarshalYAML(unmarshal func(any) error) error { type plain CEEMSLBAppConfig - if err := unmarshal((*plain)(c)); err != nil { + err := unmarshal((*plain)(c)) + if err != nil { return err } // Validate backend servers config - if err := c.Validate(); err != nil { + err = c.Validate() + if err != nil { return err } // The UnmarshalYAML method of HTTPClientConfig is not being called because it's not a pointer. // We cannot make it a pointer as the parser panics for inlined pointer structs. // Thus we just do its validation here. 
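Several UnmarshalYAML methods in this patch (SessionConfig, PyroscopeConfig, redfishClientConfig, and CEEMSLBAppConfig in the cli.go hunk around this point) share the same alias trick, so it is worth one standalone sketch; Config and validate are our names:

	package example

	type Config struct {
		URL string `yaml:"url"`
	}

	// UnmarshalYAML decodes into a method-less alias of the type so the
	// YAML decoder cannot recurse back into this method, then validates
	// the decoded value exactly once.
	func (c *Config) UnmarshalYAML(unmarshal func(any) error) error {
		type plain Config

		err := unmarshal((*plain)(c))
		if err != nil {
			return err
		}

		return c.validate()
	}

	func (c *Config) validate() error { return nil }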
- if err := c.Server.Web.HTTPClientConfig.Validate(); err != nil { + err = c.Server.Web.HTTPClientConfig.Validate() + if err != nil { return err } @@ -263,12 +266,15 @@ func (lb *CEEMSLoadBalancer) Main() error { } // Check if DB path and file exists in config and add them to ReadPaths - if _, err := os.Stat(config.Server.Data.Path); err == nil { + _, err = os.Stat(config.Server.Data.Path) + if err == nil { securityCfg.ReadPaths = append(securityCfg.ReadPaths, config.Server.Data.Path) // Now check if DB file exists dbFile := filepath.Join(config.Server.Data.Path, ceems_api_base.CEEMSDBName) - if _, err := os.Stat(dbFile); err == nil { + + _, err := os.Stat(dbFile) + if err == nil { securityCfg.ReadPaths = append(securityCfg.ReadPaths, dbFile) } } @@ -283,7 +289,8 @@ func (lb *CEEMSLoadBalancer) Main() error { // Drop all unnecessary privileges if dropPrivs { - if err := securityManager.DropPrivileges(disableCapAwareness); err != nil { + err := securityManager.DropPrivileges(disableCapAwareness) + if err != nil { logger.Error("Failed to drop privileges", "err", err) return err @@ -372,13 +379,15 @@ func (lb *CEEMSLoadBalancer) Main() error { go func() { defer wg.Done() + monitor(ctx, managers[lbType], logger.With("backend_type", lbType)) }() // Initializing the server in a goroutine so that // it won't block the graceful shutdown handling below go func() { - if err := lbs[lbType].Start(ctx); err != nil { + err := lbs[lbType].Start(ctx) + if err != nil { logger.Error("Failed to start load balancer", "backend_type", lbType, "err", err) } }() @@ -400,13 +409,15 @@ func (lb *CEEMSLoadBalancer) Main() error { defer cancel() for _, lbType := range lbTypes { - if err := lbs[lbType].Shutdown(shutDownCtx); err != nil { + err := lbs[lbType].Shutdown(shutDownCtx) + if err != nil { logger.Error("Failed to gracefully shutdown LB server", "backend_type", lbType, "err", err) } } // Restore file permissions by removing any ACLs added - if err := securityManager.DeleteACLEntries(); err != nil { + err = securityManager.DeleteACLEntries() + if err != nil { logger.Error("Failed to remove ACL entries", "err", err) } @@ -502,11 +513,13 @@ func isAlive(ctx context.Context, aliveChannel chan bool, u *url.URL, logger *sl conn, err := d.DialContext(ctx, "tcp", u.Host) if err != nil { logger.Debug("Backend unreachable", "backend", u.Redacted(), "err", err) + aliveChannel <- false return } _ = conn.Close() + aliveChannel <- true } diff --git a/pkg/lb/cli/cli_test.go b/pkg/lb/cli/cli_test.go index 5d3d6327..e2e94b46 100644 --- a/pkg/lb/cli/cli_test.go +++ b/pkg/lb/cli/cli_test.go @@ -46,7 +46,8 @@ func queryLB(address, clusterID string) error { return err } - if err := resp.Body.Close(); err != nil { + err = resp.Body.Close() + if err != nil { return err } @@ -109,8 +110,10 @@ ceems_lb: // Query LB for i := range 10 { - if err := queryLB("localhost:9040", "default"); err == nil { - if err := queryLB("localhost:9040", "default"); err == nil { + err := queryLB("localhost:9040", "default") + if err == nil { + err := queryLB("localhost:9040", "default") + if err == nil { break } } diff --git a/pkg/lb/frontend/frontend.go b/pkg/lb/frontend/frontend.go index 4fc6fce9..1ee5b5b0 100644 --- a/pkg/lb/frontend/frontend.go +++ b/pkg/lb/frontend/frontend.go @@ -98,7 +98,8 @@ func New(c *Config) (LoadBalancer, error) { defer cancel() // Validate LB - if err := lb.validate(ctx); err != nil { + err = lb.validate(ctx) + if err != nil { return nil, fmt.Errorf("failed to validate load balancer frontend: %w", err) } @@ -115,7 +116,8 @@
func (lb *loadBalancer) Start(_ context.Context) error { lb.logger.Info("Starting "+base.CEEMSLoadBalancerAppName, "listening", lb.server.Addr) // Listen for requests - if err := web.ListenAndServe(lb.server, lb.webConfig, lb.logger); err != nil && + err := web.ListenAndServe(lb.server, lb.webConfig, lb.logger) + if err != nil && !errors.Is(err, http.ErrServerClosed) { lb.logger.Error("Failed to Listen and Serve HTTP server", "err", err) @@ -129,7 +131,8 @@ func (lb *loadBalancer) Start(_ context.Context) error { func (lb *loadBalancer) Shutdown(ctx context.Context) error { // Close DB connection only if DB file is provided if lb.amw.ceems.db != nil { - if err := lb.amw.ceems.db.Close(); err != nil { + err := lb.amw.ceems.db.Close() + if err != nil { lb.logger.Error("Failed to close DB connection", "err", err) return err @@ -137,7 +140,8 @@ func (lb *loadBalancer) Shutdown(ctx context.Context) error { } // Shutdown the server - if err := lb.server.Shutdown(ctx); err != nil { + err := lb.server.Shutdown(ctx) + if err != nil { lb.logger.Error("Failed to shutdown HTTP server", "err", err) return err @@ -247,7 +251,8 @@ func (lb *loadBalancer) validate(ctx context.Context) error { var cluster models.Cluster for rows.Next() { - if err := rows.Scan(&cluster.ID, &cluster.Manager); err != nil { + err := rows.Scan(&cluster.ID, &cluster.Manager) + if err != nil { continue } @@ -256,7 +261,8 @@ func (lb *loadBalancer) validate(ctx context.Context) error { // Ref: http://go-database-sql.org/errors.html // Get all the errors during iteration - if err := rows.Err(); err != nil { + err = rows.Err() + if err != nil { lb.logger.Error("Errors during scanning rows", "err", err) } diff --git a/pkg/lb/frontend/frontend_test.go b/pkg/lb/frontend/frontend_test.go index f07b99a8..d7191746 100644 --- a/pkg/lb/frontend/frontend_test.go +++ b/pkg/lb/frontend/frontend_test.go @@ -30,7 +30,7 @@ import ( var noOpLogger = slog.New(slog.DiscardHandler) -func setupClusterIDsDB(d string) error { +func setupClusterIDsDB(ctx context.Context, d string) error { dbPath := filepath.Join(d, "ceems.db") db, err := sql.Open("sqlite3", dbPath) @@ -52,7 +52,7 @@ INSERT INTO units VALUES(3, 'os-1', 'openstack'); INSERT INTO units VALUES(4, 'slurm-1', 'slurm'); COMMIT;` - _, err = db.Exec(stmts) + _, err = db.ExecContext(ctx, stmts) if err != nil { return fmt.Errorf("failed to insert mock data into DB: %w", err) } @@ -86,15 +86,18 @@ func dummyTSDBServer(clusterID string) *httptest.Server { } server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { if strings.HasSuffix(r.URL.Path, "config") { - if err := json.NewEncoder(w).Encode(&expectedConfig); err != nil { + err := json.NewEncoder(w).Encode(&expectedConfig) + if err != nil { w.Write([]byte("KO")) } } else if strings.HasSuffix(r.URL.Path, "flags") { - if err := json.NewEncoder(w).Encode(&expectedFlags); err != nil { + err := json.NewEncoder(w).Encode(&expectedFlags) + if err != nil { w.Write([]byte("KO")) } } else if strings.HasSuffix(r.URL.Path, "runtimeinfo") { - if err := json.NewEncoder(w).Encode(&expectedRuntimeInfo); err != nil { + err := json.NewEncoder(w).Encode(&expectedRuntimeInfo) + if err != nil { w.Write([]byte("KO")) } } else { @@ -107,7 +110,7 @@ func dummyTSDBServer(clusterID string) *httptest.Server { func TestNewFrontend(t *testing.T) { tmpDir := t.TempDir() - err := setupClusterIDsDB(tmpDir) + err := setupClusterIDsDB(t.Context(), tmpDir) require.NoError(t, err, "failed to setup test DB") clusterID := "slurm-0" @@ -367,7 +370,7 
@@ func TestNewFrontendTwoGroups(t *testing.T) { func TestValidateClusterIDsWithDBPass(t *testing.T) { tmpDir := t.TempDir() - err := setupClusterIDsDB(tmpDir) + err := setupClusterIDsDB(t.Context(), tmpDir) require.NoError(t, err, "failed to setup test DB") // Backends for group 1 @@ -399,7 +402,7 @@ func TestValidateClusterIDsWithDBPass(t *testing.T) { func TestValidateClusterIDsWithDBFail(t *testing.T) { tmpDir := t.TempDir() - err := setupClusterIDsDB(tmpDir) + err := setupClusterIDsDB(t.Context(), tmpDir) require.NoError(t, err, "failed to setup test DB") // Backends for group 1 @@ -444,7 +447,8 @@ func TestValidateClusterIDsWithAPIPass(t *testing.T) { } ceemsServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if err := json.NewEncoder(w).Encode(&expected); err != nil { + err := json.NewEncoder(w).Encode(&expected) + if err != nil { w.Write([]byte("KO")) } })) @@ -484,7 +488,8 @@ func TestValidateClusterIDsWithAPIFail(t *testing.T) { } ceemsServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if err := json.NewEncoder(w).Encode(&expected); err != nil { + err := json.NewEncoder(w).Encode(&expected) + if err != nil { w.Write([]byte("KO")) } })) diff --git a/pkg/lb/frontend/helpers.go b/pkg/lb/frontend/helpers.go index 4d11cbf9..e6b63a7f 100644 --- a/pkg/lb/frontend/helpers.go +++ b/pkg/lb/frontend/helpers.go @@ -13,7 +13,8 @@ func ceemsAPIRequest[T any](req *http.Request, client *http.Client) ([]T, error) // Make request // If request failed, forbid the query. It can happen when CEEMS API server // goes offline and we should wait for it to come back online - if resp, err := client.Do(req); err != nil { + resp, err := client.Do(req) + if err != nil { return nil, err } else { defer resp.Body.Close() @@ -31,7 +32,9 @@ func ceemsAPIRequest[T any](req *http.Request, client *http.Client) ([]T, error) // Unpack into data var data ceems_api_http.Response[T] - if err = json.Unmarshal(body, &data); err != nil { + + err = json.Unmarshal(body, &data) + if err != nil { return nil, err } diff --git a/pkg/lb/frontend/middleware.go b/pkg/lb/frontend/middleware.go index bff634e4..f46786f0 100644 --- a/pkg/lb/frontend/middleware.go +++ b/pkg/lb/frontend/middleware.go @@ -183,10 +183,12 @@ func newAuthMiddleware(c *Config) (*authenticationMiddleware, error) { // Set DB pointer only if file exists. Else sql.Open will create an empty // file as if exists already - if _, err := os.Stat(dbAbsPath); err == nil { + _, err = os.Stat(dbAbsPath) + if err == nil { dsn := fmt.Sprintf("file:%s?%s", dbAbsPath, "_mutex=no&mode=ro&_busy_timeout=5000") - if db, err = sql.Open("sqlite3", dsn); err != nil { + db, err = sql.Open("sqlite3", dsn) + if err != nil { return nil, err } } @@ -203,7 +205,8 @@ func newAuthMiddleware(c *Config) (*authenticationMiddleware, error) { } // Make a CEEMS API server client from client config - if ceemsClient, err = config.NewClientFromConfig(c.APIServer.Web.HTTPClientConfig, "ceems_api_server"); err != nil { + ceemsClient, err = config.NewClientFromConfig(c.APIServer.Web.HTTPClientConfig, "ceems_api_server") + if err != nil { return nil, err } @@ -261,7 +264,9 @@ func (amw *authenticationMiddleware) Middleware(next http.Handler) http.Handler ErrorType: "bad_request", Error: "invalid cluster ID. 
Set cluster ID using X-Ceems-Cluster-Id header in Prometheus datasource.", } - if err := json.NewEncoder(w).Encode(&response); err != nil { + + err := json.NewEncoder(w).Encode(&response) + if err != nil { amw.logger.Error("Failed to encode response", "err", err) w.Write([]byte("KO")) } @@ -295,7 +300,9 @@ func (amw *authenticationMiddleware) Middleware(next http.Handler) http.Handler ErrorType: "unauthorized", Error: "no user header found. Make sure to set send_user_header = true in [dataproxy] section of Grafana configuration file.", } - if err := json.NewEncoder(w).Encode(&response); err != nil { + + err := json.NewEncoder(w).Encode(&response) + if err != nil { amw.logger.Error("Failed to encode response", "err", err) w.Write([]byte("KO")) } @@ -324,7 +331,9 @@ func (amw *authenticationMiddleware) Middleware(next http.Handler) http.Handler ErrorType: "forbidden", Error: "user does not have permissions to this resource", } - if err := json.NewEncoder(w).Encode(&response); err != nil { + + err := json.NewEncoder(w).Encode(&response) + if err != nil { amw.logger.Error("Failed to encode response", "err", err) w.Write([]byte("KO")) } @@ -334,7 +343,8 @@ func (amw *authenticationMiddleware) Middleware(next http.Handler) http.Handler // Clone request, parse query params and set them in request context // This will ensure we set query params in request's context always - if err = amw.parseRequest(reqParams, r); err != nil { + err = amw.parseRequest(reqParams, r) + if err != nil { amw.logger.Error("Failed to parse query in the request", "logged_user", loggedUser, "err", err) } @@ -360,7 +370,9 @@ func (amw *authenticationMiddleware) Middleware(next http.Handler) http.Handler ErrorType: "forbidden", Error: "user does not have permissions to view unit metrics", } - if err := json.NewEncoder(w).Encode(&response); err != nil { + + err := json.NewEncoder(w).Encode(&response) + if err != nil { amw.logger.Error("Failed to encode response", "err", err) w.Write([]byte("KO")) } @@ -426,13 +438,15 @@ func (amw *authenticationMiddleware) isUserUnit( // Make request // If request failed, forbid the query.
It can happen when CEEMS API server // goes offline and we should wait for it to come back online - if resp, err := amw.ceems.client.Do(req); err != nil { + resp, err := amw.ceems.client.Do(req) + if err != nil { amw.logger.Error("Failed to make request for unit ownership verification", "user", user, "queried_uuids", strings.Join(uuids, ","), "err", err) return false } else if resp.StatusCode != http.StatusOK { defer resp.Body.Close() + amw.logger.Error("Unauthorised query", "user", user, "queried_uuids", strings.Join(uuids, ","), "status_code", resp.StatusCode) diff --git a/pkg/lb/frontend/middleware_test.go b/pkg/lb/frontend/middleware_test.go index 208282b0..ec275f51 100644 --- a/pkg/lb/frontend/middleware_test.go +++ b/pkg/lb/frontend/middleware_test.go @@ -23,7 +23,7 @@ import ( "github.com/stretchr/testify/require" ) -func setupTestDB(d string) (*sql.DB, error) { +func setupTestDB(ctx context.Context, d string) (*sql.DB, error) { dbPath := filepath.Join(d, "test.db") db, err := sql.Open("sqlite3", dbPath) @@ -104,7 +104,7 @@ INSERT INTO admin_users VALUES(5, 'all', 'adm5', '["grafana"]'); INSERT INTO admin_users VALUES(6, 'all', 'adm6', '["grafana"]'); COMMIT;` - _, err = db.Exec(stmts) + _, err = db.ExecContext(ctx, stmts) if err != nil { return nil, fmt.Errorf("failed to insert mock data into DB: %w", err) } @@ -112,9 +112,9 @@ COMMIT;` return db, nil } -func setupMiddlewareWithDB(tmpDir string) (http.Handler, error) { +func setupMiddlewareWithDB(ctx context.Context, tmpDir string) (http.Handler, error) { // Setup test DB - db, err := setupTestDB(tmpDir) + db, err := setupTestDB(ctx, tmpDir) if err != nil { return nil, err } @@ -137,7 +137,7 @@ func setupMiddlewareWithDB(tmpDir string) (http.Handler, error) { func setupMiddlewareWithAPI(ctx context.Context, tmpDir string) (http.Handler, error) { // Setup test DB - db, err := setupTestDB(tmpDir) + db, err := setupTestDB(ctx, tmpDir) if err != nil { return nil, err } @@ -196,7 +196,8 @@ func setupCEEMSAPI(ctx context.Context, db *sql.DB) *httptest.Server { w.Write([]byte("fail")) } } else { - if admins, err := http_api.AdminUsers(ctx, db); err == nil { + admins, err := http_api.AdminUsers(ctx, db) + if err == nil { // Write response w.WriteHeader(http.StatusOK) @@ -205,7 +206,8 @@ func setupCEEMSAPI(ctx context.Context, db *sql.DB) *httptest.Server { Data: admins, } - if err = json.NewEncoder(w).Encode(&response); err != nil { + err = json.NewEncoder(w).Encode(&response) + if err != nil { w.Write([]byte("KO")) } } @@ -217,7 +219,7 @@ func setupCEEMSAPI(ctx context.Context, db *sql.DB) *httptest.Server { func TestMiddleware(t *testing.T) { // Setup middleware handlers - handlerToTestDB, err := setupMiddlewareWithDB(t.TempDir()) + handlerToTestDB, err := setupMiddlewareWithDB(t.Context(), t.TempDir()) require.NoError(t, err, "failed to setup middleware with DB") handlerToTestAPI, err := setupMiddlewareWithAPI(t.Context(), t.TempDir()) require.NoError(t, err, "failed to setup middleware with API") @@ -350,6 +352,7 @@ func TestMiddleware(t *testing.T) { resDB := responseRecorderDB.Result() defer resDB.Body.Close() + assert.Equal(t, test.code, resDB.StatusCode, "%s with DB", test.name) // Tests with CEEMS API @@ -359,6 +362,7 @@ func TestMiddleware(t *testing.T) { resAPI := responseRecorderAPI.Result() defer resAPI.Body.Close() + assert.Equal(t, test.code, resAPI.StatusCode, "%s with API", test.name) } } diff --git a/pkg/lb/frontend/parse.go b/pkg/lb/frontend/parse.go index a7faa9fc..a4ce6fd2 100644 --- a/pkg/lb/frontend/parse.go +++ 
b/pkg/lb/frontend/parse.go @@ -38,7 +38,8 @@ func parseTSDBRequest(p *ReqParams, r *http.Request) error { } // If failed to read body, skip verification and go to request proxy - if body, err = io.ReadAll(r.Body); err != nil { + body, err = io.ReadAll(r.Body) + if err != nil { return fmt.Errorf("failed to read request body: %w", err) } @@ -47,7 +48,8 @@ func parseTSDBRequest(p *ReqParams, r *http.Request) error { clonedReq.Body = io.NopCloser(bytes.NewReader(body)) // Get form values - if err = clonedReq.ParseForm(); err != nil { + err = clonedReq.ParseForm() + if err != nil { return fmt.Errorf("failed to parse request form data: %w", err) } @@ -85,7 +87,8 @@ func parsePyroRequest(p *ReqParams, r *http.Request) error { } // If failed to read body, skip verification and go to request proxy - if body, err = io.ReadAll(r.Body); err != nil { + body, err = io.ReadAll(r.Body) + if err != nil { return fmt.Errorf("failed to read request body: %w", err) } @@ -97,7 +100,9 @@ func parsePyroRequest(p *ReqParams, r *http.Request) error { case strings.HasSuffix(r.URL.Path, "SelectMergeStacktraces"): // Read body into request data data := querierv1.SelectMergeStacktracesRequest{} - if err := proto.Unmarshal(body, &data); err != nil { + + err := proto.Unmarshal(body, &data) + if err != nil { return fmt.Errorf("failed to unmarshal request body: %w", err) } @@ -108,7 +113,9 @@ func parsePyroRequest(p *ReqParams, r *http.Request) error { case strings.HasSuffix(r.URL.Path, "LabelNames"): // Read body into request data data := typesv1.LabelNamesRequest{} - if err := proto.Unmarshal(body, &data); err != nil { + + err := proto.Unmarshal(body, &data) + if err != nil { return fmt.Errorf("failed to unmarshal request body: %w", err) } @@ -121,7 +128,9 @@ func parsePyroRequest(p *ReqParams, r *http.Request) error { case strings.HasSuffix(r.URL.Path, "LabelValues"): // Read body into request data data := typesv1.LabelValuesRequest{} - if err := proto.Unmarshal(body, &data); err != nil { + + err := proto.Unmarshal(body, &data) + if err != nil { return fmt.Errorf("failed to unmarshal request body: %w", err) } diff --git a/pkg/lb/serverpool/leastconn_test.go b/pkg/lb/serverpool/leastconn_test.go index dc790ea0..d0beb6dc 100644 --- a/pkg/lb/serverpool/leastconn_test.go +++ b/pkg/lb/serverpool/leastconn_test.go @@ -84,6 +84,7 @@ func TestLeastConnectionLB(t *testing.T) { for _, id := range lcIDs { dummyServer := httptest.NewServer(h) defer dummyServer.Close() + backendURL, err := url.Parse(dummyServer.URL) require.NoError(t, err) diff --git a/pkg/lb/serverpool/roundrobin.go b/pkg/lb/serverpool/roundrobin.go index 0cdd0724..be2b905e 100644 --- a/pkg/lb/serverpool/roundrobin.go +++ b/pkg/lb/serverpool/roundrobin.go @@ -20,6 +20,7 @@ type roundRobin struct { func (s *roundRobin) Rotate(id string) backend.Server { s.mux.Lock() defer s.mux.Unlock() + s.current = (s.current + 1) % s.Size(id) return s.backends[id][s.current] diff --git a/pkg/sqlite3/sqlite3.go b/pkg/sqlite3/sqlite3.go index 3fcbfab9..958874c4 100644 --- a/pkg/sqlite3/sqlite3.go +++ b/pkg/sqlite3/sqlite3.go @@ -34,16 +34,20 @@ func init() { sql.Register(DriverName, &Driver{ sqlite3.SQLiteDriver{ ConnectHook: func(conn *sqlite3.SQLiteConn) error { - if err := conn.RegisterFunc("add_metric_map", addMetricMap, true); err != nil { + err := conn.RegisterFunc("add_metric_map", addMetricMap, true) + if err != nil { return err } - if err := conn.RegisterFunc("avg_metric_map", avgMetricMap, true); err != nil { + err = conn.RegisterFunc("avg_metric_map", avgMetricMap, true)
+ if err != nil { return err } - if err := conn.RegisterAggregator("sum_metric_map_agg", newSumMetricMap, true); err != nil { + err = conn.RegisterAggregator("sum_metric_map_agg", newSumMetricMap, true) + if err != nil { return err } - if err := conn.RegisterAggregator("avg_metric_map_agg", newAvgMetricMapAgg, true); err != nil { + err = conn.RegisterAggregator("avg_metric_map_agg", newAvgMetricMapAgg, true) + if err != nil { return err } @@ -76,11 +80,8 @@ type Driver struct { // be fetched by the user using GetLastConn. The connection ensures it's cleaned up // when it's closed. This method is not used by the user, but rather by sql.Open. func (d *Driver) Open(dsn string) (driver.Conn, error) { - var inner driver.Conn - - var err error - - if inner, err = d.SQLiteDriver.Open(dsn); err != nil { + inner, err := d.SQLiteDriver.Open(dsn) + if err != nil { return nil, err } @@ -94,9 +95,11 @@ func (d *Driver) Open(dsn string) (driver.Conn, error) { } mu.Lock() + seq++ conn := &Conn{cid: seq, SQLiteConn: sconn} conns[conn.cid] = conn + mu.Unlock() return conn, nil @@ -105,8 +108,9 @@ func (d *Driver) Open(dsn string) (driver.Conn, error) { // Conn wraps a sqlite3.SQLiteConn and maintains an ID so that the connection can be // closed. type Conn struct { - cid uint64 *sqlite3.SQLiteConn + + cid uint64 } // Close the DB connection @@ -162,11 +166,14 @@ func NumConns() int { func addMetricMap(existing, current string) string { // Unmarshal strings into MetricMap type var existingMetricMap, currentMetricMap models.MetricMap - if err := json.Unmarshal([]byte(existing), &existingMetricMap); err != nil { + + err := json.Unmarshal([]byte(existing), &existingMetricMap) + if err != nil { panic(err) } - if err := json.Unmarshal([]byte(current), ¤tMetricMap); err != nil { + err = json.Unmarshal([]byte(current), ¤tMetricMap) + if err != nil { panic(err) } @@ -197,11 +204,14 @@ func addMetricMap(existing, current string) string { func avgMetricMap(existing, current string, existingWeight, currentWeight float64) string { // Unmarshal strings into MetricMap type var existingMetricMap, currentMetricMap models.MetricMap - if err := json.Unmarshal([]byte(existing), &existingMetricMap); err != nil { + + err := json.Unmarshal([]byte(existing), &existingMetricMap) + if err != nil { panic(err) } - if err := json.Unmarshal([]byte(current), ¤tMetricMap); err != nil { + err = json.Unmarshal([]byte(current), ¤tMetricMap) + if err != nil { panic(err) } @@ -261,7 +271,8 @@ func (g *sumMetricMap) Step(m string) { return } - if err := json.Unmarshal([]byte(m), &g.currentMetricMap); err != nil { + err := json.Unmarshal([]byte(m), &g.currentMetricMap) + if err != nil { panic(err) } @@ -305,7 +316,8 @@ func (g *avgMetricMapAgg) Step(m string, w float64) { return } - if err := json.Unmarshal([]byte(m), &g.currentMetricMap); err != nil { + err := json.Unmarshal([]byte(m), &g.currentMetricMap) + if err != nil { panic(err) } diff --git a/pkg/sqlite3/sqlite3_test.go b/pkg/sqlite3/sqlite3_test.go index 9c667398..77b38b4a 100644 --- a/pkg/sqlite3/sqlite3_test.go +++ b/pkg/sqlite3/sqlite3_test.go @@ -4,6 +4,7 @@ package sqlite3 import ( + "context" "database/sql" "fmt" "io" @@ -45,10 +46,11 @@ func TestOpenMany(t *testing.T) { for i := range expectedConnections { db, err := sql.Open(DriverName, filepath.Join(tmpdir, fmt.Sprintf("test-%d.db", i+1))) require.NoError(t, err, "could not open connection to database") - require.NoError(t, db.Ping(), "could not ping database to establish a connection") + require.NoError(t, 
db.PingContext(t.Context()), "could not ping database to establish a connection") closers[i] = db var ok bool + conns[i], ok = GetLastConn() require.True(t, ok, "expected new connection") } @@ -261,7 +263,7 @@ func TestAvgMetricMapAgg(t *testing.T) { assert.Equal(t, expectedMap, aggMap) } -func setupDB(tmpDir string, aggMetric bool, units []models.Unit) (models.MetricMap, models.MetricMap, error) { +func setupDB(ctx context.Context, tmpDir string, aggMetric bool, units []models.Unit) (models.MetricMap, models.MetricMap, error) { dbPath := filepath.Join(tmpDir, "test.db") db, err := sql.Open(DriverName, dbPath) @@ -280,7 +282,7 @@ CREATE TABLE units ( ); CREATE UNIQUE INDEX uq_cluster_id_uuid_start ON units (uuid);` - _, err = db.Exec(stmts) + _, err = db.ExecContext(ctx, stmts) if err != nil { return nil, nil, fmt.Errorf("failed to create table in DB: %w", err) } @@ -290,18 +292,20 @@ INSERT INTO units (uuid,total_time_seconds,avg_cpu_usage) VALUES(:uuid,:total_ti total_time_seconds = add_metric_map(total_time_seconds, :total_time_seconds), avg_cpu_usage = avg_metric_map(avg_cpu_usage, :avg_cpu_usage, CAST(json_extract(total_time_seconds, '$.alloc_cputime') AS REAL), CAST(json_extract(:total_time_seconds, '$.alloc_cputime') AS REAL))` - sqlStmt, err := db.Prepare(updateStmt) + sqlStmt, err := db.PrepareContext(ctx, updateStmt) if err != nil { return nil, nil, fmt.Errorf("failed to prepare statement for table %w", err) } defer sqlStmt.Close() for _, unit := range units { - if _, err := sqlStmt.Exec( + _, err := sqlStmt.ExecContext( + ctx, sql.Named("uuid", unit.UUID), sql.Named("total_time_seconds", unit.TotalTime), sql.Named("avg_cpu_usage", unit.AveCPUUsage), - ); err != nil { + ) + if err != nil { return nil, nil, fmt.Errorf("failed to insert data for table %w", err) } } @@ -309,10 +313,10 @@ INSERT INTO units (uuid,total_time_seconds,avg_cpu_usage) VALUES(:uuid,:total_ti // Make units query var cpuUsage, totalTimes models.MetricMap if aggMetric { - _ = db.QueryRow("SELECT avg_metric_map_agg(avg_cpu_usage, CAST(json_extract(total_time_seconds, '$.alloc_cputime') AS REAL)) AS avg_cpu_usage, sum_metric_map_agg(total_time_seconds) AS total_time_seconds FROM units"). + _ = db.QueryRowContext(ctx, "SELECT avg_metric_map_agg(avg_cpu_usage, CAST(json_extract(total_time_seconds, '$.alloc_cputime') AS REAL)) AS avg_cpu_usage, sum_metric_map_agg(total_time_seconds) AS total_time_seconds FROM units"). 
Scan(&cpuUsage, &totalTimes) } else { - _ = db.QueryRow("SELECT avg_cpu_usage, total_time_seconds FROM units").Scan(&cpuUsage, &totalTimes) + _ = db.QueryRowContext(ctx, "SELECT avg_cpu_usage, total_time_seconds FROM units").Scan(&cpuUsage, &totalTimes) } return cpuUsage, totalTimes, nil @@ -478,7 +482,7 @@ func TestCustomFuncsInDB(t *testing.T) { } for _, test := range tests { - gotCPUUsage, gotTotalTimes, err := setupDB(t.TempDir(), test.aggMetric, test.units) + gotCPUUsage, gotTotalTimes, err := setupDB(t.Context(), t.TempDir(), test.aggMetric, test.units) require.NoError(t, err) assert.Equal(t, test.expectedCPUUsage, gotCPUUsage, test.name) assert.Equal(t, test.expectedTotalTimes, gotTotalTimes, test.name) diff --git a/pkg/tsdb/client.go b/pkg/tsdb/client.go index 6c336c67..1632a843 100644 --- a/pkg/tsdb/client.go +++ b/pkg/tsdb/client.go @@ -183,7 +183,8 @@ func (t *Client) Settings(ctx context.Context) *Settings { } // Update settings and lastUpdate time - if settings, err := t.fetchSettings(ctx); err == nil { + settings, err := t.fetchSettings(ctx) + if err == nil { t.lastUpdate = time.Now() t.settingsCache = settings } @@ -342,7 +343,9 @@ func (t *Client) fetchSettings(ctx context.Context) (*Settings, error) { // Unmarshal config into struct var config Config - if err := yaml.Unmarshal([]byte(c.YAML), &config); err != nil { + + err = yaml.Unmarshal([]byte(c.YAML), &config) + if err != nil { return nil, err } @@ -364,23 +367,25 @@ func (t *Client) fetchSettings(ctx context.Context) (*Settings, error) { settings.EvaluationInterval = time.Duration(config.Global.EvaluationInterval) // Get query timeout and max samples from flags - if v, err := strconv.ParseInt(flags["query.max-samples"], 10, 64); err != nil { + v, err := strconv.ParseInt(flags["query.max-samples"], 10, 64) + if err == nil { settings.QueryMaxSamples = v } - if queryTimeout, err := model.ParseDuration(flags["query.timeout"]); err == nil { + queryTimeout, err := model.ParseDuration(flags["query.timeout"]) + if err == nil { settings.QueryTimeout = time.Duration(queryTimeout) } - if queryTimeout, err := model.ParseDuration(flags["query.lookback-delta"]); err == nil { - settings.QueryLookbackDelta = time.Duration(queryTimeout) + lookbackDelta, err := model.ParseDuration(flags["query.lookback-delta"]) + if err == nil { + settings.QueryLookbackDelta = time.Duration(lookbackDelta) } - var retentionPeriod model.Duration - // If storageRetention is set to duration ONLY, we can consider it as // retention period - if retentionPeriod, err = model.ParseDuration(info.StorageRetention); err != nil { + retentionPeriod, err := model.ParseDuration(info.StorageRetention) + if err != nil { // If storageRetention is set to size or time and size, we need to get // "actual" retention period for retentionString := range strings.SplitSeq(info.StorageRetention, "or") { diff --git a/pkg/tsdb/client_test.go b/pkg/tsdb/client_test.go index cfea2b64..444522f4 100644 --- a/pkg/tsdb/client_test.go +++ b/pkg/tsdb/client_test.go @@ -139,7 +139,8 @@ func testTSDBServer(emptyResponse bool) *httptest.Server { Status: "error", } - if err := json.NewEncoder(w).Encode(&expected); err != nil { + err := json.NewEncoder(w).Encode(&expected) + if err != nil { w.Write([]byte("KO")) } @@ -147,37 +148,44 @@ } if strings.HasSuffix(r.URL.Path, "config") { - if err := json.NewEncoder(w).Encode(&expectedConfig); err != nil { + err := json.NewEncoder(w).Encode(&expectedConfig) + if err != nil {
w.Write([]byte("KO")) } } else if strings.HasSuffix(r.URL.Path, "flags") { - if err := json.NewEncoder(w).Encode(&expectedFlags); err != nil { + err := json.NewEncoder(w).Encode(&expectedFlags) + if err != nil { w.Write([]byte("KO")) } } else if strings.HasSuffix(r.URL.Path, "runtimeinfo") { - if err := json.NewEncoder(w).Encode(&expectedRuntimeInfo); err != nil { + err := json.NewEncoder(w).Encode(&expectedRuntimeInfo) + if err != nil { w.Write([]byte("KO")) } } else if strings.HasSuffix(r.URL.Path, "series") { - if err := json.NewEncoder(w).Encode(&expectedSeries); err != nil { + err := json.NewEncoder(w).Encode(&expectedSeries) + if err != nil { w.Write([]byte("KO")) } } else if strings.HasSuffix(r.URL.Path, "labels") { - if err := json.NewEncoder(w).Encode(&expectedLabels); err != nil { + err := json.NewEncoder(w).Encode(&expectedLabels) + if err != nil { w.Write([]byte("KO")) } } else if strings.HasSuffix(r.URL.Path, "query") { _ = r.ParseForm() expectedQueryLookback, _ = model.ParseDuration(r.Form["lookback_delta"][0]) - if err := json.NewEncoder(w).Encode(&expectedQuery); err != nil { + err := json.NewEncoder(w).Encode(&expectedQuery) + if err != nil { w.Write([]byte("KO")) } } else if strings.HasSuffix(r.URL.Path, "query_range") { _ = r.ParseForm() expectedQueryRangeLookback, _ = model.ParseDuration(r.Form["lookback_delta"][0]) - if err := json.NewEncoder(w).Encode(&expectedQueryRange); err != nil { + err := json.NewEncoder(w).Encode(&expectedQueryRange) + if err != nil { w.Write([]byte("KO")) } } @@ -423,7 +431,8 @@ func TestTSDBDeleteSuccess(t *testing.T) { var err error server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if err = r.ParseForm(); err != nil { + err = r.ParseForm() + if err != nil { w.Write([]byte("KO")) return @@ -451,7 +460,8 @@ func TestTSDBDeleteFail(t *testing.T) { var err error server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if err = r.ParseForm(); err != nil { + err = r.ParseForm() + if err != nil { w.Write([]byte("KO")) return diff --git a/scripts/e2e-test.sh b/scripts/e2e-test.sh index 868bc655..72d46310 100755 --- a/scripts/e2e-test.sh +++ b/scripts/e2e-test.sh @@ -48,7 +48,7 @@ pkill ceems_exporter || true pkill ceems_api_server || true pkill ceems_lb || true pkill redfish_proxy || true -pkill ceems_k8s_admission_controller || true +pkill ceems_k8s_admis || true if [[ "${scenario}" =~ ^"exporter" ]] then diff --git a/scripts/mock_servers/main.go b/scripts/mock_servers/main.go index b715b977..120e49ca 100644 --- a/scripts/mock_servers/main.go +++ b/scripts/mock_servers/main.go @@ -72,7 +72,8 @@ func lenLoop(i uint32) int { // ServiceRootHandler handles root of redfish API. func ServiceRootHandler(w http.ResponseWriter, r *http.Request) { - if data, err := assetsFS.ReadFile("assets/redfish/service_root.json"); err == nil { + data, err := assetsFS.ReadFile("assets/redfish/service_root.json") + if err == nil { w.Write(data) return @@ -84,7 +85,8 @@ func ServiceRootHandler(w http.ResponseWriter, r *http.Request) { // ChassisRootHandler handles chassis collections of redfish API. 
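All of these mock Redfish handlers share one shape: read a canned JSON asset from the binary's embedded filesystem and fall back to an error status when the file is missing. A compact, self-contained sketch of that shape, assuming an assets/ directory exists next to the source file (route and asset path here are invented):

package main

import (
	"embed"
	"log"
	"net/http"
)

//go:embed assets
var assetsFS embed.FS

// assetHandler serves one fixed JSON asset from the embedded
// filesystem, returning 500 when the file cannot be read.
func assetHandler(path string) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		data, err := assetsFS.ReadFile(path)
		if err != nil {
			http.Error(w, "asset not found", http.StatusInternalServerError)

			return
		}

		w.Header().Set("Content-Type", "application/json")
		w.Write(data)
	}
}

func main() {
	http.Handle("/redfish/v1/", assetHandler("assets/service_root.json"))
	log.Fatal(http.ListenAndServe(":8080", nil))
}
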
func ChassisRootHandler(w http.ResponseWriter, r *http.Request) { - if data, err := assetsFS.ReadFile("assets/redfish/chassis_collection.json"); err == nil { + data, err := assetsFS.ReadFile("assets/redfish/chassis_collection.json") + if err == nil { w.Write(data) return @@ -97,7 +99,9 @@ func ChassisRootHandler(w http.ResponseWriter, r *http.Request) { // ChassisHandler handles a given chassis of redfish API. func ChassisHandler(w http.ResponseWriter, r *http.Request) { chassisID := strings.ReplaceAll(strings.ToLower(r.PathValue("chassisID")), "-", "_") - if data, err := assetsFS.ReadFile(fmt.Sprintf("assets/redfish/%s.json", chassisID)); err == nil { + + data, err := assetsFS.ReadFile(fmt.Sprintf("assets/redfish/%s.json", chassisID)) + if err == nil { w.Write(data) return @@ -110,7 +114,9 @@ func ChassisHandler(w http.ResponseWriter, r *http.Request) { // ChassisPowerHandler handles chassis power of redfish API. func ChassisPowerHandler(w http.ResponseWriter, r *http.Request) { chassisID := strings.ReplaceAll(strings.ToLower(r.PathValue("chassisID")), "-", "_") - if data, err := assetsFS.ReadFile(fmt.Sprintf("assets/redfish/%s_power.json", chassisID)); err == nil { + + data, err := assetsFS.ReadFile(fmt.Sprintf("assets/redfish/%s_power.json", chassisID)) + if err == nil { w.Write(data) return @@ -122,7 +128,8 @@ func ChassisPowerHandler(w http.ResponseWriter, r *http.Request) { // PyroConfigHandler handles pyroscope config. func PyroConfigHandler(w http.ResponseWriter, r *http.Request) { - if data, err := assetsFS.ReadFile("assets/pyroscope/config.yml"); err == nil { + data, err := assetsFS.ReadFile("assets/pyroscope/config.yml") + if err == nil { w.Write(data) return @@ -135,15 +142,20 @@ func PyroConfigHandler(w http.ResponseWriter, r *http.Request) { // ProfilesHandler handles pyroscope profiles. func ProfilesHandler(w http.ResponseWriter, r *http.Request) { // Parse request - if body, err := io.ReadAll(r.Body); err == nil { + body, err := io.ReadAll(r.Body) + if err == nil { reqData := querierv1.SelectMergeStacktracesRequest{} - if err := proto.Unmarshal(body, &reqData); err == nil { + + err := proto.Unmarshal(body, &reqData) + if err == nil { RespData := &querierv1.SelectMergeStacktracesResponse{ Flamegraph: &querierv1.FlameGraph{ Names: []string{reqData.GetLabelSelector()}, }, } - if body, err := proto.Marshal(RespData); err == nil { + + body, err := proto.Marshal(RespData) + if err == nil { w.Write(body) return @@ -166,7 +178,8 @@ func QueryHandler(w http.ResponseWriter, r *http.Request) { query = r.URL.Query()["query"][0] case http.MethodPost: // Call ParseForm() to parse the raw query and update r.PostForm and r.Form. - if err := r.ParseForm(); err != nil { + err := r.ParseForm() + if err != nil { http.Error(w, "ParseForm error", http.StatusInternalServerError) return @@ -306,7 +319,9 @@ func QueryHandler(w http.ResponseWriter, r *http.Request) { "result": results, }, } - if err := json.NewEncoder(w).Encode(&response); err != nil { + + err := json.NewEncoder(w).Encode(&response) + if err != nil { w.Write([]byte("KO")) } } @@ -321,7 +336,8 @@ func QueryRangeHandler(w http.ResponseWriter, r *http.Request) { query = r.URL.Query()["query"][0] case http.MethodPost: // Call ParseForm() to parse the raw query and update r.PostForm and r.Form. 
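The query handlers accept the PromQL expression either in the URL query string (GET) or in the form body (POST), which is why ParseForm must run before r.Form is read. A rough, self-contained sketch of that dispatch (queryFromRequest is an invented helper, not part of this repository):

package main

import (
	"fmt"
	"net/http"
	"net/http/httptest"
)

// queryFromRequest extracts the "query" parameter for both GET
// (URL query string) and POST (urlencoded form body) requests.
func queryFromRequest(r *http.Request) (string, error) {
	switch r.Method {
	case http.MethodGet:
		return r.URL.Query().Get("query"), nil
	case http.MethodPost:
		// ParseForm populates r.PostForm and r.Form from the body.
		err := r.ParseForm()
		if err != nil {
			return "", fmt.Errorf("failed to parse form: %w", err)
		}

		return r.FormValue("query"), nil
	default:
		return "", fmt.Errorf("unsupported method %s", r.Method)
	}
}

func main() {
	req := httptest.NewRequest(http.MethodGet, "/api/v1/query?query=up", nil)

	q, err := queryFromRequest(req)
	fmt.Println(q, err)
}
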
- if err := r.ParseForm(); err != nil { + err := r.ParseForm() + if err != nil { http.Error(w, "ParseForm error", http.StatusInternalServerError) return @@ -478,7 +494,9 @@ func QueryRangeHandler(w http.ResponseWriter, r *http.Request) { "result": results, }, } - if err := json.NewEncoder(w).Encode(&response); err != nil { + + err := json.NewEncoder(w).Encode(&response) + if err != nil { w.Write([]byte("KO")) } } @@ -491,7 +509,9 @@ func ConfigHandler(w http.ResponseWriter, r *http.Request) { "yaml": "global:\n scrape_interval: 15s\n scrape_timeout: 10s\n scrape_protocols:\n - OpenMetricsText1.0.0\n - OpenMetricsText0.0.1\n - PrometheusText0.0.4\n evaluation_interval: 10s\n external_labels:\n environment: prometheus-demo\nalerting:\n alertmanagers:\n - follow_redirects: true\n enable_http2: true\n scheme: http\n timeout: 10s\n api_version: v2\n static_configs:\n - targets:\n - demo.do.prometheus.io:9093\nrule_files:\n- /etc/prometheus/rules/*.rules\nscrape_configs:\n- job_name: prometheus\n honor_timestamps: true\n track_timestamps_staleness: false\n scrape_interval: 15s\n scrape_timeout: 10s\n scrape_protocols:\n - OpenMetricsText1.0.0\n - OpenMetricsText0.0.1\n - PrometheusText0.0.4\n metrics_path: /metrics\n scheme: http\n enable_compression: true\n follow_redirects: true\n enable_http2: true\n static_configs:\n - targets:\n - demo.do.prometheus.io:9090\n- job_name: random\n honor_timestamps: true\n track_timestamps_staleness: false\n scrape_interval: 15s\n scrape_timeout: 10s\n scrape_protocols:\n - OpenMetricsText1.0.0\n - OpenMetricsText0.0.1\n - PrometheusText0.0.4\n metrics_path: /metrics\n scheme: http\n enable_compression: true\n follow_redirects: true\n enable_http2: true\n file_sd_configs:\n - files:\n - /etc/prometheus/file_sd/random.yml\n refresh_interval: 5m\n- job_name: caddy\n honor_timestamps: true\n track_timestamps_staleness: false\n scrape_interval: 15s\n scrape_timeout: 10s\n scrape_protocols:\n - OpenMetricsText1.0.0\n - OpenMetricsText0.0.1\n - PrometheusText0.0.4\n metrics_path: /metrics\n scheme: http\n enable_compression: true\n follow_redirects: true\n enable_http2: true\n static_configs:\n - targets:\n - localhost:2019\n- job_name: grafana\n honor_timestamps: true\n track_timestamps_staleness: false\n scrape_interval: 15s\n scrape_timeout: 10s\n scrape_protocols:\n - OpenMetricsText1.0.0\n - OpenMetricsText0.0.1\n - PrometheusText0.0.4\n metrics_path: /metrics\n scheme: http\n enable_compression: true\n follow_redirects: true\n enable_http2: true\n static_configs:\n - targets:\n - demo.do.prometheus.io:3000\n- job_name: node\n honor_timestamps: true\n track_timestamps_staleness: false\n scrape_interval: 15s\n scrape_timeout: 10s\n scrape_protocols:\n - OpenMetricsText1.0.0\n - OpenMetricsText0.0.1\n - PrometheusText0.0.4\n metrics_path: /metrics\n scheme: http\n enable_compression: true\n follow_redirects: true\n enable_http2: true\n file_sd_configs:\n - files:\n - /etc/prometheus/file_sd/node.yml\n refresh_interval: 5m\n- job_name: alertmanager\n honor_timestamps: true\n track_timestamps_staleness: false\n scrape_interval: 15s\n scrape_timeout: 10s\n scrape_protocols:\n - OpenMetricsText1.0.0\n - OpenMetricsText0.0.1\n - PrometheusText0.0.4\n metrics_path: /metrics\n scheme: http\n enable_compression: true\n follow_redirects: true\n enable_http2: true\n file_sd_configs:\n - files:\n - /etc/prometheus/file_sd/alertmanager.yml\n refresh_interval: 5m\n- job_name: cadvisor\n honor_timestamps: true\n track_timestamps_staleness: true\n scrape_interval: 15s\n 
scrape_timeout: 10s\n scrape_protocols:\n - OpenMetricsText1.0.0\n - OpenMetricsText0.0.1\n - PrometheusText0.0.4\n metrics_path: /metrics\n scheme: http\n enable_compression: true\n follow_redirects: true\n enable_http2: true\n file_sd_configs:\n - files:\n - /etc/prometheus/file_sd/cadvisor.yml\n refresh_interval: 5m\n- job_name: blackbox\n honor_timestamps: true\n track_timestamps_staleness: false\n params:\n module:\n - http_2xx\n scrape_interval: 15s\n scrape_timeout: 10s\n scrape_protocols:\n - OpenMetricsText1.0.0\n - OpenMetricsText0.0.1\n - PrometheusText0.0.4\n metrics_path: /probe\n scheme: http\n enable_compression: true\n follow_redirects: true\n enable_http2: true\n relabel_configs:\n - source_labels: [__address__]\n separator: ;\n regex: (.*)\n target_label: __param_target\n replacement: $1\n action: replace\n - source_labels: [__param_target]\n separator: ;\n regex: (.*)\n target_label: instance\n replacement: $1\n action: replace\n - separator: ;\n regex: (.*)\n target_label: __address__\n replacement: 127.0.0.1:9115\n action: replace\n static_configs:\n - targets:\n - http://localhost:9100\n", }, } - if err := json.NewEncoder(w).Encode(&response); err != nil { + + err := json.NewEncoder(w).Encode(&response) + if err != nil { w.Write([]byte("KO")) } } @@ -506,7 +526,9 @@ func FlagsHandler(w http.ResponseWriter, r *http.Request) { "query.timeout": "2m", }, } - if err := json.NewEncoder(w).Encode(&response); err != nil { + + err := json.NewEncoder(w).Encode(&response) + if err != nil { w.Write([]byte("KO")) } } @@ -529,7 +551,9 @@ func RuntimeInfoHandler(w http.ResponseWriter, r *http.Request) { "storageRetention": "10y", }, } - if err := json.NewEncoder(w).Encode(&response); err != nil { + + err := json.NewEncoder(w).Encode(&response) + if err != nil { w.Write([]byte("KO")) } } @@ -540,7 +564,9 @@ func LabelNamesHandler(w http.ResponseWriter, r *http.Request) { Status: "success", Data: []string{"job", "instance", "__name__"}, } - if err := json.NewEncoder(w).Encode(&response); err != nil { + + err := json.NewEncoder(w).Encode(&response) + if err != nil { w.Write([]byte("KO")) } } @@ -554,7 +580,8 @@ func ServersHandler(w http.ResponseWriter, r *http.Request) { fileName = "servers" } - if data, err := assetsFS.ReadFile(fmt.Sprintf("assets/openstack/compute/%s.json", fileName)); err == nil { + data, err := assetsFS.ReadFile(fmt.Sprintf("assets/openstack/compute/%s.json", fileName)) + if err == nil { w.Write(data) return @@ -570,7 +597,8 @@ func TokensHandler(w http.ResponseWriter, r *http.Request) { var t map[string]any - if err := decoder.Decode(&t); err != nil { + err := decoder.Decode(&t) + if err != nil { w.Write([]byte("KO")) return @@ -582,7 +610,8 @@ func TokensHandler(w http.ResponseWriter, r *http.Request) { // UsersHandler handles OS users. func UsersHandler(w http.ResponseWriter, r *http.Request) { - if data, err := assetsFS.ReadFile("assets/openstack/identity/users.json"); err == nil { + data, err := assetsFS.ReadFile("assets/openstack/identity/users.json") + if err == nil { w.Write(data) return @@ -595,7 +624,9 @@ func UsersHandler(w http.ResponseWriter, r *http.Request) { // ProjectsHandler handles OS projects. 
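Handlers such as ProjectsHandler below lean on the pattern-matching ServeMux introduced in Go 1.22: the registered route carries an {id} wildcard, and r.PathValue("id") returns the segment it matched. A minimal illustration with an invented route:

package main

import (
	"fmt"
	"log"
	"net/http"
)

func main() {
	mux := http.NewServeMux()

	// {id} is a wildcard segment; the matched value is available
	// inside the handler through r.PathValue("id").
	mux.HandleFunc("GET /users/{id}/projects", func(w http.ResponseWriter, r *http.Request) {
		fmt.Fprintf(w, "projects for user %s\n", r.PathValue("id"))
	})

	log.Fatal(http.ListenAndServe(":8080", mux))
}
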
func ProjectsHandler(w http.ResponseWriter, r *http.Request) { userID := r.PathValue("id") - if data, err := assetsFS.ReadFile(fmt.Sprintf("assets/openstack/identity/%s.json", userID)); err == nil { + + data, err := assetsFS.ReadFile(fmt.Sprintf("assets/openstack/identity/%s.json", userID)) + if err == nil { w.Write(data) return @@ -607,7 +638,8 @@ func ProjectsHandler(w http.ResponseWriter, r *http.Request) { // PodsListHandler handles k8s pods. func PodsListHandler(w http.ResponseWriter, r *http.Request) { - if data, err := assetsFS.ReadFile("assets/k8s/pods-metadata.json"); err == nil { + data, err := assetsFS.ReadFile("assets/k8s/pods-metadata.json") + if err == nil { w.Header().Add("Content-Type", "application/json") w.Header().Add("Content-Type", "application/vnd.kubernetes.protobuf") w.Write(data) @@ -621,7 +653,8 @@ func PodsListHandler(w http.ResponseWriter, r *http.Request) { // RoleBindingsListHandler handles k8s rolebindings. func RoleBindingsListHandler(w http.ResponseWriter, r *http.Request) { - if data, err := assetsFS.ReadFile("assets/k8s/rolebindings.json"); err == nil { + data, err := assetsFS.ReadFile("assets/k8s/rolebindings.json") + if err == nil { w.Header().Add("Content-Type", "application/json") w.Header().Add("Content-Type", "application/vnd.kubernetes.protobuf") w.Write(data) @@ -651,8 +684,10 @@ func redfishProxyTarget(ctx context.Context, i, portNum int, tls bool) { ReadHeaderTimeout: 3 * time.Second, Handler: redfishMux, } + defer func() { - if err := server.Shutdown(ctx); err != nil { + err := server.Shutdown(ctx) + if err != nil { log.Println("Failed to shutdown fake Redfish target", err) } }() @@ -689,8 +724,10 @@ func redfishServer(ctx context.Context) { ReadHeaderTimeout: 3 * time.Second, Handler: redfishMux, } + defer func() { - if err := server.Shutdown(ctx); err != nil { + err := server.Shutdown(ctx) + if err != nil { log.Println("Failed to shutdown fake Redfish server", err) } }() @@ -719,8 +756,10 @@ func pyroServer(ctx context.Context) { ReadHeaderTimeout: 3 * time.Second, Handler: pyroMux, } + defer func() { - if err := server.Shutdown(ctx); err != nil { + err := server.Shutdown(ctx) + if err != nil { log.Println("Failed to shutdown fake Pyroscope server", err) } }() @@ -753,8 +792,10 @@ func promServer(ctx context.Context) { ReadHeaderTimeout: 3 * time.Second, Handler: promMux, } + defer func() { - if err := server.Shutdown(ctx); err != nil { + err := server.Shutdown(ctx) + if err != nil { log.Println("Failed to shutdown fake Prometheus server", err) } }() @@ -782,8 +823,10 @@ func osNovaServer(ctx context.Context) { ReadHeaderTimeout: 3 * time.Second, Handler: osNovaMux, } + defer func() { - if err := server.Shutdown(ctx); err != nil { + err := server.Shutdown(ctx) + if err != nil { log.Println("Failed to shutdown fake Openstack compute API server", err) } }() @@ -813,8 +856,10 @@ func osKSServer(ctx context.Context) { ReadHeaderTimeout: 3 * time.Second, Handler: osKSMux, } + defer func() { - if err := server.Shutdown(ctx); err != nil { + err := server.Shutdown(ctx) + if err != nil { log.Println("Failed to shutdown fake Openstack identity API server", err) } }() @@ -843,8 +888,10 @@ func k8sAPIServer(ctx context.Context) { ReadHeaderTimeout: 3 * time.Second, Handler: k8sAPIMux, } + defer func() { - if err := server.Shutdown(ctx); err != nil { + err := server.Shutdown(ctx) + if err != nil { log.Println("Failed to shutdown fake k8s API server", err) } }() @@ -871,7 +918,8 @@ func kubeletSocketServer(_ context.Context) { log.Println("To close 
connection CTRL+C :-)") // Make socket dir - if err := os.MkdirAll(vendorSocketDir, 0o700); err != nil { + err := os.MkdirAll(vendorSocketDir, 0o700) + if err != nil { log.Fatal(err) } diff --git a/scripts/pyro_requestor/main.go b/scripts/pyro_requestor/main.go index 40e05761..6f0f0547 100644 --- a/scripts/pyro_requestor/main.go +++ b/scripts/pyro_requestor/main.go @@ -79,7 +79,9 @@ func main() { if resp.StatusCode == http.StatusOK { // Unpack into data respData := &querierv1.SelectMergeStacktracesResponse{} - if err = proto.Unmarshal(body, respData); err != nil { + + err = proto.Unmarshal(body, respData) + if err != nil { log.Fatalln("failed to unmarshal proto response body", err) }
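Taken together, nearly every hunk in this patch applies the same mechanical rewrite, presumably to satisfy a linter enabled by the golangci-lint 2.4 bump (which linter exactly is an assumption; the commit message only mentions new lint errors): an error assignment inlined in an if statement becomes a separate assignment followed by a plain check. Side by side, using an invented doWork helper:

package main

import (
	"errors"
	"fmt"
)

func doWork() error { return errors.New("boom") }

// Old style, rewritten away by this patch:
//
//	if err := doWork(); err != nil {
//		return err
//	}
//
// New style: assignment and check on separate lines, which also
// keeps err in scope after the if block.
func run() error {
	err := doWork()
	if err != nil {
		return fmt.Errorf("work failed: %w", err)
	}

	return nil
}

func main() {
	fmt.Println(run())
}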