Skip to content

Commit 026a8df

Browse files
authored
feat: add troubleshoot_kubernetes_list_underutilized_pods_by_memory_quota (#42)
1 parent 0626a29 commit 026a8df

File tree

5 files changed

+179
-0
lines changed

5 files changed

+179
-0
lines changed

AGENTS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ The handler filters tools dynamically based on `GetMyPermissions` from Sysdig Se
6060
| `troubleshoot_kubernetes_list_top_network_errors_in_pods` | `tool_troubleshoot_kubernetes_list_top_network_errors_in_pods.go` | Shows the top network errors by pod over a given interval. | `promql.exec` | "Show the top 10 pods with the most network errors in cluster 'production'" |
6161
| `troubleshoot_kubernetes_list_count_pods_per_cluster` | `tool_troubleshoot_kubernetes_list_count_pods_per_cluster.go` | List the count of running Kubernetes Pods grouped by cluster and namespace. | `promql.exec` | "List the count of running Kubernetes Pods in cluster 'production'" |
6262
| `troubleshoot_kubernetes_list_underutilized_pods_by_cpu_quota` | `tool_troubleshoot_kubernetes_list_underutilized_pods_by_cpu_quota.go` | List Kubernetes pods with CPU usage below 25% of the quota limit. | `promql.exec` | "Show the top 10 underutilized pods by CPU quota in cluster 'production'" |
63+
| `troubleshoot_kubernetes_list_underutilized_pods_by_memory_quota` | `tool_troubleshoot_kubernetes_list_underutilized_pods_by_memory_quota.go` | List Kubernetes pods with memory usage below 25% of the limit. | `promql.exec` | "Show the top 10 underutilized pods by memory quota in cluster 'production'" |
6364

6465
Every tool has a companion `_test.go` file that exercises request validation, permission metadata, and Sysdig client calls through mocks.
6566
Note that if you add more tools, you must also update this file to reflect them.

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,11 @@ The server dynamically filters the available tools based on the permissions asso
163163
- **Required Permission**: `promql.exec`
164164
- **Sample Prompt**: "Show the top 10 underutilized pods by CPU quota in cluster 'production'"
165165

166+
- **`troubleshoot_kubernetes_list_underutilized_pods_by_memory_quota`**
167+
- **Description**: List Kubernetes pods with memory usage below 25% of the limit.
168+
- **Required Permission**: `promql.exec`
169+
- **Sample Prompt**: "Show the top 10 underutilized pods by memory quota in cluster 'production'"
170+
166171
## Requirements
167172

168173
- [Go](https://go.dev/doc/install) 1.25 or higher (if running without Docker).

cmd/server/main.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ func setupHandler(sysdigClient sysdig.ExtendedClientWithResponsesInterface) *mcp
106106
tools.NewTroubleshootKubernetesListTopNetworkErrorsInPods(sysdigClient),
107107
tools.NewTroubleshootKubernetesListCountPodsPerCluster(sysdigClient),
108108
tools.NewTroubleshootKubernetesListUnderutilizedPodsByCPUQuota(sysdigClient),
109+
tools.NewTroubleshootKubernetesListUnderutilizedPodsByMemoryQuota(sysdigClient),
109110
)
110111
return handler
111112
}
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
package tools
2+
3+
import (
4+
"context"
5+
"encoding/json"
6+
"fmt"
7+
"io"
8+
"strings"
9+
10+
"github.com/mark3labs/mcp-go/mcp"
11+
"github.com/mark3labs/mcp-go/server"
12+
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/sysdig"
13+
)
14+
15+
type TroubleshootKubernetesListUnderutilizedPodsByMemoryQuota struct {
16+
SysdigClient sysdig.ExtendedClientWithResponsesInterface
17+
}
18+
19+
func NewTroubleshootKubernetesListUnderutilizedPodsByMemoryQuota(sysdigClient sysdig.ExtendedClientWithResponsesInterface) *TroubleshootKubernetesListUnderutilizedPodsByMemoryQuota {
20+
return &TroubleshootKubernetesListUnderutilizedPodsByMemoryQuota{
21+
SysdigClient: sysdigClient,
22+
}
23+
}
24+
25+
func (t *TroubleshootKubernetesListUnderutilizedPodsByMemoryQuota) RegisterInServer(s *server.MCPServer) {
26+
tool := mcp.NewTool("troubleshoot_kubernetes_list_underutilized_pods_by_memory_quota",
27+
mcp.WithDescription("List Kubernetes pods with memory usage below 25% of the limit."),
28+
mcp.WithString("cluster_name", mcp.Description("The name of the cluster to filter by.")),
29+
mcp.WithString("namespace_name", mcp.Description("The name of the namespace to filter by.")),
30+
mcp.WithNumber("limit",
31+
mcp.Description("Maximum number of pods to return."),
32+
mcp.DefaultNumber(10),
33+
),
34+
mcp.WithOutputSchema[map[string]any](),
35+
WithRequiredPermissions(), // FIXME(fede): Add the required permissions. It should be `promql.exec` but somehow the token does not have that permission even if you are able to execute queries.
36+
)
37+
s.AddTool(tool, t.handle)
38+
}
39+
40+
func (t *TroubleshootKubernetesListUnderutilizedPodsByMemoryQuota) handle(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
41+
clusterName := mcp.ParseString(request, "cluster_name", "")
42+
namespaceName := mcp.ParseString(request, "namespace_name", "")
43+
limit := mcp.ParseInt(request, "limit", 10)
44+
45+
query := buildUnderutilizedPodsByMemoryQuery(clusterName, namespaceName)
46+
47+
limitQuery := sysdig.LimitQuery(limit)
48+
params := &sysdig.GetQueryV1Params{
49+
Query: query,
50+
Limit: &limitQuery,
51+
}
52+
53+
httpResp, err := t.SysdigClient.GetQueryV1(ctx, params)
54+
if err != nil {
55+
return mcp.NewToolResultErrorFromErr("failed to get underutilized pod list", err), nil
56+
}
57+
58+
if httpResp.StatusCode != 200 {
59+
bodyBytes, _ := io.ReadAll(httpResp.Body)
60+
return mcp.NewToolResultErrorf("failed to get underutilized pod list: status code %d, body: %s", httpResp.StatusCode, string(bodyBytes)), nil
61+
}
62+
63+
var queryResponse sysdig.QueryResponseV1
64+
if err := json.NewDecoder(httpResp.Body).Decode(&queryResponse); err != nil {
65+
return mcp.NewToolResultErrorFromErr("failed to decode response", err), nil
66+
}
67+
68+
return mcp.NewToolResultJSON(queryResponse)
69+
}
70+
71+
// buildUnderutilizedPodsByMemoryQuery returns the PromQL expression that
// selects pods whose summed container memory usage is below 25% of their
// summed container memory limit.
//
// clusterName and namespaceName are optional; each non-empty value adds an
// equality matcher (kube_cluster_name / kube_namespace_name) to both metric
// selectors. When both are empty no selector braces are emitted.
func buildUnderutilizedPodsByMemoryQuery(clusterName, namespaceName string) string {
	var filters []string
	// %q quotes and escapes the value (quotes, backslashes, control
	// characters), so caller-supplied names cannot terminate the label string
	// and alter the query. Ordinary values render exactly as "value".
	if clusterName != "" {
		filters = append(filters, fmt.Sprintf("kube_cluster_name=%q", clusterName))
	}
	if namespaceName != "" {
		filters = append(filters, fmt.Sprintf("kube_namespace_name=%q", namespaceName))
	}

	selector := ""
	if len(filters) > 0 {
		selector = "{" + strings.Join(filters, ",") + "}"
	}

	// The "> 0" on the denominator drops pods whose limit sum is not positive,
	// which also avoids dividing by zero; "< 0.25" keeps ratios under 25%.
	return fmt.Sprintf("sum by (kube_cluster_name, kube_namespace_name, kube_pod_name)(sysdig_container_memory_used_bytes%s) / (sum by (kube_cluster_name, kube_namespace_name, kube_pod_name)(sysdig_container_memory_limit_bytes%s) > 0) < 0.25", selector, selector)
}
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
package tools_test
2+
3+
import (
4+
"bytes"
5+
"context"
6+
"io"
7+
"net/http"
8+
9+
"github.com/mark3labs/mcp-go/mcp"
10+
"github.com/mark3labs/mcp-go/server"
11+
. "github.com/onsi/ginkgo/v2"
12+
. "github.com/onsi/gomega"
13+
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/mcp/tools"
14+
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/sysdig"
15+
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/sysdig/mocks"
16+
"go.uber.org/mock/gomock"
17+
)
18+
19+
var _ = Describe("TroubleshootKubernetesListUnderutilizedPodsByMemoryQuota Tool", func() {
20+
var (
21+
tool *tools.TroubleshootKubernetesListUnderutilizedPodsByMemoryQuota
22+
mockSysdig *mocks.MockExtendedClientWithResponsesInterface
23+
mcpServer *server.MCPServer
24+
ctrl *gomock.Controller
25+
)
26+
27+
BeforeEach(func() {
28+
ctrl = gomock.NewController(GinkgoT())
29+
mockSysdig = mocks.NewMockExtendedClientWithResponsesInterface(ctrl)
30+
tool = tools.NewTroubleshootKubernetesListUnderutilizedPodsByMemoryQuota(mockSysdig)
31+
mcpServer = server.NewMCPServer("test", "test")
32+
tool.RegisterInServer(mcpServer)
33+
})
34+
35+
It("should register successfully in the server", func() {
36+
Expect(mcpServer.GetTool("troubleshoot_kubernetes_list_underutilized_pods_by_memory_quota")).NotTo(BeNil())
37+
})
38+
39+
When("listing underutilized pods", func() {
40+
DescribeTable("it succeeds", func(ctx context.Context, toolName string, request mcp.CallToolRequest, expectedParamsRequested sysdig.GetQueryV1Params) {
41+
mockSysdig.EXPECT().GetQueryV1(gomock.Any(), &expectedParamsRequested).Return(&http.Response{
42+
StatusCode: http.StatusOK,
43+
Body: io.NopCloser(bytes.NewBufferString(`{"status":"success"}`)),
44+
}, nil)
45+
46+
serverTool := mcpServer.GetTool(toolName)
47+
result, err := serverTool.Handler(ctx, request)
48+
Expect(err).NotTo(HaveOccurred())
49+
50+
resultData, ok := result.Content[0].(mcp.TextContent)
51+
Expect(ok).To(BeTrue())
52+
Expect(resultData.Text).To(MatchJSON(`{"status":"success"}`))
53+
},
54+
Entry(nil,
55+
"troubleshoot_kubernetes_list_underutilized_pods_by_memory_quota",
56+
mcp.CallToolRequest{
57+
Params: mcp.CallToolParams{
58+
Name: "troubleshoot_kubernetes_list_underutilized_pods_by_memory_quota",
59+
Arguments: map[string]any{},
60+
},
61+
},
62+
sysdig.GetQueryV1Params{
63+
Query: `sum by (kube_cluster_name, kube_namespace_name, kube_pod_name)(sysdig_container_memory_used_bytes) / (sum by (kube_cluster_name, kube_namespace_name, kube_pod_name)(sysdig_container_memory_limit_bytes) > 0) < 0.25`,
64+
Limit: asPtr(sysdig.LimitQuery(10)),
65+
},
66+
),
67+
Entry(nil,
68+
"troubleshoot_kubernetes_list_underutilized_pods_by_memory_quota",
69+
mcp.CallToolRequest{
70+
Params: mcp.CallToolParams{
71+
Name: "troubleshoot_kubernetes_list_underutilized_pods_by_memory_quota",
72+
Arguments: map[string]any{
73+
"cluster_name": "test-cluster",
74+
"namespace_name": "test-namespace",
75+
"limit": 20,
76+
},
77+
},
78+
},
79+
sysdig.GetQueryV1Params{
80+
Query: `sum by (kube_cluster_name, kube_namespace_name, kube_pod_name)(sysdig_container_memory_used_bytes{kube_cluster_name="test-cluster",kube_namespace_name="test-namespace"}) / (sum by (kube_cluster_name, kube_namespace_name, kube_pod_name)(sysdig_container_memory_limit_bytes{kube_cluster_name="test-cluster",kube_namespace_name="test-namespace"}) > 0) < 0.25`,
81+
Limit: asPtr(sysdig.LimitQuery(20)),
82+
},
83+
),
84+
)
85+
})
86+
})

0 commit comments

Comments
 (0)