Skip to content

Commit b159efe

Browse files
committed
[CI] Add Terraform resources for daily CronJob that processes LLVM commits
1 parent 1d9240c commit b159efe

File tree

3 files changed

+163
-0
lines changed

3 files changed

+163
-0
lines changed

premerge/gke_cluster/main.tf

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ resource "google_container_cluster" "llvm_premerge" {
1212
# for adding windows nodes to the cluster.
1313
networking_mode = "VPC_NATIVE"
1414
ip_allocation_policy {}
15+
16+
workload_identity_config {
17+
workload_pool = "llvm-premerge-checks.svc.id.goog"
18+
}
1519
}
1620

1721
resource "google_container_node_pool" "llvm_premerge_linux_service" {
@@ -23,6 +27,10 @@ resource "google_container_node_pool" "llvm_premerge_linux_service" {
2327

2428
node_config {
2529
machine_type = "e2-highcpu-4"
30+
31+
workload_metadata_config {
32+
mode = "GKE_METADATA"
33+
}
2634
# Terraform wants to recreate the node pool every time when running
2735
# terraform apply unless we explicitly set this.
2836
# TODO(boomanaiden154): Look into why terraform is doing this so we do

premerge/main.tf

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,3 +190,106 @@ resource "kubernetes_manifest" "metrics_deployment" {
190190

191191
depends_on = [kubernetes_namespace.metrics, kubernetes_secret.metrics_secrets]
192192
}
193+
194+
# Resources for collecting LLVM operational metrics data
195+
196+
# Service accounts and bindings to grant access to the
197+
# BigQuery API for our cronjob
198+
# Google service account (GSA) for the operational metrics cronjob.
# It is granted roles/bigquery.jobUser and is the account the
# operational-metrics-ksa Kubernetes service account is annotated with.
resource "google_service_account" "operational_metrics_gsa" {
199+
account_id = "operational-metrics-gsa"
200+
display_name = "Operational Metrics GSA"
201+
}
202+
203+
# Grants roles/bigquery.jobUser on the GSA's project to the operational
# metrics GSA.
# NOTE(review): google_project_iam_binding is authoritative for the role, so
# the members list below becomes the complete set of principals holding
# roles/bigquery.jobUser in this project. Confirm no other principal relies
# on this role, or consider google_project_iam_member instead.
resource "google_project_iam_binding" "bigquery_jobuser_binding" {
204+
project = google_service_account.operational_metrics_gsa.project
205+
role = "roles/bigquery.jobUser"
206+
207+
members = [
208+
"serviceAccount:${google_service_account.operational_metrics_gsa.email}",
209+
]
210+
211+
# The attribute references above already order this resource after the GSA;
# this explicit dependency only restates that.
depends_on = [google_service_account.operational_metrics_gsa]
212+
}
213+
214+
# Namespace that holds the operational metrics service account, volume
# claim, secret and cronjob. Created through the llvm-premerge-us-central
# provider alias.
resource "kubernetes_namespace" "operational_metrics" {
215+
metadata {
216+
name = "operational-metrics"
217+
}
218+
provider = kubernetes.llvm-premerge-us-central
219+
}
220+
221+
# Kubernetes service account (KSA) that the cronjob pod runs as. The
# iam.gke.io/gcp-service-account annotation names the GSA this KSA is paired
# with for Workload Identity.
# NOTE(review): no `provider` is set here, while
# kubernetes_namespace.operational_metrics and
# kubernetes_secret.operational_metrics_secrets both use
# kubernetes.llvm-premerge-us-central. Confirm the default kubernetes
# provider targets the same cluster.
resource "kubernetes_service_account" "operational_metrics_ksa" {
222+
metadata {
223+
name = "operational-metrics-ksa"
224+
# Must match the name of kubernetes_namespace.operational_metrics.
namespace = "operational-metrics"
225+
annotations = {
226+
"iam.gke.io/gcp-service-account" = google_service_account.operational_metrics_gsa.email
227+
}
228+
}
229+
230+
depends_on = [kubernetes_namespace.operational_metrics]
231+
}
232+
233+
# Grants roles/iam.workloadIdentityUser on the GSA to the Kubernetes service
# account operational-metrics/operational-metrics-ksa.
# The member string hard-codes the namespace and KSA name, so it must be kept
# in sync with kubernetes_service_account.operational_metrics_ksa. Its
# "<project>.svc.id.goog" prefix is derived from the GSA's project.
# NOTE(review): the cluster's workload_pool in gke_cluster/main.tf is the
# literal "llvm-premerge-checks.svc.id.goog"; confirm the two always agree.
resource "google_service_account_iam_binding" "workload_identity_binding" {
234+
service_account_id = google_service_account.operational_metrics_gsa.name
235+
role = "roles/iam.workloadIdentityUser"
236+
237+
members = [
238+
"serviceAccount:${google_service_account.operational_metrics_gsa.project}.svc.id.goog[operational-metrics/operational-metrics-ksa]",
239+
]
240+
241+
depends_on = [
242+
google_service_account.operational_metrics_gsa,
243+
kubernetes_service_account.operational_metrics_ksa,
244+
]
245+
}
246+
247+
# The container that scrapes LLVM commits needs persistent storage
248+
# for a local checkout of llvm/llvm-project.
# The cronjob manifest mounts this claim by name (operational-metrics-pvc).
# NOTE(review): no `provider` is set here, while the namespace and secret use
# kubernetes.llvm-premerge-us-central. Confirm the default kubernetes
# provider targets the same cluster.
249+
resource "kubernetes_persistent_volume_claim" "operational_metrics_pvc" {
250+
metadata {
251+
name = "operational-metrics-pvc"
252+
namespace = "operational-metrics"
253+
}
254+
255+
spec {
256+
# A single-node read/write volume of 20Gi from the standard-rwo class.
access_modes = ["ReadWriteOnce"]
257+
resources {
258+
requests = {
259+
storage = "20Gi"
260+
}
261+
}
262+
storage_class_name = "standard-rwo"
263+
}
264+
265+
depends_on = [kubernetes_namespace.operational_metrics]
266+
}
267+
268+
# Opaque secret in the operational-metrics namespace carrying the GitHub
# token and Grafana credentials. Values are read from Secret Manager data
# sources declared elsewhere in this configuration.
resource "kubernetes_secret" "operational_metrics_secrets" {
269+
metadata {
270+
name = "operational-metrics-secrets"
271+
namespace = "operational-metrics"
272+
}
273+
274+
# The keys below are the ones referenced by the secretKeyRef entries in
# operational_metrics_cronjob.yaml; rename them in both places together.
data = {
275+
"github-token" = data.google_secret_manager_secret_version.metrics_github_pat.secret_data
276+
"grafana-api-key" = data.google_secret_manager_secret_version.metrics_grafana_api_key.secret_data
277+
"grafana-metrics-userid" = data.google_secret_manager_secret_version.metrics_grafana_metrics_userid.secret_data
278+
}
279+
280+
type = "Opaque"
281+
provider = kubernetes.llvm-premerge-us-central
282+
depends_on = [kubernetes_namespace.operational_metrics]
283+
}
284+
285+
# Applies the CronJob defined in operational_metrics_cronjob.yaml through the
# llvm-premerge-us-central provider alias.
resource "kubernetes_manifest" "operational_metrics_cronjob" {
286+
# The YAML is loaded via a relative path (no path.module prefix).
manifest = yamldecode(file("operational_metrics_cronjob.yaml"))
287+
provider = kubernetes.llvm-premerge-us-central
288+
289+
# The manifest refers to these objects only by name inside the YAML, so
# Terraform cannot infer the ordering; it is declared explicitly here.
depends_on = [
290+
kubernetes_namespace.operational_metrics,
291+
kubernetes_persistent_volume_claim.operational_metrics_pvc,
292+
kubernetes_secret.operational_metrics_secrets,
293+
kubernetes_service_account.operational_metrics_ksa,
294+
]
295+
}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# operational_metrics_cronjob.yaml
# Daily CronJob that runs the process-llvm-commits container in the
# operational-metrics namespace. The service account, volume claim and secret
# it references by name are created in premerge/main.tf.
2+
apiVersion: batch/v1
3+
kind: CronJob
4+
metadata:
5+
name: operational-metrics-cronjob
6+
namespace: operational-metrics
7+
spec:
8+
# Runs daily at 07:00 UTC, which is midnight PDT (UTC-7).
9+
schedule: "0 7 * * *"
10+
timeZone: "Etc/UTC"
11+
concurrencyPolicy: Forbid
12+
jobTemplate:
13+
spec:
14+
template:
15+
spec:
16+
# Runs as the service account annotated with the operational metrics GSA.
serviceAccountName: operational-metrics-ksa
17+
# Only schedule onto nodes that expose the GKE metadata server.
nodeSelector:
18+
iam.gke.io/gke-metadata-server-enabled: "true"
19+
volumes:
20+
- name: metrics-volume
21+
persistentVolumeClaim:
22+
claimName: operational-metrics-pvc
23+
containers:
24+
- name: process-llvm-commits
25+
# NOTE(review): the :latest tag is unpinned, so each run may pull a
# different image build.
image: ghcr.io/llvm/operations-metrics:latest
26+
# Credentials are injected from the operational-metrics-secrets secret.
env:
27+
- name: GITHUB_TOKEN
28+
valueFrom:
29+
secretKeyRef:
30+
name: operational-metrics-secrets
31+
key: github-token
32+
- name: GRAFANA_API_KEY
33+
valueFrom:
34+
secretKeyRef:
35+
name: operational-metrics-secrets
36+
key: grafana-api-key
37+
- name: GRAFANA_METRICS_USERID
38+
valueFrom:
39+
secretKeyRef:
40+
name: operational-metrics-secrets
41+
key: grafana-metrics-userid
42+
# The persistent volume claim is mounted at /data.
volumeMounts:
43+
- name: metrics-volume
44+
mountPath: "/data"
45+
resources:
46+
requests:
47+
cpu: "250m"
48+
memory: "256Mi"
49+
limits:
50+
cpu: "1"
51+
memory: "512Mi"
52+
restartPolicy: OnFailure

0 commit comments

Comments
 (0)