Skip to content

Commit fd1cd12

Browse files
committed
initial SLURM sidecar code upload
Signed-off-by: Surax98 <giacomo.surace@gmail.com>
1 parent 48f2abe commit fd1cd12

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

78 files changed

+27965
-0
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
envs.sh
2+
bin

Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
all: sidecar
2+
3+
sidecar:
4+
CGO_ENABLED=0 GOOS=linux go build -o bin/slurm-sd cmd/main.go

Readme.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
![Interlink logo](./docs/static/img/interlink_logo.png)
2+
3+
## :information_source: Overview
4+
5+
### Introduction
6+
InterLink aims to provide an abstraction for the execution of a Kubernetes pod on any remote resource capable of managing a Container execution lifecycle.
7+
We aim to facilitate the development of provider-specific plugins, so that resource providers can leverage the power of Virtual Kubelet without a black belt in Kubernetes internals.
8+
9+
The project consists of two main components:
10+
11+
- __A Kubernetes Virtual Node:__ based on the [VirtualKubelet](https://virtual-kubelet.io/) technology, it translates requests for a Kubernetes pod execution into remote calls to the interLink API server.
12+
- __The interLink API server:__ a modular and pluggable REST server where you can create your own container manager plugins (called sidecars), or use the existing ones: remote Docker execution on a remote host, or Singularity containers on a remote SLURM batch system. This repo aims to maintain the SLURM sidecar as a standalone plugin.
13+
14+
The project was inspired by the [KNoC](https://github.com/CARV-ICS-FORTH/knoc) and [Liqo](https://github.com/liqotech/liqo/tree/master) projects, enhancing them with the implementation of a generic API layer between the Virtual Kubelet component and the provider logic for container lifecycle management.
15+
16+
## :information_source: Usage
17+
18+
### Requirements
19+
- __[Our Kubernetes Virtual Node and the interLink API server](https://github.com/interTwin-eu/interLink)__
20+
- __[The Go programming language](https://go.dev/doc/install)__ (to build binaries)
21+
- __[Docker Engine](https://docs.docker.com/engine/)__ (optional)
22+
23+
Note: if you want a quick-start setup (using a Docker container), Go is not necessary.
24+
25+
### Quick Start

cmd/main.go

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
package main
2+
3+
import (
	"context"
	"net/http"
	"strconv"
	"time"

	"github.com/sirupsen/logrus"
	"github.com/virtual-kubelet/virtual-kubelet/log"
	logruslogger "github.com/virtual-kubelet/virtual-kubelet/log/logrus"

	commonIL "github.com/intertwin-eu/interlink/pkg/common"
	slurm "github.com/intertwin-eu/interlink/pkg/slurm"
)
15+
16+
func main() {
17+
logger := logrus.StandardLogger()
18+
19+
interLinkConfig, err := commonIL.NewInterLinkConfig()
20+
if err != nil {
21+
panic(err)
22+
}
23+
24+
if interLinkConfig.VerboseLogging {
25+
logger.SetLevel(logrus.DebugLevel)
26+
} else if interLinkConfig.ErrorsOnlyLogging {
27+
logger.SetLevel(logrus.ErrorLevel)
28+
} else {
29+
logger.SetLevel(logrus.InfoLevel)
30+
}
31+
32+
log.L = logruslogger.FromLogrus(logrus.NewEntry(logger))
33+
34+
JobIDs := make(map[string]*slurm.JidStruct)
35+
Ctx, cancel := context.WithCancel(context.Background())
36+
defer cancel()
37+
log.G(Ctx).Debug("Debug level: " + strconv.FormatBool(interLinkConfig.VerboseLogging))
38+
39+
SidecarAPIs := slurm.SidecarHandler{
40+
Config: interLinkConfig,
41+
JIDs: &JobIDs,
42+
Ctx: Ctx,
43+
}
44+
45+
mutex := http.NewServeMux()
46+
mutex.HandleFunc("/status", SidecarAPIs.StatusHandler)
47+
mutex.HandleFunc("/create", SidecarAPIs.SubmitHandler)
48+
mutex.HandleFunc("/delete", SidecarAPIs.StopHandler)
49+
mutex.HandleFunc("/getLogs", SidecarAPIs.GetLogsHandler)
50+
51+
slurm.CreateDirectories(interLinkConfig)
52+
slurm.LoadJIDs(Ctx, interLinkConfig, &JobIDs)
53+
54+
err = http.ListenAndServe(":"+interLinkConfig.Sidecarport, mutex)
55+
if err != nil {
56+
log.G(Ctx).Fatal(err)
57+
}
58+
}

docker/Dockerfile

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
FROM golang:1.21 as build-stage
2+
3+
WORKDIR /app
4+
5+
COPY . .
6+
RUN CGO_ENABLED=0 GOOS=linux go build -o bin/slurm-sidecar cmd/main.go
7+
8+
9+
# Deploy the application binary into a lean image
10+
#FROM ubuntu:latest AS build-release-stage
11+
FROM ubuntu:22.04
12+
13+
# Settings for all images
14+
ENV TIMEZONE=America/New_York
15+
16+
# Run system updates
17+
RUN apt update && apt -y upgrade
18+
19+
# Set time zone
20+
RUN ln -sn /usr/share/zoneinfo/${TIMEZONE} /etc/localtime \
21+
&& echo ${TIMEZONE} > /etc/timezone \
22+
&& apt -y install tzdata
23+
24+
# Set locale
25+
RUN apt -y install locales \
26+
&& locale-gen en_US.UTF-8 \
27+
&& update-locale LANG=en_US.UTF-8 LC_MESSAGES=POSIX
28+
29+
# Install system packages
30+
RUN DEBIAN_FRONTEND=noninteractive \
31+
apt -y install munge \
32+
slurm-wlm
33+
34+
# Slurm configuration
35+
COPY docker/slurm.conf /etc/slurm/slurm.conf
36+
RUN mkdir -p /var/spool/slurmctld \
37+
&& chown slurm:slurm /var/spool/slurmctld
38+
39+
# Startup configuration
40+
COPY docker/startup.sh /etc/startup.sh
41+
RUN chmod 555 /etc/startup.sh
42+
43+
WORKDIR /root
44+
45+
COPY --from=build-stage /app/bin/slurm-sidecar /sidecar/slurm-sidecar
46+
47+
ENV INTERLINKCONFIGPATH=/root/InterLinkConfig.yaml
48+
49+
COPY docker/InterLinkConfig.yaml .
50+
51+
RUN apt update && apt install -y software-properties-common \
52+
&& add-apt-repository -y ppa:apptainer/ppa \
53+
&& apt install -y apptainer
54+
55+
RUN mkdir -p /cvmfs/grid.cern.ch/etc/grid-security
56+
57+
CMD ["/bin/sh", "-c", "/etc/startup.sh && /sidecar/slurm-sidecar"]

docker/Dockerfile.slurm

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
FROM ubuntu:22.04
2+
3+
# Settings for all images
4+
ENV TIMEZONE=America/New_York
5+
6+
# Run system updates
7+
RUN apt update && apt -y upgrade
8+
9+
# Set time zone
10+
RUN ln -sn /usr/share/zoneinfo/${TIMEZONE} /etc/localtime \
11+
&& echo ${TIMEZONE} > /etc/timezone \
12+
&& apt -y install tzdata
13+
14+
# Set locale
15+
RUN apt -y install locales \
16+
&& locale-gen en_US.UTF-8 \
17+
&& update-locale LANG=en_US.UTF-8 LC_MESSAGES=POSIX
18+
19+
# Install system packages
20+
RUN DEBIAN_FRONTEND=noninteractive \
21+
apt -y install munge \
22+
slurm-wlm
23+
24+
# Slurm configuration
25+
COPY docker/slurm.conf /etc/slurm/slurm.conf
26+
RUN mkdir -p /var/spool/slurmctld \
27+
&& chown slurm:slurm /var/spool/slurmctld
28+
29+
# Startup configuration
30+
COPY docker/startup.sh /etc/startup.sh
31+
RUN chmod 555 /etc/startup.sh
32+
33+
WORKDIR /root
34+
35+
RUN apt install -y software-properties-common \
36+
&& add-apt-repository -y ppa:apptainer/ppa \
37+
&& apt install -y apptainer
38+
39+
CMD ["/bin/sh", "-c", "/etc/startup.sh && bash"]

docker/InterLinkConfig.yaml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
VKTokenFile: "$HOME/interLink/token"
2+
InterlinkURL: "http://interlink-api"
3+
SidecarURL: "http://docker-sidecar"
4+
InterlinkPort: "3000"
5+
SidecarPort: "4000"
6+
SbatchPath: "/usr/bin/sbatch"
7+
ScancelPath: "/usr/bin/scancel"
8+
SqueuePath: "/usr/bin/squeue"
9+
ExportPodData: true
10+
DataRootFolder: ".local/interlink/jobs/"
11+
ServiceAccount: "interlink"
12+
Namespace: "vk"
13+
Tsocks: false
14+
TsocksPath: "$WORK/tsocks-1.8beta5+ds1/libtsocks.so"
15+
TsocksLoginNode: "login01"
16+
BashPath: /bin/bash
17+
VerboseLogging: true
18+
ErrorsOnlyLogging: false
19+
Pod_IP: "172.16.9.11"

docker/docker-compose.yaml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
version: '3.7'
2+
services:
3+
slurm-sidecar:
4+
container_name: slurm-sidecar
5+
build:
6+
context: ../
7+
dockerfile: docker/Dockerfile
8+
restart: always
9+
privileged: true
10+
cap_add:
11+
- SYS_ADMIN
12+
#network_mode: "host"
13+
ports:
14+
- 4000:4000
15+
volumes:
16+
- type: bind
17+
source: ../examples/config
18+
target: /etc/interlink
19+
# healthcheck:
20+
# test: ["CMD", "/check.sh"]
21+
# interval: 10s
22+
# timeout: 10s
23+
# retries: 3
24+
# start_period: 5s

docker/slurm.conf

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# slurm.conf file generated by configurator easy.html.
2+
# Put this file on all nodes of your cluster.
3+
# See the slurm.conf man page for more information.
4+
#
5+
ClusterName=cluster
6+
SlurmctldHost=<<HOSTNAME>>
7+
#
8+
#MailProg=/bin/mail
9+
MpiDefault=none
10+
#MpiParams=ports=#-#
11+
ProctrackType=proctrack/linuxproc
12+
ReturnToService=1
13+
SlurmctldPidFile=/var/run/slurmctld.pid
14+
#SlurmctldPort=6817
15+
SlurmdPidFile=/var/run/slurmd.pid
16+
#SlurmdPort=6818
17+
SlurmdSpoolDir=/var/spool/slurmd
18+
SlurmUser=slurm
19+
#SlurmdUser=root
20+
StateSaveLocation=/var/spool/slurmctld
21+
SwitchType=switch/none
22+
TaskPlugin=task/none
23+
#
24+
#
25+
# TIMERS
26+
#KillWait=30
27+
#MinJobAge=300
28+
#SlurmctldTimeout=120
29+
#SlurmdTimeout=300
30+
#
31+
#
32+
# SCHEDULING
33+
SchedulerType=sched/backfill
34+
SelectType=select/cons_tres
35+
SelectTypeParameters=CR_Core
36+
#
37+
#
38+
# LOGGING AND ACCOUNTING
39+
AccountingStorageType=accounting_storage/none
40+
#JobAcctGatherFrequency=30
41+
JobAcctGatherType=jobacct_gather/none
42+
#SlurmctldDebug=info
43+
SlurmctldLogFile=/var/log/slurmctld.log
44+
#SlurmdDebug=info
45+
SlurmdLogFile=/var/log/slurmd.log
46+
#
47+
#
48+
# COMPUTE NODES
49+
NodeName=<<HOSTNAME>> CPUs=<<CPU>> RealMemory=<<MEMORY>> State=UNKNOWN
50+
PartitionName=debug Nodes=ALL Default=YES MaxTime=INFINITE State=UP

docker/startup.sh

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#!/bin/bash
2+
3+
# Determine whether script is running as root
4+
sudo_cmd=""
5+
if [ "$(id -u)" != "0" ]; then
6+
sudo_cmd="sudo"
7+
sudo -k
8+
fi
9+
10+
# Configure Slurm to use maximum available processors and memory
11+
# and start required services
12+
${sudo_cmd} bash <<SCRIPT
13+
sed -i "s/<<HOSTNAME>>/$(hostname)/" /etc/slurm/slurm.conf
14+
sed -i "s/<<CPU>>/$(nproc)/" /etc/slurm/slurm.conf
15+
sed -i "s/<<MEMORY>>/$(if [[ "$(slurmd -C)" =~ RealMemory=([0-9]+) ]]; then echo "${BASH_REMATCH[1]}"; else exit 100; fi)/" /etc/slurm/slurm.conf
16+
service munge start
17+
service slurmd start
18+
service slurmctld start
19+
SCRIPT
20+
21+
# Revoke sudo permissions
22+
if [[ ${sudo_cmd} ]]; then
23+
sudo -k
24+
fi

0 commit comments

Comments
 (0)