Skip to content

Commit c35d317

Browse files
'sm-start-ssh' merged into 'sm-setup-ssh' to avoid confusion
1 parent 0b81f0c commit c35d317

File tree

4 files changed

+65
-62
lines changed

4 files changed

+65
-62
lines changed

sagemaker_ssh_helper/sm-helper-functions

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,16 @@ function _install_helper_scripts() {
2020
_silent_install sm-helper-functions
2121
_silent_install sm-connect-ssh-proxy +x
2222
_silent_install sm-wait +x
23-
_silent_install sm-start-ssh +x
2423
_silent_install sm-save-env +x
2524
_silent_install sm-init-ssm +x
2625
_silent_install sm-ssh-ide +x
2726
_silent_install sm-local-start-ssh +x
2827
_silent_install sm-local-ssh-ide +x
2928
_silent_install sm-local-ssh-training +x
29+
_silent_install sm-local-ssh-inference +x
30+
_silent_install sm-local-ssh-processing +x
31+
_silent_install sm-local-ssh-transform +x
32+
_silent_install sm-local-ssh-notebook +x
3033
}
3134

3235
function _is_centos() {

sagemaker_ssh_helper/sm-setup-ssh

Lines changed: 61 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# from 'requirements.txt' and 'bootstrap_on_start' parameter was passed to the wrapper, or manually
66
# from a training / processing / inference script, e.g. with subprocess.check_call()
77

8-
# This script can be called simultaneously multiple times in a distributed training job
8+
# This script can be called simultaneously multiple times in a distributed training or inference job
99
# To avoid race conditions, we install helper scripts under an exclusive lock
1010
if [[ "$1" == "install-helper-scripts" ]]; then
1111
dir=$(dirname "$0")
@@ -17,21 +17,77 @@ fi
1717

1818
set -e
1919

20-
flock /tmp/sm-install-lock bash "$0" install-helper-scripts | sed 's/^/[sagemaker-ssh-helper][sm-setup-ssh] /'
20+
if [[ "$1" == "start-ssh" ]]; then
21+
dir=$(dirname "$0")
22+
source "$dir"/sm-helper-functions
23+
24+
# Log IP addresses of the container (useful only in training in combination with VPC + VPN)
25+
echo "SSH Helper Log IP: $(hostname -I)"
26+
27+
chmod 1777 /tmp
28+
mkdir -p ~/.ssh
29+
30+
# Install SSH (if using MPI, already installed)
31+
if _is_centos; then
32+
yum install -y openssh-server
33+
else
34+
export DEBIAN_FRONTEND=noninteractive
35+
apt-get update
36+
apt-get install -y --no-install-recommends openssh-server
37+
fi
38+
39+
# Save and dump SageMaker environment for SSH sessions
40+
sm-save-env
41+
42+
# Dump container bootstrap environment (PID 1) - can be different from above, useful for debugging
43+
ps wwwe -p 1 | tail -1
44+
45+
sed -i -e 's~^ClientAliveInterval~#ClientAliveInterval~' /etc/ssh/sshd_config
46+
echo "ClientAliveInterval 15" >> /etc/ssh/sshd_config
47+
48+
sed -i -e 's~^PermitRootLogin~#PermitRootLogin~' /etc/ssh/sshd_config
49+
echo PermitRootLogin yes >> /etc/ssh/sshd_config
50+
51+
sed -i -e 's~^AuthorizedKeysFile~#AuthorizedKeysFile~' /etc/ssh/sshd_config
52+
echo "AuthorizedKeysFile /etc/ssh/authorized_keys" >> /etc/ssh/sshd_config
53+
54+
# Start SSH server
55+
if _is_centos; then
56+
# NOTE: systemctl will not work in a CentOS SageMaker container (e.g. Spark processing) because of a lack of
57+
# privileges to access DBUS, so we run sshd manually. This command doesn't work:
58+
# # service sshd start || (echo "ERROR: Failed to start sshd service" && exit 255)
59+
[[ -f /etc/ssh/ssh_host_rsa_key ]] || (echo "Generating new SSH keys" && ssh-keygen -A)
60+
/usr/sbin/sshd
61+
else
62+
service ssh start || (echo "ERROR: Failed to start ssh service" && exit 255)
63+
fi
64+
65+
sm-init-ssm
66+
67+
# Running forever as daemon
68+
amazon-ssm-agent
69+
70+
echo "ERROR: agent died"
71+
exit 1 # should never reach this line
72+
fi
73+
74+
flock /tmp/sm-install-lock bash "$0" install-helper-scripts \
75+
| sed -u 's/^/[sagemaker-ssh-helper][sm-setup-ssh] /'
2176

2277
# nohup will detach the child process from the parent and run it in the background
2378
# flock prevents more than one process from starting
2479
# redirection to /proc/1/fd/1 will write logs to CloudWatch
2580
# sed will prepend log output with SSH Helper prefix
2681
if [[ ! -f /tmp/sm-start-ssh-lock ]]; then
2782
if [[ "$SSH_LOG_TO_STDOUT" == "true" ]]; then
28-
flock -n /tmp/sm-start-ssh-lock sm-start-ssh &
83+
flock -n /tmp/sm-start-ssh-lock bash "$0" start-ssh &
2984
else
3085
nohup flock -n /tmp/sm-start-ssh-lock \
31-
sm-start-ssh 2>&1 \
86+
bash "$0" start-ssh 2>&1 \
3287
| sed -u 's/^/[sagemaker-ssh-helper][sm-start-ssh] /' \
3388
>/proc/1/fd/1 2>&1 &
3489
fi
3590
fi
3691

37-
sm-wait "${SSH_WAIT_TIME_SECONDS:-60}" | sed 's/^/[sagemaker-ssh-helper][sm-setup-ssh] /'
92+
sm-wait "${SSH_WAIT_TIME_SECONDS:-60}" \
93+
| sed -u 's/^/[sagemaker-ssh-helper][sm-setup-ssh] /'

sagemaker_ssh_helper/sm-start-ssh

Lines changed: 0 additions & 55 deletions
This file was deleted.

setup.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,6 @@
7373
'sagemaker_ssh_helper/sm-local-ssh-inference',
7474
'sagemaker_ssh_helper/sm-local-ssh-processing',
7575
'sagemaker_ssh_helper/sm-local-configure',
76-
'sagemaker_ssh_helper/sm-start-ssh',
7776
'sagemaker_ssh_helper/sm-ssh-ide',
7877
'sagemaker_ssh_helper/sm-save-env',
7978
'sagemaker_ssh_helper/sm-init-ssm',

0 commit comments

Comments
 (0)