Skip to content
This repository was archived by the owner on Jan 9, 2023. It is now read-only.

Commit 98d9c1a

Browse files
authored
Merge pull request #267 from dippynark/264-cluster-autoscaler-documentation
264 upgrade cluster autoscaler
2 parents 9cd8a19 + b90f195 commit 98d9c1a

File tree

21 files changed

+359
-66
lines changed

21 files changed

+359
-66
lines changed

docs/spelling_wordlist.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,3 +65,12 @@ Jenkins
6565
prepended
6666
username
6767
loopback
68+
addons
69+
autoscaler
70+
prometheus
71+
ubuntu
72+
offline
73+
admin
74+
plugin
75+
checklist
76+
localhost

docs/user-guide.rst

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,32 @@ The configuration file can be found at ``$HOME/.tarmak/tarmak.yaml`` (default).
150150
The Pod Security Policy manifests can be found within the tarmak directory at
151151
``puppet/modules/kubernetes/templates/pod-security-policy.yaml.erb``
152152

153+
Cluster Autoscaler
154+
~~~~~~~~~~~~~~~~~~
155+
156+
Tarmak supports deploying `Cluster Autoscaler
157+
<https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler>`_ when
158+
spinning up a Kubernetes cluster. The following `tarmak.yaml` snippet shows how
159+
you would enable Cluster Autoscaler.
160+
161+
.. code-block:: yaml
162+
163+
kubernetes:
164+
clusterAutoscaler:
165+
enabled: true
166+
...
167+
168+
The above configuration would deploy Cluster Autoscaler with an image of
169+
`gcr.io/google_containers/cluster-autoscaler` using the recommended version based
170+
on the version of your Kubernetes cluster. The configuration block accepts two
171+
optional fields of `image` and `version` allowing you to change these defaults.
172+
Note that the final image tag used when deploying Cluster Autoscaler will be the
173+
configured version prepended with the letter `v`.
174+
175+
The current implementation will configure the first instance pool of type worker
176+
in your cluster configuration to scale between `minCount` and `maxCount`. We
177+
plan to add support for an arbitrary number of worker instance pools.
178+
153179
Logging
154180
~~~~~~~
155181

@@ -336,7 +362,6 @@ certificate is valid for ``jenkins.<environment>.<zone>``.
336362
type: ssd
337363
...
338364
339-
340365
Tiller
341366
~~~~~~
342367

@@ -362,7 +387,6 @@ allows to override the deployed version:
362387
consider Helm's `security best practices
363388
<https://github.com/kubernetes/helm/blob/master/docs/securing_installation.md>`_.
364389

365-
366390
Prometheus
367391
~~~~~~~~~~
368392

pkg/puppet/puppet.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,11 +306,32 @@ func (p *Puppet) writeHieraData(puppetPath string, cluster interfaces.Cluster) e
306306
return fmt.Errorf("error writing global hiera config: %s", err)
307307
}
308308

309+
// retrieve details for first worker instance pool
310+
workerMinCount := 0
311+
workerMaxCount := 0
312+
workerInstancePoolName := ""
313+
if cluster.Config().Kubernetes.ClusterAutoscaler != nil && cluster.Config().Kubernetes.ClusterAutoscaler.Enabled {
314+
for _, instancePool := range cluster.InstancePools() {
315+
if instancePool.Role().Name() == clusterv1alpha1.KubernetesWorkerRoleName {
316+
workerMinCount = instancePool.MinCount()
317+
workerMaxCount = instancePool.MaxCount()
318+
workerInstancePoolName = instancePool.Name()
319+
break
320+
}
321+
}
322+
}
323+
309324
// loop through instance pools
310325
for _, instancePool := range cluster.InstancePools() {
311326

312327
classes, variables := contentInstancePoolConfig(cluster.Config(), instancePool.Config(), instancePool.Role().Name())
313328

329+
if instancePool.Role().Name() == clusterv1alpha1.KubernetesMasterRoleName && cluster.Config().Kubernetes.ClusterAutoscaler != nil && cluster.Config().Kubernetes.ClusterAutoscaler.Enabled {
330+
variables = append(variables, fmt.Sprintf(`kubernetes_addons::cluster_autoscaler::min_instances: %d`, workerMinCount))
331+
variables = append(variables, fmt.Sprintf(`kubernetes_addons::cluster_autoscaler::max_instances: %d`, workerMaxCount))
332+
variables = append(variables, fmt.Sprintf(`kubernetes_addons::cluster_autoscaler::instance_pool_name: "%s"`, workerInstancePoolName))
333+
}
334+
314335
// classes
315336
err = p.writeLines(
316337
filepath.Join(hieraPath, "instance_pools", fmt.Sprintf("%s_classes.yaml", instancePool.Name())), classes,

pkg/tarmak/cluster/cluster.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -392,7 +392,8 @@ func (c *Cluster) Variables() map[string]interface{} {
392392
if ok {
393393
output[fmt.Sprintf("%s_ami", instancePool.TFName())] = ids
394394
}
395-
output[fmt.Sprintf("%s_instance_count", instancePool.TFName())] = instancePool.Config().MinCount
395+
output[fmt.Sprintf("%s_min_instance_count", instancePool.TFName())] = instancePool.Config().MinCount
396+
output[fmt.Sprintf("%s_max_instance_count", instancePool.TFName())] = instancePool.Config().MaxCount
396397
}
397398

398399
// set network cidr

pkg/tarmak/instance_pool/instance_pool.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -148,8 +148,11 @@ func (n *InstancePool) RootVolume() interfaces.Volume {
148148
return n.rootVolume
149149
}
150150

151-
func (n *InstancePool) Count() int {
152-
// TODO: this needs to be replaced by Max/Min
151+
func (n *InstancePool) MinCount() int {
152+
return n.conf.MinCount
153+
}
154+
155+
func (n *InstancePool) MaxCount() int {
153156
return n.conf.MaxCount
154157
}
155158

pkg/tarmak/interfaces/interfaces.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,8 @@ type InstancePool interface {
248248
Role() *role.Role
249249
Volumes() []Volume
250250
Zones() []string
251+
MinCount() int
252+
MaxCount() int
251253
}
252254

253255
type Volume interface {

puppet/hieradata/common.yaml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,6 @@ tarmak::kubernetes_api_url: "https://api.%{::tarmak_cluster}.%{::tarmak_dns_root
1515
# TODO: This should come from terraform
1616
tarmak::etcd_instances: 3
1717

18-
# cluster scaler config
19-
kubernetes_addons::cluster_autoscaler::min_instances: 3
20-
kubernetes_addons::cluster_autoscaler::max_instances: 10
21-
2218
# point heapster to influxdb
2319
kubernetes_addons::heapster::sink: influxdb:http://monitoring-influxdb.kube-system:8086
2420

puppet/modules/kubernetes_addons/manifests/cluster_autoscaler.pp

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
String $limit_mem='500Mi',
66
String $request_cpu='100m',
77
String $request_mem='300Mi',
8+
String $instance_pool_name='worker',
89
Integer $min_instances=3,
910
Integer $max_instances=6,
1011
$ca_mounts=$::kubernetes_addons::params::ca_mounts,
@@ -20,14 +21,20 @@
2021
$rbac_enabled = false
2122
}
2223

23-
if defined('$kubernetes::cluster_name') {
24-
$asg_name="kubernetes-${::kubernetes::cluster_name}-worker"
24+
if defined('$kubernetes::cluster_name') and $instance_pool_name != '' {
25+
$asg_name="${::kubernetes::cluster_name}-kubernetes-${instance_pool_name}"
2526
} else {
26-
$asg_name=undef
27+
fail('asg name must be set')
2728
}
2829

2930
if $version == '' {
30-
if versioncmp($::kubernetes::version, '1.7.0') >= 0 {
31+
if versioncmp($::kubernetes::version, '1.10.0') >= 0 {
32+
$_version = '1.2.0'
33+
} elsif versioncmp($::kubernetes::version, '1.9.0') >= 0 {
34+
$_version = '1.1.0'
35+
} elsif versioncmp($::kubernetes::version, '1.8.0') >= 0 {
36+
$_version = '1.0.0'
37+
} elsif versioncmp($::kubernetes::version, '1.7.0') >= 0 {
3138
$_version = '0.6.0'
3239
} elsif versioncmp($::kubernetes::version, '1.6.0') >= 0 {
3340
$_version = '0.5.4'
@@ -40,6 +47,10 @@
4047
$_version = $version
4148
}
4249

50+
if versioncmp($_version, '0.6.0') >= 0 {
51+
$balance_similar_node_groups = true
52+
}
53+
4354
if versioncmp($::kubernetes::version, '1.6.0') >= 0 {
4455
$version_before_1_6 = false
4556
} else {
@@ -50,6 +61,7 @@
5061
kubernetes::apply{'cluster-autoscaler':
5162
manifests => [
5263
template('kubernetes_addons/cluster-autoscaler-deployment.yaml.erb'),
64+
template('kubernetes_addons/cluster-autoscaler-rbac.yaml.erb'),
5365
],
5466
}
5567
}

puppet/modules/kubernetes_addons/spec/classes/cluster_autoscaler_spec.rb

Lines changed: 47 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -49,25 +49,37 @@ class kubernetes{
4949
end
5050

5151
it 'has asg configured' do
52-
expect(manifests[0]).to match(%{3:6:kubernetes-cluster1-worker})
52+
expect(manifests[0]).to match(%{3:6:cluster1-kubernetes-worker})
5353
end
5454

5555
it 'has cloud_provider configured' do
5656
expect(manifests[0]).to match(%{cloud-provider=aws})
5757
end
5858

59-
it 'has cert path set' do
60-
expect(manifests[0]).to match(%{path: /etc/ssl/certs})
61-
end
62-
6359
it 'has AWS_REGION set' do
6460
expect(manifests[0]).to match(%r{value: eu-west-1$})
6561
end
62+
63+
it 'has host network set' do
64+
expect(manifests[0]).to match(%r{hostNetwork: true$})
65+
end
66+
67+
it 'has master toleration set' do
68+
expect(manifests[0]).to match(%r{tolerations:\s+- key: "node-role\.kubernetes\.io\/master"\s+operator: "Exists"\s+effect: "NoSchedule"})
69+
end
70+
71+
it 'has critical addon toleration set' do
72+
expect(manifests[0]).to match(%r{- key: "CriticalAddonsOnly"\s+operator: "Exists"})
73+
end
74+
75+
it 'has master node affinity set' do
76+
expect(manifests[0]).to match(%r{nodeAffinity:\s+requiredDuringSchedulingIgnoredDuringExecution:\s+nodeSelectorTerms:\s+- matchExpressions:\s+- key: "node-role\.kubernetes\.io\/master"\s+operator: "Exists"})
77+
end
6678
end
6779

6880
context 'with kubernetes 1.5' do
6981
let(:kubernetes_version) do
70-
'1.5.6'
82+
'1.5.0'
7183
end
7284
it 'uses correct image version' do
7385
expect(manifests[0]).to match(%r{gcr.io/google_containers/cluster-autoscaler:v0.4.0})
@@ -76,7 +88,7 @@ class kubernetes{
7688

7789
context 'with kubernetes 1.6' do
7890
let(:kubernetes_version) do
79-
'1.6.6'
91+
'1.6.0'
8092
end
8193
it 'uses correct image version' do
8294
expect(manifests[0]).to match(%r{gcr.io/google_containers/cluster-autoscaler:v0.5.4})
@@ -85,10 +97,37 @@ class kubernetes{
8597

8698
context 'with kubernetes 1.7' do
8799
let(:kubernetes_version) do
88-
'1.7.1'
100+
'1.7.0'
89101
end
90102
it 'uses correct image version' do
91103
expect(manifests[0]).to match(%r{gcr.io/google_containers/cluster-autoscaler:v0.6.0})
92104
end
93105
end
106+
107+
context 'with kubernetes 1.8' do
108+
let(:kubernetes_version) do
109+
'1.8.0'
110+
end
111+
it 'uses correct image version' do
112+
expect(manifests[0]).to match(%r{gcr.io/google_containers/cluster-autoscaler:v1.0.0})
113+
end
114+
end
115+
116+
context 'with kubernetes 1.9' do
117+
let(:kubernetes_version) do
118+
'1.9.0'
119+
end
120+
it 'uses correct image version' do
121+
expect(manifests[0]).to match(%r{gcr.io/google_containers/cluster-autoscaler:v1.1.0})
122+
end
123+
end
124+
125+
context 'with kubernetes 1.10' do
126+
let(:kubernetes_version) do
127+
'1.10.0'
128+
end
129+
it 'uses correct image version' do
130+
expect(manifests[0]).to match(%r{gcr.io/google_containers/cluster-autoscaler:v1.2.0})
131+
end
132+
end
94133
end

puppet/modules/kubernetes_addons/templates/cluster-autoscaler-deployment.yaml.erb

Lines changed: 32 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ metadata:
77
labels:
88
app: cluster-autoscaler
99
kubernetes.io/cluster-service: "true"
10+
version: <%= @_version %>
1011
spec:
1112
replicas: 1
1213
selector:
@@ -17,10 +18,32 @@ spec:
1718
labels:
1819
app: cluster-autoscaler
1920
annotations:
21+
prometheus.io/port: "8085"
22+
prometheus.io/scrape: "true"
2023
scheduler.alpha.kubernetes.io/critical-pod: ''
24+
<%- if @version_before_1_6 -%>
2125
scheduler.alpha.kubernetes.io/tolerations: '[{"key":"CriticalAddonsOnly", "operator":"Exists"}]'
26+
<%- end -%>
2227
spec:
28+
tolerations:
29+
- key: "node-role.kubernetes.io/master"
30+
operator: "Exists"
31+
effect: "NoSchedule"
32+
<%- unless @version_before_1_6 -%>
33+
- key: "CriticalAddonsOnly"
34+
operator: "Exists"
35+
<%- end -%>
36+
<%- if @rbac_enabled -%>
37+
serviceAccountName: cluster-autoscaler
38+
<%- end -%>
2339
hostNetwork: true
40+
affinity:
41+
nodeAffinity:
42+
requiredDuringSchedulingIgnoredDuringExecution:
43+
nodeSelectorTerms:
44+
- matchExpressions:
45+
- key: "node-role.kubernetes.io/master"
46+
operator: "Exists"
2447
containers:
2548
- image: "<%= @image %>:v<%= @_version %>"
2649
name: cluster-autoscaler
@@ -31,27 +54,23 @@ spec:
3154
requests:
3255
cpu: <%= @request_cpu %>
3356
memory: <%= @request_mem %>
57+
livenessProbe:
58+
httpGet:
59+
path: "/health-check"
60+
port: 8085
3461
command:
3562
- ./cluster-autoscaler
3663
- --v=4
3764
- --cloud-provider=<%= @cloud_provider %>
3865
- --skip-nodes-with-local-storage=false
3966
- --nodes=<%= @min_instances %>:<%= @max_instances %>:<%= @asg_name %>
67+
- --skip-nodes-with-system-pods=false
68+
<% if @balance_similar_node_groups -%>
69+
- --balance-similar-node-groups
70+
<% end -%>
4071
<% if @aws_region -%>
4172
env:
4273
- name: AWS_REGION
4374
value: <%= @aws_region %>
4475
<% end -%>
45-
imagePullPolicy: Always
46-
volumeMounts:
47-
<% @ca_mounts.each do |ca| -%>
48-
- name: <%= ca['name'] %>
49-
readOnly: <%= ca['readOnly'] %>
50-
mountPath: <%= ca['mountPath'] %>
51-
<% end -%>
52-
volumes:
53-
<% @ca_mounts.each do |ca| -%>
54-
- name: <%= ca['name'] %>
55-
hostPath:
56-
path: <%= ca['mountPath'] %>
57-
<% end -%>
76+
imagePullPolicy: Always

0 commit comments

Comments
 (0)