From 7bbccd64ccf456c43e48b719679985f56ff3c940 Mon Sep 17 00:00:00 2001 From: Soumitra Sulav Date: Wed, 18 Jun 2025 17:11:01 +0530 Subject: [PATCH 01/16] Adding Byteman support in acceptance test suites via new docker-compose --- .../src/main/compose/ozonesecure-ha-fi/.env | 31 ++ .../skip-notify-group-remove.btm | 10 + .../byteman-scripts/skip-put-block.btm | 10 + .../ozonesecure-ha-fi/docker-compose.yaml | 289 ++++++++++++++++++ .../compose/ozonesecure-ha-fi/docker-config | 170 +++++++++++ .../main/compose/ozonesecure-ha-fi/krb5.conf | 41 +++ .../compose/ozonesecure-ha-fi/ranger.yaml | 54 ++++ .../main/compose/ozonesecure-ha-fi/test.sh | 39 +++ .../src/main/smoketest/lib/BytemanLibrary.py | 67 ++++ .../smoketest/ozone-fi/BytemanKeywords.robot | 49 +++ .../ozone-fi/byteman_faults_sample.robot | 16 + 11 files changed, 776 insertions(+) create mode 100644 hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/.env create mode 100644 hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/byteman-scripts/skip-notify-group-remove.btm create mode 100644 hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/byteman-scripts/skip-put-block.btm create mode 100644 hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/docker-compose.yaml create mode 100644 hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/docker-config create mode 100644 hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/krb5.conf create mode 100644 hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/ranger.yaml create mode 100755 hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/test.sh create mode 100644 hadoop-ozone/dist/src/main/smoketest/lib/BytemanLibrary.py create mode 100644 hadoop-ozone/dist/src/main/smoketest/ozone-fi/BytemanKeywords.robot create mode 100644 hadoop-ozone/dist/src/main/smoketest/ozone-fi/byteman_faults_sample.robot diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/.env b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/.env new file mode 100644 index 
00000000000..e5557e2200a --- /dev/null +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/.env @@ -0,0 +1,31 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +HDDS_VERSION=${hdds.version} +HADOOP_IMAGE=apache/hadoop +HADOOP_VERSION=${hadoop.version} +OZONE_RUNNER_VERSION=${docker.ozone-runner.version} +OZONE_RUNNER_IMAGE=apache/ozone-runner +OZONE_TESTKRB5_IMAGE=${docker.ozone-testkr5b.image} +OZONE_VOLUME=./data +OZONE_OPTS= +RANGER_DB_IMAGE=postgres +RANGER_DB_IMAGE_VERSION=12 +RANGER_IMAGE=ghcr.io/adoroszlai/ranger-admin +RANGER_IMAGE_VERSION=0ae34250d3af672776fca6a53047699adf3afce5-${ranger.version}-8 +RANGER_VERSION=${ranger.version} +# Byteman agent configuration for different Ozone components +BYTEMAN_OPTS="-javaagent:/opt/byteman.jar=listener:true" \ No newline at end of file diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/byteman-scripts/skip-notify-group-remove.btm b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/byteman-scripts/skip-notify-group-remove.btm new file mode 100644 index 00000000000..8bd3783d2bb --- /dev/null +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/byteman-scripts/skip-notify-group-remove.btm @@ -0,0 +1,10 @@ +RULE skip notifyGroupRemove +CLASS 
org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine +METHOD notifyGroupRemove +AT ENTRY +IF TRUE +DO + System.out.println("[" + java.time.LocalDateTime.now() + "] BYTEMAN: " + + "Skip notifyGroupRemove in ContainerStateMachine"); + return; +ENDRULE \ No newline at end of file diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/byteman-scripts/skip-put-block.btm b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/byteman-scripts/skip-put-block.btm new file mode 100644 index 00000000000..391680f512c --- /dev/null +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/byteman-scripts/skip-put-block.btm @@ -0,0 +1,10 @@ +RULE Block putBlock +CLASS org.apache.hadoop.ozone.container.keyvalue.impl.BlockManagerImpl +METHOD putBlock +AT ENTRY +IF TRUE +DO + System.out.println("[" + java.time.LocalDateTime.now() + "] BYTEMAN: " + + "Blocking putBlock in BlockManagerImpl"); + return 0; +ENDRULE \ No newline at end of file diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/docker-compose.yaml b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/docker-compose.yaml new file mode 100644 index 00000000000..73fdf5c54fc --- /dev/null +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/docker-compose.yaml @@ -0,0 +1,289 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +x-common-config: + &common-config + dns_search: . + image: ${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION} + volumes: + - ../..:/opt/hadoop + - ../_keytabs:/etc/security/keytabs + - ./krb5.conf:/etc/krb5.conf + - ./byteman-scripts:/opt/byteman/scripts:ro + env_file: + - docker-config + +services: + kdc: + image: ${OZONE_TESTKRB5_IMAGE} + hostname: kdc + dns_search: . + volumes: + - ../..:/opt/hadoop + - ../_keytabs:/etc/security/keytabs + command: ["/opt/hadoop/compose/common/init-kdc.sh"] + networks: + ozone_net: + ipv4_address: 172.25.0.100 + kms: + image: ${HADOOP_IMAGE}:${HADOOP_VERSION} + dns_search: . + ports: + - 9600:9600 + env_file: + - ./docker-config + volumes: + - ../_keytabs:/etc/security/keytabs + - ./krb5.conf:/etc/krb5.conf + - ../../libexec/transformation.py:/opt/transformation.py + environment: + HADOOP_CONF_DIR: /opt/hadoop/etc/hadoop + command: ["hadoop", "kms"] + networks: + ozone_net: + ipv4_address: 172.25.0.101 + datanode1: + <<: *common-config + ports: + - 19864:9999 + - 9090:9090 + command: ["/opt/hadoop/bin/ozone","datanode"] + extra_hosts: + - "scm1.org=172.25.0.116" + - "scm2.org=172.25.0.117" + - "scm3.org=172.25.0.118" + - "recon=172.25.0.115" + environment: + WAITFOR: scm3.org:9894 + OZONE_OPTS: ${BYTEMAN_OPTS},port:9090 + networks: + ozone_net: + ipv4_address: 172.25.0.102 + datanode2: + <<: *common-config + ports: + - 9866:9999 + - 9091:9091 + command: ["/opt/hadoop/bin/ozone","datanode"] + extra_hosts: + - "scm1.org=172.25.0.116" + - "scm2.org=172.25.0.117" + - "scm3.org=172.25.0.118" + - "recon=172.25.0.115" + environment: + WAITFOR: scm3.org:9894 + OZONE_OPTS: ${BYTEMAN_OPTS},port:9091 + networks: + ozone_net: + ipv4_address: 172.25.0.103 + datanode3: + <<: *common-config + ports: + - 9868:9999 + - 9092:9092 + command: ["/opt/hadoop/bin/ozone","datanode"] + extra_hosts: + - "scm1.org=172.25.0.116" + - "scm2.org=172.25.0.117" 
+ - "scm3.org=172.25.0.118" + - "recon=172.25.0.115" + environment: + WAITFOR: scm3.org:9894 + OZONE_OPTS: ${BYTEMAN_OPTS},port:9092 + networks: + ozone_net: + ipv4_address: 172.25.0.104 + om1: + hostname: om1 + <<: *common-config + ports: + - 9880:9874 + - 9890:9872 + - 9093:9093 + #- 18001:18001 + environment: + WAITFOR: scm3.org:9894 + ENSURE_OM_INITIALIZED: /data/metadata/om/current/VERSION + OZONE_OPTS: ${BYTEMAN_OPTS},port:9093 + command: ["/opt/hadoop/bin/ozone","om"] + extra_hosts: + - "scm1.org=172.25.0.116" + - "scm2.org=172.25.0.117" + - "scm3.org=172.25.0.118" + networks: + ozone_net: + ipv4_address: 172.25.0.111 + om2: + hostname: om2 + <<: *common-config + ports: + - 9882:9874 + - 9892:9872 + - 9094:9094 + #- 18002:18002 + environment: + WAITFOR: scm3.org:9894 + ENSURE_OM_INITIALIZED: /data/metadata/om/current/VERSION + OZONE_OPTS: ${BYTEMAN_OPTS},port:9094 + command: ["/opt/hadoop/bin/ozone","om"] + extra_hosts: + - "scm1.org=172.25.0.116" + - "scm2.org=172.25.0.117" + - "scm3.org=172.25.0.118" + networks: + ozone_net: + ipv4_address: 172.25.0.112 + om3: + hostname: om3 + <<: *common-config + ports: + - 9884:9874 + - 9894:9872 + - 9095:9095 + #- 18003:18003 + environment: + WAITFOR: scm3.org:9894 + ENSURE_OM_INITIALIZED: /data/metadata/om/current/VERSION + OZONE_OPTS: ${BYTEMAN_OPTS},port:9095 + command: ["/opt/hadoop/bin/ozone","om"] + extra_hosts: + - "scm1.org=172.25.0.116" + - "scm2.org=172.25.0.117" + - "scm3.org=172.25.0.118" + networks: + ozone_net: + ipv4_address: 172.25.0.113 + httpfs: + hostname: httpfs + <<: *common-config + ports: + - 14000:14000 + - 9100:9100 + command: [ "/opt/hadoop/bin/ozone","httpfs" ] + environment: + OZONE-SITE.XML_hdds.scm.safemode.min.datanode: ${OZONE_SAFEMODE_MIN_DATANODES:-1} + OZONE_OPTS: ${BYTEMAN_OPTS},port:9100 + networks: + ozone_net: + ipv4_address: 172.25.0.119 + s3g: + hostname: s3g + <<: *common-config + ports: + - 9878:9878 + - 9101:9101 + command: ["ozone","s3g"] + environment: + 
OZONE-SITE.XML_ozone.s3g.domain.name=s3g.internal + OZONE_OPTS: ${BYTEMAN_OPTS},port:9101 + networks: + ozone_net: + ipv4_address: 172.25.0.120 + scm1.org: + hostname: scm1.org + <<: *common-config + ports: + - 9990:9876 + - 9992:9860 + - 9097:9097 + env_file: + - docker-config + environment: + ENSURE_SCM_INITIALIZED: /data/metadata/scm/current/VERSION + OZONE-SITE.XML_hdds.scm.safemode.min.datanode: "${OZONE_SAFEMODE_MIN_DATANODES:-3}" + OZONE_OPTS: ${BYTEMAN_OPTS},port:9097 + command: ["/opt/hadoop/bin/ozone","scm"] + extra_hosts: + - "om1=172.25.0.111" + - "om2=172.25.0.112" + - "om3=172.25.0.113" + - "scm1.org=172.25.0.116" + - "scm2.org=172.25.0.117" + - "scm3.org=172.25.0.118" + networks: + ozone_net: + ipv4_address: 172.25.0.116 + scm2.org: + hostname: scm2.org + <<: *common-config + ports: + - 9994:9876 + - 9996:9860 + - 9098:9098 + environment: + WAITFOR: scm1.org:9894 + ENSURE_SCM_BOOTSTRAPPED: /data/metadata/scm/current/VERSION + OZONE-SITE.XML_hdds.scm.safemode.min.datanode: "${OZONE_SAFEMODE_MIN_DATANODES:-3}" + OZONE_OPTS: ${BYTEMAN_OPTS},port:9098 + command: ["/opt/hadoop/bin/ozone","scm"] + extra_hosts: + - "om1=172.25.0.111" + - "om2=172.25.0.112" + - "om3=172.25.0.113" + - "scm1.org=172.25.0.116" + - "scm2.org=172.25.0.117" + - "scm3.org=172.25.0.118" + networks: + ozone_net: + ipv4_address: 172.25.0.117 + scm3.org: + hostname: scm3.org + <<: *common-config + ports: + - 9998:9876 + - 10002:9860 + - 9099:9099 + environment: + WAITFOR: scm2.org:9894 + ENSURE_SCM_BOOTSTRAPPED: /data/metadata/scm/current/VERSION + OZONE-SITE.XML_hdds.scm.safemode.min.datanode: "${OZONE_SAFEMODE_MIN_DATANODES:-3}" + OZONE_OPTS: ${BYTEMAN_OPTS},port:9099 + command: ["/opt/hadoop/bin/ozone","scm"] + extra_hosts: + - "om1=172.25.0.111" + - "om2=172.25.0.112" + - "om3=172.25.0.113" + - "scm1.org=172.25.0.116" + - "scm2.org=172.25.0.117" + - "scm3.org=172.25.0.118" + networks: + ozone_net: + ipv4_address: 172.25.0.118 + recon: + hostname: recon + <<: *common-config + ports: 
+ - 9888:9888 + - 9096:9096 + environment: + OZONE_OPTS: ${BYTEMAN_OPTS},port:9096 + command: ["/opt/hadoop/bin/ozone","recon"] + extra_hosts: + - "om1=172.25.0.111" + - "om2=172.25.0.112" + - "om3=172.25.0.113" + - "scm1.org=172.25.0.116" + - "scm2.org=172.25.0.117" + - "scm3.org=172.25.0.118" + networks: + ozone_net: + ipv4_address: 172.25.0.115 +networks: + ozone_net: + ipam: + driver: default + config: + - subnet: "172.25.0.0/24" diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/docker-config b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/docker-config new file mode 100644 index 00000000000..9e2e7cbf0a0 --- /dev/null +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/docker-config @@ -0,0 +1,170 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# For HttpFS service it is required to enable proxying users. 
+CORE-SITE.XML_hadoop.proxyuser.httpfs.hosts=* +CORE-SITE.XML_hadoop.proxyuser.httpfs.groups=* + +CORE-SITE.XML_fs.defaultFS=ofs://omservice +CORE-SITE.XML_fs.trash.interval=1 + +OZONE-SITE.XML_ozone.om.service.ids=omservice +OZONE-SITE.XML_ozone.om.internal.service.id=omservice +OZONE-SITE.XML_ozone.om.nodes.omservice=om1,om2,om3 +OZONE-SITE.XML_ozone.om.address.omservice.om1=om1 +OZONE-SITE.XML_ozone.om.address.omservice.om2=om2 +OZONE-SITE.XML_ozone.om.address.omservice.om3=om3 +OZONE-SITE.XML_ozone.om.http-address.omservice.om1=om1 +OZONE-SITE.XML_ozone.om.http-address.omservice.om2=om2 +OZONE-SITE.XML_ozone.om.http-address.omservice.om3=om3 + +OZONE-SITE.XML_ozone.scm.service.ids=scmservice +OZONE-SITE.XML_ozone.scm.primordial.node.id=scm1 +OZONE-SITE.XML_ozone.scm.nodes.scmservice=scm1,scm2,scm3 +OZONE-SITE.XML_ozone.scm.address.scmservice.scm1=scm1.org +OZONE-SITE.XML_ozone.scm.address.scmservice.scm2=scm2.org +OZONE-SITE.XML_ozone.scm.address.scmservice.scm3=scm3.org +OZONE-SITE.XML_ozone.scm.close.container.wait.duration=5s + +OZONE-SITE.XML_ozone.om.volume.listall.allowed=false + +OZONE-SITE.XML_ozone.scm.container.size=1GB +OZONE-SITE.XML_ozone.scm.datanode.ratis.volume.free-space.min=10MB +OZONE-SITE.XML_ozone.scm.pipeline.creation.interval=30s +OZONE-SITE.XML_ozone.scm.pipeline.owner.container.count=1 +OZONE-SITE.XML_ozone.scm.datanode.id.dir=/data/metadata +OZONE-SITE.XML_ozone.scm.block.client.address=scm +OZONE-SITE.XML_ozone.metadata.dirs=/data/metadata +OZONE-SITE.XML_ozone.handler.type=distributed +OZONE-SITE.XML_ozone.scm.client.address=scm +OZONE-SITE.XML_hdds.block.token.enabled=true +OZONE-SITE.XML_hdds.container.token.enabled=true +OZONE-SITE.XML_hdds.grpc.tls.enabled=true +OZONE-SITE.XML_ozone.server.default.replication=3 +OZONE-SITE.XML_hdds.scmclient.max.retry.timeout=30s +OZONE-SITE.XML_hdds.container.report.interval=60s +OZONE-SITE.XML_hdds.container.ratis.datastream.enabled=true + 
+OZONE-SITE.XML_ozone.recon.om.snapshot.task.interval.delay=1m +OZONE-SITE.XML_ozone.recon.db.dir=/data/metadata/recon +OZONE-SITE.XML_ozone.recon.om.snapshot.task.initial.delay=20s +OZONE-SITE.XML_ozone.recon.address=recon:9891 + +OZONE-SITE.XML_ozone.security.enabled=true +OZONE-SITE.XML_ozone.acl.enabled=true +OZONE-SITE.XML_ozone.acl.authorizer.class=org.apache.hadoop.ozone.security.acl.OzoneNativeAuthorizer +OZONE-SITE.XML_ozone.administrators="testuser,recon,om" +OZONE-SITE.XML_ozone.s3.administrators="testuser,s3g" + +OZONE-SITE.XML_hdds.datanode.dir=/data/hdds +OZONE-SITE.XML_hdds.datanode.volume.min.free.space=100MB +HDFS-SITE.XML_dfs.datanode.address=0.0.0.0:1019 +HDFS-SITE.XML_dfs.datanode.http.address=0.0.0.0:1012 +CORE-SITE.XML_dfs.data.transfer.protection=authentication +CORE-SITE.XML_hadoop.security.authentication=kerberos +CORE-SITE.XML_hadoop.security.auth_to_local="DEFAULT" +CORE-SITE.XML_hadoop.security.key.provider.path=kms://http@kms:9600/kms + + +OZONE-SITE.XML_hdds.scm.kerberos.principal=scm/scm@EXAMPLE.COM +OZONE-SITE.XML_hdds.scm.kerberos.keytab.file=/etc/security/keytabs/scm.keytab +OZONE-SITE.XML_ozone.om.kerberos.principal=om/om@EXAMPLE.COM +OZONE-SITE.XML_ozone.om.kerberos.keytab.file=/etc/security/keytabs/om.keytab +OZONE-SITE.XML_ozone.recon.kerberos.keytab.file=/etc/security/keytabs/recon.keytab +OZONE-SITE.XML_ozone.recon.kerberos.principal=recon/recon@EXAMPLE.COM + +OZONE-SITE.XML_ozone.s3g.kerberos.keytab.file=/etc/security/keytabs/s3g.keytab +OZONE-SITE.XML_ozone.s3g.kerberos.principal=s3g/s3g@EXAMPLE.COM + +OZONE-SITE.XML_ozone.httpfs.kerberos.keytab.file=/etc/security/keytabs/httpfs.keytab +OZONE-SITE.XML_ozone.httpfs.kerberos.principal=httpfs/httpfs@EXAMPLE.COM + +OZONE-SITE.XML_hdds.datanode.kerberos.principal=dn/dn@EXAMPLE.COM +OZONE-SITE.XML_hdds.datanode.kerberos.keytab.file=/etc/security/keytabs/dn.keytab + +OZONE-SITE.XML_ozone.security.http.kerberos.enabled=true +OZONE-SITE.XML_ozone.s3g.secret.http.enabled=true 
+OZONE-SITE.XML_ozone.http.filter.initializers=org.apache.hadoop.security.AuthenticationFilterInitializer + +OZONE-SITE.XML_ozone.om.http.auth.type=kerberos +OZONE-SITE.XML_hdds.scm.http.auth.type=kerberos +OZONE-SITE.XML_hdds.datanode.http.auth.type=kerberos +OZONE-SITE.XML_ozone.s3g.http.auth.type=kerberos +OZONE-SITE.XML_ozone.s3g.secret.http.auth.type=kerberos +OZONE-SITE.XML_ozone.httpfs.http.auth.type=kerberos +OZONE-SITE.XML_ozone.recon.http.auth.type=kerberos + +OZONE-SITE.XML_hdds.scm.http.auth.kerberos.principal=HTTP/scm@EXAMPLE.COM +OZONE-SITE.XML_hdds.scm.http.auth.kerberos.keytab=/etc/security/keytabs/HTTP.keytab +OZONE-SITE.XML_ozone.om.http.auth.kerberos.principal=HTTP/om@EXAMPLE.COM +OZONE-SITE.XML_ozone.om.http.auth.kerberos.keytab=/etc/security/keytabs/HTTP.keytab +OZONE-SITE.XML_hdds.datanode.http.auth.kerberos.principal=HTTP/db@EXAMPLE.COM +OZONE-SITE.XML_hdds.datanode.http.auth.kerberos.keytab=/etc/security/keytabs/HTTP.keytab +OZONE-SITE.XML_ozone.s3g.http.auth.kerberos.keytab=/etc/security/keytabs/HTTP.keytab +OZONE-SITE.XML_ozone.s3g.http.auth.kerberos.principal=HTTP/s3g@EXAMPLE.COM +OZONE-SITE.XML_ozone.httpfs.http.auth.kerberos.keytab=/etc/security/keytabs/httpfs.keytab +OZONE-SITE.XML_ozone.httpfs.http.auth.kerberos.principal=HTTP/httpfs@EXAMPLE.COM +OZONE-SITE.XML_ozone.recon.http.auth.kerberos.principal=HTTP/recon@EXAMPLE.COM +OZONE-SITE.XML_ozone.recon.http.auth.kerberos.keytab=/etc/security/keytabs/recon.keytab +OZONE-SITE.XML_ozone.recon.http.auth.kerberos.keytab=/etc/security/keytabs/recon.keytab +OZONE-SITE.XML_ozone.http.basedir=/tmp/ozone_http + +CORE-SITE.XML_hadoop.http.authentication.simple.anonymous.allowed=false +CORE-SITE.XML_hadoop.http.authentication.signature.secret.file=/etc/security/http_secret +CORE-SITE.XML_hadoop.http.authentication.type=kerberos +CORE-SITE.XML_hadoop.http.authentication.kerberos.principal=HTTP/ozone@EXAMPLE.COM 
+CORE-SITE.XML_hadoop.http.authentication.kerberos.keytab=/etc/security/keytabs/HTTP.keytab + + +CORE-SITE.XML_hadoop.security.authorization=true +HADOOP-POLICY.XML_ozone.om.security.client.protocol.acl=* +HADOOP-POLICY.XML_hdds.security.client.datanode.container.protocol.acl=* +HADOOP-POLICY.XML_hdds.security.client.scm.container.protocol.acl=* +HADOOP-POLICY.XML_hdds.security.client.scm.block.protocol.acl=* +HADOOP-POLICY.XML_hdds.security.client.scm.certificate.protocol.acl=* +HADOOP-POLICY.XML_ozone.security.reconfigure.protocol.acl=* + +HDFS-SITE.XML_rpc.metrics.quantile.enable=true +HDFS-SITE.XML_rpc.metrics.percentiles.intervals=60,300 + +HTTPFS-SITE.XML_hadoop.http.authentication.type=kerberos +HTTPFS-SITE.XML_hadoop.http.authentication.kerberos.keytab=/etc/security/keytabs/httpfs.keytab +HTTPFS-SITE.XML_hadoop.http.authentication.kerberos.principal=HTTP/httpfs@EXAMPLE.COM +HTTPFS-SITE.XML_httpfs.hadoop.authentication.type=kerberos +HTTPFS-SITE.XML_httpfs.hadoop.authentication.kerberos.keytab=/etc/security/keytabs/httpfs.keytab +HTTPFS-SITE.XML_httpfs.hadoop.authentication.kerberos.principal=httpfs/httpfs@EXAMPLE.COM +KMS-SITE.XML_hadoop.kms.proxyuser.s3g.users=* +KMS-SITE.XML_hadoop.kms.proxyuser.s3g.groups=* +KMS-SITE.XML_hadoop.kms.proxyuser.s3g.hosts=* + +#Enable this variable to print out all hadoop rpc traffic to the stdout. See http://byteman.jboss.org/ to define your own instrumentation. 
+#BYTEMAN_SCRIPT_URL=https://raw.githubusercontent.com/apache/hadoop/trunk/dev-support/byteman/hadooprpc.btm + +OZONE_DATANODE_SECURE_USER=root +JSVC_HOME=/usr/bin + +OZONE_CONF_DIR=/etc/hadoop +OZONE_LOG_DIR=/var/log/hadoop + +no_proxy=om,scm,recon,s3g,kdc,localhost,127.0.0.1 + +# Explicitly enable filesystem snapshot feature for this Docker compose cluster +OZONE-SITE.XML_ozone.filesystem.snapshot.enabled=true + + +OZONE-SITE.XML_hdds.secret.key.rotate.duration=5m +OZONE-SITE.XML_hdds.secret.key.rotate.check.duration=1m +OZONE-SITE.XML_hdds.secret.key.expiry.duration=1h diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/krb5.conf b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/krb5.conf new file mode 100644 index 00000000000..eefc5b9c685 --- /dev/null +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/krb5.conf @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +[logging] +default = FILE:/var/log/krb5libs.log +kdc = FILE:/var/log/krb5kdc.log +admin_server = FILE:/var/log/kadmind.log + +[libdefaults] + dns_canonicalize_hostname = false + dns_lookup_realm = false + ticket_lifetime = 24h + renew_lifetime = 7d + forwardable = true + rdns = false + default_realm = EXAMPLE.COM + +[realms] + EXAMPLE.COM = { + kdc = kdc + admin_server = kdc + max_renewable_life = 7d + } + +[domain_realm] + .example.com = EXAMPLE.COM + example.com = EXAMPLE.COM + diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/ranger.yaml b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/ranger.yaml new file mode 100644 index 00000000000..1d3c2255314 --- /dev/null +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/ranger.yaml @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +x-om-ranger-config: + &om-ranger-config + environment: + OZONE_MANAGER_CLASSPATH: "/opt/ranger/ozone-plugin/lib/libext/*:/opt/ozone/conf" + OZONE-SITE.XML_ozone.acl.authorizer.class: "org.apache.ranger.authorization.ozone.authorizer.RangerOzoneAuthorizer" + OZONE-SITE.XML_ozone.om.multitenancy.enabled: "true" + OZONE-SITE.XML_ozone.om.ranger.https-address: "http://ranger:6080" + OZONE-SITE.XML_ozone.om.ranger.https.admin.api.user: "admin" + OZONE-SITE.XML_ozone.om.ranger.https.admin.api.passwd: "rangerR0cks!" + OZONE-SITE.XML_ozone.om.ranger.service: "dev_ozone" + OZONE-SITE.XML_ozone.om.tenant.dev.skip.ranger: "false" + RANGER-OZONE-SECURITY.XML_ranger.plugin.ozone.policy.rest.url: "http://ranger:6080" + RANGER-OZONE-SECURITY.XML_ranger.plugin.ozone.policyengine.option.disable.policy.refresher: "true" + RANGER-OZONE-SECURITY.XML_ranger.plugin.ozone.service.name: "dev_ozone" + volumes: + - ../..:/opt/hadoop + - ../_keytabs:/etc/security/keytabs + - ./krb5.conf:/etc/krb5.conf + - ${RANGER_OZONE_PLUGIN_DIR}:/opt/ranger/ozone-plugin + tmpfs: + - /opt/ozone/conf + command: bash -c "sudo --preserve-env /opt/ranger/ozone-plugin/enable-ozone-plugin.sh && /opt/hadoop/bin/ozone om" + +services: + om1: + <<: *om-ranger-config + om2: + <<: *om-ranger-config + om3: + <<: *om-ranger-config + ranger: + networks: + ozone_net: + ipv4_address: 172.25.0.200 + ranger-db: + networks: + ozone_net: + ipv4_address: 172.25.0.201 diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/test.sh b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/test.sh new file mode 100755 index 00000000000..0ad547adebc --- /dev/null +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/test.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#suite:HA-secure + +set -u -o pipefail + +COMPOSE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +export COMPOSE_DIR + +export SECURITY_ENABLED=true +export OM_SERVICE_ID="omservice" +export SCM=scm1.org +export OM=om1.org + +: ${OZONE_BUCKET_KEY_NAME:=key1} + +# shellcheck source=/dev/null +source "$COMPOSE_DIR/../testlib.sh" + +start_docker_env + +execute_command_in_container kms hadoop key create ${OZONE_BUCKET_KEY_NAME} + +execute_robot_test $OM ozone-fi/byteman_faults_sample.robot diff --git a/hadoop-ozone/dist/src/main/smoketest/lib/BytemanLibrary.py b/hadoop-ozone/dist/src/main/smoketest/lib/BytemanLibrary.py new file mode 100644 index 00000000000..149847cbdd2 --- /dev/null +++ b/hadoop-ozone/dist/src/main/smoketest/lib/BytemanLibrary.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python3 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import subprocess +import requests +import json +from robot.api import logger + +class BytemanLibrary: + def __init__(self): + self.byteman_clients = {} + + def connect_to_byteman_agent(self, component_name, host="localhost", port=9091): + """Connect to Byteman agent on specific component""" + self.byteman_clients[component_name] = {'host': host, 'port': port, + 'base_url': f"http://{host}:{port}"} + logger.info(f"Connected to Byteman agent for {component_name} at {host}:{port}") + + def add_byteman_rule(self, component_name, rule_file): + """Add Byteman rule into specific component""" + client = self.byteman_clients[component_name] + + # Use bmsubmit command to load rule + cmd = ["bmsubmit", f"-p {client['port']}", f"-l {rule_file}"] + + result = subprocess.run(cmd, capture_output=True, text=True) + if result.returncode != 0: + raise RuntimeError(f"Failed to load rule: {result.stderr}") + + logger.info(f"Loaded Byteman rule {rule_file} into {component_name}") + + def remove_byteman_rule(self, component_name, rule_file): + """Remove Byteman rule for specific component""" + client = self.byteman_clients[component_name] + + cmd = ["bmsubmit", f"-p {client['port']}", f"-u {rule_file}"] + + result = subprocess.run(cmd, capture_output=True, text=True) + if result.returncode != 0: + raise RuntimeError(f"Failed to unload rule: {result.stderr}") + + logger.info(f"Unloaded Byteman rule {rule_file} from {component_name}") + + def list_all_byteman_rules(self, component_name): + """List all Byteman rules for specific component""" + client = 
self.byteman_clients[component_name] + + cmd = ["bmsubmit", f"-p {client['port']}", "-l"] + result = subprocess.run(cmd, capture_output=True, text=True) + + logger.info(f"Active rules in {component_name}: {result.stdout}") + return result.stdout \ No newline at end of file diff --git a/hadoop-ozone/dist/src/main/smoketest/ozone-fi/BytemanKeywords.robot b/hadoop-ozone/dist/src/main/smoketest/ozone-fi/BytemanKeywords.robot new file mode 100644 index 00000000000..e1cb00efa96 --- /dev/null +++ b/hadoop-ozone/dist/src/main/smoketest/ozone-fi/BytemanKeywords.robot @@ -0,0 +1,49 @@ +*** Settings *** +Library BytemanLibrary + +*** Variables *** +${DATANODE1_BYTEMAN_HOST_PORT} 172.25.0.102:9090 +${DATANODE2_BYTEMAN_HOST_PORT} 172.25.0.103:9091 +${DATANODE3_BYTEMAN_HOST_PORT} 172.25.0.104:9092 +${OM1_BYTEMAN_HOST_PORT} 172.25.0.111:9093 +${OM2_BYTEMAN_HOST_PORT} 172.25.0.112:9094 +${OM3_BYTEMAN_HOST_PORT} 172.25.0.113:9095 +${RECON_BYTEMAN_HOST_PORT} 172.25.0.115:9096 +${SCM1_BYTEMAN_HOST_PORT} 172.25.0.116:9097 +${SCM2_BYTEMAN_HOST_PORT} 172.25.0.117:9098 +${SCM3_BYTEMAN_HOST_PORT} 172.25.0.118:9099 +${HTTPFS_BYTEMAN_HOST_PORT} 172.25.0.119:9100 +${S3G_BYTEMAN_HOST_PORT} 172.25.0.120:9101 + +*** Keywords *** +Setup Byteman For Component + [Arguments] ${component} ${host_port} + ${host} ${port} = Split String ${host_port} : + Connect To Byteman Agent ${component} ${host} ${port} + +Setup All Byteman Agents + Setup Byteman For Component datanode1 ${DATANODE1_BYTEMAN_HOST_PORT} + Setup Byteman For Component datanode2 ${DATANODE2_BYTEMAN_HOST_PORT} + Setup Byteman For Component datanode3 ${DATANODE3_BYTEMAN_HOST_PORT} + Setup Byteman For Component om1 ${OM1_BYTEMAN_HOST_PORT} + Setup Byteman For Component om2 ${OM2_BYTEMAN_HOST_PORT} + Setup Byteman For Component om3 ${OM3_BYTEMAN_HOST_PORT} + Setup Byteman For Component recon ${RECON_BYTEMAN_HOST_PORT} + Setup Byteman For Component scm1 ${SCM1_BYTEMAN_HOST_PORT} + Setup Byteman For Component scm2 ${SCM2_BYTEMAN_HOST_PORT} + 
Setup Byteman For Component scm3 ${SCM3_BYTEMAN_HOST_PORT} + Setup Byteman For Component https ${HTTPFS_BYTEMAN_HOST_PORT} + Setup Byteman For Component s3g ${S3G_BYTEMAN_HOST_PORT} + +Inject Fault Into Component + [Arguments] ${component} ${rule_file} + Add Byteman Rule ${component} ${rule_file} + +Remove Fault From Component + [Arguments] ${component} ${rule_file} + Remove Byteman Rule ${component} ${rule_file} + +Verify Byteman Rules Active + [Arguments] ${component} + ${rules} = List all Byteman Rules ${component} + Should Not Be Empty ${rules} \ No newline at end of file diff --git a/hadoop-ozone/dist/src/main/smoketest/ozone-fi/byteman_faults_sample.robot b/hadoop-ozone/dist/src/main/smoketest/ozone-fi/byteman_faults_sample.robot new file mode 100644 index 00000000000..84d7641a0c4 --- /dev/null +++ b/hadoop-ozone/dist/src/main/smoketest/ozone-fi/byteman_faults_sample.robot @@ -0,0 +1,16 @@ +*** Settings *** +Resource BytemanKeywords.robot +Suite Setup Setup All Byteman Agents + +*** Test Cases *** +Test Skip Put Block on the datanode + Inject Fault Into Component datanode1 /opt/byteman/scripts/skip-put-block.btm + + # Run your Ozone operations that should be affected by delay + Execute Ozone Command ozone sh volume create /vol1 + Execute Ozone Command ozone sh bucket create /vol1/buck1 + Execute Ozone Command ozone sh volume key put /vol1/buck1/key1 /opt/byteman/scripts/skip-put-block.btm + + # Verify the put block was skipped + + Remove Fault From Component datanode1 /opt/byteman/scripts/skip-put-block.btm From 24096c0702619e71cb15ca979f97f2b0cb74d25e Mon Sep 17 00:00:00 2001 From: Soumitra Sulav Date: Tue, 24 Jun 2025 01:19:55 +0530 Subject: [PATCH 02/16] Reused ozonesecure-ha config --- .../skip-notify-group-remove.btm | 0 .../byteman-scripts/skip-put-block.btm | 0 .../ozonesecure-ha-fi/docker-compose.yaml | 289 ------------------ .../compose/ozonesecure-ha-fi/docker-config | 170 ----------- .../main/compose/ozonesecure-ha-fi/krb5.conf | 41 --- 
.../compose/ozonesecure-ha-fi/ranger.yaml | 54 ---- .../dist/src/main/compose/ozonesecure-ha/.env | 1 + .../.env => ozonesecure-ha/byteman.yaml} | 48 ++- .../test-byteman.sh} | 11 +- .../src/main/smoketest/lib/BytemanLibrary.py | 24 +- .../smoketest/ozone-fi/BytemanKeywords.robot | 29 +- .../ozone-fi/byteman_faults_sample.robot | 42 ++- 12 files changed, 93 insertions(+), 616 deletions(-) rename hadoop-ozone/dist/src/main/compose/{ozonesecure-ha-fi => common}/byteman-scripts/skip-notify-group-remove.btm (100%) rename hadoop-ozone/dist/src/main/compose/{ozonesecure-ha-fi => common}/byteman-scripts/skip-put-block.btm (100%) delete mode 100644 hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/docker-compose.yaml delete mode 100644 hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/docker-config delete mode 100644 hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/krb5.conf delete mode 100644 hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/ranger.yaml rename hadoop-ozone/dist/src/main/compose/{ozonesecure-ha-fi/.env => ozonesecure-ha/byteman.yaml} (52%) rename hadoop-ozone/dist/src/main/compose/{ozonesecure-ha-fi/test.sh => ozonesecure-ha/test-byteman.sh} (84%) mode change 100755 => 100644 diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/byteman-scripts/skip-notify-group-remove.btm b/hadoop-ozone/dist/src/main/compose/common/byteman-scripts/skip-notify-group-remove.btm similarity index 100% rename from hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/byteman-scripts/skip-notify-group-remove.btm rename to hadoop-ozone/dist/src/main/compose/common/byteman-scripts/skip-notify-group-remove.btm diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/byteman-scripts/skip-put-block.btm b/hadoop-ozone/dist/src/main/compose/common/byteman-scripts/skip-put-block.btm similarity index 100% rename from hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/byteman-scripts/skip-put-block.btm rename to 
hadoop-ozone/dist/src/main/compose/common/byteman-scripts/skip-put-block.btm diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/docker-compose.yaml b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/docker-compose.yaml deleted file mode 100644 index 73fdf5c54fc..00000000000 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/docker-compose.yaml +++ /dev/null @@ -1,289 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -x-common-config: - &common-config - dns_search: . - image: ${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION} - volumes: - - ../..:/opt/hadoop - - ../_keytabs:/etc/security/keytabs - - ./krb5.conf:/etc/krb5.conf - - ./byteman-scripts:/opt/byteman/scripts:ro - env_file: - - docker-config - -services: - kdc: - image: ${OZONE_TESTKRB5_IMAGE} - hostname: kdc - dns_search: . - volumes: - - ../..:/opt/hadoop - - ../_keytabs:/etc/security/keytabs - command: ["/opt/hadoop/compose/common/init-kdc.sh"] - networks: - ozone_net: - ipv4_address: 172.25.0.100 - kms: - image: ${HADOOP_IMAGE}:${HADOOP_VERSION} - dns_search: . 
- ports: - - 9600:9600 - env_file: - - ./docker-config - volumes: - - ../_keytabs:/etc/security/keytabs - - ./krb5.conf:/etc/krb5.conf - - ../../libexec/transformation.py:/opt/transformation.py - environment: - HADOOP_CONF_DIR: /opt/hadoop/etc/hadoop - command: ["hadoop", "kms"] - networks: - ozone_net: - ipv4_address: 172.25.0.101 - datanode1: - <<: *common-config - ports: - - 19864:9999 - - 9090:9090 - command: ["/opt/hadoop/bin/ozone","datanode"] - extra_hosts: - - "scm1.org=172.25.0.116" - - "scm2.org=172.25.0.117" - - "scm3.org=172.25.0.118" - - "recon=172.25.0.115" - environment: - WAITFOR: scm3.org:9894 - OZONE_OPTS: ${BYTEMAN_OPTS},port:9090 - networks: - ozone_net: - ipv4_address: 172.25.0.102 - datanode2: - <<: *common-config - ports: - - 9866:9999 - - 9091:9091 - command: ["/opt/hadoop/bin/ozone","datanode"] - extra_hosts: - - "scm1.org=172.25.0.116" - - "scm2.org=172.25.0.117" - - "scm3.org=172.25.0.118" - - "recon=172.25.0.115" - environment: - WAITFOR: scm3.org:9894 - OZONE_OPTS: ${BYTEMAN_OPTS},port:9091 - networks: - ozone_net: - ipv4_address: 172.25.0.103 - datanode3: - <<: *common-config - ports: - - 9868:9999 - - 9092:9092 - command: ["/opt/hadoop/bin/ozone","datanode"] - extra_hosts: - - "scm1.org=172.25.0.116" - - "scm2.org=172.25.0.117" - - "scm3.org=172.25.0.118" - - "recon=172.25.0.115" - environment: - WAITFOR: scm3.org:9894 - OZONE_OPTS: ${BYTEMAN_OPTS},port:9092 - networks: - ozone_net: - ipv4_address: 172.25.0.104 - om1: - hostname: om1 - <<: *common-config - ports: - - 9880:9874 - - 9890:9872 - - 9093:9093 - #- 18001:18001 - environment: - WAITFOR: scm3.org:9894 - ENSURE_OM_INITIALIZED: /data/metadata/om/current/VERSION - OZONE_OPTS: ${BYTEMAN_OPTS},port:9093 - command: ["/opt/hadoop/bin/ozone","om"] - extra_hosts: - - "scm1.org=172.25.0.116" - - "scm2.org=172.25.0.117" - - "scm3.org=172.25.0.118" - networks: - ozone_net: - ipv4_address: 172.25.0.111 - om2: - hostname: om2 - <<: *common-config - ports: - - 9882:9874 - - 9892:9872 - - 
9094:9094 - #- 18002:18002 - environment: - WAITFOR: scm3.org:9894 - ENSURE_OM_INITIALIZED: /data/metadata/om/current/VERSION - OZONE_OPTS: ${BYTEMAN_OPTS},port:9094 - command: ["/opt/hadoop/bin/ozone","om"] - extra_hosts: - - "scm1.org=172.25.0.116" - - "scm2.org=172.25.0.117" - - "scm3.org=172.25.0.118" - networks: - ozone_net: - ipv4_address: 172.25.0.112 - om3: - hostname: om3 - <<: *common-config - ports: - - 9884:9874 - - 9894:9872 - - 9095:9095 - #- 18003:18003 - environment: - WAITFOR: scm3.org:9894 - ENSURE_OM_INITIALIZED: /data/metadata/om/current/VERSION - OZONE_OPTS: ${BYTEMAN_OPTS},port:9095 - command: ["/opt/hadoop/bin/ozone","om"] - extra_hosts: - - "scm1.org=172.25.0.116" - - "scm2.org=172.25.0.117" - - "scm3.org=172.25.0.118" - networks: - ozone_net: - ipv4_address: 172.25.0.113 - httpfs: - hostname: httpfs - <<: *common-config - ports: - - 14000:14000 - - 9100:9100 - command: [ "/opt/hadoop/bin/ozone","httpfs" ] - environment: - OZONE-SITE.XML_hdds.scm.safemode.min.datanode: ${OZONE_SAFEMODE_MIN_DATANODES:-1} - OZONE_OPTS: ${BYTEMAN_OPTS},port:9100 - networks: - ozone_net: - ipv4_address: 172.25.0.119 - s3g: - hostname: s3g - <<: *common-config - ports: - - 9878:9878 - - 9101:9101 - command: ["ozone","s3g"] - environment: - OZONE-SITE.XML_ozone.s3g.domain.name=s3g.internal - OZONE_OPTS: ${BYTEMAN_OPTS},port:9101 - networks: - ozone_net: - ipv4_address: 172.25.0.120 - scm1.org: - hostname: scm1.org - <<: *common-config - ports: - - 9990:9876 - - 9992:9860 - - 9097:9097 - env_file: - - docker-config - environment: - ENSURE_SCM_INITIALIZED: /data/metadata/scm/current/VERSION - OZONE-SITE.XML_hdds.scm.safemode.min.datanode: "${OZONE_SAFEMODE_MIN_DATANODES:-3}" - OZONE_OPTS: ${BYTEMAN_OPTS},port:9097 - command: ["/opt/hadoop/bin/ozone","scm"] - extra_hosts: - - "om1=172.25.0.111" - - "om2=172.25.0.112" - - "om3=172.25.0.113" - - "scm1.org=172.25.0.116" - - "scm2.org=172.25.0.117" - - "scm3.org=172.25.0.118" - networks: - ozone_net: - ipv4_address: 
172.25.0.116 - scm2.org: - hostname: scm2.org - <<: *common-config - ports: - - 9994:9876 - - 9996:9860 - - 9098:9098 - environment: - WAITFOR: scm1.org:9894 - ENSURE_SCM_BOOTSTRAPPED: /data/metadata/scm/current/VERSION - OZONE-SITE.XML_hdds.scm.safemode.min.datanode: "${OZONE_SAFEMODE_MIN_DATANODES:-3}" - OZONE_OPTS: ${BYTEMAN_OPTS},port:9098 - command: ["/opt/hadoop/bin/ozone","scm"] - extra_hosts: - - "om1=172.25.0.111" - - "om2=172.25.0.112" - - "om3=172.25.0.113" - - "scm1.org=172.25.0.116" - - "scm2.org=172.25.0.117" - - "scm3.org=172.25.0.118" - networks: - ozone_net: - ipv4_address: 172.25.0.117 - scm3.org: - hostname: scm3.org - <<: *common-config - ports: - - 9998:9876 - - 10002:9860 - - 9099:9099 - environment: - WAITFOR: scm2.org:9894 - ENSURE_SCM_BOOTSTRAPPED: /data/metadata/scm/current/VERSION - OZONE-SITE.XML_hdds.scm.safemode.min.datanode: "${OZONE_SAFEMODE_MIN_DATANODES:-3}" - OZONE_OPTS: ${BYTEMAN_OPTS},port:9099 - command: ["/opt/hadoop/bin/ozone","scm"] - extra_hosts: - - "om1=172.25.0.111" - - "om2=172.25.0.112" - - "om3=172.25.0.113" - - "scm1.org=172.25.0.116" - - "scm2.org=172.25.0.117" - - "scm3.org=172.25.0.118" - networks: - ozone_net: - ipv4_address: 172.25.0.118 - recon: - hostname: recon - <<: *common-config - ports: - - 9888:9888 - - 9096:9096 - environment: - OZONE_OPTS: ${BYTEMAN_OPTS},port:9096 - command: ["/opt/hadoop/bin/ozone","recon"] - extra_hosts: - - "om1=172.25.0.111" - - "om2=172.25.0.112" - - "om3=172.25.0.113" - - "scm1.org=172.25.0.116" - - "scm2.org=172.25.0.117" - - "scm3.org=172.25.0.118" - networks: - ozone_net: - ipv4_address: 172.25.0.115 -networks: - ozone_net: - ipam: - driver: default - config: - - subnet: "172.25.0.0/24" diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/docker-config b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/docker-config deleted file mode 100644 index 9e2e7cbf0a0..00000000000 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/docker-config +++ /dev/null @@ 
-1,170 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# For HttpFS service it is required to enable proxying users. -CORE-SITE.XML_hadoop.proxyuser.httpfs.hosts=* -CORE-SITE.XML_hadoop.proxyuser.httpfs.groups=* - -CORE-SITE.XML_fs.defaultFS=ofs://omservice -CORE-SITE.XML_fs.trash.interval=1 - -OZONE-SITE.XML_ozone.om.service.ids=omservice -OZONE-SITE.XML_ozone.om.internal.service.id=omservice -OZONE-SITE.XML_ozone.om.nodes.omservice=om1,om2,om3 -OZONE-SITE.XML_ozone.om.address.omservice.om1=om1 -OZONE-SITE.XML_ozone.om.address.omservice.om2=om2 -OZONE-SITE.XML_ozone.om.address.omservice.om3=om3 -OZONE-SITE.XML_ozone.om.http-address.omservice.om1=om1 -OZONE-SITE.XML_ozone.om.http-address.omservice.om2=om2 -OZONE-SITE.XML_ozone.om.http-address.omservice.om3=om3 - -OZONE-SITE.XML_ozone.scm.service.ids=scmservice -OZONE-SITE.XML_ozone.scm.primordial.node.id=scm1 -OZONE-SITE.XML_ozone.scm.nodes.scmservice=scm1,scm2,scm3 -OZONE-SITE.XML_ozone.scm.address.scmservice.scm1=scm1.org -OZONE-SITE.XML_ozone.scm.address.scmservice.scm2=scm2.org -OZONE-SITE.XML_ozone.scm.address.scmservice.scm3=scm3.org -OZONE-SITE.XML_ozone.scm.close.container.wait.duration=5s - -OZONE-SITE.XML_ozone.om.volume.listall.allowed=false - 
-OZONE-SITE.XML_ozone.scm.container.size=1GB -OZONE-SITE.XML_ozone.scm.datanode.ratis.volume.free-space.min=10MB -OZONE-SITE.XML_ozone.scm.pipeline.creation.interval=30s -OZONE-SITE.XML_ozone.scm.pipeline.owner.container.count=1 -OZONE-SITE.XML_ozone.scm.datanode.id.dir=/data/metadata -OZONE-SITE.XML_ozone.scm.block.client.address=scm -OZONE-SITE.XML_ozone.metadata.dirs=/data/metadata -OZONE-SITE.XML_ozone.handler.type=distributed -OZONE-SITE.XML_ozone.scm.client.address=scm -OZONE-SITE.XML_hdds.block.token.enabled=true -OZONE-SITE.XML_hdds.container.token.enabled=true -OZONE-SITE.XML_hdds.grpc.tls.enabled=true -OZONE-SITE.XML_ozone.server.default.replication=3 -OZONE-SITE.XML_hdds.scmclient.max.retry.timeout=30s -OZONE-SITE.XML_hdds.container.report.interval=60s -OZONE-SITE.XML_hdds.container.ratis.datastream.enabled=true - -OZONE-SITE.XML_ozone.recon.om.snapshot.task.interval.delay=1m -OZONE-SITE.XML_ozone.recon.db.dir=/data/metadata/recon -OZONE-SITE.XML_ozone.recon.om.snapshot.task.initial.delay=20s -OZONE-SITE.XML_ozone.recon.address=recon:9891 - -OZONE-SITE.XML_ozone.security.enabled=true -OZONE-SITE.XML_ozone.acl.enabled=true -OZONE-SITE.XML_ozone.acl.authorizer.class=org.apache.hadoop.ozone.security.acl.OzoneNativeAuthorizer -OZONE-SITE.XML_ozone.administrators="testuser,recon,om" -OZONE-SITE.XML_ozone.s3.administrators="testuser,s3g" - -OZONE-SITE.XML_hdds.datanode.dir=/data/hdds -OZONE-SITE.XML_hdds.datanode.volume.min.free.space=100MB -HDFS-SITE.XML_dfs.datanode.address=0.0.0.0:1019 -HDFS-SITE.XML_dfs.datanode.http.address=0.0.0.0:1012 -CORE-SITE.XML_dfs.data.transfer.protection=authentication -CORE-SITE.XML_hadoop.security.authentication=kerberos -CORE-SITE.XML_hadoop.security.auth_to_local="DEFAULT" -CORE-SITE.XML_hadoop.security.key.provider.path=kms://http@kms:9600/kms - - -OZONE-SITE.XML_hdds.scm.kerberos.principal=scm/scm@EXAMPLE.COM -OZONE-SITE.XML_hdds.scm.kerberos.keytab.file=/etc/security/keytabs/scm.keytab 
-OZONE-SITE.XML_ozone.om.kerberos.principal=om/om@EXAMPLE.COM -OZONE-SITE.XML_ozone.om.kerberos.keytab.file=/etc/security/keytabs/om.keytab -OZONE-SITE.XML_ozone.recon.kerberos.keytab.file=/etc/security/keytabs/recon.keytab -OZONE-SITE.XML_ozone.recon.kerberos.principal=recon/recon@EXAMPLE.COM - -OZONE-SITE.XML_ozone.s3g.kerberos.keytab.file=/etc/security/keytabs/s3g.keytab -OZONE-SITE.XML_ozone.s3g.kerberos.principal=s3g/s3g@EXAMPLE.COM - -OZONE-SITE.XML_ozone.httpfs.kerberos.keytab.file=/etc/security/keytabs/httpfs.keytab -OZONE-SITE.XML_ozone.httpfs.kerberos.principal=httpfs/httpfs@EXAMPLE.COM - -OZONE-SITE.XML_hdds.datanode.kerberos.principal=dn/dn@EXAMPLE.COM -OZONE-SITE.XML_hdds.datanode.kerberos.keytab.file=/etc/security/keytabs/dn.keytab - -OZONE-SITE.XML_ozone.security.http.kerberos.enabled=true -OZONE-SITE.XML_ozone.s3g.secret.http.enabled=true -OZONE-SITE.XML_ozone.http.filter.initializers=org.apache.hadoop.security.AuthenticationFilterInitializer - -OZONE-SITE.XML_ozone.om.http.auth.type=kerberos -OZONE-SITE.XML_hdds.scm.http.auth.type=kerberos -OZONE-SITE.XML_hdds.datanode.http.auth.type=kerberos -OZONE-SITE.XML_ozone.s3g.http.auth.type=kerberos -OZONE-SITE.XML_ozone.s3g.secret.http.auth.type=kerberos -OZONE-SITE.XML_ozone.httpfs.http.auth.type=kerberos -OZONE-SITE.XML_ozone.recon.http.auth.type=kerberos - -OZONE-SITE.XML_hdds.scm.http.auth.kerberos.principal=HTTP/scm@EXAMPLE.COM -OZONE-SITE.XML_hdds.scm.http.auth.kerberos.keytab=/etc/security/keytabs/HTTP.keytab -OZONE-SITE.XML_ozone.om.http.auth.kerberos.principal=HTTP/om@EXAMPLE.COM -OZONE-SITE.XML_ozone.om.http.auth.kerberos.keytab=/etc/security/keytabs/HTTP.keytab -OZONE-SITE.XML_hdds.datanode.http.auth.kerberos.principal=HTTP/db@EXAMPLE.COM -OZONE-SITE.XML_hdds.datanode.http.auth.kerberos.keytab=/etc/security/keytabs/HTTP.keytab -OZONE-SITE.XML_ozone.s3g.http.auth.kerberos.keytab=/etc/security/keytabs/HTTP.keytab -OZONE-SITE.XML_ozone.s3g.http.auth.kerberos.principal=HTTP/s3g@EXAMPLE.COM 
-OZONE-SITE.XML_ozone.httpfs.http.auth.kerberos.keytab=/etc/security/keytabs/httpfs.keytab -OZONE-SITE.XML_ozone.httpfs.http.auth.kerberos.principal=HTTP/httpfs@EXAMPLE.COM -OZONE-SITE.XML_ozone.recon.http.auth.kerberos.principal=HTTP/recon@EXAMPLE.COM -OZONE-SITE.XML_ozone.recon.http.auth.kerberos.keytab=/etc/security/keytabs/recon.keytab -OZONE-SITE.XML_ozone.recon.http.auth.kerberos.keytab=/etc/security/keytabs/recon.keytab -OZONE-SITE.XML_ozone.http.basedir=/tmp/ozone_http - -CORE-SITE.XML_hadoop.http.authentication.simple.anonymous.allowed=false -CORE-SITE.XML_hadoop.http.authentication.signature.secret.file=/etc/security/http_secret -CORE-SITE.XML_hadoop.http.authentication.type=kerberos -CORE-SITE.XML_hadoop.http.authentication.kerberos.principal=HTTP/ozone@EXAMPLE.COM -CORE-SITE.XML_hadoop.http.authentication.kerberos.keytab=/etc/security/keytabs/HTTP.keytab - - -CORE-SITE.XML_hadoop.security.authorization=true -HADOOP-POLICY.XML_ozone.om.security.client.protocol.acl=* -HADOOP-POLICY.XML_hdds.security.client.datanode.container.protocol.acl=* -HADOOP-POLICY.XML_hdds.security.client.scm.container.protocol.acl=* -HADOOP-POLICY.XML_hdds.security.client.scm.block.protocol.acl=* -HADOOP-POLICY.XML_hdds.security.client.scm.certificate.protocol.acl=* -HADOOP-POLICY.XML_ozone.security.reconfigure.protocol.acl=* - -HDFS-SITE.XML_rpc.metrics.quantile.enable=true -HDFS-SITE.XML_rpc.metrics.percentiles.intervals=60,300 - -HTTPFS-SITE.XML_hadoop.http.authentication.type=kerberos -HTTPFS-SITE.XML_hadoop.http.authentication.kerberos.keytab=/etc/security/keytabs/httpfs.keytab -HTTPFS-SITE.XML_hadoop.http.authentication.kerberos.principal=HTTP/httpfs@EXAMPLE.COM -HTTPFS-SITE.XML_httpfs.hadoop.authentication.type=kerberos -HTTPFS-SITE.XML_httpfs.hadoop.authentication.kerberos.keytab=/etc/security/keytabs/httpfs.keytab -HTTPFS-SITE.XML_httpfs.hadoop.authentication.kerberos.principal=httpfs/httpfs@EXAMPLE.COM -KMS-SITE.XML_hadoop.kms.proxyuser.s3g.users=* 
-KMS-SITE.XML_hadoop.kms.proxyuser.s3g.groups=* -KMS-SITE.XML_hadoop.kms.proxyuser.s3g.hosts=* - -#Enable this variable to print out all hadoop rpc traffic to the stdout. See http://byteman.jboss.org/ to define your own instrumentation. -#BYTEMAN_SCRIPT_URL=https://raw.githubusercontent.com/apache/hadoop/trunk/dev-support/byteman/hadooprpc.btm - -OZONE_DATANODE_SECURE_USER=root -JSVC_HOME=/usr/bin - -OZONE_CONF_DIR=/etc/hadoop -OZONE_LOG_DIR=/var/log/hadoop - -no_proxy=om,scm,recon,s3g,kdc,localhost,127.0.0.1 - -# Explicitly enable filesystem snapshot feature for this Docker compose cluster -OZONE-SITE.XML_ozone.filesystem.snapshot.enabled=true - - -OZONE-SITE.XML_hdds.secret.key.rotate.duration=5m -OZONE-SITE.XML_hdds.secret.key.rotate.check.duration=1m -OZONE-SITE.XML_hdds.secret.key.expiry.duration=1h diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/krb5.conf b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/krb5.conf deleted file mode 100644 index eefc5b9c685..00000000000 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/krb5.conf +++ /dev/null @@ -1,41 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -[logging] -default = FILE:/var/log/krb5libs.log -kdc = FILE:/var/log/krb5kdc.log -admin_server = FILE:/var/log/kadmind.log - -[libdefaults] - dns_canonicalize_hostname = false - dns_lookup_realm = false - ticket_lifetime = 24h - renew_lifetime = 7d - forwardable = true - rdns = false - default_realm = EXAMPLE.COM - -[realms] - EXAMPLE.COM = { - kdc = kdc - admin_server = kdc - max_renewable_life = 7d - } - -[domain_realm] - .example.com = EXAMPLE.COM - example.com = EXAMPLE.COM - diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/ranger.yaml b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/ranger.yaml deleted file mode 100644 index 1d3c2255314..00000000000 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/ranger.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -x-om-ranger-config: - &om-ranger-config - environment: - OZONE_MANAGER_CLASSPATH: "/opt/ranger/ozone-plugin/lib/libext/*:/opt/ozone/conf" - OZONE-SITE.XML_ozone.acl.authorizer.class: "org.apache.ranger.authorization.ozone.authorizer.RangerOzoneAuthorizer" - OZONE-SITE.XML_ozone.om.multitenancy.enabled: "true" - OZONE-SITE.XML_ozone.om.ranger.https-address: "http://ranger:6080" - OZONE-SITE.XML_ozone.om.ranger.https.admin.api.user: "admin" - OZONE-SITE.XML_ozone.om.ranger.https.admin.api.passwd: "rangerR0cks!" - OZONE-SITE.XML_ozone.om.ranger.service: "dev_ozone" - OZONE-SITE.XML_ozone.om.tenant.dev.skip.ranger: "false" - RANGER-OZONE-SECURITY.XML_ranger.plugin.ozone.policy.rest.url: "http://ranger:6080" - RANGER-OZONE-SECURITY.XML_ranger.plugin.ozone.policyengine.option.disable.policy.refresher: "true" - RANGER-OZONE-SECURITY.XML_ranger.plugin.ozone.service.name: "dev_ozone" - volumes: - - ../..:/opt/hadoop - - ../_keytabs:/etc/security/keytabs - - ./krb5.conf:/etc/krb5.conf - - ${RANGER_OZONE_PLUGIN_DIR}:/opt/ranger/ozone-plugin - tmpfs: - - /opt/ozone/conf - command: bash -c "sudo --preserve-env /opt/ranger/ozone-plugin/enable-ozone-plugin.sh && /opt/hadoop/bin/ozone om" - -services: - om1: - <<: *om-ranger-config - om2: - <<: *om-ranger-config - om3: - <<: *om-ranger-config - ranger: - networks: - ozone_net: - ipv4_address: 172.25.0.200 - ranger-db: - networks: - ozone_net: - ipv4_address: 172.25.0.201 diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/.env b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/.env index 75619126ca4..9627a0c0372 100644 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/.env +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/.env @@ -27,3 +27,4 @@ RANGER_DB_IMAGE_VERSION=12 RANGER_IMAGE=ghcr.io/adoroszlai/ranger-admin RANGER_IMAGE_VERSION=0ae34250d3af672776fca6a53047699adf3afce5-${ranger.version}-8 RANGER_VERSION=${ranger.version} +BYTEMAN_PORT=9091 \ No newline at end of file diff --git 
a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/.env b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/byteman.yaml similarity index 52% rename from hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/.env rename to hadoop-ozone/dist/src/main/compose/ozonesecure-ha/byteman.yaml index e5557e2200a..5ea9b9fded2 100644 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/.env +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/byteman.yaml @@ -13,19 +13,37 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +x-byteman-config: + &byteman-config + environment: + BYTEMAN_HOME: /opt/byteman/ + OZONE_OPTS: -javaagent:/opt/byteman/lib/byteman.jar=listener:true,interface:0.0.0.0,port:${BYTEMAN_PORT} + BYTEMAN_PORT: ${BYTEMAN_PORT} + volumes: + - ../common/byteman-scripts:/opt/byteman/scripts -HDDS_VERSION=${hdds.version} -HADOOP_IMAGE=apache/hadoop -HADOOP_VERSION=${hadoop.version} -OZONE_RUNNER_VERSION=${docker.ozone-runner.version} -OZONE_RUNNER_IMAGE=apache/ozone-runner -OZONE_TESTKRB5_IMAGE=${docker.ozone-testkr5b.image} -OZONE_VOLUME=./data -OZONE_OPTS= -RANGER_DB_IMAGE=postgres -RANGER_DB_IMAGE_VERSION=12 -RANGER_IMAGE=ghcr.io/adoroszlai/ranger-admin -RANGER_IMAGE_VERSION=0ae34250d3af672776fca6a53047699adf3afce5-${ranger.version}-8 -RANGER_VERSION=${ranger.version} -# Byteman agent configuration for different Ozone components -BYTEMAN_OPTS="-javaagent:/opt/byteman.jar=listener:true" \ No newline at end of file +services: + datanode1: + <<: *byteman-config + datanode2: + <<: *byteman-config + datanode3: + <<: *byteman-config + om1: + <<: *byteman-config + om2: + <<: *byteman-config + om3: + <<: *byteman-config + httpfs: + <<: *byteman-config + s3g: + <<: *byteman-config + scm1.org: + <<: *byteman-config + scm2.org: + <<: *byteman-config + scm3.org: + <<: *byteman-config + recon: + <<: *byteman-config diff --git 
a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/test.sh b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-byteman.sh old mode 100755 new mode 100644 similarity index 84% rename from hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/test.sh rename to hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-byteman.sh index 0ad547adebc..41f6c94b1fc --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha-fi/test.sh +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-byteman.sh @@ -15,7 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -#suite:HA-secure +#suite:misc set -u -o pipefail @@ -25,15 +25,12 @@ export COMPOSE_DIR export SECURITY_ENABLED=true export OM_SERVICE_ID="omservice" export SCM=scm1.org -export OM=om1.org - -: ${OZONE_BUCKET_KEY_NAME:=key1} +export COMPOSE_FILE=docker-compose.yaml:byteman.yaml # shellcheck source=/dev/null source "$COMPOSE_DIR/../testlib.sh" start_docker_env -execute_command_in_container kms hadoop key create ${OZONE_BUCKET_KEY_NAME} - -execute_robot_test $OM ozone-fi/byteman_faults_sample.robot +## Run virtual host test cases +execute_robot_test om1 ozone-fi/byteman_faults_sample.robot diff --git a/hadoop-ozone/dist/src/main/smoketest/lib/BytemanLibrary.py b/hadoop-ozone/dist/src/main/smoketest/lib/BytemanLibrary.py index 149847cbdd2..0a510987853 100644 --- a/hadoop-ozone/dist/src/main/smoketest/lib/BytemanLibrary.py +++ b/hadoop-ozone/dist/src/main/smoketest/lib/BytemanLibrary.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information +# distributed with this work for additional consolermation # regarding copyright ownership. 
The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance @@ -17,7 +17,6 @@ # under the License. import subprocess -import requests import json from robot.api import logger @@ -29,39 +28,36 @@ def connect_to_byteman_agent(self, component_name, host="localhost", port=9091): """Connect to Byteman agent on specific component""" self.byteman_clients[component_name] = {'host': host, 'port': port, 'base_url': f"http://{host}:{port}"} - logger.info(f"Connected to Byteman agent for {component_name} at {host}:{port}") + logger.console(f"Connected to Byteman agent for {component_name} at {host}:{port}") def add_byteman_rule(self, component_name, rule_file): """Add Byteman rule into specific component""" client = self.byteman_clients[component_name] # Use bmsubmit command to load rule - cmd = ["bmsubmit", f"-p {client['port']}", f"-l {rule_file}"] - + cmd = ["bmsubmit", "-h", component_name, "-p", str(client['port']), "-l", rule_file] result = subprocess.run(cmd, capture_output=True, text=True) if result.returncode != 0: raise RuntimeError(f"Failed to load rule: {result.stderr}") - - logger.info(f"Loaded Byteman rule {rule_file} into {component_name}") + logger.console(f"Loaded Byteman rule {rule_file} into {component_name}") def remove_byteman_rule(self, component_name, rule_file): """Remove Byteman rule for specific component""" client = self.byteman_clients[component_name] - cmd = ["bmsubmit", f"-p {client['port']}", f"-u {rule_file}"] - + cmd = ["bmsubmit", "-h", component_name, "-p", str(client['port']), "-u", rule_file] + logger.console(cmd) result = subprocess.run(cmd, capture_output=True, text=True) if result.returncode != 0: raise RuntimeError(f"Failed to unload rule: {result.stderr}") - - logger.info(f"Unloaded Byteman rule {rule_file} from {component_name}") + logger.console(f"Unloaded Byteman rule {rule_file} from {component_name}") - def list_all_byteman_rules(self, component_name): 
+ def list_byteman_rules(self, component_name): """List all Byteman rules for specific component""" client = self.byteman_clients[component_name] - cmd = ["bmsubmit", f"-p {client['port']}", "-l"] + cmd = ["bmsubmit", "-h", component_name, "-p", str(client['port']), "-l"] result = subprocess.run(cmd, capture_output=True, text=True) - logger.info(f"Active rules in {component_name}: {result.stdout}") + logger.console(f"Active rules in {component_name}: {result.stdout}") return result.stdout \ No newline at end of file diff --git a/hadoop-ozone/dist/src/main/smoketest/ozone-fi/BytemanKeywords.robot b/hadoop-ozone/dist/src/main/smoketest/ozone-fi/BytemanKeywords.robot index e1cb00efa96..e8579dbf042 100644 --- a/hadoop-ozone/dist/src/main/smoketest/ozone-fi/BytemanKeywords.robot +++ b/hadoop-ozone/dist/src/main/smoketest/ozone-fi/BytemanKeywords.robot @@ -1,19 +1,21 @@ *** Settings *** -Library BytemanLibrary +Library ../lib/BytemanLibrary.py +Library String *** Variables *** -${DATANODE1_BYTEMAN_HOST_PORT} 172.25.0.102:9090 -${DATANODE2_BYTEMAN_HOST_PORT} 172.25.0.103:9091 -${DATANODE3_BYTEMAN_HOST_PORT} 172.25.0.104:9092 -${OM1_BYTEMAN_HOST_PORT} 172.25.0.111:9093 -${OM2_BYTEMAN_HOST_PORT} 172.25.0.112:9094 -${OM3_BYTEMAN_HOST_PORT} 172.25.0.113:9095 -${RECON_BYTEMAN_HOST_PORT} 172.25.0.115:9096 -${SCM1_BYTEMAN_HOST_PORT} 172.25.0.116:9097 -${SCM2_BYTEMAN_HOST_PORT} 172.25.0.117:9098 -${SCM3_BYTEMAN_HOST_PORT} 172.25.0.118:9099 -${HTTPFS_BYTEMAN_HOST_PORT} 172.25.0.119:9100 -${S3G_BYTEMAN_HOST_PORT} 172.25.0.120:9101 +${BYTEMAN_PORT} 9091 +${DATANODE1_BYTEMAN_HOST_PORT} datanode1:${BYTEMAN_PORT} +${DATANODE2_BYTEMAN_HOST_PORT} datanode2:${BYTEMAN_PORT} +${DATANODE3_BYTEMAN_HOST_PORT} datanode3:${BYTEMAN_PORT} +${OM1_BYTEMAN_HOST_PORT} om1:${BYTEMAN_PORT} +${OM2_BYTEMAN_HOST_PORT} om2:${BYTEMAN_PORT} +${OM3_BYTEMAN_HOST_PORT} om3:${BYTEMAN_PORT} +${RECON_BYTEMAN_HOST_PORT} recon:${BYTEMAN_PORT} +${SCM1_BYTEMAN_HOST_PORT} scm1.org:${BYTEMAN_PORT} 
+${SCM2_BYTEMAN_HOST_PORT} scm2.org:${BYTEMAN_PORT} +${SCM3_BYTEMAN_HOST_PORT} scm3:${BYTEMAN_PORT} +${HTTPFS_BYTEMAN_HOST_PORT} httpfs:${BYTEMAN_PORT} +${S3G_BYTEMAN_HOST_PORT} s3g:${BYTEMAN_PORT} *** Keywords *** Setup Byteman For Component @@ -22,6 +24,7 @@ Setup Byteman For Component Connect To Byteman Agent ${component} ${host} ${port} Setup All Byteman Agents + Log Inside Setup All Byteman Agents Setup Byteman For Component datanode1 ${DATANODE1_BYTEMAN_HOST_PORT} Setup Byteman For Component datanode2 ${DATANODE2_BYTEMAN_HOST_PORT} Setup Byteman For Component datanode3 ${DATANODE3_BYTEMAN_HOST_PORT} diff --git a/hadoop-ozone/dist/src/main/smoketest/ozone-fi/byteman_faults_sample.robot b/hadoop-ozone/dist/src/main/smoketest/ozone-fi/byteman_faults_sample.robot index 84d7641a0c4..a0d21fed234 100644 --- a/hadoop-ozone/dist/src/main/smoketest/ozone-fi/byteman_faults_sample.robot +++ b/hadoop-ozone/dist/src/main/smoketest/ozone-fi/byteman_faults_sample.robot @@ -1,16 +1,32 @@ +*** Variables *** +${RULE} /opt/hadoop/compose/common/byteman-scripts/skip-put-block.btm +${vol} vol1 +${buck} buck1 +${key} key1 +${SECURITY_ENABLED} true + *** Settings *** -Resource BytemanKeywords.robot -Suite Setup Setup All Byteman Agents +Resource BytemanKeywords.robot +Resource os.robot +Resource ../lib/os.robot +Resource ../commonlib.robot +Resource ../ozone-lib/shell.robot +Suite Setup Setup Suite +Suite Teardown Teardown Suite + + +*** Keywords *** +Setup Suite + Setup All Byteman Agents + Inject Fault Into Component datanode1 ${RULE} + Log To Console Kinit + Execute And Ignore Error kinit -kt /etc/security/keytabs/testuser.keytab testuser/om + + +Teardown Suite + Remove Fault From Component datanode1 ${RULE} *** Test Cases *** -Test Skip Put Block on the datanode - Inject Fault Into Component datanode1 /opt/byteman/scripts/skip-put-block.btm - - # Run your Ozone operations that should be affected by delay - Execute Ozone Command ozone sh volume create /vol1 - Execute Ozone 
Command ozone sh bucket create /vol1/buck1 - Execute Ozone Command ozone sh volume key put /vol1/buck1/key1 /opt/byteman/scripts/skip-put-block.btm - - # Verify the put block was skipped - - Remove Fault From Component datanode1 /opt/byteman/scripts/skip-put-block.btm +Print Byteman Port + ${BYTEMAN_PORT} = Get Environment Variable BYTEMAN_PORT + Log ${BYTEMAN_PORT} From 840dd49126d9aba1624ceef2818ddbbfb7939bdf Mon Sep 17 00:00:00 2001 From: Soumitra Sulav Date: Tue, 24 Jun 2025 01:37:38 +0530 Subject: [PATCH 03/16] Addressed review comments --- .../byteman}/skip-notify-group-remove.btm | 0 .../byteman}/skip-put-block.btm | 0 .../dist/src/main/compose/ozonesecure-ha/byteman.yaml | 4 +--- .../src/main/smoketest/ozone-fi/byteman_faults_sample.robot | 2 +- 4 files changed, 2 insertions(+), 4 deletions(-) rename {hadoop-ozone/dist/src/main/compose/common/byteman-scripts => dev-support/byteman}/skip-notify-group-remove.btm (100%) rename {hadoop-ozone/dist/src/main/compose/common/byteman-scripts => dev-support/byteman}/skip-put-block.btm (100%) diff --git a/hadoop-ozone/dist/src/main/compose/common/byteman-scripts/skip-notify-group-remove.btm b/dev-support/byteman/skip-notify-group-remove.btm similarity index 100% rename from hadoop-ozone/dist/src/main/compose/common/byteman-scripts/skip-notify-group-remove.btm rename to dev-support/byteman/skip-notify-group-remove.btm diff --git a/hadoop-ozone/dist/src/main/compose/common/byteman-scripts/skip-put-block.btm b/dev-support/byteman/skip-put-block.btm similarity index 100% rename from hadoop-ozone/dist/src/main/compose/common/byteman-scripts/skip-put-block.btm rename to dev-support/byteman/skip-put-block.btm diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/byteman.yaml b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/byteman.yaml index 5ea9b9fded2..563c86e042a 100644 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/byteman.yaml +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/byteman.yaml @@ 
-17,10 +17,8 @@ x-byteman-config: &byteman-config environment: BYTEMAN_HOME: /opt/byteman/ - OZONE_OPTS: -javaagent:/opt/byteman/lib/byteman.jar=listener:true,interface:0.0.0.0,port:${BYTEMAN_PORT} + OZONE_OPTS: -javaagent:/opt/byteman.jar=listener:true,interface:0.0.0.0,port:${BYTEMAN_PORT} BYTEMAN_PORT: ${BYTEMAN_PORT} - volumes: - - ../common/byteman-scripts:/opt/byteman/scripts services: datanode1: diff --git a/hadoop-ozone/dist/src/main/smoketest/ozone-fi/byteman_faults_sample.robot b/hadoop-ozone/dist/src/main/smoketest/ozone-fi/byteman_faults_sample.robot index a0d21fed234..51e1a5a2ae7 100644 --- a/hadoop-ozone/dist/src/main/smoketest/ozone-fi/byteman_faults_sample.robot +++ b/hadoop-ozone/dist/src/main/smoketest/ozone-fi/byteman_faults_sample.robot @@ -1,5 +1,5 @@ *** Variables *** -${RULE} /opt/hadoop/compose/common/byteman-scripts/skip-put-block.btm +${RULE} /opt/hadoop/share/ozone/byteman/skip-put-block.btm ${vol} vol1 ${buck} buck1 ${key} key1 From b7cf25ab8b5be741e6a05b64c1b105cfe644bd46 Mon Sep 17 00:00:00 2001 From: Soumitra Sulav Date: Tue, 24 Jun 2025 01:45:45 +0530 Subject: [PATCH 04/16] Add BYTEMAN_HOME in .env --- hadoop-ozone/dist/src/main/compose/ozonesecure-ha/.env | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/.env b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/.env index 9627a0c0372..18011f88d37 100644 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/.env +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/.env @@ -27,4 +27,5 @@ RANGER_DB_IMAGE_VERSION=12 RANGER_IMAGE=ghcr.io/adoroszlai/ranger-admin RANGER_IMAGE_VERSION=0ae34250d3af672776fca6a53047699adf3afce5-${ranger.version}-8 RANGER_VERSION=${ranger.version} -BYTEMAN_PORT=9091 \ No newline at end of file +BYTEMAN_PORT=9091 +BYTEMAN_HOME=/opt/byteman/ From 61bd2c2e7bd81dc93662395d2e98ab2b36a361b5 Mon Sep 17 00:00:00 2001 From: Soumitra Sulav Date: Tue, 24 Jun 2025 11:25:50 +0530 Subject: [PATCH 05/16] 
Updating the image version for CI --- hadoop-ozone/dist/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-ozone/dist/pom.xml b/hadoop-ozone/dist/pom.xml index 87235af34d8..497af9e8af3 100644 --- a/hadoop-ozone/dist/pom.xml +++ b/hadoop-ozone/dist/pom.xml @@ -25,7 +25,7 @@ Apache Ozone Distribution - 20241216-1-jdk21 + 8562aa6f8079e5e18c65ae6db75deea9706b9783 ghcr.io/apache/ozone-testkrb5:20241129-1 apache/ozone -rocky From 0a937ae7a4f1f2d0c51ed669717c8a86377b572b Mon Sep 17 00:00:00 2001 From: Soumitra Sulav Date: Tue, 24 Jun 2025 11:31:45 +0530 Subject: [PATCH 06/16] Add Apache license header to all files --- dev-support/byteman/skip-notify-group-remove.btm | 16 ++++++++++++++++ dev-support/byteman/skip-put-block.btm | 16 ++++++++++++++++ .../smoketest/ozone-fi/BytemanKeywords.robot | 16 ++++++++++++++++ .../ozone-fi/byteman_faults_sample.robot | 16 ++++++++++++++++ 4 files changed, 64 insertions(+) diff --git a/dev-support/byteman/skip-notify-group-remove.btm b/dev-support/byteman/skip-notify-group-remove.btm index 8bd3783d2bb..f885bab4fd1 100644 --- a/dev-support/byteman/skip-notify-group-remove.btm +++ b/dev-support/byteman/skip-notify-group-remove.btm @@ -1,3 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + + RULE skip notifyGroupRemove CLASS org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine METHOD notifyGroupRemove diff --git a/dev-support/byteman/skip-put-block.btm b/dev-support/byteman/skip-put-block.btm index 391680f512c..65c46f2c6c2 100644 --- a/dev-support/byteman/skip-put-block.btm +++ b/dev-support/byteman/skip-put-block.btm @@ -1,3 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + RULE Block putBlock CLASS org.apache.hadoop.ozone.container.keyvalue.impl.BlockManagerImpl METHOD putBlock diff --git a/hadoop-ozone/dist/src/main/smoketest/ozone-fi/BytemanKeywords.robot b/hadoop-ozone/dist/src/main/smoketest/ozone-fi/BytemanKeywords.robot index e8579dbf042..24756f65b6d 100644 --- a/hadoop-ozone/dist/src/main/smoketest/ozone-fi/BytemanKeywords.robot +++ b/hadoop-ozone/dist/src/main/smoketest/ozone-fi/BytemanKeywords.robot @@ -1,3 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + *** Settings *** Library ../lib/BytemanLibrary.py Library String diff --git a/hadoop-ozone/dist/src/main/smoketest/ozone-fi/byteman_faults_sample.robot b/hadoop-ozone/dist/src/main/smoketest/ozone-fi/byteman_faults_sample.robot index 51e1a5a2ae7..6a460d7d1a3 100644 --- a/hadoop-ozone/dist/src/main/smoketest/ozone-fi/byteman_faults_sample.robot +++ b/hadoop-ozone/dist/src/main/smoketest/ozone-fi/byteman_faults_sample.robot @@ -1,3 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + *** Variables *** ${RULE} /opt/hadoop/share/ozone/byteman/skip-put-block.btm ${vol} vol1 From 7a8074bdda73f49ddd1c1ce64160f89a4af19e8e Mon Sep 17 00:00:00 2001 From: Soumitra Sulav Date: Tue, 24 Jun 2025 18:36:46 +0530 Subject: [PATCH 07/16] Updated runner version and Cleaned up code based on review --- hadoop-ozone/dist/pom.xml | 2 +- .../src/main/smoketest/lib/BytemanLibrary.py | 80 +++++++++++-------- .../smoketest/ozone-fi/BytemanKeywords.robot | 61 +++++--------- .../ozone-fi/byteman_faults_sample.robot | 32 +++++--- 4 files changed, 87 insertions(+), 88 deletions(-) diff --git a/hadoop-ozone/dist/pom.xml b/hadoop-ozone/dist/pom.xml index 497af9e8af3..55a4620c355 100644 --- a/hadoop-ozone/dist/pom.xml +++ b/hadoop-ozone/dist/pom.xml @@ -25,7 +25,7 @@ Apache Ozone Distribution - 8562aa6f8079e5e18c65ae6db75deea9706b9783 + 20250624-1-jdk21 ghcr.io/apache/ozone-testkrb5:20241129-1 apache/ozone -rocky diff --git a/hadoop-ozone/dist/src/main/smoketest/lib/BytemanLibrary.py b/hadoop-ozone/dist/src/main/smoketest/lib/BytemanLibrary.py index 0a510987853..a1b54367d0b 100644 --- a/hadoop-ozone/dist/src/main/smoketest/lib/BytemanLibrary.py +++ b/hadoop-ozone/dist/src/main/smoketest/lib/BytemanLibrary.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file -# distributed with this work for additional consolermation +# distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance @@ -17,47 +17,61 @@ # under the License.
import subprocess -import json +import os from robot.api import logger class BytemanLibrary: def __init__(self): - self.byteman_clients = {} + self.byteman_port = os.getenv("BYTEMAN_PORT", "9091") + self.byteman_cmd = ["bmsubmit", "-p", self.byteman_port] + + def update_component_name(self, component_name): + return f"{component_name}.org" if component_name in ["scm1", "scm2", "scm3"] else component_name + + def run_byteman_cmd(self, component_name, args, action_desc): + """Run a byteman command and handle error/logging""" + cmd = self.byteman_cmd + ["-h", component_name] + args + result = subprocess.run(cmd, capture_output=True, text=True) - def connect_to_byteman_agent(self, component_name, host="localhost", port=9091): - """Connect to Byteman agent on specific component""" - self.byteman_clients[component_name] = {'host': host, 'port': port, - 'base_url': f"http://{host}:{port}"} - logger.console(f"Connected to Byteman agent for {component_name} at {host}:{port}") + if result.returncode != 0: + raise RuntimeError(f"Failed to {action_desc} for {component_name}: {result.stderr.strip()}") + logger.console(f"{action_desc} for {component_name} successful.") + return result.stdout + def add_byteman_rule(self, component_name, rule_file): """Add Byteman rule into specific component""" - client = self.byteman_clients[component_name] - - # Use bmsubmit command to load rule - cmd = ["bmsubmit", "-h", component_name, "-p", str(client['port']), "-l", rule_file] - result = subprocess.run(cmd, capture_output=True, text=True) - if result.returncode != 0: - raise RuntimeError(f"Failed to load rule: {result.stderr}") - logger.console(f"Loaded Byteman rule {rule_file} into {component_name}") - + component_name = self.update_component_name(component_name) + self.run_byteman_cmd(component_name, ["-l", rule_file], f"Add rule {rule_file}") + def remove_byteman_rule(self, component_name, rule_file): """Remove Byteman rule for specific component""" - client = 
self.byteman_clients[component_name] - - cmd = ["bmsubmit", "-h", component_name, "-p", str(client['port']), "-u", rule_file] - logger.console(cmd) - result = subprocess.run(cmd, capture_output=True, text=True) - if result.returncode != 0: - raise RuntimeError(f"Failed to unload rule: {result.stderr}") - logger.console(f"Unloaded Byteman rule {rule_file} from {component_name}") - + component_name = self.update_component_name(component_name) + self.run_byteman_cmd(component_name, ["-u", rule_file], f"Remove rule {rule_file}") + def list_byteman_rules(self, component_name): - """List all Byteman rules for specific component""" - client = self.byteman_clients[component_name] - - cmd = ["bmsubmit", "-h", component_name, "-p", str(client['port']), "-l"] - result = subprocess.run(cmd, capture_output=True, text=True) + """List Active Byteman rules for specific component and return file list""" + component_name = self.update_component_name(component_name) + output = self.run_byteman_cmd(component_name, ["-l"], "List rules") - logger.console(f"Active rules in {component_name}: {result.stdout}") - return result.stdout \ No newline at end of file + matching_lines = [line for line in output.splitlines() if '# File' in line] + file_list = [line.split()[2] for line in matching_lines if len(line.split()) >= 3] + + if matching_lines: + logger.console(f"Active rules in {component_name}:\n" + "\n".join(matching_lines)) + else: + logger.console(f"Active rules in {component_name}: No rules found") + + return file_list + + def remove_all_byteman_rules(self, component_name): + """Remove all Byteman rules for specific component""" + component_name = self.update_component_name(component_name) + rule_files = self.list_byteman_rules(component_name) + + if not rule_files: + logger.console(f"No active rules to remove for {component_name}") + return + + for rule_file in rule_files: + self.remove_byteman_rule(component_name, rule_file) diff --git 
a/hadoop-ozone/dist/src/main/smoketest/ozone-fi/BytemanKeywords.robot b/hadoop-ozone/dist/src/main/smoketest/ozone-fi/BytemanKeywords.robot index 24756f65b6d..8ab655e53c9 100644 --- a/hadoop-ozone/dist/src/main/smoketest/ozone-fi/BytemanKeywords.robot +++ b/hadoop-ozone/dist/src/main/smoketest/ozone-fi/BytemanKeywords.robot @@ -18,51 +18,28 @@ Library ../lib/BytemanLibrary.py Library String + *** Variables *** -${BYTEMAN_PORT} 9091 -${DATANODE1_BYTEMAN_HOST_PORT} datanode1:${BYTEMAN_PORT} -${DATANODE2_BYTEMAN_HOST_PORT} datanode2:${BYTEMAN_PORT} -${DATANODE3_BYTEMAN_HOST_PORT} datanode3:${BYTEMAN_PORT} -${OM1_BYTEMAN_HOST_PORT} om1:${BYTEMAN_PORT} -${OM2_BYTEMAN_HOST_PORT} om2:${BYTEMAN_PORT} -${OM3_BYTEMAN_HOST_PORT} om3:${BYTEMAN_PORT} -${RECON_BYTEMAN_HOST_PORT} recon:${BYTEMAN_PORT} -${SCM1_BYTEMAN_HOST_PORT} scm1.org:${BYTEMAN_PORT} -${SCM2_BYTEMAN_HOST_PORT} scm2.org:${BYTEMAN_PORT} -${SCM3_BYTEMAN_HOST_PORT} scm3:${BYTEMAN_PORT} -${HTTPFS_BYTEMAN_HOST_PORT} httpfs:${BYTEMAN_PORT} -${S3G_BYTEMAN_HOST_PORT} s3g:${BYTEMAN_PORT} +@{COMPONENTS} datanode1 datanode2 datanode3 om1 om2 om3 recon scm1 scm2 scm3 s3g + *** Keywords *** -Setup Byteman For Component - [Arguments] ${component} ${host_port} - ${host} ${port} = Split String ${host_port} : - Connect To Byteman Agent ${component} ${host} ${port} - -Setup All Byteman Agents +Inject Fault Into All Components + [Arguments] ${rule_file} Log Inside Setup All Byteman Agents - Setup Byteman For Component datanode1 ${DATANODE1_BYTEMAN_HOST_PORT} - Setup Byteman For Component datanode2 ${DATANODE2_BYTEMAN_HOST_PORT} - Setup Byteman For Component datanode3 ${DATANODE3_BYTEMAN_HOST_PORT} - Setup Byteman For Component om1 ${OM1_BYTEMAN_HOST_PORT} - Setup Byteman For Component om2 ${OM2_BYTEMAN_HOST_PORT} - Setup Byteman For Component om3 ${OM3_BYTEMAN_HOST_PORT} - Setup Byteman For Component recon ${RECON_BYTEMAN_HOST_PORT} - Setup Byteman For Component scm1 ${SCM1_BYTEMAN_HOST_PORT} - Setup Byteman For Component scm2 
${SCM2_BYTEMAN_HOST_PORT} - Setup Byteman For Component scm3 ${SCM3_BYTEMAN_HOST_PORT} - Setup Byteman For Component https ${HTTPFS_BYTEMAN_HOST_PORT} - Setup Byteman For Component s3g ${S3G_BYTEMAN_HOST_PORT} + FOR ${component} IN @{components} + Add Byteman Rule ${component} ${rule_file} + END -Inject Fault Into Component - [Arguments] ${component} ${rule_file} - Add Byteman Rule ${component} ${rule_file} - -Remove Fault From Component - [Arguments] ${component} ${rule_file} - Remove Byteman Rule ${component} ${rule_file} +Remove Fault From All Components + [Arguments] ${rule_file} + Log Inside Cleanup All Byteman Agents + FOR ${component} IN @{components} + Remove Byteman Rule ${component} ${rule_file} + END -Verify Byteman Rules Active - [Arguments] ${component} - ${rules} = List all Byteman Rules ${component} - Should Not Be Empty ${rules} \ No newline at end of file +List Byteman Rules for All Components + Log Inside Cleanup All Byteman Agents + FOR ${component} IN @{components} + List Byteman Rules ${component} + END diff --git a/hadoop-ozone/dist/src/main/smoketest/ozone-fi/byteman_faults_sample.robot b/hadoop-ozone/dist/src/main/smoketest/ozone-fi/byteman_faults_sample.robot index 6a460d7d1a3..9834964f153 100644 --- a/hadoop-ozone/dist/src/main/smoketest/ozone-fi/byteman_faults_sample.robot +++ b/hadoop-ozone/dist/src/main/smoketest/ozone-fi/byteman_faults_sample.robot @@ -15,15 +15,14 @@ *** Variables *** -${RULE} /opt/hadoop/share/ozone/byteman/skip-put-block.btm +${RULE1} /opt/hadoop/share/ozone/byteman/skip-put-block.btm +${RULE2} /opt/hadoop/share/ozone/byteman/skip-notify-group-remove.btm ${vol} vol1 ${buck} buck1 ${key} key1 -${SECURITY_ENABLED} true *** Settings *** Resource BytemanKeywords.robot -Resource os.robot Resource ../lib/os.robot Resource ../commonlib.robot Resource ../ozone-lib/shell.robot @@ -33,16 +32,25 @@ Suite Teardown Teardown Suite *** Keywords *** Setup Suite - Setup All Byteman Agents - Inject Fault Into Component datanode1 
${RULE} - Log To Console Kinit - Execute And Ignore Error kinit -kt /etc/security/keytabs/testuser.keytab testuser/om - + Inject Fault Into All Components ${RULE1} Teardown Suite - Remove Fault From Component datanode1 ${RULE} + Remove Fault From All Components ${RULE1} *** Test Cases *** -Print Byteman Port - ${BYTEMAN_PORT} = Get Environment Variable BYTEMAN_PORT - Log ${BYTEMAN_PORT} + +Print All Byteman Rules + List Byteman Rules for All Components + +Inject Byteman Rule in one component + Add Byteman Rule datanode1 ${RULE2} + List Byteman Rules datanode1 + Remove Byteman Rule datanode1 ${RULE2} + +Inject Multiple Byteman Rules in one component + Add Byteman Rule datanode1 ${RULE2} + List Byteman Rules datanode1 + Remove All Byteman Rules datanode1 + ${rules} = List Byteman Rules datanode1 + Should Be Empty ${rules} + Add Byteman Rule datanode1 ${RULE1} \ No newline at end of file From 97e3801675502fe3c48b41afbd996103f1841762 Mon Sep 17 00:00:00 2001 From: Soumitra Sulav Date: Tue, 24 Jun 2025 18:47:57 +0530 Subject: [PATCH 08/16] Removed extra variables --- .../ozone-fi/byteman_faults_sample.robot | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/hadoop-ozone/dist/src/main/smoketest/ozone-fi/byteman_faults_sample.robot b/hadoop-ozone/dist/src/main/smoketest/ozone-fi/byteman_faults_sample.robot index 9834964f153..36cbec934a5 100644 --- a/hadoop-ozone/dist/src/main/smoketest/ozone-fi/byteman_faults_sample.robot +++ b/hadoop-ozone/dist/src/main/smoketest/ozone-fi/byteman_faults_sample.robot @@ -17,26 +17,13 @@ *** Variables *** ${RULE1} /opt/hadoop/share/ozone/byteman/skip-put-block.btm ${RULE2} /opt/hadoop/share/ozone/byteman/skip-notify-group-remove.btm -${vol} vol1 -${buck} buck1 -${key} key1 *** Settings *** Resource BytemanKeywords.robot -Resource ../lib/os.robot -Resource ../commonlib.robot -Resource ../ozone-lib/shell.robot -Suite Setup Setup Suite -Suite Teardown Teardown Suite +Suite Setup Inject Fault Into All 
Components ${RULE1} +Suite Teardown Remove Fault From All Components ${RULE1} -*** Keywords *** -Setup Suite - Inject Fault Into All Components ${RULE1} - -Teardown Suite - Remove Fault From All Components ${RULE1} - *** Test Cases *** Print All Byteman Rules From 4bae053b44460fd39dd21e7c496a28f8f33238d3 Mon Sep 17 00:00:00 2001 From: Soumitra Sulav Date: Tue, 24 Jun 2025 18:53:53 +0530 Subject: [PATCH 09/16] Updated OZONE_OPTS to OZONE_SERVER_OPTS --- hadoop-ozone/dist/src/main/compose/ozonesecure-ha/byteman.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/byteman.yaml b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/byteman.yaml index 563c86e042a..0aa8bc4844f 100644 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/byteman.yaml +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/byteman.yaml @@ -17,7 +17,7 @@ x-byteman-config: &byteman-config environment: BYTEMAN_HOME: /opt/byteman/ - OZONE_OPTS: -javaagent:/opt/byteman.jar=listener:true,interface:0.0.0.0,port:${BYTEMAN_PORT} + OZONE_SERVER_OPTS: -javaagent:/opt/byteman.jar=listener:true,interface:0.0.0.0,port:${BYTEMAN_PORT} BYTEMAN_PORT: ${BYTEMAN_PORT} services: From c73cc3d72cdbd024efaf4620930c422f02172483 Mon Sep 17 00:00:00 2001 From: Soumitra Sulav Date: Wed, 25 Jun 2025 00:32:17 +0530 Subject: [PATCH 10/16] Added more methods for wider control --- .../smoketest/ozone-fi/BytemanKeywords.robot | 96 ++++++++++++++++--- .../ozone-fi/byteman_faults_sample.robot | 29 ++++-- 2 files changed, 108 insertions(+), 17 deletions(-) diff --git a/hadoop-ozone/dist/src/main/smoketest/ozone-fi/BytemanKeywords.robot b/hadoop-ozone/dist/src/main/smoketest/ozone-fi/BytemanKeywords.robot index 8ab655e53c9..e6ed9b6318d 100644 --- a/hadoop-ozone/dist/src/main/smoketest/ozone-fi/BytemanKeywords.robot +++ b/hadoop-ozone/dist/src/main/smoketest/ozone-fi/BytemanKeywords.robot @@ -20,26 +20,100 @@ Library String *** Variables *** -@{COMPONENTS} 
datanode1 datanode2 datanode3 om1 om2 om3 recon scm1 scm2 scm3 s3g - +@{ALL_COMPONENTS} datanode1 datanode2 datanode3 om1 om2 om3 recon scm1 scm2 scm3 s3g +@{DATANODE_COMPONENTS} datanode1 datanode2 datanode3 +@{OM_COMPONENTS} om1 om2 om3 +@{SCM_COMPONENTS} scm1 scm2 scm3 *** Keywords *** Inject Fault Into All Components [Arguments] ${rule_file} - Log Inside Setup All Byteman Agents - FOR ${component} IN @{components} - Add Byteman Rule ${component} ${rule_file} + Log Injecting fault ${rule_file} into all components + FOR ${component} IN @{ALL_COMPONENTS} + Run Keyword And Continue On Failure Add Byteman Rule ${component} ${rule_file} END + Remove Fault From All Components [Arguments] ${rule_file} - Log Inside Cleanup All Byteman Agents - FOR ${component} IN @{components} - Remove Byteman Rule ${component} ${rule_file} + Log Removing fault ${rule_file} from all components + FOR ${component} IN @{ALL_COMPONENTS} + Run Keyword And Continue On Failure Remove Byteman Rule ${component} ${rule_file} END + List Byteman Rules for All Components - Log Inside Cleanup All Byteman Agents - FOR ${component} IN @{components} - List Byteman Rules ${component} + Log Listing active rules for all components + FOR ${component} IN @{ALL_COMPONENTS} + Run Keyword And Continue On Failure List Byteman Rules ${component} + END + + +Remove All Rules From All Components + Log Removing all rules from all components + FOR ${component} IN @{ALL_COMPONENTS} + Run Keyword And Continue On Failure Remove All Byteman Rules ${component} + END + + +Inject Fault Into Datanodes Only + [Arguments] ${rule_file} + Log Injecting fault ${rule_file} into datanodes only + FOR ${component} IN @{DATANODE_COMPONENTS} + Run Keyword And Continue On Failure Add Byteman Rule ${component} ${rule_file} + END + +List Byteman Rules for Datanodes + Log Listing active rules for all datanodes + FOR ${component} IN @{DATANODE_COMPONENTS} + Run Keyword And Continue On Failure List Byteman Rules ${component} + END + +Remove 
Fault From Datanodes Only + [Arguments] ${rule_file} + Log Removing fault ${rule_file} from datanodes only + FOR ${component} IN @{DATANODE_COMPONENTS} + Run Keyword And Continue On Failure Remove Byteman Rule ${component} ${rule_file} + END + + +Inject Fault Into OMs Only + [Arguments] ${rule_file} + Log Injecting fault ${rule_file} into oms only + FOR ${component} IN @{OM_COMPONENTS} + Run Keyword And Continue On Failure Add Byteman Rule ${component} ${rule_file} + END + +List Byteman Rules for OMs + Log Listing active rules for all OMs + FOR ${component} IN @{OM_COMPONENTS} + Run Keyword And Continue On Failure List Byteman Rules ${component} + END + +Remove Fault From OMs Only + [Arguments] ${rule_file} + Log Removing fault ${rule_file} from oms only + FOR ${component} IN @{OM_COMPONENTS} + Run Keyword And Continue On Failure Remove Byteman Rule ${component} ${rule_file} + END + + +Inject Fault Into SCMs Only + [Arguments] ${rule_file} + Log Injecting fault ${rule_file} into scms only + FOR ${component} IN @{SCM_COMPONENTS} + Run Keyword And Continue On Failure Add Byteman Rule ${component} ${rule_file} + END + +List Byteman Rules for SCMs + Log Listing active rules for all SCMs + FOR ${component} IN @{SCM_COMPONENTS} + Run Keyword And Continue On Failure List Byteman Rules ${component} + END + +Remove Fault From SCMs Only + [Arguments] ${rule_file} + Log Removing fault ${rule_file} from scms only + FOR ${component} IN @{SCM_COMPONENTS} + Run Keyword And Continue On Failure Remove Byteman Rule ${component} ${rule_file} END diff --git a/hadoop-ozone/dist/src/main/smoketest/ozone-fi/byteman_faults_sample.robot b/hadoop-ozone/dist/src/main/smoketest/ozone-fi/byteman_faults_sample.robot index 36cbec934a5..0f32f39cec8 100644 --- a/hadoop-ozone/dist/src/main/smoketest/ozone-fi/byteman_faults_sample.robot +++ b/hadoop-ozone/dist/src/main/smoketest/ozone-fi/byteman_faults_sample.robot @@ -20,14 +20,14 @@ ${RULE2} 
/opt/hadoop/share/ozone/byteman/skip-notify-group-remove.btm *** Settings *** Resource BytemanKeywords.robot -Suite Setup Inject Fault Into All Components ${RULE1} -Suite Teardown Remove Fault From All Components ${RULE1} *** Test Cases *** Print All Byteman Rules + Inject Fault Into All Components ${RULE1} List Byteman Rules for All Components + Remove Fault From All Components ${RULE1} Inject Byteman Rule in one component Add Byteman Rule datanode1 ${RULE2} @@ -35,9 +35,26 @@ Inject Byteman Rule in one component Remove Byteman Rule datanode1 ${RULE2} Inject Multiple Byteman Rules in one component + Add Byteman Rule datanode1 ${RULE1} Add Byteman Rule datanode1 ${RULE2} - List Byteman Rules datanode1 - Remove All Byteman Rules datanode1 - ${rules} = List Byteman Rules datanode1 + ${rules} = List Byteman Rules datanode1 + ${rules_count} = Get Length ${rules} + Should Be Equal As Integers ${rules_count} 2 + Remove All Byteman Rules datanode1 + ${rules} = List Byteman Rules datanode1 Should Be Empty ${rules} - Add Byteman Rule datanode1 ${RULE1} \ No newline at end of file + +Test Datanode Only Fault Injection + Inject Fault Into Datanodes Only ${RULE1} + List Byteman Rules for Datanodes + Remove Fault From Datanodes Only ${RULE1} + +Test OM Only Fault Injection + Inject Fault Into OMs Only ${RULE1} + List Byteman Rules for OMs + Remove Fault From OMs Only ${RULE1} + +Test SCM Only Fault Injection + Inject Fault Into SCMs Only ${RULE1} + List Byteman Rules for SCMs + Remove Fault From SCMs Only ${RULE1} \ No newline at end of file From 50f6067a0323b6b868e34fb5badd4df4ce58bd47 Mon Sep 17 00:00:00 2001 From: Soumitra Sulav Date: Wed, 25 Jun 2025 01:02:49 +0530 Subject: [PATCH 11/16] Add #suite:HA-secure to enable acceptance-suites run --- .../dist/src/main/compose/ozonesecure-ha/test-byteman.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-byteman.sh 
b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-byteman.sh index 41f6c94b1fc..cd1aad851c7 100644 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-byteman.sh +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-byteman.sh @@ -15,7 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -#suite:misc +#suite:HA-secure set -u -o pipefail From 47e5d5320a1f5f082eafd84e03b8fa496d9db28c Mon Sep 17 00:00:00 2001 From: Soumitra Sulav Date: Wed, 25 Jun 2025 01:09:33 +0530 Subject: [PATCH 12/16] Updating interface to address in java opts --- hadoop-ozone/dist/src/main/compose/ozonesecure-ha/byteman.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/byteman.yaml b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/byteman.yaml index 0aa8bc4844f..73388b9ac44 100644 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/byteman.yaml +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/byteman.yaml @@ -17,7 +17,7 @@ x-byteman-config: &byteman-config environment: BYTEMAN_HOME: /opt/byteman/ - OZONE_SERVER_OPTS: -javaagent:/opt/byteman.jar=listener:true,interface:0.0.0.0,port:${BYTEMAN_PORT} + OZONE_SERVER_OPTS: -javaagent:/opt/byteman.jar=listener:true,address:0.0.0.0,port:${BYTEMAN_PORT} BYTEMAN_PORT: ${BYTEMAN_PORT} services: From 16527c93713cda7ec772f881e02f5e657b55cf08 Mon Sep 17 00:00:00 2001 From: Soumitra Sulav Date: Wed, 25 Jun 2025 01:25:28 +0530 Subject: [PATCH 13/16] Add README --- .../src/main/smoketest/ozone-fi/README.md | 197 ++++++++++++++++++ 1 file changed, 197 insertions(+) create mode 100644 hadoop-ozone/dist/src/main/smoketest/ozone-fi/README.md diff --git a/hadoop-ozone/dist/src/main/smoketest/ozone-fi/README.md b/hadoop-ozone/dist/src/main/smoketest/ozone-fi/README.md new file mode 100644 index 00000000000..ba87fcb774e --- /dev/null +++ b/hadoop-ozone/dist/src/main/smoketest/ozone-fi/README.md @@ -0,0 
+1,197 @@ +# Ozone Byteman Fault Injection Testing with Robot Framework + +This directory contains Robot Framework test suites for performing fault injection testing in Apache Ozone using Byteman. + +## Overview + +Byteman is a Java bytecode manipulation tool that allows you to inject faults, delays, and other behaviors into running Java applications without modifying the source code. This testing framework uses Robot Framework to orchestrate Byteman operations across Ozone cluster components. + +## Prerequisites + +- Docker and Docker Compose +- Apache Ozone cluster running with Byteman agents enabled +- Robot Framework (automatically installed in the test containers) +- Byteman tools (bmsubmit) available in the test environment + +## Architecture + +### Components + +The fault injection framework consists of: + +1. **BytemanLibrary.py** - Python library providing Byteman operations +2. **BytemanKeywords.robot** - Robot Framework keywords for common operations +3. **Test files** - Specific test scenarios using the keywords + +### Supported Components + +- **Datanodes**: `datanode1`, `datanode2`, `datanode3` +- **OzoneManagers**: `om1`, `om2`, `om3` +- **StorageContainerManagers**: `scm1`, `scm2`, `scm3` +- **Other services**: `recon`, `s3g`, `httpfs` + +## Usage + +### Basic Test Structure + +```robot +*** Settings *** +Resource BytemanKeywords.robot +Suite Setup Setup Test Environment +Suite Teardown Cleanup Test Environment + +*** Variables *** +${RULE_FILE} /opt/hadoop/share/ozone/byteman/my-rule.btm + +*** Test Cases *** +My Fault Injection Test + Add Byteman Rule datanode1 ${RULE_FILE} + # Run your test operations here + Remove Byteman Rule datanode1 ${RULE_FILE} +``` + +### Group Operations +| Keyword | Description | Example | +|---------|-------------|---------| +| `Inject Fault Into All Components` | Inject rule into all components | `Inject Fault Into All Components ${rule}` | +| `Remove Fault From All Components` | Remove rule from all components | 
`Remove Fault From All Components ${rule}` | +| `Inject Fault Into Datanodes Only` | Inject rule into datanodes only | `Inject Fault Into Datanodes Only ${rule}` | +| `Inject Fault Into OMs Only` | Inject rule into OMs only | `Inject Fault Into OMs Only ${rule}` | +| `Inject Fault Into SCMs Only` | Inject rule into SCMs only | `Inject Fault Into SCMs Only ${rule}` | + +### Individual Operations +| Keyword | Description | Example | +|---------|-------------|---------| +| `Add Byteman Rule` | Add rule to specific component | `Add Byteman Rule datanode1 ${rule}` | +| `Remove Byteman Rule` | Remove rule from specific component | `Remove Byteman Rule datanode1 ${rule}` | +| `List Byteman Rules` | List active rules for component | `List Byteman Rules datanode1` | +| `Remove All Byteman Rules` | Remove all rules from component | `Remove All Byteman Rules datanode1` | + +### Bulk Operations +| Keyword | Description | Example | +|---------|-------------|---------| +| `List Byteman Rules for All Components` | List rules for all components | `List Byteman Rules for All Components` | +| `Remove All Rules From All Components` | Remove all rules from all components | `Remove All Rules From All Components` | + + +## Byteman Rules + +### Rule Location + +Byteman rules are stored in: `/opt/hadoop/share/ozone/byteman/` + +### Available Rules + +- `skip-put-block.btm` - Blocks putBlock operations in BlockManagerImpl +- `skip-notify-group-remove.btm` - Skips notifyGroupRemove in ContainerStateMachine +- Custom rules can be added to this directory + +### Environment Variables + +- `BYTEMAN_PORT` - Port for Byteman agent communication (default: 9091) +- `BYTEMAN_HOME` - Byteman installation directory + +### Component Variables + +The framework defines component groups: + +```robot +@{ALL_COMPONENTS} datanode1 datanode2 datanode3 om1 om2 om3 recon scm1 scm2 scm3 s3g +@{DATANODE_COMPONENTS} datanode1 datanode2 datanode3 +@{OM_COMPONENTS} om1 om2 om3 +@{SCM_COMPONENTS} scm1 scm2 scm3 +``` 
+ +## Running Tests + +### Local Development + +```bash +# Navigate to compose directory +cd hadoop-ozone/dist/src/main/compose/ozonesecure-ha + +# Start cluster with Byteman enabled +export COMPOSE_FILE=docker-compose.yaml:byteman.yaml +docker-compose up -d + +# Run fault injection tests +./test-byteman.sh +``` + +### CI/CD Integration + +```bash +# Run specific test suite +execute_robot_test om1 ozone-fi/byteman_faults_sample.robot +``` + +## Troubleshooting + +### Common Issues + +1. **bmsubmit not found** + - Ensure Byteman is properly installed in the container + - Check that PATH includes `/usr/local/bin` + +2. **Connection refused** + - Verify Byteman agents are running on target components + - Check BYTEMAN_PORT configuration + +3. **Rule file not found** + - Ensure rule files are mounted in the container + - Verify path `/opt/hadoop/share/ozone/byteman/` + +4. **Classpath errors** + - Rebuild Ozone distribution: `mvn clean install -DskipTests` + - Ensure all required JARs are present + +### Debugging + +Run `bmsubmit` directly and log its output to inspect the active rules: + +```robot +*** Settings *** +Library BytemanLibrary.py WITH NAME Byteman +Library OperatingSystem + +*** Test Cases *** +Debug Byteman Operations + ${output} = Execute bmsubmit -p 9091 -h datanode1 -l + Log ${output} +``` + +### Validation + +Check if Byteman agents are running: + +```bash +# Check if agent is listening +docker exec datanode1 netstat -ln | grep 9091 + +# List active rules +docker exec datanode1 bmsubmit -p 9091 -l +``` + +## Best Practices + +1. **Always clean up** - Use Suite Teardown to remove injected faults +2. **Use specific components** - Target specific services rather than all components when possible +3. **Error handling** - Use `Run Keyword And Continue On Failure` for bulk operations +4. **Documentation** - Document expected behavior and fault scenarios +5. **Isolation** - Ensure tests don't interfere with each other + +## Contributing + +When adding new test cases: + +1.
Follow the existing keyword naming conventions +2. Add appropriate documentation strings +3. Include both positive and negative test scenarios +4. Ensure proper cleanup in teardown methods +5. Test with different cluster configurations + +## References + +- [Byteman Documentation](http://byteman.jboss.org/) +- [Robot Framework User Guide](https://robotframework.org/robotframework/latest/RobotFrameworkUserGuide.html) +- [Apache Ozone Documentation](https://ozone.apache.org/) \ No newline at end of file From fdc3bbc9f7b4939f6f32a14fdbadad947a7c06ba Mon Sep 17 00:00:00 2001 From: Soumitra Sulav Date: Wed, 25 Jun 2025 01:32:00 +0530 Subject: [PATCH 14/16] Added License info in README --- .../dist/src/main/smoketest/ozone-fi/README.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/hadoop-ozone/dist/src/main/smoketest/ozone-fi/README.md b/hadoop-ozone/dist/src/main/smoketest/ozone-fi/README.md index ba87fcb774e..41fbf98a7cb 100644 --- a/hadoop-ozone/dist/src/main/smoketest/ozone-fi/README.md +++ b/hadoop-ozone/dist/src/main/smoketest/ozone-fi/README.md @@ -1,3 +1,18 @@ +<!-- Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to You under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. -->
+ # Ozone Byteman Fault Injection Testing with Robot Framework This directory contains Robot Framework test suites for performing fault injection testing in Apache Ozone using Byteman. From bc9ba5870c352b28fbb004dc25e5361eec4c94f6 Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" <6454655+adoroszlai@users.noreply.github.com> Date: Wed, 25 Jun 2025 12:16:13 +0200 Subject: [PATCH 15/16] Bump ozone-runner to 20250625-1-jdk21 --- hadoop-ozone/dist/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-ozone/dist/pom.xml b/hadoop-ozone/dist/pom.xml index 55a4620c355..b6a62358b6a 100644 --- a/hadoop-ozone/dist/pom.xml +++ b/hadoop-ozone/dist/pom.xml @@ -25,7 +25,7 @@ Apache Ozone Distribution - 20250624-1-jdk21 + 20250625-1-jdk21 ghcr.io/apache/ozone-testkrb5:20241129-1 apache/ozone -rocky From bbd2ecaa14dbee07ad14430d1404a86fa14bb82d Mon Sep 17 00:00:00 2001 From: Soumitra Sulav Date: Wed, 25 Jun 2025 22:11:55 +0530 Subject: [PATCH 16/16] Updated logger from console to info for cleaner log --- .../dist/src/main/smoketest/lib/BytemanLibrary.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hadoop-ozone/dist/src/main/smoketest/lib/BytemanLibrary.py b/hadoop-ozone/dist/src/main/smoketest/lib/BytemanLibrary.py index a1b54367d0b..31cf508c874 100644 --- a/hadoop-ozone/dist/src/main/smoketest/lib/BytemanLibrary.py +++ b/hadoop-ozone/dist/src/main/smoketest/lib/BytemanLibrary.py @@ -36,7 +36,7 @@ def run_byteman_cmd(self, component_name, args, action_desc): if result.returncode != 0: raise RuntimeError(f"Failed to {action_desc} for {component_name}: {result.stderr.strip()}") - logger.console(f"{action_desc} for {component_name} successful.") + logger.info(f"{action_desc} for {component_name} successful.") return result.stdout def add_byteman_rule(self, component_name, rule_file): @@ -58,9 +58,9 @@ def list_byteman_rules(self, component_name): file_list = [line.split()[2] for line in matching_lines if len(line.split()) >= 3] 
if matching_lines: - logger.console(f"Active rules in {component_name}:\n" + "\n".join(matching_lines)) + logger.info(f"Active rules in {component_name}:\n" + "\n".join(matching_lines)) else: - logger.console(f"Active rules in {component_name}: No rules found") + logger.info(f"Active rules in {component_name}: No rules found") return file_list @@ -70,7 +70,7 @@ def remove_all_byteman_rules(self, component_name): rule_files = self.list_byteman_rules(component_name) if not rule_files: - logger.console(f"No active rules to remove for {component_name}") + logger.info(f"No active rules to remove for {component_name}") return for rule_file in rule_files: