diff --git a/getting-started/ceph/.env.example b/getting-started/ceph/.env.example new file mode 100644 index 0000000000..263b399b83 --- /dev/null +++ b/getting-started/ceph/.env.example @@ -0,0 +1,32 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +LANG=en_US.utf8 # Default system locale used inside containers +TZ=UTC # Timezone used inside containers +DASHBOARD_PORT=8443 # Port for Ceph Dashboard +INTERNAL_DASHBOARD_PORT=8443 # Internal port for Ceph Dashboard +RGW_PORT=8080 # Port for Rados Gateway +MON_IP=$(hostname -i) # IP address of the monitor +RGW_ACCESS_KEY=POLARIS123ACCESS # Access key for Polaris S3 user +RGW_SECRET_KEY=POLARIS456SECRET # Secret key for Polaris S3 user +FSID=b2f59c4b-5f14-4f8c-a9b7-3b7998c76a0e # Unique cluster identifier (use `uuidgen` to regenerate) +OSD_UUID_1=80505106-0d32-4777-bac9-3dfc901b1273 # Unique OSD identifier (use `uuidgen` to regenerate) +S3_ENDPOINT_URL=http://rgw1:7480 # Internal endpoint for S3-compatible RGW service +S3_REGION=us-east-1 # S3 region name +S3_POLARIS_BUCKET=polaris-storage # Default S3 bucket name for Polaris storage \ No newline at end of file diff --git a/getting-started/ceph/README.md b/getting-started/ceph/README.md new file mode 100644 index 0000000000..b70da3ee59 --- /dev/null +++ b/getting-started/ceph/README.md @@ -0,0 +1,148 @@ + + +# Getting Started with Apache Polaris and Ceph + +## Overview + +This guide describes how to spin up a **single-node Ceph cluster** with **RADOS Gateway (RGW)** for S3-compatible storage and configure it for use by **Polaris**. + +This example cluster is configured for basic access key authentication only. +It does not include STS (Security Token Service) or temporary credentials. +All access to the Ceph RGW (RADOS Gateway) and Polaris integration uses static S3-style credentials (as configured via radosgw-admin user create). + +Spark is used as a query engine. This example assumes a local Spark installation. +See the [Spark Notebooks Example](../spark/README.md) for a more advanced Spark setup. + +## Starting the Example + +Before starting the Ceph + Polaris stack, you’ll need to configure environment variables that define network settings, credentials, and cluster IDs. 
+ +The services are started **in sequence**: +1. Monitor + Manager +2. OSD +3. RGW +4. Polaris + +Note: this example pulls the `apache/polaris:latest` image, but assumes the image is `1.2.0-incubating` or later. + +### 1. Copy the example environment file +```shell +cp .env.example .env +``` + +### 2. Start monitor and manager +```shell +docker compose up -d mon1 mgr +``` + +### 3. Start OSD +```shell +docker compose up -d osd1 +``` + +### 4. Start RGW +```shell +docker compose up -d rgw1 +``` +#### Check status +```shell +docker exec --interactive --tty ceph-mon1-1 ceph -s +``` +You should see something like: +```yaml +cluster: + id: b2f59c4b-5f14-4f8c-a9b7-3b7998c76a0e + health: HEALTH_WARN + mon is allowing insecure global_id reclaim + 1 monitors have not enabled msgr2 + 6 pool(s) have no replicas configured + +services: + mon: 1 daemons, quorum mon1 (age 49m) + mgr: mgr(active, since 94m) + osd: 1 osds: 1 up (since 36m), 1 in (since 93m) + rgw: 1 daemon active (1 hosts, 1 zones) +``` + +### 5. Create bucket for Polaris storage +```shell +docker compose up -d setup_bucket +``` + +### 6. Run Polaris service +```shell +docker compose up -d polaris +``` + +### 7. Setup polaris catalog +```shell +docker compose up -d polaris-setup +``` + +## 8. 
Connecting From Spark + +```shell +bin/spark-sql \ + --packages org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.9.0,org.apache.iceberg:iceberg-aws-bundle:1.9.0 \ + --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \ + --conf spark.sql.catalog.polaris=org.apache.iceberg.spark.SparkCatalog \ + --conf spark.sql.catalog.polaris.type=rest \ + --conf spark.sql.catalog.polaris.io-impl=org.apache.iceberg.aws.s3.S3FileIO \ + --conf spark.sql.catalog.polaris.uri=http://localhost:8181/api/catalog \ + --conf spark.sql.catalog.polaris.token-refresh-enabled=true \ + --conf spark.sql.catalog.polaris.warehouse=quickstart_catalog \ + --conf spark.sql.catalog.polaris.scope=PRINCIPAL_ROLE:ALL \ + --conf spark.sql.catalog.polaris.header.X-Iceberg-Access-Delegation="" \ + --conf spark.sql.catalog.polaris.credential=root:s3cr3t \ + --conf spark.sql.catalog.polaris.client.region=irrelevant \ + --conf spark.sql.catalog.polaris.s3.access-key-id=POLARIS123ACCESS \ + --conf spark.sql.catalog.polaris.s3.secret-access-key=POLARIS456SECRET +``` + +Note: `s3cr3t` is defined as the password for the `root` user in the `docker-compose.yml` file. + +Note: The `client.region` configuration is required for the AWS S3 client to work, but it is not used in this example +since Ceph does not require a specific region. + +## 9. Running Queries + +Run inside the Spark SQL shell: + +``` +spark-sql (default)> use polaris; +Time taken: 0.837 seconds + +spark-sql ()> create namespace ns; +Time taken: 0.374 seconds + +spark-sql ()> create table ns.t1 as select 'abc'; +Time taken: 2.192 seconds + +spark-sql ()> select * from ns.t1; +abc +Time taken: 0.579 seconds, Fetched 1 row(s) +``` +## Lack of Credential Vending + +Notice that the Spark configuration sets the `X-Iceberg-Access-Delegation` header to an empty value instead of requesting credential vending. +This is because the example cluster does not include STS (Security Token Service) or temporary credentials. 
+ +The lack of an STS API is represented in the Catalog storage configuration by the +`stsUnavailable=true` property. diff --git a/getting-started/ceph/ceph-conf/ceph.conf b/getting-started/ceph/ceph-conf/ceph.conf new file mode 100644 index 0000000000..b93e5b5909 --- /dev/null +++ b/getting-started/ceph/ceph-conf/ceph.conf @@ -0,0 +1,41 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +[global] +fsid = b2f59c4b-5f14-4f8c-a9b7-3b7998c76a0e +mon_initial_members = mon1 +mon_host = mon1 +auth_cluster_required = cephx +auth_service_required = cephx +auth_client_required = cephx +osd_pool_default_size = 1 +osd_pool_default_min_size = 1 +osd_pool_default_pg_num = 333 +osd_crush_chooseleaf_type = 1 +mon_allow_pool_size_one= true + +[mon.mon1] +mon_data = /var/lib/ceph/mon/ceph-mon1 +mon_rocksdb_min_wal_logs = 1 +mon_rocksdb_max_total_wal_size = 64M +mon_rocksdb_options = max_background_compactions=4;max_background_flushes=2 + +[client.rgw1] +host = ceph-rgw1 +rgw_frontends = civetweb port=7480 diff --git a/getting-started/ceph/docker-compose.yml b/getting-started/ceph/docker-compose.yml new file mode 100644 index 0000000000..9febf037e0 --- /dev/null +++ b/getting-started/ceph/docker-compose.yml @@ -0,0 +1,209 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +services: + + mon1: + image: quay.io/ceph/ceph:v19.2.3 + entrypoint: "/bin/sh" + command: + - "-c" + - >- + set -ex; + mkdir -p /var/lib/ceph/bootstrap-osd; + mkdir -p /var/lib/ceph/osd/ceph-0; + ceph-authtool --create-keyring /var/lib/ceph/tmp/ceph.mon.keyring --gen-key -n mon. 
--cap mon 'allow *'; + ceph-authtool --create-keyring /etc/ceph/ceph.client.admin.keyring \ + --gen-key -n client.admin \ + --cap mon 'allow *' --cap osd 'allow *' --cap mgr 'allow *' --cap mds 'allow *'; + ceph-authtool --create-keyring /var/lib/ceph/bootstrap-osd/ceph.keyring \ + --gen-key -n client.bootstrap-osd \ + --cap mon 'profile bootstrap-osd' --cap mgr 'allow r'; + ceph-authtool /var/lib/ceph/tmp/ceph.mon.keyring --import-keyring /etc/ceph/ceph.client.admin.keyring; + ceph-authtool /var/lib/ceph/tmp/ceph.mon.keyring --import-keyring /var/lib/ceph/bootstrap-osd/ceph.keyring; + chown ceph:ceph /var/lib/ceph/tmp/ceph.mon.keyring; + monmaptool --create --add mon1 ${MON_IP} --fsid ${FSID} /var/lib/ceph/tmp/monmap --clobber; + ceph-mon --mkfs -i mon1 --monmap /var/lib/ceph/tmp/monmap --keyring /var/lib/ceph/tmp/ceph.mon.keyring; + ceph-mon -i mon1 -f -d; + environment: + MON_IP: ${MON_IP} + FSID: ${FSID} + volumes: + - ./ceph-conf:/etc/ceph + - ./bootstrap-osd:/var/lib/ceph/bootstrap-osd + + mgr: + image: quay.io/ceph/ceph:v19.2.3 + entrypoint: "/bin/sh" + command: + - "-c" + - >- + set -ex; + mkdir -p /var/lib/ceph/mgr/ceph-mgr; + ceph auth get-or-create mgr.mgr mon 'allow profile mgr' osd 'allow *' mds 'allow *' > /var/lib/ceph/mgr/ceph-mgr/keyring; + ceph-mgr -f -i mgr; + volumes: + - ./ceph-conf:/etc/ceph + depends_on: + - mon1 + ports: + - ${DASHBOARD_PORT}:${INTERNAL_DASHBOARD_PORT} + + osd1: + pid: host + privileged: true + image: quay.io/ceph/ceph:v19.2.3 + environment: + OSD_UUID_1: ${OSD_UUID_1} + entrypoint: "/bin/sh" + command: + - "-c" + - >- + set -ex; + mkdir -p /var/lib/ceph/bootstrap-osd; + mkdir -p /var/lib/ceph/osd/ceph-0; + chown -R ceph:ceph /var/lib/ceph/osd/ceph-0; + ceph-authtool --create-keyring /var/lib/ceph/osd/ceph-0/keyring \ + --gen-key -n osd.0 \ + --cap osd 'allow *' \ + --cap mon 'allow profile osd'; + ceph auth del osd.0 || true; + ceph auth add osd.0 -i /var/lib/ceph/osd/ceph-0/keyring; + ceph osd new ${OSD_UUID_1} -n 
client.bootstrap-osd -k /var/lib/ceph/bootstrap-osd/ceph.keyring; + ceph-osd -i 0 --mkfs --osd-data /var/lib/ceph/osd/ceph-0 --osd-uuid ${OSD_UUID_1} \ + --keyring /var/lib/ceph/osd/ceph-0/keyring; + ceph-osd -f -i 0; + volumes: + - ./ceph-conf:/etc/ceph + - ./bootstrap-osd:/var/lib/ceph/bootstrap-osd + depends_on: + - mon1 + + rgw1: + image: quay.io/ceph/ceph:v19.2.3 + container_name: rgw1 + environment: + MON_IP: ${MON_IP} + RGW_ACCESS_KEY: ${RGW_ACCESS_KEY} + RGW_SECRET_KEY: ${RGW_SECRET_KEY} + entrypoint: "/bin/sh" + command: + - "-c" + - >- + set -ex; + mkdir -p /var/lib/ceph/radosgw/ceph-rgw1; + ceph auth get-or-create client.rgw1 mon 'allow rw' osd 'allow rwx'; + ceph auth caps client.rgw1 mon 'allow rw' osd 'allow rwx'; + ceph-authtool --create-keyring /var/lib/ceph/radosgw/ceph-rgw1/keyring --gen-key -n client.rgw1 --cap osd 'allow *' --cap mon 'allow *'; + ceph auth del client.rgw1 || true; + ceph auth add client.rgw1 -i /var/lib/ceph/radosgw/ceph-rgw1/keyring; + radosgw-admin user create --uid="polaris-user" \ + --display-name="Polaris User" \ + --access-key="${RGW_ACCESS_KEY}" \ + --secret-key="${RGW_SECRET_KEY}" || true; + echo ">>> RGW user created (access=${RGW_ACCESS_KEY}, secret=${RGW_SECRET_KEY})"; + radosgw -n client.rgw1 --rgw-frontends="beast port=7480" --foreground; + ports: + - "7480:7480" # RGW HTTP endpoint (S3) + - "7481:7481" + volumes: + - ./ceph-conf:/etc/ceph + depends_on: + - osd1 + + setup_bucket: + image: peakcom/s5cmd:latest + depends_on: + - rgw1 + environment: + AWS_ACCESS_KEY_ID: ${RGW_ACCESS_KEY} + AWS_SECRET_ACCESS_KEY: ${RGW_SECRET_KEY} + S3_ENDPOINT_URL: ${S3_ENDPOINT_URL} + S3_REGION: ${S3_REGION} + S3_POLARIS_BUCKET: ${S3_POLARIS_BUCKET} + entrypoint: "/bin/sh" + command: + - "-c" + - >- + set -ex; + echo ">>> Waiting for RGW to become ready..."; + sleep 5; + echo ">>> Create bucket if not exist..."; + /s5cmd --endpoint-url ${S3_ENDPOINT_URL} mb s3://${S3_POLARIS_BUCKET} || true; + tail -f /dev/null; + + polaris: + image: 
apache/polaris:latest + ports: + # API port + - "8181:8181" + # Optional, allows attaching a debugger to the Polaris JVM + - "5005:5005" + depends_on: + - rgw1 + environment: + JAVA_DEBUG: true + JAVA_DEBUG_PORT: "*:5005" + AWS_REGION: us-east-1 + AWS_ACCESS_KEY_ID: ${RGW_ACCESS_KEY} + AWS_SECRET_ACCESS_KEY: ${RGW_SECRET_KEY} + POLARIS_BOOTSTRAP_CREDENTIALS: POLARIS,root,s3cr3t + polaris.realm-context.realms: POLARIS + quarkus.otel.sdk.disabled: "true" + healthcheck: + test: ["CMD", "curl", "http://localhost:8182/q/health"] + interval: 2s + timeout: 10s + retries: 10 + start_period: 10s + + polaris-setup: + image: alpine/curl + depends_on: + polaris: + condition: service_healthy + environment: + - CLIENT_ID=root + - CLIENT_SECRET=s3cr3t + volumes: + - ../assets/polaris/:/polaris + entrypoint: "/bin/sh" + command: + - "-c" + - >- + chmod +x /polaris/create-catalog.sh; + chmod +x /polaris/obtain-token.sh; + source /polaris/obtain-token.sh; + echo Creating catalog...; + export STORAGE_CONFIG_INFO='{"storageType":"S3", + "endpoint":"http://localhost:7480", + "endpointInternal":"http://rgw1:7480", + "stsUnavailable":"true", + "pathStyleAccess":true}'; + export STORAGE_LOCATION='s3://polaris-storage'; + /polaris/create-catalog.sh POLARIS $$TOKEN; + echo Extra grants...; + curl -H "Authorization: Bearer $$TOKEN" -H 'Content-Type: application/json' \ + -X PUT \ + http://polaris:8181/api/management/v1/catalogs/quickstart_catalog/catalog-roles/catalog_admin/grants \ + -d '{"type":"catalog", "privilege":"CATALOG_MANAGE_CONTENT"}'; + echo Done.; + curl -H "Authorization: Bearer $$TOKEN" -H 'Content-Type: application/json' \ + -X GET \ + http://polaris:8181/api/management/v1/catalogs;