diff --git a/apache/livy/ExposedUI/README.md b/apache/livy/ExposedUI/README.md
new file mode 100644
index 00000000..84c52f17
--- /dev/null
+++ b/apache/livy/ExposedUI/README.md
@@ -0,0 +1,49 @@
+# Set up Apache Livy with Docker Compose
+
+```bash
+docker compose build spark-master
+docker compose up
+```
+# Access the Livy UI (http://localhost:8998) and execute PySpark code via the REST API
+```bash
+curl -X POST -H "Content-Type: application/json" -d '{"kind":"pyspark"}' http://localhost:8998/sessions
+# {"id":6,"name":null,"appId":null,"owner":null,"proxyUser":null,"state":"starting","kind":"pyspark","appInfo":{"driverLogUrl":null,"sparkUiUrl":null},"log":["stdout: ","\nstderr: "],"ttl":null,"driverMemory":null,"driverCores":0,"executorMemory":null,"executorCores":0,"conf":{},"archives":[],"files":[],"heartbeatTimeoutInSecond":0,"jars":[],"numExecutors":0,"pyFiles":[],"queue":null}
+
+# replace $id with the id from the response above
+curl -X POST -H "Content-Type: application/json" -d '{"code":"import os\nprint(os.getcwd())"}' http://localhost:8998/sessions/$id/statements
+# "java.lang.IllegalStateException: Session is in state starting"
+# wait ~30 seconds for the session to start, then retry
+# {"id":0,"code":"import os\nprint(os.getcwd())","state":"waiting","output":null,"progress":0.0,"started":0,"completed":0}
+
+# replace $statements_id with the id from the response above
+curl http://127.0.0.1:8998/sessions/$id/statements/$statements_id
+# output.data is the stdout
+# {"id":0,"code":"import os\nprint(os.getcwd())","state":"available","output":{"status":"ok","execution_count":0,"data":{"text/plain":"/opt"}},"progress":1.0,"started":1754515902001,"completed":1754515902003}
+```
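+# Optional: script the full session lifecycle
+A minimal sketch that chains the calls above and polls until results are ready, instead of pasting ids by hand. It assumes the stack is already up and that `jq` is installed on the host.
+```bash
+#!/usr/bin/env bash
+set -euo pipefail
+LIVY_URL="http://localhost:8998"
+
+# Create a PySpark session and capture its id
+SESSION_ID=$(curl -s -X POST -H "Content-Type: application/json" \
+  -d '{"kind":"pyspark"}' "$LIVY_URL/sessions" | jq -r '.id')
+
+# Poll until the session leaves "starting" and becomes idle
+until [ "$(curl -s "$LIVY_URL/sessions/$SESSION_ID" | jq -r '.state')" = "idle" ]; do
+  sleep 5
+done
+
+# Submit a statement and capture its id
+STATEMENT_ID=$(curl -s -X POST -H "Content-Type: application/json" \
+  -d '{"code":"import os\nprint(os.getcwd())"}' \
+  "$LIVY_URL/sessions/$SESSION_ID/statements" | jq -r '.id')
+
+# Poll until the statement result is available, then print its stdout payload
+until [ "$(curl -s "$LIVY_URL/sessions/$SESSION_ID/statements/$STATEMENT_ID" | jq -r '.state')" = "available" ]; do
+  sleep 2
+done
+curl -s "$LIVY_URL/sessions/$SESSION_ID/statements/$STATEMENT_ID" | jq -r '.output.data["text/plain"]'
+```
\ No newline at end of file
diff --git a/apache/livy/ExposedUI/apache-livy/Dockerfile b/apache/livy/ExposedUI/apache-livy/Dockerfile
new file mode 100644
index 00000000..c458ee0f
--- /dev/null
+++ b/apache/livy/ExposedUI/apache-livy/Dockerfile
@@ -0,0 +1,16 @@
+#https://github.com/apache/incubator-livy?tab=readme-ov-file#building-livy
+# Reuse the same image built for Spark Master/Worker
+FROM mounirbs-local/spark-python3-java11:3.5.4
+USER root
+ENV LIVY_HOME /opt/livy
+WORKDIR /opt/
+# Get livy binaries from: https://livy.apache.org/download/
+RUN apt-get update && apt-get install -y unzip \
+    && curl "https://dlcdn.apache.org/incubator/livy/0.8.0-incubating/apache-livy-0.8.0-incubating_2.12-bin.zip" -O \
+    && unzip "apache-livy-0.8.0-incubating_2.12-bin" \
+    && rm -rf "apache-livy-0.8.0-incubating_2.12-bin.zip" \
+    && mv "apache-livy-0.8.0-incubating_2.12-bin" $LIVY_HOME \
+    && mkdir $LIVY_HOME/logs \
+    && chown -R spark:spark $LIVY_HOME
+
+USER spark
diff --git a/apache/livy/ExposedUI/apache-livy/conf/livy-client.conf b/apache/livy/ExposedUI/apache-livy/conf/livy-client.conf
new file mode 100644
index 00000000..97147729
--- /dev/null
+++ b/apache/livy/ExposedUI/apache-livy/conf/livy-client.conf
@@ -0,0 +1,108 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.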
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#
+# Configurations for a Livy Client, any configurations set here will override any
+# livy or spark-default configurations.
+#
+# Before a Livy Client is able to load these configurations the folder containing
+# this file must be added to the application classpath
+#
+
+#
+# Configurations for Livy HTTPClient
+#
+
+# HTTP Request configurations
+# How long before a request times out
+# livy.client.http.connection.timeout = 10s
+# How long between data packets before a request times out
+# livy.client.http.connection.socket.timeout = 5m
+# Whether content is compressed
+# livy.client.http.content.compress.enable = true
+
+# How long before idle connections are closed
+# livy.client.http.connection.idle.timeout = 10m
+
+# Initial interval before polling for Job results
+# livy.client.http.job.initial-poll-interval = 100ms
+# Maximum interval between successive polls
+# livy.client.http.job.max-poll-interval = 5s
+
+#
+# Configurations for Livy RSCClient
+#
+
+# Configurations for registering a client with the rpc server
+# Unique client id for connections to the rpc server
+# livy.rsc.client.auth.id =
+# Secret value for authenticating client connections with server
+# livy.rsc.client.auth.secret =
+
+# Timeout when stopping an rsc client
+# livy.rsc.client.shutdown-timeout = 10s
+
+# Class of the rsc driver to use
+# livy.rsc.driver-class =
+# The kind of rsc session. Examples: pyspark or sparkr
+# livy.rsc.session.kind =
+
+# Comma-separated list of Livy RSC jars. By default Livy will upload jars from its installation
+# directory every time a session is started. By caching these files in HDFS, for example, startup
+# time of sessions on YARN can be reduced.
+# livy.rsc.jars =
+# Location of the SparkR package for running sparkr
+# livy.rsc.sparkr.package =
+# Location of the PySpark package for running pyspark
+# livy.rsc.pyspark.archives =
+
+# Address for the RSC driver to connect back with its connection info.
+# livy.rsc.launcher.address =
+
+# Port range on which RPC will launch. The range is inclusive of the start and end ports.
+livy.rsc.launcher.port.range = 10000~10010
+
+# How long the RSC will wait for a connection from a Livy server before shutting itself down.
+livy.rsc.server.idle-timeout = 10m
+
+# The user that should be impersonated when requesting a Livy session
+# livy.rsc.proxy-user =
+
+# Host or IP address of the rpc server
+
+#livy.rsc.rpc.server.address = livy-server
+# How long the rsc client will wait when attempting to connect to the Livy server
+#livy.rsc.server.connect.timeout = 90s
+
+# The logging level for the rpc channel. Possible values: TRACE, DEBUG, INFO, WARN, or ERROR
+livy.rsc.channel.log.level = ERROR
+
+# SASL configurations for authentication
+# SASL mechanism used for authentication
+# livy.rsc.rpc.sasl.mechanisms = DIGEST-MD5
+# SASL qop used for authentication
+# livy.rsc.rpc.sasl.qop =
+
+# Time between status checks for a cancelled Job
+# livy.rsc.job-cancel.trigger-interval = 100ms
+# Time before a cancelled Job is forced into a Cancelled state
+# livy.rsc.job-cancel.timeout = 30s
+
+# Number of statements kept in driver's memory
+# livy.rsc.retained-statements = 100
+#
+livy.rsc.jars = /opt/livy/rsc-jars/livy-api-0.8.0-incubating.jar, /opt/livy/rsc-jars/livy-rsc-0.8.0-incubating.jar
diff --git a/apache/livy/ExposedUI/apache-livy/conf/livy-env.sh b/apache/livy/ExposedUI/apache-livy/conf/livy-env.sh
new file mode 100644
index 00000000..04796887
--- /dev/null
+++ b/apache/livy/ExposedUI/apache-livy/conf/livy-env.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# LIVY ENVIRONMENT VARIABLES
+#
+# - JAVA_HOME Java runtime to use. By default use "java" from PATH.
+# - HADOOP_CONF_DIR Directory containing the Hadoop / YARN configuration to use.
+# - SPARK_HOME Spark which you would like to use in Livy.
+# - SPARK_CONF_DIR Optional directory where the Spark configuration lives.
+# (Default: $SPARK_HOME/conf)
+# - LIVY_LOG_DIR Where log files are stored. (Default: ${LIVY_HOME}/logs)
+# - LIVY_PID_DIR Where the pid file is stored. (Default: /tmp)
+# - LIVY_SERVER_JAVA_OPTS Java Opts for running livy server (You can set JVM-related settings here,
+# like JVM memory/GC algorithm, etc.)
+# - LIVY_IDENT_STRING A name that identifies the Livy server instance, used to generate log file
+# names. (Default: name of the user starting Livy).
+# - LIVY_MAX_LOG_FILES Max number of log files to keep in the log directory. (Default: 5.)
+# - LIVY_NICENESS Niceness of the Livy server process when running in the background. (Default: 0.)
+# - LIVY_CLASSPATH Override if the additional classpath is required.
+
+export JAVA_HOME=/opt/java/openjdk
+export SPARK_HOME=/opt/spark
+export LIVY_LOG_DIR=/opt/livy/logs
+export SPARK_CONF_DIR=/opt/spark/conf
\ No newline at end of file
diff --git a/apache/livy/ExposedUI/apache-livy/conf/livy.conf b/apache/livy/ExposedUI/apache-livy/conf/livy.conf
new file mode 100644
index 00000000..4f289ef2
--- /dev/null
+++ b/apache/livy/ExposedUI/apache-livy/conf/livy.conf
@@ -0,0 +1,198 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Use this keystore for the SSL certificate and key.
+# livy.keystore =
+
+# Specify the keystore password.
+# livy.keystore.password =
+#
+# Specify the key password.
+# livy.key-password =
+
+# Hadoop Credential Provider Path to get "livy.keystore.password" and "livy.key-password".
+# Credential Provider can be created using command as follow:
+# hadoop credential create "livy.keystore.password" -value "secret" -provider jceks://hdfs/path/to/livy.jceks
+# livy.hadoop.security.credential.provider.path =
+
+# What host address to start the server on. By default, Livy will bind to all network interfaces.
+livy.server.host = 0.0.0.0
+
+# What port to start the server on.
+livy.server.port = 8998
+
+# What base path ui should work on. By default UI is mounted on "/".
+# E.g.: livy.ui.basePath = /my_livy - results in mounting the UI on /my_livy/
+# livy.ui.basePath = ""
+
+# What spark master Livy sessions should use.
+livy.spark.master = spark://spark-master:7077
+
+# What spark deploy mode Livy sessions should use.
+livy.spark.deploy-mode = client
+
+# Configure Livy server http request and response header size.
+# livy.server.request-header.size = 131072
+# livy.server.response-header.size = 131072
+
+# Whether or not to send server version in http response.
+# livy.server.send-server-version = false
+
+# Whether to check if timed-out Livy sessions should be stopped.
+#livy.server.session.timeout-check = true
+#
+# Whether or not to skip timeout check for a busy session
+#livy.server.session.timeout-check.skip-busy = false
+
+# How long Livy will wait before timing out an inactive session.
+# Note that the inactive session could be busy running jobs.
+#livy.server.session.timeout = 5m
+#
+# How long a finished session state should be kept in LivyServer for query.
+#livy.server.session.state-retain.sec = 120s
+
+# If livy should impersonate the requesting users when creating a new session.
+livy.impersonation.enabled = true
+
+# Logs size livy can cache for each session/batch. 0 means don't cache the logs.
+# livy.cache-log.size = 200
+
+# Comma-separated list of Livy RSC jars. By default Livy will upload jars from its installation
+# directory every time a session is started. By caching these files in HDFS, for example, startup
+# time of sessions on YARN can be reduced.
+# livy.rsc.jars =
+
+# Comma-separated list of Livy REPL jars. By default Livy will upload jars from its installation
+# directory every time a session is started. By caching these files in HDFS, for example, startup
+# time of sessions on YARN can be reduced. Please list all the repl dependencies including
+# Scala version-specific livy-repl jars, Livy will automatically pick the right dependencies
+# during session creation.
+# livy.repl.jars =
+
+# Location of PySpark archives. By default Livy will upload the file from SPARK_HOME, but
+# by caching the file in HDFS, startup time of PySpark sessions on YARN can be reduced.
+# livy.pyspark.archives =
+
+# Location of the SparkR package. By default Livy will upload the file from SPARK_HOME, but
+# by caching the file in HDFS, startup time of R sessions on YARN can be reduced.
+# livy.sparkr.package =
+
+# List of local directories from where files are allowed to be added to user sessions. By
+# default it's empty, meaning users can only reference remote URIs when starting their
+# sessions.
+livy.file.local-dir-whitelist = /target/
+
+# Whether to enable csrf protection, by default it is false. If it is enabled, clients should add
+# the http-header "X-Requested-By" to requests if the http method is POST/DELETE/PUT/PATCH.
+# livy.server.csrf-protection.enabled =
+
+# Whether to enable HiveContext in the livy interpreter; if true, hive-site.xml will be detected
+# on the user request classpath and then on the livy server classpath automatically.
+# livy.repl.enable-hive-context =
+
+# Recovery mode of Livy. Possible values:
+# off: Default. Turn off recovery. Every time Livy shuts down, it stops and forgets all sessions.
+# recovery: Livy persists session info to the state store. When Livy restarts, it recovers
+# previous sessions from the state store.
+# Must set livy.server.recovery.state-store and livy.server.recovery.state-store.url to
+# configure the state store.
+# livy.server.recovery.mode = off
+# Zookeeper address used for HA and state store. e.g. host1:port1, host2:port2
+# livy.server.zookeeper.url =
+
+# Where Livy should store state for recovery. Possible values:
+# <empty>: Default. State store disabled.
+# filesystem: Store state on a file system.
+# zookeeper: Store state in a Zookeeper instance.
+# livy.server.recovery.state-store =
+
+# For filesystem state store, the path of the state store directory. Please don't use a filesystem
+# that doesn't support atomic rename (e.g. S3). e.g. file:///tmp/livy or hdfs:///.
+# For zookeeper, the address to the Zookeeper servers. e.g. host1:port1,host2:port2
+# If livy.server.recovery.state-store is zookeeper, this config is for back-compatibility,
+# so if both this config and livy.server.zookeeper.url exist,
+# livy uses livy.server.zookeeper.url first.
+# livy.server.recovery.state-store.url =
+
+# The policy of curator connecting to zookeeper.
+# For example, m, n means retry m times and the interval of retry is n milliseconds.
+# Please use the new config: livy.server.zk.retry-policy.
+# Keep this config for back-compatibility.
+# If both this config and livy.server.zk.retry-policy exist,
+# livy uses livy.server.zk.retry-policy first.
+# livy.server.recovery.zk-state-store.retry-policy = 5,100
+
+# The policy of curator connecting to zookeeper.
+# For example, m, n means retry m times and the interval of retry is n milliseconds
+# livy.server.zk.retry-policy =
+
+# The dir in zk to store the data about session.
+# livy.server.recovery.zk-state-store.key-prefix = livy
+
+# If Livy can't find the yarn app within this time, consider it lost.
+# livy.server.yarn.app-lookup-timeout = 120s
+# When the cluster is busy, we may fail to launch yarn app in app-lookup-timeout, then it would
+# cause session leakage, so we need to check session leakage.
+# How long to check livy session leakage
+# livy.server.yarn.app-leakage.check-timeout = 600s
+# how often to check livy session leakage
+# livy.server.yarn.app-leakage.check-interval = 60s
+
+# How often Livy polls YARN to refresh YARN app state.
+# livy.server.yarn.poll-interval = 5s
+#
+# Days to keep Livy server request logs.
+# livy.server.request-log-retain.days = 5
+
+# If the Livy Web UI should be included in the Livy Server. Enabled by default.
+livy.ui.enabled = true
+
+# Whether to enable Livy server access control. If it is true then all the incoming requests will
+# be checked to see if the requesting user has permission.
+# livy.server.access-control.enabled = false
+
+# Users allowed to access Livy; by default any user is allowed. To limit who can access Livy,
+# list all the permitted users, comma separated.
+# livy.server.access-control.allowed-users = *
+
+# A comma-separated list of users with permission to change other users' submitted sessions,
+# e.g. submitting statements or deleting a session.
+# livy.server.access-control.modify-users =
+
+# A comma-separated list of users with permission to view other users' information, such as
+# submitted session state and statement results.
+# livy.server.access-control.view-users =
+#
+# Authentication support for Livy server
+# Livy has built-in SPNEGO authentication support for HTTP requests with the configurations below.
+# livy.server.auth.type = kerberos
+# livy.server.auth.kerberos.principal = <spnego principal>
+# livy.server.auth.kerberos.keytab = <spnego keytab>
+# livy.server.auth.kerberos.name-rules = DEFAULT
+#
+# If user wants to use a custom authentication filter, the configurations are:
+# livy.server.auth.type = <custom-auth-type>
+# livy.server.auth.<custom-auth-type>.class = <class of custom auth filter>
+# livy.server.auth.<custom-auth-type>.param.<param name> = <value>
+# livy.server.auth.<custom-auth-type>.param.<param name> = <value>
+
+# Enable to allow custom classpath by proxy user in cluster mode
+# The below configuration parameter is disabled by default.
+# livy.server.session.allow-custom-classpath = true
+
+livy.repl.jars = /opt/livy/jars/livy-client-common-0.8.0-incubating.jar, /opt/livy/repl_2.12-jars/livy-core_2.12-0.8.0-incubating.jar, /opt/livy/repl_2.12-jars/commons-codec-1.9.jar, /opt/livy/repl_2.12-jars/livy-core_2.12-0.8.0-incubating.jar, /opt/livy/repl_2.12-jars/minlog-1.3.0.jar, /opt/livy/repl_2.12-jars/kryo-shaded-4.0.2.jar, /opt/livy/repl_2.12-jars/livy-repl_2.12-0.8.0-incubating.jar, /opt/livy/repl_2.12-jars/objenesis-2.5.1.jar
+
+livy.rsc.jars = /opt/livy/rsc-jars/livy-api-0.8.0-incubating.jar, /opt/livy/rsc-jars/livy-rsc-0.8.0-incubating.jar
diff --git a/apache/livy/ExposedUI/apache-livy/conf/log4j.properties b/apache/livy/ExposedUI/apache-livy/conf/log4j.properties
new file mode 100644
index 00000000..70b67a6d
--- /dev/null
+++ b/apache/livy/ExposedUI/apache-livy/conf/log4j.properties
@@ -0,0 +1,24 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# The default Livy logging configuration.
+log4j.rootCategory=INFO, console +log4j.appender.console=org.apache.log4j.ConsoleAppender +log4j.appender.console.target=System.err +log4j.appender.console.layout=org.apache.log4j.PatternLayout +log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n + +log4j.logger.org.eclipse.jetty=WARN diff --git a/apache/livy/ExposedUI/apache-livy/conf/spark-blacklist b/apache/livy/ExposedUI/apache-livy/conf/spark-blacklist new file mode 100644 index 00000000..e371ed22 --- /dev/null +++ b/apache/livy/ExposedUI/apache-livy/conf/spark-blacklist @@ -0,0 +1,35 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# +# Configuration override / blacklist. Defines a list of properties that users are not allowed +# to override when starting Spark sessions. +# +# This file takes a list of property names (one per line). Empty lines and lines starting with "#" +# are ignored. +# + +# Disallow overriding the master and the deploy mode. +spark.master +spark.submit.deployMode + +# Disallow overriding the location of Spark cached jars. +spark.yarn.jar +spark.yarn.jars +spark.yarn.archive + +# Don't allow users to override the RSC timeout. +livy.rsc.server.idle-timeout diff --git a/apache/livy/ExposedUI/apache-livy/spark/conf/spark-defaults.conf b/apache/livy/ExposedUI/apache-livy/spark/conf/spark-defaults.conf new file mode 100644 index 00000000..25721bbf --- /dev/null +++ b/apache/livy/ExposedUI/apache-livy/spark/conf/spark-defaults.conf @@ -0,0 +1,35 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Default system properties included when running spark-submit. +# This is useful for setting default environmental settings. 
+
+# Example:
+spark.master spark://spark-master:7077
+spark.driver.host apache-livy
+# spark.eventLog.enabled true
+# spark.eventLog.dir hdfs://namenode:8021/directory
+# spark.serializer org.apache.spark.serializer.KryoSerializer
+# spark.driver.memory 2g
+# spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
+
+# Dynamic Allocation
+# Livy considers a dynamic executor to be one full worker (there is no per-core granularity), so if a worker has 4 cores, one executor for Apache Livy will take all 4 cores
+spark.dynamicAllocation.enabled true
+spark.dynamicAllocation.minExecutors 1
+spark.dynamicAllocation.maxExecutors 4
+spark.dynamicAllocation.initialExecutors 1
diff --git a/apache/livy/ExposedUI/docker-compose.yml b/apache/livy/ExposedUI/docker-compose.yml
new file mode 100644
index 00000000..f00b4646
--- /dev/null
+++ b/apache/livy/ExposedUI/docker-compose.yml
@@ -0,0 +1,78 @@
+services:
+  spark-master:
+    container_name: spark-master
+    hostname: spark-master
+    build:
+      context: ./
+      dockerfile: ./spark/Dockerfile
+    image: mounirbs-local/spark-python3-java11:3.5.4
+    ports:
+      - "8080:8080"
+      - "7077:7077"
+      - "6066:6066"
+    labels:
+      kompose.service.expose: true
+      kompose.service.type: headless
+    environment:
+      - SPARK_MASTER_HOST=spark-master
+      - SPARK_MASTER_PORT=7077
+      - SPARK_MASTER_WEBUI_PORT=8080
+      - SPARK_DAEMON_MEMORY=2g
+      - SPARK_MASTER_OPTS=-Dspark.master.rest.enabled=true
+      - PYSPARK_PYTHON=python3
+    entrypoint:
+      - "bash"
+      - "-c"
+      - "/opt/spark/sbin/start-master.sh && tail -f /dev/null"
+    volumes:
+      - ./python:/python
+
+  spark-worker:
+    # reuse the image built for the spark-master
+    image: mounirbs-local/spark-python3-java11:3.5.4
+    ports:
+      - "8081:8081"
+    labels:
+      kompose.service.expose: true
+      kompose.service.type: headless
+    container_name: spark-worker
+    hostname: spark-worker
+    environment:
+      - SPARK_WORKER_CORES=1
+      - SPARK_WORKER_MEMORY=2g
+      - PYSPARK_PYTHON=python3
+    depends_on:
+      - spark-master
+    entrypoint:
+      - "bash"
+      - "-c"
+      - "/opt/spark/sbin/start-worker.sh spark://spark-master:7077 && tail -f /dev/null"
+    volumes:
+      - ./python:/python
+
+  apache-livy:
+    container_name: apache-livy
+    hostname: apache-livy
+    environment:
+      - PYSPARK_PYTHON=python3
+    build: ./apache-livy/
+    image: mounirbs-local/livy-spark3.5.4-python3-java11:0.8
+    command: ["sh", "-c", "/opt/livy/bin/livy-server"]
+    user: root
+    volumes:
+      - ./apache-livy/conf/:/opt/livy/conf/
+      - ./apache-livy/spark/conf/:/opt/spark/conf/
+    ports:
+      - '8998:8998'
+    labels:
+      kompose.service.expose: true
+      kompose.service.type: headless
+    depends_on:
+      - spark-master
+      - spark-worker
+
+    deploy:
+      resources:
+        limits:
+          cpus: '1'
+          memory: 2g
diff --git a/apache/livy/ExposedUI/python/fabric/swagger.json b/apache/livy/ExposedUI/python/fabric/swagger.json
new file mode 100644
index 00000000..3d533742
--- /dev/null
+++ b/apache/livy/ExposedUI/python/fabric/swagger.json
@@ -0,0 +1,1753 @@
+{
+  "swagger": "2.0",
+  "info": {
+    "version": "v1",
+    "title": "Livy Public API"
+  },
+  "host": "api.fabric.microsoft.com",
+  "schemes": [
+    "https"
+  ],
+  "paths": {
+    "/v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/batches": {
+      "post": {
+        "tags": [
+          "LivyApiBatch"
+        ],
+        "summary": "Executes a batch.",
+        "operationId": "LivyApiBatch_ExecuteBatchAsync",
+        "consumes": [
+          "application/json",
+          "text/json"
+        ],
+        "produces": [
+          "application/json",
+          "text/json"
+        ],
+        "parameters": [
+          {
+            "name": "workspaceId",
+            "in": "path",
+            "description": "",
+            "required":
true, + "type": "string", + "format": "uuid" + }, + { + "name": "lakehouseId", + "in": "path", + "description": "", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "livyApiVersion", + "in": "path", + "required": true, + "type": "string" + }, + { + "name": "payload", + "in": "body", + "description": "", + "required": true, + "schema": { + "$ref": "#/definitions/BatchRequest" + } + } + ], + "responses": { + "202": { + "description": "Accepted", + "schema": { + "$ref": "#/definitions/BatchResponse" + } + }, + "default": { + "description": "Other status codes", + "schema": { + "$ref": "#/definitions/ErrorResponse" + } + } + } + }, + "get": { + "tags": [ + "LivyApiBatch" + ], + "summary": "List Batch Jobs.", + "operationId": "LivyApiBatch_ListBatchJobsAsync", + "consumes": [], + "produces": [ + "application/json" + ], + "parameters": [ + { + "name": "workspaceId", + "in": "path", + "description": "", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "lakehouseId", + "in": "path", + "description": "", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "livyApiVersion", + "in": "path", + "required": true, + "type": "string" + }, + { + "name": "$top", + "in": "query", + "type": "string", + "description": "" + }, + { + "name": "$skip", + "in": "query", + "type": "string", + "description": "" + }, + { + "name": "$count", + "in": "query", + "type": "boolean", + "description": "" + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/LivySparkActivityList" + } + }, + "default": { + "description": "Other Status", + "schema": { + "$ref": "#/definitions/ErrorResponse" + } + } + } + } + }, + "/v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/batches/{batchId}": { + "get": { + "tags": [ + "LivyApiBatch" + ], + "summary": "Gets a Batch Details.", + "operationId": "LivyApiBatch_GetBatchAsync", + "consumes": [], + "produces": [ + "application/json" + ], + "parameters": [ + { + "name": "workspaceId", + "in": "path", + "description": "", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "lakehouseId", + "in": "path", + "description": "", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "livyApiVersion", + "in": "path", + "required": true, + "type": "string" + }, + { + "name": "batchId", + "in": "path", + "description": "", + "required": true, + "type": "string", + "format": "uuid" + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/BatchResponse" + } + }, + "default": { + "description": "Other status codes", + "schema": { + "$ref": "#/definitions/ErrorResponse" + } + } + } + }, + "delete": { + "tags": [ + "LivyApiBatch" + ], + "summary": "Cancels a Batch Execution.", + "operationId": "LivyApiBatch_CancelBatchAsync", + "consumes": [], + "produces": [ + "application/json" + ], + "parameters": [ + { + "name": "workspaceId", + "in": "path", + "description": "", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "lakehouseId", + "in": "path", + "description": "", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "livyApiVersion", + "in": "path", + "required": true, + "type": "string" + }, + { + "name": "batchId", + "in": "path", + "description": "", + "required": true, + "type": "string", + "format": "uuid" + } + ], + "responses": { + "200": { + "description": "OK" + }, + "default": { + 
"description": "Other status codes", + "schema": { + "$ref": "#/definitions/ErrorResponse" + } + } + } + } + }, + "/v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions": { + "post": { + "tags": [ + "LivyApiSession" + ], + "summary": "Acquire a Spark Session.", + "operationId": "LivyApiSession_AcquireSparkSessionAsync", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "parameters": [ + { + "name": "workspaceId", + "in": "path", + "description": "Workspace Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "lakehouseId", + "in": "path", + "description": "Lakehouse Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "livyApiVersion", + "in": "path", + "required": true, + "type": "string" + }, + { + "name": "payload", + "in": "body", + "description": "CreateSessionRequest.", + "required": true, + "schema": { + "$ref": "#/definitions/SessionRequest" + } + } + ], + "responses": { + "202": { + "description": "Accepted", + "schema": { + "$ref": "#/definitions/SessionResponse" + } + }, + "default": { + "description": "Other status code", + "schema": { + "$ref": "#/definitions/ErrorResponse" + } + } + } + }, + "get": { + "tags": [ + "LivyApiSession" + ], + "summary": "List spark sessions.", + "operationId": "LivyApiSession_ListSparkSessionsAsync", + "consumes": [], + "produces": [ + "application/json", + "text/json" + ], + "parameters": [ + { + "name": "workspaceId", + "in": "path", + "description": "Workspace name.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "lakehouseId", + "in": "path", + "description": "Lakehouse Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "livyApiVersion", + "in": "path", + "required": true, + "type": "string" + }, + { + "name": "$top", + "in": "query", + "type": "string", + "description": "" + }, + { + "name": "$skip", + "in": "query", + "type": "string", + "description": "" + }, + { + "name": "$count", + "in": "query", + "type": "boolean", + "description": "" + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/SessionResponse" + } + }, + "default": { + "description": "Other status codes", + "schema": { + "$ref": "#/definitions/ErrorResponse" + } + } + } + } + }, + "/v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions/{sessionId}": { + "get": { + "tags": [ + "LivyApiSession" + ], + "summary": "Get details of a spark session.", + "operationId": "LivyApiSession_GetSparkSessionAsync", + "consumes": [], + "produces": [ + "application/json" + ], + "parameters": [ + { + "name": "workspaceId", + "in": "path", + "description": "Workspace name.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "lakehouseId", + "in": "path", + "description": "Lakehouse Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "livyApiVersion", + "in": "path", + "required": true, + "type": "string" + }, + { + "name": "sessionId", + "in": "path", + "description": "Spark session Id.", + "required": true, + "type": "string", + "format": "uuid" + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/SessionResponse" + } + }, + "default": { + "description": "Other status codes", + "schema": { + "$ref": "#/definitions/ErrorResponse" + } + } + } + }, + "delete": { + "tags": [ + "LivyApiSession" + ], + 
"summary": "Stops and deletes a spark session.", + "operationId": "LivyApiSession_DeleteSparkSessionAsync", + "consumes": [], + "produces": [ + "application/json", + "text/json" + ], + "parameters": [ + { + "name": "workspaceId", + "in": "path", + "description": "Workspace Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "lakehouseId", + "in": "path", + "description": "Lakehouse Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "livyApiVersion", + "in": "path", + "required": true, + "type": "string" + }, + { + "name": "sessionId", + "in": "path", + "description": "Session Id.", + "required": true, + "type": "string", + "format": "uuid" + } + ], + "responses": { + "200": { + "description": "OK" + }, + "default": { + "description": "Other status codes", + "schema": { + "$ref": "#/definitions/ErrorResponse" + } + } + } + } + }, + "/v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions/{sessionId}/statements": { + "post": { + "tags": [ + "LivyApiSession" + ], + "summary": "Execute a statement on a spark session.", + "operationId": "LivyApiSession_ExecuteSparkSessionStatementAsync", + "consumes": [ + "application/json", + "text/json" + ], + "produces": [ + "application/json", + "text/json" + ], + "parameters": [ + { + "name": "workspaceId", + "in": "path", + "description": "Workspace Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "lakehouseId", + "in": "path", + "description": "Lakehouse Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "livyApiVersion", + "in": "path", + "required": true, + "type": "string" + }, + { + "name": "sessionId", + "in": "path", + "description": "Spark Session Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "payload", + "in": "body", + "description": "", + "required": true, + "schema": { + "$ref": "#/definitions/StatementRequest" + } + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/StatementResponse" + } + }, + "default": { + "description": "Other status codes.", + "schema": { + "$ref": "#/definitions/ErrorResponse" + } + } + } + }, + "get": { + "tags": [ + "LivyApiSession" + ], + "summary": "List statements in an active session.", + "operationId": "LivyApiSession_ListSparkSessionStatementsAsync", + "consumes": [], + "produces": [ + "application/json", + "text/json" + ], + "parameters": [ + { + "name": "workspaceId", + "in": "path", + "description": "Workspace name.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "lakehouseId", + "in": "path", + "description": "Lakehouse Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "livyApiVersion", + "in": "path", + "required": true, + "type": "string" + }, + { + "name": "sessionId", + "in": "path", + "description": "", + "required": true, + "type": "string", + "format": "uuid" + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/StatementsResponse" + } + }, + "default": { + "description": "Other status codes.", + "schema": { + "$ref": "#/definitions/ErrorResponse" + } + } + } + } + }, + "/v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions/{sessionId}/statements/{statementId}": { + "get": { + "tags": [ + "LivyApiSession" + ], + "summary": "Gets a spark statement from a spark session.", + "operationId": 
"LivyApiSession_GetSparkSessionStatementAsync", + "consumes": [], + "produces": [ + "application/json", + "text/json" + ], + "parameters": [ + { + "name": "workspaceId", + "in": "path", + "description": "Workspace Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "lakehouseId", + "in": "path", + "description": "Lakehouse Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "sessionId", + "in": "path", + "description": "Spark Session Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "livyApiVersion", + "in": "path", + "required": true, + "type": "string" + }, + { + "name": "statementId", + "in": "path", + "description": "Statement Id.", + "required": true, + "type": "integer", + "format": "int32" + }, + { + "name": "from", + "in": "query", + "description": "Offset (in byte) which the output should begin from.", + "type": "integer", + "format": "int32" + }, + { + "name": "size", + "in": "query", + "description": "Size (in byte) of the returned output.", + "type": "integer", + "format": "int32" + } + ], + "responses": { + "200": { + "description": "OK" + }, + "default": { + "description": "Other status codes.", + "schema": { + "$ref": "#/definitions/ErrorResponse" + } + } + } + } + }, + "/v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions/{sessionId}/statements/{statementId}/cancel": { + "post": { + "tags": [ + "LivyApiSession" + ], + "summary": "Cancels a statement execution.", + "operationId": "LivyApiSession_CancelSparkStatementAsync", + "consumes": [], + "produces": [ + "application/json", + "text/json" + ], + "parameters": [ + { + "name": "workspaceId", + "in": "path", + "description": "Workspace Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "lakehouseId", + "in": "path", + "description": "Lakehouse Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "livyApiVersion", + "in": "path", + "required": true, + "type": "string" + }, + { + "name": "sessionId", + "in": "path", + "description": "Session Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "statementId", + "in": "path", + "description": "", + "required": true, + "type": "integer", + "format": "int32" + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/StatementCancellationResponse" + } + }, + "default": { + "description": "Other status codes.", + "schema": { + "$ref": "#/definitions/ErrorResponse" + } + } + } + } + }, + "/v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions/{sessionId}/reset-timeout": { + "post": { + "tags": [ + "LivyApiSession" + ], + "summary": "Resets the timeout time of a session.", + "operationId": "LivyApiSession_ResetSparkSessionTimeoutAsync", + "consumes": [], + "produces": [ + "application/json", + "text/json" + ], + "parameters": [ + { + "name": "workspaceId", + "in": "path", + "description": "Workspace Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "lakehouseId", + "in": "path", + "description": "Lakehouse Id.", + "required": true, + "type": "string", + "format": "uuid" + }, + { + "name": "livyApiVersion", + "in": "path", + "required": true, + "type": "string" + }, + { + "name": "sessionId", + "in": "path", + "description": "Session Id.", + "required": true, + "type": "string", + "format": "uuid" + } + ], + "responses": { + "204": { + 
"description": "OK" + }, + "default": { + "description": "Other status codes.", + "schema": { + "$ref": "#/definitions/ErrorResponse" + } + } + } + } + } + }, + "definitions": { + "StatementResponse": { + "description": "Statement Response.", + "type": "object", + "properties": { + "id": { + "format": "int32", + "type": "integer" + }, + "code": { + "type": "string" + }, + "state": { + "enum": [ + "waiting", + "running", + "available", + "Error", + "cancelling", + "cancelled" + ], + "type": "string" + }, + "sourceId": { + "type": "string" + }, + "output": { + "$ref": "#/definitions/StatementOutput" + } + } + }, + "StatementOutput": { + "description": "Statement Output.", + "type": "object", + "properties": { + "status": { + "type": "string" + }, + "execution_count": { + "format": "int32", + "type": "integer" + }, + "data": { + "type": "object" + }, + "ename": { + "type": "string" + }, + "evalue": { + "type": "string" + }, + "traceback": { + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "BatchRequest": { + "description": "Request for runing batch job.", + "type": "object", + "allOf": [ + { + "$ref": "#/definitions/SessionRequest" + }, + { + "type": "object" + } + ], + "properties": { + "file": { + "type": "string" + }, + "className": { + "type": "string" + }, + "args": { + "type": "array", + "items": { + "type": "string" + } + }, + "jars": { + "type": "array", + "items": { + "type": "string" + } + }, + "files": { + "type": "array", + "items": { + "type": "string" + } + }, + "pyFiles": { + "type": "array", + "items": { + "type": "string" + } + }, + "archives": { + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "SessionRequest": { + "description": "Request for acquiring a Session.", + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "archives": { + "type": "array", + "items": { + "type": "string" + } + }, + "conf": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "tags": { + "description": "Gets or sets the optional tags.", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "driverMemory": { + "type": "string" + }, + "driverCores": { + "format": "int32", + "type": "integer" + }, + "executorMemory": { + "type": "string" + }, + "executorCores": { + "format": "int32", + "type": "integer" + }, + "numExecutors": { + "format": "int32", + "type": "integer" + } + } + }, + "StatementRequest": { + "description": "Statement Request Body.", + "type": "object", + "properties": { + "code": { + "type": "string" + }, + "kind": { + "type": "string" + }, + "sourceId": { + "type": "string" + } + } + }, + "LivySessionStateInformation": { + "description": "Livy Session State Information.", + "type": "object", + "properties": { + "notStartedAt": { + "format": "date-time", + "type": "string" + }, + "startingAt": { + "format": "date-time", + "type": "string" + }, + "idleAt": { + "format": "date-time", + "type": "string" + }, + "deadAt": { + "format": "date-time", + "type": "string" + }, + "shuttingDownAt": { + "format": "date-time", + "type": "string" + }, + "killedAt": { + "format": "date-time", + "type": "string" + }, + "recoveringAt": { + "format": "date-time", + "type": "string" + }, + "busyAt": { + "format": "date-time", + "type": "string" + }, + "errorAt": { + "format": "date-time", + "type": "string" + }, + "currentState": { + "type": "string" + }, + "jobCreationRequest": { + "$ref": "#/definitions/SessionRequest" + } + } + }, + "SchedulerInformation": { + "description": "Scheduler 
Information.", + "type": "object", + "properties": { + "submittedAt": { + "format": "date-time", + "type": "string" + }, + "queuedAt": { + "format": "date-time", + "type": "string" + }, + "scheduledAt": { + "format": "date-time", + "type": "string" + }, + "endedAt": { + "format": "date-time", + "type": "string" + }, + "cancellationRequestedAt": { + "format": "date-time", + "type": "string" + }, + "currentState": { + "enum": [ + "Queued", + "Scheduled", + "Ended" + ], + "type": "string" + } + } + }, + "SparkServicePluginInformation": { + "description": "Spark Service Plugin Information.", + "type": "object", + "properties": { + "preparationStartedAt": { + "format": "date-time", + "type": "string" + }, + "resourceAcquisitionStartedAt": { + "format": "date-time", + "type": "string" + }, + "submissionStartedAt": { + "format": "date-time", + "type": "string" + }, + "monitoringStartedAt": { + "format": "date-time", + "type": "string" + }, + "cleanupStartedAt": { + "format": "date-time", + "type": "string" + }, + "currentState": { + "enum": [ + "Preparation", + "ResourceAcquisition", + "Queued", + "Submission", + "Monitoring", + "Cleanup", + "Ended" + ], + "type": "string" + } + } + }, + "LivyRequestBase": { + "description": "Livy Request Base.", + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "file": { + "type": "string" + }, + "className": { + "type": "string" + }, + "args": { + "type": "array", + "items": { + "type": "string" + } + }, + "jars": { + "type": "array", + "items": { + "type": "string" + } + }, + "files": { + "type": "array", + "items": { + "type": "string" + } + }, + "pyFiles": { + "type": "array", + "items": { + "type": "string" + } + }, + "archives": { + "type": "array", + "items": { + "type": "string" + } + }, + "conf": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "driverMemory": { + "type": "string" + }, + "driverCores": { + "format": "int32", + "type": "integer" + }, + "executorMemory": { + "type": "string" + }, + "executorCores": { + "format": "int32", + "type": "integer" + }, + "numExecutors": { + "format": "int32", + "type": "integer" + } + } + }, + "StatementsResponse": { + "description": "Livy Statement Response Body.", + "type": "object", + "properties": { + "statements": { + "type": "array", + "items": { + "$ref": "#/definitions/StatementResponse" + } + }, + "total_statements": { + "format": "int32", + "type": "integer" + } + } + }, + "StatementCancellationResponse": { + "description": "Livy Statement CancellationResponse.", + "type": "object", + "properties": { + "msg": { + "type": "string" + } + } + }, + "BatchStateInformation": { + "description": "Batch State Information.", + "type": "object", + "properties": { + "notStartedAt": { + "format": "date-time", + "type": "string" + }, + "startingAt": { + "format": "date-time", + "type": "string" + }, + "runningAt": { + "format": "date-time", + "type": "string" + }, + "deadAt": { + "format": "date-time", + "type": "string" + }, + "successAt": { + "format": "date-time", + "type": "string" + }, + "killedAt": { + "format": "date-time", + "type": "string" + }, + "recoveringAt": { + "format": "date-time", + "type": "string" + }, + "currentState": { + "type": "string" + }, + "jobCreationRequest": { + "$ref": "#/definitions/LivyRequestBase" + } + } + }, + "ErrorResponse": { + "description": "The error response.", + "required": [ + "message", + "errorCode" + ], + "allOf": [ + { + "$ref": "#/definitions/ErrorResponseDetails" + } + ], + "properties": { + "requestId": { + 
"type": "string", + "description": "ID of the request associated with the error.", + "readOnly": true + }, + "moreDetails": { + "description": "List of additional error details.", + "type": "array", + "items": { + "$ref": "#/definitions/ErrorResponseDetails" + }, + "readOnly": true + } + }, + "readOnly": true + }, + "ErrorResponseDetails": { + "description": "The error response details.", + "required": [ + "message", + "errorCode" + ], + "properties": { + "errorCode": { + "type": "string", + "description": "A specific identifier that provides information about an error condition, allowing for standardized communication between our service and its users." + }, + "message": { + "type": "string", + "description": "A human readable representation of the error." + }, + "relatedResource": { + "$ref": "#/definitions/ErrorRelatedResource" + } + } + }, + "ErrorRelatedResource": { + "description": "The error related resource details object.", + "required": [ + "resourceId", + "resourceType" + ], + "properties": { + "resourceId": { + "type": "string", + "description": "Resource ID involved in the error." + }, + "resourceType": { + "type": "string", + "description": "Resource type involved in the error." + } + } + }, + "LivySparkActivityList": { + "description": "List of batches or sessions.", + "type": "object", + "properties": { + "items": { + "description": "List of items.", + "type": "array", + "items": { + "$ref": "#/definitions/LivySparkActivity" + } + }, + "totalCountOfMatchedItems": { + "format": "int32", + "description": "Total count of matched items.", + "type": "integer" + }, + "pageSize": { + "format": "int32", + "description": "Page size.", + "type": "integer" + } + } + }, + "LivySparkActivity": { + "description": "Batch or Session description when getting a list.", + "type": "object", + "properties": { + "id": { + "description": "Activity ID.", + "type": "string" + }, + "appId": { + "description": "Spark application ID.", + "type": "string" + }, + "name": { + "description": "Batch or Session Name.", + "type": "string" + }, + "workspaceId": { + "description": "Workspace ID.", + "type": "string" + }, + "submitterId": { + "description": "Submitter ID.", + "type": "string" + }, + "submitterName": { + "description": "Submitter name.", + "type": "string" + }, + "artifactId": { + "description": "Artifact ID.", + "type": "string" + }, + "cancellationReason": { + "description": "Cancellation reason.", + "type": "string" + }, + "result": { + "description": "Job result.", + "enum": [ + "Uncertain", + "Succeeded", + "Failed", + "Cancelled" + ], + "type": "string" + }, + "submittedAt": { + "format": "date-time", + "description": "Submitted at time.", + "type": "string" + }, + "startedAt": { + "format": "date-time", + "description": "Started at time.", + "type": "string" + }, + "endedAt": { + "format": "date-time", + "description": "Ended at time.", + "type": "string" + }, + "errorSource": { + "description": "Error source.", + "enum": [ + "System", + "User", + "Unknown", + "Dependency" + ], + "type": "string" + }, + "errorCode": { + "description": "Error Code.", + "type": "string" + }, + "tags": { + "description": "Optional tags.", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "schedulerState": { + "description": "Scheduler state.", + "enum": [ + "Queued", + "Scheduled", + "Ended" + ], + "type": "string" + }, + "pluginState": { + "description": "Plugin state.", + "enum": [ + "Preparation", + "ResourceAcquisition", + "Queued", + "Submission", + "Monitoring", + "Cleanup", 
+ "Ended" + ], + "type": "string" + }, + "livyState": { + "description": "Gets or sets livy state.", + "type": "string" + }, + "isJobTimedOut": { + "description": "If job is timed out.", + "type": "boolean" + } + } + }, + "BatchResponse": { + "description": "Class that represents livy responses for batch.", + "type": "object", + "properties": { + "livyInfo": { + "$ref": "#/definitions/BatchStateInformation" + }, + "fabricBatchStateInfo": { + "$ref": "#/definitions/BatchStateInfo" + }, + "name": { + "description": "Name of the batch.", + "type": "string" + }, + "id": { + "description": "ID created for the batch.", + "type": "string" + }, + "appId": { + "description": "Application id of this batch.", + "type": "string" + }, + "appInfo": { + "description": "Detailed application info.", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "artifactId": { + "description": "Hosting artifact Id.", + "type": "string" + }, + "errorInfo": { + "description": "Detailed error information.", + "type": "array", + "items": { + "$ref": "#/definitions/ErrorInformation" + } + }, + "jobType": { + "description": "Spark job type.", + "enum": [ + "SparkBatch", + "SparkSession", + "ScopeBatch", + "JupyterEnvironment" + ], + "type": "string" + }, + "submitterId": { + "description": "ID of e user who submitted the session.", + "type": "string" + }, + "submitterName": { + "description": "Name of the user who submitted the session.", + "type": "string" + }, + "log": { + "description": "Log lines.", + "type": "array", + "items": { + "type": "string" + } + }, + "pluginInfo": { + "$ref": "#/definitions/SparkServicePluginInformation" + }, + "schedulerInfo": { + "$ref": "#/definitions/SchedulerInformation" + }, + "state": { + "description": "State of the batch or session.", + "enum": [ + "starting", + "running", + "dead", + "success", + "killed", + "idle", + "error", + "shutting_down", + "not_started", + "busy", + "recovering", + "submitting", + "not_submitted" + ], + "type": "string" + }, + "tags": { + "description": "Optional tags.", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "result": { + "enum": [ + "Uncertain", + "Succeeded", + "Failed", + "Cancelled" + ], + "type": "string" + }, + "cancellationReason": { + "description": "Cancellation reason.", + "type": "string" + } + } + }, + "BatchStateInfo": { + "description": "Batch state info.", + "type": "object", + "properties": { + "state": { + "description": "State of the batch acquisition.", + "enum": [ + "unknown", + "expired", + "queued", + "libraryPackaging", + "submitting", + "cancelling", + "cancelled", + "error" + ], + "type": "string" + }, + "errorMessage": { + "description": "Error message if the state is in \"error\".", + "type": "string" + } + } + }, + "ErrorInformation": { + "description": "Error Information.", + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "errorCode": { + "type": "string" + }, + "source": { + "enum": [ + "System", + "User", + "Unknown", + "Dependency" + ], + "type": "string" + } + } + }, + "SessionResponse": { + "type": "object", + "properties": { + "fabricSessionStateInfo": { + "$ref": "#/definitions/SessionStateInfo" + }, + "livyInfo": { + "$ref": "#/definitions/LivySessionStateInformation" + }, + "name": { + "description": "Name of the session.", + "type": "string" + }, + "id": { + "description": "Session ID created for the session.", + "type": "string" + }, + "appId": { + "description": "The application id of this session.", + "type": "string" + }, 
+ "appInfo": { + "description": "Detailed application info.", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "artifactId": { + "description": "Hosting artifact ID.", + "type": "string" + }, + "errorInfo": { + "description": "Detailed error information.", + "type": "array", + "items": { + "$ref": "#/definitions/ErrorInformation" + } + }, + "jobType": { + "description": "Spark job type.", + "enum": [ + "SparkBatch", + "SparkSession", + "ScopeBatch", + "JupyterEnvironment" + ], + "type": "string" + }, + "submitterId": { + "description": "ID of the user who submitted the sessionn.", + "type": "string" + }, + "submitterName": { + "description": "Name of the user who submitted the session.", + "type": "string" + }, + "log": { + "description": "Log lines.", + "type": "array", + "items": { + "type": "string" + } + }, + "pluginInfo": { + "$ref": "#/definitions/SparkServicePluginInformation" + }, + "schedulerInfo": { + "$ref": "#/definitions/SchedulerInformation" + }, + "state": { + "description": "Gets or sets the session state.", + "enum": [ + "starting", + "running", + "dead", + "success", + "killed", + "idle", + "error", + "shutting_down", + "not_started", + "busy", + "recovering", + "submitting", + "not_submitted" + ], + "type": "string" + }, + "tags": { + "description": "Optional tags.", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "result": { + "enum": [ + "Uncertain", + "Succeeded", + "Failed", + "Cancelled" + ], + "type": "string" + }, + "cancellationReason": { + "description": "Cancellation reason.", + "type": "string" + } + } + }, + "SessionStateInfo": { + "description": "Session state info.", + "type": "object", + "properties": { + "state": { + "description": "Session acquisiton state.", + "enum": [ + "queued", + "libraryPackaging", + "acquiringSession", + "cancelling", + "cancelled", + "error", + "unknown" + ], + "type": "string" + }, + "errorMessage": { + "description": "Error message when in \"error\" state.", + "type": "string" + } + } + } + } +} \ No newline at end of file diff --git a/apache/livy/ExposedUI/python/fabric/swagger.yaml b/apache/livy/ExposedUI/python/fabric/swagger.yaml new file mode 100644 index 00000000..f447f338 --- /dev/null +++ b/apache/livy/ExposedUI/python/fabric/swagger.yaml @@ -0,0 +1,1250 @@ +swagger: '2.0' +info: + version: v1 + title: Livy Public API +host: api.fabric.microsoft.com +schemes: + - https +paths: + /v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/batches: + post: + tags: + - LivyApiBatch + summary: Executes a batch. + operationId: LivyApiBatch_ExecuteBatchAsync + consumes: + - application/json + - text/json + produces: + - application/json + - text/json + parameters: + - name: workspaceId + in: path + description: '' + required: true + type: string + format: uuid + - name: lakehouseId + in: path + description: '' + required: true + type: string + format: uuid + - name: livyApiVersion + in: path + required: true + type: string + - name: payload + in: body + description: '' + required: true + schema: + $ref: '#/definitions/BatchRequest' + responses: + '202': + description: Accepted + schema: + $ref: '#/definitions/BatchResponse' + default: + description: Other status codes + schema: + $ref: '#/definitions/ErrorResponse' + get: + tags: + - LivyApiBatch + summary: List Batch Jobs. 
+      operationId: LivyApiBatch_ListBatchJobsAsync
+      consumes: []
+      produces:
+        - application/json
+      parameters:
+        - name: workspaceId
+          in: path
+          description: ''
+          required: true
+          type: string
+          format: uuid
+        - name: lakehouseId
+          in: path
+          description: ''
+          required: true
+          type: string
+          format: uuid
+        - name: livyApiVersion
+          in: path
+          required: true
+          type: string
+        - name: $top
+          in: query
+          type: string
+          description: ''
+        - name: $skip
+          in: query
+          type: string
+          description: ''
+        - name: $count
+          in: query
+          type: boolean
+          description: ''
+      responses:
+        '200':
+          description: OK
+          schema:
+            $ref: '#/definitions/LivySparkActivityList'
+        default:
+          description: Other status codes
+          schema:
+            $ref: '#/definitions/ErrorResponse'
+  /v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/batches/{batchId}:
+    get:
+      tags:
+        - LivyApiBatch
+      summary: Gets Batch Details.
+      operationId: LivyApiBatch_GetBatchAsync
+      consumes: []
+      produces:
+        - application/json
+      parameters:
+        - name: workspaceId
+          in: path
+          description: ''
+          required: true
+          type: string
+          format: uuid
+        - name: lakehouseId
+          in: path
+          description: ''
+          required: true
+          type: string
+          format: uuid
+        - name: livyApiVersion
+          in: path
+          required: true
+          type: string
+        - name: batchId
+          in: path
+          description: ''
+          required: true
+          type: string
+          format: uuid
+      responses:
+        '200':
+          description: OK
+          schema:
+            $ref: '#/definitions/BatchResponse'
+        default:
+          description: Other status codes
+          schema:
+            $ref: '#/definitions/ErrorResponse'
+    delete:
+      tags:
+        - LivyApiBatch
+      summary: Cancels a Batch Execution.
+      operationId: LivyApiBatch_CancelBatchAsync
+      consumes: []
+      produces:
+        - application/json
+      parameters:
+        - name: workspaceId
+          in: path
+          description: ''
+          required: true
+          type: string
+          format: uuid
+        - name: lakehouseId
+          in: path
+          description: ''
+          required: true
+          type: string
+          format: uuid
+        - name: livyApiVersion
+          in: path
+          required: true
+          type: string
+        - name: batchId
+          in: path
+          description: ''
+          required: true
+          type: string
+          format: uuid
+      responses:
+        '200':
+          description: OK
+        default:
+          description: Other status codes
+          schema:
+            $ref: '#/definitions/ErrorResponse'
+  /v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions:
+    post:
+      tags:
+        - LivyApiSession
+      summary: Acquire a Spark Session.
+      operationId: LivyApiSession_AcquireSparkSessionAsync
+      consumes:
+        - application/json
+      produces:
+        - application/json
+      parameters:
+        - name: workspaceId
+          in: path
+          description: Workspace Id.
+          required: true
+          type: string
+          format: uuid
+        - name: lakehouseId
+          in: path
+          description: Lakehouse Id.
+          required: true
+          type: string
+          format: uuid
+        - name: livyApiVersion
+          in: path
+          required: true
+          type: string
+        - name: payload
+          in: body
+          description: CreateSessionRequest.
+          required: true
+          schema:
+            $ref: '#/definitions/SessionRequest'
+      responses:
+        '202':
+          description: Accepted
+          schema:
+            $ref: '#/definitions/SessionResponse'
+        default:
+          description: Other status codes
+          schema:
+            $ref: '#/definitions/ErrorResponse'
+    get:
+      tags:
+        - LivyApiSession
+      summary: List spark sessions.
+      operationId: LivyApiSession_ListSparkSessionsAsync
+      consumes: []
+      produces:
+        - application/json
+        - text/json
+      parameters:
+        - name: workspaceId
+          in: path
+          description: Workspace name.
+          required: true
+          type: string
+          format: uuid
+        - name: lakehouseId
+          in: path
+          description: Lakehouse Id.
+ required: true + type: string + format: uuid + - name: livyApiVersion + in: path + required: true + type: string + - name: $top + in: query + type: string + description: '' + - name: $skip + in: query + type: string + description: '' + - name: $count + in: query + type: boolean + description: '' + responses: + '200': + description: OK + schema: + $ref: '#/definitions/SessionResponse' + default: + description: Other status codes + schema: + $ref: '#/definitions/ErrorResponse' + /v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions/{sessionId}: + get: + tags: + - LivyApiSession + summary: Get details of a spark session. + operationId: LivyApiSession_GetSparkSessionAsync + consumes: [] + produces: + - application/json + parameters: + - name: workspaceId + in: path + description: Workspace name. + required: true + type: string + format: uuid + - name: lakehouseId + in: path + description: Lakehouse Id. + required: true + type: string + format: uuid + - name: livyApiVersion + in: path + required: true + type: string + - name: sessionId + in: path + description: Spark session Id. + required: true + type: string + format: uuid + responses: + '200': + description: OK + schema: + $ref: '#/definitions/SessionResponse' + default: + description: Other status codes + schema: + $ref: '#/definitions/ErrorResponse' + delete: + tags: + - LivyApiSession + summary: Stops and deletes a spark session. + operationId: LivyApiSession_DeleteSparkSessionAsync + consumes: [] + produces: + - application/json + - text/json + parameters: + - name: workspaceId + in: path + description: Workspace Id. + required: true + type: string + format: uuid + - name: lakehouseId + in: path + description: Lakehouse Id. + required: true + type: string + format: uuid + - name: livyApiVersion + in: path + required: true + type: string + - name: sessionId + in: path + description: Session Id. + required: true + type: string + format: uuid + responses: + '200': + description: OK + default: + description: Other status codes + schema: + $ref: '#/definitions/ErrorResponse' + /v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions/{sessionId}/statements: + post: + tags: + - LivyApiSession + summary: Execute a statement on a spark session. + operationId: LivyApiSession_ExecuteSparkSessionStatementAsync + consumes: + - application/json + - text/json + produces: + - application/json + - text/json + parameters: + - name: workspaceId + in: path + description: Workspace Id. + required: true + type: string + format: uuid + - name: lakehouseId + in: path + description: Lakehouse Id. + required: true + type: string + format: uuid + - name: livyApiVersion + in: path + required: true + type: string + - name: sessionId + in: path + description: Spark Session Id. + required: true + type: string + format: uuid + - name: payload + in: body + description: '' + required: true + schema: + $ref: '#/definitions/StatementRequest' + responses: + '200': + description: OK + schema: + $ref: '#/definitions/StatementResponse' + default: + description: Other status codes. + schema: + $ref: '#/definitions/ErrorResponse' + get: + tags: + - LivyApiSession + summary: List statements in an active session. + operationId: LivyApiSession_ListSparkSessionStatementsAsync + consumes: [] + produces: + - application/json + - text/json + parameters: + - name: workspaceId + in: path + description: Workspace name. 
+          required: true
+          type: string
+          format: uuid
+        - name: lakehouseId
+          in: path
+          description: Lakehouse Id.
+          required: true
+          type: string
+          format: uuid
+        - name: livyApiVersion
+          in: path
+          required: true
+          type: string
+        - name: sessionId
+          in: path
+          description: ''
+          required: true
+          type: string
+          format: uuid
+      responses:
+        '200':
+          description: OK
+          schema:
+            $ref: '#/definitions/StatementsResponse'
+        default:
+          description: Other status codes.
+          schema:
+            $ref: '#/definitions/ErrorResponse'
+  /v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions/{sessionId}/statements/{statementId}:
+    get:
+      tags:
+        - LivyApiSession
+      summary: Gets a spark statement from a spark session.
+      operationId: LivyApiSession_GetSparkSessionStatementAsync
+      consumes: []
+      produces:
+        - application/json
+        - text/json
+      parameters:
+        - name: workspaceId
+          in: path
+          description: Workspace Id.
+          required: true
+          type: string
+          format: uuid
+        - name: lakehouseId
+          in: path
+          description: Lakehouse Id.
+          required: true
+          type: string
+          format: uuid
+        - name: sessionId
+          in: path
+          description: Spark Session Id.
+          required: true
+          type: string
+          format: uuid
+        - name: livyApiVersion
+          in: path
+          required: true
+          type: string
+        - name: statementId
+          in: path
+          description: Statement Id.
+          required: true
+          type: integer
+          format: int32
+        - name: from
+          in: query
+          description: Offset (in bytes) from which the output should begin.
+          type: integer
+          format: int32
+        - name: size
+          in: query
+          description: Size (in bytes) of the returned output.
+          type: integer
+          format: int32
+      responses:
+        '200':
+          description: OK
+        default:
+          description: Other status codes.
+          schema:
+            $ref: '#/definitions/ErrorResponse'
+  /v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions/{sessionId}/statements/{statementId}/cancel:
+    post:
+      tags:
+        - LivyApiSession
+      summary: Cancels a statement execution.
+      operationId: LivyApiSession_CancelSparkStatementAsync
+      consumes: []
+      produces:
+        - application/json
+        - text/json
+      parameters:
+        - name: workspaceId
+          in: path
+          description: Workspace Id.
+          required: true
+          type: string
+          format: uuid
+        - name: lakehouseId
+          in: path
+          description: Lakehouse Id.
+          required: true
+          type: string
+          format: uuid
+        - name: livyApiVersion
+          in: path
+          required: true
+          type: string
+        - name: sessionId
+          in: path
+          description: Session Id.
+          required: true
+          type: string
+          format: uuid
+        - name: statementId
+          in: path
+          description: ''
+          required: true
+          type: integer
+          format: int32
+
+      responses:
+        '200':
+          description: OK
+          schema:
+            $ref: '#/definitions/StatementCancellationResponse'
+        default:
+          description: Other status codes.
+          schema:
+            $ref: '#/definitions/ErrorResponse'
+  /v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}/livyApi/versions/{livyApiVersion}/sessions/{sessionId}/reset-timeout:
+    post:
+      tags:
+        - LivyApiSession
+      summary: Resets the timeout time of a session.
+      operationId: LivyApiSession_ResetSparkSessionTimeoutAsync
+      consumes: []
+      produces:
+        - application/json
+        - text/json
+      parameters:
+        - name: workspaceId
+          in: path
+          description: Workspace Id.
+          required: true
+          type: string
+          format: uuid
+        - name: lakehouseId
+          in: path
+          description: Lakehouse Id.
+          required: true
+          type: string
+          format: uuid
+        - name: livyApiVersion
+          in: path
+          required: true
+          type: string
+        - name: sessionId
+          in: path
+          description: Session Id.
+          required: true
+          type: string
+          format: uuid
+      responses:
+        '204':
+          description: OK
+        default:
+          description: Other status codes.
+          schema:
+            $ref: '#/definitions/ErrorResponse'
+definitions:
+  StatementResponse:
+    description: Statement Response.
+    type: object
+    properties:
+      id:
+        format: int32
+        type: integer
+      code:
+        type: string
+      state:
+        enum:
+          - waiting
+          - running
+          - available
+          - Error
+          - cancelling
+          - cancelled
+        type: string
+      sourceId:
+        type: string
+      output:
+        $ref: '#/definitions/StatementOutput'
+  StatementOutput:
+    description: Statement Output.
+    type: object
+    properties:
+      status:
+        type: string
+      execution_count:
+        format: int32
+        type: integer
+      data:
+        type: object
+      ename:
+        type: string
+      evalue:
+        type: string
+      traceback:
+        type: array
+        items:
+          type: string
+  BatchRequest:
+    description: Request for running a batch job.
+    type: object
+    allOf:
+      - $ref: '#/definitions/SessionRequest'
+      - type: object
+        properties:
+          file:
+            type: string
+          className:
+            type: string
+          args:
+            type: array
+            items:
+              type: string
+          jars:
+            type: array
+            items:
+              type: string
+          files:
+            type: array
+            items:
+              type: string
+          pyFiles:
+            type: array
+            items:
+              type: string
+          archives:
+            type: array
+            items:
+              type: string
+  SessionRequest:
+    description: Request for acquiring a Session.
+    type: object
+    properties:
+      name:
+        type: string
+      archives:
+        type: array
+        items:
+          type: string
+      conf:
+        type: object
+        additionalProperties:
+          type: string
+      tags:
+        description: Gets or sets the optional tags.
+        type: object
+        additionalProperties:
+          type: string
+      driverMemory:
+        type: string
+      driverCores:
+        format: int32
+        type: integer
+      executorMemory:
+        type: string
+      executorCores:
+        format: int32
+        type: integer
+      numExecutors:
+        format: int32
+        type: integer
+  StatementRequest:
+    description: Statement Request Body.
+    type: object
+    properties:
+      code:
+        type: string
+      kind:
+        type: string
+      sourceId:
+        type: string
+  LivySessionStateInformation:
+    description: Livy Session State Information.
+    type: object
+    properties:
+      notStartedAt:
+        format: date-time
+        type: string
+      startingAt:
+        format: date-time
+        type: string
+      idleAt:
+        format: date-time
+        type: string
+      deadAt:
+        format: date-time
+        type: string
+      shuttingDownAt:
+        format: date-time
+        type: string
+      killedAt:
+        format: date-time
+        type: string
+      recoveringAt:
+        format: date-time
+        type: string
+      busyAt:
+        format: date-time
+        type: string
+      errorAt:
+        format: date-time
+        type: string
+      currentState:
+        type: string
+      jobCreationRequest:
+        $ref: '#/definitions/SessionRequest'
+  SchedulerInformation:
+    description: Scheduler Information.
+    type: object
+    properties:
+      submittedAt:
+        format: date-time
+        type: string
+      queuedAt:
+        format: date-time
+        type: string
+      scheduledAt:
+        format: date-time
+        type: string
+      endedAt:
+        format: date-time
+        type: string
+      cancellationRequestedAt:
+        format: date-time
+        type: string
+      currentState:
+        enum:
+          - Queued
+          - Scheduled
+          - Ended
+        type: string
+  SparkServicePluginInformation:
+    description: Spark Service Plugin Information.
+ type: object + properties: + preparationStartedAt: + format: date-time + type: string + resourceAcquisitionStartedAt: + format: date-time + type: string + submissionStartedAt: + format: date-time + type: string + monitoringStartedAt: + format: date-time + type: string + cleanupStartedAt: + format: date-time + type: string + currentState: + enum: + - Preparation + - ResourceAcquisition + - Queued + - Submission + - Monitoring + - Cleanup + - Ended + type: string + LivyRequestBase: + description: Livy Request Base. + type: object + properties: + name: + type: string + file: + type: string + className: + type: string + args: + type: array + items: + type: string + jars: + type: array + items: + type: string + files: + type: array + items: + type: string + pyFiles: + type: array + items: + type: string + archives: + type: array + items: + type: string + conf: + type: object + additionalProperties: + type: string + driverMemory: + type: string + driverCores: + format: int32 + type: integer + executorMemory: + type: string + executorCores: + format: int32 + type: integer + numExecutors: + format: int32 + type: integer + StatementsResponse: + description: Livy Statement Response Body. + type: object + properties: + statements: + type: array + items: + $ref: '#/definitions/StatementResponse' + total_statements: + format: int32 + type: integer + StatementCancellationResponse: + description: Livy Statement CancellationResponse. + type: object + properties: + msg: + type: string + BatchStateInformation: + description: Batch State Information. + type: object + properties: + notStartedAt: + format: date-time + type: string + startingAt: + format: date-time + type: string + runningAt: + format: date-time + type: string + deadAt: + format: date-time + type: string + successAt: + format: date-time + type: string + killedAt: + format: date-time + type: string + recoveringAt: + format: date-time + type: string + currentState: + type: string + jobCreationRequest: + $ref: '#/definitions/LivyRequestBase' + ErrorResponse: + description: The error response. + required: + - message + - errorCode + allOf: + - $ref: '#/definitions/ErrorResponseDetails' + properties: + requestId: + type: string + description: ID of the request associated with the error. + readOnly: true + moreDetails: + description: List of additional error details. + type: array + items: + $ref: '#/definitions/ErrorResponseDetails' + readOnly: true + readOnly: true + ErrorResponseDetails: + description: The error response details. + required: + - message + - errorCode + properties: + errorCode: + type: string + description: A specific identifier that provides information about an error condition, allowing for standardized communication between our service and its users. + message: + type: string + description: A human readable representation of the error. + relatedResource: + $ref: '#/definitions/ErrorRelatedResource' + ErrorRelatedResource: + description: The error related resource details object. + required: + - resourceId + - resourceType + properties: + resourceId: + type: string + description: Resource ID involved in the error. + resourceType: + type: string + description: Resource type involved in the error. + LivySparkActivityList: + description: List of batches or sessions. + type: object + properties: + items: + description: List of items. + type: array + items: + $ref: '#/definitions/LivySparkActivity' + totalCountOfMatchedItems: + format: int32 + description: Total count of matched items. 
+        type: integer
+      pageSize:
+        format: int32
+        description: Page size.
+        type: integer
+  LivySparkActivity:
+    description: Batch or Session description when getting a list.
+    type: object
+    properties:
+      id:
+        description: Activity ID.
+        type: string
+      appId:
+        description: Spark application ID.
+        type: string
+      name:
+        description: Batch or Session Name.
+        type: string
+      workspaceId:
+        description: Workspace ID.
+        type: string
+      submitterId:
+        description: Submitter ID.
+        type: string
+      submitterName:
+        description: Submitter name.
+        type: string
+      artifactId:
+        description: Artifact ID.
+        type: string
+      cancellationReason:
+        description: Cancellation reason.
+        type: string
+      result:
+        description: Job result.
+        enum:
+          - Uncertain
+          - Succeeded
+          - Failed
+          - Cancelled
+        type: string
+      submittedAt:
+        format: date-time
+        description: Submitted at time.
+        type: string
+      startedAt:
+        format: date-time
+        description: Started at time.
+        type: string
+      endedAt:
+        format: date-time
+        description: Ended at time.
+        type: string
+      errorSource:
+        description: Error source.
+        enum:
+          - System
+          - User
+          - Unknown
+          - Dependency
+        type: string
+      errorCode:
+        description: Error Code.
+        type: string
+      tags:
+        description: Optional tags.
+        type: object
+        additionalProperties:
+          type: string
+      schedulerState:
+        description: Scheduler state.
+        enum:
+          - Queued
+          - Scheduled
+          - Ended
+        type: string
+      pluginState:
+        description: Plugin state.
+        enum:
+          - Preparation
+          - ResourceAcquisition
+          - Queued
+          - Submission
+          - Monitoring
+          - Cleanup
+          - Ended
+        type: string
+      livyState:
+        description: Gets or sets livy state.
+        type: string
+      isJobTimedOut:
+        description: If job is timed out.
+        type: boolean
+  BatchResponse:
+    description: Class that represents livy responses for batch.
+    type: object
+    properties:
+      livyInfo:
+        $ref: '#/definitions/BatchStateInformation'
+      fabricBatchStateInfo:
+        $ref: '#/definitions/BatchStateInfo'
+      name:
+        description: Name of the batch.
+        type: string
+      id:
+        description: ID created for the batch.
+        type: string
+      appId:
+        description: Application id of this batch.
+        type: string
+      appInfo:
+        description: Detailed application info.
+        type: object
+        additionalProperties:
+          type: string
+      artifactId:
+        description: Hosting artifact Id.
+        type: string
+      errorInfo:
+        description: Detailed error information.
+        type: array
+        items:
+          $ref: '#/definitions/ErrorInformation'
+      jobType:
+        description: Spark job type.
+        enum:
+          - SparkBatch
+          - SparkSession
+          - ScopeBatch
+          - JupyterEnvironment
+        type: string
+      submitterId:
+        description: ID of the user who submitted the session.
+        type: string
+      submitterName:
+        description: Name of the user who submitted the session.
+        type: string
+      log:
+        description: Log lines.
+        type: array
+        items:
+          type: string
+      pluginInfo:
+        $ref: '#/definitions/SparkServicePluginInformation'
+      schedulerInfo:
+        $ref: '#/definitions/SchedulerInformation'
+      state:
+        description: State of the batch or session.
+        enum:
+          - starting
+          - running
+          - dead
+          - success
+          - killed
+          - idle
+          - error
+          - shutting_down
+          - not_started
+          - busy
+          - recovering
+          - submitting
+          - not_submitted
+        type: string
+      tags:
+        description: Optional tags.
+        type: object
+        additionalProperties:
+          type: string
+      result:
+        enum:
+          - Uncertain
+          - Succeeded
+          - Failed
+          - Cancelled
+        type: string
+      cancellationReason:
+        description: Cancellation reason.
+        type: string
+  BatchStateInfo:
+    description: Batch state info.
+    type: object
+    properties:
+      state:
+        description: State of the batch acquisition.
+        enum:
+          - unknown
+          - expired
+          - queued
+          - libraryPackaging
+          - submitting
+          - cancelling
+          - cancelled
+          - error
+        type: string
+      errorMessage:
+        description: Error message if the state is in "error".
+        type: string
+  ErrorInformation:
+    description: Error Information.
+    type: object
+    properties:
+      message:
+        type: string
+      errorCode:
+        type: string
+      source:
+        enum:
+          - System
+          - User
+          - Unknown
+          - Dependency
+        type: string
+  SessionResponse:
+    type: object
+    properties:
+      fabricSessionStateInfo:
+        $ref: '#/definitions/SessionStateInfo'
+      livyInfo:
+        $ref: '#/definitions/LivySessionStateInformation'
+      name:
+        description: Name of the session.
+        type: string
+      id:
+        description: Session ID created for the session.
+        type: string
+      appId:
+        description: The application id of this session.
+        type: string
+      appInfo:
+        description: Detailed application info.
+        type: object
+        additionalProperties:
+          type: string
+      artifactId:
+        description: Hosting artifact ID.
+        type: string
+      errorInfo:
+        description: Detailed error information.
+        type: array
+        items:
+          $ref: '#/definitions/ErrorInformation'
+      jobType:
+        description: Spark job type.
+        enum:
+          - SparkBatch
+          - SparkSession
+          - ScopeBatch
+          - JupyterEnvironment
+        type: string
+      submitterId:
+        description: ID of the user who submitted the session.
+        type: string
+      submitterName:
+        description: Name of the user who submitted the session.
+        type: string
+      log:
+        description: Log lines.
+        type: array
+        items:
+          type: string
+      pluginInfo:
+        $ref: '#/definitions/SparkServicePluginInformation'
+      schedulerInfo:
+        $ref: '#/definitions/SchedulerInformation'
+      state:
+        description: Gets or sets the session state.
+        enum:
+          - starting
+          - running
+          - dead
+          - success
+          - killed
+          - idle
+          - error
+          - shutting_down
+          - not_started
+          - busy
+          - recovering
+          - submitting
+          - not_submitted
+        type: string
+      tags:
+        description: Optional tags.
+        type: object
+        additionalProperties:
+          type: string
+      result:
+        enum:
+          - Uncertain
+          - Succeeded
+          - Failed
+          - Cancelled
+        type: string
+      cancellationReason:
+        description: Cancellation reason.
+        type: string
+  SessionStateInfo:
+    description: Session state info.
+    type: object
+    properties:
+      state:
+        description: Session acquisition state.
+        enum:
+          - queued
+          - libraryPackaging
+          - acquiringSession
+          - cancelling
+          - cancelled
+          - error
+          - unknown
+        type: string
+      errorMessage:
+        description: Error message when in "error" state.
+        type: string
diff --git a/apache/livy/ExposedUI/python/fabric/test_fabric..ipynb b/apache/livy/ExposedUI/python/fabric/test_fabric..ipynb
new file mode 100644
index 00000000..d713ff1b
--- /dev/null
+++ b/apache/livy/ExposedUI/python/fabric/test_fabric..ipynb
@@ -0,0 +1,296 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Authenticate and Request a token"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from msal import PublicClientApplication\n",
+    "from dotenv import load_dotenv\n",
+    "import os\n",
+    "import requests\n",
+    "import time\n",
+    "\n",
+    "# Load environment variables from .env file\n",
+    "load_dotenv()\n",
+    "tenant_id = os.getenv('TENANT_ID')\n",
+    "client_id = os.getenv('CLIENT_ID')\n",
+    "workspace_id = os.getenv('WORKSPACE_ID')\n",
+    "lakehouse_id = os.getenv('LAKEHOUSE_ID')\n",
+    "redirect_url_port = os.getenv('REDIRECT_URL_PORT')\n",
+    "api_version = os.getenv('API_VERSION')\n",
+    "\n",
+    "app = PublicClientApplication(\n",
+    "    client_id,\n",
+    "    authority=f\"https://login.microsoftonline.com/{tenant_id}\",\n",
+    ")\n",
+    "\n",
+    "result = None\n",
+    "\n",
+    "# If there is no cached token, acquire one interactively\n",
+    "if not result:\n",
+    "    result = app.acquire_token_interactive(scopes=[\"https://api.fabric.microsoft.com/Lakehouse.Execute.All\", \"https://api.fabric.microsoft.com/Lakehouse.Read.All\", \"https://api.fabric.microsoft.com/Item.ReadWrite.All\", \n",
+    "    \"https://api.fabric.microsoft.com/Workspace.ReadWrite.All\", \"https://api.fabric.microsoft.com/Code.AccessStorage.All\", \"https://api.fabric.microsoft.com/Code.AccessAzureKeyvault.All\", \n",
+    "    \"https://api.fabric.microsoft.com/Code.AccessAzureDataExplorer.All\", \"https://api.fabric.microsoft.com/Code.AccessAzureDataLake.All\", \"https://api.fabric.microsoft.com/Code.AccessFabric.All\"],\n",
+    "    port=f\"{redirect_url_port}\")\n",
+    "\n",
+    "# Get the access token\n",
+    "if \"access_token\" in result:\n",
+    "    access_token = result[\"access_token\"]\n",
+    "else:\n",
+    "    print(result.get(\"error\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(access_token)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Request a Livy Session"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The request to create the Livy session is submitted:{'id': '462fe66a-9858-408c-813c-b848da3d9e60', 'artifactId': '0db8ae59-a739-4b50-844c-ee2be3519871'}\n",
+      "462fe66a-9858-408c-813c-b848da3d9e60\n"
+     ]
+    }
+   ],
+   "source": [
+    "if access_token:\n",
+    "    api_base_url_mist='https://api.fabric.microsoft.com/v1'\n",
+    "    livy_base_url = api_base_url_mist + \"/workspaces/\"+workspace_id+\"/lakehouses/\"+lakehouse_id +\"/livyApi/versions/\"+api_version+\"/sessions\"\n",
+    "    headers = {\"Authorization\": \"Bearer \" + access_token}\n",
+    "\n",
+    "# Create a Livy session\n",
+    "create_livy_session = requests.post(livy_base_url, headers=headers, json={\n",
+    "    \"name\": \"test pyspark session from python code\",\n",
+    "    \"archives\": [],\n",
+    "    \"conf\": { \n",
+    "    },\n",
+    "    \"tags\": {\n",
+    "    },\n",
+    "    \"driverMemory\": \"7g\",\n",
+    "    \"driverCores\": 1,\n",
+    "    \"executorMemory\": \"7g\",\n",
+    "    \"executorCores\": 1,\n",
+    "    \"numExecutors\": 2\n",
+    "})\n",
+    "print('The request to create
the Livy session is submitted:' + str(create_livy_session.json()))\n", + "\n", + "livy_session_id = create_livy_session.json()['id']\n", + "print(livy_session_id)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## List Livy Sessions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "livy_session_url = livy_base_url\n", + "get_sessions_response = requests.get(livy_session_url, headers=headers)\n", + "print(get_sessions_response.json())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get details of a Livy Session" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "livy_session_url = livy_base_url + \"/\" + livy_session_id\n", + "get_session_response = requests.get(livy_session_url, headers=headers)\n", + "\n", + "while get_session_response.json()[\"state\"] != \"idle\":\n", + " time.sleep(5)\n", + " get_session_response = requests.get(livy_session_url, headers=headers)\n", + " print(get_session_response.json())\n", + " \n", + "print(get_session_response.json())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Execute a statement on a Spark session - Local dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# call get session API\n", + "livy_session_url = livy_base_url + \"/\" + livy_session_id\n", + "get_session_response = requests.get(livy_session_url, headers=headers)\n", + "print(get_session_response.json())\n", + "while get_session_response.json()[\"state\"] != \"idle\":\n", + " time.sleep(5)\n", + " get_session_response = requests.get(livy_session_url, headers=headers)\n", + "\n", + "execute_statement = livy_session_url + \"/statements\"\n", + "code =\"\"\"\n", + "df = spark.createDataFrame([{\"id\": 1, \"name\": \"Mounir\"}])\n", + "df.show()\n", + "\"\"\"\n", + "execute_statement_response = requests.post(execute_statement, headers=headers, json={\n", + " \"code\": f\"{code}\",\n", + " \"kind\": \"pyspark\"\n", + " })\n", + "print('the statement code is submitted as: ' + str(execute_statement_response.json()))\n", + "\n", + "statement_id = str(execute_statement_response.json()['id'])\n", + "get_statement = livy_session_url+ \"/statements/\" + statement_id\n", + "get_statement_response = requests.get(get_statement, headers=headers)\n", + "\n", + "while get_statement_response.json()[\"state\"] != \"available\":\n", + " # Sleep for 5 seconds before making the next request\n", + " time.sleep(5)\n", + " print('the statement code is submitted and running : ' + str(execute_statement_response.json()))\n", + "\n", + " # Make the next request\n", + " get_statement_response = requests.get(get_statement, headers=headers)\n", + "\n", + "rst = get_statement_response.json()['output']['data']['text/plain']\n", + "print(rst)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Execute a statement on a Spark session - Data on the LakeHouse" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# call get session API\n", + "livy_session_url = livy_base_url + \"/\" + livy_session_id\n", + "get_session_response = requests.get(livy_session_url, headers=headers)\n", + "print(get_session_response.json())\n", + "while get_session_response.json()[\"state\"] != \"idle\":\n", + " time.sleep(5)\n", + " get_session_response = 
requests.get(livy_session_url, headers=headers)\n",
+    "\n",
+    "execute_statement = livy_session_url + \"/statements\"\n",
+    "code =\"\"\"\n",
+    "df = spark.sql(\"SELECT count(*) as Total, AGE FROM person GROUP BY AGE\")\n",
+    "df.show()\n",
+    "\"\"\"\n",
+    "execute_statement_response = requests.post(execute_statement, headers=headers, json={\n",
+    "    \"code\": f\"{code}\",\n",
+    "    \"kind\": \"pyspark\"\n",
+    "    })\n",
+    "print('the statement code is submitted as: ' + str(execute_statement_response.json()))\n",
+    "\n",
+    "statement_id = str(execute_statement_response.json()['id'])\n",
+    "get_statement = livy_session_url+ \"/statements/\" + statement_id\n",
+    "get_statement_response = requests.get(get_statement, headers=headers)\n",
+    "\n",
+    "while get_statement_response.json()[\"state\"] != \"available\":\n",
+    "    # Sleep for 5 seconds before making the next request\n",
+    "    time.sleep(5)\n",
+    "    print('the statement code is submitted and running : ' + str(execute_statement_response.json()))\n",
+    "\n",
+    "    # Make the next request\n",
+    "    get_statement_response = requests.get(get_statement, headers=headers)\n",
+    "\n",
+    "rst = get_statement_response.json()['output']['data']['text/plain']\n",
+    "print(rst)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Stop and delete a Livy Session"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "livy_session_url = livy_base_url + \"/\" + livy_session_id\n",
+    "\n",
+    "delete_session_response = requests.delete(livy_session_url, headers=headers)\n",
+    "print(delete_session_response)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "python3.10.11",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/apache/livy/ExposedUI/python/fabric/test_fabric.py b/apache/livy/ExposedUI/python/fabric/test_fabric.py
new file mode 100644
index 00000000..0bdcb651
--- /dev/null
+++ b/apache/livy/ExposedUI/python/fabric/test_fabric.py
@@ -0,0 +1,45 @@
+
+from msal import PublicClientApplication
+from dotenv import load_dotenv
+import os
+import requests
+import time
+
+# Load environment variables from .env file
+load_dotenv()
+tenant_id = os.getenv('TENANT_ID')
+client_id = os.getenv('CLIENT_ID')
+workspace_id = os.getenv('WORKSPACE_ID')
+lakehouse_id = os.getenv('LAKEHOUSE_ID')
+redirect_url_port = os.getenv('REDIRECT_URL_PORT')
+api_version = os.getenv('API_VERSION')
+
+app = PublicClientApplication(
+    client_id,
+    authority=f"https://login.microsoftonline.com/{tenant_id}",
+)
+
+result = None
+
+# If there is no cached token, acquire one interactively
+if not result:
+    result = app.acquire_token_interactive(scopes=["https://api.fabric.microsoft.com/Lakehouse.Execute.All", "https://api.fabric.microsoft.com/Lakehouse.Read.All", "https://api.fabric.microsoft.com/Item.ReadWrite.All",
+                                                   "https://api.fabric.microsoft.com/Workspace.ReadWrite.All", "https://api.fabric.microsoft.com/Code.AccessStorage.All", "https://api.fabric.microsoft.com/Code.AccessAzureKeyvault.All",
+                                                   "https://api.fabric.microsoft.com/Code.AccessAzureDataExplorer.All", "https://api.fabric.microsoft.com/Code.AccessAzureDataLake.All", "https://api.fabric.microsoft.com/Code.AccessFabric.All"],
+                                           port=f"{redirect_url_port}")
+
+# Get the access token
+if "access_token" in result:
+    access_token = result["access_token"]
+else:
+    print(result.get("error"))
+
+if access_token:
+    api_base_url_mist='https://api.fabric.microsoft.com/v1'
+    livy_base_url = api_base_url_mist + "/workspaces/"+workspace_id+"/lakehouses/"+lakehouse_id +"/livyApi/versions/"+api_version+"/sessions"
+    headers = {"Authorization": "Bearer " + access_token}
+
+# List Livy sessions
+livy_session_url = livy_base_url
+get_sessions_response = requests.get(livy_session_url, headers=headers)
+print(get_sessions_response.json())
diff --git a/apache/livy/ExposedUI/python/livy/delete_session.py b/apache/livy/ExposedUI/python/livy/delete_session.py
new file mode 100644
index 00000000..b0c5047b
--- /dev/null
+++ b/apache/livy/ExposedUI/python/livy/delete_session.py
@@ -0,0 +1,5 @@
+import json, pprint, requests, textwrap
+host = 'http://localhost:8998'
+
+r = requests.delete(host + '/sessions/1')
+pprint.pprint(r.json())
diff --git a/apache/livy/ExposedUI/python/livy/init_java_gateway.py b/apache/livy/ExposedUI/python/livy/init_java_gateway.py
new file mode 100644
index 00000000..2304f509
--- /dev/null
+++ b/apache/livy/ExposedUI/python/livy/init_java_gateway.py
@@ -0,0 +1,26 @@
+import json, pprint, requests, textwrap
+host = 'http://localhost:8998'
+headers = {'Content-Type': 'application/json'}
+statements_url = host + '/sessions/1/statements'
+
+data = {
+    'code': textwrap.dedent("""
+    # from https://stackoverflow.com/questions/65713299/javapackage-object-is-not-callable-error-executing-explain-in-pyspark-3-0
+    # from https://github.com/apache/spark/blob/87bf6b0ea4ca0618c8604895d05037edce8b7cb0/python/pyspark/java_gateway.py#L153
+
+    from py4j.java_gateway import java_import
+    java_import(spark._sc._jvm, "org.apache.spark.SparkConf")
+    java_import(spark._sc._jvm, "org.apache.spark.api.java.*")
+    java_import(spark._sc._jvm, "org.apache.spark.api.python.*")
+    java_import(spark._sc._jvm, "org.apache.spark.ml.python.*")
+    java_import(spark._sc._jvm, "org.apache.spark.mllib.api.python.*")
+    java_import(spark._sc._jvm, "org.apache.spark.resource.*")
+
+    java_import(spark._sc._jvm, "org.apache.spark.sql.*")
+    java_import(spark._sc._jvm, "org.apache.spark.sql.api.python.*")
+    java_import(spark._sc._jvm, "org.apache.spark.sql.hive.*")
+    """)
+}
+
+r = requests.post(statements_url, data=json.dumps(data), headers=headers)
+pprint.pprint(r.json())
diff --git a/apache/livy/ExposedUI/python/livy/run_code.py b/apache/livy/ExposedUI/python/livy/run_code.py
new file mode 100644
index 00000000..464da289
--- /dev/null
+++ b/apache/livy/ExposedUI/python/livy/run_code.py
@@ -0,0 +1,15 @@
+import json, pprint, requests, textwrap
+host = 'http://localhost:8998'
+headers = {'Content-Type': 'application/json'}
+statements_url = host + '/sessions/1/statements'
+
+data = {
+    'code': textwrap.dedent("""
+    df = spark.createDataFrame([{"id": 1, "name": "Mounir"}])
+
+    df.show()
+    """)
+}
+
+r = requests.post(statements_url, data=json.dumps(data), headers=headers)
+pprint.pprint(r.json())
diff --git a/apache/livy/ExposedUI/python/livy/run_code_external_file.py b/apache/livy/ExposedUI/python/livy/run_code_external_file.py
new file mode 100644
index 00000000..53085659
--- /dev/null
+++ b/apache/livy/ExposedUI/python/livy/run_code_external_file.py
@@ -0,0 +1,16 @@
+import json, pprint, requests, textwrap
+host = 
'http://localhost:8998' +headers = {'Content-Type': 'application/json'} +statements_url = host + '/sessions/1/statements' + +external_python_file = './src/external_file.py' + +with open(external_python_file, 'r') as file: + pyspark_code = file.read() + +data = { + 'code': pyspark_code +} + +r = requests.post(statements_url, data=json.dumps(data), headers=headers) +pprint.pprint(r.json()) diff --git a/apache/livy/ExposedUI/python/livy/src/external_file.py b/apache/livy/ExposedUI/python/livy/src/external_file.py new file mode 100644 index 00000000..def07fb9 --- /dev/null +++ b/apache/livy/ExposedUI/python/livy/src/external_file.py @@ -0,0 +1,12 @@ +# running the Apache Livy example (converted into Python3 syntax) https://livy.incubator.apache.org/examples/ +# The code is running from an external python file + +import random +NUM_SAMPLES = 100000 + +def sample(p): + x, y = random.random(), random.random() + return 1 if x*x + y*y < 1 else 0 + +count = sc.parallelize(range(0, NUM_SAMPLES)).map(sample).reduce(lambda a, b: a + b) +print("Pi is roughly %f" % (4.0 * count / NUM_SAMPLES)) diff --git a/apache/livy/ExposedUI/python/livy/start_session.py b/apache/livy/ExposedUI/python/livy/start_session.py new file mode 100644 index 00000000..b810a4fe --- /dev/null +++ b/apache/livy/ExposedUI/python/livy/start_session.py @@ -0,0 +1,11 @@ +# from https://livy.apache.org/examples/ + +# requires pip install requests +import json, pprint, requests, textwrap +host = 'http://localhost:8998' +headers = {'Content-Type': 'application/json'} + +data = {'kind': 'pyspark', 'name': 'test pyspark session from python code', 'proxyUser': 'Mounir', 'executorMemory': '2g'} + +r = requests.post(host + '/sessions', data=json.dumps(data), headers=headers) +pprint.pprint(r.json()) diff --git a/apache/livy/ExposedUI/python/livy/wait_for_idle.py b/apache/livy/ExposedUI/python/livy/wait_for_idle.py new file mode 100644 index 00000000..32b9bd66 --- /dev/null +++ b/apache/livy/ExposedUI/python/livy/wait_for_idle.py @@ -0,0 +1,5 @@ +import json, pprint, requests, textwrap +host = 'http://localhost:8998' + +r = requests.get(host + '/sessions/1') +pprint.pprint(r.json()) diff --git a/apache/livy/ExposedUI/python/spark-submit/test_pandas.py b/apache/livy/ExposedUI/python/spark-submit/test_pandas.py new file mode 100644 index 00000000..bf8de60b --- /dev/null +++ b/apache/livy/ExposedUI/python/spark-submit/test_pandas.py @@ -0,0 +1,20 @@ +import pandas as pd + +from pyspark.sql import SparkSession + +app_name = "simple-app-pandas" + +spark = SparkSession.builder.appName(app_name).getOrCreate() + +# Creating a DataFrame from a dictionary +data = { + 'Name': ['Alice', 'Bob', 'Charlie'], + 'Age': [25, 30, 35], + 'City': ['New York', 'Los Angeles', 'Chicago'] +} + +df = pd.DataFrame(data) +print(df) + +spark.stop() + diff --git a/apache/livy/ExposedUI/python/spark-submit/test_spark.py b/apache/livy/ExposedUI/python/spark-submit/test_spark.py new file mode 100644 index 00000000..5c6f6e95 --- /dev/null +++ b/apache/livy/ExposedUI/python/spark-submit/test_spark.py @@ -0,0 +1,11 @@ +from pyspark.sql import SparkSession + +app_name = "simple-app" + +spark = SparkSession.builder.appName(app_name).getOrCreate() + +df = spark.createDataFrame([{"id": 1, "name": "Mounir"}]) + +df.show() + +spark.stop() \ No newline at end of file diff --git a/apache/livy/ExposedUI/requirements.txt b/apache/livy/ExposedUI/requirements.txt new file mode 100644 index 00000000..d9350b29 --- /dev/null +++ b/apache/livy/ExposedUI/requirements.txt @@ -0,0 +1,4 @@ +pandas 
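+# requests drives the Livy REST calls below; msal and python-dotenv are used by the Microsoft Fabric examples (interactive Entra ID login and .env loading)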
+requests +msal +python-dotenv \ No newline at end of file diff --git a/apache/livy/ExposedUI/spark/Dockerfile b/apache/livy/ExposedUI/spark/Dockerfile new file mode 100644 index 00000000..803d3f1c --- /dev/null +++ b/apache/livy/ExposedUI/spark/Dockerfile @@ -0,0 +1,23 @@ +# https://github.com/apache/spark-docker/tree/master +# https://hub.docker.com/_/spark/tags +# Simulating Microsoft Fabric Runtime 1.3(with Apache Livy), Spark 3.5, Java 11 +FROM spark:3.5.4-python3 +USER root +RUN apt-get update && apt-get install -y curl + +# Install Python Dependencies +COPY ./requirements.txt /opt/ +RUN pip install -r /opt/requirements.txt + +# Apache Livy exception using Java 17 +# Exception in thread "main" java.util.concurrent.ExecutionException: javax.security.sasl.SaslException: Client closed before SASL negotiation finished. + +# Using Java 11 +# https://jdk.java.net/archive/ +WORKDIR /opt/java/ +RUN curl "https://download.java.net/java/GA/jdk11/9/GPL/openjdk-11.0.2_linux-x64_bin.tar.gz" -o openjdk-update.tar.gz \ + && tar -xzf "openjdk-update.tar.gz" \ + && rm -rf openjdk openjdk-update.tar.gz \ + && mv jdk-* openjdk + +USER spark
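+
+# Note: the rm/mv above swaps the JDK under /opt/java/openjdk in place, so a JAVA_HOME of
+# /opt/java/openjdk inherited from the base spark image (an assumption about that image's
+# layout) continues to point at the freshly installed Java 11.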