diff --git a/spark-minimal/Dockerfile b/spark-minimal/Dockerfile index dc85e91..0fa9bc6 100644 --- a/spark-minimal/Dockerfile +++ b/spark-minimal/Dockerfile @@ -1,28 +1,27 @@ -FROM ubuntu:16.04 -MAINTAINER Mark Mims +FROM ubuntu:20.04 +MAINTAINER Mark Mims , Alexander To -ENV SPARK_VERSION 2.2.0 -ENV SPARK_HADOOP_VERSION 2.6 +ENV SPARK_VERSION 3.1.1 +ENV SPARK_HADOOP_VERSION 2.7 ENV SPARK_HOME /spark-$SPARK_VERSION-bin-hadoop$SPARK_HADOOP_VERSION -ENV JAVA_HOME /usr/lib/jvm/java-8-oracle +ENV JAVA_HOME /usr/lib/jvm/java-11-openjdk-amd64/ ENV SPARK_TEMPLATE_PATH $SPARK_HOME/templates ENV SPARK_CONF_PATH $SPARK_HOME/conf ENV PATH $SPARK_HOME/bin:$PATH -RUN echo oracle-java8-installer shared/accepted-oracle-license-v1-1 select true | debconf-set-selections \ -&& apt-get update \ +RUN apt-get update \ && apt-get upgrade -y \ && apt-get install -y software-properties-common \ -&& add-apt-repository -y ppa:webupd8team/java \ +&& add-apt-repository -y ppa:openjdk-r/ppa \ && apt-key adv --keyserver keyserver.ubuntu.com --recv E56151BF \ && apt-get update \ && apt-get install -y \ curl \ dnsutils \ - oracle-java8-installer \ + openjdk-11-jdk \ && apt-get purge -y software-properties-common \ && apt-get autoremove -y \ && curl -OL http://www-us.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop$SPARK_HADOOP_VERSION.tgz \ @@ -31,7 +30,7 @@ RUN echo oracle-java8-installer shared/accepted-oracle-license-v1-1 select true COPY *-site.xml $SPARK_TEMPLATE_PATH/ #COPY *.properties $SPARK_CONF_PATH/ -#COPY spark-defaults.conf $SPARK_CONF_PATH +COPY spark-defaults.conf $SPARK_CONF_PATH #COPY spark-env.sh $SPARK_CONF_PATH COPY jars/* $SPARK_HOME/jars/ diff --git a/spark-minimal/Makefile b/spark-minimal/Makefile index d7e0c27..5d8bfed 100644 --- a/spark-minimal/Makefile +++ b/spark-minimal/Makefile @@ -2,7 +2,7 @@ default: version list NAMESPACE=midsw205 IMAGE=$(shell basename $(PWD)) -VERSION=0.0.6 +VERSION=0.0.7 version: show-version show-version: diff --git a/spark-minimal/jars/com.github.luben_zstd--jni_1.4.9-1.jar b/spark-minimal/jars/com.github.luben_zstd--jni_1.4.9-1.jar new file mode 100644 index 0000000..1255741 Binary files /dev/null and b/spark-minimal/jars/com.github.luben_zstd--jni_1.4.9-1.jar differ diff --git a/spark-minimal/jars/org.apache.commons_commons-pool2_2.6.2.jar b/spark-minimal/jars/org.apache.commons_commons-pool2_2.6.2.jar new file mode 100644 index 0000000..4233f08 Binary files /dev/null and b/spark-minimal/jars/org.apache.commons_commons-pool2_2.6.2.jar differ diff --git a/spark-minimal/jars/org.apache.kafka_connect-api_2.7.0.jar b/spark-minimal/jars/org.apache.kafka_connect-api_2.7.0.jar new file mode 100644 index 0000000..09edff5 Binary files /dev/null and b/spark-minimal/jars/org.apache.kafka_connect-api_2.7.0.jar differ diff --git a/spark-minimal/jars/org.apache.kafka_kafka-clients-0.10.0.1.jar b/spark-minimal/jars/org.apache.kafka_kafka-clients-0.10.0.1.jar deleted file mode 100644 index 4ae5c93..0000000 Binary files a/spark-minimal/jars/org.apache.kafka_kafka-clients-0.10.0.1.jar and /dev/null differ diff --git a/spark-minimal/jars/org.apache.kafka_kafka-clients-2.7.0.jar b/spark-minimal/jars/org.apache.kafka_kafka-clients-2.7.0.jar new file mode 100644 index 0000000..72dc582 Binary files /dev/null and b/spark-minimal/jars/org.apache.kafka_kafka-clients-2.7.0.jar differ diff --git a/spark-minimal/jars/org.apache.spark_spark-sql-kafka-0-10_2.11-2.2.0.jar b/spark-minimal/jars/org.apache.spark_spark-sql-kafka-0-10_2.11-2.2.0.jar deleted file mode 100644 index fea5580..0000000 Binary files a/spark-minimal/jars/org.apache.spark_spark-sql-kafka-0-10_2.11-2.2.0.jar and /dev/null differ diff --git a/spark-minimal/jars/org.apache.spark_spark-sql-kafka-0-10_2.12-3.1.1.jar b/spark-minimal/jars/org.apache.spark_spark-sql-kafka-0-10_2.12-3.1.1.jar new file mode 100644 index 0000000..ced212e Binary files /dev/null and b/spark-minimal/jars/org.apache.spark_spark-sql-kafka-0-10_2.12-3.1.1.jar differ diff --git a/spark-minimal/jars/org.apache.spark_spark-streaming-kafka-0-10_2.12-2.4.0.jar b/spark-minimal/jars/org.apache.spark_spark-streaming-kafka-0-10_2.12-2.4.0.jar new file mode 100644 index 0000000..50a70af Binary files /dev/null and b/spark-minimal/jars/org.apache.spark_spark-streaming-kafka-0-10_2.12-2.4.0.jar differ diff --git a/spark-minimal/jars/org.apache.spark_spark-tags_2.11-2.2.0.jar b/spark-minimal/jars/org.apache.spark_spark-tags_2.11-2.2.0.jar deleted file mode 100644 index d99acfb..0000000 Binary files a/spark-minimal/jars/org.apache.spark_spark-tags_2.11-2.2.0.jar and /dev/null differ diff --git a/spark-minimal/jars/org.apache.spark_spark-tags_2.12-3.1.1.jar b/spark-minimal/jars/org.apache.spark_spark-tags_2.12-3.1.1.jar new file mode 100644 index 0000000..072c8a5 Binary files /dev/null and b/spark-minimal/jars/org.apache.spark_spark-tags_2.12-3.1.1.jar differ diff --git a/spark-minimal/jars/org.apache.spark_spark-token-provider-kafka-0-10_2.12_3.1.1.jar b/spark-minimal/jars/org.apache.spark_spark-token-provider-kafka-0-10_2.12_3.1.1.jar new file mode 100644 index 0000000..4728ee9 Binary files /dev/null and b/spark-minimal/jars/org.apache.spark_spark-token-provider-kafka-0-10_2.12_3.1.1.jar differ diff --git a/spark-minimal/jars/org.slf4j_slf4j-api-1.7.16.jar b/spark-minimal/jars/org.slf4j_slf4j-api-1.7.16.jar deleted file mode 100644 index 6828595..0000000 Binary files a/spark-minimal/jars/org.slf4j_slf4j-api-1.7.16.jar and /dev/null differ diff --git a/spark-minimal/jars/org.slf4j_slf4j-api-1.7.3.jar b/spark-minimal/jars/org.slf4j_slf4j-api-1.7.3.jar new file mode 100644 index 0000000..6bb83be Binary files /dev/null and b/spark-minimal/jars/org.slf4j_slf4j-api-1.7.3.jar differ diff --git a/spark-minimal/jars/org.xerial.snappy_snappy-java-1.1.2.6.jar b/spark-minimal/jars/org.xerial.snappy_snappy-java-1.1.2.6.jar deleted file mode 100644 index 5c354d1..0000000 Binary files a/spark-minimal/jars/org.xerial.snappy_snappy-java-1.1.2.6.jar and /dev/null differ diff --git a/spark-minimal/jars/org.xerial.snappy_snappy-java-1.1.8.4.jar b/spark-minimal/jars/org.xerial.snappy_snappy-java-1.1.8.4.jar new file mode 100644 index 0000000..aa5231e Binary files /dev/null and b/spark-minimal/jars/org.xerial.snappy_snappy-java-1.1.8.4.jar differ diff --git a/spark-minimal/spark-defaults.conf b/spark-minimal/spark-defaults.conf new file mode 100644 index 0000000..67ecfe6 --- /dev/null +++ b/spark-minimal/spark-defaults.conf @@ -0,0 +1,23 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Default system properties included when running spark-submit. +# This is useful for setting default environmental settings. + +#spark.jars jars/org.apache.spark_spark-sql-kafka-0-10_2.12-3.1.1.jar +spark.jars.packages org.apache.spark:spark-sql-kafka-0-10_2.12:3.1.1 +#spark.jars.ivy /root/spark-3.1.1-bin-hadoop2.7/ diff --git a/spark-python/Dockerfile b/spark-python/Dockerfile index 172bbe2..55de06c 100644 --- a/spark-python/Dockerfile +++ b/spark-python/Dockerfile @@ -1,8 +1,8 @@ -FROM midsw205/spark-minimal:0.0.6 -MAINTAINER Mark Mims +FROM atox120/spark-minimal:0.0.8 +MAINTAINER Mark Mims , Alexander To -ENV ANACONDA_VERSION 4.4.0 -ENV PY4J_VERSION 0.10.4 +ENV ANACONDA_VERSION 2020.11 +ENV PY4J_VERSION 0.10.9.2 WORKDIR / @@ -11,7 +11,7 @@ RUN apt-get install -y bzip2 \ && chmod 755 Anaconda3-$ANACONDA_VERSION-Linux-x86_64.sh \ && /Anaconda3-$ANACONDA_VERSION-Linux-x86_64.sh -b -p /opt/anaconda3 \ && rm /Anaconda3-$ANACONDA_VERSION-Linux-x86_64.sh \ - && /opt/anaconda3/bin/conda install pyarrow arrow-cpp parquet-cpp -c conda-forge + && /opt/anaconda3/bin/conda install pyarrow parquet-cpp arrow-cpp -c conda-forge ENV PATH /opt/anaconda3/bin:$PATH ENV PYTHONPATH $SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-$PY4J_VERSION-src.zip diff --git a/spark-python/Makefile b/spark-python/Makefile index d7e0c27..5d8bfed 100644 --- a/spark-python/Makefile +++ b/spark-python/Makefile @@ -2,7 +2,7 @@ default: version list NAMESPACE=midsw205 IMAGE=$(shell basename $(PWD)) -VERSION=0.0.6 +VERSION=0.0.7 version: show-version show-version: