diff --git a/.github/workflows/mvn-ci-build.yml b/.github/workflows/mvn-ci-build.yml
index c9337e81a..5320647cc 100644
--- a/.github/workflows/mvn-ci-build.yml
+++ b/.github/workflows/mvn-ci-build.yml
@@ -44,4 +44,4 @@ jobs:
cache: maven
- name: Build all module with Maven
- run: mvn clean install -ntp -B
+ run: mvn clean install -ntp -B -T 2C
diff --git a/pom.xml b/pom.xml
index 46641deef..9efd0b942 100644
--- a/pom.xml
+++ b/pom.xml
@@ -51,6 +51,10 @@
     <module>xtable-hudi-support</module>
     <module>xtable-core</module>
     <module>xtable-utilities</module>
+    <module>xtable-delta</module>
+    <module>xtable-integration-tests</module>
+    <module>xtable-hudi</module>
+    <module>xtable-iceberg</module>
@@ -150,6 +154,16 @@
       <artifactId>parquet-avro</artifactId>
       <version>${parquet.version}</version>
     </dependency>
+    <dependency>
+      <groupId>org.apache.parquet</groupId>
+      <artifactId>parquet-column</artifactId>
+      <version>${parquet.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.parquet</groupId>
+      <artifactId>parquet-hadoop</artifactId>
+      <version>${parquet.version}</version>
+    </dependency>
@@ -318,6 +332,12 @@
       <version>${spark.version}</version>
       <scope>provided</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-catalyst_${scala.binary.version}</artifactId>
+      <version>${spark.version}</version>
+      <scope>provided</scope>
+    </dependency>
commons-cli
@@ -683,6 +703,11 @@
false
-Xmx1024m
120
+
+ ${maven.multiModuleProjectDirectory}
+ ${project.version}
+ ${scala.binary.version}
+
diff --git a/xtable-core/pom.xml b/xtable-core/pom.xml
index f277495e7..d06100dd4 100644
--- a/xtable-core/pom.xml
+++ b/xtable-core/pom.xml
@@ -34,75 +34,6 @@
xtable-api
${project.version}
-
- org.apache.xtable
- xtable-hudi-support-utils
- ${project.version}
-
-
- com.fasterxml.jackson.core
- jackson-core
-
-
- com.fasterxml.jackson.core
- jackson-databind
-
-
- com.fasterxml.jackson.module
- jackson-module-scala_${scala.binary.version}
-
-
- com.google.guava
- guava
-
-
-
-
- org.apache.avro
- avro
-
-
-
-
- org.scala-lang
- scala-library
-
-
-
-
- org.apache.hudi
- hudi-spark${spark.version.prefix}-bundle_${scala.binary.version}
- test
-
-
- org.apache.hudi
- hudi-common
-
-
- org.apache.hudi
- hudi-java-client
-
-
-
-
- org.apache.iceberg
- iceberg-core
-
-
- org.apache.iceberg
- iceberg-api
-
-
-
-
- io.delta
- delta-core_${scala.binary.version}
-
-
- io.delta
- delta-standalone_${scala.binary.version}
- test
-
@@ -116,27 +47,6 @@
org.apache.logging.log4j
log4j-api
-
- org.apache.logging.log4j
- log4j-1.2-api
-
-
-
-
- org.apache.iceberg
- iceberg-spark-runtime-${spark.version.prefix}_${scala.binary.version}
- test
-
-
- org.apache.spark
- spark-core_${scala.binary.version}
- provided
-
-
- org.apache.spark
- spark-sql_${scala.binary.version}
- provided
-
@@ -173,5 +83,27 @@
log4j-slf4j2-impl
test
+
+
+ com.google.guava
+ guava
+ test
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-jar-plugin
+
+
+
+ test-jar
+
+
+
+
+
+
diff --git a/xtable-core/src/test/java/org/apache/xtable/GenericTable.java b/xtable-core/src/test/java/org/apache/xtable/GenericTable.java
index dce0f21ab..98be8d15b 100644
--- a/xtable-core/src/test/java/org/apache/xtable/GenericTable.java
+++ b/xtable-core/src/test/java/org/apache/xtable/GenericTable.java
@@ -18,20 +18,10 @@
package org.apache.xtable;
-import static org.apache.xtable.model.storage.TableFormat.DELTA;
-import static org.apache.xtable.model.storage.TableFormat.HUDI;
-import static org.apache.xtable.model.storage.TableFormat.ICEBERG;
-
-import java.nio.file.Path;
import java.util.Arrays;
import java.util.List;
import java.util.UUID;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.SparkSession;
-
-import org.apache.hudi.common.model.HoodieTableType;
-
public interface GenericTable<T, V> extends AutoCloseable {
// A list of values for the level field which serves as a basic field to partition on for tests
List<String> LEVEL_VALUES = Arrays.asList("INFO", "WARN", "ERROR");
@@ -66,83 +56,6 @@ default String getDataPath() {
String getFilterQuery();
- static GenericTable getInstance(
- String tableName,
- Path tempDir,
- SparkSession sparkSession,
- JavaSparkContext jsc,
- String sourceFormat,
- boolean isPartitioned) {
- switch (sourceFormat) {
- case HUDI:
- return TestSparkHudiTable.forStandardSchemaAndPartitioning(
- tableName, tempDir, jsc, isPartitioned);
- case DELTA:
- return TestSparkDeltaTable.forStandardSchemaAndPartitioning(
- tableName, tempDir, sparkSession, isPartitioned ? "level" : null);
- case ICEBERG:
- return TestIcebergTable.forStandardSchemaAndPartitioning(
- tableName, isPartitioned ? "level" : null, tempDir, jsc.hadoopConfiguration());
- default:
- throw new IllegalArgumentException("Unsupported source format: " + sourceFormat);
- }
- }
-
- static GenericTable getInstanceWithAdditionalColumns(
- String tableName,
- Path tempDir,
- SparkSession sparkSession,
- JavaSparkContext jsc,
- String sourceFormat,
- boolean isPartitioned) {
- switch (sourceFormat) {
- case HUDI:
- return TestSparkHudiTable.forSchemaWithAdditionalColumnsAndPartitioning(
- tableName, tempDir, jsc, isPartitioned);
- case DELTA:
- return TestSparkDeltaTable.forSchemaWithAdditionalColumnsAndPartitioning(
- tableName, tempDir, sparkSession, isPartitioned ? "level" : null);
- case ICEBERG:
- return TestIcebergTable.forSchemaWithAdditionalColumnsAndPartitioning(
- tableName, isPartitioned ? "level" : null, tempDir, jsc.hadoopConfiguration());
- default:
- throw new IllegalArgumentException("Unsupported source format: " + sourceFormat);
- }
- }
-
- static GenericTable getInstanceWithCustomPartitionConfig(
- String tableName,
- Path tempDir,
- JavaSparkContext jsc,
- String sourceFormat,
- String partitionConfig) {
- switch (sourceFormat) {
- case HUDI:
- return TestSparkHudiTable.forStandardSchema(
- tableName, tempDir, jsc, partitionConfig, HoodieTableType.COPY_ON_WRITE);
- default:
- throw new IllegalArgumentException(
- String.format(
- "Unsupported source format: %s for custom partition config", sourceFormat));
- }
- }
-
- static GenericTable getInstanceWithUUIDColumns(
- String tableName,
- Path tempDir,
- SparkSession sparkSession,
- JavaSparkContext jsc,
- String sourceFormat,
- boolean isPartitioned) {
- switch (sourceFormat) {
- case ICEBERG:
- return TestIcebergTable.forSchemaWithUUIDColumns(
- tableName, isPartitioned ? "level" : null, tempDir, jsc.hadoopConfiguration());
- default:
- throw new IllegalArgumentException("Unsupported source format: " + sourceFormat);
- }
- }
-
static String getTableName() {
return "test_table_" + UUID.randomUUID().toString().replaceAll("-", "_");
}
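The static factory methods removed above relied on the format-specific test tables (TestSparkHudiTable, TestSparkDeltaTable, TestIcebergTable), which now live in the new per-format modules; presumably the cross-format wiring moves with them into the new xtable-integration-tests module added to the root pom. A minimal sketch of direct construction for the Delta case, using the same factory call the removed switch delegated to (the SparkSession and temp directory are assumed to come from the surrounding test harness, and the enclosing test method declares throws Exception):

    // Hypothetical test body; sparkSession and tempDir are set up elsewhere in the harness.
    String tableName = GenericTable.getTableName();
    try (GenericTable<?, ?> table =
        TestSparkDeltaTable.forStandardSchemaAndPartitioning(
            tableName, tempDir, sparkSession, "level")) { // pass null instead of "level" for unpartitioned
      table.insertRows(10);
    }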
diff --git a/xtable-delta/pom.xml b/xtable-delta/pom.xml
new file mode 100644
index 000000000..9704519e5
--- /dev/null
+++ b/xtable-delta/pom.xml
@@ -0,0 +1,243 @@
+
+
+
+ 4.0.0
+
+ org.apache.xtable
+ xtable
+ 0.2.0-SNAPSHOT
+
+
+ xtable-delta_${scala.binary.version}
+ XTable Project Delta
+
+
+
+ org.apache.xtable
+ xtable-api
+ ${project.version}
+ provided
+
+
+ org.apache.xtable
+ xtable-core_${scala.binary.version}
+ ${project.version}
+ provided
+
+
+ com.fasterxml.jackson.core
+ jackson-core
+
+
+ com.fasterxml.jackson.core
+ jackson-databind
+
+
+ com.fasterxml.jackson.core
+ jackson-annotations
+
+
+ com.fasterxml.jackson.module
+ jackson-module-scala_${scala.binary.version}
+
+
+
+ com.google.guava
+ guava
+
+
+
+ org.apache.commons
+ commons-lang3
+
+
+
+
+ org.scala-lang
+ scala-library
+
+
+
+
+ io.delta
+ delta-core_${scala.binary.version}
+
+
+ io.delta
+ delta-standalone_${scala.binary.version}
+ test
+
+
+
+
+ org.apache.hadoop
+ hadoop-common
+ provided
+
+
+
+
+ org.apache.logging.log4j
+ log4j-api
+
+
+
+
+ org.apache.spark
+ spark-core_${scala.binary.version}
+ provided
+
+
+ org.apache.spark
+ spark-catalyst_${scala.binary.version}
+ ${spark.version}
+ provided
+
+
+ org.apache.spark
+ spark-sql_${scala.binary.version}
+ provided
+
+
+
+
+ org.mockito
+ mockito-core
+ test
+
+
+
+
+ org.junit.jupiter
+ junit-jupiter-api
+ test
+
+
+ org.junit.jupiter
+ junit-jupiter-params
+ test
+
+
+ org.junit.jupiter
+ junit-jupiter-engine
+ test
+
+
+
+
+ org.apache.logging.log4j
+ log4j-core
+ test
+
+
+ org.apache.logging.log4j
+ log4j-slf4j2-impl
+ test
+
+
+
+ org.apache.xtable
+ xtable-core_${scala.binary.version}
+ ${project.version}
+ tests
+ test-jar
+ test
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-jar-plugin
+
+
+
+ test-jar
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-shade-plugin
+
+
+ package
+
+ shade
+
+
+
+
+
+
+ LICENSE
+ NOTICE
+ NOTICE.txt
+
+
+
+ META-INF/LICENSE
+ target/classes/META-INF/LICENSE
+
+
+ META-INF/NOTICE
+ target/classes/META-INF/NOTICE
+
+
+
+
+ com.fasterxml.jackson.core:jackson-core
+ com.fasterxml.jackson.core:jackson-databind
+ com.fasterxml.jackson.core:jackson-annotations
+ com.fasterxml.jackson.module:jackson-module-scala_${scala.binary.version}
+ org.scala-lang:scala-library
+ io.delta:delta-core_${scala.binary.version}
+ io.delta:delta-storage
+ org.apache.commons:commons-lang3
+ com.google.guava:guava
+
+
+
+
+ com.fasterxml.jackson.
+ org.apache.xtable.shade.com.fasterxml.jackson.
+
+
+ io.delta.
+ org.apache.xtable.shade.io.delta.
+
+
+ org.apache.commons.
+ org.apache.xtable.shade.org.apache.commons.
+
+
+ com.google.guava.
+ org.apache.xtable.shade.com.google.guava.
+
+
+
+
+
+
+
+
+
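The shade step above bundles Jackson, the Delta libraries, commons-lang3, and Guava into the module jar and relocates them under org.apache.xtable.shade so they cannot collide with the copies already on a Spark or Hudi classpath. Relocation rewrites the package names inside the bundled classes, so a hedged way to picture (or spot-check) the result is to load a relocated class by its new name; ObjectMapper and the class name ShadeCheck are purely illustrative:

    public class ShadeCheck {
      public static void main(String[] args) throws ClassNotFoundException {
        // Assumes the shaded xtable-delta jar is on the classpath.
        Class<?> relocated =
            Class.forName("org.apache.xtable.shade.com.fasterxml.jackson.databind.ObjectMapper");
        System.out.println(relocated.getName()); // prints the relocated class name
      }
    }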
diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaActionsConverter.java b/xtable-delta/src/main/java/org/apache/xtable/delta/DeltaActionsConverter.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/delta/DeltaActionsConverter.java
rename to xtable-delta/src/main/java/org/apache/xtable/delta/DeltaActionsConverter.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaConversionSource.java b/xtable-delta/src/main/java/org/apache/xtable/delta/DeltaConversionSource.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/delta/DeltaConversionSource.java
rename to xtable-delta/src/main/java/org/apache/xtable/delta/DeltaConversionSource.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaConversionSourceProvider.java b/xtable-delta/src/main/java/org/apache/xtable/delta/DeltaConversionSourceProvider.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/delta/DeltaConversionSourceProvider.java
rename to xtable-delta/src/main/java/org/apache/xtable/delta/DeltaConversionSourceProvider.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaConversionTarget.java b/xtable-delta/src/main/java/org/apache/xtable/delta/DeltaConversionTarget.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/delta/DeltaConversionTarget.java
rename to xtable-delta/src/main/java/org/apache/xtable/delta/DeltaConversionTarget.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaConversionUtils.java b/xtable-delta/src/main/java/org/apache/xtable/delta/DeltaConversionUtils.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/delta/DeltaConversionUtils.java
rename to xtable-delta/src/main/java/org/apache/xtable/delta/DeltaConversionUtils.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaDataFileExtractor.java b/xtable-delta/src/main/java/org/apache/xtable/delta/DeltaDataFileExtractor.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/delta/DeltaDataFileExtractor.java
rename to xtable-delta/src/main/java/org/apache/xtable/delta/DeltaDataFileExtractor.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaDataFileUpdatesExtractor.java b/xtable-delta/src/main/java/org/apache/xtable/delta/DeltaDataFileUpdatesExtractor.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/delta/DeltaDataFileUpdatesExtractor.java
rename to xtable-delta/src/main/java/org/apache/xtable/delta/DeltaDataFileUpdatesExtractor.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaIncrementalChangesState.java b/xtable-delta/src/main/java/org/apache/xtable/delta/DeltaIncrementalChangesState.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/delta/DeltaIncrementalChangesState.java
rename to xtable-delta/src/main/java/org/apache/xtable/delta/DeltaIncrementalChangesState.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaPartitionExtractor.java b/xtable-delta/src/main/java/org/apache/xtable/delta/DeltaPartitionExtractor.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/delta/DeltaPartitionExtractor.java
rename to xtable-delta/src/main/java/org/apache/xtable/delta/DeltaPartitionExtractor.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaSchemaExtractor.java b/xtable-delta/src/main/java/org/apache/xtable/delta/DeltaSchemaExtractor.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/delta/DeltaSchemaExtractor.java
rename to xtable-delta/src/main/java/org/apache/xtable/delta/DeltaSchemaExtractor.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaStatsExtractor.java b/xtable-delta/src/main/java/org/apache/xtable/delta/DeltaStatsExtractor.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/delta/DeltaStatsExtractor.java
rename to xtable-delta/src/main/java/org/apache/xtable/delta/DeltaStatsExtractor.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaTableExtractor.java b/xtable-delta/src/main/java/org/apache/xtable/delta/DeltaTableExtractor.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/delta/DeltaTableExtractor.java
rename to xtable-delta/src/main/java/org/apache/xtable/delta/DeltaTableExtractor.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaValueConverter.java b/xtable-delta/src/main/java/org/apache/xtable/delta/DeltaValueConverter.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/delta/DeltaValueConverter.java
rename to xtable-delta/src/main/java/org/apache/xtable/delta/DeltaValueConverter.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/ScalaUtils.java b/xtable-delta/src/main/java/org/apache/xtable/delta/ScalaUtils.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/delta/ScalaUtils.java
rename to xtable-delta/src/main/java/org/apache/xtable/delta/ScalaUtils.java
diff --git a/xtable-delta/src/main/resources/META-INF/services/org.apache.xtable.spi.sync.ConversionTarget b/xtable-delta/src/main/resources/META-INF/services/org.apache.xtable.spi.sync.ConversionTarget
new file mode 100644
index 000000000..cea6bd3ed
--- /dev/null
+++ b/xtable-delta/src/main/resources/META-INF/services/org.apache.xtable.spi.sync.ConversionTarget
@@ -0,0 +1,19 @@
+##########################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+##########################################################################
+
+org.apache.xtable.delta.DeltaConversionTarget
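This service registration is what lets the conversion layer pick up the Delta target reflectively instead of depending on the xtable-delta module at compile time. A minimal sketch of the java.util.ServiceLoader lookup such a file enables (the actual call site inside XTable is not part of this diff, and the class name is illustrative):

    import java.util.ServiceLoader;

    import org.apache.xtable.spi.sync.ConversionTarget;

    public class ListConversionTargets {
      public static void main(String[] args) {
        // Scans every META-INF/services/org.apache.xtable.spi.sync.ConversionTarget resource on the
        // classpath and instantiates each listed implementation through its no-arg constructor.
        for (ConversionTarget target : ServiceLoader.load(ConversionTarget.class)) {
          System.out.println(target.getClass().getName()); // e.g. org.apache.xtable.delta.DeltaConversionTarget
        }
      }
    }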
diff --git a/xtable-core/src/test/java/org/apache/xtable/delta/ITDeltaConversionSource.java b/xtable-delta/src/test/java/org/apache/xtable/delta/ITDeltaConversionSource.java
similarity index 99%
rename from xtable-core/src/test/java/org/apache/xtable/delta/ITDeltaConversionSource.java
rename to xtable-delta/src/test/java/org/apache/xtable/delta/ITDeltaConversionSource.java
index 3b1cc5294..9773522fd 100644
--- a/xtable-core/src/test/java/org/apache/xtable/delta/ITDeltaConversionSource.java
+++ b/xtable-delta/src/test/java/org/apache/xtable/delta/ITDeltaConversionSource.java
@@ -52,7 +52,6 @@
import org.junit.jupiter.params.provider.MethodSource;
import org.apache.xtable.GenericTable;
-import org.apache.xtable.TestSparkDeltaTable;
import org.apache.xtable.ValidationTestHelper;
import org.apache.xtable.conversion.SourceTable;
import org.apache.xtable.model.CommitsBacklog;
diff --git a/xtable-core/src/test/java/org/apache/xtable/delta/ITDeltaDeleteVectorConvert.java b/xtable-delta/src/test/java/org/apache/xtable/delta/ITDeltaDeleteVectorConvert.java
similarity index 99%
rename from xtable-core/src/test/java/org/apache/xtable/delta/ITDeltaDeleteVectorConvert.java
rename to xtable-delta/src/test/java/org/apache/xtable/delta/ITDeltaDeleteVectorConvert.java
index ed02893e3..eb37a8d16 100644
--- a/xtable-core/src/test/java/org/apache/xtable/delta/ITDeltaDeleteVectorConvert.java
+++ b/xtable-delta/src/test/java/org/apache/xtable/delta/ITDeltaDeleteVectorConvert.java
@@ -42,7 +42,6 @@
import scala.Option;
import org.apache.xtable.GenericTable;
-import org.apache.xtable.TestSparkDeltaTable;
import org.apache.xtable.ValidationTestHelper;
import org.apache.xtable.conversion.SourceTable;
import org.apache.xtable.model.CommitsBacklog;
diff --git a/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaActionsConverter.java b/xtable-delta/src/test/java/org/apache/xtable/delta/TestDeltaActionsConverter.java
similarity index 100%
rename from xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaActionsConverter.java
rename to xtable-delta/src/test/java/org/apache/xtable/delta/TestDeltaActionsConverter.java
diff --git a/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaHelper.java b/xtable-delta/src/test/java/org/apache/xtable/delta/TestDeltaHelper.java
similarity index 100%
rename from xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaHelper.java
rename to xtable-delta/src/test/java/org/apache/xtable/delta/TestDeltaHelper.java
diff --git a/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaPartitionExtractor.java b/xtable-delta/src/test/java/org/apache/xtable/delta/TestDeltaPartitionExtractor.java
similarity index 100%
rename from xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaPartitionExtractor.java
rename to xtable-delta/src/test/java/org/apache/xtable/delta/TestDeltaPartitionExtractor.java
diff --git a/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaSchemaExtractor.java b/xtable-delta/src/test/java/org/apache/xtable/delta/TestDeltaSchemaExtractor.java
similarity index 100%
rename from xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaSchemaExtractor.java
rename to xtable-delta/src/test/java/org/apache/xtable/delta/TestDeltaSchemaExtractor.java
diff --git a/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaStatsExtractor.java b/xtable-delta/src/test/java/org/apache/xtable/delta/TestDeltaStatsExtractor.java
similarity index 100%
rename from xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaStatsExtractor.java
rename to xtable-delta/src/test/java/org/apache/xtable/delta/TestDeltaStatsExtractor.java
diff --git a/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaSync.java b/xtable-delta/src/test/java/org/apache/xtable/delta/TestDeltaSync.java
similarity index 100%
rename from xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaSync.java
rename to xtable-delta/src/test/java/org/apache/xtable/delta/TestDeltaSync.java
diff --git a/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaValueConverter.java b/xtable-delta/src/test/java/org/apache/xtable/delta/TestDeltaValueConverter.java
similarity index 100%
rename from xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaValueConverter.java
rename to xtable-delta/src/test/java/org/apache/xtable/delta/TestDeltaValueConverter.java
diff --git a/xtable-core/src/test/java/org/apache/xtable/TestSparkDeltaTable.java b/xtable-delta/src/test/java/org/apache/xtable/delta/TestSparkDeltaTable.java
similarity index 99%
rename from xtable-core/src/test/java/org/apache/xtable/TestSparkDeltaTable.java
rename to xtable-delta/src/test/java/org/apache/xtable/delta/TestSparkDeltaTable.java
index ee5b1ccdd..38057874f 100644
--- a/xtable-core/src/test/java/org/apache/xtable/TestSparkDeltaTable.java
+++ b/xtable-delta/src/test/java/org/apache/xtable/delta/TestSparkDeltaTable.java
@@ -16,7 +16,7 @@
* limitations under the License.
*/
-package org.apache.xtable;
+package org.apache.xtable.delta;
import java.io.Closeable;
import java.io.IOException;
@@ -44,7 +44,7 @@
import io.delta.tables.DeltaTable;
-import org.apache.xtable.delta.TestDeltaHelper;
+import org.apache.xtable.GenericTable;
@Getter
public class TestSparkDeltaTable implements GenericTable<Row, Long>, Closeable {
diff --git a/xtable-hudi-support/xtable-hudi-support-extensions/pom.xml b/xtable-hudi-support/xtable-hudi-support-extensions/pom.xml
index fba3fe0a9..775484d39 100644
--- a/xtable-hudi-support/xtable-hudi-support-extensions/pom.xml
+++ b/xtable-hudi-support/xtable-hudi-support-extensions/pom.xml
@@ -42,6 +42,12 @@
       <version>${project.version}</version>
     </dependency>
+    <dependency>
+      <groupId>org.apache.xtable</groupId>
+      <artifactId>xtable-hudi</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+
org.slf4j
@@ -180,6 +186,20 @@
       <artifactId>log4j-slf4j2-impl</artifactId>
       <scope>test</scope>
     </dependency>
+
+    <dependency>
+      <groupId>org.apache.xtable</groupId>
+      <artifactId>xtable-delta_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.xtable</groupId>
+      <artifactId>xtable-iceberg</artifactId>
+      <version>${project.version}</version>
+      <scope>test</scope>
+    </dependency>
diff --git a/xtable-hudi/pom.xml b/xtable-hudi/pom.xml
new file mode 100644
index 000000000..258e52e7b
--- /dev/null
+++ b/xtable-hudi/pom.xml
@@ -0,0 +1,294 @@
+
+
+
+ 4.0.0
+
+ org.apache.xtable
+ xtable
+ 0.2.0-SNAPSHOT
+
+
+ xtable-hudi
+ XTable Project Hudi
+
+
+
+ org.apache.xtable
+ xtable-api
+ ${project.version}
+ provided
+
+
+ org.apache.xtable
+ xtable-core_${scala.binary.version}
+ ${project.version}
+ provided
+
+
+ org.apache.xtable
+ xtable-hudi-support-utils
+ ${project.version}
+
+
+
+ com.google.guava
+ guava
+
+
+
+
+ org.apache.avro
+ avro
+
+
+
+
+ org.apache.parquet
+ parquet-column
+
+
+ org.apache.parquet
+ parquet-avro
+
+
+ org.apache.parquet
+ parquet-hadoop
+
+
+
+
+ org.apache.hudi
+ hudi-spark${spark.version.prefix}-bundle_${scala.binary.version}
+ test
+
+
+ org.apache.hudi
+ hudi-java-client
+
+
+
+ com.esotericsoftware
+ kryo
+ runtime
+
+
+
+
+ org.apache.hadoop
+ hadoop-common
+ provided
+
+
+
+
+ org.apache.logging.log4j
+ log4j-api
+
+
+
+
+ org.apache.spark
+ spark-core_${scala.binary.version}
+ provided
+
+
+ org.apache.spark
+ spark-sql_${scala.binary.version}
+ provided
+
+
+
+
+ org.mockito
+ mockito-core
+ test
+
+
+
+
+ org.junit.jupiter
+ junit-jupiter-api
+ test
+
+
+ org.junit.jupiter
+ junit-jupiter-params
+ test
+
+
+ org.junit.jupiter
+ junit-jupiter-engine
+ test
+
+
+
+
+ org.apache.logging.log4j
+ log4j-core
+ test
+
+
+ org.apache.logging.log4j
+ log4j-slf4j2-impl
+ test
+
+
+
+ org.apache.commons
+ commons-lang3
+ test
+
+
+
+ org.apache.xtable
+ xtable-core_${scala.binary.version}
+ ${project.version}
+ tests
+ test-jar
+ test
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-jar-plugin
+
+
+
+ test-jar
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-shade-plugin
+
+
+ package
+
+ shade
+
+
+ true
+
+
+
+
+ LICENSE
+ NOTICE
+ NOTICE.txt
+
+
+
+ META-INF/LICENSE
+ target/classes/META-INF/LICENSE
+
+
+ META-INF/NOTICE
+ target/classes/META-INF/NOTICE
+
+
+
+
+ org.apache.xtable:xtable-hudi-support-utils
+ com.fasterxml.jackson.core:jackson-databind
+ com.fasterxml.jackson.datatype:jackson-datatype-jsr310
+ com.fasterxml.jackson.core:jackson-core
+ com.fasterxml.jackson.core:jackson-annotations
+ org.apache.parquet:parquet-column
+ org.apache.parquet:parquet-avro
+ org.apache.parquet:parquet-common
+ org.apache.parquet:parquet-encoding
+ org.apache.parquet:parquet-hadoop
+ org.apache.parquet:parquet-format-structures
+ org.apache.parquet:parquet-jackson
+ org.apache.hudi:hudi-java-client
+ org.apache.hudi:hudi-client-common
+ org.apache.hudi:hudi-common
+ org.apache.avro:avro
+ com.google.guava:guava
+
+ org.apache.hbase:hbase-client
+ org.apache.hbase:hbase-common
+ org.apache.hbase:hbase-hadoop-compat
+ org.apache.hbase:hbase-metrics-api
+ org.apache.hbase:hbase-protocol-shaded
+ org.apache.hbase:hbase-server
+ org.apache.hbase.thirdparty:hbase-shaded-miscellaneous
+ org.apache.hbase.thirdparty:hbase-shaded-netty
+ org.apache.hbase.thirdparty:hbase-shaded-protobuf
+ org.apache.htrace:htrace-core4
+ com.esotericsoftware:kryo
+ org.openjdk.jol:jol-core
+ org.lz4:lz4-java
+
+
+
+
+ com.esotericsoftware
+ org.apache.xtable.shade.com.esotericsoftware
+
+
+ org.apache.parquet.
+ org.apache.xtable.shade.org.apache.parquet.
+
+
+ org.apache.hadoop.hbase.
+ org.apache.xtable.shade.org.apache.hadoop.hbase.
+
+ org.apache.hadoop.hbase.KeyValue$KeyComparator
+ org.apache.hadoop.hbase.CellComparator
+ org.apache.hadoop.hbase.CellComparatorImpl
+
+
+
+
+ org.apache.hadoop.hbase.
+ org.apache.hudi.org.apache.hadoop.hbase.
+
+ org.apache.hadoop.hbase.KeyValue$KeyComparator
+ org.apache.hadoop.hbase.CellComparator
+ org.apache.hadoop.hbase.CellComparatorImpl
+
+
+
+ com.google.guava.
+ org.apache.xtable.shade.com.google.guava.
+
+
+ com.fasterxml.jackson.
+ org.apache.xtable.shade.com.fasterxml.jackson.
+
+
+ net.jpountz.
+ org.apache.xtable.shade.net.jpountz.
+
+
+
+
+
+
+
+
+
+
diff --git a/xtable-hudi/src/main/java/org/apache/avro/data/TimeConversions.java b/xtable-hudi/src/main/java/org/apache/avro/data/TimeConversions.java
new file mode 100644
index 000000000..066b117b4
--- /dev/null
+++ b/xtable-hudi/src/main/java/org/apache/avro/data/TimeConversions.java
@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.avro.data;
+
+import org.apache.avro.Conversion;
+import org.apache.avro.LogicalType;
+import org.apache.avro.Schema;
+import org.joda.time.DateTime;
+import org.joda.time.DateTimeZone;
+import org.joda.time.Days;
+import org.joda.time.LocalDate;
+import org.joda.time.LocalTime;
+
+/**
+ * Provides compatibility across various versions of avro used in the Hudi generated jars. Without
+ * this, there will be unresolved dependencies at runtime.
+ */
+@SuppressWarnings("unused")
+public class TimeConversions {
+ public static class DateConversion extends Conversion<LocalDate> {
+ private static final LocalDate EPOCH_DATE = new LocalDate(1970, 1, 1);
+
+ @Override
+ public Class<LocalDate> getConvertedType() {
+ return LocalDate.class;
+ }
+
+ @Override
+ public String getLogicalTypeName() {
+ return "date";
+ }
+
+ @Override
+ public LocalDate fromInt(Integer daysFromEpoch, Schema schema, LogicalType type) {
+ return EPOCH_DATE.plusDays(daysFromEpoch);
+ }
+
+ @Override
+ public Integer toInt(LocalDate date, Schema schema, LogicalType type) {
+ return Days.daysBetween(EPOCH_DATE, date).getDays();
+ }
+ }
+
+ public static class TimeConversion extends Conversion<LocalTime> {
+ @Override
+ public Class<LocalTime> getConvertedType() {
+ return LocalTime.class;
+ }
+
+ @Override
+ public String getLogicalTypeName() {
+ return "time-millis";
+ }
+
+ @Override
+ public LocalTime fromInt(Integer millisFromMidnight, Schema schema, LogicalType type) {
+ return LocalTime.fromMillisOfDay(millisFromMidnight);
+ }
+
+ @Override
+ public Integer toInt(LocalTime time, Schema schema, LogicalType type) {
+ return time.millisOfDay().get();
+ }
+ }
+
+ public static class TimeMicrosConversion extends Conversion<LocalTime> {
+ @Override
+ public Class<LocalTime> getConvertedType() {
+ return LocalTime.class;
+ }
+
+ @Override
+ public String getLogicalTypeName() {
+ return "time-micros";
+ }
+
+ @Override
+ public LocalTime fromLong(Long microsFromMidnight, Schema schema, LogicalType type) {
+ return LocalTime.fromMillisOfDay(microsFromMidnight / 1000);
+ }
+ }
+
+ public static class LossyTimeMicrosConversion extends TimeMicrosConversion {
+ @Override
+ public Long toLong(LocalTime time, Schema schema, LogicalType type) {
+ return 1000 * (long) time.millisOfDay().get();
+ }
+ }
+
+ public static class TimestampConversion extends Conversion<DateTime> {
+ @Override
+ public Class<DateTime> getConvertedType() {
+ return DateTime.class;
+ }
+
+ @Override
+ public String getLogicalTypeName() {
+ return "timestamp-millis";
+ }
+
+ @Override
+ public DateTime fromLong(Long millisFromEpoch, Schema schema, LogicalType type) {
+ return new DateTime(millisFromEpoch, DateTimeZone.UTC);
+ }
+
+ @Override
+ public Long toLong(DateTime timestamp, Schema schema, LogicalType type) {
+ return timestamp.getMillis();
+ }
+ }
+
+ public static class TimestampMicrosConversion extends Conversion<DateTime> {
+ @Override
+ public Class<DateTime> getConvertedType() {
+ return DateTime.class;
+ }
+
+ @Override
+ public String getLogicalTypeName() {
+ return "timestamp-micros";
+ }
+
+ @Override
+ public DateTime fromLong(Long microsFromEpoch, Schema schema, LogicalType type) {
+ return new DateTime(microsFromEpoch / 1000, DateTimeZone.UTC);
+ }
+ }
+
+ public static class LossyTimestampMicrosConversion extends TimestampMicrosConversion {
+ @Override
+ public Long toLong(DateTime timestamp, Schema schema, LogicalType type) {
+ return 1000 * timestamp.getMillis();
+ }
+ }
+}
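These conversions mirror Avro's Joda-time conversions so that whichever Avro copy ends up on the classpath through the Hudi bundled jars can still resolve the org.apache.avro.data.TimeConversions entry points at runtime. For reference, logical-type conversions of this shape are normally registered on a GenericData model before records with date/time logical types are read or written; a hedged sketch of that registration (the model setup and class name are illustrative, not code from this change):

    import org.apache.avro.data.TimeConversions;
    import org.apache.avro.generic.GenericData;

    public class RegisterTimeConversions {
      public static void main(String[] args) {
        // Register the Joda-based logical-type conversions on the model used for (de)serialization.
        GenericData model = new GenericData();
        model.addLogicalTypeConversion(new TimeConversions.DateConversion());
        model.addLogicalTypeConversion(new TimeConversions.TimestampConversion());
        System.out.println("registered date/time conversions");
      }
    }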
diff --git a/xtable-core/src/main/java/org/apache/xtable/avro/AvroSchemaConverter.java b/xtable-hudi/src/main/java/org/apache/xtable/avro/AvroSchemaConverter.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/avro/AvroSchemaConverter.java
rename to xtable-hudi/src/main/java/org/apache/xtable/avro/AvroSchemaConverter.java
diff --git a/xtable-hudi/src/main/java/org/apache/xtable/hbase/NoOpMetricsRegionServerSourceFactory.java b/xtable-hudi/src/main/java/org/apache/xtable/hbase/NoOpMetricsRegionServerSourceFactory.java
new file mode 100644
index 000000000..b8419ccbf
--- /dev/null
+++ b/xtable-hudi/src/main/java/org/apache/xtable/hbase/NoOpMetricsRegionServerSourceFactory.java
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.xtable.hbase;
+
+import org.apache.hadoop.hbase.io.MetricsIOSource;
+import org.apache.hadoop.hbase.io.MetricsIOWrapper;
+import org.apache.hadoop.hbase.regionserver.MetricsHeapMemoryManagerSource;
+import org.apache.hadoop.hbase.regionserver.MetricsRegionServerSource;
+import org.apache.hadoop.hbase.regionserver.MetricsRegionServerSourceFactory;
+import org.apache.hadoop.hbase.regionserver.MetricsRegionServerWrapper;
+import org.apache.hadoop.hbase.regionserver.MetricsRegionSource;
+import org.apache.hadoop.hbase.regionserver.MetricsRegionWrapper;
+import org.apache.hadoop.hbase.regionserver.MetricsTableAggregateSource;
+import org.apache.hadoop.hbase.regionserver.MetricsTableSource;
+import org.apache.hadoop.hbase.regionserver.MetricsTableWrapperAggregate;
+import org.apache.hadoop.hbase.regionserver.MetricsUserAggregateSource;
+import org.apache.hadoop.hbase.regionserver.MetricsUserSource;
+
+/** Provides a No-Op metrics implementation for the HFile required by Hudi. */
+public class NoOpMetricsRegionServerSourceFactory implements MetricsRegionServerSourceFactory {
+ @Override
+ public MetricsRegionServerSource createServer(MetricsRegionServerWrapper regionServerWrapper) {
+ return null;
+ }
+
+ @Override
+ public MetricsRegionSource createRegion(MetricsRegionWrapper wrapper) {
+ return null;
+ }
+
+ @Override
+ public MetricsUserSource createUser(String shortUserName) {
+ return null;
+ }
+
+ @Override
+ public MetricsUserAggregateSource getUserAggregate() {
+ return null;
+ }
+
+ @Override
+ public MetricsTableSource createTable(String table, MetricsTableWrapperAggregate wrapper) {
+ return null;
+ }
+
+ @Override
+ public MetricsTableAggregateSource getTableAggregate() {
+ return null;
+ }
+
+ @Override
+ public MetricsHeapMemoryManagerSource getHeapMemoryManager() {
+ return null;
+ }
+
+ @Override
+ public MetricsIOSource createIO(MetricsIOWrapper wrapper) {
+ return new NoOpMetricsIOSource();
+ }
+
+ private static class NoOpMetricsIOSource implements MetricsIOSource {
+
+ @Override
+ public void updateFsReadTime(long t) {}
+
+ @Override
+ public void updateFsPReadTime(long t) {}
+
+ @Override
+ public void updateFsWriteTime(long t) {}
+
+ @Override
+ public void init() {}
+
+ @Override
+ public void setGauge(String gaugeName, long value) {}
+
+ @Override
+ public void incGauge(String gaugeName, long delta) {}
+
+ @Override
+ public void decGauge(String gaugeName, long delta) {}
+
+ @Override
+ public void removeMetric(String key) {}
+
+ @Override
+ public void incCounters(String counterName, long delta) {}
+
+ @Override
+ public void updateHistogram(String name, long value) {}
+
+ @Override
+ public String getMetricsContext() {
+ return "";
+ }
+
+ @Override
+ public String getMetricsDescription() {
+ return "";
+ }
+
+ @Override
+ public String getMetricsJmxContext() {
+ return "";
+ }
+
+ @Override
+ public String getMetricsName() {
+ return "";
+ }
+ }
+}
diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/BaseFileUpdatesExtractor.java b/xtable-hudi/src/main/java/org/apache/xtable/hudi/BaseFileUpdatesExtractor.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/hudi/BaseFileUpdatesExtractor.java
rename to xtable-hudi/src/main/java/org/apache/xtable/hudi/BaseFileUpdatesExtractor.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/ConfigurationBasedPartitionSpecExtractor.java b/xtable-hudi/src/main/java/org/apache/xtable/hudi/ConfigurationBasedPartitionSpecExtractor.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/hudi/ConfigurationBasedPartitionSpecExtractor.java
rename to xtable-hudi/src/main/java/org/apache/xtable/hudi/ConfigurationBasedPartitionSpecExtractor.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiConversionSource.java b/xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiConversionSource.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/hudi/HudiConversionSource.java
rename to xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiConversionSource.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiConversionSourceProvider.java b/xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiConversionSourceProvider.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/hudi/HudiConversionSourceProvider.java
rename to xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiConversionSourceProvider.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiConversionTarget.java b/xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiConversionTarget.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/hudi/HudiConversionTarget.java
rename to xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiConversionTarget.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiDataFileExtractor.java b/xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiDataFileExtractor.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/hudi/HudiDataFileExtractor.java
rename to xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiDataFileExtractor.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiFileStats.java b/xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiFileStats.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/hudi/HudiFileStats.java
rename to xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiFileStats.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiFileStatsExtractor.java b/xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiFileStatsExtractor.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/hudi/HudiFileStatsExtractor.java
rename to xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiFileStatsExtractor.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiInstantUtils.java b/xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiInstantUtils.java
similarity index 81%
rename from xtable-core/src/main/java/org/apache/xtable/hudi/HudiInstantUtils.java
rename to xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiInstantUtils.java
index 85cb19c07..4b2968dc5 100644
--- a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiInstantUtils.java
+++ b/xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiInstantUtils.java
@@ -18,10 +18,6 @@
package org.apache.xtable.hudi;
-import static org.apache.hudi.common.table.timeline.HoodieInstantTimeGenerator.MILLIS_INSTANT_TIMESTAMP_FORMAT_LENGTH;
-import static org.apache.hudi.common.table.timeline.HoodieInstantTimeGenerator.SECS_INSTANT_ID_LENGTH;
-import static org.apache.hudi.common.table.timeline.HoodieInstantTimeGenerator.SECS_INSTANT_TIMESTAMP_FORMAT;
-
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneId;
@@ -41,7 +37,7 @@ class HudiInstantUtils {
// https://bugs.openjdk.java.net/browse/JDK-8031085. hence have to do appendValue()
private static final DateTimeFormatter MILLIS_INSTANT_TIME_FORMATTER =
new DateTimeFormatterBuilder()
- .appendPattern(SECS_INSTANT_TIMESTAMP_FORMAT)
+ .appendPattern(HoodieInstantTimeGenerator.SECS_INSTANT_TIMESTAMP_FORMAT)
.appendValue(ChronoField.MILLI_OF_SECOND, 3)
.toFormatter()
.withZone(ZONE_ID);
@@ -59,8 +55,11 @@ static Instant parseFromInstantTime(String timestamp) {
String timestampInMillis = timestamp;
if (isSecondGranularity(timestamp)) {
timestampInMillis = timestamp + "999";
- } else if (timestamp.length() > MILLIS_INSTANT_TIMESTAMP_FORMAT_LENGTH) {
- timestampInMillis = timestamp.substring(0, MILLIS_INSTANT_TIMESTAMP_FORMAT_LENGTH);
+ } else if (timestamp.length()
+ > HoodieInstantTimeGenerator.MILLIS_INSTANT_TIMESTAMP_FORMAT_LENGTH) {
+ timestampInMillis =
+ timestamp.substring(
+ 0, HoodieInstantTimeGenerator.MILLIS_INSTANT_TIMESTAMP_FORMAT_LENGTH);
}
LocalDateTime dt = LocalDateTime.parse(timestampInMillis, MILLIS_INSTANT_TIME_FORMATTER);
@@ -76,6 +75,6 @@ static String convertInstantToCommit(Instant instant) {
}
private static boolean isSecondGranularity(String instant) {
- return instant.length() == SECS_INSTANT_ID_LENGTH;
+ return instant.length() == HoodieInstantTimeGenerator.SECS_INSTANT_ID_LENGTH;
}
}
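The rewrite above only swaps the removed static imports for fully qualified HoodieInstantTimeGenerator constants; the parsing behaviour is unchanged: second-granularity commit timestamps are padded with "999" to millisecond precision, and anything longer than the millisecond format is truncated before parsing. A rough illustration, assuming the package-private helper is invoked from within org.apache.xtable.hudi (the wrapper class is hypothetical):

    package org.apache.xtable.hudi;

    import java.time.Instant;

    public class InstantParsingSketch {
      public static void main(String[] args) {
        // 14-digit, second-granularity commit time: "999" is appended before parsing, so it maps
        // to the last millisecond of that second in the formatter's configured zone.
        Instant seconds = HudiInstantUtils.parseFromInstantTime("20230815123045");
        // 17-digit, millisecond-granularity commit time is parsed as-is; longer values are truncated.
        Instant millis = HudiInstantUtils.parseFromInstantTime("20230815123045123");
        System.out.println(seconds + " / " + millis);
      }
    }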
diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiPartitionValuesExtractor.java b/xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiPartitionValuesExtractor.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/hudi/HudiPartitionValuesExtractor.java
rename to xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiPartitionValuesExtractor.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiPathUtils.java b/xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiPathUtils.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/hudi/HudiPathUtils.java
rename to xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiPathUtils.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiSchemaExtractor.java b/xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiSchemaExtractor.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/hudi/HudiSchemaExtractor.java
rename to xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiSchemaExtractor.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiSourceConfig.java b/xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiSourceConfig.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/hudi/HudiSourceConfig.java
rename to xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiSourceConfig.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiSourcePartitionSpecExtractor.java b/xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiSourcePartitionSpecExtractor.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/hudi/HudiSourcePartitionSpecExtractor.java
rename to xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiSourcePartitionSpecExtractor.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiTableExtractor.java b/xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiTableExtractor.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/hudi/HudiTableExtractor.java
rename to xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiTableExtractor.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiTableManager.java b/xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiTableManager.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/hudi/HudiTableManager.java
rename to xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiTableManager.java
diff --git a/xtable-hudi/src/main/resources/META-INF/services/org.apache.hadoop.hbase.regionserver.MetricsRegionServerSourceFactory b/xtable-hudi/src/main/resources/META-INF/services/org.apache.hadoop.hbase.regionserver.MetricsRegionServerSourceFactory
new file mode 100644
index 000000000..dab50e041
--- /dev/null
+++ b/xtable-hudi/src/main/resources/META-INF/services/org.apache.hadoop.hbase.regionserver.MetricsRegionServerSourceFactory
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+org.apache.xtable.hbase.NoOpMetricsRegionServerSourceFactory
diff --git a/xtable-hudi/src/main/resources/META-INF/services/org.apache.xtable.shade.org.apache.hadoop.hbase.regionserver.MetricsRegionServerSourceFactory b/xtable-hudi/src/main/resources/META-INF/services/org.apache.xtable.shade.org.apache.hadoop.hbase.regionserver.MetricsRegionServerSourceFactory
new file mode 100644
index 000000000..dab50e041
--- /dev/null
+++ b/xtable-hudi/src/main/resources/META-INF/services/org.apache.xtable.shade.org.apache.hadoop.hbase.regionserver.MetricsRegionServerSourceFactory
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+org.apache.xtable.hbase.NoOpMetricsRegionServerSourceFactory
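The same provider is registered twice: once under the original HBase service path, used when the unshaded classes are on the classpath (tests and provided dependencies), and once under the org.apache.xtable.shade prefix, which is presumably the resource name the relocated HBase classes look up from inside the shaded xtable-hudi jar (the shade config above relocates org.apache.hadoop.hbase.). A small fragment showing the two resource names a classloader would resolve (both files list the same NoOpMetricsRegionServerSourceFactory):

    ClassLoader cl = Thread.currentThread().getContextClassLoader();
    // Service file consulted by the unshaded HBase classes:
    System.out.println(cl.getResource(
        "META-INF/services/org.apache.hadoop.hbase.regionserver.MetricsRegionServerSourceFactory"));
    // Service file consulted once org.apache.hadoop.hbase. has been relocated inside the shaded jar:
    System.out.println(cl.getResource(
        "META-INF/services/org.apache.xtable.shade.org.apache.hadoop.hbase.regionserver.MetricsRegionServerSourceFactory"));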
diff --git a/xtable-hudi/src/main/resources/META-INF/services/org.apache.xtable.spi.sync.ConversionTarget b/xtable-hudi/src/main/resources/META-INF/services/org.apache.xtable.spi.sync.ConversionTarget
new file mode 100644
index 000000000..2bea153b1
--- /dev/null
+++ b/xtable-hudi/src/main/resources/META-INF/services/org.apache.xtable.spi.sync.ConversionTarget
@@ -0,0 +1,19 @@
+##########################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+##########################################################################
+
+org.apache.xtable.hudi.HudiConversionTarget
diff --git a/xtable-core/src/test/java/org/apache/xtable/avro/TestAvroSchemaConverter.java b/xtable-hudi/src/test/java/org/apache/xtable/avro/TestAvroSchemaConverter.java
similarity index 100%
rename from xtable-core/src/test/java/org/apache/xtable/avro/TestAvroSchemaConverter.java
rename to xtable-hudi/src/test/java/org/apache/xtable/avro/TestAvroSchemaConverter.java
diff --git a/xtable-core/src/test/java/org/apache/xtable/hudi/HudiTestUtil.java b/xtable-hudi/src/test/java/org/apache/xtable/hudi/HudiTestUtil.java
similarity index 98%
rename from xtable-core/src/test/java/org/apache/xtable/hudi/HudiTestUtil.java
rename to xtable-hudi/src/test/java/org/apache/xtable/hudi/HudiTestUtil.java
index c701a1d54..b7bccff24 100644
--- a/xtable-core/src/test/java/org/apache/xtable/hudi/HudiTestUtil.java
+++ b/xtable-hudi/src/test/java/org/apache/xtable/hudi/HudiTestUtil.java
@@ -67,7 +67,8 @@ public static HoodieWriteConfig getHoodieWriteConfig(HoodieTableMetaClient metaC
return getHoodieWriteConfig(metaClient, null);
}
- static HoodieWriteConfig getHoodieWriteConfig(HoodieTableMetaClient metaClient, Schema schema) {
+ static synchronized HoodieWriteConfig getHoodieWriteConfig(
+ HoodieTableMetaClient metaClient, Schema schema) {
Properties properties = new Properties();
properties.setProperty(HoodieMetadataConfig.AUTO_INITIALIZE.key(), "false");
return HoodieWriteConfig.newBuilder()
diff --git a/xtable-core/src/test/java/org/apache/xtable/hudi/ITHudiConversionSource.java b/xtable-hudi/src/test/java/org/apache/xtable/hudi/ITHudiConversionSource.java
similarity index 99%
rename from xtable-core/src/test/java/org/apache/xtable/hudi/ITHudiConversionSource.java
rename to xtable-hudi/src/test/java/org/apache/xtable/hudi/ITHudiConversionSource.java
index 376ccedae..e085c638f 100644
--- a/xtable-core/src/test/java/org/apache/xtable/hudi/ITHudiConversionSource.java
+++ b/xtable-hudi/src/test/java/org/apache/xtable/hudi/ITHudiConversionSource.java
@@ -62,8 +62,6 @@
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.xtable.GenericTable;
-import org.apache.xtable.TestJavaHudiTable;
-import org.apache.xtable.TestSparkHudiTable;
import org.apache.xtable.ValidationTestHelper;
import org.apache.xtable.model.CommitsBacklog;
import org.apache.xtable.model.InstantsForIncrementalSync;
diff --git a/xtable-core/src/test/java/org/apache/xtable/hudi/ITHudiConversionTarget.java b/xtable-hudi/src/test/java/org/apache/xtable/hudi/ITHudiConversionTarget.java
similarity index 96%
rename from xtable-core/src/test/java/org/apache/xtable/hudi/ITHudiConversionTarget.java
rename to xtable-hudi/src/test/java/org/apache/xtable/hudi/ITHudiConversionTarget.java
index 03bb6e2ca..791748b68 100644
--- a/xtable-core/src/test/java/org/apache/xtable/hudi/ITHudiConversionTarget.java
+++ b/xtable-hudi/src/test/java/org/apache/xtable/hudi/ITHudiConversionTarget.java
@@ -21,7 +21,6 @@
import static org.apache.xtable.hudi.HudiTestUtil.createWriteStatus;
import static org.apache.xtable.hudi.HudiTestUtil.getHoodieWriteConfig;
import static org.apache.xtable.hudi.HudiTestUtil.initTableAndGetMetaClient;
-import static org.junit.jupiter.api.Assertions.assertEquals;
import java.nio.file.Path;
import java.time.Duration;
@@ -368,7 +367,7 @@ CONTEXT, getHoodieWriteConfig(metaClient).getMetadataConfig(), tableBasePath, tr
assertColStats(hoodieBackedTableMetadata, partitionPath, fileName4);
}
// the first commit to the timeline should be archived
- assertEquals(
+ Assertions.assertEquals(
2, metaClient.getArchivedTimeline().reload().filterCompletedInstants().countInstants());
}
@@ -428,7 +427,7 @@ private void assertSchema(HoodieTableMetaClient metaClient, boolean includeMetaF
.requiredString(OTHER_FIELD_NAME)
.endRecord();
}
- assertEquals(expected, actual);
+ Assertions.assertEquals(expected, actual);
}
private void assertFileGroupCorrectness(
@@ -446,15 +445,15 @@ private void assertFileGroupCorrectness(
.getAllFileGroups(partitionPath)
.sorted(Comparator.comparing(HoodieFileGroup::getFileGroupId))
.collect(Collectors.toList());
- assertEquals(fileIdAndPath.size(), fileGroups.size());
+ Assertions.assertEquals(fileIdAndPath.size(), fileGroups.size());
for (int i = 0; i < fileIdAndPath.size(); i++) {
HoodieFileGroup fileGroup = fileGroups.get(i);
String expectedFileId = fileIdAndPath.get(i).getLeft();
String expectedFilePath = fileIdAndPath.get(i).getRight();
- assertEquals(expectedFileId, fileGroup.getFileGroupId().getFileId());
- assertEquals(partitionPath, fileGroup.getPartitionPath());
+ Assertions.assertEquals(expectedFileId, fileGroup.getFileGroupId().getFileId());
+ Assertions.assertEquals(partitionPath, fileGroup.getPartitionPath());
HoodieBaseFile baseFile = fileGroup.getAllBaseFiles().findFirst().get();
- assertEquals(
+ Assertions.assertEquals(
metaClient.getBasePathV2().toString() + "/" + expectedFilePath, baseFile.getPath());
}
fsView.close();
@@ -518,16 +517,16 @@ private void assertColStatsForField(
Map<Pair<String, String>, HoodieMetadataColumnStats> fieldColStats =
hoodieBackedTableMetadata.getColumnStats(
Collections.singletonList(Pair.of(partitionPath, fileName)), fieldName);
- assertEquals(1, fieldColStats.size());
+ Assertions.assertEquals(1, fieldColStats.size());
HoodieMetadataColumnStats columnStats = fieldColStats.get(Pair.of(partitionPath, fileName));
- assertEquals(fieldName, columnStats.getColumnName());
- assertEquals(fileName, columnStats.getFileName());
- assertEquals(new StringWrapper(minValue), columnStats.getMinValue());
- assertEquals(new StringWrapper(maxValue), columnStats.getMaxValue());
- assertEquals(valueCount, columnStats.getValueCount());
- assertEquals(nullCount, columnStats.getNullCount());
- assertEquals(totalSize, columnStats.getTotalSize());
- assertEquals(-1, columnStats.getTotalUncompressedSize());
+ Assertions.assertEquals(fieldName, columnStats.getColumnName());
+ Assertions.assertEquals(fileName, columnStats.getFileName());
+ Assertions.assertEquals(new StringWrapper(minValue), columnStats.getMinValue());
+ Assertions.assertEquals(new StringWrapper(maxValue), columnStats.getMaxValue());
+ Assertions.assertEquals(valueCount, columnStats.getValueCount());
+ Assertions.assertEquals(nullCount, columnStats.getNullCount());
+ Assertions.assertEquals(totalSize, columnStats.getTotalSize());
+ Assertions.assertEquals(-1, columnStats.getTotalUncompressedSize());
}
private InternalDataFile getTestFile(String partitionPath, String fileName) {
diff --git a/xtable-core/src/test/java/org/apache/xtable/TestAbstractHudiTable.java b/xtable-hudi/src/test/java/org/apache/xtable/hudi/TestAbstractHudiTable.java
similarity index 97%
rename from xtable-core/src/test/java/org/apache/xtable/TestAbstractHudiTable.java
rename to xtable-hudi/src/test/java/org/apache/xtable/hudi/TestAbstractHudiTable.java
index 3e9a133a2..0d134df65 100644
--- a/xtable-core/src/test/java/org/apache/xtable/TestAbstractHudiTable.java
+++ b/xtable-hudi/src/test/java/org/apache/xtable/hudi/TestAbstractHudiTable.java
@@ -16,9 +16,8 @@
* limitations under the License.
*/
-package org.apache.xtable;
+package org.apache.xtable.hudi;
-import static org.apache.hudi.keygen.constant.KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME;
import static org.apache.xtable.hudi.HudiTestUtil.getHoodieWriteConfig;
import static org.junit.jupiter.api.Assertions.assertAll;
import static org.junit.jupiter.api.Assertions.assertFalse;
@@ -104,6 +103,8 @@
import com.google.common.base.Preconditions;
+import org.apache.xtable.GenericTable;
+
public abstract class TestAbstractHudiTable
implements GenericTable<HoodieRecord<HoodieAvroPayload>, String> {
@@ -160,17 +161,19 @@ public abstract class TestAbstractHudiTable
String[] partitionFieldConfigs = partitionConfig.split(",");
if (partitionFieldConfigs.length == 1 && !partitionFieldConfigs[0].contains(".")) {
typedProperties.put(
- PARTITIONPATH_FIELD_NAME.key(), partitionFieldConfigs[0].split(":")[0]);
+ KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(),
+ partitionFieldConfigs[0].split(":")[0]);
if (partitionFieldConfigs[0].contains(".")) { // nested field
this.keyGenerator = new CustomKeyGenerator(typedProperties);
} else if (partitionFieldConfigs[0].contains("SIMPLE")) { // top level field
this.keyGenerator = new SimpleKeyGenerator(typedProperties);
} else { // top level timestamp field
- typedProperties.put(PARTITIONPATH_FIELD_NAME.key(), partitionConfig);
+ typedProperties.put(
+ KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), partitionConfig);
this.keyGenerator = new TimestampBasedKeyGenerator(typedProperties);
}
} else {
- typedProperties.put(PARTITIONPATH_FIELD_NAME.key(), partitionConfig);
+ typedProperties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), partitionConfig);
this.keyGenerator = new CustomKeyGenerator(typedProperties);
}
this.partitionFieldNames =
@@ -431,7 +434,9 @@ protected HoodieWriteConfig generateWriteConfig(Schema schema, TypedProperties k
// enable col stats only on un-partitioned data due to bug in Hudi
// https://issues.apache.org/jira/browse/HUDI-6954
.withMetadataIndexColumnStats(
- !keyGenProperties.getString(PARTITIONPATH_FIELD_NAME.key(), "").isEmpty())
+ !keyGenProperties
+ .getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), "")
+ .isEmpty())
.withColumnStatsIndexForColumns(getColumnsFromSchema(schema))
.build();
Properties lockProperties = new Properties();
@@ -645,7 +650,7 @@ private GenericRecord generateGenericRecord(
value = System.currentTimeMillis();
} else if (fieldName.equals("level")) {
// a simple string field to be used for basic partitioning if required
- value = LEVEL_VALUES.get(RANDOM.nextInt(LEVEL_VALUES.size()));
+ value = GenericTable.LEVEL_VALUES.get(RANDOM.nextInt(GenericTable.LEVEL_VALUES.size()));
} else if (fieldName.equals("severity")) {
// a bounded integer field to be used for partition testing
value = RANDOM.nextBoolean() ? null : RANDOM.nextInt(3);
@@ -778,7 +783,7 @@ public void upsertRows(List<HoodieRecord<HoodieAvroPayload>> records) {
@Override
public List<HoodieRecord<HoodieAvroPayload>> insertRecordsForSpecialPartition(int numRecords) {
- return insertRecords(numRecords, SPECIAL_PARTITION_VALUE, true);
+ return insertRecords(numRecords, GenericTable.SPECIAL_PARTITION_VALUE, true);
}
@Override
diff --git a/xtable-core/src/test/java/org/apache/xtable/hudi/TestBaseFileUpdatesExtractor.java b/xtable-hudi/src/test/java/org/apache/xtable/hudi/TestBaseFileUpdatesExtractor.java
similarity index 95%
rename from xtable-core/src/test/java/org/apache/xtable/hudi/TestBaseFileUpdatesExtractor.java
rename to xtable-hudi/src/test/java/org/apache/xtable/hudi/TestBaseFileUpdatesExtractor.java
index 8f3b3f7e1..64364a2a8 100644
--- a/xtable-core/src/test/java/org/apache/xtable/hudi/TestBaseFileUpdatesExtractor.java
+++ b/xtable-hudi/src/test/java/org/apache/xtable/hudi/TestBaseFileUpdatesExtractor.java
@@ -18,10 +18,6 @@
package org.apache.xtable.hudi;
-import static org.apache.xtable.hudi.HudiTestUtil.createWriteStatus;
-import static org.apache.xtable.hudi.HudiTestUtil.getHoodieWriteConfig;
-import static org.apache.xtable.hudi.HudiTestUtil.initTableAndGetMetaClient;
-import static org.apache.xtable.testutil.ColumnStatMapUtil.getColumnStats;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.IOException;
@@ -98,13 +94,15 @@ void convertDiff() {
String fileName2 = "file2.parquet";
InternalDataFile addedFile2 =
createFile(
- String.format("%s/%s/%s", tableBasePath, partitionPath2, fileName2), getColumnStats());
+ String.format("%s/%s/%s", tableBasePath, partitionPath2, fileName2),
+ ColumnStatMapUtil.getColumnStats());
// remove files 3 files from two different partitions
String fileName3 = "file3.parquet";
InternalDataFile removedFile1 =
createFile(
- String.format("%s/%s/%s", tableBasePath, partitionPath1, fileName3), getColumnStats());
+ String.format("%s/%s/%s", tableBasePath, partitionPath1, fileName3),
+ ColumnStatMapUtil.getColumnStats());
// create file that matches hudi format to mimic that a file create by hudi is now being removed
// by another system
String fileIdForFile4 = "d1cf0980-445c-4c74-bdeb-b7e5d18779f5-0";
@@ -168,13 +166,15 @@ void extractSnapshotChanges_emptyTargetTable() throws IOException {
String fileName2 = "file2.parquet";
InternalDataFile addedFile2 =
createFile(
- String.format("%s/%s/%s", tableBasePath, partitionPath1, fileName2), getColumnStats());
+ String.format("%s/%s/%s", tableBasePath, partitionPath1, fileName2),
+ ColumnStatMapUtil.getColumnStats());
// create file in a second partition
String partitionPath2 = "partition2";
String fileName3 = "file3.parquet";
InternalDataFile addedFile3 =
createFile(
- String.format("%s/%s/%s", tableBasePath, partitionPath2, fileName3), getColumnStats());
+ String.format("%s/%s/%s", tableBasePath, partitionPath2, fileName3),
+ ColumnStatMapUtil.getColumnStats());
BaseFileUpdatesExtractor extractor =
BaseFileUpdatesExtractor.of(CONTEXT, new CachingPath(tableBasePath));
@@ -217,8 +217,8 @@ void extractSnapshotChanges_emptyTargetTable() throws IOException {
void extractSnapshotChanges_existingPartitionedTargetTable() {
String tableBasePath = tempDir.resolve(UUID.randomUUID().toString()).toString();
HoodieTableMetaClient setupMetaClient =
- initTableAndGetMetaClient(tableBasePath, "partition_field");
- HoodieWriteConfig writeConfig = getHoodieWriteConfig(setupMetaClient);
+ HudiTestUtil.initTableAndGetMetaClient(tableBasePath, "partition_field");
+ HoodieWriteConfig writeConfig = HudiTestUtil.getHoodieWriteConfig(setupMetaClient);
String partitionPath1 = "partition1";
String partitionPath2 = "partition2";
@@ -264,7 +264,7 @@ void extractSnapshotChanges_existingPartitionedTargetTable() {
InternalDataFile addedFile2 =
createFile(
String.format("%s/%s/%s", tableBasePath, partitionPath3, newFileName2),
- getColumnStats());
+ ColumnStatMapUtil.getColumnStats());
// InternalDataFile for one of the existing files in partition2
InternalDataFile existingFile =
createFile(
@@ -316,8 +316,9 @@ void extractSnapshotChanges_existingPartitionedTargetTable() {
@Test
void extractSnapshotChanges_existingNonPartitionedTargetTable() {
String tableBasePath = tempDir.resolve(UUID.randomUUID().toString()).toString();
- HoodieTableMetaClient setupMetaClient = initTableAndGetMetaClient(tableBasePath, "");
- HoodieWriteConfig writeConfig = getHoodieWriteConfig(setupMetaClient);
+ HoodieTableMetaClient setupMetaClient =
+ HudiTestUtil.initTableAndGetMetaClient(tableBasePath, "");
+ HoodieWriteConfig writeConfig = HudiTestUtil.getHoodieWriteConfig(setupMetaClient);
// initialize the table with 2 files
String existingFileName1 = "existing_file_1.parquet";
@@ -350,7 +351,9 @@ void extractSnapshotChanges_existingNonPartitionedTargetTable() {
// create a snapshot with a new file added along with one of the existing files
String newFileName1 = "new_file_1.parquet";
InternalDataFile addedFile1 =
- createFile(String.format("%s/%s", tableBasePath, newFileName1), getColumnStats());
+ createFile(
+ String.format("%s/%s", tableBasePath, newFileName1),
+ ColumnStatMapUtil.getColumnStats());
// InternalDataFile for one of the existing files in partition2
InternalDataFile existingFile =
createFile(
@@ -402,7 +405,7 @@ private WriteStatus getExpectedWriteStatus(
String fileName,
String partitionPath,
Map<String, HoodieColumnRangeMetadata<Comparable>> recordStats) {
- return createWriteStatus(
+ return HudiTestUtil.createWriteStatus(
fileName, partitionPath, COMMIT_TIME, RECORD_COUNT, FILE_SIZE, recordStats);
}
diff --git a/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiConversionTarget.java b/xtable-hudi/src/test/java/org/apache/xtable/hudi/TestHudiConversionTarget.java
similarity index 100%
rename from xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiConversionTarget.java
rename to xtable-hudi/src/test/java/org/apache/xtable/hudi/TestHudiConversionTarget.java
diff --git a/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiFileStatsExtractor.java b/xtable-hudi/src/test/java/org/apache/xtable/hudi/TestHudiFileStatsExtractor.java
similarity index 99%
rename from xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiFileStatsExtractor.java
rename to xtable-hudi/src/test/java/org/apache/xtable/hudi/TestHudiFileStatsExtractor.java
index a18bb743d..5b8ce32b1 100644
--- a/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiFileStatsExtractor.java
+++ b/xtable-hudi/src/test/java/org/apache/xtable/hudi/TestHudiFileStatsExtractor.java
@@ -49,7 +49,6 @@
import org.apache.parquet.avro.AvroParquetWriter;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.util.HadoopOutputFile;
-import org.jetbrains.annotations.NotNull;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
@@ -65,7 +64,6 @@
import org.apache.hudi.metadata.HoodieTableMetadata;
import org.apache.xtable.GenericTable;
-import org.apache.xtable.TestJavaHudiTable;
import org.apache.xtable.model.schema.InternalField;
import org.apache.xtable.model.schema.InternalSchema;
import org.apache.xtable.model.schema.InternalType;
@@ -448,7 +446,6 @@ private GenericRecord createRecord(
return record;
}
- @NotNull
private GenericData.Record getNestedRecord(Integer nestedIntValue) {
GenericData.Record nested = new GenericData.Record(NESTED_SCHEMA);
nested.put("nested_int", nestedIntValue);
diff --git a/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiInstantUtils.java b/xtable-hudi/src/test/java/org/apache/xtable/hudi/TestHudiInstantUtils.java
similarity index 100%
rename from xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiInstantUtils.java
rename to xtable-hudi/src/test/java/org/apache/xtable/hudi/TestHudiInstantUtils.java
diff --git a/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiPartitionValuesExtractor.java b/xtable-hudi/src/test/java/org/apache/xtable/hudi/TestHudiPartitionValuesExtractor.java
similarity index 100%
rename from xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiPartitionValuesExtractor.java
rename to xtable-hudi/src/test/java/org/apache/xtable/hudi/TestHudiPartitionValuesExtractor.java
diff --git a/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiSchemaExtractor.java b/xtable-hudi/src/test/java/org/apache/xtable/hudi/TestHudiSchemaExtractor.java
similarity index 100%
rename from xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiSchemaExtractor.java
rename to xtable-hudi/src/test/java/org/apache/xtable/hudi/TestHudiSchemaExtractor.java
diff --git a/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiTableManager.java b/xtable-hudi/src/test/java/org/apache/xtable/hudi/TestHudiTableManager.java
similarity index 100%
rename from xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiTableManager.java
rename to xtable-hudi/src/test/java/org/apache/xtable/hudi/TestHudiTableManager.java
diff --git a/xtable-core/src/test/java/org/apache/xtable/TestJavaHudiTable.java b/xtable-hudi/src/test/java/org/apache/xtable/hudi/TestJavaHudiTable.java
similarity index 99%
rename from xtable-core/src/test/java/org/apache/xtable/TestJavaHudiTable.java
rename to xtable-hudi/src/test/java/org/apache/xtable/hudi/TestJavaHudiTable.java
index ce3b25bda..6a5f8a63d 100644
--- a/xtable-core/src/test/java/org/apache/xtable/TestJavaHudiTable.java
+++ b/xtable-hudi/src/test/java/org/apache/xtable/hudi/TestJavaHudiTable.java
@@ -16,7 +16,7 @@
* limitations under the License.
*/
-package org.apache.xtable;
+package org.apache.xtable.hudi;
import java.io.IOException;
import java.io.UncheckedIOException;
diff --git a/xtable-core/src/test/java/org/apache/xtable/TestSparkHudiTable.java b/xtable-hudi/src/test/java/org/apache/xtable/hudi/TestSparkHudiTable.java
similarity index 99%
rename from xtable-core/src/test/java/org/apache/xtable/TestSparkHudiTable.java
rename to xtable-hudi/src/test/java/org/apache/xtable/hudi/TestSparkHudiTable.java
index 79316f5d9..5f62cc631 100644
--- a/xtable-core/src/test/java/org/apache/xtable/TestSparkHudiTable.java
+++ b/xtable-hudi/src/test/java/org/apache/xtable/hudi/TestSparkHudiTable.java
@@ -16,7 +16,7 @@
* limitations under the License.
*/
-package org.apache.xtable;
+package org.apache.xtable.hudi;
import java.nio.file.Path;
import java.time.Instant;
diff --git a/xtable-core/src/test/resources/schemas/basic_schema.avsc b/xtable-hudi/src/test/resources/schemas/basic_schema.avsc
similarity index 100%
rename from xtable-core/src/test/resources/schemas/basic_schema.avsc
rename to xtable-hudi/src/test/resources/schemas/basic_schema.avsc
diff --git a/xtable-iceberg/pom.xml b/xtable-iceberg/pom.xml
new file mode 100644
index 000000000..65614e610
--- /dev/null
+++ b/xtable-iceberg/pom.xml
@@ -0,0 +1,211 @@
+
+
+
+ 4.0.0
+
+ org.apache.xtable
+ xtable
+ 0.2.0-SNAPSHOT
+
+
+ xtable-iceberg
+ XTable Project Iceberg
+
+
+
+ org.apache.xtable
+ xtable-api
+ ${project.version}
+ provided
+
+
+ org.apache.xtable
+ xtable-core_${scala.binary.version}
+ ${project.version}
+ provided
+
+
+
+
+ org.apache.avro
+ avro
+
+
+
+
+ org.apache.iceberg
+ iceberg-core
+
+
+ org.apache.iceberg
+ iceberg-api
+
+
+
+
+ org.apache.logging.log4j
+ log4j-api
+
+
+
+
+ com.google.guava
+ guava
+ test
+
+
+
+ org.apache.hadoop
+ hadoop-common
+ provided
+
+
+
+
+ org.apache.iceberg
+ iceberg-spark-runtime-${spark.version.prefix}_${scala.binary.version}
+ test
+
+
+ org.apache.hadoop
+ hadoop-client-runtime
+ test
+
+
+
+
+ org.mockito
+ mockito-core
+ test
+
+
+
+
+ org.junit.jupiter
+ junit-jupiter-api
+ test
+
+
+ org.junit.jupiter
+ junit-jupiter-params
+ test
+
+
+ org.junit.jupiter
+ junit-jupiter-engine
+ test
+
+
+
+
+ org.apache.logging.log4j
+ log4j-core
+ test
+
+
+ org.apache.logging.log4j
+ log4j-slf4j2-impl
+ test
+
+
+
+ org.apache.xtable
+ xtable-core_${scala.binary.version}
+ ${project.version}
+ tests
+ test-jar
+ test
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-jar-plugin
+
+
+
+ test-jar
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-shade-plugin
+
+
+ package
+
+ shade
+
+
+
+
+
+
+ LICENSE
+ NOTICE
+ NOTICE.txt
+
+
+
+ META-INF/LICENSE
+ target/classes/META-INF/LICENSE
+
+
+ META-INF/NOTICE
+ target/classes/META-INF/NOTICE
+
+
+
+
+ org.apache.iceberg:iceberg-core
+ org.apache.iceberg:iceberg-api
+ org.apache.iceberg:iceberg-common
+ org.apache.iceberg:iceberg-bundled-guava
+ org.apache.avro:avro
+ com.github.ben-manes.caffeine:caffeine
+
+
+
+
+
+ org.apache.iceberg.
+ org.apache.xtable.shade.org.apache.iceberg.
+
+
+ org.apache.avro.
+ org.apache.xtable.shade.org.apache.avro.
+
+
+ com.github.ben-manes.
+ org.apache.xtable.shade.com.github.ben-manes.
+
+
+
+
+
+
+
+
+
+
diff --git a/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergColumnStatsConverter.java b/xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergColumnStatsConverter.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergColumnStatsConverter.java
rename to xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergColumnStatsConverter.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergConversionSource.java b/xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergConversionSource.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergConversionSource.java
rename to xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergConversionSource.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergConversionSourceProvider.java b/xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergConversionSourceProvider.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergConversionSourceProvider.java
rename to xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergConversionSourceProvider.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergConversionTarget.java b/xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergConversionTarget.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergConversionTarget.java
rename to xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergConversionTarget.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergDataFileExtractor.java b/xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergDataFileExtractor.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergDataFileExtractor.java
rename to xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergDataFileExtractor.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergDataFileUpdatesSync.java b/xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergDataFileUpdatesSync.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergDataFileUpdatesSync.java
rename to xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergDataFileUpdatesSync.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergPartitionSpecExtractor.java b/xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergPartitionSpecExtractor.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergPartitionSpecExtractor.java
rename to xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergPartitionSpecExtractor.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergPartitionSpecSync.java b/xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergPartitionSpecSync.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergPartitionSpecSync.java
rename to xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergPartitionSpecSync.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergPartitionValueConverter.java b/xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergPartitionValueConverter.java
similarity index 98%
rename from xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergPartitionValueConverter.java
rename to xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergPartitionValueConverter.java
index a6abd2a91..738f19b07 100644
--- a/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergPartitionValueConverter.java
+++ b/xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergPartitionValueConverter.java
@@ -42,7 +42,6 @@
import org.apache.iceberg.transforms.Transforms;
import org.apache.iceberg.types.Types;
-import org.apache.xtable.avro.AvroSchemaConverter;
import org.apache.xtable.exception.NotSupportedException;
import org.apache.xtable.model.InternalTable;
import org.apache.xtable.model.schema.InternalField;
@@ -58,7 +57,6 @@ public class IcebergPartitionValueConverter {
private static final OffsetDateTime EPOCH = Instant.ofEpochSecond(0).atOffset(ZoneOffset.UTC);
private static final IcebergPartitionValueConverter INSTANCE =
new IcebergPartitionValueConverter();
- private static final AvroSchemaConverter SCHEMA_CONVERTER = AvroSchemaConverter.getInstance();
private static final String DOT = ".";
private static final String DOT_REPLACEMENT = "_x2E";
private static final String YEAR = "year";
diff --git a/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergPartitionValueExtractor.java b/xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergPartitionValueExtractor.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergPartitionValueExtractor.java
rename to xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergPartitionValueExtractor.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergSchemaExtractor.java b/xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergSchemaExtractor.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergSchemaExtractor.java
rename to xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergSchemaExtractor.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergSchemaSync.java b/xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergSchemaSync.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergSchemaSync.java
rename to xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergSchemaSync.java
diff --git a/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergTableManager.java b/xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergTableManager.java
similarity index 100%
rename from xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergTableManager.java
rename to xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergTableManager.java
diff --git a/xtable-core/src/main/resources/META-INF/services/org.apache.xtable.spi.sync.ConversionTarget b/xtable-iceberg/src/main/resources/META-INF/services/org.apache.xtable.spi.sync.ConversionTarget
similarity index 91%
rename from xtable-core/src/main/resources/META-INF/services/org.apache.xtable.spi.sync.ConversionTarget
rename to xtable-iceberg/src/main/resources/META-INF/services/org.apache.xtable.spi.sync.ConversionTarget
index a00b41e08..c876b47ac 100644
--- a/xtable-core/src/main/resources/META-INF/services/org.apache.xtable.spi.sync.ConversionTarget
+++ b/xtable-iceberg/src/main/resources/META-INF/services/org.apache.xtable.spi.sync.ConversionTarget
@@ -16,6 +16,4 @@
# limitations under the License.
##########################################################################
-org.apache.xtable.hudi.HudiConversionTarget
-org.apache.xtable.delta.DeltaConversionTarget
org.apache.xtable.iceberg.IcebergConversionTarget
diff --git a/xtable-core/src/test/java/org/apache/xtable/iceberg/ITIcebergConversionSource.java b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/ITIcebergConversionSource.java
similarity index 99%
rename from xtable-core/src/test/java/org/apache/xtable/iceberg/ITIcebergConversionSource.java
rename to xtable-iceberg/src/test/java/org/apache/xtable/iceberg/ITIcebergConversionSource.java
index 210b6f9d6..c2c3f5261 100644
--- a/xtable-core/src/test/java/org/apache/xtable/iceberg/ITIcebergConversionSource.java
+++ b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/ITIcebergConversionSource.java
@@ -47,7 +47,6 @@
import org.apache.iceberg.Snapshot;
import org.apache.iceberg.data.Record;
-import org.apache.xtable.TestIcebergTable;
import org.apache.xtable.conversion.SourceTable;
import org.apache.xtable.model.CommitsBacklog;
import org.apache.xtable.model.InstantsForIncrementalSync;
diff --git a/xtable-core/src/test/java/org/apache/xtable/iceberg/IcebergTestUtils.java b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/IcebergTestUtils.java
similarity index 100%
rename from xtable-core/src/test/java/org/apache/xtable/iceberg/IcebergTestUtils.java
rename to xtable-iceberg/src/test/java/org/apache/xtable/iceberg/IcebergTestUtils.java
diff --git a/xtable-core/src/test/java/org/apache/xtable/iceberg/StubCatalog.java b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/StubCatalog.java
similarity index 100%
rename from xtable-core/src/test/java/org/apache/xtable/iceberg/StubCatalog.java
rename to xtable-iceberg/src/test/java/org/apache/xtable/iceberg/StubCatalog.java
diff --git a/xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergColumnStatsConverter.java b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergColumnStatsConverter.java
similarity index 100%
rename from xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergColumnStatsConverter.java
rename to xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergColumnStatsConverter.java
diff --git a/xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergConversionSource.java b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergConversionSource.java
similarity index 100%
rename from xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergConversionSource.java
rename to xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergConversionSource.java
diff --git a/xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergDataHelper.java b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergDataHelper.java
similarity index 99%
rename from xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergDataHelper.java
rename to xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergDataHelper.java
index d90ba169f..247b2a0fd 100644
--- a/xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergDataHelper.java
+++ b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergDataHelper.java
@@ -126,7 +126,7 @@ public class TestIcebergDataHelper {
String recordKeyField;
List<String> partitionFieldNames;
- public static enum SchemaType {
+ public enum SchemaType {
BASIC,
COMMON,
COMMON_WITH_ADDITIONAL_COLUMNS,
diff --git a/xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionSpecExtractor.java b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionSpecExtractor.java
similarity index 100%
rename from xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionSpecExtractor.java
rename to xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionSpecExtractor.java
diff --git a/xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionSpecSync.java b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionSpecSync.java
similarity index 100%
rename from xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionSpecSync.java
rename to xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionSpecSync.java
diff --git a/xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionValueConverter.java b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionValueConverter.java
similarity index 100%
rename from xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionValueConverter.java
rename to xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionValueConverter.java
diff --git a/xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergSchemaExtractor.java b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergSchemaExtractor.java
similarity index 100%
rename from xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergSchemaExtractor.java
rename to xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergSchemaExtractor.java
diff --git a/xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergSchemaSync.java b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergSchemaSync.java
similarity index 100%
rename from xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergSchemaSync.java
rename to xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergSchemaSync.java
diff --git a/xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergSync.java b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergSync.java
similarity index 99%
rename from xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergSync.java
rename to xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergSync.java
index bd36dde91..4913979c5 100644
--- a/xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergSync.java
+++ b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergSync.java
@@ -82,7 +82,6 @@
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;
-import org.apache.xtable.ITConversionController;
import org.apache.xtable.conversion.TargetTable;
import org.apache.xtable.model.InternalSnapshot;
import org.apache.xtable.model.InternalTable;
@@ -103,8 +102,8 @@
import org.apache.xtable.spi.sync.TableFormatSync;
/**
- * Validates that the metadata for the table is properly created/updated. {@link
- * ITConversionController} validates that the table and its data can be properly read.
+ * Validates that the metadata for the table is properly created/updated. ITConversionController
+ * validates that the table and its data can be properly read.
*/
public class TestIcebergSync {
private static final Random RANDOM = new Random();
diff --git a/xtable-core/src/test/java/org/apache/xtable/TestIcebergTable.java b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergTable.java
similarity index 98%
rename from xtable-core/src/test/java/org/apache/xtable/TestIcebergTable.java
rename to xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergTable.java
index 0c8336fef..05c9c5d91 100644
--- a/xtable-core/src/test/java/org/apache/xtable/TestIcebergTable.java
+++ b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergTable.java
@@ -16,7 +16,7 @@
* limitations under the License.
*/
-package org.apache.xtable;
+package org.apache.xtable.iceberg;
import static org.apache.iceberg.SnapshotSummary.TOTAL_RECORDS_PROP;
import static org.junit.jupiter.api.Assertions.*;
@@ -65,7 +65,7 @@
import com.google.common.base.Preconditions;
-import org.apache.xtable.iceberg.TestIcebergDataHelper;
+import org.apache.xtable.GenericTable;
@Getter
public class TestIcebergTable implements GenericTable {
@@ -172,7 +172,7 @@ public List<Record> insertRecordsForPartition(int numRows, String partitionValue
@Override
public List insertRecordsForSpecialPartition(int numRows) {
- return insertRecordsForPartition(numRows, SPECIAL_PARTITION_VALUE);
+ return insertRecordsForPartition(numRows, GenericTable.SPECIAL_PARTITION_VALUE);
}
@Override
@@ -241,7 +241,7 @@ public void deletePartition(String partitionValue) {
@Override
public void deleteSpecialPartition() {
- deletePartition(SPECIAL_PARTITION_VALUE);
+ deletePartition(GenericTable.SPECIAL_PARTITION_VALUE);
}
@Override
diff --git a/xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergTableManager.java b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergTableManager.java
similarity index 100%
rename from xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergTableManager.java
rename to xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergTableManager.java
diff --git a/xtable-core/src/test/resources/partition_specs/catalog_sales.json b/xtable-iceberg/src/test/resources/partition_specs/catalog_sales.json
similarity index 100%
rename from xtable-core/src/test/resources/partition_specs/catalog_sales.json
rename to xtable-iceberg/src/test/resources/partition_specs/catalog_sales.json
diff --git a/xtable-core/src/test/resources/schemas/catalog_sales.json b/xtable-iceberg/src/test/resources/schemas/catalog_sales.json
similarity index 100%
rename from xtable-core/src/test/resources/schemas/catalog_sales.json
rename to xtable-iceberg/src/test/resources/schemas/catalog_sales.json
diff --git a/xtable-integration-tests/pom.xml b/xtable-integration-tests/pom.xml
new file mode 100644
index 000000000..ea05ab5bb
--- /dev/null
+++ b/xtable-integration-tests/pom.xml
@@ -0,0 +1,259 @@
+
+
+
+ 4.0.0
+
+ org.apache.xtable
+ xtable
+ 0.2.0-SNAPSHOT
+
+
+ xtable-integration-tests
+ XTable Project Integration Test Suite
+
+
+
+ com.fasterxml.jackson.core
+ jackson-core
+ test
+
+
+ com.fasterxml.jackson.core
+ jackson-databind
+ test
+
+
+
+ com.google.guava
+ guava
+ test
+
+
+
+
+ org.apache.hudi
+ hudi-spark${spark.version.prefix}-bundle_${scala.binary.version}
+ test
+
+
+ org.apache.hudi
+ hudi-java-client
+
+
+ org.apache.hbase
+ hbase-server
+
+
+ org.apache.hbase
+ hbase-client
+
+
+ test
+
+
+
+
+ org.apache.iceberg
+ iceberg-core
+ test
+
+
+ org.apache.iceberg
+ iceberg-api
+ test
+
+
+
+
+ org.apache.hadoop
+ hadoop-common
+ provided
+
+
+
+
+ org.apache.logging.log4j
+ log4j-api
+ test
+
+
+ org.apache.logging.log4j
+ log4j-1.2-api
+ test
+
+
+
+
+ org.apache.iceberg
+ iceberg-spark-runtime-${spark.version.prefix}_${scala.binary.version}
+ test
+
+
+ org.apache.spark
+ spark-core_${scala.binary.version}
+ runtime
+
+
+ org.apache.spark
+ spark-sql_${scala.binary.version}
+ runtime
+
+
+ org.apache.spark
+ spark-catalyst_${scala.binary.version}
+ runtime
+
+
+
+ org.apache.xtable
+ xtable-api
+ ${project.version}
+ test
+
+
+ org.apache.xtable
+ xtable-core_${scala.binary.version}
+ ${project.version}
+ test
+
+
+ org.apache.xtable
+ xtable-delta_${scala.binary.version}
+ ${project.version}
+ test
+
+
+ org.apache.xtable
+ xtable-iceberg
+ ${project.version}
+ test
+
+
+ org.apache.xtable
+ xtable-hudi
+ ${project.version}
+ test
+
+
+
+
+ org.junit.jupiter
+ junit-jupiter-api
+ test
+
+
+ org.junit.jupiter
+ junit-jupiter-params
+ test
+
+
+ org.junit.jupiter
+ junit-jupiter-engine
+ test
+
+
+
+
+ org.apache.logging.log4j
+ log4j-core
+ test
+
+
+ org.apache.logging.log4j
+ log4j-slf4j2-impl
+ test
+
+
+
+ org.apache.xtable
+ xtable-core_${scala.binary.version}
+ ${project.version}
+ tests
+ test-jar
+ test
+
+
+
+ org.apache.xtable
+ xtable-delta_${scala.binary.version}
+ ${project.version}
+ tests
+ test-jar
+ test
+
+
+
+ org.apache.xtable
+ xtable-hudi
+ ${project.version}
+ tests
+ test-jar
+ test
+
+
+
+ org.apache.xtable
+ xtable-iceberg
+ ${project.version}
+ tests
+ test-jar
+ test
+
+
+
+
+ org.apache.xtable
+ xtable-utilities_${scala.binary.version}
+ ${project.version}
+ test
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-deploy-plugin
+ ${maven-deploy-plugin.version}
+
+
+ true
+
+
+
+
+ org.apache.maven.plugins
+ maven-shade-plugin
+
+
+ package
+
+ shade
+
+
+ spark-testing-bundle_${scala.binary.version}
+ false
+
+
+
+
+
+
+
diff --git a/xtable-integration-tests/src/test/java/org/apache/xtable/ConversionTestingBase.java b/xtable-integration-tests/src/test/java/org/apache/xtable/ConversionTestingBase.java
new file mode 100644
index 000000000..cf3fe54b5
--- /dev/null
+++ b/xtable-integration-tests/src/test/java/org/apache/xtable/ConversionTestingBase.java
@@ -0,0 +1,279 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.xtable;
+
+import java.nio.ByteBuffer;
+import java.nio.file.Path;
+import java.time.ZoneId;
+import java.time.format.DateTimeFormatter;
+import java.util.Arrays;
+import java.util.Base64;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.io.TempDir;
+
+import org.apache.hudi.client.HoodieReadClient;
+import org.apache.hudi.common.config.HoodieMetadataConfig;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
+
+import org.apache.xtable.hudi.HudiTestUtil;
+import org.apache.xtable.model.storage.TableFormat;
+
+class ConversionTestingBase {
+ @TempDir public static Path tempDir;
+ protected static final DateTimeFormatter DATE_FORMAT =
+ DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSS").withZone(ZoneId.of("UTC"));
+ protected static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+ protected static final ObjectMapper YAML_MAPPER = new ObjectMapper(new YAMLFactory());
+
+ protected static JavaSparkContext jsc;
+ protected static SparkSession sparkSession;
+
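+ // One Spark session is shared across the whole suite; it is configured with Hudi read support and
+ // writes Parquet lists using the new Avro layout so all formats can be read back through Spark.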
+ @BeforeAll
+ public static void setupOnce() {
+ SparkConf sparkConf = HudiTestUtil.getSparkConf(tempDir);
+ sparkSession =
+ SparkSession.builder().config(HoodieReadClient.addHoodieSupport(sparkConf)).getOrCreate();
+ sparkSession
+ .sparkContext()
+ .hadoopConfiguration()
+ .set("parquet.avro.write-old-list-structure", "false");
+ jsc = JavaSparkContext.fromSparkContext(sparkSession.sparkContext());
+ }
+
+ @AfterAll
+ public static void teardown() {
+ if (jsc != null) {
+ jsc.close();
+ }
+ if (sparkSession != null) {
+ sparkSession.close();
+ }
+ }
+
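+ // Every table format other than the source is used as a conversion target in these tests.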
+ protected static List<String> getOtherFormats(String sourceTableFormat) {
+ return Arrays.stream(TableFormat.values())
+ .filter(format -> !format.equals(sourceTableFormat))
+ .collect(Collectors.toList());
+ }
+
+ protected void checkDatasetEquivalenceWithFilter(
+ String sourceFormat,
+ GenericTable<?, ?> sourceTable,
+ List<String> targetFormats,
+ String filter) {
+ checkDatasetEquivalence(
+ sourceFormat,
+ sourceTable,
+ Collections.emptyMap(),
+ targetFormats,
+ Collections.emptyMap(),
+ null,
+ filter);
+ }
+
+ protected void checkDatasetEquivalence(
+ String sourceFormat,
+ GenericTable<?, ?> sourceTable,
+ List<String> targetFormats,
+ Integer expectedCount) {
+ checkDatasetEquivalence(
+ sourceFormat,
+ sourceTable,
+ Collections.emptyMap(),
+ targetFormats,
+ Collections.emptyMap(),
+ expectedCount,
+ "1 = 1");
+ }
+
+ protected void checkDatasetEquivalence(
+ String sourceFormat,
+ GenericTable<?, ?> sourceTable,
+ Map<String, String> sourceOptions,
+ List<String> targetFormats,
+ Map<String, Map<String, String>> targetOptions,
+ Integer expectedCount) {
+ checkDatasetEquivalence(
+ sourceFormat,
+ sourceTable,
+ sourceOptions,
+ targetFormats,
+ targetOptions,
+ expectedCount,
+ "1 = 1");
+ }
+
+ protected void checkDatasetEquivalence(
+ String sourceFormat,
+ GenericTable<?, ?> sourceTable,
+ Map<String, String> sourceOptions,
+ List<String> targetFormats,
+ Map<String, Map<String, String>> targetOptions,
+ Integer expectedCount,
+ String filterCondition) {
+ Dataset<Row> sourceRows =
+ sparkSession
+ .read()
+ .options(sourceOptions)
+ .format(sourceFormat.toLowerCase())
+ .load(sourceTable.getBasePath())
+ .orderBy(sourceTable.getOrderByColumn())
+ .filter(filterCondition);
+ Map<String, Dataset<Row>> targetRowsByFormat =
+ targetFormats.stream()
+ .collect(
+ Collectors.toMap(
+ Function.identity(),
+ targetFormat -> {
+ Map<String, String> finalTargetOptions =
+ targetOptions.getOrDefault(targetFormat, Collections.emptyMap());
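+ // Hudi reads need the metadata table enabled and partition values extracted from the path
+ // so that the rows read back through Spark line up with the source table.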
+ if (targetFormat.equals(TableFormat.HUDI)) {
+ finalTargetOptions = new HashMap<>(finalTargetOptions);
+ finalTargetOptions.put(HoodieMetadataConfig.ENABLE.key(), "true");
+ finalTargetOptions.put(
+ "hoodie.datasource.read.extract.partition.values.from.path", "true");
+ }
+ return sparkSession
+ .read()
+ .options(finalTargetOptions)
+ .format(targetFormat.toLowerCase())
+ .load(sourceTable.getDataPath())
+ .orderBy(sourceTable.getOrderByColumn())
+ .filter(filterCondition);
+ }));
+
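+ // Select the same columns on both sides, serialize to JSON, and compare row by row.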
+ String[] selectColumnsArr = sourceTable.getColumnsToSelect().toArray(new String[] {});
+ List<String> dataset1Rows = sourceRows.selectExpr(selectColumnsArr).toJSON().collectAsList();
+ targetRowsByFormat.forEach(
+ (format, targetRows) -> {
+ List<String> dataset2Rows =
+ targetRows.selectExpr(selectColumnsArr).toJSON().collectAsList();
+ Assertions.assertEquals(
+ dataset1Rows.size(),
+ dataset2Rows.size(),
+ String.format(
+ "Datasets have different row counts when reading from Spark. Source: %s, Target: %s",
+ sourceFormat, format));
+ // sanity check the count to ensure test is set up properly
+ if (expectedCount != null) {
+ Assertions.assertEquals(expectedCount, dataset1Rows.size());
+ } else {
+ // if count is not known ahead of time, ensure datasets are non-empty
+ Assertions.assertFalse(dataset1Rows.isEmpty());
+ }
+
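+ // UUID columns are represented differently across formats (plain string vs. Base64-encoded bytes),
+ // so rows containing them need the dedicated comparison below.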
+ if (containsUUIDFields(dataset1Rows) && containsUUIDFields(dataset2Rows)) {
+ compareDatasetWithUUID(dataset1Rows, dataset2Rows);
+ } else {
+ Assertions.assertEquals(
+ dataset1Rows,
+ dataset2Rows,
+ String.format(
+ "Datasets are not equivalent when reading from Spark. Source: %s, Target: %s",
+ sourceFormat, format));
+ }
+ });
+ }
+
+ /**
+ * Compares two datasets where dataset1Rows is for Iceberg and dataset2Rows is for other formats
+ * (such as Delta or Hudi). - For the "uuid_field", if present, the UUID from dataset1 (Iceberg)
+ * is compared with the Base64-encoded UUID from dataset2 (other formats), after decoding. - For
+ * all other fields, the values are compared directly. - If neither row contains the "uuid_field",
+ * the rows are compared as plain JSON strings.
+ *
+ * @param dataset1Rows List of JSON rows representing the dataset in Iceberg format (UUID is
+ * stored as a string).
+ * @param dataset2Rows List of JSON rows representing the dataset in other formats (UUID might be
+ * Base64-encoded).
+ */
+ private void compareDatasetWithUUID(List<String> dataset1Rows, List<String> dataset2Rows) {
+ for (int i = 0; i < dataset1Rows.size(); i++) {
+ String row1 = dataset1Rows.get(i);
+ String row2 = dataset2Rows.get(i);
+ if (row1.contains("uuid_field") && row2.contains("uuid_field")) {
+ try {
+ JsonNode node1 = OBJECT_MAPPER.readTree(row1);
+ JsonNode node2 = OBJECT_MAPPER.readTree(row2);
+
+ // check uuid field
+ String uuidStr1 = node1.get("uuid_field").asText();
+ byte[] bytes = Base64.getDecoder().decode(node2.get("uuid_field").asText());
+ ByteBuffer bb = ByteBuffer.wrap(bytes);
+ UUID uuid2 = new UUID(bb.getLong(), bb.getLong());
+ String uuidStr2 = uuid2.toString();
+ Assertions.assertEquals(
+ uuidStr1,
+ uuidStr2,
+ String.format(
+ "Datasets are not equivalent when reading from Spark. Source: %s, Target: %s",
+ uuidStr1, uuidStr2));
+
+ // check other fields
+ ((ObjectNode) node1).remove("uuid_field");
+ ((ObjectNode) node2).remove("uuid_field");
+ Assertions.assertEquals(
+ node1.toString(),
+ node2.toString(),
+ String.format(
+ "Datasets are not equivalent when comparing other fields. Source: %s, Target: %s",
+ node1, node2));
+ } catch (JsonProcessingException e) {
+ throw new RuntimeException(e);
+ }
+ } else {
+ Assertions.assertEquals(
+ row1,
+ row2,
+ String.format(
+ "Datasets are not equivalent when reading from Spark. Source: %s, Target: %s",
+ row1, row2));
+ }
+ }
+ }
+
+ private boolean containsUUIDFields(List<String> rows) {
+ for (String row : rows) {
+ if (row.contains("\"uuid_field\"")) {
+ return true;
+ }
+ }
+ return false;
+ }
+}
diff --git a/xtable-integration-tests/src/test/java/org/apache/xtable/GenericTableFactory.java b/xtable-integration-tests/src/test/java/org/apache/xtable/GenericTableFactory.java
new file mode 100644
index 000000000..fd62578ac
--- /dev/null
+++ b/xtable-integration-tests/src/test/java/org/apache/xtable/GenericTableFactory.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.xtable;
+
+import static org.apache.xtable.model.storage.TableFormat.DELTA;
+import static org.apache.xtable.model.storage.TableFormat.HUDI;
+import static org.apache.xtable.model.storage.TableFormat.ICEBERG;
+
+import java.nio.file.Path;
+
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.SparkSession;
+
+import org.apache.hudi.common.model.HoodieTableType;
+
+import org.apache.xtable.delta.TestSparkDeltaTable;
+import org.apache.xtable.hudi.TestSparkHudiTable;
+import org.apache.xtable.iceberg.TestIcebergTable;
+
+public class GenericTableFactory {
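+ // Factory for the format-specific test tables; it takes over the getInstance helpers that
+ // ITConversionController previously called on GenericTable.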
+ static GenericTable getInstance(
+ String tableName,
+ Path tempDir,
+ SparkSession sparkSession,
+ JavaSparkContext jsc,
+ String sourceFormat,
+ boolean isPartitioned) {
+ switch (sourceFormat) {
+ case HUDI:
+ return TestSparkHudiTable.forStandardSchemaAndPartitioning(
+ tableName, tempDir, jsc, isPartitioned);
+ case DELTA:
+ return TestSparkDeltaTable.forStandardSchemaAndPartitioning(
+ tableName, tempDir, sparkSession, isPartitioned ? "level" : null);
+ case ICEBERG:
+ return TestIcebergTable.forStandardSchemaAndPartitioning(
+ tableName, isPartitioned ? "level" : null, tempDir, jsc.hadoopConfiguration());
+ default:
+ throw new IllegalArgumentException("Unsupported source format: " + sourceFormat);
+ }
+ }
+
+ static GenericTable getInstanceWithAdditionalColumns(
+ String tableName,
+ Path tempDir,
+ SparkSession sparkSession,
+ JavaSparkContext jsc,
+ String sourceFormat,
+ boolean isPartitioned) {
+ switch (sourceFormat) {
+ case HUDI:
+ return TestSparkHudiTable.forSchemaWithAdditionalColumnsAndPartitioning(
+ tableName, tempDir, jsc, isPartitioned);
+ case DELTA:
+ return TestSparkDeltaTable.forSchemaWithAdditionalColumnsAndPartitioning(
+ tableName, tempDir, sparkSession, isPartitioned ? "level" : null);
+ case ICEBERG:
+ return TestIcebergTable.forSchemaWithAdditionalColumnsAndPartitioning(
+ tableName, isPartitioned ? "level" : null, tempDir, jsc.hadoopConfiguration());
+ default:
+ throw new IllegalArgumentException("Unsupported source format: " + sourceFormat);
+ }
+ }
+
+ static GenericTable getInstanceWithCustomPartitionConfig(
+ String tableName,
+ Path tempDir,
+ JavaSparkContext jsc,
+ String sourceFormat,
+ String partitionConfig) {
+ switch (sourceFormat) {
+ case HUDI:
+ return TestSparkHudiTable.forStandardSchema(
+ tableName, tempDir, jsc, partitionConfig, HoodieTableType.COPY_ON_WRITE);
+ default:
+ throw new IllegalArgumentException(
+ String.format(
+ "Unsupported source format: %s for custom partition config", sourceFormat));
+ }
+ }
+
+ static GenericTable getInstanceWithUUIDColumns(
+ String tableName,
+ Path tempDir,
+ SparkSession sparkSession,
+ JavaSparkContext jsc,
+ String sourceFormat,
+ boolean isPartitioned) {
+ switch (sourceFormat) {
+ case ICEBERG:
+ return TestIcebergTable.forSchemaWithUUIDColumns(
+ tableName, isPartitioned ? "level" : null, tempDir, jsc.hadoopConfiguration());
+ default:
+ throw new IllegalArgumentException("Unsupported source format: " + sourceFormat);
+ }
+ }
+}
diff --git a/xtable-integration-tests/src/test/java/org/apache/xtable/ITBundleValidation.java b/xtable-integration-tests/src/test/java/org/apache/xtable/ITBundleValidation.java
new file mode 100644
index 000000000..0d666ecc3
--- /dev/null
+++ b/xtable-integration-tests/src/test/java/org/apache/xtable/ITBundleValidation.java
@@ -0,0 +1,172 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.xtable;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.UUID;
+import java.util.stream.Stream;
+
+import lombok.SneakyThrows;
+import lombok.extern.log4j.Log4j2;
+
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import org.apache.xtable.model.storage.TableFormat;
+import org.apache.xtable.utilities.RunSync;
+
+@Log4j2
+class ITBundleValidation extends ConversionTestingBase {
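+ // Locations of the bundled jars produced by the Maven build; the ROOT_DIR, PROJECT_VERSION, and
+ // SCALA_VERSION environment variables are expected to be supplied by the surrounding build.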
+ private static final String PROJECT_ROOT = System.getenv("ROOT_DIR");
+ private static final String PROJECT_VERSION = System.getenv("PROJECT_VERSION");
+ private static final String SCALA_VERSION = System.getenv("SCALA_VERSION");
+ private static final String UTILITIES_JAR_PATH =
+ String.format(
+ "%s/xtable-utilities/target/xtable-utilities_%s-%s-bundled.jar",
+ PROJECT_ROOT, SCALA_VERSION, PROJECT_VERSION);
+ private static final String ICEBERG_JAR_PATH =
+ String.format(
+ "%s/xtable-iceberg/target/xtable-iceberg-%s-bundled.jar", PROJECT_ROOT, PROJECT_VERSION);
+ private static final String HUDI_JAR_PATH =
+ String.format(
+ "%s/xtable-hudi/target/xtable-hudi-%s-bundled.jar", PROJECT_ROOT, PROJECT_VERSION);
+ private static final String DELTA_JAR_PATH =
+ String.format(
+ "%s/xtable-delta/target/xtable-delta_%s-%s-bundled.jar",
+ PROJECT_ROOT, SCALA_VERSION, PROJECT_VERSION);
+ private static final String SPARK_BUNDLE_PATH =
+ String.format(
+ "%s/xtable-integration-tests/target/spark-testing-bundle_%s.jar",
+ PROJECT_ROOT, SCALA_VERSION);
+
+ private static Stream<Arguments> generateTestParametersForFormats() {
+ List<Arguments> arguments = new ArrayList<>();
+ List<String> formats = Arrays.asList(TableFormat.HUDI, TableFormat.DELTA, TableFormat.ICEBERG);
+ for (String sourceTableFormat : formats) {
+ for (String targetTableFormat : formats) {
+ if (!sourceTableFormat.equals(targetTableFormat)) {
+ arguments.add(Arguments.of(sourceTableFormat, targetTableFormat));
+ }
+ }
+ }
+ return arguments.stream();
+ }
+ /*
+ * This test has the following steps at a high level.
+ * 1. Insert a few records.
+ * 2. Upsert a few records.
+ * 3. Delete a few records.
+ * After each step, the RunSync command is run as a separate process to validate that the proper dependencies are included in the bundles.
+ */
+ @ParameterizedTest
+ @MethodSource("generateTestParametersForFormats")
+ public void testConversionWithBundles(String sourceTableFormat, String targetTableFormat) {
+ String tableName = GenericTable.getTableName();
+ List<String> targetTableFormats = Collections.singletonList(targetTableFormat);
+ String partitionConfig = "level:VALUE";
+ List<?> insertRecords;
+ try (GenericTable table =
+ GenericTableFactory.getInstance(
+ tableName, tempDir, sparkSession, jsc, sourceTableFormat, true)) {
+ String configPath =
+ writeConfig(sourceTableFormat, targetTableFormats, table, tableName, partitionConfig);
+ insertRecords = table.insertRows(100);
+
+ executeRunSync(configPath, sourceTableFormat, targetTableFormat);
+ checkDatasetEquivalence(sourceTableFormat, table, targetTableFormats, 100);
+
+ // make multiple commits and then sync
+ table.insertRows(100);
+ table.upsertRows(insertRecords.subList(0, 20));
+ executeRunSync(configPath, sourceTableFormat, targetTableFormat);
+ checkDatasetEquivalence(sourceTableFormat, table, targetTableFormats, 200);
+
+ table.deleteRows(insertRecords.subList(30, 50));
+ executeRunSync(configPath, sourceTableFormat, targetTableFormat);
+ checkDatasetEquivalence(sourceTableFormat, table, targetTableFormats, 180);
+ checkDatasetEquivalenceWithFilter(
+ sourceTableFormat, table, targetTableFormats, table.getFilterQuery());
+ }
+ }
+
+ @SneakyThrows
+ private String writeConfig(
+ String sourceFormat,
+ List<String> targetFormats,
+ GenericTable table,
+ String tableName,
+ String partitionSpec) {
+ RunSync.DatasetConfig.Table tableConfig =
+ new RunSync.DatasetConfig.Table(
+ table.getBasePath(), table.getDataPath(), tableName, partitionSpec, null);
+ RunSync.DatasetConfig datasetConfig =
+ new RunSync.DatasetConfig(
+ sourceFormat, targetFormats, Collections.singletonList(tableConfig));
+ Path configPath = tempDir.resolve("config_" + UUID.randomUUID());
+ YAML_MAPPER.writeValue(configPath.toFile(), datasetConfig);
+ return configPath.toString();
+ }
+
+ @SneakyThrows
+ private void executeRunSync(
+ String configPath, String sourceTableFormat, String targetTableFormat) {
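+ // Put only the utilities jar and the source/target bundles on the classpath and run RunSync in a
+ // child JVM; a non-zero exit code fails the test.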
+ String classPath =
+ String.format(
+ "%s:%s:%s",
+ UTILITIES_JAR_PATH,
+ getJarsForFormat(sourceTableFormat),
+ getJarsForFormat(targetTableFormat));
+ Process process =
+ new ProcessBuilder()
+ .command(
+ "java", "-cp", classPath, RunSync.class.getName(), "--datasetConfig", configPath)
+ .redirectErrorStream(true)
+ .start();
+ try (BufferedReader reader =
+ new BufferedReader(new InputStreamReader(process.getInputStream()))) {
+ String line;
+ while ((line = reader.readLine()) != null) {
+ log.info("Process log {}", line);
+ }
+ }
+ assertEquals(0, process.waitFor());
+ }
+
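+ // Maps each format to the bundle(s) it needs on the RunSync classpath; Delta additionally requires
+ // the Spark testing bundle built by this module.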
+ private String getJarsForFormat(String format) {
+ switch (format) {
+ case TableFormat.HUDI:
+ return HUDI_JAR_PATH;
+ case TableFormat.ICEBERG:
+ return ICEBERG_JAR_PATH;
+ case TableFormat.DELTA:
+ return String.format("%s:%s", DELTA_JAR_PATH, SPARK_BUNDLE_PATH);
+ default:
+ throw new UnsupportedOperationException("Unsupported format: " + format);
+ }
+ }
+}
diff --git a/xtable-core/src/test/java/org/apache/xtable/ITConversionController.java b/xtable-integration-tests/src/test/java/org/apache/xtable/ITConversionController.java
similarity index 67%
rename from xtable-core/src/test/java/org/apache/xtable/ITConversionController.java
rename to xtable-integration-tests/src/test/java/org/apache/xtable/ITConversionController.java
index 3d539766a..1654dde42 100644
--- a/xtable-core/src/test/java/org/apache/xtable/ITConversionController.java
+++ b/xtable-integration-tests/src/test/java/org/apache/xtable/ITConversionController.java
@@ -18,35 +18,22 @@
package org.apache.xtable;
-import static org.apache.xtable.GenericTable.getTableName;
-import static org.apache.xtable.hudi.HudiSourceConfig.PARTITION_FIELD_SPEC_CONFIG;
import static org.apache.xtable.hudi.HudiTestUtil.PartitionConfig;
-import static org.apache.xtable.model.storage.TableFormat.DELTA;
-import static org.apache.xtable.model.storage.TableFormat.HUDI;
-import static org.apache.xtable.model.storage.TableFormat.ICEBERG;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertFalse;
import java.net.URI;
-import java.nio.ByteBuffer;
import java.nio.file.Files;
-import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.Duration;
import java.time.Instant;
-import java.time.ZoneId;
-import java.time.format.DateTimeFormatter;
import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Base64;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Properties;
-import java.util.UUID;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
@@ -56,24 +43,15 @@
import lombok.Builder;
import lombok.Value;
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
-import org.apache.spark.sql.SparkSession;
-import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.io.TempDir;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.EnumSource;
import org.junit.jupiter.params.provider.MethodSource;
import org.junit.jupiter.params.provider.ValueSource;
-import org.apache.hudi.client.HoodieReadClient;
-import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.common.model.HoodieAvroPayload;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieTableType;
@@ -85,10 +63,6 @@
import org.apache.spark.sql.delta.DeltaLog;
-import com.fasterxml.jackson.core.JsonProcessingException;
-import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.collect.ImmutableList;
import org.apache.xtable.conversion.ConversionConfig;
@@ -98,41 +72,14 @@
import org.apache.xtable.conversion.TargetTable;
import org.apache.xtable.delta.DeltaConversionSourceProvider;
import org.apache.xtable.hudi.HudiConversionSourceProvider;
-import org.apache.xtable.hudi.HudiTestUtil;
+import org.apache.xtable.hudi.HudiSourceConfig;
+import org.apache.xtable.hudi.TestJavaHudiTable;
+import org.apache.xtable.hudi.TestSparkHudiTable;
import org.apache.xtable.iceberg.IcebergConversionSourceProvider;
import org.apache.xtable.model.storage.TableFormat;
import org.apache.xtable.model.sync.SyncMode;
-public class ITConversionController {
- @TempDir public static Path tempDir;
- private static final DateTimeFormatter DATE_FORMAT =
- DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSS").withZone(ZoneId.of("UTC"));
- private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
- private static JavaSparkContext jsc;
- private static SparkSession sparkSession;
-
- @BeforeAll
- public static void setupOnce() {
- SparkConf sparkConf = HudiTestUtil.getSparkConf(tempDir);
- sparkSession =
- SparkSession.builder().config(HoodieReadClient.addHoodieSupport(sparkConf)).getOrCreate();
- sparkSession
- .sparkContext()
- .hadoopConfiguration()
- .set("parquet.avro.write-old-list-structure", "false");
- jsc = JavaSparkContext.fromSparkContext(sparkSession.sparkContext());
- }
-
- @AfterAll
- public static void teardown() {
- if (jsc != null) {
- jsc.close();
- }
- if (sparkSession != null) {
- sparkSession.close();
- }
- }
+public class ITConversionController extends ConversionTestingBase {
private static Stream<Arguments> testCasesWithPartitioningAndSyncModes() {
return addBasicPartitionCases(testCasesWithSyncModes());
@@ -140,7 +87,8 @@ private static Stream<Arguments> testCasesWithPartitioningAndSyncModes() {
private static Stream<Arguments> generateTestParametersForFormatsSyncModesAndPartitioning() {
List<Arguments> arguments = new ArrayList<>();
- for (String sourceTableFormat : Arrays.asList(HUDI, DELTA, ICEBERG)) {
+ for (String sourceTableFormat :
+ Arrays.asList(TableFormat.HUDI, TableFormat.DELTA, TableFormat.ICEBERG)) {
for (SyncMode syncMode : SyncMode.values()) {
for (boolean isPartitioned : new boolean[] {true, false}) {
arguments.add(Arguments.of(sourceTableFormat, syncMode, isPartitioned));
@@ -156,8 +104,9 @@ private static Stream<Arguments> generateTestParametersForUUID() {
for (boolean isPartitioned : new boolean[] {true, false}) {
// TODO: Add Hudi UUID support later (https://github.com/apache/incubator-xtable/issues/543)
// Current spark parquet reader can not handle fix-size byte array with UUID logic type
- List<String> targetTableFormats = Arrays.asList(DELTA);
- arguments.add(Arguments.of(ICEBERG, targetTableFormats, syncMode, isPartitioned));
+ List<String> targetTableFormats = Arrays.asList(TableFormat.DELTA);
+ arguments.add(
+ Arguments.of(TableFormat.ICEBERG, targetTableFormats, syncMode, isPartitioned));
}
}
return arguments.stream();
@@ -168,17 +117,17 @@ private static Stream<Arguments> testCasesWithSyncModes() {
}
private ConversionSourceProvider<?> getConversionSourceProvider(String sourceTableFormat) {
- if (sourceTableFormat.equalsIgnoreCase(HUDI)) {
+ if (sourceTableFormat.equalsIgnoreCase(TableFormat.HUDI)) {
ConversionSourceProvider<HoodieInstant> hudiConversionSourceProvider =
new HudiConversionSourceProvider();
hudiConversionSourceProvider.init(jsc.hadoopConfiguration());
return hudiConversionSourceProvider;
- } else if (sourceTableFormat.equalsIgnoreCase(DELTA)) {
+ } else if (sourceTableFormat.equalsIgnoreCase(TableFormat.DELTA)) {
ConversionSourceProvider<Long> deltaConversionSourceProvider =
new DeltaConversionSourceProvider();
deltaConversionSourceProvider.init(jsc.hadoopConfiguration());
return deltaConversionSourceProvider;
- } else if (sourceTableFormat.equalsIgnoreCase(ICEBERG)) {
+ } else if (sourceTableFormat.equalsIgnoreCase(TableFormat.ICEBERG)) {
ConversionSourceProvider<Snapshot> icebergConversionSourceProvider =
new IcebergConversionSourceProvider();
icebergConversionSourceProvider.init(jsc.hadoopConfiguration());
@@ -202,7 +151,7 @@ private ConversionSourceProvider<?> getConversionSourceProvider(String sourceTab
@MethodSource("generateTestParametersForFormatsSyncModesAndPartitioning")
public void testVariousOperations(
String sourceTableFormat, SyncMode syncMode, boolean isPartitioned) {
- String tableName = getTableName();
+ String tableName = GenericTable.getTableName();
ConversionController conversionController = new ConversionController(jsc.hadoopConfiguration());
List<String> targetTableFormats = getOtherFormats(sourceTableFormat);
String partitionConfig = null;
@@ -213,7 +162,7 @@ public void testVariousOperations(
getConversionSourceProvider(sourceTableFormat);
List<?> insertRecords;
try (GenericTable table =
- GenericTable.getInstance(
+ GenericTableFactory.getInstance(
tableName, tempDir, sparkSession, jsc, sourceTableFormat, isPartitioned)) {
insertRecords = table.insertRows(100);
@@ -243,7 +192,7 @@ public void testVariousOperations(
}
try (GenericTable tableWithUpdatedSchema =
- GenericTable.getInstanceWithAdditionalColumns(
+ GenericTableFactory.getInstanceWithAdditionalColumns(
tableName, tempDir, sparkSession, jsc, sourceTableFormat, isPartitioned)) {
ConversionConfig conversionConfig =
getTableSyncConfig(
@@ -291,7 +240,7 @@ public void testVariousOperationsWithUUID(
List<String> targetTableFormats,
SyncMode syncMode,
boolean isPartitioned) {
- String tableName = getTableName();
+ String tableName = GenericTable.getTableName();
ConversionController conversionController = new ConversionController(jsc.hadoopConfiguration());
String partitionConfig = null;
if (isPartitioned) {
@@ -301,7 +250,7 @@ public void testVariousOperationsWithUUID(
getConversionSourceProvider(sourceTableFormat);
List<?> insertRecords;
try (GenericTable table =
- GenericTable.getInstanceWithUUIDColumns(
+ GenericTableFactory.getInstanceWithUUIDColumns(
tableName, tempDir, sparkSession, jsc, sourceTableFormat, isPartitioned)) {
insertRecords = table.insertRows(100);
@@ -334,9 +283,10 @@ public void testVariousOperationsWithUUID(
@MethodSource("testCasesWithPartitioningAndSyncModes")
public void testConcurrentInsertWritesInSource(
SyncMode syncMode, PartitionConfig partitionConfig) {
- String tableName = getTableName();
- ConversionSourceProvider<?> conversionSourceProvider = getConversionSourceProvider(HUDI);
- List<String> targetTableFormats = getOtherFormats(HUDI);
+ String tableName = GenericTable.getTableName();
+ ConversionSourceProvider<?> conversionSourceProvider =
+ getConversionSourceProvider(TableFormat.HUDI);
+ List<String> targetTableFormats = getOtherFormats(TableFormat.HUDI);
try (TestJavaHudiTable table =
TestJavaHudiTable.forStandardSchema(
tableName, tempDir, partitionConfig.getHudiConfig(), HoodieTableType.COPY_ON_WRITE)) {
@@ -351,7 +301,7 @@ public void testConcurrentInsertWritesInSource(
ConversionConfig conversionConfig =
getTableSyncConfig(
- HUDI,
+ TableFormat.HUDI,
syncMode,
tableName,
table,
@@ -362,10 +312,10 @@ public void testConcurrentInsertWritesInSource(
new ConversionController(jsc.hadoopConfiguration());
conversionController.sync(conversionConfig, conversionSourceProvider);
- checkDatasetEquivalence(HUDI, table, targetTableFormats, 50);
+ checkDatasetEquivalence(TableFormat.HUDI, table, targetTableFormats, 50);
table.insertRecordsWithCommitAlreadyStarted(insertsForCommit1, commitInstant1, true);
conversionController.sync(conversionConfig, conversionSourceProvider);
- checkDatasetEquivalence(HUDI, table, targetTableFormats, 100);
+ checkDatasetEquivalence(TableFormat.HUDI, table, targetTableFormats, 100);
}
}
@@ -374,9 +324,10 @@ public void testConcurrentInsertWritesInSource(
public void testConcurrentInsertsAndTableServiceWrites(
SyncMode syncMode, PartitionConfig partitionConfig) {
HoodieTableType tableType = HoodieTableType.MERGE_ON_READ;
- ConversionSourceProvider<?> conversionSourceProvider = getConversionSourceProvider(HUDI);
- List<String> targetTableFormats = getOtherFormats(HUDI);
- String tableName = getTableName();
+ ConversionSourceProvider<?> conversionSourceProvider =
+ getConversionSourceProvider(TableFormat.HUDI);
+ List<String> targetTableFormats = getOtherFormats(TableFormat.HUDI);
+ String tableName = GenericTable.getTableName();
try (TestSparkHudiTable table =
TestSparkHudiTable.forStandardSchema(
tableName, tempDir, jsc, partitionConfig.getHudiConfig(), tableType)) {
@@ -384,7 +335,7 @@ public void testConcurrentInsertsAndTableServiceWrites(
ConversionConfig conversionConfig =
getTableSyncConfig(
- HUDI,
+ TableFormat.HUDI,
syncMode,
tableName,
table,
@@ -394,7 +345,7 @@ public void testConcurrentInsertsAndTableServiceWrites(
ConversionController conversionController =
new ConversionController(jsc.hadoopConfiguration());
conversionController.sync(conversionConfig, conversionSourceProvider);
- checkDatasetEquivalence(HUDI, table, targetTableFormats, 50);
+ checkDatasetEquivalence(TableFormat.HUDI, table, targetTableFormats, 50);
table.deleteRecords(insertedRecords1.subList(0, 20), true);
// At this point table should have 30 records but only after compaction.
@@ -406,26 +357,37 @@ public void testConcurrentInsertsAndTableServiceWrites(
Collections.singletonMap("hoodie.datasource.query.type", "read_optimized");
// Because compaction has not completed yet and this is a read-optimized query, there are 100 records.
checkDatasetEquivalence(
- HUDI, table, sourceHudiOptions, targetTableFormats, Collections.emptyMap(), 100);
+ TableFormat.HUDI,
+ table,
+ sourceHudiOptions,
+ targetTableFormats,
+ Collections.emptyMap(),
+ 100);
table.insertRecords(50, true);
conversionController.sync(conversionConfig, conversionSourceProvider);
// Because compaction has not completed yet and this is a read-optimized query, there are 150 records.
checkDatasetEquivalence(
- HUDI, table, sourceHudiOptions, targetTableFormats, Collections.emptyMap(), 150);
+ TableFormat.HUDI,
+ table,
+ sourceHudiOptions,
+ targetTableFormats,
+ Collections.emptyMap(),
+ 150);
table.completeScheduledCompaction(scheduledCompactionInstant);
conversionController.sync(conversionConfig, conversionSourceProvider);
- checkDatasetEquivalence(HUDI, table, targetTableFormats, 130);
+ checkDatasetEquivalence(TableFormat.HUDI, table, targetTableFormats, 130);
}
}
@ParameterizedTest
- @ValueSource(strings = {HUDI, DELTA, ICEBERG})
+ @ValueSource(strings = {TableFormat.HUDI, TableFormat.DELTA, TableFormat.ICEBERG})
public void testTimeTravelQueries(String sourceTableFormat) throws Exception {
- String tableName = getTableName();
+ String tableName = GenericTable.getTableName();
try (GenericTable table =
- GenericTable.getInstance(tableName, tempDir, sparkSession, jsc, sourceTableFormat, false)) {
+ GenericTableFactory.getInstance(
+ tableName, tempDir, sparkSession, jsc, sourceTableFormat, false)) {
table.insertRows(50);
List<String> targetTableFormats = getOtherFormats(sourceTableFormat);
ConversionConfig conversionConfig =
@@ -482,12 +444,6 @@ public void testTimeTravelQueries(String sourceTableFormat) throws Exception {
}
}
- private static List<String> getOtherFormats(String sourceTableFormat) {
- return Arrays.stream(TableFormat.values())
- .filter(format -> !format.equals(sourceTableFormat))
- .collect(Collectors.toList());
- }
-
private static Stream<Arguments> provideArgsForPartitionTesting() {
String timestampFilter =
String.format(
@@ -500,32 +456,44 @@ private static Stream<Arguments> provideArgsForPartitionTesting() {
return Stream.of(
Arguments.of(
buildArgsForPartition(
- HUDI, Arrays.asList(ICEBERG, DELTA), "level:SIMPLE", "level:VALUE", levelFilter)),
+ TableFormat.HUDI,
+ Arrays.asList(TableFormat.ICEBERG, TableFormat.DELTA),
+ "level:SIMPLE",
+ "level:VALUE",
+ levelFilter)),
Arguments.of(
buildArgsForPartition(
- DELTA, Arrays.asList(ICEBERG, HUDI), null, "level:VALUE", levelFilter)),
+ TableFormat.DELTA,
+ Arrays.asList(TableFormat.ICEBERG, TableFormat.HUDI),
+ null,
+ "level:VALUE",
+ levelFilter)),
Arguments.of(
buildArgsForPartition(
- ICEBERG, Arrays.asList(DELTA, HUDI), null, "level:VALUE", levelFilter)),
+ TableFormat.ICEBERG,
+ Arrays.asList(TableFormat.DELTA, TableFormat.HUDI),
+ null,
+ "level:VALUE",
+ levelFilter)),
Arguments.of(
// Delta Lake does not currently support nested partition columns
buildArgsForPartition(
- HUDI,
- Arrays.asList(ICEBERG),
+ TableFormat.HUDI,
+ Arrays.asList(TableFormat.ICEBERG),
"nested_record.level:SIMPLE",
"nested_record.level:VALUE",
nestedLevelFilter)),
Arguments.of(
buildArgsForPartition(
- HUDI,
- Arrays.asList(ICEBERG, DELTA),
+ TableFormat.HUDI,
+ Arrays.asList(TableFormat.ICEBERG, TableFormat.DELTA),
"severity:SIMPLE",
"severity:VALUE",
severityFilter)),
Arguments.of(
buildArgsForPartition(
- HUDI,
- Arrays.asList(ICEBERG, DELTA),
+ TableFormat.HUDI,
+ Arrays.asList(TableFormat.ICEBERG, TableFormat.DELTA),
"timestamp_micros_nullable_field:TIMESTAMP,level:SIMPLE",
"timestamp_micros_nullable_field:DAY:yyyy/MM/dd,level:VALUE",
timestampAndLevelFilter)));
@@ -534,7 +502,7 @@ private static Stream<Arguments> provideArgsForPartitionTesting() {
@ParameterizedTest
@MethodSource("provideArgsForPartitionTesting")
public void testPartitionedData(TableFormatPartitionDataHolder tableFormatPartitionDataHolder) {
- String tableName = getTableName();
+ String tableName = GenericTable.getTableName();
String sourceTableFormat = tableFormatPartitionDataHolder.getSourceTableFormat();
List<String> targetTableFormats = tableFormatPartitionDataHolder.getTargetTableFormats();
Optional<String> hudiPartitionConfig = tableFormatPartitionDataHolder.getHudiSourceConfig();
@@ -545,11 +513,12 @@ public void testPartitionedData(TableFormatPartitionDataHolder tableFormatPartit
GenericTable table;
if (hudiPartitionConfig.isPresent()) {
table =
- GenericTable.getInstanceWithCustomPartitionConfig(
+ GenericTableFactory.getInstanceWithCustomPartitionConfig(
tableName, tempDir, jsc, sourceTableFormat, hudiPartitionConfig.get());
} else {
table =
- GenericTable.getInstance(tableName, tempDir, sparkSession, jsc, sourceTableFormat, true);
+ GenericTableFactory.getInstance(
+ tableName, tempDir, sparkSession, jsc, sourceTableFormat, true);
}
try (GenericTable tableToClose = table) {
ConversionConfig conversionConfig =
@@ -577,8 +546,9 @@ public void testPartitionedData(TableFormatPartitionDataHolder tableFormatPartit
@ParameterizedTest
@EnumSource(value = SyncMode.class)
public void testSyncWithSingleFormat(SyncMode syncMode) {
- String tableName = getTableName();
- ConversionSourceProvider<?> conversionSourceProvider = getConversionSourceProvider(HUDI);
+ String tableName = GenericTable.getTableName();
+ ConversionSourceProvider<?> conversionSourceProvider =
+ getConversionSourceProvider(TableFormat.HUDI);
try (TestJavaHudiTable table =
TestJavaHudiTable.forStandardSchema(
tableName, tempDir, null, HoodieTableType.COPY_ON_WRITE)) {
@@ -586,42 +556,66 @@ public void testSyncWithSingleFormat(SyncMode syncMode) {
ConversionConfig conversionConfigIceberg =
getTableSyncConfig(
- HUDI, syncMode, tableName, table, ImmutableList.of(ICEBERG), null, null);
+ TableFormat.HUDI,
+ syncMode,
+ tableName,
+ table,
+ ImmutableList.of(TableFormat.ICEBERG),
+ null,
+ null);
ConversionConfig conversionConfigDelta =
- getTableSyncConfig(HUDI, syncMode, tableName, table, ImmutableList.of(DELTA), null, null);
+ getTableSyncConfig(
+ TableFormat.HUDI,
+ syncMode,
+ tableName,
+ table,
+ ImmutableList.of(TableFormat.DELTA),
+ null,
+ null);
ConversionController conversionController =
new ConversionController(jsc.hadoopConfiguration());
conversionController.sync(conversionConfigIceberg, conversionSourceProvider);
- checkDatasetEquivalence(HUDI, table, Collections.singletonList(ICEBERG), 100);
+ checkDatasetEquivalence(
+ TableFormat.HUDI, table, Collections.singletonList(TableFormat.ICEBERG), 100);
conversionController.sync(conversionConfigDelta, conversionSourceProvider);
- checkDatasetEquivalence(HUDI, table, Collections.singletonList(DELTA), 100);
+ checkDatasetEquivalence(
+ TableFormat.HUDI, table, Collections.singletonList(TableFormat.DELTA), 100);
table.insertRecords(100, true);
conversionController.sync(conversionConfigIceberg, conversionSourceProvider);
- checkDatasetEquivalence(HUDI, table, Collections.singletonList(ICEBERG), 200);
+ checkDatasetEquivalence(
+ TableFormat.HUDI, table, Collections.singletonList(TableFormat.ICEBERG), 200);
conversionController.sync(conversionConfigDelta, conversionSourceProvider);
- checkDatasetEquivalence(HUDI, table, Collections.singletonList(DELTA), 200);
+ checkDatasetEquivalence(
+ TableFormat.HUDI, table, Collections.singletonList(TableFormat.DELTA), 200);
}
}
@Test
public void testOutOfSyncIncrementalSyncs() {
- String tableName = getTableName();
- ConversionSourceProvider<?> conversionSourceProvider = getConversionSourceProvider(HUDI);
+ String tableName = GenericTable.getTableName();
+ ConversionSourceProvider<?> conversionSourceProvider =
+ getConversionSourceProvider(TableFormat.HUDI);
try (TestJavaHudiTable table =
TestJavaHudiTable.forStandardSchema(
tableName, tempDir, null, HoodieTableType.COPY_ON_WRITE)) {
ConversionConfig singleTableConfig =
getTableSyncConfig(
- HUDI, SyncMode.INCREMENTAL, tableName, table, ImmutableList.of(ICEBERG), null, null);
+ TableFormat.HUDI,
+ SyncMode.INCREMENTAL,
+ tableName,
+ table,
+ ImmutableList.of(TableFormat.ICEBERG),
+ null,
+ null);
ConversionConfig dualTableConfig =
getTableSyncConfig(
- HUDI,
+ TableFormat.HUDI,
SyncMode.INCREMENTAL,
tableName,
table,
- Arrays.asList(ICEBERG, DELTA),
+ Arrays.asList(TableFormat.ICEBERG, TableFormat.DELTA),
null,
null);
@@ -630,12 +624,14 @@ public void testOutOfSyncIncrementalSyncs() {
new ConversionController(jsc.hadoopConfiguration());
// sync iceberg only
conversionController.sync(singleTableConfig, conversionSourceProvider);
- checkDatasetEquivalence(HUDI, table, Collections.singletonList(ICEBERG), 50);
+ checkDatasetEquivalence(
+ TableFormat.HUDI, table, Collections.singletonList(TableFormat.ICEBERG), 50);
// insert more records
table.insertRecords(50, true);
// iceberg will be an incremental sync and delta will need to bootstrap with snapshot sync
conversionController.sync(dualTableConfig, conversionSourceProvider);
- checkDatasetEquivalence(HUDI, table, Arrays.asList(ICEBERG, DELTA), 100);
+ checkDatasetEquivalence(
+ TableFormat.HUDI, table, Arrays.asList(TableFormat.ICEBERG, TableFormat.DELTA), 100);
// insert more records
table.insertRecords(50, true);
@@ -643,20 +639,23 @@ public void testOutOfSyncIncrementalSyncs() {
table.insertRecords(50, true);
// incremental sync for two commits for iceberg only
conversionController.sync(singleTableConfig, conversionSourceProvider);
- checkDatasetEquivalence(HUDI, table, Collections.singletonList(ICEBERG), 200);
+ checkDatasetEquivalence(
+ TableFormat.HUDI, table, Collections.singletonList(TableFormat.ICEBERG), 200);
// insert more records
table.insertRecords(50, true);
// incremental sync for one commit for iceberg and three commits for delta
conversionController.sync(dualTableConfig, conversionSourceProvider);
- checkDatasetEquivalence(HUDI, table, Arrays.asList(ICEBERG, DELTA), 250);
+ checkDatasetEquivalence(
+ TableFormat.HUDI, table, Arrays.asList(TableFormat.ICEBERG, TableFormat.DELTA), 250);
}
}
@Test
public void testIcebergCorruptedSnapshotRecovery() throws Exception {
- String tableName = getTableName();
- ConversionSourceProvider<?> conversionSourceProvider = getConversionSourceProvider(HUDI);
+ String tableName = GenericTable.getTableName();
+ ConversionSourceProvider<?> conversionSourceProvider =
+ getConversionSourceProvider(TableFormat.HUDI);
try (TestJavaHudiTable table =
TestJavaHudiTable.forStandardSchema(
tableName, tempDir, null, HoodieTableType.COPY_ON_WRITE)) {
@@ -665,11 +664,11 @@ public void testIcebergCorruptedSnapshotRecovery() throws Exception {
new ConversionController(jsc.hadoopConfiguration());
ConversionConfig conversionConfig =
getTableSyncConfig(
- HUDI,
+ TableFormat.HUDI,
SyncMode.INCREMENTAL,
tableName,
table,
- Collections.singletonList(ICEBERG),
+ Collections.singletonList(TableFormat.ICEBERG),
null,
null);
conversionController.sync(conversionConfig, conversionSourceProvider);
@@ -687,24 +686,26 @@ public void testIcebergCorruptedSnapshotRecovery() throws Exception {
Paths.get(URI.create(icebergTable.snapshot(previousSnapshotId).manifestListLocation())));
table.insertRows(10);
conversionController.sync(conversionConfig, conversionSourceProvider);
- checkDatasetEquivalence(HUDI, table, Collections.singletonList(ICEBERG), 50);
+ checkDatasetEquivalence(
+ TableFormat.HUDI, table, Collections.singletonList(TableFormat.ICEBERG), 50);
}
}
@Test
public void testMetadataRetention() throws Exception {
- String tableName = getTableName();
- ConversionSourceProvider<?> conversionSourceProvider = getConversionSourceProvider(HUDI);
+ String tableName = GenericTable.getTableName();
+ ConversionSourceProvider<?> conversionSourceProvider =
+ getConversionSourceProvider(TableFormat.HUDI);
try (TestJavaHudiTable table =
TestJavaHudiTable.forStandardSchema(
tableName, tempDir, null, HoodieTableType.COPY_ON_WRITE)) {
ConversionConfig conversionConfig =
getTableSyncConfig(
- HUDI,
+ TableFormat.HUDI,
SyncMode.INCREMENTAL,
tableName,
table,
- Arrays.asList(ICEBERG, DELTA),
+ Arrays.asList(TableFormat.ICEBERG, TableFormat.DELTA),
null,
Duration.ofHours(0)); // force cleanup
ConversionController conversionController =
@@ -728,7 +729,7 @@ public void testMetadataRetention() throws Exception {
sparkSession
.read()
.format("hudi")
- .options(getTimeTravelOption(HUDI, instantAfterFirstCommit))
+ .options(getTimeTravelOption(TableFormat.HUDI, instantAfterFirstCommit))
.load(table.getBasePath())
.collectAsList();
Assertions.assertEquals(10, rows.size());
@@ -746,13 +747,13 @@ public void testMetadataRetention() throws Exception {
private Map<String, String> getTimeTravelOption(String tableFormat, Instant time) {
Map<String, String> options = new HashMap<>();
switch (tableFormat) {
- case HUDI:
+ case TableFormat.HUDI:
options.put("as.of.instant", DATE_FORMAT.format(time));
break;
- case ICEBERG:
+ case TableFormat.ICEBERG:
options.put("as-of-timestamp", String.valueOf(time.toEpochMilli()));
break;
- case DELTA:
+ case TableFormat.DELTA:
options.put("timestampAsOf", DATE_FORMAT.format(time));
break;
default:
@@ -761,191 +762,6 @@ private Map<String, String> getTimeTravelOption(String tableFormat, Instant time
return options;
}
- private void checkDatasetEquivalenceWithFilter(
- String sourceFormat,
- GenericTable<?, ?> sourceTable,
- List<String> targetFormats,
- String filter) {
- checkDatasetEquivalence(
- sourceFormat,
- sourceTable,
- Collections.emptyMap(),
- targetFormats,
- Collections.emptyMap(),
- null,
- filter);
- }
-
- private void checkDatasetEquivalence(
- String sourceFormat,
- GenericTable<?, ?> sourceTable,
- List<String> targetFormats,
- Integer expectedCount) {
- checkDatasetEquivalence(
- sourceFormat,
- sourceTable,
- Collections.emptyMap(),
- targetFormats,
- Collections.emptyMap(),
- expectedCount,
- "1 = 1");
- }
-
- private void checkDatasetEquivalence(
- String sourceFormat,
- GenericTable<?, ?> sourceTable,
- Map<String, String> sourceOptions,
- List<String> targetFormats,
- Map<String, Map<String, String>> targetOptions,
- Integer expectedCount) {
- checkDatasetEquivalence(
- sourceFormat,
- sourceTable,
- sourceOptions,
- targetFormats,
- targetOptions,
- expectedCount,
- "1 = 1");
- }
-
- private void checkDatasetEquivalence(
- String sourceFormat,
- GenericTable<?, ?> sourceTable,
- Map<String, String> sourceOptions,
- List<String> targetFormats,
- Map<String, Map<String, String>> targetOptions,
- Integer expectedCount,
- String filterCondition) {
- Dataset<Row> sourceRows =
- sparkSession
- .read()
- .options(sourceOptions)
- .format(sourceFormat.toLowerCase())
- .load(sourceTable.getBasePath())
- .orderBy(sourceTable.getOrderByColumn())
- .filter(filterCondition);
- Map<String, Dataset<Row>> targetRowsByFormat =
- targetFormats.stream()
- .collect(
- Collectors.toMap(
- Function.identity(),
- targetFormat -> {
- Map<String, String> finalTargetOptions =
- targetOptions.getOrDefault(targetFormat, Collections.emptyMap());
- if (targetFormat.equals(HUDI)) {
- finalTargetOptions = new HashMap<>(finalTargetOptions);
- finalTargetOptions.put(HoodieMetadataConfig.ENABLE.key(), "true");
- finalTargetOptions.put(
- "hoodie.datasource.read.extract.partition.values.from.path", "true");
- }
- return sparkSession
- .read()
- .options(finalTargetOptions)
- .format(targetFormat.toLowerCase())
- .load(sourceTable.getDataPath())
- .orderBy(sourceTable.getOrderByColumn())
- .filter(filterCondition);
- }));
-
- String[] selectColumnsArr = sourceTable.getColumnsToSelect().toArray(new String[] {});
- List<String> dataset1Rows = sourceRows.selectExpr(selectColumnsArr).toJSON().collectAsList();
- targetRowsByFormat.forEach(
- (format, targetRows) -> {
- List<String> dataset2Rows =
- targetRows.selectExpr(selectColumnsArr).toJSON().collectAsList();
- assertEquals(
- dataset1Rows.size(),
- dataset2Rows.size(),
- String.format(
- "Datasets have different row counts when reading from Spark. Source: %s, Target: %s",
- sourceFormat, format));
- // sanity check the count to ensure test is set up properly
- if (expectedCount != null) {
- assertEquals(expectedCount, dataset1Rows.size());
- } else {
- // if count is not known ahead of time, ensure datasets are non-empty
- assertFalse(dataset1Rows.isEmpty());
- }
-
- if (containsUUIDFields(dataset1Rows) && containsUUIDFields(dataset2Rows)) {
- compareDatasetWithUUID(dataset1Rows, dataset2Rows);
- } else {
- assertEquals(
- dataset1Rows,
- dataset2Rows,
- String.format(
- "Datasets are not equivalent when reading from Spark. Source: %s, Target: %s",
- sourceFormat, format));
- }
- });
- }
-
- /**
- * Compares two datasets where dataset1Rows is for Iceberg and dataset2Rows is for other formats
- * (such as Delta or Hudi). - For the "uuid_field", if present, the UUID from dataset1 (Iceberg)
- * is compared with the Base64-encoded UUID from dataset2 (other formats), after decoding. - For
- * all other fields, the values are compared directly. - If neither row contains the "uuid_field",
- * the rows are compared as plain JSON strings.
- *
- * @param dataset1Rows List of JSON rows representing the dataset in Iceberg format (UUID is
- * stored as a string).
- * @param dataset2Rows List of JSON rows representing the dataset in other formats (UUID might be
- * Base64-encoded).
- */
- private void compareDatasetWithUUID(List<String> dataset1Rows, List<String> dataset2Rows) {
- for (int i = 0; i < dataset1Rows.size(); i++) {
- String row1 = dataset1Rows.get(i);
- String row2 = dataset2Rows.get(i);
- if (row1.contains("uuid_field") && row2.contains("uuid_field")) {
- try {
- JsonNode node1 = OBJECT_MAPPER.readTree(row1);
- JsonNode node2 = OBJECT_MAPPER.readTree(row2);
-
- // check uuid field
- String uuidStr1 = node1.get("uuid_field").asText();
- byte[] bytes = Base64.getDecoder().decode(node2.get("uuid_field").asText());
- ByteBuffer bb = ByteBuffer.wrap(bytes);
- UUID uuid2 = new UUID(bb.getLong(), bb.getLong());
- String uuidStr2 = uuid2.toString();
- assertEquals(
- uuidStr1,
- uuidStr2,
- String.format(
- "Datasets are not equivalent when reading from Spark. Source: %s, Target: %s",
- uuidStr1, uuidStr2));
-
- // check other fields
- ((ObjectNode) node1).remove("uuid_field");
- ((ObjectNode) node2).remove("uuid_field");
- assertEquals(
- node1.toString(),
- node2.toString(),
- String.format(
- "Datasets are not equivalent when comparing other fields. Source: %s, Target: %s",
- node1, node2));
- } catch (JsonProcessingException e) {
- throw new RuntimeException(e);
- }
- } else {
- assertEquals(
- row1,
- row2,
- String.format(
- "Datasets are not equivalent when reading from Spark. Source: %s, Target: %s",
- row1, row2));
- }
- }
- }
-
- private boolean containsUUIDFields(List<String> rows) {
- for (String row : rows) {
- if (row.contains("\"uuid_field\"")) {
- return true;
- }
- }
- return false;
- }
-
private static Stream<Arguments> addBasicPartitionCases(Stream<Arguments> arguments) {
// add unpartitioned and partitioned cases
return arguments.flatMap(
@@ -995,7 +811,7 @@ private static ConversionConfig getTableSyncConfig(
Duration metadataRetention) {
Properties sourceProperties = new Properties();
if (partitionConfig != null) {
- sourceProperties.put(PARTITION_FIELD_SPEC_CONFIG, partitionConfig);
+ sourceProperties.put(HudiSourceConfig.PARTITION_FIELD_SPEC_CONFIG, partitionConfig);
}
SourceTable sourceTable =
SourceTable.builder()
diff --git a/xtable-core/src/test/java/org/apache/xtable/conversion/TestConversionTargetFactory.java b/xtable-integration-tests/src/test/java/org/apache/xtable/conversion/TestConversionTargetFactory.java
similarity index 96%
rename from xtable-core/src/test/java/org/apache/xtable/conversion/TestConversionTargetFactory.java
rename to xtable-integration-tests/src/test/java/org/apache/xtable/conversion/TestConversionTargetFactory.java
index 0984b42be..8891fefb9 100644
--- a/xtable-core/src/test/java/org/apache/xtable/conversion/TestConversionTargetFactory.java
+++ b/xtable-integration-tests/src/test/java/org/apache/xtable/conversion/TestConversionTargetFactory.java
@@ -31,6 +31,10 @@
import org.apache.xtable.model.storage.TableFormat;
import org.apache.xtable.spi.sync.ConversionTarget;
+/**
+ * This test is not in xtable-core because we want to test that the service loader can detect the
+ * implementations from the other modules.
+ */
public class TestConversionTargetFactory {
@Test
diff --git a/xtable-core/src/test/java/org/apache/xtable/loadtest/LoadTest.java b/xtable-integration-tests/src/test/java/org/apache/xtable/loadtest/LoadTest.java
similarity index 99%
rename from xtable-core/src/test/java/org/apache/xtable/loadtest/LoadTest.java
rename to xtable-integration-tests/src/test/java/org/apache/xtable/loadtest/LoadTest.java
index 341b2cb02..63c83bda7 100644
--- a/xtable-core/src/test/java/org/apache/xtable/loadtest/LoadTest.java
+++ b/xtable-integration-tests/src/test/java/org/apache/xtable/loadtest/LoadTest.java
@@ -38,13 +38,13 @@
import org.apache.hudi.config.HoodieArchivalConfig;
import org.apache.xtable.GenericTable;
-import org.apache.xtable.TestJavaHudiTable;
import org.apache.xtable.conversion.ConversionConfig;
import org.apache.xtable.conversion.ConversionController;
import org.apache.xtable.conversion.ConversionSourceProvider;
import org.apache.xtable.conversion.SourceTable;
import org.apache.xtable.conversion.TargetTable;
import org.apache.xtable.hudi.HudiConversionSourceProvider;
+import org.apache.xtable.hudi.TestJavaHudiTable;
import org.apache.xtable.model.storage.TableFormat;
import org.apache.xtable.model.sync.SyncMode;
diff --git a/xtable-utilities/pom.xml b/xtable-utilities/pom.xml
index bc91f99e8..f3b763df2 100644
--- a/xtable-utilities/pom.xml
+++ b/xtable-utilities/pom.xml
@@ -72,26 +72,29 @@
org.apache.spark
spark-core_${scala.binary.version}
- runtime
+ provided
org.apache.spark
spark-sql_${scala.binary.version}
- runtime
+ provided
+
+ org.apache.hadoop
+ hadoop-client-api
+
+
+ org.apache.hadoop
+ hadoop-client-runtime
+
org.apache.hadoop
hadoop-common
compile
-
- org.apache.parquet
- parquet-avro
-
-
org.apache.hadoop
diff --git a/xtable-utilities/src/main/java/org/apache/xtable/utilities/RunSync.java b/xtable-utilities/src/main/java/org/apache/xtable/utilities/RunSync.java
index c84753de5..ea28c4c72 100644
--- a/xtable-utilities/src/main/java/org/apache/xtable/utilities/RunSync.java
+++ b/xtable-utilities/src/main/java/org/apache/xtable/utilities/RunSync.java
@@ -29,6 +29,8 @@
import java.util.stream.Collectors;
import lombok.Data;
+import lombok.Value;
+import lombok.extern.jackson.Jacksonized;
import lombok.extern.log4j.Log4j2;
import org.apache.commons.cli.CommandLine;
@@ -42,7 +44,6 @@
import com.fasterxml.jackson.annotation.JsonMerge;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.databind.ObjectReader;
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
import com.google.common.annotations.VisibleForTesting;
@@ -51,7 +52,6 @@
import org.apache.xtable.conversion.ConversionSourceProvider;
import org.apache.xtable.conversion.SourceTable;
import org.apache.xtable.conversion.TargetTable;
-import org.apache.xtable.hudi.HudiSourceConfig;
import org.apache.xtable.iceberg.IcebergCatalogConfig;
import org.apache.xtable.model.storage.TableFormat;
import org.apache.xtable.model.sync.SyncMode;
@@ -115,11 +115,10 @@ public static void main(String[] args) throws IOException {
return;
}
- DatasetConfig datasetConfig = new DatasetConfig();
+ DatasetConfig datasetConfig;
try (InputStream inputStream =
Files.newInputStream(Paths.get(cmd.getOptionValue(DATASET_CONFIG_OPTION)))) {
- ObjectReader objectReader = YAML_MAPPER.readerForUpdating(datasetConfig);
- objectReader.readValue(inputStream);
+ datasetConfig = YAML_MAPPER.readValue(inputStream, DatasetConfig.class);
}
byte[] customConfig = getCustomConfigurations(cmd, HADOOP_CONFIG_PATH);
@@ -153,7 +152,7 @@ public static void main(String[] args) throws IOException {
Properties sourceProperties = new Properties();
if (table.getPartitionSpec() != null) {
sourceProperties.put(
- HudiSourceConfig.PARTITION_FIELD_SPEC_CONFIG, table.getPartitionSpec());
+ "xtable.hudi.source.partition_field_spec_config", table.getPartitionSpec());
}
SourceTable sourceTable =
SourceTable.builder()
@@ -171,7 +170,7 @@ public static void main(String[] args) throws IOException {
tableFormat ->
TargetTable.builder()
.name(table.getTableName())
- .basePath(table.getTableBasePath())
+ .basePath(table.getTableDataPath())
.namespace(
table.getNamespace() == null
? null
@@ -242,7 +241,8 @@ static IcebergCatalogConfig loadIcebergCatalogConfig(byte[] customConfigs) throw
: YAML_MAPPER.readValue(customConfigs, IcebergCatalogConfig.class);
}
- @Data
+ @Jacksonized
+ @Value
public static class DatasetConfig {
/**
@@ -258,7 +258,8 @@ public static class DatasetConfig {
/** Configuration of the dataset to sync, path, table name, etc. */
List<Table> datasets;
- @Data
+ @Jacksonized
+ @Value
public static class Table {
/**
* The base path of the table to sync. Any authentication configuration needed by HDFS client