From 0d2a634d2353efdeecced4729be9f585789acdb1 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Wed, 28 Oct 2015 17:22:40 +0900 Subject: [PATCH 001/127] Implement PartitionedFileFragment --- .../fragment/PartitionedFileFragment.java | 254 ++++++++++++++++++ .../main/proto/StorageFragmentProtos.proto | 10 + 2 files changed, 264 insertions(+) create mode 100644 tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionedFileFragment.java diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionedFileFragment.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionedFileFragment.java new file mode 100644 index 0000000000..3819423dc2 --- /dev/null +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionedFileFragment.java @@ -0,0 +1,254 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.tajo.storage.fragment; + +import com.google.common.base.Objects; +import com.google.gson.annotations.Expose; +import com.google.protobuf.ByteString; +import com.google.protobuf.InvalidProtocolBufferException; +import org.apache.hadoop.fs.BlockLocation; +import org.apache.hadoop.fs.Path; +import org.apache.tajo.BuiltinStorages; +import org.apache.tajo.storage.StorageFragmentProtos.PartitionedFileFragmentProto; +import org.apache.tajo.util.TUtil; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import static org.apache.tajo.catalog.proto.CatalogProtos.FragmentProto; + +public class PartitionedFileFragment implements Fragment, Comparable, Cloneable { + @Expose private String tableName; // required + @Expose private Path uri; // required + @Expose public Long startOffset; // required + @Expose public Long length; // required + + private String[] hosts; // Datanode hostnames + @Expose private int[] diskIds; + + @Expose private String partitionName; // required + + public PartitionedFileFragment(ByteString raw) throws InvalidProtocolBufferException { + PartitionedFileFragmentProto.Builder builder = PartitionedFileFragmentProto.newBuilder(); + builder.mergeFrom(raw); + builder.build(); + init(builder.build()); + } + + public PartitionedFileFragment(String tableName, Path uri, BlockLocation blockLocation, String partitionName) + throws IOException { + this.set(tableName, uri, blockLocation.getOffset(), blockLocation.getLength(), blockLocation.getHosts(), null, + partitionName); + } + + public PartitionedFileFragment(String tableName, Path uri, long start, long length, String[] hosts, int[] diskIds, + String partitionName) { + this.set(tableName, uri, start, length, hosts, diskIds, partitionName); + } + + // Non splittable + public PartitionedFileFragment(String tableName, Path uri, long start, long length, String[] hosts, + String partitionName) { + this.set(tableName, uri, start, 
length, hosts, null, partitionName); + } + + public PartitionedFileFragment(String fragmentId, Path path, long start, long length, String partitionName) { + this.set(fragmentId, path, start, length, null, null, partitionName); + } + + public PartitionedFileFragment(PartitionedFileFragmentProto proto) { + init(proto); + } + + private void init(PartitionedFileFragmentProto proto) { + int[] diskIds = new int[proto.getDiskIdsList().size()]; + int i = 0; + for(Integer eachValue: proto.getDiskIdsList()) { + diskIds[i++] = eachValue; + } + this.set(proto.getId(), new Path(proto.getPath()), + proto.getStartOffset(), proto.getLength(), + proto.getHostsList().toArray(new String[]{}), + diskIds, + proto.getPartitionName()); + } + + private void set(String tableName, Path path, long start, + long length, String[] hosts, int[] diskIds, String partitionName) { + this.tableName = tableName; + this.uri = path; + this.startOffset = start; + this.length = length; + this.hosts = hosts; + this.diskIds = diskIds; + this.partitionName = partitionName; + } + + + /** + * Get the list of hosts (hostname) hosting this block + */ + public String[] getHosts() { + if (hosts == null) { + this.hosts = new String[0]; + } + return hosts; + } + + /** + * Get the list of Disk Ids + * Unknown disk is -1. 
Others 0 ~ N + */ + public int[] getDiskIds() { + if (diskIds == null) { + this.diskIds = new int[getHosts().length]; + Arrays.fill(this.diskIds, -1); + } + return diskIds; + } + + public void setDiskIds(int[] diskIds){ + this.diskIds = diskIds; + } + + @Override + public String getTableName() { + return this.tableName; + } + + public Path getPath() { + return this.uri; + } + + public void setPath(Path path) { + this.uri = path; + } + + public Long getStartKey() { + return this.startOffset; + } + + public String getPartitionName() { + return partitionName; + } + + public void setPartitionName(String partitionName) { + this.partitionName = partitionName; + } + + @Override + public String getKey() { + return this.uri.toString(); + } + + @Override + public long getLength() { + return this.length; + } + + @Override + public boolean isEmpty() { + return this.length <= 0; + } + /** + * + * The offset range of tablets MUST NOT be overlapped. + * + * @param t + * @return If the table paths are not same, return -1. 
+ */ + @Override + public int compareTo(PartitionedFileFragment t) { + if (getPath().equals(t.getPath())) { + long diff = this.getStartKey() - t.getStartKey(); + if (diff < 0) { + return -1; + } else if (diff > 0) { + return 1; + } else { + return 0; + } + } else { + return -1; + } + } + + @Override + public boolean equals(Object o) { + if (o instanceof PartitionedFileFragment) { + PartitionedFileFragment t = (PartitionedFileFragment) o; + if (getPath().equals(t.getPath()) + && TUtil.checkEquals(t.getStartKey(), this.getStartKey()) + && TUtil.checkEquals(t.getLength(), this.getLength())) { + return true; + } + } + return false; + } + + @Override + public int hashCode() { + return Objects.hashCode(tableName, uri, startOffset, length); + } + + public Object clone() throws CloneNotSupportedException { + PartitionedFileFragment frag = (PartitionedFileFragment) super.clone(); + frag.tableName = tableName; + frag.uri = uri; + frag.diskIds = diskIds; + frag.hosts = hosts; + + return frag; + } + + @Override + public String toString() { + return "\"fragment\": {\"id\": \""+ tableName +"\", \"path\": " + +getPath() + "\", \"start\": " + this.getStartKey() + ",\"length\": " + + getLength() + "\", \"partitionName\":" + getPartitionName() + "}" ; + } + + public FragmentProto getProto() { + PartitionedFileFragmentProto.Builder builder = PartitionedFileFragmentProto.newBuilder(); + builder.setId(this.tableName); + builder.setStartOffset(this.startOffset); + builder.setLength(this.length); + builder.setPath(this.uri.toString()); + if(diskIds != null) { + List idList = new ArrayList<>(); + for(int eachId: diskIds) { + idList.add(eachId); + } + builder.addAllDiskIds(idList); + } + + if(hosts != null) { + builder.addAllHosts(TUtil.newList(hosts)); + } + builder.setPartitionName(this.partitionName); + + FragmentProto.Builder fragmentBuilder = FragmentProto.newBuilder(); + fragmentBuilder.setId(this.tableName); + fragmentBuilder.setDataFormat(BuiltinStorages.TEXT); + 
fragmentBuilder.setContents(builder.buildPartial().toByteString()); + return fragmentBuilder.build(); + } +} diff --git a/tajo-storage/tajo-storage-hdfs/src/main/proto/StorageFragmentProtos.proto b/tajo-storage/tajo-storage-hdfs/src/main/proto/StorageFragmentProtos.proto index 0579f05aa9..40ea24a22a 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/proto/StorageFragmentProtos.proto +++ b/tajo-storage/tajo-storage-hdfs/src/main/proto/StorageFragmentProtos.proto @@ -32,3 +32,13 @@ message FileFragmentProto { repeated string hosts = 5; repeated int32 disk_ids = 6; } + +message PartitionedFileFragmentProto { + required string id = 1; + required string path = 2; + required int64 start_offset = 3; + required int64 length = 4; + repeated string hosts = 5; + repeated int32 disk_ids = 6; + required string partitionName = 7; /* it cosists of partition key and value */ +} From 4d7e73b7b20d1e6721b0f6b2ee53c4d04eb278d4 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Wed, 28 Oct 2015 18:10:53 +0900 Subject: [PATCH 002/127] Add unit test cases for PartitionedFileFragment --- .../storage/TestPartitionedFileFragment.java | 100 ++++++++++++++++++ .../fragment/PartitionedFileFragment.java | 2 +- 2 files changed, 101 insertions(+), 1 deletion(-) create mode 100644 tajo-core-tests/src/test/java/org/apache/tajo/storage/TestPartitionedFileFragment.java diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/storage/TestPartitionedFileFragment.java b/tajo-core-tests/src/test/java/org/apache/tajo/storage/TestPartitionedFileFragment.java new file mode 100644 index 0000000000..ef9d7f9b25 --- /dev/null +++ b/tajo-core-tests/src/test/java/org/apache/tajo/storage/TestPartitionedFileFragment.java @@ -0,0 +1,100 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.storage; + +import com.google.common.collect.Sets; +import org.apache.hadoop.fs.Path; +import org.apache.tajo.storage.fragment.FileFragment; +import org.apache.tajo.storage.fragment.FragmentConvertor; +import org.apache.tajo.storage.fragment.PartitionedFileFragment; +import org.apache.tajo.util.CommonTestingUtil; +import org.junit.Before; +import org.junit.Test; + +import java.util.Arrays; +import java.util.SortedSet; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class TestPartitionedFileFragment { + private Path path; + + @Before + public final void setUp() throws Exception { + path = CommonTestingUtil.getTestDir(); + } + + @Test + public final void testGetAndSetFields() { + PartitionedFileFragment fragment1 = new PartitionedFileFragment("table1_1", new Path(path, "table0/col1=1"), + 0, 500, "col1=1"); + + assertEquals("table1_1", fragment1.getTableName()); + assertEquals(new Path(path, "table0/col1=1"), fragment1.getPath()); + assertEquals("col1=1", fragment1.getPartitionName()); + assertTrue(0 == fragment1.getStartKey()); + assertTrue(500 == fragment1.getLength()); + } + + @Test + public final void testGetProtoAndRestore() { + PartitionedFileFragment fragment = new PartitionedFileFragment("table1_1", new Path(path, "table0/col1=1"), 0, + 500, "col1=1"); + + PartitionedFileFragment fragment1 = 
FragmentConvertor.convert(PartitionedFileFragment.class, fragment.getProto()); + assertEquals("table1_1", fragment1.getTableName()); + assertEquals(new Path(path, "table0/col1=1"), fragment1.getPath()); + assertEquals("col1=1", fragment1.getPartitionName()); + assertTrue(0 == fragment1.getStartKey()); + assertTrue(500 == fragment1.getLength()); + } + + @Test + public final void testCompareTo() { + final int num = 10; + PartitionedFileFragment[] tablets = new PartitionedFileFragment[num]; + for (int i = num - 1; i >= 0; i--) { + tablets[i] = new PartitionedFileFragment("tablet1", new Path(path, "tablet0/col1=" + i), i * 500, (i+1) * 500 + , "col1=" + i); + } + + Arrays.sort(tablets); + + for(int i = 0; i < num; i++) { + assertEquals("col1=" + i, tablets[i].getPartitionName()); + } + } + + @Test + public final void testCompareTo2() { + final int num = 1860; + PartitionedFileFragment[] tablets = new PartitionedFileFragment[num]; + for (int i = num - 1; i >= 0; i--) { + tablets[i] = new PartitionedFileFragment("tablet1", new Path(path, "tablet/col1=" +i), (long)i * 6553500, + (long) (i+1) * 6553500, "col1=" + i); + } + + SortedSet sortedSet = Sets.newTreeSet(); + for (PartitionedFileFragment frag : tablets) { + sortedSet.add(frag); + } + assertEquals(num, sortedSet.size()); + } +} diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionedFileFragment.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionedFileFragment.java index 3819423dc2..3909dc0cee 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionedFileFragment.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionedFileFragment.java @@ -187,7 +187,7 @@ public int compareTo(PartitionedFileFragment t) { return 0; } } else { - return -1; + return getPath().compareTo(t.getPath()); } } From 6fab5adadb303e690f7377547f842f84eb1f9286 Mon Sep 17 
00:00:00 2001 From: JaeHwa Jung Date: Thu, 29 Oct 2015 16:25:47 +0900 Subject: [PATCH 003/127] Add PartitionedTableUtil for finding filtered partition directories. --- .../planner/TestPartitionedTableUtil.java | 438 ++++++++++++++++ .../tajo/querymaster/Repartitioner.java | 2 +- .../rules/PartitionedTableRewriter.java | 22 +- .../tajo/plan/util/FilteredPartitionInfo.java | 46 ++ .../tajo/plan/util/PartitionedTableUtil.java | 470 ++++++++++++++++++ 5 files changed, 976 insertions(+), 2 deletions(-) create mode 100644 tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableUtil.java create mode 100644 tajo-plan/src/main/java/org/apache/tajo/plan/util/FilteredPartitionInfo.java create mode 100644 tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableUtil.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableUtil.java new file mode 100644 index 0000000000..5eb97b6c3c --- /dev/null +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableUtil.java @@ -0,0 +1,438 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.tajo.engine.planner; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.tajo.LocalTajoTestingUtility; +import org.apache.tajo.QueryTestCaseBase; +import org.apache.tajo.algebra.Expr; +import org.apache.tajo.catalog.CatalogUtil; +import org.apache.tajo.catalog.Schema; +import org.apache.tajo.catalog.TableDesc; +import org.apache.tajo.catalog.TableMeta; +import org.apache.tajo.catalog.partition.PartitionMethodDesc; +import org.apache.tajo.catalog.proto.CatalogProtos; +import org.apache.tajo.common.TajoDataTypes; +import org.apache.tajo.conf.TajoConf; +import org.apache.tajo.engine.query.QueryContext; +import org.apache.tajo.plan.LogicalPlan; +import org.apache.tajo.plan.logical.*; +import org.apache.tajo.plan.util.FilteredPartitionInfo; +import org.apache.tajo.plan.util.PartitionedTableUtil; +import org.apache.tajo.util.FileUtil; +import org.apache.tajo.util.KeyValueSet; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +import static org.junit.Assert.*; + +public class TestPartitionedTableUtil extends QueryTestCaseBase { + + final static String PARTITION_TABLE_NAME = "tb_partition"; + final static String MULTIPLE_PARTITION_TABLE_NAME = "tb_multiple_partition"; + + @BeforeClass + public static void setUp() throws Exception { + FileSystem fs = FileSystem.get(conf); + Path rootDir = TajoConf.getWarehouseDir(testingCluster.getConfiguration()); + + Schema schema = new Schema(); + schema.addColumn("n_nationkey", TajoDataTypes.Type.INT8); + schema.addColumn("n_name", TajoDataTypes.Type.TEXT); + schema.addColumn("n_regionkey", TajoDataTypes.Type.INT8); + + TableMeta meta = CatalogUtil.newTableMeta("TEXT", new KeyValueSet()); + + createExternalTableIncludedOnePartitionKeyColumn(fs, rootDir, schema, meta); + createExternalTableIncludedMultiplePartitionKeyColumns(fs, rootDir, schema, meta); + } + + private static void 
createExternalTableIncludedOnePartitionKeyColumn(FileSystem fs, Path rootDir, Schema schema, + TableMeta meta) throws Exception { + Schema partSchema = new Schema(); + partSchema.addColumn("key", TajoDataTypes.Type.TEXT); + + PartitionMethodDesc partitionMethodDesc = + new PartitionMethodDesc("TestPartitionedTableUtil", PARTITION_TABLE_NAME, + CatalogProtos.PartitionType.COLUMN, "key", partSchema); + + Path tablePath = new Path(rootDir, PARTITION_TABLE_NAME); + fs.mkdirs(tablePath); + + client.createExternalTable(PARTITION_TABLE_NAME, schema, tablePath.toUri(), meta, partitionMethodDesc); + + TableDesc tableDesc = client.getTableDesc(PARTITION_TABLE_NAME); + assertNotNull(tableDesc); + + Path path = new Path(tableDesc.getUri().toString() + "/key=part123"); + fs.mkdirs(path); + FileUtil.writeTextToFile("1|ARGENTINA|1", new Path(path, "data")); + + path = new Path(tableDesc.getUri().toString() + "/key=part456"); + fs.mkdirs(path); + FileUtil.writeTextToFile("2|BRAZIL|1", new Path(path, "data")); + + path = new Path(tableDesc.getUri().toString() + "/key=part789"); + fs.mkdirs(path); + FileUtil.writeTextToFile("3|CANADA|1", new Path(path, "data")); + } + + private static void createExternalTableIncludedMultiplePartitionKeyColumns(FileSystem fs, Path rootDir, + Schema schema, TableMeta meta) throws Exception { + Schema partSchema = new Schema(); + partSchema.addColumn("key1", TajoDataTypes.Type.TEXT); + partSchema.addColumn("key2", TajoDataTypes.Type.TEXT); + partSchema.addColumn("key3", TajoDataTypes.Type.INT8); + + PartitionMethodDesc partitionMethodDesc = + new PartitionMethodDesc("TestPartitionedTableUtil", MULTIPLE_PARTITION_TABLE_NAME, + CatalogProtos.PartitionType.COLUMN, "key1,key2,key3", partSchema); + + Path tablePath = new Path(rootDir, MULTIPLE_PARTITION_TABLE_NAME); + fs.mkdirs(tablePath); + + client.createExternalTable(MULTIPLE_PARTITION_TABLE_NAME, schema, tablePath.toUri(), meta, partitionMethodDesc); + + TableDesc tableDesc = 
client.getTableDesc(MULTIPLE_PARTITION_TABLE_NAME); + assertNotNull(tableDesc); + + Path path = new Path(tableDesc.getUri().toString() + "/key1=part123"); + fs.mkdirs(path); + path = new Path(tableDesc.getUri().toString() + "/key1=part123/key2=supp123"); + fs.mkdirs(path); + path = new Path(tableDesc.getUri().toString() + "/key1=part123/key2=supp123/key3=1"); + fs.mkdirs(path); + FileUtil.writeTextToFile("1|ARGENTINA|1", new Path(path, "data")); + + path = new Path(tableDesc.getUri().toString() + "/key1=part123/key2=supp123/key3=2"); + fs.mkdirs(path); + FileUtil.writeTextToFile("2|BRAZIL|1", new Path(path, "data")); + + path = new Path(tableDesc.getUri().toString() + "/key1=part789"); + fs.mkdirs(path); + path = new Path(tableDesc.getUri().toString() + "/key1=part789/key2=supp789"); + fs.mkdirs(path); + path = new Path(tableDesc.getUri().toString() + "/key1=part789/key2=supp789/key3=3"); + fs.mkdirs(path); + FileUtil.writeTextToFile("3|CANADA|1", new Path(path, "data")); + } + + @AfterClass + public static void tearDown() throws Exception { + client.executeQuery("DROP TABLE IF EXISTS " + PARTITION_TABLE_NAME + " PURGE;"); + client.executeQuery("DROP TABLE IF EXISTS " + MULTIPLE_PARTITION_TABLE_NAME + " PURGE;"); + } + + @Test + public void testFilterIncludePartitionKeyColumn() throws Exception { + Expr expr = sqlParser.parse("SELECT * FROM " + PARTITION_TABLE_NAME + " WHERE key = 'part456' ORDER BY key"); + QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(testingCluster.getConfiguration()); + LogicalPlan newPlan = planner.createPlan(defaultContext, expr); + LogicalNode plan = newPlan.getRootBlock().getRoot(); + + assertEquals(NodeType.ROOT, plan.getType()); + LogicalRootNode root = (LogicalRootNode) plan; + + ProjectionNode projNode = root.getChild(); + + assertEquals(NodeType.SORT, projNode.getChild().getType()); + SortNode sortNode = projNode.getChild(); + + assertEquals(NodeType.SELECTION, sortNode.getChild().getType()); + SelectionNode 
selNode = sortNode.getChild(); + assertTrue(selNode.hasQual()); + + assertEquals(NodeType.SCAN, selNode.getChild().getType()); + ScanNode scanNode = selNode.getChild(); + scanNode.setQual(selNode.getQual()); + + FilteredPartitionInfo filteredPartitionInfo = PartitionedTableUtil.findFilteredPartitionInfo(catalog, conf, + scanNode); + assertNotNull(filteredPartitionInfo); + + Path[] filteredPaths = filteredPartitionInfo.getPartitionPaths(); + assertEquals(1, filteredPaths.length); + assertEquals("key=part456", filteredPaths[0].getName()); + + assertEquals(10L, filteredPartitionInfo.getTotalVolume()); + } + + @Test + public void testWithoutAnyFilters() throws Exception { + Expr expr = sqlParser.parse("SELECT * FROM " + PARTITION_TABLE_NAME + " ORDER BY key"); + QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(testingCluster.getConfiguration()); + LogicalPlan newPlan = planner.createPlan(defaultContext, expr); + LogicalNode plan = newPlan.getRootBlock().getRoot(); + + assertEquals(NodeType.ROOT, plan.getType()); + LogicalRootNode root = (LogicalRootNode) plan; + + ProjectionNode projNode = root.getChild(); + + assertEquals(NodeType.SORT, projNode.getChild().getType()); + SortNode sortNode = projNode.getChild(); + + assertEquals(NodeType.SCAN, sortNode.getChild().getType()); + ScanNode scanNode = sortNode.getChild(); + + FilteredPartitionInfo filteredPartitionInfo = PartitionedTableUtil.findFilteredPartitionInfo(catalog, conf, + scanNode); + assertNotNull(filteredPartitionInfo); + + Path[] filteredPaths = filteredPartitionInfo.getPartitionPaths(); + assertEquals(3, filteredPaths.length); + assertEquals("key=part123", filteredPaths[0].getName()); + assertEquals("key=part456", filteredPaths[1].getName()); + assertEquals("key=part789", filteredPaths[2].getName()); + + assertEquals(33L, filteredPartitionInfo.getTotalVolume()); + } + + @Test + public void testFilterIncludeNonExistingPartitionValue() throws Exception { + Expr expr = 
sqlParser.parse("SELECT * FROM " + PARTITION_TABLE_NAME + " WHERE key = 'part123456789'"); + QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(testingCluster.getConfiguration()); + LogicalPlan newPlan = planner.createPlan(defaultContext, expr); + LogicalNode plan = newPlan.getRootBlock().getRoot(); + + assertEquals(NodeType.ROOT, plan.getType()); + LogicalRootNode root = (LogicalRootNode) plan; + + ProjectionNode projNode = root.getChild(); + + assertEquals(NodeType.SELECTION, projNode.getChild().getType()); + SelectionNode selNode = projNode.getChild(); + assertTrue(selNode.hasQual()); + + assertEquals(NodeType.SCAN, selNode.getChild().getType()); + ScanNode scanNode = selNode.getChild(); + scanNode.setQual(selNode.getQual()); + + FilteredPartitionInfo filteredPartitionInfo = PartitionedTableUtil.findFilteredPartitionInfo(catalog, conf, + scanNode); + assertNotNull(filteredPartitionInfo); + + assertEquals(0, filteredPartitionInfo.getPartitionPaths().length); + + assertEquals(0L, filteredPartitionInfo.getTotalVolume()); + } + + @Test + public void testFilterIncludeNonPartitionKeyColumn() throws Exception { + String sql = "SELECT * FROM " + PARTITION_TABLE_NAME + " WHERE n_nationkey = 1"; + Expr expr = sqlParser.parse(sql); + QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(testingCluster.getConfiguration()); + LogicalPlan newPlan = planner.createPlan(defaultContext, expr); + LogicalNode plan = newPlan.getRootBlock().getRoot(); + + assertEquals(NodeType.ROOT, plan.getType()); + LogicalRootNode root = (LogicalRootNode) plan; + + ProjectionNode projNode = root.getChild(); + + assertEquals(NodeType.SELECTION, projNode.getChild().getType()); + SelectionNode selNode = projNode.getChild(); + assertTrue(selNode.hasQual()); + + assertEquals(NodeType.SCAN, selNode.getChild().getType()); + ScanNode scanNode = selNode.getChild(); + scanNode.setQual(selNode.getQual()); + + FilteredPartitionInfo filteredPartitionInfo = 
PartitionedTableUtil.findFilteredPartitionInfo(catalog, conf, + scanNode); + assertNotNull(filteredPartitionInfo); + + Path[] filteredPaths = filteredPartitionInfo.getPartitionPaths(); + assertEquals(3, filteredPaths.length); + assertEquals("key=part123", filteredPaths[0].getName()); + assertEquals("key=part456", filteredPaths[1].getName()); + assertEquals("key=part789", filteredPaths[2].getName()); + + assertEquals(33L, filteredPartitionInfo.getTotalVolume()); + } + + @Test + public void testFilterIncludeEveryPartitionKeyColumn() throws Exception { + Expr expr = sqlParser.parse("SELECT * FROM " + MULTIPLE_PARTITION_TABLE_NAME + + " WHERE key1 = 'part789' and key2 = 'supp789' and key3=3"); + QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(testingCluster.getConfiguration()); + LogicalPlan newPlan = planner.createPlan(defaultContext, expr); + LogicalNode plan = newPlan.getRootBlock().getRoot(); + + assertEquals(NodeType.ROOT, plan.getType()); + LogicalRootNode root = (LogicalRootNode) plan; + + ProjectionNode projNode = root.getChild(); + + assertEquals(NodeType.SELECTION, projNode.getChild().getType()); + SelectionNode selNode = projNode.getChild(); + assertTrue(selNode.hasQual()); + + assertEquals(NodeType.SCAN, selNode.getChild().getType()); + ScanNode scanNode = selNode.getChild(); + scanNode.setQual(selNode.getQual()); + + FilteredPartitionInfo filteredPartitionInfo = PartitionedTableUtil.findFilteredPartitionInfo(catalog, conf, + scanNode); + assertNotNull(filteredPartitionInfo); + + Path[] filteredPaths = filteredPartitionInfo.getPartitionPaths(); + assertEquals(1, filteredPaths.length); + assertEquals("key3=3", filteredPaths[0].getName()); + assertEquals("key2=supp789", filteredPaths[0].getParent().getName()); + assertEquals("key1=part789", filteredPaths[0].getParent().getParent().getName()); + + assertEquals(10L, filteredPartitionInfo.getTotalVolume()); + } + + @Test + public void testFilterIncludeSomeOfPartitionKeyColumns() throws 
Exception { + Expr expr = sqlParser.parse("SELECT * FROM " + MULTIPLE_PARTITION_TABLE_NAME + + " WHERE key1 = 'part123' and key2 = 'supp123' order by n_nationkey"); + QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(testingCluster.getConfiguration()); + LogicalPlan newPlan = planner.createPlan(defaultContext, expr); + LogicalNode plan = newPlan.getRootBlock().getRoot(); + + assertEquals(NodeType.ROOT, plan.getType()); + LogicalRootNode root = (LogicalRootNode) plan; + + ProjectionNode projNode = root.getChild(); + + assertEquals(NodeType.SORT, projNode.getChild().getType()); + SortNode sortNode = projNode.getChild(); + + assertEquals(NodeType.SELECTION, sortNode.getChild().getType()); + SelectionNode selNode = sortNode.getChild(); + assertTrue(selNode.hasQual()); + + assertEquals(NodeType.SCAN, selNode.getChild().getType()); + ScanNode scanNode = selNode.getChild(); + scanNode.setQual(selNode.getQual()); + + FilteredPartitionInfo filteredPartitionInfo = PartitionedTableUtil.findFilteredPartitionInfo(catalog, conf, + scanNode); + assertNotNull(filteredPartitionInfo); + + Path[] filteredPaths = filteredPartitionInfo.getPartitionPaths(); + assertEquals(2, filteredPaths.length); + + assertEquals("key3=1", filteredPaths[0].getName()); + assertEquals("key2=supp123", filteredPaths[0].getParent().getName()); + assertEquals("key1=part123", filteredPaths[0].getParent().getParent().getName()); + + assertEquals("key3=2", filteredPaths[1].getName()); + assertEquals("key2=supp123", filteredPaths[1].getParent().getName()); + assertEquals("key1=part123", filteredPaths[1].getParent().getParent().getName()); + + assertEquals(23L, filteredPartitionInfo.getTotalVolume()); + } + + @Test + public void testFilterIncludeNonPartitionKeyColumns() throws Exception { + Expr expr = sqlParser.parse("SELECT * FROM " + MULTIPLE_PARTITION_TABLE_NAME + + " WHERE key1 = 'part123' and n_nationkey >= 2 order by n_nationkey"); + QueryContext defaultContext = 
LocalTajoTestingUtility.createDummyContext(testingCluster.getConfiguration()); + LogicalPlan newPlan = planner.createPlan(defaultContext, expr); + LogicalNode plan = newPlan.getRootBlock().getRoot(); + + assertEquals(NodeType.ROOT, plan.getType()); + LogicalRootNode root = (LogicalRootNode) plan; + + ProjectionNode projNode = root.getChild(); + + assertEquals(NodeType.SORT, projNode.getChild().getType()); + SortNode sortNode = projNode.getChild(); + + assertEquals(NodeType.SELECTION, sortNode.getChild().getType()); + SelectionNode selNode = sortNode.getChild(); + assertTrue(selNode.hasQual()); + + assertEquals(NodeType.SCAN, selNode.getChild().getType()); + ScanNode scanNode = selNode.getChild(); + scanNode.setQual(selNode.getQual()); + + FilteredPartitionInfo filteredPartitionInfo = PartitionedTableUtil.findFilteredPartitionInfo(catalog, conf, + scanNode); + assertNotNull(filteredPartitionInfo); + + Path[] filteredPaths = filteredPartitionInfo.getPartitionPaths(); + assertEquals(2, filteredPaths.length); + + assertEquals("key3=1", filteredPaths[0].getName()); + assertEquals("key2=supp123", filteredPaths[0].getParent().getName()); + assertEquals("key1=part123", filteredPaths[0].getParent().getParent().getName()); + + assertEquals("key3=2", filteredPaths[1].getName()); + assertEquals("key2=supp123", filteredPaths[1].getParent().getName()); + assertEquals("key1=part123", filteredPaths[1].getParent().getParent().getName()); + + assertEquals(23L, filteredPartitionInfo.getTotalVolume()); + } + + @Test + public final void testPartitionPruningWitCTAS() throws Exception { + String tableName = "testPartitionPruningUsingDirectories".toLowerCase(); + String canonicalTableName = CatalogUtil.getCanonicalTableName("\"TestPartitionedTableUtil\"", tableName); + + executeString( + "create table " + canonicalTableName + "(col1 int4, col2 int4) partition by column(key float8) " + + " as select l_orderkey, l_partkey, l_quantity from default.lineitem"); + + TableDesc tableDesc = 
catalog.getTableDesc(getCurrentDatabase(), tableName); + assertNotNull(tableDesc); + + // With a filter which checks a partition key column + Expr expr = sqlParser.parse("SELECT * FROM " + canonicalTableName + " WHERE key <= 40.0 ORDER BY key"); + QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(testingCluster.getConfiguration()); + LogicalPlan newPlan = planner.createPlan(defaultContext, expr); + LogicalNode plan = newPlan.getRootBlock().getRoot(); + + assertEquals(NodeType.ROOT, plan.getType()); + LogicalRootNode root = (LogicalRootNode) plan; + + ProjectionNode projNode = root.getChild(); + + assertEquals(NodeType.SORT, projNode.getChild().getType()); + SortNode sortNode = projNode.getChild(); + + assertEquals(NodeType.SELECTION, sortNode.getChild().getType()); + SelectionNode selNode = sortNode.getChild(); + assertTrue(selNode.hasQual()); + + assertEquals(NodeType.SCAN, selNode.getChild().getType()); + ScanNode scanNode = selNode.getChild(); + scanNode.setQual(selNode.getQual()); + + FilteredPartitionInfo filteredPartitionInfo = PartitionedTableUtil.findFilteredPartitionInfo(catalog, conf, + scanNode); + assertNotNull(filteredPartitionInfo); + + Path[] filteredPaths = filteredPartitionInfo.getPartitionPaths(); + assertEquals(3, filteredPaths.length); + assertEquals("key=17.0", filteredPaths[0].getName()); + assertEquals("key=36.0", filteredPaths[1].getName()); + assertEquals("key=38.0", filteredPaths[2].getName()); + + assertEquals(12L, filteredPartitionInfo.getTotalVolume()); + + executeString("DROP TABLE " + canonicalTableName + " PURGE").close(); + } +} \ No newline at end of file diff --git a/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java b/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java index 815c44b26b..4f62ea6af4 100644 --- a/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java +++ b/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java @@ -471,7 
+471,7 @@ public static List getFragmentsFromPartitionedTable(Tablespace tsHandl List fragments = Lists.newArrayList(); PartitionedTableScanNode partitionsScan = (PartitionedTableScanNode) scan; fragments.addAll(((FileTablespace) tsHandler).getSplits( - scan.getCanonicalName(), table.getMeta(), table.getSchema(), partitionsScan.getInputPaths())); + scan.getCanonicalName(), table.getMeta(), table.getSchema(), partitionsScan.getInputPaths())); partitionsScan.setInputPaths(null); return fragments; } diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java index cf54f7b4c7..5e1ca2d46e 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java @@ -485,6 +485,24 @@ public static String getColumnPartitionPathPrefix(Schema partitionColumn) { return sb.toString(); } + private void updateTableStat(OverridableConf queryContext, PartitionedTableScanNode scanNode) + throws TajoException { + if (scanNode.getInputPaths().length > 0) { + try { + FileSystem fs = scanNode.getInputPaths()[0].getFileSystem(queryContext.getConf()); + long totalVolume = 0; + + for (Path input : scanNode.getInputPaths()) { + ContentSummary summary = fs.getContentSummary(input); + totalVolume += summary.getLength(); + } + scanNode.getTableDesc().getStats().setNumBytes(totalVolume); + } catch (Throwable e) { + throw new TajoInternalError(e); + } + } + } + private final class Rewriter extends BasicLogicalPlanVisitor { @Override public Object visitScan(OverridableConf queryContext, LogicalPlan plan, LogicalPlan.QueryBlock block, @@ -500,7 +518,9 @@ public Object visitScan(OverridableConf queryContext, LogicalPlan plan, LogicalP plan.addHistory("PartitionTableRewriter chooses " + filteredPaths.length + " of partitions"); 
PartitionedTableScanNode rewrittenScanNode = plan.createNode(PartitionedTableScanNode.class); rewrittenScanNode.init(scanNode, filteredPaths); - rewrittenScanNode.getTableDesc().getStats().setNumBytes(totalVolume); + // TODO: See TAJO-1927. This code have been commented temporarily. +// rewrittenScanNode.getTableDesc().getStats().setNumBytes(totalVolume); + updateTableStat(queryContext, rewrittenScanNode); // if it is topmost node, set it as the rootnode of this block. if (stack.empty() || block.getRoot().equals(scanNode)) { diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/util/FilteredPartitionInfo.java b/tajo-plan/src/main/java/org/apache/tajo/plan/util/FilteredPartitionInfo.java new file mode 100644 index 0000000000..a5d62201da --- /dev/null +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/util/FilteredPartitionInfo.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.tajo.plan.util; + +import org.apache.hadoop.fs.Path; + +public class FilteredPartitionInfo { + private Path[] partitionPaths; + private long totalVolume; + + public FilteredPartitionInfo(Path[] partitionPaths, long totalVolume) { + this.partitionPaths = partitionPaths; + this.totalVolume = totalVolume; + } + + public Path[] getPartitionPaths() { + return partitionPaths; + } + + public void setPartitionPaths(Path[] partitionPaths) { + this.partitionPaths = partitionPaths; + } + + public long getTotalVolume() { + return totalVolume; + } + + public void setTotalVolume(long totalVolume) { + this.totalVolume = totalVolume; + } +} \ No newline at end of file diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java b/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java new file mode 100644 index 0000000000..a938f23b9c --- /dev/null +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java @@ -0,0 +1,470 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.tajo.plan.util; + +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.*; +import org.apache.tajo.catalog.*; +import org.apache.tajo.catalog.partition.PartitionMethodDesc; +import org.apache.tajo.catalog.proto.CatalogProtos; +import org.apache.tajo.conf.TajoConf; +import org.apache.tajo.datum.DatumFactory; +import org.apache.tajo.datum.NullDatum; +import org.apache.tajo.exception.*; +import org.apache.tajo.plan.expr.*; +import org.apache.tajo.plan.logical.ScanNode; +import org.apache.tajo.storage.Tuple; +import org.apache.tajo.storage.VTuple; +import org.apache.tajo.util.StringUtils; + +import java.io.IOException; +import java.util.List; +import java.util.Set; +import java.util.Stack; + +public class PartitionedTableUtil { + private static final Log LOG = LogFactory.getLog(PartitionedTableUtil.class); + + public static FilteredPartitionInfo findFilteredPartitionInfo(CatalogService catalog, TajoConf conf, + ScanNode scanNode) throws IOException, UndefinedDatabaseException, UndefinedTableException, + UndefinedPartitionMethodException, UndefinedOperatorException, UnsupportedException { + TableDesc table = scanNode.getTableDesc(); + PartitionMethodDesc partitionDesc = scanNode.getTableDesc().getPartitionMethod(); + + Schema paritionValuesSchema = new Schema(); + for (Column column : partitionDesc.getExpressionSchema().getRootColumns()) { + paritionValuesSchema.addColumn(column); + } + + Set indexablePredicateSet = Sets.newHashSet(); + + // if a query statement has a search condition, try to find indexable predicates + if (scanNode.hasQual()) { + EvalNode [] conjunctiveForms = AlgebraicUtil.toConjunctiveNormalFormArray(scanNode.getQual()); + Set remainExprs = Sets.newHashSet(conjunctiveForms); + + // add qualifier to schema for qual + paritionValuesSchema.setQualifier(scanNode.getCanonicalName()); + 
for (Column column : paritionValuesSchema.getRootColumns()) { + for (EvalNode simpleExpr : conjunctiveForms) { + if (checkIfIndexablePredicateOnTargetColumn(simpleExpr, column)) { + indexablePredicateSet.add(simpleExpr); + } + } + } + + // Partitions which are not matched to the partition filter conditions are pruned immediately. + // So, the partition filter conditions are not necessary later, and they are removed from + // original search condition for simplicity and efficiency. + remainExprs.removeAll(indexablePredicateSet); + if (remainExprs.isEmpty()) { + scanNode.setQual(null); + } else { + scanNode.setQual( + AlgebraicUtil.createSingletonExprFromCNF(remainExprs.toArray(new EvalNode[remainExprs.size()]))); + } + } + + if (indexablePredicateSet.size() > 0) { // There are at least one indexable predicates + return findFilteredPartitionInfo(catalog, conf, table.getName(), paritionValuesSchema, + indexablePredicateSet.toArray(new EvalNode[indexablePredicateSet.size()]), new Path(table.getUri()), scanNode); + } else { // otherwise, we will get all partition paths. + return findFilteredPartitionInfo(catalog, conf, table.getName(), paritionValuesSchema, null, new Path(table.getUri())); + } + } + + private static boolean checkIfIndexablePredicateOnTargetColumn(EvalNode evalNode, Column targetColumn) { + if (checkIfIndexablePredicate(evalNode) || checkIfDisjunctiveButOneVariable(evalNode)) { + Set variables = EvalTreeUtil.findUniqueColumns(evalNode); + // if it contains only single variable matched to a target column + return variables.size() == 1 && variables.contains(targetColumn); + } else { + return false; + } + } + + /** + * Check if an expression consists of one variable and one constant and + * the expression is a comparison operator. + * + * @param evalNode The expression to be checked + * @return true if an expression consists of one variable and one constant + * and the expression is a comparison operator. Other, false. 
+ */ + private static boolean checkIfIndexablePredicate(EvalNode evalNode) { + // TODO - LIKE with a trailing wild-card character and IN with an array can be indexable + return AlgebraicUtil.containSingleVar(evalNode) && AlgebraicUtil.isIndexableOperator(evalNode); + } + + /** + * + * @param evalNode The expression to be checked + * @return true if an disjunctive expression, consisting of indexable expressions + */ + private static boolean checkIfDisjunctiveButOneVariable(EvalNode evalNode) { + if (evalNode.getType() == EvalType.OR) { + BinaryEval orEval = (BinaryEval) evalNode; + boolean indexable = + checkIfIndexablePredicate(orEval.getLeftExpr()) && + checkIfIndexablePredicate(orEval.getRightExpr()); + + boolean sameVariable = + EvalTreeUtil.findUniqueColumns(orEval.getLeftExpr()) + .equals(EvalTreeUtil.findUniqueColumns(orEval.getRightExpr())); + + return indexable && sameVariable; + } else { + return false; + } + } + + private static FilteredPartitionInfo findFilteredPartitionInfo(CatalogService catalog, TajoConf conf, + String tableName, Schema partitionColumns, EvalNode [] conjunctiveForms, Path tablePath) throws IOException, + UndefinedDatabaseException, UndefinedTableException, UndefinedPartitionMethodException, UndefinedOperatorException, + UnsupportedException { + return findFilteredPartitionInfo(catalog, conf, tableName, partitionColumns, conjunctiveForms, tablePath, null); + } + + /** + * It assumes that each conjunctive form corresponds to one column. + * + * @param partitionColumns + * @param conjunctiveForms search condition corresponding to partition columns. + * If it is NULL, it means that there is no search condition for this table. 
+ * @param tablePath + * @return + * @throws IOException + */ + private static FilteredPartitionInfo findFilteredPartitionInfo(CatalogService catalog, TajoConf conf, + String tableName, Schema partitionColumns, EvalNode [] conjunctiveForms, Path tablePath, ScanNode scanNode) + throws IOException, UndefinedDatabaseException, UndefinedTableException, UndefinedPartitionMethodException, + UndefinedOperatorException, UnsupportedException { + + FilteredPartitionInfo filteredPartitionInfo = null; + Path[] filteredPaths = null; + FileSystem fs = tablePath.getFileSystem(conf); + String [] splits = CatalogUtil.splitFQTableName(tableName); + List partitions = null; + + try { + if (conjunctiveForms == null) { + partitions = catalog.getPartitionsOfTable(splits[0], splits[1]); + if (partitions.isEmpty()) { + filteredPaths = findFilteredPathsFromFileSystem(partitionColumns, conjunctiveForms, fs, tablePath); + filteredPartitionInfo = new FilteredPartitionInfo(filteredPaths, getTotalVolume(fs, filteredPaths)); + } else { + filteredPartitionInfo = findFilteredPartitionInfoByPartitionDesc(partitions); + } + } else { + if (catalog.existPartitions(splits[0], splits[1])) { + CatalogProtos.PartitionsByAlgebraProto request = getPartitionsAlgebraProto(splits[0], splits[1], conjunctiveForms); + partitions = catalog.getPartitionsByAlgebra(request); + filteredPartitionInfo = findFilteredPartitionInfoByPartitionDesc(partitions); + } else { + filteredPaths = findFilteredPathsFromFileSystem(partitionColumns, conjunctiveForms, fs, tablePath); + filteredPartitionInfo = new FilteredPartitionInfo(filteredPaths, getTotalVolume(fs, filteredPaths)); + } + } + } catch (UnsupportedException ue) { + // Partial catalog might not allow some filter conditions. For example, HiveMetastore doesn't In statement, + // regexp statement and so on. Above case, Tajo need to build filtered path by listing hdfs directories. 
+ LOG.warn(ue.getMessage()); + partitions = catalog.getPartitionsOfTable(splits[0], splits[1]); + if (partitions.isEmpty()) { + filteredPaths = findFilteredPathsFromFileSystem(partitionColumns, conjunctiveForms, fs, tablePath); + filteredPartitionInfo = new FilteredPartitionInfo(filteredPaths, getTotalVolume(fs, filteredPaths)); + } else { + filteredPartitionInfo = findFilteredPartitionInfoByPartitionDesc(partitions); + } + scanNode.setQual(AlgebraicUtil.createSingletonExprFromCNF(conjunctiveForms)); + } + + LOG.info("Filtered directory or files: " + filteredPartitionInfo.getPartitionPaths().length + + ", totalVolume:" + filteredPartitionInfo.getTotalVolume()); + + return filteredPartitionInfo; + } + + /** + * Build list of partition path by PartitionDescProto which is generated from CatalogStore. + * + * @param partitions + * @return + */ + private static FilteredPartitionInfo findFilteredPartitionInfoByPartitionDesc(List + partitions) { + long totalVolume = 0L; + Path[] filteredPaths = new Path[partitions.size()]; + for (int i = 0; i < partitions.size(); i++) { + CatalogProtos.PartitionDescProto partition = partitions.get(i); + filteredPaths[i] = new Path(partition.getPath()); + totalVolume += partition.getNumBytes(); + } + return new FilteredPartitionInfo(filteredPaths, totalVolume); + } + + /** + * Build list of partition path by filtering directories in the given table path. 
+ * + * + * @param partitionColumns + * @param conjunctiveForms + * @param fs + * @param tablePath + * @return + * @throws IOException + */ + private static Path[] findFilteredPathsFromFileSystem(Schema partitionColumns, EvalNode [] conjunctiveForms, + FileSystem fs, Path tablePath) throws IOException{ + Path[] filteredPaths = null; + PathFilter[] filters; + + if (conjunctiveForms == null) { + filters = buildAllAcceptingPathFilters(partitionColumns); + } else { + filters = buildPathFiltersForAllLevels(partitionColumns, conjunctiveForms); + } + + // loop from one to the number of partition columns + filteredPaths = toPathArray(fs.listStatus(tablePath, filters[0])); + + for (int i = 1; i < partitionColumns.size(); i++) { + // Get all file status matched to a ith level path filter. + filteredPaths = toPathArray(fs.listStatus(filteredPaths, filters[i])); + } + return filteredPaths; + } + + /** + * Build algebra expressions for querying partitions and partition keys by using EvalNodeToExprConverter. + * + * @param databaseName the database name + * @param tableName the table name + * @param conjunctiveForms EvalNode which contains filter conditions + * @return + */ + private static CatalogProtos.PartitionsByAlgebraProto getPartitionsAlgebraProto( + String databaseName, String tableName, EvalNode [] conjunctiveForms) { + + CatalogProtos.PartitionsByAlgebraProto.Builder request = CatalogProtos.PartitionsByAlgebraProto.newBuilder(); + request.setDatabaseName(databaseName); + request.setTableName(tableName); + + if (conjunctiveForms != null) { + EvalNode evalNode = AlgebraicUtil.createSingletonExprFromCNF(conjunctiveForms); + EvalNodeToExprConverter convertor = new EvalNodeToExprConverter(databaseName + "." + tableName); + convertor.visit(null, evalNode, new Stack<>()); + request.setAlgebra(convertor.getResult().toJson()); + } else { + request.setAlgebra(""); + } + + return request.build(); + } + + /** + * Build path filters for all levels with a list of filter conditions. 
+ * + * For example, consider you have a partitioned table for three columns (i.e., col1, col2, col3). + * Then, this methods will create three path filters for (col1), (col1, col2), (col1, col2, col3). + * + * Corresponding filter conditions will be placed on each path filter, + * If there is no corresponding expression for certain column, + * The condition will be filled with a true value. + * + * Assume that an user gives a condition WHERE col1 ='A' and col3 = 'C'. + * There is no filter condition corresponding to col2. + * Then, the path filter conditions are corresponding to the followings: + * + * The first path filter: col1 = 'A' + * The second path filter: col1 = 'A' AND col2 IS NOT NULL + * The third path filter: col1 = 'A' AND col2 IS NOT NULL AND col3 = 'C' + * + * 'IS NOT NULL' predicate is always true against the partition path. + * + * @param partitionColumns + * @param conjunctiveForms + * @return + */ + private static PathFilter [] buildPathFiltersForAllLevels(Schema partitionColumns, + EvalNode [] conjunctiveForms) { + // Building partition path filters for all levels + Column target; + PathFilter [] filters = new PathFilter[partitionColumns.size()]; + List accumulatedFilters = Lists.newArrayList(); + for (int i = 0; i < partitionColumns.size(); i++) { // loop from one to level + target = partitionColumns.getColumn(i); + + for (EvalNode expr : conjunctiveForms) { + if (EvalTreeUtil.findUniqueColumns(expr).contains(target)) { + // Accumulate one qual per level + accumulatedFilters.add(expr); + } + } + + if (accumulatedFilters.size() < (i + 1)) { + accumulatedFilters.add(new IsNullEval(true, new FieldEval(target))); + } + + EvalNode filterPerLevel = AlgebraicUtil.createSingletonExprFromCNF( + accumulatedFilters.toArray(new EvalNode[accumulatedFilters.size()])); + filters[i] = new PartitionPathFilter(partitionColumns, filterPerLevel); + } + + return filters; + } + + /** + * Build an array of path filters for all levels with all accepting filter 
condition. + * @param partitionColumns The partition columns schema + * @return The array of path filter, accpeting all partition paths. + */ + private static PathFilter [] buildAllAcceptingPathFilters(Schema partitionColumns) { + Column target; + PathFilter [] filters = new PathFilter[partitionColumns.size()]; + List accumulatedFilters = Lists.newArrayList(); + for (int i = 0; i < partitionColumns.size(); i++) { // loop from one to level + target = partitionColumns.getColumn(i); + accumulatedFilters.add(new IsNullEval(true, new FieldEval(target))); + + EvalNode filterPerLevel = AlgebraicUtil.createSingletonExprFromCNF( + accumulatedFilters.toArray(new EvalNode[accumulatedFilters.size()])); + filters[i] = new PartitionPathFilter(partitionColumns, filterPerLevel); + } + return filters; + } + + private static Path[] toPathArray(FileStatus[] fileStatuses) { + Path[] paths = new Path[fileStatuses.length]; + for (int i = 0; i < fileStatuses.length; i++) { + FileStatus fileStatus = fileStatuses[i]; + paths[i] = fileStatus.getPath(); + } + return paths; + } + + private static long getTotalVolume(FileSystem fs, Path[] inputPaths) { + long totalVolume = 0L; + if (inputPaths.length > 0) { + try { + for (Path input : inputPaths) { + ContentSummary summary = fs.getContentSummary(input); + totalVolume += summary.getLength(); + } + } catch (Throwable e) { + throw new TajoInternalError(e); + } + } + return totalVolume; + } + + private static class PartitionPathFilter implements PathFilter { + + private Schema schema; + private EvalNode partitionFilter; + public PartitionPathFilter(Schema schema, EvalNode partitionFilter) { + this.schema = schema; + this.partitionFilter = partitionFilter; + partitionFilter.bind(null, schema); + } + + @Override + public boolean accept(Path path) { + Tuple tuple = buildTupleFromPartitionPath(schema, path, true); + if (tuple == null) { // if it is a file or not acceptable file + return false; + } + + return partitionFilter.eval(tuple).asBool(); + } + 
+ @Override + public String toString() { + return partitionFilter.toString(); + } + } + + + /** + * Take a look at a column partition path. A partition path consists + * of a table path part and column values part. This method transforms + * a partition path into a tuple with a given partition column schema. + * + * hdfs://192.168.0.1/tajo/warehouse/table1/col1=abc/col2=def/col3=ghi + * ^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^ + * table path part column values part + * + * When a file path is given, it can perform two ways depending on beNullIfFile flag. + * If it is true, it returns NULL when a given path is a file. + * Otherwise, it returns a built tuple regardless of file or directory. + * + * @param partitionColumnSchema The partition column schema + * @param partitionPath The partition path + * @param beNullIfFile If true, this method returns NULL when a given path is a file. + * @return The tuple transformed from a column values part. + */ + public static Tuple buildTupleFromPartitionPath(Schema partitionColumnSchema, Path partitionPath, + boolean beNullIfFile) { + int startIdx = partitionPath.toString().indexOf(getColumnPartitionPathPrefix(partitionColumnSchema)); + + if (startIdx == -1) { // if there is no partition column in the patch + return null; + } + String columnValuesPart = partitionPath.toString().substring(startIdx); + + String [] columnValues = columnValuesPart.split("/"); + + // true means this is a file. 
+ if (beNullIfFile && partitionColumnSchema.size() < columnValues.length) { + return null; + } + + Tuple tuple = new VTuple(partitionColumnSchema.size()); + int i = 0; + for (; i < columnValues.length && i < partitionColumnSchema.size(); i++) { + String [] parts = columnValues[i].split("="); + if (parts.length != 2) { + return null; + } + int columnId = partitionColumnSchema.getColumnIdByName(parts[0]); + Column keyColumn = partitionColumnSchema.getColumn(columnId); + tuple.put(columnId, DatumFactory.createFromString(keyColumn.getDataType(), StringUtils.unescapePathName(parts[1]))); + } + for (; i < partitionColumnSchema.size(); i++) { + tuple.put(i, NullDatum.get()); + } + return tuple; + } + + /** + * Get a prefix of column partition path. For example, consider a column partition (col1, col2). + * Then, you will get a string 'col1='. + * + * @param partitionColumn the schema of column partition + * @return The first part string of column partition path. + */ + public static String getColumnPartitionPathPrefix(Schema partitionColumn) { + StringBuilder sb = new StringBuilder(); + sb.append(partitionColumn.getColumn(0).getSimpleName()).append("="); + return sb.toString(); + } +} \ No newline at end of file From b3bbcd188b0afc3b977f85005c0dffa20a8312dc Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 2 Nov 2015 15:57:39 +0900 Subject: [PATCH 004/127] Remove the array of partition directories of rerwrite rule and apply PartitionedFileFragment. 
--- .../planner/TestPartitionedTableUtil.java | 38 ++ .../physical/TestExternalSortExec.java | 2 +- .../physical/TestFullOuterHashJoinExec.java | 8 +- .../physical/TestFullOuterMergeJoinExec.java | 12 +- .../physical/TestHashAntiJoinExec.java | 2 +- .../planner/physical/TestHashJoinExec.java | 4 +- .../physical/TestHashSemiJoinExec.java | 2 +- .../physical/TestLeftOuterHashJoinExec.java | 10 +- .../planner/physical/TestMergeJoinExec.java | 2 +- .../planner/physical/TestPhysicalPlanner.java | 52 +- .../TestProgressExternalSortExec.java | 2 +- .../physical/TestRightOuterHashJoinExec.java | 6 +- .../physical/TestRightOuterMergeJoinExec.java | 12 +- .../engine/planner/physical/TestSortExec.java | 2 +- .../physical/TestSortIntersectExec.java | 4 +- .../tajo/engine/util/TestTupleUtil.java | 51 +- .../testInnerAndOuterWithEmpty.1.Hash.plan | 4 +- ...rAndOuterWithEmpty.1.Hash_NoBroadcast.plan | 4 +- .../testInnerAndOuterWithEmpty.1.Sort.plan | 4 +- ...rAndOuterWithEmpty.1.Sort_NoBroadcast.plan | 4 +- .../engine/planner/PhysicalPlannerImpl.java | 50 +- .../rewriter/rules/BroadcastJoinRule.java | 7 +- .../rewriter/rules/GlobalPlanRewriteUtil.java | 14 +- .../engine/planner/physical/SeqScanExec.java | 9 +- .../tajo/master/TajoMasterClientService.java | 4 +- .../apache/tajo/master/exec/DDLExecutor.java | 5 +- .../exec/ExplainPlanPreprocessorForTest.java | 3 - .../NonForwardQueryResultFileScanner.java | 12 +- .../NonForwardQueryResultSystemScanner.java | 2 +- .../tajo/master/exec/QueryExecutor.java | 2 +- .../tajo/querymaster/Repartitioner.java | 53 +- .../org/apache/tajo/querymaster/Stage.java | 10 +- .../tajo/worker/ExecutionBlockContext.java | 2 +- .../apache/tajo/worker/TajoQueryEngine.java | 5 +- .../ws/rs/resources/QueryResultResource.java | 2 +- .../logical/PartitionedTableScanNode.java | 35 +- .../rules/PartitionedTableRewriter.java | 467 +----------------- .../plan/serder/LogicalNodeDeserializer.java | 7 - .../plan/serder/LogicalNodeSerializer.java | 11 - 
.../tajo/plan/util/FilteredPartitionInfo.java | 22 + .../tajo/plan/util/PartitionedTableUtil.java | 72 ++- .../verifier/PostLogicalPlanVerifier.java | 7 - tajo-plan/src/main/proto/Plan.proto | 9 - .../apache/tajo/storage/FileTablespace.java | 82 ++- 44 files changed, 396 insertions(+), 721 deletions(-) diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableUtil.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableUtil.java index 5eb97b6c3c..d7234f3ef0 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableUtil.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableUtil.java @@ -172,6 +172,11 @@ public void testFilterIncludePartitionKeyColumn() throws Exception { assertEquals(1, filteredPaths.length); assertEquals("key=part456", filteredPaths[0].getName()); + + String[] partitionNames = filteredPartitionInfo.getPartitionNames(); + assertEquals(1, partitionNames.length); + assertEquals("key=part456", partitionNames[0]); + assertEquals(10L, filteredPartitionInfo.getTotalVolume()); } @@ -203,6 +208,12 @@ public void testWithoutAnyFilters() throws Exception { assertEquals("key=part456", filteredPaths[1].getName()); assertEquals("key=part789", filteredPaths[2].getName()); + String[] partitionNames = filteredPartitionInfo.getPartitionNames(); + assertEquals(3, partitionNames.length); + assertEquals("key=part123", partitionNames[0]); + assertEquals("key=part456", partitionNames[1]); + assertEquals("key=part789", partitionNames[2]); + assertEquals(33L, filteredPartitionInfo.getTotalVolume()); } @@ -231,6 +242,7 @@ public void testFilterIncludeNonExistingPartitionValue() throws Exception { assertNotNull(filteredPartitionInfo); assertEquals(0, filteredPartitionInfo.getPartitionPaths().length); + assertNull(filteredPartitionInfo.getPartitionNames()); assertEquals(0L, filteredPartitionInfo.getTotalVolume()); } @@ -266,6 +278,12 @@ 
public void testFilterIncludeNonPartitionKeyColumn() throws Exception { assertEquals("key=part456", filteredPaths[1].getName()); assertEquals("key=part789", filteredPaths[2].getName()); + String[] partitionNames = filteredPartitionInfo.getPartitionNames(); + assertEquals(3, partitionNames.length); + assertEquals("key=part123", partitionNames[0]); + assertEquals("key=part456", partitionNames[1]); + assertEquals("key=part789", partitionNames[2]); + assertEquals(33L, filteredPartitionInfo.getTotalVolume()); } @@ -300,6 +318,10 @@ public void testFilterIncludeEveryPartitionKeyColumn() throws Exception { assertEquals("key2=supp789", filteredPaths[0].getParent().getName()); assertEquals("key1=part789", filteredPaths[0].getParent().getParent().getName()); + String[] partitionNames = filteredPartitionInfo.getPartitionNames(); + assertEquals(1, partitionNames.length); + assertEquals("key1=part789/key2=supp789/key3=3", partitionNames[0]); + assertEquals(10L, filteredPartitionInfo.getTotalVolume()); } @@ -342,6 +364,11 @@ public void testFilterIncludeSomeOfPartitionKeyColumns() throws Exception { assertEquals("key2=supp123", filteredPaths[1].getParent().getName()); assertEquals("key1=part123", filteredPaths[1].getParent().getParent().getName()); + String[] partitionNames = filteredPartitionInfo.getPartitionNames(); + assertEquals(2, partitionNames.length); + assertEquals("key1=part123/key2=supp123/key3=1", partitionNames[0]); + assertEquals("key1=part123/key2=supp123/key3=2", partitionNames[1]); + assertEquals(23L, filteredPartitionInfo.getTotalVolume()); } @@ -384,6 +411,11 @@ public void testFilterIncludeNonPartitionKeyColumns() throws Exception { assertEquals("key2=supp123", filteredPaths[1].getParent().getName()); assertEquals("key1=part123", filteredPaths[1].getParent().getParent().getName()); + String[] partitionNames = filteredPartitionInfo.getPartitionNames(); + assertEquals(2, partitionNames.length); + assertEquals("key1=part123/key2=supp123/key3=1", 
partitionNames[0]); + assertEquals("key1=part123/key2=supp123/key3=2", partitionNames[1]); + assertEquals(23L, filteredPartitionInfo.getTotalVolume()); } @@ -431,6 +463,12 @@ public final void testPartitionPruningWitCTAS() throws Exception { assertEquals("key=36.0", filteredPaths[1].getName()); assertEquals("key=38.0", filteredPaths[2].getName()); + String[] partitionNames = filteredPartitionInfo.getPartitionNames(); + assertEquals(3, partitionNames.length); + assertEquals("key=17.0", partitionNames[0]); + assertEquals("key=36.0", partitionNames[1]); + assertEquals("key=38.0", partitionNames[2]); + assertEquals(12L, filteredPartitionInfo.getTotalVolume()); executeString("DROP TABLE " + canonicalTableName + " PURGE").close(); diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestExternalSortExec.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestExternalSortExec.java index e796bad443..fd741863ba 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestExternalSortExec.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestExternalSortExec.java @@ -134,7 +134,7 @@ public final void testNext() throws IOException, TajoException { LogicalPlan plan = planner.createPlan(LocalTajoTestingUtility.createDummyContext(conf), expr); LogicalNode rootNode = plan.getRootBlock().getRoot(); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); ProjectionExec proj = (ProjectionExec) exec; diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestFullOuterHashJoinExec.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestFullOuterHashJoinExec.java index 2e26a2a0c6..06b8029e87 100644 --- 
a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestFullOuterHashJoinExec.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestFullOuterHashJoinExec.java @@ -279,7 +279,7 @@ public final void testFullOuterHashJoinExec0() throws IOException, TajoException LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; @@ -318,7 +318,7 @@ public final void testFullOuterHashJoinExec1() throws IOException, TajoException LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; @@ -356,7 +356,7 @@ public final void testFullOuterHashJoinExec2() throws IOException, TajoException LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; @@ -396,7 +396,7 @@ public final void testFullOuterHashJoinExec3() throws IOException, TajoException workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestFullOuterMergeJoinExec.java 
b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestFullOuterMergeJoinExec.java index fd42b9a990..0a93cfd4d2 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestFullOuterMergeJoinExec.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestFullOuterMergeJoinExec.java @@ -326,7 +326,7 @@ public final void testFullOuterMergeJoin0() throws IOException, TajoException { LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; @@ -363,7 +363,7 @@ public final void testFullOuterMergeJoin1() throws IOException, TajoException { LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; @@ -400,7 +400,7 @@ public final void testFullOuterMergeJoin2() throws IOException, TajoException { LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; @@ -438,7 +438,7 @@ public final void testFullOuterMergeJoin3() throws IOException, TajoException { LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); 
ProjectionExec proj = (ProjectionExec) exec; @@ -479,7 +479,7 @@ public final void testFullOuterMergeJoin4() throws IOException, TajoException { LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; @@ -518,7 +518,7 @@ public final void testFullOuterMergeJoin5() throws IOException, TajoException { LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestHashAntiJoinExec.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestHashAntiJoinExec.java index 103d2b1ac6..db68e02de4 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestHashAntiJoinExec.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestHashAntiJoinExec.java @@ -168,7 +168,7 @@ public final void testHashAntiJoin() throws IOException, TajoException { optimizer.optimize(plan); LogicalNode rootNode = plan.getRootBlock().getRoot(); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); // replace an equal join with an hash anti join. 
diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestHashJoinExec.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestHashJoinExec.java index a4afa7fae1..8212f579de 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestHashJoinExec.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestHashJoinExec.java @@ -164,7 +164,7 @@ public final void testHashInnerJoin() throws IOException, TajoException { LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; @@ -208,7 +208,7 @@ public final void testCheckIfInMemoryInnerJoinIsPossible() throws IOException, T ctx.setEnforcer(enforcer); ctx.getQueryContext().setLong(SessionVars.HASH_JOIN_SIZE_LIMIT.keyname(), 1); // set hash join limit as 1 MB - PhysicalPlannerImpl phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlannerImpl phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestHashSemiJoinExec.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestHashSemiJoinExec.java index f99dc9e5f9..e4f879f86c 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestHashSemiJoinExec.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestHashSemiJoinExec.java @@ -172,7 +172,7 @@ public final void testHashSemiJoin() throws IOException, TajoException { optimizer.optimize(plan); LogicalNode rootNode = plan.getRootBlock().getRoot(); - PhysicalPlanner phyPlanner = new 
PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); // replace an equal join with an hash anti join. diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestLeftOuterHashJoinExec.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestLeftOuterHashJoinExec.java index ab5f5093c7..40f0aac0cb 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestLeftOuterHashJoinExec.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestLeftOuterHashJoinExec.java @@ -282,7 +282,7 @@ public final void testLeftOuterHashJoinExec0() throws IOException, TajoException LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; @@ -315,7 +315,7 @@ public final void testLeftOuter_HashJoinExec1() throws IOException, TajoExceptio Expr expr = analyzer.parse(QUERIES[1]); LogicalNode plan = planner.createPlan(defaultContext, expr).getRootBlock().getRoot(); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; @@ -350,7 +350,7 @@ public final void testLeftOuter_HashJoinExec2() throws IOException, TajoExceptio Expr expr = analyzer.parse(QUERIES[2]); LogicalNode plan = planner.createPlan(defaultContext, expr).getRootBlock().getRoot(); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = 
(ProjectionExec) exec; @@ -385,7 +385,7 @@ public final void testLeftOuter_HashJoinExec3() throws IOException, TajoExceptio Expr expr = analyzer.parse(QUERIES[3]); LogicalNode plan = planner.createPlan(defaultContext, expr).getRootBlock().getRoot(); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; @@ -420,7 +420,7 @@ public final void testLeftOuter_HashJoinExec4() throws IOException, TajoExceptio Expr expr = analyzer.parse(QUERIES[4]); LogicalNode plan = planner.createPlan(defaultContext, expr).getRootBlock().getRoot(); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestMergeJoinExec.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestMergeJoinExec.java index d29782573f..8bda0783a6 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestMergeJoinExec.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestMergeJoinExec.java @@ -174,7 +174,7 @@ public final void testMergeInnerJoin() throws IOException, TajoException { LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, root); ProjectionExec proj = (ProjectionExec) exec; assertTrue(proj.getChild() instanceof MergeJoinExec); diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestPhysicalPlanner.java 
b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestPhysicalPlanner.java index 70c19b204d..a12f2ff008 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestPhysicalPlanner.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestPhysicalPlanner.java @@ -251,7 +251,7 @@ public final void testCreateScanPlan() throws IOException, TajoException { optimizer.optimize(plan); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); Tuple tuple; @@ -282,7 +282,7 @@ public final void testCreateScanWithFilterPlan() throws IOException, TajoExcepti optimizer.optimize(plan); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); Tuple tuple; @@ -310,7 +310,7 @@ public final void testGroupByPlan() throws IOException, TajoException { optimizer.optimize(plan); LogicalNode rootNode = plan.getRootBlock().getRoot(); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); int i = 0; @@ -341,7 +341,7 @@ public final void testHashGroupByPlanWithALLField() throws IOException, TajoExce LogicalPlan plan = planner.createPlan(defaultContext, expr); LogicalNode rootNode = optimizer.optimize(plan); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); int i = 0; @@ -370,7 +370,7 @@ public final void testSortGroupByPlan() throws IOException, TajoException { LogicalPlan plan = planner.createPlan(defaultContext, context); optimizer.optimize(plan); - PhysicalPlanner phyPlanner = new 
PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, plan.getRootBlock().getRoot()); /*HashAggregateExec hashAgg = (HashAggregateExec) exec; @@ -437,7 +437,7 @@ public final void testStorePlan() throws IOException, TajoException { TableMeta outputMeta = CatalogUtil.newTableMeta("TEXT"); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); exec.init(); exec.next(); @@ -488,7 +488,7 @@ public final void testStorePlanWithMaxOutputFileSize() throws IOException, TajoE LogicalNode rootNode = optimizer.optimize(plan); // executing StoreTableExec - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); exec.init(); exec.next(); @@ -534,7 +534,7 @@ public final void testStorePlanWithRCFile() throws IOException, TajoException { TableMeta outputMeta = CatalogUtil.newTableMeta("RCFILE"); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); exec.init(); exec.next(); @@ -572,7 +572,7 @@ public final void testEnforceForDefaultColumnPartitionStorePlan() throws IOExcep Expr context = analyzer.parse(CreateTableAsStmts[2]); LogicalPlan plan = planner.createPlan(defaultContext, context); LogicalNode rootNode = optimizer.optimize(plan); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); assertTrue(exec instanceof SortBasedColPartitionStoreExec); } @@ -596,7 +596,7 @@ public final void testEnforceForHashBasedColumnPartitionStorePlan() throws IOExc 
ctx.setEnforcer(enforcer); ctx.setOutputPath(new Path(workDir, "grouped4")); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); assertTrue(exec instanceof HashBasedColPartitionStoreExec); } @@ -620,7 +620,7 @@ public final void testEnforceForSortBasedColumnPartitionStorePlan() throws IOExc ctx.setEnforcer(enforcer); ctx.setOutputPath(new Path(workDir, "grouped5")); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); assertTrue(exec instanceof SortBasedColPartitionStoreExec); } @@ -651,7 +651,7 @@ public final void testPartitionedStorePlan() throws IOException, TajoException { QueryId queryId = id.getTaskId().getExecutionBlockId().getQueryId(); ExecutionBlockId ebId = id.getTaskId().getExecutionBlockId(); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); exec.init(); exec.next(); @@ -715,7 +715,7 @@ public final void testPartitionedStorePlanWithMaxFileSize() throws IOException, LogicalNode rootNode = optimizer.optimize(plan); // Executing CREATE TABLE PARTITION BY - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); exec.init(); exec.next(); @@ -785,7 +785,7 @@ public final void testPartitionedStorePlanWithEmptyGroupingSet() QueryId queryId = id.getTaskId().getExecutionBlockId().getQueryId(); ExecutionBlockId ebId = id.getTaskId().getExecutionBlockId(); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = 
phyPlanner.createPlan(ctx, rootNode); exec.init(); exec.next(); @@ -844,7 +844,7 @@ public final void testAggregationFunction() throws IOException, TajoException { function.setFirstPhase(); } - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); exec.init(); @@ -875,7 +875,7 @@ public final void testCountFunction() throws IOException, TajoException { function.setFirstPhase(); } - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); exec.init(); Tuple tuple = exec.next(); @@ -897,7 +897,7 @@ public final void testGroupByWithNullValue() throws IOException, TajoException { LogicalPlan plan = planner.createPlan(defaultContext, context); LogicalNode rootNode = optimizer.optimize(plan); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); int count = 0; @@ -927,7 +927,7 @@ public final void testUnionPlan() throws IOException, TajoException, CloneNotSup union.setRightChild((LogicalNode) root.getChild().clone()); root.setChild(union); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, root); int count = 0; @@ -949,7 +949,7 @@ public final void testEvalExpr() throws IOException, TajoException { LogicalPlan plan = planner.createPlan(defaultContext, expr); LogicalNode rootNode = optimizer.optimize(plan); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); Tuple tuple; exec.init(); @@ -962,7 +962,7 @@ public 
final void testEvalExpr() throws IOException, TajoException { plan = planner.createPlan(defaultContext, expr); rootNode = optimizer.optimize(plan); - phyPlanner = new PhysicalPlannerImpl(conf); + phyPlanner = new PhysicalPlannerImpl(conf, catalog); exec = phyPlanner.createPlan(ctx, rootNode); exec.init(); tuple = exec.next(); @@ -992,7 +992,7 @@ public final void testCreateIndex() throws IOException, TajoException { LogicalPlan plan = planner.createPlan(defaultContext, context); LogicalNode rootNode = optimizer.optimize(plan); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); exec.init(); while (exec.next() != null) { @@ -1021,7 +1021,7 @@ public final void testDuplicateEliminate() throws IOException, TajoException { LogicalPlan plan = planner.createPlan(defaultContext, expr); LogicalNode rootNode = optimizer.optimize(plan); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); Tuple tuple; @@ -1061,7 +1061,7 @@ public final void testSortEnforcer() throws IOException, TajoException { new FileFragment[] {frags[0]}, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); exec.init(); exec.next(); @@ -1083,7 +1083,7 @@ public final void testSortEnforcer() throws IOException, TajoException { new FileFragment[] {frags[0]}, workDir); ctx.setEnforcer(enforcer); - phyPlanner = new PhysicalPlannerImpl(conf); + phyPlanner = new PhysicalPlannerImpl(conf, catalog); exec = phyPlanner.createPlan(ctx, rootNode); exec.init(); exec.next(); @@ -1111,7 +1111,7 @@ public final void testGroupByEnforcer() throws IOException, TajoException { new 
FileFragment[] {frags[0]}, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); exec.init(); exec.next(); @@ -1133,7 +1133,7 @@ public final void testGroupByEnforcer() throws IOException, TajoException { new FileFragment[] {frags[0]}, workDir); ctx.setEnforcer(enforcer); - phyPlanner = new PhysicalPlannerImpl(conf); + phyPlanner = new PhysicalPlannerImpl(conf, catalog); exec = phyPlanner.createPlan(ctx, rootNode); exec.init(); exec.next(); diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestProgressExternalSortExec.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestProgressExternalSortExec.java index ec41b0da33..e0d64b4cd7 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestProgressExternalSortExec.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestProgressExternalSortExec.java @@ -149,7 +149,7 @@ private void testProgress(long sortBufferBytesNum) throws Exception { LogicalPlan plan = planner.createPlan(LocalTajoTestingUtility.createDummyContext(conf), expr); LogicalNode rootNode = plan.getRootBlock().getRoot(); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); ProjectionExec proj = (ProjectionExec) exec; diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestRightOuterHashJoinExec.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestRightOuterHashJoinExec.java index 9a7eaff725..d32879968b 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestRightOuterHashJoinExec.java +++ 
b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestRightOuterHashJoinExec.java @@ -244,7 +244,7 @@ public final void testRightOuter_HashJoinExec0() throws IOException, TajoExcepti Expr expr = analyzer.parse(QUERIES[0]); LogicalNode plan = planner.createPlan(defaultContext, expr).getRootBlock().getRoot(); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; @@ -285,7 +285,7 @@ public final void testRightOuter_HashJoinExec1() throws IOException, TajoExcepti Expr expr = analyzer.parse(QUERIES[1]); LogicalNode plan = planner.createPlan(defaultContext, expr).getRootBlock().getRoot(); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; @@ -326,7 +326,7 @@ public final void testRightOuter_HashJoinExec2() throws IOException, TajoExcepti Expr expr = analyzer.parse(QUERIES[2]); LogicalNode plan = planner.createPlan(defaultContext, expr).getRootBlock().getRoot(); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestRightOuterMergeJoinExec.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestRightOuterMergeJoinExec.java index 280722b001..f54a251018 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestRightOuterMergeJoinExec.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestRightOuterMergeJoinExec.java @@ -330,7 +330,7 @@ public final void 
testRightOuterMergeJoin0() throws IOException, TajoException { LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; @@ -367,7 +367,7 @@ public final void testRightOuter_MergeJoin1() throws IOException, TajoException LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; @@ -403,7 +403,7 @@ public final void testRightOuterMergeJoin2() throws IOException, TajoException { LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; assertTrue(proj.getChild() instanceof RightOuterMergeJoinExec); @@ -439,7 +439,7 @@ public final void testRightOuter_MergeJoin3() throws IOException, TajoException LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; @@ -477,7 +477,7 @@ public final void testRightOuter_MergeJoin4() throws IOException, TajoException LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new 
PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; assertTrue(proj.getChild() instanceof RightOuterMergeJoinExec); @@ -514,7 +514,7 @@ public final void testRightOuterMergeJoin5() throws IOException, TajoException { LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; assertTrue(proj.getChild() instanceof RightOuterMergeJoinExec); diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestSortExec.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestSortExec.java index 58c2a98d04..f2276484dd 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestSortExec.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestSortExec.java @@ -129,7 +129,7 @@ public final void testNext() throws IOException, TajoException { LogicalPlan plan = planner.createPlan(LocalTajoTestingUtility.createDummyContext(conf), context); LogicalNode rootNode = optimizer.optimize(plan); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); Tuple tuple; diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestSortIntersectExec.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestSortIntersectExec.java index 66d38b7d4c..09fd99078f 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestSortIntersectExec.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestSortIntersectExec.java @@ -178,7 
+178,7 @@ public final void testSortIntersectAll() throws IOException, TajoException { optimizer.optimize(plan); LogicalNode rootNode = plan.getRootBlock().getRoot(); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); // replace an equal join with sort intersect all . @@ -245,7 +245,7 @@ public final void testSortIntersect() throws IOException, TajoException { optimizer.optimize(plan); LogicalNode rootNode = plan.getRootBlock().getRoot(); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); // replace an equal join with sort intersect all . diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/util/TestTupleUtil.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/util/TestTupleUtil.java index 4a3565e184..852a95d621 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/util/TestTupleUtil.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/util/TestTupleUtil.java @@ -24,10 +24,10 @@ import org.apache.tajo.common.TajoDataTypes.Type; import org.apache.tajo.datum.Datum; import org.apache.tajo.datum.DatumFactory; +import org.apache.tajo.plan.util.PartitionedTableUtil; import org.apache.tajo.plan.util.PlannerUtil; import org.apache.tajo.engine.planner.RangePartitionAlgorithm; import org.apache.tajo.engine.planner.UniformRangePartition; -import org.apache.tajo.plan.rewrite.rules.PartitionedTableRewriter; import org.apache.tajo.storage.*; import org.apache.tajo.storage.RowStoreUtil.RowStoreDecoder; import org.apache.tajo.storage.RowStoreUtil.RowStoreEncoder; @@ -146,43 +146,72 @@ public void testBuildTupleFromPartitionPath() { schema.addColumn("key2", Type.TEXT); Path path = new Path("hdfs://tajo/warehouse/partition_test/"); - Tuple tuple = 
PartitionedTableRewriter.buildTupleFromPartitionPath(schema, path, true); + Tuple tuple = PartitionedTableUtil.buildTupleFromPartitionPath(schema, path, true); assertNull(tuple); - tuple = PartitionedTableRewriter.buildTupleFromPartitionPath(schema, path, false); + tuple = PartitionedTableUtil.buildTupleFromPartitionPath(schema, path, false); assertNull(tuple); path = new Path("hdfs://tajo/warehouse/partition_test/key1=123"); - tuple = PartitionedTableRewriter.buildTupleFromPartitionPath(schema, path, true); + tuple = PartitionedTableUtil.buildTupleFromPartitionPath(schema, path, true); assertNotNull(tuple); assertEquals(DatumFactory.createInt8(123), tuple.asDatum(0)); - tuple = PartitionedTableRewriter.buildTupleFromPartitionPath(schema, path, false); + tuple = PartitionedTableUtil.buildTupleFromPartitionPath(schema, path, false); assertNotNull(tuple); assertEquals(DatumFactory.createInt8(123), tuple.asDatum(0)); path = new Path("hdfs://tajo/warehouse/partition_test/key1=123/part-0000"); // wrong cases; - tuple = PartitionedTableRewriter.buildTupleFromPartitionPath(schema, path, true); + tuple = PartitionedTableUtil.buildTupleFromPartitionPath(schema, path, true); assertNull(tuple); - tuple = PartitionedTableRewriter.buildTupleFromPartitionPath(schema, path, false); + tuple = PartitionedTableUtil.buildTupleFromPartitionPath(schema, path, false); assertNull(tuple); path = new Path("hdfs://tajo/warehouse/partition_test/key1=123/key2=abc"); - tuple = PartitionedTableRewriter.buildTupleFromPartitionPath(schema, path, true); + tuple = PartitionedTableUtil.buildTupleFromPartitionPath(schema, path, true); assertNotNull(tuple); assertEquals(DatumFactory.createInt8(123), tuple.asDatum(0)); assertEquals(DatumFactory.createText("abc"), tuple.asDatum(1)); - tuple = PartitionedTableRewriter.buildTupleFromPartitionPath(schema, path, false); + tuple = PartitionedTableUtil.buildTupleFromPartitionPath(schema, path, false); assertNotNull(tuple); 
assertEquals(DatumFactory.createInt8(123), tuple.asDatum(0)); assertEquals(DatumFactory.createText("abc"), tuple.asDatum(1)); path = new Path("hdfs://tajo/warehouse/partition_test/key1=123/key2=abc/part-0001"); - tuple = PartitionedTableRewriter.buildTupleFromPartitionPath(schema, path, true); + tuple = PartitionedTableUtil.buildTupleFromPartitionPath(schema, path, true); assertNull(tuple); - tuple = PartitionedTableRewriter.buildTupleFromPartitionPath(schema, path, false); + tuple = PartitionedTableUtil.buildTupleFromPartitionPath(schema, path, false); assertNotNull(tuple); assertEquals(DatumFactory.createInt8(123), tuple.asDatum(0)); assertEquals(DatumFactory.createText("abc"), tuple.asDatum(1)); } + + + @Test + public void testBuildTupleFromPartitionName() { + Schema schema = new Schema(); + schema.addColumn("key1", Type.INT8); + schema.addColumn("key2", Type.TEXT); + + Tuple tuple = PartitionedTableUtil.buildTupleFromPartitionName(schema, "key1=123", true); + assertNotNull(tuple); + assertEquals(DatumFactory.createInt8(123), tuple.asDatum(0)); + assertEquals(DatumFactory.createNullDatum(), tuple.asDatum(1)); + + tuple = PartitionedTableUtil.buildTupleFromPartitionName(schema, "key1=123", false); + assertNotNull(tuple); + assertEquals(DatumFactory.createInt8(123), tuple.asDatum(0)); + assertEquals(DatumFactory.createNullDatum(), tuple.asDatum(1)); + + tuple = PartitionedTableUtil.buildTupleFromPartitionName(schema, "key1=123/key2=abc", true); + assertNotNull(tuple); + assertEquals(DatumFactory.createInt8(123), tuple.asDatum(0)); + assertEquals(DatumFactory.createText("abc"), tuple.asDatum(1)); + + tuple = PartitionedTableUtil.buildTupleFromPartitionName(schema, "key2=abc", false); + assertNotNull(tuple); + assertEquals(DatumFactory.createNullDatum(), tuple.asDatum(0)); + assertEquals(DatumFactory.createText("abc"), tuple.asDatum(1)); + + } } diff --git a/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Hash.plan 
b/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Hash.plan index ceb96f9959..bb47f6c9c5 100644 --- a/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Hash.plan +++ b/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Hash.plan @@ -6,8 +6,8 @@ JOIN(8)(LEFT_OUTER) => out schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} => in schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} PARTITIONS_SCAN(9) on default.customer_broad_parts as c + => filter: default.c.c_custkey (INT4) < 0 => target list: default.c.c_custkey (INT4) - => num of filtered paths: 0 => out schema: {(1) default.c.c_custkey (INT4)} => in schema: {(7) default.c.c_acctbal (FLOAT8), default.c.c_address (TEXT), default.c.c_comment (TEXT), default.c.c_mktsegment (TEXT), default.c.c_name (TEXT), default.c.c_nationkey (INT4), default.c.c_phone (TEXT)} JOIN(7)(INNER) @@ -51,8 +51,8 @@ JOIN(8)(LEFT_OUTER) => out schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} => in schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} PARTITIONS_SCAN(9) on default.customer_broad_parts as c + => filter: default.c.c_custkey (INT4) < 0 => target list: default.c.c_custkey (INT4) - => num of filtered paths: 0 => out schema: {(1) default.c.c_custkey (INT4)} => in schema: {(7) default.c.c_acctbal (FLOAT8), default.c.c_address (TEXT), default.c.c_comment (TEXT), default.c.c_mktsegment (TEXT), default.c.c_name (TEXT), default.c.c_nationkey (INT4), default.c.c_phone (TEXT)} JOIN(7)(INNER) diff --git a/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Hash_NoBroadcast.plan 
b/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Hash_NoBroadcast.plan index f1fa414673..46e0b4b7ee 100644 --- a/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Hash_NoBroadcast.plan +++ b/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Hash_NoBroadcast.plan @@ -6,8 +6,8 @@ JOIN(8)(LEFT_OUTER) => out schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} => in schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} PARTITIONS_SCAN(9) on default.customer_broad_parts as c + => filter: default.c.c_custkey (INT4) < 0 => target list: default.c.c_custkey (INT4) - => num of filtered paths: 0 => out schema: {(1) default.c.c_custkey (INT4)} => in schema: {(7) default.c.c_acctbal (FLOAT8), default.c.c_address (TEXT), default.c.c_comment (TEXT), default.c.c_mktsegment (TEXT), default.c.c_name (TEXT), default.c.c_nationkey (INT4), default.c.c_phone (TEXT)} JOIN(7)(INNER) @@ -100,8 +100,8 @@ Block Id: eb_0000000000000_0000_000004 [LEAF] [q_0000000000000_0000] 4 => 5 (type=HASH_SHUFFLE, key=default.c.c_custkey (INT4), num=32) PARTITIONS_SCAN(9) on default.customer_broad_parts as c + => filter: default.c.c_custkey (INT4) < 0 => target list: default.c.c_custkey (INT4) - => num of filtered paths: 0 => out schema: {(1) default.c.c_custkey (INT4)} => in schema: {(7) default.c.c_acctbal (FLOAT8), default.c.c_address (TEXT), default.c.c_comment (TEXT), default.c.c_mktsegment (TEXT), default.c.c_name (TEXT), default.c.c_nationkey (INT4), default.c.c_phone (TEXT)} diff --git a/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Sort.plan b/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Sort.plan index ceb96f9959..bb47f6c9c5 100644 --- 
a/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Sort.plan +++ b/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Sort.plan @@ -6,8 +6,8 @@ JOIN(8)(LEFT_OUTER) => out schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} => in schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} PARTITIONS_SCAN(9) on default.customer_broad_parts as c + => filter: default.c.c_custkey (INT4) < 0 => target list: default.c.c_custkey (INT4) - => num of filtered paths: 0 => out schema: {(1) default.c.c_custkey (INT4)} => in schema: {(7) default.c.c_acctbal (FLOAT8), default.c.c_address (TEXT), default.c.c_comment (TEXT), default.c.c_mktsegment (TEXT), default.c.c_name (TEXT), default.c.c_nationkey (INT4), default.c.c_phone (TEXT)} JOIN(7)(INNER) @@ -51,8 +51,8 @@ JOIN(8)(LEFT_OUTER) => out schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} => in schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} PARTITIONS_SCAN(9) on default.customer_broad_parts as c + => filter: default.c.c_custkey (INT4) < 0 => target list: default.c.c_custkey (INT4) - => num of filtered paths: 0 => out schema: {(1) default.c.c_custkey (INT4)} => in schema: {(7) default.c.c_acctbal (FLOAT8), default.c.c_address (TEXT), default.c.c_comment (TEXT), default.c.c_mktsegment (TEXT), default.c.c_name (TEXT), default.c.c_nationkey (INT4), default.c.c_phone (TEXT)} JOIN(7)(INNER) diff --git a/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Sort_NoBroadcast.plan b/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Sort_NoBroadcast.plan index f1fa414673..46e0b4b7ee 100644 --- 
a/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Sort_NoBroadcast.plan +++ b/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Sort_NoBroadcast.plan @@ -6,8 +6,8 @@ JOIN(8)(LEFT_OUTER) => out schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} => in schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} PARTITIONS_SCAN(9) on default.customer_broad_parts as c + => filter: default.c.c_custkey (INT4) < 0 => target list: default.c.c_custkey (INT4) - => num of filtered paths: 0 => out schema: {(1) default.c.c_custkey (INT4)} => in schema: {(7) default.c.c_acctbal (FLOAT8), default.c.c_address (TEXT), default.c.c_comment (TEXT), default.c.c_mktsegment (TEXT), default.c.c_name (TEXT), default.c.c_nationkey (INT4), default.c.c_phone (TEXT)} JOIN(7)(INNER) @@ -100,8 +100,8 @@ Block Id: eb_0000000000000_0000_000004 [LEAF] [q_0000000000000_0000] 4 => 5 (type=HASH_SHUFFLE, key=default.c.c_custkey (INT4), num=32) PARTITIONS_SCAN(9) on default.customer_broad_parts as c + => filter: default.c.c_custkey (INT4) < 0 => target list: default.c.c_custkey (INT4) - => num of filtered paths: 0 => out schema: {(1) default.c.c_custkey (INT4)} => in schema: {(7) default.c.c_acctbal (FLOAT8), default.c.c_address (TEXT), default.c.c_comment (TEXT), default.c.c_mktsegment (TEXT), default.c.c_name (TEXT), default.c.c_nationkey (INT4), default.c.c_phone (TEXT)} diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/PhysicalPlannerImpl.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/PhysicalPlannerImpl.java index 52e3b89b5f..fe611cfd5e 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/PhysicalPlannerImpl.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/PhysicalPlannerImpl.java @@ -26,6 +26,7 @@ import org.apache.commons.logging.LogFactory; import 
org.apache.hadoop.fs.Path; import org.apache.tajo.SessionVars; +import org.apache.tajo.catalog.CatalogService; import org.apache.tajo.catalog.Column; import org.apache.tajo.catalog.SortSpec; import org.apache.tajo.catalog.proto.CatalogProtos; @@ -35,7 +36,7 @@ import org.apache.tajo.engine.planner.global.DataChannel; import org.apache.tajo.engine.planner.physical.*; import org.apache.tajo.engine.query.QueryContext; -import org.apache.tajo.exception.TajoInternalError; +import org.apache.tajo.exception.*; import org.apache.tajo.plan.LogicalPlan; import org.apache.tajo.plan.logical.*; import org.apache.tajo.plan.serder.LogicalNodeDeserializer; @@ -46,6 +47,8 @@ import org.apache.tajo.plan.serder.PlanProto.EnforceProperty; import org.apache.tajo.plan.serder.PlanProto.SortEnforce; import org.apache.tajo.plan.serder.PlanProto.SortedInputEnforce; +import org.apache.tajo.plan.util.FilteredPartitionInfo; +import org.apache.tajo.plan.util.PartitionedTableUtil; import org.apache.tajo.plan.util.PlannerUtil; import org.apache.tajo.storage.FileTablespace; import org.apache.tajo.storage.StorageConstants; @@ -53,6 +56,7 @@ import org.apache.tajo.storage.fragment.FileFragment; import org.apache.tajo.storage.fragment.Fragment; import org.apache.tajo.storage.fragment.FragmentConvertor; +import org.apache.tajo.storage.fragment.PartitionedFileFragment; import org.apache.tajo.unit.StorageUnit; import org.apache.tajo.util.FileUtil; import org.apache.tajo.util.StringUtils; @@ -76,9 +80,11 @@ public class PhysicalPlannerImpl implements PhysicalPlanner { private static final int UNGENERATED_PID = -1; protected final TajoConf conf; + private final CatalogService catalog; - public PhysicalPlannerImpl(final TajoConf conf) { + public PhysicalPlannerImpl(final TajoConf conf, final CatalogService catalog) { this.conf = conf; + this.catalog = catalog; } public PhysicalExec createPlan(final TaskAttemptContext context, final LogicalNode logicalPlan) { @@ -99,6 +105,16 @@ public PhysicalExec 
createPlan(final TaskAttemptContext context, final LogicalNo } } catch (IOException ioe) { throw new TajoInternalError(ioe); + } catch (UndefinedDatabaseException ude) { + throw new TajoInternalError(ude); + } catch (UndefinedTableException ute) { + throw new TajoInternalError(ute); + } catch (UndefinedPartitionMethodException upme) { + throw new TajoInternalError(upme); + } catch (UndefinedOperatorException uoe) { + throw new TajoInternalError(uoe); + } catch (UnsupportedException ue) { + throw new TajoInternalError(ue); } } @@ -117,7 +133,8 @@ private PhysicalExec buildOutputOperator(TaskAttemptContext context, LogicalNode } private PhysicalExec createPlanRecursive(TaskAttemptContext ctx, LogicalNode logicalNode, Stack stack) - throws IOException { + throws IOException, UndefinedDatabaseException, UndefinedTableException, + UndefinedPartitionMethodException, UndefinedOperatorException, UnsupportedException { PhysicalExec leftExec; PhysicalExec rightExec; @@ -441,7 +458,7 @@ private PhysicalExec createBestInnerJoinPlan(TaskAttemptContext context, JoinNod private MergeJoinExec createMergeInnerJoin(TaskAttemptContext context, JoinNode plan, PhysicalExec leftExec, PhysicalExec rightExec) throws IOException { SortSpec[][] sortSpecs = PlannerUtil.getSortKeysFromJoinQual( - plan.getJoinQual(), leftExec.getSchema(), rightExec.getSchema()); + plan.getJoinQual(), leftExec.getSchema(), rightExec.getSchema()); SortNode leftSortNode = LogicalPlan.createNodeWithoutPID(SortNode.class); leftSortNode.setSortSpecs(sortSpecs[0]); @@ -902,7 +919,8 @@ private boolean checkIfSortEquivalance(TaskAttemptContext ctx, ScanNode scanNode } public PhysicalExec createScanPlan(TaskAttemptContext ctx, ScanNode scanNode, Stack node) - throws IOException { + throws IOException, UndefinedDatabaseException, UndefinedTableException, + UndefinedPartitionMethodException, UndefinedOperatorException, UnsupportedException { // check if an input is sorted in the same order to the subsequence sort 
operator. // TODO - it works only if input files are raw files. We should check the file format. // Since the default intermediate file format is raw file, it is not problem right now. @@ -924,24 +942,14 @@ public PhysicalExec createScanPlan(TaskAttemptContext ctx, ScanNode scanNode, St } } - if (scanNode instanceof PartitionedTableScanNode - && ((PartitionedTableScanNode)scanNode).getInputPaths() != null && - ((PartitionedTableScanNode)scanNode).getInputPaths().length > 0) { - + if (scanNode instanceof PartitionedTableScanNode) { if (broadcastFlag) { - PartitionedTableScanNode partitionedTableScanNode = (PartitionedTableScanNode) scanNode; - List fileFragments = TUtil.newList(); - - FileTablespace space = (FileTablespace) TablespaceManager.get(scanNode.getTableDesc().getUri()); - for (Path path : partitionedTableScanNode.getInputPaths()) { - fileFragments.addAll(TUtil.newList(space.split(scanNode.getCanonicalName(), path))); + FragmentProto [] fragments = ctx.getTables(scanNode.getCanonicalName()); + if (fragments == null) { + return new SeqScanExec(ctx, scanNode, null); + } else { + return new PartitionMergeScanExec(ctx, scanNode, fragments); } - - FragmentProto[] fragments = - FragmentConvertor.toFragmentProtoArray(fileFragments.toArray(new FileFragment[fileFragments.size()])); - - ctx.addFragments(scanNode.getCanonicalName(), fragments); - return new PartitionMergeScanExec(ctx, scanNode, fragments); } } diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/rewriter/rules/BroadcastJoinRule.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/rewriter/rules/BroadcastJoinRule.java index 2f5d09c6b9..65eee1855b 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/rewriter/rules/BroadcastJoinRule.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/rewriter/rules/BroadcastJoinRule.java @@ -329,12 +329,7 @@ private long estimateOutputVolumeInternal(LogicalNode node) throws TajoInternalE // 
broadcast method. return Long.MAX_VALUE; } else { - // if there is no selected partition - if (pScanNode.getInputPaths() == null || pScanNode.getInputPaths().length == 0) { - return 0; - } else { - return pScanNode.getTableDesc().getStats().getNumBytes(); - } + return pScanNode.getTableDesc().getStats().getNumBytes(); } case TABLE_SUBQUERY: return estimateOutputVolumeInternal(((TableSubQueryNode) node).getSubQuery()); diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/rewriter/rules/GlobalPlanRewriteUtil.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/rewriter/rules/GlobalPlanRewriteUtil.java index b13cb0f1a8..908d32465b 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/rewriter/rules/GlobalPlanRewriteUtil.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/rewriter/rules/GlobalPlanRewriteUtil.java @@ -97,13 +97,6 @@ public static void replaceChild(LogicalNode newChild, ScanNode originalChild, Lo public static long getTableVolume(ScanNode scanNode) { if (scanNode.getTableDesc().hasStats()) { long scanBytes = scanNode.getTableDesc().getStats().getNumBytes(); - if (scanNode.getType() == NodeType.PARTITIONS_SCAN) { - PartitionedTableScanNode pScanNode = (PartitionedTableScanNode) scanNode; - if (pScanNode.getInputPaths() == null || pScanNode.getInputPaths().length == 0) { - scanBytes = 0L; - } - } - return scanBytes; } else { return -1; @@ -133,12 +126,7 @@ public static long computeDescendentVolume(LogicalNode node) { // broadcast method. 
return Long.MAX_VALUE; } else { - // if there is no selected partition - if (pScanNode.getInputPaths() == null || pScanNode.getInputPaths().length == 0) { - return 0; - } else { - return pScanNode.getTableDesc().getStats().getNumBytes(); - } + return pScanNode.getTableDesc().getStats().getNumBytes(); } case TABLE_SUBQUERY: return computeDescendentVolume(((TableSubQueryNode) node).getSubQuery()); diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java index 2572e1db8d..08cec4537e 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java @@ -34,10 +34,12 @@ import org.apache.tajo.plan.expr.FieldEval; import org.apache.tajo.plan.logical.ScanNode; import org.apache.tajo.plan.rewrite.rules.PartitionedTableRewriter; +import org.apache.tajo.plan.util.PartitionedTableUtil; import org.apache.tajo.plan.util.PlannerUtil; import org.apache.tajo.storage.*; import org.apache.tajo.storage.fragment.FileFragment; import org.apache.tajo.storage.fragment.FragmentConvertor; +import org.apache.tajo.storage.fragment.PartitionedFileFragment; import org.apache.tajo.worker.TaskAttemptContext; import java.io.IOException; @@ -98,11 +100,12 @@ private void rewriteColumnPartitionedTableSchema() throws IOException { Tuple partitionRow = null; if (fragments != null && fragments.length > 0) { - List fileFragments = FragmentConvertor.convert(FileFragment.class, fragments); + List partitionedFileFragments = FragmentConvertor.convert(PartitionedFileFragment + .class, fragments); // Get a partition key value from a given path - partitionRow = PartitionedTableRewriter.buildTupleFromPartitionPath( - columnPartitionSchema, fileFragments.get(0).getPath(), false); + partitionRow = PartitionedTableUtil.buildTupleFromPartitionName(columnPartitionSchema, + 
partitionedFileFragments.get(0).getPartitionName(), false); } // Targets or search conditions may contain column references. diff --git a/tajo-core/src/main/java/org/apache/tajo/master/TajoMasterClientService.java b/tajo-core/src/main/java/org/apache/tajo/master/TajoMasterClientService.java index 017b17fdc3..fb37c5f69b 100644 --- a/tajo-core/src/main/java/org/apache/tajo/master/TajoMasterClientService.java +++ b/tajo-core/src/main/java/org/apache/tajo/master/TajoMasterClientService.java @@ -560,10 +560,10 @@ public GetQueryResultDataResponse getQueryResultData(RpcController controller, G if(request.hasCompressCodec()) { queryResultScanner = new NonForwardQueryResultFileScanner(context.getConf(), session.getSessionId(), - queryId, scanNode, Integer.MAX_VALUE, request.getCompressCodec()); + queryId, scanNode, Integer.MAX_VALUE, request.getCompressCodec(), catalog); } else { queryResultScanner = new NonForwardQueryResultFileScanner(context.getConf(), - session.getSessionId(), queryId, scanNode, Integer.MAX_VALUE); + session.getSessionId(), queryId, scanNode, Integer.MAX_VALUE, catalog); } queryResultScanner.init(); diff --git a/tajo-core/src/main/java/org/apache/tajo/master/exec/DDLExecutor.java b/tajo-core/src/main/java/org/apache/tajo/master/exec/DDLExecutor.java index da19625341..00bed0d4e8 100644 --- a/tajo-core/src/main/java/org/apache/tajo/master/exec/DDLExecutor.java +++ b/tajo-core/src/main/java/org/apache/tajo/master/exec/DDLExecutor.java @@ -41,6 +41,7 @@ import org.apache.tajo.plan.LogicalPlan; import org.apache.tajo.plan.logical.*; import org.apache.tajo.plan.rewrite.rules.PartitionedTableRewriter; +import org.apache.tajo.plan.util.PartitionedTableUtil; import org.apache.tajo.plan.util.PlannerUtil; import org.apache.tajo.storage.FileTablespace; import org.apache.tajo.storage.StorageUtil; @@ -600,7 +601,7 @@ public void repairPartition(TajoMaster.MasterContext context, final QueryContext Schema partitionColumns = partitionDesc.getExpressionSchema(); // Get 
the array of path filter, accepting all partition paths. - PathFilter[] filters = PartitionedTableRewriter.buildAllAcceptingPathFilters(partitionColumns); + PathFilter[] filters = PartitionedTableUtil.buildAllAcceptingPathFilters(partitionColumns); // loop from one to the number of partition columns Path [] filteredPaths = toPathArray(fs.listStatus(tablePath, filters[0])); @@ -627,7 +628,7 @@ public void repairPartition(TajoMaster.MasterContext context, final QueryContext List targetPartitions = TUtil.newList(); for(Path filteredPath : filteredPaths) { - int startIdx = filteredPath.toString().indexOf(PartitionedTableRewriter.getColumnPartitionPathPrefix + int startIdx = filteredPath.toString().indexOf(PartitionedTableUtil.getColumnPartitionPathPrefix (partitionColumns)); // if there is partition column in the path diff --git a/tajo-core/src/main/java/org/apache/tajo/master/exec/ExplainPlanPreprocessorForTest.java b/tajo-core/src/main/java/org/apache/tajo/master/exec/ExplainPlanPreprocessorForTest.java index b747849681..8848a80d15 100644 --- a/tajo-core/src/main/java/org/apache/tajo/master/exec/ExplainPlanPreprocessorForTest.java +++ b/tajo-core/src/main/java/org/apache/tajo/master/exec/ExplainPlanPreprocessorForTest.java @@ -116,9 +116,6 @@ public LogicalNode visitPartitionedTableScan(PlanShapeFixerContext context, Logi throws TajoException { super.visitPartitionedTableScan(context, plan, block, node, stack); context.childNumbers.push(1); - Path[] inputPaths = node.getInputPaths(); - Arrays.sort(inputPaths); - node.setInputPaths(inputPaths); if (node.hasTargets()) { node.setTargets(sortTargets(node.getTargets())); } diff --git a/tajo-core/src/main/java/org/apache/tajo/master/exec/NonForwardQueryResultFileScanner.java b/tajo-core/src/main/java/org/apache/tajo/master/exec/NonForwardQueryResultFileScanner.java index ce692704a3..3345b16437 100644 --- a/tajo-core/src/main/java/org/apache/tajo/master/exec/NonForwardQueryResultFileScanner.java +++ 
b/tajo-core/src/main/java/org/apache/tajo/master/exec/NonForwardQueryResultFileScanner.java @@ -28,6 +28,7 @@ import org.apache.tajo.QueryId; import org.apache.tajo.TaskAttemptId; import org.apache.tajo.TaskId; +import org.apache.tajo.catalog.CatalogService; import org.apache.tajo.catalog.Schema; import org.apache.tajo.catalog.SchemaUtil; import org.apache.tajo.catalog.TableDesc; @@ -80,14 +81,15 @@ public class NonForwardQueryResultFileScanner implements NonForwardQueryResultSc private ExecutorService executor; private MemoryRowBlock rowBlock; private Future nextFetch; + private CatalogService catalog; public NonForwardQueryResultFileScanner(TajoConf tajoConf, String sessionId, QueryId queryId, ScanNode scanNode, - int maxRow) throws IOException { - this(tajoConf, sessionId, queryId, scanNode, maxRow, null); + int maxRow, CatalogService catalog) throws IOException { + this(tajoConf, sessionId, queryId, scanNode, maxRow, null, catalog); } public NonForwardQueryResultFileScanner(TajoConf tajoConf, String sessionId, QueryId queryId, ScanNode scanNode, - int maxRow, CodecType codecType) throws IOException { + int maxRow, CodecType codecType, CatalogService catalog) throws IOException { this.tajoConf = tajoConf; this.sessionId = sessionId; this.queryId = queryId; @@ -96,6 +98,7 @@ public NonForwardQueryResultFileScanner(TajoConf tajoConf, String sessionId, Que this.maxRow = maxRow; this.rowEncoder = RowStoreUtil.createEncoder(scanNode.getOutSchema()); this.codecType = codecType; + this.catalog = catalog; } public void init() throws IOException, TajoException { @@ -108,7 +111,8 @@ private void initSeqScanExec() throws IOException, TajoException { List fragments = Lists.newArrayList(); if (tableDesc.hasPartition()) { FileTablespace fileTablespace = TUtil.checkTypeAndGet(tablespace, FileTablespace.class); - fragments.addAll(Repartitioner.getFragmentsFromPartitionedTable(fileTablespace, scanNode, tableDesc)); + 
fragments.addAll(Repartitioner.getFragmentsFromPartitionedTable(catalog, tajoConf, fileTablespace, scanNode, + tableDesc)); } else { fragments.addAll(tablespace.getSplits(tableDesc.getName(), tableDesc, scanNode.getQual())); } diff --git a/tajo-core/src/main/java/org/apache/tajo/master/exec/NonForwardQueryResultSystemScanner.java b/tajo-core/src/main/java/org/apache/tajo/master/exec/NonForwardQueryResultSystemScanner.java index 7f6db9bf59..79552c8848 100644 --- a/tajo-core/src/main/java/org/apache/tajo/master/exec/NonForwardQueryResultSystemScanner.java +++ b/tajo-core/src/main/java/org/apache/tajo/master/exec/NonForwardQueryResultSystemScanner.java @@ -701,7 +701,7 @@ public int getCurrentRowNumber() { class SimplePhysicalPlannerImpl extends PhysicalPlannerImpl { public SimplePhysicalPlannerImpl(TajoConf conf) { - super(conf); + super(conf, masterContext.getCatalog()); } @Override diff --git a/tajo-core/src/main/java/org/apache/tajo/master/exec/QueryExecutor.java b/tajo-core/src/main/java/org/apache/tajo/master/exec/QueryExecutor.java index e260c003fd..4908034614 100644 --- a/tajo-core/src/main/java/org/apache/tajo/master/exec/QueryExecutor.java +++ b/tajo-core/src/main/java/org/apache/tajo/master/exec/QueryExecutor.java @@ -302,7 +302,7 @@ public void execSimpleQuery(QueryContext queryContext, Session session, String q plan.getRootBlock().getRoot()); final NonForwardQueryResultScanner queryResultScanner = new NonForwardQueryResultFileScanner( - context.getConf(), session.getSessionId(), queryInfo.getQueryId(), scanNode, maxRow); + context.getConf(), session.getSessionId(), queryInfo.getQueryId(), scanNode, maxRow, catalog); queryResultScanner.init(); session.addNonForwardQueryResultScanner(queryResultScanner); diff --git a/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java b/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java index 4f62ea6af4..a239186de9 100644 --- 
a/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java +++ b/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java @@ -30,6 +30,7 @@ import org.apache.tajo.catalog.*; import org.apache.tajo.catalog.statistics.StatisticsUtil; import org.apache.tajo.catalog.statistics.TableStats; +import org.apache.tajo.conf.TajoConf; import org.apache.tajo.conf.TajoConf.ConfVars; import org.apache.tajo.engine.planner.PhysicalPlannerImpl; import org.apache.tajo.engine.planner.RangePartitionAlgorithm; @@ -40,13 +41,13 @@ import org.apache.tajo.engine.planner.global.MasterPlan; import org.apache.tajo.engine.planner.global.rewriter.rules.GlobalPlanRewriteUtil; import org.apache.tajo.engine.utils.TupleUtil; -import org.apache.tajo.exception.TajoException; -import org.apache.tajo.exception.TajoInternalError; -import org.apache.tajo.exception.UndefinedTableException; +import org.apache.tajo.exception.*; import org.apache.tajo.plan.logical.*; import org.apache.tajo.plan.logical.SortNode.SortPurpose; import org.apache.tajo.plan.serder.PlanProto.DistinctGroupbyEnforcer.MultipleAggregationStage; import org.apache.tajo.plan.serder.PlanProto.EnforceProperty; +import org.apache.tajo.plan.util.FilteredPartitionInfo; +import org.apache.tajo.plan.util.PartitionedTableUtil; import org.apache.tajo.plan.util.PlannerUtil; import org.apache.tajo.querymaster.Task.IntermediateEntry; import org.apache.tajo.storage.*; @@ -375,19 +376,13 @@ private static void scheduleSymmetricRepartitionJoin(QueryMasterTask.QueryMaster //If there are more than one data files, that files should be added to fragments or partition path for (ScanNode eachScan: broadcastScans) { - - Path[] partitionScanPaths = null; TableDesc tableDesc = masterContext.getTableDesc(eachScan); Tablespace space = TablespaceManager.get(tableDesc.getUri()); if (eachScan.getType() == NodeType.PARTITIONS_SCAN) { - - PartitionedTableScanNode partitionScan = (PartitionedTableScanNode)eachScan; - partitionScanPaths = 
partitionScan.getInputPaths(); - // set null to inputPaths in getFragmentsFromPartitionedTable() - getFragmentsFromPartitionedTable((FileTablespace) space, eachScan, tableDesc); - partitionScan.setInputPaths(partitionScanPaths); - + CatalogService catalog = stage.getContext().getQueryMasterContext().getWorkerContext().getCatalog(); + TajoConf tajoConf = masterContext.getConf(); + getFragmentsFromPartitionedTable(catalog, tajoConf, (FileTablespace) space, eachScan, tableDesc); } else { Collection scanFragments = @@ -461,18 +456,25 @@ public static Map>> merge /** * It creates a number of fragments for all partitions. */ - public static List getFragmentsFromPartitionedTable(Tablespace tsHandler, - ScanNode scan, - TableDesc table) throws IOException { + public static List getFragmentsFromPartitionedTable(CatalogService catalog, TajoConf conf, + Tablespace tsHandler, ScanNode scan, TableDesc table) throws IOException, UndefinedDatabaseException, + UndefinedTableException, UndefinedPartitionMethodException, UndefinedOperatorException, UnsupportedException { Preconditions.checkArgument(tsHandler instanceof FileTablespace, "tsHandler must be FileTablespace"); if (!(scan instanceof PartitionedTableScanNode)) { throw new IllegalArgumentException("scan should be a PartitionedTableScanNode type."); } + List fragments = Lists.newArrayList(); PartitionedTableScanNode partitionsScan = (PartitionedTableScanNode) scan; + + FilteredPartitionInfo filteredPartitionInfo = PartitionedTableUtil.findFilteredPartitionInfo(catalog, conf, + partitionsScan); + fragments.addAll(((FileTablespace) tsHandler).getSplits( - scan.getCanonicalName(), table.getMeta(), table.getSchema(), partitionsScan.getInputPaths())); - partitionsScan.setInputPaths(null); + scan.getCanonicalName(), table.getMeta(), table.getSchema(), filteredPartitionInfo.getPartitionNames(), + filteredPartitionInfo.getPartitionPaths())); + + 
partitionsScan.getTableDesc().getStats().setNumBytes(filteredPartitionInfo.getTotalVolume()); return fragments; } @@ -504,16 +506,13 @@ private static void scheduleLeafTasksWithBroadcastTable(TaskSchedulerContext sch TableDesc desc = stage.getContext().getTableDesc(scan); Collection scanFragments; - Path[] partitionScanPaths = null; - - Tablespace space = TablespaceManager.get(desc.getUri()); if (scan.getType() == NodeType.PARTITIONS_SCAN) { - PartitionedTableScanNode partitionScan = (PartitionedTableScanNode) scan; - partitionScanPaths = partitionScan.getInputPaths(); // set null to inputPaths in getFragmentsFromPartitionedTable() - scanFragments = getFragmentsFromPartitionedTable(space, scan, desc); + CatalogService catalog = stage.getContext().getQueryMasterContext().getWorkerContext().getCatalog(); + TajoConf tajoConf = stage.getContext().getConf(); + scanFragments = getFragmentsFromPartitionedTable(catalog, tajoConf, space, scan, desc); } else { scanFragments = space.getSplits(scan.getCanonicalName(), desc, scan.getQual()); } @@ -522,13 +521,7 @@ private static void scheduleLeafTasksWithBroadcastTable(TaskSchedulerContext sch if (i == baseScanId) { baseFragments = scanFragments; } else { - if (scan.getType() == NodeType.PARTITIONS_SCAN) { - PartitionedTableScanNode partitionScan = (PartitionedTableScanNode)scan; - // PhisicalPlanner make PartitionMergeScanExec when table is boradcast table and inputpaths is not empty - partitionScan.setInputPaths(partitionScanPaths); - } else { - broadcastFragments.addAll(scanFragments); - } + broadcastFragments.addAll(scanFragments); } } } diff --git a/tajo-core/src/main/java/org/apache/tajo/querymaster/Stage.java b/tajo-core/src/main/java/org/apache/tajo/querymaster/Stage.java index 0aaf92befa..32684aacc2 100644 --- a/tajo-core/src/main/java/org/apache/tajo/querymaster/Stage.java +++ b/tajo-core/src/main/java/org/apache/tajo/querymaster/Stage.java @@ -28,10 +28,7 @@ import org.apache.hadoop.yarn.event.EventHandler; import 
org.apache.hadoop.yarn.state.*; import org.apache.tajo.*; -import org.apache.tajo.catalog.CatalogUtil; -import org.apache.tajo.catalog.Schema; -import org.apache.tajo.catalog.TableDesc; -import org.apache.tajo.catalog.TableMeta; +import org.apache.tajo.catalog.*; import org.apache.tajo.catalog.proto.CatalogProtos.PartitionDescProto; import org.apache.tajo.catalog.statistics.ColumnStats; import org.apache.tajo.catalog.statistics.StatisticsUtil; @@ -1146,7 +1143,10 @@ private static void scheduleFragmentsForLeafQuery(Stage stage) throws IOExceptio // Also, we can ensure FileTableSpace if the type of ScanNode is PARTITIONS_SCAN. if (scan.getType() == NodeType.PARTITIONS_SCAN) { // After calling this method, partition paths are removed from the physical plan. - fragments = Repartitioner.getFragmentsFromPartitionedTable((FileTablespace) tablespace, scan, table); + CatalogService catalog = stage.getContext().getQueryMasterContext().getWorkerContext().getCatalog(); + TajoConf tajoConf = stage.getContext().getConf(); + fragments = Repartitioner.getFragmentsFromPartitionedTable(catalog, tajoConf, (FileTablespace) tablespace, + scan, table); } else { fragments = tablespace.getSplits(scan.getCanonicalName(), table, scan.getQual()); } diff --git a/tajo-core/src/main/java/org/apache/tajo/worker/ExecutionBlockContext.java b/tajo-core/src/main/java/org/apache/tajo/worker/ExecutionBlockContext.java index db28433cdf..c419f8eb9b 100644 --- a/tajo-core/src/main/java/org/apache/tajo/worker/ExecutionBlockContext.java +++ b/tajo-core/src/main/java/org/apache/tajo/worker/ExecutionBlockContext.java @@ -105,7 +105,7 @@ public ExecutionBlockContext(TajoWorker.WorkerContext workerContext, ExecutionBl // Setup QueryEngine according to the query plan // Here, we can setup row-based query engine or columnar query engine. 
- this.queryEngine = new TajoQueryEngine(systemConf); + this.queryEngine = new TajoQueryEngine(systemConf, workerContext.getCatalog()); this.queryContext = new QueryContext(workerContext.getConf(), request.getQueryContext()); this.plan = request.getPlanJson(); this.resource = new ExecutionBlockSharedResource(); diff --git a/tajo-core/src/main/java/org/apache/tajo/worker/TajoQueryEngine.java b/tajo-core/src/main/java/org/apache/tajo/worker/TajoQueryEngine.java index e7e81d4217..4c2feae830 100644 --- a/tajo-core/src/main/java/org/apache/tajo/worker/TajoQueryEngine.java +++ b/tajo-core/src/main/java/org/apache/tajo/worker/TajoQueryEngine.java @@ -18,6 +18,7 @@ package org.apache.tajo.worker; +import org.apache.tajo.catalog.CatalogService; import org.apache.tajo.conf.TajoConf; import org.apache.tajo.engine.planner.PhysicalPlanner; import org.apache.tajo.engine.planner.PhysicalPlannerImpl; @@ -30,8 +31,8 @@ public class TajoQueryEngine { private final PhysicalPlanner phyPlanner; - public TajoQueryEngine(TajoConf conf) throws IOException { - this.phyPlanner = new PhysicalPlannerImpl(conf); + public TajoQueryEngine(TajoConf conf, CatalogService catalog) throws IOException { + this.phyPlanner = new PhysicalPlannerImpl(conf, catalog); } public PhysicalExec createPlan(TaskAttemptContext ctx, LogicalNode plan) { diff --git a/tajo-core/src/main/java/org/apache/tajo/ws/rs/resources/QueryResultResource.java b/tajo-core/src/main/java/org/apache/tajo/ws/rs/resources/QueryResultResource.java index 243806051a..4473a44d1d 100644 --- a/tajo-core/src/main/java/org/apache/tajo/ws/rs/resources/QueryResultResource.java +++ b/tajo-core/src/main/java/org/apache/tajo/ws/rs/resources/QueryResultResource.java @@ -134,7 +134,7 @@ private static NonForwardQueryResultScanner getNonForwardQueryResultScanner( } resultScanner = new NonForwardQueryResultFileScanner(masterContext.getConf(), session.getSessionId(), queryId, - scanNode, Integer.MAX_VALUE); + scanNode, Integer.MAX_VALUE, 
masterContext.getCatalog()); resultScanner.init(); session.addNonForwardQueryResultScanner(resultScanner); } diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java b/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java index a4bb94ced9..5733a0edb3 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java @@ -28,33 +28,34 @@ import org.apache.tajo.util.TUtil; public class PartitionedTableScanNode extends ScanNode { - @Expose Path [] inputPaths; - public PartitionedTableScanNode(int pid) { super(pid, NodeType.PARTITIONS_SCAN); } - public void init(ScanNode scanNode, Path[] inputPaths) { + public void init(ScanNode scanNode) { tableDesc = scanNode.tableDesc; setInSchema(scanNode.getInSchema()); setOutSchema(scanNode.getOutSchema()); this.qual = scanNode.qual; this.targets = scanNode.targets; - this.inputPaths = inputPaths; if (scanNode.hasAlias()) { alias = scanNode.alias; } } - public void setInputPaths(Path [] paths) { - this.inputPaths = paths; - } + public void init(ScanNode scanNode, Path[] inputPaths) { + tableDesc = scanNode.tableDesc; + setInSchema(scanNode.getInSchema()); + setOutSchema(scanNode.getOutSchema()); + this.qual = scanNode.qual; + this.targets = scanNode.targets; - public Path [] getInputPaths() { - return inputPaths; + if (scanNode.hasAlias()) { + alias = scanNode.alias; + } } - + public String toString() { StringBuilder sb = new StringBuilder("Partitions Scan (table=").append(getTableName()); if (hasAlias()) { @@ -81,8 +82,7 @@ public boolean equals(Object obj) { eq = eq && TUtil.checkEquals(this.tableDesc, other.tableDesc); eq = eq && TUtil.checkEquals(this.qual, other.qual); eq = eq && TUtil.checkEquals(this.targets, other.targets); - eq = eq && TUtil.checkEquals(this.inputPaths, other.inputPaths); - + return eq; } @@ -105,9 +105,6 @@ public Object 
clone() throws CloneNotSupportedException { unionScan.targets[i] = (Target) targets[i].clone(); } } - - unionScan.inputPaths = inputPaths; - return unionScan; } @@ -146,14 +143,6 @@ public PlanString getPlanString() { planStr.addDetail("out schema: ").appendDetail(getOutSchema().toString()); planStr.addDetail("in schema: ").appendDetail(getInSchema().toString()); - if (inputPaths != null) { - planStr.addExplan("num of filtered paths: ").appendExplain(""+ inputPaths.length); - int i = 0; - for (Path path : inputPaths) { - planStr.addDetail((i++) + ": ").appendDetail(path.toString()); - } - } - return planStr; } } diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java index 5e1ca2d46e..6c53d8a319 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java @@ -18,40 +18,19 @@ package org.apache.tajo.plan.rewrite.rules; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.fs.*; import org.apache.tajo.OverridableConf; import org.apache.tajo.catalog.*; -import org.apache.tajo.catalog.partition.PartitionMethodDesc; -import org.apache.tajo.catalog.proto.CatalogProtos.PartitionsByAlgebraProto; -import org.apache.tajo.catalog.proto.CatalogProtos.PartitionDescProto; -import org.apache.tajo.datum.DatumFactory; -import org.apache.tajo.datum.NullDatum; import org.apache.tajo.exception.*; import org.apache.tajo.plan.LogicalPlan; -import org.apache.tajo.plan.expr.*; import org.apache.tajo.plan.logical.*; import org.apache.tajo.plan.rewrite.LogicalPlanRewriteRule; import org.apache.tajo.plan.rewrite.LogicalPlanRewriteRuleContext; -import 
org.apache.tajo.plan.util.EvalNodeToExprConverter; import org.apache.tajo.plan.util.PlannerUtil; import org.apache.tajo.plan.visitor.BasicLogicalPlanVisitor; -import org.apache.tajo.storage.Tuple; -import org.apache.tajo.storage.VTuple; -import org.apache.tajo.util.StringUtils; -import java.io.IOException; import java.util.*; public class PartitionedTableRewriter implements LogicalPlanRewriteRule { - private CatalogService catalog; - private long totalVolume; - - private static final Log LOG = LogFactory.getLog(PartitionedTableRewriter.class); - private static final String NAME = "Partitioned Table Rewriter"; private final Rewriter rewriter = new Rewriter(); @@ -79,430 +58,10 @@ public boolean isEligible(LogicalPlanRewriteRuleContext context) { public LogicalPlan rewrite(LogicalPlanRewriteRuleContext context) throws TajoException { LogicalPlan plan = context.getPlan(); LogicalPlan.QueryBlock rootBlock = plan.getRootBlock(); - this.catalog = context.getCatalog(); rewriter.visit(context.getQueryContext(), plan, rootBlock, rootBlock.getRoot(), new Stack<>()); return plan; } - private static class PartitionPathFilter implements PathFilter { - - private Schema schema; - private EvalNode partitionFilter; - public PartitionPathFilter(Schema schema, EvalNode partitionFilter) { - this.schema = schema; - this.partitionFilter = partitionFilter; - partitionFilter.bind(null, schema); - } - - @Override - public boolean accept(Path path) { - Tuple tuple = buildTupleFromPartitionPath(schema, path, true); - if (tuple == null) { // if it is a file or not acceptable file - return false; - } - - return partitionFilter.eval(tuple).asBool(); - } - - @Override - public String toString() { - return partitionFilter.toString(); - } - } - - private Path [] findFilteredPaths(OverridableConf queryContext, String tableName, - Schema partitionColumns, EvalNode [] conjunctiveForms, Path tablePath) - throws IOException, UndefinedDatabaseException, UndefinedTableException, 
UndefinedPartitionMethodException, - UndefinedOperatorException, UnsupportedException { - return findFilteredPaths(queryContext, tableName, partitionColumns, conjunctiveForms, tablePath, null); - } - - /** - * It assumes that each conjunctive form corresponds to one column. - * - * @param partitionColumns - * @param conjunctiveForms search condition corresponding to partition columns. - * If it is NULL, it means that there is no search condition for this table. - * @param tablePath - * @return - * @throws IOException - */ - private Path [] findFilteredPaths(OverridableConf queryContext, String tableName, - Schema partitionColumns, EvalNode [] conjunctiveForms, Path tablePath, ScanNode scanNode) - throws IOException, UndefinedDatabaseException, UndefinedTableException, UndefinedPartitionMethodException, - UndefinedOperatorException, UnsupportedException { - - Path [] filteredPaths = null; - FileSystem fs = tablePath.getFileSystem(queryContext.getConf()); - String [] splits = CatalogUtil.splitFQTableName(tableName); - List partitions = null; - - try { - if (conjunctiveForms == null) { - partitions = catalog.getPartitionsOfTable(splits[0], splits[1]); - if (partitions.isEmpty()) { - filteredPaths = findFilteredPathsFromFileSystem(partitionColumns, conjunctiveForms, fs, tablePath); - } else { - filteredPaths = findFilteredPathsByPartitionDesc(partitions); - } - } else { - if (catalog.existPartitions(splits[0], splits[1])) { - PartitionsByAlgebraProto request = getPartitionsAlgebraProto(splits[0], splits[1], conjunctiveForms); - partitions = catalog.getPartitionsByAlgebra(request); - filteredPaths = findFilteredPathsByPartitionDesc(partitions); - } else { - filteredPaths = findFilteredPathsFromFileSystem(partitionColumns, conjunctiveForms, fs, tablePath); - } - } - } catch (UnsupportedException ue) { - // Partial catalog might not allow some filter conditions. For example, HiveMetastore doesn't In statement, - // regexp statement and so on. 
Above case, Tajo need to build filtered path by listing hdfs directories. - LOG.warn(ue.getMessage()); - partitions = catalog.getPartitionsOfTable(splits[0], splits[1]); - if (partitions.isEmpty()) { - filteredPaths = findFilteredPathsFromFileSystem(partitionColumns, conjunctiveForms, fs, tablePath); - } else { - filteredPaths = findFilteredPathsByPartitionDesc(partitions); - } - scanNode.setQual(AlgebraicUtil.createSingletonExprFromCNF(conjunctiveForms)); - } - - LOG.info("Filtered directory or files: " + filteredPaths.length); - return filteredPaths; - } - - /** - * Build list of partition path by PartitionDescProto which is generated from CatalogStore. - * - * @param partitions - * @return - */ - private Path[] findFilteredPathsByPartitionDesc(List partitions) { - Path [] filteredPaths = new Path[partitions.size()]; - for (int i = 0; i < partitions.size(); i++) { - PartitionDescProto partition = partitions.get(i); - filteredPaths[i] = new Path(partition.getPath()); - totalVolume += partition.getNumBytes(); - } - return filteredPaths; - } - - /** - * Build list of partition path by filtering directories in the given table path. - * - * - * @param partitionColumns - * @param conjunctiveForms - * @param fs - * @param tablePath - * @return - * @throws IOException - */ - private Path [] findFilteredPathsFromFileSystem(Schema partitionColumns, EvalNode [] conjunctiveForms, - FileSystem fs, Path tablePath) throws IOException{ - Path [] filteredPaths = null; - PathFilter [] filters; - - if (conjunctiveForms == null) { - filters = buildAllAcceptingPathFilters(partitionColumns); - } else { - filters = buildPathFiltersForAllLevels(partitionColumns, conjunctiveForms); - } - - // loop from one to the number of partition columns - filteredPaths = toPathArray(fs.listStatus(tablePath, filters[0])); - - for (int i = 1; i < partitionColumns.size(); i++) { - // Get all file status matched to a ith level path filter. 
- filteredPaths = toPathArray(fs.listStatus(filteredPaths, filters[i])); - } - return filteredPaths; - } - - /** - * Build algebra expressions for querying partitions and partition keys by using EvalNodeToExprConverter. - * - * @param databaseName the database name - * @param tableName the table name - * @param conjunctiveForms EvalNode which contains filter conditions - * @return - */ - public static PartitionsByAlgebraProto getPartitionsAlgebraProto( - String databaseName, String tableName, EvalNode [] conjunctiveForms) { - - PartitionsByAlgebraProto.Builder request = PartitionsByAlgebraProto.newBuilder(); - request.setDatabaseName(databaseName); - request.setTableName(tableName); - - if (conjunctiveForms != null) { - EvalNode evalNode = AlgebraicUtil.createSingletonExprFromCNF(conjunctiveForms); - EvalNodeToExprConverter convertor = new EvalNodeToExprConverter(databaseName + "." + tableName); - convertor.visit(null, evalNode, new Stack<>()); - request.setAlgebra(convertor.getResult().toJson()); - } else { - request.setAlgebra(""); - } - - return request.build(); - } - - /** - * Build path filters for all levels with a list of filter conditions. - * - * For example, consider you have a partitioned table for three columns (i.e., col1, col2, col3). - * Then, this methods will create three path filters for (col1), (col1, col2), (col1, col2, col3). - * - * Corresponding filter conditions will be placed on each path filter, - * If there is no corresponding expression for certain column, - * The condition will be filled with a true value. - * - * Assume that an user gives a condition WHERE col1 ='A' and col3 = 'C'. - * There is no filter condition corresponding to col2. 
- * Then, the path filter conditions are corresponding to the followings: - * - * The first path filter: col1 = 'A' - * The second path filter: col1 = 'A' AND col2 IS NOT NULL - * The third path filter: col1 = 'A' AND col2 IS NOT NULL AND col3 = 'C' - * - * 'IS NOT NULL' predicate is always true against the partition path. - * - * @param partitionColumns - * @param conjunctiveForms - * @return - */ - private static PathFilter [] buildPathFiltersForAllLevels(Schema partitionColumns, - EvalNode [] conjunctiveForms) { - // Building partition path filters for all levels - Column target; - PathFilter [] filters = new PathFilter[partitionColumns.size()]; - List accumulatedFilters = Lists.newArrayList(); - for (int i = 0; i < partitionColumns.size(); i++) { // loop from one to level - target = partitionColumns.getColumn(i); - - for (EvalNode expr : conjunctiveForms) { - if (EvalTreeUtil.findUniqueColumns(expr).contains(target)) { - // Accumulate one qual per level - accumulatedFilters.add(expr); - } - } - - if (accumulatedFilters.size() < (i + 1)) { - accumulatedFilters.add(new IsNullEval(true, new FieldEval(target))); - } - - EvalNode filterPerLevel = AlgebraicUtil.createSingletonExprFromCNF( - accumulatedFilters.toArray(new EvalNode[accumulatedFilters.size()])); - filters[i] = new PartitionPathFilter(partitionColumns, filterPerLevel); - } - - return filters; - } - - /** - * Build an array of path filters for all levels with all accepting filter condition. - * @param partitionColumns The partition columns schema - * @return The array of path filter, accpeting all partition paths. 
- */ - public static PathFilter [] buildAllAcceptingPathFilters(Schema partitionColumns) { - Column target; - PathFilter [] filters = new PathFilter[partitionColumns.size()]; - List accumulatedFilters = Lists.newArrayList(); - for (int i = 0; i < partitionColumns.size(); i++) { // loop from one to level - target = partitionColumns.getColumn(i); - accumulatedFilters.add(new IsNullEval(true, new FieldEval(target))); - - EvalNode filterPerLevel = AlgebraicUtil.createSingletonExprFromCNF( - accumulatedFilters.toArray(new EvalNode[accumulatedFilters.size()])); - filters[i] = new PartitionPathFilter(partitionColumns, filterPerLevel); - } - return filters; - } - - private Path [] toPathArray(FileStatus[] fileStatuses) { - Path [] paths = new Path[fileStatuses.length]; - for (int i = 0; i < fileStatuses.length; i++) { - FileStatus fileStatus = fileStatuses[i]; - paths[i] = fileStatus.getPath(); - totalVolume += fileStatus.getLen(); - } - return paths; - } - - public Path [] findFilteredPartitionPaths(OverridableConf queryContext, ScanNode scanNode) throws IOException, - UndefinedDatabaseException, UndefinedTableException, UndefinedPartitionMethodException, - UndefinedOperatorException, UnsupportedException { - TableDesc table = scanNode.getTableDesc(); - PartitionMethodDesc partitionDesc = scanNode.getTableDesc().getPartitionMethod(); - - Schema paritionValuesSchema = new Schema(); - for (Column column : partitionDesc.getExpressionSchema().getRootColumns()) { - paritionValuesSchema.addColumn(column); - } - - Set indexablePredicateSet = Sets.newHashSet(); - - // if a query statement has a search condition, try to find indexable predicates - if (scanNode.hasQual()) { - EvalNode [] conjunctiveForms = AlgebraicUtil.toConjunctiveNormalFormArray(scanNode.getQual()); - Set remainExprs = Sets.newHashSet(conjunctiveForms); - - // add qualifier to schema for qual - paritionValuesSchema.setQualifier(scanNode.getCanonicalName()); - for (Column column : 
paritionValuesSchema.getRootColumns()) { - for (EvalNode simpleExpr : conjunctiveForms) { - if (checkIfIndexablePredicateOnTargetColumn(simpleExpr, column)) { - indexablePredicateSet.add(simpleExpr); - } - } - } - - // Partitions which are not matched to the partition filter conditions are pruned immediately. - // So, the partition filter conditions are not necessary later, and they are removed from - // original search condition for simplicity and efficiency. - remainExprs.removeAll(indexablePredicateSet); - if (remainExprs.isEmpty()) { - scanNode.setQual(null); - } else { - scanNode.setQual( - AlgebraicUtil.createSingletonExprFromCNF(remainExprs.toArray(new EvalNode[remainExprs.size()]))); - } - } - - if (indexablePredicateSet.size() > 0) { // There are at least one indexable predicates - return findFilteredPaths(queryContext, table.getName(), paritionValuesSchema, - indexablePredicateSet.toArray(new EvalNode[indexablePredicateSet.size()]), new Path(table.getUri()), scanNode); - } else { // otherwise, we will get all partition paths. - return findFilteredPaths(queryContext, table.getName(), paritionValuesSchema, null, new Path(table.getUri())); - } - } - - private boolean checkIfIndexablePredicateOnTargetColumn(EvalNode evalNode, Column targetColumn) { - if (checkIfIndexablePredicate(evalNode) || checkIfDisjunctiveButOneVariable(evalNode)) { - Set variables = EvalTreeUtil.findUniqueColumns(evalNode); - // if it contains only single variable matched to a target column - return variables.size() == 1 && variables.contains(targetColumn); - } else { - return false; - } - } - - /** - * Check if an expression consists of one variable and one constant and - * the expression is a comparison operator. - * - * @param evalNode The expression to be checked - * @return true if an expression consists of one variable and one constant - * and the expression is a comparison operator. Other, false. 
- */ - private boolean checkIfIndexablePredicate(EvalNode evalNode) { - // TODO - LIKE with a trailing wild-card character and IN with an array can be indexable - return AlgebraicUtil.containSingleVar(evalNode) && AlgebraicUtil.isIndexableOperator(evalNode); - } - - /** - * - * @param evalNode The expression to be checked - * @return true if an disjunctive expression, consisting of indexable expressions - */ - private boolean checkIfDisjunctiveButOneVariable(EvalNode evalNode) { - if (evalNode.getType() == EvalType.OR) { - BinaryEval orEval = (BinaryEval) evalNode; - boolean indexable = - checkIfIndexablePredicate(orEval.getLeftExpr()) && - checkIfIndexablePredicate(orEval.getRightExpr()); - - boolean sameVariable = - EvalTreeUtil.findUniqueColumns(orEval.getLeftExpr()) - .equals(EvalTreeUtil.findUniqueColumns(orEval.getRightExpr())); - - return indexable && sameVariable; - } else { - return false; - } - } - - /** - * Take a look at a column partition path. A partition path consists - * of a table path part and column values part. This method transforms - * a partition path into a tuple with a given partition column schema. - * - * hdfs://192.168.0.1/tajo/warehouse/table1/col1=abc/col2=def/col3=ghi - * ^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^ - * table path part column values part - * - * When a file path is given, it can perform two ways depending on beNullIfFile flag. - * If it is true, it returns NULL when a given path is a file. - * Otherwise, it returns a built tuple regardless of file or directory. - * - * @param partitionColumnSchema The partition column schema - * @param partitionPath The partition path - * @param beNullIfFile If true, this method returns NULL when a given path is a file. - * @return The tuple transformed from a column values part. 
- */ - public static Tuple buildTupleFromPartitionPath(Schema partitionColumnSchema, Path partitionPath, - boolean beNullIfFile) { - int startIdx = partitionPath.toString().indexOf(getColumnPartitionPathPrefix(partitionColumnSchema)); - - if (startIdx == -1) { // if there is no partition column in the patch - return null; - } - String columnValuesPart = partitionPath.toString().substring(startIdx); - - String [] columnValues = columnValuesPart.split("/"); - - // true means this is a file. - if (beNullIfFile && partitionColumnSchema.size() < columnValues.length) { - return null; - } - - Tuple tuple = new VTuple(partitionColumnSchema.size()); - int i = 0; - for (; i < columnValues.length && i < partitionColumnSchema.size(); i++) { - String [] parts = columnValues[i].split("="); - if (parts.length != 2) { - return null; - } - int columnId = partitionColumnSchema.getColumnIdByName(parts[0]); - Column keyColumn = partitionColumnSchema.getColumn(columnId); - tuple.put(columnId, DatumFactory.createFromString(keyColumn.getDataType(), StringUtils.unescapePathName(parts[1]))); - } - for (; i < partitionColumnSchema.size(); i++) { - tuple.put(i, NullDatum.get()); - } - return tuple; - } - - /** - * Get a prefix of column partition path. For example, consider a column partition (col1, col2). - * Then, you will get a string 'col1='. - * - * @param partitionColumn the schema of column partition - * @return The first part string of column partition path. 
- */ - public static String getColumnPartitionPathPrefix(Schema partitionColumn) { - StringBuilder sb = new StringBuilder(); - sb.append(partitionColumn.getColumn(0).getSimpleName()).append("="); - return sb.toString(); - } - - private void updateTableStat(OverridableConf queryContext, PartitionedTableScanNode scanNode) - throws TajoException { - if (scanNode.getInputPaths().length > 0) { - try { - FileSystem fs = scanNode.getInputPaths()[0].getFileSystem(queryContext.getConf()); - long totalVolume = 0; - - for (Path input : scanNode.getInputPaths()) { - ContentSummary summary = fs.getContentSummary(input); - totalVolume += summary.getLength(); - } - scanNode.getTableDesc().getStats().setNumBytes(totalVolume); - } catch (Throwable e) { - throw new TajoInternalError(e); - } - } - } - private final class Rewriter extends BasicLogicalPlanVisitor { @Override public Object visitScan(OverridableConf queryContext, LogicalPlan plan, LogicalPlan.QueryBlock block, @@ -513,25 +72,17 @@ public Object visitScan(OverridableConf queryContext, LogicalPlan plan, LogicalP return null; } - try { - Path [] filteredPaths = findFilteredPartitionPaths(queryContext, scanNode); - plan.addHistory("PartitionTableRewriter chooses " + filteredPaths.length + " of partitions"); - PartitionedTableScanNode rewrittenScanNode = plan.createNode(PartitionedTableScanNode.class); - rewrittenScanNode.init(scanNode, filteredPaths); - // TODO: See TAJO-1927. This code have been commented temporarily. -// rewrittenScanNode.getTableDesc().getStats().setNumBytes(totalVolume); - updateTableStat(queryContext, rewrittenScanNode); + PartitionedTableScanNode rewrittenScanNode = plan.createNode(PartitionedTableScanNode.class); + rewrittenScanNode.init(scanNode); - // if it is topmost node, set it as the rootnode of this block. 
- if (stack.empty() || block.getRoot().equals(scanNode)) { - block.setRoot(rewrittenScanNode); - } else { - PlannerUtil.replaceNode(plan, stack.peek(), scanNode, rewrittenScanNode); - } - block.registerNode(rewrittenScanNode); - } catch (IOException e) { - throw new TajoInternalError("Partitioned Table Rewrite Failed: \n" + e.getMessage()); + // if it is topmost node, set it as the rootnode of this block. + if (stack.empty() || block.getRoot().equals(scanNode)) { + block.setRoot(rewrittenScanNode); + } else { + PlannerUtil.replaceNode(plan, stack.peek(), scanNode, rewrittenScanNode); } + block.registerNode(rewrittenScanNode); + return null; } } diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeDeserializer.java b/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeDeserializer.java index 2051dfb0de..36ecf85983 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeDeserializer.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeDeserializer.java @@ -462,13 +462,6 @@ private static PartitionedTableScanNode convertPartitionScan(OverridableConf con PlanProto.LogicalNode protoNode) { PartitionedTableScanNode partitionedScan = new PartitionedTableScanNode(protoNode.getNodeId()); fillScanNode(context, evalContext, protoNode, partitionedScan); - - PlanProto.PartitionScanSpec partitionScanProto = protoNode.getPartitionScan(); - Path [] paths = new Path[partitionScanProto.getPathsCount()]; - for (int i = 0; i < partitionScanProto.getPathsCount(); i++) { - paths[i] = new Path(partitionScanProto.getPaths(i)); - } - partitionedScan.setInputPaths(paths); return partitionedScan; } diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeSerializer.java b/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeSerializer.java index 7907668ef4..1ecd2ab547 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeSerializer.java +++ 
b/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeSerializer.java @@ -474,19 +474,8 @@ public LogicalNode visitPartitionedTableScan(SerializeContext context, LogicalPl throws TajoException { PlanProto.ScanNode.Builder scanBuilder = buildScanNode(node); - - PlanProto.PartitionScanSpec.Builder partitionScan = PlanProto.PartitionScanSpec.newBuilder(); - List pathStrs = TUtil.newList(); - if (node.getInputPaths() != null) { - for (Path p : node.getInputPaths()) { - pathStrs.add(p.toString()); - } - partitionScan.addAllPaths(pathStrs); - } - PlanProto.LogicalNode.Builder nodeBuilder = createNodeBuilder(context, node); nodeBuilder.setScan(scanBuilder); - nodeBuilder.setPartitionScan(partitionScan); context.treeBuilder.addNodes(nodeBuilder); return node; diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/util/FilteredPartitionInfo.java b/tajo-plan/src/main/java/org/apache/tajo/plan/util/FilteredPartitionInfo.java index a5d62201da..dc8008b462 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/util/FilteredPartitionInfo.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/util/FilteredPartitionInfo.java @@ -21,13 +21,27 @@ public class FilteredPartitionInfo { private Path[] partitionPaths; + private String[] partitionNames; private long totalVolume; + public FilteredPartitionInfo() { + } + + public FilteredPartitionInfo(Path[] partitionPaths) { + this.partitionPaths = partitionPaths; + } + public FilteredPartitionInfo(Path[] partitionPaths, long totalVolume) { this.partitionPaths = partitionPaths; this.totalVolume = totalVolume; } + public FilteredPartitionInfo(Path[] partitionPaths, String[] partitionNames, long totalVolume) { + this.partitionPaths = partitionPaths; + this.partitionNames = partitionNames; + this.totalVolume = totalVolume; + } + public Path[] getPartitionPaths() { return partitionPaths; } @@ -36,6 +50,14 @@ public void setPartitionPaths(Path[] partitionPaths) { this.partitionPaths = partitionPaths; } + public String[] 
getPartitionNames() { + return partitionNames; + } + + public void setPartitionNames(String[] partitionNames) { + this.partitionNames = partitionNames; + } + public long getTotalVolume() { return totalVolume; } diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java b/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java index a938f23b9c..44320f0aee 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java @@ -36,6 +36,7 @@ import org.apache.tajo.storage.VTuple; import org.apache.tajo.util.StringUtils; +import java.io.File; import java.io.IOException; import java.util.List; import java.util.Set; @@ -60,8 +61,6 @@ public static FilteredPartitionInfo findFilteredPartitionInfo(CatalogService cat // if a query statement has a search condition, try to find indexable predicates if (scanNode.hasQual()) { EvalNode [] conjunctiveForms = AlgebraicUtil.toConjunctiveNormalFormArray(scanNode.getQual()); - Set remainExprs = Sets.newHashSet(conjunctiveForms); - // add qualifier to schema for qual paritionValuesSchema.setQualifier(scanNode.getCanonicalName()); for (Column column : paritionValuesSchema.getRootColumns()) { @@ -71,17 +70,6 @@ public static FilteredPartitionInfo findFilteredPartitionInfo(CatalogService cat } } } - - // Partitions which are not matched to the partition filter conditions are pruned immediately. - // So, the partition filter conditions are not necessary later, and they are removed from - // original search condition for simplicity and efficiency. 
- remainExprs.removeAll(indexablePredicateSet); - if (remainExprs.isEmpty()) { - scanNode.setQual(null); - } else { - scanNode.setQual( - AlgebraicUtil.createSingletonExprFromCNF(remainExprs.toArray(new EvalNode[remainExprs.size()]))); - } } if (indexablePredicateSet.size() > 0) { // There are at least one indexable predicates @@ -170,7 +158,8 @@ private static FilteredPartitionInfo findFilteredPartitionInfo(CatalogService ca partitions = catalog.getPartitionsOfTable(splits[0], splits[1]); if (partitions.isEmpty()) { filteredPaths = findFilteredPathsFromFileSystem(partitionColumns, conjunctiveForms, fs, tablePath); - filteredPartitionInfo = new FilteredPartitionInfo(filteredPaths, getTotalVolume(fs, filteredPaths)); + filteredPartitionInfo = new FilteredPartitionInfo(filteredPaths); + setFilteredPartitionInfo(filteredPartitionInfo, fs, partitionColumns); } else { filteredPartitionInfo = findFilteredPartitionInfoByPartitionDesc(partitions); } @@ -181,7 +170,8 @@ private static FilteredPartitionInfo findFilteredPartitionInfo(CatalogService ca filteredPartitionInfo = findFilteredPartitionInfoByPartitionDesc(partitions); } else { filteredPaths = findFilteredPathsFromFileSystem(partitionColumns, conjunctiveForms, fs, tablePath); - filteredPartitionInfo = new FilteredPartitionInfo(filteredPaths, getTotalVolume(fs, filteredPaths)); + filteredPartitionInfo = new FilteredPartitionInfo(filteredPaths); + setFilteredPartitionInfo(filteredPartitionInfo, fs, partitionColumns); } } } catch (UnsupportedException ue) { @@ -191,7 +181,8 @@ private static FilteredPartitionInfo findFilteredPartitionInfo(CatalogService ca partitions = catalog.getPartitionsOfTable(splits[0], splits[1]); if (partitions.isEmpty()) { filteredPaths = findFilteredPathsFromFileSystem(partitionColumns, conjunctiveForms, fs, tablePath); - filteredPartitionInfo = new FilteredPartitionInfo(filteredPaths, getTotalVolume(fs, filteredPaths)); + filteredPartitionInfo = new FilteredPartitionInfo(filteredPaths); + 
setFilteredPartitionInfo(filteredPartitionInfo, fs, partitionColumns); } else { filteredPartitionInfo = findFilteredPartitionInfoByPartitionDesc(partitions); } @@ -214,12 +205,14 @@ private static FilteredPartitionInfo findFilteredPartitionInfoByPartitionDesc(Li partitions) { long totalVolume = 0L; Path[] filteredPaths = new Path[partitions.size()]; + String[] partitionNames = new String[partitions.size()]; for (int i = 0; i < partitions.size(); i++) { CatalogProtos.PartitionDescProto partition = partitions.get(i); filteredPaths[i] = new Path(partition.getPath()); + partitionNames[i] = partition.getPartitionName(); totalVolume += partition.getNumBytes(); } - return new FilteredPartitionInfo(filteredPaths, totalVolume); + return new FilteredPartitionInfo(filteredPaths, partitionNames, totalVolume); } /** @@ -338,7 +331,7 @@ private static CatalogProtos.PartitionsByAlgebraProto getPartitionsAlgebraProto( * @param partitionColumns The partition columns schema * @return The array of path filter, accpeting all partition paths. 
*/ - private static PathFilter [] buildAllAcceptingPathFilters(Schema partitionColumns) { + public static PathFilter [] buildAllAcceptingPathFilters(Schema partitionColumns) { Column target; PathFilter [] filters = new PathFilter[partitionColumns.size()]; List accumulatedFilters = Lists.newArrayList(); @@ -362,19 +355,26 @@ private static Path[] toPathArray(FileStatus[] fileStatuses) { return paths; } - private static long getTotalVolume(FileSystem fs, Path[] inputPaths) { + private static void setFilteredPartitionInfo(FilteredPartitionInfo filteredPartitionInfo, FileSystem fs, + Schema partitionColumnSchema) { long totalVolume = 0L; - if (inputPaths.length > 0) { + String[] partitionNames = null; + if (filteredPartitionInfo.getPartitionPaths().length > 0) { try { - for (Path input : inputPaths) { + partitionNames = new String[filteredPartitionInfo.getPartitionPaths().length]; + for (int i = 0; i < filteredPartitionInfo.getPartitionPaths().length; i++) { + Path input = filteredPartitionInfo.getPartitionPaths()[i]; + int startIdx = input.toString().indexOf(getColumnPartitionPathPrefix(partitionColumnSchema)); ContentSummary summary = fs.getContentSummary(input); + partitionNames[i] = input.toString().substring(startIdx); totalVolume += summary.getLength(); } } catch (Throwable e) { throw new TajoInternalError(e); } } - return totalVolume; + filteredPartitionInfo.setPartitionNames(partitionNames); + filteredPartitionInfo.setTotalVolume(totalVolume); } private static class PartitionPathFilter implements PathFilter { @@ -467,4 +467,32 @@ public static String getColumnPartitionPathPrefix(Schema partitionColumn) { sb.append(partitionColumn.getColumn(0).getSimpleName()).append("="); return sb.toString(); } + + public static Tuple buildTupleFromPartitionName(Schema partitionColumnSchema, String partitionName, + boolean beNullIfFile) { + String [] columnValues = partitionName.split("/"); + + // true means this is a file. 
+ if (beNullIfFile && partitionColumnSchema.size() < columnValues.length) { + return null; + } + + Tuple tuple = new VTuple(partitionColumnSchema.size()); + + for (int i = 0; i < tuple.size(); i++) { + tuple.put(i, NullDatum.get()); + } + + for (int i = 0; i < columnValues.length; i++) { + String [] parts = columnValues[i].split("="); + if (parts.length == 2) { + int columnId = partitionColumnSchema.getColumnIdByName(parts[0]); + Column keyColumn = partitionColumnSchema.getColumn(columnId); + tuple.put(columnId, DatumFactory.createFromString(keyColumn.getDataType(), StringUtils.unescapePathName(parts[1]))); + } + } + + return tuple; + } + } \ No newline at end of file diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/verifier/PostLogicalPlanVerifier.java b/tajo-plan/src/main/java/org/apache/tajo/plan/verifier/PostLogicalPlanVerifier.java index 744256136b..09edc7272d 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/verifier/PostLogicalPlanVerifier.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/verifier/PostLogicalPlanVerifier.java @@ -134,13 +134,6 @@ private static boolean isSimpleRelationNode(LogicalNode node) { private static long getTableVolume(ScanNode scanNode) { if (scanNode.getTableDesc().hasStats()) { long scanBytes = scanNode.getTableDesc().getStats().getNumBytes(); - if (scanNode.getType() == NodeType.PARTITIONS_SCAN) { - PartitionedTableScanNode pScanNode = (PartitionedTableScanNode) scanNode; - if (pScanNode.getInputPaths() == null || pScanNode.getInputPaths().length == 0) { - scanBytes = 0L; - } - } - return scanBytes; } else { return -1; diff --git a/tajo-plan/src/main/proto/Plan.proto b/tajo-plan/src/main/proto/Plan.proto index 0cd0c32dcc..ff3cbc5830 100644 --- a/tajo-plan/src/main/proto/Plan.proto +++ b/tajo-plan/src/main/proto/Plan.proto @@ -73,7 +73,6 @@ message LogicalNode { optional SchemaProto out_schema = 5; optional ScanNode scan = 6; - optional PartitionScanSpec partitionScan = 7; optional IndexScanSpec indexScan = 
8; optional JoinNode join = 9; optional FilterNode filter = 10; @@ -115,10 +114,6 @@ message ScanNode { required bool nameResolveBase = 7; } -message PartitionScanSpec { - repeated string paths = 1; -} - message IndexScanSpec { required SchemaProto keySchema = 1; required string indexPath = 2; @@ -231,10 +226,6 @@ enum JoinType { RIGHT_SEMI_JOIN = 9; } -message PartitionTableScanSpec { - repeated string paths = 1; -} - message PersistentStoreNode { optional int32 childSeq = 1; // CreateTableNode may not have any children. This should be improved at TAJO-1589. required string storageType = 2; diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java index 5cf81e62f5..59e1b14ed8 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java @@ -42,6 +42,7 @@ import org.apache.tajo.plan.logical.NodeType; import org.apache.tajo.storage.fragment.FileFragment; import org.apache.tajo.storage.fragment.Fragment; +import org.apache.tajo.storage.fragment.PartitionedFileFragment; import org.apache.tajo.util.Bytes; import javax.annotation.Nullable; @@ -420,9 +421,29 @@ protected FileFragment makeSplit(String fragmentId, Path file, BlockLocation blo return new FileFragment(fragmentId, file, blockLocation); } + protected PartitionedFileFragment makePartitionSplit(String fragmentId, Path file, long start, long length, + String partitionName) { + return new PartitionedFileFragment(fragmentId, file, start, length, partitionName); + } + + protected PartitionedFileFragment makePartitionSplit(String fragmentId, Path file, long start, long length, + String[] hosts, String partitionName) { + return new PartitionedFileFragment(fragmentId, file, start, length, hosts, partitionName); + } + + protected PartitionedFileFragment 
makePartitionSplit(String fragmentId, Path file, BlockLocation blockLocation + , String partitionName) throws IOException { + return new PartitionedFileFragment(fragmentId, file, blockLocation, partitionName); + } + // for Non Splittable. eg, compressed gzip TextFile - protected FileFragment makeNonSplit(String fragmentId, Path file, long start, long length, - BlockLocation[] blkLocations) throws IOException { + protected Fragment makeNonSplit(String fragmentId, Path file, long start, long length, + BlockLocation[] blkLocations) throws IOException { + return makeNonSplit(fragmentId, file, start, length, blkLocations, null); + } + + protected Fragment makeNonSplit(String fragmentId, Path file, long start, long length, + BlockLocation[] blkLocations, String partitionName) throws IOException { Map hostsBlockMap = new HashMap<>(); for (BlockLocation blockLocation : blkLocations) { @@ -450,7 +471,12 @@ public int compare(Map.Entry v1, Map.Entry v2) Map.Entry entry = entries.get((entries.size() - 1) - i); hosts[i] = entry.getKey(); } - return new FileFragment(fragmentId, file, start, length, hosts); + + if (partitionName != null) { + return new PartitionedFileFragment(fragmentId, file, start, length, hosts, partitionName); + } else { + return new FileFragment(fragmentId, file, start, length, hosts); + } } /** @@ -477,12 +503,17 @@ private int[] getDiskIds(VolumeId[] volumeIds) { return diskIds; } + public List getSplits(String tableName, TableMeta meta, Schema schema, Path... inputs) + throws IOException { + return getSplits(tableName, meta, schema, null, inputs); + } + /** * Generate the list of files and make them into FileSplits. * * @throws IOException */ - public List getSplits(String tableName, TableMeta meta, Schema schema, Path... inputs) + public List getSplits(String tableName, TableMeta meta, Schema schema, String[] partitions, Path... 
inputs) throws IOException { // generate splits' @@ -490,6 +521,7 @@ public List getSplits(String tableName, TableMeta meta, Schema schema, List volumeSplits = Lists.newArrayList(); List blockLocations = Lists.newArrayList(); + int i = 0; for (Path p : inputs) { ArrayList files = Lists.newArrayList(); if (fs.isFile(p)) { @@ -510,7 +542,11 @@ public List getSplits(String tableName, TableMeta meta, Schema schema, if (splittable) { for (BlockLocation blockLocation : blkLocations) { - volumeSplits.add(makeSplit(tableName, path, blockLocation)); + if (partitions != null) { + volumeSplits.add(makePartitionSplit(tableName, path, blockLocation, partitions[i])); + } else { + volumeSplits.add(makeSplit(tableName, path, blockLocation)); + } } blockLocations.addAll(Arrays.asList(blkLocations)); @@ -519,10 +555,18 @@ public List getSplits(String tableName, TableMeta meta, Schema schema, if (blockSize >= length) { blockLocations.addAll(Arrays.asList(blkLocations)); for (BlockLocation blockLocation : blkLocations) { - volumeSplits.add(makeSplit(tableName, path, blockLocation)); + if (partitions != null) { + volumeSplits.add(makePartitionSplit(tableName, path, blockLocation, partitions[i])); + } else { + volumeSplits.add(makeSplit(tableName, path, blockLocation)); + } } } else { - splits.add(makeNonSplit(tableName, path, 0, length, blkLocations)); + if (partitions != null) { + splits.add(makeNonSplit(tableName, path, 0, length, blkLocations, partitions[i])); + } else { + splits.add(makeNonSplit(tableName, path, 0, length, blkLocations)); + } } } @@ -538,17 +582,34 @@ public List getSplits(String tableName, TableMeta meta, Schema schema, // for s3 while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) { int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); - splits.add(makeSplit(tableName, path, length - bytesRemaining, splitSize, + + if (partitions != null) { + splits.add(makePartitionSplit(tableName, path, length - bytesRemaining, splitSize, + 
blkLocations[blkIndex].getHosts(), partitions[i])); + } else { + splits.add(makeSplit(tableName, path, length - bytesRemaining, splitSize, blkLocations[blkIndex].getHosts())); + } + bytesRemaining -= splitSize; } if (bytesRemaining > 0) { int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); - splits.add(makeSplit(tableName, path, length - bytesRemaining, bytesRemaining, + + if (partitions != null) { + splits.add(makePartitionSplit(tableName, path, length - bytesRemaining, bytesRemaining, + blkLocations[blkIndex].getHosts(), partitions[i])); + } else { + splits.add(makeSplit(tableName, path, length - bytesRemaining, bytesRemaining, blkLocations[blkIndex].getHosts())); + } } } else { // Non splittable - splits.add(makeNonSplit(tableName, path, 0, length, blkLocations)); + if (partitions != null) { + splits.add(makeNonSplit(tableName, path, 0, length, blkLocations, partitions[i])); + } else { + splits.add(makeNonSplit(tableName, path, 0, length, blkLocations)); + } } } } @@ -556,6 +617,7 @@ public List getSplits(String tableName, TableMeta meta, Schema schema, if(LOG.isDebugEnabled()){ LOG.debug("# of splits per partition: " + (splits.size() - previousSplitSize)); } + i++; } // Combine original fileFragments with new VolumeId information From 4f711fa2ff7a18979198d80a70f283f73b91edf9 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 2 Nov 2015 16:14:49 +0900 Subject: [PATCH 005/127] Remove unnecessary method --- .../tajo/plan/logical/PartitionedTableScanNode.java | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java b/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java index 5733a0edb3..4eeafbd508 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java @@ -39,18 +39,6 @@ public void init(ScanNode 
scanNode) { this.qual = scanNode.qual; this.targets = scanNode.targets; - if (scanNode.hasAlias()) { - alias = scanNode.alias; - } - } - - public void init(ScanNode scanNode, Path[] inputPaths) { - tableDesc = scanNode.tableDesc; - setInSchema(scanNode.getInSchema()); - setOutSchema(scanNode.getOutSchema()); - this.qual = scanNode.qual; - this.targets = scanNode.targets; - if (scanNode.hasAlias()) { alias = scanNode.alias; } From 33dc1407a3d1417a81895e5e36d528f64c88bbbe Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 2 Nov 2015 16:22:24 +0900 Subject: [PATCH 006/127] Update comments --- .../src/main/proto/StorageFragmentProtos.proto | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tajo-storage/tajo-storage-hdfs/src/main/proto/StorageFragmentProtos.proto b/tajo-storage/tajo-storage-hdfs/src/main/proto/StorageFragmentProtos.proto index 40ea24a22a..fc550148f8 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/proto/StorageFragmentProtos.proto +++ b/tajo-storage/tajo-storage-hdfs/src/main/proto/StorageFragmentProtos.proto @@ -40,5 +40,6 @@ message PartitionedFileFragmentProto { required int64 length = 4; repeated string hosts = 5; repeated int32 disk_ids = 6; - required string partitionName = 7; /* it cosists of partition key and value */ + // Partition Name: country=KOREA/city=SEOUL + required string partitionName = 7; } From dede3e2957a2cee7bccd235a3f873aac0ab40377 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 2 Nov 2015 16:40:15 +0900 Subject: [PATCH 007/127] Remove unnecessary constructor parameter of PhysicalPlannerImpl --- .../physical/TestExternalSortExec.java | 2 +- .../physical/TestFullOuterHashJoinExec.java | 8 ++-- .../physical/TestFullOuterMergeJoinExec.java | 12 ++--- .../physical/TestHashAntiJoinExec.java | 2 +- .../planner/physical/TestHashJoinExec.java | 4 +- .../physical/TestHashSemiJoinExec.java | 2 +- .../physical/TestLeftOuterHashJoinExec.java | 10 ++-- .../planner/physical/TestMergeJoinExec.java | 2 +- 
.../planner/physical/TestPhysicalPlanner.java | 48 +++++++++---------- .../TestProgressExternalSortExec.java | 2 +- .../physical/TestRightOuterHashJoinExec.java | 6 +-- .../physical/TestRightOuterMergeJoinExec.java | 12 ++--- .../engine/planner/physical/TestSortExec.java | 2 +- .../physical/TestSortIntersectExec.java | 4 +- .../engine/planner/PhysicalPlannerImpl.java | 28 ++--------- .../NonForwardQueryResultSystemScanner.java | 2 +- .../tajo/worker/ExecutionBlockContext.java | 2 +- .../apache/tajo/worker/TajoQueryEngine.java | 5 +- 18 files changed, 67 insertions(+), 86 deletions(-) diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestExternalSortExec.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestExternalSortExec.java index c1383b925a..4ee2c9ca9a 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestExternalSortExec.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestExternalSortExec.java @@ -134,7 +134,7 @@ public final void testNext() throws IOException, TajoException { LogicalPlan plan = planner.createPlan(LocalTajoTestingUtility.createDummyContext(conf), expr); LogicalNode rootNode = plan.getRootBlock().getRoot(); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); ProjectionExec proj = (ProjectionExec) exec; diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestFullOuterHashJoinExec.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestFullOuterHashJoinExec.java index 06b8029e87..2e26a2a0c6 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestFullOuterHashJoinExec.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestFullOuterHashJoinExec.java @@ -279,7 +279,7 @@ 
public final void testFullOuterHashJoinExec0() throws IOException, TajoException LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; @@ -318,7 +318,7 @@ public final void testFullOuterHashJoinExec1() throws IOException, TajoException LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; @@ -356,7 +356,7 @@ public final void testFullOuterHashJoinExec2() throws IOException, TajoException LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; @@ -396,7 +396,7 @@ public final void testFullOuterHashJoinExec3() throws IOException, TajoException workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestFullOuterMergeJoinExec.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestFullOuterMergeJoinExec.java index 0a93cfd4d2..fd42b9a990 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestFullOuterMergeJoinExec.java +++ 
b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestFullOuterMergeJoinExec.java @@ -326,7 +326,7 @@ public final void testFullOuterMergeJoin0() throws IOException, TajoException { LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; @@ -363,7 +363,7 @@ public final void testFullOuterMergeJoin1() throws IOException, TajoException { LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; @@ -400,7 +400,7 @@ public final void testFullOuterMergeJoin2() throws IOException, TajoException { LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; @@ -438,7 +438,7 @@ public final void testFullOuterMergeJoin3() throws IOException, TajoException { LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; @@ -479,7 +479,7 @@ public final void testFullOuterMergeJoin4() throws IOException, TajoException { LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner 
phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; @@ -518,7 +518,7 @@ public final void testFullOuterMergeJoin5() throws IOException, TajoException { LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestHashAntiJoinExec.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestHashAntiJoinExec.java index db68e02de4..103d2b1ac6 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestHashAntiJoinExec.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestHashAntiJoinExec.java @@ -168,7 +168,7 @@ public final void testHashAntiJoin() throws IOException, TajoException { optimizer.optimize(plan); LogicalNode rootNode = plan.getRootBlock().getRoot(); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); // replace an equal join with an hash anti join. 
diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestHashJoinExec.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestHashJoinExec.java index 8212f579de..a4afa7fae1 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestHashJoinExec.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestHashJoinExec.java @@ -164,7 +164,7 @@ public final void testHashInnerJoin() throws IOException, TajoException { LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; @@ -208,7 +208,7 @@ public final void testCheckIfInMemoryInnerJoinIsPossible() throws IOException, T ctx.setEnforcer(enforcer); ctx.getQueryContext().setLong(SessionVars.HASH_JOIN_SIZE_LIMIT.keyname(), 1); // set hash join limit as 1 MB - PhysicalPlannerImpl phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlannerImpl phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestHashSemiJoinExec.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestHashSemiJoinExec.java index e4f879f86c..f99dc9e5f9 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestHashSemiJoinExec.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestHashSemiJoinExec.java @@ -172,7 +172,7 @@ public final void testHashSemiJoin() throws IOException, TajoException { optimizer.optimize(plan); LogicalNode rootNode = plan.getRootBlock().getRoot(); - PhysicalPlanner phyPlanner = new 
PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); // replace an equal join with an hash anti join. diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestLeftOuterHashJoinExec.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestLeftOuterHashJoinExec.java index 40f0aac0cb..ab5f5093c7 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestLeftOuterHashJoinExec.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestLeftOuterHashJoinExec.java @@ -282,7 +282,7 @@ public final void testLeftOuterHashJoinExec0() throws IOException, TajoException LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; @@ -315,7 +315,7 @@ public final void testLeftOuter_HashJoinExec1() throws IOException, TajoExceptio Expr expr = analyzer.parse(QUERIES[1]); LogicalNode plan = planner.createPlan(defaultContext, expr).getRootBlock().getRoot(); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; @@ -350,7 +350,7 @@ public final void testLeftOuter_HashJoinExec2() throws IOException, TajoExceptio Expr expr = analyzer.parse(QUERIES[2]); LogicalNode plan = planner.createPlan(defaultContext, expr).getRootBlock().getRoot(); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = 
(ProjectionExec) exec; @@ -385,7 +385,7 @@ public final void testLeftOuter_HashJoinExec3() throws IOException, TajoExceptio Expr expr = analyzer.parse(QUERIES[3]); LogicalNode plan = planner.createPlan(defaultContext, expr).getRootBlock().getRoot(); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; @@ -420,7 +420,7 @@ public final void testLeftOuter_HashJoinExec4() throws IOException, TajoExceptio Expr expr = analyzer.parse(QUERIES[4]); LogicalNode plan = planner.createPlan(defaultContext, expr).getRootBlock().getRoot(); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestMergeJoinExec.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestMergeJoinExec.java index 8bda0783a6..d29782573f 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestMergeJoinExec.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestMergeJoinExec.java @@ -174,7 +174,7 @@ public final void testMergeInnerJoin() throws IOException, TajoException { LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, root); ProjectionExec proj = (ProjectionExec) exec; assertTrue(proj.getChild() instanceof MergeJoinExec); diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestPhysicalPlanner.java 
b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestPhysicalPlanner.java index 5c826773c7..d3f1649914 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestPhysicalPlanner.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestPhysicalPlanner.java @@ -250,7 +250,7 @@ public final void testCreateScanPlan() throws IOException, TajoException { optimizer.optimize(plan); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); Tuple tuple; @@ -281,7 +281,7 @@ public final void testCreateScanWithFilterPlan() throws IOException, TajoExcepti optimizer.optimize(plan); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); Tuple tuple; @@ -309,7 +309,7 @@ public final void testGroupByPlan() throws IOException, TajoException { optimizer.optimize(plan); LogicalNode rootNode = plan.getRootBlock().getRoot(); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); int i = 0; @@ -340,7 +340,7 @@ public final void testHashGroupByPlanWithALLField() throws IOException, TajoExce LogicalPlan plan = planner.createPlan(defaultContext, expr); LogicalNode rootNode = optimizer.optimize(plan); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); int i = 0; @@ -369,7 +369,7 @@ public final void testSortGroupByPlan() throws IOException, TajoException { LogicalPlan plan = planner.createPlan(defaultContext, context); optimizer.optimize(plan); - PhysicalPlanner phyPlanner = new 
PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, plan.getRootBlock().getRoot()); /*HashAggregateExec hashAgg = (HashAggregateExec) exec; @@ -436,7 +436,7 @@ public final void testStorePlan() throws IOException, TajoException { TableMeta outputMeta = CatalogUtil.newTableMeta("TEXT"); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); exec.init(); exec.next(); @@ -487,7 +487,7 @@ public final void testStorePlanWithMaxOutputFileSize() throws IOException, TajoE LogicalNode rootNode = optimizer.optimize(plan); // executing StoreTableExec - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); exec.init(); exec.next(); @@ -533,7 +533,7 @@ public final void testStorePlanWithRCFile() throws IOException, TajoException { TableMeta outputMeta = CatalogUtil.newTableMeta("RCFILE"); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); exec.init(); exec.next(); @@ -571,7 +571,7 @@ public final void testEnforceForDefaultColumnPartitionStorePlan() throws IOExcep Expr context = analyzer.parse(CreateTableAsStmts[2]); LogicalPlan plan = planner.createPlan(defaultContext, context); LogicalNode rootNode = optimizer.optimize(plan); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); assertTrue(exec instanceof SortBasedColPartitionStoreExec); } @@ -595,7 +595,7 @@ public final void testEnforceForHashBasedColumnPartitionStorePlan() throws IOExc 
ctx.setEnforcer(enforcer); ctx.setOutputPath(new Path(workDir, "grouped4")); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); assertTrue(exec instanceof HashBasedColPartitionStoreExec); } @@ -619,7 +619,7 @@ public final void testEnforceForSortBasedColumnPartitionStorePlan() throws IOExc ctx.setEnforcer(enforcer); ctx.setOutputPath(new Path(workDir, "grouped5")); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); assertTrue(exec instanceof SortBasedColPartitionStoreExec); } @@ -650,7 +650,7 @@ public final void testPartitionedStorePlan() throws IOException, TajoException { QueryId queryId = id.getTaskId().getExecutionBlockId().getQueryId(); ExecutionBlockId ebId = id.getTaskId().getExecutionBlockId(); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); exec.init(); exec.next(); @@ -714,7 +714,7 @@ public final void testPartitionedStorePlanWithMaxFileSize() throws IOException, LogicalNode rootNode = optimizer.optimize(plan); // Executing CREATE TABLE PARTITION BY - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); exec.init(); exec.next(); @@ -784,7 +784,7 @@ public final void testPartitionedStorePlanWithEmptyGroupingSet() QueryId queryId = id.getTaskId().getExecutionBlockId().getQueryId(); ExecutionBlockId ebId = id.getTaskId().getExecutionBlockId(); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = 
phyPlanner.createPlan(ctx, rootNode); exec.init(); exec.next(); @@ -843,7 +843,7 @@ public final void testAggregationFunction() throws IOException, TajoException { function.setFirstPhase(); } - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); exec.init(); @@ -874,7 +874,7 @@ public final void testCountFunction() throws IOException, TajoException { function.setFirstPhase(); } - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); exec.init(); Tuple tuple = exec.next(); @@ -896,7 +896,7 @@ public final void testGroupByWithNullValue() throws IOException, TajoException { LogicalPlan plan = planner.createPlan(defaultContext, context); LogicalNode rootNode = optimizer.optimize(plan); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); int count = 0; @@ -926,7 +926,7 @@ public final void testUnionPlan() throws IOException, TajoException, CloneNotSup union.setRightChild((LogicalNode) root.getChild().clone()); root.setChild(union); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, root); int count = 0; @@ -948,7 +948,7 @@ public final void testEvalExpr() throws IOException, TajoException { LogicalPlan plan = planner.createPlan(defaultContext, expr); LogicalNode rootNode = optimizer.optimize(plan); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); Tuple tuple; exec.init(); @@ -961,7 +961,7 @@ public 
final void testEvalExpr() throws IOException, TajoException { plan = planner.createPlan(defaultContext, expr); rootNode = optimizer.optimize(plan); - phyPlanner = new PhysicalPlannerImpl(conf, catalog); + phyPlanner = new PhysicalPlannerImpl(conf); exec = phyPlanner.createPlan(ctx, rootNode); exec.init(); tuple = exec.next(); @@ -991,7 +991,7 @@ public final void testCreateIndex() throws IOException, TajoException { LogicalPlan plan = planner.createPlan(defaultContext, context); LogicalNode rootNode = optimizer.optimize(plan); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); exec.init(); while (exec.next() != null) { @@ -1020,7 +1020,7 @@ public final void testDuplicateEliminate() throws IOException, TajoException { LogicalPlan plan = planner.createPlan(defaultContext, expr); LogicalNode rootNode = optimizer.optimize(plan); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); Tuple tuple; @@ -1059,7 +1059,7 @@ public final void testGroupByEnforcer() throws IOException, TajoException { new FileFragment[] {frags[0]}, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); exec.init(); exec.next(); @@ -1081,7 +1081,7 @@ public final void testGroupByEnforcer() throws IOException, TajoException { new FileFragment[] {frags[0]}, workDir); ctx.setEnforcer(enforcer); - phyPlanner = new PhysicalPlannerImpl(conf, catalog); + phyPlanner = new PhysicalPlannerImpl(conf); exec = phyPlanner.createPlan(ctx, rootNode); exec.init(); exec.next(); diff --git 
a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestProgressExternalSortExec.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestProgressExternalSortExec.java index 15b87cb86e..349aec0f20 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestProgressExternalSortExec.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestProgressExternalSortExec.java @@ -149,7 +149,7 @@ private void testProgress(long sortBufferBytesNum) throws Exception { LogicalPlan plan = planner.createPlan(LocalTajoTestingUtility.createDummyContext(conf), expr); LogicalNode rootNode = plan.getRootBlock().getRoot(); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); ProjectionExec proj = (ProjectionExec) exec; diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestRightOuterHashJoinExec.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestRightOuterHashJoinExec.java index d32879968b..9a7eaff725 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestRightOuterHashJoinExec.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestRightOuterHashJoinExec.java @@ -244,7 +244,7 @@ public final void testRightOuter_HashJoinExec0() throws IOException, TajoExcepti Expr expr = analyzer.parse(QUERIES[0]); LogicalNode plan = planner.createPlan(defaultContext, expr).getRootBlock().getRoot(); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; @@ -285,7 +285,7 @@ public final void testRightOuter_HashJoinExec1() throws IOException, TajoExcepti Expr expr = 
analyzer.parse(QUERIES[1]); LogicalNode plan = planner.createPlan(defaultContext, expr).getRootBlock().getRoot(); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; @@ -326,7 +326,7 @@ public final void testRightOuter_HashJoinExec2() throws IOException, TajoExcepti Expr expr = analyzer.parse(QUERIES[2]); LogicalNode plan = planner.createPlan(defaultContext, expr).getRootBlock().getRoot(); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestRightOuterMergeJoinExec.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestRightOuterMergeJoinExec.java index f54a251018..280722b001 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestRightOuterMergeJoinExec.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestRightOuterMergeJoinExec.java @@ -330,7 +330,7 @@ public final void testRightOuterMergeJoin0() throws IOException, TajoException { LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; @@ -367,7 +367,7 @@ public final void testRightOuter_MergeJoin1() throws IOException, TajoException LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new 
PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; @@ -403,7 +403,7 @@ public final void testRightOuterMergeJoin2() throws IOException, TajoException { LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; assertTrue(proj.getChild() instanceof RightOuterMergeJoinExec); @@ -439,7 +439,7 @@ public final void testRightOuter_MergeJoin3() throws IOException, TajoException LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; @@ -477,7 +477,7 @@ public final void testRightOuter_MergeJoin4() throws IOException, TajoException LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; assertTrue(proj.getChild() instanceof RightOuterMergeJoinExec); @@ -514,7 +514,7 @@ public final void testRightOuterMergeJoin5() throws IOException, TajoException { LocalTajoTestingUtility.newTaskAttemptId(), merged, workDir); ctx.setEnforcer(enforcer); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, plan); ProjectionExec proj = (ProjectionExec) exec; assertTrue(proj.getChild() instanceof 
RightOuterMergeJoinExec); diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestSortExec.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestSortExec.java index b77eae525e..15708b1f17 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestSortExec.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestSortExec.java @@ -131,7 +131,7 @@ public final void testNext() throws IOException, TajoException { LogicalPlan plan = planner.createPlan(LocalTajoTestingUtility.createDummyContext(conf), context); LogicalNode rootNode = optimizer.optimize(plan); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); Tuple tuple; diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestSortIntersectExec.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestSortIntersectExec.java index 09fd99078f..66d38b7d4c 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestSortIntersectExec.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/physical/TestSortIntersectExec.java @@ -178,7 +178,7 @@ public final void testSortIntersectAll() throws IOException, TajoException { optimizer.optimize(plan); LogicalNode rootNode = plan.getRootBlock().getRoot(); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); // replace an equal join with sort intersect all . 
@@ -245,7 +245,7 @@ public final void testSortIntersect() throws IOException, TajoException { optimizer.optimize(plan); LogicalNode rootNode = plan.getRootBlock().getRoot(); - PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf, catalog); + PhysicalPlanner phyPlanner = new PhysicalPlannerImpl(conf); PhysicalExec exec = phyPlanner.createPlan(ctx, rootNode); // replace an equal join with sort intersect all . diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/PhysicalPlannerImpl.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/PhysicalPlannerImpl.java index 6bd8e1d893..3e2df70879 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/PhysicalPlannerImpl.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/PhysicalPlannerImpl.java @@ -26,7 +26,6 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.Path; import org.apache.tajo.SessionVars; -import org.apache.tajo.catalog.CatalogService; import org.apache.tajo.catalog.Column; import org.apache.tajo.catalog.SortSpec; import org.apache.tajo.catalog.proto.CatalogProtos; @@ -36,7 +35,7 @@ import org.apache.tajo.engine.planner.global.DataChannel; import org.apache.tajo.engine.planner.physical.*; import org.apache.tajo.engine.query.QueryContext; -import org.apache.tajo.exception.*; +import org.apache.tajo.exception.TajoInternalError; import org.apache.tajo.plan.LogicalPlan; import org.apache.tajo.plan.logical.*; import org.apache.tajo.plan.serder.LogicalNodeDeserializer; @@ -47,8 +46,6 @@ import org.apache.tajo.plan.serder.PlanProto.EnforceProperty; import org.apache.tajo.plan.serder.PlanProto.SortEnforce; import org.apache.tajo.plan.serder.PlanProto.SortedInputEnforce; -import org.apache.tajo.plan.util.FilteredPartitionInfo; -import org.apache.tajo.plan.util.PartitionedTableUtil; import org.apache.tajo.plan.util.PlannerUtil; import org.apache.tajo.storage.FileTablespace; import org.apache.tajo.storage.StorageConstants; @@ -56,7 +53,6 @@ 
import org.apache.tajo.storage.fragment.FileFragment; import org.apache.tajo.storage.fragment.Fragment; import org.apache.tajo.storage.fragment.FragmentConvertor; -import org.apache.tajo.storage.fragment.PartitionedFileFragment; import org.apache.tajo.unit.StorageUnit; import org.apache.tajo.util.FileUtil; import org.apache.tajo.util.StringUtils; @@ -80,11 +76,9 @@ public class PhysicalPlannerImpl implements PhysicalPlanner { private static final int UNGENERATED_PID = -1; protected final TajoConf conf; - private final CatalogService catalog; - public PhysicalPlannerImpl(final TajoConf conf, final CatalogService catalog) { + public PhysicalPlannerImpl(final TajoConf conf) { this.conf = conf; - this.catalog = catalog; } public PhysicalExec createPlan(final TaskAttemptContext context, final LogicalNode logicalPlan) { @@ -105,16 +99,6 @@ public PhysicalExec createPlan(final TaskAttemptContext context, final LogicalNo } } catch (IOException ioe) { throw new TajoInternalError(ioe); - } catch (UndefinedDatabaseException ude) { - throw new TajoInternalError(ude); - } catch (UndefinedTableException ute) { - throw new TajoInternalError(ute); - } catch (UndefinedPartitionMethodException upme) { - throw new TajoInternalError(upme); - } catch (UndefinedOperatorException uoe) { - throw new TajoInternalError(uoe); - } catch (UnsupportedException ue) { - throw new TajoInternalError(ue); } } @@ -133,8 +117,7 @@ private PhysicalExec buildOutputOperator(TaskAttemptContext context, LogicalNode } private PhysicalExec createPlanRecursive(TaskAttemptContext ctx, LogicalNode logicalNode, Stack stack) - throws IOException, UndefinedDatabaseException, UndefinedTableException, - UndefinedPartitionMethodException, UndefinedOperatorException, UnsupportedException { + throws IOException { PhysicalExec leftExec; PhysicalExec rightExec; @@ -458,7 +441,7 @@ private PhysicalExec createBestInnerJoinPlan(TaskAttemptContext context, JoinNod private MergeJoinExec createMergeInnerJoin(TaskAttemptContext 
context, JoinNode plan, PhysicalExec leftExec, PhysicalExec rightExec) throws IOException { SortSpec[][] sortSpecs = PlannerUtil.getSortKeysFromJoinQual( - plan.getJoinQual(), leftExec.getSchema(), rightExec.getSchema()); + plan.getJoinQual(), leftExec.getSchema(), rightExec.getSchema()); SortNode leftSortNode = LogicalPlan.createNodeWithoutPID(SortNode.class); leftSortNode.setSortSpecs(sortSpecs[0]); @@ -919,8 +902,7 @@ private boolean checkIfSortEquivalance(TaskAttemptContext ctx, ScanNode scanNode } public PhysicalExec createScanPlan(TaskAttemptContext ctx, ScanNode scanNode, Stack node) - throws IOException, UndefinedDatabaseException, UndefinedTableException, - UndefinedPartitionMethodException, UndefinedOperatorException, UnsupportedException { + throws IOException { // check if an input is sorted in the same order to the subsequence sort operator. // TODO - it works only if input files are raw files. We should check the file format. // Since the default intermediate file format is raw file, it is not problem right now. 
diff --git a/tajo-core/src/main/java/org/apache/tajo/master/exec/NonForwardQueryResultSystemScanner.java b/tajo-core/src/main/java/org/apache/tajo/master/exec/NonForwardQueryResultSystemScanner.java index 79552c8848..7f6db9bf59 100644 --- a/tajo-core/src/main/java/org/apache/tajo/master/exec/NonForwardQueryResultSystemScanner.java +++ b/tajo-core/src/main/java/org/apache/tajo/master/exec/NonForwardQueryResultSystemScanner.java @@ -701,7 +701,7 @@ public int getCurrentRowNumber() { class SimplePhysicalPlannerImpl extends PhysicalPlannerImpl { public SimplePhysicalPlannerImpl(TajoConf conf) { - super(conf, masterContext.getCatalog()); + super(conf); } @Override diff --git a/tajo-core/src/main/java/org/apache/tajo/worker/ExecutionBlockContext.java b/tajo-core/src/main/java/org/apache/tajo/worker/ExecutionBlockContext.java index c419f8eb9b..db28433cdf 100644 --- a/tajo-core/src/main/java/org/apache/tajo/worker/ExecutionBlockContext.java +++ b/tajo-core/src/main/java/org/apache/tajo/worker/ExecutionBlockContext.java @@ -105,7 +105,7 @@ public ExecutionBlockContext(TajoWorker.WorkerContext workerContext, ExecutionBl // Setup QueryEngine according to the query plan // Here, we can setup row-based query engine or columnar query engine. 
- this.queryEngine = new TajoQueryEngine(systemConf, workerContext.getCatalog()); + this.queryEngine = new TajoQueryEngine(systemConf); this.queryContext = new QueryContext(workerContext.getConf(), request.getQueryContext()); this.plan = request.getPlanJson(); this.resource = new ExecutionBlockSharedResource(); diff --git a/tajo-core/src/main/java/org/apache/tajo/worker/TajoQueryEngine.java b/tajo-core/src/main/java/org/apache/tajo/worker/TajoQueryEngine.java index 4c2feae830..e7e81d4217 100644 --- a/tajo-core/src/main/java/org/apache/tajo/worker/TajoQueryEngine.java +++ b/tajo-core/src/main/java/org/apache/tajo/worker/TajoQueryEngine.java @@ -18,7 +18,6 @@ package org.apache.tajo.worker; -import org.apache.tajo.catalog.CatalogService; import org.apache.tajo.conf.TajoConf; import org.apache.tajo.engine.planner.PhysicalPlanner; import org.apache.tajo.engine.planner.PhysicalPlannerImpl; @@ -31,8 +30,8 @@ public class TajoQueryEngine { private final PhysicalPlanner phyPlanner; - public TajoQueryEngine(TajoConf conf, CatalogService catalog) throws IOException { - this.phyPlanner = new PhysicalPlannerImpl(conf, catalog); + public TajoQueryEngine(TajoConf conf) throws IOException { + this.phyPlanner = new PhysicalPlannerImpl(conf); } public PhysicalExec createPlan(TaskAttemptContext ctx, LogicalNode plan) { From ccc4f6cb2e12bd642d00be08f393f6754e74db7f Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 2 Nov 2015 16:55:16 +0900 Subject: [PATCH 008/127] Remove unnecessary parameter of PartitionedTableUtil::buildTupleFromPartitionName --- .../tajo/engine/util/TestTupleUtil.java | 12 +++------ .../engine/planner/physical/SeqScanExec.java | 4 +-- .../tajo/plan/util/PartitionedTableUtil.java | 27 +++++++++++++------ 3 files changed, 24 insertions(+), 19 deletions(-) diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/util/TestTupleUtil.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/util/TestTupleUtil.java index 852a95d621..73a88761d8 100644 --- 
a/tajo-core-tests/src/test/java/org/apache/tajo/engine/util/TestTupleUtil.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/util/TestTupleUtil.java @@ -186,29 +186,23 @@ public void testBuildTupleFromPartitionPath() { assertEquals(DatumFactory.createText("abc"), tuple.asDatum(1)); } - @Test public void testBuildTupleFromPartitionName() { Schema schema = new Schema(); schema.addColumn("key1", Type.INT8); schema.addColumn("key2", Type.TEXT); - Tuple tuple = PartitionedTableUtil.buildTupleFromPartitionName(schema, "key1=123", true); - assertNotNull(tuple); - assertEquals(DatumFactory.createInt8(123), tuple.asDatum(0)); - assertEquals(DatumFactory.createNullDatum(), tuple.asDatum(1)); - - tuple = PartitionedTableUtil.buildTupleFromPartitionName(schema, "key1=123", false); + Tuple tuple = PartitionedTableUtil.buildTupleFromPartitionName(schema, "key1=123"); assertNotNull(tuple); assertEquals(DatumFactory.createInt8(123), tuple.asDatum(0)); assertEquals(DatumFactory.createNullDatum(), tuple.asDatum(1)); - tuple = PartitionedTableUtil.buildTupleFromPartitionName(schema, "key1=123/key2=abc", true); + tuple = PartitionedTableUtil.buildTupleFromPartitionName(schema, "key1=123/key2=abc"); assertNotNull(tuple); assertEquals(DatumFactory.createInt8(123), tuple.asDatum(0)); assertEquals(DatumFactory.createText("abc"), tuple.asDatum(1)); - tuple = PartitionedTableUtil.buildTupleFromPartitionName(schema, "key2=abc", false); + tuple = PartitionedTableUtil.buildTupleFromPartitionName(schema, "key2=abc"); assertNotNull(tuple); assertEquals(DatumFactory.createNullDatum(), tuple.asDatum(0)); assertEquals(DatumFactory.createText("abc"), tuple.asDatum(1)); diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java index 08cec4537e..c29cb4e4ae 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java +++ 
b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java @@ -103,9 +103,9 @@ private void rewriteColumnPartitionedTableSchema() throws IOException { List partitionedFileFragments = FragmentConvertor.convert(PartitionedFileFragment .class, fragments); - // Get a partition key value from a given path + // Get a partition key value from a given partition name partitionRow = PartitionedTableUtil.buildTupleFromPartitionName(columnPartitionSchema, - partitionedFileFragments.get(0).getPartitionName(), false); + partitionedFileFragments.get(0).getPartitionName()); } // Targets or search conditions may contain column references. diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java b/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java index 44320f0aee..fae250a47f 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java @@ -18,6 +18,7 @@ package org.apache.tajo.plan.util; +import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Sets; import org.apache.commons.logging.Log; @@ -468,14 +469,23 @@ public static String getColumnPartitionPathPrefix(Schema partitionColumn) { return sb.toString(); } - public static Tuple buildTupleFromPartitionName(Schema partitionColumnSchema, String partitionName, - boolean beNullIfFile) { - String [] columnValues = partitionName.split("/"); + /** + * This transforms a partition name into a tupe with a given partition column schema. When a file path + * Assume that an user gives partition name 'country=KOREA/city=SEOUL'. + * + * The first datum of tuple : KOREA + * The second datum of tuple : SEOUL + * + * @param partitionColumnSchema The partition column schema + * @param partitionName The partition name + * @return The tuple transformed from a column values part. 
+ */ + public static Tuple buildTupleFromPartitionName(Schema partitionColumnSchema, String partitionName) { + Preconditions.checkNotNull(partitionColumnSchema); + Preconditions.checkNotNull(partitionName); - // true means this is a file. - if (beNullIfFile && partitionColumnSchema.size() < columnValues.length) { - return null; - } + String [] columnValues = partitionName.split("/"); + Preconditions.checkArgument(partitionColumnSchema.size() < columnValues.length, "Invalid Partition Name"); Tuple tuple = new VTuple(partitionColumnSchema.size()); @@ -488,7 +498,8 @@ public static Tuple buildTupleFromPartitionName(Schema partitionColumnSchema, St if (parts.length == 2) { int columnId = partitionColumnSchema.getColumnIdByName(parts[0]); Column keyColumn = partitionColumnSchema.getColumn(columnId); - tuple.put(columnId, DatumFactory.createFromString(keyColumn.getDataType(), StringUtils.unescapePathName(parts[1]))); + tuple.put(columnId, DatumFactory.createFromString(keyColumn.getDataType(), + StringUtils.unescapePathName(parts[1]))); } } From d5f563a1d6764f21f80e91a2540a9de5330a38cf Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 2 Nov 2015 16:59:32 +0900 Subject: [PATCH 009/127] Update wrong indent --- .../apache/tajo/plan/util/PartitionedTableUtil.java | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java b/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java index fae250a47f..1ba75e1678 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java @@ -77,7 +77,8 @@ public static FilteredPartitionInfo findFilteredPartitionInfo(CatalogService cat return findFilteredPartitionInfo(catalog, conf, table.getName(), paritionValuesSchema, indexablePredicateSet.toArray(new EvalNode[indexablePredicateSet.size()]), new Path(table.getUri()), 
scanNode); } else { // otherwise, we will get all partition paths. - return findFilteredPartitionInfo(catalog, conf, table.getName(), paritionValuesSchema, null, new Path(table.getUri())); + return findFilteredPartitionInfo(catalog, conf, table.getName(), paritionValuesSchema, null, + new Path(table.getUri())); } } @@ -166,7 +167,8 @@ private static FilteredPartitionInfo findFilteredPartitionInfo(CatalogService ca } } else { if (catalog.existPartitions(splits[0], splits[1])) { - CatalogProtos.PartitionsByAlgebraProto request = getPartitionsAlgebraProto(splits[0], splits[1], conjunctiveForms); + CatalogProtos.PartitionsByAlgebraProto request = getPartitionsAlgebraProto(splits[0], splits[1], + conjunctiveForms); partitions = catalog.getPartitionsByAlgebra(request); filteredPartitionInfo = findFilteredPartitionInfoByPartitionDesc(partitions); } else { @@ -190,7 +192,7 @@ private static FilteredPartitionInfo findFilteredPartitionInfo(CatalogService ca scanNode.setQual(AlgebraicUtil.createSingletonExprFromCNF(conjunctiveForms)); } - LOG.info("Filtered directory or files: " + filteredPartitionInfo.getPartitionPaths().length + + LOG.info("### Filtered directory or files: " + filteredPartitionInfo.getPartitionPaths().length + ", totalVolume:" + filteredPartitionInfo.getTotalVolume()); return filteredPartitionInfo; @@ -448,7 +450,8 @@ public static Tuple buildTupleFromPartitionPath(Schema partitionColumnSchema, Pa } int columnId = partitionColumnSchema.getColumnIdByName(parts[0]); Column keyColumn = partitionColumnSchema.getColumn(columnId); - tuple.put(columnId, DatumFactory.createFromString(keyColumn.getDataType(), StringUtils.unescapePathName(parts[1]))); + tuple.put(columnId, DatumFactory.createFromString(keyColumn.getDataType(), + StringUtils.unescapePathName(parts[1]))); } for (; i < partitionColumnSchema.size(); i++) { tuple.put(i, NullDatum.get()); From 086b02beb700e125a6ba37cbe275965150a89183 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 2 Nov 2015 16:59:57 
+0900 Subject: [PATCH 010/127] Remove unused package --- .../java/org/apache/tajo/plan/util/PartitionedTableUtil.java | 1 - 1 file changed, 1 deletion(-) diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java b/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java index 1ba75e1678..53f56ca0a4 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java @@ -37,7 +37,6 @@ import org.apache.tajo.storage.VTuple; import org.apache.tajo.util.StringUtils; -import java.io.File; import java.io.IOException; import java.util.List; import java.util.Set; From 22731ec4a13f1ad0e75d7987966c17715afbeb52 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 2 Nov 2015 17:20:01 +0900 Subject: [PATCH 011/127] Update wrong comparison operator --- .../java/org/apache/tajo/plan/util/PartitionedTableUtil.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java b/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java index 53f56ca0a4..4c66ef4a95 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java @@ -487,7 +487,8 @@ public static Tuple buildTupleFromPartitionName(Schema partitionColumnSchema, St Preconditions.checkNotNull(partitionName); String [] columnValues = partitionName.split("/"); - Preconditions.checkArgument(partitionColumnSchema.size() < columnValues.length, "Invalid Partition Name"); + Preconditions.checkArgument(partitionColumnSchema.size() >= columnValues.length, + "Invalid Partition Name :" + partitionName); Tuple tuple = new VTuple(partitionColumnSchema.size()); From 437f5ecdc7fad8b056bb638ea0897cd6e455b9b8 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 2 Nov 2015 17:24:05 
+0900 Subject: [PATCH 012/127] Update log message --- .../java/org/apache/tajo/plan/util/PartitionedTableUtil.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java b/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java index 4c66ef4a95..2494cea994 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java @@ -191,8 +191,8 @@ private static FilteredPartitionInfo findFilteredPartitionInfo(CatalogService ca scanNode.setQual(AlgebraicUtil.createSingletonExprFromCNF(conjunctiveForms)); } - LOG.info("### Filtered directory or files: " + filteredPartitionInfo.getPartitionPaths().length + - ", totalVolume:" + filteredPartitionInfo.getTotalVolume()); + LOG.info("Total filtered input paths:" + filteredPartitionInfo.getPartitionPaths().length + + ", total filtered input volume:" + filteredPartitionInfo.getTotalVolume()); return filteredPartitionInfo; } From d76f41aac39e7536f4acac265559fb136aa05b71 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Tue, 3 Nov 2015 09:33:16 +0900 Subject: [PATCH 013/127] When rewriting PartitionedTableScanNode, set partition paths and table volume. 
--- .../rules/PartitionedTableRewriter.java | 22 +++++++++++++++++++ .../tajo/plan/util/PartitionedTableUtil.java | 2 +- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java index 6c53d8a319..096ad06737 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java @@ -18,6 +18,8 @@ package org.apache.tajo.plan.rewrite.rules; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.tajo.OverridableConf; import org.apache.tajo.catalog.*; import org.apache.tajo.exception.*; @@ -25,12 +27,17 @@ import org.apache.tajo.plan.logical.*; import org.apache.tajo.plan.rewrite.LogicalPlanRewriteRule; import org.apache.tajo.plan.rewrite.LogicalPlanRewriteRuleContext; +import org.apache.tajo.plan.util.FilteredPartitionInfo; +import org.apache.tajo.plan.util.PartitionedTableUtil; import org.apache.tajo.plan.util.PlannerUtil; import org.apache.tajo.plan.visitor.BasicLogicalPlanVisitor; +import java.io.IOException; import java.util.*; public class PartitionedTableRewriter implements LogicalPlanRewriteRule { + private static final Log LOG = LogFactory.getLog(PartitionedTableRewriter.class); + private CatalogService catalog; private static final String NAME = "Partitioned Table Rewriter"; private final Rewriter rewriter = new Rewriter(); @@ -58,10 +65,17 @@ public boolean isEligible(LogicalPlanRewriteRuleContext context) { public LogicalPlan rewrite(LogicalPlanRewriteRuleContext context) throws TajoException { LogicalPlan plan = context.getPlan(); LogicalPlan.QueryBlock rootBlock = plan.getRootBlock(); + this.catalog = context.getCatalog(); rewriter.visit(context.getQueryContext(), plan, rootBlock, rootBlock.getRoot(), 
new Stack<>()); return plan; } + public FilteredPartitionInfo findFilteredPartitionInfo(OverridableConf conf, + PartitionedTableScanNode partitionedTableScanNode) throws IOException, UndefinedDatabaseException, + UndefinedTableException, UndefinedPartitionMethodException, UndefinedOperatorException, UnsupportedException { + return PartitionedTableUtil.findFilteredPartitionInfo(catalog, conf.getConf(), partitionedTableScanNode); + } + private final class Rewriter extends BasicLogicalPlanVisitor { @Override public Object visitScan(OverridableConf queryContext, LogicalPlan plan, LogicalPlan.QueryBlock block, @@ -75,6 +89,14 @@ public Object visitScan(OverridableConf queryContext, LogicalPlan plan, LogicalP PartitionedTableScanNode rewrittenScanNode = plan.createNode(PartitionedTableScanNode.class); rewrittenScanNode.init(scanNode); + try { + // If PartitionedTableScanNode doesn't have correct table volume, broadcast join might not run occasionally. + FilteredPartitionInfo filteredPartitionInfo = findFilteredPartitionInfo(queryContext, rewrittenScanNode); + rewrittenScanNode.getTableDesc().getStats().setNumBytes(filteredPartitionInfo.getTotalVolume()); + } catch (IOException e) { + throw new TajoInternalError("Partitioned Table Rewrite Failed: \n" + e.getMessage()); + } + // if it is topmost node, set it as the rootnode of this block. 
if (stack.empty() || block.getRoot().equals(scanNode)) { block.setRoot(rewrittenScanNode); diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java b/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java index 2494cea994..d4a08d32da 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java @@ -192,7 +192,7 @@ private static FilteredPartitionInfo findFilteredPartitionInfo(CatalogService ca } LOG.info("Total filtered input paths:" + filteredPartitionInfo.getPartitionPaths().length + - ", total filtered input volume:" + filteredPartitionInfo.getTotalVolume()); + ", volume:" + filteredPartitionInfo.getTotalVolume()); return filteredPartitionInfo; } From 9112ceb61547667020423bf4fbe18f99c07c2539 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Tue, 3 Nov 2015 10:47:32 +0900 Subject: [PATCH 014/127] Update the result message of partition pruning --- .../java/org/apache/tajo/plan/util/PartitionedTableUtil.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java b/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java index d4a08d32da..5b4ec589b9 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java @@ -191,8 +191,8 @@ private static FilteredPartitionInfo findFilteredPartitionInfo(CatalogService ca scanNode.setQual(AlgebraicUtil.createSingletonExprFromCNF(conjunctiveForms)); } - LOG.info("Total filtered input paths:" + filteredPartitionInfo.getPartitionPaths().length + - ", volume:" + filteredPartitionInfo.getTotalVolume()); + LOG.info("Filtered partition paths (num:" + filteredPartitionInfo.getPartitionPaths().length + + ", volume:" + filteredPartitionInfo.getTotalVolume() + ")"); 
return filteredPartitionInfo; } From 66c1c496b9fb55ee2d872e843ffe2c2481adbd60 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Fri, 20 Nov 2015 11:01:54 +0900 Subject: [PATCH 015/127] Remove unused member variable. --- .../fragment/PartitionedFileFragment.java | 28 +------------------ 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionedFileFragment.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionedFileFragment.java index 3909dc0cee..0b38ed4cd8 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionedFileFragment.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionedFileFragment.java @@ -42,7 +42,6 @@ public class PartitionedFileFragment implements Fragment, Comparable idList = new ArrayList<>(); - for(int eachId: diskIds) { - idList.add(eachId); - } - builder.addAllDiskIds(idList); - } if(hosts != null) { builder.addAllHosts(TUtil.newList(hosts)); From 25e23666ce826c5e0f2a64726f8e4e73ab204c2e Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Fri, 20 Nov 2015 11:06:54 +0900 Subject: [PATCH 016/127] Remove unused method --- .../main/java/org/apache/tajo/storage/FileTablespace.java | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java index 59e1b14ed8..be44c71a62 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java @@ -421,11 +421,6 @@ protected FileFragment makeSplit(String fragmentId, Path file, BlockLocation blo return new FileFragment(fragmentId, file, blockLocation); } - protected PartitionedFileFragment 
makePartitionSplit(String fragmentId, Path file, long start, long length, - String partitionName) { - return new PartitionedFileFragment(fragmentId, file, start, length, partitionName); - } - protected PartitionedFileFragment makePartitionSplit(String fragmentId, Path file, long start, long length, String[] hosts, String partitionName) { return new PartitionedFileFragment(fragmentId, file, start, length, hosts, partitionName); From c7f89f7b90cc65b8bdd294ab40426cebb73c99d0 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Fri, 20 Nov 2015 11:14:53 +0900 Subject: [PATCH 017/127] Separate partition processing logic from existing split method. --- .../tajo/querymaster/Repartitioner.java | 2 +- .../apache/tajo/storage/FileTablespace.java | 139 +++++++++++++----- 2 files changed, 102 insertions(+), 39 deletions(-) diff --git a/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java b/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java index 8d13a9c4cc..27c76a545c 100644 --- a/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java +++ b/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java @@ -473,7 +473,7 @@ public static List getFragmentsFromPartitionedTable(CatalogService cat FilteredPartitionInfo filteredPartitionInfo = PartitionedTableUtil.findFilteredPartitionInfo(catalog, conf, partitionsScan); - fragments.addAll(((FileTablespace) tsHandler).getSplits( + fragments.addAll(((FileTablespace) tsHandler).getPartitionSplits( scan.getCanonicalName(), table.getMeta(), table.getSchema(), filteredPartitionInfo.getPartitionNames(), filteredPartitionInfo.getPartitionPaths())); diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java index be44c71a62..5c15ae2c62 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java +++ 
b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java @@ -498,17 +498,12 @@ private int[] getDiskIds(VolumeId[] volumeIds) { return diskIds; } - public List getSplits(String tableName, TableMeta meta, Schema schema, Path... inputs) - throws IOException { - return getSplits(tableName, meta, schema, null, inputs); - } - /** * Generate the list of files and make them into FileSplits. * * @throws IOException */ - public List getSplits(String tableName, TableMeta meta, Schema schema, String[] partitions, Path... inputs) + public List getSplits(String tableName, TableMeta meta, Schema schema, Path... inputs) throws IOException { // generate splits' @@ -537,11 +532,7 @@ public List getSplits(String tableName, TableMeta meta, Schema schema, if (splittable) { for (BlockLocation blockLocation : blkLocations) { - if (partitions != null) { - volumeSplits.add(makePartitionSplit(tableName, path, blockLocation, partitions[i])); - } else { - volumeSplits.add(makeSplit(tableName, path, blockLocation)); - } + volumeSplits.add(makeSplit(tableName, path, blockLocation)); } blockLocations.addAll(Arrays.asList(blkLocations)); @@ -550,18 +541,10 @@ public List getSplits(String tableName, TableMeta meta, Schema schema, if (blockSize >= length) { blockLocations.addAll(Arrays.asList(blkLocations)); for (BlockLocation blockLocation : blkLocations) { - if (partitions != null) { - volumeSplits.add(makePartitionSplit(tableName, path, blockLocation, partitions[i])); - } else { - volumeSplits.add(makeSplit(tableName, path, blockLocation)); - } + volumeSplits.add(makeSplit(tableName, path, blockLocation)); } } else { - if (partitions != null) { - splits.add(makeNonSplit(tableName, path, 0, length, blkLocations, partitions[i])); - } else { - splits.add(makeNonSplit(tableName, path, 0, length, blkLocations)); - } + splits.add(makeNonSplit(tableName, path, 0, length, blkLocations)); } } @@ -578,33 +561,113 @@ public List getSplits(String tableName, TableMeta meta, 
Schema schema, while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) { int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); - if (partitions != null) { - splits.add(makePartitionSplit(tableName, path, length - bytesRemaining, splitSize, - blkLocations[blkIndex].getHosts(), partitions[i])); - } else { - splits.add(makeSplit(tableName, path, length - bytesRemaining, splitSize, - blkLocations[blkIndex].getHosts())); - } + splits.add(makeSplit(tableName, path, length - bytesRemaining, splitSize, + blkLocations[blkIndex].getHosts())); bytesRemaining -= splitSize; } if (bytesRemaining > 0) { int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); - if (partitions != null) { - splits.add(makePartitionSplit(tableName, path, length - bytesRemaining, bytesRemaining, - blkLocations[blkIndex].getHosts(), partitions[i])); - } else { - splits.add(makeSplit(tableName, path, length - bytesRemaining, bytesRemaining, - blkLocations[blkIndex].getHosts())); - } + splits.add(makeSplit(tableName, path, length - bytesRemaining, bytesRemaining, + blkLocations[blkIndex].getHosts())); } } else { // Non splittable - if (partitions != null) { - splits.add(makeNonSplit(tableName, path, 0, length, blkLocations, partitions[i])); + splits.add(makeNonSplit(tableName, path, 0, length, blkLocations)); + } + } + } + } + if(LOG.isDebugEnabled()){ + LOG.debug("# of splits per partition: " + (splits.size() - previousSplitSize)); + } + i++; + } + + // Combine original fileFragments with new VolumeId information + setVolumeMeta(volumeSplits, blockLocations); + splits.addAll(volumeSplits); + LOG.info("Total # of splits: " + splits.size()); + return splits; + } + + + /** + * Generate the list of files and make them into PartitionedFileSplits. + * + * @throws IOException + */ + public List getPartitionSplits(String tableName, TableMeta meta, Schema schema, String[] partitions, + Path... 
inputs) throws IOException { + // generate splits' + + List splits = Lists.newArrayList(); + List volumeSplits = Lists.newArrayList(); + List blockLocations = Lists.newArrayList(); + + int i = 0; + for (Path p : inputs) { + ArrayList files = Lists.newArrayList(); + if (fs.isFile(p)) { + files.addAll(Lists.newArrayList(fs.getFileStatus(p))); + } else { + files.addAll(listStatus(p)); + } + + int previousSplitSize = splits.size(); + for (FileStatus file : files) { + Path path = file.getPath(); + long length = file.getLen(); + if (length > 0) { + // Get locations of blocks of file + BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length); + boolean splittable = isSplittable(meta, schema, path, file); + if (blocksMetadataEnabled && fs instanceof DistributedFileSystem) { + + if (splittable) { + for (BlockLocation blockLocation : blkLocations) { + volumeSplits.add(makePartitionSplit(tableName, path, blockLocation, partitions[i])); + } + blockLocations.addAll(Arrays.asList(blkLocations)); + + } else { // Non splittable + long blockSize = blkLocations[0].getLength(); + if (blockSize >= length) { + blockLocations.addAll(Arrays.asList(blkLocations)); + for (BlockLocation blockLocation : blkLocations) { + volumeSplits.add(makePartitionSplit(tableName, path, blockLocation, partitions[i])); + } } else { - splits.add(makeNonSplit(tableName, path, 0, length, blkLocations)); + splits.add(makeNonSplit(tableName, path, 0, length, blkLocations, partitions[i])); + } + } + + } else { + if (splittable) { + + long minSize = Math.max(getMinSplitSize(), 1); + + long blockSize = file.getBlockSize(); // s3n rest api contained block size but blockLocations is one + long splitSize = Math.max(minSize, blockSize); + long bytesRemaining = length; + + // for s3 + while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) { + int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); + + splits.add(makePartitionSplit(tableName, path, length - bytesRemaining, splitSize, + 
blkLocations[blkIndex].getHosts(), partitions[i])); + + bytesRemaining -= splitSize; + } + if (bytesRemaining > 0) { + int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); + + splits.add(makePartitionSplit(tableName, path, length - bytesRemaining, bytesRemaining, + blkLocations[blkIndex].getHosts(), partitions[i])); } + } else { // Non splittable + splits.add(makeNonSplit(tableName, path, 0, length, blkLocations, partitions[i])); } } } From e670f25eab4965bd3d5bcfbaf0540194a7ed37d9 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Fri, 20 Nov 2015 11:22:30 +0900 Subject: [PATCH 018/127] Rename partitionName to partitionKeys in PartitionedFileFragmentProto --- .../storage/TestPartitionedFileFragment.java | 6 +++--- .../engine/planner/physical/SeqScanExec.java | 4 ++-- .../fragment/PartitionedFileFragment.java | 20 +++++++++---------- .../main/proto/StorageFragmentProtos.proto | 4 ++-- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/storage/TestPartitionedFileFragment.java b/tajo-core-tests/src/test/java/org/apache/tajo/storage/TestPartitionedFileFragment.java index ef9d7f9b25..83e7c9949b 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/storage/TestPartitionedFileFragment.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/storage/TestPartitionedFileFragment.java @@ -48,7 +48,7 @@ public final void testGetAndSetFields() { assertEquals("table1_1", fragment1.getTableName()); assertEquals(new Path(path, "table0/col1=1"), fragment1.getPath()); - assertEquals("col1=1", fragment1.getPartitionName()); + assertEquals("col1=1", fragment1.getPartitionKeys()); assertTrue(0 == fragment1.getStartKey()); assertTrue(500 == fragment1.getLength()); } @@ -61,7 +61,7 @@ public final void testGetProtoAndRestore() { PartitionedFileFragment fragment1 = FragmentConvertor.convert(PartitionedFileFragment.class, fragment.getProto()); assertEquals("table1_1", fragment1.getTableName()); assertEquals(new 
Path(path, "table0/col1=1"), fragment1.getPath()); - assertEquals("col1=1", fragment1.getPartitionName()); + assertEquals("col1=1", fragment1.getPartitionKeys()); assertTrue(0 == fragment1.getStartKey()); assertTrue(500 == fragment1.getLength()); } @@ -78,7 +78,7 @@ public final void testCompareTo() { Arrays.sort(tablets); for(int i = 0; i < num; i++) { - assertEquals("col1=" + i, tablets[i].getPartitionName()); + assertEquals("col1=" + i, tablets[i].getPartitionKeys()); } } diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java index e5d4e00475..52d06a933d 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java @@ -103,9 +103,9 @@ private void rewriteColumnPartitionedTableSchema() throws IOException { List partitionedFileFragments = FragmentConvertor.convert(PartitionedFileFragment .class, fragments); - // Get a partition key value from a given partition name + // Get first partition key from a given partition keys partitionRow = PartitionedTableUtil.buildTupleFromPartitionName(columnPartitionSchema, - partitionedFileFragments.get(0).getPartitionName()); + partitionedFileFragments.get(0).getPartitionKeys()); } // Targets or search conditions may contain column references. 
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionedFileFragment.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionedFileFragment.java index 0b38ed4cd8..4a9c42997f 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionedFileFragment.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionedFileFragment.java @@ -43,7 +43,7 @@ public class PartitionedFileFragment implements Fragment, Comparable Date: Sun, 22 Nov 2015 00:26:50 +0900 Subject: [PATCH 019/127] Rename PartitionedFileFragment to PartitionFileFragment --- ...nt.java => TestPartitionFileFragment.java} | 21 +++++------ .../engine/planner/physical/SeqScanExec.java | 8 ++-- .../apache/tajo/storage/FileTablespace.java | 12 +++--- ...agment.java => PartitionFileFragment.java} | 37 +++++++++---------- .../main/proto/StorageFragmentProtos.proto | 2 +- 5 files changed, 37 insertions(+), 43 deletions(-) rename tajo-core-tests/src/test/java/org/apache/tajo/storage/{TestPartitionedFileFragment.java => TestPartitionFileFragment.java} (73%) rename tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/{PartitionedFileFragment.java => PartitionFileFragment.java} (79%) diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/storage/TestPartitionedFileFragment.java b/tajo-core-tests/src/test/java/org/apache/tajo/storage/TestPartitionFileFragment.java similarity index 73% rename from tajo-core-tests/src/test/java/org/apache/tajo/storage/TestPartitionedFileFragment.java rename to tajo-core-tests/src/test/java/org/apache/tajo/storage/TestPartitionFileFragment.java index 83e7c9949b..f8ee48d117 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/storage/TestPartitionedFileFragment.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/storage/TestPartitionFileFragment.java @@ -20,9 +20,8 @@ import 
com.google.common.collect.Sets; import org.apache.hadoop.fs.Path; -import org.apache.tajo.storage.fragment.FileFragment; import org.apache.tajo.storage.fragment.FragmentConvertor; -import org.apache.tajo.storage.fragment.PartitionedFileFragment; +import org.apache.tajo.storage.fragment.PartitionFileFragment; import org.apache.tajo.util.CommonTestingUtil; import org.junit.Before; import org.junit.Test; @@ -33,7 +32,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; -public class TestPartitionedFileFragment { +public class TestPartitionFileFragment { private Path path; @Before @@ -43,7 +42,7 @@ public final void setUp() throws Exception { @Test public final void testGetAndSetFields() { - PartitionedFileFragment fragment1 = new PartitionedFileFragment("table1_1", new Path(path, "table0/col1=1"), + PartitionFileFragment fragment1 = new PartitionFileFragment("table1_1", new Path(path, "table0/col1=1"), 0, 500, "col1=1"); assertEquals("table1_1", fragment1.getTableName()); @@ -55,10 +54,10 @@ public final void testGetAndSetFields() { @Test public final void testGetProtoAndRestore() { - PartitionedFileFragment fragment = new PartitionedFileFragment("table1_1", new Path(path, "table0/col1=1"), 0, + PartitionFileFragment fragment = new PartitionFileFragment("table1_1", new Path(path, "table0/col1=1"), 0, 500, "col1=1"); - PartitionedFileFragment fragment1 = FragmentConvertor.convert(PartitionedFileFragment.class, fragment.getProto()); + PartitionFileFragment fragment1 = FragmentConvertor.convert(PartitionFileFragment.class, fragment.getProto()); assertEquals("table1_1", fragment1.getTableName()); assertEquals(new Path(path, "table0/col1=1"), fragment1.getPath()); assertEquals("col1=1", fragment1.getPartitionKeys()); @@ -69,9 +68,9 @@ public final void testGetProtoAndRestore() { @Test public final void testCompareTo() { final int num = 10; - PartitionedFileFragment[] tablets = new PartitionedFileFragment[num]; + 
PartitionFileFragment[] tablets = new PartitionFileFragment[num]; for (int i = num - 1; i >= 0; i--) { - tablets[i] = new PartitionedFileFragment("tablet1", new Path(path, "tablet0/col1=" + i), i * 500, (i+1) * 500 + tablets[i] = new PartitionFileFragment("tablet1", new Path(path, "tablet0/col1=" + i), i * 500, (i+1) * 500 , "col1=" + i); } @@ -85,14 +84,14 @@ public final void testCompareTo() { @Test public final void testCompareTo2() { final int num = 1860; - PartitionedFileFragment[] tablets = new PartitionedFileFragment[num]; + PartitionFileFragment[] tablets = new PartitionFileFragment[num]; for (int i = num - 1; i >= 0; i--) { - tablets[i] = new PartitionedFileFragment("tablet1", new Path(path, "tablet/col1=" +i), (long)i * 6553500, + tablets[i] = new PartitionFileFragment("tablet1", new Path(path, "tablet/col1=" +i), (long)i * 6553500, (long) (i+1) * 6553500, "col1=" + i); } SortedSet sortedSet = Sets.newTreeSet(); - for (PartitionedFileFragment frag : tablets) { + for (PartitionFileFragment frag : tablets) { sortedSet.add(frag); } assertEquals(num, sortedSet.size()); diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java index 52d06a933d..9eeff98496 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java @@ -33,13 +33,11 @@ import org.apache.tajo.plan.expr.EvalTreeUtil; import org.apache.tajo.plan.expr.FieldEval; import org.apache.tajo.plan.logical.ScanNode; -import org.apache.tajo.plan.rewrite.rules.PartitionedTableRewriter; import org.apache.tajo.plan.util.PartitionedTableUtil; import org.apache.tajo.plan.util.PlannerUtil; import org.apache.tajo.storage.*; -import org.apache.tajo.storage.fragment.FileFragment; import org.apache.tajo.storage.fragment.FragmentConvertor; -import 
org.apache.tajo.storage.fragment.PartitionedFileFragment; +import org.apache.tajo.storage.fragment.PartitionFileFragment; import org.apache.tajo.worker.TaskAttemptContext; import java.io.IOException; @@ -100,12 +98,12 @@ private void rewriteColumnPartitionedTableSchema() throws IOException { Tuple partitionRow = null; if (fragments != null && fragments.length > 0) { - List partitionedFileFragments = FragmentConvertor.convert(PartitionedFileFragment + List PartitionFileFragments = FragmentConvertor.convert(PartitionFileFragment .class, fragments); // Get first partition key from a given partition keys partitionRow = PartitionedTableUtil.buildTupleFromPartitionName(columnPartitionSchema, - partitionedFileFragments.get(0).getPartitionKeys()); + PartitionFileFragments.get(0).getPartitionKeys()); } // Targets or search conditions may contain column references. diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java index 5c15ae2c62..a60f8b8e97 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java @@ -42,7 +42,7 @@ import org.apache.tajo.plan.logical.NodeType; import org.apache.tajo.storage.fragment.FileFragment; import org.apache.tajo.storage.fragment.Fragment; -import org.apache.tajo.storage.fragment.PartitionedFileFragment; +import org.apache.tajo.storage.fragment.PartitionFileFragment; import org.apache.tajo.util.Bytes; import javax.annotation.Nullable; @@ -421,14 +421,14 @@ protected FileFragment makeSplit(String fragmentId, Path file, BlockLocation blo return new FileFragment(fragmentId, file, blockLocation); } - protected PartitionedFileFragment makePartitionSplit(String fragmentId, Path file, long start, long length, + protected PartitionFileFragment makePartitionSplit(String fragmentId, 
Path file, long start, long length, String[] hosts, String partitionName) { - return new PartitionedFileFragment(fragmentId, file, start, length, hosts, partitionName); + return new PartitionFileFragment(fragmentId, file, start, length, hosts, partitionName); } - protected PartitionedFileFragment makePartitionSplit(String fragmentId, Path file, BlockLocation blockLocation + protected PartitionFileFragment makePartitionSplit(String fragmentId, Path file, BlockLocation blockLocation , String partitionName) throws IOException { - return new PartitionedFileFragment(fragmentId, file, blockLocation, partitionName); + return new PartitionFileFragment(fragmentId, file, blockLocation, partitionName); } // for Non Splittable. eg, compressed gzip TextFile @@ -468,7 +468,7 @@ public int compare(Map.Entry v1, Map.Entry v2) } if (partitionName != null) { - return new PartitionedFileFragment(fragmentId, file, start, length, hosts, partitionName); + return new PartitionFileFragment(fragmentId, file, start, length, hosts, partitionName); } else { return new FileFragment(fragmentId, file, start, length, hosts); } diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionedFileFragment.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java similarity index 79% rename from tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionedFileFragment.java rename to tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java index 4a9c42997f..c3d6588d31 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionedFileFragment.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java @@ -25,17 +25,14 @@ import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.Path; import org.apache.tajo.BuiltinStorages; -import 
org.apache.tajo.storage.StorageFragmentProtos.PartitionedFileFragmentProto; +import org.apache.tajo.storage.StorageFragmentProtos.PartitionFileFragmentProto; import org.apache.tajo.util.TUtil; import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; import static org.apache.tajo.catalog.proto.CatalogProtos.FragmentProto; -public class PartitionedFileFragment implements Fragment, Comparable, Cloneable { +public class PartitionFileFragment implements Fragment, Comparable, Cloneable { @Expose private String tableName; // required @Expose private Path uri; // required @Expose public Long startOffset; // required @@ -45,39 +42,39 @@ public class PartitionedFileFragment implements Fragment, Comparable Date: Sun, 22 Nov 2015 00:33:00 +0900 Subject: [PATCH 020/127] Recover partition paths in LogicalNode --- .../logical/PartitionedTableScanNode.java | 26 +- .../rules/PartitionedTableRewriter.java | 451 +++++++++++++++++- 2 files changed, 454 insertions(+), 23 deletions(-) diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java b/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java index 24368924ae..d83f8bf0dd 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java @@ -19,6 +19,8 @@ package org.apache.tajo.plan.logical; import com.google.common.base.Objects; +import com.google.gson.annotations.Expose; +import org.apache.hadoop.fs.Path; import org.apache.tajo.catalog.TableDesc; import org.apache.tajo.plan.PlanString; import org.apache.tajo.plan.Target; @@ -28,22 +30,33 @@ import java.util.ArrayList; public class PartitionedTableScanNode extends ScanNode { + @Expose Path [] inputPaths; + public PartitionedTableScanNode(int pid) { super(pid, NodeType.PARTITIONS_SCAN); } - public void init(ScanNode scanNode) { + public void 
init(ScanNode scanNode, Path[] inputPaths) { tableDesc = scanNode.tableDesc; setInSchema(scanNode.getInSchema()); setOutSchema(scanNode.getOutSchema()); this.qual = scanNode.qual; this.targets = scanNode.targets; + this.inputPaths = inputPaths; if (scanNode.hasAlias()) { alias = scanNode.alias; } } + public void setInputPaths(Path [] paths) { + this.inputPaths = paths; + } + + public Path [] getInputPaths() { + return inputPaths; + } + public String toString() { StringBuilder sb = new StringBuilder("Partitions Scan (table=").append(getTableName()); if (hasAlias()) { @@ -70,6 +83,7 @@ public boolean equals(Object obj) { eq = eq && TUtil.checkEquals(this.tableDesc, other.tableDesc); eq = eq && TUtil.checkEquals(this.qual, other.qual); eq = eq && TUtil.checkEquals(this.targets, other.targets); + eq = eq && TUtil.checkEquals(this.inputPaths, other.inputPaths); return eq; } @@ -94,6 +108,8 @@ public Object clone() throws CloneNotSupportedException { } } + unionScan.inputPaths = inputPaths; + return unionScan; } @@ -132,6 +148,14 @@ public PlanString getPlanString() { planStr.addDetail("out schema: ").appendDetail(getOutSchema().toString()); planStr.addDetail("in schema: ").appendDetail(getInSchema().toString()); + if (inputPaths != null) { + planStr.addExplan("num of filtered paths: ").appendExplain(""+ inputPaths.length); + int i = 0; + for (Path path : inputPaths) { + planStr.addDetail((i++) + ": ").appendDetail(path.toString()); + } + } + return planStr; } } diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java index 096ad06737..d091915153 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java @@ -18,26 +18,40 @@ package org.apache.tajo.plan.rewrite.rules; +import 
com.google.common.collect.Lists; +import com.google.common.collect.Sets; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.*; import org.apache.tajo.OverridableConf; import org.apache.tajo.catalog.*; +import org.apache.tajo.catalog.partition.PartitionMethodDesc; +import org.apache.tajo.catalog.proto.CatalogProtos.PartitionsByAlgebraProto; +import org.apache.tajo.catalog.proto.CatalogProtos.PartitionDescProto; +import org.apache.tajo.datum.DatumFactory; +import org.apache.tajo.datum.NullDatum; import org.apache.tajo.exception.*; import org.apache.tajo.plan.LogicalPlan; +import org.apache.tajo.plan.expr.*; import org.apache.tajo.plan.logical.*; import org.apache.tajo.plan.rewrite.LogicalPlanRewriteRule; import org.apache.tajo.plan.rewrite.LogicalPlanRewriteRuleContext; -import org.apache.tajo.plan.util.FilteredPartitionInfo; -import org.apache.tajo.plan.util.PartitionedTableUtil; +import org.apache.tajo.plan.util.EvalNodeToExprConverter; import org.apache.tajo.plan.util.PlannerUtil; import org.apache.tajo.plan.visitor.BasicLogicalPlanVisitor; +import org.apache.tajo.storage.Tuple; +import org.apache.tajo.storage.VTuple; +import org.apache.tajo.util.StringUtils; import java.io.IOException; import java.util.*; public class PartitionedTableRewriter implements LogicalPlanRewriteRule { - private static final Log LOG = LogFactory.getLog(PartitionedTableRewriter.class); private CatalogService catalog; + private long totalVolume; + + private static final Log LOG = LogFactory.getLog(PartitionedTableRewriter.class); + private static final String NAME = "Partitioned Table Rewriter"; private final Rewriter rewriter = new Rewriter(); @@ -70,12 +84,407 @@ public LogicalPlan rewrite(LogicalPlanRewriteRuleContext context) throws TajoExc return plan; } - public FilteredPartitionInfo findFilteredPartitionInfo(OverridableConf conf, - PartitionedTableScanNode partitionedTableScanNode) throws IOException, 
UndefinedDatabaseException, - UndefinedTableException, UndefinedPartitionMethodException, UndefinedOperatorException, UnsupportedException { - return PartitionedTableUtil.findFilteredPartitionInfo(catalog, conf.getConf(), partitionedTableScanNode); + private static class PartitionPathFilter implements PathFilter { + + private Schema schema; + private EvalNode partitionFilter; + public PartitionPathFilter(Schema schema, EvalNode partitionFilter) { + this.schema = schema; + this.partitionFilter = partitionFilter; + partitionFilter.bind(null, schema); + } + + @Override + public boolean accept(Path path) { + Tuple tuple = buildTupleFromPartitionPath(schema, path, true); + if (tuple == null) { // if it is a file or not acceptable file + return false; + } + + return partitionFilter.eval(tuple).asBool(); + } + + @Override + public String toString() { + return partitionFilter.toString(); + } + } + + private Path [] findFilteredPaths(OverridableConf queryContext, String tableName, + Schema partitionColumns, EvalNode [] conjunctiveForms, Path tablePath) + throws IOException, UndefinedDatabaseException, UndefinedTableException, UndefinedPartitionMethodException, + UndefinedOperatorException, UnsupportedException { + return findFilteredPaths(queryContext, tableName, partitionColumns, conjunctiveForms, tablePath, null); + } + + /** + * It assumes that each conjunctive form corresponds to one column. + * + * @param partitionColumns + * @param conjunctiveForms search condition corresponding to partition columns. + * If it is NULL, it means that there is no search condition for this table. 
+ * @param tablePath + * @return + * @throws IOException + */ + private Path [] findFilteredPaths(OverridableConf queryContext, String tableName, + Schema partitionColumns, EvalNode [] conjunctiveForms, Path tablePath, ScanNode scanNode) + throws IOException, UndefinedDatabaseException, UndefinedTableException, UndefinedPartitionMethodException, + UndefinedOperatorException, UnsupportedException { + + Path [] filteredPaths = null; + FileSystem fs = tablePath.getFileSystem(queryContext.getConf()); + String [] splits = CatalogUtil.splitFQTableName(tableName); + List partitions = null; + + try { + if (conjunctiveForms == null) { + partitions = catalog.getPartitionsOfTable(splits[0], splits[1]); + if (partitions.isEmpty()) { + filteredPaths = findFilteredPathsFromFileSystem(partitionColumns, conjunctiveForms, fs, tablePath); + } else { + filteredPaths = findFilteredPathsByPartitionDesc(partitions); + } + } else { + if (catalog.existPartitions(splits[0], splits[1])) { + PartitionsByAlgebraProto request = getPartitionsAlgebraProto(splits[0], splits[1], conjunctiveForms); + partitions = catalog.getPartitionsByAlgebra(request); + filteredPaths = findFilteredPathsByPartitionDesc(partitions); + } else { + filteredPaths = findFilteredPathsFromFileSystem(partitionColumns, conjunctiveForms, fs, tablePath); + } + } + } catch (UnsupportedException ue) { + // Partial catalog might not allow some filter conditions. For example, HiveMetastore doesn't In statement, + // regexp statement and so on. Above case, Tajo need to build filtered path by listing hdfs directories. 
+ LOG.warn(ue.getMessage()); + partitions = catalog.getPartitionsOfTable(splits[0], splits[1]); + if (partitions.isEmpty()) { + filteredPaths = findFilteredPathsFromFileSystem(partitionColumns, conjunctiveForms, fs, tablePath); + } else { + filteredPaths = findFilteredPathsByPartitionDesc(partitions); + } + scanNode.setQual(AlgebraicUtil.createSingletonExprFromCNF(conjunctiveForms)); + } + + LOG.info("Filtered directory or files: " + filteredPaths.length); + return filteredPaths; } + /** + * Build list of partition path by PartitionDescProto which is generated from CatalogStore. + * + * @param partitions + * @return + */ + private Path[] findFilteredPathsByPartitionDesc(List partitions) { + Path [] filteredPaths = new Path[partitions.size()]; + for (int i = 0; i < partitions.size(); i++) { + PartitionDescProto partition = partitions.get(i); + filteredPaths[i] = new Path(partition.getPath()); + totalVolume += partition.getNumBytes(); + } + return filteredPaths; + } + + /** + * Build list of partition path by filtering directories in the given table path. + * + * + * @param partitionColumns + * @param conjunctiveForms + * @param fs + * @param tablePath + * @return + * @throws IOException + */ + private Path [] findFilteredPathsFromFileSystem(Schema partitionColumns, EvalNode [] conjunctiveForms, + FileSystem fs, Path tablePath) throws IOException{ + Path [] filteredPaths = null; + PathFilter [] filters; + + if (conjunctiveForms == null) { + filters = buildAllAcceptingPathFilters(partitionColumns); + } else { + filters = buildPathFiltersForAllLevels(partitionColumns, conjunctiveForms); + } + + // loop from one to the number of partition columns + filteredPaths = toPathArray(fs.listStatus(tablePath, filters[0])); + + for (int i = 1; i < partitionColumns.size(); i++) { + // Get all file status matched to a ith level path filter. 
+ filteredPaths = toPathArray(fs.listStatus(filteredPaths, filters[i])); + } + return filteredPaths; + } + + /** + * Build algebra expressions for querying partitions and partition keys by using EvalNodeToExprConverter. + * + * @param databaseName the database name + * @param tableName the table name + * @param conjunctiveForms EvalNode which contains filter conditions + * @return + */ + public static PartitionsByAlgebraProto getPartitionsAlgebraProto( + String databaseName, String tableName, EvalNode [] conjunctiveForms) { + + PartitionsByAlgebraProto.Builder request = PartitionsByAlgebraProto.newBuilder(); + request.setDatabaseName(databaseName); + request.setTableName(tableName); + + if (conjunctiveForms != null) { + EvalNode evalNode = AlgebraicUtil.createSingletonExprFromCNF(conjunctiveForms); + EvalNodeToExprConverter convertor = new EvalNodeToExprConverter(databaseName + "." + tableName); + convertor.visit(null, evalNode, new Stack<>()); + request.setAlgebra(convertor.getResult().toJson()); + } else { + request.setAlgebra(""); + } + + return request.build(); + } + + /** + * Build path filters for all levels with a list of filter conditions. + * + * For example, consider you have a partitioned table for three columns (i.e., col1, col2, col3). + * Then, this methods will create three path filters for (col1), (col1, col2), (col1, col2, col3). + * + * Corresponding filter conditions will be placed on each path filter, + * If there is no corresponding expression for certain column, + * The condition will be filled with a true value. + * + * Assume that an user gives a condition WHERE col1 ='A' and col3 = 'C'. + * There is no filter condition corresponding to col2. 
+ * Then, the path filter conditions are corresponding to the followings: + * + * The first path filter: col1 = 'A' + * The second path filter: col1 = 'A' AND col2 IS NOT NULL + * The third path filter: col1 = 'A' AND col2 IS NOT NULL AND col3 = 'C' + * + * 'IS NOT NULL' predicate is always true against the partition path. + * + * @param partitionColumns + * @param conjunctiveForms + * @return + */ + private static PathFilter [] buildPathFiltersForAllLevels(Schema partitionColumns, + EvalNode [] conjunctiveForms) { + // Building partition path filters for all levels + Column target; + PathFilter [] filters = new PathFilter[partitionColumns.size()]; + List accumulatedFilters = Lists.newArrayList(); + for (int i = 0; i < partitionColumns.size(); i++) { // loop from one to level + target = partitionColumns.getColumn(i); + + for (EvalNode expr : conjunctiveForms) { + if (EvalTreeUtil.findUniqueColumns(expr).contains(target)) { + // Accumulate one qual per level + accumulatedFilters.add(expr); + } + } + + if (accumulatedFilters.size() < (i + 1)) { + accumulatedFilters.add(new IsNullEval(true, new FieldEval(target))); + } + + EvalNode filterPerLevel = AlgebraicUtil.createSingletonExprFromCNF( + accumulatedFilters.toArray(new EvalNode[accumulatedFilters.size()])); + filters[i] = new PartitionPathFilter(partitionColumns, filterPerLevel); + } + + return filters; + } + + /** + * Build an array of path filters for all levels with all accepting filter condition. + * @param partitionColumns The partition columns schema + * @return The array of path filter, accpeting all partition paths. 
+ */ + public static PathFilter [] buildAllAcceptingPathFilters(Schema partitionColumns) { + Column target; + PathFilter [] filters = new PathFilter[partitionColumns.size()]; + List accumulatedFilters = Lists.newArrayList(); + for (int i = 0; i < partitionColumns.size(); i++) { // loop from one to level + target = partitionColumns.getColumn(i); + accumulatedFilters.add(new IsNullEval(true, new FieldEval(target))); + + EvalNode filterPerLevel = AlgebraicUtil.createSingletonExprFromCNF( + accumulatedFilters.toArray(new EvalNode[accumulatedFilters.size()])); + filters[i] = new PartitionPathFilter(partitionColumns, filterPerLevel); + } + return filters; + } + + private Path [] toPathArray(FileStatus[] fileStatuses) { + Path [] paths = new Path[fileStatuses.length]; + for (int i = 0; i < fileStatuses.length; i++) { + FileStatus fileStatus = fileStatuses[i]; + paths[i] = fileStatus.getPath(); + totalVolume += fileStatus.getLen(); + } + return paths; + } + + public Path [] findFilteredPartitionPaths(OverridableConf queryContext, ScanNode scanNode) throws IOException, + UndefinedDatabaseException, UndefinedTableException, UndefinedPartitionMethodException, + UndefinedOperatorException, UnsupportedException { + TableDesc table = scanNode.getTableDesc(); + PartitionMethodDesc partitionDesc = scanNode.getTableDesc().getPartitionMethod(); + + Schema paritionValuesSchema = new Schema(); + for (Column column : partitionDesc.getExpressionSchema().getRootColumns()) { + paritionValuesSchema.addColumn(column); + } + + Set indexablePredicateSet = Sets.newHashSet(); + + // if a query statement has a search condition, try to find indexable predicates + if (scanNode.hasQual()) { + EvalNode [] conjunctiveForms = AlgebraicUtil.toConjunctiveNormalFormArray(scanNode.getQual()); + Set remainExprs = Sets.newHashSet(conjunctiveForms); + + // add qualifier to schema for qual + paritionValuesSchema.setQualifier(scanNode.getCanonicalName()); + for (Column column : 
paritionValuesSchema.getRootColumns()) { + for (EvalNode simpleExpr : conjunctiveForms) { + if (checkIfIndexablePredicateOnTargetColumn(simpleExpr, column)) { + indexablePredicateSet.add(simpleExpr); + } + } + } + + // Partitions which are not matched to the partition filter conditions are pruned immediately. + // So, the partition filter conditions are not necessary later, and they are removed from + // original search condition for simplicity and efficiency. + remainExprs.removeAll(indexablePredicateSet); + if (remainExprs.isEmpty()) { + scanNode.setQual(null); + } else { + scanNode.setQual( + AlgebraicUtil.createSingletonExprFromCNF(remainExprs.toArray(new EvalNode[remainExprs.size()]))); + } + } + + if (indexablePredicateSet.size() > 0) { // There are at least one indexable predicates + return findFilteredPaths(queryContext, table.getName(), paritionValuesSchema, + indexablePredicateSet.toArray(new EvalNode[indexablePredicateSet.size()]), new Path(table.getUri()), scanNode); + } else { // otherwise, we will get all partition paths. + return findFilteredPaths(queryContext, table.getName(), paritionValuesSchema, null, new Path(table.getUri())); + } + } + + private boolean checkIfIndexablePredicateOnTargetColumn(EvalNode evalNode, Column targetColumn) { + if (checkIfIndexablePredicate(evalNode) || checkIfDisjunctiveButOneVariable(evalNode)) { + Set variables = EvalTreeUtil.findUniqueColumns(evalNode); + // if it contains only single variable matched to a target column + return variables.size() == 1 && variables.contains(targetColumn); + } else { + return false; + } + } + + /** + * Check if an expression consists of one variable and one constant and + * the expression is a comparison operator. + * + * @param evalNode The expression to be checked + * @return true if an expression consists of one variable and one constant + * and the expression is a comparison operator. Other, false. 
+ */ + private boolean checkIfIndexablePredicate(EvalNode evalNode) { + // TODO - LIKE with a trailing wild-card character and IN with an array can be indexable + return AlgebraicUtil.containSingleVar(evalNode) && AlgebraicUtil.isIndexableOperator(evalNode); + } + + /** + * + * @param evalNode The expression to be checked + * @return true if an disjunctive expression, consisting of indexable expressions + */ + private boolean checkIfDisjunctiveButOneVariable(EvalNode evalNode) { + if (evalNode.getType() == EvalType.OR) { + BinaryEval orEval = (BinaryEval) evalNode; + boolean indexable = + checkIfIndexablePredicate(orEval.getLeftExpr()) && + checkIfIndexablePredicate(orEval.getRightExpr()); + + boolean sameVariable = + EvalTreeUtil.findUniqueColumns(orEval.getLeftExpr()) + .equals(EvalTreeUtil.findUniqueColumns(orEval.getRightExpr())); + + return indexable && sameVariable; + } else { + return false; + } + } + + /** + * Take a look at a column partition path. A partition path consists + * of a table path part and column values part. This method transforms + * a partition path into a tuple with a given partition column schema. + * + * hdfs://192.168.0.1/tajo/warehouse/table1/col1=abc/col2=def/col3=ghi + * ^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^ + * table path part column values part + * + * When a file path is given, it can perform two ways depending on beNullIfFile flag. + * If it is true, it returns NULL when a given path is a file. + * Otherwise, it returns a built tuple regardless of file or directory. + * + * @param partitionColumnSchema The partition column schema + * @param partitionPath The partition path + * @param beNullIfFile If true, this method returns NULL when a given path is a file. + * @return The tuple transformed from a column values part. 
+ */ + public static Tuple buildTupleFromPartitionPath(Schema partitionColumnSchema, Path partitionPath, + boolean beNullIfFile) { + int startIdx = partitionPath.toString().indexOf(getColumnPartitionPathPrefix(partitionColumnSchema)); + + if (startIdx == -1) { // if there is no partition column in the patch + return null; + } + String columnValuesPart = partitionPath.toString().substring(startIdx); + + String [] columnValues = columnValuesPart.split("/"); + + // true means this is a file. + if (beNullIfFile && partitionColumnSchema.size() < columnValues.length) { + return null; + } + + Tuple tuple = new VTuple(partitionColumnSchema.size()); + int i = 0; + for (; i < columnValues.length && i < partitionColumnSchema.size(); i++) { + String [] parts = columnValues[i].split("="); + if (parts.length != 2) { + return null; + } + int columnId = partitionColumnSchema.getColumnIdByName(parts[0]); + Column keyColumn = partitionColumnSchema.getColumn(columnId); + tuple.put(columnId, DatumFactory.createFromString(keyColumn.getDataType(), StringUtils.unescapePathName(parts[1]))); + } + for (; i < partitionColumnSchema.size(); i++) { + tuple.put(i, NullDatum.get()); + } + return tuple; + } + + /** + * Get a prefix of column partition path. For example, consider a column partition (col1, col2). + * Then, you will get a string 'col1='. + * + * @param partitionColumn the schema of column partition + * @return The first part string of column partition path. 
+ */ + public static String getColumnPartitionPathPrefix(Schema partitionColumn) { + StringBuilder sb = new StringBuilder(); + sb.append(partitionColumn.getColumn(0).getSimpleName()).append("="); + return sb.toString(); + } + private final class Rewriter extends BasicLogicalPlanVisitor { @Override public Object visitScan(OverridableConf queryContext, LogicalPlan plan, LogicalPlan.QueryBlock block, @@ -86,25 +495,23 @@ public Object visitScan(OverridableConf queryContext, LogicalPlan plan, LogicalP return null; } - PartitionedTableScanNode rewrittenScanNode = plan.createNode(PartitionedTableScanNode.class); - rewrittenScanNode.init(scanNode); - try { - // If PartitionedTableScanNode doesn't have correct table volume, broadcast join might not run occasionally. - FilteredPartitionInfo filteredPartitionInfo = findFilteredPartitionInfo(queryContext, rewrittenScanNode); - rewrittenScanNode.getTableDesc().getStats().setNumBytes(filteredPartitionInfo.getTotalVolume()); + Path [] filteredPaths = findFilteredPartitionPaths(queryContext, scanNode); + plan.addHistory("PartitionTableRewriter chooses " + filteredPaths.length + " of partitions"); + PartitionedTableScanNode rewrittenScanNode = plan.createNode(PartitionedTableScanNode.class); + rewrittenScanNode.init(scanNode, filteredPaths); + rewrittenScanNode.getTableDesc().getStats().setNumBytes(totalVolume); + + // if it is topmost node, set it as the rootnode of this block. + if (stack.empty() || block.getRoot().equals(scanNode)) { + block.setRoot(rewrittenScanNode); + } else { + PlannerUtil.replaceNode(plan, stack.peek(), scanNode, rewrittenScanNode); + } + block.registerNode(rewrittenScanNode); } catch (IOException e) { throw new TajoInternalError("Partitioned Table Rewrite Failed: \n" + e.getMessage()); } - - // if it is topmost node, set it as the rootnode of this block. 
- if (stack.empty() || block.getRoot().equals(scanNode)) { - block.setRoot(rewrittenScanNode); - } else { - PlannerUtil.replaceNode(plan, stack.peek(), scanNode, rewrittenScanNode); - } - block.registerNode(rewrittenScanNode); - return null; } } From f9fcd273abb8960ff1663ffd181410d26a2a6681 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Sun, 22 Nov 2015 00:35:43 +0900 Subject: [PATCH 021/127] Add PartitionedTableWriter::buildTupleFromPartitionName --- .../rules/PartitionedTableRewriter.java | 41 ++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java index d091915153..ec57347154 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java @@ -18,6 +18,7 @@ package org.apache.tajo.plan.rewrite.rules; +import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Sets; import org.apache.commons.logging.Log; @@ -484,7 +485,7 @@ public static String getColumnPartitionPathPrefix(Schema partitionColumn) { sb.append(partitionColumn.getColumn(0).getSimpleName()).append("="); return sb.toString(); } - + private final class Rewriter extends BasicLogicalPlanVisitor { @Override public Object visitScan(OverridableConf queryContext, LogicalPlan plan, LogicalPlan.QueryBlock block, @@ -515,4 +516,42 @@ public Object visitScan(OverridableConf queryContext, LogicalPlan plan, LogicalP return null; } } + + /** + * This transforms a partition name into a tupe with a given partition column schema. When a file path + * Assume that an user gives partition name 'country=KOREA/city=SEOUL'. 
+ * + * The first datum of tuple : KOREA + * The second datum of tuple : SEOUL + * + * @param partitionColumnSchema The partition column schema + * @param partitionName The partition name + * @return The tuple transformed from a column values part. + */ + public static Tuple buildTupleFromPartitionName(Schema partitionColumnSchema, String partitionName) { + Preconditions.checkNotNull(partitionColumnSchema); + Preconditions.checkNotNull(partitionName); + + String [] columnValues = partitionName.split("/"); + Preconditions.checkArgument(partitionColumnSchema.size() >= columnValues.length, + "Invalid Partition Name :" + partitionName); + + Tuple tuple = new VTuple(partitionColumnSchema.size()); + + for (int i = 0; i < tuple.size(); i++) { + tuple.put(i, NullDatum.get()); + } + + for (int i = 0; i < columnValues.length; i++) { + String [] parts = columnValues[i].split("="); + if (parts.length == 2) { + int columnId = partitionColumnSchema.getColumnIdByName(parts[0]); + Column keyColumn = partitionColumnSchema.getColumn(columnId); + tuple.put(columnId, DatumFactory.createFromString(keyColumn.getDataType(), + StringUtils.unescapePathName(parts[1]))); + } + } + + return tuple; + } } From 344384b5e7708be4125ba7a3bf578f13030516b4 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 23 Nov 2015 12:19:16 +0900 Subject: [PATCH 022/127] PartitionedTableRewriter should set PartitionContent --- .../planner/TestPartitionedTableUtil.java | 476 ---------------- .../tajo/engine/util/TestTupleUtil.java | 28 +- .../testInnerAndOuterWithEmpty.1.Hash.plan | 4 +- ...rAndOuterWithEmpty.1.Hash_NoBroadcast.plan | 4 +- .../testInnerAndOuterWithEmpty.1.Sort.plan | 4 +- ...rAndOuterWithEmpty.1.Sort_NoBroadcast.plan | 4 +- .../rewriter/rules/BroadcastJoinRule.java | 7 +- .../rewriter/rules/GlobalPlanRewriteUtil.java | 14 +- .../engine/planner/physical/SeqScanExec.java | 16 +- .../tajo/master/TajoMasterClientService.java | 4 +- .../apache/tajo/master/exec/DDLExecutor.java | 5 +- 
.../exec/ExplainPlanPreprocessorForTest.java | 3 + .../NonForwardQueryResultFileScanner.java | 12 +- .../tajo/master/exec/QueryExecutor.java | 2 +- .../tajo/querymaster/Repartitioner.java | 57 +- .../org/apache/tajo/querymaster/Stage.java | 10 +- .../ws/rs/resources/QueryResultResource.java | 2 +- .../logical/PartitionedTableScanNode.java | 22 +- .../tajo/plan/partition/PartitionContent.java | 69 +++ .../rules/PartitionedTableRewriter.java | 133 +++-- .../plan/serder/LogicalNodeDeserializer.java | 7 + .../plan/serder/LogicalNodeSerializer.java | 11 + .../tajo/plan/util/PartitionedTableUtil.java | 512 ------------------ .../verifier/PostLogicalPlanVerifier.java | 7 + tajo-plan/src/main/proto/Plan.proto | 9 + 25 files changed, 313 insertions(+), 1109 deletions(-) delete mode 100644 tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableUtil.java create mode 100644 tajo-plan/src/main/java/org/apache/tajo/plan/partition/PartitionContent.java delete mode 100644 tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableUtil.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableUtil.java deleted file mode 100644 index d7234f3ef0..0000000000 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableUtil.java +++ /dev/null @@ -1,476 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.tajo.engine.planner; - -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.tajo.LocalTajoTestingUtility; -import org.apache.tajo.QueryTestCaseBase; -import org.apache.tajo.algebra.Expr; -import org.apache.tajo.catalog.CatalogUtil; -import org.apache.tajo.catalog.Schema; -import org.apache.tajo.catalog.TableDesc; -import org.apache.tajo.catalog.TableMeta; -import org.apache.tajo.catalog.partition.PartitionMethodDesc; -import org.apache.tajo.catalog.proto.CatalogProtos; -import org.apache.tajo.common.TajoDataTypes; -import org.apache.tajo.conf.TajoConf; -import org.apache.tajo.engine.query.QueryContext; -import org.apache.tajo.plan.LogicalPlan; -import org.apache.tajo.plan.logical.*; -import org.apache.tajo.plan.util.FilteredPartitionInfo; -import org.apache.tajo.plan.util.PartitionedTableUtil; -import org.apache.tajo.util.FileUtil; -import org.apache.tajo.util.KeyValueSet; -import org.junit.AfterClass; -import org.junit.BeforeClass; -import org.junit.Test; - -import static org.junit.Assert.*; - -public class TestPartitionedTableUtil extends QueryTestCaseBase { - - final static String PARTITION_TABLE_NAME = "tb_partition"; - final static String MULTIPLE_PARTITION_TABLE_NAME = "tb_multiple_partition"; - - @BeforeClass - public static void setUp() throws Exception { - FileSystem fs = FileSystem.get(conf); - Path rootDir = TajoConf.getWarehouseDir(testingCluster.getConfiguration()); - - Schema schema = new Schema(); - schema.addColumn("n_nationkey", TajoDataTypes.Type.INT8); - 
schema.addColumn("n_name", TajoDataTypes.Type.TEXT); - schema.addColumn("n_regionkey", TajoDataTypes.Type.INT8); - - TableMeta meta = CatalogUtil.newTableMeta("TEXT", new KeyValueSet()); - - createExternalTableIncludedOnePartitionKeyColumn(fs, rootDir, schema, meta); - createExternalTableIncludedMultiplePartitionKeyColumns(fs, rootDir, schema, meta); - } - - private static void createExternalTableIncludedOnePartitionKeyColumn(FileSystem fs, Path rootDir, Schema schema, - TableMeta meta) throws Exception { - Schema partSchema = new Schema(); - partSchema.addColumn("key", TajoDataTypes.Type.TEXT); - - PartitionMethodDesc partitionMethodDesc = - new PartitionMethodDesc("TestPartitionedTableUtil", PARTITION_TABLE_NAME, - CatalogProtos.PartitionType.COLUMN, "key", partSchema); - - Path tablePath = new Path(rootDir, PARTITION_TABLE_NAME); - fs.mkdirs(tablePath); - - client.createExternalTable(PARTITION_TABLE_NAME, schema, tablePath.toUri(), meta, partitionMethodDesc); - - TableDesc tableDesc = client.getTableDesc(PARTITION_TABLE_NAME); - assertNotNull(tableDesc); - - Path path = new Path(tableDesc.getUri().toString() + "/key=part123"); - fs.mkdirs(path); - FileUtil.writeTextToFile("1|ARGENTINA|1", new Path(path, "data")); - - path = new Path(tableDesc.getUri().toString() + "/key=part456"); - fs.mkdirs(path); - FileUtil.writeTextToFile("2|BRAZIL|1", new Path(path, "data")); - - path = new Path(tableDesc.getUri().toString() + "/key=part789"); - fs.mkdirs(path); - FileUtil.writeTextToFile("3|CANADA|1", new Path(path, "data")); - } - - private static void createExternalTableIncludedMultiplePartitionKeyColumns(FileSystem fs, Path rootDir, - Schema schema, TableMeta meta) throws Exception { - Schema partSchema = new Schema(); - partSchema.addColumn("key1", TajoDataTypes.Type.TEXT); - partSchema.addColumn("key2", TajoDataTypes.Type.TEXT); - partSchema.addColumn("key3", TajoDataTypes.Type.INT8); - - PartitionMethodDesc partitionMethodDesc = - new 
PartitionMethodDesc("TestPartitionedTableUtil", MULTIPLE_PARTITION_TABLE_NAME, - CatalogProtos.PartitionType.COLUMN, "key1,key2,key3", partSchema); - - Path tablePath = new Path(rootDir, MULTIPLE_PARTITION_TABLE_NAME); - fs.mkdirs(tablePath); - - client.createExternalTable(MULTIPLE_PARTITION_TABLE_NAME, schema, tablePath.toUri(), meta, partitionMethodDesc); - - TableDesc tableDesc = client.getTableDesc(MULTIPLE_PARTITION_TABLE_NAME); - assertNotNull(tableDesc); - - Path path = new Path(tableDesc.getUri().toString() + "/key1=part123"); - fs.mkdirs(path); - path = new Path(tableDesc.getUri().toString() + "/key1=part123/key2=supp123"); - fs.mkdirs(path); - path = new Path(tableDesc.getUri().toString() + "/key1=part123/key2=supp123/key3=1"); - fs.mkdirs(path); - FileUtil.writeTextToFile("1|ARGENTINA|1", new Path(path, "data")); - - path = new Path(tableDesc.getUri().toString() + "/key1=part123/key2=supp123/key3=2"); - fs.mkdirs(path); - FileUtil.writeTextToFile("2|BRAZIL|1", new Path(path, "data")); - - path = new Path(tableDesc.getUri().toString() + "/key1=part789"); - fs.mkdirs(path); - path = new Path(tableDesc.getUri().toString() + "/key1=part789/key2=supp789"); - fs.mkdirs(path); - path = new Path(tableDesc.getUri().toString() + "/key1=part789/key2=supp789/key3=3"); - fs.mkdirs(path); - FileUtil.writeTextToFile("3|CANADA|1", new Path(path, "data")); - } - - @AfterClass - public static void tearDown() throws Exception { - client.executeQuery("DROP TABLE IF EXISTS " + PARTITION_TABLE_NAME + " PURGE;"); - client.executeQuery("DROP TABLE IF EXISTS " + MULTIPLE_PARTITION_TABLE_NAME + " PURGE;"); - } - - @Test - public void testFilterIncludePartitionKeyColumn() throws Exception { - Expr expr = sqlParser.parse("SELECT * FROM " + PARTITION_TABLE_NAME + " WHERE key = 'part456' ORDER BY key"); - QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(testingCluster.getConfiguration()); - LogicalPlan newPlan = planner.createPlan(defaultContext, expr); - 
LogicalNode plan = newPlan.getRootBlock().getRoot(); - - assertEquals(NodeType.ROOT, plan.getType()); - LogicalRootNode root = (LogicalRootNode) plan; - - ProjectionNode projNode = root.getChild(); - - assertEquals(NodeType.SORT, projNode.getChild().getType()); - SortNode sortNode = projNode.getChild(); - - assertEquals(NodeType.SELECTION, sortNode.getChild().getType()); - SelectionNode selNode = sortNode.getChild(); - assertTrue(selNode.hasQual()); - - assertEquals(NodeType.SCAN, selNode.getChild().getType()); - ScanNode scanNode = selNode.getChild(); - scanNode.setQual(selNode.getQual()); - - FilteredPartitionInfo filteredPartitionInfo = PartitionedTableUtil.findFilteredPartitionInfo(catalog, conf, - scanNode); - assertNotNull(filteredPartitionInfo); - - Path[] filteredPaths = filteredPartitionInfo.getPartitionPaths(); - assertEquals(1, filteredPaths.length); - assertEquals("key=part456", filteredPaths[0].getName()); - - - String[] partitionNames = filteredPartitionInfo.getPartitionNames(); - assertEquals(1, partitionNames.length); - assertEquals("key=part456", partitionNames[0]); - - assertEquals(10L, filteredPartitionInfo.getTotalVolume()); - } - - @Test - public void testWithoutAnyFilters() throws Exception { - Expr expr = sqlParser.parse("SELECT * FROM " + PARTITION_TABLE_NAME + " ORDER BY key"); - QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(testingCluster.getConfiguration()); - LogicalPlan newPlan = planner.createPlan(defaultContext, expr); - LogicalNode plan = newPlan.getRootBlock().getRoot(); - - assertEquals(NodeType.ROOT, plan.getType()); - LogicalRootNode root = (LogicalRootNode) plan; - - ProjectionNode projNode = root.getChild(); - - assertEquals(NodeType.SORT, projNode.getChild().getType()); - SortNode sortNode = projNode.getChild(); - - assertEquals(NodeType.SCAN, sortNode.getChild().getType()); - ScanNode scanNode = sortNode.getChild(); - - FilteredPartitionInfo filteredPartitionInfo = 
PartitionedTableUtil.findFilteredPartitionInfo(catalog, conf, - scanNode); - assertNotNull(filteredPartitionInfo); - - Path[] filteredPaths = filteredPartitionInfo.getPartitionPaths(); - assertEquals(3, filteredPaths.length); - assertEquals("key=part123", filteredPaths[0].getName()); - assertEquals("key=part456", filteredPaths[1].getName()); - assertEquals("key=part789", filteredPaths[2].getName()); - - String[] partitionNames = filteredPartitionInfo.getPartitionNames(); - assertEquals(3, partitionNames.length); - assertEquals("key=part123", partitionNames[0]); - assertEquals("key=part456", partitionNames[1]); - assertEquals("key=part789", partitionNames[2]); - - assertEquals(33L, filteredPartitionInfo.getTotalVolume()); - } - - @Test - public void testFilterIncludeNonExistingPartitionValue() throws Exception { - Expr expr = sqlParser.parse("SELECT * FROM " + PARTITION_TABLE_NAME + " WHERE key = 'part123456789'"); - QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(testingCluster.getConfiguration()); - LogicalPlan newPlan = planner.createPlan(defaultContext, expr); - LogicalNode plan = newPlan.getRootBlock().getRoot(); - - assertEquals(NodeType.ROOT, plan.getType()); - LogicalRootNode root = (LogicalRootNode) plan; - - ProjectionNode projNode = root.getChild(); - - assertEquals(NodeType.SELECTION, projNode.getChild().getType()); - SelectionNode selNode = projNode.getChild(); - assertTrue(selNode.hasQual()); - - assertEquals(NodeType.SCAN, selNode.getChild().getType()); - ScanNode scanNode = selNode.getChild(); - scanNode.setQual(selNode.getQual()); - - FilteredPartitionInfo filteredPartitionInfo = PartitionedTableUtil.findFilteredPartitionInfo(catalog, conf, - scanNode); - assertNotNull(filteredPartitionInfo); - - assertEquals(0, filteredPartitionInfo.getPartitionPaths().length); - assertNull(filteredPartitionInfo.getPartitionNames()); - - assertEquals(0L, filteredPartitionInfo.getTotalVolume()); - } - - @Test - public void 
testFilterIncludeNonPartitionKeyColumn() throws Exception { - String sql = "SELECT * FROM " + PARTITION_TABLE_NAME + " WHERE n_nationkey = 1"; - Expr expr = sqlParser.parse(sql); - QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(testingCluster.getConfiguration()); - LogicalPlan newPlan = planner.createPlan(defaultContext, expr); - LogicalNode plan = newPlan.getRootBlock().getRoot(); - - assertEquals(NodeType.ROOT, plan.getType()); - LogicalRootNode root = (LogicalRootNode) plan; - - ProjectionNode projNode = root.getChild(); - - assertEquals(NodeType.SELECTION, projNode.getChild().getType()); - SelectionNode selNode = projNode.getChild(); - assertTrue(selNode.hasQual()); - - assertEquals(NodeType.SCAN, selNode.getChild().getType()); - ScanNode scanNode = selNode.getChild(); - scanNode.setQual(selNode.getQual()); - - FilteredPartitionInfo filteredPartitionInfo = PartitionedTableUtil.findFilteredPartitionInfo(catalog, conf, - scanNode); - assertNotNull(filteredPartitionInfo); - - Path[] filteredPaths = filteredPartitionInfo.getPartitionPaths(); - assertEquals(3, filteredPaths.length); - assertEquals("key=part123", filteredPaths[0].getName()); - assertEquals("key=part456", filteredPaths[1].getName()); - assertEquals("key=part789", filteredPaths[2].getName()); - - String[] partitionNames = filteredPartitionInfo.getPartitionNames(); - assertEquals(3, partitionNames.length); - assertEquals("key=part123", partitionNames[0]); - assertEquals("key=part456", partitionNames[1]); - assertEquals("key=part789", partitionNames[2]); - - assertEquals(33L, filteredPartitionInfo.getTotalVolume()); - } - - @Test - public void testFilterIncludeEveryPartitionKeyColumn() throws Exception { - Expr expr = sqlParser.parse("SELECT * FROM " + MULTIPLE_PARTITION_TABLE_NAME - + " WHERE key1 = 'part789' and key2 = 'supp789' and key3=3"); - QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(testingCluster.getConfiguration()); - LogicalPlan newPlan = 
planner.createPlan(defaultContext, expr); - LogicalNode plan = newPlan.getRootBlock().getRoot(); - - assertEquals(NodeType.ROOT, plan.getType()); - LogicalRootNode root = (LogicalRootNode) plan; - - ProjectionNode projNode = root.getChild(); - - assertEquals(NodeType.SELECTION, projNode.getChild().getType()); - SelectionNode selNode = projNode.getChild(); - assertTrue(selNode.hasQual()); - - assertEquals(NodeType.SCAN, selNode.getChild().getType()); - ScanNode scanNode = selNode.getChild(); - scanNode.setQual(selNode.getQual()); - - FilteredPartitionInfo filteredPartitionInfo = PartitionedTableUtil.findFilteredPartitionInfo(catalog, conf, - scanNode); - assertNotNull(filteredPartitionInfo); - - Path[] filteredPaths = filteredPartitionInfo.getPartitionPaths(); - assertEquals(1, filteredPaths.length); - assertEquals("key3=3", filteredPaths[0].getName()); - assertEquals("key2=supp789", filteredPaths[0].getParent().getName()); - assertEquals("key1=part789", filteredPaths[0].getParent().getParent().getName()); - - String[] partitionNames = filteredPartitionInfo.getPartitionNames(); - assertEquals(1, partitionNames.length); - assertEquals("key1=part789/key2=supp789/key3=3", partitionNames[0]); - - assertEquals(10L, filteredPartitionInfo.getTotalVolume()); - } - - @Test - public void testFilterIncludeSomeOfPartitionKeyColumns() throws Exception { - Expr expr = sqlParser.parse("SELECT * FROM " + MULTIPLE_PARTITION_TABLE_NAME - + " WHERE key1 = 'part123' and key2 = 'supp123' order by n_nationkey"); - QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(testingCluster.getConfiguration()); - LogicalPlan newPlan = planner.createPlan(defaultContext, expr); - LogicalNode plan = newPlan.getRootBlock().getRoot(); - - assertEquals(NodeType.ROOT, plan.getType()); - LogicalRootNode root = (LogicalRootNode) plan; - - ProjectionNode projNode = root.getChild(); - - assertEquals(NodeType.SORT, projNode.getChild().getType()); - SortNode sortNode = projNode.getChild(); 
- - assertEquals(NodeType.SELECTION, sortNode.getChild().getType()); - SelectionNode selNode = sortNode.getChild(); - assertTrue(selNode.hasQual()); - - assertEquals(NodeType.SCAN, selNode.getChild().getType()); - ScanNode scanNode = selNode.getChild(); - scanNode.setQual(selNode.getQual()); - - FilteredPartitionInfo filteredPartitionInfo = PartitionedTableUtil.findFilteredPartitionInfo(catalog, conf, - scanNode); - assertNotNull(filteredPartitionInfo); - - Path[] filteredPaths = filteredPartitionInfo.getPartitionPaths(); - assertEquals(2, filteredPaths.length); - - assertEquals("key3=1", filteredPaths[0].getName()); - assertEquals("key2=supp123", filteredPaths[0].getParent().getName()); - assertEquals("key1=part123", filteredPaths[0].getParent().getParent().getName()); - - assertEquals("key3=2", filteredPaths[1].getName()); - assertEquals("key2=supp123", filteredPaths[1].getParent().getName()); - assertEquals("key1=part123", filteredPaths[1].getParent().getParent().getName()); - - String[] partitionNames = filteredPartitionInfo.getPartitionNames(); - assertEquals(2, partitionNames.length); - assertEquals("key1=part123/key2=supp123/key3=1", partitionNames[0]); - assertEquals("key1=part123/key2=supp123/key3=2", partitionNames[1]); - - assertEquals(23L, filteredPartitionInfo.getTotalVolume()); - } - - @Test - public void testFilterIncludeNonPartitionKeyColumns() throws Exception { - Expr expr = sqlParser.parse("SELECT * FROM " + MULTIPLE_PARTITION_TABLE_NAME - + " WHERE key1 = 'part123' and n_nationkey >= 2 order by n_nationkey"); - QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(testingCluster.getConfiguration()); - LogicalPlan newPlan = planner.createPlan(defaultContext, expr); - LogicalNode plan = newPlan.getRootBlock().getRoot(); - - assertEquals(NodeType.ROOT, plan.getType()); - LogicalRootNode root = (LogicalRootNode) plan; - - ProjectionNode projNode = root.getChild(); - - assertEquals(NodeType.SORT, projNode.getChild().getType()); - 
SortNode sortNode = projNode.getChild(); - - assertEquals(NodeType.SELECTION, sortNode.getChild().getType()); - SelectionNode selNode = sortNode.getChild(); - assertTrue(selNode.hasQual()); - - assertEquals(NodeType.SCAN, selNode.getChild().getType()); - ScanNode scanNode = selNode.getChild(); - scanNode.setQual(selNode.getQual()); - - FilteredPartitionInfo filteredPartitionInfo = PartitionedTableUtil.findFilteredPartitionInfo(catalog, conf, - scanNode); - assertNotNull(filteredPartitionInfo); - - Path[] filteredPaths = filteredPartitionInfo.getPartitionPaths(); - assertEquals(2, filteredPaths.length); - - assertEquals("key3=1", filteredPaths[0].getName()); - assertEquals("key2=supp123", filteredPaths[0].getParent().getName()); - assertEquals("key1=part123", filteredPaths[0].getParent().getParent().getName()); - - assertEquals("key3=2", filteredPaths[1].getName()); - assertEquals("key2=supp123", filteredPaths[1].getParent().getName()); - assertEquals("key1=part123", filteredPaths[1].getParent().getParent().getName()); - - String[] partitionNames = filteredPartitionInfo.getPartitionNames(); - assertEquals(2, partitionNames.length); - assertEquals("key1=part123/key2=supp123/key3=1", partitionNames[0]); - assertEquals("key1=part123/key2=supp123/key3=2", partitionNames[1]); - - assertEquals(23L, filteredPartitionInfo.getTotalVolume()); - } - - @Test - public final void testPartitionPruningWitCTAS() throws Exception { - String tableName = "testPartitionPruningUsingDirectories".toLowerCase(); - String canonicalTableName = CatalogUtil.getCanonicalTableName("\"TestPartitionedTableUtil\"", tableName); - - executeString( - "create table " + canonicalTableName + "(col1 int4, col2 int4) partition by column(key float8) " - + " as select l_orderkey, l_partkey, l_quantity from default.lineitem"); - - TableDesc tableDesc = catalog.getTableDesc(getCurrentDatabase(), tableName); - assertNotNull(tableDesc); - - // With a filter which checks a partition key column - Expr expr = 
sqlParser.parse("SELECT * FROM " + canonicalTableName + " WHERE key <= 40.0 ORDER BY key"); - QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(testingCluster.getConfiguration()); - LogicalPlan newPlan = planner.createPlan(defaultContext, expr); - LogicalNode plan = newPlan.getRootBlock().getRoot(); - - assertEquals(NodeType.ROOT, plan.getType()); - LogicalRootNode root = (LogicalRootNode) plan; - - ProjectionNode projNode = root.getChild(); - - assertEquals(NodeType.SORT, projNode.getChild().getType()); - SortNode sortNode = projNode.getChild(); - - assertEquals(NodeType.SELECTION, sortNode.getChild().getType()); - SelectionNode selNode = sortNode.getChild(); - assertTrue(selNode.hasQual()); - - assertEquals(NodeType.SCAN, selNode.getChild().getType()); - ScanNode scanNode = selNode.getChild(); - scanNode.setQual(selNode.getQual()); - - FilteredPartitionInfo filteredPartitionInfo = PartitionedTableUtil.findFilteredPartitionInfo(catalog, conf, - scanNode); - assertNotNull(filteredPartitionInfo); - - Path[] filteredPaths = filteredPartitionInfo.getPartitionPaths(); - assertEquals(3, filteredPaths.length); - assertEquals("key=17.0", filteredPaths[0].getName()); - assertEquals("key=36.0", filteredPaths[1].getName()); - assertEquals("key=38.0", filteredPaths[2].getName()); - - String[] partitionNames = filteredPartitionInfo.getPartitionNames(); - assertEquals(3, partitionNames.length); - assertEquals("key=17.0", partitionNames[0]); - assertEquals("key=36.0", partitionNames[1]); - assertEquals("key=38.0", partitionNames[2]); - - assertEquals(12L, filteredPartitionInfo.getTotalVolume()); - - executeString("DROP TABLE " + canonicalTableName + " PURGE").close(); - } -} \ No newline at end of file diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/util/TestTupleUtil.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/util/TestTupleUtil.java index 73a88761d8..39780025a6 100644 --- 
a/tajo-core-tests/src/test/java/org/apache/tajo/engine/util/TestTupleUtil.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/util/TestTupleUtil.java @@ -24,10 +24,10 @@ import org.apache.tajo.common.TajoDataTypes.Type; import org.apache.tajo.datum.Datum; import org.apache.tajo.datum.DatumFactory; -import org.apache.tajo.plan.util.PartitionedTableUtil; import org.apache.tajo.plan.util.PlannerUtil; import org.apache.tajo.engine.planner.RangePartitionAlgorithm; import org.apache.tajo.engine.planner.UniformRangePartition; +import org.apache.tajo.plan.rewrite.rules.PartitionedTableRewriter; import org.apache.tajo.storage.*; import org.apache.tajo.storage.RowStoreUtil.RowStoreDecoder; import org.apache.tajo.storage.RowStoreUtil.RowStoreEncoder; @@ -146,41 +146,41 @@ public void testBuildTupleFromPartitionPath() { schema.addColumn("key2", Type.TEXT); Path path = new Path("hdfs://tajo/warehouse/partition_test/"); - Tuple tuple = PartitionedTableUtil.buildTupleFromPartitionPath(schema, path, true); + Tuple tuple = PartitionedTableRewriter.buildTupleFromPartitionPath(schema, path, true); assertNull(tuple); - tuple = PartitionedTableUtil.buildTupleFromPartitionPath(schema, path, false); + tuple = PartitionedTableRewriter.buildTupleFromPartitionPath(schema, path, false); assertNull(tuple); path = new Path("hdfs://tajo/warehouse/partition_test/key1=123"); - tuple = PartitionedTableUtil.buildTupleFromPartitionPath(schema, path, true); + tuple = PartitionedTableRewriter.buildTupleFromPartitionPath(schema, path, true); assertNotNull(tuple); assertEquals(DatumFactory.createInt8(123), tuple.asDatum(0)); - tuple = PartitionedTableUtil.buildTupleFromPartitionPath(schema, path, false); + tuple = PartitionedTableRewriter.buildTupleFromPartitionPath(schema, path, false); assertNotNull(tuple); assertEquals(DatumFactory.createInt8(123), tuple.asDatum(0)); path = new Path("hdfs://tajo/warehouse/partition_test/key1=123/part-0000"); // wrong cases; - tuple = 
PartitionedTableUtil.buildTupleFromPartitionPath(schema, path, true); + tuple = PartitionedTableRewriter.buildTupleFromPartitionPath(schema, path, true); assertNull(tuple); - tuple = PartitionedTableUtil.buildTupleFromPartitionPath(schema, path, false); + tuple = PartitionedTableRewriter.buildTupleFromPartitionPath(schema, path, false); assertNull(tuple); path = new Path("hdfs://tajo/warehouse/partition_test/key1=123/key2=abc"); - tuple = PartitionedTableUtil.buildTupleFromPartitionPath(schema, path, true); + tuple = PartitionedTableRewriter.buildTupleFromPartitionPath(schema, path, true); assertNotNull(tuple); assertEquals(DatumFactory.createInt8(123), tuple.asDatum(0)); assertEquals(DatumFactory.createText("abc"), tuple.asDatum(1)); - tuple = PartitionedTableUtil.buildTupleFromPartitionPath(schema, path, false); + tuple = PartitionedTableRewriter.buildTupleFromPartitionPath(schema, path, false); assertNotNull(tuple); assertEquals(DatumFactory.createInt8(123), tuple.asDatum(0)); assertEquals(DatumFactory.createText("abc"), tuple.asDatum(1)); path = new Path("hdfs://tajo/warehouse/partition_test/key1=123/key2=abc/part-0001"); - tuple = PartitionedTableUtil.buildTupleFromPartitionPath(schema, path, true); + tuple = PartitionedTableRewriter.buildTupleFromPartitionPath(schema, path, true); assertNull(tuple); - tuple = PartitionedTableUtil.buildTupleFromPartitionPath(schema, path, false); + tuple = PartitionedTableRewriter.buildTupleFromPartitionPath(schema, path, false); assertNotNull(tuple); assertEquals(DatumFactory.createInt8(123), tuple.asDatum(0)); assertEquals(DatumFactory.createText("abc"), tuple.asDatum(1)); @@ -192,17 +192,17 @@ public void testBuildTupleFromPartitionName() { schema.addColumn("key1", Type.INT8); schema.addColumn("key2", Type.TEXT); - Tuple tuple = PartitionedTableUtil.buildTupleFromPartitionName(schema, "key1=123"); + Tuple tuple = PartitionedTableRewriter.buildTupleFromPartitionName(schema, "key1=123"); assertNotNull(tuple); 
assertEquals(DatumFactory.createInt8(123), tuple.asDatum(0)); assertEquals(DatumFactory.createNullDatum(), tuple.asDatum(1)); - tuple = PartitionedTableUtil.buildTupleFromPartitionName(schema, "key1=123/key2=abc"); + tuple = PartitionedTableRewriter.buildTupleFromPartitionName(schema, "key1=123/key2=abc"); assertNotNull(tuple); assertEquals(DatumFactory.createInt8(123), tuple.asDatum(0)); assertEquals(DatumFactory.createText("abc"), tuple.asDatum(1)); - tuple = PartitionedTableUtil.buildTupleFromPartitionName(schema, "key2=abc"); + tuple = PartitionedTableRewriter.buildTupleFromPartitionName(schema, "key2=abc"); assertNotNull(tuple); assertEquals(DatumFactory.createNullDatum(), tuple.asDatum(0)); assertEquals(DatumFactory.createText("abc"), tuple.asDatum(1)); diff --git a/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Hash.plan b/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Hash.plan index bb47f6c9c5..ceb96f9959 100644 --- a/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Hash.plan +++ b/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Hash.plan @@ -6,8 +6,8 @@ JOIN(8)(LEFT_OUTER) => out schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} => in schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} PARTITIONS_SCAN(9) on default.customer_broad_parts as c - => filter: default.c.c_custkey (INT4) < 0 => target list: default.c.c_custkey (INT4) + => num of filtered paths: 0 => out schema: {(1) default.c.c_custkey (INT4)} => in schema: {(7) default.c.c_acctbal (FLOAT8), default.c.c_address (TEXT), default.c.c_comment (TEXT), default.c.c_mktsegment (TEXT), default.c.c_name (TEXT), default.c.c_nationkey (INT4), default.c.c_phone (TEXT)} JOIN(7)(INNER) @@ -51,8 +51,8 @@ JOIN(8)(LEFT_OUTER) => out 
schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} => in schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} PARTITIONS_SCAN(9) on default.customer_broad_parts as c - => filter: default.c.c_custkey (INT4) < 0 => target list: default.c.c_custkey (INT4) + => num of filtered paths: 0 => out schema: {(1) default.c.c_custkey (INT4)} => in schema: {(7) default.c.c_acctbal (FLOAT8), default.c.c_address (TEXT), default.c.c_comment (TEXT), default.c.c_mktsegment (TEXT), default.c.c_name (TEXT), default.c.c_nationkey (INT4), default.c.c_phone (TEXT)} JOIN(7)(INNER) diff --git a/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Hash_NoBroadcast.plan b/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Hash_NoBroadcast.plan index 46e0b4b7ee..f1fa414673 100644 --- a/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Hash_NoBroadcast.plan +++ b/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Hash_NoBroadcast.plan @@ -6,8 +6,8 @@ JOIN(8)(LEFT_OUTER) => out schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} => in schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} PARTITIONS_SCAN(9) on default.customer_broad_parts as c - => filter: default.c.c_custkey (INT4) < 0 => target list: default.c.c_custkey (INT4) + => num of filtered paths: 0 => out schema: {(1) default.c.c_custkey (INT4)} => in schema: {(7) default.c.c_acctbal (FLOAT8), default.c.c_address (TEXT), default.c.c_comment (TEXT), default.c.c_mktsegment (TEXT), default.c.c_name (TEXT), default.c.c_nationkey (INT4), default.c.c_phone (TEXT)} JOIN(7)(INNER) @@ -100,8 +100,8 @@ Block Id: eb_0000000000000_0000_000004 [LEAF] [q_0000000000000_0000] 4 => 5 (type=HASH_SHUFFLE, 
key=default.c.c_custkey (INT4), num=32) PARTITIONS_SCAN(9) on default.customer_broad_parts as c - => filter: default.c.c_custkey (INT4) < 0 => target list: default.c.c_custkey (INT4) + => num of filtered paths: 0 => out schema: {(1) default.c.c_custkey (INT4)} => in schema: {(7) default.c.c_acctbal (FLOAT8), default.c.c_address (TEXT), default.c.c_comment (TEXT), default.c.c_mktsegment (TEXT), default.c.c_name (TEXT), default.c.c_nationkey (INT4), default.c.c_phone (TEXT)} diff --git a/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Sort.plan b/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Sort.plan index bb47f6c9c5..ceb96f9959 100644 --- a/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Sort.plan +++ b/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Sort.plan @@ -6,8 +6,8 @@ JOIN(8)(LEFT_OUTER) => out schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} => in schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} PARTITIONS_SCAN(9) on default.customer_broad_parts as c - => filter: default.c.c_custkey (INT4) < 0 => target list: default.c.c_custkey (INT4) + => num of filtered paths: 0 => out schema: {(1) default.c.c_custkey (INT4)} => in schema: {(7) default.c.c_acctbal (FLOAT8), default.c.c_address (TEXT), default.c.c_comment (TEXT), default.c.c_mktsegment (TEXT), default.c.c_name (TEXT), default.c.c_nationkey (INT4), default.c.c_phone (TEXT)} JOIN(7)(INNER) @@ -51,8 +51,8 @@ JOIN(8)(LEFT_OUTER) => out schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} => in schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} PARTITIONS_SCAN(9) on default.customer_broad_parts as c - => filter: default.c.c_custkey 
(INT4) < 0 => target list: default.c.c_custkey (INT4) + => num of filtered paths: 0 => out schema: {(1) default.c.c_custkey (INT4)} => in schema: {(7) default.c.c_acctbal (FLOAT8), default.c.c_address (TEXT), default.c.c_comment (TEXT), default.c.c_mktsegment (TEXT), default.c.c_name (TEXT), default.c.c_nationkey (INT4), default.c.c_phone (TEXT)} JOIN(7)(INNER) diff --git a/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Sort_NoBroadcast.plan b/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Sort_NoBroadcast.plan index 46e0b4b7ee..f1fa414673 100644 --- a/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Sort_NoBroadcast.plan +++ b/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Sort_NoBroadcast.plan @@ -6,8 +6,8 @@ JOIN(8)(LEFT_OUTER) => out schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} => in schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} PARTITIONS_SCAN(9) on default.customer_broad_parts as c - => filter: default.c.c_custkey (INT4) < 0 => target list: default.c.c_custkey (INT4) + => num of filtered paths: 0 => out schema: {(1) default.c.c_custkey (INT4)} => in schema: {(7) default.c.c_acctbal (FLOAT8), default.c.c_address (TEXT), default.c.c_comment (TEXT), default.c.c_mktsegment (TEXT), default.c.c_name (TEXT), default.c.c_nationkey (INT4), default.c.c_phone (TEXT)} JOIN(7)(INNER) @@ -100,8 +100,8 @@ Block Id: eb_0000000000000_0000_000004 [LEAF] [q_0000000000000_0000] 4 => 5 (type=HASH_SHUFFLE, key=default.c.c_custkey (INT4), num=32) PARTITIONS_SCAN(9) on default.customer_broad_parts as c - => filter: default.c.c_custkey (INT4) < 0 => target list: default.c.c_custkey (INT4) + => num of filtered paths: 0 => out schema: {(1) default.c.c_custkey (INT4)} => in schema: {(7) 
default.c.c_acctbal (FLOAT8), default.c.c_address (TEXT), default.c.c_comment (TEXT), default.c.c_mktsegment (TEXT), default.c.c_name (TEXT), default.c.c_nationkey (INT4), default.c.c_phone (TEXT)} diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/rewriter/rules/BroadcastJoinRule.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/rewriter/rules/BroadcastJoinRule.java index b32de6bc04..7bb2c30921 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/rewriter/rules/BroadcastJoinRule.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/rewriter/rules/BroadcastJoinRule.java @@ -333,7 +333,12 @@ private long estimateOutputVolumeInternal(LogicalNode node) throws TajoInternalE // broadcast method. return Long.MAX_VALUE; } else { - return pScanNode.getTableDesc().getStats().getNumBytes(); + // if there is no selected partition + if (pScanNode.getInputPaths() == null || pScanNode.getInputPaths().length == 0) { + return 0; + } else { + return pScanNode.getTableDesc().getStats().getNumBytes(); + } } case TABLE_SUBQUERY: return estimateOutputVolumeInternal(((TableSubQueryNode) node).getSubQuery()); diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/rewriter/rules/GlobalPlanRewriteUtil.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/rewriter/rules/GlobalPlanRewriteUtil.java index 908d32465b..b13cb0f1a8 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/rewriter/rules/GlobalPlanRewriteUtil.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/rewriter/rules/GlobalPlanRewriteUtil.java @@ -97,6 +97,13 @@ public static void replaceChild(LogicalNode newChild, ScanNode originalChild, Lo public static long getTableVolume(ScanNode scanNode) { if (scanNode.getTableDesc().hasStats()) { long scanBytes = scanNode.getTableDesc().getStats().getNumBytes(); + if (scanNode.getType() == NodeType.PARTITIONS_SCAN) { + 
PartitionedTableScanNode pScanNode = (PartitionedTableScanNode) scanNode; + if (pScanNode.getInputPaths() == null || pScanNode.getInputPaths().length == 0) { + scanBytes = 0L; + } + } + return scanBytes; } else { return -1; @@ -126,7 +133,12 @@ public static long computeDescendentVolume(LogicalNode node) { // broadcast method. return Long.MAX_VALUE; } else { - return pScanNode.getTableDesc().getStats().getNumBytes(); + // if there is no selected partition + if (pScanNode.getInputPaths() == null || pScanNode.getInputPaths().length == 0) { + return 0; + } else { + return pScanNode.getTableDesc().getStats().getNumBytes(); + } } case TABLE_SUBQUERY: return computeDescendentVolume(((TableSubQueryNode) node).getSubQuery()); diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java index 9eeff98496..ed897c5a7f 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java @@ -33,7 +33,7 @@ import org.apache.tajo.plan.expr.EvalTreeUtil; import org.apache.tajo.plan.expr.FieldEval; import org.apache.tajo.plan.logical.ScanNode; -import org.apache.tajo.plan.util.PartitionedTableUtil; +import org.apache.tajo.plan.rewrite.rules.PartitionedTableRewriter; import org.apache.tajo.plan.util.PlannerUtil; import org.apache.tajo.storage.*; import org.apache.tajo.storage.fragment.FragmentConvertor; @@ -98,12 +98,18 @@ private void rewriteColumnPartitionedTableSchema() throws IOException { Tuple partitionRow = null; if (fragments != null && fragments.length > 0) { - List PartitionFileFragments = FragmentConvertor.convert(PartitionFileFragment + List partitionFileFragments = FragmentConvertor.convert(PartitionFileFragment .class, fragments); - // Get first partition key from a given partition keys - partitionRow = 
PartitionedTableUtil.buildTupleFromPartitionName(columnPartitionSchema, - PartitionFileFragments.get(0).getPartitionKeys()); + if (partitionFileFragments.get(0) != null) { + // Get first partition key from a given partition keys + partitionRow = PartitionedTableRewriter.buildTupleFromPartitionName(columnPartitionSchema, + partitionFileFragments.get(0).getPartitionKeys()); + } else { + // Get a partition key value from a given path + partitionRow = PartitionedTableRewriter.buildTupleFromPartitionPath( + columnPartitionSchema, partitionFileFragments.get(0).getPath(), false); + } } // Targets or search conditions may contain column references. diff --git a/tajo-core/src/main/java/org/apache/tajo/master/TajoMasterClientService.java b/tajo-core/src/main/java/org/apache/tajo/master/TajoMasterClientService.java index fb37c5f69b..017b17fdc3 100644 --- a/tajo-core/src/main/java/org/apache/tajo/master/TajoMasterClientService.java +++ b/tajo-core/src/main/java/org/apache/tajo/master/TajoMasterClientService.java @@ -560,10 +560,10 @@ public GetQueryResultDataResponse getQueryResultData(RpcController controller, G if(request.hasCompressCodec()) { queryResultScanner = new NonForwardQueryResultFileScanner(context.getConf(), session.getSessionId(), - queryId, scanNode, Integer.MAX_VALUE, request.getCompressCodec(), catalog); + queryId, scanNode, Integer.MAX_VALUE, request.getCompressCodec()); } else { queryResultScanner = new NonForwardQueryResultFileScanner(context.getConf(), - session.getSessionId(), queryId, scanNode, Integer.MAX_VALUE, catalog); + session.getSessionId(), queryId, scanNode, Integer.MAX_VALUE); } queryResultScanner.init(); diff --git a/tajo-core/src/main/java/org/apache/tajo/master/exec/DDLExecutor.java b/tajo-core/src/main/java/org/apache/tajo/master/exec/DDLExecutor.java index 00bed0d4e8..da19625341 100644 --- a/tajo-core/src/main/java/org/apache/tajo/master/exec/DDLExecutor.java +++ b/tajo-core/src/main/java/org/apache/tajo/master/exec/DDLExecutor.java @@ 
-41,7 +41,6 @@ import org.apache.tajo.plan.LogicalPlan; import org.apache.tajo.plan.logical.*; import org.apache.tajo.plan.rewrite.rules.PartitionedTableRewriter; -import org.apache.tajo.plan.util.PartitionedTableUtil; import org.apache.tajo.plan.util.PlannerUtil; import org.apache.tajo.storage.FileTablespace; import org.apache.tajo.storage.StorageUtil; @@ -601,7 +600,7 @@ public void repairPartition(TajoMaster.MasterContext context, final QueryContext Schema partitionColumns = partitionDesc.getExpressionSchema(); // Get the array of path filter, accepting all partition paths. - PathFilter[] filters = PartitionedTableUtil.buildAllAcceptingPathFilters(partitionColumns); + PathFilter[] filters = PartitionedTableRewriter.buildAllAcceptingPathFilters(partitionColumns); // loop from one to the number of partition columns Path [] filteredPaths = toPathArray(fs.listStatus(tablePath, filters[0])); @@ -628,7 +627,7 @@ public void repairPartition(TajoMaster.MasterContext context, final QueryContext List targetPartitions = TUtil.newList(); for(Path filteredPath : filteredPaths) { - int startIdx = filteredPath.toString().indexOf(PartitionedTableUtil.getColumnPartitionPathPrefix + int startIdx = filteredPath.toString().indexOf(PartitionedTableRewriter.getColumnPartitionPathPrefix (partitionColumns)); // if there is partition column in the path diff --git a/tajo-core/src/main/java/org/apache/tajo/master/exec/ExplainPlanPreprocessorForTest.java b/tajo-core/src/main/java/org/apache/tajo/master/exec/ExplainPlanPreprocessorForTest.java index ef5af73587..2740728db6 100644 --- a/tajo-core/src/main/java/org/apache/tajo/master/exec/ExplainPlanPreprocessorForTest.java +++ b/tajo-core/src/main/java/org/apache/tajo/master/exec/ExplainPlanPreprocessorForTest.java @@ -118,6 +118,9 @@ public LogicalNode visitPartitionedTableScan(PlanShapeFixerContext context, Logi throws TajoException { super.visitPartitionedTableScan(context, plan, block, node, stack); context.childNumbers.push(1); + Path[] 
inputPaths = node.getInputPaths(); + Arrays.sort(inputPaths); + node.setInputPaths(inputPaths); if (node.hasTargets()) { node.setTargets(sortTargets(node.getTargets())); } diff --git a/tajo-core/src/main/java/org/apache/tajo/master/exec/NonForwardQueryResultFileScanner.java b/tajo-core/src/main/java/org/apache/tajo/master/exec/NonForwardQueryResultFileScanner.java index f506480e67..80275ce6cd 100644 --- a/tajo-core/src/main/java/org/apache/tajo/master/exec/NonForwardQueryResultFileScanner.java +++ b/tajo-core/src/main/java/org/apache/tajo/master/exec/NonForwardQueryResultFileScanner.java @@ -28,7 +28,6 @@ import org.apache.tajo.QueryId; import org.apache.tajo.TaskAttemptId; import org.apache.tajo.TaskId; -import org.apache.tajo.catalog.CatalogService; import org.apache.tajo.catalog.Schema; import org.apache.tajo.catalog.SchemaUtil; import org.apache.tajo.catalog.TableDesc; @@ -81,15 +80,14 @@ public class NonForwardQueryResultFileScanner implements NonForwardQueryResultSc private ExecutorService executor; private MemoryRowBlock rowBlock; private Future nextFetch; - private CatalogService catalog; public NonForwardQueryResultFileScanner(TajoConf tajoConf, String sessionId, QueryId queryId, ScanNode scanNode, - int maxRow, CatalogService catalog) throws IOException { - this(tajoConf, sessionId, queryId, scanNode, maxRow, null, catalog); + int maxRow) throws IOException { + this(tajoConf, sessionId, queryId, scanNode, maxRow, null); } public NonForwardQueryResultFileScanner(TajoConf tajoConf, String sessionId, QueryId queryId, ScanNode scanNode, - int maxRow, CodecType codecType, CatalogService catalog) throws IOException { + int maxRow, CodecType codecType) throws IOException { this.tajoConf = tajoConf; this.sessionId = sessionId; this.queryId = queryId; @@ -98,7 +96,6 @@ public NonForwardQueryResultFileScanner(TajoConf tajoConf, String sessionId, Que this.maxRow = maxRow; this.rowEncoder = RowStoreUtil.createEncoder(scanNode.getOutSchema()); this.codecType = 
codecType; - this.catalog = catalog; } public void init() throws IOException, TajoException { @@ -111,8 +108,7 @@ private void initSeqScanExec() throws IOException, TajoException { List fragments = Lists.newArrayList(); if (tableDesc.hasPartition()) { FileTablespace fileTablespace = TUtil.checkTypeAndGet(tablespace, FileTablespace.class); - fragments.addAll(Repartitioner.getFragmentsFromPartitionedTable(catalog, tajoConf, fileTablespace, scanNode, - tableDesc)); + fragments.addAll(Repartitioner.getFragmentsFromPartitionedTable(fileTablespace, scanNode, tableDesc)); } else { fragments.addAll(tablespace.getSplits(tableDesc.getName(), tableDesc, scanNode.getQual())); } diff --git a/tajo-core/src/main/java/org/apache/tajo/master/exec/QueryExecutor.java b/tajo-core/src/main/java/org/apache/tajo/master/exec/QueryExecutor.java index 128966b218..9a35ee69ed 100644 --- a/tajo-core/src/main/java/org/apache/tajo/master/exec/QueryExecutor.java +++ b/tajo-core/src/main/java/org/apache/tajo/master/exec/QueryExecutor.java @@ -302,7 +302,7 @@ public void execSimpleQuery(QueryContext queryContext, Session session, String q plan.getRootBlock().getRoot()); final NonForwardQueryResultScanner queryResultScanner = new NonForwardQueryResultFileScanner( - context.getConf(), session.getSessionId(), queryInfo.getQueryId(), scanNode, maxRow, catalog); + context.getConf(), session.getSessionId(), queryInfo.getQueryId(), scanNode, maxRow); queryResultScanner.init(); session.addNonForwardQueryResultScanner(queryResultScanner); diff --git a/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java b/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java index 27c76a545c..79d2cc1087 100644 --- a/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java +++ b/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java @@ -44,10 +44,10 @@ import org.apache.tajo.exception.*; import org.apache.tajo.plan.logical.*; import 
org.apache.tajo.plan.logical.SortNode.SortPurpose; +import org.apache.tajo.plan.rewrite.rules.PartitionedTableRewriter; import org.apache.tajo.plan.serder.PlanProto.DistinctGroupbyEnforcer.MultipleAggregationStage; import org.apache.tajo.plan.serder.PlanProto.EnforceProperty; import org.apache.tajo.plan.util.FilteredPartitionInfo; -import org.apache.tajo.plan.util.PartitionedTableUtil; import org.apache.tajo.plan.util.PlannerUtil; import org.apache.tajo.querymaster.Task.IntermediateEntry; import org.apache.tajo.storage.*; @@ -379,13 +379,26 @@ private static void scheduleSymmetricRepartitionJoin(QueryMasterTask.QueryMaster //If there are more than one data files, that files should be added to fragments or partition path for (ScanNode eachScan: broadcastScans) { + + Path[] partitionScanPaths = null; + String[] partitionKeys = null; TableDesc tableDesc = masterContext.getTableDesc(eachScan); Tablespace space = TablespaceManager.get(tableDesc.getUri()); if (eachScan.getType() == NodeType.PARTITIONS_SCAN) { - CatalogService catalog = stage.getContext().getQueryMasterContext().getWorkerContext().getCatalog(); - TajoConf tajoConf = masterContext.getConf(); - getFragmentsFromPartitionedTable(catalog, tajoConf, (FileTablespace) space, eachScan, tableDesc); + + PartitionedTableScanNode partitionScan = (PartitionedTableScanNode)eachScan; + partitionScanPaths = partitionScan.getInputPaths(); + if (partitionScan.hasPartitionKeys()) { + partitionKeys = partitionScan.getPartitionKeys(); + } + // set null to inputPaths in getFragmentsFromPartitionedTable() + getFragmentsFromPartitionedTable((FileTablespace) space, eachScan, tableDesc); + partitionScan.setInputPaths(partitionScanPaths); + if (partitionKeys != null) { + partitionScan.setPartitionKeys(partitionKeys); + } + } else { Collection scanFragments = @@ -459,25 +472,20 @@ public static Map>> merge /** * It creates a number of fragments for all partitions. 
*/ - public static List getFragmentsFromPartitionedTable(CatalogService catalog, TajoConf conf, - Tablespace tsHandler, ScanNode scan, TableDesc table) throws IOException, UndefinedDatabaseException, - UndefinedTableException, UndefinedPartitionMethodException, UndefinedOperatorException, UnsupportedException { + public static List getFragmentsFromPartitionedTable(Tablespace tsHandler, + ScanNode scan, + TableDesc table) throws IOException { Preconditions.checkArgument(tsHandler instanceof FileTablespace, "tsHandler must be FileTablespace"); if (!(scan instanceof PartitionedTableScanNode)) { throw new IllegalArgumentException("scan should be a PartitionedTableScanNode type."); } - List fragments = Lists.newArrayList(); PartitionedTableScanNode partitionsScan = (PartitionedTableScanNode) scan; - - FilteredPartitionInfo filteredPartitionInfo = PartitionedTableUtil.findFilteredPartitionInfo(catalog, conf, - partitionsScan); - fragments.addAll(((FileTablespace) tsHandler).getPartitionSplits( - scan.getCanonicalName(), table.getMeta(), table.getSchema(), filteredPartitionInfo.getPartitionNames(), - filteredPartitionInfo.getPartitionPaths())); - - partitionsScan.getTableDesc().getStats().setNumBytes(filteredPartitionInfo.getTotalVolume()); + scan.getCanonicalName(), table.getMeta(), table.getSchema(), partitionsScan.getPartitionKeys(), + partitionsScan.getInputPaths())); + partitionsScan.setInputPaths(null); + partitionsScan.setPartitionKeys(null); return fragments; } @@ -509,13 +517,16 @@ private static void scheduleLeafTasksWithBroadcastTable(TaskSchedulerContext sch TableDesc desc = stage.getContext().getTableDesc(scan); Collection scanFragments; + Path[] partitionScanPaths = null; + + Tablespace space = TablespaceManager.get(desc.getUri()); if (scan.getType() == NodeType.PARTITIONS_SCAN) { + PartitionedTableScanNode partitionScan = (PartitionedTableScanNode) scan; + partitionScanPaths = partitionScan.getInputPaths(); // set null to inputPaths in 
getFragmentsFromPartitionedTable() - CatalogService catalog = stage.getContext().getQueryMasterContext().getWorkerContext().getCatalog(); - TajoConf tajoConf = stage.getContext().getConf(); - scanFragments = getFragmentsFromPartitionedTable(catalog, tajoConf, space, scan, desc); + scanFragments = getFragmentsFromPartitionedTable(space, scan, desc); } else { scanFragments = space.getSplits(scan.getCanonicalName(), desc, scan.getQual()); } @@ -524,7 +535,13 @@ private static void scheduleLeafTasksWithBroadcastTable(TaskSchedulerContext sch if (i == baseScanId) { baseFragments = scanFragments; } else { - broadcastFragments.addAll(scanFragments); + if (scan.getType() == NodeType.PARTITIONS_SCAN) { + PartitionedTableScanNode partitionScan = (PartitionedTableScanNode)scan; + // PhysicalPlanner makes a PartitionMergeScanExec when the table is a broadcast table and inputPaths is not empty + partitionScan.setInputPaths(partitionScanPaths); + } else { + broadcastFragments.addAll(scanFragments); + } } } } diff --git a/tajo-core/src/main/java/org/apache/tajo/querymaster/Stage.java b/tajo-core/src/main/java/org/apache/tajo/querymaster/Stage.java index 32684aacc2..0aaf92befa 100644 --- a/tajo-core/src/main/java/org/apache/tajo/querymaster/Stage.java +++ b/tajo-core/src/main/java/org/apache/tajo/querymaster/Stage.java @@ -28,7 +28,10 @@ import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.state.*; import org.apache.tajo.*; -import org.apache.tajo.catalog.*; +import org.apache.tajo.catalog.CatalogUtil; +import org.apache.tajo.catalog.Schema; +import org.apache.tajo.catalog.TableDesc; +import org.apache.tajo.catalog.TableMeta; import org.apache.tajo.catalog.proto.CatalogProtos.PartitionDescProto; import org.apache.tajo.catalog.statistics.ColumnStats; import org.apache.tajo.catalog.statistics.StatisticsUtil; @@ -1143,10 +1146,7 @@ private static void scheduleFragmentsForLeafQuery(Stage stage) throws IOExceptio // Also, we can ensure FileTableSpace if the type of 
ScanNode is PARTITIONS_SCAN. if (scan.getType() == NodeType.PARTITIONS_SCAN) { // After calling this method, partition paths are removed from the physical plan. - CatalogService catalog = stage.getContext().getQueryMasterContext().getWorkerContext().getCatalog(); - TajoConf tajoConf = stage.getContext().getConf(); - fragments = Repartitioner.getFragmentsFromPartitionedTable(catalog, tajoConf, (FileTablespace) tablespace, - scan, table); + fragments = Repartitioner.getFragmentsFromPartitionedTable((FileTablespace) tablespace, scan, table); } else { fragments = tablespace.getSplits(scan.getCanonicalName(), table, scan.getQual()); } diff --git a/tajo-core/src/main/java/org/apache/tajo/ws/rs/resources/QueryResultResource.java b/tajo-core/src/main/java/org/apache/tajo/ws/rs/resources/QueryResultResource.java index 4473a44d1d..243806051a 100644 --- a/tajo-core/src/main/java/org/apache/tajo/ws/rs/resources/QueryResultResource.java +++ b/tajo-core/src/main/java/org/apache/tajo/ws/rs/resources/QueryResultResource.java @@ -134,7 +134,7 @@ private static NonForwardQueryResultScanner getNonForwardQueryResultScanner( } resultScanner = new NonForwardQueryResultFileScanner(masterContext.getConf(), session.getSessionId(), queryId, - scanNode, Integer.MAX_VALUE, masterContext.getCatalog()); + scanNode, Integer.MAX_VALUE); resultScanner.init(); session.addNonForwardQueryResultScanner(resultScanner); } diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java b/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java index d83f8bf0dd..2dfa3202a2 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java @@ -31,18 +31,20 @@ public class PartitionedTableScanNode extends ScanNode { @Expose Path [] inputPaths; + @Expose String[] partitionKeys; public PartitionedTableScanNode(int pid) { super(pid, 
NodeType.PARTITIONS_SCAN); } - public void init(ScanNode scanNode, Path[] inputPaths) { + public void init(ScanNode scanNode, Path[] inputPaths, String[] partitionKeys) { tableDesc = scanNode.tableDesc; setInSchema(scanNode.getInSchema()); setOutSchema(scanNode.getOutSchema()); this.qual = scanNode.qual; this.targets = scanNode.targets; this.inputPaths = inputPaths; + this.partitionKeys = partitionKeys; if (scanNode.hasAlias()) { alias = scanNode.alias; @@ -57,7 +59,19 @@ public void setInputPaths(Path [] paths) { return inputPaths; } - public String toString() { + public String[] getPartitionKeys() { + return partitionKeys; + } + + public void setPartitionKeys(String[] partitionKeys) { + this.partitionKeys = partitionKeys; + } + + public boolean hasPartitionKeys() { + return this.partitionKeys != null; + } + + public String toString() { StringBuilder sb = new StringBuilder("Partitions Scan (table=").append(getTableName()); if (hasAlias()) { sb.append(", alias=").append(alias); @@ -83,7 +97,8 @@ public boolean equals(Object obj) { eq = eq && TUtil.checkEquals(this.tableDesc, other.tableDesc); eq = eq && TUtil.checkEquals(this.qual, other.qual); eq = eq && TUtil.checkEquals(this.targets, other.targets); - eq = eq && TUtil.checkEquals(this.inputPaths, other.inputPaths); + eq = eq && TUtil.checkEquals(this.inputPaths, other.inputPaths); + eq = eq && TUtil.checkEquals(this.partitionKeys, other.partitionKeys); return eq; } @@ -109,6 +124,7 @@ public Object clone() throws CloneNotSupportedException { } unionScan.inputPaths = inputPaths; + unionScan.partitionKeys = partitionKeys; return unionScan; } diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/partition/PartitionContent.java b/tajo-plan/src/main/java/org/apache/tajo/plan/partition/PartitionContent.java new file mode 100644 index 0000000000..7b74f05d7f --- /dev/null +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/partition/PartitionContent.java @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software 
Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.plan.partition; + +import org.apache.hadoop.fs.Path; + +public class PartitionContent { + private Path[] partitionPaths; + private String[] partitionKeys; + private long totalVolume; + + public PartitionContent() { + } + + public PartitionContent(Path[] partitionPaths) { + this.partitionPaths = partitionPaths; + } + + public PartitionContent(Path[] partitionPaths, long totalVolume) { + this.partitionPaths = partitionPaths; + this.totalVolume = totalVolume; + } + + public PartitionContent(Path[] partitionPaths, String[] partitionKeys, long totalVolume) { + this.partitionPaths = partitionPaths; + this.partitionKeys = partitionKeys; + this.totalVolume = totalVolume; + } + + public Path[] getPartitionPaths() { + return partitionPaths; + } + + public void setPartitionPaths(Path[] partitionPaths) { + this.partitionPaths = partitionPaths; + } + + public String[] getPartitionKeys() { + return partitionKeys; + } + + public void setPartitionKeys(String[] partitionKeys) { + this.partitionKeys = partitionKeys; + } + + public long getTotalVolume() { + return totalVolume; + } + + public void setTotalVolume(long totalVolume) { + this.totalVolume = totalVolume; + } +} \ No newline at end of 
file diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java index ec57347154..12ad77e2f9 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java @@ -27,6 +27,7 @@ import org.apache.tajo.OverridableConf; import org.apache.tajo.catalog.*; import org.apache.tajo.catalog.partition.PartitionMethodDesc; +import org.apache.tajo.catalog.proto.CatalogProtos; import org.apache.tajo.catalog.proto.CatalogProtos.PartitionsByAlgebraProto; import org.apache.tajo.catalog.proto.CatalogProtos.PartitionDescProto; import org.apache.tajo.datum.DatumFactory; @@ -35,9 +36,11 @@ import org.apache.tajo.plan.LogicalPlan; import org.apache.tajo.plan.expr.*; import org.apache.tajo.plan.logical.*; +import org.apache.tajo.plan.partition.PartitionContent; import org.apache.tajo.plan.rewrite.LogicalPlanRewriteRule; import org.apache.tajo.plan.rewrite.LogicalPlanRewriteRuleContext; import org.apache.tajo.plan.util.EvalNodeToExprConverter; +import org.apache.tajo.plan.util.FilteredPartitionInfo; import org.apache.tajo.plan.util.PlannerUtil; import org.apache.tajo.plan.visitor.BasicLogicalPlanVisitor; import org.apache.tajo.storage.Tuple; @@ -49,7 +52,6 @@ public class PartitionedTableRewriter implements LogicalPlanRewriteRule { private CatalogService catalog; - private long totalVolume; private static final Log LOG = LogFactory.getLog(PartitionedTableRewriter.class); @@ -111,11 +113,11 @@ public String toString() { } } - private Path [] findFilteredPaths(OverridableConf queryContext, String tableName, + private PartitionContent getPartitionContent(OverridableConf queryContext, String tableName, Schema partitionColumns, EvalNode [] conjunctiveForms, Path tablePath) throws IOException, UndefinedDatabaseException, 
UndefinedTableException, UndefinedPartitionMethodException, UndefinedOperatorException, UnsupportedException { - return findFilteredPaths(queryContext, tableName, partitionColumns, conjunctiveForms, tablePath, null); + return getPartitionContent(queryContext, tableName, partitionColumns, conjunctiveForms, tablePath, null); } /** @@ -128,11 +130,12 @@ public String toString() { * @return * @throws IOException */ - private Path [] findFilteredPaths(OverridableConf queryContext, String tableName, + private PartitionContent getPartitionContent(OverridableConf queryContext, String tableName, Schema partitionColumns, EvalNode [] conjunctiveForms, Path tablePath, ScanNode scanNode) throws IOException, UndefinedDatabaseException, UndefinedTableException, UndefinedPartitionMethodException, UndefinedOperatorException, UnsupportedException { + PartitionContent partitionContent = null; Path [] filteredPaths = null; FileSystem fs = tablePath.getFileSystem(queryContext.getConf()); String [] splits = CatalogUtil.splitFQTableName(tableName); @@ -143,16 +146,18 @@ public String toString() { partitions = catalog.getPartitionsOfTable(splits[0], splits[1]); if (partitions.isEmpty()) { filteredPaths = findFilteredPathsFromFileSystem(partitionColumns, conjunctiveForms, fs, tablePath); + partitionContent = new PartitionContent(filteredPaths); } else { - filteredPaths = findFilteredPathsByPartitionDesc(partitions); + partitionContent = getPartitionContentByPartitionDesc(partitions); } } else { if (catalog.existPartitions(splits[0], splits[1])) { PartitionsByAlgebraProto request = getPartitionsAlgebraProto(splits[0], splits[1], conjunctiveForms); partitions = catalog.getPartitionsByAlgebra(request); - filteredPaths = findFilteredPathsByPartitionDesc(partitions); + partitionContent = getPartitionContentByPartitionDesc(partitions); } else { filteredPaths = findFilteredPathsFromFileSystem(partitionColumns, conjunctiveForms, fs, tablePath); + partitionContent = new 
PartitionContent(filteredPaths); } } } catch (UnsupportedException ue) { @@ -162,30 +167,34 @@ public String toString() { partitions = catalog.getPartitionsOfTable(splits[0], splits[1]); if (partitions.isEmpty()) { filteredPaths = findFilteredPathsFromFileSystem(partitionColumns, conjunctiveForms, fs, tablePath); + partitionContent = new PartitionContent(filteredPaths); } else { - filteredPaths = findFilteredPathsByPartitionDesc(partitions); + partitionContent = getPartitionContentByPartitionDesc(partitions); } scanNode.setQual(AlgebraicUtil.createSingletonExprFromCNF(conjunctiveForms)); } + LOG.info("Filtered directory or files: " + partitionContent.getPartitionPaths().length); - LOG.info("Filtered directory or files: " + filteredPaths.length); - return filteredPaths; + return partitionContent; } /** - * Build list of partition path by PartitionDescProto which is generated from CatalogStore. + * Build list of partition path and partition keys by PartitionDescProto which is generated from CatalogStore. 
* * @param partitions * @return */ - private Path[] findFilteredPathsByPartitionDesc(List partitions) { - Path [] filteredPaths = new Path[partitions.size()]; + private PartitionContent getPartitionContentByPartitionDesc(List partitions) { + long totalVolume = 0L; + Path[] filteredPaths = new Path[partitions.size()]; + String[] partitionKeys = new String[partitions.size()]; for (int i = 0; i < partitions.size(); i++) { - PartitionDescProto partition = partitions.get(i); + CatalogProtos.PartitionDescProto partition = partitions.get(i); filteredPaths[i] = new Path(partition.getPath()); + partitionKeys[i] = partition.getPartitionName(); totalVolume += partition.getNumBytes(); } - return filteredPaths; + return new PartitionContent(filteredPaths, partitionKeys, totalVolume); } /** @@ -324,12 +333,11 @@ public static PartitionsByAlgebraProto getPartitionsAlgebraProto( for (int i = 0; i < fileStatuses.length; i++) { FileStatus fileStatus = fileStatuses[i]; paths[i] = fileStatus.getPath(); - totalVolume += fileStatus.getLen(); } return paths; } - public Path [] findFilteredPartitionPaths(OverridableConf queryContext, ScanNode scanNode) throws IOException, + public PartitionContent getPartitionContent(OverridableConf queryContext, ScanNode scanNode) throws IOException, UndefinedDatabaseException, UndefinedTableException, UndefinedPartitionMethodException, UndefinedOperatorException, UnsupportedException { TableDesc table = scanNode.getTableDesc(); @@ -370,10 +378,10 @@ public static PartitionsByAlgebraProto getPartitionsAlgebraProto( } if (indexablePredicateSet.size() > 0) { // There are at least one indexable predicates - return findFilteredPaths(queryContext, table.getName(), paritionValuesSchema, + return getPartitionContent(queryContext, table.getName(), paritionValuesSchema, indexablePredicateSet.toArray(new EvalNode[indexablePredicateSet.size()]), new Path(table.getUri()), scanNode); } else { // otherwise, we will get all partition paths. 
- return findFilteredPaths(queryContext, table.getName(), paritionValuesSchema, null, new Path(table.getUri())); + return getPartitionContent(queryContext, table.getName(), paritionValuesSchema, null, new Path(table.getUri())); } } @@ -486,37 +494,6 @@ public static String getColumnPartitionPathPrefix(Schema partitionColumn) { return sb.toString(); } - private final class Rewriter extends BasicLogicalPlanVisitor { - @Override - public Object visitScan(OverridableConf queryContext, LogicalPlan plan, LogicalPlan.QueryBlock block, - ScanNode scanNode, Stack stack) throws TajoException { - - TableDesc table = scanNode.getTableDesc(); - if (!table.hasPartition()) { - return null; - } - - try { - Path [] filteredPaths = findFilteredPartitionPaths(queryContext, scanNode); - plan.addHistory("PartitionTableRewriter chooses " + filteredPaths.length + " of partitions"); - PartitionedTableScanNode rewrittenScanNode = plan.createNode(PartitionedTableScanNode.class); - rewrittenScanNode.init(scanNode, filteredPaths); - rewrittenScanNode.getTableDesc().getStats().setNumBytes(totalVolume); - - // if it is topmost node, set it as the rootnode of this block. - if (stack.empty() || block.getRoot().equals(scanNode)) { - block.setRoot(rewrittenScanNode); - } else { - PlannerUtil.replaceNode(plan, stack.peek(), scanNode, rewrittenScanNode); - } - block.registerNode(rewrittenScanNode); - } catch (IOException e) { - throw new TajoInternalError("Partitioned Table Rewrite Failed: \n" + e.getMessage()); - } - return null; - } - } - /** * This transforms a partition name into a tupe with a given partition column schema. When a file path * Assume that an user gives partition name 'country=KOREA/city=SEOUL'. 
@@ -554,4 +531,62 @@ public static Tuple buildTupleFromPartitionName(Schema partitionColumnSchema, St return tuple; } + + private void updateTableStat(OverridableConf queryContext, PartitionedTableScanNode scanNode) + throws TajoException { + if (scanNode.getInputPaths().length > 0) { + try { + FileSystem fs = scanNode.getInputPaths()[0].getFileSystem(queryContext.getConf()); + long totalVolume = 0; + + for (Path input : scanNode.getInputPaths()) { + ContentSummary summary = fs.getContentSummary(input); + totalVolume += summary.getLength(); + } + scanNode.getTableDesc().getStats().setNumBytes(totalVolume); + } catch (Throwable e) { + throw new TajoInternalError(e); + } + } + } + + private final class Rewriter extends BasicLogicalPlanVisitor { + @Override + public Object visitScan(OverridableConf queryContext, LogicalPlan plan, LogicalPlan.QueryBlock block, + ScanNode scanNode, Stack stack) throws TajoException { + + TableDesc table = scanNode.getTableDesc(); + if (!table.hasPartition()) { + return null; + } + + try { + PartitionContent partitionContent = getPartitionContent(queryContext, scanNode); + + Path[] filteredPaths = partitionContent.getPartitionPaths(); + plan.addHistory("PartitionTableRewriter chooses " + filteredPaths.length + " of partitions"); + PartitionedTableScanNode rewrittenScanNode = plan.createNode(PartitionedTableScanNode.class); + + String[] partitionKeys = partitionContent.getPartitionKeys(); + rewrittenScanNode.init(scanNode, filteredPaths, partitionKeys); + + if (partitionKeys != null) { + rewrittenScanNode.getTableDesc().getStats().setNumBytes(partitionContent.getTotalVolume()); + } else { + updateTableStat(queryContext, rewrittenScanNode); + } + + // if it is topmost node, set it as the rootnode of this block. 
+ if (stack.empty() || block.getRoot().equals(scanNode)) { + block.setRoot(rewrittenScanNode); + } else { + PlannerUtil.replaceNode(plan, stack.peek(), scanNode, rewrittenScanNode); + } + block.registerNode(rewrittenScanNode); + } catch (IOException e) { + throw new TajoInternalError("Partitioned Table Rewrite Failed: \n" + e.getMessage()); + } + return null; + } + } } diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeDeserializer.java b/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeDeserializer.java index 7f0d44402a..fdad4c43af 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeDeserializer.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeDeserializer.java @@ -462,6 +462,13 @@ private static PartitionedTableScanNode convertPartitionScan(OverridableConf con PlanProto.LogicalNode protoNode) { PartitionedTableScanNode partitionedScan = new PartitionedTableScanNode(protoNode.getNodeId()); fillScanNode(context, evalContext, protoNode, partitionedScan); + + PlanProto.PartitionScanSpec partitionScanProto = protoNode.getPartitionScan(); + Path [] paths = new Path[partitionScanProto.getPathsCount()]; + for (int i = 0; i < partitionScanProto.getPathsCount(); i++) { + paths[i] = new Path(partitionScanProto.getPaths(i)); + } + partitionedScan.setInputPaths(paths); return partitionedScan; } diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeSerializer.java b/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeSerializer.java index 82fcd739da..6b082f7159 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeSerializer.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeSerializer.java @@ -475,8 +475,19 @@ public LogicalNode visitPartitionedTableScan(SerializeContext context, LogicalPl throws TajoException { PlanProto.ScanNode.Builder scanBuilder = buildScanNode(node); + + PlanProto.PartitionScanSpec.Builder 
partitionScan = PlanProto.PartitionScanSpec.newBuilder(); + List pathStrs = TUtil.newList(); + if (node.getInputPaths() != null) { + for (Path p : node.getInputPaths()) { + pathStrs.add(p.toString()); + } + partitionScan.addAllPaths(pathStrs); + } + PlanProto.LogicalNode.Builder nodeBuilder = createNodeBuilder(context, node); nodeBuilder.setScan(scanBuilder); + nodeBuilder.setPartitionScan(partitionScan); context.treeBuilder.addNodes(nodeBuilder); return node; diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java b/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java deleted file mode 100644 index 5b4ec589b9..0000000000 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/util/PartitionedTableUtil.java +++ /dev/null @@ -1,512 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.tajo.plan.util; - -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.fs.*; -import org.apache.tajo.catalog.*; -import org.apache.tajo.catalog.partition.PartitionMethodDesc; -import org.apache.tajo.catalog.proto.CatalogProtos; -import org.apache.tajo.conf.TajoConf; -import org.apache.tajo.datum.DatumFactory; -import org.apache.tajo.datum.NullDatum; -import org.apache.tajo.exception.*; -import org.apache.tajo.plan.expr.*; -import org.apache.tajo.plan.logical.ScanNode; -import org.apache.tajo.storage.Tuple; -import org.apache.tajo.storage.VTuple; -import org.apache.tajo.util.StringUtils; - -import java.io.IOException; -import java.util.List; -import java.util.Set; -import java.util.Stack; - -public class PartitionedTableUtil { - private static final Log LOG = LogFactory.getLog(PartitionedTableUtil.class); - - public static FilteredPartitionInfo findFilteredPartitionInfo(CatalogService catalog, TajoConf conf, - ScanNode scanNode) throws IOException, UndefinedDatabaseException, UndefinedTableException, - UndefinedPartitionMethodException, UndefinedOperatorException, UnsupportedException { - TableDesc table = scanNode.getTableDesc(); - PartitionMethodDesc partitionDesc = scanNode.getTableDesc().getPartitionMethod(); - - Schema paritionValuesSchema = new Schema(); - for (Column column : partitionDesc.getExpressionSchema().getRootColumns()) { - paritionValuesSchema.addColumn(column); - } - - Set indexablePredicateSet = Sets.newHashSet(); - - // if a query statement has a search condition, try to find indexable predicates - if (scanNode.hasQual()) { - EvalNode [] conjunctiveForms = AlgebraicUtil.toConjunctiveNormalFormArray(scanNode.getQual()); - // add qualifier to schema for qual - paritionValuesSchema.setQualifier(scanNode.getCanonicalName()); - for (Column 
column : paritionValuesSchema.getRootColumns()) { - for (EvalNode simpleExpr : conjunctiveForms) { - if (checkIfIndexablePredicateOnTargetColumn(simpleExpr, column)) { - indexablePredicateSet.add(simpleExpr); - } - } - } - } - - if (indexablePredicateSet.size() > 0) { // There are at least one indexable predicates - return findFilteredPartitionInfo(catalog, conf, table.getName(), paritionValuesSchema, - indexablePredicateSet.toArray(new EvalNode[indexablePredicateSet.size()]), new Path(table.getUri()), scanNode); - } else { // otherwise, we will get all partition paths. - return findFilteredPartitionInfo(catalog, conf, table.getName(), paritionValuesSchema, null, - new Path(table.getUri())); - } - } - - private static boolean checkIfIndexablePredicateOnTargetColumn(EvalNode evalNode, Column targetColumn) { - if (checkIfIndexablePredicate(evalNode) || checkIfDisjunctiveButOneVariable(evalNode)) { - Set variables = EvalTreeUtil.findUniqueColumns(evalNode); - // if it contains only single variable matched to a target column - return variables.size() == 1 && variables.contains(targetColumn); - } else { - return false; - } - } - - /** - * Check if an expression consists of one variable and one constant and - * the expression is a comparison operator. - * - * @param evalNode The expression to be checked - * @return true if an expression consists of one variable and one constant - * and the expression is a comparison operator. Other, false. 
- */ - private static boolean checkIfIndexablePredicate(EvalNode evalNode) { - // TODO - LIKE with a trailing wild-card character and IN with an array can be indexable - return AlgebraicUtil.containSingleVar(evalNode) && AlgebraicUtil.isIndexableOperator(evalNode); - } - - /** - * - * @param evalNode The expression to be checked - * @return true if an disjunctive expression, consisting of indexable expressions - */ - private static boolean checkIfDisjunctiveButOneVariable(EvalNode evalNode) { - if (evalNode.getType() == EvalType.OR) { - BinaryEval orEval = (BinaryEval) evalNode; - boolean indexable = - checkIfIndexablePredicate(orEval.getLeftExpr()) && - checkIfIndexablePredicate(orEval.getRightExpr()); - - boolean sameVariable = - EvalTreeUtil.findUniqueColumns(orEval.getLeftExpr()) - .equals(EvalTreeUtil.findUniqueColumns(orEval.getRightExpr())); - - return indexable && sameVariable; - } else { - return false; - } - } - - private static FilteredPartitionInfo findFilteredPartitionInfo(CatalogService catalog, TajoConf conf, - String tableName, Schema partitionColumns, EvalNode [] conjunctiveForms, Path tablePath) throws IOException, - UndefinedDatabaseException, UndefinedTableException, UndefinedPartitionMethodException, UndefinedOperatorException, - UnsupportedException { - return findFilteredPartitionInfo(catalog, conf, tableName, partitionColumns, conjunctiveForms, tablePath, null); - } - - /** - * It assumes that each conjunctive form corresponds to one column. - * - * @param partitionColumns - * @param conjunctiveForms search condition corresponding to partition columns. - * If it is NULL, it means that there is no search condition for this table. 
- * @param tablePath - * @return - * @throws IOException - */ - private static FilteredPartitionInfo findFilteredPartitionInfo(CatalogService catalog, TajoConf conf, - String tableName, Schema partitionColumns, EvalNode [] conjunctiveForms, Path tablePath, ScanNode scanNode) - throws IOException, UndefinedDatabaseException, UndefinedTableException, UndefinedPartitionMethodException, - UndefinedOperatorException, UnsupportedException { - - FilteredPartitionInfo filteredPartitionInfo = null; - Path[] filteredPaths = null; - FileSystem fs = tablePath.getFileSystem(conf); - String [] splits = CatalogUtil.splitFQTableName(tableName); - List partitions = null; - - try { - if (conjunctiveForms == null) { - partitions = catalog.getPartitionsOfTable(splits[0], splits[1]); - if (partitions.isEmpty()) { - filteredPaths = findFilteredPathsFromFileSystem(partitionColumns, conjunctiveForms, fs, tablePath); - filteredPartitionInfo = new FilteredPartitionInfo(filteredPaths); - setFilteredPartitionInfo(filteredPartitionInfo, fs, partitionColumns); - } else { - filteredPartitionInfo = findFilteredPartitionInfoByPartitionDesc(partitions); - } - } else { - if (catalog.existPartitions(splits[0], splits[1])) { - CatalogProtos.PartitionsByAlgebraProto request = getPartitionsAlgebraProto(splits[0], splits[1], - conjunctiveForms); - partitions = catalog.getPartitionsByAlgebra(request); - filteredPartitionInfo = findFilteredPartitionInfoByPartitionDesc(partitions); - } else { - filteredPaths = findFilteredPathsFromFileSystem(partitionColumns, conjunctiveForms, fs, tablePath); - filteredPartitionInfo = new FilteredPartitionInfo(filteredPaths); - setFilteredPartitionInfo(filteredPartitionInfo, fs, partitionColumns); - } - } - } catch (UnsupportedException ue) { - // Partial catalog might not allow some filter conditions. For example, HiveMetastore doesn't In statement, - // regexp statement and so on. Above case, Tajo need to build filtered path by listing hdfs directories. 
- LOG.warn(ue.getMessage()); - partitions = catalog.getPartitionsOfTable(splits[0], splits[1]); - if (partitions.isEmpty()) { - filteredPaths = findFilteredPathsFromFileSystem(partitionColumns, conjunctiveForms, fs, tablePath); - filteredPartitionInfo = new FilteredPartitionInfo(filteredPaths); - setFilteredPartitionInfo(filteredPartitionInfo, fs, partitionColumns); - } else { - filteredPartitionInfo = findFilteredPartitionInfoByPartitionDesc(partitions); - } - scanNode.setQual(AlgebraicUtil.createSingletonExprFromCNF(conjunctiveForms)); - } - - LOG.info("Filtered partition paths (num:" + filteredPartitionInfo.getPartitionPaths().length + - ", volume:" + filteredPartitionInfo.getTotalVolume() + ")"); - - return filteredPartitionInfo; - } - - /** - * Build list of partition path by PartitionDescProto which is generated from CatalogStore. - * - * @param partitions - * @return - */ - private static FilteredPartitionInfo findFilteredPartitionInfoByPartitionDesc(List - partitions) { - long totalVolume = 0L; - Path[] filteredPaths = new Path[partitions.size()]; - String[] partitionNames = new String[partitions.size()]; - for (int i = 0; i < partitions.size(); i++) { - CatalogProtos.PartitionDescProto partition = partitions.get(i); - filteredPaths[i] = new Path(partition.getPath()); - partitionNames[i] = partition.getPartitionName(); - totalVolume += partition.getNumBytes(); - } - return new FilteredPartitionInfo(filteredPaths, partitionNames, totalVolume); - } - - /** - * Build list of partition path by filtering directories in the given table path. 
- * - * - * @param partitionColumns - * @param conjunctiveForms - * @param fs - * @param tablePath - * @return - * @throws IOException - */ - private static Path[] findFilteredPathsFromFileSystem(Schema partitionColumns, EvalNode [] conjunctiveForms, - FileSystem fs, Path tablePath) throws IOException{ - Path[] filteredPaths = null; - PathFilter[] filters; - - if (conjunctiveForms == null) { - filters = buildAllAcceptingPathFilters(partitionColumns); - } else { - filters = buildPathFiltersForAllLevels(partitionColumns, conjunctiveForms); - } - - // loop from one to the number of partition columns - filteredPaths = toPathArray(fs.listStatus(tablePath, filters[0])); - - for (int i = 1; i < partitionColumns.size(); i++) { - // Get all file status matched to a ith level path filter. - filteredPaths = toPathArray(fs.listStatus(filteredPaths, filters[i])); - } - return filteredPaths; - } - - /** - * Build algebra expressions for querying partitions and partition keys by using EvalNodeToExprConverter. - * - * @param databaseName the database name - * @param tableName the table name - * @param conjunctiveForms EvalNode which contains filter conditions - * @return - */ - private static CatalogProtos.PartitionsByAlgebraProto getPartitionsAlgebraProto( - String databaseName, String tableName, EvalNode [] conjunctiveForms) { - - CatalogProtos.PartitionsByAlgebraProto.Builder request = CatalogProtos.PartitionsByAlgebraProto.newBuilder(); - request.setDatabaseName(databaseName); - request.setTableName(tableName); - - if (conjunctiveForms != null) { - EvalNode evalNode = AlgebraicUtil.createSingletonExprFromCNF(conjunctiveForms); - EvalNodeToExprConverter convertor = new EvalNodeToExprConverter(databaseName + "." + tableName); - convertor.visit(null, evalNode, new Stack<>()); - request.setAlgebra(convertor.getResult().toJson()); - } else { - request.setAlgebra(""); - } - - return request.build(); - } - - /** - * Build path filters for all levels with a list of filter conditions. 
- * - * For example, consider you have a partitioned table for three columns (i.e., col1, col2, col3). - * Then, this methods will create three path filters for (col1), (col1, col2), (col1, col2, col3). - * - * Corresponding filter conditions will be placed on each path filter, - * If there is no corresponding expression for certain column, - * The condition will be filled with a true value. - * - * Assume that an user gives a condition WHERE col1 ='A' and col3 = 'C'. - * There is no filter condition corresponding to col2. - * Then, the path filter conditions are corresponding to the followings: - * - * The first path filter: col1 = 'A' - * The second path filter: col1 = 'A' AND col2 IS NOT NULL - * The third path filter: col1 = 'A' AND col2 IS NOT NULL AND col3 = 'C' - * - * 'IS NOT NULL' predicate is always true against the partition path. - * - * @param partitionColumns - * @param conjunctiveForms - * @return - */ - private static PathFilter [] buildPathFiltersForAllLevels(Schema partitionColumns, - EvalNode [] conjunctiveForms) { - // Building partition path filters for all levels - Column target; - PathFilter [] filters = new PathFilter[partitionColumns.size()]; - List accumulatedFilters = Lists.newArrayList(); - for (int i = 0; i < partitionColumns.size(); i++) { // loop from one to level - target = partitionColumns.getColumn(i); - - for (EvalNode expr : conjunctiveForms) { - if (EvalTreeUtil.findUniqueColumns(expr).contains(target)) { - // Accumulate one qual per level - accumulatedFilters.add(expr); - } - } - - if (accumulatedFilters.size() < (i + 1)) { - accumulatedFilters.add(new IsNullEval(true, new FieldEval(target))); - } - - EvalNode filterPerLevel = AlgebraicUtil.createSingletonExprFromCNF( - accumulatedFilters.toArray(new EvalNode[accumulatedFilters.size()])); - filters[i] = new PartitionPathFilter(partitionColumns, filterPerLevel); - } - - return filters; - } - - /** - * Build an array of path filters for all levels with all accepting filter 
condition. - * @param partitionColumns The partition columns schema - * @return The array of path filter, accpeting all partition paths. - */ - public static PathFilter [] buildAllAcceptingPathFilters(Schema partitionColumns) { - Column target; - PathFilter [] filters = new PathFilter[partitionColumns.size()]; - List accumulatedFilters = Lists.newArrayList(); - for (int i = 0; i < partitionColumns.size(); i++) { // loop from one to level - target = partitionColumns.getColumn(i); - accumulatedFilters.add(new IsNullEval(true, new FieldEval(target))); - - EvalNode filterPerLevel = AlgebraicUtil.createSingletonExprFromCNF( - accumulatedFilters.toArray(new EvalNode[accumulatedFilters.size()])); - filters[i] = new PartitionPathFilter(partitionColumns, filterPerLevel); - } - return filters; - } - - private static Path[] toPathArray(FileStatus[] fileStatuses) { - Path[] paths = new Path[fileStatuses.length]; - for (int i = 0; i < fileStatuses.length; i++) { - FileStatus fileStatus = fileStatuses[i]; - paths[i] = fileStatus.getPath(); - } - return paths; - } - - private static void setFilteredPartitionInfo(FilteredPartitionInfo filteredPartitionInfo, FileSystem fs, - Schema partitionColumnSchema) { - long totalVolume = 0L; - String[] partitionNames = null; - if (filteredPartitionInfo.getPartitionPaths().length > 0) { - try { - partitionNames = new String[filteredPartitionInfo.getPartitionPaths().length]; - for (int i = 0; i < filteredPartitionInfo.getPartitionPaths().length; i++) { - Path input = filteredPartitionInfo.getPartitionPaths()[i]; - int startIdx = input.toString().indexOf(getColumnPartitionPathPrefix(partitionColumnSchema)); - ContentSummary summary = fs.getContentSummary(input); - partitionNames[i] = input.toString().substring(startIdx); - totalVolume += summary.getLength(); - } - } catch (Throwable e) { - throw new TajoInternalError(e); - } - } - filteredPartitionInfo.setPartitionNames(partitionNames); - filteredPartitionInfo.setTotalVolume(totalVolume); - } - 
- private static class PartitionPathFilter implements PathFilter { - - private Schema schema; - private EvalNode partitionFilter; - public PartitionPathFilter(Schema schema, EvalNode partitionFilter) { - this.schema = schema; - this.partitionFilter = partitionFilter; - partitionFilter.bind(null, schema); - } - - @Override - public boolean accept(Path path) { - Tuple tuple = buildTupleFromPartitionPath(schema, path, true); - if (tuple == null) { // if it is a file or not acceptable file - return false; - } - - return partitionFilter.eval(tuple).asBool(); - } - - @Override - public String toString() { - return partitionFilter.toString(); - } - } - - - /** - * Take a look at a column partition path. A partition path consists - * of a table path part and column values part. This method transforms - * a partition path into a tuple with a given partition column schema. - * - * hdfs://192.168.0.1/tajo/warehouse/table1/col1=abc/col2=def/col3=ghi - * ^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^ - * table path part column values part - * - * When a file path is given, it can perform two ways depending on beNullIfFile flag. - * If it is true, it returns NULL when a given path is a file. - * Otherwise, it returns a built tuple regardless of file or directory. - * - * @param partitionColumnSchema The partition column schema - * @param partitionPath The partition path - * @param beNullIfFile If true, this method returns NULL when a given path is a file. - * @return The tuple transformed from a column values part. 
- */ - public static Tuple buildTupleFromPartitionPath(Schema partitionColumnSchema, Path partitionPath, - boolean beNullIfFile) { - int startIdx = partitionPath.toString().indexOf(getColumnPartitionPathPrefix(partitionColumnSchema)); - - if (startIdx == -1) { // if there is no partition column in the patch - return null; - } - String columnValuesPart = partitionPath.toString().substring(startIdx); - - String [] columnValues = columnValuesPart.split("/"); - - // true means this is a file. - if (beNullIfFile && partitionColumnSchema.size() < columnValues.length) { - return null; - } - - Tuple tuple = new VTuple(partitionColumnSchema.size()); - int i = 0; - for (; i < columnValues.length && i < partitionColumnSchema.size(); i++) { - String [] parts = columnValues[i].split("="); - if (parts.length != 2) { - return null; - } - int columnId = partitionColumnSchema.getColumnIdByName(parts[0]); - Column keyColumn = partitionColumnSchema.getColumn(columnId); - tuple.put(columnId, DatumFactory.createFromString(keyColumn.getDataType(), - StringUtils.unescapePathName(parts[1]))); - } - for (; i < partitionColumnSchema.size(); i++) { - tuple.put(i, NullDatum.get()); - } - return tuple; - } - - /** - * Get a prefix of column partition path. For example, consider a column partition (col1, col2). - * Then, you will get a string 'col1='. - * - * @param partitionColumn the schema of column partition - * @return The first part string of column partition path. - */ - public static String getColumnPartitionPathPrefix(Schema partitionColumn) { - StringBuilder sb = new StringBuilder(); - sb.append(partitionColumn.getColumn(0).getSimpleName()).append("="); - return sb.toString(); - } - - /** - * This transforms a partition name into a tupe with a given partition column schema. When a file path - * Assume that an user gives partition name 'country=KOREA/city=SEOUL'. 
- * - * The first datum of tuple : KOREA - * The second datum of tuple : SEOUL - * - * @param partitionColumnSchema The partition column schema - * @param partitionName The partition name - * @return The tuple transformed from a column values part. - */ - public static Tuple buildTupleFromPartitionName(Schema partitionColumnSchema, String partitionName) { - Preconditions.checkNotNull(partitionColumnSchema); - Preconditions.checkNotNull(partitionName); - - String [] columnValues = partitionName.split("/"); - Preconditions.checkArgument(partitionColumnSchema.size() >= columnValues.length, - "Invalid Partition Name :" + partitionName); - - Tuple tuple = new VTuple(partitionColumnSchema.size()); - - for (int i = 0; i < tuple.size(); i++) { - tuple.put(i, NullDatum.get()); - } - - for (int i = 0; i < columnValues.length; i++) { - String [] parts = columnValues[i].split("="); - if (parts.length == 2) { - int columnId = partitionColumnSchema.getColumnIdByName(parts[0]); - Column keyColumn = partitionColumnSchema.getColumn(columnId); - tuple.put(columnId, DatumFactory.createFromString(keyColumn.getDataType(), - StringUtils.unescapePathName(parts[1]))); - } - } - - return tuple; - } - -} \ No newline at end of file diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/verifier/PostLogicalPlanVerifier.java b/tajo-plan/src/main/java/org/apache/tajo/plan/verifier/PostLogicalPlanVerifier.java index 09edc7272d..744256136b 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/verifier/PostLogicalPlanVerifier.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/verifier/PostLogicalPlanVerifier.java @@ -134,6 +134,13 @@ private static boolean isSimpleRelationNode(LogicalNode node) { private static long getTableVolume(ScanNode scanNode) { if (scanNode.getTableDesc().hasStats()) { long scanBytes = scanNode.getTableDesc().getStats().getNumBytes(); + if (scanNode.getType() == NodeType.PARTITIONS_SCAN) { + PartitionedTableScanNode pScanNode = (PartitionedTableScanNode) 
scanNode; + if (pScanNode.getInputPaths() == null || pScanNode.getInputPaths().length == 0) { + scanBytes = 0L; + } + } + return scanBytes; } else { return -1; diff --git a/tajo-plan/src/main/proto/Plan.proto b/tajo-plan/src/main/proto/Plan.proto index ff3cbc5830..0cd0c32dcc 100644 --- a/tajo-plan/src/main/proto/Plan.proto +++ b/tajo-plan/src/main/proto/Plan.proto @@ -73,6 +73,7 @@ message LogicalNode { optional SchemaProto out_schema = 5; optional ScanNode scan = 6; + optional PartitionScanSpec partitionScan = 7; optional IndexScanSpec indexScan = 8; optional JoinNode join = 9; optional FilterNode filter = 10; @@ -114,6 +115,10 @@ message ScanNode { required bool nameResolveBase = 7; } +message PartitionScanSpec { + repeated string paths = 1; +} + message IndexScanSpec { required SchemaProto keySchema = 1; required string indexPath = 2; @@ -226,6 +231,10 @@ enum JoinType { RIGHT_SEMI_JOIN = 9; } +message PartitionTableScanSpec { + repeated string paths = 1; +} + message PersistentStoreNode { optional int32 childSeq = 1; // CreateTableNode may not have any children. This should be improved at TAJO-1589. 
required string storageType = 2; From b327993ba1c4749621f41fcdd67339551b64ef4e Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 23 Nov 2015 12:22:21 +0900 Subject: [PATCH 023/127] Remove unused packages --- .../main/java/org/apache/tajo/querymaster/Repartitioner.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java b/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java index 79d2cc1087..44ce9394e1 100644 --- a/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java +++ b/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java @@ -30,7 +30,6 @@ import org.apache.tajo.catalog.*; import org.apache.tajo.catalog.statistics.StatisticsUtil; import org.apache.tajo.catalog.statistics.TableStats; -import org.apache.tajo.conf.TajoConf; import org.apache.tajo.conf.TajoConf.ConfVars; import org.apache.tajo.engine.planner.PhysicalPlannerImpl; import org.apache.tajo.engine.planner.RangePartitionAlgorithm; @@ -44,10 +43,8 @@ import org.apache.tajo.exception.*; import org.apache.tajo.plan.logical.*; import org.apache.tajo.plan.logical.SortNode.SortPurpose; -import org.apache.tajo.plan.rewrite.rules.PartitionedTableRewriter; import org.apache.tajo.plan.serder.PlanProto.DistinctGroupbyEnforcer.MultipleAggregationStage; import org.apache.tajo.plan.serder.PlanProto.EnforceProperty; -import org.apache.tajo.plan.util.FilteredPartitionInfo; import org.apache.tajo.plan.util.PlannerUtil; import org.apache.tajo.querymaster.Task.IntermediateEntry; import org.apache.tajo.storage.*; From 22d3ad78231c7db4588246dd01bcee9b08573e63 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 23 Nov 2015 12:23:32 +0900 Subject: [PATCH 024/127] Remove unused package --- .../apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java | 1 - 1 file changed, 1 deletion(-) diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java 
b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java index 12ad77e2f9..5df2a71ca8 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java @@ -40,7 +40,6 @@ import org.apache.tajo.plan.rewrite.LogicalPlanRewriteRule; import org.apache.tajo.plan.rewrite.LogicalPlanRewriteRuleContext; import org.apache.tajo.plan.util.EvalNodeToExprConverter; -import org.apache.tajo.plan.util.FilteredPartitionInfo; import org.apache.tajo.plan.util.PlannerUtil; import org.apache.tajo.plan.visitor.BasicLogicalPlanVisitor; import org.apache.tajo.storage.Tuple; From 9009b2a500a037493f02c6767bf3c6c4472a24f4 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 23 Nov 2015 12:24:25 +0900 Subject: [PATCH 025/127] Remove unused class --- .../tajo/plan/util/FilteredPartitionInfo.java | 68 ------------------- 1 file changed, 68 deletions(-) delete mode 100644 tajo-plan/src/main/java/org/apache/tajo/plan/util/FilteredPartitionInfo.java diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/util/FilteredPartitionInfo.java b/tajo-plan/src/main/java/org/apache/tajo/plan/util/FilteredPartitionInfo.java deleted file mode 100644 index dc8008b462..0000000000 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/util/FilteredPartitionInfo.java +++ /dev/null @@ -1,68 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tajo.plan.util; - -import org.apache.hadoop.fs.Path; - -public class FilteredPartitionInfo { - private Path[] partitionPaths; - private String[] partitionNames; - private long totalVolume; - - public FilteredPartitionInfo() { - } - - public FilteredPartitionInfo(Path[] partitionPaths) { - this.partitionPaths = partitionPaths; - } - - public FilteredPartitionInfo(Path[] partitionPaths, long totalVolume) { - this.partitionPaths = partitionPaths; - this.totalVolume = totalVolume; - } - - public FilteredPartitionInfo(Path[] partitionPaths, String[] partitionNames, long totalVolume) { - this.partitionPaths = partitionPaths; - this.partitionNames = partitionNames; - this.totalVolume = totalVolume; - } - - public Path[] getPartitionPaths() { - return partitionPaths; - } - - public void setPartitionPaths(Path[] partitionPaths) { - this.partitionPaths = partitionPaths; - } - - public String[] getPartitionNames() { - return partitionNames; - } - - public void setPartitionNames(String[] partitionNames) { - this.partitionNames = partitionNames; - } - - public long getTotalVolume() { - return totalVolume; - } - - public void setTotalVolume(long totalVolume) { - this.totalVolume = totalVolume; - } -} \ No newline at end of file From 92063a010130b9edb4de291e4c8b295aa169da42 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 23 Nov 2015 12:28:16 +0900 Subject: [PATCH 026/127] Implement FileTableSpace::makeNonPartitionSplit --- .../apache/tajo/storage/FileTablespace.java | 44 ++++++++++++++----- 1 file changed, 34 insertions(+), 10 
deletions(-) diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java index a60f8b8e97..7b8ee26850 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java @@ -434,11 +434,39 @@ protected PartitionFileFragment makePartitionSplit(String fragmentId, Path file, // for Non Splittable. eg, compressed gzip TextFile protected Fragment makeNonSplit(String fragmentId, Path file, long start, long length, BlockLocation[] blkLocations) throws IOException { - return makeNonSplit(fragmentId, file, start, length, blkLocations, null); + + Map hostsBlockMap = new HashMap<>(); + for (BlockLocation blockLocation : blkLocations) { + for (String host : blockLocation.getHosts()) { + if (hostsBlockMap.containsKey(host)) { + hostsBlockMap.put(host, hostsBlockMap.get(host) + 1); + } else { + hostsBlockMap.put(host, 1); + } + } + } + + List> entries = new ArrayList<>(hostsBlockMap.entrySet()); + Collections.sort(entries, new Comparator>() { + + @Override + public int compare(Map.Entry v1, Map.Entry v2) { + return v1.getValue().compareTo(v2.getValue()); + } + }); + + String[] hosts = new String[blkLocations[0].getHosts().length]; + + for (int i = 0; i < hosts.length; i++) { + Map.Entry entry = entries.get((entries.size() - 1) - i); + hosts[i] = entry.getKey(); + } + + return new FileFragment(fragmentId, file, start, length, hosts); } - protected Fragment makeNonSplit(String fragmentId, Path file, long start, long length, - BlockLocation[] blkLocations, String partitionName) throws IOException { + protected Fragment makeNonPartitionSplit(String fragmentId, Path file, long start, long length, + BlockLocation[] blkLocations, String partitionName) throws IOException { Map hostsBlockMap = new HashMap<>(); for (BlockLocation 
blockLocation : blkLocations) { @@ -467,11 +495,7 @@ public int compare(Map.Entry v1, Map.Entry v2) hosts[i] = entry.getKey(); } - if (partitionName != null) { - return new PartitionFileFragment(fragmentId, file, start, length, hosts, partitionName); - } else { - return new FileFragment(fragmentId, file, start, length, hosts); - } + return new PartitionFileFragment(fragmentId, file, start, length, hosts, partitionName); } /** @@ -638,7 +662,7 @@ public List getPartitionSplits(String tableName, TableMeta meta, Schem volumeSplits.add(makePartitionSplit(tableName, path, blockLocation, partitions[i])); } } else { - splits.add(makeNonSplit(tableName, path, 0, length, blkLocations, partitions[i])); + splits.add(makeNonPartitionSplit(tableName, path, 0, length, blkLocations, partitions[i])); } } @@ -667,7 +691,7 @@ public List getPartitionSplits(String tableName, TableMeta meta, Schem blkLocations[blkIndex].getHosts(), partitions[i])); } } else { // Non splittable - splits.add(makeNonSplit(tableName, path, 0, length, blkLocations, partitions[i])); + splits.add(makeNonPartitionSplit(tableName, path, 0, length, blkLocations, partitions[i])); } } } From fc6043fa08859431d31df8f237b02ce6b5c11a61 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 23 Nov 2015 14:39:28 +0900 Subject: [PATCH 027/127] Update the line of added methods in FileTablespace --- .../apache/tajo/storage/FileTablespace.java | 101 ++++++++---------- 1 file changed, 47 insertions(+), 54 deletions(-) diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java index 7b8ee26850..38395dce6b 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java @@ -421,19 +421,9 @@ protected FileFragment makeSplit(String fragmentId, Path file, BlockLocation 
blo return new FileFragment(fragmentId, file, blockLocation); } - protected PartitionFileFragment makePartitionSplit(String fragmentId, Path file, long start, long length, - String[] hosts, String partitionName) { - return new PartitionFileFragment(fragmentId, file, start, length, hosts, partitionName); - } - - protected PartitionFileFragment makePartitionSplit(String fragmentId, Path file, BlockLocation blockLocation - , String partitionName) throws IOException { - return new PartitionFileFragment(fragmentId, file, blockLocation, partitionName); - } - // for Non Splittable. eg, compressed gzip TextFile - protected Fragment makeNonSplit(String fragmentId, Path file, long start, long length, - BlockLocation[] blkLocations) throws IOException { + protected FileFragment makeNonSplit(String fragmentId, Path file, long start, long length, + BlockLocation[] blkLocations) throws IOException { Map hostsBlockMap = new HashMap<>(); for (BlockLocation blockLocation : blkLocations) { @@ -461,43 +451,9 @@ public int compare(Map.Entry v1, Map.Entry v2) Map.Entry entry = entries.get((entries.size() - 1) - i); hosts[i] = entry.getKey(); } - return new FileFragment(fragmentId, file, start, length, hosts); } - protected Fragment makeNonPartitionSplit(String fragmentId, Path file, long start, long length, - BlockLocation[] blkLocations, String partitionName) throws IOException { - - Map hostsBlockMap = new HashMap<>(); - for (BlockLocation blockLocation : blkLocations) { - for (String host : blockLocation.getHosts()) { - if (hostsBlockMap.containsKey(host)) { - hostsBlockMap.put(host, hostsBlockMap.get(host) + 1); - } else { - hostsBlockMap.put(host, 1); - } - } - } - - List> entries = new ArrayList<>(hostsBlockMap.entrySet()); - Collections.sort(entries, new Comparator>() { - - @Override - public int compare(Map.Entry v1, Map.Entry v2) { - return v1.getValue().compareTo(v2.getValue()); - } - }); - - String[] hosts = new String[blkLocations[0].getHosts().length]; - - for (int i = 0; 
i < hosts.length; i++) { - Map.Entry entry = entries.get((entries.size() - 1) - i); - hosts[i] = entry.getKey(); - } - - return new PartitionFileFragment(fragmentId, file, start, length, hosts, partitionName); - } - /** * Get the minimum split size * @@ -535,7 +491,6 @@ public List getSplits(String tableName, TableMeta meta, Schema schema, List volumeSplits = Lists.newArrayList(); List blockLocations = Lists.newArrayList(); - int i = 0; for (Path p : inputs) { ArrayList files = Lists.newArrayList(); if (fs.isFile(p)) { @@ -584,17 +539,14 @@ public List getSplits(String tableName, TableMeta meta, Schema schema, // for s3 while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) { int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); - splits.add(makeSplit(tableName, path, length - bytesRemaining, splitSize, - blkLocations[blkIndex].getHosts())); - + blkLocations[blkIndex].getHosts())); bytesRemaining -= splitSize; } if (bytesRemaining > 0) { int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); - splits.add(makeSplit(tableName, path, length - bytesRemaining, bytesRemaining, - blkLocations[blkIndex].getHosts())); + blkLocations[blkIndex].getHosts())); } } else { // Non splittable splits.add(makeNonSplit(tableName, path, 0, length, blkLocations)); @@ -605,7 +557,6 @@ public List getSplits(String tableName, TableMeta meta, Schema schema, if(LOG.isDebugEnabled()){ LOG.debug("# of splits per partition: " + (splits.size() - previousSplitSize)); } - i++; } // Combine original fileFragments with new VolumeId information @@ -615,6 +566,48 @@ public List getSplits(String tableName, TableMeta meta, Schema schema, return splits; } + protected PartitionFileFragment makePartitionSplit(String fragmentId, Path file, long start, long length, + String[] hosts, String partitionName) { + return new PartitionFileFragment(fragmentId, file, start, length, hosts, partitionName); + } + + protected PartitionFileFragment makePartitionSplit(String fragmentId, 
Path file, BlockLocation blockLocation + , String partitionName) throws IOException { + return new PartitionFileFragment(fragmentId, file, blockLocation, partitionName); + } + + protected Fragment makeNonPartitionSplit(String fragmentId, Path file, long start, long length, + BlockLocation[] blkLocations, String partitionName) throws IOException { + + Map hostsBlockMap = new HashMap<>(); + for (BlockLocation blockLocation : blkLocations) { + for (String host : blockLocation.getHosts()) { + if (hostsBlockMap.containsKey(host)) { + hostsBlockMap.put(host, hostsBlockMap.get(host) + 1); + } else { + hostsBlockMap.put(host, 1); + } + } + } + + List> entries = new ArrayList<>(hostsBlockMap.entrySet()); + Collections.sort(entries, new Comparator>() { + + @Override + public int compare(Map.Entry v1, Map.Entry v2) { + return v1.getValue().compareTo(v2.getValue()); + } + }); + + String[] hosts = new String[blkLocations[0].getHosts().length]; + + for (int i = 0; i < hosts.length; i++) { + Map.Entry entry = entries.get((entries.size() - 1) - i); + hosts[i] = entry.getKey(); + } + + return new PartitionFileFragment(fragmentId, file, start, length, hosts, partitionName); + } /** * Generate the list of files and make them into PartitionedFileSplits. @@ -622,7 +615,7 @@ public List getSplits(String tableName, TableMeta meta, Schema schema, * @throws IOException */ public List getPartitionSplits(String tableName, TableMeta meta, Schema schema, String[] partitions, - Path... inputs) throws IOException { + Path... 
inputs) throws IOException { // generate splits' List splits = Lists.newArrayList(); From 3fe18c974de2f7779d491a85ecf8c8532745523f Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 23 Nov 2015 14:56:54 +0900 Subject: [PATCH 028/127] Implement FileTableSpace::partitionSplit --- .../engine/planner/PhysicalPlannerImpl.java | 29 ++++++++--- .../apache/tajo/storage/FileTablespace.java | 48 ++++++++++++++++--- .../fragment/PartitionFileFragment.java | 16 +++---- 3 files changed, 72 insertions(+), 21 deletions(-) diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/PhysicalPlannerImpl.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/PhysicalPlannerImpl.java index a48b8f8786..af099a954f 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/PhysicalPlannerImpl.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/PhysicalPlannerImpl.java @@ -53,6 +53,7 @@ import org.apache.tajo.storage.fragment.FileFragment; import org.apache.tajo.storage.fragment.Fragment; import org.apache.tajo.storage.fragment.FragmentConvertor; +import org.apache.tajo.storage.fragment.PartitionFileFragment; import org.apache.tajo.unit.StorageUnit; import org.apache.tajo.util.FileUtil; import org.apache.tajo.util.StringUtils; @@ -441,7 +442,7 @@ private PhysicalExec createBestInnerJoinPlan(TaskAttemptContext context, JoinNod private MergeJoinExec createMergeInnerJoin(TaskAttemptContext context, JoinNode plan, PhysicalExec leftExec, PhysicalExec rightExec) throws IOException { SortSpec[][] sortSpecs = PlannerUtil.getSortKeysFromJoinQual( - plan.getJoinQual(), leftExec.getSchema(), rightExec.getSchema()); + plan.getJoinQual(), leftExec.getSchema(), rightExec.getSchema()); SortNode leftSortNode = LogicalPlan.createNodeWithoutPID(SortNode.class); leftSortNode.setSortSpecs(sortSpecs[0]); @@ -922,14 +923,28 @@ public PhysicalExec createScanPlan(TaskAttemptContext ctx, ScanNode scanNode, St } } - if (scanNode instanceof PartitionedTableScanNode) { + if 
(scanNode instanceof PartitionedTableScanNode + && ((PartitionedTableScanNode)scanNode).getInputPaths() != null && + ((PartitionedTableScanNode)scanNode).getInputPaths().length > 0) { + if (broadcastFlag) { - FragmentProto [] fragments = ctx.getTables(scanNode.getCanonicalName()); - if (fragments == null) { - return new SeqScanExec(ctx, scanNode, null); - } else { - return new PartitionMergeScanExec(ctx, scanNode, fragments); + PartitionedTableScanNode partitionedTableScanNode = (PartitionedTableScanNode) scanNode; + List fileFragments = TUtil.newList(); + + FileTablespace space = (FileTablespace) TablespaceManager.get(scanNode.getTableDesc().getUri()); + for (int i = 0; i < partitionedTableScanNode.getInputPaths().length; i++) { + Path path = partitionedTableScanNode.getInputPaths()[i]; + String partitionKeys = partitionedTableScanNode.hasPartitionKeys() ? partitionedTableScanNode + .getPartitionKeys()[i] : ""; + fileFragments.addAll(TUtil.newList(space.partitionSplit(scanNode.getCanonicalName(), path, partitionKeys))); } + + FragmentProto[] fragments = + FragmentConvertor.toFragmentProtoArray(fileFragments.toArray( + new PartitionFileFragment[fileFragments.size()])); + + ctx.addFragments(scanNode.getCanonicalName(), fragments); + return new PartitionMergeScanExec(ctx, scanNode, fragments); } } diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java index 38395dce6b..c61f4dcdf6 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java @@ -566,18 +566,54 @@ public List getSplits(String tableName, TableMeta meta, Schema schema, return splits; } + public PartitionFileFragment[] partitionSplit(String tableName, Path tablePath, String partitionKeys) + throws IOException { + return 
partitionSplit(tableName, tablePath, fs.getDefaultBlockSize(), partitionKeys); + } + + private PartitionFileFragment[] partitionSplit(String tableName, Path tablePath, long size, String partitionKeys) + throws IOException { + FileSystem fs = tablePath.getFileSystem(conf); + + long defaultBlockSize = size; + List listTablets = new ArrayList<>(); + PartitionFileFragment tablet; + + FileStatus[] fileLists = fs.listStatus(tablePath); + for (FileStatus file : fileLists) { + long remainFileSize = file.getLen(); + long start = 0; + if (remainFileSize > defaultBlockSize) { + while (remainFileSize > defaultBlockSize) { + tablet = new PartitionFileFragment(tableName, file.getPath(), start, defaultBlockSize, partitionKeys); + listTablets.add(tablet); + start += defaultBlockSize; + remainFileSize -= defaultBlockSize; + } + listTablets.add(new PartitionFileFragment(tableName, file.getPath(), start, remainFileSize, partitionKeys)); + } else { + listTablets.add(new PartitionFileFragment(tableName, file.getPath(), 0, remainFileSize, partitionKeys)); + } + } + + PartitionFileFragment[] tablets = new PartitionFileFragment[listTablets.size()]; + listTablets.toArray(tablets); + + return tablets; + } + protected PartitionFileFragment makePartitionSplit(String fragmentId, Path file, long start, long length, - String[] hosts, String partitionName) { - return new PartitionFileFragment(fragmentId, file, start, length, hosts, partitionName); + String[] hosts, String partitionKeys) { + return new PartitionFileFragment(fragmentId, file, start, length, hosts, partitionKeys); } protected PartitionFileFragment makePartitionSplit(String fragmentId, Path file, BlockLocation blockLocation - , String partitionName) throws IOException { - return new PartitionFileFragment(fragmentId, file, blockLocation, partitionName); + , String partitionKeys) throws IOException { + return new PartitionFileFragment(fragmentId, file, blockLocation, partitionKeys); } protected Fragment makeNonPartitionSplit(String 
fragmentId, Path file, long start, long length, - BlockLocation[] blkLocations, String partitionName) throws IOException { + BlockLocation[] blkLocations, String partitionKeys) throws IOException { Map hostsBlockMap = new HashMap<>(); for (BlockLocation blockLocation : blkLocations) { @@ -606,7 +642,7 @@ public int compare(Map.Entry v1, Map.Entry v2) hosts[i] = entry.getKey(); } - return new PartitionFileFragment(fragmentId, file, start, length, hosts, partitionName); + return new PartitionFileFragment(fragmentId, file, start, length, hosts, partitionKeys); } /** diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java index c3d6588d31..98cd6e46c5 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java @@ -49,25 +49,25 @@ public PartitionFileFragment(ByteString raw) throws InvalidProtocolBufferExcepti init(builder.build()); } - public PartitionFileFragment(String tableName, Path uri, BlockLocation blockLocation, String partitionName) + public PartitionFileFragment(String tableName, Path uri, BlockLocation blockLocation, String partitionKeys) throws IOException { this.set(tableName, uri, blockLocation.getOffset(), blockLocation.getLength(), blockLocation.getHosts(), null, - partitionName); + partitionKeys); } public PartitionFileFragment(String tableName, Path uri, long start, long length, String[] hosts, int[] diskIds, - String partitionName) { - this.set(tableName, uri, start, length, hosts, diskIds, partitionName); + String partitionKeys) { + this.set(tableName, uri, start, length, hosts, diskIds, partitionKeys); } // Non splittable public PartitionFileFragment(String tableName, Path uri, long start, long length, String[] hosts, 
- String partitionName) { - this.set(tableName, uri, start, length, hosts, null, partitionName); + String partitionKeys) { + this.set(tableName, uri, start, length, hosts, null, partitionKeys); } - public PartitionFileFragment(String fragmentId, Path path, long start, long length, String partitionName) { - this.set(fragmentId, path, start, length, null, null, partitionName); + public PartitionFileFragment(String fragmentId, Path path, long start, long length, String partitionKeys) { + this.set(fragmentId, path, start, length, null, null, partitionKeys); } public PartitionFileFragment(PartitionFileFragmentProto proto) { From fc16d15f9eeaeaef094d47e7881788f08a2f5427 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 23 Nov 2015 14:59:36 +0900 Subject: [PATCH 029/127] Added simple description for splitting partitioned table --- .../src/main/java/org/apache/tajo/storage/FileTablespace.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java index c61f4dcdf6..2b79887a0c 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java @@ -566,6 +566,10 @@ public List getSplits(String tableName, TableMeta meta, Schema schema, return splits; } + //////////////////////////////////////////////////////////////////////////////// + // The below code is for splitting partitioned table. 
+ //////////////////////////////////////////////////////////////////////////////// + public PartitionFileFragment[] partitionSplit(String tableName, Path tablePath, String partitionKeys) throws IOException { return partitionSplit(tableName, tablePath, fs.getDefaultBlockSize(), partitionKeys); From 7e35b0674d8ce109ad1b9cab24ba185b0c6d92b0 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 23 Nov 2015 17:21:23 +0900 Subject: [PATCH 030/127] Fix bugs --- .../engine/planner/physical/SeqScanExec.java | 12 +--- .../tajo/querymaster/Repartitioner.java | 13 +++- .../logical/PartitionedTableScanNode.java | 5 +- .../rules/PartitionedTableRewriter.java | 64 ++++++++----------- .../plan/serder/LogicalNodeDeserializer.java | 7 ++ .../plan/serder/LogicalNodeSerializer.java | 7 ++ tajo-plan/src/main/proto/Plan.proto | 2 + .../apache/tajo/storage/FileTablespace.java | 15 ++--- .../fragment/PartitionFileFragment.java | 7 +- 9 files changed, 75 insertions(+), 57 deletions(-) diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java index ed897c5a7f..f4f45bb7d0 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java @@ -101,15 +101,9 @@ private void rewriteColumnPartitionedTableSchema() throws IOException { List partitionFileFragments = FragmentConvertor.convert(PartitionFileFragment .class, fragments); - if (partitionFileFragments.get(0) != null) { - // Get first partition key from a given partition keys - partitionRow = PartitionedTableRewriter.buildTupleFromPartitionName(columnPartitionSchema, - partitionFileFragments.get(0).getPartitionKeys()); - } else { - // Get a partition key value from a given path - partitionRow = PartitionedTableRewriter.buildTupleFromPartitionPath( - columnPartitionSchema, partitionFileFragments.get(0).getPath(), 
false); - } + // Get first partition key from a given partition keys + partitionRow = PartitionedTableRewriter.buildTupleFromPartitionName(columnPartitionSchema, + partitionFileFragments.get(0).getPartitionKeys()); } // Targets or search conditions may contain column references. diff --git a/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java b/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java index 44ce9394e1..b0e13cd798 100644 --- a/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java +++ b/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java @@ -478,11 +478,14 @@ public static List getFragmentsFromPartitionedTable(Tablespace tsHandl } List fragments = Lists.newArrayList(); PartitionedTableScanNode partitionsScan = (PartitionedTableScanNode) scan; + fragments.addAll(((FileTablespace) tsHandler).getPartitionSplits( scan.getCanonicalName(), table.getMeta(), table.getSchema(), partitionsScan.getPartitionKeys(), partitionsScan.getInputPaths())); + partitionsScan.setInputPaths(null); partitionsScan.setPartitionKeys(null); + return fragments; } @@ -515,13 +518,18 @@ private static void scheduleLeafTasksWithBroadcastTable(TaskSchedulerContext sch Collection scanFragments; Path[] partitionScanPaths = null; - + String[] partitionKeys = null; Tablespace space = TablespaceManager.get(desc.getUri()); if (scan.getType() == NodeType.PARTITIONS_SCAN) { PartitionedTableScanNode partitionScan = (PartitionedTableScanNode) scan; partitionScanPaths = partitionScan.getInputPaths(); + + if (partitionScan.hasPartitionKeys()) { + partitionKeys = partitionScan.getPartitionKeys(); + } + // set null to inputPaths in getFragmentsFromPartitionedTable() scanFragments = getFragmentsFromPartitionedTable(space, scan, desc); } else { @@ -536,6 +544,9 @@ private static void scheduleLeafTasksWithBroadcastTable(TaskSchedulerContext sch PartitionedTableScanNode partitionScan = (PartitionedTableScanNode)scan; // 
PhisicalPlanner make PartitionMergeScanExec when table is boradcast table and inputpaths is not empty partitionScan.setInputPaths(partitionScanPaths); + if (partitionKeys != null) { + partitionScan.setPartitionKeys(partitionKeys); + } } else { broadcastFragments.addAll(scanFragments); } diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java b/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java index 2dfa3202a2..7fa415cdd3 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java @@ -124,7 +124,10 @@ public Object clone() throws CloneNotSupportedException { } unionScan.inputPaths = inputPaths; - unionScan.partitionKeys = partitionKeys; + + if (hasPartitionKeys()) { + unionScan.partitionKeys = partitionKeys; + } return unionScan; } diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java index 5df2a71ca8..1c5ff0ca7b 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java @@ -45,6 +45,7 @@ import org.apache.tajo.storage.Tuple; import org.apache.tajo.storage.VTuple; import org.apache.tajo.util.StringUtils; +import org.apache.tajo.util.TUtil; import java.io.IOException; import java.util.*; @@ -135,7 +136,6 @@ private PartitionContent getPartitionContent(OverridableConf queryContext, Strin UndefinedOperatorException, UnsupportedException { PartitionContent partitionContent = null; - Path [] filteredPaths = null; FileSystem fs = tablePath.getFileSystem(queryContext.getConf()); String [] splits = CatalogUtil.splitFQTableName(tableName); List partitions = null; @@ -144,8 +144,7 @@ private PartitionContent 
getPartitionContent(OverridableConf queryContext, Strin if (conjunctiveForms == null) { partitions = catalog.getPartitionsOfTable(splits[0], splits[1]); if (partitions.isEmpty()) { - filteredPaths = findFilteredPathsFromFileSystem(partitionColumns, conjunctiveForms, fs, tablePath); - partitionContent = new PartitionContent(filteredPaths); + partitionContent = getPartitionContentByFileSystem(partitionColumns, conjunctiveForms, fs, tablePath); } else { partitionContent = getPartitionContentByPartitionDesc(partitions); } @@ -155,8 +154,7 @@ private PartitionContent getPartitionContent(OverridableConf queryContext, Strin partitions = catalog.getPartitionsByAlgebra(request); partitionContent = getPartitionContentByPartitionDesc(partitions); } else { - filteredPaths = findFilteredPathsFromFileSystem(partitionColumns, conjunctiveForms, fs, tablePath); - partitionContent = new PartitionContent(filteredPaths); + partitionContent = getPartitionContentByFileSystem(partitionColumns, conjunctiveForms, fs, tablePath); } } } catch (UnsupportedException ue) { @@ -165,14 +163,14 @@ private PartitionContent getPartitionContent(OverridableConf queryContext, Strin LOG.warn(ue.getMessage()); partitions = catalog.getPartitionsOfTable(splits[0], splits[1]); if (partitions.isEmpty()) { - filteredPaths = findFilteredPathsFromFileSystem(partitionColumns, conjunctiveForms, fs, tablePath); - partitionContent = new PartitionContent(filteredPaths); + partitionContent = getPartitionContentByFileSystem(partitionColumns, conjunctiveForms, fs, tablePath); } else { partitionContent = getPartitionContentByPartitionDesc(partitions); } scanNode.setQual(AlgebraicUtil.createSingletonExprFromCNF(conjunctiveForms)); } LOG.info("Filtered directory or files: " + partitionContent.getPartitionPaths().length); + LOG.info("Filtered partition keys: " + partitionContent.getPartitionKeys().length); return partitionContent; } @@ -207,10 +205,15 @@ private PartitionContent getPartitionContentByPartitionDesc(List 
partitionKeysList = TUtil.newList(); if (conjunctiveForms == null) { filters = buildAllAcceptingPathFilters(partitionColumns); @@ -225,7 +228,20 @@ private PartitionContent getPartitionContentByPartitionDesc(List 0) { - try { - FileSystem fs = scanNode.getInputPaths()[0].getFileSystem(queryContext.getConf()); - long totalVolume = 0; - - for (Path input : scanNode.getInputPaths()) { - ContentSummary summary = fs.getContentSummary(input); - totalVolume += summary.getLength(); - } - scanNode.getTableDesc().getStats().setNumBytes(totalVolume); - } catch (Throwable e) { - throw new TajoInternalError(e); - } - } - } - private final class Rewriter extends BasicLogicalPlanVisitor { @Override public Object visitScan(OverridableConf queryContext, LogicalPlan plan, LogicalPlan.QueryBlock block, @@ -564,16 +562,10 @@ public Object visitScan(OverridableConf queryContext, LogicalPlan plan, LogicalP Path[] filteredPaths = partitionContent.getPartitionPaths(); plan.addHistory("PartitionTableRewriter chooses " + filteredPaths.length + " of partitions"); - PartitionedTableScanNode rewrittenScanNode = plan.createNode(PartitionedTableScanNode.class); - String[] partitionKeys = partitionContent.getPartitionKeys(); - rewrittenScanNode.init(scanNode, filteredPaths, partitionKeys); - - if (partitionKeys != null) { - rewrittenScanNode.getTableDesc().getStats().setNumBytes(partitionContent.getTotalVolume()); - } else { - updateTableStat(queryContext, rewrittenScanNode); - } + PartitionedTableScanNode rewrittenScanNode = plan.createNode(PartitionedTableScanNode.class); + rewrittenScanNode.init(scanNode, filteredPaths, partitionContent.getPartitionKeys()); + rewrittenScanNode.getTableDesc().getStats().setNumBytes(partitionContent.getTotalVolume()); // if it is topmost node, set it as the rootnode of this block. 
if (stack.empty() || block.getRoot().equals(scanNode)) { diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeDeserializer.java b/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeDeserializer.java index fdad4c43af..2642ca40c6 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeDeserializer.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeDeserializer.java @@ -469,6 +469,13 @@ private static PartitionedTableScanNode convertPartitionScan(OverridableConf con paths[i] = new Path(partitionScanProto.getPaths(i)); } partitionedScan.setInputPaths(paths); + + String[] partitionKeys = new String[partitionScanProto.getKeysCount()]; + for (int i = 0; i < partitionScanProto.getKeysCount(); i++) { + partitionKeys[i] = partitionScanProto.getKeys(i); + } + partitionedScan.setPartitionKeys(partitionKeys); + return partitionedScan; } diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeSerializer.java b/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeSerializer.java index 6b082f7159..beac99c5ac 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeSerializer.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeSerializer.java @@ -484,6 +484,13 @@ public LogicalNode visitPartitionedTableScan(SerializeContext context, LogicalPl } partitionScan.addAllPaths(pathStrs); } + List partitionKeysStrs = TUtil.newList(); + if (node.getPartitionKeys() != null) { + for (String partitionKey : node.getPartitionKeys()) { + partitionKeysStrs.add(partitionKey); + } + partitionScan.addAllKeys(partitionKeysStrs); + } PlanProto.LogicalNode.Builder nodeBuilder = createNodeBuilder(context, node); nodeBuilder.setScan(scanBuilder); diff --git a/tajo-plan/src/main/proto/Plan.proto b/tajo-plan/src/main/proto/Plan.proto index 0cd0c32dcc..0942807bd8 100644 --- a/tajo-plan/src/main/proto/Plan.proto +++ b/tajo-plan/src/main/proto/Plan.proto @@ -117,6 
+117,7 @@ message ScanNode { message PartitionScanSpec { repeated string paths = 1; + repeated string keys = 2; } message IndexScanSpec { @@ -233,6 +234,7 @@ enum JoinType { message PartitionTableScanSpec { repeated string paths = 1; + repeated string partitionKeys = 2; } message PersistentStoreNode { diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java index 2b79887a0c..a5164a882a 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java @@ -654,7 +654,7 @@ public int compare(Map.Entry v1, Map.Entry v2) * * @throws IOException */ - public List getPartitionSplits(String tableName, TableMeta meta, Schema schema, String[] partitions, + public List getPartitionSplits(String tableName, TableMeta meta, Schema schema, String[] partitionKeys, Path... 
inputs) throws IOException { // generate splits' @@ -683,7 +683,7 @@ public List getPartitionSplits(String tableName, TableMeta meta, Schem if (splittable) { for (BlockLocation blockLocation : blkLocations) { - volumeSplits.add(makePartitionSplit(tableName, path, blockLocation, partitions[i])); + volumeSplits.add(makePartitionSplit(tableName, path, blockLocation, partitionKeys[i])); } blockLocations.addAll(Arrays.asList(blkLocations)); @@ -692,10 +692,10 @@ public List getPartitionSplits(String tableName, TableMeta meta, Schem if (blockSize >= length) { blockLocations.addAll(Arrays.asList(blkLocations)); for (BlockLocation blockLocation : blkLocations) { - volumeSplits.add(makePartitionSplit(tableName, path, blockLocation, partitions[i])); + volumeSplits.add(makePartitionSplit(tableName, path, blockLocation, partitionKeys[i])); } } else { - splits.add(makeNonPartitionSplit(tableName, path, 0, length, blkLocations, partitions[i])); + splits.add(makeNonPartitionSplit(tableName, path, 0, length, blkLocations, partitionKeys[i])); } } @@ -713,18 +713,17 @@ public List getPartitionSplits(String tableName, TableMeta meta, Schem int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); splits.add(makePartitionSplit(tableName, path, length - bytesRemaining, splitSize, - blkLocations[blkIndex].getHosts(), partitions[i])); + blkLocations[blkIndex].getHosts(), partitionKeys[i])); bytesRemaining -= splitSize; } if (bytesRemaining > 0) { int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); - splits.add(makePartitionSplit(tableName, path, length - bytesRemaining, bytesRemaining, - blkLocations[blkIndex].getHosts(), partitions[i])); + blkLocations[blkIndex].getHosts(), partitionKeys[i])); } } else { // Non splittable - splits.add(makeNonPartitionSplit(tableName, path, 0, length, blkLocations, partitions[i])); + splits.add(makeNonPartitionSplit(tableName, path, 0, length, blkLocations, partitionKeys[i])); } } } diff --git 
a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java index 98cd6e46c5..3e44f2cc0e 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java @@ -211,10 +211,13 @@ public FragmentProto getProto() { builder.setLength(this.length); builder.setPath(this.uri.toString()); - if(hosts != null) { + if (hosts != null) { builder.addAllHosts(TUtil.newList(hosts)); } - builder.setPartitionKeys(this.partitionKeys); + + if (partitionKeys != null) { + builder.setPartitionKeys(this.partitionKeys); + } FragmentProto.Builder fragmentBuilder = FragmentProto.newBuilder(); fragmentBuilder.setId(this.tableName); From e3712751bc7f9a85705498de5a3cdd5fe22e6544 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 23 Nov 2015 17:24:21 +0900 Subject: [PATCH 031/127] Rename fileFragments to partitionFileFragments --- .../tajo/engine/planner/PhysicalPlannerImpl.java | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/PhysicalPlannerImpl.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/PhysicalPlannerImpl.java index af099a954f..4038c172d9 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/PhysicalPlannerImpl.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/PhysicalPlannerImpl.java @@ -929,19 +929,18 @@ public PhysicalExec createScanPlan(TaskAttemptContext ctx, ScanNode scanNode, St if (broadcastFlag) { PartitionedTableScanNode partitionedTableScanNode = (PartitionedTableScanNode) scanNode; - List fileFragments = TUtil.newList(); + List partitionFileFragments = TUtil.newList(); FileTablespace space = (FileTablespace) 
TablespaceManager.get(scanNode.getTableDesc().getUri()); for (int i = 0; i < partitionedTableScanNode.getInputPaths().length; i++) { Path path = partitionedTableScanNode.getInputPaths()[i]; - String partitionKeys = partitionedTableScanNode.hasPartitionKeys() ? partitionedTableScanNode - .getPartitionKeys()[i] : ""; - fileFragments.addAll(TUtil.newList(space.partitionSplit(scanNode.getCanonicalName(), path, partitionKeys))); + partitionFileFragments.addAll(TUtil.newList(space.partitionSplit(scanNode.getCanonicalName(), path, + partitionedTableScanNode.getPartitionKeys()[i]))); } FragmentProto[] fragments = - FragmentConvertor.toFragmentProtoArray(fileFragments.toArray( - new PartitionFileFragment[fileFragments.size()])); + FragmentConvertor.toFragmentProtoArray(partitionFileFragments.toArray( + new PartitionFileFragment[partitionFileFragments.size()])); ctx.addFragments(scanNode.getCanonicalName(), fragments); return new PartitionMergeScanExec(ctx, scanNode, fragments); From fdeeb79f722a4666d006a2a8b701abd7b63c40b4 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 23 Nov 2015 17:26:38 +0900 Subject: [PATCH 032/127] Remove codes for checking the NULL of partition keys --- .../apache/tajo/querymaster/Repartitioner.java | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java b/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java index b0e13cd798..3f5288dece 100644 --- a/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java +++ b/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java @@ -386,15 +386,12 @@ private static void scheduleSymmetricRepartitionJoin(QueryMasterTask.QueryMaster PartitionedTableScanNode partitionScan = (PartitionedTableScanNode)eachScan; partitionScanPaths = partitionScan.getInputPaths(); - if (partitionScan.hasPartitionKeys()) { - partitionKeys = partitionScan.getPartitionKeys(); - } + 
partitionKeys = partitionScan.getPartitionKeys(); + // set null to inputPaths in getFragmentsFromPartitionedTable() getFragmentsFromPartitionedTable((FileTablespace) space, eachScan, tableDesc); partitionScan.setInputPaths(partitionScanPaths); - if (partitionKeys != null) { - partitionScan.setPartitionKeys(partitionKeys); - } + partitionScan.setPartitionKeys(partitionKeys); } else { @@ -525,10 +522,7 @@ private static void scheduleLeafTasksWithBroadcastTable(TaskSchedulerContext sch if (scan.getType() == NodeType.PARTITIONS_SCAN) { PartitionedTableScanNode partitionScan = (PartitionedTableScanNode) scan; partitionScanPaths = partitionScan.getInputPaths(); - - if (partitionScan.hasPartitionKeys()) { - partitionKeys = partitionScan.getPartitionKeys(); - } + partitionKeys = partitionScan.getPartitionKeys(); // set null to inputPaths in getFragmentsFromPartitionedTable() scanFragments = getFragmentsFromPartitionedTable(space, scan, desc); @@ -544,9 +538,7 @@ private static void scheduleLeafTasksWithBroadcastTable(TaskSchedulerContext sch PartitionedTableScanNode partitionScan = (PartitionedTableScanNode)scan; // PhisicalPlanner make PartitionMergeScanExec when table is boradcast table and inputpaths is not empty partitionScan.setInputPaths(partitionScanPaths); - if (partitionKeys != null) { - partitionScan.setPartitionKeys(partitionKeys); - } + partitionScan.setPartitionKeys(partitionKeys); } else { broadcastFragments.addAll(scanFragments); } From 0e7ce9336da5f373db43bf9c2b87ad28b6c52f22 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 23 Nov 2015 17:34:25 +0900 Subject: [PATCH 033/127] Rename keys to partitionKeys in PlanProto --- .../tajo/plan/serder/LogicalNodeDeserializer.java | 12 ++++++------ .../tajo/plan/serder/LogicalNodeSerializer.java | 2 +- tajo-plan/src/main/proto/Plan.proto | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeDeserializer.java 
b/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeDeserializer.java index 2642ca40c6..0df12461da 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeDeserializer.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeDeserializer.java @@ -218,7 +218,7 @@ private static ProjectionNode convertProjection(OverridableConf context, EvalCon ProjectionNode projectionNode = new ProjectionNode(protoNode.getNodeId()); projectionNode.init(projectionProto.getDistinct(), convertTargets(context, evalContext, - projectionProto.getTargetsList())); + projectionProto.getTargetsList())); projectionNode.setChild(nodeMap.get(projectionProto.getChildSeq())); projectionNode.setInSchema(convertSchema(protoNode.getInSchema())); projectionNode.setOutSchema(convertSchema(protoNode.getOutSchema())); @@ -277,7 +277,7 @@ private static WindowAggNode convertWindowAgg(OverridableConf context, EvalConte if (windowAggProto.getWindowFunctionsCount() > 0) { windowAgg.setWindowFunctions(convertWindowFunccEvals(context, evalContext, - windowAggProto.getWindowFunctionsList())); + windowAggProto.getWindowFunctionsList())); } windowAgg.setDistinct(windowAggProto.getDistinct()); @@ -453,7 +453,7 @@ private static IndexScanNode convertIndexScan(OverridableConf context, EvalConte } indexScan.set(new Schema(indexScanSpec.getKeySchema()), predicates, - TUtil.stringToURI(indexScanSpec.getIndexPath())); + TUtil.stringToURI(indexScanSpec.getIndexPath())); return indexScan; } @@ -470,9 +470,9 @@ private static PartitionedTableScanNode convertPartitionScan(OverridableConf con } partitionedScan.setInputPaths(paths); - String[] partitionKeys = new String[partitionScanProto.getKeysCount()]; - for (int i = 0; i < partitionScanProto.getKeysCount(); i++) { - partitionKeys[i] = partitionScanProto.getKeys(i); + String[] partitionKeys = new String[partitionScanProto.getPartitionKeysCount()]; + for (int i = 0; i < partitionScanProto.getPartitionKeysCount(); i++) { + 
partitionKeys[i] = partitionScanProto.getPartitionKeys(i); } partitionedScan.setPartitionKeys(partitionKeys); diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeSerializer.java b/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeSerializer.java index beac99c5ac..1a7987c7a2 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeSerializer.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeSerializer.java @@ -489,7 +489,7 @@ public LogicalNode visitPartitionedTableScan(SerializeContext context, LogicalPl for (String partitionKey : node.getPartitionKeys()) { partitionKeysStrs.add(partitionKey); } - partitionScan.addAllKeys(partitionKeysStrs); + partitionScan.addAllPartitionKeys(partitionKeysStrs); } PlanProto.LogicalNode.Builder nodeBuilder = createNodeBuilder(context, node); diff --git a/tajo-plan/src/main/proto/Plan.proto b/tajo-plan/src/main/proto/Plan.proto index 0942807bd8..def0d52701 100644 --- a/tajo-plan/src/main/proto/Plan.proto +++ b/tajo-plan/src/main/proto/Plan.proto @@ -117,7 +117,7 @@ message ScanNode { message PartitionScanSpec { repeated string paths = 1; - repeated string keys = 2; + repeated string partitionKeys = 2; } message IndexScanSpec { From 80a872cf4bbd7474bc16e366b72ae56dc4709206 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 23 Nov 2015 18:03:33 +0900 Subject: [PATCH 034/127] Update description --- .../org/apache/tajo/engine/planner/physical/SeqScanExec.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java index f4f45bb7d0..64146e5957 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java @@ -101,7 +101,7 @@ private void 
rewriteColumnPartitionedTableSchema() throws IOException { List partitionFileFragments = FragmentConvertor.convert(PartitionFileFragment .class, fragments); - // Get first partition key from a given partition keys + // Get partition keys from first partition fragment partitionRow = PartitionedTableRewriter.buildTupleFromPartitionName(columnPartitionSchema, partitionFileFragments.get(0).getPartitionKeys()); } From 8e11cbcfba1c881d22894507f34918f90ea0127c Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 23 Nov 2015 18:06:51 +0900 Subject: [PATCH 035/127] Remove unnecessary updates --- .../apache/tajo/plan/serder/LogicalNodeDeserializer.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeDeserializer.java b/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeDeserializer.java index 0df12461da..1c43ef8099 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeDeserializer.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeDeserializer.java @@ -218,7 +218,7 @@ private static ProjectionNode convertProjection(OverridableConf context, EvalCon ProjectionNode projectionNode = new ProjectionNode(protoNode.getNodeId()); projectionNode.init(projectionProto.getDistinct(), convertTargets(context, evalContext, - projectionProto.getTargetsList())); + projectionProto.getTargetsList())); projectionNode.setChild(nodeMap.get(projectionProto.getChildSeq())); projectionNode.setInSchema(convertSchema(protoNode.getInSchema())); projectionNode.setOutSchema(convertSchema(protoNode.getOutSchema())); @@ -277,7 +277,7 @@ private static WindowAggNode convertWindowAgg(OverridableConf context, EvalConte if (windowAggProto.getWindowFunctionsCount() > 0) { windowAgg.setWindowFunctions(convertWindowFunccEvals(context, evalContext, - windowAggProto.getWindowFunctionsList())); + windowAggProto.getWindowFunctionsList())); } 
windowAgg.setDistinct(windowAggProto.getDistinct()); @@ -453,7 +453,7 @@ private static IndexScanNode convertIndexScan(OverridableConf context, EvalConte } indexScan.set(new Schema(indexScanSpec.getKeySchema()), predicates, - TUtil.stringToURI(indexScanSpec.getIndexPath())); + TUtil.stringToURI(indexScanSpec.getIndexPath())); return indexScan; } From 7596c3cef255b0563e5ee412f0b72b5cff2c1788 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 23 Nov 2015 18:09:17 +0900 Subject: [PATCH 036/127] Remove unncessary method --- .../tajo/plan/logical/PartitionedTableScanNode.java | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java b/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java index 7fa415cdd3..b5d6fffc07 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java @@ -67,10 +67,6 @@ public void setPartitionKeys(String[] partitionKeys) { this.partitionKeys = partitionKeys; } - public boolean hasPartitionKeys() { - return this.partitionKeys != null; - } - public String toString() { StringBuilder sb = new StringBuilder("Partitions Scan (table=").append(getTableName()); if (hasAlias()) { @@ -124,10 +120,7 @@ public Object clone() throws CloneNotSupportedException { } unionScan.inputPaths = inputPaths; - - if (hasPartitionKeys()) { - unionScan.partitionKeys = partitionKeys; - } + unionScan.partitionKeys = partitionKeys; return unionScan; } From 56ec31a6352b84015d69beee8f3487a7ef13cdb9 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 23 Nov 2015 18:15:38 +0900 Subject: [PATCH 037/127] Remove unused packages --- .../tajo/engine/planner/PhysicalPlannerImpl.java | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git 
a/tajo-core/src/main/java/org/apache/tajo/engine/planner/PhysicalPlannerImpl.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/PhysicalPlannerImpl.java index 4038c172d9..5cff72ec89 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/PhysicalPlannerImpl.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/PhysicalPlannerImpl.java @@ -24,7 +24,6 @@ import com.google.common.collect.ObjectArrays; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.fs.Path; import org.apache.tajo.SessionVars; import org.apache.tajo.catalog.Column; import org.apache.tajo.catalog.SortSpec; @@ -44,13 +43,11 @@ import org.apache.tajo.plan.serder.PlanProto.DistinctGroupbyEnforcer.MultipleAggregationStage; import org.apache.tajo.plan.serder.PlanProto.DistinctGroupbyEnforcer.SortSpecArray; import org.apache.tajo.plan.serder.PlanProto.EnforceProperty; -import org.apache.tajo.plan.serder.PlanProto.SortEnforce; import org.apache.tajo.plan.serder.PlanProto.SortedInputEnforce; import org.apache.tajo.plan.util.PlannerUtil; import org.apache.tajo.storage.FileTablespace; import org.apache.tajo.storage.StorageConstants; import org.apache.tajo.storage.TablespaceManager; -import org.apache.tajo.storage.fragment.FileFragment; import org.apache.tajo.storage.fragment.Fragment; import org.apache.tajo.storage.fragment.FragmentConvertor; import org.apache.tajo.storage.fragment.PartitionFileFragment; @@ -442,7 +439,7 @@ private PhysicalExec createBestInnerJoinPlan(TaskAttemptContext context, JoinNod private MergeJoinExec createMergeInnerJoin(TaskAttemptContext context, JoinNode plan, PhysicalExec leftExec, PhysicalExec rightExec) throws IOException { SortSpec[][] sortSpecs = PlannerUtil.getSortKeysFromJoinQual( - plan.getJoinQual(), leftExec.getSchema(), rightExec.getSchema()); + plan.getJoinQual(), leftExec.getSchema(), rightExec.getSchema()); SortNode leftSortNode = 
LogicalPlan.createNodeWithoutPID(SortNode.class); leftSortNode.setSortSpecs(sortSpecs[0]); @@ -933,14 +930,14 @@ public PhysicalExec createScanPlan(TaskAttemptContext ctx, ScanNode scanNode, St FileTablespace space = (FileTablespace) TablespaceManager.get(scanNode.getTableDesc().getUri()); for (int i = 0; i < partitionedTableScanNode.getInputPaths().length; i++) { - Path path = partitionedTableScanNode.getInputPaths()[i]; - partitionFileFragments.addAll(TUtil.newList(space.partitionSplit(scanNode.getCanonicalName(), path, + partitionFileFragments.addAll(TUtil.newList(space.partitionSplit(scanNode.getCanonicalName(), + partitionedTableScanNode.getInputPaths()[i], partitionedTableScanNode.getPartitionKeys()[i]))); } FragmentProto[] fragments = - FragmentConvertor.toFragmentProtoArray(partitionFileFragments.toArray( - new PartitionFileFragment[partitionFileFragments.size()])); + FragmentConvertor.toFragmentProtoArray(partitionFileFragments.toArray( + new PartitionFileFragment[partitionFileFragments.size()])); ctx.addFragments(scanNode.getCanonicalName(), fragments); return new PartitionMergeScanExec(ctx, scanNode, fragments); From 1735d67ce57215a37f0b254c5f626fe11e31c1dd Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 23 Nov 2015 18:27:08 +0900 Subject: [PATCH 038/127] Update PartitionedTableRewriter::getPartitionContentByFileSystem --- .../plan/rewrite/rules/PartitionedTableRewriter.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java index 1c5ff0ca7b..ebdebc5f9e 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java @@ -213,7 +213,7 @@ private PartitionContent getPartitionContentByFileSystem(Schema partitionColumns 
int startIdx; long totalVolume = 0L; ContentSummary summary = null; - List partitionKeysList = TUtil.newList(); + String[] partitionKeys = null; if (conjunctiveForms == null) { filters = buildAllAcceptingPathFilters(partitionColumns); @@ -230,15 +230,17 @@ private PartitionContent getPartitionContentByFileSystem(Schema partitionColumns } // Get partition keys and volume from the list of partition directories - for (Path path : filteredPaths) { + partitionKeys = new String[filteredPaths.length]; + for (int i = 0; i < partitionKeys.length; i++) { + Path path = filteredPaths[i]; startIdx = path.toString().indexOf(getColumnPartitionPathPrefix(partitionColumns)); - partitionKeysList.add(path.toString().substring(startIdx)); + partitionKeys[i] = path.toString().substring(startIdx); summary = fs.getContentSummary(path); totalVolume += summary.getLength(); } partitionContent.setPartitionPaths(filteredPaths); - partitionContent.setPartitionKeys(partitionKeysList.toArray(new String[partitionKeysList.size()])); + partitionContent.setPartitionKeys(partitionKeys); partitionContent.setTotalVolume(totalVolume); return partitionContent; From e062a728c3295b6898920914a9b4a45066fa64d7 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Tue, 24 Nov 2015 12:05:59 +0900 Subject: [PATCH 039/127] Remove unused constructor --- .../tajo/plan/partition/PartitionContent.java | 24 ------------------- .../rules/PartitionedTableRewriter.java | 7 +----- 2 files changed, 1 insertion(+), 30 deletions(-) diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/partition/PartitionContent.java b/tajo-plan/src/main/java/org/apache/tajo/plan/partition/PartitionContent.java index 7b74f05d7f..691c3f2452 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/partition/PartitionContent.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/partition/PartitionContent.java @@ -25,18 +25,6 @@ public class PartitionContent { private String[] partitionKeys; private long totalVolume; - public PartitionContent() 
{ - } - - public PartitionContent(Path[] partitionPaths) { - this.partitionPaths = partitionPaths; - } - - public PartitionContent(Path[] partitionPaths, long totalVolume) { - this.partitionPaths = partitionPaths; - this.totalVolume = totalVolume; - } - public PartitionContent(Path[] partitionPaths, String[] partitionKeys, long totalVolume) { this.partitionPaths = partitionPaths; this.partitionKeys = partitionKeys; @@ -47,23 +35,11 @@ public Path[] getPartitionPaths() { return partitionPaths; } - public void setPartitionPaths(Path[] partitionPaths) { - this.partitionPaths = partitionPaths; - } - public String[] getPartitionKeys() { return partitionKeys; } - public void setPartitionKeys(String[] partitionKeys) { - this.partitionKeys = partitionKeys; - } - public long getTotalVolume() { return totalVolume; } - - public void setTotalVolume(long totalVolume) { - this.totalVolume = totalVolume; - } } \ No newline at end of file diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java index ebdebc5f9e..47d383fe0a 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java @@ -207,7 +207,6 @@ private PartitionContent getPartitionContentByPartitionDesc(List Date: Tue, 24 Nov 2015 18:00:29 +0900 Subject: [PATCH 040/127] Transmit ScanNode instead of PartitionedTableScanNode --- .../engine/planner/PhysicalPlannerImpl.java | 35 ++-------- .../rules/GlobalPlanEqualityTester.java | 15 +++- .../tajo/querymaster/Repartitioner.java | 50 +++----------- .../rules/LogicalPlanEqualityTester.java | 14 +++- .../plan/serder/LogicalNodeDeserializer.java | 23 ------- .../plan/serder/LogicalNodeSerializer.java | 51 +++++++------- tajo-plan/src/main/proto/Plan.proto | 69 ++++++++----------- 7 files changed, 98 insertions(+), 
159 deletions(-) diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/PhysicalPlannerImpl.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/PhysicalPlannerImpl.java index 5cff72ec89..e1e5bd4c96 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/PhysicalPlannerImpl.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/PhysicalPlannerImpl.java @@ -45,12 +45,10 @@ import org.apache.tajo.plan.serder.PlanProto.EnforceProperty; import org.apache.tajo.plan.serder.PlanProto.SortedInputEnforce; import org.apache.tajo.plan.util.PlannerUtil; -import org.apache.tajo.storage.FileTablespace; import org.apache.tajo.storage.StorageConstants; import org.apache.tajo.storage.TablespaceManager; import org.apache.tajo.storage.fragment.Fragment; import org.apache.tajo.storage.fragment.FragmentConvertor; -import org.apache.tajo.storage.fragment.PartitionFileFragment; import org.apache.tajo.unit.StorageUnit; import org.apache.tajo.util.FileUtil; import org.apache.tajo.util.StringUtils; @@ -901,12 +899,12 @@ private boolean checkIfSortEquivalance(TaskAttemptContext ctx, ScanNode scanNode public PhysicalExec createScanPlan(TaskAttemptContext ctx, ScanNode scanNode, Stack node) throws IOException { + FragmentProto [] fragments = ctx.getTables(scanNode.getCanonicalName()); // check if an input is sorted in the same order to the subsequence sort operator. 
if (checkIfSortEquivalance(ctx, scanNode, node)) { - if (ctx.getTable(scanNode.getCanonicalName()) == null) { + if (fragments == null) { return new SeqScanExec(ctx, scanNode, null); } - FragmentProto [] fragments = ctx.getTables(scanNode.getCanonicalName()); return new ExternalSortExec(ctx, (SortNode) node.peek(), scanNode, fragments); } else { Enforcer enforcer = ctx.getEnforcer(); @@ -920,34 +918,15 @@ public PhysicalExec createScanPlan(TaskAttemptContext ctx, ScanNode scanNode, St } } - if (scanNode instanceof PartitionedTableScanNode - && ((PartitionedTableScanNode)scanNode).getInputPaths() != null && - ((PartitionedTableScanNode)scanNode).getInputPaths().length > 0) { - - if (broadcastFlag) { - PartitionedTableScanNode partitionedTableScanNode = (PartitionedTableScanNode) scanNode; - List partitionFileFragments = TUtil.newList(); - - FileTablespace space = (FileTablespace) TablespaceManager.get(scanNode.getTableDesc().getUri()); - for (int i = 0; i < partitionedTableScanNode.getInputPaths().length; i++) { - partitionFileFragments.addAll(TUtil.newList(space.partitionSplit(scanNode.getCanonicalName(), - partitionedTableScanNode.getInputPaths()[i], - partitionedTableScanNode.getPartitionKeys()[i]))); - } - - FragmentProto[] fragments = - FragmentConvertor.toFragmentProtoArray(partitionFileFragments.toArray( - new PartitionFileFragment[partitionFileFragments.size()])); - - ctx.addFragments(scanNode.getCanonicalName(), fragments); - return new PartitionMergeScanExec(ctx, scanNode, fragments); - } + if (scanNode.getTableDesc().hasPartition() && broadcastFlag && fragments != null) { + ctx.addFragments(scanNode.getCanonicalName(), fragments); + return new PartitionMergeScanExec(ctx, scanNode, fragments); } - if (ctx.getTable(scanNode.getCanonicalName()) == null) { + if (fragments == null) { return new SeqScanExec(ctx, scanNode, null); } - FragmentProto [] fragments = ctx.getTables(scanNode.getCanonicalName()); + return new SeqScanExec(ctx, scanNode, fragments); } } 
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/rewriter/rules/GlobalPlanEqualityTester.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/rewriter/rules/GlobalPlanEqualityTester.java index 5758f5ece8..1d104d7ae9 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/rewriter/rules/GlobalPlanEqualityTester.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/rewriter/rules/GlobalPlanEqualityTester.java @@ -18,15 +18,20 @@ package org.apache.tajo.engine.planner.global.rewriter.rules; +import org.apache.hadoop.hive.ql.plan.PlanUtils; import org.apache.tajo.OverridableConf; import org.apache.tajo.engine.planner.global.ExecutionBlock; import org.apache.tajo.engine.planner.global.ExecutionBlockCursor; import org.apache.tajo.engine.planner.global.MasterPlan; import org.apache.tajo.engine.planner.global.rewriter.GlobalPlanRewriteRule; import org.apache.tajo.plan.logical.LogicalNode; +import org.apache.tajo.plan.logical.NodeType; +import org.apache.tajo.plan.logical.PartitionedTableScanNode; +import org.apache.tajo.plan.logical.ScanNode; import org.apache.tajo.plan.serder.LogicalNodeDeserializer; import org.apache.tajo.plan.serder.LogicalNodeSerializer; import org.apache.tajo.plan.serder.PlanProto; +import org.apache.tajo.plan.util.PlannerUtil; /** * It verifies the equality between the input and output of LogicalNodeTree(De)Serializer in global planning. @@ -52,7 +57,15 @@ public MasterPlan rewrite(OverridableConf queryContext, MasterPlan plan) { if (node != null) { PlanProto.LogicalNodeTree tree = LogicalNodeSerializer.serialize(node); LogicalNode deserialize = LogicalNodeDeserializer.deserialize(plan.getContext(), null, tree); - assert node.deepEquals(deserialize); + + // Error handling PartitionedTableScanNode because LogicalNodeDeserializer convert it to ScanNode. 
+ PartitionedTableScanNode partitionedTableScanNode = PlannerUtil.findTopNode(node, NodeType.PARTITIONS_SCAN); + if (partitionedTableScanNode != null) { + ScanNode scanNode = PlannerUtil.findTopNode(deserialize, NodeType.SCAN); + assert scanNode != null; + } else { + assert node.deepEquals(deserialize); + } } } return plan; diff --git a/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java b/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java index 3f5288dece..00711b2e6f 100644 --- a/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java +++ b/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java @@ -374,33 +374,19 @@ private static void scheduleSymmetricRepartitionJoin(QueryMasterTask.QueryMaster if (broadcastFragments != null) { //In this phase a ScanNode has a single fragment. //If there are more than one data files, that files should be added to fragments or partition path - for (ScanNode eachScan: broadcastScans) { - - Path[] partitionScanPaths = null; - String[] partitionKeys = null; TableDesc tableDesc = masterContext.getTableDesc(eachScan); Tablespace space = TablespaceManager.get(tableDesc.getUri()); + Collection scanFragments = null; if (eachScan.getType() == NodeType.PARTITIONS_SCAN) { - - PartitionedTableScanNode partitionScan = (PartitionedTableScanNode)eachScan; - partitionScanPaths = partitionScan.getInputPaths(); - partitionKeys = partitionScan.getPartitionKeys(); - - // set null to inputPaths in getFragmentsFromPartitionedTable() - getFragmentsFromPartitionedTable((FileTablespace) space, eachScan, tableDesc); - partitionScan.setInputPaths(partitionScanPaths); - partitionScan.setPartitionKeys(partitionKeys); - + scanFragments = getFragmentsFromPartitionedTable(space, eachScan, tableDesc); } else { + scanFragments = space.getSplits(eachScan.getCanonicalName(), tableDesc, eachScan.getQual()); + } - Collection scanFragments = - space.getSplits(eachScan.getCanonicalName(), tableDesc, 
eachScan.getQual()); - if (scanFragments != null) { - rightFragments.addAll(scanFragments); - } - + if (scanFragments != null) { + rightFragments.addAll(scanFragments); } } } @@ -480,9 +466,6 @@ public static List getFragmentsFromPartitionedTable(Tablespace tsHandl scan.getCanonicalName(), table.getMeta(), table.getSchema(), partitionsScan.getPartitionKeys(), partitionsScan.getInputPaths())); - partitionsScan.setInputPaths(null); - partitionsScan.setPartitionKeys(null); - return fragments; } @@ -504,9 +487,9 @@ private static void scheduleLeafTasksWithBroadcastTable(TaskSchedulerContext sch // Broadcast table // all fragments or paths assigned every Large table's scan task. // -> PARTITIONS_SCAN - // . add all partition paths to node's inputPaths variable + // . add all PartitionFileFragments to broadcastFragments // -> SCAN - // . add all fragments to broadcastFragments + // . add all FileFragments to broadcastFragments Collection baseFragments = null; List broadcastFragments = new ArrayList<>(); for (int i = 0; i < scans.length; i++) { @@ -514,17 +497,9 @@ private static void scheduleLeafTasksWithBroadcastTable(TaskSchedulerContext sch TableDesc desc = stage.getContext().getTableDesc(scan); Collection scanFragments; - Path[] partitionScanPaths = null; - String[] partitionKeys = null; Tablespace space = TablespaceManager.get(desc.getUri()); - if (scan.getType() == NodeType.PARTITIONS_SCAN) { - PartitionedTableScanNode partitionScan = (PartitionedTableScanNode) scan; - partitionScanPaths = partitionScan.getInputPaths(); - partitionKeys = partitionScan.getPartitionKeys(); - - // set null to inputPaths in getFragmentsFromPartitionedTable() scanFragments = getFragmentsFromPartitionedTable(space, scan, desc); } else { scanFragments = space.getSplits(scan.getCanonicalName(), desc, scan.getQual()); @@ -534,14 +509,7 @@ private static void scheduleLeafTasksWithBroadcastTable(TaskSchedulerContext sch if (i == baseScanId) { baseFragments = scanFragments; } else { - if 
(scan.getType() == NodeType.PARTITIONS_SCAN) { - PartitionedTableScanNode partitionScan = (PartitionedTableScanNode)scan; - // PhisicalPlanner make PartitionMergeScanExec when table is boradcast table and inputpaths is not empty - partitionScan.setInputPaths(partitionScanPaths); - partitionScan.setPartitionKeys(partitionKeys); - } else { - broadcastFragments.addAll(scanFragments); - } + broadcastFragments.addAll(scanFragments); } } } diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/LogicalPlanEqualityTester.java b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/LogicalPlanEqualityTester.java index c35194e8ec..abd413cb9b 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/LogicalPlanEqualityTester.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/LogicalPlanEqualityTester.java @@ -21,11 +21,15 @@ import org.apache.tajo.exception.TajoException; import org.apache.tajo.plan.LogicalPlan; import org.apache.tajo.plan.logical.LogicalNode; +import org.apache.tajo.plan.logical.NodeType; +import org.apache.tajo.plan.logical.PartitionedTableScanNode; +import org.apache.tajo.plan.logical.ScanNode; import org.apache.tajo.plan.rewrite.LogicalPlanRewriteRule; import org.apache.tajo.plan.rewrite.LogicalPlanRewriteRuleContext; import org.apache.tajo.plan.serder.LogicalNodeDeserializer; import org.apache.tajo.plan.serder.LogicalNodeSerializer; import org.apache.tajo.plan.serder.PlanProto; +import org.apache.tajo.plan.util.PlannerUtil; /** * It verifies the equality between the input and output of LogicalNodeTree(De)Serializer in logical planning. 
@@ -50,7 +54,15 @@ public LogicalPlan rewrite(LogicalPlanRewriteRuleContext context) throws TajoExc LogicalNode root = plan.getRootBlock().getRoot(); PlanProto.LogicalNodeTree serialized = LogicalNodeSerializer.serialize(plan.getRootBlock().getRoot()); LogicalNode deserialized = LogicalNodeDeserializer.deserialize(context.getQueryContext(), null, serialized); - assert root.deepEquals(deserialized); + + // Error handling PartitionedTableScanNode because LogicalNodeDeserializer convert it to ScanNode. + PartitionedTableScanNode partitionedTableScanNode = PlannerUtil.findTopNode(root, NodeType.PARTITIONS_SCAN); + if (partitionedTableScanNode != null) { + ScanNode scanNode = PlannerUtil.findTopNode(deserialized, NodeType.SCAN); + assert scanNode != null; + } else { + assert root.deepEquals(deserialized); + } return plan; } } diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeDeserializer.java b/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeDeserializer.java index 1c43ef8099..523e6aa886 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeDeserializer.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeDeserializer.java @@ -122,8 +122,6 @@ public int compare(PlanProto.LogicalNode o1, PlanProto.LogicalNode o2) { current = convertUnion(nodeMap, protoNode); break; case PARTITIONS_SCAN: - current = convertPartitionScan(context, evalContext, protoNode); - break; case SCAN: current = convertScan(context, evalContext, protoNode); break; @@ -458,27 +456,6 @@ private static IndexScanNode convertIndexScan(OverridableConf context, EvalConte return indexScan; } - private static PartitionedTableScanNode convertPartitionScan(OverridableConf context, EvalContext evalContext, - PlanProto.LogicalNode protoNode) { - PartitionedTableScanNode partitionedScan = new PartitionedTableScanNode(protoNode.getNodeId()); - fillScanNode(context, evalContext, protoNode, partitionedScan); - - 
PlanProto.PartitionScanSpec partitionScanProto = protoNode.getPartitionScan(); - Path [] paths = new Path[partitionScanProto.getPathsCount()]; - for (int i = 0; i < partitionScanProto.getPathsCount(); i++) { - paths[i] = new Path(partitionScanProto.getPaths(i)); - } - partitionedScan.setInputPaths(paths); - - String[] partitionKeys = new String[partitionScanProto.getPartitionKeysCount()]; - for (int i = 0; i < partitionScanProto.getPartitionKeysCount(); i++) { - partitionKeys[i] = partitionScanProto.getPartitionKeys(i); - } - partitionedScan.setPartitionKeys(partitionKeys); - - return partitionedScan; - } - private static TableSubQueryNode convertTableSubQuery(OverridableConf context, EvalContext evalContext, Map nodeMap, PlanProto.LogicalNode protoNode) { diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeSerializer.java b/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeSerializer.java index 1a7987c7a2..ec901894a1 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeSerializer.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeSerializer.java @@ -149,7 +149,7 @@ public LogicalNode visitEvalExpr(SerializeContext context, LogicalPlan plan, Log EvalExprNode exprEval, Stack stack) throws TajoException { PlanProto.EvalExprNode.Builder exprEvalBuilder = PlanProto.EvalExprNode.newBuilder(); exprEvalBuilder.addAllTargets( - ProtoUtil.toProtoObjects(exprEval.getTargets().toArray(new ProtoObject[exprEval.getTargets().size()]))); + ProtoUtil.toProtoObjects(exprEval.getTargets().toArray(new ProtoObject[exprEval.getTargets().size()]))); PlanProto.LogicalNode.Builder nodeBuilder = createNodeBuilder(context, exprEval); nodeBuilder.setExprEval(exprEvalBuilder); @@ -474,30 +474,31 @@ public LogicalNode visitPartitionedTableScan(SerializeContext context, LogicalPl PartitionedTableScanNode node, Stack stack) throws TajoException { - PlanProto.ScanNode.Builder scanBuilder = buildScanNode(node); - - 
PlanProto.PartitionScanSpec.Builder partitionScan = PlanProto.PartitionScanSpec.newBuilder(); - List pathStrs = TUtil.newList(); - if (node.getInputPaths() != null) { - for (Path p : node.getInputPaths()) { - pathStrs.add(p.toString()); - } - partitionScan.addAllPaths(pathStrs); - } - List partitionKeysStrs = TUtil.newList(); - if (node.getPartitionKeys() != null) { - for (String partitionKey : node.getPartitionKeys()) { - partitionKeysStrs.add(partitionKey); - } - partitionScan.addAllPartitionKeys(partitionKeysStrs); - } - - PlanProto.LogicalNode.Builder nodeBuilder = createNodeBuilder(context, node); - nodeBuilder.setScan(scanBuilder); - nodeBuilder.setPartitionScan(partitionScan); - context.treeBuilder.addNodes(nodeBuilder); - - return node; +// PlanProto.ScanNode.Builder scanBuilder = buildScanNode(node); +// +// PlanProto.PartitionScanSpec.Builder partitionScan = PlanProto.PartitionScanSpec.newBuilder(); +// List pathStrs = TUtil.newList(); +// if (node.getInputPaths() != null) { +// for (Path p : node.getInputPaths()) { +// pathStrs.add(p.toString()); +// } +// partitionScan.addAllPaths(pathStrs); +// } +// List partitionKeysStrs = TUtil.newList(); +// if (node.getPartitionKeys() != null) { +// for (String partitionKey : node.getPartitionKeys()) { +// partitionKeysStrs.add(partitionKey); +// } +// partitionScan.addAllPartitionKeys(partitionKeysStrs); +// } +// +// PlanProto.LogicalNode.Builder nodeBuilder = createNodeBuilder(context, node); +// nodeBuilder.setScan(scanBuilder); +// nodeBuilder.setPartitionScan(partitionScan); +// context.treeBuilder.addNodes(nodeBuilder); + + ScanNode scanNode = (ScanNode) node; + return visitScan(context, plan, block, scanNode, stack); } @Override diff --git a/tajo-plan/src/main/proto/Plan.proto b/tajo-plan/src/main/proto/Plan.proto index def0d52701..74e2e666f6 100644 --- a/tajo-plan/src/main/proto/Plan.proto +++ b/tajo-plan/src/main/proto/Plan.proto @@ -73,36 +73,35 @@ message LogicalNode { optional SchemaProto out_schema = 
5; optional ScanNode scan = 6; - optional PartitionScanSpec partitionScan = 7; - optional IndexScanSpec indexScan = 8; - optional JoinNode join = 9; - optional FilterNode filter = 10; - optional GroupbyNode groupby = 11; - optional DistinctGroupbyNode distinctGroupby = 12; - optional SortNode sort = 13; - optional LimitNode limit = 14; - optional WindowAggNode windowAgg = 15; - optional ProjectionNode projection = 16; - optional EvalExprNode exprEval = 17; - optional UnionNode union = 18; - optional TableSubQueryNode tableSubQuery = 19; - optional PersistentStoreNode persistentStore = 20; - optional StoreTableNodeSpec storeTable = 21; - optional InsertNodeSpec insert = 22; - optional CreateTableNodeSpec createTable = 23; - optional RootNode root = 24; - optional SetSessionNode setSession = 25; - - optional CreateDatabaseNode createDatabase = 26; - optional DropDatabaseNode dropDatabase = 27; - optional DropTableNode dropTable = 28; - - optional AlterTablespaceNode alterTablespace = 29; - optional AlterTableNode alterTable = 30; - optional TruncateTableNode truncateTableNode = 31; - - optional CreateIndexNode createIndex = 32; - optional DropIndexNode dropIndex = 33; + optional IndexScanSpec indexScan = 7; + optional JoinNode join = 8; + optional FilterNode filter = 9; + optional GroupbyNode groupby = 10; + optional DistinctGroupbyNode distinctGroupby = 11; + optional SortNode sort = 12; + optional LimitNode limit = 13; + optional WindowAggNode windowAgg = 14; + optional ProjectionNode projection = 15; + optional EvalExprNode exprEval = 16; + optional UnionNode union = 17; + optional TableSubQueryNode tableSubQuery = 18; + optional PersistentStoreNode persistentStore = 19; + optional StoreTableNodeSpec storeTable = 20; + optional InsertNodeSpec insert = 21; + optional CreateTableNodeSpec createTable = 22; + optional RootNode root = 23; + optional SetSessionNode setSession = 24; + + optional CreateDatabaseNode createDatabase = 25; + optional DropDatabaseNode 
dropDatabase = 26; + optional DropTableNode dropTable = 27; + + optional AlterTablespaceNode alterTablespace = 28; + optional AlterTableNode alterTable = 29; + optional TruncateTableNode truncateTableNode = 30; + + optional CreateIndexNode createIndex = 31; + optional DropIndexNode dropIndex = 32; } message ScanNode { @@ -115,11 +114,6 @@ message ScanNode { required bool nameResolveBase = 7; } -message PartitionScanSpec { - repeated string paths = 1; - repeated string partitionKeys = 2; -} - message IndexScanSpec { required SchemaProto keySchema = 1; required string indexPath = 2; @@ -232,11 +226,6 @@ enum JoinType { RIGHT_SEMI_JOIN = 9; } -message PartitionTableScanSpec { - repeated string paths = 1; - repeated string partitionKeys = 2; -} - message PersistentStoreNode { optional int32 childSeq = 1; // CreateTableNode may not have any children. This should be improved at TAJO-1589. required string storageType = 2; From f24577a64b4685ee0564ebb81ac375756dbaf162 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Tue, 24 Nov 2015 18:05:11 +0900 Subject: [PATCH 041/127] Remove unused package --- .../apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java | 1 - 1 file changed, 1 deletion(-) diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java index 47d383fe0a..97722bd87d 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java @@ -45,7 +45,6 @@ import org.apache.tajo.storage.Tuple; import org.apache.tajo.storage.VTuple; import org.apache.tajo.util.StringUtils; -import org.apache.tajo.util.TUtil; import java.io.IOException; import java.util.*; From c4c4d3657ac55879d92d74546324f1a42b8b22d0 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Tue, 24 Nov 2015 18:10:49 +0900 Subject: [PATCH 042/127] 
Remove unnnecessary codes --- .../plan/serder/LogicalNodeSerializer.java | 24 ------------------- 1 file changed, 24 deletions(-) diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeSerializer.java b/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeSerializer.java index ec901894a1..27b3a30c85 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeSerializer.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeSerializer.java @@ -473,30 +473,6 @@ public LogicalNode visitIndexScan(SerializeContext context, LogicalPlan plan, Lo public LogicalNode visitPartitionedTableScan(SerializeContext context, LogicalPlan plan, LogicalPlan.QueryBlock block, PartitionedTableScanNode node, Stack stack) throws TajoException { - -// PlanProto.ScanNode.Builder scanBuilder = buildScanNode(node); -// -// PlanProto.PartitionScanSpec.Builder partitionScan = PlanProto.PartitionScanSpec.newBuilder(); -// List pathStrs = TUtil.newList(); -// if (node.getInputPaths() != null) { -// for (Path p : node.getInputPaths()) { -// pathStrs.add(p.toString()); -// } -// partitionScan.addAllPaths(pathStrs); -// } -// List partitionKeysStrs = TUtil.newList(); -// if (node.getPartitionKeys() != null) { -// for (String partitionKey : node.getPartitionKeys()) { -// partitionKeysStrs.add(partitionKey); -// } -// partitionScan.addAllPartitionKeys(partitionKeysStrs); -// } -// -// PlanProto.LogicalNode.Builder nodeBuilder = createNodeBuilder(context, node); -// nodeBuilder.setScan(scanBuilder); -// nodeBuilder.setPartitionScan(partitionScan); -// context.treeBuilder.addNodes(nodeBuilder); - ScanNode scanNode = (ScanNode) node; return visitScan(context, plan, block, scanNode, stack); } From b428f24c4a8cb22763f727ecdcdeb0a7d3e4532f Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Tue, 24 Nov 2015 18:13:39 +0900 Subject: [PATCH 043/127] Remove unnecessary modification --- .../java/org/apache/tajo/plan/serder/LogicalNodeSerializer.java 
| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeSerializer.java b/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeSerializer.java index 27b3a30c85..d1f80b5e46 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeSerializer.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeSerializer.java @@ -149,7 +149,7 @@ public LogicalNode visitEvalExpr(SerializeContext context, LogicalPlan plan, Log EvalExprNode exprEval, Stack stack) throws TajoException { PlanProto.EvalExprNode.Builder exprEvalBuilder = PlanProto.EvalExprNode.newBuilder(); exprEvalBuilder.addAllTargets( - ProtoUtil.toProtoObjects(exprEval.getTargets().toArray(new ProtoObject[exprEval.getTargets().size()]))); + ProtoUtil.toProtoObjects(exprEval.getTargets().toArray(new ProtoObject[exprEval.getTargets().size()]))); PlanProto.LogicalNode.Builder nodeBuilder = createNodeBuilder(context, exprEval); nodeBuilder.setExprEval(exprEvalBuilder); From 772c049f8ac9c8241d672b1cc9b4a16d2b73250b Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Wed, 25 Nov 2015 16:16:45 +0900 Subject: [PATCH 044/127] Implement unit test cases for PartitionedTableRewriter --- .../planner/TestPartitionedTableRewriter.java | 502 ++++++++++++++++++ .../rules/PartitionedTableRewriter.java | 6 + 2 files changed, 508 insertions(+) create mode 100644 tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java new file mode 100644 index 0000000000..4cceac8b5a --- /dev/null +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java @@ -0,0 +1,502 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.engine.planner; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.tajo.LocalTajoTestingUtility; +import org.apache.tajo.OverridableConf; +import org.apache.tajo.QueryTestCaseBase; +import org.apache.tajo.algebra.Expr; +import org.apache.tajo.catalog.CatalogUtil; +import org.apache.tajo.catalog.Schema; +import org.apache.tajo.catalog.TableDesc; +import org.apache.tajo.catalog.TableMeta; +import org.apache.tajo.catalog.partition.PartitionMethodDesc; +import org.apache.tajo.catalog.proto.CatalogProtos; +import org.apache.tajo.common.TajoDataTypes; +import org.apache.tajo.conf.TajoConf; +import org.apache.tajo.engine.query.QueryContext; +import org.apache.tajo.plan.LogicalPlan; +import org.apache.tajo.plan.logical.*; +import org.apache.tajo.plan.partition.PartitionContent; +import org.apache.tajo.plan.rewrite.rules.PartitionedTableRewriter; +import org.apache.tajo.util.CommonTestingUtil; +import org.apache.tajo.util.FileUtil; +import org.apache.tajo.util.KeyValueSet; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +import static org.junit.Assert.*; + +public class TestPartitionedTableRewriter extends QueryTestCaseBase { + + final 
static String PARTITION_TABLE_NAME = "tb_partition"; + final static String MULTIPLE_PARTITION_TABLE_NAME = "tb_multiple_partition"; + + @BeforeClass + public static void setUp() throws Exception { + FileSystem fs = FileSystem.get(conf); + Path rootDir = TajoConf.getWarehouseDir(testingCluster.getConfiguration()); + + Schema schema = new Schema(); + schema.addColumn("n_nationkey", TajoDataTypes.Type.INT8); + schema.addColumn("n_name", TajoDataTypes.Type.TEXT); + schema.addColumn("n_regionkey", TajoDataTypes.Type.INT8); + + TableMeta meta = CatalogUtil.newTableMeta("TEXT", new KeyValueSet()); + + createExternalTableIncludedOnePartitionKeyColumn(fs, rootDir, schema, meta); + createExternalTableIncludedMultiplePartitionKeyColumns(fs, rootDir, schema, meta); + } + + private static void createExternalTableIncludedOnePartitionKeyColumn(FileSystem fs, Path rootDir, Schema schema, + TableMeta meta) throws Exception { + Schema partSchema = new Schema(); + partSchema.addColumn("key", TajoDataTypes.Type.TEXT); + + PartitionMethodDesc partitionMethodDesc = + new PartitionMethodDesc("TestPartitionedTableRewriter", PARTITION_TABLE_NAME, + CatalogProtos.PartitionType.COLUMN, "key", partSchema); + + Path tablePath = new Path(rootDir, PARTITION_TABLE_NAME); + fs.mkdirs(tablePath); + + client.createExternalTable(PARTITION_TABLE_NAME, schema, tablePath.toUri(), meta, partitionMethodDesc); + + TableDesc tableDesc = client.getTableDesc(PARTITION_TABLE_NAME); + assertNotNull(tableDesc); + + Path path = new Path(tableDesc.getUri().toString() + "/key=part123"); + fs.mkdirs(path); + FileUtil.writeTextToFile("1|ARGENTINA|1", new Path(path, "data")); + + path = new Path(tableDesc.getUri().toString() + "/key=part456"); + fs.mkdirs(path); + FileUtil.writeTextToFile("2|BRAZIL|1", new Path(path, "data")); + + path = new Path(tableDesc.getUri().toString() + "/key=part789"); + fs.mkdirs(path); + FileUtil.writeTextToFile("3|CANADA|1", new Path(path, "data")); + } + + private static void 
createExternalTableIncludedMultiplePartitionKeyColumns(FileSystem fs, Path rootDir, + Schema schema, TableMeta meta) throws Exception { + Schema partSchema = new Schema(); + partSchema.addColumn("key1", TajoDataTypes.Type.TEXT); + partSchema.addColumn("key2", TajoDataTypes.Type.TEXT); + partSchema.addColumn("key3", TajoDataTypes.Type.INT8); + + PartitionMethodDesc partitionMethodDesc = + new PartitionMethodDesc("TestPartitionedTableRewriter", MULTIPLE_PARTITION_TABLE_NAME, + CatalogProtos.PartitionType.COLUMN, "key1,key2,key3", partSchema); + + Path tablePath = new Path(rootDir, MULTIPLE_PARTITION_TABLE_NAME); + fs.mkdirs(tablePath); + + client.createExternalTable(MULTIPLE_PARTITION_TABLE_NAME, schema, tablePath.toUri(), meta, partitionMethodDesc); + + TableDesc tableDesc = client.getTableDesc(MULTIPLE_PARTITION_TABLE_NAME); + assertNotNull(tableDesc); + + Path path = new Path(tableDesc.getUri().toString() + "/key1=part123"); + fs.mkdirs(path); + path = new Path(tableDesc.getUri().toString() + "/key1=part123/key2=supp123"); + fs.mkdirs(path); + path = new Path(tableDesc.getUri().toString() + "/key1=part123/key2=supp123/key3=1"); + fs.mkdirs(path); + FileUtil.writeTextToFile("1|ARGENTINA|1", new Path(path, "data")); + + path = new Path(tableDesc.getUri().toString() + "/key1=part123/key2=supp123/key3=2"); + fs.mkdirs(path); + FileUtil.writeTextToFile("2|BRAZIL|1", new Path(path, "data")); + + path = new Path(tableDesc.getUri().toString() + "/key1=part789"); + fs.mkdirs(path); + path = new Path(tableDesc.getUri().toString() + "/key1=part789/key2=supp789"); + fs.mkdirs(path); + path = new Path(tableDesc.getUri().toString() + "/key1=part789/key2=supp789/key3=3"); + fs.mkdirs(path); + FileUtil.writeTextToFile("3|CANADA|1", new Path(path, "data")); + } + + @AfterClass + public static void tearDown() throws Exception { + client.executeQuery("DROP TABLE IF EXISTS " + PARTITION_TABLE_NAME + " PURGE;"); + client.executeQuery("DROP TABLE IF EXISTS " + 
MULTIPLE_PARTITION_TABLE_NAME + " PURGE;"); + } + + @Test + public void testFilterIncludePartitionKeyColumn() throws Exception { + Expr expr = sqlParser.parse("SELECT * FROM " + PARTITION_TABLE_NAME + " WHERE key = 'part456' ORDER BY key"); + QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(testingCluster.getConfiguration()); + LogicalPlan newPlan = planner.createPlan(defaultContext, expr); + LogicalNode plan = newPlan.getRootBlock().getRoot(); + + assertEquals(NodeType.ROOT, plan.getType()); + LogicalRootNode root = (LogicalRootNode) plan; + + ProjectionNode projNode = root.getChild(); + + assertEquals(NodeType.SORT, projNode.getChild().getType()); + SortNode sortNode = projNode.getChild(); + + assertEquals(NodeType.SELECTION, sortNode.getChild().getType()); + SelectionNode selNode = sortNode.getChild(); + assertTrue(selNode.hasQual()); + + assertEquals(NodeType.SCAN, selNode.getChild().getType()); + ScanNode scanNode = selNode.getChild(); + scanNode.setQual(selNode.getQual()); + + PartitionedTableRewriter rewriter = new PartitionedTableRewriter(); + rewriter.setCatalog(catalog); + OverridableConf conf = CommonTestingUtil.getSessionVarsForTest(); + + PartitionContent partitionContent = rewriter.getPartitionContent(conf, scanNode); + assertNotNull(partitionContent); + + Path[] filteredPaths = partitionContent.getPartitionPaths(); + assertEquals(1, filteredPaths.length); + assertEquals("key=part456", filteredPaths[0].getName()); + + + String[] partitionKeys = partitionContent.getPartitionKeys(); + assertEquals(1, partitionKeys.length); + assertEquals("key=part456", partitionKeys[0]); + + assertEquals(10L, partitionContent.getTotalVolume()); + } + + @Test + public void testWithoutAnyFilters() throws Exception { + Expr expr = sqlParser.parse("SELECT * FROM " + PARTITION_TABLE_NAME + " ORDER BY key"); + QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(testingCluster.getConfiguration()); + LogicalPlan newPlan = 
planner.createPlan(defaultContext, expr); + LogicalNode plan = newPlan.getRootBlock().getRoot(); + + assertEquals(NodeType.ROOT, plan.getType()); + LogicalRootNode root = (LogicalRootNode) plan; + + ProjectionNode projNode = root.getChild(); + + assertEquals(NodeType.SORT, projNode.getChild().getType()); + SortNode sortNode = projNode.getChild(); + + assertEquals(NodeType.SCAN, sortNode.getChild().getType()); + ScanNode scanNode = sortNode.getChild(); + + PartitionedTableRewriter rewriter = new PartitionedTableRewriter(); + rewriter.setCatalog(catalog); + OverridableConf conf = CommonTestingUtil.getSessionVarsForTest(); + + PartitionContent partitionContent = rewriter.getPartitionContent(conf, scanNode); + assertNotNull(partitionContent); + + Path[] filteredPaths = partitionContent.getPartitionPaths(); + assertEquals(3, filteredPaths.length); + assertEquals("key=part123", filteredPaths[0].getName()); + assertEquals("key=part456", filteredPaths[1].getName()); + assertEquals("key=part789", filteredPaths[2].getName()); + + String[] partitionKeys = partitionContent.getPartitionKeys(); + assertEquals(3, partitionKeys.length); + assertEquals("key=part123", partitionKeys[0]); + assertEquals("key=part456", partitionKeys[1]); + assertEquals("key=part789", partitionKeys[2]); + + assertEquals(33L, partitionContent.getTotalVolume()); + } + + @Test + public void testFilterIncludeNonExistingPartitionValue() throws Exception { + Expr expr = sqlParser.parse("SELECT * FROM " + PARTITION_TABLE_NAME + " WHERE key = 'part123456789'"); + QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(testingCluster.getConfiguration()); + LogicalPlan newPlan = planner.createPlan(defaultContext, expr); + LogicalNode plan = newPlan.getRootBlock().getRoot(); + + assertEquals(NodeType.ROOT, plan.getType()); + LogicalRootNode root = (LogicalRootNode) plan; + + ProjectionNode projNode = root.getChild(); + + assertEquals(NodeType.SELECTION, projNode.getChild().getType()); + 
SelectionNode selNode = projNode.getChild(); + assertTrue(selNode.hasQual()); + + assertEquals(NodeType.SCAN, selNode.getChild().getType()); + ScanNode scanNode = selNode.getChild(); + scanNode.setQual(selNode.getQual()); + + PartitionedTableRewriter rewriter = new PartitionedTableRewriter(); + rewriter.setCatalog(catalog); + OverridableConf conf = CommonTestingUtil.getSessionVarsForTest(); + + PartitionContent partitionContent = rewriter.getPartitionContent(conf, scanNode); + assertNotNull(partitionContent); + + assertEquals(0, partitionContent.getPartitionPaths().length); + assertEquals(0, partitionContent.getPartitionKeys().length); + + assertEquals(0L, partitionContent.getTotalVolume()); + } + + @Test + public void testFilterIncludeNonPartitionKeyColumn() throws Exception { + String sql = "SELECT * FROM " + PARTITION_TABLE_NAME + " WHERE n_nationkey = 1"; + Expr expr = sqlParser.parse(sql); + QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(testingCluster.getConfiguration()); + LogicalPlan newPlan = planner.createPlan(defaultContext, expr); + LogicalNode plan = newPlan.getRootBlock().getRoot(); + + assertEquals(NodeType.ROOT, plan.getType()); + LogicalRootNode root = (LogicalRootNode) plan; + + ProjectionNode projNode = root.getChild(); + + assertEquals(NodeType.SELECTION, projNode.getChild().getType()); + SelectionNode selNode = projNode.getChild(); + assertTrue(selNode.hasQual()); + + assertEquals(NodeType.SCAN, selNode.getChild().getType()); + ScanNode scanNode = selNode.getChild(); + scanNode.setQual(selNode.getQual()); + + PartitionedTableRewriter rewriter = new PartitionedTableRewriter(); + rewriter.setCatalog(catalog); + OverridableConf conf = CommonTestingUtil.getSessionVarsForTest(); + + PartitionContent partitionContent = rewriter.getPartitionContent(conf, scanNode); + assertNotNull(partitionContent); + + Path[] filteredPaths = partitionContent.getPartitionPaths(); + assertEquals(3, filteredPaths.length); + 
assertEquals("key=part123", filteredPaths[0].getName()); + assertEquals("key=part456", filteredPaths[1].getName()); + assertEquals("key=part789", filteredPaths[2].getName()); + + String[] partitionKeys = partitionContent.getPartitionKeys(); + assertEquals(3, partitionKeys.length); + assertEquals("key=part123", partitionKeys[0]); + assertEquals("key=part456", partitionKeys[1]); + assertEquals("key=part789", partitionKeys[2]); + + assertEquals(33L, partitionContent.getTotalVolume()); + } + + @Test + public void testFilterIncludeEveryPartitionKeyColumn() throws Exception { + Expr expr = sqlParser.parse("SELECT * FROM " + MULTIPLE_PARTITION_TABLE_NAME + + " WHERE key1 = 'part789' and key2 = 'supp789' and key3=3"); + QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(testingCluster.getConfiguration()); + LogicalPlan newPlan = planner.createPlan(defaultContext, expr); + LogicalNode plan = newPlan.getRootBlock().getRoot(); + + assertEquals(NodeType.ROOT, plan.getType()); + LogicalRootNode root = (LogicalRootNode) plan; + + ProjectionNode projNode = root.getChild(); + + assertEquals(NodeType.SELECTION, projNode.getChild().getType()); + SelectionNode selNode = projNode.getChild(); + assertTrue(selNode.hasQual()); + + assertEquals(NodeType.SCAN, selNode.getChild().getType()); + ScanNode scanNode = selNode.getChild(); + scanNode.setQual(selNode.getQual()); + + PartitionedTableRewriter rewriter = new PartitionedTableRewriter(); + rewriter.setCatalog(catalog); + OverridableConf conf = CommonTestingUtil.getSessionVarsForTest(); + + PartitionContent partitionContent = rewriter.getPartitionContent(conf, scanNode); + assertNotNull(partitionContent); + + Path[] filteredPaths = partitionContent.getPartitionPaths(); + assertEquals(1, filteredPaths.length); + assertEquals("key3=3", filteredPaths[0].getName()); + assertEquals("key2=supp789", filteredPaths[0].getParent().getName()); + assertEquals("key1=part789", filteredPaths[0].getParent().getParent().getName()); 
+ + String[] partitionKeys = partitionContent.getPartitionKeys(); + assertEquals(1, partitionKeys.length); + assertEquals("key1=part789/key2=supp789/key3=3", partitionKeys[0]); + + assertEquals(10L, partitionContent.getTotalVolume()); + } + + @Test + public void testFilterIncludeSomeOfPartitionKeyColumns() throws Exception { + Expr expr = sqlParser.parse("SELECT * FROM " + MULTIPLE_PARTITION_TABLE_NAME + + " WHERE key1 = 'part123' and key2 = 'supp123' order by n_nationkey"); + QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(testingCluster.getConfiguration()); + LogicalPlan newPlan = planner.createPlan(defaultContext, expr); + LogicalNode plan = newPlan.getRootBlock().getRoot(); + + assertEquals(NodeType.ROOT, plan.getType()); + LogicalRootNode root = (LogicalRootNode) plan; + + ProjectionNode projNode = root.getChild(); + + assertEquals(NodeType.SORT, projNode.getChild().getType()); + SortNode sortNode = projNode.getChild(); + + assertEquals(NodeType.SELECTION, sortNode.getChild().getType()); + SelectionNode selNode = sortNode.getChild(); + assertTrue(selNode.hasQual()); + + assertEquals(NodeType.SCAN, selNode.getChild().getType()); + ScanNode scanNode = selNode.getChild(); + scanNode.setQual(selNode.getQual()); + + PartitionedTableRewriter rewriter = new PartitionedTableRewriter(); + rewriter.setCatalog(catalog); + OverridableConf conf = CommonTestingUtil.getSessionVarsForTest(); + + PartitionContent partitionContent = rewriter.getPartitionContent(conf, scanNode); + assertNotNull(partitionContent); + + Path[] filteredPaths = partitionContent.getPartitionPaths(); + assertEquals(2, filteredPaths.length); + + assertEquals("key3=1", filteredPaths[0].getName()); + assertEquals("key2=supp123", filteredPaths[0].getParent().getName()); + assertEquals("key1=part123", filteredPaths[0].getParent().getParent().getName()); + + assertEquals("key3=2", filteredPaths[1].getName()); + assertEquals("key2=supp123", filteredPaths[1].getParent().getName()); + 
assertEquals("key1=part123", filteredPaths[1].getParent().getParent().getName()); + + String[] partitionKeys = partitionContent.getPartitionKeys(); + assertEquals(2, partitionKeys.length); + assertEquals("key1=part123/key2=supp123/key3=1", partitionKeys[0]); + assertEquals("key1=part123/key2=supp123/key3=2", partitionKeys[1]); + + assertEquals(23L, partitionContent.getTotalVolume()); + } + + @Test + public void testFilterIncludeNonPartitionKeyColumns() throws Exception { + Expr expr = sqlParser.parse("SELECT * FROM " + MULTIPLE_PARTITION_TABLE_NAME + + " WHERE key1 = 'part123' and n_nationkey >= 2 order by n_nationkey"); + QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(testingCluster.getConfiguration()); + LogicalPlan newPlan = planner.createPlan(defaultContext, expr); + LogicalNode plan = newPlan.getRootBlock().getRoot(); + + assertEquals(NodeType.ROOT, plan.getType()); + LogicalRootNode root = (LogicalRootNode) plan; + + ProjectionNode projNode = root.getChild(); + + assertEquals(NodeType.SORT, projNode.getChild().getType()); + SortNode sortNode = projNode.getChild(); + + assertEquals(NodeType.SELECTION, sortNode.getChild().getType()); + SelectionNode selNode = sortNode.getChild(); + assertTrue(selNode.hasQual()); + + assertEquals(NodeType.SCAN, selNode.getChild().getType()); + ScanNode scanNode = selNode.getChild(); + scanNode.setQual(selNode.getQual()); + + PartitionedTableRewriter rewriter = new PartitionedTableRewriter(); + rewriter.setCatalog(catalog); + OverridableConf conf = CommonTestingUtil.getSessionVarsForTest(); + + PartitionContent partitionContent = rewriter.getPartitionContent(conf, scanNode); + assertNotNull(partitionContent); + + Path[] filteredPaths = partitionContent.getPartitionPaths(); + assertEquals(2, filteredPaths.length); + + assertEquals("key3=1", filteredPaths[0].getName()); + assertEquals("key2=supp123", filteredPaths[0].getParent().getName()); + assertEquals("key1=part123", 
filteredPaths[0].getParent().getParent().getName()); + + assertEquals("key3=2", filteredPaths[1].getName()); + assertEquals("key2=supp123", filteredPaths[1].getParent().getName()); + assertEquals("key1=part123", filteredPaths[1].getParent().getParent().getName()); + + String[] partitionKeys = partitionContent.getPartitionKeys(); + assertEquals(2, partitionKeys.length); + assertEquals("key1=part123/key2=supp123/key3=1", partitionKeys[0]); + assertEquals("key1=part123/key2=supp123/key3=2", partitionKeys[1]); + + assertEquals(23L, partitionContent.getTotalVolume()); + } + + @Test + public final void testPartitionPruningWitCTAS() throws Exception { + String tableName = "testPartitionPruningUsingDirectories".toLowerCase(); + String canonicalTableName = CatalogUtil.getCanonicalTableName("\"" + getCurrentDatabase() +"\"", tableName); + + executeString( + "create table " + canonicalTableName + "(col1 int4, col2 int4) partition by column(key float8) " + + " as select l_orderkey, l_partkey, l_quantity from default.lineitem"); + + TableDesc tableDesc = catalog.getTableDesc(getCurrentDatabase(), tableName); + assertNotNull(tableDesc); + + // With a filter which checks a partition key column + Expr expr = sqlParser.parse("SELECT * FROM " + canonicalTableName + " WHERE key <= 40.0 ORDER BY key"); + QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(testingCluster.getConfiguration()); + LogicalPlan newPlan = planner.createPlan(defaultContext, expr); + LogicalNode plan = newPlan.getRootBlock().getRoot(); + + assertEquals(NodeType.ROOT, plan.getType()); + LogicalRootNode root = (LogicalRootNode) plan; + + ProjectionNode projNode = root.getChild(); + + assertEquals(NodeType.SORT, projNode.getChild().getType()); + SortNode sortNode = projNode.getChild(); + + assertEquals(NodeType.SELECTION, sortNode.getChild().getType()); + SelectionNode selNode = sortNode.getChild(); + assertTrue(selNode.hasQual()); + + assertEquals(NodeType.SCAN, selNode.getChild().getType()); 
+ ScanNode scanNode = selNode.getChild(); + scanNode.setQual(selNode.getQual()); + + PartitionedTableRewriter rewriter = new PartitionedTableRewriter(); + rewriter.setCatalog(catalog); + OverridableConf conf = CommonTestingUtil.getSessionVarsForTest(); + + PartitionContent partitionContent = rewriter.getPartitionContent(conf, scanNode); + assertNotNull(partitionContent); + + Path[] filteredPaths = partitionContent.getPartitionPaths(); + assertEquals(3, filteredPaths.length); + assertEquals("key=17.0", filteredPaths[0].getName()); + assertEquals("key=36.0", filteredPaths[1].getName()); + assertEquals("key=38.0", filteredPaths[2].getName()); + + String[] partitionKeys = partitionContent.getPartitionKeys(); + assertEquals(3, partitionKeys.length); + assertEquals("key=17.0", partitionKeys[0]); + assertEquals("key=36.0", partitionKeys[1]); + assertEquals("key=38.0", partitionKeys[2]); + + assertEquals(12L, partitionContent.getTotalVolume()); + + executeString("DROP TABLE " + canonicalTableName + " PURGE").close(); + } +} \ No newline at end of file diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java index 97722bd87d..607b210462 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java @@ -18,6 +18,7 @@ package org.apache.tajo.plan.rewrite.rules; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Sets; @@ -86,6 +87,10 @@ public LogicalPlan rewrite(LogicalPlanRewriteRuleContext context) throws TajoExc return plan; } + public void setCatalog(CatalogService catalog) { + this.catalog = catalog; + } + private static class PartitionPathFilter implements PathFilter { private Schema 
schema; @@ -348,6 +353,7 @@ public static PartitionsByAlgebraProto getPartitionsAlgebraProto( return paths; } + @VisibleForTesting public PartitionContent getPartitionContent(OverridableConf queryContext, ScanNode scanNode) throws IOException, UndefinedDatabaseException, UndefinedTableException, UndefinedPartitionMethodException, UndefinedOperatorException, UnsupportedException { From 3f5f2386bea518275c381d2540f825072e06550d Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Wed, 25 Nov 2015 17:31:05 +0900 Subject: [PATCH 045/127] Trigger for travis CI build --- .../apache/tajo/engine/planner/TestPartitionedTableRewriter.java | 1 - 1 file changed, 1 deletion(-) diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java index 4cceac8b5a..938679cdcf 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java @@ -47,7 +47,6 @@ import static org.junit.Assert.*; public class TestPartitionedTableRewriter extends QueryTestCaseBase { - final static String PARTITION_TABLE_NAME = "tb_partition"; final static String MULTIPLE_PARTITION_TABLE_NAME = "tb_multiple_partition"; From eed923b349928557ac1c874a79c5b01344f9e52e Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Wed, 2 Dec 2015 11:57:54 +0900 Subject: [PATCH 046/127] Remove unused methods --- .../apache/tajo/storage/FileTablespace.java | 36 ------------------- 1 file changed, 36 deletions(-) diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java index 70a9979924..d45e3f1a5b 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java +++ 
b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java @@ -555,42 +555,6 @@ public List getSplits(String tableName, TableMeta meta, Schema schema, // The below code is for splitting partitioned table. //////////////////////////////////////////////////////////////////////////////// - public PartitionFileFragment[] partitionSplit(String tableName, Path tablePath, String partitionKeys) - throws IOException { - return partitionSplit(tableName, tablePath, fs.getDefaultBlockSize(), partitionKeys); - } - - private PartitionFileFragment[] partitionSplit(String tableName, Path tablePath, long size, String partitionKeys) - throws IOException { - FileSystem fs = tablePath.getFileSystem(conf); - - long defaultBlockSize = size; - List listTablets = new ArrayList<>(); - PartitionFileFragment tablet; - - FileStatus[] fileLists = fs.listStatus(tablePath); - for (FileStatus file : fileLists) { - long remainFileSize = file.getLen(); - long start = 0; - if (remainFileSize > defaultBlockSize) { - while (remainFileSize > defaultBlockSize) { - tablet = new PartitionFileFragment(tableName, file.getPath(), start, defaultBlockSize, partitionKeys); - listTablets.add(tablet); - start += defaultBlockSize; - remainFileSize -= defaultBlockSize; - } - listTablets.add(new PartitionFileFragment(tableName, file.getPath(), start, remainFileSize, partitionKeys)); - } else { - listTablets.add(new PartitionFileFragment(tableName, file.getPath(), 0, remainFileSize, partitionKeys)); - } - } - - PartitionFileFragment[] tablets = new PartitionFileFragment[listTablets.size()]; - listTablets.toArray(tablets); - - return tablets; - } - protected PartitionFileFragment makePartitionSplit(String fragmentId, Path file, long start, long length, String[] hosts, String partitionKeys) { return new PartitionFileFragment(fragmentId, file, start, length, hosts, partitionKeys); From 224dfc2290fea58366b57a165c99c300a677ae31 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Thu, 3 Dec 
2015 10:26:21 +0900 Subject: [PATCH 047/127] Remove previous split size --- .../src/main/java/org/apache/tajo/storage/FileTablespace.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java index d45e3f1a5b..29090756c8 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java @@ -620,7 +620,6 @@ public List getPartitionSplits(String tableName, TableMeta meta, Schem files.addAll(listStatus(p)); } - int previousSplitSize = splits.size(); for (FileStatus file : files) { Path path = file.getPath(); long length = file.getLen(); @@ -678,7 +677,7 @@ public List getPartitionSplits(String tableName, TableMeta meta, Schem } } if(LOG.isDebugEnabled()){ - LOG.debug("# of splits per partition: " + (splits.size() - previousSplitSize)); + LOG.debug("# of average splits per partition: " + splits.size() / (i+1)); } i++; } From e390fc69de8db62b37ab38f4e9dd2ddaedfd950f Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Thu, 3 Dec 2015 10:34:15 +0900 Subject: [PATCH 048/127] Mark final keword to all member variables in PartitionFragement --- .../org/apache/tajo/plan/partition/PartitionContent.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/partition/PartitionContent.java b/tajo-plan/src/main/java/org/apache/tajo/plan/partition/PartitionContent.java index 691c3f2452..3985cbaa20 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/partition/PartitionContent.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/partition/PartitionContent.java @@ -21,9 +21,9 @@ import org.apache.hadoop.fs.Path; public class PartitionContent { - private Path[] partitionPaths; - private String[] 
partitionKeys; - private long totalVolume; + private final Path[] partitionPaths; + private final String[] partitionKeys; + private final long totalVolume; public PartitionContent(Path[] partitionPaths, String[] partitionKeys, long totalVolume) { this.partitionPaths = partitionPaths; From 707866aaa5e88049e3f8c7979751564b224fc706 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Thu, 3 Dec 2015 10:53:00 +0900 Subject: [PATCH 049/127] Add more description --- .../apache/tajo/storage/FileTablespace.java | 116 +++++++++++------- 1 file changed, 70 insertions(+), 46 deletions(-) diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java index 29090756c8..ab2c13ff08 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java @@ -409,33 +409,7 @@ protected FileFragment makeSplit(String fragmentId, Path file, BlockLocation blo // for Non Splittable. 
eg, compressed gzip TextFile protected FileFragment makeNonSplit(String fragmentId, Path file, long start, long length, BlockLocation[] blkLocations) throws IOException { - - Map hostsBlockMap = new HashMap<>(); - for (BlockLocation blockLocation : blkLocations) { - for (String host : blockLocation.getHosts()) { - if (hostsBlockMap.containsKey(host)) { - hostsBlockMap.put(host, hostsBlockMap.get(host) + 1); - } else { - hostsBlockMap.put(host, 1); - } - } - } - - List> entries = new ArrayList<>(hostsBlockMap.entrySet()); - Collections.sort(entries, new Comparator>() { - - @Override - public int compare(Map.Entry v1, Map.Entry v2) { - return v1.getValue().compareTo(v2.getValue()); - } - }); - - String[] hosts = new String[blkLocations[0].getHosts().length]; - - for (int i = 0; i < hosts.length; i++) { - Map.Entry entry = entries.get((entries.size() - 1) - i); - hosts[i] = entry.getKey(); - } + String[] hosts = getHosts(blkLocations); return new FileFragment(fragmentId, file, start, length, hosts); } @@ -551,23 +525,15 @@ public List getSplits(String tableName, TableMeta meta, Schema schema, return splits; } - //////////////////////////////////////////////////////////////////////////////// - // The below code is for splitting partitioned table. 
- //////////////////////////////////////////////////////////////////////////////// - - protected PartitionFileFragment makePartitionSplit(String fragmentId, Path file, long start, long length, - String[] hosts, String partitionKeys) { - return new PartitionFileFragment(fragmentId, file, start, length, hosts, partitionKeys); - } - - protected PartitionFileFragment makePartitionSplit(String fragmentId, Path file, BlockLocation blockLocation - , String partitionKeys) throws IOException { - return new PartitionFileFragment(fragmentId, file, blockLocation, partitionKeys); - } - - protected Fragment makeNonPartitionSplit(String fragmentId, Path file, long start, long length, - BlockLocation[] blkLocations, String partitionKeys) throws IOException { - + /** + * Get the list of hosts (hostname) hosting specified blocks + * + * + * @param blkLocations locations of blocks + * @return the list of hosts + * @throws IOException + */ + private String[] getHosts(BlockLocation[] blkLocations) throws IOException { Map hostsBlockMap = new HashMap<>(); for (BlockLocation blockLocation : blkLocations) { for (String host : blockLocation.getHosts()) { @@ -595,18 +561,76 @@ public int compare(Map.Entry v1, Map.Entry v2) hosts[i] = entry.getKey(); } + return hosts; + } + + //////////////////////////////////////////////////////////////////////////////// + // The below code is for splitting partitioned table. 
+ //////////////////////////////////////////////////////////////////////////////// + + /** + * Build a fragment for partition table + * + * @param fragmentId fragment id + * @param file file path + * @param start offset + * @param length length + * @param hosts the list of hosts (names) hosting blocks + * @param partitionKeys partition keys + * @return PartitionFileFragment + */ + protected PartitionFileFragment makePartitionSplit(String fragmentId, Path file, long start, long length, + String[] hosts, String partitionKeys) { return new PartitionFileFragment(fragmentId, file, start, length, hosts, partitionKeys); } /** - * Generate the list of files and make them into PartitionedFileSplits. + * Build a fragment for partition table * + * @param fragmentId fragment id + * @param file file path + * @param blockLocation location of block + * @param partitionKeys partition keys + * @return PartitionFileFragment + * @throws IOException + */ + protected PartitionFileFragment makePartitionSplit(String fragmentId, Path file, BlockLocation blockLocation + , String partitionKeys) throws IOException { + return new PartitionFileFragment(fragmentId, file, blockLocation, partitionKeys); + } + + /** + * Build a fragment for non splittable partition table + * + * @param fragmentId fragment id + * @param file file path + * @param start offset + * @param length length + * @param blkLocations locations of blocks + * @param partitionKeys partition keys + * @return PartitionFileFragment + * @throws IOException + */ + protected Fragment makeNonPartitionSplit(String fragmentId, Path file, long start, long length, + BlockLocation[] blkLocations, String partitionKeys) throws IOException { + String[] hosts = getHosts(blkLocations); + return new PartitionFileFragment(fragmentId, file, start, length, hosts, partitionKeys); + } + + /** + * Build the list of fragments for partition table + * + * @param tableName table name + * @param meta all meta information for scanning a fragmented table + * 
@param schema table schema + * @param partitionKeys the list of partition keys + * @param inputs the list of paths + * @return the list of PartitionFileFragment * @throws IOException */ public List getPartitionSplits(String tableName, TableMeta meta, Schema schema, String[] partitionKeys, Path... inputs) throws IOException { // generate splits' - List splits = Lists.newArrayList(); List volumeSplits = Lists.newArrayList(); List blockLocations = Lists.newArrayList(); From dc67e059a662423cd2afb3c63a92603ebbc6f534 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Thu, 3 Dec 2015 11:47:55 +0900 Subject: [PATCH 050/127] Apply lamda expression --- .../java/org/apache/tajo/storage/FileTablespace.java | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java index ab2c13ff08..58863c2c18 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java @@ -546,13 +546,8 @@ private String[] getHosts(BlockLocation[] blkLocations) throws IOException { } List> entries = new ArrayList<>(hostsBlockMap.entrySet()); - Collections.sort(entries, new Comparator>() { - - @Override - public int compare(Map.Entry v1, Map.Entry v2) { - return v1.getValue().compareTo(v2.getValue()); - } - }); + Collections.sort(entries, (Map.Entry v1, Map.Entry v2) + -> v1.getValue().compareTo(v2.getValue())); String[] hosts = new String[blkLocations[0].getHosts().length]; From 8a52561c7861f7e0def51a9ed4c206094f08bf60 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Thu, 3 Dec 2015 13:41:57 +0900 Subject: [PATCH 051/127] Implement methods for PartitionFileFragment to S3TableSpace --- .../apache/tajo/storage/s3/S3TableSpace.java | 212 +++++++++++++++--- 1 file changed, 185 
insertions(+), 27 deletions(-) diff --git a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java index 7ac5425ba0..d4319633f6 100644 --- a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java +++ b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java @@ -44,6 +44,7 @@ import org.apache.tajo.storage.Scanner; import org.apache.tajo.storage.fragment.FileFragment; import org.apache.tajo.storage.fragment.Fragment; +import org.apache.tajo.storage.fragment.PartitionFileFragment; import org.apache.tajo.util.Bytes; import javax.annotation.Nullable; @@ -423,33 +424,7 @@ protected FileFragment makeSplit(String fragmentId, Path file, BlockLocation blo // for Non Splittable. eg, compressed gzip TextFile protected FileFragment makeNonSplit(String fragmentId, Path file, long start, long length, BlockLocation[] blkLocations) throws IOException { - - Map hostsBlockMap = new HashMap<>(); - for (BlockLocation blockLocation : blkLocations) { - for (String host : blockLocation.getHosts()) { - if (hostsBlockMap.containsKey(host)) { - hostsBlockMap.put(host, hostsBlockMap.get(host) + 1); - } else { - hostsBlockMap.put(host, 1); - } - } - } - - List> entries = new ArrayList<>(hostsBlockMap.entrySet()); - Collections.sort(entries, new Comparator>() { - - @Override - public int compare(Map.Entry v1, Map.Entry v2) { - return v1.getValue().compareTo(v2.getValue()); - } - }); - - String[] hosts = new String[blkLocations[0].getHosts().length]; - - for (int i = 0; i < hosts.length; i++) { - Map.Entry entry = entries.get((entries.size() - 1) - i); - hosts[i] = entry.getKey(); - } + String[] hosts = getHosts(blkLocations); return new FileFragment(fragmentId, file, start, length, hosts); } @@ -565,6 +540,189 @@ public List getSplits(String tableName, TableMeta meta, Schema schema, return splits; } + /** + * 
Get the list of hosts (hostname) hosting specified blocks + * + * + * @param blkLocations locations of blocks + * @return the list of hosts + * @throws IOException + */ + private String[] getHosts(BlockLocation[] blkLocations) throws IOException { + Map hostsBlockMap = new HashMap<>(); + for (BlockLocation blockLocation : blkLocations) { + for (String host : blockLocation.getHosts()) { + if (hostsBlockMap.containsKey(host)) { + hostsBlockMap.put(host, hostsBlockMap.get(host) + 1); + } else { + hostsBlockMap.put(host, 1); + } + } + } + + List> entries = new ArrayList<>(hostsBlockMap.entrySet()); + Collections.sort(entries, (Map.Entry v1, Map.Entry v2) + -> v1.getValue().compareTo(v2.getValue())); + + String[] hosts = new String[blkLocations[0].getHosts().length]; + + for (int i = 0; i < hosts.length; i++) { + Map.Entry entry = entries.get((entries.size() - 1) - i); + hosts[i] = entry.getKey(); + } + + return hosts; + } + + //////////////////////////////////////////////////////////////////////////////// + // The below code is for splitting partitioned table. 
+ //////////////////////////////////////////////////////////////////////////////// + + /** + * Build a fragment for partition table + * + * @param fragmentId fragment id + * @param file file path + * @param start offset + * @param length length + * @param hosts the list of hosts (names) hosting blocks + * @param partitionKeys partition keys + * @return PartitionFileFragment + */ + protected PartitionFileFragment makePartitionSplit(String fragmentId, Path file, long start, long length, + String[] hosts, String partitionKeys) { + return new PartitionFileFragment(fragmentId, file, start, length, hosts, partitionKeys); + } + + /** + * Build a fragment for partition table + * + * @param fragmentId fragment id + * @param file file path + * @param blockLocation location of block + * @param partitionKeys partition keys + * @return PartitionFileFragment + * @throws IOException + */ + protected PartitionFileFragment makePartitionSplit(String fragmentId, Path file, BlockLocation blockLocation + , String partitionKeys) throws IOException { + return new PartitionFileFragment(fragmentId, file, blockLocation, partitionKeys); + } + + /** + * Build a fragment for non splittable partition table + * + * @param fragmentId fragment id + * @param file file path + * @param start offset + * @param length length + * @param blkLocations locations of blocks + * @param partitionKeys partition keys + * @return PartitionFileFragment + * @throws IOException + */ + protected Fragment makeNonPartitionSplit(String fragmentId, Path file, long start, long length, + BlockLocation[] blkLocations, String partitionKeys) throws IOException { + String[] hosts = getHosts(blkLocations); + return new PartitionFileFragment(fragmentId, file, start, length, hosts, partitionKeys); + } + + /** + * Build the list of fragments for partition table + * + * @param tableName table name + * @param meta all meta information for scanning a fragmented table + * @param schema table schema + * @param partitionKeys the list of 
partition keys + * @param inputs the list of paths + * @return the list of PartitionFileFragment + * @throws IOException + */ + public List getPartitionSplits(String tableName, TableMeta meta, Schema schema, String[] partitionKeys, + Path... inputs) throws IOException { + // generate splits' + List splits = Lists.newArrayList(); + List volumeSplits = Lists.newArrayList(); + List blockLocations = Lists.newArrayList(); + + int i = 0; + for (Path p : inputs) { + ArrayList files = Lists.newArrayList(); + if (fs.isFile(p)) { + files.addAll(Lists.newArrayList(fs.getFileStatus(p))); + } else { + files.addAll(listStatus(p)); + } + + for (FileStatus file : files) { + Path path = file.getPath(); + long length = file.getLen(); + if (length > 0) { + // Get locations of blocks of file + BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length); + boolean splittable = isSplittable(meta, schema, path, file); + if (blocksMetadataEnabled && fs instanceof DistributedFileSystem) { + + if (splittable) { + for (BlockLocation blockLocation : blkLocations) { + volumeSplits.add(makePartitionSplit(tableName, path, blockLocation, partitionKeys[i])); + } + blockLocations.addAll(Arrays.asList(blkLocations)); + + } else { // Non splittable + long blockSize = blkLocations[0].getLength(); + if (blockSize >= length) { + blockLocations.addAll(Arrays.asList(blkLocations)); + for (BlockLocation blockLocation : blkLocations) { + volumeSplits.add(makePartitionSplit(tableName, path, blockLocation, partitionKeys[i])); + } + } else { + splits.add(makeNonPartitionSplit(tableName, path, 0, length, blkLocations, partitionKeys[i])); + } + } + + } else { + if (splittable) { + + long minSize = Math.max(getMinSplitSize(), 1); + + long blockSize = file.getBlockSize(); // s3n rest api contained block size but blockLocations is one + long splitSize = Math.max(minSize, blockSize); + long bytesRemaining = length; + + // for s3 + while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) { + int 
blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); + + splits.add(makePartitionSplit(tableName, path, length - bytesRemaining, splitSize, + blkLocations[blkIndex].getHosts(), partitionKeys[i])); + + bytesRemaining -= splitSize; + } + if (bytesRemaining > 0) { + int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); + splits.add(makePartitionSplit(tableName, path, length - bytesRemaining, bytesRemaining, + blkLocations[blkIndex].getHosts(), partitionKeys[i])); + } + } else { // Non splittable + splits.add(makeNonPartitionSplit(tableName, path, 0, length, blkLocations, partitionKeys[i])); + } + } + } + } + if(LOG.isDebugEnabled()){ + LOG.debug("# of average splits per partition: " + splits.size() / (i+1)); + } + i++; + } + + // Combine original fileFragments with new VolumeId information + setVolumeMeta(volumeSplits, blockLocations); + splits.addAll(volumeSplits); + LOG.info("Total # of splits: " + splits.size()); + return splits; + } + private void setVolumeMeta(List splits, final List blockLocations) throws IOException { From 52b3790d7968ee7f115446e2fa61c2c040d9ce31 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Thu, 3 Dec 2015 14:30:43 +0900 Subject: [PATCH 052/127] Revert "Implement methods for PartitionFileFragment to S3TableSpace" This reverts commit 8a52561c7861f7e0def51a9ed4c206094f08bf60. 
--- .../apache/tajo/storage/s3/S3TableSpace.java | 212 +++--------------- 1 file changed, 27 insertions(+), 185 deletions(-) diff --git a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java index d4319633f6..7ac5425ba0 100644 --- a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java +++ b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java @@ -44,7 +44,6 @@ import org.apache.tajo.storage.Scanner; import org.apache.tajo.storage.fragment.FileFragment; import org.apache.tajo.storage.fragment.Fragment; -import org.apache.tajo.storage.fragment.PartitionFileFragment; import org.apache.tajo.util.Bytes; import javax.annotation.Nullable; @@ -424,7 +423,33 @@ protected FileFragment makeSplit(String fragmentId, Path file, BlockLocation blo // for Non Splittable. eg, compressed gzip TextFile protected FileFragment makeNonSplit(String fragmentId, Path file, long start, long length, BlockLocation[] blkLocations) throws IOException { - String[] hosts = getHosts(blkLocations); + + Map hostsBlockMap = new HashMap<>(); + for (BlockLocation blockLocation : blkLocations) { + for (String host : blockLocation.getHosts()) { + if (hostsBlockMap.containsKey(host)) { + hostsBlockMap.put(host, hostsBlockMap.get(host) + 1); + } else { + hostsBlockMap.put(host, 1); + } + } + } + + List> entries = new ArrayList<>(hostsBlockMap.entrySet()); + Collections.sort(entries, new Comparator>() { + + @Override + public int compare(Map.Entry v1, Map.Entry v2) { + return v1.getValue().compareTo(v2.getValue()); + } + }); + + String[] hosts = new String[blkLocations[0].getHosts().length]; + + for (int i = 0; i < hosts.length; i++) { + Map.Entry entry = entries.get((entries.size() - 1) - i); + hosts[i] = entry.getKey(); + } return new FileFragment(fragmentId, file, start, length, hosts); } @@ -540,189 +565,6 @@ 
public List getSplits(String tableName, TableMeta meta, Schema schema, return splits; } - /** - * Get the list of hosts (hostname) hosting specified blocks - * - * - * @param blkLocations locations of blocks - * @return the list of hosts - * @throws IOException - */ - private String[] getHosts(BlockLocation[] blkLocations) throws IOException { - Map hostsBlockMap = new HashMap<>(); - for (BlockLocation blockLocation : blkLocations) { - for (String host : blockLocation.getHosts()) { - if (hostsBlockMap.containsKey(host)) { - hostsBlockMap.put(host, hostsBlockMap.get(host) + 1); - } else { - hostsBlockMap.put(host, 1); - } - } - } - - List> entries = new ArrayList<>(hostsBlockMap.entrySet()); - Collections.sort(entries, (Map.Entry v1, Map.Entry v2) - -> v1.getValue().compareTo(v2.getValue())); - - String[] hosts = new String[blkLocations[0].getHosts().length]; - - for (int i = 0; i < hosts.length; i++) { - Map.Entry entry = entries.get((entries.size() - 1) - i); - hosts[i] = entry.getKey(); - } - - return hosts; - } - - //////////////////////////////////////////////////////////////////////////////// - // The below code is for splitting partitioned table. 
- //////////////////////////////////////////////////////////////////////////////// - - /** - * Build a fragment for partition table - * - * @param fragmentId fragment id - * @param file file path - * @param start offset - * @param length length - * @param hosts the list of hosts (names) hosting blocks - * @param partitionKeys partition keys - * @return PartitionFileFragment - */ - protected PartitionFileFragment makePartitionSplit(String fragmentId, Path file, long start, long length, - String[] hosts, String partitionKeys) { - return new PartitionFileFragment(fragmentId, file, start, length, hosts, partitionKeys); - } - - /** - * Build a fragment for partition table - * - * @param fragmentId fragment id - * @param file file path - * @param blockLocation location of block - * @param partitionKeys partition keys - * @return PartitionFileFragment - * @throws IOException - */ - protected PartitionFileFragment makePartitionSplit(String fragmentId, Path file, BlockLocation blockLocation - , String partitionKeys) throws IOException { - return new PartitionFileFragment(fragmentId, file, blockLocation, partitionKeys); - } - - /** - * Build a fragment for non splittable partition table - * - * @param fragmentId fragment id - * @param file file path - * @param start offset - * @param length length - * @param blkLocations locations of blocks - * @param partitionKeys partition keys - * @return PartitionFileFragment - * @throws IOException - */ - protected Fragment makeNonPartitionSplit(String fragmentId, Path file, long start, long length, - BlockLocation[] blkLocations, String partitionKeys) throws IOException { - String[] hosts = getHosts(blkLocations); - return new PartitionFileFragment(fragmentId, file, start, length, hosts, partitionKeys); - } - - /** - * Build the list of fragments for partition table - * - * @param tableName table name - * @param meta all meta information for scanning a fragmented table - * @param schema table schema - * @param partitionKeys the list of 
partition keys - * @param inputs the list of paths - * @return the list of PartitionFileFragment - * @throws IOException - */ - public List getPartitionSplits(String tableName, TableMeta meta, Schema schema, String[] partitionKeys, - Path... inputs) throws IOException { - // generate splits' - List splits = Lists.newArrayList(); - List volumeSplits = Lists.newArrayList(); - List blockLocations = Lists.newArrayList(); - - int i = 0; - for (Path p : inputs) { - ArrayList files = Lists.newArrayList(); - if (fs.isFile(p)) { - files.addAll(Lists.newArrayList(fs.getFileStatus(p))); - } else { - files.addAll(listStatus(p)); - } - - for (FileStatus file : files) { - Path path = file.getPath(); - long length = file.getLen(); - if (length > 0) { - // Get locations of blocks of file - BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length); - boolean splittable = isSplittable(meta, schema, path, file); - if (blocksMetadataEnabled && fs instanceof DistributedFileSystem) { - - if (splittable) { - for (BlockLocation blockLocation : blkLocations) { - volumeSplits.add(makePartitionSplit(tableName, path, blockLocation, partitionKeys[i])); - } - blockLocations.addAll(Arrays.asList(blkLocations)); - - } else { // Non splittable - long blockSize = blkLocations[0].getLength(); - if (blockSize >= length) { - blockLocations.addAll(Arrays.asList(blkLocations)); - for (BlockLocation blockLocation : blkLocations) { - volumeSplits.add(makePartitionSplit(tableName, path, blockLocation, partitionKeys[i])); - } - } else { - splits.add(makeNonPartitionSplit(tableName, path, 0, length, blkLocations, partitionKeys[i])); - } - } - - } else { - if (splittable) { - - long minSize = Math.max(getMinSplitSize(), 1); - - long blockSize = file.getBlockSize(); // s3n rest api contained block size but blockLocations is one - long splitSize = Math.max(minSize, blockSize); - long bytesRemaining = length; - - // for s3 - while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) { - int 
blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); - - splits.add(makePartitionSplit(tableName, path, length - bytesRemaining, splitSize, - blkLocations[blkIndex].getHosts(), partitionKeys[i])); - - bytesRemaining -= splitSize; - } - if (bytesRemaining > 0) { - int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); - splits.add(makePartitionSplit(tableName, path, length - bytesRemaining, bytesRemaining, - blkLocations[blkIndex].getHosts(), partitionKeys[i])); - } - } else { // Non splittable - splits.add(makeNonPartitionSplit(tableName, path, 0, length, blkLocations, partitionKeys[i])); - } - } - } - } - if(LOG.isDebugEnabled()){ - LOG.debug("# of average splits per partition: " + splits.size() / (i+1)); - } - i++; - } - - // Combine original fileFragments with new VolumeId information - setVolumeMeta(volumeSplits, blockLocations); - splits.addAll(volumeSplits); - LOG.info("Total # of splits: " + splits.size()); - return splits; - } - private void setVolumeMeta(List splits, final List blockLocations) throws IOException { From 69e6c6161eb32d6f9e9a2450da656e695177fde0 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 7 Dec 2015 11:50:21 +0900 Subject: [PATCH 053/127] Add dist id to PartitionFileFragment --- .../org/apache/tajo/querymaster/Task.java | 3 ++ .../apache/tajo/storage/FileTablespace.java | 49 ++++++++++++++++--- .../fragment/PartitionFileFragment.java | 30 +++++++++++- 3 files changed, 75 insertions(+), 7 deletions(-) diff --git a/tajo-core/src/main/java/org/apache/tajo/querymaster/Task.java b/tajo-core/src/main/java/org/apache/tajo/querymaster/Task.java index 95a7170ed8..b09f5a19c1 100644 --- a/tajo-core/src/main/java/org/apache/tajo/querymaster/Task.java +++ b/tajo-core/src/main/java/org/apache/tajo/querymaster/Task.java @@ -43,6 +43,7 @@ import org.apache.tajo.storage.fragment.FileFragment; import org.apache.tajo.storage.fragment.Fragment; import org.apache.tajo.storage.fragment.FragmentConvertor; +import 
org.apache.tajo.storage.fragment.PartitionFileFragment; import org.apache.tajo.util.Pair; import org.apache.tajo.util.TajoIdUtils; import org.apache.tajo.util.history.TaskHistory; @@ -329,6 +330,8 @@ private void addDataLocation(Fragment fragment) { int[] diskIds = null; if (fragment instanceof FileFragment) { diskIds = ((FileFragment)fragment).getDiskIds(); + } else if (fragment instanceof PartitionFileFragment) { + diskIds = ((PartitionFileFragment)fragment).getDiskIds(); } for (int i = 0; i < hosts.length; i++) { dataLocations.add(new DataLocation(hosts[i], diskIds == null ? -1 : diskIds[i])); diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java index 58863c2c18..54c9eb62b9 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java @@ -525,6 +525,41 @@ public List getSplits(String tableName, TableMeta meta, Schema schema, return splits; } + private void setVolumeMeta(List splits, final List blockLocations) + throws IOException { + + int locationSize = blockLocations.size(); + int splitSize = splits.size(); + if (locationSize == 0 || splitSize == 0) return; + + if (locationSize != splitSize) { + // splits and locations don't match up + LOG.warn("Number of block locations not equal to number of splits: " + + "#locations=" + locationSize + + " #splits=" + splitSize); + return; + } + + DistributedFileSystem fs = (DistributedFileSystem) this.fs; + int lsLimit = conf.getInt(DFSConfigKeys.DFS_LIST_LIMIT, DFSConfigKeys.DFS_LIST_LIMIT_DEFAULT); + int blockLocationIdx = 0; + + Iterator iter = splits.iterator(); + while (locationSize > blockLocationIdx) { + + int subSize = Math.min(locationSize - blockLocationIdx, lsLimit); + List locations = blockLocations.subList(blockLocationIdx, blockLocationIdx 
+ subSize); + //BlockStorageLocation containing additional volume location information for each replica of each block. + BlockStorageLocation[] blockStorageLocations = fs.getFileBlockStorageLocations(locations); + + for (BlockStorageLocation blockStorageLocation : blockStorageLocations) { + ((FileFragment)iter.next()).setDiskIds(getDiskIds(blockStorageLocation.getVolumeIds())); + blockLocationIdx++; + } + } + LOG.info("# of splits with volumeId " + splitSize); + } + /** * Get the list of hosts (hostname) hosting specified blocks * @@ -702,14 +737,14 @@ public List getPartitionSplits(String tableName, TableMeta meta, Schem } // Combine original fileFragments with new VolumeId information - setVolumeMeta(volumeSplits, blockLocations); + setPartitionVolumeMeta(volumeSplits, blockLocations); splits.addAll(volumeSplits); LOG.info("Total # of splits: " + splits.size()); return splits; } - private void setVolumeMeta(List splits, final List blockLocations) - throws IOException { + private void setPartitionVolumeMeta(List splits, final List blockLocations) + throws IOException { int locationSize = blockLocations.size(); int splitSize = splits.size(); @@ -718,8 +753,8 @@ private void setVolumeMeta(List splits, final List bloc if (locationSize != splitSize) { // splits and locations don't match up LOG.warn("Number of block locations not equal to number of splits: " - + "#locations=" + locationSize - + " #splits=" + splitSize); + + "#locations=" + locationSize + + " #splits=" + splitSize); return; } @@ -736,13 +771,15 @@ private void setVolumeMeta(List splits, final List bloc BlockStorageLocation[] blockStorageLocations = fs.getFileBlockStorageLocations(locations); for (BlockStorageLocation blockStorageLocation : blockStorageLocations) { - ((FileFragment)iter.next()).setDiskIds(getDiskIds(blockStorageLocation.getVolumeIds())); + ((PartitionFileFragment)iter.next()).setDiskIds(getDiskIds(blockStorageLocation.getVolumeIds())); blockLocationIdx++; } } LOG.info("# of splits with 
volumeId " + splitSize); } + //////////////////////////////////////////////////////////////////////////////// + private static class InvalidInputException extends IOException { List errors; public InvalidInputException(List errors) { diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java index 3e44f2cc0e..261db7e003 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java @@ -29,6 +29,9 @@ import org.apache.tajo.util.TUtil; import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; import static org.apache.tajo.catalog.proto.CatalogProtos.FragmentProto; @@ -39,6 +42,7 @@ public class PartitionFileFragment implements Fragment, Comparable idList = new ArrayList<>(); + for(int eachId: diskIds) { + idList.add(eachId); + } + builder.addAllDiskIds(idList); + } if (hosts != null) { builder.addAllHosts(TUtil.newList(hosts)); From 28edf6c792929f65fc6bab2a1eff87d74070642c Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 7 Dec 2015 12:40:19 +0900 Subject: [PATCH 054/127] PartitionFileFragment extends FileFragment --- .../org/apache/tajo/querymaster/Task.java | 3 - .../apache/tajo/storage/FileTablespace.java | 49 +---- .../tajo/storage/fragment/FileFragment.java | 12 ++ .../fragment/PartitionFileFragment.java | 183 +++--------------- 4 files changed, 45 insertions(+), 202 deletions(-) diff --git a/tajo-core/src/main/java/org/apache/tajo/querymaster/Task.java b/tajo-core/src/main/java/org/apache/tajo/querymaster/Task.java index b09f5a19c1..95a7170ed8 100644 --- a/tajo-core/src/main/java/org/apache/tajo/querymaster/Task.java +++ b/tajo-core/src/main/java/org/apache/tajo/querymaster/Task.java 
@@ -43,7 +43,6 @@ import org.apache.tajo.storage.fragment.FileFragment; import org.apache.tajo.storage.fragment.Fragment; import org.apache.tajo.storage.fragment.FragmentConvertor; -import org.apache.tajo.storage.fragment.PartitionFileFragment; import org.apache.tajo.util.Pair; import org.apache.tajo.util.TajoIdUtils; import org.apache.tajo.util.history.TaskHistory; @@ -330,8 +329,6 @@ private void addDataLocation(Fragment fragment) { int[] diskIds = null; if (fragment instanceof FileFragment) { diskIds = ((FileFragment)fragment).getDiskIds(); - } else if (fragment instanceof PartitionFileFragment) { - diskIds = ((PartitionFileFragment)fragment).getDiskIds(); } for (int i = 0; i < hosts.length; i++) { dataLocations.add(new DataLocation(hosts[i], diskIds == null ? -1 : diskIds[i])); diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java index 54c9eb62b9..58863c2c18 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java @@ -525,41 +525,6 @@ public List getSplits(String tableName, TableMeta meta, Schema schema, return splits; } - private void setVolumeMeta(List splits, final List blockLocations) - throws IOException { - - int locationSize = blockLocations.size(); - int splitSize = splits.size(); - if (locationSize == 0 || splitSize == 0) return; - - if (locationSize != splitSize) { - // splits and locations don't match up - LOG.warn("Number of block locations not equal to number of splits: " - + "#locations=" + locationSize - + " #splits=" + splitSize); - return; - } - - DistributedFileSystem fs = (DistributedFileSystem) this.fs; - int lsLimit = conf.getInt(DFSConfigKeys.DFS_LIST_LIMIT, DFSConfigKeys.DFS_LIST_LIMIT_DEFAULT); - int blockLocationIdx = 0; - - Iterator iter = splits.iterator(); - 
while (locationSize > blockLocationIdx) { - - int subSize = Math.min(locationSize - blockLocationIdx, lsLimit); - List locations = blockLocations.subList(blockLocationIdx, blockLocationIdx + subSize); - //BlockStorageLocation containing additional volume location information for each replica of each block. - BlockStorageLocation[] blockStorageLocations = fs.getFileBlockStorageLocations(locations); - - for (BlockStorageLocation blockStorageLocation : blockStorageLocations) { - ((FileFragment)iter.next()).setDiskIds(getDiskIds(blockStorageLocation.getVolumeIds())); - blockLocationIdx++; - } - } - LOG.info("# of splits with volumeId " + splitSize); - } - /** * Get the list of hosts (hostname) hosting specified blocks * @@ -737,14 +702,14 @@ public List getPartitionSplits(String tableName, TableMeta meta, Schem } // Combine original fileFragments with new VolumeId information - setPartitionVolumeMeta(volumeSplits, blockLocations); + setVolumeMeta(volumeSplits, blockLocations); splits.addAll(volumeSplits); LOG.info("Total # of splits: " + splits.size()); return splits; } - private void setPartitionVolumeMeta(List splits, final List blockLocations) - throws IOException { + private void setVolumeMeta(List splits, final List blockLocations) + throws IOException { int locationSize = blockLocations.size(); int splitSize = splits.size(); @@ -753,8 +718,8 @@ private void setPartitionVolumeMeta(List splits, final List errors; public InvalidInputException(List errors) { diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/FileFragment.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/FileFragment.java index 4128998b1e..dd7daa19bf 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/FileFragment.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/FileFragment.java @@ -106,6 +106,10 @@ public String[] getHosts() { return hosts; } + 
public void setHosts(String[] hosts) { + this.hosts = hosts; + } + /** * Get the list of Disk Ids * Unknown disk is -1. Others 0 ~ N @@ -127,6 +131,10 @@ public String getTableName() { return this.tableName; } + public void setTableName(String tableName) { + this.tableName = tableName; + } + public Path getPath() { return this.uri; } @@ -149,6 +157,10 @@ public long getLength() { return this.length; } + public void setLength(long length) { + this.length = length; + } + @Override public boolean isEmpty() { return this.length <= 0; diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java index 261db7e003..ce519adfac 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java @@ -30,121 +30,39 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import static org.apache.tajo.catalog.proto.CatalogProtos.FragmentProto; -public class PartitionFileFragment implements Fragment, Comparable, Cloneable { - @Expose private String tableName; // required - @Expose private Path uri; // required - @Expose public Long startOffset; // required - @Expose public Long length; // required - - private String[] hosts; // Datanode hostnames - @Expose private int[] diskIds; +public class PartitionFileFragment extends FileFragment + implements Cloneable { @Expose private String partitionKeys; // required public PartitionFileFragment(ByteString raw) throws InvalidProtocolBufferException { + super(raw); PartitionFileFragmentProto.Builder builder = PartitionFileFragmentProto.newBuilder(); builder.mergeFrom(raw); - builder.build(); - init(builder.build()); - } - - public PartitionFileFragment(String tableName, 
Path uri, BlockLocation blockLocation, String partitionKeys) - throws IOException { - this.set(tableName, uri, blockLocation.getOffset(), blockLocation.getLength(), blockLocation.getHosts(), null, - partitionKeys); + this.partitionKeys = builder.build().getPartitionKeys(); } - public PartitionFileFragment(String tableName, Path uri, long start, long length, String[] hosts, int[] diskIds, - String partitionKeys) { - this.set(tableName, uri, start, length, hosts, diskIds, partitionKeys); + public PartitionFileFragment(String tableName, Path uri, BlockLocation blockLocation, + String partitionKeys) throws IOException { + super(tableName, uri, blockLocation); + this.partitionKeys = partitionKeys; } - // Non splittable public PartitionFileFragment(String tableName, Path uri, long start, long length, String[] hosts, String partitionKeys) { - this.set(tableName, uri, start, length, hosts, null, partitionKeys); + super(tableName, uri, start, length, hosts); + this.partitionKeys = partitionKeys; } public PartitionFileFragment(String fragmentId, Path path, long start, long length, String partitionKeys) { - this.set(fragmentId, path, start, length, null, null, partitionKeys); - } - - public PartitionFileFragment(PartitionFileFragmentProto proto) { - init(proto); - } - - private void init(PartitionFileFragmentProto proto) { - int[] diskIds = new int[proto.getDiskIdsList().size()]; - int i = 0; - for(Integer eachValue: proto.getDiskIdsList()) { - diskIds[i++] = eachValue; - } - this.set(proto.getId(), new Path(proto.getPath()), - proto.getStartOffset(), proto.getLength(), - proto.getHostsList().toArray(new String[]{}), - diskIds, - proto.getPartitionKeys()); - } - - private void set(String tableName, Path path, long start, - long length, String[] hosts, int[] diskIds, String partitionKeys) { - this.tableName = tableName; - this.uri = path; - this.startOffset = start; - this.length = length; - this.hosts = hosts; - this.diskIds = diskIds; + super(fragmentId, path, start, 
length); this.partitionKeys = partitionKeys; } - /** - * Get the list of hosts (hostname) hosting this block - */ - public String[] getHosts() { - if (hosts == null) { - this.hosts = new String[0]; - } - return this.hosts; - } - - /** - * Get the list of Disk Ids - * Unknown disk is -1. Others 0 ~ N - */ - public int[] getDiskIds() { - if (diskIds == null) { - this.diskIds = new int[getHosts().length]; - Arrays.fill(this.diskIds, -1); - } - return diskIds; - } - - public void setDiskIds(int[] diskIds){ - this.diskIds = diskIds; - } - - @Override - public String getTableName() { - return this.tableName; - } - - public Path getPath() { - return this.uri; - } - - public void setPath(Path path) { - this.uri = path; - } - - public Long getStartKey() { - return this.startOffset; - } - public String getPartitionKeys() { return partitionKeys; } @@ -154,93 +72,46 @@ public void setPartitionKeys(String partitionKeys) { } @Override - public String getKey() { - return this.uri.toString(); - } - - @Override - public long getLength() { - return this.length; - } - - @Override - public boolean isEmpty() { - return this.length <= 0; - } - /** - * - * The offset range of tablets MUST NOT be overlapped. - * - * @param t - * @return If the table paths are not same, return -1. 
- */ - @Override - public int compareTo(PartitionFileFragment t) { - if (getPath().equals(t.getPath())) { - long diff = this.getStartKey() - t.getStartKey(); - if (diff < 0) { - return -1; - } else if (diff > 0) { - return 1; - } else { - return 0; - } - } else { - return getPath().compareTo(t.getPath()); - } - } - - @Override - public boolean equals(Object o) { - if (o instanceof PartitionFileFragment) { - PartitionFileFragment t = (PartitionFileFragment) o; - if (getPath().equals(t.getPath()) - && TUtil.checkEquals(t.getStartKey(), this.getStartKey()) - && TUtil.checkEquals(t.getLength(), this.getLength())) { - return true; - } - } - return false; + public int hashCode() { + return Objects.hashCode(getTableName(), getPath(), getStartKey(), getLength(), getPartitionKeys()); } @Override - public int hashCode() { - return Objects.hashCode(tableName, uri, startOffset, length); - } - public Object clone() throws CloneNotSupportedException { PartitionFileFragment frag = (PartitionFileFragment) super.clone(); - frag.tableName = tableName; - frag.uri = uri; - frag.diskIds = diskIds; - frag.hosts = hosts; + frag.setTableName(getTableName()); + frag.setPath(getPath()); + frag.setDiskIds(getDiskIds()); + frag.setHosts(getHosts()); + frag.setPartitionKeys(getPartitionKeys()); return frag; } @Override public String toString() { - return "\"fragment\": {\"id\": \""+ tableName +"\", \"path\": " + return "\"fragment\": {\"id\": \""+ getTableName() +"\", \"path\": " +getPath() + "\", \"start\": " + this.getStartKey() + ",\"length\": " + getLength() + "\", \"partitionKeys\":" + getPartitionKeys() + "}" ; } + @Override public FragmentProto getProto() { PartitionFileFragmentProto.Builder builder = PartitionFileFragmentProto.newBuilder(); - builder.setId(this.tableName); + builder.setId(getTableName()); builder.setStartOffset(this.startOffset); builder.setLength(this.length); - builder.setPath(this.uri.toString()); - if(diskIds != null) { + builder.setPath(getPath().toString()); + 
if(getDiskIds() != null) { List idList = new ArrayList<>(); - for(int eachId: diskIds) { + for(int eachId: getDiskIds()) { idList.add(eachId); } builder.addAllDiskIds(idList); } - if (hosts != null) { - builder.addAllHosts(TUtil.newList(hosts)); + if (getHosts() != null) { + builder.addAllHosts(TUtil.newList(getHosts())); } if (partitionKeys != null) { @@ -248,7 +119,7 @@ public FragmentProto getProto() { } FragmentProto.Builder fragmentBuilder = FragmentProto.newBuilder(); - fragmentBuilder.setId(this.tableName); + fragmentBuilder.setId(getTableName()); fragmentBuilder.setDataFormat(BuiltinStorages.TEXT); fragmentBuilder.setContents(builder.buildPartial().toByteString()); return fragmentBuilder.build(); From 8446fc32fd22e8a15fd52bdf547ce0972905b245 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 7 Dec 2015 17:20:09 +0900 Subject: [PATCH 055/127] Trigger for tavis ci build --- .../apache/tajo/storage/fragment/PartitionFileFragment.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java index ce519adfac..9aa60e8284 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java @@ -34,8 +34,7 @@ import static org.apache.tajo.catalog.proto.CatalogProtos.FragmentProto; -public class PartitionFileFragment extends FileFragment - implements Cloneable { +public class PartitionFileFragment extends FileFragment implements Cloneable { @Expose private String partitionKeys; // required From 9407a8858c5b5950c7274b4eb0ed20cc98d4ae9b Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 7 Dec 2015 17:52:56 +0900 Subject: [PATCH 056/127] Trigger for Travis CI build --- 
.../apache/tajo/storage/fragment/PartitionFileFragment.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java index 9aa60e8284..af5c04f282 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java @@ -46,13 +46,13 @@ public PartitionFileFragment(ByteString raw) throws InvalidProtocolBufferExcepti } public PartitionFileFragment(String tableName, Path uri, BlockLocation blockLocation, - String partitionKeys) throws IOException { + String partitionKeys) throws IOException { super(tableName, uri, blockLocation); this.partitionKeys = partitionKeys; } public PartitionFileFragment(String tableName, Path uri, long start, long length, String[] hosts, - String partitionKeys) { + String partitionKeys) { super(tableName, uri, start, length, hosts); this.partitionKeys = partitionKeys; } From 8c05203f6db47cc33b03a345768948179def6954 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Wed, 9 Dec 2015 15:50:57 +0900 Subject: [PATCH 057/127] Add debug logs --- .../java/org/apache/tajo/querymaster/Repartitioner.java | 3 +++ .../src/main/java/org/apache/tajo/querymaster/Stage.java | 9 ++++++++- .../tajo/plan/logical/PartitionedTableScanNode.java | 8 ++++---- .../plan/rewrite/rules/PartitionedTableRewriter.java | 2 +- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java b/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java index 00711b2e6f..ee51170182 100644 --- a/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java +++ 
b/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java @@ -1172,12 +1172,14 @@ public static Stage setShuffleOutputNumForTwoPhase(Stage stage, final int desire // TODO: the union handling is required when a join has unions as its child MasterPlan masterPlan = stage.getMasterPlan(); keys = channel.getShuffleKeys(); + LOG.info("### 500 ### desiredNum:" + desiredNum + ", channel:" + channel.toString()); if (!masterPlan.isRoot(stage.getBlock()) ) { ExecutionBlock parentBlock = masterPlan.getParent(stage.getBlock()); if (parentBlock.getPlan().getType() == NodeType.JOIN) { channel.setShuffleOutputNum(desiredNum); } } + LOG.info("### 510 ### desiredNum:" + desiredNum + ", channel:" + channel.toString()); // set the partition number for group by and sort if (channel.getShuffleType() == HASH_SHUFFLE) { @@ -1203,6 +1205,7 @@ public static Stage setShuffleOutputNumForTwoPhase(Stage stage, final int desire channel.setShuffleOutputNum(desiredNum); } } + LOG.info("### 520 ### desiredNum:" + desiredNum + ", channel:" + channel.toString()); return stage; } } diff --git a/tajo-core/src/main/java/org/apache/tajo/querymaster/Stage.java b/tajo-core/src/main/java/org/apache/tajo/querymaster/Stage.java index 0aaf92befa..0afa6fff98 100644 --- a/tajo-core/src/main/java/org/apache/tajo/querymaster/Stage.java +++ b/tajo-core/src/main/java/org/apache/tajo/querymaster/Stage.java @@ -946,9 +946,11 @@ public static int calculateShuffleOutputNum(Stage stage, DataChannel channel) { + "Inner volume: " + Math.ceil((double) innerVolume / 1048576) + "MB"); long bigger = Math.max(outerVolume, innerVolume); + LOG.info("### 400 ### outerVolume:" + outerVolume + ", innerVolume:" + innerVolume); int mb = (int) Math.ceil((double) bigger / 1048576); LOG.info(stage.getId() + ", Bigger Table's volume is approximately " + mb + " MB"); + LOG.info("### 401 ### bigger:" + bigger + ", mb:" + mb); int taskNum = (int) Math.ceil((double) mb / 
masterPlan.getContext().getInt(SessionVars.JOIN_PER_SHUFFLE_SIZE)); @@ -967,6 +969,8 @@ public static int calculateShuffleOutputNum(Stage stage, DataChannel channel) { for (DataChannel eachChannel : masterPlan.getOutgoingChannels(inner.getId())) { innerShuffleOutputNum = Math.max(innerShuffleOutputNum, eachChannel.getShuffleOutputNum()); } + LOG.info("### 402 ### outerShuffleOutputNum:" + outerShuffleOutputNum + ", innerShuffleOutputNum:" + innerShuffleOutputNum); + if (outerShuffleOutputNum != innerShuffleOutputNum && taskNum != outerShuffleOutputNum && taskNum != innerShuffleOutputNum) { @@ -978,7 +982,7 @@ public static int calculateShuffleOutputNum(Stage stage, DataChannel channel) { } LOG.info(stage.getId() + ", The determined number of join partitions is " + taskNum); - +LOG.info("### 403 ### taskNum:" + taskNum); return taskNum; // Is this stage the first step of group-by? } else if (grpNode != null) { @@ -1010,6 +1014,7 @@ public static int calculateShuffleOutputNum(Stage stage, DataChannel channel) { } if (!hasGroupColumns) { LOG.info(stage.getId() + ", No Grouping Column - determinedTaskNum is set to 1"); + LOG.info("### 410 ### taskNum:1"); return 1; } else { long volume = getInputVolume(stage.masterPlan, stage.context, stage.block); @@ -1020,6 +1025,7 @@ public static int calculateShuffleOutputNum(Stage stage, DataChannel channel) { int taskNum = (int) Math.ceil((double) volumeByMB / masterPlan.getContext().getInt(SessionVars.GROUPBY_PER_SHUFFLE_SIZE)); LOG.info(stage.getId() + ", The determined number of aggregation partitions is " + taskNum); + LOG.info("### 420 ### taskNum:" + taskNum); return taskNum; } } else { @@ -1031,6 +1037,7 @@ public static int calculateShuffleOutputNum(Stage stage, DataChannel channel) { // determine the number of task per 128MB int taskNum = (int) Math.ceil((double)mb / 128); LOG.info(stage.getId() + ", The determined number of partitions is " + taskNum); + LOG.info("### 430 ### taskNum:" + taskNum); return taskNum; } } 
diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java b/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java index 6653894671..13d742d4d0 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java @@ -143,10 +143,10 @@ public PlanString getPlanString() { if (inputPaths != null) { planStr.addExplan("num of filtered paths: ").appendExplain(""+ inputPaths.length); - int i = 0; - for (Path path : inputPaths) { - planStr.addDetail((i++) + ": ").appendDetail(path.toString()); - } +// int i = 0; +// for (Path path : inputPaths) { +// planStr.addDetail((i++) + ": ").appendDetail(path.toString()); +// } } return planStr; diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java index 607b210462..9e77c37bdd 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java @@ -568,7 +568,7 @@ public Object visitScan(OverridableConf queryContext, LogicalPlan plan, LogicalP PartitionedTableScanNode rewrittenScanNode = plan.createNode(PartitionedTableScanNode.class); rewrittenScanNode.init(scanNode, filteredPaths, partitionContent.getPartitionKeys()); rewrittenScanNode.getTableDesc().getStats().setNumBytes(partitionContent.getTotalVolume()); - +LOG.info("### 100 ### tableName:" + rewrittenScanNode.getTableName() + ", volume:" + partitionContent.getTotalVolume()); // if it is topmost node, set it as the rootnode of this block. 
if (stack.empty() || block.getRoot().equals(scanNode)) { block.setRoot(rewrittenScanNode); From 909a601d60e207ee2644b4c03513f5b3d973c853 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Wed, 9 Dec 2015 18:10:38 +0900 Subject: [PATCH 058/127] Fix unit test bug --- .../apache/tajo/storage/TestPartitionFileFragment.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/storage/TestPartitionFileFragment.java b/tajo-core-tests/src/test/java/org/apache/tajo/storage/TestPartitionFileFragment.java index f8ee48d117..999817741f 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/storage/TestPartitionFileFragment.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/storage/TestPartitionFileFragment.java @@ -20,6 +20,7 @@ import com.google.common.collect.Sets; import org.apache.hadoop.fs.Path; +import org.apache.tajo.storage.fragment.FileFragment; import org.apache.tajo.storage.fragment.FragmentConvertor; import org.apache.tajo.storage.fragment.PartitionFileFragment; import org.apache.tajo.util.CommonTestingUtil; @@ -70,14 +71,14 @@ public final void testCompareTo() { final int num = 10; PartitionFileFragment[] tablets = new PartitionFileFragment[num]; for (int i = num - 1; i >= 0; i--) { - tablets[i] = new PartitionFileFragment("tablet1", new Path(path, "tablet0/col1=" + i), i * 500, (i+1) * 500 + tablets[i] = new PartitionFileFragment("tablet0", new Path(path, "tablet0/col1=" + i), i * 500, (i+1) * 500 , "col1=" + i); } - + Arrays.sort(tablets); - for(int i = 0; i < num; i++) { - assertEquals("col1=" + i, tablets[i].getPartitionKeys()); + for (int i = 0; i < num; i++) { + assertEquals("col1=" + (num - i - 1), tablets[i].getPartitionKeys()); } } From 4ab2b51f8562c1ca3f87909b42f09079585817a4 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Thu, 10 Dec 2015 12:21:25 +0900 Subject: [PATCH 059/127] Remove comments --- .../tajo/plan/logical/PartitionedTableScanNode.java | 8 ++++---- 1 file changed, 4 
insertions(+), 4 deletions(-) diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java b/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java index 13d742d4d0..6653894671 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java @@ -143,10 +143,10 @@ public PlanString getPlanString() { if (inputPaths != null) { planStr.addExplan("num of filtered paths: ").appendExplain(""+ inputPaths.length); -// int i = 0; -// for (Path path : inputPaths) { -// planStr.addDetail((i++) + ": ").appendDetail(path.toString()); -// } + int i = 0; + for (Path path : inputPaths) { + planStr.addDetail((i++) + ": ").appendDetail(path.toString()); + } } return planStr; From c2a5aae605a95432c94ad02c07a408b006bc3317 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Thu, 10 Dec 2015 14:51:54 +0900 Subject: [PATCH 060/127] Remove debug logs --- .../java/org/apache/tajo/querymaster/Repartitioner.java | 3 --- .../src/main/java/org/apache/tajo/querymaster/Stage.java | 9 +-------- .../plan/rewrite/rules/PartitionedTableRewriter.java | 2 +- 3 files changed, 2 insertions(+), 12 deletions(-) diff --git a/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java b/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java index ee51170182..00711b2e6f 100644 --- a/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java +++ b/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java @@ -1172,14 +1172,12 @@ public static Stage setShuffleOutputNumForTwoPhase(Stage stage, final int desire // TODO: the union handling is required when a join has unions as its child MasterPlan masterPlan = stage.getMasterPlan(); keys = channel.getShuffleKeys(); - LOG.info("### 500 ### desiredNum:" + desiredNum + ", channel:" + channel.toString()); if 
(!masterPlan.isRoot(stage.getBlock()) ) { ExecutionBlock parentBlock = masterPlan.getParent(stage.getBlock()); if (parentBlock.getPlan().getType() == NodeType.JOIN) { channel.setShuffleOutputNum(desiredNum); } } - LOG.info("### 510 ### desiredNum:" + desiredNum + ", channel:" + channel.toString()); // set the partition number for group by and sort if (channel.getShuffleType() == HASH_SHUFFLE) { @@ -1205,7 +1203,6 @@ public static Stage setShuffleOutputNumForTwoPhase(Stage stage, final int desire channel.setShuffleOutputNum(desiredNum); } } - LOG.info("### 520 ### desiredNum:" + desiredNum + ", channel:" + channel.toString()); return stage; } } diff --git a/tajo-core/src/main/java/org/apache/tajo/querymaster/Stage.java b/tajo-core/src/main/java/org/apache/tajo/querymaster/Stage.java index 0afa6fff98..0aaf92befa 100644 --- a/tajo-core/src/main/java/org/apache/tajo/querymaster/Stage.java +++ b/tajo-core/src/main/java/org/apache/tajo/querymaster/Stage.java @@ -946,11 +946,9 @@ public static int calculateShuffleOutputNum(Stage stage, DataChannel channel) { + "Inner volume: " + Math.ceil((double) innerVolume / 1048576) + "MB"); long bigger = Math.max(outerVolume, innerVolume); - LOG.info("### 400 ### outerVolume:" + outerVolume + ", innerVolume:" + innerVolume); int mb = (int) Math.ceil((double) bigger / 1048576); LOG.info(stage.getId() + ", Bigger Table's volume is approximately " + mb + " MB"); - LOG.info("### 401 ### bigger:" + bigger + ", mb:" + mb); int taskNum = (int) Math.ceil((double) mb / masterPlan.getContext().getInt(SessionVars.JOIN_PER_SHUFFLE_SIZE)); @@ -969,8 +967,6 @@ public static int calculateShuffleOutputNum(Stage stage, DataChannel channel) { for (DataChannel eachChannel : masterPlan.getOutgoingChannels(inner.getId())) { innerShuffleOutputNum = Math.max(innerShuffleOutputNum, eachChannel.getShuffleOutputNum()); } - LOG.info("### 402 ### outerShuffleOutputNum:" + outerShuffleOutputNum + ", innerShuffleOutputNum:" + innerShuffleOutputNum); - if 
(outerShuffleOutputNum != innerShuffleOutputNum && taskNum != outerShuffleOutputNum && taskNum != innerShuffleOutputNum) { @@ -982,7 +978,7 @@ public static int calculateShuffleOutputNum(Stage stage, DataChannel channel) { } LOG.info(stage.getId() + ", The determined number of join partitions is " + taskNum); -LOG.info("### 403 ### taskNum:" + taskNum); + return taskNum; // Is this stage the first step of group-by? } else if (grpNode != null) { @@ -1014,7 +1010,6 @@ public static int calculateShuffleOutputNum(Stage stage, DataChannel channel) { } if (!hasGroupColumns) { LOG.info(stage.getId() + ", No Grouping Column - determinedTaskNum is set to 1"); - LOG.info("### 410 ### taskNum:1"); return 1; } else { long volume = getInputVolume(stage.masterPlan, stage.context, stage.block); @@ -1025,7 +1020,6 @@ public static int calculateShuffleOutputNum(Stage stage, DataChannel channel) { int taskNum = (int) Math.ceil((double) volumeByMB / masterPlan.getContext().getInt(SessionVars.GROUPBY_PER_SHUFFLE_SIZE)); LOG.info(stage.getId() + ", The determined number of aggregation partitions is " + taskNum); - LOG.info("### 420 ### taskNum:" + taskNum); return taskNum; } } else { @@ -1037,7 +1031,6 @@ public static int calculateShuffleOutputNum(Stage stage, DataChannel channel) { // determine the number of task per 128MB int taskNum = (int) Math.ceil((double)mb / 128); LOG.info(stage.getId() + ", The determined number of partitions is " + taskNum); - LOG.info("### 430 ### taskNum:" + taskNum); return taskNum; } } diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java index 9e77c37bdd..607b210462 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java @@ -568,7 +568,7 @@ public Object 
visitScan(OverridableConf queryContext, LogicalPlan plan, LogicalP PartitionedTableScanNode rewrittenScanNode = plan.createNode(PartitionedTableScanNode.class); rewrittenScanNode.init(scanNode, filteredPaths, partitionContent.getPartitionKeys()); rewrittenScanNode.getTableDesc().getStats().setNumBytes(partitionContent.getTotalVolume()); -LOG.info("### 100 ### tableName:" + rewrittenScanNode.getTableName() + ", volume:" + partitionContent.getTotalVolume()); + // if it is topmost node, set it as the rootnode of this block. if (stack.empty() || block.getRoot().equals(scanNode)) { block.setRoot(rewrittenScanNode); From 14374cd696099f1352950033b3e6a1bb3b959cc9 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Thu, 10 Dec 2015 14:54:26 +0900 Subject: [PATCH 061/127] Rename buildTupleFromPartitionName to buildTupleFromPartitionKeys --- .../org/apache/tajo/engine/util/TestTupleUtil.java | 6 +++--- .../tajo/engine/planner/physical/SeqScanExec.java | 4 ++-- .../plan/rewrite/rules/PartitionedTableRewriter.java | 10 +++++----- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/util/TestTupleUtil.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/util/TestTupleUtil.java index 39780025a6..0c45e3559c 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/util/TestTupleUtil.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/util/TestTupleUtil.java @@ -192,17 +192,17 @@ public void testBuildTupleFromPartitionName() { schema.addColumn("key1", Type.INT8); schema.addColumn("key2", Type.TEXT); - Tuple tuple = PartitionedTableRewriter.buildTupleFromPartitionName(schema, "key1=123"); + Tuple tuple = PartitionedTableRewriter.buildTupleFromPartitionKeys(schema, "key1=123"); assertNotNull(tuple); assertEquals(DatumFactory.createInt8(123), tuple.asDatum(0)); assertEquals(DatumFactory.createNullDatum(), tuple.asDatum(1)); - tuple = PartitionedTableRewriter.buildTupleFromPartitionName(schema, 
"key1=123/key2=abc"); + tuple = PartitionedTableRewriter.buildTupleFromPartitionKeys(schema, "key1=123/key2=abc"); assertNotNull(tuple); assertEquals(DatumFactory.createInt8(123), tuple.asDatum(0)); assertEquals(DatumFactory.createText("abc"), tuple.asDatum(1)); - tuple = PartitionedTableRewriter.buildTupleFromPartitionName(schema, "key2=abc"); + tuple = PartitionedTableRewriter.buildTupleFromPartitionKeys(schema, "key2=abc"); assertNotNull(tuple); assertEquals(DatumFactory.createNullDatum(), tuple.asDatum(0)); assertEquals(DatumFactory.createText("abc"), tuple.asDatum(1)); diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java index 64146e5957..853c424d73 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SeqScanExec.java @@ -101,8 +101,8 @@ private void rewriteColumnPartitionedTableSchema() throws IOException { List partitionFileFragments = FragmentConvertor.convert(PartitionFileFragment .class, fragments); - // Get partition keys from first partition fragment - partitionRow = PartitionedTableRewriter.buildTupleFromPartitionName(columnPartitionSchema, + // Get tuple from first partition fragment using parition keys + partitionRow = PartitionedTableRewriter.buildTupleFromPartitionKeys(columnPartitionSchema, partitionFileFragments.get(0).getPartitionKeys()); } diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java index 607b210462..c893897a98 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java @@ -519,16 +519,16 @@ public static String 
getColumnPartitionPathPrefix(Schema partitionColumn) { * The second datum of tuple : SEOUL * * @param partitionColumnSchema The partition column schema - * @param partitionName The partition name + * @param partitionKeys The keys of partition * @return The tuple transformed from a column values part. */ - public static Tuple buildTupleFromPartitionName(Schema partitionColumnSchema, String partitionName) { + public static Tuple buildTupleFromPartitionKeys(Schema partitionColumnSchema, String partitionKeys) { Preconditions.checkNotNull(partitionColumnSchema); - Preconditions.checkNotNull(partitionName); + Preconditions.checkNotNull(partitionKeys); - String [] columnValues = partitionName.split("/"); + String [] columnValues = partitionKeys.split("/"); Preconditions.checkArgument(partitionColumnSchema.size() >= columnValues.length, - "Invalid Partition Name :" + partitionName); + "Invalid Partition Keys :" + partitionKeys); Tuple tuple = new VTuple(partitionColumnSchema.size()); From d23321866da93a25c3f6fe247443aa493f6263d8 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Tue, 15 Dec 2015 17:15:42 +0900 Subject: [PATCH 062/127] Add messages for printing elapsed time. 
--- .../tajo/plan/rewrite/rules/PartitionedTableRewriter.java | 5 +++++ .../main/java/org/apache/tajo/storage/FileTablespace.java | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java index c893897a98..5f35736554 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java @@ -560,6 +560,7 @@ public Object visitScan(OverridableConf queryContext, LogicalPlan plan, LogicalP } try { + long startTime = System.currentTimeMillis(); PartitionContent partitionContent = getPartitionContent(queryContext, scanNode); Path[] filteredPaths = partitionContent.getPartitionPaths(); @@ -576,6 +577,10 @@ public Object visitScan(OverridableConf queryContext, LogicalPlan plan, LogicalP PlannerUtil.replaceNode(plan, stack.peek(), scanNode, rewrittenScanNode); } block.registerNode(rewrittenScanNode); + + long finishTime = System.currentTimeMillis(); + long elapsedMills = finishTime - startTime; + LOG.info(String.format("Partition pruning :%d ms elapsed.", elapsedMills)); } catch (IOException e) { throw new TajoInternalError("Partitioned Table Rewrite Failed: \n" + e.getMessage()); } diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java index 58863c2c18..e40fa11c3f 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java @@ -625,6 +625,8 @@ protected Fragment makeNonPartitionSplit(String fragmentId, Path file, long star */ public List getPartitionSplits(String tableName, TableMeta meta, Schema schema, 
String[] partitionKeys, Path... inputs) throws IOException { + long startTime = System.currentTimeMillis(); + // generate splits' List splits = Lists.newArrayList(); List volumeSplits = Lists.newArrayList(); @@ -705,6 +707,10 @@ public List getPartitionSplits(String tableName, TableMeta meta, Schem setVolumeMeta(volumeSplits, blockLocations); splits.addAll(volumeSplits); LOG.info("Total # of splits: " + splits.size()); + + long finishTime = System.currentTimeMillis(); + long elapsedMills = finishTime - startTime; + LOG.info(String.format("Split for partition table :%d ms elapsed.", elapsedMills)); return splits; } From 490cb1a9f3d2461bef3cd0368b5fcc1ec2312cf8 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Tue, 5 Jan 2016 11:40:17 +0900 Subject: [PATCH 063/127] Trigger for travis CI build --- .../apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java | 1 + 1 file changed, 1 insertion(+) diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java index 5f35736554..0ce1733b40 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java @@ -580,6 +580,7 @@ public Object visitScan(OverridableConf queryContext, LogicalPlan plan, LogicalP long finishTime = System.currentTimeMillis(); long elapsedMills = finishTime - startTime; + LOG.info(String.format("Partition pruning :%d ms elapsed.", elapsedMills)); } catch (IOException e) { throw new TajoInternalError("Partitioned Table Rewrite Failed: \n" + e.getMessage()); From ec10d1c217b6fe35e57eada1f3aa498f7f5eeb33 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Thu, 7 Jan 2016 15:50:19 +0900 Subject: [PATCH 064/127] Trigger for Travis CI build --- .../apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java | 1 - 1 file changed, 1 deletion(-) diff 
--git a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java index 0ce1733b40..5f35736554 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java @@ -580,7 +580,6 @@ public Object visitScan(OverridableConf queryContext, LogicalPlan plan, LogicalP long finishTime = System.currentTimeMillis(); long elapsedMills = finishTime - startTime; - LOG.info(String.format("Partition pruning :%d ms elapsed.", elapsedMills)); } catch (IOException e) { throw new TajoInternalError("Partitioned Table Rewrite Failed: \n" + e.getMessage()); From a96c6538578dea89ca4d707f1757d370c8f74358 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Thu, 7 Jan 2016 23:40:38 +0900 Subject: [PATCH 065/127] Trigger CI build --- .../tajo/plan/rewrite/rules/PartitionedTableRewriter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java index 5f35736554..a8b6c1fd87 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java @@ -580,7 +580,7 @@ public Object visitScan(OverridableConf queryContext, LogicalPlan plan, LogicalP long finishTime = System.currentTimeMillis(); long elapsedMills = finishTime - startTime; - LOG.info(String.format("Partition pruning :%d ms elapsed.", elapsedMills)); + LOG.info(String.format("Partition pruning: %d ms elapsed.", elapsedMills)); } catch (IOException e) { throw new TajoInternalError("Partitioned Table Rewrite Failed: \n" + e.getMessage()); } From 
f7c5149a23eda13ad9b2e762524027eea2bc9e83 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Fri, 8 Jan 2016 15:33:54 +0900 Subject: [PATCH 066/127] Remove TUtil::newList --- .../apache/tajo/storage/fragment/PartitionFileFragment.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java index af5c04f282..0459eed81d 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java @@ -30,6 +30,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import static org.apache.tajo.catalog.proto.CatalogProtos.FragmentProto; @@ -110,7 +111,7 @@ public FragmentProto getProto() { } if (getHosts() != null) { - builder.addAllHosts(TUtil.newList(getHosts())); + builder.addAllHosts(Arrays.asList(getHosts())); } if (partitionKeys != null) { From 145b8b209cee134ffdf548f2cf197cf97cf8c420 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 1 Feb 2016 10:34:01 +0900 Subject: [PATCH 067/127] TAJO-2063: Refactor FileTablespace::commitOutputData. 
--- .../org/apache/tajo/querymaster/Query.java | 70 ++- .../org/apache/tajo/storage/Tablespace.java | 3 +- .../tajo/storage/hbase/HBaseTablespace.java | 3 +- .../apache/tajo/storage/FileTablespace.java | 443 ++++++++++-------- .../tajo/storage/OutputCommitHandle.java | 74 +++ .../tajo/storage/jdbc/JdbcTablespace.java | 3 +- 6 files changed, 353 insertions(+), 243 deletions(-) create mode 100644 tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/OutputCommitHandle.java diff --git a/tajo-core/src/main/java/org/apache/tajo/querymaster/Query.java b/tajo-core/src/main/java/org/apache/tajo/querymaster/Query.java index 0cd178f464..dae7058094 100644 --- a/tajo-core/src/main/java/org/apache/tajo/querymaster/Query.java +++ b/tajo-core/src/main/java/org/apache/tajo/querymaster/Query.java @@ -500,40 +500,34 @@ private QueryState finalizeQuery(Query query, QueryCompletedEvent event) { // In this case, we should use default tablespace. Tablespace space = TablespaceManager.get(queryContext.get(QueryVars.OUTPUT_TABLE_URI, "")); + List partitions = queryContext.hasPartition() ? query.getPartitions() : null; Path finalOutputDir = space.commitTable( - query.context.getQueryContext(), - lastStage.getId(), - lastStage.getMasterPlan().getLogicalPlan(), - lastStage.getSchema(), - tableDesc); + query.context.getQueryContext(), + lastStage.getId(), + lastStage.getMasterPlan().getLogicalPlan(), + lastStage.getSchema(), + tableDesc, + partitions); QueryHookExecutor hookExecutor = new QueryHookExecutor(query.context.getQueryMasterContext()); hookExecutor.execute(query.context.getQueryContext(), query, event.getExecutionBlockId(), finalOutputDir); // Add dynamic partitions to catalog for partition table. - if (queryContext.hasOutputTableUri() && queryContext.hasPartition()) { - List partitions = query.getPartitions(); - if (partitions != null) { - // Set contents length and file count to PartitionDescProto by listing final output directories. 
- List finalPartitions = getPartitionsWithContentsSummary(query.systemConf, - finalOutputDir, partitions); - - String databaseName, simpleTableName; - if (CatalogUtil.isFQTableName(tableDesc.getName())) { - String[] split = CatalogUtil.splitFQTableName(tableDesc.getName()); - databaseName = split[0]; - simpleTableName = split[1]; - } else { - databaseName = queryContext.getCurrentDatabase(); - simpleTableName = tableDesc.getName(); - } - - // Store partitions to CatalogStore using alter table statement. - catalog.addPartitions(databaseName, simpleTableName, finalPartitions, true); - LOG.info("Added partitions to catalog (total=" + partitions.size() + ")"); + if (!query.getPartitions().isEmpty()) { + String databaseName, simpleTableName; + + if (CatalogUtil.isFQTableName(tableDesc.getName())) { + String[] split = CatalogUtil.splitFQTableName(tableDesc.getName()); + databaseName = split[0]; + simpleTableName = split[1]; } else { - LOG.info("Can't find partitions for adding."); + databaseName = queryContext.getCurrentDatabase(); + simpleTableName = tableDesc.getName(); } + + // Store partitions to CatalogStore using alter table statement. 
+ catalog.addPartitions(databaseName, simpleTableName, partitions, true); + LOG.info("Added partitions to catalog (total=" + partitions.size() + ")"); query.clearPartitions(); } } catch (Throwable e) { @@ -546,21 +540,6 @@ private QueryState finalizeQuery(Query query, QueryCompletedEvent event) { return QueryState.QUERY_SUCCEEDED; } - private List getPartitionsWithContentsSummary(TajoConf conf, Path outputDir, - List partitions) throws IOException { - List finalPartitions = new ArrayList<>(); - - FileSystem fileSystem = outputDir.getFileSystem(conf); - for (PartitionDescProto partition : partitions) { - PartitionDescProto.Builder builder = partition.toBuilder(); - Path partitionPath = new Path(outputDir, partition.getPath()); - ContentSummary contentSummary = fileSystem.getContentSummary(partitionPath); - builder.setNumBytes(contentSummary.getLength()); - finalPartitions.add(builder.build()); - } - return finalPartitions; - } - private static interface QueryHook { boolean isEligible(QueryContext queryContext, Query query, ExecutionBlockId finalExecBlockId, Path finalOutputDir); void execute(QueryMaster.QueryMasterContext context, QueryContext queryContext, Query query, @@ -695,7 +674,14 @@ public void execute(QueryMaster.QueryMasterContext context, QueryContext queryCo tableDescTobeCreated.setPartitionMethod(createTableNode.getPartitionMethod()); } - stats.setNumBytes(getTableVolume(query.systemConf, finalOutputDir)); + long totalVolume = 0L; + if (!query.getPartitions().isEmpty()) { + totalVolume = query.getPartitions().stream().mapToLong(partition -> partition.getNumBytes()).sum(); + } else { + totalVolume = getTableVolume(query.systemConf, finalOutputDir); + } + + stats.setNumBytes(totalVolume); tableDescTobeCreated.setStats(stats); query.setResultDesc(tableDescTobeCreated); diff --git a/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/Tablespace.java b/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/Tablespace.java 
index 00e6d75a12..51e047112d 100644 --- a/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/Tablespace.java +++ b/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/Tablespace.java @@ -25,6 +25,7 @@ import org.apache.tajo.TaskAttemptId; import org.apache.tajo.catalog.*; import org.apache.tajo.catalog.proto.CatalogProtos.FragmentProto; +import org.apache.tajo.catalog.proto.CatalogProtos.PartitionDescProto; import org.apache.tajo.conf.TajoConf; import org.apache.tajo.exception.TajoException; import org.apache.tajo.exception.TajoRuntimeException; @@ -363,7 +364,7 @@ public void rewritePlan(OverridableConf context, LogicalPlan plan) throws TajoEx public abstract Path commitTable(OverridableConf queryContext, ExecutionBlockId finalEbId, LogicalPlan plan, Schema schema, - TableDesc tableDesc) throws IOException; + TableDesc tableDesc, List partitions) throws IOException; public abstract void rollbackTable(LogicalNode node) throws IOException, TajoException; diff --git a/tajo-storage/tajo-storage-hbase/src/main/java/org/apache/tajo/storage/hbase/HBaseTablespace.java b/tajo-storage/tajo-storage-hbase/src/main/java/org/apache/tajo/storage/hbase/HBaseTablespace.java index 132ceff0ae..4260e8ecc6 100644 --- a/tajo-storage/tajo-storage-hbase/src/main/java/org/apache/tajo/storage/hbase/HBaseTablespace.java +++ b/tajo-storage/tajo-storage-hbase/src/main/java/org/apache/tajo/storage/hbase/HBaseTablespace.java @@ -40,6 +40,7 @@ import org.apache.tajo.*; import org.apache.tajo.catalog.*; import org.apache.tajo.catalog.statistics.TableStats; +import org.apache.tajo.catalog.proto.CatalogProtos.PartitionDescProto; import org.apache.tajo.common.TajoDataTypes.Type; import org.apache.tajo.conf.TajoConf; import org.apache.tajo.datum.Datum; @@ -911,7 +912,7 @@ public Pair getIndexablePredicateValue(ColumnMapping columnMapping @Override public Path commitTable(OverridableConf queryContext, ExecutionBlockId finalEbId, LogicalPlan plan, Schema schema, - 
TableDesc tableDesc) throws IOException { + TableDesc tableDesc, List partitions) throws IOException { if (tableDesc == null) { throw new IOException("TableDesc is null while calling loadIncrementalHFiles: " + finalEbId); } diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java index 35504afaf3..495afba75f 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java @@ -33,6 +33,7 @@ import org.apache.tajo.*; import org.apache.tajo.catalog.*; import org.apache.tajo.catalog.statistics.TableStats; +import org.apache.tajo.catalog.proto.CatalogProtos.PartitionDescProto; import org.apache.tajo.conf.TajoConf; import org.apache.tajo.exception.TajoInternalError; import org.apache.tajo.exception.UnsupportedException; @@ -50,6 +51,7 @@ import java.text.NumberFormat; import java.util.*; +import static java.lang.String.format; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED_DEFAULT; @@ -768,8 +770,8 @@ public void verifySchemaToWrite(TableDesc tableDesc, Schema outSchema) { @Override public Path commitTable(OverridableConf queryContext, ExecutionBlockId finalEbId, LogicalPlan plan, - Schema schema, TableDesc tableDesc) throws IOException { - return commitOutputData(queryContext, true); + Schema schema, TableDesc tableDesc, List partitions) throws IOException { + return commitOutputData(queryContext, true, partitions); } @Override @@ -789,177 +791,248 @@ public TupleRange[] getInsertSortRanges(OverridableConf queryContext, TableDesc * @return Saved path * @throws java.io.IOException */ - protected Path commitOutputData(OverridableConf queryContext, boolean changeFileSeq) throws IOException 
{ + protected Path commitOutputData(OverridableConf queryContext, boolean changeFileSeq, + List partitions) throws IOException { + Path finalOutputDir = null; Path stagingDir = new Path(queryContext.get(QueryVars.STAGING_DIR)); Path stagingResultDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME); - Path finalOutputDir; + Path oldTableDir = new Path(stagingDir, TajoConstants.INSERT_OVERWIRTE_OLD_TABLE_NAME); + OutputCommitHandle commitHandle = new OutputCommitHandle(); + if (!queryContext.get(QueryVars.OUTPUT_TABLE_URI, "").isEmpty()) { finalOutputDir = new Path(queryContext.get(QueryVars.OUTPUT_TABLE_URI)); - try { - FileSystem fs = stagingResultDir.getFileSystem(conf); + boolean checkExistingPartition = queryContext.getBool(SessionVars.PARTITION_NO_RESULT_OVERWRITE_ENABLED); + try { if (queryContext.getBool(QueryVars.OUTPUT_OVERWRITE, false)) { // INSERT OVERWRITE INTO + if (partitions != null) { + commitInsertOverwriteOrCreateWithPartition(stagingResultDir, finalOutputDir, oldTableDir, partitions, + checkExistingPartition, commitHandle); + } else { + commitInsertOverwrite(stagingResultDir, finalOutputDir, oldTableDir); + } + } else { + String queryType = queryContext.get(QueryVars.COMMAND_TYPE); + Preconditions.checkNotNull(queryContext); + if (queryType.equals(NodeType.INSERT.name())) { // INSERT INTO + if (partitions != null) { + commitInsertWithPartition(stagingResultDir, finalOutputDir, partitions, commitHandle, changeFileSeq); + } else { + commitInsert(stagingResultDir, finalOutputDir, changeFileSeq); + } + cleanupTemporaryDirectory(stagingResultDir); + } else if (queryType.equals(NodeType.CREATE_TABLE.name())){ // CREATE TABLE AS SELECT (CTAS) + if (partitions != null) { + commitInsertOverwriteOrCreateWithPartition(stagingResultDir, finalOutputDir, oldTableDir, partitions, + checkExistingPartition, commitHandle); + } else { + commitCreate(stagingResultDir, finalOutputDir); + } + } else { + throw new IOException("Cannot handle query type:" + 
queryType); + } + } - // It moves the original table into the temporary location. - // Then it moves the new result table into the original table location. - // Upon failed, it recovers the original table if possible. - boolean movedToOldTable = false; - boolean committed = false; - Path oldTableDir = new Path(stagingDir, TajoConstants.INSERT_OVERWIRTE_OLD_TABLE_NAME); - ContentSummary summary = fs.getContentSummary(stagingResultDir); - - // When inserting empty data into a partitioned table, check if keep existing data need to be remove or not. - boolean overwriteEnabled = queryContext.getBool(SessionVars.PARTITION_NO_RESULT_OVERWRITE_ENABLED); - - // If existing data doesn't need to keep, check if there are some files. - if ( (!queryContext.get(QueryVars.OUTPUT_PARTITIONS, "").isEmpty()) - && (!overwriteEnabled || (overwriteEnabled && summary.getFileCount() > 0L))) { - // This is a map for existing non-leaf directory to rename. A key is current directory and a value is - // renaming directory. - Map renameDirs = new HashMap<>(); - // This is a map for recovering existing partition directory. A key is current directory and a value is - // temporary directory to back up. - Map recoveryDirs = new HashMap<>(); - - try { - if (!fs.exists(finalOutputDir)) { - fs.mkdirs(finalOutputDir); - } + // remove the staging directory if the final output dir is given. 
+ Path stagingDirRoot = stagingDir.getParent(); + fs.delete(stagingDirRoot, true); + } catch (Throwable t) { + LOG.error(t); + throw new IOException(t); + } + } else { + finalOutputDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME); + } + return finalOutputDir; + } - visitPartitionedDirectory(fs, stagingResultDir, finalOutputDir, stagingResultDir.toString(), - renameDirs, oldTableDir); - - // Rename target partition directories - for(Map.Entry entry : renameDirs.entrySet()) { - // Backup existing data files for recovering - if (fs.exists(entry.getValue())) { - String recoveryPathString = entry.getValue().toString().replaceAll(finalOutputDir.toString(), - oldTableDir.toString()); - Path recoveryPath = new Path(recoveryPathString); - fs.rename(entry.getValue(), recoveryPath); - fs.exists(recoveryPath); - recoveryDirs.put(entry.getValue(), recoveryPath); - } - // Delete existing directory - fs.delete(entry.getValue(), true); - // Rename staging directory to final output directory - fs.rename(entry.getKey(), entry.getValue()); - } + private void commitInsertOverwriteOrCreateWithPartition(Path stagingResultDir, Path finalOutputDir, + Path oldTableDir, List partitions, boolean checkExistingPartition, + OutputCommitHandle commitHandle) throws IOException { + String stagingResultPath = stagingResultDir.toString(); + String finalOutputPath = finalOutputDir.toString(); + String oldTablePath = oldTableDir.toString(); - } catch (IOException ioe) { - // Remove created dirs - for(Map.Entry entry : renameDirs.entrySet()) { - fs.delete(entry.getValue(), true); - } + try { + for(PartitionDescProto partition : partitions) { + Path targetPath = new Path(partition.getPath() + "/"); + Path stagingPath = new Path(partition.getPath().replaceAll(finalOutputPath, stagingResultPath)+"/"); + Path backupPath = new Path(partition.getPath().replaceAll(finalOutputPath, oldTablePath)); + + // Move existing directory to backup directory. 
+ if (checkExistingPartition && fs.exists(targetPath)) { + renameDirectory(targetPath, backupPath); + commitHandle.addBackupPath(backupPath); + } - // Recovery renamed dirs - for(Map.Entry entry : recoveryDirs.entrySet()) { - fs.delete(entry.getValue(), true); - fs.rename(entry.getValue(), entry.getKey()); - } + // Move staging directory to target directory + renameDirectory(stagingPath, targetPath); + commitHandle.addTargetPath(targetPath); - throw new IOException(ioe.getMessage()); - } - } else { // no partition - try { + // Summarize the volume of partitions + // TODO : This will improved at TAJO-2069 + long totalSize = calculateSize(targetPath); + PartitionDescProto.Builder builder = partition.toBuilder(); + builder.setNumBytes(totalSize); + commitHandle.addPartition(builder.build()); + } + partitions.clear(); + partitions.addAll(commitHandle.getPartitions()); + } catch (Exception e) { + rollback(stagingResultDir, finalOutputDir, oldTableDir, commitHandle); + throw new IOException("Failed to create partition table:", e); + } + } - // if the final output dir exists, move all contents to the temporary table dir. - // Otherwise, just make the final output dir. As a result, the final output dir will be empty. - if (fs.exists(finalOutputDir)) { - fs.mkdirs(oldTableDir); + private void commitInsertWithPartition(Path stagingResultDir, Path finalOutputDir, + List partitions, OutputCommitHandle commitHandle, boolean changeFileSeq) throws IOException { + String stagingResultPath = stagingResultDir.toString(); + String finalOutputPath = finalOutputDir.toString(); - for (FileStatus status : fs.listStatus(finalOutputDir, hiddenFileFilter)) { - fs.rename(status.getPath(), oldTableDir); - } + NumberFormat fmt = NumberFormat.getInstance(); + fmt.setGroupingUsed(false); + fmt.setMinimumIntegerDigits(3); - movedToOldTable = fs.exists(oldTableDir); - } else { // if the parent does not exist, make its parent directory. 
- fs.mkdirs(finalOutputDir); - } + try { + for(PartitionDescProto partition : partitions) { + Path targetPath = new Path(partition.getPath() + "/"); + Path stagingPath = new Path(partition.getPath().replaceAll(finalOutputPath, stagingResultPath)+"/"); - // Move the results to the final output dir. - for (FileStatus status : fs.listStatus(stagingResultDir)) { - fs.rename(status.getPath(), finalOutputDir); - } + if (!fs.exists(targetPath)) { + renameDirectory(stagingPath, targetPath); + } else { + moveResultFromStageToFinal(fs, stagingResultDir, fs.getFileStatus(stagingPath), finalOutputDir, fmt, -1, + changeFileSeq, commitHandle); + } - // Check the final output dir - committed = fs.exists(finalOutputDir); + // Summarize the volume of partitions + PartitionDescProto.Builder builder = partition.toBuilder(); + // TODO: This will improved at TAJO-2069 + builder.setNumBytes(calculateSize(targetPath)); + commitHandle.addPartition(builder.build()); + } + partitions.clear(); + partitions.addAll(commitHandle.getPartitions()); + } catch (Exception e) { + rollback(stagingResultDir, finalOutputDir, commitHandle); + throw new IOException("Failed to create partition table:", e); + } + } - } catch (IOException ioe) { - // recover the old table - if (movedToOldTable && !committed) { + private void rollback(Path stagingResultDir, Path finalOutputDir, + OutputCommitHandle commitHandle) throws IOException { + rollback(stagingResultDir, finalOutputDir, null, commitHandle); + } - // if commit is failed, recover the old data - for (FileStatus status : fs.listStatus(finalOutputDir, hiddenFileFilter)) { - fs.delete(status.getPath(), true); - } + private void rollback(Path stagingResultDir, Path finalOutputDir, Path oldTableDir, + OutputCommitHandle commitHandle) throws IOException { + String finalOutputPath = finalOutputDir.toString(); + String oldTablePath = oldTableDir != null ? 
oldTableDir.toString() : null; - for (FileStatus status : fs.listStatus(oldTableDir)) { - fs.rename(status.getPath(), finalOutputDir); - } - } + // Delete data from the output directory + List targetPaths = commitHandle.getTargetPaths(); + for(Path targetPath: targetPaths) { + fs.delete(targetPath, true); + } - throw new IOException(ioe.getMessage()); - } - } - } else { - String queryType = queryContext.get(QueryVars.COMMAND_TYPE); + // Move from backup directory to output directory + List backupPaths = commitHandle.getBackupPaths(); + for(Path backupPath: backupPaths) { + Path targetPath = new Path(backupPath.toString().replaceAll(oldTablePath, finalOutputPath)); + fs.delete(targetPath, true); + renameDirectory(backupPath, targetPath); + } - if (queryType != null && queryType.equals(NodeType.INSERT.name())) { // INSERT INTO an existing table + // Delete staging directory + fs.delete(stagingResultDir, true); + } - NumberFormat fmt = NumberFormat.getInstance(); - fmt.setGroupingUsed(false); - fmt.setMinimumIntegerDigits(3); + private void commitInsertOverwrite(Path stagingResultDir, Path finalOutputDir, Path oldTableDir) throws IOException { + // It moves the original table into the temporary location. + // Then it moves the new result table into the original table location. + // Upon failed, it recovers the original table if possible. 
+ boolean movedToOldTable = false; + boolean committed = false; - if (!queryContext.get(QueryVars.OUTPUT_PARTITIONS, "").isEmpty()) { - for(FileStatus eachFile: fs.listStatus(stagingResultDir)) { - if (eachFile.isFile()) { - LOG.warn("Partition table can't have file in a staging dir: " + eachFile.getPath()); - continue; - } - moveResultFromStageToFinal(fs, stagingResultDir, eachFile, finalOutputDir, fmt, -1, changeFileSeq); - } - } else { - int maxSeq = StorageUtil.getMaxFileSequence(fs, finalOutputDir, false) + 1; - for(FileStatus eachFile: fs.listStatus(stagingResultDir)) { - if (eachFile.getPath().getName().startsWith("_")) { - continue; - } - moveResultFromStageToFinal(fs, stagingResultDir, eachFile, finalOutputDir, fmt, maxSeq++, changeFileSeq); - } - } - // checking all file moved and remove empty dir - verifyAllFileMoved(fs, stagingResultDir); - FileStatus[] files = fs.listStatus(stagingResultDir); - if (files != null && files.length != 0) { - for (FileStatus eachFile: files) { - LOG.error("There are some unmoved files in staging dir:" + eachFile.getPath()); - } - } - } else { // CREATE TABLE AS SELECT (CTAS) - if (fs.exists(finalOutputDir)) { - for (FileStatus status : fs.listStatus(stagingResultDir)) { - fs.rename(status.getPath(), finalOutputDir); - } - } else { - fs.rename(stagingResultDir, finalOutputDir); - } - LOG.info("Moved from the staging dir to the output directory '" + finalOutputDir); - } + try { + // if the final output dir exists, move all contents to the temporary table dir. + // Otherwise, just make the final output dir. As a result, the final output dir will be empty. + if (fs.exists(finalOutputDir)) { + fs.mkdirs(oldTableDir); + + for (FileStatus status : fs.listStatus(finalOutputDir, hiddenFileFilter)) { + fs.rename(status.getPath(), oldTableDir); } - // remove the staging directory if the final output dir is given. 
- Path stagingDirRoot = stagingDir.getParent(); - fs.delete(stagingDirRoot, true); - } catch (Throwable t) { - LOG.error(t); - throw new IOException(t); + movedToOldTable = fs.exists(oldTableDir); + } else { // if the parent does not exist, make its parent directory. + fs.mkdirs(finalOutputDir); + } + + // Move the results to the final output dir. + for (FileStatus status : fs.listStatus(stagingResultDir)) { + fs.rename(status.getPath(), finalOutputDir); + } + + // Check the final output dir + committed = fs.exists(finalOutputDir); + + } catch (IOException ioe) { + // recover the old table + if (movedToOldTable && !committed) { + + // if commit is failed, recover the old data + for (FileStatus status : fs.listStatus(finalOutputDir, hiddenFileFilter)) { + fs.delete(status.getPath(), true); + } + + for (FileStatus status : fs.listStatus(oldTableDir)) { + fs.rename(status.getPath(), finalOutputDir); + } + } + + throw new IOException(ioe.getMessage()); + } + } + + private void commitInsert(Path stagingResultDir, Path finalOutputDir, boolean changeFileSeq) throws IOException { + NumberFormat fmt = NumberFormat.getInstance(); + fmt.setGroupingUsed(false); + fmt.setMinimumIntegerDigits(3); + + int maxSeq = StorageUtil.getMaxFileSequence(fs, finalOutputDir, false) + 1; + for(FileStatus eachFile: fs.listStatus(stagingResultDir)) { + if (eachFile.getPath().getName().startsWith("_")) { + continue; + } + moveResultFromStageToFinal(fs, stagingResultDir, eachFile, finalOutputDir, fmt, maxSeq++, changeFileSeq, null); + } + } + + private void commitCreate(Path stagingResultDir, Path finalOutputDir) throws IOException { + if (fs.exists(finalOutputDir)) { + for (FileStatus status : fs.listStatus(stagingResultDir)) { + fs.rename(status.getPath(), finalOutputDir); } } else { - finalOutputDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME); + fs.rename(stagingResultDir, finalOutputDir); } + LOG.info("Moved from the staging dir to the output directory '" + finalOutputDir); + } - 
return finalOutputDir; + /** + * checking all file moved and remove empty dir + * @param stagingResultDir + * @throws IOException + */ + private void cleanupTemporaryDirectory(Path stagingResultDir) throws IOException { + verifyAllFileMoved(fs, stagingResultDir); + FileStatus[] files = fs.listStatus(stagingResultDir); + if (files != null && files.length != 0) { + for (FileStatus eachFile: files) { + LOG.error("There are some unmoved files in staging dir:" + eachFile.getPath()); + } + } } /** @@ -974,9 +1047,8 @@ protected Path commitOutputData(OverridableConf queryContext, boolean changeFile * @throws java.io.IOException */ private void moveResultFromStageToFinal(FileSystem fs, Path stagingResultDir, - FileStatus fileStatus, Path finalOutputPath, - NumberFormat nf, - int fileSeq, boolean changeFileSeq) throws IOException { + FileStatus fileStatus, Path finalOutputPath, NumberFormat nf, + int fileSeq, boolean changeFileSeq, OutputCommitHandle commitHandle) throws IOException { if (fileStatus.isDirectory()) { String subPath = extractSubPath(stagingResultDir, fileStatus.getPath()); if (subPath != null) { @@ -989,7 +1061,8 @@ private void moveResultFromStageToFinal(FileSystem fs, Path stagingResultDir, if (eachFile.getPath().getName().startsWith("_")) { continue; } - moveResultFromStageToFinal(fs, stagingResultDir, eachFile, finalOutputPath, nf, ++maxSeq, changeFileSeq); + moveResultFromStageToFinal(fs, stagingResultDir, eachFile, finalOutputPath, nf, ++maxSeq, changeFileSeq, + commitHandle); } } else { throw new IOException("Wrong staging dir:" + stagingResultDir + "," + fileStatus.getPath()); @@ -1011,9 +1084,12 @@ private void moveResultFromStageToFinal(FileSystem fs, Path stagingResultDir, if (success) { LOG.info("Moving staging file[" + fileStatus.getPath() + "] + " + "to final output[" + finalSubPath + "]"); + if (commitHandle != null) { + commitHandle.addTargetPath(finalSubPath); + } } else { LOG.error("Can't move staging file[" + fileStatus.getPath() + "] + " + 
- "to final output[" + finalSubPath + "]"); + "to final output[" + finalSubPath + "]"); } } } @@ -1085,63 +1161,34 @@ private boolean verifyAllFileMoved(FileSystem fs, Path stagingPath) throws IOExc return true; } - /** - * This method sets a rename map which includes renamed staging directory to final output directory recursively. - * If there exists some data files, this delete it for duplicate data. - * - * - * @param fs - * @param stagingPath - * @param outputPath - * @param stagingParentPathString - * @throws java.io.IOException - */ - private void visitPartitionedDirectory(FileSystem fs, Path stagingPath, Path outputPath, - String stagingParentPathString, - Map renameDirs, Path oldTableDir) throws IOException { - FileStatus[] files = fs.listStatus(stagingPath); - - for(FileStatus eachFile : files) { - if (eachFile.isDirectory()) { - Path oldPath = eachFile.getPath(); - - // Make recover directory. - String recoverPathString = oldPath.toString().replaceAll(stagingParentPathString, - oldTableDir.toString()); - Path recoveryPath = new Path(recoverPathString); - if (!fs.exists(recoveryPath)) { - fs.mkdirs(recoveryPath); - } - visitPartitionedDirectory(fs, eachFile.getPath(), outputPath, stagingParentPathString, - renameDirs, oldTableDir); - // Find last order partition for renaming - String newPathString = oldPath.toString().replaceAll(stagingParentPathString, - outputPath.toString()); - Path newPath = new Path(newPathString); - if (!isLeafDirectory(fs, eachFile.getPath())) { - renameDirs.put(eachFile.getPath(), newPath); - } else { - if (!fs.exists(newPath)) { - fs.mkdirs(newPath); - } - } + protected void renameDirectory(Path sourcePath, Path targetPath) throws IOException { + try { + if (!fs.exists(targetPath.getParent())) { + createDirectory(targetPath.getParent()); + } + if (!rename(sourcePath, targetPath)) { + throw new IOException(format("Failed to rename %s to %s: rename returned false", sourcePath, targetPath)); } + } catch (IOException e) { + 
e.printStackTrace(); + throw new IOException(format("Failed to rename %s to %s", sourcePath, targetPath), e); } - } - private boolean isLeafDirectory(FileSystem fs, Path path) throws IOException { - boolean retValue = false; + } - FileStatus[] files = fs.listStatus(path); - for (FileStatus file : files) { - if (fs.isDirectory(file.getPath())) { - retValue = true; - break; + protected void createDirectory(Path path) throws IOException { + try { + if (!fs.mkdirs(path)) { + throw new IOException(format("mkdirs %s returned false", path)); } + } catch (IOException e) { + throw new IOException("Failed to create directory:" + path, e); } + } - return retValue; + protected boolean rename(Path sourcePath, Path targetPath) throws IOException { + return fs.rename(sourcePath, targetPath); } diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/OutputCommitHandle.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/OutputCommitHandle.java new file mode 100644 index 0000000000..c6e977756b --- /dev/null +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/OutputCommitHandle.java @@ -0,0 +1,74 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.tajo.storage; + +import org.apache.hadoop.fs.Path; +import org.apache.tajo.catalog.proto.CatalogProtos.PartitionDescProto; + +import java.util.ArrayList; +import java.util.List; + +public class OutputCommitHandle { + + private List backupPaths; + private List targetPaths; + private List partitions; + + public OutputCommitHandle() { + backupPaths = new ArrayList(); + targetPaths = new ArrayList(); + partitions = new ArrayList(); + } + + public List getBackupPaths() { + return backupPaths; + } + + public void setBackupPaths(List backupPaths) { + this.backupPaths = backupPaths; + } + + public void addBackupPath(Path path) { + this.backupPaths.add(path); + } + + public List getTargetPaths() { + return targetPaths; + } + + public void setTargetPaths(List renamedPaths) { + this.targetPaths = renamedPaths; + } + + public void addTargetPath(Path path) { + this.targetPaths.add(path); + } + + public List getPartitions() { + return partitions; + } + + public void setPartitions(List partitions) { + this.partitions = partitions; + } + + public void addPartition(PartitionDescProto partition) { + this.partitions.add(partition); + } +} diff --git a/tajo-storage/tajo-storage-jdbc/src/main/java/org/apache/tajo/storage/jdbc/JdbcTablespace.java b/tajo-storage/tajo-storage-jdbc/src/main/java/org/apache/tajo/storage/jdbc/JdbcTablespace.java index fa6cf486e2..536e238c63 100644 --- a/tajo-storage/tajo-storage-jdbc/src/main/java/org/apache/tajo/storage/jdbc/JdbcTablespace.java +++ b/tajo-storage/tajo-storage-jdbc/src/main/java/org/apache/tajo/storage/jdbc/JdbcTablespace.java @@ -27,6 +27,7 @@ import org.apache.tajo.ExecutionBlockId; import org.apache.tajo.OverridableConf; import org.apache.tajo.catalog.*; +import org.apache.tajo.catalog.proto.CatalogProtos.PartitionDescProto; import org.apache.tajo.exception.NotImplementedException; import org.apache.tajo.exception.TajoInternalError; import org.apache.tajo.exception.TajoRuntimeException; @@ -178,7 +179,7 @@ 
public void prepareTable(LogicalNode node) throws IOException { @Override public Path commitTable(OverridableConf queryContext, ExecutionBlockId finalEbId, LogicalPlan plan, Schema schema, - TableDesc tableDesc) throws IOException { + TableDesc tableDesc, List partitions) throws IOException { throw new TajoRuntimeException(new NotImplementedException()); } From 17b3204efe0b427ab2818e8802b8f8cc965ab02b Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 1 Feb 2016 11:08:54 +0900 Subject: [PATCH 068/127] TAJO-2069: Remove getContentsSummary in TableSpace and Query. --- .../java/org/apache/tajo/conf/TajoConf.java | 9 ++ .../org/apache/tajo/querymaster/Query.java | 18 +-- tajo-dist/pom.xml | 1 + .../apache/tajo/storage/FileTablespace.java | 31 ++-- tajo-storage/tajo-storage-s3/pom.xml | 25 ++++ .../apache/tajo/storage/s3/S3TableSpace.java | 133 ++++++++++++++++++ 6 files changed, 199 insertions(+), 18 deletions(-) diff --git a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java index a535ece61a..feee9db10a 100644 --- a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java +++ b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java @@ -236,6 +236,15 @@ public static enum ConfVars implements ConfigKey { // for RCFile HIVEUSEEXPLICITRCFILEHEADER("tajo.exec.rcfile.use.explicit.header", true, Validators.bool()), + // S3 Configuration -------------------------------------------------- + S3_MAX_ERROR_RETRIES("tajo.s3.max-error-retries", 50), + S3_SSL_ENABLED("tajo.s3.ssl.enabled", true), + S3_CONNECT_TIMEOUT("tajo.s3.connect-timeout", "5m"), + S3_SOCKET_TIMEOUT("tajo.s3.socket-timeout", "5m"), + S3_MAX_CONNECTIONS("tajo.s3.max-connections", 500), + S3_USE_INSTANCE_CREDENTIALS("tajo.s3.use-instance-credentials", true), + S3_PIN_CLIENT_TO_CURRENT_REGION("tajo.s3.pin-client-to-current-region", false), + // RPC -------------------------------------------------------------------- // Internal 
RPC Client INTERNAL_RPC_CLIENT_WORKER_THREAD_NUM("tajo.internal.rpc.client.worker-thread-num", diff --git a/tajo-core/src/main/java/org/apache/tajo/querymaster/Query.java b/tajo-core/src/main/java/org/apache/tajo/querymaster/Query.java index dae7058094..aee853c91e 100644 --- a/tajo-core/src/main/java/org/apache/tajo/querymaster/Query.java +++ b/tajo-core/src/main/java/org/apache/tajo/querymaster/Query.java @@ -46,6 +46,7 @@ import org.apache.tajo.engine.query.QueryContext; import org.apache.tajo.error.Errors.SerializedException; import org.apache.tajo.exception.ErrorUtil; +import org.apache.tajo.exception.UnsupportedException; import org.apache.tajo.master.event.*; import org.apache.tajo.plan.logical.*; import org.apache.tajo.plan.util.PlannerUtil; @@ -637,7 +638,7 @@ public void execute(QueryMaster.QueryMasterContext context, QueryContext queryCo finalOutputDir.toUri()); resultTableDesc.setExternal(true); - stats.setNumBytes(getTableVolume(query.systemConf, finalOutputDir)); + stats.setNumBytes(getTableVolume(queryContext, resultTableDesc)); resultTableDesc.setStats(stats); query.setResultDesc(resultTableDesc); } @@ -678,7 +679,7 @@ public void execute(QueryMaster.QueryMasterContext context, QueryContext queryCo if (!query.getPartitions().isEmpty()) { totalVolume = query.getPartitions().stream().mapToLong(partition -> partition.getNumBytes()).sum(); } else { - totalVolume = getTableVolume(query.systemConf, finalOutputDir); + totalVolume = getTableVolume(queryContext, tableDescTobeCreated); } stats.setNumBytes(totalVolume); @@ -719,8 +720,9 @@ public void execute(QueryMaster.QueryMasterContext context, QueryContext queryCo finalTable = new TableDesc(tableName, lastStage.getSchema(), meta, finalOutputDir.toUri()); } - long volume = getTableVolume(query.systemConf, finalOutputDir); - stats.setNumBytes(volume); + long totalVolume = getTableVolume(queryContext, finalTable); + + stats.setNumBytes(totalVolume); finalTable.setStats(stats); if (insertNode.hasTargetTable()) 
{ @@ -736,10 +738,10 @@ public void execute(QueryMaster.QueryMasterContext context, QueryContext queryCo } } - public static long getTableVolume(TajoConf systemConf, Path tablePath) throws IOException { - FileSystem fs = tablePath.getFileSystem(systemConf); - ContentSummary directorySummary = fs.getContentSummary(tablePath); - return directorySummary.getLength(); + public static long getTableVolume(QueryContext queryContext, TableDesc tableDesc) throws UnsupportedException { + Tablespace space = TablespaceManager.get(queryContext.get(QueryVars.OUTPUT_TABLE_URI, "")); + + return space.getTableVolume(tableDesc, Optional.empty()); } public static class StageCompletedTransition implements SingleArcTransition { diff --git a/tajo-dist/pom.xml b/tajo-dist/pom.xml index 095f128809..e679741cb2 100644 --- a/tajo-dist/pom.xml +++ b/tajo-dist/pom.xml @@ -146,6 +146,7 @@ run cp -r $ROOT/tajo-sql-parser/target/tajo-sql-parser-${project.version}/* . run cp -r $ROOT/tajo-storage/tajo-storage-jdbc/target/tajo-storage-jdbc-${project.version}.jar . run cp -r $ROOT/tajo-storage/tajo-storage-pgsql/target/tajo-storage-pgsql-${project.version}.jar . + run cp -r $ROOT/tajo-storage/tajo-storage-s3/target/tajo-storage-s3-${project.version}.jar . run cp -r $ROOT/tajo-pullserver/target/tajo-pullserver-${project.version}.jar . run cp -r $ROOT/tajo-metrics/target/tajo-metrics-${project.version}.jar . run cp -r $ROOT/tajo-core/target/tajo-core-${project.version}.jar . 
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java index 495afba75f..e62d914611 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java @@ -129,13 +129,13 @@ protected void storageInit() throws IOException { @Override public long getTableVolume(TableDesc table, Optional filter) throws UnsupportedException { Path path = new Path(table.getUri()); - ContentSummary summary; + long totalVolume = 0L; try { - summary = fs.getContentSummary(path); + totalVolume = getTotalFileSize(path); } catch (IOException e) { throw new TajoInternalError(e); } - return summary.getLength(); + return totalVolume; } @Override @@ -252,7 +252,7 @@ public long calculateSize(Path tablePath) throws IOException { long totalSize = 0; if (fs.exists(tablePath)) { - totalSize = fs.getContentSummary(tablePath).getLength(); + totalSize = getTotalFileSize(tablePath); } return totalSize; @@ -870,8 +870,7 @@ private void commitInsertOverwriteOrCreateWithPartition(Path stagingResultDir, P commitHandle.addTargetPath(targetPath); // Summarize the volume of partitions - // TODO : This will improved at TAJO-2069 - long totalSize = calculateSize(targetPath); + long totalSize = getTotalFileSize(targetPath); PartitionDescProto.Builder builder = partition.toBuilder(); builder.setNumBytes(totalSize); commitHandle.addPartition(builder.build()); @@ -907,8 +906,7 @@ private void commitInsertWithPartition(Path stagingResultDir, Path finalOutputDi // Summarize the volume of partitions PartitionDescProto.Builder builder = partition.toBuilder(); - // TODO: This will improved at TAJO-2069 - builder.setNumBytes(calculateSize(targetPath)); + builder.setNumBytes(getTotalFileSize(targetPath)); commitHandle.addPartition(builder.build()); } 
partitions.clear(); @@ -1161,6 +1159,21 @@ private boolean verifyAllFileMoved(FileSystem fs, Path stagingPath) throws IOExc return true; } + protected long getTotalFileSize(Path path) throws IOException { + long totalVolume = 0L; + + // f is a file + FileStatus status = fs.getFileStatus(path); + if (status.isFile()) { + totalVolume = status.getLen(); + } + // f is a directory + for(FileStatus s : listStatus(path)) { + long length = s.isDirectory() ? getTotalFileSize(s.getPath()) : s.getLen(); + totalVolume += length; + } + return totalVolume; + } protected void renameDirectory(Path sourcePath, Path targetPath) throws IOException { try { @@ -1190,6 +1203,4 @@ protected void createDirectory(Path path) throws IOException { protected boolean rename(Path sourcePath, Path targetPath) throws IOException { return fs.rename(sourcePath, targetPath); } - - } diff --git a/tajo-storage/tajo-storage-s3/pom.xml b/tajo-storage/tajo-storage-s3/pom.xml index a9a541aed1..7127940334 100644 --- a/tajo-storage/tajo-storage-s3/pom.xml +++ b/tajo-storage/tajo-storage-s3/pom.xml @@ -167,6 +167,31 @@ + + + org.apache.hadoop + hadoop-aws + provided + ${hadoop.version} + + + + org.weakref + jmxutils + 1.18 + + + + org.apache.httpcomponents + httpclient + 4.2.5 + + + org.apache.httpcomponents + httpcore + 4.2.5 + + junit junit diff --git a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java index 4bcdb60a68..10a3794862 100644 --- a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java +++ b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java @@ -18,14 +18,147 @@ package org.apache.tajo.storage.s3; +import java.io.IOException; import java.net.URI; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; +import com.amazonaws.ClientConfiguration; +import com.amazonaws.Protocol; +import 
com.amazonaws.auth.AWSCredentials; +import com.amazonaws.auth.AWSCredentialsProvider; +import com.amazonaws.auth.BasicAWSCredentials; +import com.amazonaws.auth.InstanceProfileCredentialsProvider; +import com.amazonaws.internal.StaticCredentialsProvider; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.AmazonS3Client; +import com.amazonaws.services.s3.iterable.S3Objects; +import com.amazonaws.services.s3.model.ListObjectsRequest; +import com.amazonaws.services.s3.model.ObjectListing; +import com.amazonaws.services.s3.model.S3Object; +import com.amazonaws.services.s3.model.S3ObjectSummary; +import com.google.common.primitives.Ints; +import io.airlift.units.Duration; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3.S3Credentials; +import org.apache.tajo.conf.TajoConf; import org.apache.tajo.storage.FileTablespace; import net.minidev.json.JSONObject; +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Strings.nullToEmpty; +import static java.lang.String.format; +import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT; + public class S3TableSpace extends FileTablespace { + private final Log LOG = LogFactory.getLog(S3TableSpace.class); + + private AmazonS3 s3; + private boolean useInstanceCredentials; + public S3TableSpace(String spaceName, URI uri, JSONObject config) { super(spaceName, uri, config); } + + @Override + public void init(TajoConf tajoConf) throws IOException { + super.init(tajoConf); + + int maxErrorRetries = conf.getIntVar(TajoConf.ConfVars.S3_MAX_ERROR_RETRIES); + boolean sslEnabled = conf.getBoolVar(TajoConf.ConfVars.S3_SSL_ENABLED); + + Duration connectTimeout = Duration.valueOf(conf.getVar(TajoConf.ConfVars.S3_CONNECT_TIMEOUT)); + Duration socketTimeout = 
Duration.valueOf(conf.getVar(TajoConf.ConfVars.S3_SOCKET_TIMEOUT)); + int maxConnections = conf.getIntVar(TajoConf.ConfVars.S3_MAX_CONNECTIONS); + + this.useInstanceCredentials = conf.getBoolVar(TajoConf.ConfVars.S3_USE_INSTANCE_CREDENTIALS); + + ClientConfiguration configuration = new ClientConfiguration() + .withMaxErrorRetry(maxErrorRetries) + .withProtocol(sslEnabled ? Protocol.HTTPS : Protocol.HTTP) + .withConnectionTimeout(Ints.checkedCast(connectTimeout.toMillis())) + .withSocketTimeout(Ints.checkedCast(socketTimeout.toMillis())) + .withMaxConnections(maxConnections); + + Path tajoRootPath = TajoConf.getTajoRootDir(conf); + FileSystem defaultFS = tajoRootPath.getFileSystem(conf); + this.s3 = createAmazonS3Client(defaultFS.getUri(), conf, configuration); + + if (s3 != null) { + String endPoint = conf.getTrimmed(ENDPOINT,""); + try { + if (!endPoint.isEmpty()) { + s3.setEndpoint(endPoint); + } + } catch (IllegalArgumentException e) { + String msg = "Incorrect endpoint: " + e.getMessage(); + LOG.error(msg); + throw new IllegalArgumentException(msg, e); + } + + if (!s3.doesBucketExist(uri.getHost())) { + throw new IOException("Bucket " + uri.getHost() + " does not exist"); + } + + LOG.info("Amazon3Client is initialized."); + } + } + + private AmazonS3Client createAmazonS3Client(URI uri, Configuration hadoopConfig, ClientConfiguration clientConfig) { + AWSCredentialsProvider credentials = getAwsCredentialsProvider(uri, hadoopConfig); + AmazonS3Client client = new AmazonS3Client(credentials, clientConfig); + return client; + } + + private AWSCredentialsProvider getAwsCredentialsProvider(URI uri, Configuration conf) { + // first try credentials from URI or static properties + try { + return new StaticCredentialsProvider(getAwsCredentials(uri, conf)); + } catch (IllegalArgumentException ignored) { + } + + if (useInstanceCredentials) { + return new InstanceProfileCredentialsProvider(); + } + + throw new RuntimeException("S3 credentials not configured"); + } + + 
private static AWSCredentials getAwsCredentials(URI uri, Configuration conf) { + S3Credentials credentials = new S3Credentials(); + credentials.initialize(uri, conf); + return new BasicAWSCredentials(credentials.getAccessKey(), credentials.getSecretAccessKey()); + } + + @Override + protected long getTotalFileSize(Path path) throws IOException { + String key = keyFromPath(path); + if (!key.isEmpty()) { + key += "/"; + } + + Iterable objectSummaries = S3Objects.withPrefix(s3, uri.getHost(), key); + Stream objectStream = StreamSupport.stream(objectSummaries.spliterator(), false); + long totalBucketSize = objectStream.mapToLong(object -> object.getSize()).sum(); + objectStream.close(); + return totalBucketSize; + } + + private String keyFromPath(Path path) + { + checkArgument(path.isAbsolute(), "Path is not absolute: %s", path); + String key = nullToEmpty(path.toUri().getPath()); + if (key.startsWith("/")) { + key = key.substring(1); + } + if (key.endsWith("/")) { + key = key.substring(0, key.length() - 1); + } + return key; + } } From 1948c5cc9e045e447d07f91eb5007cf4e967f9eb Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 1 Feb 2016 11:38:06 +0900 Subject: [PATCH 069/127] Implement unit test cases --- tajo-storage/tajo-storage-s3/pom.xml | 9 +++++ .../apache/tajo/storage/s3/S3TableSpace.java | 14 +++---- .../tajo/storage/s3/TestS3TableSpace.java | 39 +++++++++++++++++++ 3 files changed, 54 insertions(+), 8 deletions(-) diff --git a/tajo-storage/tajo-storage-s3/pom.xml b/tajo-storage/tajo-storage-s3/pom.xml index 7127940334..65d00f50da 100644 --- a/tajo-storage/tajo-storage-s3/pom.xml +++ b/tajo-storage/tajo-storage-s3/pom.xml @@ -34,6 +34,7 @@ UTF-8 UTF-8 + 0.97 @@ -197,6 +198,14 @@ junit test + + + org.testng + testng + 6.9.6 + test + + diff --git a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java index 10a3794862..3fc2d41994 100644 
--- a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java +++ b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java @@ -33,10 +33,8 @@ import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.AmazonS3Client; import com.amazonaws.services.s3.iterable.S3Objects; -import com.amazonaws.services.s3.model.ListObjectsRequest; -import com.amazonaws.services.s3.model.ObjectListing; -import com.amazonaws.services.s3.model.S3Object; import com.amazonaws.services.s3.model.S3ObjectSummary; +import com.google.common.annotations.VisibleForTesting; import com.google.common.primitives.Ints; import io.airlift.units.Duration; import org.apache.commons.logging.Log; @@ -52,7 +50,6 @@ import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Strings.nullToEmpty; -import static java.lang.String.format; import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT; public class S3TableSpace extends FileTablespace { @@ -101,10 +98,6 @@ public void init(TajoConf tajoConf) throws IOException { throw new IllegalArgumentException(msg, e); } - if (!s3.doesBucketExist(uri.getHost())) { - throw new IOException("Bucket " + uri.getHost() + " does not exist"); - } - LOG.info("Amazon3Client is initialized."); } } @@ -161,4 +154,9 @@ private String keyFromPath(Path path) } return key; } + + @VisibleForTesting + public AmazonS3 getAmazonS3Client() { + return s3; + } } diff --git a/tajo-storage/tajo-storage-s3/src/test/java/org/apache/tajo/storage/s3/TestS3TableSpace.java b/tajo-storage/tajo-storage-s3/src/test/java/org/apache/tajo/storage/s3/TestS3TableSpace.java index 2d0677885c..293d39e40e 100644 --- a/tajo-storage/tajo-storage-s3/src/test/java/org/apache/tajo/storage/s3/TestS3TableSpace.java +++ b/tajo-storage/tajo-storage-s3/src/test/java/org/apache/tajo/storage/s3/TestS3TableSpace.java @@ -18,6 +18,11 @@ package org.apache.tajo.storage.s3; +import 
com.amazonaws.auth.AWSCredentialsProvider; +import com.amazonaws.auth.InstanceProfileCredentialsProvider; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.AmazonS3Client; +import com.google.common.base.Throwables; import net.minidev.json.JSONObject; import org.apache.tajo.conf.TajoConf; import org.apache.tajo.storage.TablespaceManager; @@ -26,9 +31,12 @@ import org.junit.Test; import java.io.IOException; +import java.lang.reflect.Field; import java.net.URI; +import static com.google.common.base.Preconditions.checkArgument; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; public class TestS3TableSpace { @@ -41,6 +49,8 @@ public static void setUp() throws Exception { TajoConf tajoConf = new TajoConf(); tajoConf.set("fs.s3.impl", MockS3FileSystem.class.getName()); + tajoConf.set("fs.s3a.access.key", "test_access_key_id"); + tajoConf.set("fs.s3a.secret.key", "test_secret_access_key"); tablespace.init(tajoConf); TablespaceManager.addTableSpaceForTest(tablespace); @@ -59,4 +69,33 @@ public void testTablespaceHandler() throws Exception { assertTrue((TablespaceManager.get(S3_URI)) instanceof S3TableSpace); assertEquals(S3_URI, TablespaceManager.get(S3_URI).getUri().toASCIIString()); } + + @Test + public void testInstanceCredentialsEnabled() throws Exception { + assertTrue((TablespaceManager.getByName(SPACENAME)) instanceof S3TableSpace); + S3TableSpace tableSpace = TablespaceManager.getByName(SPACENAME); + + assertNotNull(tableSpace.getAmazonS3Client()); + assertTrue((tableSpace.getAmazonS3Client()) instanceof AmazonS3Client); + + assertTrue(getAwsCredentialsProvider(tableSpace.getAmazonS3Client()) + instanceof InstanceProfileCredentialsProvider); + } + + private AWSCredentialsProvider getAwsCredentialsProvider(AmazonS3 s3) { + return getFieldValue(s3, "awsCredentialsProvider", AWSCredentialsProvider.class); + } + + @SuppressWarnings("unchecked") + private T 
getFieldValue(Object instance, String name, Class type) { + try { + Field field = instance.getClass().getDeclaredField(name); + checkArgument(field.getType() == type, "expected %s but found %s", type, field.getType()); + field.setAccessible(true); + return (T) field.get(instance); + } + catch (ReflectiveOperationException e) { + throw Throwables.propagate(e); + } + } } From 322ab8391da5921c3b95dda1174fcb2b6c40cc45 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 1 Feb 2016 12:27:44 +0900 Subject: [PATCH 070/127] Add log messages --- .../main/java/org/apache/tajo/storage/FileTablespace.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java index e62d914611..0314385605 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java @@ -804,6 +804,8 @@ protected Path commitOutputData(OverridableConf queryContext, boolean changeFile boolean checkExistingPartition = queryContext.getBool(SessionVars.PARTITION_NO_RESULT_OVERWRITE_ENABLED); try { + long startTime = System.currentTimeMillis(); + LOG.info("Output-commit started"); if (queryContext.getBool(QueryVars.OUTPUT_OVERWRITE, false)) { // INSERT OVERWRITE INTO if (partitions != null) { commitInsertOverwriteOrCreateWithPartition(stagingResultDir, finalOutputDir, oldTableDir, partitions, @@ -836,6 +838,11 @@ protected Path commitOutputData(OverridableConf queryContext, boolean changeFile // remove the staging directory if the final output dir is given. 
Path stagingDirRoot = stagingDir.getParent(); fs.delete(stagingDirRoot, true); + + long finishTime = System.currentTimeMillis(); + long elapsedMills = finishTime - startTime; + LOG.info(format("Output-commit finished : %d ms elapsed.", elapsedMills)); + } catch (Throwable t) { LOG.error(t); throw new IOException(t); From 21382dd7fa0e3ed98ce4020910b5c086ba12f84d Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 1 Feb 2016 15:24:30 +0900 Subject: [PATCH 071/127] Apply parallelStream --- .../apache/tajo/storage/FileTablespace.java | 42 ++++++++----------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java index 495afba75f..70b67106d8 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java @@ -837,6 +837,7 @@ protected Path commitOutputData(OverridableConf queryContext, boolean changeFile Path stagingDirRoot = stagingDir.getParent(); fs.delete(stagingDirRoot, true); } catch (Throwable t) { + rollback(stagingResultDir, finalOutputDir, oldTableDir, commitHandle); LOG.error(t); throw new IOException(t); } @@ -853,10 +854,10 @@ private void commitInsertOverwriteOrCreateWithPartition(Path stagingResultDir, P String finalOutputPath = finalOutputDir.toString(); String oldTablePath = oldTableDir.toString(); - try { - for(PartitionDescProto partition : partitions) { + partitions.parallelStream().forEach(partition -> { + try { Path targetPath = new Path(partition.getPath() + "/"); - Path stagingPath = new Path(partition.getPath().replaceAll(finalOutputPath, stagingResultPath)+"/"); + Path stagingPath = new Path(partition.getPath().replaceAll(finalOutputPath, stagingResultPath) + "/"); Path backupPath = new 
Path(partition.getPath().replaceAll(finalOutputPath, oldTablePath)); // Move existing directory to backup directory. @@ -870,18 +871,16 @@ private void commitInsertOverwriteOrCreateWithPartition(Path stagingResultDir, P commitHandle.addTargetPath(targetPath); // Summarize the volume of partitions - // TODO : This will improved at TAJO-2069 long totalSize = calculateSize(targetPath); PartitionDescProto.Builder builder = partition.toBuilder(); builder.setNumBytes(totalSize); commitHandle.addPartition(builder.build()); + } catch (IOException e) { + throw new ConcurrentModificationException(); } - partitions.clear(); - partitions.addAll(commitHandle.getPartitions()); - } catch (Exception e) { - rollback(stagingResultDir, finalOutputDir, oldTableDir, commitHandle); - throw new IOException("Failed to create partition table:", e); - } + }); + partitions.clear(); + partitions.addAll(commitHandle.getPartitions()); } private void commitInsertWithPartition(Path stagingResultDir, Path finalOutputDir, @@ -893,10 +892,10 @@ private void commitInsertWithPartition(Path stagingResultDir, Path finalOutputDi fmt.setGroupingUsed(false); fmt.setMinimumIntegerDigits(3); - try { - for(PartitionDescProto partition : partitions) { + partitions.parallelStream().forEach(partition -> { + try { Path targetPath = new Path(partition.getPath() + "/"); - Path stagingPath = new Path(partition.getPath().replaceAll(finalOutputPath, stagingResultPath)+"/"); + Path stagingPath = new Path(partition.getPath().replaceAll(finalOutputPath, stagingResultPath) + "/"); if (!fs.exists(targetPath)) { renameDirectory(stagingPath, targetPath); @@ -907,21 +906,14 @@ private void commitInsertWithPartition(Path stagingResultDir, Path finalOutputDi // Summarize the volume of partitions PartitionDescProto.Builder builder = partition.toBuilder(); - // TODO: This will improved at TAJO-2069 builder.setNumBytes(calculateSize(targetPath)); commitHandle.addPartition(builder.build()); + } catch (IOException e) { + throw new 
ConcurrentModificationException(); } - partitions.clear(); - partitions.addAll(commitHandle.getPartitions()); - } catch (Exception e) { - rollback(stagingResultDir, finalOutputDir, commitHandle); - throw new IOException("Failed to create partition table:", e); - } - } - - private void rollback(Path stagingResultDir, Path finalOutputDir, - OutputCommitHandle commitHandle) throws IOException { - rollback(stagingResultDir, finalOutputDir, null, commitHandle); + }); + partitions.clear(); + partitions.addAll(commitHandle.getPartitions()); } private void rollback(Path stagingResultDir, Path finalOutputDir, Path oldTableDir, From f9d9afa3d60a68f64da9af1d8b018ddd003055bc Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 1 Feb 2016 17:22:12 +0900 Subject: [PATCH 072/127] Update max retry count for avoiding NoHttpResponseException. --- tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java index feee9db10a..83fd65e554 100644 --- a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java +++ b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java @@ -237,7 +237,7 @@ public static enum ConfVars implements ConfigKey { HIVEUSEEXPLICITRCFILEHEADER("tajo.exec.rcfile.use.explicit.header", true, Validators.bool()), // S3 Configuration -------------------------------------------------- - S3_MAX_ERROR_RETRIES("tajo.s3.max-error-retries", 50), + S3_MAX_ERROR_RETRIES("tajo.s3.max-error-retries", 100), S3_SSL_ENABLED("tajo.s3.ssl.enabled", true), S3_CONNECT_TIMEOUT("tajo.s3.connect-timeout", "5m"), S3_SOCKET_TIMEOUT("tajo.s3.socket-timeout", "5m"), From faf383ebeb3d97c7b9f9976c105320e4d0cf91d5 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 1 Feb 2016 17:27:39 +0900 Subject: [PATCH 073/127] Trigger for CI build --- 
.../src/main/java/org/apache/tajo/storage/FileTablespace.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java index 70b67106d8..d4580c86cc 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java @@ -1182,6 +1182,4 @@ protected void createDirectory(Path path) throws IOException { protected boolean rename(Path sourcePath, Path targetPath) throws IOException { return fs.rename(sourcePath, targetPath); } - - } From fec2ff1ed7f8f475487e6c2418d18368be5e4a20 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 1 Feb 2016 20:27:29 +0900 Subject: [PATCH 074/127] Replace parallelStream to Stream --- .../main/java/org/apache/tajo/storage/FileTablespace.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java index d4580c86cc..9fdd4c48c0 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java @@ -854,7 +854,7 @@ private void commitInsertOverwriteOrCreateWithPartition(Path stagingResultDir, P String finalOutputPath = finalOutputDir.toString(); String oldTablePath = oldTableDir.toString(); - partitions.parallelStream().forEach(partition -> { + partitions.stream().forEach(partition -> { try { Path targetPath = new Path(partition.getPath() + "/"); Path stagingPath = new Path(partition.getPath().replaceAll(finalOutputPath, stagingResultPath) + "/"); @@ -874,7 +874,8 @@ private void 
commitInsertOverwriteOrCreateWithPartition(Path stagingResultDir, P long totalSize = calculateSize(targetPath); PartitionDescProto.Builder builder = partition.toBuilder(); builder.setNumBytes(totalSize); - commitHandle.addPartition(builder.build()); + PartitionDescProto partitionDescProto = builder.build(); + commitHandle.addPartition(partitionDescProto); } catch (IOException e) { throw new ConcurrentModificationException(); } @@ -892,7 +893,7 @@ private void commitInsertWithPartition(Path stagingResultDir, Path finalOutputDi fmt.setGroupingUsed(false); fmt.setMinimumIntegerDigits(3); - partitions.parallelStream().forEach(partition -> { + partitions.stream().forEach(partition -> { try { Path targetPath = new Path(partition.getPath() + "/"); Path stagingPath = new Path(partition.getPath().replaceAll(finalOutputPath, stagingResultPath) + "/"); From dfa58f9eb084d54f1f7b621f3901ef7c1e7bf335 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Tue, 2 Feb 2016 10:39:27 +0900 Subject: [PATCH 075/127] Replace List type to Set type with ConcurrentHashMap --- .../org/apache/tajo/storage/OutputCommitHandle.java | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/OutputCommitHandle.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/OutputCommitHandle.java index c6e977756b..a71d2fb2bd 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/OutputCommitHandle.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/OutputCommitHandle.java @@ -22,18 +22,21 @@ import org.apache.tajo.catalog.proto.CatalogProtos.PartitionDescProto; import java.util.ArrayList; +import java.util.Collections; import java.util.List; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; public class OutputCommitHandle { private List backupPaths; private List targetPaths; - private List partitions; + private Set partitions; 
public OutputCommitHandle() { backupPaths = new ArrayList(); targetPaths = new ArrayList(); - partitions = new ArrayList(); + partitions = Collections.newSetFromMap(new ConcurrentHashMap<>()); } public List getBackupPaths() { @@ -60,11 +63,11 @@ public void addTargetPath(Path path) { this.targetPaths.add(path); } - public List getPartitions() { + public Set getPartitions() { return partitions; } - public void setPartitions(List partitions) { + public void setPartitions(Set partitions) { this.partitions = partitions; } From b548b1bc4769e454b94bd978366566ed1aad6e47 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Tue, 2 Feb 2016 10:42:54 +0900 Subject: [PATCH 076/127] Use parallelStream instead of stream --- .../src/main/java/org/apache/tajo/storage/FileTablespace.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java index 9fdd4c48c0..9f4dc9acf3 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java @@ -854,7 +854,7 @@ private void commitInsertOverwriteOrCreateWithPartition(Path stagingResultDir, P String finalOutputPath = finalOutputDir.toString(); String oldTablePath = oldTableDir.toString(); - partitions.stream().forEach(partition -> { + partitions.parallelStream().forEach(partition -> { try { Path targetPath = new Path(partition.getPath() + "/"); Path stagingPath = new Path(partition.getPath().replaceAll(finalOutputPath, stagingResultPath) + "/"); @@ -893,7 +893,7 @@ private void commitInsertWithPartition(Path stagingResultDir, Path finalOutputDi fmt.setGroupingUsed(false); fmt.setMinimumIntegerDigits(3); - partitions.stream().forEach(partition -> { + partitions.parallelStream().forEach(partition -> { try { Path targetPath = 
new Path(partition.getPath() + "/"); Path stagingPath = new Path(partition.getPath().replaceAll(finalOutputPath, stagingResultPath) + "/"); From da00b48d27d25de4bf859c04e3d9a8d1f53b86aa Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Tue, 2 Feb 2016 16:13:04 +0900 Subject: [PATCH 077/127] Rename PartitionContent to PartitionPruningHandle --- .../planner/TestPartitionedTableRewriter.java | 82 +++++++++---------- ...ntent.java => PartitionPruningHandle.java} | 14 ++-- .../rules/PartitionedTableRewriter.java | 46 +++++------ 3 files changed, 73 insertions(+), 69 deletions(-) rename tajo-plan/src/main/java/org/apache/tajo/plan/partition/{PartitionContent.java => PartitionPruningHandle.java} (80%) diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java index 938679cdcf..c22447d414 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java @@ -35,7 +35,7 @@ import org.apache.tajo.engine.query.QueryContext; import org.apache.tajo.plan.LogicalPlan; import org.apache.tajo.plan.logical.*; -import org.apache.tajo.plan.partition.PartitionContent; +import org.apache.tajo.plan.partition.PartitionPruningHandle; import org.apache.tajo.plan.rewrite.rules.PartitionedTableRewriter; import org.apache.tajo.util.CommonTestingUtil; import org.apache.tajo.util.FileUtil; @@ -169,19 +169,19 @@ public void testFilterIncludePartitionKeyColumn() throws Exception { rewriter.setCatalog(catalog); OverridableConf conf = CommonTestingUtil.getSessionVarsForTest(); - PartitionContent partitionContent = rewriter.getPartitionContent(conf, scanNode); - assertNotNull(partitionContent); + PartitionPruningHandle partitionPruningHandle = rewriter.getPartitionPruningHandle(conf, scanNode); + 
assertNotNull(partitionPruningHandle); - Path[] filteredPaths = partitionContent.getPartitionPaths(); + Path[] filteredPaths = partitionPruningHandle.getPartitionPaths(); assertEquals(1, filteredPaths.length); assertEquals("key=part456", filteredPaths[0].getName()); - String[] partitionKeys = partitionContent.getPartitionKeys(); + String[] partitionKeys = partitionPruningHandle.getPartitionKeys(); assertEquals(1, partitionKeys.length); assertEquals("key=part456", partitionKeys[0]); - assertEquals(10L, partitionContent.getTotalVolume()); + assertEquals(10L, partitionPruningHandle.getTotalVolume()); } @Test @@ -206,22 +206,22 @@ public void testWithoutAnyFilters() throws Exception { rewriter.setCatalog(catalog); OverridableConf conf = CommonTestingUtil.getSessionVarsForTest(); - PartitionContent partitionContent = rewriter.getPartitionContent(conf, scanNode); - assertNotNull(partitionContent); + PartitionPruningHandle partitionPruningHandle = rewriter.getPartitionPruningHandle(conf, scanNode); + assertNotNull(partitionPruningHandle); - Path[] filteredPaths = partitionContent.getPartitionPaths(); + Path[] filteredPaths = partitionPruningHandle.getPartitionPaths(); assertEquals(3, filteredPaths.length); assertEquals("key=part123", filteredPaths[0].getName()); assertEquals("key=part456", filteredPaths[1].getName()); assertEquals("key=part789", filteredPaths[2].getName()); - String[] partitionKeys = partitionContent.getPartitionKeys(); + String[] partitionKeys = partitionPruningHandle.getPartitionKeys(); assertEquals(3, partitionKeys.length); assertEquals("key=part123", partitionKeys[0]); assertEquals("key=part456", partitionKeys[1]); assertEquals("key=part789", partitionKeys[2]); - assertEquals(33L, partitionContent.getTotalVolume()); + assertEquals(33L, partitionPruningHandle.getTotalVolume()); } @Test @@ -248,13 +248,13 @@ public void testFilterIncludeNonExistingPartitionValue() throws Exception { rewriter.setCatalog(catalog); OverridableConf conf = 
CommonTestingUtil.getSessionVarsForTest(); - PartitionContent partitionContent = rewriter.getPartitionContent(conf, scanNode); - assertNotNull(partitionContent); + PartitionPruningHandle partitionPruningHandle = rewriter.getPartitionPruningHandle(conf, scanNode); + assertNotNull(partitionPruningHandle); - assertEquals(0, partitionContent.getPartitionPaths().length); - assertEquals(0, partitionContent.getPartitionKeys().length); + assertEquals(0, partitionPruningHandle.getPartitionPaths().length); + assertEquals(0, partitionPruningHandle.getPartitionKeys().length); - assertEquals(0L, partitionContent.getTotalVolume()); + assertEquals(0L, partitionPruningHandle.getTotalVolume()); } @Test @@ -282,22 +282,22 @@ public void testFilterIncludeNonPartitionKeyColumn() throws Exception { rewriter.setCatalog(catalog); OverridableConf conf = CommonTestingUtil.getSessionVarsForTest(); - PartitionContent partitionContent = rewriter.getPartitionContent(conf, scanNode); - assertNotNull(partitionContent); + PartitionPruningHandle partitionPruningHandle = rewriter.getPartitionPruningHandle(conf, scanNode); + assertNotNull(partitionPruningHandle); - Path[] filteredPaths = partitionContent.getPartitionPaths(); + Path[] filteredPaths = partitionPruningHandle.getPartitionPaths(); assertEquals(3, filteredPaths.length); assertEquals("key=part123", filteredPaths[0].getName()); assertEquals("key=part456", filteredPaths[1].getName()); assertEquals("key=part789", filteredPaths[2].getName()); - String[] partitionKeys = partitionContent.getPartitionKeys(); + String[] partitionKeys = partitionPruningHandle.getPartitionKeys(); assertEquals(3, partitionKeys.length); assertEquals("key=part123", partitionKeys[0]); assertEquals("key=part456", partitionKeys[1]); assertEquals("key=part789", partitionKeys[2]); - assertEquals(33L, partitionContent.getTotalVolume()); + assertEquals(33L, partitionPruningHandle.getTotalVolume()); } @Test @@ -325,20 +325,20 @@ public void 
testFilterIncludeEveryPartitionKeyColumn() throws Exception { rewriter.setCatalog(catalog); OverridableConf conf = CommonTestingUtil.getSessionVarsForTest(); - PartitionContent partitionContent = rewriter.getPartitionContent(conf, scanNode); - assertNotNull(partitionContent); + PartitionPruningHandle partitionPruningHandle = rewriter.getPartitionPruningHandle(conf, scanNode); + assertNotNull(partitionPruningHandle); - Path[] filteredPaths = partitionContent.getPartitionPaths(); + Path[] filteredPaths = partitionPruningHandle.getPartitionPaths(); assertEquals(1, filteredPaths.length); assertEquals("key3=3", filteredPaths[0].getName()); assertEquals("key2=supp789", filteredPaths[0].getParent().getName()); assertEquals("key1=part789", filteredPaths[0].getParent().getParent().getName()); - String[] partitionKeys = partitionContent.getPartitionKeys(); + String[] partitionKeys = partitionPruningHandle.getPartitionKeys(); assertEquals(1, partitionKeys.length); assertEquals("key1=part789/key2=supp789/key3=3", partitionKeys[0]); - assertEquals(10L, partitionContent.getTotalVolume()); + assertEquals(10L, partitionPruningHandle.getTotalVolume()); } @Test @@ -369,10 +369,10 @@ public void testFilterIncludeSomeOfPartitionKeyColumns() throws Exception { rewriter.setCatalog(catalog); OverridableConf conf = CommonTestingUtil.getSessionVarsForTest(); - PartitionContent partitionContent = rewriter.getPartitionContent(conf, scanNode); - assertNotNull(partitionContent); + PartitionPruningHandle partitionPruningHandle = rewriter.getPartitionPruningHandle(conf, scanNode); + assertNotNull(partitionPruningHandle); - Path[] filteredPaths = partitionContent.getPartitionPaths(); + Path[] filteredPaths = partitionPruningHandle.getPartitionPaths(); assertEquals(2, filteredPaths.length); assertEquals("key3=1", filteredPaths[0].getName()); @@ -383,12 +383,12 @@ public void testFilterIncludeSomeOfPartitionKeyColumns() throws Exception { assertEquals("key2=supp123", 
filteredPaths[1].getParent().getName()); assertEquals("key1=part123", filteredPaths[1].getParent().getParent().getName()); - String[] partitionKeys = partitionContent.getPartitionKeys(); + String[] partitionKeys = partitionPruningHandle.getPartitionKeys(); assertEquals(2, partitionKeys.length); assertEquals("key1=part123/key2=supp123/key3=1", partitionKeys[0]); assertEquals("key1=part123/key2=supp123/key3=2", partitionKeys[1]); - assertEquals(23L, partitionContent.getTotalVolume()); + assertEquals(23L, partitionPruningHandle.getTotalVolume()); } @Test @@ -419,10 +419,10 @@ public void testFilterIncludeNonPartitionKeyColumns() throws Exception { rewriter.setCatalog(catalog); OverridableConf conf = CommonTestingUtil.getSessionVarsForTest(); - PartitionContent partitionContent = rewriter.getPartitionContent(conf, scanNode); - assertNotNull(partitionContent); + PartitionPruningHandle partitionPruningHandle = rewriter.getPartitionPruningHandle(conf, scanNode); + assertNotNull(partitionPruningHandle); - Path[] filteredPaths = partitionContent.getPartitionPaths(); + Path[] filteredPaths = partitionPruningHandle.getPartitionPaths(); assertEquals(2, filteredPaths.length); assertEquals("key3=1", filteredPaths[0].getName()); @@ -433,12 +433,12 @@ public void testFilterIncludeNonPartitionKeyColumns() throws Exception { assertEquals("key2=supp123", filteredPaths[1].getParent().getName()); assertEquals("key1=part123", filteredPaths[1].getParent().getParent().getName()); - String[] partitionKeys = partitionContent.getPartitionKeys(); + String[] partitionKeys = partitionPruningHandle.getPartitionKeys(); assertEquals(2, partitionKeys.length); assertEquals("key1=part123/key2=supp123/key3=1", partitionKeys[0]); assertEquals("key1=part123/key2=supp123/key3=2", partitionKeys[1]); - assertEquals(23L, partitionContent.getTotalVolume()); + assertEquals(23L, partitionPruningHandle.getTotalVolume()); } @Test @@ -479,22 +479,22 @@ public final void testPartitionPruningWitCTAS() throws 
Exception { rewriter.setCatalog(catalog); OverridableConf conf = CommonTestingUtil.getSessionVarsForTest(); - PartitionContent partitionContent = rewriter.getPartitionContent(conf, scanNode); - assertNotNull(partitionContent); + PartitionPruningHandle partitionPruningHandle = rewriter.getPartitionPruningHandle(conf, scanNode); + assertNotNull(partitionPruningHandle); - Path[] filteredPaths = partitionContent.getPartitionPaths(); + Path[] filteredPaths = partitionPruningHandle.getPartitionPaths(); assertEquals(3, filteredPaths.length); assertEquals("key=17.0", filteredPaths[0].getName()); assertEquals("key=36.0", filteredPaths[1].getName()); assertEquals("key=38.0", filteredPaths[2].getName()); - String[] partitionKeys = partitionContent.getPartitionKeys(); + String[] partitionKeys = partitionPruningHandle.getPartitionKeys(); assertEquals(3, partitionKeys.length); assertEquals("key=17.0", partitionKeys[0]); assertEquals("key=36.0", partitionKeys[1]); assertEquals("key=38.0", partitionKeys[2]); - assertEquals(12L, partitionContent.getTotalVolume()); + assertEquals(12L, partitionPruningHandle.getTotalVolume()); executeString("DROP TABLE " + canonicalTableName + " PURGE").close(); } diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/partition/PartitionContent.java b/tajo-plan/src/main/java/org/apache/tajo/plan/partition/PartitionPruningHandle.java similarity index 80% rename from tajo-plan/src/main/java/org/apache/tajo/plan/partition/PartitionContent.java rename to tajo-plan/src/main/java/org/apache/tajo/plan/partition/PartitionPruningHandle.java index 3985cbaa20..9271786efa 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/partition/PartitionContent.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/partition/PartitionPruningHandle.java @@ -20,12 +20,16 @@ import org.apache.hadoop.fs.Path; -public class PartitionContent { - private final Path[] partitionPaths; - private final String[] partitionKeys; - private final long totalVolume; +/** + * This 
includes the results of partition pruning. + * + */ +public class PartitionPruningHandle { + private Path[] partitionPaths; + private String[] partitionKeys; + private long totalVolume; - public PartitionContent(Path[] partitionPaths, String[] partitionKeys, long totalVolume) { + public PartitionPruningHandle(Path[] partitionPaths, String[] partitionKeys, long totalVolume) { this.partitionPaths = partitionPaths; this.partitionKeys = partitionKeys; this.totalVolume = totalVolume; diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java index a8b6c1fd87..84603f219d 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java @@ -37,7 +37,7 @@ import org.apache.tajo.plan.LogicalPlan; import org.apache.tajo.plan.expr.*; import org.apache.tajo.plan.logical.*; -import org.apache.tajo.plan.partition.PartitionContent; +import org.apache.tajo.plan.partition.PartitionPruningHandle; import org.apache.tajo.plan.rewrite.LogicalPlanRewriteRule; import org.apache.tajo.plan.rewrite.LogicalPlanRewriteRuleContext; import org.apache.tajo.plan.util.EvalNodeToExprConverter; @@ -117,7 +117,7 @@ public String toString() { } } - private PartitionContent getPartitionContent(OverridableConf queryContext, String tableName, + private PartitionPruningHandle getPartitionContent(OverridableConf queryContext, String tableName, Schema partitionColumns, EvalNode [] conjunctiveForms, Path tablePath) throws IOException, UndefinedDatabaseException, UndefinedTableException, UndefinedPartitionMethodException, UndefinedOperatorException, UnsupportedException { @@ -134,12 +134,12 @@ private PartitionContent getPartitionContent(OverridableConf queryContext, Strin * @return * @throws IOException */ - private PartitionContent 
getPartitionContent(OverridableConf queryContext, String tableName, + private PartitionPruningHandle getPartitionContent(OverridableConf queryContext, String tableName, Schema partitionColumns, EvalNode [] conjunctiveForms, Path tablePath, ScanNode scanNode) throws IOException, UndefinedDatabaseException, UndefinedTableException, UndefinedPartitionMethodException, UndefinedOperatorException, UnsupportedException { - PartitionContent partitionContent = null; + PartitionPruningHandle partitionPruningHandle = null; FileSystem fs = tablePath.getFileSystem(queryContext.getConf()); String [] splits = CatalogUtil.splitFQTableName(tableName); List partitions = null; @@ -148,17 +148,17 @@ private PartitionContent getPartitionContent(OverridableConf queryContext, Strin if (conjunctiveForms == null) { partitions = catalog.getPartitionsOfTable(splits[0], splits[1]); if (partitions.isEmpty()) { - partitionContent = getPartitionContentByFileSystem(partitionColumns, conjunctiveForms, fs, tablePath); + partitionPruningHandle = getPartitionContentByFileSystem(partitionColumns, conjunctiveForms, fs, tablePath); } else { - partitionContent = getPartitionContentByPartitionDesc(partitions); + partitionPruningHandle = getPartitionContentByPartitionDesc(partitions); } } else { if (catalog.existPartitions(splits[0], splits[1])) { PartitionsByAlgebraProto request = getPartitionsAlgebraProto(splits[0], splits[1], conjunctiveForms); partitions = catalog.getPartitionsByAlgebra(request); - partitionContent = getPartitionContentByPartitionDesc(partitions); + partitionPruningHandle = getPartitionContentByPartitionDesc(partitions); } else { - partitionContent = getPartitionContentByFileSystem(partitionColumns, conjunctiveForms, fs, tablePath); + partitionPruningHandle = getPartitionContentByFileSystem(partitionColumns, conjunctiveForms, fs, tablePath); } } } catch (UnsupportedException ue) { @@ -167,16 +167,16 @@ private PartitionContent getPartitionContent(OverridableConf queryContext, Strin 
LOG.warn(ue.getMessage()); partitions = catalog.getPartitionsOfTable(splits[0], splits[1]); if (partitions.isEmpty()) { - partitionContent = getPartitionContentByFileSystem(partitionColumns, conjunctiveForms, fs, tablePath); + partitionPruningHandle = getPartitionContentByFileSystem(partitionColumns, conjunctiveForms, fs, tablePath); } else { - partitionContent = getPartitionContentByPartitionDesc(partitions); + partitionPruningHandle = getPartitionContentByPartitionDesc(partitions); } scanNode.setQual(AlgebraicUtil.createSingletonExprFromCNF(conjunctiveForms)); } - LOG.info("Filtered directory or files: " + partitionContent.getPartitionPaths().length); - LOG.info("Filtered partition keys: " + partitionContent.getPartitionKeys().length); + LOG.info("Filtered directory or files: " + partitionPruningHandle.getPartitionPaths().length); + LOG.info("Filtered partition keys: " + partitionPruningHandle.getPartitionKeys().length); - return partitionContent; + return partitionPruningHandle; } /** @@ -185,7 +185,7 @@ private PartitionContent getPartitionContent(OverridableConf queryContext, Strin * @param partitions * @return */ - private PartitionContent getPartitionContentByPartitionDesc(List partitions) { + private PartitionPruningHandle getPartitionContentByPartitionDesc(List partitions) { long totalVolume = 0L; Path[] filteredPaths = new Path[partitions.size()]; String[] partitionKeys = new String[partitions.size()]; @@ -195,7 +195,7 @@ private PartitionContent getPartitionContentByPartitionDesc(List Date: Tue, 2 Feb 2016 16:21:11 +0900 Subject: [PATCH 078/127] Rename method names --- .../rules/PartitionedTableRewriter.java | 32 +++++++++++-------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java index 84603f219d..5eb8e255b0 100644 --- 
a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java @@ -117,11 +117,11 @@ public String toString() { } } - private PartitionPruningHandle getPartitionContent(OverridableConf queryContext, String tableName, + private PartitionPruningHandle getPartitionPruningHandle(OverridableConf queryContext, String tableName, Schema partitionColumns, EvalNode [] conjunctiveForms, Path tablePath) throws IOException, UndefinedDatabaseException, UndefinedTableException, UndefinedPartitionMethodException, UndefinedOperatorException, UnsupportedException { - return getPartitionContent(queryContext, tableName, partitionColumns, conjunctiveForms, tablePath, null); + return getPartitionPruningHandle(queryContext, tableName, partitionColumns, conjunctiveForms, tablePath, null); } /** @@ -134,7 +134,7 @@ private PartitionPruningHandle getPartitionContent(OverridableConf queryContext, * @return * @throws IOException */ - private PartitionPruningHandle getPartitionContent(OverridableConf queryContext, String tableName, + private PartitionPruningHandle getPartitionPruningHandle(OverridableConf queryContext, String tableName, Schema partitionColumns, EvalNode [] conjunctiveForms, Path tablePath, ScanNode scanNode) throws IOException, UndefinedDatabaseException, UndefinedTableException, UndefinedPartitionMethodException, UndefinedOperatorException, UnsupportedException { @@ -148,17 +148,19 @@ private PartitionPruningHandle getPartitionContent(OverridableConf queryContext, if (conjunctiveForms == null) { partitions = catalog.getPartitionsOfTable(splits[0], splits[1]); if (partitions.isEmpty()) { - partitionPruningHandle = getPartitionContentByFileSystem(partitionColumns, conjunctiveForms, fs, tablePath); + partitionPruningHandle = getPartitionPruningHandleByFileSystem(partitionColumns, conjunctiveForms, fs, + tablePath); } else { - partitionPruningHandle = 
getPartitionContentByPartitionDesc(partitions); + partitionPruningHandle = getPartitionPruningHandleByCatalog(partitions); } } else { if (catalog.existPartitions(splits[0], splits[1])) { PartitionsByAlgebraProto request = getPartitionsAlgebraProto(splits[0], splits[1], conjunctiveForms); partitions = catalog.getPartitionsByAlgebra(request); - partitionPruningHandle = getPartitionContentByPartitionDesc(partitions); + partitionPruningHandle = getPartitionPruningHandleByCatalog(partitions); } else { - partitionPruningHandle = getPartitionContentByFileSystem(partitionColumns, conjunctiveForms, fs, tablePath); + partitionPruningHandle = getPartitionPruningHandleByFileSystem(partitionColumns, conjunctiveForms, fs, + tablePath); } } } catch (UnsupportedException ue) { @@ -167,9 +169,10 @@ private PartitionPruningHandle getPartitionContent(OverridableConf queryContext, LOG.warn(ue.getMessage()); partitions = catalog.getPartitionsOfTable(splits[0], splits[1]); if (partitions.isEmpty()) { - partitionPruningHandle = getPartitionContentByFileSystem(partitionColumns, conjunctiveForms, fs, tablePath); + partitionPruningHandle = getPartitionPruningHandleByFileSystem(partitionColumns, conjunctiveForms, fs, + tablePath); } else { - partitionPruningHandle = getPartitionContentByPartitionDesc(partitions); + partitionPruningHandle = getPartitionPruningHandleByCatalog(partitions); } scanNode.setQual(AlgebraicUtil.createSingletonExprFromCNF(conjunctiveForms)); } @@ -185,7 +188,7 @@ private PartitionPruningHandle getPartitionContent(OverridableConf queryContext, * @param partitions * @return */ - private PartitionPruningHandle getPartitionContentByPartitionDesc(List partitions) { + private PartitionPruningHandle getPartitionPruningHandleByCatalog(List partitions) { long totalVolume = 0L; Path[] filteredPaths = new Path[partitions.size()]; String[] partitionKeys = new String[partitions.size()]; @@ -209,8 +212,8 @@ private PartitionPruningHandle getPartitionContentByPartitionDesc(List 0) { 
// There are at least one indexable predicates - return getPartitionContent(queryContext, table.getName(), paritionValuesSchema, + return getPartitionPruningHandle(queryContext, table.getName(), paritionValuesSchema, indexablePredicateSet.toArray(new EvalNode[indexablePredicateSet.size()]), new Path(table.getUri()), scanNode); } else { // otherwise, we will get all partition paths. - return getPartitionContent(queryContext, table.getName(), paritionValuesSchema, null, new Path(table.getUri())); + return getPartitionPruningHandle(queryContext, table.getName(), paritionValuesSchema, null, + new Path(table.getUri())); } } From 7e4b115653032eaa246db464f6a1cf4eb1b70763 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Tue, 2 Feb 2016 17:19:31 +0900 Subject: [PATCH 079/127] Check if staging directory exits --- .../apache/tajo/storage/FileTablespace.java | 54 ++++++++++--------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java index 9f4dc9acf3..a1874ae947 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java @@ -860,22 +860,24 @@ private void commitInsertOverwriteOrCreateWithPartition(Path stagingResultDir, P Path stagingPath = new Path(partition.getPath().replaceAll(finalOutputPath, stagingResultPath) + "/"); Path backupPath = new Path(partition.getPath().replaceAll(finalOutputPath, oldTablePath)); - // Move existing directory to backup directory. 
- if (checkExistingPartition && fs.exists(targetPath)) { - renameDirectory(targetPath, backupPath); - commitHandle.addBackupPath(backupPath); - } - - // Move staging directory to target directory - renameDirectory(stagingPath, targetPath); - commitHandle.addTargetPath(targetPath); + if (fs.exists(stagingPath)) { + // Move existing directory to backup directory. + if (checkExistingPartition && fs.exists(targetPath)) { + renameDirectory(targetPath, backupPath); + commitHandle.addBackupPath(backupPath); + } - // Summarize the volume of partitions - long totalSize = calculateSize(targetPath); - PartitionDescProto.Builder builder = partition.toBuilder(); - builder.setNumBytes(totalSize); - PartitionDescProto partitionDescProto = builder.build(); - commitHandle.addPartition(partitionDescProto); + // Move staging directory to target directory + renameDirectory(stagingPath, targetPath); + commitHandle.addTargetPath(targetPath); + + // Summarize the volume of partitions + long totalSize = calculateSize(targetPath); + PartitionDescProto.Builder builder = partition.toBuilder(); + builder.setNumBytes(totalSize); + PartitionDescProto partitionDescProto = builder.build(); + commitHandle.addPartition(partitionDescProto); + } } catch (IOException e) { throw new ConcurrentModificationException(); } @@ -898,17 +900,19 @@ private void commitInsertWithPartition(Path stagingResultDir, Path finalOutputDi Path targetPath = new Path(partition.getPath() + "/"); Path stagingPath = new Path(partition.getPath().replaceAll(finalOutputPath, stagingResultPath) + "/"); - if (!fs.exists(targetPath)) { - renameDirectory(stagingPath, targetPath); - } else { - moveResultFromStageToFinal(fs, stagingResultDir, fs.getFileStatus(stagingPath), finalOutputDir, fmt, -1, - changeFileSeq, commitHandle); - } + if (fs.exists(stagingPath)) { + if (!fs.exists(targetPath)) { + renameDirectory(stagingPath, targetPath); + } else { + moveResultFromStageToFinal(fs, stagingResultDir, fs.getFileStatus(stagingPath), 
finalOutputDir, fmt, -1, + changeFileSeq, commitHandle); + } - // Summarize the volume of partitions - PartitionDescProto.Builder builder = partition.toBuilder(); - builder.setNumBytes(calculateSize(targetPath)); - commitHandle.addPartition(builder.build()); + // Summarize the volume of partitions + PartitionDescProto.Builder builder = partition.toBuilder(); + builder.setNumBytes(calculateSize(targetPath)); + commitHandle.addPartition(builder.build()); + } } catch (IOException e) { throw new ConcurrentModificationException(); } From 1458ebff43d09272d3770b03e6923d1c874558fd Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Tue, 2 Feb 2016 17:22:23 +0900 Subject: [PATCH 080/127] Update max-retry-count --- tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java index 83fd65e554..4acee992ef 100644 --- a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java +++ b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java @@ -237,7 +237,7 @@ public static enum ConfVars implements ConfigKey { HIVEUSEEXPLICITRCFILEHEADER("tajo.exec.rcfile.use.explicit.header", true, Validators.bool()), // S3 Configuration -------------------------------------------------- - S3_MAX_ERROR_RETRIES("tajo.s3.max-error-retries", 100), + S3_MAX_ERROR_RETRIES("tajo.s3.max-error-retries", 10), S3_SSL_ENABLED("tajo.s3.ssl.enabled", true), S3_CONNECT_TIMEOUT("tajo.s3.connect-timeout", "5m"), S3_SOCKET_TIMEOUT("tajo.s3.socket-timeout", "5m"), From 156578855a3e85074dadc730b12919b2dbcc1a9d Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Tue, 2 Feb 2016 18:02:31 +0900 Subject: [PATCH 081/127] Replace parallelStream to stream --- .../apache/tajo/storage/FileTablespace.java | 58 +++++++++---------- 1 file changed, 27 insertions(+), 31 deletions(-) diff --git 
a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java index a1874ae947..9fdd4c48c0 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java @@ -854,30 +854,28 @@ private void commitInsertOverwriteOrCreateWithPartition(Path stagingResultDir, P String finalOutputPath = finalOutputDir.toString(); String oldTablePath = oldTableDir.toString(); - partitions.parallelStream().forEach(partition -> { + partitions.stream().forEach(partition -> { try { Path targetPath = new Path(partition.getPath() + "/"); Path stagingPath = new Path(partition.getPath().replaceAll(finalOutputPath, stagingResultPath) + "/"); Path backupPath = new Path(partition.getPath().replaceAll(finalOutputPath, oldTablePath)); - if (fs.exists(stagingPath)) { - // Move existing directory to backup directory. - if (checkExistingPartition && fs.exists(targetPath)) { - renameDirectory(targetPath, backupPath); - commitHandle.addBackupPath(backupPath); - } - - // Move staging directory to target directory - renameDirectory(stagingPath, targetPath); - commitHandle.addTargetPath(targetPath); - - // Summarize the volume of partitions - long totalSize = calculateSize(targetPath); - PartitionDescProto.Builder builder = partition.toBuilder(); - builder.setNumBytes(totalSize); - PartitionDescProto partitionDescProto = builder.build(); - commitHandle.addPartition(partitionDescProto); + // Move existing directory to backup directory. 
+ if (checkExistingPartition && fs.exists(targetPath)) { + renameDirectory(targetPath, backupPath); + commitHandle.addBackupPath(backupPath); } + + // Move staging directory to target directory + renameDirectory(stagingPath, targetPath); + commitHandle.addTargetPath(targetPath); + + // Summarize the volume of partitions + long totalSize = calculateSize(targetPath); + PartitionDescProto.Builder builder = partition.toBuilder(); + builder.setNumBytes(totalSize); + PartitionDescProto partitionDescProto = builder.build(); + commitHandle.addPartition(partitionDescProto); } catch (IOException e) { throw new ConcurrentModificationException(); } @@ -895,24 +893,22 @@ private void commitInsertWithPartition(Path stagingResultDir, Path finalOutputDi fmt.setGroupingUsed(false); fmt.setMinimumIntegerDigits(3); - partitions.parallelStream().forEach(partition -> { + partitions.stream().forEach(partition -> { try { Path targetPath = new Path(partition.getPath() + "/"); Path stagingPath = new Path(partition.getPath().replaceAll(finalOutputPath, stagingResultPath) + "/"); - if (fs.exists(stagingPath)) { - if (!fs.exists(targetPath)) { - renameDirectory(stagingPath, targetPath); - } else { - moveResultFromStageToFinal(fs, stagingResultDir, fs.getFileStatus(stagingPath), finalOutputDir, fmt, -1, - changeFileSeq, commitHandle); - } - - // Summarize the volume of partitions - PartitionDescProto.Builder builder = partition.toBuilder(); - builder.setNumBytes(calculateSize(targetPath)); - commitHandle.addPartition(builder.build()); + if (!fs.exists(targetPath)) { + renameDirectory(stagingPath, targetPath); + } else { + moveResultFromStageToFinal(fs, stagingResultDir, fs.getFileStatus(stagingPath), finalOutputDir, fmt, -1, + changeFileSeq, commitHandle); } + + // Summarize the volume of partitions + PartitionDescProto.Builder builder = partition.toBuilder(); + builder.setNumBytes(calculateSize(targetPath)); + commitHandle.addPartition(builder.build()); } catch (IOException e) { throw new 
ConcurrentModificationException(); } From c78d2b87d0c73d7c1fa6a674b85e96c94077d441 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Wed, 3 Feb 2016 23:43:09 +0900 Subject: [PATCH 082/127] Remove TAJO-2063 dependency --- .../org/apache/tajo/querymaster/Query.java | 72 +-- .../org/apache/tajo/storage/Tablespace.java | 3 +- .../tajo/storage/hbase/HBaseTablespace.java | 3 +- .../apache/tajo/storage/FileTablespace.java | 462 ++++++++---------- .../tajo/storage/OutputCommitHandle.java | 77 --- .../tajo/storage/jdbc/JdbcTablespace.java | 3 +- .../apache/tajo/storage/s3/S3TableSpace.java | 2 +- 7 files changed, 247 insertions(+), 375 deletions(-) delete mode 100644 tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/OutputCommitHandle.java diff --git a/tajo-core/src/main/java/org/apache/tajo/querymaster/Query.java b/tajo-core/src/main/java/org/apache/tajo/querymaster/Query.java index aee853c91e..cdc657a22a 100644 --- a/tajo-core/src/main/java/org/apache/tajo/querymaster/Query.java +++ b/tajo-core/src/main/java/org/apache/tajo/querymaster/Query.java @@ -501,34 +501,40 @@ private QueryState finalizeQuery(Query query, QueryCompletedEvent event) { // In this case, we should use default tablespace. Tablespace space = TablespaceManager.get(queryContext.get(QueryVars.OUTPUT_TABLE_URI, "")); - List partitions = queryContext.hasPartition() ? query.getPartitions() : null; Path finalOutputDir = space.commitTable( - query.context.getQueryContext(), - lastStage.getId(), - lastStage.getMasterPlan().getLogicalPlan(), - lastStage.getSchema(), - tableDesc, - partitions); + query.context.getQueryContext(), + lastStage.getId(), + lastStage.getMasterPlan().getLogicalPlan(), + lastStage.getSchema(), + tableDesc); QueryHookExecutor hookExecutor = new QueryHookExecutor(query.context.getQueryMasterContext()); hookExecutor.execute(query.context.getQueryContext(), query, event.getExecutionBlockId(), finalOutputDir); // Add dynamic partitions to catalog for partition table. 
- if (!query.getPartitions().isEmpty()) { - String databaseName, simpleTableName; - - if (CatalogUtil.isFQTableName(tableDesc.getName())) { - String[] split = CatalogUtil.splitFQTableName(tableDesc.getName()); - databaseName = split[0]; - simpleTableName = split[1]; + if (queryContext.hasOutputTableUri() && queryContext.hasPartition()) { + List partitions = query.getPartitions(); + if (partitions != null) { + // Set contents length and file count to PartitionDescProto by listing final output directories. + List finalPartitions = getPartitionsWithContentsSummary(query.systemConf, + finalOutputDir, partitions); + + String databaseName, simpleTableName; + if (CatalogUtil.isFQTableName(tableDesc.getName())) { + String[] split = CatalogUtil.splitFQTableName(tableDesc.getName()); + databaseName = split[0]; + simpleTableName = split[1]; + } else { + databaseName = queryContext.getCurrentDatabase(); + simpleTableName = tableDesc.getName(); + } + + // Store partitions to CatalogStore using alter table statement. + catalog.addPartitions(databaseName, simpleTableName, finalPartitions, true); + LOG.info("Added partitions to catalog (total=" + partitions.size() + ")"); } else { - databaseName = queryContext.getCurrentDatabase(); - simpleTableName = tableDesc.getName(); + LOG.info("Can't find partitions for adding."); } - - // Store partitions to CatalogStore using alter table statement. 
- catalog.addPartitions(databaseName, simpleTableName, partitions, true); - LOG.info("Added partitions to catalog (total=" + partitions.size() + ")"); query.clearPartitions(); } } catch (Throwable e) { @@ -541,6 +547,21 @@ private QueryState finalizeQuery(Query query, QueryCompletedEvent event) { return QueryState.QUERY_SUCCEEDED; } + private List getPartitionsWithContentsSummary(TajoConf conf, Path outputDir, + List partitions) throws IOException { + List finalPartitions = new ArrayList<>(); + + FileSystem fileSystem = outputDir.getFileSystem(conf); + for (PartitionDescProto partition : partitions) { + PartitionDescProto.Builder builder = partition.toBuilder(); + Path partitionPath = new Path(outputDir, partition.getPath()); + ContentSummary contentSummary = fileSystem.getContentSummary(partitionPath); + builder.setNumBytes(contentSummary.getLength()); + finalPartitions.add(builder.build()); + } + return finalPartitions; + } + private static interface QueryHook { boolean isEligible(QueryContext queryContext, Query query, ExecutionBlockId finalExecBlockId, Path finalOutputDir); void execute(QueryMaster.QueryMasterContext context, QueryContext queryContext, Query query, @@ -675,14 +696,7 @@ public void execute(QueryMaster.QueryMasterContext context, QueryContext queryCo tableDescTobeCreated.setPartitionMethod(createTableNode.getPartitionMethod()); } - long totalVolume = 0L; - if (!query.getPartitions().isEmpty()) { - totalVolume = query.getPartitions().stream().mapToLong(partition -> partition.getNumBytes()).sum(); - } else { - totalVolume = getTableVolume(queryContext, tableDescTobeCreated); - } - - stats.setNumBytes(totalVolume); + stats.setNumBytes(getTableVolume(queryContext, tableDescTobeCreated)); tableDescTobeCreated.setStats(stats); query.setResultDesc(tableDescTobeCreated); @@ -721,7 +735,6 @@ public void execute(QueryMaster.QueryMasterContext context, QueryContext queryCo } long totalVolume = getTableVolume(queryContext, finalTable); - 
stats.setNumBytes(totalVolume); finalTable.setStats(stats); @@ -740,7 +753,6 @@ public void execute(QueryMaster.QueryMasterContext context, QueryContext queryCo public static long getTableVolume(QueryContext queryContext, TableDesc tableDesc) throws UnsupportedException { Tablespace space = TablespaceManager.get(queryContext.get(QueryVars.OUTPUT_TABLE_URI, "")); - return space.getTableVolume(tableDesc, Optional.empty()); } diff --git a/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/Tablespace.java b/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/Tablespace.java index 51e047112d..00e6d75a12 100644 --- a/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/Tablespace.java +++ b/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/Tablespace.java @@ -25,7 +25,6 @@ import org.apache.tajo.TaskAttemptId; import org.apache.tajo.catalog.*; import org.apache.tajo.catalog.proto.CatalogProtos.FragmentProto; -import org.apache.tajo.catalog.proto.CatalogProtos.PartitionDescProto; import org.apache.tajo.conf.TajoConf; import org.apache.tajo.exception.TajoException; import org.apache.tajo.exception.TajoRuntimeException; @@ -364,7 +363,7 @@ public void rewritePlan(OverridableConf context, LogicalPlan plan) throws TajoEx public abstract Path commitTable(OverridableConf queryContext, ExecutionBlockId finalEbId, LogicalPlan plan, Schema schema, - TableDesc tableDesc, List partitions) throws IOException; + TableDesc tableDesc) throws IOException; public abstract void rollbackTable(LogicalNode node) throws IOException, TajoException; diff --git a/tajo-storage/tajo-storage-hbase/src/main/java/org/apache/tajo/storage/hbase/HBaseTablespace.java b/tajo-storage/tajo-storage-hbase/src/main/java/org/apache/tajo/storage/hbase/HBaseTablespace.java index 4260e8ecc6..132ceff0ae 100644 --- a/tajo-storage/tajo-storage-hbase/src/main/java/org/apache/tajo/storage/hbase/HBaseTablespace.java +++ 
b/tajo-storage/tajo-storage-hbase/src/main/java/org/apache/tajo/storage/hbase/HBaseTablespace.java @@ -40,7 +40,6 @@ import org.apache.tajo.*; import org.apache.tajo.catalog.*; import org.apache.tajo.catalog.statistics.TableStats; -import org.apache.tajo.catalog.proto.CatalogProtos.PartitionDescProto; import org.apache.tajo.common.TajoDataTypes.Type; import org.apache.tajo.conf.TajoConf; import org.apache.tajo.datum.Datum; @@ -912,7 +911,7 @@ public Pair getIndexablePredicateValue(ColumnMapping columnMapping @Override public Path commitTable(OverridableConf queryContext, ExecutionBlockId finalEbId, LogicalPlan plan, Schema schema, - TableDesc tableDesc, List partitions) throws IOException { + TableDesc tableDesc) throws IOException { if (tableDesc == null) { throw new IOException("TableDesc is null while calling loadIncrementalHFiles: " + finalEbId); } diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java index 921318f212..e9aba4faa1 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java @@ -33,7 +33,6 @@ import org.apache.tajo.*; import org.apache.tajo.catalog.*; import org.apache.tajo.catalog.statistics.TableStats; -import org.apache.tajo.catalog.proto.CatalogProtos.PartitionDescProto; import org.apache.tajo.conf.TajoConf; import org.apache.tajo.exception.TajoInternalError; import org.apache.tajo.exception.UnsupportedException; @@ -51,7 +50,6 @@ import java.text.NumberFormat; import java.util.*; -import static java.lang.String.format; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED_DEFAULT; @@ -131,7 +129,7 @@ public long getTableVolume(TableDesc table, Optional 
filter) throws Un Path path = new Path(table.getUri()); long totalVolume = 0L; try { - totalVolume = getTotalFileSize(path); + totalVolume = calculateSize(path); } catch (IOException e) { throw new TajoInternalError(e); } @@ -252,7 +250,7 @@ public long calculateSize(Path tablePath) throws IOException { long totalSize = 0; if (fs.exists(tablePath)) { - totalSize = getTotalFileSize(tablePath); + totalSize = fs.getContentSummary(tablePath).getLength(); } return totalSize; @@ -770,8 +768,8 @@ public void verifySchemaToWrite(TableDesc tableDesc, Schema outSchema) { @Override public Path commitTable(OverridableConf queryContext, ExecutionBlockId finalEbId, LogicalPlan plan, - Schema schema, TableDesc tableDesc, List partitions) throws IOException { - return commitOutputData(queryContext, true, partitions); + Schema schema, TableDesc tableDesc) throws IOException { + return commitOutputData(queryContext, true); } @Override @@ -791,248 +789,177 @@ public TupleRange[] getInsertSortRanges(OverridableConf queryContext, TableDesc * @return Saved path * @throws java.io.IOException */ - protected Path commitOutputData(OverridableConf queryContext, boolean changeFileSeq, - List partitions) throws IOException { - Path finalOutputDir = null; + protected Path commitOutputData(OverridableConf queryContext, boolean changeFileSeq) throws IOException { Path stagingDir = new Path(queryContext.get(QueryVars.STAGING_DIR)); Path stagingResultDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME); - Path oldTableDir = new Path(stagingDir, TajoConstants.INSERT_OVERWIRTE_OLD_TABLE_NAME); - OutputCommitHandle commitHandle = new OutputCommitHandle(); - + Path finalOutputDir; if (!queryContext.get(QueryVars.OUTPUT_TABLE_URI, "").isEmpty()) { finalOutputDir = new Path(queryContext.get(QueryVars.OUTPUT_TABLE_URI)); - boolean checkExistingPartition = queryContext.getBool(SessionVars.PARTITION_NO_RESULT_OVERWRITE_ENABLED); - - try { - long startTime = System.currentTimeMillis(); - 
LOG.info("Output-commit started"); - if (queryContext.getBool(QueryVars.OUTPUT_OVERWRITE, false)) { // INSERT OVERWRITE INTO - if (partitions != null) { - commitInsertOverwriteOrCreateWithPartition(stagingResultDir, finalOutputDir, oldTableDir, partitions, - checkExistingPartition, commitHandle); - } else { - commitInsertOverwrite(stagingResultDir, finalOutputDir, oldTableDir); - } - } else { - String queryType = queryContext.get(QueryVars.COMMAND_TYPE); - Preconditions.checkNotNull(queryContext); - if (queryType.equals(NodeType.INSERT.name())) { // INSERT INTO - if (partitions != null) { - commitInsertWithPartition(stagingResultDir, finalOutputDir, partitions, commitHandle, changeFileSeq); - } else { - commitInsert(stagingResultDir, finalOutputDir, changeFileSeq); - } - cleanupTemporaryDirectory(stagingResultDir); - } else if (queryType.equals(NodeType.CREATE_TABLE.name())){ // CREATE TABLE AS SELECT (CTAS) - if (partitions != null) { - commitInsertOverwriteOrCreateWithPartition(stagingResultDir, finalOutputDir, oldTableDir, partitions, - checkExistingPartition, commitHandle); - } else { - commitCreate(stagingResultDir, finalOutputDir); - } - } else { - throw new IOException("Cannot handle query type:" + queryType); - } - } - - // remove the staging directory if the final output dir is given. 
- Path stagingDirRoot = stagingDir.getParent(); - fs.delete(stagingDirRoot, true); - - long finishTime = System.currentTimeMillis(); - long elapsedMills = finishTime - startTime; - LOG.info(format("Output-commit finished : %d ms elapsed.", elapsedMills)); - - } catch (Throwable t) { - rollback(stagingResultDir, finalOutputDir, oldTableDir, commitHandle); - LOG.error(t); - throw new IOException(t); - } - } else { - finalOutputDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME); - } - return finalOutputDir; - } - - private void commitInsertOverwriteOrCreateWithPartition(Path stagingResultDir, Path finalOutputDir, - Path oldTableDir, List partitions, boolean checkExistingPartition, - OutputCommitHandle commitHandle) throws IOException { - String stagingResultPath = stagingResultDir.toString(); - String finalOutputPath = finalOutputDir.toString(); - String oldTablePath = oldTableDir.toString(); - - partitions.stream().forEach(partition -> { try { - Path targetPath = new Path(partition.getPath() + "/"); - Path stagingPath = new Path(partition.getPath().replaceAll(finalOutputPath, stagingResultPath) + "/"); - Path backupPath = new Path(partition.getPath().replaceAll(finalOutputPath, oldTablePath)); - - // Move existing directory to backup directory. 
- if (checkExistingPartition && fs.exists(targetPath)) { - renameDirectory(targetPath, backupPath); - commitHandle.addBackupPath(backupPath); - } + FileSystem fs = stagingResultDir.getFileSystem(conf); - // Move staging directory to target directory - renameDirectory(stagingPath, targetPath); - commitHandle.addTargetPath(targetPath); - - // Summarize the volume of partitions - long totalSize = calculateSize(targetPath); - PartitionDescProto.Builder builder = partition.toBuilder(); - builder.setNumBytes(totalSize); - PartitionDescProto partitionDescProto = builder.build(); - commitHandle.addPartition(partitionDescProto); - } catch (IOException e) { - throw new ConcurrentModificationException(); - } - }); - partitions.clear(); - partitions.addAll(commitHandle.getPartitions()); - } - - private void commitInsertWithPartition(Path stagingResultDir, Path finalOutputDir, - List partitions, OutputCommitHandle commitHandle, boolean changeFileSeq) throws IOException { - String stagingResultPath = stagingResultDir.toString(); - String finalOutputPath = finalOutputDir.toString(); + if (queryContext.getBool(QueryVars.OUTPUT_OVERWRITE, false)) { // INSERT OVERWRITE INTO - NumberFormat fmt = NumberFormat.getInstance(); - fmt.setGroupingUsed(false); - fmt.setMinimumIntegerDigits(3); + // It moves the original table into the temporary location. + // Then it moves the new result table into the original table location. + // Upon failed, it recovers the original table if possible. + boolean movedToOldTable = false; + boolean committed = false; + Path oldTableDir = new Path(stagingDir, TajoConstants.INSERT_OVERWIRTE_OLD_TABLE_NAME); + ContentSummary summary = fs.getContentSummary(stagingResultDir); + + // When inserting empty data into a partitioned table, check if keep existing data need to be remove or not. + boolean overwriteEnabled = queryContext.getBool(SessionVars.PARTITION_NO_RESULT_OVERWRITE_ENABLED); + + // If existing data doesn't need to keep, check if there are some files. 
+ if ( (!queryContext.get(QueryVars.OUTPUT_PARTITIONS, "").isEmpty()) + && (!overwriteEnabled || (overwriteEnabled && summary.getFileCount() > 0L))) { + // This is a map for existing non-leaf directory to rename. A key is current directory and a value is + // renaming directory. + Map renameDirs = new HashMap<>(); + // This is a map for recovering existing partition directory. A key is current directory and a value is + // temporary directory to back up. + Map recoveryDirs = new HashMap<>(); + + try { + if (!fs.exists(finalOutputDir)) { + fs.mkdirs(finalOutputDir); + } - partitions.stream().forEach(partition -> { - try { - Path targetPath = new Path(partition.getPath() + "/"); - Path stagingPath = new Path(partition.getPath().replaceAll(finalOutputPath, stagingResultPath) + "/"); + visitPartitionedDirectory(fs, stagingResultDir, finalOutputDir, stagingResultDir.toString(), + renameDirs, oldTableDir); + + // Rename target partition directories + for(Map.Entry entry : renameDirs.entrySet()) { + // Backup existing data files for recovering + if (fs.exists(entry.getValue())) { + String recoveryPathString = entry.getValue().toString().replaceAll(finalOutputDir.toString(), + oldTableDir.toString()); + Path recoveryPath = new Path(recoveryPathString); + fs.rename(entry.getValue(), recoveryPath); + fs.exists(recoveryPath); + recoveryDirs.put(entry.getValue(), recoveryPath); + } + // Delete existing directory + fs.delete(entry.getValue(), true); + // Rename staging directory to final output directory + fs.rename(entry.getKey(), entry.getValue()); + } - if (!fs.exists(targetPath)) { - renameDirectory(stagingPath, targetPath); - } else { - moveResultFromStageToFinal(fs, stagingResultDir, fs.getFileStatus(stagingPath), finalOutputDir, fmt, -1, - changeFileSeq, commitHandle); - } + } catch (IOException ioe) { + // Remove created dirs + for(Map.Entry entry : renameDirs.entrySet()) { + fs.delete(entry.getValue(), true); + } - // Summarize the volume of partitions - 
PartitionDescProto.Builder builder = partition.toBuilder(); - builder.setNumBytes(calculateSize(targetPath)); - commitHandle.addPartition(builder.build()); - } catch (IOException e) { - throw new ConcurrentModificationException(); - } - }); - partitions.clear(); - partitions.addAll(commitHandle.getPartitions()); - } + // Recovery renamed dirs + for(Map.Entry entry : recoveryDirs.entrySet()) { + fs.delete(entry.getValue(), true); + fs.rename(entry.getValue(), entry.getKey()); + } - private void rollback(Path stagingResultDir, Path finalOutputDir, Path oldTableDir, - OutputCommitHandle commitHandle) throws IOException { - String finalOutputPath = finalOutputDir.toString(); - String oldTablePath = oldTableDir != null ? oldTableDir.toString() : null; + throw new IOException(ioe.getMessage()); + } + } else { // no partition + try { - // Delete data from the output directory - List targetPaths = commitHandle.getTargetPaths(); - for(Path targetPath: targetPaths) { - fs.delete(targetPath, true); - } + // if the final output dir exists, move all contents to the temporary table dir. + // Otherwise, just make the final output dir. As a result, the final output dir will be empty. + if (fs.exists(finalOutputDir)) { + fs.mkdirs(oldTableDir); - // Move from backup directory to output directory - List backupPaths = commitHandle.getBackupPaths(); - for(Path backupPath: backupPaths) { - Path targetPath = new Path(backupPath.toString().replaceAll(oldTablePath, finalOutputPath)); - fs.delete(targetPath, true); - renameDirectory(backupPath, targetPath); - } + for (FileStatus status : fs.listStatus(finalOutputDir, hiddenFileFilter)) { + fs.rename(status.getPath(), oldTableDir); + } - // Delete staging directory - fs.delete(stagingResultDir, true); - } + movedToOldTable = fs.exists(oldTableDir); + } else { // if the parent does not exist, make its parent directory. 
+ fs.mkdirs(finalOutputDir); + } - private void commitInsertOverwrite(Path stagingResultDir, Path finalOutputDir, Path oldTableDir) throws IOException { - // It moves the original table into the temporary location. - // Then it moves the new result table into the original table location. - // Upon failed, it recovers the original table if possible. - boolean movedToOldTable = false; - boolean committed = false; + // Move the results to the final output dir. + for (FileStatus status : fs.listStatus(stagingResultDir)) { + fs.rename(status.getPath(), finalOutputDir); + } - try { - // if the final output dir exists, move all contents to the temporary table dir. - // Otherwise, just make the final output dir. As a result, the final output dir will be empty. - if (fs.exists(finalOutputDir)) { - fs.mkdirs(oldTableDir); + // Check the final output dir + committed = fs.exists(finalOutputDir); - for (FileStatus status : fs.listStatus(finalOutputDir, hiddenFileFilter)) { - fs.rename(status.getPath(), oldTableDir); - } + } catch (IOException ioe) { + // recover the old table + if (movedToOldTable && !committed) { - movedToOldTable = fs.exists(oldTableDir); - } else { // if the parent does not exist, make its parent directory. - fs.mkdirs(finalOutputDir); - } + // if commit is failed, recover the old data + for (FileStatus status : fs.listStatus(finalOutputDir, hiddenFileFilter)) { + fs.delete(status.getPath(), true); + } - // Move the results to the final output dir. 
- for (FileStatus status : fs.listStatus(stagingResultDir)) { - fs.rename(status.getPath(), finalOutputDir); - } + for (FileStatus status : fs.listStatus(oldTableDir)) { + fs.rename(status.getPath(), finalOutputDir); + } + } - // Check the final output dir - committed = fs.exists(finalOutputDir); + throw new IOException(ioe.getMessage()); + } + } + } else { + String queryType = queryContext.get(QueryVars.COMMAND_TYPE); - } catch (IOException ioe) { - // recover the old table - if (movedToOldTable && !committed) { + if (queryType != null && queryType.equals(NodeType.INSERT.name())) { // INSERT INTO an existing table - // if commit is failed, recover the old data - for (FileStatus status : fs.listStatus(finalOutputDir, hiddenFileFilter)) { - fs.delete(status.getPath(), true); - } + NumberFormat fmt = NumberFormat.getInstance(); + fmt.setGroupingUsed(false); + fmt.setMinimumIntegerDigits(3); - for (FileStatus status : fs.listStatus(oldTableDir)) { - fs.rename(status.getPath(), finalOutputDir); + if (!queryContext.get(QueryVars.OUTPUT_PARTITIONS, "").isEmpty()) { + for(FileStatus eachFile: fs.listStatus(stagingResultDir)) { + if (eachFile.isFile()) { + LOG.warn("Partition table can't have file in a staging dir: " + eachFile.getPath()); + continue; + } + moveResultFromStageToFinal(fs, stagingResultDir, eachFile, finalOutputDir, fmt, -1, changeFileSeq); + } + } else { + int maxSeq = StorageUtil.getMaxFileSequence(fs, finalOutputDir, false) + 1; + for(FileStatus eachFile: fs.listStatus(stagingResultDir)) { + if (eachFile.getPath().getName().startsWith("_")) { + continue; + } + moveResultFromStageToFinal(fs, stagingResultDir, eachFile, finalOutputDir, fmt, maxSeq++, changeFileSeq); + } + } + // checking all file moved and remove empty dir + verifyAllFileMoved(fs, stagingResultDir); + FileStatus[] files = fs.listStatus(stagingResultDir); + if (files != null && files.length != 0) { + for (FileStatus eachFile: files) { + LOG.error("There are some unmoved files in staging 
dir:" + eachFile.getPath()); + } + } + } else { // CREATE TABLE AS SELECT (CTAS) + if (fs.exists(finalOutputDir)) { + for (FileStatus status : fs.listStatus(stagingResultDir)) { + fs.rename(status.getPath(), finalOutputDir); + } + } else { + fs.rename(stagingResultDir, finalOutputDir); + } + LOG.info("Moved from the staging dir to the output directory '" + finalOutputDir); + } } - } - - throw new IOException(ioe.getMessage()); - } - } - - private void commitInsert(Path stagingResultDir, Path finalOutputDir, boolean changeFileSeq) throws IOException { - NumberFormat fmt = NumberFormat.getInstance(); - fmt.setGroupingUsed(false); - fmt.setMinimumIntegerDigits(3); - int maxSeq = StorageUtil.getMaxFileSequence(fs, finalOutputDir, false) + 1; - for(FileStatus eachFile: fs.listStatus(stagingResultDir)) { - if (eachFile.getPath().getName().startsWith("_")) { - continue; - } - moveResultFromStageToFinal(fs, stagingResultDir, eachFile, finalOutputDir, fmt, maxSeq++, changeFileSeq, null); - } - } - - private void commitCreate(Path stagingResultDir, Path finalOutputDir) throws IOException { - if (fs.exists(finalOutputDir)) { - for (FileStatus status : fs.listStatus(stagingResultDir)) { - fs.rename(status.getPath(), finalOutputDir); + // remove the staging directory if the final output dir is given. 
+ Path stagingDirRoot = stagingDir.getParent(); + fs.delete(stagingDirRoot, true); + } catch (Throwable t) { + LOG.error(t); + throw new IOException(t); } } else { - fs.rename(stagingResultDir, finalOutputDir); + finalOutputDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME); } - LOG.info("Moved from the staging dir to the output directory '" + finalOutputDir); - } - /** - * checking all file moved and remove empty dir - * @param stagingResultDir - * @throws IOException - */ - private void cleanupTemporaryDirectory(Path stagingResultDir) throws IOException { - verifyAllFileMoved(fs, stagingResultDir); - FileStatus[] files = fs.listStatus(stagingResultDir); - if (files != null && files.length != 0) { - for (FileStatus eachFile: files) { - LOG.error("There are some unmoved files in staging dir:" + eachFile.getPath()); - } - } + return finalOutputDir; } /** @@ -1047,8 +974,9 @@ private void cleanupTemporaryDirectory(Path stagingResultDir) throws IOException * @throws java.io.IOException */ private void moveResultFromStageToFinal(FileSystem fs, Path stagingResultDir, - FileStatus fileStatus, Path finalOutputPath, NumberFormat nf, - int fileSeq, boolean changeFileSeq, OutputCommitHandle commitHandle) throws IOException { + FileStatus fileStatus, Path finalOutputPath, + NumberFormat nf, + int fileSeq, boolean changeFileSeq) throws IOException { if (fileStatus.isDirectory()) { String subPath = extractSubPath(stagingResultDir, fileStatus.getPath()); if (subPath != null) { @@ -1061,8 +989,7 @@ private void moveResultFromStageToFinal(FileSystem fs, Path stagingResultDir, if (eachFile.getPath().getName().startsWith("_")) { continue; } - moveResultFromStageToFinal(fs, stagingResultDir, eachFile, finalOutputPath, nf, ++maxSeq, changeFileSeq, - commitHandle); + moveResultFromStageToFinal(fs, stagingResultDir, eachFile, finalOutputPath, nf, ++maxSeq, changeFileSeq); } } else { throw new IOException("Wrong staging dir:" + stagingResultDir + "," + fileStatus.getPath()); @@ 
-1084,12 +1011,9 @@ private void moveResultFromStageToFinal(FileSystem fs, Path stagingResultDir, if (success) { LOG.info("Moving staging file[" + fileStatus.getPath() + "] + " + "to final output[" + finalSubPath + "]"); - if (commitHandle != null) { - commitHandle.addTargetPath(finalSubPath); - } } else { LOG.error("Can't move staging file[" + fileStatus.getPath() + "] + " + - "to final output[" + finalSubPath + "]"); + "to final output[" + finalSubPath + "]"); } } } @@ -1161,48 +1085,64 @@ private boolean verifyAllFileMoved(FileSystem fs, Path stagingPath) throws IOExc return true; } - protected long getTotalFileSize(Path path) throws IOException { - long totalVolume = 0L; + /** + * This method sets a rename map which includes renamed staging directory to final output directory recursively. + * If there exists some data files, this delete it for duplicate data. + * + * + * @param fs + * @param stagingPath + * @param outputPath + * @param stagingParentPathString + * @throws java.io.IOException + */ + private void visitPartitionedDirectory(FileSystem fs, Path stagingPath, Path outputPath, + String stagingParentPathString, + Map renameDirs, Path oldTableDir) throws IOException { + FileStatus[] files = fs.listStatus(stagingPath); - // f is a file - FileStatus status = fs.getFileStatus(path); - if (status.isFile()) { - totalVolume = status.getLen(); - } - // f is a directory - for(FileStatus s : listStatus(path)) { - long length = s.isDirectory() ? 
getTotalFileSize(s.getPath()) : s.getLen(); - totalVolume += length; - } - return totalVolume; - } + for(FileStatus eachFile : files) { + if (eachFile.isDirectory()) { + Path oldPath = eachFile.getPath(); - protected void renameDirectory(Path sourcePath, Path targetPath) throws IOException { - try { - if (!fs.exists(targetPath.getParent())) { - createDirectory(targetPath.getParent()); - } - if (!rename(sourcePath, targetPath)) { - throw new IOException(format("Failed to rename %s to %s: rename returned false", sourcePath, targetPath)); + // Make recover directory. + String recoverPathString = oldPath.toString().replaceAll(stagingParentPathString, + oldTableDir.toString()); + Path recoveryPath = new Path(recoverPathString); + if (!fs.exists(recoveryPath)) { + fs.mkdirs(recoveryPath); + } + + visitPartitionedDirectory(fs, eachFile.getPath(), outputPath, stagingParentPathString, + renameDirs, oldTableDir); + // Find last order partition for renaming + String newPathString = oldPath.toString().replaceAll(stagingParentPathString, + outputPath.toString()); + Path newPath = new Path(newPathString); + if (!isLeafDirectory(fs, eachFile.getPath())) { + renameDirs.put(eachFile.getPath(), newPath); + } else { + if (!fs.exists(newPath)) { + fs.mkdirs(newPath); + } + } } - } catch (IOException e) { - e.printStackTrace(); - throw new IOException(format("Failed to rename %s to %s", sourcePath, targetPath), e); } - } - protected void createDirectory(Path path) throws IOException { - try { - if (!fs.mkdirs(path)) { - throw new IOException(format("mkdirs %s returned false", path)); + private boolean isLeafDirectory(FileSystem fs, Path path) throws IOException { + boolean retValue = false; + + FileStatus[] files = fs.listStatus(path); + for (FileStatus file : files) { + if (fs.isDirectory(file.getPath())) { + retValue = true; + break; } - } catch (IOException e) { - throw new IOException("Failed to create directory:" + path, e); } - } - protected boolean rename(Path sourcePath, Path 
targetPath) throws IOException { - return fs.rename(sourcePath, targetPath); + return retValue; } + + } diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/OutputCommitHandle.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/OutputCommitHandle.java deleted file mode 100644 index a71d2fb2bd..0000000000 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/OutputCommitHandle.java +++ /dev/null @@ -1,77 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.tajo.storage; - -import org.apache.hadoop.fs.Path; -import org.apache.tajo.catalog.proto.CatalogProtos.PartitionDescProto; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; - -public class OutputCommitHandle { - - private List backupPaths; - private List targetPaths; - private Set partitions; - - public OutputCommitHandle() { - backupPaths = new ArrayList(); - targetPaths = new ArrayList(); - partitions = Collections.newSetFromMap(new ConcurrentHashMap<>()); - } - - public List getBackupPaths() { - return backupPaths; - } - - public void setBackupPaths(List backupPaths) { - this.backupPaths = backupPaths; - } - - public void addBackupPath(Path path) { - this.backupPaths.add(path); - } - - public List getTargetPaths() { - return targetPaths; - } - - public void setTargetPaths(List renamedPaths) { - this.targetPaths = renamedPaths; - } - - public void addTargetPath(Path path) { - this.targetPaths.add(path); - } - - public Set getPartitions() { - return partitions; - } - - public void setPartitions(Set partitions) { - this.partitions = partitions; - } - - public void addPartition(PartitionDescProto partition) { - this.partitions.add(partition); - } -} diff --git a/tajo-storage/tajo-storage-jdbc/src/main/java/org/apache/tajo/storage/jdbc/JdbcTablespace.java b/tajo-storage/tajo-storage-jdbc/src/main/java/org/apache/tajo/storage/jdbc/JdbcTablespace.java index 536e238c63..fa6cf486e2 100644 --- a/tajo-storage/tajo-storage-jdbc/src/main/java/org/apache/tajo/storage/jdbc/JdbcTablespace.java +++ b/tajo-storage/tajo-storage-jdbc/src/main/java/org/apache/tajo/storage/jdbc/JdbcTablespace.java @@ -27,7 +27,6 @@ import org.apache.tajo.ExecutionBlockId; import org.apache.tajo.OverridableConf; import org.apache.tajo.catalog.*; -import org.apache.tajo.catalog.proto.CatalogProtos.PartitionDescProto; import 
org.apache.tajo.exception.NotImplementedException; import org.apache.tajo.exception.TajoInternalError; import org.apache.tajo.exception.TajoRuntimeException; @@ -179,7 +178,7 @@ public void prepareTable(LogicalNode node) throws IOException { @Override public Path commitTable(OverridableConf queryContext, ExecutionBlockId finalEbId, LogicalPlan plan, Schema schema, - TableDesc tableDesc, List partitions) throws IOException { + TableDesc tableDesc) throws IOException { throw new TajoRuntimeException(new NotImplementedException()); } diff --git a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java index 3fc2d41994..5c85781c9b 100644 --- a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java +++ b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java @@ -129,7 +129,7 @@ private static AWSCredentials getAwsCredentials(URI uri, Configuration conf) { } @Override - protected long getTotalFileSize(Path path) throws IOException { + public long calculateSize(Path path) throws IOException { String key = keyFromPath(path); if (!key.isEmpty()) { key += "/"; From 8040422dbab8edfab2c70bf88ccc38ef2341fbde Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Thu, 4 Feb 2016 15:37:45 +0900 Subject: [PATCH 083/127] Add calculateSize to TableSpace --- .../apache/tajo/master/exec/DDLExecutor.java | 14 ++++++------ .../org/apache/tajo/querymaster/Query.java | 22 +++++++++---------- .../org/apache/tajo/storage/Tablespace.java | 2 ++ .../tajo/storage/hbase/HBaseTablespace.java | 6 +++++ .../apache/tajo/storage/FileTablespace.java | 1 + .../tajo/storage/pgsql/PgSQLTablespace.java | 9 ++++++++ 6 files changed, 35 insertions(+), 19 deletions(-) diff --git a/tajo-core/src/main/java/org/apache/tajo/master/exec/DDLExecutor.java b/tajo-core/src/main/java/org/apache/tajo/master/exec/DDLExecutor.java index 
6a2214191d..12081d8711 100644 --- a/tajo-core/src/main/java/org/apache/tajo/master/exec/DDLExecutor.java +++ b/tajo-core/src/main/java/org/apache/tajo/master/exec/DDLExecutor.java @@ -511,8 +511,8 @@ public void alterTable(TajoMaster.MasterContext context, final QueryContext quer long numBytes = 0L; if (fs.exists(partitionPath)) { - ContentSummary summary = fs.getContentSummary(partitionPath); - numBytes = summary.getLength(); + Tablespace tablespace = TablespaceManager.get(desc.getUri()); + numBytes = tablespace.calculateSize(partitionPath); } catalog.alterTable(CatalogUtil.addOrDropPartition(qualifiedName, alterTable.getPartitionColumns(), @@ -623,6 +623,7 @@ public void repairPartition(TajoMaster.MasterContext context, final QueryContext } // Find missing partitions from CatalogStore + Tablespace tablespace = TablespaceManager.get(tableDesc.getUri()); List targetPartitions = new ArrayList<>(); for(Path filteredPath : filteredPaths) { @@ -631,7 +632,7 @@ public void repairPartition(TajoMaster.MasterContext context, final QueryContext // if there is partition column in the path if (startIdx > -1) { - PartitionDescProto targetPartition = getPartitionDesc(tablePath, filteredPath, fs); + PartitionDescProto targetPartition = getPartitionDesc(tablespace, tablePath, filteredPath); if (!existingPartitionNames.contains(targetPartition.getPartitionName())) { if (LOG.isDebugEnabled()) { LOG.debug("Partitions not in CatalogStore:" + targetPartition.getPartitionName()); @@ -657,7 +658,8 @@ public void repairPartition(TajoMaster.MasterContext context, final QueryContext LOG.info("Total added partitions to CatalogStore: " + targetPartitions.size()); } - private PartitionDescProto getPartitionDesc(Path tablePath, Path partitionPath, FileSystem fs) throws IOException { + private PartitionDescProto getPartitionDesc(Tablespace tablespace, Path tablePath, Path partitionPath) + throws IOException { String partitionName = StringUtils.unescapePathName(partitionPath.toString()); int 
startIndex = partitionName.indexOf(tablePath.toString()) + tablePath.toString().length(); @@ -679,9 +681,7 @@ private PartitionDescProto getPartitionDesc(Path tablePath, Path partitionPath, } builder.setPath(partitionPath.toString()); - - ContentSummary contentSummary = fs.getContentSummary(partitionPath); - builder.setNumBytes(contentSummary.getLength()); + builder.setNumBytes(tablespace.calculateSize(partitionPath)); return builder.build(); } diff --git a/tajo-core/src/main/java/org/apache/tajo/querymaster/Query.java b/tajo-core/src/main/java/org/apache/tajo/querymaster/Query.java index cdc657a22a..1caaeb046c 100644 --- a/tajo-core/src/main/java/org/apache/tajo/querymaster/Query.java +++ b/tajo-core/src/main/java/org/apache/tajo/querymaster/Query.java @@ -515,8 +515,8 @@ private QueryState finalizeQuery(Query query, QueryCompletedEvent event) { if (queryContext.hasOutputTableUri() && queryContext.hasPartition()) { List partitions = query.getPartitions(); if (partitions != null) { - // Set contents length and file count to PartitionDescProto by listing final output directories. - List finalPartitions = getPartitionsWithContentsSummary(query.systemConf, + // Find each partition volume by listing all partitions. 
+ List finalPartitions = getPartitionsWithContentsSummary(queryContext, finalOutputDir, partitions); String databaseName, simpleTableName; @@ -547,16 +547,14 @@ private QueryState finalizeQuery(Query query, QueryCompletedEvent event) { return QueryState.QUERY_SUCCEEDED; } - private List getPartitionsWithContentsSummary(TajoConf conf, Path outputDir, - List partitions) throws IOException { + private List getPartitionsWithContentsSummary(QueryContext queryContext, + Path outputDir, List partitions) throws IOException { List finalPartitions = new ArrayList<>(); - FileSystem fileSystem = outputDir.getFileSystem(conf); for (PartitionDescProto partition : partitions) { PartitionDescProto.Builder builder = partition.toBuilder(); Path partitionPath = new Path(outputDir, partition.getPath()); - ContentSummary contentSummary = fileSystem.getContentSummary(partitionPath); - builder.setNumBytes(contentSummary.getLength()); + builder.setNumBytes(calculateSize(queryContext, partitionPath)); finalPartitions.add(builder.build()); } return finalPartitions; @@ -659,7 +657,7 @@ public void execute(QueryMaster.QueryMasterContext context, QueryContext queryCo finalOutputDir.toUri()); resultTableDesc.setExternal(true); - stats.setNumBytes(getTableVolume(queryContext, resultTableDesc)); + stats.setNumBytes(calculateSize(queryContext, finalOutputDir)); resultTableDesc.setStats(stats); query.setResultDesc(resultTableDesc); } @@ -696,7 +694,7 @@ public void execute(QueryMaster.QueryMasterContext context, QueryContext queryCo tableDescTobeCreated.setPartitionMethod(createTableNode.getPartitionMethod()); } - stats.setNumBytes(getTableVolume(queryContext, tableDescTobeCreated)); + stats.setNumBytes(calculateSize(queryContext, finalOutputDir)); tableDescTobeCreated.setStats(stats); query.setResultDesc(tableDescTobeCreated); @@ -734,7 +732,7 @@ public void execute(QueryMaster.QueryMasterContext context, QueryContext queryCo finalTable = new TableDesc(tableName, lastStage.getSchema(), meta, 
finalOutputDir.toUri()); } - long totalVolume = getTableVolume(queryContext, finalTable); + long totalVolume = calculateSize(queryContext, finalOutputDir); stats.setNumBytes(totalVolume); finalTable.setStats(stats); @@ -751,9 +749,9 @@ public void execute(QueryMaster.QueryMasterContext context, QueryContext queryCo } } - public static long getTableVolume(QueryContext queryContext, TableDesc tableDesc) throws UnsupportedException { + public static long calculateSize(QueryContext queryContext, Path path) throws IOException { Tablespace space = TablespaceManager.get(queryContext.get(QueryVars.OUTPUT_TABLE_URI, "")); - return space.getTableVolume(tableDesc, Optional.empty()); + return space.calculateSize(path); } public static class StageCompletedTransition implements SingleArcTransition { diff --git a/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/Tablespace.java b/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/Tablespace.java index 00e6d75a12..a917d37ed3 100644 --- a/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/Tablespace.java +++ b/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/Tablespace.java @@ -101,6 +101,8 @@ public String toString() { public abstract long getTableVolume(TableDesc table, Optional filter) throws UnsupportedException; + public abstract long calculateSize(Path path) throws IOException; + /** * if {@link StorageProperty#isArbitraryPathAllowed} is true, * the storage allows arbitrary path accesses. In this case, the storage must provide the root URI. 
diff --git a/tajo-storage/tajo-storage-hbase/src/main/java/org/apache/tajo/storage/hbase/HBaseTablespace.java b/tajo-storage/tajo-storage-hbase/src/main/java/org/apache/tajo/storage/hbase/HBaseTablespace.java index 132ceff0ae..e8f77b4dac 100644 --- a/tajo-storage/tajo-storage-hbase/src/main/java/org/apache/tajo/storage/hbase/HBaseTablespace.java +++ b/tajo-storage/tajo-storage-hbase/src/main/java/org/apache/tajo/storage/hbase/HBaseTablespace.java @@ -1118,4 +1118,10 @@ public void verifySchemaToWrite(TableDesc tableDesc, Schema outSchema) throws Ta } } } + + @Override + public long calculateSize(Path path) throws IOException { + FileSystem fs = path.getFileSystem(conf); + return fs.getContentSummary(path).getLength(); + } } diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java index e9aba4faa1..71f98fbdde 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java @@ -245,6 +245,7 @@ public static FileFragment[] splitNG(Configuration conf, String tableName, Table return tablets; } + @Override public long calculateSize(Path tablePath) throws IOException { FileSystem fs = tablePath.getFileSystem(conf); long totalSize = 0; diff --git a/tajo-storage/tajo-storage-pgsql/src/main/java/org/apache/tajo/storage/pgsql/PgSQLTablespace.java b/tajo-storage/tajo-storage-pgsql/src/main/java/org/apache/tajo/storage/pgsql/PgSQLTablespace.java index f44c85d2cb..14e236f8dc 100644 --- a/tajo-storage/tajo-storage-pgsql/src/main/java/org/apache/tajo/storage/pgsql/PgSQLTablespace.java +++ b/tajo-storage/tajo-storage-pgsql/src/main/java/org/apache/tajo/storage/pgsql/PgSQLTablespace.java @@ -19,6 +19,8 @@ package org.apache.tajo.storage.pgsql; import net.minidev.json.JSONObject; +import org.apache.hadoop.fs.FileSystem; 
+import org.apache.hadoop.fs.Path; import org.apache.tajo.catalog.MetadataProvider; import org.apache.tajo.catalog.Schema; import org.apache.tajo.catalog.TableMeta; @@ -73,4 +75,11 @@ public Scanner getScanner(TableMeta meta, public int hashCode() { throw new UnsupportedOperationException(); } + + @Override + public long calculateSize(Path path) throws IOException { + FileSystem fs = path.getFileSystem(conf); + return fs.getContentSummary(path).getLength(); + } + } From 2ae68cbc7b850b5ce48510b8cea407a3f179bf0e Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Fri, 5 Feb 2016 15:01:35 +0900 Subject: [PATCH 084/127] Remove hadoop-aws dependency --- tajo-storage/tajo-storage-s3/pom.xml | 29 ++++- .../apache/tajo/storage/s3/S3TableSpace.java | 6 +- .../tajo/storage/s3/TajoS3Credentials.java | 103 ++++++++++++++++++ 3 files changed, 131 insertions(+), 7 deletions(-) create mode 100644 tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/TajoS3Credentials.java diff --git a/tajo-storage/tajo-storage-s3/pom.xml b/tajo-storage/tajo-storage-s3/pom.xml index 65d00f50da..f6951ecbdc 100644 --- a/tajo-storage/tajo-storage-s3/pom.xml +++ b/tajo-storage/tajo-storage-s3/pom.xml @@ -34,6 +34,7 @@ UTF-8 UTF-8 + 1.7.4 0.97 @@ -98,6 +99,27 @@ true + + + org.apache.maven.plugins + maven-dependency-plugin + + + copy-dependencies + package + + copy-dependencies + + + runtime + ${project.build.directory}/lib + false + false + true + + + + @@ -170,10 +192,9 @@ - org.apache.hadoop - hadoop-aws - provided - ${hadoop.version} + com.amazonaws + aws-java-sdk + ${aws-java-sdk.version} diff --git a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java index 5c85781c9b..1b3b9c6a4e 100644 --- a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java +++ 
b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java @@ -42,7 +42,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.s3.S3Credentials; import org.apache.tajo.conf.TajoConf; import org.apache.tajo.storage.FileTablespace; @@ -50,13 +49,14 @@ import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Strings.nullToEmpty; -import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT; public class S3TableSpace extends FileTablespace { private final Log LOG = LogFactory.getLog(S3TableSpace.class); private AmazonS3 s3; private boolean useInstanceCredentials; + //use a custom endpoint? + public static final String ENDPOINT = "fs.s3a.endpoint"; public S3TableSpace(String spaceName, URI uri, JSONObject config) { super(spaceName, uri, config); @@ -123,7 +123,7 @@ private AWSCredentialsProvider getAwsCredentialsProvider(URI uri, Configuration } private static AWSCredentials getAwsCredentials(URI uri, Configuration conf) { - S3Credentials credentials = new S3Credentials(); + TajoS3Credentials credentials = new TajoS3Credentials(); credentials.initialize(uri, conf); return new BasicAWSCredentials(credentials.getAccessKey(), credentials.getSecretAccessKey()); } diff --git a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/TajoS3Credentials.java b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/TajoS3Credentials.java new file mode 100644 index 0000000000..138e0de41c --- /dev/null +++ b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/TajoS3Credentials.java @@ -0,0 +1,103 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.storage.s3; + +import java.net.URI; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; + +/** + *

+ * Extracts AWS credentials from the filesystem URI or configuration. (borrowed from hadoop-aws package) + *

+ */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class TajoS3Credentials { + + private String accessKey; + private String secretAccessKey; + + /** + * @throws IllegalArgumentException if credentials for S3 cannot be + * determined. + */ + public void initialize(URI uri, Configuration conf) { + if (uri.getHost() == null) { + throw new IllegalArgumentException("Invalid hostname in URI " + uri); + } + + String userInfo = uri.getUserInfo(); + if (userInfo != null) { + int index = userInfo.indexOf(':'); + if (index != -1) { + accessKey = userInfo.substring(0, index); + secretAccessKey = userInfo.substring(index + 1); + } else { + accessKey = userInfo; + } + } + + String scheme = uri.getScheme(); + String accessKeyProperty = String.format("fs.%s.awsAccessKeyId", scheme); + String secretAccessKeyProperty = + String.format("fs.%s.awsSecretAccessKey", scheme); + if (accessKey == null) { + accessKey = conf.getTrimmed(accessKeyProperty); + } + if (secretAccessKey == null) { + secretAccessKey = conf.getTrimmed(secretAccessKeyProperty); + } + if (accessKey == null && secretAccessKey == null) { + throw new IllegalArgumentException("AWS " + + "Access Key ID and Secret Access " + + "Key must be specified as the " + + "username or password " + + "(respectively) of a " + scheme + + " URL, or by setting the " + + accessKeyProperty + " or " + + secretAccessKeyProperty + + " properties (respectively)."); + } else if (accessKey == null) { + throw new IllegalArgumentException("AWS " + + "Access Key ID must be specified " + + "as the username of a " + scheme + + " URL, or by setting the " + + accessKeyProperty + " property."); + } else if (secretAccessKey == null) { + throw new IllegalArgumentException("AWS " + + "Secret Access Key must be " + + "specified as the password of a " + + scheme + " URL, or by setting the " + + secretAccessKeyProperty + + " property."); + } + + } + + public String getAccessKey() { + return accessKey; + } + + public String 
getSecretAccessKey() { + return secretAccessKey; + } +} \ No newline at end of file From 8f61db10e5b2dfee5772905a42e717328b7cbd21 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Thu, 11 Feb 2016 12:19:18 +0900 Subject: [PATCH 085/127] Remove hadoop-aws dependency --- .../org/apache/tajo/TajoTestingCluster.java | 2 +- .../org/apache/tajo/datum/Inet4Datum.java | 2 +- .../org/apache/tajo/cli/tsql/TestTajoCli.java | 2 +- .../org/apache/tajo/util/JvmPauseMonitor.java | 6 +- tajo-dist/pom.xml | 2 + tajo-project/pom.xml | 7 +- tajo-rpc/tajo-rpc-protobuf/pom.xml | 4 + tajo-storage/tajo-storage-hdfs/pom.xml | 12 + tajo-storage/tajo-storage-s3/pom.xml | 714 +++++++++++++++++- 9 files changed, 717 insertions(+), 34 deletions(-) diff --git a/tajo-cluster-tests/src/test/java/org/apache/tajo/TajoTestingCluster.java b/tajo-cluster-tests/src/test/java/org/apache/tajo/TajoTestingCluster.java index 3863203aad..28457a62d3 100644 --- a/tajo-cluster-tests/src/test/java/org/apache/tajo/TajoTestingCluster.java +++ b/tajo-cluster-tests/src/test/java/org/apache/tajo/TajoTestingCluster.java @@ -715,7 +715,7 @@ private static void writeLines(File file, String... 
lines) writer.write('\n'); } } finally { - Closeables.closeQuietly(writer); + writer.close(); } } diff --git a/tajo-common/src/main/java/org/apache/tajo/datum/Inet4Datum.java b/tajo-common/src/main/java/org/apache/tajo/datum/Inet4Datum.java index ab1799bac4..d6868fa82f 100644 --- a/tajo-common/src/main/java/org/apache/tajo/datum/Inet4Datum.java +++ b/tajo-common/src/main/java/org/apache/tajo/datum/Inet4Datum.java @@ -69,7 +69,7 @@ public int asInt4() { @Override public long asInt8() { - return UnsignedInteger.asUnsigned(address).longValue(); + return UnsignedInteger.fromIntBits(address).longValue(); } @Override diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/cli/tsql/TestTajoCli.java b/tajo-core-tests/src/test/java/org/apache/tajo/cli/tsql/TestTajoCli.java index 6bd694fad3..9349407c4d 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/cli/tsql/TestTajoCli.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/cli/tsql/TestTajoCli.java @@ -18,7 +18,7 @@ package org.apache.tajo.cli.tsql; -import com.google.common.io.NullOutputStream; +import org.apache.commons.io.output.NullOutputStream; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.PosixParser; diff --git a/tajo-core/src/main/java/org/apache/tajo/util/JvmPauseMonitor.java b/tajo-core/src/main/java/org/apache/tajo/util/JvmPauseMonitor.java index 8939bda057..3aa2a5834a 100644 --- a/tajo-core/src/main/java/org/apache/tajo/util/JvmPauseMonitor.java +++ b/tajo-core/src/main/java/org/apache/tajo/util/JvmPauseMonitor.java @@ -35,6 +35,8 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.TimeUnit; + /** * Class which sets up a simple thread which runs in a loop sleeping @@ -176,7 +178,7 @@ public String toString() { private class Monitor implements Runnable { @Override public void run() { - Stopwatch sw = new Stopwatch(); + Stopwatch sw = Stopwatch.createUnstarted(); Map 
gcTimesBeforeSleep = getGcTimes(); while (shouldRun) { sw.reset().start(); @@ -185,7 +187,7 @@ public void run() { } catch (InterruptedException ie) { return; } - long extraSleepTime = sw.elapsedMillis() - SLEEP_INTERVAL_MS; + long extraSleepTime = sw.elapsed(TimeUnit.MILLISECONDS) - SLEEP_INTERVAL_MS; Map gcTimesAfterSleep = getGcTimes(); if (extraSleepTime > warnThresholdMs) { diff --git a/tajo-dist/pom.xml b/tajo-dist/pom.xml index e679741cb2..9184225af9 100644 --- a/tajo-dist/pom.xml +++ b/tajo-dist/pom.xml @@ -158,6 +158,8 @@ run mkdir hive run mv lib/hive-*.jar hive/ + run cp -r $ROOT/tajo-storage/tajo-storage-s3/target/lib/* lib/ + run mkdir -p share/jdbc-dist run cp -r $ROOT/tajo-jdbc/target/tajo-jdbc-${project.version}-jar-with-dependencies.jar ./share/jdbc-dist/tajo-jdbc-${project.version}.jar diff --git a/tajo-project/pom.xml b/tajo-project/pom.xml index 59d2b465f2..13e5a7c9bc 100644 --- a/tajo-project/pom.xml +++ b/tajo-project/pom.xml @@ -1100,7 +1100,12 @@ com.google.guava guava - 11.0.2 + 18.0 + + + com.google.code.findbugs + jsr305 + 3.0.1 com.google.code.gson diff --git a/tajo-rpc/tajo-rpc-protobuf/pom.xml b/tajo-rpc/tajo-rpc-protobuf/pom.xml index 4d9a62bf74..d8a60ee8ca 100644 --- a/tajo-rpc/tajo-rpc-protobuf/pom.xml +++ b/tajo-rpc/tajo-rpc-protobuf/pom.xml @@ -147,6 +147,10 @@ com.google.guava guava + + com.google.code.findbugs + jsr305 + com.google.protobuf protobuf-java diff --git a/tajo-storage/tajo-storage-hdfs/pom.xml b/tajo-storage/tajo-storage-hdfs/pom.xml index 6c10a88baf..4613acaf2f 100644 --- a/tajo-storage/tajo-storage-hdfs/pom.xml +++ b/tajo-storage/tajo-storage-hdfs/pom.xml @@ -36,6 +36,7 @@ UTF-8 1.5.0 2.1.0 + 2.8.1 @@ -359,6 +360,17 @@ com.facebook.presto presto-orc 0.132 + + + joda-time + joda-time + + + + + joda-time + joda-time + ${joda-time.version} diff --git a/tajo-storage/tajo-storage-s3/pom.xml b/tajo-storage/tajo-storage-s3/pom.xml index f6951ecbdc..c598affc55 100644 --- a/tajo-storage/tajo-storage-s3/pom.xml +++ 
b/tajo-storage/tajo-storage-s3/pom.xml @@ -34,8 +34,13 @@ UTF-8 UTF-8 - 1.7.4 - 0.97 + + 0.122 + + 1.8.9.1 + 2.8.1 + 18.0 + 2.2.3 @@ -128,16 +133,182 @@ org.apache.tajo tajo-common provided + + + org.apache.hadoop + hadoop-common + + + com.google.protobuf + protobuf-java + + + commons-logging + commons-logging + + + commons-logging + commons-logging-api + + + commons-lang + commons-lang + + + com.google.guava + guava + + + com.google.code.gson + gson + + + io.netty + netty-buffer + + + org.iq80.snappy + snappy + +
org.apache.tajo tajo-storage-common provided + + + org.apache.tajo + tajo-common + + + org.apache.tajo + tajo-catalog-common + + + org.apache.tajo + tajo-plan + + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-hdfs + + + org.apache.hadoop + hadoop-mapreduce-client-core + + + com.google.protobuf + protobuf-java + + + io.netty + netty-buffer + + org.apache.tajo tajo-storage-hdfs provided + + + io.netty + netty-transport + + + io.netty + netty-codec + + + io.netty + netty-codec-http + + + org.apache.tajo + tajo-common + + + org.apache.tajo + tajo-catalog-common + + + org.apache.tajo + tajo-plan + + + org.apache.tajo + tajo-storage-common + + + org.apache.avro + trevni-core + + + org.apache.avro + trevni-avro + + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-hdfs + + + org.apache.hadoop + hadoop-mapreduce-client-core + + + com.google.protobuf + protobuf-java + + + com.twitter + parquet-column + + + com.twitter + parquet-hadoop + + + com.twitter + parquet-format + + + io.netty + netty-buffer + + + com.facebook.presto + presto-orc + + + + + + org.apache.tajo + tajo-catalog-common + test + + + org.apache.tajo + tajo-common + + + org.apache.hadoop + hadoop-common + + + com.google.protobuf + protobuf-java + + @@ -145,58 +316,452 @@ hadoop-common provided + + com.google.code.gson + gson + + + com.jcraft + jsch + + + org.apache.curator + curator-client + + + org.apache.curator + curator-recipes + + + org.apache.htrace + htrace-core + zookeeper org.apache.zookeeper - slf4j-api - org.slf4j + org.apache.commons + commons-compress - jersey-json - com.sun.jersey + org.apache.ant + ant + + + org.apache.commons + commons-compress + + org.apache.hadoop hadoop-hdfs - provided + test - commons-el - commons-el + org.apache.hadoop + hadoop-annotations + + + org.apache.hadoop + hadoop-auth + + + org.apache.hadoop + hadoop-common - tomcat - jasper-runtime + com.google.guava + guava - tomcat - jasper-compiler + org.mortbay.jetty + jetty 
org.mortbay.jetty - jsp-2.1-jetty + jetty-util - com.sun.jersey.jersey-test-framework - jersey-test-framework-grizzly2 + com.sun.jersey + jersey-core + + + com.sun.jersey + jersey-server + + + commons-cli + commons-cli + + + commons-codec + commons-codec + + + commons-io + commons-io + + + commons-lang + commons-lang + + + commons-logging + commons-logging + + + commons-daemon + commons-daemon + + + log4j + log4j + + + com.google.protobuf + protobuf-java + + + javax.servlet + servlet-api + + + org.slf4j + slf4j-log4j12 + + + org.codehaus.jackson + jackson-core-asl + + + org.codehaus.jackson + jackson-mapper-asl + + + xmlenc + xmlenc - netty-all io.netty + netty-all + + + xerces + xercesImpl + + + org.apache.htrace + htrace-core + + + org.fusesource.leveldbjni + leveldbjni-all + + + + + + + + + + + + + + + + + + + + - com.amazonaws - aws-java-sdk - ${aws-java-sdk.version} + org.apache.tajo + tajo-plan + test + + + + com.fasterxml.jackson.core + jackson-databind + test + ${jackson2.version} + + + com.fasterxml.jackson.core + jackson-annotations + test + ${jackson2.version} + + + + junit + junit + test + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + io.airlift + units + ${airlft.version} + + + javax.validation + validation-api + + + com.google.guava + guava + + + com.fasterxml.jackson.core + jackson-annotations + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + org.weakref jmxutils @@ -204,20 +769,100 @@ - org.apache.httpcomponents - httpclient - 4.2.5 + com.amazonaws + + aws-java-sdk + ${aws-java-sdk.version} + + + commons-logging + commons-logging + + + org.apache.httpcomponents + httpclient + 
+ + joda-time + joda-time + + + javax.mail + mail + + + org.freemarker + freemarker + + + org.springframework + spring-beans + + + org.springframework + spring-core + + + org.springframework + spring-context + + + org.springframework + spring-test + + + org.aspectj + aspectjrt + + + junit + junit + + + com.fasterxml.jackson.core + jackson-annotations + + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-databind + + + org.apache.httpcomponents - httpcore - 4.2.5 + httpclient + 4.5 + + + org.apache.httpcomponents + httpcore + + + commons-logging + commons-logging + + + commons-codec + commons-codec + + - junit - junit - test + org.apache.httpcomponents + httpcore + 4.4.1 + + + junit + junit + + @@ -227,6 +872,19 @@ test + + joda-time + joda-time + ${joda-time.version} + compile + + + + javax.validation + validation-api + 1.1.0.Final + + From 00b564b14300283d7a1d26c36e3c21ce2ac526b4 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Thu, 11 Feb 2016 12:53:02 +0900 Subject: [PATCH 086/127] Apply AmazonSDK 1.7.4 --- .../org/apache/tajo/TajoTestingCluster.java | 2 +- .../org/apache/tajo/datum/Inet4Datum.java | 2 +- .../org/apache/tajo/util/JvmPauseMonitor.java | 6 +- tajo-project/pom.xml | 7 +- tajo-rpc/tajo-rpc-protobuf/pom.xml | 4 - tajo-storage/tajo-storage-hdfs/pom.xml | 12 - tajo-storage/tajo-storage-s3/pom.xml | 262 +----------------- 7 files changed, 8 insertions(+), 287 deletions(-) diff --git a/tajo-cluster-tests/src/test/java/org/apache/tajo/TajoTestingCluster.java b/tajo-cluster-tests/src/test/java/org/apache/tajo/TajoTestingCluster.java index 28457a62d3..3863203aad 100644 --- a/tajo-cluster-tests/src/test/java/org/apache/tajo/TajoTestingCluster.java +++ b/tajo-cluster-tests/src/test/java/org/apache/tajo/TajoTestingCluster.java @@ -715,7 +715,7 @@ private static void writeLines(File file, String... 
lines) writer.write('\n'); } } finally { - writer.close(); + Closeables.closeQuietly(writer); } } diff --git a/tajo-common/src/main/java/org/apache/tajo/datum/Inet4Datum.java b/tajo-common/src/main/java/org/apache/tajo/datum/Inet4Datum.java index d6868fa82f..ab1799bac4 100644 --- a/tajo-common/src/main/java/org/apache/tajo/datum/Inet4Datum.java +++ b/tajo-common/src/main/java/org/apache/tajo/datum/Inet4Datum.java @@ -69,7 +69,7 @@ public int asInt4() { @Override public long asInt8() { - return UnsignedInteger.fromIntBits(address).longValue(); + return UnsignedInteger.asUnsigned(address).longValue(); } @Override diff --git a/tajo-core/src/main/java/org/apache/tajo/util/JvmPauseMonitor.java b/tajo-core/src/main/java/org/apache/tajo/util/JvmPauseMonitor.java index 3aa2a5834a..8939bda057 100644 --- a/tajo-core/src/main/java/org/apache/tajo/util/JvmPauseMonitor.java +++ b/tajo-core/src/main/java/org/apache/tajo/util/JvmPauseMonitor.java @@ -35,8 +35,6 @@ import java.util.List; import java.util.Map; import java.util.Set; -import java.util.concurrent.TimeUnit; - /** * Class which sets up a simple thread which runs in a loop sleeping @@ -178,7 +176,7 @@ public String toString() { private class Monitor implements Runnable { @Override public void run() { - Stopwatch sw = Stopwatch.createUnstarted(); + Stopwatch sw = new Stopwatch(); Map gcTimesBeforeSleep = getGcTimes(); while (shouldRun) { sw.reset().start(); @@ -187,7 +185,7 @@ public void run() { } catch (InterruptedException ie) { return; } - long extraSleepTime = sw.elapsed(TimeUnit.MILLISECONDS) - SLEEP_INTERVAL_MS; + long extraSleepTime = sw.elapsedMillis() - SLEEP_INTERVAL_MS; Map gcTimesAfterSleep = getGcTimes(); if (extraSleepTime > warnThresholdMs) { diff --git a/tajo-project/pom.xml b/tajo-project/pom.xml index 13e5a7c9bc..59d2b465f2 100644 --- a/tajo-project/pom.xml +++ b/tajo-project/pom.xml @@ -1100,12 +1100,7 @@ com.google.guava guava - 18.0 - - - com.google.code.findbugs - jsr305 - 3.0.1 + 11.0.2 
com.google.code.gson diff --git a/tajo-rpc/tajo-rpc-protobuf/pom.xml b/tajo-rpc/tajo-rpc-protobuf/pom.xml index d8a60ee8ca..4d9a62bf74 100644 --- a/tajo-rpc/tajo-rpc-protobuf/pom.xml +++ b/tajo-rpc/tajo-rpc-protobuf/pom.xml @@ -147,10 +147,6 @@ com.google.guava guava - - com.google.code.findbugs - jsr305 - com.google.protobuf protobuf-java diff --git a/tajo-storage/tajo-storage-hdfs/pom.xml b/tajo-storage/tajo-storage-hdfs/pom.xml index 4613acaf2f..6c10a88baf 100644 --- a/tajo-storage/tajo-storage-hdfs/pom.xml +++ b/tajo-storage/tajo-storage-hdfs/pom.xml @@ -36,7 +36,6 @@ UTF-8 1.5.0 2.1.0 - 2.8.1 @@ -360,17 +359,6 @@ com.facebook.presto presto-orc 0.132 - - - joda-time - joda-time - - - - - joda-time - joda-time - ${joda-time.version} diff --git a/tajo-storage/tajo-storage-s3/pom.xml b/tajo-storage/tajo-storage-s3/pom.xml index c598affc55..4af62e0062 100644 --- a/tajo-storage/tajo-storage-s3/pom.xml +++ b/tajo-storage/tajo-storage-s3/pom.xml @@ -34,12 +34,10 @@ UTF-8 UTF-8 - 0.122 - - 1.8.9.1 - 2.8.1 - 18.0 + 1.7.4 + 2.8.2 + 11.0.2 2.2.3 @@ -464,26 +462,6 @@ - - - - - - - - - - - - - - - - - - - - org.apache.tajo tajo-plan @@ -509,154 +487,6 @@ test - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - io.airlift units @@ -677,91 +507,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - org.weakref jmxutils @@ -770,7 +515,6 @@ com.amazonaws - aws-java-sdk ${aws-java-sdk.version} From 3f7b9fb7b45a067ac7da299ebf2ae50f466cde67 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Thu, 11 Feb 2016 12:56:22 +0900 Subject: [PATCH 087/127] Remove unnecessary updates --- 
.../src/test/java/org/apache/tajo/cli/tsql/TestTajoCli.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/cli/tsql/TestTajoCli.java b/tajo-core-tests/src/test/java/org/apache/tajo/cli/tsql/TestTajoCli.java index 9349407c4d..6bd694fad3 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/cli/tsql/TestTajoCli.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/cli/tsql/TestTajoCli.java @@ -18,7 +18,7 @@ package org.apache.tajo.cli.tsql; -import org.apache.commons.io.output.NullOutputStream; +import com.google.common.io.NullOutputStream; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.PosixParser; From ee414e94393fafc65caa5c56ec9e86783722d8f4 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Fri, 12 Feb 2016 02:11:50 +0900 Subject: [PATCH 088/127] Trigger for Travis CI build --- .../apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java | 1 + 1 file changed, 1 insertion(+) diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java index 5eb8e255b0..71043768fa 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java @@ -591,4 +591,5 @@ public Object visitScan(OverridableConf queryContext, LogicalPlan plan, LogicalP return null; } } + } From 06d7458e4bef480d6a520e567ea4f964aeb7338d Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Thu, 18 Feb 2016 10:48:45 +0900 Subject: [PATCH 089/127] Trigger travis CI build --- .../apache/tajo/engine/planner/TestPartitionedTableRewriter.java | 1 + 1 file changed, 1 insertion(+) diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java 
b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java index c22447d414..6f7c774e92 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java @@ -47,6 +47,7 @@ import static org.junit.Assert.*; public class TestPartitionedTableRewriter extends QueryTestCaseBase { + final static String PARTITION_TABLE_NAME = "tb_partition"; final static String MULTIPLE_PARTITION_TABLE_NAME = "tb_multiple_partition"; From e82ebef4d25b7eec41626411c53c4618f43ea678 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 22 Feb 2016 15:37:29 +0900 Subject: [PATCH 090/127] Trigger for travis CI build --- .../apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java | 1 + 1 file changed, 1 insertion(+) diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java index 71043768fa..03b8ffeedc 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java @@ -584,6 +584,7 @@ public Object visitScan(OverridableConf queryContext, LogicalPlan plan, LogicalP long finishTime = System.currentTimeMillis(); long elapsedMills = finishTime - startTime; + LOG.info(String.format("Partition pruning: %d ms elapsed.", elapsedMills)); } catch (IOException e) { throw new TajoInternalError("Partitioned Table Rewrite Failed: \n" + e.getMessage()); From 5a7b4e229ed9e9f9bfe23b3a776d47214e851593 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 7 Mar 2016 11:31:09 +0900 Subject: [PATCH 091/127] Trigger for travis CI build --- CHANGES | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES b/CHANGES index 592bfe3e20..3aea02e006 100644 --- a/CHANGES +++ 
b/CHANGES @@ -2,6 +2,7 @@ Tajo Change Log Release 0.12.0 - unreleased + NEW FEATURES TAJO-1955: Add a feature to strip quotes from CSV file. (hyunsik) From 36616d0605bf37fcf791aa04f9791b9c5de029b2 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Tue, 22 Mar 2016 01:09:05 +0900 Subject: [PATCH 092/127] PartitionedTableRewriter just rewrite ScanNode to PartitionedTableScanNode, acutal partition pruning will be executed to Repartitioner --- .../planner/TestPartitionedTableRewriter.java | 8 -- .../rewriter/rules/BroadcastJoinRule.java | 15 +-- .../rules/GlobalPlanEqualityTester.java | 15 +-- .../rewriter/rules/GlobalPlanRewriteUtil.java | 22 +--- .../planner/physical/ExternalSortExec.java | 1 + .../tajo/master/TajoMasterClientService.java | 3 +- .../exec/ExplainPlanPreprocessorForTest.java | 3 - .../NonForwardQueryResultFileScanner.java | 14 ++- .../tajo/master/exec/QueryExecutor.java | 3 +- .../tajo/querymaster/Repartitioner.java | 24 ++-- .../org/apache/tajo/querymaster/Stage.java | 12 +- .../ws/rs/resources/QueryResultResource.java | 3 +- .../logical/PartitionedTableScanNode.java | 118 +----------------- .../rules/LogicalPlanEqualityTester.java | 14 +-- .../rules/PartitionedTableRewriter.java | 63 +++++----- .../plan/serder/LogicalNodeDeserializer.java | 10 +- .../plan/serder/LogicalNodeSerializer.java | 9 +- .../verifier/PostLogicalPlanVerifier.java | 7 -- .../apache/tajo/storage/FileTablespace.java | 14 ++- 19 files changed, 96 insertions(+), 262 deletions(-) diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java index 6f7c774e92..d6c8571f83 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java @@ -168,7 +168,6 @@ public void testFilterIncludePartitionKeyColumn() 
throws Exception { PartitionedTableRewriter rewriter = new PartitionedTableRewriter(); rewriter.setCatalog(catalog); - OverridableConf conf = CommonTestingUtil.getSessionVarsForTest(); PartitionPruningHandle partitionPruningHandle = rewriter.getPartitionPruningHandle(conf, scanNode); assertNotNull(partitionPruningHandle); @@ -205,7 +204,6 @@ public void testWithoutAnyFilters() throws Exception { PartitionedTableRewriter rewriter = new PartitionedTableRewriter(); rewriter.setCatalog(catalog); - OverridableConf conf = CommonTestingUtil.getSessionVarsForTest(); PartitionPruningHandle partitionPruningHandle = rewriter.getPartitionPruningHandle(conf, scanNode); assertNotNull(partitionPruningHandle); @@ -247,7 +245,6 @@ public void testFilterIncludeNonExistingPartitionValue() throws Exception { PartitionedTableRewriter rewriter = new PartitionedTableRewriter(); rewriter.setCatalog(catalog); - OverridableConf conf = CommonTestingUtil.getSessionVarsForTest(); PartitionPruningHandle partitionPruningHandle = rewriter.getPartitionPruningHandle(conf, scanNode); assertNotNull(partitionPruningHandle); @@ -281,7 +278,6 @@ public void testFilterIncludeNonPartitionKeyColumn() throws Exception { PartitionedTableRewriter rewriter = new PartitionedTableRewriter(); rewriter.setCatalog(catalog); - OverridableConf conf = CommonTestingUtil.getSessionVarsForTest(); PartitionPruningHandle partitionPruningHandle = rewriter.getPartitionPruningHandle(conf, scanNode); assertNotNull(partitionPruningHandle); @@ -324,7 +320,6 @@ public void testFilterIncludeEveryPartitionKeyColumn() throws Exception { PartitionedTableRewriter rewriter = new PartitionedTableRewriter(); rewriter.setCatalog(catalog); - OverridableConf conf = CommonTestingUtil.getSessionVarsForTest(); PartitionPruningHandle partitionPruningHandle = rewriter.getPartitionPruningHandle(conf, scanNode); assertNotNull(partitionPruningHandle); @@ -368,7 +363,6 @@ public void testFilterIncludeSomeOfPartitionKeyColumns() throws Exception { 
PartitionedTableRewriter rewriter = new PartitionedTableRewriter(); rewriter.setCatalog(catalog); - OverridableConf conf = CommonTestingUtil.getSessionVarsForTest(); PartitionPruningHandle partitionPruningHandle = rewriter.getPartitionPruningHandle(conf, scanNode); assertNotNull(partitionPruningHandle); @@ -418,7 +412,6 @@ public void testFilterIncludeNonPartitionKeyColumns() throws Exception { PartitionedTableRewriter rewriter = new PartitionedTableRewriter(); rewriter.setCatalog(catalog); - OverridableConf conf = CommonTestingUtil.getSessionVarsForTest(); PartitionPruningHandle partitionPruningHandle = rewriter.getPartitionPruningHandle(conf, scanNode); assertNotNull(partitionPruningHandle); @@ -478,7 +471,6 @@ public final void testPartitionPruningWitCTAS() throws Exception { PartitionedTableRewriter rewriter = new PartitionedTableRewriter(); rewriter.setCatalog(catalog); - OverridableConf conf = CommonTestingUtil.getSessionVarsForTest(); PartitionPruningHandle partitionPruningHandle = rewriter.getPartitionPruningHandle(conf, scanNode); assertNotNull(partitionPruningHandle); diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/rewriter/rules/BroadcastJoinRule.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/rewriter/rules/BroadcastJoinRule.java index d390740187..3fc03645c0 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/rewriter/rules/BroadcastJoinRule.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/rewriter/rules/BroadcastJoinRule.java @@ -317,6 +317,7 @@ private long estimateOutputVolumeInternal(LogicalNode node) throws TajoInternalE if (node instanceof RelationNode) { switch (node.getType()) { case INDEX_SCAN: + case PARTITIONS_SCAN: case SCAN: ScanNode scanNode = (ScanNode) node; if (scanNode.getTableDesc().getStats() == null) { @@ -326,20 +327,6 @@ private long estimateOutputVolumeInternal(LogicalNode node) throws TajoInternalE } else { return 
scanNode.getTableDesc().getStats().getNumBytes(); } - case PARTITIONS_SCAN: - PartitionedTableScanNode pScanNode = (PartitionedTableScanNode) node; - if (pScanNode.getTableDesc().getStats() == null) { - // TODO - this case means that data is not located in HDFS. So, we need additional - // broadcast method. - return Long.MAX_VALUE; - } else { - // if there is no selected partition - if (pScanNode.getInputPaths() == null || pScanNode.getInputPaths().length == 0) { - return 0; - } else { - return pScanNode.getTableDesc().getStats().getNumBytes(); - } - } case TABLE_SUBQUERY: return estimateOutputVolumeInternal(((TableSubQueryNode) node).getSubQuery()); } diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/rewriter/rules/GlobalPlanEqualityTester.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/rewriter/rules/GlobalPlanEqualityTester.java index 1d104d7ae9..5758f5ece8 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/rewriter/rules/GlobalPlanEqualityTester.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/rewriter/rules/GlobalPlanEqualityTester.java @@ -18,20 +18,15 @@ package org.apache.tajo.engine.planner.global.rewriter.rules; -import org.apache.hadoop.hive.ql.plan.PlanUtils; import org.apache.tajo.OverridableConf; import org.apache.tajo.engine.planner.global.ExecutionBlock; import org.apache.tajo.engine.planner.global.ExecutionBlockCursor; import org.apache.tajo.engine.planner.global.MasterPlan; import org.apache.tajo.engine.planner.global.rewriter.GlobalPlanRewriteRule; import org.apache.tajo.plan.logical.LogicalNode; -import org.apache.tajo.plan.logical.NodeType; -import org.apache.tajo.plan.logical.PartitionedTableScanNode; -import org.apache.tajo.plan.logical.ScanNode; import org.apache.tajo.plan.serder.LogicalNodeDeserializer; import org.apache.tajo.plan.serder.LogicalNodeSerializer; import org.apache.tajo.plan.serder.PlanProto; -import 
org.apache.tajo.plan.util.PlannerUtil; /** * It verifies the equality between the input and output of LogicalNodeTree(De)Serializer in global planning. @@ -57,15 +52,7 @@ public MasterPlan rewrite(OverridableConf queryContext, MasterPlan plan) { if (node != null) { PlanProto.LogicalNodeTree tree = LogicalNodeSerializer.serialize(node); LogicalNode deserialize = LogicalNodeDeserializer.deserialize(plan.getContext(), null, tree); - - // Error handling PartitionedTableScanNode because LogicalNodeDeserializer convert it to ScanNode. - PartitionedTableScanNode partitionedTableScanNode = PlannerUtil.findTopNode(node, NodeType.PARTITIONS_SCAN); - if (partitionedTableScanNode != null) { - ScanNode scanNode = PlannerUtil.findTopNode(deserialize, NodeType.SCAN); - assert scanNode != null; - } else { - assert node.deepEquals(deserialize); - } + assert node.deepEquals(deserialize); } } return plan; diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/rewriter/rules/GlobalPlanRewriteUtil.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/rewriter/rules/GlobalPlanRewriteUtil.java index b13cb0f1a8..6513ac743b 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/rewriter/rules/GlobalPlanRewriteUtil.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/rewriter/rules/GlobalPlanRewriteUtil.java @@ -97,13 +97,6 @@ public static void replaceChild(LogicalNode newChild, ScanNode originalChild, Lo public static long getTableVolume(ScanNode scanNode) { if (scanNode.getTableDesc().hasStats()) { long scanBytes = scanNode.getTableDesc().getStats().getNumBytes(); - if (scanNode.getType() == NodeType.PARTITIONS_SCAN) { - PartitionedTableScanNode pScanNode = (PartitionedTableScanNode) scanNode; - if (pScanNode.getInputPaths() == null || pScanNode.getInputPaths().length == 0) { - scanBytes = 0L; - } - } - return scanBytes; } else { return -1; @@ -117,6 +110,7 @@ public static long computeDescendentVolume(LogicalNode 
node) { if (node instanceof RelationNode) { switch (node.getType()) { + case PARTITIONS_SCAN: case SCAN: ScanNode scanNode = (ScanNode) node; if (scanNode.getTableDesc().getStats() == null) { @@ -126,20 +120,6 @@ public static long computeDescendentVolume(LogicalNode node) { } else { return scanNode.getTableDesc().getStats().getNumBytes(); } - case PARTITIONS_SCAN: - PartitionedTableScanNode pScanNode = (PartitionedTableScanNode) node; - if (pScanNode.getTableDesc().getStats() == null) { - // TODO - this case means that data is not located in HDFS. So, we need additional - // broadcast method. - return Long.MAX_VALUE; - } else { - // if there is no selected partition - if (pScanNode.getInputPaths() == null || pScanNode.getInputPaths().length == 0) { - return 0; - } else { - return pScanNode.getTableDesc().getStats().getNumBytes(); - } - } case TABLE_SUBQUERY: return computeDescendentVolume(((TableSubQueryNode) node).getSubQuery()); default: diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/ExternalSortExec.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/ExternalSortExec.java index ff629c3a8d..c532b9221c 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/ExternalSortExec.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/ExternalSortExec.java @@ -142,6 +142,7 @@ public ExternalSortExec(final TaskAttemptContext context,final SortNode plan, fi mergedInputFragments = new ArrayList<>(); for (CatalogProtos.FragmentProto proto : fragments) { FileFragment fragment = FragmentConvertor.convert(FileFragment.class, proto); + LOG.debug("### fragment:" + fragment.toString()); mergedInputFragments.add(new Chunk(inSchema, fragment, scanNode.getTableDesc().getMeta())); } } diff --git a/tajo-core/src/main/java/org/apache/tajo/master/TajoMasterClientService.java b/tajo-core/src/main/java/org/apache/tajo/master/TajoMasterClientService.java index bfba51d70b..fdeff6db91 100644 --- 
a/tajo-core/src/main/java/org/apache/tajo/master/TajoMasterClientService.java +++ b/tajo-core/src/main/java/org/apache/tajo/master/TajoMasterClientService.java @@ -568,7 +568,8 @@ public GetQueryResultDataResponse getQueryResultData(RpcController controller, G queryId, scanNode, Integer.MAX_VALUE, - codecType); + codecType, + context.getCatalog()); queryResultScanner.init(); session.addNonForwardQueryResultScanner(queryResultScanner); diff --git a/tajo-core/src/main/java/org/apache/tajo/master/exec/ExplainPlanPreprocessorForTest.java b/tajo-core/src/main/java/org/apache/tajo/master/exec/ExplainPlanPreprocessorForTest.java index 2740728db6..ef5af73587 100644 --- a/tajo-core/src/main/java/org/apache/tajo/master/exec/ExplainPlanPreprocessorForTest.java +++ b/tajo-core/src/main/java/org/apache/tajo/master/exec/ExplainPlanPreprocessorForTest.java @@ -118,9 +118,6 @@ public LogicalNode visitPartitionedTableScan(PlanShapeFixerContext context, Logi throws TajoException { super.visitPartitionedTableScan(context, plan, block, node, stack); context.childNumbers.push(1); - Path[] inputPaths = node.getInputPaths(); - Arrays.sort(inputPaths); - node.setInputPaths(inputPaths); if (node.hasTargets()) { node.setTargets(sortTargets(node.getTargets())); } diff --git a/tajo-core/src/main/java/org/apache/tajo/master/exec/NonForwardQueryResultFileScanner.java b/tajo-core/src/main/java/org/apache/tajo/master/exec/NonForwardQueryResultFileScanner.java index a1728ec2ca..d231e48648 100644 --- a/tajo-core/src/main/java/org/apache/tajo/master/exec/NonForwardQueryResultFileScanner.java +++ b/tajo-core/src/main/java/org/apache/tajo/master/exec/NonForwardQueryResultFileScanner.java @@ -24,10 +24,8 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.Path; -import org.apache.tajo.ExecutionBlockId; -import org.apache.tajo.QueryId; -import org.apache.tajo.TaskAttemptId; -import org.apache.tajo.TaskId; +import org.apache.tajo.*; +import 
org.apache.tajo.catalog.CatalogService; import org.apache.tajo.catalog.Schema; import org.apache.tajo.catalog.SchemaUtil; import org.apache.tajo.catalog.TableDesc; @@ -80,10 +78,12 @@ public class NonForwardQueryResultFileScanner implements NonForwardQueryResultSc final private Optional codecType; private MemoryRowBlock rowBlock; private Future nextFetch; + private CatalogService catalog; public NonForwardQueryResultFileScanner(AsyncTaskService asyncTaskService, TajoConf tajoConf, String sessionId, QueryId queryId, ScanNode scanNode, - int maxRow, Optional codecType) throws IOException { + int maxRow, Optional codecType, + CatalogService catalog) throws IOException { this.asyncTaskService = asyncTaskService; this.tajoConf = tajoConf; this.sessionId = sessionId; @@ -93,6 +93,7 @@ public NonForwardQueryResultFileScanner(AsyncTaskService asyncTaskService, this.maxRow = maxRow; this.rowEncoder = RowStoreUtil.createEncoder(scanNode.getOutSchema()); this.codecType = codecType; + this.catalog = catalog; } public void init() throws IOException, TajoException { @@ -105,7 +106,8 @@ private void initSeqScanExec() throws IOException, TajoException { List fragments = Lists.newArrayList(); if (tableDesc.hasPartition()) { FileTablespace fileTablespace = TUtil.checkTypeAndGet(tablespace, FileTablespace.class); - fragments.addAll(Repartitioner.getFragmentsFromPartitionedTable(fileTablespace, scanNode, tableDesc)); + fragments.addAll(Repartitioner.getFragmentsFromPartitionedTable(fileTablespace, scanNode, tableDesc + , catalog, tajoConf)); } else { fragments.addAll(tablespace.getSplits(tableDesc.getName(), tableDesc, scanNode.getQual())); } diff --git a/tajo-core/src/main/java/org/apache/tajo/master/exec/QueryExecutor.java b/tajo-core/src/main/java/org/apache/tajo/master/exec/QueryExecutor.java index 641e2b92d3..392e9ebf00 100644 --- a/tajo-core/src/main/java/org/apache/tajo/master/exec/QueryExecutor.java +++ b/tajo-core/src/main/java/org/apache/tajo/master/exec/QueryExecutor.java 
@@ -309,7 +309,8 @@ public void execSimpleQuery(QueryContext queryContext, Session session, String q queryInfo.getQueryId(), scanNode, maxRow, - Optional.empty()); + Optional.empty(), + context.getCatalog()); queryResultScanner.init(); diff --git a/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java b/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java index 662bfdcb9c..339a603d11 100644 --- a/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java +++ b/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java @@ -32,6 +32,7 @@ import org.apache.tajo.catalog.*; import org.apache.tajo.catalog.statistics.StatisticsUtil; import org.apache.tajo.catalog.statistics.TableStats; +import org.apache.tajo.conf.TajoConf; import org.apache.tajo.conf.TajoConf.ConfVars; import org.apache.tajo.engine.planner.PhysicalPlannerImpl; import org.apache.tajo.engine.planner.RangePartitionAlgorithm; @@ -45,6 +46,8 @@ import org.apache.tajo.exception.*; import org.apache.tajo.plan.logical.*; import org.apache.tajo.plan.logical.SortNode.SortPurpose; +import org.apache.tajo.plan.partition.PartitionPruningHandle; +import org.apache.tajo.plan.rewrite.rules.PartitionedTableRewriter; import org.apache.tajo.plan.serder.PlanProto.DistinctGroupbyEnforcer.MultipleAggregationStage; import org.apache.tajo.plan.serder.PlanProto.EnforceProperty; import org.apache.tajo.plan.util.PlannerUtil; @@ -386,7 +389,9 @@ private static void scheduleSymmetricRepartitionJoin(QueryMasterTask.QueryMaster Collection scanFragments = null; if (eachScan.getType() == NodeType.PARTITIONS_SCAN) { - scanFragments = getFragmentsFromPartitionedTable(space, eachScan, tableDesc); + CatalogService catalog = stage.getContext().getQueryMasterContext().getWorkerContext().getCatalog(); + TajoConf conf = stage.getContext().getQueryContext().getConf(); + scanFragments = getFragmentsFromPartitionedTable(space, eachScan, tableDesc, catalog, conf); } else { scanFragments = 
space.getSplits(eachScan.getCanonicalName(), tableDesc, eachScan.getQual()); } @@ -459,18 +464,21 @@ public static Map>> merge * It creates a number of fragments for all partitions. */ public static List getFragmentsFromPartitionedTable(Tablespace tsHandler, - ScanNode scan, - TableDesc table) throws IOException { + ScanNode scan, TableDesc table, CatalogService catalog, TajoConf conf) throws IOException, TajoException { Preconditions.checkArgument(tsHandler instanceof FileTablespace, "tsHandler must be FileTablespace"); if (!(scan instanceof PartitionedTableScanNode)) { throw new IllegalArgumentException("scan should be a PartitionedTableScanNode type."); } List fragments = Lists.newArrayList(); PartitionedTableScanNode partitionsScan = (PartitionedTableScanNode) scan; + partitionsScan.init(scan); + PartitionedTableRewriter rewriter = new PartitionedTableRewriter(); + rewriter.setCatalog(catalog); + PartitionPruningHandle pruningHandle = rewriter.getPartitionPruningHandle(conf, partitionsScan); - fragments.addAll(((FileTablespace) tsHandler).getPartitionSplits( - scan.getCanonicalName(), table.getMeta(), table.getSchema(), partitionsScan.getPartitionKeys(), - partitionsScan.getInputPaths())); + FileTablespace tablespace = (FileTablespace) tsHandler; + fragments.addAll(tablespace.getPartitionSplits(scan.getCanonicalName(), table.getMeta(), table.getSchema() + , pruningHandle.getPartitionKeys(), pruningHandle.getPartitionPaths())); return fragments; } @@ -506,7 +514,9 @@ private static void scheduleLeafTasksWithBroadcastTable(TaskSchedulerContext sch Tablespace space = TablespaceManager.get(desc.getUri()); if (scan.getType() == NodeType.PARTITIONS_SCAN) { - scanFragments = getFragmentsFromPartitionedTable(space, scan, desc); + CatalogService catalog = stage.getContext().getQueryMasterContext().getWorkerContext().getCatalog(); + TajoConf conf = stage.getContext().getQueryContext().getConf(); + scanFragments = getFragmentsFromPartitionedTable(space, scan, desc, 
catalog, conf); } else { scanFragments = space.getSplits(scan.getCanonicalName(), desc, scan.getQual()); } diff --git a/tajo-core/src/main/java/org/apache/tajo/querymaster/Stage.java b/tajo-core/src/main/java/org/apache/tajo/querymaster/Stage.java index 08ff18436a..022ea5c6db 100644 --- a/tajo-core/src/main/java/org/apache/tajo/querymaster/Stage.java +++ b/tajo-core/src/main/java/org/apache/tajo/querymaster/Stage.java @@ -28,10 +28,7 @@ import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.state.*; import org.apache.tajo.*; -import org.apache.tajo.catalog.CatalogUtil; -import org.apache.tajo.catalog.Schema; -import org.apache.tajo.catalog.TableDesc; -import org.apache.tajo.catalog.TableMeta; +import org.apache.tajo.catalog.*; import org.apache.tajo.catalog.proto.CatalogProtos.PartitionDescProto; import org.apache.tajo.catalog.statistics.ColumnStats; import org.apache.tajo.catalog.statistics.StatisticsUtil; @@ -52,7 +49,6 @@ import org.apache.tajo.master.event.*; import org.apache.tajo.master.event.TaskAttemptToSchedulerEvent.TaskAttemptScheduleContext; import org.apache.tajo.plan.logical.*; -import org.apache.tajo.plan.serder.PlanProto; import org.apache.tajo.plan.serder.PlanProto.DistinctGroupbyEnforcer.MultipleAggregationStage; import org.apache.tajo.plan.serder.PlanProto.EnforceProperty; import org.apache.tajo.plan.util.PlannerUtil; @@ -1195,8 +1191,12 @@ private static void scheduleFragmentsForLeafQuery(Stage stage) throws IOExceptio // // Also, we can ensure FileTableSpace if the type of ScanNode is PARTITIONS_SCAN. if (scan.getType() == NodeType.PARTITIONS_SCAN) { + CatalogService catalog = stage.getContext().getQueryMasterContext().getWorkerContext().getCatalog(); + TajoConf conf = stage.getContext().getQueryContext().getConf(); + // After calling this method, partition paths are removed from the physical plan. 
- fragments = Repartitioner.getFragmentsFromPartitionedTable((FileTablespace) tablespace, scan, table); + fragments = Repartitioner.getFragmentsFromPartitionedTable((FileTablespace) tablespace, scan, table, catalog, + conf); } else { fragments = tablespace.getSplits(scan.getCanonicalName(), table, scan.getQual()); } diff --git a/tajo-core/src/main/java/org/apache/tajo/ws/rs/resources/QueryResultResource.java b/tajo-core/src/main/java/org/apache/tajo/ws/rs/resources/QueryResultResource.java index 7d5c78ac7b..abbfd33cde 100644 --- a/tajo-core/src/main/java/org/apache/tajo/ws/rs/resources/QueryResultResource.java +++ b/tajo-core/src/main/java/org/apache/tajo/ws/rs/resources/QueryResultResource.java @@ -144,7 +144,8 @@ private static NonForwardQueryResultScanner getNonForwardQueryResultScanner( queryId, scanNode, Integer.MAX_VALUE, - Optional.empty()); + Optional.empty(), + masterContext.getCatalog()); resultScanner.init(); session.addNonForwardQueryResultScanner(resultScanner); } diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java b/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java index 6653894671..a03b169241 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/logical/PartitionedTableScanNode.java @@ -18,137 +18,21 @@ package org.apache.tajo.plan.logical; -import com.google.common.base.Objects; -import com.google.gson.annotations.Expose; -import org.apache.commons.lang.StringUtils; -import org.apache.hadoop.fs.Path; -import org.apache.tajo.catalog.TableDesc; -import org.apache.tajo.plan.PlanString; -import org.apache.tajo.plan.Target; -import org.apache.tajo.plan.expr.EvalNode; -import org.apache.tajo.util.TUtil; - -import java.util.ArrayList; - public class PartitionedTableScanNode extends ScanNode { - @Expose Path [] inputPaths; - @Expose String[] partitionKeys; - public 
PartitionedTableScanNode(int pid) { super(pid, NodeType.PARTITIONS_SCAN); } - public void init(ScanNode scanNode, Path[] inputPaths, String[] partitionKeys) { + public void init(ScanNode scanNode) { tableDesc = scanNode.tableDesc; setInSchema(scanNode.getInSchema()); setOutSchema(scanNode.getOutSchema()); this.qual = scanNode.qual; this.targets = scanNode.targets; - this.inputPaths = inputPaths; - this.partitionKeys = partitionKeys; if (scanNode.hasAlias()) { alias = scanNode.alias; } } - public void setInputPaths(Path [] paths) { - this.inputPaths = paths; - } - - public Path [] getInputPaths() { - return inputPaths; - } - - public String[] getPartitionKeys() { - return partitionKeys; - } - - public void setPartitionKeys(String[] partitionKeys) { - this.partitionKeys = partitionKeys; - } - - @Override - public int hashCode() { - return Objects.hashCode(this.tableDesc, this.qual, this.targets); - } - - @Override - public boolean equals(Object obj) { - if (obj instanceof PartitionedTableScanNode) { - PartitionedTableScanNode other = (PartitionedTableScanNode) obj; - - boolean eq = super.equals(other); - eq = eq && TUtil.checkEquals(this.tableDesc, other.tableDesc); - eq = eq && TUtil.checkEquals(this.qual, other.qual); - eq = eq && TUtil.checkEquals(this.targets, other.targets); - eq = eq && TUtil.checkEquals(this.inputPaths, other.inputPaths); - eq = eq && TUtil.checkEquals(this.partitionKeys, other.partitionKeys); - - return eq; - } - - return false; - } - - @Override - public Object clone() throws CloneNotSupportedException { - PartitionedTableScanNode unionScan = (PartitionedTableScanNode) super.clone(); - - unionScan.tableDesc = (TableDesc) this.tableDesc.clone(); - - if (hasQual()) { - unionScan.qual = (EvalNode) this.qual.clone(); - } - - if (hasTargets()) { - unionScan.targets = new ArrayList<>(); - for (Target t : targets) { - unionScan.targets.add((Target) t.clone()); - } - } - - unionScan.inputPaths = inputPaths; - unionScan.partitionKeys = partitionKeys; 
- - return unionScan; - } - - @Override - public void preOrder(LogicalNodeVisitor visitor) { - visitor.visit(this); - } - - public void postOrder(LogicalNodeVisitor visitor) { - visitor.visit(this); - } - - @Override - public PlanString getPlanString() { - PlanString planStr = new PlanString(this).appendTitle(" on " + getTableName()); - if (hasAlias()) { - planStr.appendTitle(" as ").appendTitle(alias); - } - - if (hasQual()) { - planStr.addExplan("filter: ").appendExplain(this.qual.toString()); - } - - if (hasTargets()) { - planStr.addExplan("target list: ").appendExplain(StringUtils.join(targets, ", ")); - } - - planStr.addDetail("out schema: ").appendDetail(getOutSchema().toString()); - planStr.addDetail("in schema: ").appendDetail(getInSchema().toString()); - - if (inputPaths != null) { - planStr.addExplan("num of filtered paths: ").appendExplain(""+ inputPaths.length); - int i = 0; - for (Path path : inputPaths) { - planStr.addDetail((i++) + ": ").appendDetail(path.toString()); - } - } - - return planStr; - } } diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/LogicalPlanEqualityTester.java b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/LogicalPlanEqualityTester.java index abd413cb9b..c35194e8ec 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/LogicalPlanEqualityTester.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/LogicalPlanEqualityTester.java @@ -21,15 +21,11 @@ import org.apache.tajo.exception.TajoException; import org.apache.tajo.plan.LogicalPlan; import org.apache.tajo.plan.logical.LogicalNode; -import org.apache.tajo.plan.logical.NodeType; -import org.apache.tajo.plan.logical.PartitionedTableScanNode; -import org.apache.tajo.plan.logical.ScanNode; import org.apache.tajo.plan.rewrite.LogicalPlanRewriteRule; import org.apache.tajo.plan.rewrite.LogicalPlanRewriteRuleContext; import org.apache.tajo.plan.serder.LogicalNodeDeserializer; import 
org.apache.tajo.plan.serder.LogicalNodeSerializer; import org.apache.tajo.plan.serder.PlanProto; -import org.apache.tajo.plan.util.PlannerUtil; /** * It verifies the equality between the input and output of LogicalNodeTree(De)Serializer in logical planning. @@ -54,15 +50,7 @@ public LogicalPlan rewrite(LogicalPlanRewriteRuleContext context) throws TajoExc LogicalNode root = plan.getRootBlock().getRoot(); PlanProto.LogicalNodeTree serialized = LogicalNodeSerializer.serialize(plan.getRootBlock().getRoot()); LogicalNode deserialized = LogicalNodeDeserializer.deserialize(context.getQueryContext(), null, serialized); - - // Error handling PartitionedTableScanNode because LogicalNodeDeserializer convert it to ScanNode. - PartitionedTableScanNode partitionedTableScanNode = PlannerUtil.findTopNode(root, NodeType.PARTITIONS_SCAN); - if (partitionedTableScanNode != null) { - ScanNode scanNode = PlannerUtil.findTopNode(deserialized, NodeType.SCAN); - assert scanNode != null; - } else { - assert root.deepEquals(deserialized); - } + assert root.deepEquals(deserialized); return plan; } } diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java index 03b8ffeedc..f14618f736 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java @@ -18,7 +18,6 @@ package org.apache.tajo.plan.rewrite.rules; -import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Sets; @@ -31,6 +30,7 @@ import org.apache.tajo.catalog.proto.CatalogProtos; import org.apache.tajo.catalog.proto.CatalogProtos.PartitionsByAlgebraProto; import org.apache.tajo.catalog.proto.CatalogProtos.PartitionDescProto; +import 
org.apache.tajo.conf.TajoConf; import org.apache.tajo.datum.DatumFactory; import org.apache.tajo.datum.NullDatum; import org.apache.tajo.exception.*; @@ -89,7 +89,7 @@ public LogicalPlan rewrite(LogicalPlanRewriteRuleContext context) throws TajoExc public void setCatalog(CatalogService catalog) { this.catalog = catalog; - } + } private static class PartitionPathFilter implements PathFilter { @@ -117,11 +117,11 @@ public String toString() { } } - private PartitionPruningHandle getPartitionPruningHandle(OverridableConf queryContext, String tableName, + private PartitionPruningHandle getPartitionPruningHandle(TajoConf conf, String tableName, Schema partitionColumns, EvalNode [] conjunctiveForms, Path tablePath) throws IOException, UndefinedDatabaseException, UndefinedTableException, UndefinedPartitionMethodException, UndefinedOperatorException, UnsupportedException { - return getPartitionPruningHandle(queryContext, tableName, partitionColumns, conjunctiveForms, tablePath, null); + return getPartitionPruningHandle(conf, tableName, partitionColumns, conjunctiveForms, tablePath, null); } /** @@ -134,13 +134,13 @@ private PartitionPruningHandle getPartitionPruningHandle(OverridableConf queryCo * @return * @throws IOException */ - private PartitionPruningHandle getPartitionPruningHandle(OverridableConf queryContext, String tableName, + private PartitionPruningHandle getPartitionPruningHandle(TajoConf conf, String tableName, Schema partitionColumns, EvalNode [] conjunctiveForms, Path tablePath, ScanNode scanNode) throws IOException, UndefinedDatabaseException, UndefinedTableException, UndefinedPartitionMethodException, UndefinedOperatorException, UnsupportedException { PartitionPruningHandle partitionPruningHandle = null; - FileSystem fs = tablePath.getFileSystem(queryContext.getConf()); + FileSystem fs = tablePath.getFileSystem(conf); String [] splits = CatalogUtil.splitFQTableName(tableName); List partitions = null; @@ -356,10 +356,12 @@ public static 
PartitionsByAlgebraProto getPartitionsAlgebraProto( return paths; } - @VisibleForTesting - public PartitionPruningHandle getPartitionPruningHandle(OverridableConf queryContext, ScanNode scanNode) + public PartitionPruningHandle getPartitionPruningHandle(TajoConf conf, ScanNode scanNode) throws IOException, UndefinedDatabaseException, UndefinedTableException, UndefinedPartitionMethodException, UndefinedOperatorException, UnsupportedException { + long startTime = System.currentTimeMillis(); + PartitionPruningHandle pruningHandle = null; + TableDesc table = scanNode.getTableDesc(); PartitionMethodDesc partitionDesc = scanNode.getTableDesc().getPartitionMethod(); @@ -398,12 +400,18 @@ public PartitionPruningHandle getPartitionPruningHandle(OverridableConf queryCon } if (indexablePredicateSet.size() > 0) { // There are at least one indexable predicates - return getPartitionPruningHandle(queryContext, table.getName(), paritionValuesSchema, + pruningHandle = getPartitionPruningHandle(conf, table.getName(), paritionValuesSchema, indexablePredicateSet.toArray(new EvalNode[indexablePredicateSet.size()]), new Path(table.getUri()), scanNode); } else { // otherwise, we will get all partition paths. 
- return getPartitionPruningHandle(queryContext, table.getName(), paritionValuesSchema, null, + pruningHandle = getPartitionPruningHandle(conf, table.getName(), paritionValuesSchema, null, new Path(table.getUri())); } + + long finishTime = System.currentTimeMillis(); + long elapsedMills = finishTime - startTime; + + LOG.info(String.format("Partition pruning: %d ms elapsed.", elapsedMills)); + return pruningHandle; } private boolean checkIfIndexablePredicateOnTargetColumn(EvalNode evalNode, Column targetColumn) { @@ -563,34 +571,19 @@ public Object visitScan(OverridableConf queryContext, LogicalPlan plan, LogicalP return null; } - try { - long startTime = System.currentTimeMillis(); - PartitionPruningHandle partitionPruningHandle = getPartitionPruningHandle(queryContext, scanNode); - - Path[] filteredPaths = partitionPruningHandle.getPartitionPaths(); - plan.addHistory("PartitionTableRewriter chooses " + filteredPaths.length + " of partitions"); + PartitionedTableScanNode rewrittenScanNode = plan.createNode(PartitionedTableScanNode.class); + rewrittenScanNode.init(scanNode); - PartitionedTableScanNode rewrittenScanNode = plan.createNode(PartitionedTableScanNode.class); - rewrittenScanNode.init(scanNode, filteredPaths, partitionPruningHandle.getPartitionKeys()); - rewrittenScanNode.getTableDesc().getStats().setNumBytes(partitionPruningHandle.getTotalVolume()); - - // if it is topmost node, set it as the rootnode of this block. 
- if (stack.empty() || block.getRoot().equals(scanNode)) { - block.setRoot(rewrittenScanNode); - } else { - PlannerUtil.replaceNode(plan, stack.peek(), scanNode, rewrittenScanNode); - } - block.registerNode(rewrittenScanNode); - - long finishTime = System.currentTimeMillis(); - long elapsedMills = finishTime - startTime; - - LOG.info(String.format("Partition pruning: %d ms elapsed.", elapsedMills)); - } catch (IOException e) { - throw new TajoInternalError("Partitioned Table Rewrite Failed: \n" + e.getMessage()); + // if it is topmost node, set it as the rootnode of this block. + if (stack.empty() || block.getRoot().equals(scanNode)) { + block.setRoot(rewrittenScanNode); + } else { + PlannerUtil.replaceNode(plan, stack.peek(), scanNode, rewrittenScanNode); } + + block.registerNode(rewrittenScanNode); return null; } } -} +} \ No newline at end of file diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeDeserializer.java b/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeDeserializer.java index a1ee3245d5..e63763a2d2 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeDeserializer.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeDeserializer.java @@ -20,7 +20,6 @@ import com.google.common.collect.Lists; import com.google.common.collect.Maps; -import org.apache.hadoop.fs.Path; import org.apache.tajo.OverridableConf; import org.apache.tajo.algebra.JoinType; import org.apache.tajo.annotation.Nullable; @@ -119,6 +118,8 @@ public int compare(PlanProto.LogicalNode o1, PlanProto.LogicalNode o2) { current = convertUnion(nodeMap, protoNode); break; case PARTITIONS_SCAN: + current = convertPartitionedTableScan(context, evalContext, protoNode); + break; case SCAN: current = convertScan(context, evalContext, protoNode); break; @@ -411,6 +412,13 @@ private static ScanNode convertScan(OverridableConf context, EvalContext evalCon return scan; } + private static PartitionedTableScanNode 
convertPartitionedTableScan(OverridableConf context, EvalContext evalContext, + PlanProto.LogicalNode protoNode) { + PartitionedTableScanNode partitionedTableScan = new PartitionedTableScanNode(protoNode.getNodeId()); + fillScanNode(context, evalContext, protoNode, partitionedTableScan); + return partitionedTableScan; + } + private static void fillScanNode(OverridableConf context, EvalContext evalContext, PlanProto.LogicalNode protoNode, ScanNode scan) { PlanProto.ScanNode scanProto = protoNode.getScan(); diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeSerializer.java b/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeSerializer.java index 6f56ce89e2..677fc8d7ff 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeSerializer.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/serder/LogicalNodeSerializer.java @@ -19,7 +19,6 @@ package org.apache.tajo.plan.serder; import com.google.common.collect.Maps; -import org.apache.hadoop.fs.Path; import org.apache.tajo.algebra.JoinType; import org.apache.tajo.catalog.SortSpec; import org.apache.tajo.catalog.proto.CatalogProtos; @@ -472,8 +471,12 @@ public LogicalNode visitIndexScan(SerializeContext context, LogicalPlan plan, Lo public LogicalNode visitPartitionedTableScan(SerializeContext context, LogicalPlan plan, LogicalPlan.QueryBlock block, PartitionedTableScanNode node, Stack stack) throws TajoException { - ScanNode scanNode = (ScanNode) node; - return visitScan(context, plan, block, scanNode, stack); + + PlanProto.ScanNode.Builder scanBuilder = buildScanNode(node); + PlanProto.LogicalNode.Builder nodeBuilder = createNodeBuilder(context, node); + nodeBuilder.setScan(scanBuilder); + context.treeBuilder.addNodes(nodeBuilder); + return node; } @Override diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/verifier/PostLogicalPlanVerifier.java b/tajo-plan/src/main/java/org/apache/tajo/plan/verifier/PostLogicalPlanVerifier.java index 
9ded584815..d576379cc6 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/verifier/PostLogicalPlanVerifier.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/verifier/PostLogicalPlanVerifier.java @@ -134,13 +134,6 @@ private static boolean isSimpleRelationNode(LogicalNode node) { private static long getTableVolume(ScanNode scanNode) { if (scanNode.getTableDesc().hasStats()) { long scanBytes = scanNode.getTableDesc().getStats().getNumBytes(); - if (scanNode.getType() == NodeType.PARTITIONS_SCAN) { - PartitionedTableScanNode pScanNode = (PartitionedTableScanNode) scanNode; - if (pScanNode.getInputPaths() == null || pScanNode.getInputPaths().length == 0) { - scanBytes = 0L; - } - } - return scanBytes; } else { return -1; diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java index 2c0d767df4..6e2e3db4a6 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java @@ -366,6 +366,15 @@ protected boolean isSplittable(TableMeta meta, Schema schema, Path path, FileSta return split; } + protected boolean isPartitionSplittable(TableMeta meta, Schema schema, Path path, String partitionKeys, + FileStatus status) throws IOException { + Fragment fragment = new PartitionFileFragment(path.getName(), path, 0, status.getLen(), partitionKeys); + Scanner scanner = getScanner(meta, schema, fragment, null); + boolean split = scanner.isSplittable(); + scanner.close(); + return split; + } + private static final double SPLIT_SLOP = 1.1; // 10% slop protected int getBlockIndex(BlockLocation[] blkLocations, @@ -441,7 +450,6 @@ private int[] getDiskIds(VolumeId[] volumeIds) { public List getSplits(String tableName, TableMeta meta, Schema schema, Path... 
inputs) throws IOException { // generate splits' - List splits = Lists.newArrayList(); List volumeSplits = Lists.newArrayList(); List blockLocations = Lists.newArrayList(); @@ -643,7 +651,7 @@ public List getPartitionSplits(String tableName, TableMeta meta, Schem if (length > 0) { // Get locations of blocks of file BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length); - boolean splittable = isSplittable(meta, schema, path, file); + boolean splittable = isPartitionSplittable(meta, schema, path, partitionKeys[i], file); if (blocksMetadataEnabled && fs instanceof DistributedFileSystem) { if (splittable) { @@ -676,10 +684,8 @@ public List getPartitionSplits(String tableName, TableMeta meta, Schem // for s3 while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) { int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); - splits.add(makePartitionSplit(tableName, path, length - bytesRemaining, splitSize, blkLocations[blkIndex].getHosts(), partitionKeys[i])); - bytesRemaining -= splitSize; } if (bytesRemaining > 0) { From 8f3e72856760a0a8f76e290192b4a8ea9e14b277 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Tue, 22 Mar 2016 01:29:46 +0900 Subject: [PATCH 093/127] Remove unnecessary codes --- .../apache/tajo/engine/planner/physical/ExternalSortExec.java | 1 - 1 file changed, 1 deletion(-) diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/ExternalSortExec.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/ExternalSortExec.java index c532b9221c..ff629c3a8d 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/ExternalSortExec.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/ExternalSortExec.java @@ -142,7 +142,6 @@ public ExternalSortExec(final TaskAttemptContext context,final SortNode plan, fi mergedInputFragments = new ArrayList<>(); for (CatalogProtos.FragmentProto proto : fragments) { FileFragment fragment = 
FragmentConvertor.convert(FileFragment.class, proto); - LOG.debug("### fragment:" + fragment.toString()); mergedInputFragments.add(new Chunk(inSchema, fragment, scanNode.getTableDesc().getMeta())); } } From 7d4bbed01b0ebd6a1c6fb0f0cba8e76ccf21d329 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Tue, 22 Mar 2016 11:15:35 +0900 Subject: [PATCH 094/127] Fix bugs in TestMultipleJoinTypes::testInnerAndOuterWithEmpty --- .../testInnerAndOuterWithEmpty.1.Hash.plan | 4 ++-- .../testInnerAndOuterWithEmpty.1.Hash_NoBroadcast.plan | 4 ++-- .../testInnerAndOuterWithEmpty.1.Sort.plan | 4 ++-- .../testInnerAndOuterWithEmpty.1.Sort_NoBroadcast.plan | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Hash.plan b/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Hash.plan index 2f2ca890fb..7456b5238a 100644 --- a/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Hash.plan +++ b/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Hash.plan @@ -6,8 +6,8 @@ JOIN(8)(LEFT_OUTER) => out schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} => in schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} PARTITIONS_SCAN(9) on default.customer_broad_parts as c + => filter: default.c.c_custkey (INT4) < 0 => target list: default.c.c_custkey (INT4) - => num of filtered paths: 0 => out schema: {(1) default.c.c_custkey (INT4)} => in schema: {(7) default.c.c_acctbal (FLOAT8), default.c.c_address (TEXT), default.c.c_comment (TEXT), default.c.c_mktsegment (TEXT), default.c.c_name (TEXT), default.c.c_nationkey (INT4), default.c.c_phone (TEXT)} JOIN(7)(INNER) @@ -82,8 +82,8 @@ JOIN(8)(LEFT_OUTER) => out schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), 
default.c.c_custkey (INT4)} => in schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} PARTITIONS_SCAN(9) on default.customer_broad_parts as c + => filter: default.c.c_custkey (INT4) < 0 => target list: default.c.c_custkey (INT4) - => num of filtered paths: 0 => out schema: {(1) default.c.c_custkey (INT4)} => in schema: {(7) default.c.c_acctbal (FLOAT8), default.c.c_address (TEXT), default.c.c_comment (TEXT), default.c.c_mktsegment (TEXT), default.c.c_name (TEXT), default.c.c_nationkey (INT4), default.c.c_phone (TEXT)} JOIN(7)(INNER) diff --git a/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Hash_NoBroadcast.plan b/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Hash_NoBroadcast.plan index f1fa414673..46e0b4b7ee 100644 --- a/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Hash_NoBroadcast.plan +++ b/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Hash_NoBroadcast.plan @@ -6,8 +6,8 @@ JOIN(8)(LEFT_OUTER) => out schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} => in schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} PARTITIONS_SCAN(9) on default.customer_broad_parts as c + => filter: default.c.c_custkey (INT4) < 0 => target list: default.c.c_custkey (INT4) - => num of filtered paths: 0 => out schema: {(1) default.c.c_custkey (INT4)} => in schema: {(7) default.c.c_acctbal (FLOAT8), default.c.c_address (TEXT), default.c.c_comment (TEXT), default.c.c_mktsegment (TEXT), default.c.c_name (TEXT), default.c.c_nationkey (INT4), default.c.c_phone (TEXT)} JOIN(7)(INNER) @@ -100,8 +100,8 @@ Block Id: eb_0000000000000_0000_000004 [LEAF] [q_0000000000000_0000] 4 => 5 (type=HASH_SHUFFLE, key=default.c.c_custkey (INT4), num=32) PARTITIONS_SCAN(9) on 
default.customer_broad_parts as c + => filter: default.c.c_custkey (INT4) < 0 => target list: default.c.c_custkey (INT4) - => num of filtered paths: 0 => out schema: {(1) default.c.c_custkey (INT4)} => in schema: {(7) default.c.c_acctbal (FLOAT8), default.c.c_address (TEXT), default.c.c_comment (TEXT), default.c.c_mktsegment (TEXT), default.c.c_name (TEXT), default.c.c_nationkey (INT4), default.c.c_phone (TEXT)} diff --git a/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Sort.plan b/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Sort.plan index 2f2ca890fb..7456b5238a 100644 --- a/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Sort.plan +++ b/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Sort.plan @@ -6,8 +6,8 @@ JOIN(8)(LEFT_OUTER) => out schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} => in schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} PARTITIONS_SCAN(9) on default.customer_broad_parts as c + => filter: default.c.c_custkey (INT4) < 0 => target list: default.c.c_custkey (INT4) - => num of filtered paths: 0 => out schema: {(1) default.c.c_custkey (INT4)} => in schema: {(7) default.c.c_acctbal (FLOAT8), default.c.c_address (TEXT), default.c.c_comment (TEXT), default.c.c_mktsegment (TEXT), default.c.c_name (TEXT), default.c.c_nationkey (INT4), default.c.c_phone (TEXT)} JOIN(7)(INNER) @@ -82,8 +82,8 @@ JOIN(8)(LEFT_OUTER) => out schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} => in schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} PARTITIONS_SCAN(9) on default.customer_broad_parts as c + => filter: default.c.c_custkey (INT4) < 0 => target list: default.c.c_custkey (INT4) - => num of 
filtered paths: 0 => out schema: {(1) default.c.c_custkey (INT4)} => in schema: {(7) default.c.c_acctbal (FLOAT8), default.c.c_address (TEXT), default.c.c_comment (TEXT), default.c.c_mktsegment (TEXT), default.c.c_name (TEXT), default.c.c_nationkey (INT4), default.c.c_phone (TEXT)} JOIN(7)(INNER) diff --git a/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Sort_NoBroadcast.plan b/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Sort_NoBroadcast.plan index f1fa414673..46e0b4b7ee 100644 --- a/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Sort_NoBroadcast.plan +++ b/tajo-core-tests/src/test/resources/results/TestMultipleJoinTypes/testInnerAndOuterWithEmpty.1.Sort_NoBroadcast.plan @@ -6,8 +6,8 @@ JOIN(8)(LEFT_OUTER) => out schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} => in schema: {(3) default.a.l_orderkey (INT4), default.b.o_orderkey (INT4), default.c.c_custkey (INT4)} PARTITIONS_SCAN(9) on default.customer_broad_parts as c + => filter: default.c.c_custkey (INT4) < 0 => target list: default.c.c_custkey (INT4) - => num of filtered paths: 0 => out schema: {(1) default.c.c_custkey (INT4)} => in schema: {(7) default.c.c_acctbal (FLOAT8), default.c.c_address (TEXT), default.c.c_comment (TEXT), default.c.c_mktsegment (TEXT), default.c.c_name (TEXT), default.c.c_nationkey (INT4), default.c.c_phone (TEXT)} JOIN(7)(INNER) @@ -100,8 +100,8 @@ Block Id: eb_0000000000000_0000_000004 [LEAF] [q_0000000000000_0000] 4 => 5 (type=HASH_SHUFFLE, key=default.c.c_custkey (INT4), num=32) PARTITIONS_SCAN(9) on default.customer_broad_parts as c + => filter: default.c.c_custkey (INT4) < 0 => target list: default.c.c_custkey (INT4) - => num of filtered paths: 0 => out schema: {(1) default.c.c_custkey (INT4)} => in schema: {(7) default.c.c_acctbal (FLOAT8), default.c.c_address (TEXT), default.c.c_comment 
(TEXT), default.c.c_mktsegment (TEXT), default.c.c_name (TEXT), default.c.c_nationkey (INT4), default.c.c_phone (TEXT)} From 8036ed3d920dac0e766a153ed358bcd25ac190e2 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Tue, 22 Mar 2016 17:05:51 +0900 Subject: [PATCH 095/127] Show informs of PartitionFileFragment on web UI --- .../java/org/apache/tajo/querymaster/Task.java | 14 ++++++++++++-- .../src/main/resources/webapps/worker/task.jsp | 16 +++++++++++++++- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/tajo-core/src/main/java/org/apache/tajo/querymaster/Task.java b/tajo-core/src/main/java/org/apache/tajo/querymaster/Task.java index 466f6c9a8e..6f6abd5591 100644 --- a/tajo-core/src/main/java/org/apache/tajo/querymaster/Task.java +++ b/tajo-core/src/main/java/org/apache/tajo/querymaster/Task.java @@ -40,10 +40,12 @@ import org.apache.tajo.master.event.*; import org.apache.tajo.master.event.TaskAttemptToSchedulerEvent.TaskAttemptScheduleContext; import org.apache.tajo.plan.logical.*; +import org.apache.tajo.plan.util.PlannerUtil; import org.apache.tajo.storage.DataLocation; import org.apache.tajo.storage.fragment.FileFragment; import org.apache.tajo.storage.fragment.Fragment; import org.apache.tajo.storage.fragment.FragmentConvertor; +import org.apache.tajo.storage.fragment.PartitionFileFragment; import org.apache.tajo.util.Pair; import org.apache.tajo.util.TUtil; import org.apache.tajo.util.TajoIdUtils; @@ -274,10 +276,18 @@ private TaskHistory makeTaskHistory() { } } + PartitionedTableScanNode partitionedTable = PlannerUtil.findTopNode(lastAttempt.getTask().getLogicalPlan(), + NodeType.PARTITIONS_SCAN); + List fragmentList = new ArrayList<>(); for (FragmentProto eachFragment : getAllFragments()) { try { - Fragment fragment = FragmentConvertor.convert(systemConf, eachFragment); + Fragment fragment = null; + if (partitionedTable != null) { + fragment = FragmentConvertor.convert(PartitionFileFragment.class, eachFragment); + } else { + fragment = 
FragmentConvertor.convert(systemConf, eachFragment); + } fragmentList.add(fragment.toString()); } catch (Exception e) { LOG.error(e.getMessage(), e); @@ -286,7 +296,7 @@ private TaskHistory makeTaskHistory() { } taskHistory.setFragments(fragmentList.toArray(new String[fragmentList.size()])); - List fetchList = new ArrayList<>(); + List < String[]> fetchList = new ArrayList<>(); for (Map.Entry> e : getFetchMap().entrySet()) { for (FetchProto f : e.getValue()) { for (URI uri : Repartitioner.createSimpleURIs(maxUrlLength, f)) { diff --git a/tajo-core/src/main/resources/webapps/worker/task.jsp b/tajo-core/src/main/resources/webapps/worker/task.jsp index 3e17c8a1e3..04fe74bea0 100644 --- a/tajo-core/src/main/resources/webapps/worker/task.jsp +++ b/tajo-core/src/main/resources/webapps/worker/task.jsp @@ -39,6 +39,11 @@ <%@ page import="java.util.Map" %> <%@ page import="java.util.Set" %> <%@ page import="org.apache.tajo.conf.TajoConf.ConfVars" %> +<%@ page import="org.apache.tajo.storage.fragment.PartitionFileFragment" %> +<%@ page import="org.apache.tajo.engine.query.QueryContext" %> +<%@ page import="org.apache.tajo.plan.util.PlannerUtil" %> +<%@ page import="org.apache.tajo.plan.logical.NodeType" %> +<%@ page import="org.apache.tajo.plan.logical.PartitionedTableScanNode" %> <% String paramQueryId = request.getParameter("queryId"); @@ -103,8 +108,17 @@ String fragmentInfo = ""; String delim = ""; + + PartitionedTableScanNode partitionedTable = PlannerUtil.findTopNode(stage.getBlock().getPlan(), + NodeType.PARTITIONS_SCAN); + for (CatalogProtos.FragmentProto eachFragment : task.getAllFragments()) { - Fragment fragment = FragmentConvertor.convert(tajoWorker.getConfig(), eachFragment); + Fragment fragment = null; + if (partitionedTable != null) { + fragment = FragmentConvertor.convert(PartitionFileFragment.class, eachFragment); + } else { + fragment = FragmentConvertor.convert(tajoWorker.getConfig(), eachFragment); + } fragmentInfo += delim + fragment.toString(); delim = "
"; } From b335fde9ae8ec469f3381c4a0f5e8b63c0d97884 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Wed, 23 Mar 2016 09:46:10 +0900 Subject: [PATCH 096/127] Remove informs of PartitionFileFragment on web UI --- .../java/org/apache/tajo/querymaster/Task.java | 14 ++------------ .../src/main/resources/webapps/worker/task.jsp | 16 +--------------- 2 files changed, 3 insertions(+), 27 deletions(-) diff --git a/tajo-core/src/main/java/org/apache/tajo/querymaster/Task.java b/tajo-core/src/main/java/org/apache/tajo/querymaster/Task.java index 6f6abd5591..466f6c9a8e 100644 --- a/tajo-core/src/main/java/org/apache/tajo/querymaster/Task.java +++ b/tajo-core/src/main/java/org/apache/tajo/querymaster/Task.java @@ -40,12 +40,10 @@ import org.apache.tajo.master.event.*; import org.apache.tajo.master.event.TaskAttemptToSchedulerEvent.TaskAttemptScheduleContext; import org.apache.tajo.plan.logical.*; -import org.apache.tajo.plan.util.PlannerUtil; import org.apache.tajo.storage.DataLocation; import org.apache.tajo.storage.fragment.FileFragment; import org.apache.tajo.storage.fragment.Fragment; import org.apache.tajo.storage.fragment.FragmentConvertor; -import org.apache.tajo.storage.fragment.PartitionFileFragment; import org.apache.tajo.util.Pair; import org.apache.tajo.util.TUtil; import org.apache.tajo.util.TajoIdUtils; @@ -276,18 +274,10 @@ private TaskHistory makeTaskHistory() { } } - PartitionedTableScanNode partitionedTable = PlannerUtil.findTopNode(lastAttempt.getTask().getLogicalPlan(), - NodeType.PARTITIONS_SCAN); - List fragmentList = new ArrayList<>(); for (FragmentProto eachFragment : getAllFragments()) { try { - Fragment fragment = null; - if (partitionedTable != null) { - fragment = FragmentConvertor.convert(PartitionFileFragment.class, eachFragment); - } else { - fragment = FragmentConvertor.convert(systemConf, eachFragment); - } + Fragment fragment = FragmentConvertor.convert(systemConf, eachFragment); fragmentList.add(fragment.toString()); } catch (Exception e) { 
LOG.error(e.getMessage(), e); @@ -296,7 +286,7 @@ private TaskHistory makeTaskHistory() { } taskHistory.setFragments(fragmentList.toArray(new String[fragmentList.size()])); - List < String[]> fetchList = new ArrayList<>(); + List fetchList = new ArrayList<>(); for (Map.Entry> e : getFetchMap().entrySet()) { for (FetchProto f : e.getValue()) { for (URI uri : Repartitioner.createSimpleURIs(maxUrlLength, f)) { diff --git a/tajo-core/src/main/resources/webapps/worker/task.jsp b/tajo-core/src/main/resources/webapps/worker/task.jsp index 04fe74bea0..3e17c8a1e3 100644 --- a/tajo-core/src/main/resources/webapps/worker/task.jsp +++ b/tajo-core/src/main/resources/webapps/worker/task.jsp @@ -39,11 +39,6 @@ <%@ page import="java.util.Map" %> <%@ page import="java.util.Set" %> <%@ page import="org.apache.tajo.conf.TajoConf.ConfVars" %> -<%@ page import="org.apache.tajo.storage.fragment.PartitionFileFragment" %> -<%@ page import="org.apache.tajo.engine.query.QueryContext" %> -<%@ page import="org.apache.tajo.plan.util.PlannerUtil" %> -<%@ page import="org.apache.tajo.plan.logical.NodeType" %> -<%@ page import="org.apache.tajo.plan.logical.PartitionedTableScanNode" %> <% String paramQueryId = request.getParameter("queryId"); @@ -108,17 +103,8 @@ String fragmentInfo = ""; String delim = ""; - - PartitionedTableScanNode partitionedTable = PlannerUtil.findTopNode(stage.getBlock().getPlan(), - NodeType.PARTITIONS_SCAN); - for (CatalogProtos.FragmentProto eachFragment : task.getAllFragments()) { - Fragment fragment = null; - if (partitionedTable != null) { - fragment = FragmentConvertor.convert(PartitionFileFragment.class, eachFragment); - } else { - fragment = FragmentConvertor.convert(tajoWorker.getConfig(), eachFragment); - } + Fragment fragment = FragmentConvertor.convert(tajoWorker.getConfig(), eachFragment); fragmentInfo += delim + fragment.toString(); delim = "
"; } From 6c855bf519b4adc4b0e7d7dd802e6ceca3d872e8 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Wed, 23 Mar 2016 16:42:35 +0900 Subject: [PATCH 097/127] Add dependency scope because aws sdk conflict on EMR --- tajo-storage/tajo-storage-s3/pom.xml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tajo-storage/tajo-storage-s3/pom.xml b/tajo-storage/tajo-storage-s3/pom.xml index 4af62e0062..3799a8af03 100644 --- a/tajo-storage/tajo-storage-s3/pom.xml +++ b/tajo-storage/tajo-storage-s3/pom.xml @@ -490,6 +490,7 @@ io.airlift units + provided ${airlft.version} @@ -510,12 +511,14 @@ org.weakref jmxutils + provided 1.18 com.amazonaws aws-java-sdk + provided ${aws-java-sdk.version} @@ -580,6 +583,7 @@ org.apache.httpcomponents httpclient + provided 4.5 @@ -600,6 +604,7 @@ org.apache.httpcomponents httpcore + provided 4.4.1 @@ -626,6 +631,7 @@ javax.validation validation-api + provided 1.1.0.Final From dd00174c1bb0dd58838fa395ee4d4478bede3858 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Wed, 23 Mar 2016 16:51:20 +0900 Subject: [PATCH 098/127] Move isPartitionSplittable to right position --- .../apache/tajo/storage/FileTablespace.java | 38 ++++++++++++++----- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java index 6e2e3db4a6..07bf7541e4 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java @@ -366,15 +366,6 @@ protected boolean isSplittable(TableMeta meta, Schema schema, Path path, FileSta return split; } - protected boolean isPartitionSplittable(TableMeta meta, Schema schema, Path path, String partitionKeys, - FileStatus status) throws IOException { - Fragment fragment = new PartitionFileFragment(path.getName(), path, 0, 
status.getLen(), partitionKeys); - Scanner scanner = getScanner(meta, schema, fragment, null); - boolean split = scanner.isSplittable(); - scanner.close(); - return split; - } - private static final double SPLIT_SLOP = 1.1; // 10% slop protected int getBlockIndex(BlockLocation[] blkLocations, @@ -567,6 +558,33 @@ private String[] getHosts(BlockLocation[] blkLocations) throws IOException { // The below code is for splitting partitioned table. //////////////////////////////////////////////////////////////////////////////// + + /** + * Is the given filename splitable? Usually, true, but if the file is + * stream compressed, it will not be. + *

+ * FileInputFormat implementations can override this and return + * false to ensure that individual input files are never split-up + * so that Mappers process entire files. + * + * + * @param meta the metadata of target table + * @param schema the schema of target table + * @param path the file name to check + * @param partitionKeys keys of target partition + * @param status get the file length + * @return is this file isSplittable? + * @throws IOException + */ + protected boolean isPartitionSplittable(TableMeta meta, Schema schema, Path path, String partitionKeys, + FileStatus status) throws IOException { + Fragment fragment = new PartitionFileFragment(path.getName(), path, 0, status.getLen(), partitionKeys); + Scanner scanner = getScanner(meta, schema, fragment, null); + boolean split = scanner.isSplittable(); + scanner.close(); + return split; + } + /** * Build a fragment for partition table * @@ -699,7 +717,7 @@ public List getPartitionSplits(String tableName, TableMeta meta, Schem } } } - if(LOG.isDebugEnabled()){ + if (LOG.isDebugEnabled()){ LOG.debug("# of average splits per partition: " + splits.size() / (i+1)); } i++; From cbccf4fcb51f1bde1598452dda0983f7224598f2 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Thu, 24 Mar 2016 16:01:40 +0900 Subject: [PATCH 099/127] Make directory which includes aws s3 dependencies --- tajo-dist/pom.xml | 6 +++++- tajo-dist/src/main/bin/tajo | 11 +++++++++++ tajo-storage/tajo-storage-s3/pom.xml | 6 ------ 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/tajo-dist/pom.xml b/tajo-dist/pom.xml index a62aebb05d..93deddd7fd 100644 --- a/tajo-dist/pom.xml +++ b/tajo-dist/pom.xml @@ -157,7 +157,11 @@ run mkdir -p lib run cp -r $ROOT/tajo-storage/tajo-storage-hdfs/target/lib/hive-*.jar lib/ - + + run mkdir aws_s3 + run cp -r $ROOT/tajo-storage/tajo-storage-s3/target/lib/*.jar aws_s3/ + + run mkdir -p share/jdbc-dist run cp -r $ROOT/tajo-jdbc/target/tajo-jdbc-${project.version}-jar-with-dependencies.jar 
./share/jdbc-dist/tajo-jdbc-${project.version}.jar diff --git a/tajo-dist/src/main/bin/tajo b/tajo-dist/src/main/bin/tajo index 007e960ffb..bc7707aa14 100755 --- a/tajo-dist/src/main/bin/tajo +++ b/tajo-dist/src/main/bin/tajo @@ -321,6 +321,17 @@ fi # Hive Home Configuration End ############################################################################## +############################################################################## +# Find and Set AWS S3 CLASSPATH +############################################################################## + +AWS_S3_LIB=$TAJO_HOME/aws_s3 + +if [ -d ${AWS_S3_LIB} ]; then + for f in ${AWS_S3_LIB}/*.jar; do + CLASSPATH=${CLASSPATH}:$f; + done +fi ############################################################################## # Find and Set Tajo CLASSPATH diff --git a/tajo-storage/tajo-storage-s3/pom.xml b/tajo-storage/tajo-storage-s3/pom.xml index 3799a8af03..4af62e0062 100644 --- a/tajo-storage/tajo-storage-s3/pom.xml +++ b/tajo-storage/tajo-storage-s3/pom.xml @@ -490,7 +490,6 @@ io.airlift units - provided ${airlft.version} @@ -511,14 +510,12 @@ org.weakref jmxutils - provided 1.18 com.amazonaws aws-java-sdk - provided ${aws-java-sdk.version} @@ -583,7 +580,6 @@ org.apache.httpcomponents httpclient - provided 4.5 @@ -604,7 +600,6 @@ org.apache.httpcomponents httpcore - provided 4.4.1 @@ -631,7 +626,6 @@ javax.validation validation-api - provided 1.1.0.Final From 983c5f2988c7695bc79b679a7f2368cde0de38a8 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Thu, 24 Mar 2016 17:18:22 +0900 Subject: [PATCH 100/127] Use the uri of table instead of the default filesystem uri --- .../main/java/org/apache/tajo/storage/s3/S3TableSpace.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java index 1b3b9c6a4e..728b18ec40 100644 --- 
a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java +++ b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java @@ -51,7 +51,7 @@ import static com.google.common.base.Strings.nullToEmpty; public class S3TableSpace extends FileTablespace { - private final Log LOG = LogFactory.getLog(S3TableSpace.class); + private final static Log LOG = LogFactory.getLog(S3TableSpace.class); private AmazonS3 s3; private boolean useInstanceCredentials; @@ -82,9 +82,7 @@ public void init(TajoConf tajoConf) throws IOException { .withSocketTimeout(Ints.checkedCast(socketTimeout.toMillis())) .withMaxConnections(maxConnections); - Path tajoRootPath = TajoConf.getTajoRootDir(conf); - FileSystem defaultFS = tajoRootPath.getFileSystem(conf); - this.s3 = createAmazonS3Client(defaultFS.getUri(), conf, configuration); + this.s3 = createAmazonS3Client(uri, conf, configuration); if (s3 != null) { String endPoint = conf.getTrimmed(ENDPOINT,""); From b0720d3ba746800a7371a913bdbafe288464856d Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Fri, 25 Mar 2016 10:31:27 +0900 Subject: [PATCH 101/127] Recover OrcScanner --- .../apache/tajo/storage/orc/OrcScanner.java | 460 ++++++++++++++++++ 1 file changed, 460 insertions(+) create mode 100644 tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/OrcScanner.java diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/OrcScanner.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/OrcScanner.java new file mode 100644 index 0000000000..c8aa67b404 --- /dev/null +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/OrcScanner.java @@ -0,0 +1,460 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.storage.orc; + +import com.google.common.collect.Lists; +import com.google.protobuf.CodedInputStream; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.Text; +import org.apache.orc.*; +import org.apache.orc.Reader.Options; +import org.apache.orc.impl.BufferChunk; +import org.apache.orc.impl.InStream; +import org.apache.tajo.TajoConstants; +import org.apache.tajo.catalog.Schema; +import org.apache.tajo.catalog.TableMeta; +import org.apache.tajo.plan.expr.EvalNode; +import org.apache.tajo.storage.FileScanner; +import org.apache.tajo.storage.StorageConstants; +import org.apache.tajo.storage.Tuple; +import org.apache.tajo.storage.fragment.Fragment; +import org.apache.tajo.storage.thirdparty.orc.OrcRecordReader; +import org.apache.tajo.storage.thirdparty.orc.OrcUtils; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; +import java.util.TimeZone; + +public class OrcScanner extends FileScanner { + private static final Log LOG = LogFactory.getLog(OrcScanner.class); + + private static final int DIRECTORY_SIZE_GUESS = 16 * 1024; + + protected final FileSystem fileSystem; + 
private final long maxLength = Long.MAX_VALUE; + protected final Path path; + protected org.apache.orc.CompressionKind compressionKind; + protected CompressionCodec codec; + protected int bufferSize; + private List stripeStats; + private int metadataSize; + protected List types; + private List userMetadata; + private List fileStats; + private List stripes; + protected int rowIndexStride; + private long contentLength, numberOfRows; + + private List versionList; + + //serialized footer - Keeping this around for use by getFileMetaInfo() + // will help avoid cpu cycles spend in deserializing at cost of increased + // memory footprint. + private ByteBuffer footerByteBuffer; + // Same for metastore cache - maintains the same background buffer, but includes postscript. + // This will only be set if the file footer/metadata was read from disk. + private ByteBuffer footerMetaAndPsBuffer; + + private OrcRecordReader recordReader; + + private long recordCount = 0; + + /** + * Ensure this is an ORC file to prevent users from trying to read text + * files or RC files as ORC files. + * @param in the file being read + * @param path the filename for error messages + * @param psLen the postscript length + * @param buffer the tail of the file + * @throws IOException + */ + static void ensureOrcFooter(FSDataInputStream in, + Path path, + int psLen, + ByteBuffer buffer) throws IOException { + int len = OrcFile.MAGIC.length(); + if (psLen < len + 1) { + throw new IOException("Malformed ORC file " + path + + ". Invalid postscript length " + psLen); + } + int offset = buffer.arrayOffset() + buffer.position() + buffer.limit() - 1 - len; + byte[] array = buffer.array(); + // now look for the magic string at the end of the postscript. + if (!Text.decode(array, offset, len).equals(OrcFile.MAGIC)) { + // If it isn't there, this may be the 0.11.0 version of ORC. 
+ // Read the first 3 bytes of the file to check for the header + byte[] header = new byte[len]; + in.readFully(0, header, 0, len); + // if it isn't there, this isn't an ORC file + if (!Text.decode(header, 0 , len).equals(OrcFile.MAGIC)) { + throw new IOException("Malformed ORC file " + path + + ". Invalid postscript."); + } + } + } + + /** + * Build a version string out of an array. + * @param version the version number as a list + * @return the human readable form of the version string + */ + private static String versionString(List version) { + StringBuilder buffer = new StringBuilder(); + for(int i=0; i < version.size(); ++i) { + if (i != 0) { + buffer.append('.'); + } + buffer.append(version.get(i)); + } + return buffer.toString(); + } + + /** + * Check to see if this ORC file is from a future version and if so, + * warn the user that we may not be able to read all of the column encodings. + * @param log the logger to write any error message to + * @param path the data source path for error messages + * @param version the version of hive that wrote the file. + */ + static void checkOrcVersion(Log log, Path path, List version) { + if (version.size() >= 1) { + int major = version.get(0); + int minor = 0; + if (version.size() >= 2) { + minor = version.get(1); + } + if (major > OrcFile.Version.CURRENT.getMajor() || + (major == OrcFile.Version.CURRENT.getMajor() && + minor > OrcFile.Version.CURRENT.getMinor())) { + log.warn(path + " was written by a future Hive version " + + versionString(version) + + ". 
This file may not be readable by this version of Hive."); + } + } + } + + public OrcScanner(Configuration conf, Schema schema, TableMeta meta, Fragment fragment) throws IOException { + super(conf, schema, meta, fragment); + + this.path = this.fragment.getPath(); + this.fileSystem = this.path.getFileSystem(conf); + } + + private static FileMetaInfo extractMetaInfoFromFooter(FileSystem fs, + Path path, + long maxFileLength + ) throws IOException { + FSDataInputStream file = fs.open(path); + + // figure out the size of the file using the option or filesystem + long size; + if (maxFileLength == Long.MAX_VALUE) { + size = fs.getFileStatus(path).getLen(); + } else { + size = maxFileLength; + } + + //read last bytes into buffer to get PostScript + int readSize = (int) Math.min(size, DIRECTORY_SIZE_GUESS); + ByteBuffer buffer = ByteBuffer.allocate(readSize); + assert buffer.position() == 0; + file.readFully((size - readSize), + buffer.array(), buffer.arrayOffset(), readSize); + buffer.position(0); + + //read the PostScript + //get length of PostScript + int psLen = buffer.get(readSize - 1) & 0xff; + ensureOrcFooter(file, path, psLen, buffer); + int psOffset = readSize - 1 - psLen; + OrcProto.PostScript ps = extractPostScript(buffer, path, psLen, psOffset); + + int footerSize = (int) ps.getFooterLength(); + int metadataSize = (int) ps.getMetadataLength(); + + //check if extra bytes need to be read + ByteBuffer fullFooterBuffer = null; + int extra = Math.max(0, psLen + 1 + footerSize + metadataSize - readSize); + if (extra > 0) { + //more bytes need to be read, seek back to the right place and read extra bytes + ByteBuffer extraBuf = ByteBuffer.allocate(extra + readSize); + file.readFully((size - readSize - extra), extraBuf.array(), + extraBuf.arrayOffset() + extraBuf.position(), extra); + extraBuf.position(extra); + //append with already read bytes + extraBuf.put(buffer); + buffer = extraBuf; + buffer.position(0); + fullFooterBuffer = buffer.slice(); + 
buffer.limit(footerSize + metadataSize); + } else { + //footer is already in the bytes in buffer, just adjust position, length + buffer.position(psOffset - footerSize - metadataSize); + fullFooterBuffer = buffer.slice(); + buffer.limit(psOffset); + } + + // remember position for later + buffer.mark(); + + file.close(); + + return new FileMetaInfo( + ps.getCompression().toString(), + (int) ps.getCompressionBlockSize(), + (int) ps.getMetadataLength(), + buffer, + ps.getVersionList(), + org.apache.orc.OrcFile.WriterVersion.FUTURE, + fullFooterBuffer + ); + } + + public OrcRecordReader createRecordReader() throws IOException { + return new OrcRecordReader(this.stripes, fileSystem, schema, targets, fragment, types, codec, bufferSize, + rowIndexStride, buildReaderOptions(meta), conf, + TimeZone.getTimeZone(meta.getProperty(StorageConstants.TIMEZONE, TajoConstants.DEFAULT_SYSTEM_TIMEZONE))); + } + + private static Options buildReaderOptions(TableMeta meta) { + return new Options() + .useZeroCopy(Boolean.parseBoolean(meta.getProperty(OrcConf.USE_ZEROCOPY.getAttribute(), + String.valueOf(OrcConf.USE_ZEROCOPY.getDefaultValue())))) + .skipCorruptRecords(Boolean.parseBoolean(meta.getProperty(OrcConf.SKIP_CORRUPT_DATA.getAttribute(), + String.valueOf(OrcConf.SKIP_CORRUPT_DATA.getDefaultValue())))); + } + + @Override + public void init() throws IOException { + FileMetaInfo footerMetaData = extractMetaInfoFromFooter(fileSystem, path, maxLength); + this.footerMetaAndPsBuffer = footerMetaData.footerMetaAndPsBuffer; + MetaInfoObjExtractor rInfo = + new MetaInfoObjExtractor(footerMetaData.compressionType, + footerMetaData.bufferSize, + footerMetaData.metadataSize, + footerMetaData.footerBuffer + ); + this.footerByteBuffer = footerMetaData.footerBuffer; + this.compressionKind = rInfo.compressionKind; + this.codec = rInfo.codec; + this.bufferSize = rInfo.bufferSize; + this.metadataSize = rInfo.metadataSize; + this.stripeStats = rInfo.metadata.getStripeStatsList(); + this.types = 
rInfo.footer.getTypesList(); + this.rowIndexStride = rInfo.footer.getRowIndexStride(); + this.contentLength = rInfo.footer.getContentLength(); + this.numberOfRows = rInfo.footer.getNumberOfRows(); + this.userMetadata = rInfo.footer.getMetadataList(); + this.fileStats = rInfo.footer.getStatisticsList(); + this.versionList = footerMetaData.versionList; + this.stripes = convertProtoStripesToStripes(rInfo.footer.getStripesList()); + + recordReader = createRecordReader(); + + super.init(); + } + + @Override + public Tuple next() throws IOException { + Tuple next = recordReader.next(); + if (next != null) { + recordCount++; + } + return next; + } + + @Override + public void reset() throws IOException { + // TODO: improve this + this.close(); + recordReader = createRecordReader(); + } + + @Override + public void close() throws IOException { + if (recordReader != null) { + recordReader.close(); + tableStats.setNumBytes(recordReader.getNumBytes()); + tableStats.setNumRows(recordCount); + } + } + + @Override + public boolean isProjectable() { + return true; + } + + @Override + public boolean isSelectable() { + return false; + } + + @Override + public void setFilter(EvalNode filter) { + // TODO: implement this + } + + @Override + public float getProgress() { + return inited ? recordReader.getProgress() : super.getProgress(); + } + + @Override + public boolean isSplittable() { + return true; + } + + private static OrcProto.PostScript extractPostScript(ByteBuffer bb, Path path, + int psLen, int psAbsOffset) throws IOException { + // TODO: when PB is upgraded to 2.6, newInstance(ByteBuffer) method should be used here. + assert bb.hasArray(); + CodedInputStream in = CodedInputStream.newInstance( + bb.array(), bb.arrayOffset() + psAbsOffset, psLen); + OrcProto.PostScript ps = OrcProto.PostScript.parseFrom(in); + checkOrcVersion(LOG, path, ps.getVersionList()); + + // Check compression codec. 
+ switch (ps.getCompression()) { + case NONE: + break; + case ZLIB: + break; + case SNAPPY: + break; + case LZO: + break; + default: + throw new IllegalArgumentException("Unknown compression"); + } + return ps; + } + + private static OrcProto.Footer extractFooter(ByteBuffer bb, int footerAbsPos, + int footerSize, CompressionCodec codec, int bufferSize) throws IOException { + bb.position(footerAbsPos); + bb.limit(footerAbsPos + footerSize); + return OrcProto.Footer.parseFrom(InStream.createCodedInputStream("footer", + Lists.newArrayList(new BufferChunk(bb, 0)), footerSize, codec, bufferSize)); + } + + private static OrcProto.Metadata extractMetadata(ByteBuffer bb, int metadataAbsPos, + int metadataSize, CompressionCodec codec, int bufferSize) throws IOException { + bb.position(metadataAbsPos); + bb.limit(metadataAbsPos + metadataSize); + return OrcProto.Metadata.parseFrom(InStream.createCodedInputStream("metadata", + Lists.newArrayList(new BufferChunk(bb, 0)), metadataSize, codec, bufferSize)); + } + + /** + * MetaInfoObjExtractor - has logic to create the values for the fields in ReaderImpl + * from serialized fields. + * As the fields are final, the fields need to be initialized in the constructor and + * can't be done in some helper function. So this helper class is used instead. 
+ * + */ + private static class MetaInfoObjExtractor{ + final org.apache.orc.CompressionKind compressionKind; + final CompressionCodec codec; + final int bufferSize; + final int metadataSize; + final OrcProto.Metadata metadata; + final OrcProto.Footer footer; + + MetaInfoObjExtractor(String codecStr, int bufferSize, int metadataSize, + ByteBuffer footerBuffer) throws IOException { + + this.compressionKind = org.apache.orc.CompressionKind.valueOf(codecStr); + this.bufferSize = bufferSize; + this.codec = OrcUtils.createCodec(compressionKind); + this.metadataSize = metadataSize; + + int position = footerBuffer.position(); + int footerBufferSize = footerBuffer.limit() - footerBuffer.position() - metadataSize; + + this.metadata = extractMetadata(footerBuffer, position, metadataSize, codec, bufferSize); + this.footer = extractFooter( + footerBuffer, position + metadataSize, footerBufferSize, codec, bufferSize); + + footerBuffer.position(position); + } + } + + public static class StripeInformationImpl + implements org.apache.orc.StripeInformation { + private final OrcProto.StripeInformation stripe; + + public StripeInformationImpl(OrcProto.StripeInformation stripe) { + this.stripe = stripe; + } + + @Override + public long getOffset() { + return stripe.getOffset(); + } + + @Override + public long getLength() { + return stripe.getDataLength() + getIndexLength() + getFooterLength(); + } + + @Override + public long getDataLength() { + return stripe.getDataLength(); + } + + @Override + public long getFooterLength() { + return stripe.getFooterLength(); + } + + @Override + public long getIndexLength() { + return stripe.getIndexLength(); + } + + @Override + public long getNumberOfRows() { + return stripe.getNumberOfRows(); + } + + @Override + public String toString() { + return "offset: " + getOffset() + " data: " + getDataLength() + + " rows: " + getNumberOfRows() + " tail: " + getFooterLength() + + " index: " + getIndexLength(); + } + } + + private static List 
convertProtoStripesToStripes( + List stripes) { + List result = new ArrayList<>(stripes.size()); + for (OrcProto.StripeInformation info : stripes) { + result.add(new StripeInformationImpl(info)); + } + return result; + } + +} From 606d0077ca4a78f78b3750b7d4e6c68864d2311d Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Fri, 25 Mar 2016 10:34:31 +0900 Subject: [PATCH 102/127] Trigger for travis CI build --- CHANGES | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGES b/CHANGES index d6a7da3fad..3c6bcaf57a 100644 --- a/CHANGES +++ b/CHANGES @@ -2,7 +2,6 @@ Tajo Change Log Release 0.12.0 - unreleased - NEW FEATURES TAJO-1955: Add a feature to strip quotes from CSV file. (hyunsik) From 9dc07f648a981ac27e89583884cfbde5653a616e Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 28 Mar 2016 14:27:39 +0900 Subject: [PATCH 103/127] Fix a build bug --- .../engine/planner/TestPartitionedTableRewriter.java | 11 ++++------- .../org/apache/tajo/engine/util/TestTupleUtil.java | 2 +- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java index d6c8571f83..032b8126be 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java @@ -24,10 +24,7 @@ import org.apache.tajo.OverridableConf; import org.apache.tajo.QueryTestCaseBase; import org.apache.tajo.algebra.Expr; -import org.apache.tajo.catalog.CatalogUtil; -import org.apache.tajo.catalog.Schema; -import org.apache.tajo.catalog.TableDesc; -import org.apache.tajo.catalog.TableMeta; +import org.apache.tajo.catalog.*; import org.apache.tajo.catalog.partition.PartitionMethodDesc; import org.apache.tajo.catalog.proto.CatalogProtos; import org.apache.tajo.common.TajoDataTypes; @@ -56,7 +53,7 
@@ public static void setUp() throws Exception { FileSystem fs = FileSystem.get(conf); Path rootDir = TajoConf.getWarehouseDir(testingCluster.getConfiguration()); - Schema schema = new Schema(); + Schema schema = SchemaFactory.newV1(); schema.addColumn("n_nationkey", TajoDataTypes.Type.INT8); schema.addColumn("n_name", TajoDataTypes.Type.TEXT); schema.addColumn("n_regionkey", TajoDataTypes.Type.INT8); @@ -69,7 +66,7 @@ public static void setUp() throws Exception { private static void createExternalTableIncludedOnePartitionKeyColumn(FileSystem fs, Path rootDir, Schema schema, TableMeta meta) throws Exception { - Schema partSchema = new Schema(); + Schema partSchema = SchemaFactory.newV1(); partSchema.addColumn("key", TajoDataTypes.Type.TEXT); PartitionMethodDesc partitionMethodDesc = @@ -99,7 +96,7 @@ private static void createExternalTableIncludedOnePartitionKeyColumn(FileSystem private static void createExternalTableIncludedMultiplePartitionKeyColumns(FileSystem fs, Path rootDir, Schema schema, TableMeta meta) throws Exception { - Schema partSchema = new Schema(); + Schema partSchema = SchemaFactory.newV1(); partSchema.addColumn("key1", TajoDataTypes.Type.TEXT); partSchema.addColumn("key2", TajoDataTypes.Type.TEXT); partSchema.addColumn("key3", TajoDataTypes.Type.INT8); diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/util/TestTupleUtil.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/util/TestTupleUtil.java index 2d819493c4..2e77f6afde 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/util/TestTupleUtil.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/util/TestTupleUtil.java @@ -189,7 +189,7 @@ public void testBuildTupleFromPartitionPath() { @Test public void testBuildTupleFromPartitionName() { - Schema schema = new Schema(); + Schema schema = SchemaFactory.newV1(); schema.addColumn("key1", Type.INT8); schema.addColumn("key2", Type.TEXT); From 7e745ca6135bf2a396d1058eed9d21c2356aaaaa Mon Sep 17 00:00:00 
2001 From: JaeHwa Jung Date: Sun, 3 Apr 2016 15:56:50 +0900 Subject: [PATCH 104/127] Improve to execute simple query for partition table with AWS S3 --- tajo-core/pom.xml | 5 + .../NonForwardQueryResultFileScanner.java | 13 +- .../tajo/querymaster/Repartitioner.java | 14 +- .../partition/PartitionPruningHandle.java | 21 ++- .../rules/PartitionedTableRewriter.java | 16 +- .../apache/tajo/storage/FileTablespace.java | 134 ++++++++-------- tajo-storage/tajo-storage-s3/pom.xml | 7 +- .../apache/tajo/storage/s3/S3TableSpace.java | 151 +++++++++++++++++- 8 files changed, 276 insertions(+), 85 deletions(-) diff --git a/tajo-core/pom.xml b/tajo-core/pom.xml index f4e1b9e81b..c21bbff005 100644 --- a/tajo-core/pom.xml +++ b/tajo-core/pom.xml @@ -199,6 +199,11 @@ org.apache.tajo tajo-storage-hdfs + + org.apache.tajo + tajo-storage-s3 + ${project.version} + org.apache.tajo tajo-pullserver diff --git a/tajo-core/src/main/java/org/apache/tajo/master/exec/NonForwardQueryResultFileScanner.java b/tajo-core/src/main/java/org/apache/tajo/master/exec/NonForwardQueryResultFileScanner.java index d231e48648..c0a721e235 100644 --- a/tajo-core/src/main/java/org/apache/tajo/master/exec/NonForwardQueryResultFileScanner.java +++ b/tajo-core/src/main/java/org/apache/tajo/master/exec/NonForwardQueryResultFileScanner.java @@ -45,6 +45,7 @@ import org.apache.tajo.storage.RowStoreUtil.RowStoreEncoder; import org.apache.tajo.storage.fragment.Fragment; import org.apache.tajo.storage.fragment.FragmentConvertor; +import org.apache.tajo.storage.s3.S3TableSpace; import org.apache.tajo.tuple.memory.MemoryBlock; import org.apache.tajo.tuple.memory.MemoryRowBlock; import org.apache.tajo.util.CompressionUtil; @@ -105,9 +106,15 @@ private void initSeqScanExec() throws IOException, TajoException { List fragments = Lists.newArrayList(); if (tableDesc.hasPartition()) { - FileTablespace fileTablespace = TUtil.checkTypeAndGet(tablespace, FileTablespace.class); - 
fragments.addAll(Repartitioner.getFragmentsFromPartitionedTable(fileTablespace, scanNode, tableDesc - , catalog, tajoConf)); + if (tablespace instanceof S3TableSpace) { + S3TableSpace s3TableSpace = TUtil.checkTypeAndGet(tablespace, S3TableSpace.class); + fragments.addAll(Repartitioner.getFragmentsFromPartitionedTable(s3TableSpace, scanNode, tableDesc + , catalog, tajoConf)); + } else { + FileTablespace fileTablespace = TUtil.checkTypeAndGet(tablespace, FileTablespace.class); + fragments.addAll(Repartitioner.getFragmentsFromPartitionedTable(fileTablespace, scanNode, tableDesc + , catalog, tajoConf)); + } } else { fragments.addAll(tablespace.getSplits(tableDesc.getName(), tableDesc, scanNode.getQual())); } diff --git a/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java b/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java index 1c64535c7b..b8424ddaa8 100644 --- a/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java +++ b/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java @@ -56,6 +56,7 @@ import org.apache.tajo.storage.*; import org.apache.tajo.storage.fragment.FileFragment; import org.apache.tajo.storage.fragment.Fragment; +import org.apache.tajo.storage.s3.S3TableSpace; import org.apache.tajo.unit.StorageUnit; import org.apache.tajo.util.Pair; import org.apache.tajo.util.TUtil; @@ -475,10 +476,15 @@ public static List getFragmentsFromPartitionedTable(Tablespace tsHandl PartitionedTableRewriter rewriter = new PartitionedTableRewriter(); rewriter.setCatalog(catalog); PartitionPruningHandle pruningHandle = rewriter.getPartitionPruningHandle(conf, partitionsScan); - - FileTablespace tablespace = (FileTablespace) tsHandler; - fragments.addAll(tablespace.getPartitionSplits(scan.getCanonicalName(), table.getMeta(), table.getSchema() - , pruningHandle.getPartitionKeys(), pruningHandle.getPartitionPaths())); + if (tsHandler instanceof S3TableSpace) { + S3TableSpace tablespace = (S3TableSpace) 
tsHandler; + fragments.addAll(tablespace.getPartitionSplits(scan.getCanonicalName(), table.getMeta(), table.getSchema() + , pruningHandle)); + } else { + FileTablespace tablespace = (FileTablespace) tsHandler; + fragments.addAll(tablespace.getPartitionSplits(scan.getCanonicalName(), table.getMeta(), table.getSchema() + , pruningHandle)); + } return fragments; } diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/partition/PartitionPruningHandle.java b/tajo-plan/src/main/java/org/apache/tajo/plan/partition/PartitionPruningHandle.java index 9271786efa..8d1cad8939 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/partition/PartitionPruningHandle.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/partition/PartitionPruningHandle.java @@ -19,6 +19,9 @@ package org.apache.tajo.plan.partition; import org.apache.hadoop.fs.Path; +import org.apache.tajo.plan.expr.EvalNode; + +import java.util.Map; /** * This includes result informs of partition pruning. @@ -28,11 +31,15 @@ public class PartitionPruningHandle { private Path[] partitionPaths; private String[] partitionKeys; private long totalVolume; + private Map partitionMap; + private EvalNode[] conjunctiveForms; - public PartitionPruningHandle(Path[] partitionPaths, String[] partitionKeys, long totalVolume) { + public PartitionPruningHandle(Path[] partitionPaths, String[] partitionKeys, long totalVolume, + Map partitionMap) { this.partitionPaths = partitionPaths; this.partitionKeys = partitionKeys; this.totalVolume = totalVolume; + this.partitionMap = partitionMap; } public Path[] getPartitionPaths() { @@ -46,4 +53,16 @@ public String[] getPartitionKeys() { public long getTotalVolume() { return totalVolume; } + + public Map getPartitionMap() { + return partitionMap; + } + + public boolean hasConjunctiveForms() { + return this.conjunctiveForms != null; + } + + public void setConjunctiveForms(EvalNode[] conjunctiveForms) { + this.conjunctiveForms = conjunctiveForms; + } } \ No newline at end of file diff 
--git a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java index 2602c18c02..71a060e0e3 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java @@ -20,6 +20,7 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Lists; +import com.google.common.collect.Maps; import com.google.common.collect.Sets; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -176,6 +177,13 @@ private PartitionPruningHandle getPartitionPruningHandle(TajoConf conf, String t } scanNode.setQual(AlgebraicUtil.createSingletonExprFromCNF(conjunctiveForms)); } + + if (conjunctiveForms != null) { + partitionPruningHandle.setConjunctiveForms(conjunctiveForms); + } else { + partitionPruningHandle.setConjunctiveForms(conjunctiveForms); + } + LOG.info("Filtered directory or files: " + partitionPruningHandle.getPartitionPaths().length); LOG.info("Filtered partition keys: " + partitionPruningHandle.getPartitionKeys().length); @@ -192,13 +200,15 @@ private PartitionPruningHandle getPartitionPruningHandleByCatalog(List partitionMap = Maps.newHashMap(); for (int i = 0; i < partitions.size(); i++) { CatalogProtos.PartitionDescProto partition = partitions.get(i); filteredPaths[i] = new Path(partition.getPath()); partitionKeys[i] = partition.getPartitionName(); totalVolume += partition.getNumBytes(); + partitionMap.put(filteredPaths[i], partitionKeys[i]); } - return new PartitionPruningHandle(filteredPaths, partitionKeys, totalVolume); + return new PartitionPruningHandle(filteredPaths, partitionKeys, totalVolume, partitionMap); } /** @@ -237,15 +247,17 @@ private PartitionPruningHandle getPartitionPruningHandleByFileSystem(Schema part // Get partition keys and volume from the list of 
partition directories partitionKeys = new String[filteredPaths.length]; + Map partitionMap = Maps.newHashMap(); for (int i = 0; i < partitionKeys.length; i++) { Path path = filteredPaths[i]; startIdx = path.toString().indexOf(getColumnPartitionPathPrefix(partitionColumns)); partitionKeys[i] = path.toString().substring(startIdx); summary = fs.getContentSummary(path); totalVolume += summary.getLength(); + partitionMap.put(path, partitionKeys[i]); } - return new PartitionPruningHandle(filteredPaths, partitionKeys, totalVolume); + return new PartitionPruningHandle(filteredPaths, partitionKeys, totalVolume, partitionMap); } /** diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java index b0d9fb8e67..7262fa36d1 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java @@ -40,6 +40,7 @@ import org.apache.tajo.plan.expr.EvalNode; import org.apache.tajo.plan.logical.LogicalNode; import org.apache.tajo.plan.logical.NodeType; +import org.apache.tajo.plan.partition.PartitionPruningHandle; import org.apache.tajo.storage.fragment.FileFragment; import org.apache.tajo.storage.fragment.Fragment; import org.apache.tajo.storage.fragment.PartitionFileFragment; @@ -367,7 +368,7 @@ protected boolean isSplittable(TableMeta meta, Schema schema, Path path, FileSta return split; } - private static final double SPLIT_SLOP = 1.1; // 10% slop + protected static final double SPLIT_SLOP = 1.1; // 10% slop protected int getBlockIndex(BlockLocation[] blkLocations, long offset) { @@ -561,7 +562,7 @@ private String[] getHosts(BlockLocation[] blkLocations) throws IOException { /** - * Is the given filename splitable? Usually, true, but if the file is + * Is the given filename splittable? 
Usually, true, but if the file is * stream compressed, it will not be. *

* FileInputFormat implementations can override this and return @@ -577,7 +578,7 @@ private String[] getHosts(BlockLocation[] blkLocations) throws IOException { * @return is this file isSplittable? * @throws IOException */ - protected boolean isPartitionSplittable(TableMeta meta, Schema schema, Path path, String partitionKeys, + protected boolean isSplittablePartitionFragment(TableMeta meta, Schema schema, Path path, String partitionKeys, FileStatus status) throws IOException { Fragment fragment = new PartitionFileFragment(path.getName(), path, 0, status.getLen(), partitionKeys); Scanner scanner = getScanner(meta, schema, fragment, null); @@ -597,7 +598,7 @@ protected boolean isPartitionSplittable(TableMeta meta, Schema schema, Path path * @param partitionKeys partition keys * @return PartitionFileFragment */ - protected PartitionFileFragment makePartitionSplit(String fragmentId, Path file, long start, long length, + protected PartitionFileFragment getSplittablePartitionFragment(String fragmentId, Path file, long start, long length, String[] hosts, String partitionKeys) { return new PartitionFileFragment(fragmentId, file, start, length, hosts, partitionKeys); } @@ -612,7 +613,7 @@ protected PartitionFileFragment makePartitionSplit(String fragmentId, Path file, * @return PartitionFileFragment * @throws IOException */ - protected PartitionFileFragment makePartitionSplit(String fragmentId, Path file, BlockLocation blockLocation + protected PartitionFileFragment getSplittablePartitionFragment(String fragmentId, Path file, BlockLocation blockLocation , String partitionKeys) throws IOException { return new PartitionFileFragment(fragmentId, file, blockLocation, partitionKeys); } @@ -629,7 +630,7 @@ protected PartitionFileFragment makePartitionSplit(String fragmentId, Path file, * @return PartitionFileFragment * @throws IOException */ - protected Fragment makeNonPartitionSplit(String fragmentId, Path file, long start, long length, + protected Fragment 
getNonSplittablePartitionFragment(String fragmentId, Path file, long start, long length, BlockLocation[] blkLocations, String partitionKeys) throws IOException { String[] hosts = getHosts(blkLocations); return new PartitionFileFragment(fragmentId, file, start, length, hosts, partitionKeys); @@ -641,13 +642,11 @@ protected Fragment makeNonPartitionSplit(String fragmentId, Path file, long star * @param tableName table name * @param meta all meta information for scanning a fragmented table * @param schema table schema - * @param partitionKeys the list of partition keys - * @param inputs the list of paths * @return the list of PartitionFileFragment * @throws IOException */ - public List getPartitionSplits(String tableName, TableMeta meta, Schema schema, String[] partitionKeys, - Path... inputs) throws IOException { + public List getPartitionSplits(String tableName, TableMeta meta, Schema schema + , PartitionPruningHandle pruningHandle) throws IOException { long startTime = System.currentTimeMillis(); // generate splits' @@ -656,6 +655,8 @@ public List getPartitionSplits(String tableName, TableMeta meta, Schem List blockLocations = Lists.newArrayList(); int i = 0; + Path[] inputs = pruningHandle.getPartitionPaths(); + String[] partitionKeys = pruningHandle.getPartitionKeys(); for (Path p : inputs) { ArrayList files = Lists.newArrayList(); if (fs.isFile(p)) { @@ -665,58 +666,7 @@ public List getPartitionSplits(String tableName, TableMeta meta, Schem } for (FileStatus file : files) { - Path path = file.getPath(); - long length = file.getLen(); - if (length > 0) { - // Get locations of blocks of file - BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length); - boolean splittable = isPartitionSplittable(meta, schema, path, partitionKeys[i], file); - if (blocksMetadataEnabled && fs instanceof DistributedFileSystem) { - - if (splittable) { - for (BlockLocation blockLocation : blkLocations) { - volumeSplits.add(makePartitionSplit(tableName, path, 
blockLocation, partitionKeys[i])); - } - blockLocations.addAll(Arrays.asList(blkLocations)); - - } else { // Non splittable - long blockSize = blkLocations[0].getLength(); - if (blockSize >= length) { - blockLocations.addAll(Arrays.asList(blkLocations)); - for (BlockLocation blockLocation : blkLocations) { - volumeSplits.add(makePartitionSplit(tableName, path, blockLocation, partitionKeys[i])); - } - } else { - splits.add(makeNonPartitionSplit(tableName, path, 0, length, blkLocations, partitionKeys[i])); - } - } - - } else { - if (splittable) { - - long minSize = Math.max(getMinSplitSize(), 1); - - long blockSize = file.getBlockSize(); // s3n rest api contained block size but blockLocations is one - long splitSize = Math.max(minSize, blockSize); - long bytesRemaining = length; - - // for s3 - while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) { - int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); - splits.add(makePartitionSplit(tableName, path, length - bytesRemaining, splitSize, - blkLocations[blkIndex].getHosts(), partitionKeys[i])); - bytesRemaining -= splitSize; - } - if (bytesRemaining > 0) { - int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); - splits.add(makePartitionSplit(tableName, path, length - bytesRemaining, bytesRemaining, - blkLocations[blkIndex].getHosts(), partitionKeys[i])); - } - } else { // Non splittable - splits.add(makeNonPartitionSplit(tableName, path, 0, length, blkLocations, partitionKeys[i])); - } - } - } + computePartitionSplits(file, meta, schema, tableName, partitionKeys[i], splits, volumeSplits, blockLocations); } if (LOG.isDebugEnabled()){ LOG.debug("# of average splits per partition: " + splits.size() / (i+1)); @@ -735,7 +685,65 @@ public List getPartitionSplits(String tableName, TableMeta meta, Schem return splits; } - private void setVolumeMeta(List splits, final List blockLocations) + protected void computePartitionSplits(FileStatus file, TableMeta meta, Schema schema, String 
tableName, + String partitionKey, List splits, List volumeSplits, List blockLocations) + throws IOException { + + Path path = file.getPath(); + long length = file.getLen(); + if (length > 0) { + // Get locations of blocks of file + BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length); + boolean splittable = isSplittablePartitionFragment(meta, schema, path, partitionKey, file); + if (blocksMetadataEnabled && fs instanceof DistributedFileSystem) { + + if (splittable) { + for (BlockLocation blockLocation : blkLocations) { + volumeSplits.add(getSplittablePartitionFragment(tableName, path, blockLocation, partitionKey)); + } + blockLocations.addAll(Arrays.asList(blkLocations)); + + } else { // Non splittable + long blockSize = blkLocations[0].getLength(); + if (blockSize >= length) { + blockLocations.addAll(Arrays.asList(blkLocations)); + for (BlockLocation blockLocation : blkLocations) { + volumeSplits.add(getSplittablePartitionFragment(tableName, path, blockLocation, partitionKey)); + } + } else { + splits.add(getNonSplittablePartitionFragment(tableName, path, 0, length, blkLocations, partitionKey)); + } + } + + } else { + if (splittable) { + + long minSize = Math.max(getMinSplitSize(), 1); + + long blockSize = file.getBlockSize(); // s3n rest api contained block size but blockLocations is one + long splitSize = Math.max(minSize, blockSize); + long bytesRemaining = length; + + // for s3 + while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) { + int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); + splits.add(getSplittablePartitionFragment(tableName, path, length - bytesRemaining, splitSize, + blkLocations[blkIndex].getHosts(), partitionKey)); + bytesRemaining -= splitSize; + } + if (bytesRemaining > 0) { + int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); + splits.add(getSplittablePartitionFragment(tableName, path, length - bytesRemaining, bytesRemaining, + blkLocations[blkIndex].getHosts(), partitionKey)); 
+ } + } else { // Non splittable + splits.add(getNonSplittablePartitionFragment(tableName, path, 0, length, blkLocations, partitionKey)); + } + } + } + } + + protected void setVolumeMeta(List splits, final List blockLocations) throws IOException { int locationSize = blockLocations.size(); diff --git a/tajo-storage/tajo-storage-s3/pom.xml b/tajo-storage/tajo-storage-s3/pom.xml index 4af62e0062..9a78814301 100644 --- a/tajo-storage/tajo-storage-s3/pom.xml +++ b/tajo-storage/tajo-storage-s3/pom.xml @@ -292,7 +292,7 @@ org.apache.tajo tajo-catalog-common - test + provided org.apache.tajo @@ -353,11 +353,10 @@ - org.apache.hadoop hadoop-hdfs - test + provided org.apache.hadoop @@ -465,7 +464,7 @@ org.apache.tajo tajo-plan - test + provided diff --git a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java index 728b18ec40..f12403a321 100644 --- a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java +++ b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java @@ -20,6 +20,8 @@ import java.io.IOException; import java.net.URI; +import java.util.HashSet; +import java.util.List; import java.util.stream.Stream; import java.util.stream.StreamSupport; @@ -35,21 +37,30 @@ import com.amazonaws.services.s3.iterable.S3Objects; import com.amazonaws.services.s3.model.S3ObjectSummary; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; import com.google.common.primitives.Ints; +import io.airlift.units.DataSize; import io.airlift.units.Duration; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.BlockLocation; +import org.apache.hadoop.fs.FileStatus; import 
org.apache.hadoop.fs.Path; import org.apache.tajo.conf.TajoConf; +import org.apache.tajo.catalog.*; +import org.apache.tajo.plan.partition.PartitionPruningHandle; import org.apache.tajo.storage.FileTablespace; import net.minidev.json.JSONObject; +import org.apache.tajo.storage.fragment.Fragment; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Strings.nullToEmpty; +import static io.airlift.units.DataSize.Unit.MEGABYTE; + public class S3TableSpace extends FileTablespace { private final static Log LOG = LogFactory.getLog(S3TableSpace.class); @@ -57,6 +68,7 @@ public class S3TableSpace extends FileTablespace { private boolean useInstanceCredentials; //use a custom endpoint? public static final String ENDPOINT = "fs.s3a.endpoint"; + private static final DataSize BLOCK_SIZE = new DataSize(32, MEGABYTE); public S3TableSpace(String spaceName, URI uri, JSONObject config) { super(spaceName, uri, config); @@ -66,6 +78,8 @@ public S3TableSpace(String spaceName, URI uri, JSONObject config) { public void init(TajoConf tajoConf) throws IOException { super.init(tajoConf); + this.blocksMetadataEnabled = false; + int maxErrorRetries = conf.getIntVar(TajoConf.ConfVars.S3_MAX_ERROR_RETRIES); boolean sslEnabled = conf.getBoolVar(TajoConf.ConfVars.S3_SSL_ENABLED); @@ -128,13 +142,7 @@ private static AWSCredentials getAwsCredentials(URI uri, Configuration conf) { @Override public long calculateSize(Path path) throws IOException { - String key = keyFromPath(path); - if (!key.isEmpty()) { - key += "/"; - } - - Iterable objectSummaries = S3Objects.withPrefix(s3, uri.getHost(), key); - Stream objectStream = StreamSupport.stream(objectSummaries.spliterator(), false); + Stream objectStream = getS3ObjectSummaryStream(path); long totalBucketSize = objectStream.mapToLong(object -> object.getSize()).sum(); objectStream.close(); return totalBucketSize; @@ -153,6 +161,133 @@ private String keyFromPath(Path path) return key; } + @Override + 
public List getPartitionSplits(String tableName, TableMeta meta, Schema schema + , PartitionPruningHandle pruningHandle) throws IOException { + long startTime = System.currentTimeMillis(); + + List files = Lists.newArrayList(); + List partitionKeys = Lists.newArrayList(); + + // Generate the list of FileStatuses and partition keys + Path[] paths = pruningHandle.getPartitionPaths(); + if (pruningHandle.hasConjunctiveForms()) { + for(Path path : paths) { + listS3ObjectsOfPartitionTable(path, files, partitionKeys, pruningHandle); + } + } else { + HashSet parents = getParentPaths(paths); + for(Path parent : parents) { + listS3ObjectsOfPartitionTable(parent, files, partitionKeys, pruningHandle); + } + } + + // Generate splits' + List splits = Lists.newArrayList(); + List volumeSplits = Lists.newArrayList(); + List blockLocations = Lists.newArrayList(); + + int i = 0; + for (FileStatus file : files) { + computePartitionSplits(file, meta, schema, tableName, partitionKeys.get(i), splits, volumeSplits, blockLocations); + if (LOG.isDebugEnabled()){ + LOG.debug("# of average splits per partition: " + splits.size() / (i+1)); + } + i++; + } + + // Combine original fileFragments with new VolumeId information + setVolumeMeta(volumeSplits, blockLocations); + splits.addAll(volumeSplits); + LOG.info("Total # of splits: " + splits.size()); + + long finishTime = System.currentTimeMillis(); + long elapsedMills = finishTime - startTime; + LOG.info(String.format("Split for partition table :%d ms elapsed.", elapsedMills)); + + return splits; + } + + /** + * Generate the list of FileStatus and partition keys using AWS S3 SDK. 
+ * + */ + private void listS3ObjectsOfPartitionTable(Path prefixPath, List files, + List partitionKeys, PartitionPruningHandle pruningHandle) throws IOException { + Stream objectStream = getS3ObjectSummaryStream(prefixPath); + + objectStream + .filter(summary -> summary.getSize() > 0 && !summary.getKey().endsWith("/")) + .forEach(summary -> { + String bucketName = summary.getBucketName(); + String pathString = uri.getScheme() + "://" + bucketName + "/" + summary.getKey(); + Path path = new Path(pathString); + String fileName = path.getName(); + + if (!fileName.startsWith("_") && !fileName.startsWith(".")) { + int lastIndex = pathString.lastIndexOf("/"); + String partitionPathString = pathString.substring(0, lastIndex); + Path partitionPath = new Path(partitionPathString); + + if (pruningHandle.getPartitionMap().containsKey(partitionPath)) { + String partitionKey = pruningHandle.getPartitionMap().get(partitionPath); + files.add(new FileStatus(summary.getSize(), false, 1, BLOCK_SIZE.toBytes(), + summary.getLastModified().getTime(), path)); + partitionKeys.add(partitionKey); + } + } + } + ); + } + + private Stream getS3ObjectSummaryStream(Path path) throws IOException { + String prefix = keyFromPath(path); + if (!prefix.isEmpty()) { + prefix += "/"; + } + + Iterable objectSummaries = S3Objects.withPrefix(s3, uri.getHost(), prefix); + Stream objectStream = StreamSupport.stream(objectSummaries.spliterator(), false); + + return objectStream; + } + + /** + * Find parent paths of the specified paths. 
+ * + * example #1: + * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1992-01-02 + * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1992-01-03 + * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1992-02-01 + * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1995-02-02 + * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1995-10-02 + * --> s3://tajo-data-us-east-1/tpch-1g-partition/lineitem + * + * example #2: + * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1992-01-02/l_returnflag=A + * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1992-01-02/l_returnflag=R + * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1992-01-03/l_returnflag=R + * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1992-02-02/l_returnflag=A + * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1995-02-02/l_returnflag=A + * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1995-10-02/l_returnflag=R + * --> s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1992-01-02 + * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1992-01-03 + * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1992-02-02 + * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1992-10-02 + * + * @param paths the collection of parent paths + * @return + */ + private HashSet getParentPaths(Path[] paths) { + HashSet hashSet = Sets.newHashSet(); + + for(Path path : paths) { + hashSet.add(path.getParent()); + } + + return hashSet; + } + @VisibleForTesting public AmazonS3 getAmazonS3Client() { return s3; From 84c62fb91fc1816a151b666293a4263f5dc4718b Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 4 Apr 2016 09:42:43 +0900 Subject: [PATCH 105/127] Refactor methods for splitting partition fragment --- .../apache/tajo/storage/FileTablespace.java | 121 +++++++++--------- 1 file changed, 63 insertions(+), 58 
deletions(-) diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java index 07bf7541e4..2314a67e2a 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java @@ -576,7 +576,7 @@ private String[] getHosts(BlockLocation[] blkLocations) throws IOException { * @return is this file isSplittable? * @throws IOException */ - protected boolean isPartitionSplittable(TableMeta meta, Schema schema, Path path, String partitionKeys, + protected boolean isSplittablePartitionFragment(TableMeta meta, Schema schema, Path path, String partitionKeys, FileStatus status) throws IOException { Fragment fragment = new PartitionFileFragment(path.getName(), path, 0, status.getLen(), partitionKeys); Scanner scanner = getScanner(meta, schema, fragment, null); @@ -596,7 +596,7 @@ protected boolean isPartitionSplittable(TableMeta meta, Schema schema, Path path * @param partitionKeys partition keys * @return PartitionFileFragment */ - protected PartitionFileFragment makePartitionSplit(String fragmentId, Path file, long start, long length, + protected PartitionFileFragment getSplittablePartitionFragment(String fragmentId, Path file, long start, long length, String[] hosts, String partitionKeys) { return new PartitionFileFragment(fragmentId, file, start, length, hosts, partitionKeys); } @@ -611,7 +611,7 @@ protected PartitionFileFragment makePartitionSplit(String fragmentId, Path file, * @return PartitionFileFragment * @throws IOException */ - protected PartitionFileFragment makePartitionSplit(String fragmentId, Path file, BlockLocation blockLocation + protected PartitionFileFragment getSplittablePartitionFragment(String fragmentId, Path file, BlockLocation blockLocation , String partitionKeys) throws IOException { return new 
PartitionFileFragment(fragmentId, file, blockLocation, partitionKeys); } @@ -628,7 +628,7 @@ protected PartitionFileFragment makePartitionSplit(String fragmentId, Path file, * @return PartitionFileFragment * @throws IOException */ - protected Fragment makeNonPartitionSplit(String fragmentId, Path file, long start, long length, + protected Fragment getNonSplittablePartitionFragment(String fragmentId, Path file, long start, long length, BlockLocation[] blkLocations, String partitionKeys) throws IOException { String[] hosts = getHosts(blkLocations); return new PartitionFileFragment(fragmentId, file, start, length, hosts, partitionKeys); @@ -640,8 +640,6 @@ protected Fragment makeNonPartitionSplit(String fragmentId, Path file, long star * @param tableName table name * @param meta all meta information for scanning a fragmented table * @param schema table schema - * @param partitionKeys the list of partition keys - * @param inputs the list of paths * @return the list of PartitionFileFragment * @throws IOException */ @@ -664,58 +662,7 @@ public List getPartitionSplits(String tableName, TableMeta meta, Schem } for (FileStatus file : files) { - Path path = file.getPath(); - long length = file.getLen(); - if (length > 0) { - // Get locations of blocks of file - BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length); - boolean splittable = isPartitionSplittable(meta, schema, path, partitionKeys[i], file); - if (blocksMetadataEnabled && fs instanceof DistributedFileSystem) { - - if (splittable) { - for (BlockLocation blockLocation : blkLocations) { - volumeSplits.add(makePartitionSplit(tableName, path, blockLocation, partitionKeys[i])); - } - blockLocations.addAll(Arrays.asList(blkLocations)); - - } else { // Non splittable - long blockSize = blkLocations[0].getLength(); - if (blockSize >= length) { - blockLocations.addAll(Arrays.asList(blkLocations)); - for (BlockLocation blockLocation : blkLocations) { - volumeSplits.add(makePartitionSplit(tableName, path, 
blockLocation, partitionKeys[i])); - } - } else { - splits.add(makeNonPartitionSplit(tableName, path, 0, length, blkLocations, partitionKeys[i])); - } - } - - } else { - if (splittable) { - - long minSize = Math.max(getMinSplitSize(), 1); - - long blockSize = file.getBlockSize(); // s3n rest api contained block size but blockLocations is one - long splitSize = Math.max(minSize, blockSize); - long bytesRemaining = length; - - // for s3 - while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) { - int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); - splits.add(makePartitionSplit(tableName, path, length - bytesRemaining, splitSize, - blkLocations[blkIndex].getHosts(), partitionKeys[i])); - bytesRemaining -= splitSize; - } - if (bytesRemaining > 0) { - int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); - splits.add(makePartitionSplit(tableName, path, length - bytesRemaining, bytesRemaining, - blkLocations[blkIndex].getHosts(), partitionKeys[i])); - } - } else { // Non splittable - splits.add(makeNonPartitionSplit(tableName, path, 0, length, blkLocations, partitionKeys[i])); - } - } - } + computePartitionSplits(file, meta, schema, tableName, partitionKeys[i], splits, volumeSplits, blockLocations); } if (LOG.isDebugEnabled()){ LOG.debug("# of average splits per partition: " + splits.size() / (i+1)); @@ -734,6 +681,64 @@ public List getPartitionSplits(String tableName, TableMeta meta, Schem return splits; } + protected void computePartitionSplits(FileStatus file, TableMeta meta, Schema schema, String tableName, + String partitionKey, List splits, List volumeSplits, List blockLocations) + throws IOException { + + Path path = file.getPath(); + long length = file.getLen(); + if (length > 0) { + // Get locations of blocks of file + BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length); + boolean splittable = isSplittablePartitionFragment(meta, schema, path, partitionKey, file); + if (blocksMetadataEnabled && fs 
instanceof DistributedFileSystem) { + + if (splittable) { + for (BlockLocation blockLocation : blkLocations) { + volumeSplits.add(getSplittablePartitionFragment(tableName, path, blockLocation, partitionKey)); + } + blockLocations.addAll(Arrays.asList(blkLocations)); + + } else { // Non splittable + long blockSize = blkLocations[0].getLength(); + if (blockSize >= length) { + blockLocations.addAll(Arrays.asList(blkLocations)); + for (BlockLocation blockLocation : blkLocations) { + volumeSplits.add(getSplittablePartitionFragment(tableName, path, blockLocation, partitionKey)); + } + } else { + splits.add(getNonSplittablePartitionFragment(tableName, path, 0, length, blkLocations, partitionKey)); + } + } + + } else { + if (splittable) { + + long minSize = Math.max(getMinSplitSize(), 1); + + long blockSize = file.getBlockSize(); // s3n rest api contained block size but blockLocations is one + long splitSize = Math.max(minSize, blockSize); + long bytesRemaining = length; + + // for s3 + while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) { + int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); + splits.add(getSplittablePartitionFragment(tableName, path, length - bytesRemaining, splitSize, + blkLocations[blkIndex].getHosts(), partitionKey)); + bytesRemaining -= splitSize; + } + if (bytesRemaining > 0) { + int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); + splits.add(getSplittablePartitionFragment(tableName, path, length - bytesRemaining, bytesRemaining, + blkLocations[blkIndex].getHosts(), partitionKey)); + } + } else { // Non splittable + splits.add(getNonSplittablePartitionFragment(tableName, path, 0, length, blkLocations, partitionKey)); + } + } + } + } + private void setVolumeMeta(List splits, final List blockLocations) throws IOException { From 287ca0951d428f2ac1b29b4db077fea8a03d484f Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 4 Apr 2016 12:21:27 +0900 Subject: [PATCH 106/127] Add S3TableSpace::getFilteredPrefixList 
--- .../apache/tajo/storage/s3/S3TableSpace.java | 76 ++++++++++++++----- 1 file changed, 56 insertions(+), 20 deletions(-) diff --git a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java index f12403a321..687dff84c7 100644 --- a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java +++ b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java @@ -55,6 +55,7 @@ import net.minidev.json.JSONObject; import org.apache.tajo.storage.fragment.Fragment; +import org.apache.tajo.util.FileUtil; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Strings.nullToEmpty; @@ -171,6 +172,7 @@ public List getPartitionSplits(String tableName, TableMeta meta, Schem // Generate the list of FileStatuses and partition keys Path[] paths = pruningHandle.getPartitionPaths(); + if (pruningHandle.hasConjunctiveForms()) { for(Path path : paths) { listS3ObjectsOfPartitionTable(path, files, partitionKeys, pruningHandle); @@ -218,26 +220,26 @@ private void listS3ObjectsOfPartitionTable(Path prefixPath, List fil objectStream .filter(summary -> summary.getSize() > 0 && !summary.getKey().endsWith("/")) - .forEach(summary -> { - String bucketName = summary.getBucketName(); - String pathString = uri.getScheme() + "://" + bucketName + "/" + summary.getKey(); - Path path = new Path(pathString); - String fileName = path.getName(); - - if (!fileName.startsWith("_") && !fileName.startsWith(".")) { - int lastIndex = pathString.lastIndexOf("/"); - String partitionPathString = pathString.substring(0, lastIndex); - Path partitionPath = new Path(partitionPathString); - - if (pruningHandle.getPartitionMap().containsKey(partitionPath)) { - String partitionKey = pruningHandle.getPartitionMap().get(partitionPath); - files.add(new FileStatus(summary.getSize(), false, 1, 
BLOCK_SIZE.toBytes(), - summary.getLastModified().getTime(), path)); - partitionKeys.add(partitionKey); + .forEach(summary -> { + String bucketName = summary.getBucketName(); + String pathString = uri.getScheme() + "://" + bucketName + "/" + summary.getKey(); + Path path = new Path(pathString); + String fileName = path.getName(); + + if (!fileName.startsWith("_") && !fileName.startsWith(".")) { + int lastIndex = pathString.lastIndexOf("/"); + String partitionPathString = pathString.substring(0, lastIndex); + Path partitionPath = new Path(partitionPathString); + + if (pruningHandle.getPartitionMap().containsKey(partitionPath)) { + String partitionKey = pruningHandle.getPartitionMap().get(partitionPath); + files.add(new FileStatus(summary.getSize(), false, 1, BLOCK_SIZE.toBytes(), + summary.getLastModified().getTime(), path)); + partitionKeys.add(partitionKey); + } } } - } - ); + ); } private Stream getS3ObjectSummaryStream(Path path) throws IOException { @@ -252,6 +254,7 @@ private Stream getS3ObjectSummaryStream(Path path) throws IOExc return objectStream; } + /** * Find parent paths of the specified paths. * @@ -275,8 +278,8 @@ private Stream getS3ObjectSummaryStream(Path path) throws IOExc * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1992-02-02 * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1992-10-02 * - * @param paths the collection of parent paths - * @return + * @param paths + * @return the collection of parent paths */ private HashSet getParentPaths(Path[] paths) { HashSet hashSet = Sets.newHashSet(); @@ -288,6 +291,39 @@ private HashSet getParentPaths(Path[] paths) { return hashSet; } + + + /** + * Find prefix paths of the specified paths. 
+ * + * example + * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1992-01-02 + * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1993-02-01 + * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1995-02-02 + * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=2015-02-01 + * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=2016-02-02 + * --> s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1 + * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=2 + * + * @param paths + * @return the collection of prefix paths + */ + private HashSet getFilteredPrefixList(Path[] paths) { + HashSet hashSet = Sets.newHashSet(); + + for(Path path : paths) { + String[] partitionKeyValue = path.getName().split("="); + if (partitionKeyValue != null && partitionKeyValue.length == 2) { + String name = partitionKeyValue[0] + "=" + partitionKeyValue[1].substring(0, 1); + Path prefix = new Path(path.getParent(), name); + hashSet.add(prefix); + } + } + + return hashSet; + } + + @VisibleForTesting public AmazonS3 getAmazonS3Client() { return s3; From 245c3fa49624a2ffa34287916971fb6969b5c4ea Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 4 Apr 2016 22:53:16 +0900 Subject: [PATCH 107/127] Implement to list S3 Objects by marker --- .../java/org/apache/tajo/util/FileUtil.java | 38 +++ .../apache/tajo/storage/s3/S3TableSpace.java | 246 +++++++++++------- 2 files changed, 186 insertions(+), 98 deletions(-) diff --git a/tajo-common/src/main/java/org/apache/tajo/util/FileUtil.java b/tajo-common/src/main/java/org/apache/tajo/util/FileUtil.java index 95700d0a14..fd693d7f51 100644 --- a/tajo-common/src/main/java/org/apache/tajo/util/FileUtil.java +++ b/tajo-common/src/main/java/org/apache/tajo/util/FileUtil.java @@ -142,4 +142,42 @@ public static void cleanupAndthrowIfFailed(java.io.Closeable... closeables) thro throw ioe; } } + + public static String getCommonPrefix(Path... 
paths){ + String prefix = ""; + String[][] folders = new String[paths.length][]; + + // split on file separator + for(int i = 0; i < paths.length; i++){ + folders[i] = paths[i].toString().split("/"); + } + + for(int j = 0; j < folders[0].length; j++){ + // grab the next folder name in the first path + String thisFolder = folders[0][j]; + // assume all have matched in case there are no more paths + boolean allMatched = true; + + // look at the other paths + for(int i = 1; i < folders.length && allMatched; i++){ + // if there is no folder here + if(folders[i].length < j){ + allMatched = false; + // stop looking because we've gone as far as we can + break; + } + // check if it matched + allMatched &= folders[i][j].equals(thisFolder); + } + // if they all matched this folder name + if(allMatched) { + // add it to the answer + prefix += thisFolder + "/"; + } else { + break; + } + } + + return prefix; + } } diff --git a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java index 687dff84c7..9a8377cfe4 100644 --- a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java +++ b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java @@ -20,8 +20,10 @@ import java.io.IOException; import java.net.URI; +import java.util.Arrays; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.stream.Stream; import java.util.stream.StreamSupport; @@ -35,9 +37,12 @@ import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.AmazonS3Client; import com.amazonaws.services.s3.iterable.S3Objects; +import com.amazonaws.services.s3.model.ListObjectsRequest; +import com.amazonaws.services.s3.model.ObjectListing; import com.amazonaws.services.s3.model.S3ObjectSummary; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Lists; 
+import com.google.common.collect.Maps; import com.google.common.collect.Sets; import com.google.common.primitives.Ints; import io.airlift.units.DataSize; @@ -166,22 +171,20 @@ private String keyFromPath(Path path) public List getPartitionSplits(String tableName, TableMeta meta, Schema schema , PartitionPruningHandle pruningHandle) throws IOException { long startTime = System.currentTimeMillis(); - List files = Lists.newArrayList(); List partitionKeys = Lists.newArrayList(); // Generate the list of FileStatuses and partition keys Path[] paths = pruningHandle.getPartitionPaths(); + // Get common prefix of partition paths + String commonPrefix = FileUtil.getCommonPrefix(paths); + + // List buckets to generate FileStatuses and partition keys if (pruningHandle.hasConjunctiveForms()) { - for(Path path : paths) { - listS3ObjectsOfPartitionTable(path, files, partitionKeys, pruningHandle); - } + listS3ObjectsByMarker(commonPrefix, pruningHandle, files, partitionKeys); } else { - HashSet parents = getParentPaths(paths); - for(Path parent : parents) { - listS3ObjectsOfPartitionTable(parent, files, partitionKeys, pruningHandle); - } + listAllS3Objects(new Path(commonPrefix), files, partitionKeys, pruningHandle); } // Generate splits' @@ -210,119 +213,166 @@ public List getPartitionSplits(String tableName, TableMeta meta, Schem return splits; } - /** - * Generate the list of FileStatus and partition keys using AWS S3 SDK. 
- * - */ - private void listS3ObjectsOfPartitionTable(Path prefixPath, List files, - List partitionKeys, PartitionPruningHandle pruningHandle) throws IOException { - Stream objectStream = getS3ObjectSummaryStream(prefixPath); + private Path getPathFromBucket(S3ObjectSummary summary) { + String bucketName = summary.getBucketName(); + String pathString = uri.getScheme() + "://" + bucketName + "/" + summary.getKey(); + Path path = new Path(pathString); + return path; + } + + private void listS3ObjectsByMarker(String commonPrefix, PartitionPruningHandle pruningHandle + , List files, List partitions) throws IOException { + long startTime = System.currentTimeMillis(); + + ObjectListing objectListing; + String previousPartition = null, nextPartition = null; + int callCount = 0; + boolean finished = false, enabled = false; + + String prefix = keyFromPath(new Path(commonPrefix)); + if (!prefix.isEmpty()) { + prefix += "/"; + } + + ListObjectsRequest request = new ListObjectsRequest() + .withBucketName(uri.getHost()) + .withPrefix(prefix); - objectStream - .filter(summary -> summary.getSize() > 0 && !summary.getKey().endsWith("/")) - .forEach(summary -> { - String bucketName = summary.getBucketName(); - String pathString = uri.getScheme() + "://" + bucketName + "/" + summary.getKey(); - Path path = new Path(pathString); - String fileName = path.getName(); - if (!fileName.startsWith("_") && !fileName.startsWith(".")) { - int lastIndex = pathString.lastIndexOf("/"); - String partitionPathString = pathString.substring(0, lastIndex); - Path partitionPath = new Path(partitionPathString); + Map partitionMap = Maps.newHashMap(); + for (int i = 0; i < pruningHandle.getPartitionKeys().length; i++) { + partitionMap.put(i, pruningHandle.getPartitionKeys()[i]); + } + + do { + enabled = true; + objectListing = s3.listObjects(request); + + int objectsCount = objectListing.getObjectSummaries().size(); + + // Get partition of last bucket from current objects + Path lastPath = 
getPathFromBucket(objectListing.getObjectSummaries().get(objectsCount - 1)); + String lastPartition = lastPath.getParent().getName(); + + // Check target partition compare with last partition of current objects + if (previousPartition == null) { + if (partitionMap.get(0).compareTo(lastPartition) > 0) { + enabled = false; + } + } else { + if (previousPartition.compareTo(lastPartition) > 0) { + enabled = false; + } + } + + // Generate FileStatus and partition key + if (enabled) { + for (S3ObjectSummary summary : objectListing.getObjectSummaries()) { + if (summary.getSize() > 0 && !summary.getKey().endsWith("/")) { + Path path = getPathFromBucket(summary); + + if (!path.getName().startsWith("_") && !path.getName().startsWith(".")) { + Path partitionPath = path.getParent(); + // If Tajo can matched partition from partition map, add it to final list. if (pruningHandle.getPartitionMap().containsKey(partitionPath)) { String partitionKey = pruningHandle.getPartitionMap().get(partitionPath); files.add(new FileStatus(summary.getSize(), false, 1, BLOCK_SIZE.toBytes(), summary.getLastModified().getTime(), path)); - partitionKeys.add(partitionKey); + partitions.add(partitionKey); + previousPartition = partitionKey; + } else { + // If Tajo can't matched partition, consider to move next marker. 
+ int index = -1; + // If any partition not yet added + if (previousPartition == null) { + nextPartition = partitionMap.get(0); + } else { + // Find index of previous partition + for(Map.Entry entry : partitionMap.entrySet()) { + if (entry.getValue().equals(previousPartition)) { + index = entry.getKey(); + break; + } + } + + // Find next target partition with the index of previous partition + if ((index + 1) < partitionMap.size()) { + nextPartition = partitionMap.get(index+1); + } else if ((index + 1) == partitionMap.size()) { + finished = true; + break; + } + } + + if (nextPartition != null && nextPartition.compareTo(lastPartition) <= 0) { + continue; + } else { + break; + } } } } - ); - } - - private Stream getS3ObjectSummaryStream(Path path) throws IOException { - String prefix = keyFromPath(path); - if (!prefix.isEmpty()) { - prefix += "/"; - } - - Iterable objectSummaries = S3Objects.withPrefix(s3, uri.getHost(), prefix); - Stream objectStream = StreamSupport.stream(objectSummaries.spliterator(), false); + } + } - return objectStream; + request.setMarker(objectListing.getNextMarker()); + callCount++; + } while (objectListing.isTruncated() && !finished); + long finishTime = System.currentTimeMillis(); + long elapsedMills = finishTime - startTime; + LOG.info(String.format("List S3Objects: %d ms elapsed. API call count: %d", elapsedMills, callCount)); } - /** - * Find parent paths of the specified paths. 
- * - * example #1: - * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1992-01-02 - * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1992-01-03 - * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1992-02-01 - * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1995-02-02 - * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1995-10-02 - * --> s3://tajo-data-us-east-1/tpch-1g-partition/lineitem - * - * example #2: - * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1992-01-02/l_returnflag=A - * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1992-01-02/l_returnflag=R - * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1992-01-03/l_returnflag=R - * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1992-02-02/l_returnflag=A - * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1995-02-02/l_returnflag=A - * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1995-10-02/l_returnflag=R - * --> s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1992-01-02 - * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1992-01-03 - * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1992-02-02 - * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1992-10-02 + * Generate the list of FileStatus and partition keys using AWS S3 SDK. 
* - * @param paths - * @return the collection of parent paths */ - private HashSet getParentPaths(Path[] paths) { - HashSet hashSet = Sets.newHashSet(); - - for(Path path : paths) { - hashSet.add(path.getParent()); - } + private void listAllS3Objects(Path prefixPath, List files, + List partitionKeys, PartitionPruningHandle pruningHandle) throws IOException { + long startTime = System.currentTimeMillis(); - return hashSet; - } + Stream objectStream = getS3ObjectSummaryStream(prefixPath); + objectStream + .filter(summary -> summary.getSize() > 0 && !summary.getKey().endsWith("/")) + .forEach(summary -> { + String bucketName = summary.getBucketName(); + String pathString = uri.getScheme() + "://" + bucketName + "/" + summary.getKey(); + Path path = new Path(pathString); + String fileName = path.getName(); + + if (!fileName.startsWith("_") && !fileName.startsWith(".")) { + int lastIndex = pathString.lastIndexOf("/"); + String partitionPathString = pathString.substring(0, lastIndex); + Path partitionPath = new Path(partitionPathString); + + if (pruningHandle.getPartitionMap().containsKey(partitionPath)) { + String partitionKey = pruningHandle.getPartitionMap().get(partitionPath); + files.add(new FileStatus(summary.getSize(), false, 1, BLOCK_SIZE.toBytes(), + summary.getLastModified().getTime(), path)); + partitionKeys.add(partitionKey); + } + } + } + ); + long finishTime = System.currentTimeMillis(); + long elapsedMills = finishTime - startTime; + LOG.info(String.format("List S3Objects: %d ms elapsed", elapsedMills)); + } - /** - * Find prefix paths of the specified paths. 
- * - * example - * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1992-01-02 - * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1993-02-01 - * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1995-02-02 - * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=2015-02-01 - * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=2016-02-02 - * --> s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=1 - * s3://tajo-data-us-east-1/tpch-1g-partition/lineitem/l_shipdate=2 - * - * @param paths - * @return the collection of prefix paths - */ - private HashSet getFilteredPrefixList(Path[] paths) { - HashSet hashSet = Sets.newHashSet(); - - for(Path path : paths) { - String[] partitionKeyValue = path.getName().split("="); - if (partitionKeyValue != null && partitionKeyValue.length == 2) { - String name = partitionKeyValue[0] + "=" + partitionKeyValue[1].substring(0, 1); - Path prefix = new Path(path.getParent(), name); - hashSet.add(prefix); - } + private Stream getS3ObjectSummaryStream(Path path) throws IOException { + String prefix = keyFromPath(path); + if (!prefix.isEmpty()) { + prefix += "/"; } - return hashSet; - } + Iterable objectSummaries = S3Objects.withPrefix(s3, uri.getHost(), prefix); + Stream objectStream = StreamSupport.stream(objectSummaries.spliterator(), false); + return objectStream; + } @VisibleForTesting public AmazonS3 getAmazonS3Client() { From e4200db256c139816adb196dd5ae2a4b4cc6fc99 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Tue, 5 Apr 2016 01:03:07 +0900 Subject: [PATCH 108/127] Separate a method to make FileStatus using S3ObjectSummary --- .../apache/tajo/storage/s3/S3TableSpace.java | 80 +++++++++++-------- 1 file changed, 46 insertions(+), 34 deletions(-) diff --git a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java index 
9a8377cfe4..69bd3b8a02 100644 --- a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java +++ b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java @@ -182,7 +182,7 @@ public List getPartitionSplits(String tableName, TableMeta meta, Schem // List buckets to generate FileStatuses and partition keys if (pruningHandle.hasConjunctiveForms()) { - listS3ObjectsByMarker(commonPrefix, pruningHandle, files, partitionKeys); + listS3ObjectsByMarker(new Path(commonPrefix), files, partitionKeys, pruningHandle); } else { listAllS3Objects(new Path(commonPrefix), files, partitionKeys, pruningHandle); } @@ -213,15 +213,17 @@ public List getPartitionSplits(String tableName, TableMeta meta, Schem return splits; } - private Path getPathFromBucket(S3ObjectSummary summary) { - String bucketName = summary.getBucketName(); - String pathString = uri.getScheme() + "://" + bucketName + "/" + summary.getKey(); - Path path = new Path(pathString); - return path; - } - - private void listS3ObjectsByMarker(String commonPrefix, PartitionPruningHandle pruningHandle - , List files, List partitions) throws IOException { + /** + * Generate the list of FileStatus and partition key using marker parameter in prefix listing API + * + * @param path path to be listed + * @param files the list of FileStatus to be generated + * @param partitionKeys the list of partition key to be generated + * @param pruningHandle informs of partition pruning results + * @throws IOException + */ + private void listS3ObjectsByMarker(Path path, List files, List partitionKeys, + PartitionPruningHandle pruningHandle) throws IOException { long startTime = System.currentTimeMillis(); ObjectListing objectListing; @@ -229,7 +231,7 @@ private void listS3ObjectsByMarker(String commonPrefix, PartitionPruningHandle p int callCount = 0; boolean finished = false, enabled = false; - String prefix = keyFromPath(new Path(commonPrefix)); + String prefix = keyFromPath(path); 
if (!prefix.isEmpty()) { prefix += "/"; } @@ -238,7 +240,6 @@ private void listS3ObjectsByMarker(String commonPrefix, PartitionPruningHandle p .withBucketName(uri.getHost()) .withPrefix(prefix); - Map partitionMap = Maps.newHashMap(); for (int i = 0; i < pruningHandle.getPartitionKeys().length; i++) { partitionMap.put(i, pruningHandle.getPartitionKeys()[i]); @@ -246,6 +247,8 @@ private void listS3ObjectsByMarker(String commonPrefix, PartitionPruningHandle p do { enabled = true; + + // Get first chunk of 1000 objects objectListing = s3.listObjects(request); int objectsCount = objectListing.getObjectSummaries().size(); @@ -269,17 +272,16 @@ private void listS3ObjectsByMarker(String commonPrefix, PartitionPruningHandle p if (enabled) { for (S3ObjectSummary summary : objectListing.getObjectSummaries()) { if (summary.getSize() > 0 && !summary.getKey().endsWith("/")) { - Path path = getPathFromBucket(summary); + Path bucketPath = getPathFromBucket(summary); - if (!path.getName().startsWith("_") && !path.getName().startsWith(".")) { - Path partitionPath = path.getParent(); + if (!bucketPath.getName().startsWith("_") && !bucketPath.getName().startsWith(".")) { + Path partitionPath = bucketPath.getParent(); // If Tajo can matched partition from partition map, add it to final list. if (pruningHandle.getPartitionMap().containsKey(partitionPath)) { + files.add(getFileStatusFromBucket(summary, bucketPath)); String partitionKey = pruningHandle.getPartitionMap().get(partitionPath); - files.add(new FileStatus(summary.getSize(), false, 1, BLOCK_SIZE.toBytes(), - summary.getLastModified().getTime(), path)); - partitions.add(partitionKey); + partitionKeys.add(partitionKey); previousPartition = partitionKey; } else { // If Tajo can't matched partition, consider to move next marker. @@ -325,33 +327,31 @@ private void listS3ObjectsByMarker(String commonPrefix, PartitionPruningHandle p } /** - * Generate the list of FileStatus and partition keys using AWS S3 SDK. 
+ * Generate the list of FileStatus and partition key * + * @param path path to be listed + * @param files the list of FileStatus to be generated + * @param partitionKeys the list of partition key to be generated + * @param pruningHandle informs of partition pruning results + * @throws IOException */ - private void listAllS3Objects(Path prefixPath, List files, - List partitionKeys, PartitionPruningHandle pruningHandle) throws IOException { + private void listAllS3Objects(Path path, List files, List partitionKeys, PartitionPruningHandle + pruningHandle) throws IOException { long startTime = System.currentTimeMillis(); - Stream objectStream = getS3ObjectSummaryStream(prefixPath); + Stream objectStream = getS3ObjectSummaryStream(path); objectStream .filter(summary -> summary.getSize() > 0 && !summary.getKey().endsWith("/")) .forEach(summary -> { - String bucketName = summary.getBucketName(); - String pathString = uri.getScheme() + "://" + bucketName + "/" + summary.getKey(); - Path path = new Path(pathString); - String fileName = path.getName(); + Path bucketPath = getPathFromBucket(summary); + String fileName = bucketPath.getName(); if (!fileName.startsWith("_") && !fileName.startsWith(".")) { - int lastIndex = pathString.lastIndexOf("/"); - String partitionPathString = pathString.substring(0, lastIndex); - Path partitionPath = new Path(partitionPathString); - + Path partitionPath = bucketPath.getParent(); if (pruningHandle.getPartitionMap().containsKey(partitionPath)) { - String partitionKey = pruningHandle.getPartitionMap().get(partitionPath); - files.add(new FileStatus(summary.getSize(), false, 1, BLOCK_SIZE.toBytes(), - summary.getLastModified().getTime(), path)); - partitionKeys.add(partitionKey); + files.add(getFileStatusFromBucket(summary, bucketPath)); + partitionKeys.add(pruningHandle.getPartitionMap().get(partitionPath)); } } } @@ -362,6 +362,18 @@ private void listAllS3Objects(Path prefixPath, List files, LOG.info(String.format("List S3Objects: %d ms 
elapsed", elapsedMills)); } + private FileStatus getFileStatusFromBucket(S3ObjectSummary summary, Path path) { + return new FileStatus(summary.getSize(), false, 1, BLOCK_SIZE.toBytes(), + summary.getLastModified().getTime(), path); + } + + private Path getPathFromBucket(S3ObjectSummary summary) { + String bucketName = summary.getBucketName(); + String pathString = uri.getScheme() + "://" + bucketName + "/" + summary.getKey(); + Path path = new Path(pathString); + return path; + } + private Stream getS3ObjectSummaryStream(Path path) throws IOException { String prefix = keyFromPath(path); if (!prefix.isEmpty()) { From dbf8b31850df11968471cdeef792e77847aa4a39 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Tue, 5 Apr 2016 01:43:20 +0900 Subject: [PATCH 109/127] Remove unnecessary packages --- .../src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java index 69bd3b8a02..f5ed16bedf 100644 --- a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java +++ b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java @@ -20,8 +20,6 @@ import java.io.IOException; import java.net.URI; -import java.util.Arrays; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.stream.Stream; @@ -43,7 +41,6 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Lists; import com.google.common.collect.Maps; -import com.google.common.collect.Sets; import com.google.common.primitives.Ints; import io.airlift.units.DataSize; import io.airlift.units.Duration; From 8481b45d102739f1b6c99429137499f5b21b4e86 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Tue, 5 Apr 2016 10:49:05 +0900 Subject: [PATCH 110/127] Trigger for travis CI build 
--- CHANGES | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES b/CHANGES index cefb3df926..c7d151a76a 100644 --- a/CHANGES +++ b/CHANGES @@ -2,6 +2,7 @@ Tajo Change Log Release 0.12.0 - unreleased + NEW FEATURES TAJO-1955: Add a feature to strip quotes from CSV file. (hyunsik) From 0f0a3d20cdf899beeb4a01822c4b67531be4d77c Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Tue, 5 Apr 2016 15:54:46 +0900 Subject: [PATCH 111/127] Remove unnecessary modifications --- .../exec/NonForwardQueryResultFileScanner.java | 12 +++--------- .../org/apache/tajo/querymaster/Repartitioner.java | 13 +++---------- 2 files changed, 6 insertions(+), 19 deletions(-) diff --git a/tajo-core/src/main/java/org/apache/tajo/master/exec/NonForwardQueryResultFileScanner.java b/tajo-core/src/main/java/org/apache/tajo/master/exec/NonForwardQueryResultFileScanner.java index c0a721e235..3b1e575564 100644 --- a/tajo-core/src/main/java/org/apache/tajo/master/exec/NonForwardQueryResultFileScanner.java +++ b/tajo-core/src/main/java/org/apache/tajo/master/exec/NonForwardQueryResultFileScanner.java @@ -106,15 +106,9 @@ private void initSeqScanExec() throws IOException, TajoException { List fragments = Lists.newArrayList(); if (tableDesc.hasPartition()) { - if (tablespace instanceof S3TableSpace) { - S3TableSpace s3TableSpace = TUtil.checkTypeAndGet(tablespace, S3TableSpace.class); - fragments.addAll(Repartitioner.getFragmentsFromPartitionedTable(s3TableSpace, scanNode, tableDesc - , catalog, tajoConf)); - } else { - FileTablespace fileTablespace = TUtil.checkTypeAndGet(tablespace, FileTablespace.class); - fragments.addAll(Repartitioner.getFragmentsFromPartitionedTable(fileTablespace, scanNode, tableDesc - , catalog, tajoConf)); - } + FileTablespace fileTablespace = TUtil.checkTypeAndGet(tablespace, FileTablespace.class); + fragments.addAll(Repartitioner.getFragmentsFromPartitionedTable(fileTablespace, scanNode, tableDesc + , catalog, tajoConf)); } else { 
fragments.addAll(tablespace.getSplits(tableDesc.getName(), tableDesc, scanNode.getQual())); } diff --git a/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java b/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java index b8424ddaa8..fcf992e310 100644 --- a/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java +++ b/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java @@ -476,16 +476,9 @@ public static List getFragmentsFromPartitionedTable(Tablespace tsHandl PartitionedTableRewriter rewriter = new PartitionedTableRewriter(); rewriter.setCatalog(catalog); PartitionPruningHandle pruningHandle = rewriter.getPartitionPruningHandle(conf, partitionsScan); - if (tsHandler instanceof S3TableSpace) { - S3TableSpace tablespace = (S3TableSpace) tsHandler; - fragments.addAll(tablespace.getPartitionSplits(scan.getCanonicalName(), table.getMeta(), table.getSchema() - , pruningHandle)); - } else { - FileTablespace tablespace = (FileTablespace) tsHandler; - fragments.addAll(tablespace.getPartitionSplits(scan.getCanonicalName(), table.getMeta(), table.getSchema() - , pruningHandle)); - } - + FileTablespace tablespace = (FileTablespace) tsHandler; + fragments.addAll(tablespace.getPartitionSplits(scan.getCanonicalName(), table.getMeta(), table.getSchema() + , pruningHandle)); return fragments; } From 2b9ddce1618f5984761aa6186c3f822127604685 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Tue, 5 Apr 2016 15:58:29 +0900 Subject: [PATCH 112/127] Remove unnecessary modifications --- .../apache/tajo/storage/FileTablespace.java | 113 +++++++++--------- 1 file changed, 54 insertions(+), 59 deletions(-) diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java index 2314a67e2a..ced70c8f12 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java 
+++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java @@ -441,6 +441,7 @@ private int[] getDiskIds(VolumeId[] volumeIds) { public List getSplits(String tableName, TableMeta meta, Schema schema, Path... inputs) throws IOException { // generate splits' + List splits = Lists.newArrayList(); List volumeSplits = Lists.newArrayList(); List blockLocations = Lists.newArrayList(); @@ -662,7 +663,59 @@ public List getPartitionSplits(String tableName, TableMeta meta, Schem } for (FileStatus file : files) { - computePartitionSplits(file, meta, schema, tableName, partitionKeys[i], splits, volumeSplits, blockLocations); + Path path = file.getPath(); + long length = file.getLen(); + if (length > 0) { + // Get locations of blocks of file + BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length); + boolean splittable = isSplittablePartitionFragment(meta, schema, path, partitionKeys[i], file); + if (blocksMetadataEnabled && fs instanceof DistributedFileSystem) { + + if (splittable) { + for (BlockLocation blockLocation : blkLocations) { + volumeSplits.add(getSplittablePartitionFragment(tableName, path, blockLocation, partitionKeys[i])); + } + blockLocations.addAll(Arrays.asList(blkLocations)); + + } else { // Non splittable + long blockSize = blkLocations[0].getLength(); + if (blockSize >= length) { + blockLocations.addAll(Arrays.asList(blkLocations)); + for (BlockLocation blockLocation : blkLocations) { + volumeSplits.add(getSplittablePartitionFragment(tableName, path, blockLocation, partitionKeys[i])); + } + } else { + splits.add(getNonSplittablePartitionFragment(tableName, path, 0, length, blkLocations, + partitionKeys[i])); + } + } + + } else { + if (splittable) { + + long minSize = Math.max(getMinSplitSize(), 1); + + long blockSize = file.getBlockSize(); // s3n rest api contained block size but blockLocations is one + long splitSize = Math.max(minSize, blockSize); + long bytesRemaining = length; + + // for s3 + while 
(((double) bytesRemaining) / splitSize > SPLIT_SLOP) { + int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); + splits.add(getSplittablePartitionFragment(tableName, path, length - bytesRemaining, splitSize, + blkLocations[blkIndex].getHosts(), partitionKeys[i])); + bytesRemaining -= splitSize; + } + if (bytesRemaining > 0) { + int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); + splits.add(getSplittablePartitionFragment(tableName, path, length - bytesRemaining, bytesRemaining, + blkLocations[blkIndex].getHosts(), partitionKeys[i])); + } + } else { // Non splittable + splits.add(getNonSplittablePartitionFragment(tableName, path, 0, length, blkLocations, partitionKeys[i])); + } + } + } } if (LOG.isDebugEnabled()){ LOG.debug("# of average splits per partition: " + splits.size() / (i+1)); @@ -681,64 +734,6 @@ public List getPartitionSplits(String tableName, TableMeta meta, Schem return splits; } - protected void computePartitionSplits(FileStatus file, TableMeta meta, Schema schema, String tableName, - String partitionKey, List splits, List volumeSplits, List blockLocations) - throws IOException { - - Path path = file.getPath(); - long length = file.getLen(); - if (length > 0) { - // Get locations of blocks of file - BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length); - boolean splittable = isSplittablePartitionFragment(meta, schema, path, partitionKey, file); - if (blocksMetadataEnabled && fs instanceof DistributedFileSystem) { - - if (splittable) { - for (BlockLocation blockLocation : blkLocations) { - volumeSplits.add(getSplittablePartitionFragment(tableName, path, blockLocation, partitionKey)); - } - blockLocations.addAll(Arrays.asList(blkLocations)); - - } else { // Non splittable - long blockSize = blkLocations[0].getLength(); - if (blockSize >= length) { - blockLocations.addAll(Arrays.asList(blkLocations)); - for (BlockLocation blockLocation : blkLocations) { - 
volumeSplits.add(getSplittablePartitionFragment(tableName, path, blockLocation, partitionKey)); - } - } else { - splits.add(getNonSplittablePartitionFragment(tableName, path, 0, length, blkLocations, partitionKey)); - } - } - - } else { - if (splittable) { - - long minSize = Math.max(getMinSplitSize(), 1); - - long blockSize = file.getBlockSize(); // s3n rest api contained block size but blockLocations is one - long splitSize = Math.max(minSize, blockSize); - long bytesRemaining = length; - - // for s3 - while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) { - int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); - splits.add(getSplittablePartitionFragment(tableName, path, length - bytesRemaining, splitSize, - blkLocations[blkIndex].getHosts(), partitionKey)); - bytesRemaining -= splitSize; - } - if (bytesRemaining > 0) { - int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); - splits.add(getSplittablePartitionFragment(tableName, path, length - bytesRemaining, bytesRemaining, - blkLocations[blkIndex].getHosts(), partitionKey)); - } - } else { // Non splittable - splits.add(getNonSplittablePartitionFragment(tableName, path, 0, length, blkLocations, partitionKey)); - } - } - } - } - private void setVolumeMeta(List splits, final List blockLocations) throws IOException { From 3c731c5816880d5a1230eb7eef417026e02f9dbd Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Tue, 5 Apr 2016 18:57:51 +0900 Subject: [PATCH 113/127] Remove unnecessary modifications --- tajo-core/pom.xml | 5 ----- .../tajo/master/exec/NonForwardQueryResultFileScanner.java | 1 - .../main/java/org/apache/tajo/querymaster/Repartitioner.java | 1 - 3 files changed, 7 deletions(-) diff --git a/tajo-core/pom.xml b/tajo-core/pom.xml index c21bbff005..f4e1b9e81b 100644 --- a/tajo-core/pom.xml +++ b/tajo-core/pom.xml @@ -199,11 +199,6 @@ org.apache.tajo tajo-storage-hdfs - - org.apache.tajo - tajo-storage-s3 - ${project.version} - org.apache.tajo tajo-pullserver diff --git 
a/tajo-core/src/main/java/org/apache/tajo/master/exec/NonForwardQueryResultFileScanner.java b/tajo-core/src/main/java/org/apache/tajo/master/exec/NonForwardQueryResultFileScanner.java index 3b1e575564..d231e48648 100644 --- a/tajo-core/src/main/java/org/apache/tajo/master/exec/NonForwardQueryResultFileScanner.java +++ b/tajo-core/src/main/java/org/apache/tajo/master/exec/NonForwardQueryResultFileScanner.java @@ -45,7 +45,6 @@ import org.apache.tajo.storage.RowStoreUtil.RowStoreEncoder; import org.apache.tajo.storage.fragment.Fragment; import org.apache.tajo.storage.fragment.FragmentConvertor; -import org.apache.tajo.storage.s3.S3TableSpace; import org.apache.tajo.tuple.memory.MemoryBlock; import org.apache.tajo.tuple.memory.MemoryRowBlock; import org.apache.tajo.util.CompressionUtil; diff --git a/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java b/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java index fcf992e310..aef685e9f8 100644 --- a/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java +++ b/tajo-core/src/main/java/org/apache/tajo/querymaster/Repartitioner.java @@ -56,7 +56,6 @@ import org.apache.tajo.storage.*; import org.apache.tajo.storage.fragment.FileFragment; import org.apache.tajo.storage.fragment.Fragment; -import org.apache.tajo.storage.s3.S3TableSpace; import org.apache.tajo.unit.StorageUnit; import org.apache.tajo.util.Pair; import org.apache.tajo.util.TUtil; From 0d4c7c3ea966ed0a0261fdd7d049f389934e9e50 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Tue, 5 Apr 2016 23:57:09 +0900 Subject: [PATCH 114/127] Fix a bug about multi-level partition --- .../apache/tajo/storage/s3/S3TableSpace.java | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java index e44a75035a..db16be5373 100644 --- 
a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java +++ b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java @@ -177,9 +177,12 @@ public List getPartitionSplits(String tableName, TableMeta meta, Schem // Generate the list of FileStatuses and partition keys Path[] paths = pruningHandle.getPartitionPaths(); + if (paths.length == 0) { + return splits; + } + // Get common prefix of partition paths String commonPrefix = FileUtil.getCommonPrefix(paths); - // Generate splits if (pruningHandle.hasConjunctiveForms()) { splits.addAll(getFragmentsByMarker(meta, schema, tableName, new Path(commonPrefix), pruningHandle)); @@ -208,7 +211,7 @@ private List getFragmentsByMarker(TableMeta meta, Schema schema, Strin List splits = Lists.newArrayList(); long startTime = System.currentTimeMillis(); ObjectListing objectListing; - String previousPartition = null, nextPartition = null; + Path previousPartition = null, nextPartition = null; int callCount = 0, i = 0; boolean finished = false, enabled = false; @@ -221,9 +224,9 @@ private List getFragmentsByMarker(TableMeta meta, Schema schema, Strin .withBucketName(uri.getHost()) .withPrefix(prefix); - Map partitionMap = Maps.newHashMap(); + Map partitionMap = Maps.newHashMap(); for (i = 0; i < pruningHandle.getPartitionKeys().length; i++) { - partitionMap.put(i, pruningHandle.getPartitionKeys()[i]); + partitionMap.put(i, pruningHandle.getPartitionPaths()[i]); } i = 0; @@ -237,7 +240,7 @@ private List getFragmentsByMarker(TableMeta meta, Schema schema, Strin // Get partition of last bucket from current objects Path lastPath = getPathFromBucket(objectListing.getObjectSummaries().get(objectsCount - 1)); - String lastPartition = lastPath.getParent().getName(); + Path lastPartition = lastPath.getParent(); // Check target partition compare with last partition of current objects if (previousPartition == null) { @@ -264,7 +267,7 @@ private List getFragmentsByMarker(TableMeta 
meta, Schema schema, Strin FileStatus file = getFileStatusFromBucket(summary, bucketPath); String partitionKey = pruningHandle.getPartitionMap().get(partitionPath); computePartitionSplits(file, meta, schema, tableName, partitionKey, splits); - previousPartition = partitionKey; + previousPartition = partitionPath; if (LOG.isDebugEnabled()){ LOG.debug("# of average splits per partition: " + splits.size() / (i+1)); @@ -278,7 +281,7 @@ private List getFragmentsByMarker(TableMeta meta, Schema schema, Strin nextPartition = partitionMap.get(0); } else { // Find index of previous partition - for(Map.Entry entry : partitionMap.entrySet()) { + for(Map.Entry entry : partitionMap.entrySet()) { if (entry.getValue().equals(previousPartition)) { index = entry.getKey(); break; From fd11849e618c6b9a54991fee5469029e701a3d65 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Wed, 6 Apr 2016 14:02:01 +0900 Subject: [PATCH 115/127] Clean up codes --- .../partition/PartitionPruningHandle.java | 9 +-- .../rules/PartitionedTableRewriter.java | 9 +-- .../apache/tajo/storage/FileTablespace.java | 2 +- .../apache/tajo/storage/s3/S3TableSpace.java | 68 ++++++++++--------- 4 files changed, 41 insertions(+), 47 deletions(-) diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/partition/PartitionPruningHandle.java b/tajo-plan/src/main/java/org/apache/tajo/plan/partition/PartitionPruningHandle.java index 8d1cad8939..bed50a54cb 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/partition/PartitionPruningHandle.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/partition/PartitionPruningHandle.java @@ -31,15 +31,12 @@ public class PartitionPruningHandle { private Path[] partitionPaths; private String[] partitionKeys; private long totalVolume; - private Map partitionMap; private EvalNode[] conjunctiveForms; - public PartitionPruningHandle(Path[] partitionPaths, String[] partitionKeys, long totalVolume, - Map partitionMap) { + public PartitionPruningHandle(Path[] partitionPaths, 
String[] partitionKeys, long totalVolume) { this.partitionPaths = partitionPaths; this.partitionKeys = partitionKeys; this.totalVolume = totalVolume; - this.partitionMap = partitionMap; } public Path[] getPartitionPaths() { @@ -54,10 +51,6 @@ public long getTotalVolume() { return totalVolume; } - public Map getPartitionMap() { - return partitionMap; - } - public boolean hasConjunctiveForms() { return this.conjunctiveForms != null; } diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java index 71a060e0e3..991110e02c 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java @@ -20,7 +20,6 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Lists; -import com.google.common.collect.Maps; import com.google.common.collect.Sets; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -200,15 +199,13 @@ private PartitionPruningHandle getPartitionPruningHandleByCatalog(List partitionMap = Maps.newHashMap(); for (int i = 0; i < partitions.size(); i++) { CatalogProtos.PartitionDescProto partition = partitions.get(i); filteredPaths[i] = new Path(partition.getPath()); partitionKeys[i] = partition.getPartitionName(); totalVolume += partition.getNumBytes(); - partitionMap.put(filteredPaths[i], partitionKeys[i]); } - return new PartitionPruningHandle(filteredPaths, partitionKeys, totalVolume, partitionMap); + return new PartitionPruningHandle(filteredPaths, partitionKeys, totalVolume); } /** @@ -247,17 +244,15 @@ private PartitionPruningHandle getPartitionPruningHandleByFileSystem(Schema part // Get partition keys and volume from the list of partition directories partitionKeys = new String[filteredPaths.length]; - Map partitionMap = 
Maps.newHashMap(); for (int i = 0; i < partitionKeys.length; i++) { Path path = filteredPaths[i]; startIdx = path.toString().indexOf(getColumnPartitionPathPrefix(partitionColumns)); partitionKeys[i] = path.toString().substring(startIdx); summary = fs.getContentSummary(path); totalVolume += summary.getLength(); - partitionMap.put(path, partitionKeys[i]); } - return new PartitionPruningHandle(filteredPaths, partitionKeys, totalVolume, partitionMap); + return new PartitionPruningHandle(filteredPaths, partitionKeys, totalVolume); } /** diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java index 45c8ef9ebb..875e717a5c 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java @@ -368,7 +368,7 @@ protected boolean isSplittable(TableMeta meta, Schema schema, Path path, FileSta return split; } - protected static final double SPLIT_SLOP = 1.1; // 10% slop + private static final double SPLIT_SLOP = 1.1; // 10% slop protected int getBlockIndex(BlockLocation[] blkLocations, long offset) { diff --git a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java index db16be5373..b99bb1a040 100644 --- a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java +++ b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java @@ -67,9 +67,9 @@ public class S3TableSpace extends FileTablespace { private AmazonS3 s3; private boolean useInstanceCredentials; - //use a custom endpoint? 
public static final String ENDPOINT = "fs.s3a.endpoint"; private static final DataSize BLOCK_SIZE = new DataSize(32, MEGABYTE); + protected static final double SPLIT_SLOP = 1.1; // 10% slop public S3TableSpace(String spaceName, URI uri, JSONObject config) { super(spaceName, uri, config); @@ -102,6 +102,7 @@ public void init(TajoConf tajoConf) throws IOException { if (s3 != null) { String endPoint = conf.getTrimmed(ENDPOINT,""); try { + // Check where use a custom endpoint if (!endPoint.isEmpty()) { s3.setEndpoint(endPoint); } @@ -176,18 +177,26 @@ public List getPartitionSplits(String tableName, TableMeta meta, Schem // Generate the list of FileStatuses and partition keys Path[] paths = pruningHandle.getPartitionPaths(); - if (paths.length == 0) { return splits; } + // Prepare partition map which includes index for each partition path + Map partitionPathMap = Maps.newHashMap(); + for (int i = 0; i < pruningHandle.getPartitionKeys().length; i++) { + partitionPathMap.put(pruningHandle.getPartitionPaths()[i], i); + } + // Get common prefix of partition paths String commonPrefix = FileUtil.getCommonPrefix(paths); + // Generate splits if (pruningHandle.hasConjunctiveForms()) { - splits.addAll(getFragmentsByMarker(meta, schema, tableName, new Path(commonPrefix), pruningHandle)); + splits.addAll(getFragmentsByMarker(meta, schema, tableName, new Path(commonPrefix), pruningHandle, + partitionPathMap)); } else { - splits.addAll(getFragmentsByListingAllObjects(meta, schema, tableName, new Path(commonPrefix), pruningHandle)); + splits.addAll(getFragmentsByListingAllObjects(meta, schema, tableName, new Path(commonPrefix), pruningHandle, + partitionPathMap)); } LOG.info("Total # of splits: " + splits.size()); @@ -207,7 +216,7 @@ public List getPartitionSplits(String tableName, TableMeta meta, Schem * @throws IOException */ private List getFragmentsByMarker(TableMeta meta, Schema schema, String tableName, Path path, - PartitionPruningHandle pruningHandle) throws IOException { + 
PartitionPruningHandle pruningHandle, Map partitionPathMap) throws IOException { List splits = Lists.newArrayList(); long startTime = System.currentTimeMillis(); ObjectListing objectListing; @@ -215,6 +224,9 @@ private List getFragmentsByMarker(TableMeta meta, Schema schema, Strin int callCount = 0, i = 0; boolean finished = false, enabled = false; + int partitionCount = pruningHandle.getPartitionPaths().length; + + // Listing S3 Objects using AWS API String prefix = keyFromPath(path); if (!prefix.isEmpty()) { prefix += "/"; @@ -224,12 +236,6 @@ private List getFragmentsByMarker(TableMeta meta, Schema schema, Strin .withBucketName(uri.getHost()) .withPrefix(prefix); - Map partitionMap = Maps.newHashMap(); - for (i = 0; i < pruningHandle.getPartitionKeys().length; i++) { - partitionMap.put(i, pruningHandle.getPartitionPaths()[i]); - } - - i = 0; do { enabled = true; @@ -244,7 +250,7 @@ private List getFragmentsByMarker(TableMeta meta, Schema schema, Strin // Check target partition compare with last partition of current objects if (previousPartition == null) { - if (partitionMap.get(0).compareTo(lastPartition) > 0) { + if (pruningHandle.getPartitionPaths()[0].compareTo(lastPartition) > 0) { enabled = false; } } else { @@ -263,9 +269,9 @@ private List getFragmentsByMarker(TableMeta meta, Schema schema, Strin Path partitionPath = bucketPath.getParent(); // If Tajo can matched partition from partition map, add it to final list. 
- if (pruningHandle.getPartitionMap().containsKey(partitionPath)) { + if (partitionPathMap.containsKey(partitionPath)) { FileStatus file = getFileStatusFromBucket(summary, bucketPath); - String partitionKey = pruningHandle.getPartitionMap().get(partitionPath); + String partitionKey = getPartitionKey(pruningHandle, partitionPathMap, partitionPath); computePartitionSplits(file, meta, schema, tableName, partitionKey, splits); previousPartition = partitionPath; @@ -274,24 +280,17 @@ private List getFragmentsByMarker(TableMeta meta, Schema schema, Strin } i++; } else { - // If Tajo can't matched partition, consider to move next marker. - int index = -1; - // If any partition not yet added + // If there is no matched partition, consider to move next marker. Otherwise get next object. if (previousPartition == null) { - nextPartition = partitionMap.get(0); + nextPartition = pruningHandle.getPartitionPaths()[0]; } else { // Find index of previous partition - for(Map.Entry entry : partitionMap.entrySet()) { - if (entry.getValue().equals(previousPartition)) { - index = entry.getKey(); - break; - } - } + int index = partitionPathMap.get(previousPartition); // Find next target partition with the index of previous partition - if ((index + 1) < partitionMap.size()) { - nextPartition = partitionMap.get(index+1); - } else if ((index + 1) == partitionMap.size()) { + if ((index + 1) < partitionCount) { + nextPartition = pruningHandle.getPartitionPaths()[index+1]; + } else if ((index + 1) == partitionCount) { finished = true; break; } @@ -326,7 +325,7 @@ private List getFragmentsByMarker(TableMeta meta, Schema schema, Strin * @throws IOException */ private List getFragmentsByListingAllObjects(TableMeta meta, Schema schema, String tableName, Path path, - PartitionPruningHandle pruningHandle) throws IOException { + PartitionPruningHandle pruningHandle, Map partitionPathMap) throws IOException { List splits = Lists.newArrayList(); long startTime = System.currentTimeMillis(); @@ -344,15 
+343,16 @@ private List getFragmentsByListingAllObjects(TableMeta meta, Schema sc if (!fileName.startsWith("_") && !fileName.startsWith(".")) { Path partitionPath = bucketPath.getParent(); - if (pruningHandle.getPartitionMap().containsKey(partitionPath)) { + if (partitionPathMap.containsKey(partitionPath)) { FileStatus file = getFileStatusFromBucket(summary, bucketPath); - String partitionKey = pruningHandle.getPartitionMap().get(partitionPath); - computePartitionSplits(file, meta, schema, tableName, partitionKey, splits); } + String partitionKey = getPartitionKey(pruningHandle, partitionPathMap, partitionPath); + computePartitionSplits(file, meta, schema, tableName, partitionKey, splits); if (LOG.isDebugEnabled()){ LOG.debug("# of average splits per partition: " + splits.size() / (i+1)); } i++; + } } } } @@ -364,6 +364,12 @@ private List getFragmentsByListingAllObjects(TableMeta meta, Schema sc return splits; } + private String getPartitionKey(PartitionPruningHandle pruningHandle, Map partitionPathMap, Path + partitionPath) { + int index = partitionPathMap.get(partitionPath); + return pruningHandle.getPartitionKeys()[index]; + } + private void computePartitionSplits(FileStatus file, TableMeta meta, Schema schema, String tableName, String partitionKey, List splits) throws IOException { Path path = file.getPath(); From 1565bae3dfef31350f9a5fd6c370637c956e1095 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Wed, 6 Apr 2016 14:05:30 +0900 Subject: [PATCH 116/127] Move local variable location --- .../main/java/org/apache/tajo/storage/s3/S3TableSpace.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java index b99bb1a040..4fded5a0bc 100644 --- a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java +++ 
b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java @@ -220,7 +220,7 @@ private List getFragmentsByMarker(TableMeta meta, Schema schema, Strin List splits = Lists.newArrayList(); long startTime = System.currentTimeMillis(); ObjectListing objectListing; - Path previousPartition = null, nextPartition = null; + Path previousPartition = null; int callCount = 0, i = 0; boolean finished = false, enabled = false; @@ -280,7 +280,9 @@ private List getFragmentsByMarker(TableMeta meta, Schema schema, Strin } i++; } else { - // If there is no matched partition, consider to move next marker. Otherwise get next object. + Path nextPartition = null; + + // Get next target partition if (previousPartition == null) { nextPartition = pruningHandle.getPartitionPaths()[0]; } else { @@ -296,6 +298,7 @@ private List getFragmentsByMarker(TableMeta meta, Schema schema, Strin } } + // If there is no matched partition, consider to move next marker. Otherwise access next object. 
if (nextPartition != null && nextPartition.compareTo(lastPartition) <= 0) { continue; } else { From df1c49966275c488f29c10e38e1145073611d5f4 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Wed, 6 Apr 2016 14:07:32 +0900 Subject: [PATCH 117/127] Initialize next partition path --- .../main/java/org/apache/tajo/storage/s3/S3TableSpace.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java index 4fded5a0bc..a2a2993c3f 100644 --- a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java +++ b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java @@ -220,7 +220,7 @@ private List getFragmentsByMarker(TableMeta meta, Schema schema, Strin List splits = Lists.newArrayList(); long startTime = System.currentTimeMillis(); ObjectListing objectListing; - Path previousPartition = null; + Path previousPartition = null, nextPartition = null; int callCount = 0, i = 0; boolean finished = false, enabled = false; @@ -280,7 +280,7 @@ private List getFragmentsByMarker(TableMeta meta, Schema schema, Strin } i++; } else { - Path nextPartition = null; + nextPartition = null; // Get next target partition if (previousPartition == null) { From 9b82f4ba84f76c06470603a5e716ba57b21e1e21 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Thu, 7 Apr 2016 16:08:26 +0900 Subject: [PATCH 118/127] Add debug logs --- .../apache/tajo/storage/s3/S3TableSpace.java | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java index a2a2993c3f..264ce36089 100644 --- a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java
+++ b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java @@ -185,6 +185,7 @@ public List getPartitionSplits(String tableName, TableMeta meta, Schem Map partitionPathMap = Maps.newHashMap(); for (int i = 0; i < pruningHandle.getPartitionKeys().length; i++) { partitionPathMap.put(pruningHandle.getPartitionPaths()[i], i); + LOG.info("### init partition - i:" + i + ", partition:" + pruningHandle.getPartitionPaths()[i]); } // Get common prefix of partition paths @@ -225,12 +226,13 @@ private List getFragmentsByMarker(TableMeta meta, Schema schema, Strin boolean finished = false, enabled = false; int partitionCount = pruningHandle.getPartitionPaths().length; - + LOG.info("### 100 ### partitionCount:" + partitionCount); // Listing S3 Objects using AWS API String prefix = keyFromPath(path); if (!prefix.isEmpty()) { prefix += "/"; } + LOG.info("### 110 ### prefix:" + prefix); ListObjectsRequest request = new ListObjectsRequest() .withBucketName(uri.getHost()) @@ -243,30 +245,38 @@ private List getFragmentsByMarker(TableMeta meta, Schema schema, Strin objectListing = s3.listObjects(request); int objectsCount = objectListing.getObjectSummaries().size(); + LOG.info("### 200 ### objectsCount:" + objectsCount); // Get partition of last bucket from current objects Path lastPath = getPathFromBucket(objectListing.getObjectSummaries().get(objectsCount - 1)); Path lastPartition = lastPath.getParent(); + LOG.info("### 210 ### lastPath:" + lastPath); // Check target partition compare with last partition of current objects if (previousPartition == null) { if (pruningHandle.getPartitionPaths()[0].compareTo(lastPartition) > 0) { enabled = false; + LOG.info("### 220 ###"); } } else { if (previousPartition.compareTo(lastPartition) > 0) { enabled = false; + LOG.info("### 230 ###"); } } + LOG.info("### 300 ### callCount:" + callCount + ", nextMarker:" + objectListing.getNextMarker()); // Generate FileStatus and partition key if (enabled) { for 
(S3ObjectSummary summary : objectListing.getObjectSummaries()) { + LOG.info("### 310 ### key:" + summary.getKey()); if (summary.getSize() > 0 && !summary.getKey().endsWith("/")) { Path bucketPath = getPathFromBucket(summary); + LOG.info("### 310 ### bucketPath:" + bucketPath); if (!bucketPath.getName().startsWith("_") && !bucketPath.getName().startsWith(".")) { Path partitionPath = bucketPath.getParent(); + LOG.info("### 320 ### partitionPath:" + partitionPath ); // If Tajo can matched partition from partition map, add it to final list. if (partitionPathMap.containsKey(partitionPath)) { @@ -274,6 +284,7 @@ private List getFragmentsByMarker(TableMeta meta, Schema schema, Strin String partitionKey = getPartitionKey(pruningHandle, partitionPathMap, partitionPath); computePartitionSplits(file, meta, schema, tableName, partitionKey, splits); previousPartition = partitionPath; + LOG.info("### 330 ### previousPartition:" + previousPartition ); if (LOG.isDebugEnabled()){ LOG.debug("# of average splits per partition: " + splits.size() / (i+1)); @@ -285,6 +296,7 @@ private List getFragmentsByMarker(TableMeta meta, Schema schema, Strin // Get next target partition if (previousPartition == null) { nextPartition = pruningHandle.getPartitionPaths()[0]; + LOG.info("### 400 ### nextPartition:" + nextPartition); } else { // Find index of previous partition int index = partitionPathMap.get(previousPartition); @@ -292,16 +304,21 @@ private List getFragmentsByMarker(TableMeta meta, Schema schema, Strin // Find next target partition with the index of previous partition if ((index + 1) < partitionCount) { nextPartition = pruningHandle.getPartitionPaths()[index+1]; + LOG.info("### 410 ### nextPartition:" + nextPartition + ", index:" + index); } else if ((index + 1) == partitionCount) { finished = true; + LOG.info("### 420 ### finished"); break; } } + LOG.info("### 430 ###"); // If there is no matched partition, consider to move next marker. Otherwise access next object. 
if (nextPartition != null && nextPartition.compareTo(lastPartition) <= 0) { + LOG.info("### 440 ###"); continue; } else { + LOG.info("### 450 ###"); break; } } @@ -309,6 +326,7 @@ private List getFragmentsByMarker(TableMeta meta, Schema schema, Strin } } } + LOG.info("### 500 ### "); request.setMarker(objectListing.getNextMarker()); callCount++; From 5a9d9533d14b719f07027bca02c6dc14bd1f475e Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Fri, 29 Apr 2016 17:53:02 +0900 Subject: [PATCH 119/127] Clean up S3TableSpace::getFragmentsByMarker --- .../apache/tajo/storage/s3/S3TableSpace.java | 160 +++++++++--------- 1 file changed, 84 insertions(+), 76 deletions(-) diff --git a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java index 264ce36089..d353add33d 100644 --- a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java +++ b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java @@ -22,6 +22,7 @@ import java.net.URI; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.stream.Stream; import java.util.stream.StreamSupport; @@ -185,7 +186,6 @@ public List getPartitionSplits(String tableName, TableMeta meta, Schem Map partitionPathMap = Maps.newHashMap(); for (int i = 0; i < pruningHandle.getPartitionKeys().length; i++) { partitionPathMap.put(pruningHandle.getPartitionPaths()[i], i); - LOG.info("### init partition - i:" + i + ", partition:" + pruningHandle.getPartitionPaths()[i]); } // Get common prefix of partition paths @@ -217,120 +217,103 @@ public List getPartitionSplits(String tableName, TableMeta meta, Schem * @throws IOException */ private List getFragmentsByMarker(TableMeta meta, Schema schema, String tableName, Path path, - PartitionPruningHandle pruningHandle, Map partitionPathMap) throws IOException { + PartitionPruningHandle 
pruningHandle, Map partitionPathMap) throws IOException { List splits = Lists.newArrayList(); long startTime = System.currentTimeMillis(); ObjectListing objectListing; Path previousPartition = null, nextPartition = null; int callCount = 0, i = 0; - boolean finished = false, enabled = false; + boolean isFirst = true, isFinished = false, isAccepted = false; int partitionCount = pruningHandle.getPartitionPaths().length; - LOG.info("### 100 ### partitionCount:" + partitionCount); + // Listing S3 Objects using AWS API String prefix = keyFromPath(path); if (!prefix.isEmpty()) { prefix += "/"; } - LOG.info("### 110 ### prefix:" + prefix); ListObjectsRequest request = new ListObjectsRequest() .withBucketName(uri.getHost()) .withPrefix(prefix); do { - enabled = true; + isAccepted = false; // Get first chunk of 1000 objects objectListing = s3.listObjects(request); int objectsCount = objectListing.getObjectSummaries().size(); - LOG.info("### 200 ### objectsCount:" + objectsCount); // Get partition of last bucket from current objects - Path lastPath = getPathFromBucket(objectListing.getObjectSummaries().get(objectsCount - 1)); - Path lastPartition = lastPath.getParent(); - LOG.info("### 210 ### lastPath:" + lastPath); - - // Check target partition compare with last partition of current objects - if (previousPartition == null) { - if (pruningHandle.getPartitionPaths()[0].compareTo(lastPartition) > 0) { - enabled = false; - LOG.info("### 220 ###"); + S3ObjectSummary firstBucket = objectListing.getObjectSummaries().get(0); + Path firstPath = getPathFromBucket(firstBucket); + Path firstPartition = isFile(firstBucket) ? firstPath.getParent() : firstPath; + + S3ObjectSummary lastBucket = objectListing.getObjectSummaries().get(objectsCount - 1); + Path lastPath = getPathFromBucket(lastBucket); + Path lastPartition = isFile(lastBucket) ? 
lastPath.getParent() : lastPath; + + if (isFirst) { + nextPartition = pruningHandle.getPartitionPaths()[0]; + if (nextPartition.compareTo(firstPartition) <= 0 || nextPartition.compareTo(lastPartition) <= 0) { + isAccepted = true; } } else { - if (previousPartition.compareTo(lastPartition) > 0) { - enabled = false; - LOG.info("### 230 ###"); + if (previousPartition.compareTo(firstPartition) <= 0 || nextPartition.compareTo(firstPartition) <= 0 + || previousPartition.compareTo(lastPartition) <= 0 || nextPartition.compareTo(lastPartition) <= 0) { + isAccepted = true; } } - LOG.info("### 300 ### callCount:" + callCount + ", nextMarker:" + objectListing.getNextMarker()); - // Generate FileStatus and partition key - if (enabled) { + // If this is first call or current objects include target partition, generate fragments. + if (isAccepted) { for (S3ObjectSummary summary : objectListing.getObjectSummaries()) { - LOG.info("### 310 ### key:" + summary.getKey()); - if (summary.getSize() > 0 && !summary.getKey().endsWith("/")) { - Path bucketPath = getPathFromBucket(summary); - LOG.info("### 310 ### bucketPath:" + bucketPath); - - if (!bucketPath.getName().startsWith("_") && !bucketPath.getName().startsWith(".")) { - Path partitionPath = bucketPath.getParent(); - LOG.info("### 320 ### partitionPath:" + partitionPath ); - - // If Tajo can matched partition from partition map, add it to final list. 
- if (partitionPathMap.containsKey(partitionPath)) { - FileStatus file = getFileStatusFromBucket(summary, bucketPath); - String partitionKey = getPartitionKey(pruningHandle, partitionPathMap, partitionPath); - computePartitionSplits(file, meta, schema, tableName, partitionKey, splits); - previousPartition = partitionPath; - LOG.info("### 330 ### previousPartition:" + previousPartition ); - - if (LOG.isDebugEnabled()){ - LOG.debug("# of average splits per partition: " + splits.size() / (i+1)); - } - i++; + Optional bucketPath = getValidPathFromBucket(summary); + + if (bucketPath.isPresent()) { + Path partitionPath = bucketPath.get().getParent(); + + // If Tajo can matched partition from partition map, add it to final list. + if (partitionPathMap.containsKey(partitionPath)) { + FileStatus file = getFileStatusFromBucket(summary, bucketPath.get()); + String partitionKey = getPartitionKey(pruningHandle, partitionPathMap, partitionPath); + computePartitionSplits(file, meta, schema, tableName, partitionKey, splits); + previousPartition = partitionPath; + + int index = partitionPathMap.get(previousPartition); + if ((index + 1) < partitionCount) { + nextPartition = pruningHandle.getPartitionPaths()[index + 1]; } else { nextPartition = null; + } - // Get next target partition - if (previousPartition == null) { - nextPartition = pruningHandle.getPartitionPaths()[0]; - LOG.info("### 400 ### nextPartition:" + nextPartition); - } else { - // Find index of previous partition - int index = partitionPathMap.get(previousPartition); - - // Find next target partition with the index of previous partition - if ((index + 1) < partitionCount) { - nextPartition = pruningHandle.getPartitionPaths()[index+1]; - LOG.info("### 410 ### nextPartition:" + nextPartition + ", index:" + index); - } else if ((index + 1) == partitionCount) { - finished = true; - LOG.info("### 420 ### finished"); - break; - } - } - - LOG.info("### 430 ###"); - // If there is no matched partition, consider to move next 
marker. Otherwise access next object. - if (nextPartition != null && nextPartition.compareTo(lastPartition) <= 0) { - LOG.info("### 440 ###"); - continue; - } else { - LOG.info("### 450 ###"); - break; + if (LOG.isDebugEnabled()){ + LOG.debug("# of average splits per partition: " + splits.size() / (i+1)); + } + i++; + } else { + // If current objects include next target partition, get next object. + if (nextPartition != null && nextPartition.compareTo(lastPartition) <= 0) { + continue; + } else { + if (previousPartition != null && nextPartition == null) { + isFinished = true; } + break; } } } } } - LOG.info("### 500 ### "); + if (isFirst) { + isFirst = false; + } request.setMarker(objectListing.getNextMarker()); callCount++; - } while (objectListing.isTruncated() && !finished); + } while (objectListing.isTruncated() && !isFinished); + long finishTime = System.currentTimeMillis(); long elapsedMills = finishTime - startTime; LOG.info(String.format("List S3Objects: %d ms elapsed. API call count: %d", elapsedMills, callCount)); @@ -358,7 +341,7 @@ private List getFragmentsByListingAllObjects(TableMeta meta, Schema sc int i = 0; Iterable objectSummaries = S3Objects.withPrefix(s3, uri.getHost(), prefix); for (S3ObjectSummary summary : objectSummaries) { - if (summary.getSize() >0 && !summary.getKey().endsWith("/")) { + if (summary.getSize() > 0 && !summary.getKey().endsWith("/")) { Path bucketPath = getPathFromBucket(summary); String fileName = bucketPath.getName(); @@ -423,9 +406,29 @@ private void computePartitionSplits(FileStatus file, TableMeta meta, Schema sche } } - private FileStatus getFileStatusFromBucket(S3ObjectSummary summary, Path path) { - return new FileStatus(summary.getSize(), false, 1, BLOCK_SIZE.toBytes(), - summary.getLastModified().getTime(), path); + private Optional getValidPathFromBucket(S3ObjectSummary summary) { + Optional path = null; + + if (isFile(summary)) { + Path bucketPath = getPathFromBucket(summary); + if 
(!bucketPath.getName().startsWith("_") && !bucketPath.getName().startsWith(".")) { + path = Optional.of(bucketPath); + } else { + path = Optional.empty(); + } + } else { + path = Optional.empty(); + } + + return path; + } + + private boolean isFile(S3ObjectSummary summary) { + if (summary.getSize() > 0 && !summary.getKey().endsWith("/")) { + return true; + } else { + return false; + } } private Path getPathFromBucket(S3ObjectSummary summary) { @@ -435,6 +438,11 @@ private Path getPathFromBucket(S3ObjectSummary summary) { return path; } + private FileStatus getFileStatusFromBucket(S3ObjectSummary summary, Path path) { + return new FileStatus(summary.getSize(), false, 1, BLOCK_SIZE.toBytes(), + summary.getLastModified().getTime(), path); + } + @VisibleForTesting public AmazonS3 getAmazonS3Client() { return s3; From 2dcc5c00a5441c7b89feff4a5372d8f990466f6f Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Fri, 29 Apr 2016 18:07:40 +0900 Subject: [PATCH 120/127] Fix a bug for checking first phase --- .../org/apache/tajo/storage/s3/S3TableSpace.java | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java index d353add33d..15f1edefbc 100644 --- a/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java +++ b/tajo-storage/tajo-storage-s3/src/main/java/org/apache/tajo/storage/s3/S3TableSpace.java @@ -242,10 +242,9 @@ private List getFragmentsByMarker(TableMeta meta, Schema schema, Strin // Get first chunk of 1000 objects objectListing = s3.listObjects(request); - int objectsCount = objectListing.getObjectSummaries().size(); - // Get partition of last bucket from current objects + // Get first bucket and last bucket from current objects S3ObjectSummary firstBucket = objectListing.getObjectSummaries().get(0); Path firstPath = 
getPathFromBucket(firstBucket); Path firstPartition = isFile(firstBucket) ? firstPath.getParent() : firstPath; @@ -254,6 +253,7 @@ private List getFragmentsByMarker(TableMeta meta, Schema schema, Strin Path lastPath = getPathFromBucket(lastBucket); Path lastPartition = isFile(lastBucket) ? lastPath.getParent() : lastPath; + // Check if current objects include target partition. if (isFirst) { nextPartition = pruningHandle.getPartitionPaths()[0]; if (nextPartition.compareTo(firstPartition) <= 0 || nextPartition.compareTo(lastPartition) <= 0) { @@ -266,7 +266,7 @@ private List getFragmentsByMarker(TableMeta meta, Schema schema, Strin } } - // If this is first call or current objects include target partition, generate fragments. + // Generate fragments. if (isAccepted) { for (S3ObjectSummary summary : objectListing.getObjectSummaries()) { Optional bucketPath = getValidPathFromBucket(summary); @@ -288,6 +288,10 @@ private List getFragmentsByMarker(TableMeta meta, Schema schema, Strin nextPartition = null; } + if (isFirst) { + isFirst = false; + } + if (LOG.isDebugEnabled()){ LOG.debug("# of average splits per partition: " + splits.size() / (i+1)); } @@ -307,9 +311,6 @@ private List getFragmentsByMarker(TableMeta meta, Schema schema, Strin } } - if (isFirst) { - isFirst = false; - } request.setMarker(objectListing.getNextMarker()); callCount++; } while (objectListing.isTruncated() && !isFinished); From bd54c992804c8b43aa03583bc1e475295d6bd701 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Sun, 8 May 2016 23:48:40 +0900 Subject: [PATCH 121/127] Fix a bug of unit tests --- .../plan/rewrite/rules/PartitionedTableRewriter.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java index 5969c5f05e..58cf6631c8 100644 --- 
a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/PartitionedTableRewriter.java @@ -560,8 +560,13 @@ public static Tuple buildTupleFromPartitionKeys(Schema partitionColumnSchema, St if (parts.length == 2) { int columnId = partitionColumnSchema.getColumnIdByName(parts[0]); Column keyColumn = partitionColumnSchema.getColumn(columnId); - tuple.put(columnId, DatumFactory.createFromString(keyColumn.getDataType(), - StringUtils.unescapePathName(parts[1]))); + + if (parts[1].equals(StorageConstants.DEFAULT_PARTITION_NAME)){ + tuple.put(columnId, DatumFactory.createNullDatum()); + } else { + tuple.put(columnId, DatumFactory.createFromString(keyColumn.getDataType(), + StringUtils.unescapePathName(parts[1]))); + } } } From 8462c5163fbbd81b916def9233adca3e879d82f7 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Mon, 9 May 2016 14:38:21 +0900 Subject: [PATCH 122/127] Remove QueryTestCaseBase from TestPartitionedTableRewriter --- .../org/apache/tajo/catalog/CatalogUtil.java | 5 + .../org/apache/tajo/catalog/TableDesc.java | 32 +++- .../planner/TestPartitionedTableRewriter.java | 167 +++++++----------- 3 files changed, 93 insertions(+), 111 deletions(-) diff --git a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/CatalogUtil.java b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/CatalogUtil.java index 2b20907c44..b7da7cb84d 100644 --- a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/CatalogUtil.java +++ b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/CatalogUtil.java @@ -332,6 +332,11 @@ public static TableDesc newTableDesc(String tableName, Schema schema, TableMeta return new TableDesc(tableName, schema, meta, path.toUri()); } + public static TableDesc newTableDesc(String tableName, Schema schema, TableMeta meta, Path path + , PartitionMethodDesc partitionMethodDesc) { + return 
new TableDesc(tableName, schema, meta, path.toUri(), partitionMethodDesc); + } + public static TableDesc newTableDesc(TableDescProto proto) { return new TableDesc(proto); } diff --git a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/TableDesc.java b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/TableDesc.java index 8122bd59db..948567001c 100644 --- a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/TableDesc.java +++ b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/TableDesc.java @@ -51,32 +51,46 @@ public class TableDesc implements ProtoObject, GsonObject, Clone public TableDesc() { } + public TableDesc(String tableName, @Nullable Schema schema, TableMeta meta, @Nullable URI uri, boolean external) { + this(tableName, schema, meta, uri, null, external); + } + public TableDesc(String tableName, @Nullable Schema schema, TableMeta meta, - @Nullable URI uri, boolean external) { + @Nullable URI uri, @Nullable PartitionMethodDesc partitionMethodDesc, boolean external) { this.tableName = tableName; this.schema = schema; this.meta = meta; this.uri = uri; + this.partitionMethodDesc = partitionMethodDesc; this.external = external; } public TableDesc(String tableName, @Nullable Schema schema, TableMeta meta, @Nullable URI path) { - this(tableName, schema, meta, path, true); + this(tableName, schema, meta, path, null, true); } - + + public TableDesc(String tableName, @Nullable Schema schema, TableMeta meta, @Nullable URI path, + @Nullable PartitionMethodDesc partitionMethodDesc) { + this(tableName, schema, meta, path, partitionMethodDesc, true); + } + public TableDesc(String tableName, @Nullable Schema schema, String dataFormat, KeyValueSet options, @Nullable URI path) { this(tableName, schema, new TableMeta(dataFormat, options), path); } - - public TableDesc(TableDescProto proto) { + + public TableDesc(String tableName, @Nullable Schema schema, String dataFormat, KeyValueSet 
options, + @Nullable URI path, @Nullable PartitionMethodDesc partitionMethodDesc) { + this(tableName, schema, new TableMeta(dataFormat, options), path, partitionMethodDesc); + } + + public TableDesc(TableDescProto proto) { this(proto.getTableName(), proto.hasSchema() ? SchemaFactory.newV1(proto.getSchema()) : null, - new TableMeta(proto.getMeta()), proto.hasPath() ? URI.create(proto.getPath()) : null, proto.getIsExternal()); + new TableMeta(proto.getMeta()), proto.hasPath() ? URI.create(proto.getPath()) : null, + proto.hasPartition() ? new PartitionMethodDesc(proto.getPartition()) : null, + proto.getIsExternal()); if(proto.hasStats()) { this.stats = new TableStats(proto.getStats()); - } - if (proto.hasPartition()) { - this.partitionMethodDesc = new PartitionMethodDesc(proto.getPartition()); } } diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java index 6d68db5e8b..5867aaf9d2 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java @@ -20,9 +20,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.tajo.LocalTajoTestingUtility; -import org.apache.tajo.OverridableConf; -import org.apache.tajo.QueryTestCaseBase; +import org.apache.tajo.*; import org.apache.tajo.algebra.Expr; import org.apache.tajo.catalog.*; import org.apache.tajo.catalog.partition.PartitionMethodDesc; @@ -30,28 +28,45 @@ import org.apache.tajo.common.TajoDataTypes; import org.apache.tajo.conf.TajoConf; import org.apache.tajo.engine.query.QueryContext; +import org.apache.tajo.parser.sql.SQLAnalyzer; import org.apache.tajo.plan.LogicalPlan; +import org.apache.tajo.plan.LogicalPlanner; import org.apache.tajo.plan.logical.*; import 
org.apache.tajo.plan.partition.PartitionPruningHandle; import org.apache.tajo.plan.rewrite.rules.PartitionedTableRewriter; +import org.apache.tajo.storage.TablespaceManager; import org.apache.tajo.util.CommonTestingUtil; import org.apache.tajo.util.FileUtil; -import org.apache.tajo.util.KeyValueSet; -import org.junit.AfterClass; -import org.junit.BeforeClass; -import org.junit.Test; +import org.junit.*; +import static org.apache.tajo.TajoConstants.DEFAULT_DATABASE_NAME; +import static org.apache.tajo.TajoConstants.DEFAULT_TABLESPACE_NAME; import static org.junit.Assert.*; -public class TestPartitionedTableRewriter extends QueryTestCaseBase { - +public class TestPartitionedTableRewriter { + private TajoConf conf; + private final String TEST_PATH = TajoTestingCluster.DEFAULT_TEST_DIRECTORY + "/TestPartitionedTableRewriter"; + private TajoTestingCluster util; + private CatalogService catalog; + private SQLAnalyzer analyzer; + private LogicalPlanner planner; + private Path testDir; + private FileSystem fs; + final static String PARTITION_TABLE_NAME = "tb_partition"; final static String MULTIPLE_PARTITION_TABLE_NAME = "tb_multiple_partition"; - @BeforeClass - public static void setUp() throws Exception { - FileSystem fs = FileSystem.get(conf); - Path rootDir = TajoConf.getWarehouseDir(testingCluster.getConfiguration()); + @Before + public void setUp() throws Exception { + util = new TajoTestingCluster(); + util.initTestDir(); + util.startCatalogCluster(); + catalog = util.getCatalogService(); + testDir = CommonTestingUtil.getTestDir(TEST_PATH); + catalog.createTablespace(DEFAULT_TABLESPACE_NAME, testDir.toUri().toString()); + catalog.createDatabase(DEFAULT_DATABASE_NAME, DEFAULT_TABLESPACE_NAME); + conf = util.getConfiguration(); + fs = FileSystem.get(conf); Schema schema = SchemaBuilder.builder() .add("n_nationkey", TajoDataTypes.Type.INT8) @@ -59,28 +74,33 @@ public static void setUp() throws Exception { .add("n_regionkey", TajoDataTypes.Type.INT8) .build(); - 
TableMeta meta = CatalogUtil.newTableMeta("TEXT", new KeyValueSet()); + TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.TEXT, util.getConfiguration()); + + createTableWithOnePartitionKeyColumn(fs, schema, meta); + createlTableWithMultiplePartitionKeyColumns(fs, schema, meta); - createExternalTableIncludedOnePartitionKeyColumn(fs, rootDir, schema, meta); - createExternalTableIncludedMultiplePartitionKeyColumns(fs, rootDir, schema, meta); + analyzer = new SQLAnalyzer(); + planner = new LogicalPlanner(catalog, TablespaceManager.getInstance()); } - private static void createExternalTableIncludedOnePartitionKeyColumn(FileSystem fs, Path rootDir, Schema schema, + private void createTableWithOnePartitionKeyColumn(FileSystem fs, Schema schema, TableMeta meta) throws Exception { Schema partSchema = SchemaBuilder.builder() .add("key", TajoDataTypes.Type.TEXT) .build(); PartitionMethodDesc partitionMethodDesc = - new PartitionMethodDesc("TestPartitionedTableRewriter", PARTITION_TABLE_NAME, + new PartitionMethodDesc(DEFAULT_DATABASE_NAME, PARTITION_TABLE_NAME, CatalogProtos.PartitionType.COLUMN, "key", partSchema); - Path tablePath = new Path(rootDir, PARTITION_TABLE_NAME); + Path tablePath = new Path(testDir, PARTITION_TABLE_NAME); fs.mkdirs(tablePath); - client.createExternalTable(PARTITION_TABLE_NAME, schema, tablePath.toUri(), meta, partitionMethodDesc); + TableDesc desc = CatalogUtil.newTableDesc(DEFAULT_DATABASE_NAME + "." + PARTITION_TABLE_NAME, schema, meta, + tablePath, partitionMethodDesc); + catalog.createTable(desc); - TableDesc tableDesc = client.getTableDesc(PARTITION_TABLE_NAME); + TableDesc tableDesc = catalog.getTableDesc(DEFAULT_DATABASE_NAME + "." 
+ PARTITION_TABLE_NAME); assertNotNull(tableDesc); Path path = new Path(tableDesc.getUri().toString() + "/key=part123"); @@ -96,7 +116,7 @@ private static void createExternalTableIncludedOnePartitionKeyColumn(FileSystem FileUtil.writeTextToFile("3|CANADA|1", new Path(path, "data")); } - private static void createExternalTableIncludedMultiplePartitionKeyColumns(FileSystem fs, Path rootDir, + private void createlTableWithMultiplePartitionKeyColumns(FileSystem fs, Schema schema, TableMeta meta) throws Exception { Schema partSchema = SchemaBuilder.builder() .add("key1", TajoDataTypes.Type.TEXT) @@ -106,15 +126,17 @@ private static void createExternalTableIncludedMultiplePartitionKeyColumns(FileS PartitionMethodDesc partitionMethodDesc = - new PartitionMethodDesc("TestPartitionedTableRewriter", MULTIPLE_PARTITION_TABLE_NAME, + new PartitionMethodDesc("default", MULTIPLE_PARTITION_TABLE_NAME, CatalogProtos.PartitionType.COLUMN, "key1,key2,key3", partSchema); - Path tablePath = new Path(rootDir, MULTIPLE_PARTITION_TABLE_NAME); + Path tablePath = new Path(testDir, MULTIPLE_PARTITION_TABLE_NAME); fs.mkdirs(tablePath); - client.createExternalTable(MULTIPLE_PARTITION_TABLE_NAME, schema, tablePath.toUri(), meta, partitionMethodDesc); + TableDesc desc = CatalogUtil.newTableDesc(DEFAULT_DATABASE_NAME + "." + MULTIPLE_PARTITION_TABLE_NAME, schema, + meta, tablePath, partitionMethodDesc); + catalog.createTable(desc); - TableDesc tableDesc = client.getTableDesc(MULTIPLE_PARTITION_TABLE_NAME); + TableDesc tableDesc = catalog.getTableDesc(DEFAULT_DATABASE_NAME + "." 
+ MULTIPLE_PARTITION_TABLE_NAME); assertNotNull(tableDesc); Path path = new Path(tableDesc.getUri().toString() + "/key1=part123"); @@ -138,16 +160,15 @@ private static void createExternalTableIncludedMultiplePartitionKeyColumns(FileS FileUtil.writeTextToFile("3|CANADA|1", new Path(path, "data")); } - @AfterClass - public static void tearDown() throws Exception { - client.executeQuery("DROP TABLE IF EXISTS " + PARTITION_TABLE_NAME + " PURGE;"); - client.executeQuery("DROP TABLE IF EXISTS " + MULTIPLE_PARTITION_TABLE_NAME + " PURGE;"); + @After + public void tearDown() throws Exception { + util.shutdownCatalogCluster(); } @Test public void testFilterIncludePartitionKeyColumn() throws Exception { - Expr expr = sqlParser.parse("SELECT * FROM " + PARTITION_TABLE_NAME + " WHERE key = 'part456' ORDER BY key"); - QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(testingCluster.getConfiguration()); + Expr expr = analyzer.parse("SELECT * FROM " + PARTITION_TABLE_NAME + " WHERE key = 'part456' ORDER BY key"); + QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(util.getConfiguration()); LogicalPlan newPlan = planner.createPlan(defaultContext, expr); LogicalNode plan = newPlan.getRootBlock().getRoot(); @@ -177,7 +198,6 @@ public void testFilterIncludePartitionKeyColumn() throws Exception { assertEquals(1, filteredPaths.length); assertEquals("key=part456", filteredPaths[0].getName()); - String[] partitionKeys = partitionPruningHandle.getPartitionKeys(); assertEquals(1, partitionKeys.length); assertEquals("key=part456", partitionKeys[0]); @@ -187,8 +207,8 @@ public void testFilterIncludePartitionKeyColumn() throws Exception { @Test public void testWithoutAnyFilters() throws Exception { - Expr expr = sqlParser.parse("SELECT * FROM " + PARTITION_TABLE_NAME + " ORDER BY key"); - QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(testingCluster.getConfiguration()); + Expr expr = analyzer.parse("SELECT * FROM " + 
PARTITION_TABLE_NAME + " ORDER BY key"); + QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(util.getConfiguration()); LogicalPlan newPlan = planner.createPlan(defaultContext, expr); LogicalNode plan = newPlan.getRootBlock().getRoot(); @@ -226,8 +246,8 @@ public void testWithoutAnyFilters() throws Exception { @Test public void testFilterIncludeNonExistingPartitionValue() throws Exception { - Expr expr = sqlParser.parse("SELECT * FROM " + PARTITION_TABLE_NAME + " WHERE key = 'part123456789'"); - QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(testingCluster.getConfiguration()); + Expr expr = analyzer.parse("SELECT * FROM " + PARTITION_TABLE_NAME + " WHERE key = 'part123456789'"); + QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(util.getConfiguration()); LogicalPlan newPlan = planner.createPlan(defaultContext, expr); LogicalNode plan = newPlan.getRootBlock().getRoot(); @@ -258,9 +278,8 @@ public void testFilterIncludeNonExistingPartitionValue() throws Exception { @Test public void testFilterIncludeNonPartitionKeyColumn() throws Exception { - String sql = "SELECT * FROM " + PARTITION_TABLE_NAME + " WHERE n_nationkey = 1"; - Expr expr = sqlParser.parse(sql); - QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(testingCluster.getConfiguration()); + Expr expr = analyzer.parse("SELECT * FROM " + PARTITION_TABLE_NAME + " WHERE n_nationkey = 1"); + QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(util.getConfiguration()); LogicalPlan newPlan = planner.createPlan(defaultContext, expr); LogicalNode plan = newPlan.getRootBlock().getRoot(); @@ -300,9 +319,9 @@ public void testFilterIncludeNonPartitionKeyColumn() throws Exception { @Test public void testFilterIncludeEveryPartitionKeyColumn() throws Exception { - Expr expr = sqlParser.parse("SELECT * FROM " + MULTIPLE_PARTITION_TABLE_NAME + Expr expr = analyzer.parse("SELECT * FROM " + 
MULTIPLE_PARTITION_TABLE_NAME + " WHERE key1 = 'part789' and key2 = 'supp789' and key3=3"); - QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(testingCluster.getConfiguration()); + QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(util.getConfiguration()); LogicalPlan newPlan = planner.createPlan(defaultContext, expr); LogicalNode plan = newPlan.getRootBlock().getRoot(); @@ -340,9 +359,9 @@ public void testFilterIncludeEveryPartitionKeyColumn() throws Exception { @Test public void testFilterIncludeSomeOfPartitionKeyColumns() throws Exception { - Expr expr = sqlParser.parse("SELECT * FROM " + MULTIPLE_PARTITION_TABLE_NAME + Expr expr = analyzer.parse("SELECT * FROM " + MULTIPLE_PARTITION_TABLE_NAME + " WHERE key1 = 'part123' and key2 = 'supp123' order by n_nationkey"); - QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(testingCluster.getConfiguration()); + QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(util.getConfiguration()); LogicalPlan newPlan = planner.createPlan(defaultContext, expr); LogicalNode plan = newPlan.getRootBlock().getRoot(); @@ -389,9 +408,9 @@ public void testFilterIncludeSomeOfPartitionKeyColumns() throws Exception { @Test public void testFilterIncludeNonPartitionKeyColumns() throws Exception { - Expr expr = sqlParser.parse("SELECT * FROM " + MULTIPLE_PARTITION_TABLE_NAME + Expr expr = analyzer.parse("SELECT * FROM " + MULTIPLE_PARTITION_TABLE_NAME + " WHERE key1 = 'part123' and n_nationkey >= 2 order by n_nationkey"); - QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(testingCluster.getConfiguration()); + QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(util.getConfiguration()); LogicalPlan newPlan = planner.createPlan(defaultContext, expr); LogicalNode plan = newPlan.getRootBlock().getRoot(); @@ -436,60 +455,4 @@ public void testFilterIncludeNonPartitionKeyColumns() throws Exception { assertEquals(23L, 
partitionPruningHandle.getTotalVolume()); } - @Test - public final void testPartitionPruningWitCTAS() throws Exception { - String tableName = "testPartitionPruningUsingDirectories".toLowerCase(); - String canonicalTableName = CatalogUtil.getCanonicalTableName("\"" + getCurrentDatabase() +"\"", tableName); - - executeString( - "create table " + canonicalTableName + "(col1 int4, col2 int4) partition by column(key float8) " - + " as select l_orderkey, l_partkey, l_quantity from default.lineitem"); - - TableDesc tableDesc = catalog.getTableDesc(getCurrentDatabase(), tableName); - assertNotNull(tableDesc); - - // With a filter which checks a partition key column - Expr expr = sqlParser.parse("SELECT * FROM " + canonicalTableName + " WHERE key <= 40.0 ORDER BY key"); - QueryContext defaultContext = LocalTajoTestingUtility.createDummyContext(testingCluster.getConfiguration()); - LogicalPlan newPlan = planner.createPlan(defaultContext, expr); - LogicalNode plan = newPlan.getRootBlock().getRoot(); - - assertEquals(NodeType.ROOT, plan.getType()); - LogicalRootNode root = (LogicalRootNode) plan; - - ProjectionNode projNode = root.getChild(); - - assertEquals(NodeType.SORT, projNode.getChild().getType()); - SortNode sortNode = projNode.getChild(); - - assertEquals(NodeType.SELECTION, sortNode.getChild().getType()); - SelectionNode selNode = sortNode.getChild(); - assertTrue(selNode.hasQual()); - - assertEquals(NodeType.SCAN, selNode.getChild().getType()); - ScanNode scanNode = selNode.getChild(); - scanNode.setQual(selNode.getQual()); - - PartitionedTableRewriter rewriter = new PartitionedTableRewriter(); - rewriter.setCatalog(catalog); - - PartitionPruningHandle partitionPruningHandle = rewriter.getPartitionPruningHandle(conf, scanNode); - assertNotNull(partitionPruningHandle); - - Path[] filteredPaths = partitionPruningHandle.getPartitionPaths(); - assertEquals(3, filteredPaths.length); - assertEquals("key=17.0", filteredPaths[0].getName()); - assertEquals("key=36.0", 
filteredPaths[1].getName()); - assertEquals("key=38.0", filteredPaths[2].getName()); - - String[] partitionKeys = partitionPruningHandle.getPartitionKeys(); - assertEquals(3, partitionKeys.length); - assertEquals("key=17.0", partitionKeys[0]); - assertEquals("key=36.0", partitionKeys[1]); - assertEquals("key=38.0", partitionKeys[2]); - - assertEquals(12L, partitionPruningHandle.getTotalVolume()); - - executeString("DROP TABLE " + canonicalTableName + " PURGE").close(); - } } \ No newline at end of file From a440b5cb296c2e8cbadf28554beb5006f7ce74b9 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Thu, 12 May 2016 22:48:42 +0900 Subject: [PATCH 123/127] Add order by clause to AbstractDBStore::getPartitionsOfTable --- .../java/org/apache/tajo/catalog/store/AbstractDBStore.java | 3 ++- .../tajo/engine/planner/TestPartitionedTableRewriter.java | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/AbstractDBStore.java b/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/AbstractDBStore.java index 88fabe2b8a..0c1d2b6074 100644 --- a/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/AbstractDBStore.java +++ b/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/AbstractDBStore.java @@ -2063,7 +2063,8 @@ public List getPartitionsOfTable(String databaseName, String ensurePartitionTable(tableName, tableId); String sql = "SELECT PATH, PARTITION_NAME, " + COL_PARTITIONS_PK + ", " + COL_PARTITION_BYTES - + " FROM " + TB_PARTTIONS +" WHERE " + COL_TABLES_PK + " = ? "; + + " FROM " + TB_PARTTIONS +" WHERE " + COL_TABLES_PK + " = ? 
" + + " ORDER BY PATH "; if (LOG.isDebugEnabled()) { LOG.debug(sql); diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java index 5867aaf9d2..6431c5db78 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java @@ -77,7 +77,7 @@ public void setUp() throws Exception { TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.TEXT, util.getConfiguration()); createTableWithOnePartitionKeyColumn(fs, schema, meta); - createlTableWithMultiplePartitionKeyColumns(fs, schema, meta); + createTableWithMultiplePartitionKeyColumns(fs, schema, meta); analyzer = new SQLAnalyzer(); planner = new LogicalPlanner(catalog, TablespaceManager.getInstance()); @@ -116,7 +116,7 @@ private void createTableWithOnePartitionKeyColumn(FileSystem fs, Schema schema, FileUtil.writeTextToFile("3|CANADA|1", new Path(path, "data")); } - private void createlTableWithMultiplePartitionKeyColumns(FileSystem fs, + private void createTableWithMultiplePartitionKeyColumns(FileSystem fs, Schema schema, TableMeta meta) throws Exception { Schema partSchema = SchemaBuilder.builder() .add("key1", TajoDataTypes.Type.TEXT) From da755d9551b40ec45ccd5204c1ee6e19cc27ace2 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Fri, 13 May 2016 10:33:34 +0900 Subject: [PATCH 124/127] Fix unit test bugs --- .../tajo/catalog/store/AbstractDBStore.java | 3 +- .../planner/TestPartitionedTableRewriter.java | 98 +++++++++---------- 2 files changed, 50 insertions(+), 51 deletions(-) diff --git a/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/AbstractDBStore.java b/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/AbstractDBStore.java index 0c1d2b6074..88fabe2b8a 100644 --- 
a/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/AbstractDBStore.java +++ b/tajo-catalog/tajo-catalog-server/src/main/java/org/apache/tajo/catalog/store/AbstractDBStore.java @@ -2063,8 +2063,7 @@ public List getPartitionsOfTable(String databaseName, String ensurePartitionTable(tableName, tableId); String sql = "SELECT PATH, PARTITION_NAME, " + COL_PARTITIONS_PK + ", " + COL_PARTITION_BYTES - + " FROM " + TB_PARTTIONS +" WHERE " + COL_TABLES_PK + " = ? " - + " ORDER BY PATH "; + + " FROM " + TB_PARTTIONS +" WHERE " + COL_TABLES_PK + " = ? "; if (LOG.isDebugEnabled()) { LOG.debug(sql); diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java index 6431c5db78..794f8e1a1e 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java @@ -39,6 +39,10 @@ import org.apache.tajo.util.FileUtil; import org.junit.*; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Stream; + import static org.apache.tajo.TajoConstants.DEFAULT_DATABASE_NAME; import static org.apache.tajo.TajoConstants.DEFAULT_TABLESPACE_NAME; import static org.junit.Assert.*; @@ -229,17 +233,17 @@ public void testWithoutAnyFilters() throws Exception { PartitionPruningHandle partitionPruningHandle = rewriter.getPartitionPruningHandle(conf, scanNode); assertNotNull(partitionPruningHandle); - Path[] filteredPaths = partitionPruningHandle.getPartitionPaths(); - assertEquals(3, filteredPaths.length); - assertEquals("key=part123", filteredPaths[0].getName()); - assertEquals("key=part456", filteredPaths[1].getName()); - assertEquals("key=part789", filteredPaths[2].getName()); + List filteredPaths = Arrays.asList(partitionPruningHandle.getPartitionPaths()); + assertEquals(3, 
filteredPaths.size()); + assertTrue(filteredPaths.stream().anyMatch(path -> path.toString().indexOf("key=part123") > -1)); + assertTrue(filteredPaths.stream().anyMatch(path -> path.toString().indexOf("key=part456") > -1)); + assertTrue(filteredPaths.stream().anyMatch(path -> path.toString().indexOf("key=part789") > -1)); - String[] partitionKeys = partitionPruningHandle.getPartitionKeys(); - assertEquals(3, partitionKeys.length); - assertEquals("key=part123", partitionKeys[0]); - assertEquals("key=part456", partitionKeys[1]); - assertEquals("key=part789", partitionKeys[2]); + List partitionKeys = Arrays.asList(partitionPruningHandle.getPartitionKeys()); + assertEquals(3, partitionKeys.size()); + assertTrue(partitionKeys.stream().anyMatch(path -> path.toString().indexOf("key=part123") > -1)); + assertTrue(partitionKeys.stream().anyMatch(path -> path.toString().indexOf("key=part456") > -1)); + assertTrue(partitionKeys.stream().anyMatch(path -> path.toString().indexOf("key=part789") > -1)); assertEquals(33L, partitionPruningHandle.getTotalVolume()); } @@ -302,17 +306,17 @@ public void testFilterIncludeNonPartitionKeyColumn() throws Exception { PartitionPruningHandle partitionPruningHandle = rewriter.getPartitionPruningHandle(conf, scanNode); assertNotNull(partitionPruningHandle); - Path[] filteredPaths = partitionPruningHandle.getPartitionPaths(); - assertEquals(3, filteredPaths.length); - assertEquals("key=part123", filteredPaths[0].getName()); - assertEquals("key=part456", filteredPaths[1].getName()); - assertEquals("key=part789", filteredPaths[2].getName()); + List filteredPaths = Arrays.asList(partitionPruningHandle.getPartitionPaths()); + assertEquals(3, filteredPaths.size()); + assertTrue(filteredPaths.stream().anyMatch(path -> path.toString().indexOf("key=part123") > -1)); + assertTrue(filteredPaths.stream().anyMatch(path -> path.toString().indexOf("key=part456") > -1)); + assertTrue(filteredPaths.stream().anyMatch(path -> path.toString().indexOf("key=part789") 
> -1)); - String[] partitionKeys = partitionPruningHandle.getPartitionKeys(); - assertEquals(3, partitionKeys.length); - assertEquals("key=part123", partitionKeys[0]); - assertEquals("key=part456", partitionKeys[1]); - assertEquals("key=part789", partitionKeys[2]); + List partitionKeys = Arrays.asList(partitionPruningHandle.getPartitionKeys()); + assertEquals(3, partitionKeys.size()); + assertTrue(partitionKeys.stream().anyMatch(path -> path.toString().indexOf("key=part123") > -1)); + assertTrue(partitionKeys.stream().anyMatch(path -> path.toString().indexOf("key=part456") > -1)); + assertTrue(partitionKeys.stream().anyMatch(path -> path.toString().indexOf("key=part789") > -1)); assertEquals(33L, partitionPruningHandle.getTotalVolume()); } @@ -387,21 +391,19 @@ public void testFilterIncludeSomeOfPartitionKeyColumns() throws Exception { PartitionPruningHandle partitionPruningHandle = rewriter.getPartitionPruningHandle(conf, scanNode); assertNotNull(partitionPruningHandle); - Path[] filteredPaths = partitionPruningHandle.getPartitionPaths(); - assertEquals(2, filteredPaths.length); - - assertEquals("key3=1", filteredPaths[0].getName()); - assertEquals("key2=supp123", filteredPaths[0].getParent().getName()); - assertEquals("key1=part123", filteredPaths[0].getParent().getParent().getName()); + List filteredPaths = Arrays.asList(partitionPruningHandle.getPartitionPaths()); + assertEquals(2, filteredPaths.size()); + assertTrue(filteredPaths.stream(). + anyMatch(path -> path.toString().indexOf("key1=part123/key2=supp123/key3=1") > -1)); + assertTrue(filteredPaths.stream(). 
+ anyMatch(path -> path.toString().indexOf("key1=part123/key2=supp123/key3=2") > -1)); - assertEquals("key3=2", filteredPaths[1].getName()); - assertEquals("key2=supp123", filteredPaths[1].getParent().getName()); - assertEquals("key1=part123", filteredPaths[1].getParent().getParent().getName()); - - String[] partitionKeys = partitionPruningHandle.getPartitionKeys(); - assertEquals(2, partitionKeys.length); - assertEquals("key1=part123/key2=supp123/key3=1", partitionKeys[0]); - assertEquals("key1=part123/key2=supp123/key3=2", partitionKeys[1]); + List partitionKeys = Arrays.asList(partitionPruningHandle.getPartitionKeys()); + assertEquals(2, partitionKeys.size()); + assertTrue(partitionKeys.stream() + .anyMatch(path -> path.toString().indexOf("key1=part123/key2=supp123/key3=1") > -1)); + assertTrue(partitionKeys.stream() + .anyMatch(path -> path.toString().indexOf("key1=part123/key2=supp123/key3=2") > -1)); assertEquals(23L, partitionPruningHandle.getTotalVolume()); } @@ -436,21 +438,19 @@ public void testFilterIncludeNonPartitionKeyColumns() throws Exception { PartitionPruningHandle partitionPruningHandle = rewriter.getPartitionPruningHandle(conf, scanNode); assertNotNull(partitionPruningHandle); - Path[] filteredPaths = partitionPruningHandle.getPartitionPaths(); - assertEquals(2, filteredPaths.length); - - assertEquals("key3=1", filteredPaths[0].getName()); - assertEquals("key2=supp123", filteredPaths[0].getParent().getName()); - assertEquals("key1=part123", filteredPaths[0].getParent().getParent().getName()); - - assertEquals("key3=2", filteredPaths[1].getName()); - assertEquals("key2=supp123", filteredPaths[1].getParent().getName()); - assertEquals("key1=part123", filteredPaths[1].getParent().getParent().getName()); - - String[] partitionKeys = partitionPruningHandle.getPartitionKeys(); - assertEquals(2, partitionKeys.length); - assertEquals("key1=part123/key2=supp123/key3=1", partitionKeys[0]); - assertEquals("key1=part123/key2=supp123/key3=2", 
partitionKeys[1]); + List filteredPaths = Arrays.asList(partitionPruningHandle.getPartitionPaths()); + assertEquals(2, filteredPaths.size()); + assertTrue(filteredPaths.stream(). + anyMatch(path -> path.toString().indexOf("key1=part123/key2=supp123/key3=1") > -1)); + assertTrue(filteredPaths.stream(). + anyMatch(path -> path.toString().indexOf("key1=part123/key2=supp123/key3=2") > -1)); + + List partitionKeys = Arrays.asList(partitionPruningHandle.getPartitionKeys()); + assertEquals(2, partitionKeys.size()); + assertTrue(partitionKeys.stream() + .anyMatch(path -> path.toString().indexOf("key1=part123/key2=supp123/key3=1") > -1)); + assertTrue(partitionKeys.stream() + .anyMatch(path -> path.toString().indexOf("key1=part123/key2=supp123/key3=2") > -1)); assertEquals(23L, partitionPruningHandle.getTotalVolume()); } From 15f97fd4c8892be66d0e316812ad77d728f9e276 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Fri, 13 May 2016 10:57:42 +0900 Subject: [PATCH 125/127] Remove codes for Stream::anyMatch --- .../planner/TestPartitionedTableRewriter.java | 122 +++++++++++------- 1 file changed, 73 insertions(+), 49 deletions(-) diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java index 794f8e1a1e..5290885911 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java @@ -41,6 +41,7 @@ import java.util.Arrays; import java.util.List; +import java.util.stream.Collectors; import java.util.stream.Stream; import static org.apache.tajo.TajoConstants.DEFAULT_DATABASE_NAME; @@ -128,7 +129,6 @@ private void createTableWithMultiplePartitionKeyColumns(FileSystem fs, .add("key3", TajoDataTypes.Type.INT8) .build(); - PartitionMethodDesc partitionMethodDesc = new PartitionMethodDesc("default", 
MULTIPLE_PARTITION_TABLE_NAME, CatalogProtos.PartitionType.COLUMN, "key1,key2,key3", partSchema); @@ -233,17 +233,25 @@ public void testWithoutAnyFilters() throws Exception { PartitionPruningHandle partitionPruningHandle = rewriter.getPartitionPruningHandle(conf, scanNode); assertNotNull(partitionPruningHandle); - List filteredPaths = Arrays.asList(partitionPruningHandle.getPartitionPaths()); - assertEquals(3, filteredPaths.size()); - assertTrue(filteredPaths.stream().anyMatch(path -> path.toString().indexOf("key=part123") > -1)); - assertTrue(filteredPaths.stream().anyMatch(path -> path.toString().indexOf("key=part456") > -1)); - assertTrue(filteredPaths.stream().anyMatch(path -> path.toString().indexOf("key=part789") > -1)); - - List partitionKeys = Arrays.asList(partitionPruningHandle.getPartitionKeys()); - assertEquals(3, partitionKeys.size()); - assertTrue(partitionKeys.stream().anyMatch(path -> path.toString().indexOf("key=part123") > -1)); - assertTrue(partitionKeys.stream().anyMatch(path -> path.toString().indexOf("key=part456") > -1)); - assertTrue(partitionKeys.stream().anyMatch(path -> path.toString().indexOf("key=part789") > -1)); + Stream partitionPathStream = Stream.of(partitionPruningHandle.getPartitionPaths()) + .sorted((path1, path2) -> { + return path1.compareTo(path2); + }); + List partitionPathList = partitionPathStream.collect(Collectors.toList()); + assertEquals(3, partitionPathList.size()); + assertTrue(partitionPathList.get(0).toString().indexOf("key=part123") > -1); + assertTrue(partitionPathList.get(1).toString().indexOf("key=part456") > -1); + assertTrue(partitionPathList.get(2).toString().indexOf("key=part789") > -1); + + Stream partitionKeysStream = Stream.of(partitionPruningHandle.getPartitionKeys()) + .sorted((keys1, keys2) -> { + return keys1.compareTo(keys2); + }); + List partitionKeysList = partitionKeysStream.collect(Collectors.toList()); + assertEquals(3, partitionKeysList.size()); + 
assertTrue(partitionKeysList.get(0).toString().indexOf("key=part123") > -1); + assertTrue(partitionKeysList.get(1).toString().indexOf("key=part456") > -1); + assertTrue(partitionKeysList.get(2).toString().indexOf("key=part789") > -1); assertEquals(33L, partitionPruningHandle.getTotalVolume()); } @@ -306,17 +314,25 @@ public void testFilterIncludeNonPartitionKeyColumn() throws Exception { PartitionPruningHandle partitionPruningHandle = rewriter.getPartitionPruningHandle(conf, scanNode); assertNotNull(partitionPruningHandle); - List filteredPaths = Arrays.asList(partitionPruningHandle.getPartitionPaths()); - assertEquals(3, filteredPaths.size()); - assertTrue(filteredPaths.stream().anyMatch(path -> path.toString().indexOf("key=part123") > -1)); - assertTrue(filteredPaths.stream().anyMatch(path -> path.toString().indexOf("key=part456") > -1)); - assertTrue(filteredPaths.stream().anyMatch(path -> path.toString().indexOf("key=part789") > -1)); - - List partitionKeys = Arrays.asList(partitionPruningHandle.getPartitionKeys()); - assertEquals(3, partitionKeys.size()); - assertTrue(partitionKeys.stream().anyMatch(path -> path.toString().indexOf("key=part123") > -1)); - assertTrue(partitionKeys.stream().anyMatch(path -> path.toString().indexOf("key=part456") > -1)); - assertTrue(partitionKeys.stream().anyMatch(path -> path.toString().indexOf("key=part789") > -1)); + Stream partitionPathStream = Stream.of(partitionPruningHandle.getPartitionPaths()) + .sorted((path1, path2) -> { + return path1.compareTo(path2); + }); + List partitionPathList = partitionPathStream.collect(Collectors.toList()); + assertEquals(3, partitionPathList.size()); + assertTrue(partitionPathList.get(0).toString().indexOf("key=part123") > -1); + assertTrue(partitionPathList.get(1).toString().indexOf("key=part456") > -1); + assertTrue(partitionPathList.get(2).toString().indexOf("key=part789") > -1); + + Stream partitionKeysStream = Stream.of(partitionPruningHandle.getPartitionKeys()) + .sorted((keys1, 
keys2) -> { + return keys1.compareTo(keys2); + }); + List partitionKeysList = partitionKeysStream.collect(Collectors.toList()); + assertEquals(3, partitionKeysList.size()); + assertTrue(partitionKeysList.get(0).toString().indexOf("key=part123") > -1); + assertTrue(partitionKeysList.get(1).toString().indexOf("key=part456") > -1); + assertTrue(partitionKeysList.get(2).toString().indexOf("key=part789") > -1); assertEquals(33L, partitionPruningHandle.getTotalVolume()); } @@ -391,19 +407,23 @@ public void testFilterIncludeSomeOfPartitionKeyColumns() throws Exception { PartitionPruningHandle partitionPruningHandle = rewriter.getPartitionPruningHandle(conf, scanNode); assertNotNull(partitionPruningHandle); - List filteredPaths = Arrays.asList(partitionPruningHandle.getPartitionPaths()); - assertEquals(2, filteredPaths.size()); - assertTrue(filteredPaths.stream(). - anyMatch(path -> path.toString().indexOf("key1=part123/key2=supp123/key3=1") > -1)); - assertTrue(filteredPaths.stream(). - anyMatch(path -> path.toString().indexOf("key1=part123/key2=supp123/key3=2") > -1)); - - List partitionKeys = Arrays.asList(partitionPruningHandle.getPartitionKeys()); - assertEquals(2, partitionKeys.size()); - assertTrue(partitionKeys.stream() - .anyMatch(path -> path.toString().indexOf("key1=part123/key2=supp123/key3=1") > -1)); - assertTrue(partitionKeys.stream() - .anyMatch(path -> path.toString().indexOf("key1=part123/key2=supp123/key3=2") > -1)); + Stream partitionPathStream = Stream.of(partitionPruningHandle.getPartitionPaths()) + .sorted((path1, path2) -> { + return path1.compareTo(path2); + }); + List partitionPathList = partitionPathStream.collect(Collectors.toList()); + assertEquals(2, partitionPathList.size()); + assertTrue(partitionPathList.get(0).toString().indexOf("key1=part123/key2=supp123/key3=1") > -1); + assertTrue(partitionPathList.get(1).toString().indexOf("key1=part123/key2=supp123/key3=2") > -1); + + Stream partitionKeysStream = 
Stream.of(partitionPruningHandle.getPartitionKeys()) + .sorted((keys1, keys2) -> { + return keys1.compareTo(keys2); + }); + List partitionKeysList = partitionKeysStream.collect(Collectors.toList()); + assertEquals(2, partitionKeysList.size()); + assertTrue(partitionKeysList.get(0).toString().indexOf("key1=part123/key2=supp123/key3=1") > -1); + assertTrue(partitionKeysList.get(1).toString().indexOf("key1=part123/key2=supp123/key3=2") > -1); assertEquals(23L, partitionPruningHandle.getTotalVolume()); } @@ -438,19 +458,23 @@ public void testFilterIncludeNonPartitionKeyColumns() throws Exception { PartitionPruningHandle partitionPruningHandle = rewriter.getPartitionPruningHandle(conf, scanNode); assertNotNull(partitionPruningHandle); - List filteredPaths = Arrays.asList(partitionPruningHandle.getPartitionPaths()); - assertEquals(2, filteredPaths.size()); - assertTrue(filteredPaths.stream(). - anyMatch(path -> path.toString().indexOf("key1=part123/key2=supp123/key3=1") > -1)); - assertTrue(filteredPaths.stream(). 
- anyMatch(path -> path.toString().indexOf("key1=part123/key2=supp123/key3=2") > -1)); - - List partitionKeys = Arrays.asList(partitionPruningHandle.getPartitionKeys()); - assertEquals(2, partitionKeys.size()); - assertTrue(partitionKeys.stream() - .anyMatch(path -> path.toString().indexOf("key1=part123/key2=supp123/key3=1") > -1)); - assertTrue(partitionKeys.stream() - .anyMatch(path -> path.toString().indexOf("key1=part123/key2=supp123/key3=2") > -1)); + Stream partitionPathStream = Stream.of(partitionPruningHandle.getPartitionPaths()) + .sorted((path1, path2) -> { + return path1.compareTo(path2); + }); + List partitionPathList = partitionPathStream.collect(Collectors.toList()); + assertEquals(2, partitionPathList.size()); + assertTrue(partitionPathList.get(0).toString().indexOf("key1=part123/key2=supp123/key3=1") > -1); + assertTrue(partitionPathList.get(1).toString().indexOf("key1=part123/key2=supp123/key3=2") > -1); + + Stream partitionKeysStream = Stream.of(partitionPruningHandle.getPartitionKeys()) + .sorted((keys1, keys2) -> { + return keys1.compareTo(keys2); + }); + List partitionKeysList = partitionKeysStream.collect(Collectors.toList()); + assertEquals(2, partitionKeysList.size()); + assertTrue(partitionKeysList.get(0).toString().indexOf("key1=part123/key2=supp123/key3=1") > -1); + assertTrue(partitionKeysList.get(1).toString().indexOf("key1=part123/key2=supp123/key3=2") > -1); assertEquals(23L, partitionPruningHandle.getTotalVolume()); } From fd8f14465a3e5f128416bb17f13bc82cde93c780 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Fri, 13 May 2016 11:00:27 +0900 Subject: [PATCH 126/127] Remove unused package --- .../apache/tajo/engine/planner/TestPartitionedTableRewriter.java | 1 - 1 file changed, 1 deletion(-) diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java index 5290885911..1bda73a258 100644 --- 
a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java @@ -39,7 +39,6 @@ import org.apache.tajo.util.FileUtil; import org.junit.*; -import java.util.Arrays; import java.util.List; import java.util.stream.Collectors; import java.util.stream.Stream; From 8a9837c2f567b37092a267f3fc706521ae715266 Mon Sep 17 00:00:00 2001 From: JaeHwa Jung Date: Fri, 13 May 2016 12:26:14 +0900 Subject: [PATCH 127/127] Clean up test codes --- .../planner/TestPartitionedTableRewriter.java | 78 +++++++------------ .../fragment/PartitionFileFragment.java | 1 - 2 files changed, 30 insertions(+), 49 deletions(-) diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java index 1bda73a258..69fc0fec7f 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/planner/TestPartitionedTableRewriter.java @@ -199,7 +199,7 @@ public void testFilterIncludePartitionKeyColumn() throws Exception { Path[] filteredPaths = partitionPruningHandle.getPartitionPaths(); assertEquals(1, filteredPaths.length); - assertEquals("key=part456", filteredPaths[0].getName()); + assertTrue(filteredPaths[0].toString().endsWith("key=part456")); String[] partitionKeys = partitionPruningHandle.getPartitionKeys(); assertEquals(1, partitionKeys.length); @@ -233,24 +233,20 @@ public void testWithoutAnyFilters() throws Exception { assertNotNull(partitionPruningHandle); Stream partitionPathStream = Stream.of(partitionPruningHandle.getPartitionPaths()) - .sorted((path1, path2) -> { - return path1.compareTo(path2); - }); + .sorted((path1, path2) -> path1.compareTo(path2)); List partitionPathList = 
partitionPathStream.collect(Collectors.toList()); assertEquals(3, partitionPathList.size()); - assertTrue(partitionPathList.get(0).toString().indexOf("key=part123") > -1); - assertTrue(partitionPathList.get(1).toString().indexOf("key=part456") > -1); - assertTrue(partitionPathList.get(2).toString().indexOf("key=part789") > -1); + assertTrue(partitionPathList.get(0).toString().endsWith("key=part123")); + assertTrue(partitionPathList.get(1).toString().endsWith("key=part456")); + assertTrue(partitionPathList.get(2).toString().endsWith("key=part789")); Stream partitionKeysStream = Stream.of(partitionPruningHandle.getPartitionKeys()) - .sorted((keys1, keys2) -> { - return keys1.compareTo(keys2); - }); + .sorted((keys1, keys2) -> keys1.compareTo(keys2)); List partitionKeysList = partitionKeysStream.collect(Collectors.toList()); assertEquals(3, partitionKeysList.size()); - assertTrue(partitionKeysList.get(0).toString().indexOf("key=part123") > -1); - assertTrue(partitionKeysList.get(1).toString().indexOf("key=part456") > -1); - assertTrue(partitionKeysList.get(2).toString().indexOf("key=part789") > -1); + assertEquals(partitionKeysList.get(0), "key=part123"); + assertEquals(partitionKeysList.get(1), "key=part456"); + assertEquals(partitionKeysList.get(2), "key=part789"); assertEquals(33L, partitionPruningHandle.getTotalVolume()); } @@ -314,24 +310,20 @@ public void testFilterIncludeNonPartitionKeyColumn() throws Exception { assertNotNull(partitionPruningHandle); Stream partitionPathStream = Stream.of(partitionPruningHandle.getPartitionPaths()) - .sorted((path1, path2) -> { - return path1.compareTo(path2); - }); + .sorted((path1, path2) -> path1.compareTo(path2)); List partitionPathList = partitionPathStream.collect(Collectors.toList()); assertEquals(3, partitionPathList.size()); - assertTrue(partitionPathList.get(0).toString().indexOf("key=part123") > -1); - assertTrue(partitionPathList.get(1).toString().indexOf("key=part456") > -1); - 
assertTrue(partitionPathList.get(2).toString().indexOf("key=part789") > -1); + assertTrue(partitionPathList.get(0).toString().endsWith("key=part123")); + assertTrue(partitionPathList.get(1).toString().endsWith("key=part456")); + assertTrue(partitionPathList.get(2).toString().endsWith("key=part789")); Stream partitionKeysStream = Stream.of(partitionPruningHandle.getPartitionKeys()) - .sorted((keys1, keys2) -> { - return keys1.compareTo(keys2); - }); + .sorted((keys1, keys2) -> keys1.compareTo(keys2)); List partitionKeysList = partitionKeysStream.collect(Collectors.toList()); assertEquals(3, partitionKeysList.size()); - assertTrue(partitionKeysList.get(0).toString().indexOf("key=part123") > -1); - assertTrue(partitionKeysList.get(1).toString().indexOf("key=part456") > -1); - assertTrue(partitionKeysList.get(2).toString().indexOf("key=part789") > -1); + assertEquals(partitionKeysList.get(0), "key=part123"); + assertEquals(partitionKeysList.get(1), "key=part456"); + assertEquals(partitionKeysList.get(2), "key=part789"); assertEquals(33L, partitionPruningHandle.getTotalVolume()); } @@ -365,9 +357,7 @@ public void testFilterIncludeEveryPartitionKeyColumn() throws Exception { Path[] filteredPaths = partitionPruningHandle.getPartitionPaths(); assertEquals(1, filteredPaths.length); - assertEquals("key3=3", filteredPaths[0].getName()); - assertEquals("key2=supp789", filteredPaths[0].getParent().getName()); - assertEquals("key1=part789", filteredPaths[0].getParent().getParent().getName()); + assertTrue(filteredPaths[0].toString().endsWith("key1=part789/key2=supp789/key3=3")); String[] partitionKeys = partitionPruningHandle.getPartitionKeys(); assertEquals(1, partitionKeys.length); @@ -407,22 +397,18 @@ public void testFilterIncludeSomeOfPartitionKeyColumns() throws Exception { assertNotNull(partitionPruningHandle); Stream partitionPathStream = Stream.of(partitionPruningHandle.getPartitionPaths()) - .sorted((path1, path2) -> { - return path1.compareTo(path2); - }); + 
.sorted((path1, path2) -> path1.compareTo(path2)); List partitionPathList = partitionPathStream.collect(Collectors.toList()); assertEquals(2, partitionPathList.size()); - assertTrue(partitionPathList.get(0).toString().indexOf("key1=part123/key2=supp123/key3=1") > -1); - assertTrue(partitionPathList.get(1).toString().indexOf("key1=part123/key2=supp123/key3=2") > -1); + assertTrue(partitionPathList.get(0).toString().endsWith("key1=part123/key2=supp123/key3=1")); + assertTrue(partitionPathList.get(1).toString().endsWith("key1=part123/key2=supp123/key3=2")); Stream partitionKeysStream = Stream.of(partitionPruningHandle.getPartitionKeys()) - .sorted((keys1, keys2) -> { - return keys1.compareTo(keys2); - }); + .sorted((keys1, keys2) -> keys1.compareTo(keys2)); List partitionKeysList = partitionKeysStream.collect(Collectors.toList()); assertEquals(2, partitionKeysList.size()); - assertTrue(partitionKeysList.get(0).toString().indexOf("key1=part123/key2=supp123/key3=1") > -1); - assertTrue(partitionKeysList.get(1).toString().indexOf("key1=part123/key2=supp123/key3=2") > -1); + assertEquals(partitionKeysList.get(0), ("key1=part123/key2=supp123/key3=1")); + assertEquals(partitionKeysList.get(1), ("key1=part123/key2=supp123/key3=2")); assertEquals(23L, partitionPruningHandle.getTotalVolume()); } @@ -458,22 +444,18 @@ public void testFilterIncludeNonPartitionKeyColumns() throws Exception { assertNotNull(partitionPruningHandle); Stream partitionPathStream = Stream.of(partitionPruningHandle.getPartitionPaths()) - .sorted((path1, path2) -> { - return path1.compareTo(path2); - }); + .sorted((path1, path2) -> path1.compareTo(path2)); List partitionPathList = partitionPathStream.collect(Collectors.toList()); assertEquals(2, partitionPathList.size()); - assertTrue(partitionPathList.get(0).toString().indexOf("key1=part123/key2=supp123/key3=1") > -1); - assertTrue(partitionPathList.get(1).toString().indexOf("key1=part123/key2=supp123/key3=2") > -1); + 
assertTrue(partitionPathList.get(0).toString().endsWith("key1=part123/key2=supp123/key3=1")); + assertTrue(partitionPathList.get(1).toString().endsWith("key1=part123/key2=supp123/key3=2")); Stream partitionKeysStream = Stream.of(partitionPruningHandle.getPartitionKeys()) - .sorted((keys1, keys2) -> { - return keys1.compareTo(keys2); - }); + .sorted((keys1, keys2) -> keys1.compareTo(keys2)); List partitionKeysList = partitionKeysStream.collect(Collectors.toList()); assertEquals(2, partitionKeysList.size()); - assertTrue(partitionKeysList.get(0).toString().indexOf("key1=part123/key2=supp123/key3=1") > -1); - assertTrue(partitionKeysList.get(1).toString().indexOf("key1=part123/key2=supp123/key3=2") > -1); + assertEquals(partitionKeysList.get(0), ("key1=part123/key2=supp123/key3=1")); + assertEquals(partitionKeysList.get(1), ("key1=part123/key2=supp123/key3=2")); assertEquals(23L, partitionPruningHandle.getTotalVolume()); } diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java index 0459eed81d..3a20fb5264 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/fragment/PartitionFileFragment.java @@ -26,7 +26,6 @@ import org.apache.hadoop.fs.Path; import org.apache.tajo.BuiltinStorages; import org.apache.tajo.storage.StorageFragmentProtos.PartitionFileFragmentProto; -import org.apache.tajo.util.TUtil; import java.io.IOException; import java.util.ArrayList;