From d5fdf2f0e658928ac156d83ec4d4dbfb7f1c42e5 Mon Sep 17 00:00:00 2001 From: Jongyoung Park Date: Wed, 13 Jan 2016 16:33:08 +0900 Subject: [PATCH 1/8] lightweight improvement --- .../tajo/storage/index/bst/BSTIndex.java | 47 ++++++++++++------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/index/bst/BSTIndex.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/index/bst/BSTIndex.java index e0051f4c42..df28e0492b 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/index/bst/BSTIndex.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/index/bst/BSTIndex.java @@ -800,31 +800,46 @@ private int binarySearch(Tuple[] arr, Tuple key, int startPos, int endPos) { if (arr.length == 0) { LOG.error("arr.length: 0, loadNum: " + loadNum + ", inited: " + inited.get()); } + + correctable = false; while (true) { - if (comparator.compare(arr[centerPos], key) > 0) { + int compareResult = comparator.compare(arr[centerPos], key); + int subResult; + + if (compareResult > 0) { if (centerPos == 0) { - correctable = false; - break; - } else if (comparator.compare(arr[centerPos - 1], key) < 0) { - correctable = false; - offset = centerPos - 1; break; } else { - end = centerPos; - centerPos = (start + end) / 2; + subResult = comparator.compare(arr[centerPos - 1], key); + if (subResult < 0) { + offset = centerPos - 1; + break; + } else if (subResult == 0) { + correctable = true; + offset = centerPos - 1; + break; + } else { + end = centerPos - 1; + centerPos = (start + end) / 2; + } } - } else if (comparator.compare(arr[centerPos], key) < 0) { + } else if (compareResult < 0) { if (centerPos == arr.length - 1) { - correctable = false; - offset = centerPos; - break; - } else if (comparator.compare(arr[centerPos + 1], key) > 0) { - correctable = false; offset = centerPos; break; } else { - start = centerPos + 1; - centerPos = (start + end) / 2; + subResult = comparator.compare(arr[centerPos + 1], key); + if (subResult > 0) { + offset = centerPos; + break; + } else if (subResult == 0) { + correctable = true; + offset = centerPos + 1; + break; + } else { + start = centerPos + 1; + centerPos = (start + end) / 2; + } } } else { correctable = true; From a7b2686f1275e05866dc4d5563a8e0a224fb49e1 Mon Sep 17 00:00:00 2001 From: Jongyoung Park Date: Wed, 13 Jan 2016 18:22:25 +0900 Subject: [PATCH 2/8] Refine binary search --- .../tajo/storage/index/bst/BSTIndex.java | 65 ++++++++++--------- 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/index/bst/BSTIndex.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/index/bst/BSTIndex.java index df28e0492b..9406b8a269 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/index/bst/BSTIndex.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/index/bst/BSTIndex.java @@ -794,6 +794,7 @@ private int binarySearch(Tuple[] arr, Tuple key, int startPos, int endPos) { int offset = -1; int start = startPos; int end = endPos; + int prevCenter = -1; //http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6412541 int centerPos = (start + end) >>> 1; @@ -803,48 +804,52 @@ private int binarySearch(Tuple[] arr, Tuple key, int startPos, int endPos) { correctable = false; while (true) { - int compareResult = comparator.compare(arr[centerPos], key); - int subResult; + if (end - start == 1) { + int comp; + // prevCenter should be either end or start + if (end == prevCenter) { + comp = comparator.compare(arr[start], key); - if (compareResult > 0) { - if (centerPos == 0) { - break; - } else { - subResult = comparator.compare(arr[centerPos - 1], key); - if (subResult < 0) { - offset = centerPos - 1; - break; - } else if (subResult == 0) { + if (comp == 0) { correctable = true; - offset = centerPos - 1; - break; - } else { - end = centerPos - 1; - centerPos = (start + end) / 2; + offset = start; + } else if (comp < 0) { + offset = start; } - } - } else if (compareResult < 0) { - if (centerPos == arr.length - 1) { - offset = centerPos; break; } else { - subResult = comparator.compare(arr[centerPos + 1], key); - if (subResult > 0) { - offset = centerPos; + if (end == arr.length) { + offset = start; break; - } else if (subResult == 0) { + } + + comp = comparator.compare(arr[end], key); + if (comp == 0) { correctable = true; - offset = centerPos + 1; - break; - } else { - start = centerPos + 1; - centerPos = (start + end) / 2; + offset = end; + } else if (comp > 0) { + offset = start; } + break; } - } else { + } + + int compareResult = comparator.compare(arr[centerPos], key); + + if (compareResult == 0) { correctable = true; offset = centerPos; break; + } else { + prevCenter = centerPos; + + if (compareResult > 0) { + end = centerPos; + } else { + start = centerPos; + } + + centerPos = (start + end) / 2; } } return offset; From 58a59e832a8bba350814ab92ef421ae95f5be158 Mon Sep 17 00:00:00 2001 From: Jongyoung Park Date: Fri, 15 Jan 2016 16:39:56 +0900 Subject: [PATCH 3/8] test code --- .../org/apache/tajo/storage/index/TestBSTIndex.java | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/index/TestBSTIndex.java b/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/index/TestBSTIndex.java index a9d8ce27c0..78ce645660 100644 --- a/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/index/TestBSTIndex.java +++ b/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/index/TestBSTIndex.java @@ -49,7 +49,7 @@ public class TestBSTIndex { private Schema schema; private TableMeta meta; - private static final int TUPLE_NUM = 10000; + private static final int TUPLE_NUM = 1000000; private static final int LOAD_NUM = 100; private static final String TEST_PATH = "target/test-data/TestIndex"; private Path testDir; @@ -130,6 +130,7 @@ public void testFindValue() throws IOException { Tuple keyTuple; long offset; + long sum = 0; while (true) { keyTuple = new VTuple(2); offset = scanner.getNextOffset(); @@ -138,8 +139,12 @@ public void testFindValue() throws IOException { keyTuple.put(0, tuple.asDatum(1)); keyTuple.put(1, tuple.asDatum(2)); + long start = System.currentTimeMillis(); creater.write(keyTuple, offset); + long end = System.currentTimeMillis(); + sum += (end - start); } + System.out.println("write : "+(sum/1000)+" sec"); creater.flush(); creater.close(); @@ -155,7 +160,10 @@ public void testFindValue() throws IOException { for (int i = 0; i < TUPLE_NUM - 1; i++) { tuple.put(0, DatumFactory.createInt8(i)); tuple.put(1, DatumFactory.createFloat8(i)); + long start = System.currentTimeMillis(); long offsets = reader.find(tuple); + long end = System.currentTimeMillis(); + sum += end-start; scanner.seek(offsets); tuple = scanner.next(); assertTrue("seek check [" + (i) + " ," + (tuple.getInt8(1)) + "]", (i) == (tuple.getInt8(1))); @@ -170,6 +178,7 @@ public void testFindValue() throws IOException { assertTrue("[seek check " + (i + 1) + " ]", (i + 1) == (tuple.getInt4(0))); assertTrue("[seek check " + (i + 1) + " ]", (i + 1) == (tuple.getInt8(1))); } + System.out.println("find: "+(sum / 1000) + " sec"); reader.close(); scanner.close(); } From 74c857dcab827b662d565e13284f665723eb798c Mon Sep 17 00:00:00 2001 From: Jongyoung Park Date: Fri, 15 Jan 2016 21:05:10 +0900 Subject: [PATCH 4/8] Revert "test code" This reverts commit 58a59e832a8bba350814ab92ef421ae95f5be158. --- .../org/apache/tajo/storage/index/TestBSTIndex.java | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/index/TestBSTIndex.java b/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/index/TestBSTIndex.java index 78ce645660..a9d8ce27c0 100644 --- a/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/index/TestBSTIndex.java +++ b/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/index/TestBSTIndex.java @@ -49,7 +49,7 @@ public class TestBSTIndex { private Schema schema; private TableMeta meta; - private static final int TUPLE_NUM = 1000000; + private static final int TUPLE_NUM = 10000; private static final int LOAD_NUM = 100; private static final String TEST_PATH = "target/test-data/TestIndex"; private Path testDir; @@ -130,7 +130,6 @@ public void testFindValue() throws IOException { Tuple keyTuple; long offset; - long sum = 0; while (true) { keyTuple = new VTuple(2); offset = scanner.getNextOffset(); @@ -139,12 +138,8 @@ public void testFindValue() throws IOException { keyTuple.put(0, tuple.asDatum(1)); keyTuple.put(1, tuple.asDatum(2)); - long start = System.currentTimeMillis(); creater.write(keyTuple, offset); - long end = System.currentTimeMillis(); - sum += (end - start); } - System.out.println("write : "+(sum/1000)+" sec"); creater.flush(); creater.close(); @@ -160,10 +155,7 @@ public void testFindValue() throws IOException { for (int i = 0; i < TUPLE_NUM - 1; i++) { tuple.put(0, DatumFactory.createInt8(i)); tuple.put(1, DatumFactory.createFloat8(i)); - long start = System.currentTimeMillis(); long offsets = reader.find(tuple); - long end = System.currentTimeMillis(); - sum += end-start; scanner.seek(offsets); tuple = scanner.next(); assertTrue("seek check [" + (i) + " ," + (tuple.getInt8(1)) + "]", (i) == (tuple.getInt8(1))); @@ -178,7 +170,6 @@ public void testFindValue() throws IOException { assertTrue("[seek check " + (i + 1) + " ]", (i + 1) == (tuple.getInt4(0))); assertTrue("[seek check " + (i + 1) + " ]", (i + 1) == (tuple.getInt8(1))); } - System.out.println("find: "+(sum / 1000) + " sec"); reader.close(); scanner.close(); } From c5e2a5754c7c32c46bbefdbda3af57e08f6995ce Mon Sep 17 00:00:00 2001 From: Jongyoung Park Date: Sun, 17 Jan 2016 22:09:23 +0900 Subject: [PATCH 5/8] Fix for one item array --- .../main/java/org/apache/tajo/storage/index/bst/BSTIndex.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/index/bst/BSTIndex.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/index/bst/BSTIndex.java index 9406b8a269..5c5445120a 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/index/bst/BSTIndex.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/index/bst/BSTIndex.java @@ -819,6 +819,9 @@ private int binarySearch(Tuple[] arr, Tuple key, int startPos, int endPos) { break; } else { if (end == arr.length) { + if (comparator.compare(arr[start], key) == 0) { + correctable = true; + } offset = start; break; } From 4b26bfc734a4c360e2ece94fdfb5a7ff6bea820c Mon Sep 17 00:00:00 2001 From: Jongyoung Park Date: Wed, 20 Jan 2016 12:15:22 +0900 Subject: [PATCH 6/8] Handles when size of array is 1 --- .../apache/tajo/storage/index/bst/BSTIndex.java | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/index/bst/BSTIndex.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/index/bst/BSTIndex.java index 5c5445120a..e5c9afa463 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/index/bst/BSTIndex.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/index/bst/BSTIndex.java @@ -27,6 +27,9 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.tajo.catalog.Schema; import org.apache.tajo.catalog.proto.CatalogProtos.SchemaProto; +import org.apache.tajo.exception.TajoInternalError; +import org.apache.tajo.exception.TajoRuntimeException; +import org.apache.tajo.rpc.protocolrecords.PrimitiveProtos; import org.apache.tajo.storage.*; import org.apache.tajo.storage.RowStoreUtil.RowStoreDecoder; import org.apache.tajo.storage.RowStoreUtil.RowStoreEncoder; @@ -800,9 +803,23 @@ private int binarySearch(Tuple[] arr, Tuple key, int startPos, int endPos) { int centerPos = (start + end) >>> 1; if (arr.length == 0) { LOG.error("arr.length: 0, loadNum: " + loadNum + ", inited: " + inited.get()); + return -1; } correctable = false; + if (arr.length == 1) { + int comp = comparator.compare(arr[0], key); + + if (comp < 0) { + return 0; + } else if (comp > 0) { + return -1; + } + + correctable = true; + return 0; + } + while (true) { if (end - start == 1) { int comp; From 282e4b6b8f081ed72fd790488f92505a6ffcbd43 Mon Sep 17 00:00:00 2001 From: Jongyoung Park Date: Wed, 20 Jan 2016 12:16:24 +0900 Subject: [PATCH 7/8] Remove unused imports --- .../main/java/org/apache/tajo/storage/index/bst/BSTIndex.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/index/bst/BSTIndex.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/index/bst/BSTIndex.java index e5c9afa463..f4b32e9261 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/index/bst/BSTIndex.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/index/bst/BSTIndex.java @@ -27,9 +27,6 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.tajo.catalog.Schema; import org.apache.tajo.catalog.proto.CatalogProtos.SchemaProto; -import org.apache.tajo.exception.TajoInternalError; -import org.apache.tajo.exception.TajoRuntimeException; -import org.apache.tajo.rpc.protocolrecords.PrimitiveProtos; import org.apache.tajo.storage.*; import org.apache.tajo.storage.RowStoreUtil.RowStoreDecoder; import org.apache.tajo.storage.RowStoreUtil.RowStoreEncoder; From 0c51ddc1dcdda26d45da3bd286372e0379a90af9 Mon Sep 17 00:00:00 2001 From: Jongyoung Park Date: Wed, 24 Feb 2016 16:14:02 +0900 Subject: [PATCH 8/8] Use bit operation --- .../main/java/org/apache/tajo/storage/index/bst/BSTIndex.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/index/bst/BSTIndex.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/index/bst/BSTIndex.java index f4b32e9261..daf8f9855a 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/index/bst/BSTIndex.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/index/bst/BSTIndex.java @@ -866,7 +866,7 @@ private int binarySearch(Tuple[] arr, Tuple key, int startPos, int endPos) { start = centerPos; } - centerPos = (start + end) / 2; + centerPos = (start + end) >>> 1; } } return offset;