diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/ForUtil.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/ForUtil.java
new file mode 100644
index 000000000000..a4de8c525ef9
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/ForUtil.java
@@ -0,0 +1,1054 @@
+// This file has been automatically generated, DO NOT EDIT
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene90;
+
+import java.io.IOException;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
+
+// Inspired from https://fulmicoton.com/posts/bitpacking/
+// Encodes multiple integers in a long to get SIMD-like speedups.
+// If bitsPerValue <= 8 then we pack 8 ints per long
+// else if bitsPerValue <= 16 we pack 4 ints per long
+// else we pack 2 ints per long
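+// For example, with bitsPerValue <= 8 the 128 values of a block are carried by 16 longs:
+// long i holds values i, 16+i, 32+i, ..., 112+i, one per byte lane (see expand8/collapse8).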
+final class ForUtil {
+
+ static final int BLOCK_SIZE = 128;
+ private static final int BLOCK_SIZE_LOG2 = 7;
+
+ private static long expandMask32(long mask32) {
+ return mask32 | (mask32 << 32);
+ }
+
+ private static long expandMask16(long mask16) {
+ return expandMask32(mask16 | (mask16 << 16));
+ }
+
+ private static long expandMask8(long mask8) {
+ return expandMask16(mask8 | (mask8 << 8));
+ }
+
+ private static long mask32(int bitsPerValue) {
+ return expandMask32((1L << bitsPerValue) - 1);
+ }
+
+ private static long mask16(int bitsPerValue) {
+ return expandMask16((1L << bitsPerValue) - 1);
+ }
+
+ private static long mask8(int bitsPerValue) {
+ return expandMask8((1L << bitsPerValue) - 1);
+ }
+
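+  // Unpacks 16 longs holding 8 byte-wide lanes each into 128 individual values, one per
+  // array slot; collapse8 below is the inverse operation.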
+ private static void expand8(long[] arr) {
+ for (int i = 0; i < 16; ++i) {
+ long l = arr[i];
+ arr[i] = (l >>> 56) & 0xFFL;
+ arr[16 + i] = (l >>> 48) & 0xFFL;
+ arr[32 + i] = (l >>> 40) & 0xFFL;
+ arr[48 + i] = (l >>> 32) & 0xFFL;
+ arr[64 + i] = (l >>> 24) & 0xFFL;
+ arr[80 + i] = (l >>> 16) & 0xFFL;
+ arr[96 + i] = (l >>> 8) & 0xFFL;
+ arr[112 + i] = l & 0xFFL;
+ }
+ }
+
+ private static void expand8To32(long[] arr) {
+ for (int i = 0; i < 16; ++i) {
+ long l = arr[i];
+ arr[i] = (l >>> 24) & 0x000000FF000000FFL;
+ arr[16 + i] = (l >>> 16) & 0x000000FF000000FFL;
+ arr[32 + i] = (l >>> 8) & 0x000000FF000000FFL;
+ arr[48 + i] = l & 0x000000FF000000FFL;
+ }
+ }
+
+ private static void collapse8(long[] arr) {
+ for (int i = 0; i < 16; ++i) {
+ arr[i] =
+ (arr[i] << 56)
+ | (arr[16 + i] << 48)
+ | (arr[32 + i] << 40)
+ | (arr[48 + i] << 32)
+ | (arr[64 + i] << 24)
+ | (arr[80 + i] << 16)
+ | (arr[96 + i] << 8)
+ | arr[112 + i];
+ }
+ }
+
+ private static void expand16(long[] arr) {
+ for (int i = 0; i < 32; ++i) {
+ long l = arr[i];
+ arr[i] = (l >>> 48) & 0xFFFFL;
+ arr[32 + i] = (l >>> 32) & 0xFFFFL;
+ arr[64 + i] = (l >>> 16) & 0xFFFFL;
+ arr[96 + i] = l & 0xFFFFL;
+ }
+ }
+
+ private static void expand16To32(long[] arr) {
+ for (int i = 0; i < 32; ++i) {
+ long l = arr[i];
+ arr[i] = (l >>> 16) & 0x0000FFFF0000FFFFL;
+ arr[32 + i] = l & 0x0000FFFF0000FFFFL;
+ }
+ }
+
+ private static void collapse16(long[] arr) {
+ for (int i = 0; i < 32; ++i) {
+ arr[i] = (arr[i] << 48) | (arr[32 + i] << 32) | (arr[64 + i] << 16) | arr[96 + i];
+ }
+ }
+
+ private static void expand32(long[] arr) {
+ for (int i = 0; i < 64; ++i) {
+ long l = arr[i];
+ arr[i] = l >>> 32;
+ arr[64 + i] = l & 0xFFFFFFFFL;
+ }
+ }
+
+ private static void collapse32(long[] arr) {
+ for (int i = 0; i < 64; ++i) {
+ arr[i] = (arr[i] << 32) | arr[64 + i];
+ }
+ }
+
+ private final long[] tmp = new long[BLOCK_SIZE / 2];
+
+ /** Encode 128 integers from {@code longs} into {@code out}. */
+ void encode(long[] longs, int bitsPerValue, DataOutput out) throws IOException {
+ final int nextPrimitive;
+ final int numLongs;
+ if (bitsPerValue <= 8) {
+ nextPrimitive = 8;
+ numLongs = BLOCK_SIZE / 8;
+ collapse8(longs);
+ } else if (bitsPerValue <= 16) {
+ nextPrimitive = 16;
+ numLongs = BLOCK_SIZE / 4;
+ collapse16(longs);
+ } else {
+ nextPrimitive = 32;
+ numLongs = BLOCK_SIZE / 2;
+ collapse32(longs);
+ }
+
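+    // After collapsing, the packed output fits in bitsPerValue * 2 longs, i.e. exactly
+    // 128 * bitsPerValue bits.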
+ final int numLongsPerShift = bitsPerValue * 2;
+ int idx = 0;
+ int shift = nextPrimitive - bitsPerValue;
+ for (int i = 0; i < numLongsPerShift; ++i) {
+ tmp[i] = longs[idx++] << shift;
+ }
+ for (shift = shift - bitsPerValue; shift >= 0; shift -= bitsPerValue) {
+ for (int i = 0; i < numLongsPerShift; ++i) {
+ tmp[i] |= longs[idx++] << shift;
+ }
+ }
+
+ final int remainingBitsPerLong = shift + bitsPerValue;
+ final long maskRemainingBitsPerLong;
+ if (nextPrimitive == 8) {
+ maskRemainingBitsPerLong = MASKS8[remainingBitsPerLong];
+ } else if (nextPrimitive == 16) {
+ maskRemainingBitsPerLong = MASKS16[remainingBitsPerLong];
+ } else {
+ maskRemainingBitsPerLong = MASKS32[remainingBitsPerLong];
+ }
+
+ int tmpIdx = 0;
+ int remainingBitsPerValue = bitsPerValue;
+ while (idx < numLongs) {
+ if (remainingBitsPerValue >= remainingBitsPerLong) {
+ remainingBitsPerValue -= remainingBitsPerLong;
+ tmp[tmpIdx++] |= (longs[idx] >>> remainingBitsPerValue) & maskRemainingBitsPerLong;
+ if (remainingBitsPerValue == 0) {
+ idx++;
+ remainingBitsPerValue = bitsPerValue;
+ }
+ } else {
+ final long mask1, mask2;
+ if (nextPrimitive == 8) {
+ mask1 = MASKS8[remainingBitsPerValue];
+ mask2 = MASKS8[remainingBitsPerLong - remainingBitsPerValue];
+ } else if (nextPrimitive == 16) {
+ mask1 = MASKS16[remainingBitsPerValue];
+ mask2 = MASKS16[remainingBitsPerLong - remainingBitsPerValue];
+ } else {
+ mask1 = MASKS32[remainingBitsPerValue];
+ mask2 = MASKS32[remainingBitsPerLong - remainingBitsPerValue];
+ }
+ tmp[tmpIdx] |= (longs[idx++] & mask1) << (remainingBitsPerLong - remainingBitsPerValue);
+ remainingBitsPerValue = bitsPerValue - remainingBitsPerLong + remainingBitsPerValue;
+ tmp[tmpIdx++] |= (longs[idx] >>> remainingBitsPerValue) & mask2;
+ }
+ }
+
+ for (int i = 0; i < numLongsPerShift; ++i) {
+ out.writeLong(tmp[i]);
+ }
+ }
+
+ /** Number of bytes required to encode 128 integers of {@code bitsPerValue} bits per value. */
+ int numBytes(int bitsPerValue) {
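+    // 128 values of bitsPerValue bits each = 16 * bitsPerValue bytes, i.e. bitsPerValue << 4.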
+ return bitsPerValue << (BLOCK_SIZE_LOG2 - 3);
+ }
+
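+  // Generic decode path used for bitsPerValue > 24: unpacks into 64 longs that each hold two
+  // values in 32-bit lanes.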
+ private static void decodeSlow(int bitsPerValue, DataInput in, long[] tmp, long[] longs)
+ throws IOException {
+ final int numLongs = bitsPerValue << 1;
+ in.readLongs(tmp, 0, numLongs);
+ final long mask = MASKS32[bitsPerValue];
+ int longsIdx = 0;
+ int shift = 32 - bitsPerValue;
+ for (; shift >= 0; shift -= bitsPerValue) {
+ shiftLongs(tmp, numLongs, longs, longsIdx, shift, mask);
+ longsIdx += numLongs;
+ }
+ final int remainingBitsPerLong = shift + bitsPerValue;
+ final long mask32RemainingBitsPerLong = MASKS32[remainingBitsPerLong];
+ int tmpIdx = 0;
+ int remainingBits = remainingBitsPerLong;
+ for (; longsIdx < BLOCK_SIZE / 2; ++longsIdx) {
+ int b = bitsPerValue - remainingBits;
+ long l = (tmp[tmpIdx++] & MASKS32[remainingBits]) << b;
+ while (b >= remainingBitsPerLong) {
+ b -= remainingBitsPerLong;
+ l |= (tmp[tmpIdx++] & mask32RemainingBitsPerLong) << b;
+ }
+ if (b > 0) {
+ l |= (tmp[tmpIdx] >>> (remainingBitsPerLong - b)) & MASKS32[b];
+ remainingBits = remainingBitsPerLong - b;
+ } else {
+ remainingBits = remainingBitsPerLong;
+ }
+ longs[longsIdx] = l;
+ }
+ }
+
+ /**
+ * The pattern that this shiftLongs method applies is recognized by the C2 compiler, which
+ * generates SIMD instructions for it in order to shift multiple longs at once.
+ */
+ private static void shiftLongs(long[] a, int count, long[] b, int bi, int shift, long mask) {
+ for (int i = 0; i < count; ++i) {
+ b[bi + i] = (a[i] >>> shift) & mask;
+ }
+ }
+
+ private static final long[] MASKS8 = new long[8];
+ private static final long[] MASKS16 = new long[16];
+ private static final long[] MASKS32 = new long[32];
+
+ static {
+ for (int i = 0; i < 8; ++i) {
+ MASKS8[i] = mask8(i);
+ }
+ for (int i = 0; i < 16; ++i) {
+ MASKS16[i] = mask16(i);
+ }
+ for (int i = 0; i < 32; ++i) {
+ MASKS32[i] = mask32(i);
+ }
+ }
+  // Mark values in the arrays as final longs to avoid the cost of reading the array; the
+  // arrays should only be used when the index is a variable.
+ private static final long MASK8_1 = MASKS8[1];
+ private static final long MASK8_2 = MASKS8[2];
+ private static final long MASK8_3 = MASKS8[3];
+ private static final long MASK8_4 = MASKS8[4];
+ private static final long MASK8_5 = MASKS8[5];
+ private static final long MASK8_6 = MASKS8[6];
+ private static final long MASK8_7 = MASKS8[7];
+ private static final long MASK16_1 = MASKS16[1];
+ private static final long MASK16_2 = MASKS16[2];
+ private static final long MASK16_3 = MASKS16[3];
+ private static final long MASK16_4 = MASKS16[4];
+ private static final long MASK16_5 = MASKS16[5];
+ private static final long MASK16_6 = MASKS16[6];
+ private static final long MASK16_7 = MASKS16[7];
+ private static final long MASK16_9 = MASKS16[9];
+ private static final long MASK16_10 = MASKS16[10];
+ private static final long MASK16_11 = MASKS16[11];
+ private static final long MASK16_12 = MASKS16[12];
+ private static final long MASK16_13 = MASKS16[13];
+ private static final long MASK16_14 = MASKS16[14];
+ private static final long MASK16_15 = MASKS16[15];
+ private static final long MASK32_1 = MASKS32[1];
+ private static final long MASK32_2 = MASKS32[2];
+ private static final long MASK32_3 = MASKS32[3];
+ private static final long MASK32_4 = MASKS32[4];
+ private static final long MASK32_5 = MASKS32[5];
+ private static final long MASK32_6 = MASKS32[6];
+ private static final long MASK32_7 = MASKS32[7];
+ private static final long MASK32_8 = MASKS32[8];
+ private static final long MASK32_9 = MASKS32[9];
+ private static final long MASK32_10 = MASKS32[10];
+ private static final long MASK32_11 = MASKS32[11];
+ private static final long MASK32_12 = MASKS32[12];
+ private static final long MASK32_13 = MASKS32[13];
+ private static final long MASK32_14 = MASKS32[14];
+ private static final long MASK32_15 = MASKS32[15];
+ private static final long MASK32_17 = MASKS32[17];
+ private static final long MASK32_18 = MASKS32[18];
+ private static final long MASK32_19 = MASKS32[19];
+ private static final long MASK32_20 = MASKS32[20];
+ private static final long MASK32_21 = MASKS32[21];
+ private static final long MASK32_22 = MASKS32[22];
+ private static final long MASK32_23 = MASKS32[23];
+ private static final long MASK32_24 = MASKS32[24];
+
+ /** Decode 128 integers into {@code longs}. */
+ void decode(int bitsPerValue, DataInput in, long[] longs) throws IOException {
+ switch (bitsPerValue) {
+ case 1:
+ decode1(in, tmp, longs);
+ expand8(longs);
+ break;
+ case 2:
+ decode2(in, tmp, longs);
+ expand8(longs);
+ break;
+ case 3:
+ decode3(in, tmp, longs);
+ expand8(longs);
+ break;
+ case 4:
+ decode4(in, tmp, longs);
+ expand8(longs);
+ break;
+ case 5:
+ decode5(in, tmp, longs);
+ expand8(longs);
+ break;
+ case 6:
+ decode6(in, tmp, longs);
+ expand8(longs);
+ break;
+ case 7:
+ decode7(in, tmp, longs);
+ expand8(longs);
+ break;
+ case 8:
+ decode8(in, tmp, longs);
+ expand8(longs);
+ break;
+ case 9:
+ decode9(in, tmp, longs);
+ expand16(longs);
+ break;
+ case 10:
+ decode10(in, tmp, longs);
+ expand16(longs);
+ break;
+ case 11:
+ decode11(in, tmp, longs);
+ expand16(longs);
+ break;
+ case 12:
+ decode12(in, tmp, longs);
+ expand16(longs);
+ break;
+ case 13:
+ decode13(in, tmp, longs);
+ expand16(longs);
+ break;
+ case 14:
+ decode14(in, tmp, longs);
+ expand16(longs);
+ break;
+ case 15:
+ decode15(in, tmp, longs);
+ expand16(longs);
+ break;
+ case 16:
+ decode16(in, tmp, longs);
+ expand16(longs);
+ break;
+ case 17:
+ decode17(in, tmp, longs);
+ expand32(longs);
+ break;
+ case 18:
+ decode18(in, tmp, longs);
+ expand32(longs);
+ break;
+ case 19:
+ decode19(in, tmp, longs);
+ expand32(longs);
+ break;
+ case 20:
+ decode20(in, tmp, longs);
+ expand32(longs);
+ break;
+ case 21:
+ decode21(in, tmp, longs);
+ expand32(longs);
+ break;
+ case 22:
+ decode22(in, tmp, longs);
+ expand32(longs);
+ break;
+ case 23:
+ decode23(in, tmp, longs);
+ expand32(longs);
+ break;
+ case 24:
+ decode24(in, tmp, longs);
+ expand32(longs);
+ break;
+ default:
+ decodeSlow(bitsPerValue, in, tmp, longs);
+ expand32(longs);
+ break;
+ }
+ }
+
+ /**
+ * Decodes 128 integers into 64 {@code longs} such that each long contains two values, each
+ * represented with 32 bits. Values [0..63] are encoded in the high-order bits of {@code longs}
+ * [0..63], and values [64..127] are encoded in the low-order bits of {@code longs} [0..63]. This
+ * representation may allow subsequent operations to be performed on two values at a time.
+ */
+ void decodeTo32(int bitsPerValue, DataInput in, long[] longs) throws IOException {
+ switch (bitsPerValue) {
+ case 1:
+ decode1(in, tmp, longs);
+ expand8To32(longs);
+ break;
+ case 2:
+ decode2(in, tmp, longs);
+ expand8To32(longs);
+ break;
+ case 3:
+ decode3(in, tmp, longs);
+ expand8To32(longs);
+ break;
+ case 4:
+ decode4(in, tmp, longs);
+ expand8To32(longs);
+ break;
+ case 5:
+ decode5(in, tmp, longs);
+ expand8To32(longs);
+ break;
+ case 6:
+ decode6(in, tmp, longs);
+ expand8To32(longs);
+ break;
+ case 7:
+ decode7(in, tmp, longs);
+ expand8To32(longs);
+ break;
+ case 8:
+ decode8(in, tmp, longs);
+ expand8To32(longs);
+ break;
+ case 9:
+ decode9(in, tmp, longs);
+ expand16To32(longs);
+ break;
+ case 10:
+ decode10(in, tmp, longs);
+ expand16To32(longs);
+ break;
+ case 11:
+ decode11(in, tmp, longs);
+ expand16To32(longs);
+ break;
+ case 12:
+ decode12(in, tmp, longs);
+ expand16To32(longs);
+ break;
+ case 13:
+ decode13(in, tmp, longs);
+ expand16To32(longs);
+ break;
+ case 14:
+ decode14(in, tmp, longs);
+ expand16To32(longs);
+ break;
+ case 15:
+ decode15(in, tmp, longs);
+ expand16To32(longs);
+ break;
+ case 16:
+ decode16(in, tmp, longs);
+ expand16To32(longs);
+ break;
+ case 17:
+ decode17(in, tmp, longs);
+ break;
+ case 18:
+ decode18(in, tmp, longs);
+ break;
+ case 19:
+ decode19(in, tmp, longs);
+ break;
+ case 20:
+ decode20(in, tmp, longs);
+ break;
+ case 21:
+ decode21(in, tmp, longs);
+ break;
+ case 22:
+ decode22(in, tmp, longs);
+ break;
+ case 23:
+ decode23(in, tmp, longs);
+ break;
+ case 24:
+ decode24(in, tmp, longs);
+ break;
+ default:
+ decodeSlow(bitsPerValue, in, tmp, longs);
+ break;
+ }
+ }
+
+ private static void decode1(DataInput in, long[] tmp, long[] longs) throws IOException {
+ in.readLongs(tmp, 0, 2);
+ shiftLongs(tmp, 2, longs, 0, 7, MASK8_1);
+ shiftLongs(tmp, 2, longs, 2, 6, MASK8_1);
+ shiftLongs(tmp, 2, longs, 4, 5, MASK8_1);
+ shiftLongs(tmp, 2, longs, 6, 4, MASK8_1);
+ shiftLongs(tmp, 2, longs, 8, 3, MASK8_1);
+ shiftLongs(tmp, 2, longs, 10, 2, MASK8_1);
+ shiftLongs(tmp, 2, longs, 12, 1, MASK8_1);
+ shiftLongs(tmp, 2, longs, 14, 0, MASK8_1);
+ }
+
+ private static void decode2(DataInput in, long[] tmp, long[] longs) throws IOException {
+ in.readLongs(tmp, 0, 4);
+ shiftLongs(tmp, 4, longs, 0, 6, MASK8_2);
+ shiftLongs(tmp, 4, longs, 4, 4, MASK8_2);
+ shiftLongs(tmp, 4, longs, 8, 2, MASK8_2);
+ shiftLongs(tmp, 4, longs, 12, 0, MASK8_2);
+ }
+
+ private static void decode3(DataInput in, long[] tmp, long[] longs) throws IOException {
+ in.readLongs(tmp, 0, 6);
+ shiftLongs(tmp, 6, longs, 0, 5, MASK8_3);
+ shiftLongs(tmp, 6, longs, 6, 2, MASK8_3);
+ for (int iter = 0, tmpIdx = 0, longsIdx = 12; iter < 2; ++iter, tmpIdx += 3, longsIdx += 2) {
+ long l0 = (tmp[tmpIdx + 0] & MASK8_2) << 1;
+ l0 |= (tmp[tmpIdx + 1] >>> 1) & MASK8_1;
+ longs[longsIdx + 0] = l0;
+ long l1 = (tmp[tmpIdx + 1] & MASK8_1) << 2;
+ l1 |= (tmp[tmpIdx + 2] & MASK8_2) << 0;
+ longs[longsIdx + 1] = l1;
+ }
+ }
+
+ private static void decode4(DataInput in, long[] tmp, long[] longs) throws IOException {
+ in.readLongs(tmp, 0, 8);
+ shiftLongs(tmp, 8, longs, 0, 4, MASK8_4);
+ shiftLongs(tmp, 8, longs, 8, 0, MASK8_4);
+ }
+
+ private static void decode5(DataInput in, long[] tmp, long[] longs) throws IOException {
+ in.readLongs(tmp, 0, 10);
+ shiftLongs(tmp, 10, longs, 0, 3, MASK8_5);
+ for (int iter = 0, tmpIdx = 0, longsIdx = 10; iter < 2; ++iter, tmpIdx += 5, longsIdx += 3) {
+ long l0 = (tmp[tmpIdx + 0] & MASK8_3) << 2;
+ l0 |= (tmp[tmpIdx + 1] >>> 1) & MASK8_2;
+ longs[longsIdx + 0] = l0;
+ long l1 = (tmp[tmpIdx + 1] & MASK8_1) << 4;
+ l1 |= (tmp[tmpIdx + 2] & MASK8_3) << 1;
+ l1 |= (tmp[tmpIdx + 3] >>> 2) & MASK8_1;
+ longs[longsIdx + 1] = l1;
+ long l2 = (tmp[tmpIdx + 3] & MASK8_2) << 3;
+ l2 |= (tmp[tmpIdx + 4] & MASK8_3) << 0;
+ longs[longsIdx + 2] = l2;
+ }
+ }
+
+ private static void decode6(DataInput in, long[] tmp, long[] longs) throws IOException {
+ in.readLongs(tmp, 0, 12);
+ shiftLongs(tmp, 12, longs, 0, 2, MASK8_6);
+ shiftLongs(tmp, 12, tmp, 0, 0, MASK8_2);
+ for (int iter = 0, tmpIdx = 0, longsIdx = 12; iter < 4; ++iter, tmpIdx += 3, longsIdx += 1) {
+ long l0 = tmp[tmpIdx + 0] << 4;
+ l0 |= tmp[tmpIdx + 1] << 2;
+ l0 |= tmp[tmpIdx + 2] << 0;
+ longs[longsIdx + 0] = l0;
+ }
+ }
+
+ private static void decode7(DataInput in, long[] tmp, long[] longs) throws IOException {
+ in.readLongs(tmp, 0, 14);
+ shiftLongs(tmp, 14, longs, 0, 1, MASK8_7);
+ shiftLongs(tmp, 14, tmp, 0, 0, MASK8_1);
+ for (int iter = 0, tmpIdx = 0, longsIdx = 14; iter < 2; ++iter, tmpIdx += 7, longsIdx += 1) {
+ long l0 = tmp[tmpIdx + 0] << 6;
+ l0 |= tmp[tmpIdx + 1] << 5;
+ l0 |= tmp[tmpIdx + 2] << 4;
+ l0 |= tmp[tmpIdx + 3] << 3;
+ l0 |= tmp[tmpIdx + 4] << 2;
+ l0 |= tmp[tmpIdx + 5] << 1;
+ l0 |= tmp[tmpIdx + 6] << 0;
+ longs[longsIdx + 0] = l0;
+ }
+ }
+
+ private static void decode8(DataInput in, long[] tmp, long[] longs) throws IOException {
+ in.readLongs(longs, 0, 16);
+ }
+
+ private static void decode9(DataInput in, long[] tmp, long[] longs) throws IOException {
+ in.readLongs(tmp, 0, 18);
+ shiftLongs(tmp, 18, longs, 0, 7, MASK16_9);
+ for (int iter = 0, tmpIdx = 0, longsIdx = 18; iter < 2; ++iter, tmpIdx += 9, longsIdx += 7) {
+ long l0 = (tmp[tmpIdx + 0] & MASK16_7) << 2;
+ l0 |= (tmp[tmpIdx + 1] >>> 5) & MASK16_2;
+ longs[longsIdx + 0] = l0;
+ long l1 = (tmp[tmpIdx + 1] & MASK16_5) << 4;
+ l1 |= (tmp[tmpIdx + 2] >>> 3) & MASK16_4;
+ longs[longsIdx + 1] = l1;
+ long l2 = (tmp[tmpIdx + 2] & MASK16_3) << 6;
+ l2 |= (tmp[tmpIdx + 3] >>> 1) & MASK16_6;
+ longs[longsIdx + 2] = l2;
+ long l3 = (tmp[tmpIdx + 3] & MASK16_1) << 8;
+ l3 |= (tmp[tmpIdx + 4] & MASK16_7) << 1;
+ l3 |= (tmp[tmpIdx + 5] >>> 6) & MASK16_1;
+ longs[longsIdx + 3] = l3;
+ long l4 = (tmp[tmpIdx + 5] & MASK16_6) << 3;
+ l4 |= (tmp[tmpIdx + 6] >>> 4) & MASK16_3;
+ longs[longsIdx + 4] = l4;
+ long l5 = (tmp[tmpIdx + 6] & MASK16_4) << 5;
+ l5 |= (tmp[tmpIdx + 7] >>> 2) & MASK16_5;
+ longs[longsIdx + 5] = l5;
+ long l6 = (tmp[tmpIdx + 7] & MASK16_2) << 7;
+ l6 |= (tmp[tmpIdx + 8] & MASK16_7) << 0;
+ longs[longsIdx + 6] = l6;
+ }
+ }
+
+ private static void decode10(DataInput in, long[] tmp, long[] longs) throws IOException {
+ in.readLongs(tmp, 0, 20);
+ shiftLongs(tmp, 20, longs, 0, 6, MASK16_10);
+ for (int iter = 0, tmpIdx = 0, longsIdx = 20; iter < 4; ++iter, tmpIdx += 5, longsIdx += 3) {
+ long l0 = (tmp[tmpIdx + 0] & MASK16_6) << 4;
+ l0 |= (tmp[tmpIdx + 1] >>> 2) & MASK16_4;
+ longs[longsIdx + 0] = l0;
+ long l1 = (tmp[tmpIdx + 1] & MASK16_2) << 8;
+ l1 |= (tmp[tmpIdx + 2] & MASK16_6) << 2;
+ l1 |= (tmp[tmpIdx + 3] >>> 4) & MASK16_2;
+ longs[longsIdx + 1] = l1;
+ long l2 = (tmp[tmpIdx + 3] & MASK16_4) << 6;
+ l2 |= (tmp[tmpIdx + 4] & MASK16_6) << 0;
+ longs[longsIdx + 2] = l2;
+ }
+ }
+
+ private static void decode11(DataInput in, long[] tmp, long[] longs) throws IOException {
+ in.readLongs(tmp, 0, 22);
+ shiftLongs(tmp, 22, longs, 0, 5, MASK16_11);
+ for (int iter = 0, tmpIdx = 0, longsIdx = 22; iter < 2; ++iter, tmpIdx += 11, longsIdx += 5) {
+ long l0 = (tmp[tmpIdx + 0] & MASK16_5) << 6;
+ l0 |= (tmp[tmpIdx + 1] & MASK16_5) << 1;
+ l0 |= (tmp[tmpIdx + 2] >>> 4) & MASK16_1;
+ longs[longsIdx + 0] = l0;
+ long l1 = (tmp[tmpIdx + 2] & MASK16_4) << 7;
+ l1 |= (tmp[tmpIdx + 3] & MASK16_5) << 2;
+ l1 |= (tmp[tmpIdx + 4] >>> 3) & MASK16_2;
+ longs[longsIdx + 1] = l1;
+ long l2 = (tmp[tmpIdx + 4] & MASK16_3) << 8;
+ l2 |= (tmp[tmpIdx + 5] & MASK16_5) << 3;
+ l2 |= (tmp[tmpIdx + 6] >>> 2) & MASK16_3;
+ longs[longsIdx + 2] = l2;
+ long l3 = (tmp[tmpIdx + 6] & MASK16_2) << 9;
+ l3 |= (tmp[tmpIdx + 7] & MASK16_5) << 4;
+ l3 |= (tmp[tmpIdx + 8] >>> 1) & MASK16_4;
+ longs[longsIdx + 3] = l3;
+ long l4 = (tmp[tmpIdx + 8] & MASK16_1) << 10;
+ l4 |= (tmp[tmpIdx + 9] & MASK16_5) << 5;
+ l4 |= (tmp[tmpIdx + 10] & MASK16_5) << 0;
+ longs[longsIdx + 4] = l4;
+ }
+ }
+
+ private static void decode12(DataInput in, long[] tmp, long[] longs) throws IOException {
+ in.readLongs(tmp, 0, 24);
+ shiftLongs(tmp, 24, longs, 0, 4, MASK16_12);
+ shiftLongs(tmp, 24, tmp, 0, 0, MASK16_4);
+ for (int iter = 0, tmpIdx = 0, longsIdx = 24; iter < 8; ++iter, tmpIdx += 3, longsIdx += 1) {
+ long l0 = tmp[tmpIdx + 0] << 8;
+ l0 |= tmp[tmpIdx + 1] << 4;
+ l0 |= tmp[tmpIdx + 2] << 0;
+ longs[longsIdx + 0] = l0;
+ }
+ }
+
+ private static void decode13(DataInput in, long[] tmp, long[] longs) throws IOException {
+ in.readLongs(tmp, 0, 26);
+ shiftLongs(tmp, 26, longs, 0, 3, MASK16_13);
+ for (int iter = 0, tmpIdx = 0, longsIdx = 26; iter < 2; ++iter, tmpIdx += 13, longsIdx += 3) {
+ long l0 = (tmp[tmpIdx + 0] & MASK16_3) << 10;
+ l0 |= (tmp[tmpIdx + 1] & MASK16_3) << 7;
+ l0 |= (tmp[tmpIdx + 2] & MASK16_3) << 4;
+ l0 |= (tmp[tmpIdx + 3] & MASK16_3) << 1;
+ l0 |= (tmp[tmpIdx + 4] >>> 2) & MASK16_1;
+ longs[longsIdx + 0] = l0;
+ long l1 = (tmp[tmpIdx + 4] & MASK16_2) << 11;
+ l1 |= (tmp[tmpIdx + 5] & MASK16_3) << 8;
+ l1 |= (tmp[tmpIdx + 6] & MASK16_3) << 5;
+ l1 |= (tmp[tmpIdx + 7] & MASK16_3) << 2;
+ l1 |= (tmp[tmpIdx + 8] >>> 1) & MASK16_2;
+ longs[longsIdx + 1] = l1;
+ long l2 = (tmp[tmpIdx + 8] & MASK16_1) << 12;
+ l2 |= (tmp[tmpIdx + 9] & MASK16_3) << 9;
+ l2 |= (tmp[tmpIdx + 10] & MASK16_3) << 6;
+ l2 |= (tmp[tmpIdx + 11] & MASK16_3) << 3;
+ l2 |= (tmp[tmpIdx + 12] & MASK16_3) << 0;
+ longs[longsIdx + 2] = l2;
+ }
+ }
+
+ private static void decode14(DataInput in, long[] tmp, long[] longs) throws IOException {
+ in.readLongs(tmp, 0, 28);
+ shiftLongs(tmp, 28, longs, 0, 2, MASK16_14);
+ shiftLongs(tmp, 28, tmp, 0, 0, MASK16_2);
+ for (int iter = 0, tmpIdx = 0, longsIdx = 28; iter < 4; ++iter, tmpIdx += 7, longsIdx += 1) {
+ long l0 = tmp[tmpIdx + 0] << 12;
+ l0 |= tmp[tmpIdx + 1] << 10;
+ l0 |= tmp[tmpIdx + 2] << 8;
+ l0 |= tmp[tmpIdx + 3] << 6;
+ l0 |= tmp[tmpIdx + 4] << 4;
+ l0 |= tmp[tmpIdx + 5] << 2;
+ l0 |= tmp[tmpIdx + 6] << 0;
+ longs[longsIdx + 0] = l0;
+ }
+ }
+
+ private static void decode15(DataInput in, long[] tmp, long[] longs) throws IOException {
+ in.readLongs(tmp, 0, 30);
+ shiftLongs(tmp, 30, longs, 0, 1, MASK16_15);
+ shiftLongs(tmp, 30, tmp, 0, 0, MASK16_1);
+ for (int iter = 0, tmpIdx = 0, longsIdx = 30; iter < 2; ++iter, tmpIdx += 15, longsIdx += 1) {
+ long l0 = tmp[tmpIdx + 0] << 14;
+ l0 |= tmp[tmpIdx + 1] << 13;
+ l0 |= tmp[tmpIdx + 2] << 12;
+ l0 |= tmp[tmpIdx + 3] << 11;
+ l0 |= tmp[tmpIdx + 4] << 10;
+ l0 |= tmp[tmpIdx + 5] << 9;
+ l0 |= tmp[tmpIdx + 6] << 8;
+ l0 |= tmp[tmpIdx + 7] << 7;
+ l0 |= tmp[tmpIdx + 8] << 6;
+ l0 |= tmp[tmpIdx + 9] << 5;
+ l0 |= tmp[tmpIdx + 10] << 4;
+ l0 |= tmp[tmpIdx + 11] << 3;
+ l0 |= tmp[tmpIdx + 12] << 2;
+ l0 |= tmp[tmpIdx + 13] << 1;
+ l0 |= tmp[tmpIdx + 14] << 0;
+ longs[longsIdx + 0] = l0;
+ }
+ }
+
+ private static void decode16(DataInput in, long[] tmp, long[] longs) throws IOException {
+ in.readLongs(longs, 0, 32);
+ }
+
+ private static void decode17(DataInput in, long[] tmp, long[] longs) throws IOException {
+ in.readLongs(tmp, 0, 34);
+ shiftLongs(tmp, 34, longs, 0, 15, MASK32_17);
+ for (int iter = 0, tmpIdx = 0, longsIdx = 34; iter < 2; ++iter, tmpIdx += 17, longsIdx += 15) {
+ long l0 = (tmp[tmpIdx + 0] & MASK32_15) << 2;
+ l0 |= (tmp[tmpIdx + 1] >>> 13) & MASK32_2;
+ longs[longsIdx + 0] = l0;
+ long l1 = (tmp[tmpIdx + 1] & MASK32_13) << 4;
+ l1 |= (tmp[tmpIdx + 2] >>> 11) & MASK32_4;
+ longs[longsIdx + 1] = l1;
+ long l2 = (tmp[tmpIdx + 2] & MASK32_11) << 6;
+ l2 |= (tmp[tmpIdx + 3] >>> 9) & MASK32_6;
+ longs[longsIdx + 2] = l2;
+ long l3 = (tmp[tmpIdx + 3] & MASK32_9) << 8;
+ l3 |= (tmp[tmpIdx + 4] >>> 7) & MASK32_8;
+ longs[longsIdx + 3] = l3;
+ long l4 = (tmp[tmpIdx + 4] & MASK32_7) << 10;
+ l4 |= (tmp[tmpIdx + 5] >>> 5) & MASK32_10;
+ longs[longsIdx + 4] = l4;
+ long l5 = (tmp[tmpIdx + 5] & MASK32_5) << 12;
+ l5 |= (tmp[tmpIdx + 6] >>> 3) & MASK32_12;
+ longs[longsIdx + 5] = l5;
+ long l6 = (tmp[tmpIdx + 6] & MASK32_3) << 14;
+ l6 |= (tmp[tmpIdx + 7] >>> 1) & MASK32_14;
+ longs[longsIdx + 6] = l6;
+ long l7 = (tmp[tmpIdx + 7] & MASK32_1) << 16;
+ l7 |= (tmp[tmpIdx + 8] & MASK32_15) << 1;
+ l7 |= (tmp[tmpIdx + 9] >>> 14) & MASK32_1;
+ longs[longsIdx + 7] = l7;
+ long l8 = (tmp[tmpIdx + 9] & MASK32_14) << 3;
+ l8 |= (tmp[tmpIdx + 10] >>> 12) & MASK32_3;
+ longs[longsIdx + 8] = l8;
+ long l9 = (tmp[tmpIdx + 10] & MASK32_12) << 5;
+ l9 |= (tmp[tmpIdx + 11] >>> 10) & MASK32_5;
+ longs[longsIdx + 9] = l9;
+ long l10 = (tmp[tmpIdx + 11] & MASK32_10) << 7;
+ l10 |= (tmp[tmpIdx + 12] >>> 8) & MASK32_7;
+ longs[longsIdx + 10] = l10;
+ long l11 = (tmp[tmpIdx + 12] & MASK32_8) << 9;
+ l11 |= (tmp[tmpIdx + 13] >>> 6) & MASK32_9;
+ longs[longsIdx + 11] = l11;
+ long l12 = (tmp[tmpIdx + 13] & MASK32_6) << 11;
+ l12 |= (tmp[tmpIdx + 14] >>> 4) & MASK32_11;
+ longs[longsIdx + 12] = l12;
+ long l13 = (tmp[tmpIdx + 14] & MASK32_4) << 13;
+ l13 |= (tmp[tmpIdx + 15] >>> 2) & MASK32_13;
+ longs[longsIdx + 13] = l13;
+ long l14 = (tmp[tmpIdx + 15] & MASK32_2) << 15;
+ l14 |= (tmp[tmpIdx + 16] & MASK32_15) << 0;
+ longs[longsIdx + 14] = l14;
+ }
+ }
+
+ private static void decode18(DataInput in, long[] tmp, long[] longs) throws IOException {
+ in.readLongs(tmp, 0, 36);
+ shiftLongs(tmp, 36, longs, 0, 14, MASK32_18);
+ for (int iter = 0, tmpIdx = 0, longsIdx = 36; iter < 4; ++iter, tmpIdx += 9, longsIdx += 7) {
+ long l0 = (tmp[tmpIdx + 0] & MASK32_14) << 4;
+ l0 |= (tmp[tmpIdx + 1] >>> 10) & MASK32_4;
+ longs[longsIdx + 0] = l0;
+ long l1 = (tmp[tmpIdx + 1] & MASK32_10) << 8;
+ l1 |= (tmp[tmpIdx + 2] >>> 6) & MASK32_8;
+ longs[longsIdx + 1] = l1;
+ long l2 = (tmp[tmpIdx + 2] & MASK32_6) << 12;
+ l2 |= (tmp[tmpIdx + 3] >>> 2) & MASK32_12;
+ longs[longsIdx + 2] = l2;
+ long l3 = (tmp[tmpIdx + 3] & MASK32_2) << 16;
+ l3 |= (tmp[tmpIdx + 4] & MASK32_14) << 2;
+ l3 |= (tmp[tmpIdx + 5] >>> 12) & MASK32_2;
+ longs[longsIdx + 3] = l3;
+ long l4 = (tmp[tmpIdx + 5] & MASK32_12) << 6;
+ l4 |= (tmp[tmpIdx + 6] >>> 8) & MASK32_6;
+ longs[longsIdx + 4] = l4;
+ long l5 = (tmp[tmpIdx + 6] & MASK32_8) << 10;
+ l5 |= (tmp[tmpIdx + 7] >>> 4) & MASK32_10;
+ longs[longsIdx + 5] = l5;
+ long l6 = (tmp[tmpIdx + 7] & MASK32_4) << 14;
+ l6 |= (tmp[tmpIdx + 8] & MASK32_14) << 0;
+ longs[longsIdx + 6] = l6;
+ }
+ }
+
+ private static void decode19(DataInput in, long[] tmp, long[] longs) throws IOException {
+ in.readLongs(tmp, 0, 38);
+ shiftLongs(tmp, 38, longs, 0, 13, MASK32_19);
+ for (int iter = 0, tmpIdx = 0, longsIdx = 38; iter < 2; ++iter, tmpIdx += 19, longsIdx += 13) {
+ long l0 = (tmp[tmpIdx + 0] & MASK32_13) << 6;
+ l0 |= (tmp[tmpIdx + 1] >>> 7) & MASK32_6;
+ longs[longsIdx + 0] = l0;
+ long l1 = (tmp[tmpIdx + 1] & MASK32_7) << 12;
+ l1 |= (tmp[tmpIdx + 2] >>> 1) & MASK32_12;
+ longs[longsIdx + 1] = l1;
+ long l2 = (tmp[tmpIdx + 2] & MASK32_1) << 18;
+ l2 |= (tmp[tmpIdx + 3] & MASK32_13) << 5;
+ l2 |= (tmp[tmpIdx + 4] >>> 8) & MASK32_5;
+ longs[longsIdx + 2] = l2;
+ long l3 = (tmp[tmpIdx + 4] & MASK32_8) << 11;
+ l3 |= (tmp[tmpIdx + 5] >>> 2) & MASK32_11;
+ longs[longsIdx + 3] = l3;
+ long l4 = (tmp[tmpIdx + 5] & MASK32_2) << 17;
+ l4 |= (tmp[tmpIdx + 6] & MASK32_13) << 4;
+ l4 |= (tmp[tmpIdx + 7] >>> 9) & MASK32_4;
+ longs[longsIdx + 4] = l4;
+ long l5 = (tmp[tmpIdx + 7] & MASK32_9) << 10;
+ l5 |= (tmp[tmpIdx + 8] >>> 3) & MASK32_10;
+ longs[longsIdx + 5] = l5;
+ long l6 = (tmp[tmpIdx + 8] & MASK32_3) << 16;
+ l6 |= (tmp[tmpIdx + 9] & MASK32_13) << 3;
+ l6 |= (tmp[tmpIdx + 10] >>> 10) & MASK32_3;
+ longs[longsIdx + 6] = l6;
+ long l7 = (tmp[tmpIdx + 10] & MASK32_10) << 9;
+ l7 |= (tmp[tmpIdx + 11] >>> 4) & MASK32_9;
+ longs[longsIdx + 7] = l7;
+ long l8 = (tmp[tmpIdx + 11] & MASK32_4) << 15;
+ l8 |= (tmp[tmpIdx + 12] & MASK32_13) << 2;
+ l8 |= (tmp[tmpIdx + 13] >>> 11) & MASK32_2;
+ longs[longsIdx + 8] = l8;
+ long l9 = (tmp[tmpIdx + 13] & MASK32_11) << 8;
+ l9 |= (tmp[tmpIdx + 14] >>> 5) & MASK32_8;
+ longs[longsIdx + 9] = l9;
+ long l10 = (tmp[tmpIdx + 14] & MASK32_5) << 14;
+ l10 |= (tmp[tmpIdx + 15] & MASK32_13) << 1;
+ l10 |= (tmp[tmpIdx + 16] >>> 12) & MASK32_1;
+ longs[longsIdx + 10] = l10;
+ long l11 = (tmp[tmpIdx + 16] & MASK32_12) << 7;
+ l11 |= (tmp[tmpIdx + 17] >>> 6) & MASK32_7;
+ longs[longsIdx + 11] = l11;
+ long l12 = (tmp[tmpIdx + 17] & MASK32_6) << 13;
+ l12 |= (tmp[tmpIdx + 18] & MASK32_13) << 0;
+ longs[longsIdx + 12] = l12;
+ }
+ }
+
+ private static void decode20(DataInput in, long[] tmp, long[] longs) throws IOException {
+ in.readLongs(tmp, 0, 40);
+ shiftLongs(tmp, 40, longs, 0, 12, MASK32_20);
+ for (int iter = 0, tmpIdx = 0, longsIdx = 40; iter < 8; ++iter, tmpIdx += 5, longsIdx += 3) {
+ long l0 = (tmp[tmpIdx + 0] & MASK32_12) << 8;
+ l0 |= (tmp[tmpIdx + 1] >>> 4) & MASK32_8;
+ longs[longsIdx + 0] = l0;
+ long l1 = (tmp[tmpIdx + 1] & MASK32_4) << 16;
+ l1 |= (tmp[tmpIdx + 2] & MASK32_12) << 4;
+ l1 |= (tmp[tmpIdx + 3] >>> 8) & MASK32_4;
+ longs[longsIdx + 1] = l1;
+ long l2 = (tmp[tmpIdx + 3] & MASK32_8) << 12;
+ l2 |= (tmp[tmpIdx + 4] & MASK32_12) << 0;
+ longs[longsIdx + 2] = l2;
+ }
+ }
+
+ private static void decode21(DataInput in, long[] tmp, long[] longs) throws IOException {
+ in.readLongs(tmp, 0, 42);
+ shiftLongs(tmp, 42, longs, 0, 11, MASK32_21);
+ for (int iter = 0, tmpIdx = 0, longsIdx = 42; iter < 2; ++iter, tmpIdx += 21, longsIdx += 11) {
+ long l0 = (tmp[tmpIdx + 0] & MASK32_11) << 10;
+ l0 |= (tmp[tmpIdx + 1] >>> 1) & MASK32_10;
+ longs[longsIdx + 0] = l0;
+ long l1 = (tmp[tmpIdx + 1] & MASK32_1) << 20;
+ l1 |= (tmp[tmpIdx + 2] & MASK32_11) << 9;
+ l1 |= (tmp[tmpIdx + 3] >>> 2) & MASK32_9;
+ longs[longsIdx + 1] = l1;
+ long l2 = (tmp[tmpIdx + 3] & MASK32_2) << 19;
+ l2 |= (tmp[tmpIdx + 4] & MASK32_11) << 8;
+ l2 |= (tmp[tmpIdx + 5] >>> 3) & MASK32_8;
+ longs[longsIdx + 2] = l2;
+ long l3 = (tmp[tmpIdx + 5] & MASK32_3) << 18;
+ l3 |= (tmp[tmpIdx + 6] & MASK32_11) << 7;
+ l3 |= (tmp[tmpIdx + 7] >>> 4) & MASK32_7;
+ longs[longsIdx + 3] = l3;
+ long l4 = (tmp[tmpIdx + 7] & MASK32_4) << 17;
+ l4 |= (tmp[tmpIdx + 8] & MASK32_11) << 6;
+ l4 |= (tmp[tmpIdx + 9] >>> 5) & MASK32_6;
+ longs[longsIdx + 4] = l4;
+ long l5 = (tmp[tmpIdx + 9] & MASK32_5) << 16;
+ l5 |= (tmp[tmpIdx + 10] & MASK32_11) << 5;
+ l5 |= (tmp[tmpIdx + 11] >>> 6) & MASK32_5;
+ longs[longsIdx + 5] = l5;
+ long l6 = (tmp[tmpIdx + 11] & MASK32_6) << 15;
+ l6 |= (tmp[tmpIdx + 12] & MASK32_11) << 4;
+ l6 |= (tmp[tmpIdx + 13] >>> 7) & MASK32_4;
+ longs[longsIdx + 6] = l6;
+ long l7 = (tmp[tmpIdx + 13] & MASK32_7) << 14;
+ l7 |= (tmp[tmpIdx + 14] & MASK32_11) << 3;
+ l7 |= (tmp[tmpIdx + 15] >>> 8) & MASK32_3;
+ longs[longsIdx + 7] = l7;
+ long l8 = (tmp[tmpIdx + 15] & MASK32_8) << 13;
+ l8 |= (tmp[tmpIdx + 16] & MASK32_11) << 2;
+ l8 |= (tmp[tmpIdx + 17] >>> 9) & MASK32_2;
+ longs[longsIdx + 8] = l8;
+ long l9 = (tmp[tmpIdx + 17] & MASK32_9) << 12;
+ l9 |= (tmp[tmpIdx + 18] & MASK32_11) << 1;
+ l9 |= (tmp[tmpIdx + 19] >>> 10) & MASK32_1;
+ longs[longsIdx + 9] = l9;
+ long l10 = (tmp[tmpIdx + 19] & MASK32_10) << 11;
+ l10 |= (tmp[tmpIdx + 20] & MASK32_11) << 0;
+ longs[longsIdx + 10] = l10;
+ }
+ }
+
+ private static void decode22(DataInput in, long[] tmp, long[] longs) throws IOException {
+ in.readLongs(tmp, 0, 44);
+ shiftLongs(tmp, 44, longs, 0, 10, MASK32_22);
+ for (int iter = 0, tmpIdx = 0, longsIdx = 44; iter < 4; ++iter, tmpIdx += 11, longsIdx += 5) {
+ long l0 = (tmp[tmpIdx + 0] & MASK32_10) << 12;
+ l0 |= (tmp[tmpIdx + 1] & MASK32_10) << 2;
+ l0 |= (tmp[tmpIdx + 2] >>> 8) & MASK32_2;
+ longs[longsIdx + 0] = l0;
+ long l1 = (tmp[tmpIdx + 2] & MASK32_8) << 14;
+ l1 |= (tmp[tmpIdx + 3] & MASK32_10) << 4;
+ l1 |= (tmp[tmpIdx + 4] >>> 6) & MASK32_4;
+ longs[longsIdx + 1] = l1;
+ long l2 = (tmp[tmpIdx + 4] & MASK32_6) << 16;
+ l2 |= (tmp[tmpIdx + 5] & MASK32_10) << 6;
+ l2 |= (tmp[tmpIdx + 6] >>> 4) & MASK32_6;
+ longs[longsIdx + 2] = l2;
+ long l3 = (tmp[tmpIdx + 6] & MASK32_4) << 18;
+ l3 |= (tmp[tmpIdx + 7] & MASK32_10) << 8;
+ l3 |= (tmp[tmpIdx + 8] >>> 2) & MASK32_8;
+ longs[longsIdx + 3] = l3;
+ long l4 = (tmp[tmpIdx + 8] & MASK32_2) << 20;
+ l4 |= (tmp[tmpIdx + 9] & MASK32_10) << 10;
+ l4 |= (tmp[tmpIdx + 10] & MASK32_10) << 0;
+ longs[longsIdx + 4] = l4;
+ }
+ }
+
+ private static void decode23(DataInput in, long[] tmp, long[] longs) throws IOException {
+ in.readLongs(tmp, 0, 46);
+ shiftLongs(tmp, 46, longs, 0, 9, MASK32_23);
+ for (int iter = 0, tmpIdx = 0, longsIdx = 46; iter < 2; ++iter, tmpIdx += 23, longsIdx += 9) {
+ long l0 = (tmp[tmpIdx + 0] & MASK32_9) << 14;
+ l0 |= (tmp[tmpIdx + 1] & MASK32_9) << 5;
+ l0 |= (tmp[tmpIdx + 2] >>> 4) & MASK32_5;
+ longs[longsIdx + 0] = l0;
+ long l1 = (tmp[tmpIdx + 2] & MASK32_4) << 19;
+ l1 |= (tmp[tmpIdx + 3] & MASK32_9) << 10;
+ l1 |= (tmp[tmpIdx + 4] & MASK32_9) << 1;
+ l1 |= (tmp[tmpIdx + 5] >>> 8) & MASK32_1;
+ longs[longsIdx + 1] = l1;
+ long l2 = (tmp[tmpIdx + 5] & MASK32_8) << 15;
+ l2 |= (tmp[tmpIdx + 6] & MASK32_9) << 6;
+ l2 |= (tmp[tmpIdx + 7] >>> 3) & MASK32_6;
+ longs[longsIdx + 2] = l2;
+ long l3 = (tmp[tmpIdx + 7] & MASK32_3) << 20;
+ l3 |= (tmp[tmpIdx + 8] & MASK32_9) << 11;
+ l3 |= (tmp[tmpIdx + 9] & MASK32_9) << 2;
+ l3 |= (tmp[tmpIdx + 10] >>> 7) & MASK32_2;
+ longs[longsIdx + 3] = l3;
+ long l4 = (tmp[tmpIdx + 10] & MASK32_7) << 16;
+ l4 |= (tmp[tmpIdx + 11] & MASK32_9) << 7;
+ l4 |= (tmp[tmpIdx + 12] >>> 2) & MASK32_7;
+ longs[longsIdx + 4] = l4;
+ long l5 = (tmp[tmpIdx + 12] & MASK32_2) << 21;
+ l5 |= (tmp[tmpIdx + 13] & MASK32_9) << 12;
+ l5 |= (tmp[tmpIdx + 14] & MASK32_9) << 3;
+ l5 |= (tmp[tmpIdx + 15] >>> 6) & MASK32_3;
+ longs[longsIdx + 5] = l5;
+ long l6 = (tmp[tmpIdx + 15] & MASK32_6) << 17;
+ l6 |= (tmp[tmpIdx + 16] & MASK32_9) << 8;
+ l6 |= (tmp[tmpIdx + 17] >>> 1) & MASK32_8;
+ longs[longsIdx + 6] = l6;
+ long l7 = (tmp[tmpIdx + 17] & MASK32_1) << 22;
+ l7 |= (tmp[tmpIdx + 18] & MASK32_9) << 13;
+ l7 |= (tmp[tmpIdx + 19] & MASK32_9) << 4;
+ l7 |= (tmp[tmpIdx + 20] >>> 5) & MASK32_4;
+ longs[longsIdx + 7] = l7;
+ long l8 = (tmp[tmpIdx + 20] & MASK32_5) << 18;
+ l8 |= (tmp[tmpIdx + 21] & MASK32_9) << 9;
+ l8 |= (tmp[tmpIdx + 22] & MASK32_9) << 0;
+ longs[longsIdx + 8] = l8;
+ }
+ }
+
+ private static void decode24(DataInput in, long[] tmp, long[] longs) throws IOException {
+ in.readLongs(tmp, 0, 48);
+ shiftLongs(tmp, 48, longs, 0, 8, MASK32_24);
+ shiftLongs(tmp, 48, tmp, 0, 0, MASK32_8);
+ for (int iter = 0, tmpIdx = 0, longsIdx = 48; iter < 16; ++iter, tmpIdx += 3, longsIdx += 1) {
+ long l0 = tmp[tmpIdx + 0] << 16;
+ l0 |= tmp[tmpIdx + 1] << 8;
+ l0 |= tmp[tmpIdx + 2] << 0;
+ longs[longsIdx + 0] = l0;
+ }
+ }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PostingsFormat.java
new file mode 100644
index 000000000000..77dc6626cc3f
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PostingsFormat.java
@@ -0,0 +1,507 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene90;
+
+import java.io.IOException;
+import org.apache.lucene.codecs.BlockTermState;
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.MultiLevelSkipListWriter;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.PostingsReaderBase;
+import org.apache.lucene.codecs.PostingsWriterBase;
+import org.apache.lucene.codecs.lucene90.blocktree.Lucene90BlockTreeTermsReader;
+import org.apache.lucene.codecs.lucene90.blocktree.Lucene90BlockTreeTermsWriter;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.TermState;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.packed.PackedInts;
+
+/**
+ * Lucene 9.0 postings format, which encodes postings in packed integer blocks for fast decode.
+ *
+ * Basic idea:
+ *
+ * - Packed Blocks and VInt Blocks:
+ *
+ *   In packed blocks, integers are encoded with the same bit width ({@link PackedInts packed
+ *   format}): the block size (i.e. number of integers inside block) is fixed (currently 128).
+ *   Additionally, blocks that are all the same value are encoded in an optimized way.
+ *
+ *   In VInt blocks, integers are encoded as {@link DataOutput#writeVInt VInt}: the block
+ *   size is variable.
+ *
+ * - Block structure:
+ *
+ *   When the postings are long enough, Lucene90PostingsFormat will try to encode most
+ *   integer data as a packed block.
+ *
+ *   Take a term with 259 documents as an example: the first 256 document ids are encoded as
+ *   two packed blocks, while the remaining 3 are encoded as one VInt block.
+ *
+ *   Different kinds of data are always encoded separately into different packed blocks, but
+ *   may possibly be interleaved into the same VInt block.
+ *
+ *   This strategy is applied to pairs: <document number, frequency>, <position,
+ *   payload length>, <position, offset start, offset length>, and <position,
+ *   payload length, offset start, offset length>.
+ *
+ * - Skipdata settings:
+ *
+ *   The structure of the skip table is quite similar to previous versions of Lucene. The skip
+ *   interval is the same as the block size, and each skip entry points to the beginning of each
+ *   block. However, for the first block, skip data is omitted.
+ *
+ * - Positions, Payloads, and Offsets:
+ *
+ *   A position is an integer indicating where the term occurs within one document. A payload
+ *   is a blob of metadata associated with the current position. An offset is a pair of integers
+ *   indicating the tokenized start/end offsets for the given term at the current position: it is
+ *   essentially a specialized payload.
+ *
+ *   When payloads and offsets are not omitted, numPositions==numPayloads==numOffsets
+ *   (assuming a null payload contributes one count). As mentioned in the block structure, it is
+ *   possible to encode these three either combined or separately.
+ *
+ *   In all cases, payloads and offsets are stored together. When encoded as a packed block,
+ *   position data is separated out into .pos, while payloads and offsets are encoded in .pay
+ *   (payload metadata will also be stored directly in .pay). When encoded as VInt blocks, all
+ *   three are stored interleaved into .pos (as is payload metadata).
+ *
+ *   With this strategy, the majority of payload and offset data lives outside the .pos file.
+ *   So for queries that require only position data, running on a full index with payloads and
+ *   offsets, this reduces disk pre-fetches.
+ *
+ * Files and detailed format:
+ *
+ * - .tim: Term Dictionary
+ * - .tip: Term Index
+ * - .doc: Frequencies and Skip Data
+ * - .pos: Positions
+ * - .pay: Payloads and Offsets
+ *
+ * - Term Dictionary
+ *
+ *   The .tim file contains the list of terms in each field along with per-term statistics
+ *   (such as docfreq) and pointers to the frequencies, positions, payload and skip data in the
+ *   .doc, .pos, and .pay files. See {@link Lucene90BlockTreeTermsWriter} for more details on
+ *   the format.
+ *
+ *   NOTE: The term dictionary can plug into different postings implementations: the postings
+ *   writer/reader are actually responsible for encoding and decoding the PostingsHeader and
+ *   TermMetadata sections described here:
+ *
+ *   - PostingsHeader --> Header, PackedBlockSize
+ *   - TermMetadata --> (DocFPDelta|SingletonDocID), PosFPDelta?, PosVIntBlockFPDelta?,
+ *     PayFPDelta?, SkipFPDelta?
+ *   - Header --> {@link CodecUtil#writeIndexHeader IndexHeader}
+ *   - PackedBlockSize, SingletonDocID --> {@link DataOutput#writeVInt VInt}
+ *   - DocFPDelta, PosFPDelta, PayFPDelta, PosVIntBlockFPDelta, SkipFPDelta --> {@link
+ *     DataOutput#writeVLong VLong}
+ *   - Footer --> {@link CodecUtil#writeFooter CodecFooter}
+ *
+ *   Notes:
+ *
+ *   - Header is a {@link CodecUtil#writeIndexHeader IndexHeader} storing the version
+ *     information for the postings.
+ *   - PackedBlockSize is the fixed block size for packed blocks. In a packed block, the bit
+ *     width is determined by the largest integer. A smaller block size results in smaller
+ *     variance among the widths of integers, hence smaller indexes. A larger block size
+ *     results in more efficient bulk I/O, hence better acceleration. This value should always
+ *     be a multiple of 64, currently fixed as 128 as a tradeoff. It is also the skip interval
+ *     used to accelerate {@link org.apache.lucene.index.PostingsEnum#advance(int)}.
+ *   - DocFPDelta determines the position of this term's TermFreqs within the .doc file. In
+ *     particular, it is the difference in file offset between this term's data and the previous
+ *     term's data (or zero, for the first term in the block). On disk it is stored as the
+ *     difference from the previous value in the sequence.
+ *   - PosFPDelta determines the position of this term's TermPositions within the .pos file,
+ *     while PayFPDelta determines the position of this term's <TermPayloads,
+ *     TermOffsets?> within the .pay file. Similar to DocFPDelta, each is the difference
+ *     between two file positions (or neglected, for fields that omit payloads and offsets).
+ *   - PosVIntBlockFPDelta determines the position of this term's last TermPosition in the
+ *     last packed position block within the .pos file. It is a synonym for PayVIntBlockFPDelta
+ *     or OffsetVIntBlockFPDelta. It is used to indicate whether it is necessary to load the
+ *     following payloads and offsets from .pos instead of .pay. Every time a new block of
+ *     positions is to be loaded, the PostingsReader uses this value to check whether the
+ *     current block is in packed format or VInt. When in packed format, payloads and offsets
+ *     are fetched from .pay, otherwise from .pos. (This value is neglected when the total
+ *     number of positions, i.e. totalTermFreq, is less than or equal to PackedBlockSize.)
+ *   - SkipFPDelta determines the position of this term's SkipData within the .doc file. In
+ *     particular, it is the length of the TermFreq data. SkipDelta is only stored if DocFreq
+ *     is not smaller than SkipMinimum (i.e. 128 in Lucene90PostingsFormat).
+ *   - SingletonDocID is an optimization when a term only appears in one document. In this
+ *     case, instead of writing a file pointer to the .doc file (DocFPDelta), and then a
+ *     VIntBlock at that location, the single document ID is written to the term dictionary.
+ *
+ * - Term Index
+ *
+ *   The .tip file contains an index into the term dictionary, so that it can be accessed
+ *   randomly. See {@link Lucene90BlockTreeTermsWriter} for more details on the format.
+ *
+ * - Frequencies and Skip Data
+ *
+ *   The .doc file contains the lists of documents which contain each term, along with the
+ *   frequency of the term in that document (except when frequencies are omitted: {@link
+ *   IndexOptions#DOCS}). It also saves skip data to the beginning of each packed or VInt block,
+ *   when the length of the document list is larger than the packed block size.
+ *
+ *   - docFile(.doc) --> Header, <TermFreqs, SkipData?>^TermCount, Footer
+ *   - Header --> {@link CodecUtil#writeIndexHeader IndexHeader}
+ *   - TermFreqs --> <PackedBlock>^PackedDocBlockNum, VIntBlock?
+ *   - PackedBlock --> PackedDocDeltaBlock, PackedFreqBlock?
+ *   - VIntBlock --> <DocDelta[, Freq?]>^(DocFreq - PackedBlockSize*PackedDocBlockNum)
+ *   - SkipData --> <<SkipLevelLength, SkipLevel>^(NumSkipLevels-1), SkipLevel>, SkipDatum?
+ *   - SkipLevel --> <SkipDatum>^(TrimmedDocFreq/(PackedBlockSize^(Level + 1)))
+ *   - SkipDatum --> DocSkip, DocFPSkip, <PosFPSkip, PosBlockOffset, PayLength?,
+ *     PayFPSkip?>?, ImpactLength, <CompetitiveFreqDelta, CompetitiveNormDelta?>^ImpactCount,
+ *     SkipChildLevelPointer?
+ *   - PackedDocDeltaBlock, PackedFreqBlock --> {@link PackedInts PackedInts}
+ *   - DocDelta, Freq, DocSkip, DocFPSkip, PosFPSkip, PosBlockOffset, PayByteUpto,
+ *     PayFPSkip, ImpactLength, CompetitiveFreqDelta --> {@link DataOutput#writeVInt VInt}
+ *   - CompetitiveNormDelta --> {@link DataOutput#writeZLong ZLong}
+ *   - SkipChildLevelPointer --> {@link DataOutput#writeVLong VLong}
+ *   - Footer --> {@link CodecUtil#writeFooter CodecFooter}
+ *
+ *   (Here <X>^Count denotes Count repetitions of <X>.)
+ *
+ *   Notes:
+ *
+ *   - PackedDocDeltaBlock is theoretically generated from two steps:
+ *
+ *     1. Calculate the difference between each document number and the previous one, and get
+ *        a d-gaps list (for the first document, use the absolute value);
+ *     2. For those d-gaps from the first one to the (PackedDocBlockNum*PackedBlockSize)-th,
+ *        encode them separately as packed blocks.
+ *
+ *     If frequencies are not omitted, PackedFreqBlock will be generated without the d-gap step.
+ *   - VIntBlock stores the remaining d-gaps (along with frequencies when possible) with a
+ *     format that encodes DocDelta and Freq:
+ *
+ *     DocDelta: if frequencies are indexed, this determines both the document number and
+ *     the frequency. In particular, DocDelta/2 is the difference between this document
+ *     number and the previous document number (or zero when this is the first document in a
+ *     TermFreqs). When DocDelta is odd, the frequency is one. When DocDelta is even, the
+ *     frequency is read as another VInt. If frequencies are omitted, DocDelta contains the
+ *     gap (not multiplied by 2) between document numbers and no frequency information is
+ *     stored.
+ *
+ *     For example, the TermFreqs for a term which occurs once in document seven and
+ *     three times in document eleven, with frequencies indexed, would be the following
+ *     sequence of VInts:
+ *
+ *     15, 8, 3
+ *
+ *     (15 = 7*2+1: document seven with an implied frequency of one; 8 = (11-7)*2: an even
+ *     delta, so the frequency 3 follows as its own VInt.)
+ *
+ *     If frequencies were omitted ({@link IndexOptions#DOCS}) it would be this sequence
+ *     of VInts instead:
+ *
+ *     7, 4
+ *   - PackedDocBlockNum is the number of packed blocks for the current term's docids or
+ *     frequencies. In particular, PackedDocBlockNum = floor(DocFreq/PackedBlockSize).
+ *   - TrimmedDocFreq = DocFreq % PackedBlockSize == 0 ? DocFreq - 1 : DocFreq. We use this
+ *     trick since the definition of a skip entry is a little different from the base
+ *     interface. In {@link MultiLevelSkipListWriter}, skip data is assumed to be saved for
+ *     the skipInterval-th, 2*skipInterval-th ... posting in the list. However, in
+ *     Lucene90PostingsFormat, the skip data is saved for the (skipInterval+1)-th,
+ *     (2*skipInterval+1)-th ... posting (skipInterval==PackedBlockSize in this case). When
+ *     DocFreq is a multiple of PackedBlockSize, MultiLevelSkipListWriter will expect one more
+ *     skip datum than Lucene90SkipWriter.
+ *   - SkipDatum is the metadata of one skip entry. For the first block (no matter packed or
+ *     VInt), it is omitted.
+ *   - DocSkip records every PackedBlockSize-th document number in the postings (i.e. the
+ *     last document number in each packed block). On disk it is stored as the difference
+ *     from the previous value in the sequence.
+ *   - DocFPSkip records the file offset of each block (excluding the first one), i.e. of the
+ *     posting at the (PackedBlockSize+1)-th, (2*PackedBlockSize+1)-th ... position, in the
+ *     .doc file. The file offsets are relative to the start of the current term's TermFreqs.
+ *     On disk it is also stored as the difference from the previous SkipDatum in the sequence.
+ *   - Since positions and payloads are also block encoded, a skip should first skip to the
+ *     related block, then fetch the value according to the in-block offset. PosFPSkip and
+ *     PayFPSkip record the file offsets of the related block in .pos and .pay, respectively,
+ *     while PosBlockOffset indicates which value to fetch inside the related block
+ *     (PayBlockOffset is unnecessary since it is always equal to PosBlockOffset). Same as
+ *     DocFPSkip, the file offsets are relative to the start of the current term's TermFreqs,
+ *     and stored as a difference sequence.
+ *   - PayByteUpto indicates the start offset of the current payload. It is equivalent to
+ *     the sum of the payload lengths in the current block up to PosBlockOffset.
+ *   - ImpactLength is the total length of the CompetitiveFreqDelta and CompetitiveNormDelta
+ *     pairs. CompetitiveFreqDelta and CompetitiveNormDelta are used to safely skip score
+ *     calculation for uncompetitive documents; see {@link
+ *     org.apache.lucene.codecs.CompetitiveImpactAccumulator} for more details.
+ *
+ * - Positions
+ *
+ *   The .pos file contains the lists of positions that each term occurs at within documents.
+ *   It also sometimes stores part of the payloads and offsets for speedup.
+ *
+ *   - PosFile(.pos) --> Header, <TermPositions>^TermCount, Footer
+ *   - Header --> {@link CodecUtil#writeIndexHeader IndexHeader}
+ *   - TermPositions --> <PackedPosDeltaBlock>^PackedPosBlockNum, VIntBlock?
+ *   - VIntBlock --> <PositionDelta[, PayloadLength?], PayloadData?, OffsetDelta?,
+ *     OffsetLength?>^PosVIntCount
+ *   - PackedPosDeltaBlock --> {@link PackedInts PackedInts}
+ *   - PositionDelta, OffsetDelta, OffsetLength --> {@link DataOutput#writeVInt VInt}
+ *   - PayloadData --> {@link DataOutput#writeByte byte}^PayLength
+ *   - Footer --> {@link CodecUtil#writeFooter CodecFooter}
+ *
+ *   Notes:
+ *
+ *   - TermPositions are ordered by term (terms are implicit, from the term dictionary), and
+ *     position values for each term-document pair are incremental and ordered by document
+ *     number.
+ *   - PackedPosBlockNum is the number of packed blocks for the current term's positions,
+ *     payloads or offsets. In particular, PackedPosBlockNum =
+ *     floor(totalTermFreq/PackedBlockSize).
+ *   - PosVIntCount is the number of positions encoded in VInt format. In particular,
+ *     PosVIntCount = totalTermFreq - PackedPosBlockNum*PackedBlockSize.
+ *   - PackedPosDeltaBlock is generated using the same procedure as PackedDocDeltaBlock in
+ *     the chapter Frequencies and Skip Data.
+ *   - PositionDelta is, if payloads are disabled for the term's field, the difference
+ *     between the position of the current occurrence in the document and the previous
+ *     occurrence (or zero, if this is the first occurrence in this document). If payloads
+ *     are enabled for the term's field, then PositionDelta/2 is the difference between the
+ *     current and the previous position. If payloads are enabled and PositionDelta is odd,
+ *     then PayloadLength is stored, indicating the length of the payload at the current
+ *     term position.
+ *   - For example, the TermPositions for a term which occurs as the fourth term in one
+ *     document, and as the fifth and ninth term in a subsequent document, would be the
+ *     following sequence of VInts (payloads disabled):
+ *
+ *     4, 5, 4
+ *
+ *     (4 is the position in the first document; deltas restart in the next document, giving
+ *     5 and then 9-5=4.)
+ *   - PayloadData is metadata associated with the current term position. If PayloadLength
+ *     is stored at the current position, then it indicates the length of this payload. If
+ *     PayloadLength is not stored, then this payload has the same length as the payload at
+ *     the previous position.
+ *   - OffsetDelta/2 is the difference between this position's startOffset and that of the
+ *     previous occurrence (or zero, if this is the first occurrence in this document). If
+ *     OffsetDelta is odd, then the length (endOffset-startOffset) differs from the previous
+ *     occurrence and an OffsetLength follows. Offset data is only written for {@link
+ *     IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}.
+ *
+ * - Payloads and Offsets
+ *
+ *   The .pay file stores payloads and offsets associated with certain term-document
+ *   positions. Some payloads and offsets are separated out into the .pos file, for performance
+ *   reasons.
+ *
+ *   - PayFile(.pay) --> Header, <TermPayloads?, TermOffsets?>^TermCount, Footer
+ *   - Header --> {@link CodecUtil#writeIndexHeader IndexHeader}
+ *   - TermPayloads --> <PackedPayLengthBlock, SumPayLength, PayData>^PackedPayBlockNum
+ *   - TermOffsets --> <PackedOffsetStartDeltaBlock, PackedOffsetLengthBlock>^PackedPayBlockNum
+ *   - PackedPayLengthBlock, PackedOffsetStartDeltaBlock, PackedOffsetLengthBlock -->
+ *     {@link PackedInts PackedInts}
+ *   - SumPayLength --> {@link DataOutput#writeVInt VInt}
+ *   - PayData --> {@link DataOutput#writeByte byte}^SumPayLength
+ *   - Footer --> {@link CodecUtil#writeFooter CodecFooter}
+ *
+ *   Notes:
+ *
+ *   - The order of TermPayloads/TermOffsets is the same as TermPositions; note that part of
+ *     the payloads/offsets are stored in .pos.
+ *   - PackedPayLengthBlock and PackedOffsetLengthBlock are generated using the same procedure
+ *     as PackedFreqBlock in the chapter Frequencies and Skip Data, while
+ *     PackedOffsetStartDeltaBlock follows the same procedure as PackedDocDeltaBlock.
+ *   - PackedPayBlockNum is always equal to PackedPosBlockNum, for the same term. It is also
+ *     a synonym for PackedOffsetBlockNum.
+ *   - SumPayLength is the total length of payloads written within one block; it should be the
+ *     sum of the PayLengths in one packed block.
+ *   - PayLength in PackedPayLengthBlock is the length of each payload associated with the
+ *     current position.
+ *
+ * @lucene.experimental
+ */
+public final class Lucene90PostingsFormat extends PostingsFormat {
+
+ /**
+ * Filename extension for document number, frequencies, and skip data. See chapter: Frequencies and Skip Data
+ */
+ public static final String DOC_EXTENSION = "doc";
+
+ /** Filename extension for positions. See chapter: Positions */
+ public static final String POS_EXTENSION = "pos";
+
+ /**
+ * Filename extension for payloads and offsets. See chapter: Payloads and
+ * Offsets
+ */
+ public static final String PAY_EXTENSION = "pay";
+
+ /** Size of blocks. */
+ public static final int BLOCK_SIZE = ForUtil.BLOCK_SIZE;
+
+ /**
+ * Expert: The maximum number of skip levels. Smaller values result in slightly smaller indexes,
+ * but slower skipping in big posting lists.
+ */
+ static final int MAX_SKIP_LEVELS = 10;
+
+ static final String TERMS_CODEC = "Lucene90PostingsWriterTerms";
+ static final String DOC_CODEC = "Lucene90PostingsWriterDoc";
+ static final String POS_CODEC = "Lucene90PostingsWriterPos";
+ static final String PAY_CODEC = "Lucene90PostingsWriterPay";
+
+ // Increment version to change it
+ static final int VERSION_START = 0;
+ static final int VERSION_CURRENT = VERSION_START;
+
+ private final int minTermBlockSize;
+ private final int maxTermBlockSize;
+
+ /** Creates {@code Lucene90PostingsFormat} with default settings. */
+ public Lucene90PostingsFormat() {
+ this(
+ Lucene90BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE,
+ Lucene90BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
+ }
+
+ /**
+ * Creates {@code Lucene90PostingsFormat} with custom values for {@code minBlockSize} and {@code
+ * maxBlockSize} passed to block terms dictionary.
+ *
+ * @see
+ * Lucene90BlockTreeTermsWriter#Lucene90BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int)
+ */
+ public Lucene90PostingsFormat(int minTermBlockSize, int maxTermBlockSize) {
+ super("Lucene90");
+ Lucene90BlockTreeTermsWriter.validateSettings(minTermBlockSize, maxTermBlockSize);
+ this.minTermBlockSize = minTermBlockSize;
+ this.maxTermBlockSize = maxTermBlockSize;
+ }
+
+ @Override
+ public String toString() {
+ return getName();
+ }
+
+ @Override
+ public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+ PostingsWriterBase postingsWriter = new Lucene90PostingsWriter(state);
+ boolean success = false;
+ try {
+ FieldsConsumer ret =
+ new Lucene90BlockTreeTermsWriter(
+ state, postingsWriter, minTermBlockSize, maxTermBlockSize);
+ success = true;
+ return ret;
+ } finally {
+ if (!success) {
+ IOUtils.closeWhileHandlingException(postingsWriter);
+ }
+ }
+ }
+
+ @Override
+ public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
+ PostingsReaderBase postingsReader = new Lucene90PostingsReader(state);
+ boolean success = false;
+ try {
+ FieldsProducer ret = new Lucene90BlockTreeTermsReader(postingsReader, state);
+ success = true;
+ return ret;
+ } finally {
+ if (!success) {
+ IOUtils.closeWhileHandlingException(postingsReader);
+ }
+ }
+ }
+
+ /**
+ * Holds all state required for {@link Lucene90PostingsReader} to produce a {@link
+ * org.apache.lucene.index.PostingsEnum} without re-seeking the terms dict.
+ *
+ * @lucene.internal
+ */
+ public static final class IntBlockTermState extends BlockTermState {
+ /** file pointer to the start of the doc ids enumeration, in {@link #DOC_EXTENSION} file */
+ public long docStartFP;
+ /** file pointer to the start of the positions enumeration, in {@link #POS_EXTENSION} file */
+ public long posStartFP;
+ /** file pointer to the start of the payloads enumeration, in {@link #PAY_EXTENSION} file */
+ public long payStartFP;
+ /**
+ * file offset for the start of the skip list, relative to docStartFP, if there are more than
+ * {@link ForUtil#BLOCK_SIZE} docs; otherwise -1
+ */
+ public long skipOffset;
+ /**
+ * file offset for the last position in the last block, if there are more than {@link
+ * ForUtil#BLOCK_SIZE} positions; otherwise -1
+ */
+ public long lastPosBlockOffset;
+ /**
+ * docid when there is a single pulsed posting, otherwise -1. freq is always implicitly
+ * totalTermFreq in this case.
+ */
+ public int singletonDocID;
+
+ /** Sole constructor. */
+ public IntBlockTermState() {
+ skipOffset = -1;
+ lastPosBlockOffset = -1;
+ singletonDocID = -1;
+ }
+
+ @Override
+ public IntBlockTermState clone() {
+ IntBlockTermState other = new IntBlockTermState();
+ other.copyFrom(this);
+ return other;
+ }
+
+ @Override
+ public void copyFrom(TermState _other) {
+ super.copyFrom(_other);
+ IntBlockTermState other = (IntBlockTermState) _other;
+ docStartFP = other.docStartFP;
+ posStartFP = other.posStartFP;
+ payStartFP = other.payStartFP;
+ lastPosBlockOffset = other.lastPosBlockOffset;
+ skipOffset = other.skipOffset;
+ singletonDocID = other.singletonDocID;
+ }
+
+ @Override
+ public String toString() {
+ return super.toString()
+ + " docStartFP="
+ + docStartFP
+ + " posStartFP="
+ + posStartFP
+ + " payStartFP="
+ + payStartFP
+ + " lastPosBlockOffset="
+ + lastPosBlockOffset
+ + " singletonDocID="
+ + singletonDocID;
+ }
+ }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PostingsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PostingsReader.java
new file mode 100644
index 000000000000..e2223598033f
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PostingsReader.java
@@ -0,0 +1,2068 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene90;
+
+import static org.apache.lucene.codecs.lucene90.ForUtil.BLOCK_SIZE;
+import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.DOC_CODEC;
+import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.MAX_SKIP_LEVELS;
+import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.PAY_CODEC;
+import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.POS_CODEC;
+import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.TERMS_CODEC;
+import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.VERSION_CURRENT;
+import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.VERSION_START;
+
+import java.io.IOException;
+import java.util.Arrays;
+import org.apache.lucene.codecs.BlockTermState;
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.PostingsReaderBase;
+import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.IntBlockTermState;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.Impacts;
+import org.apache.lucene.index.ImpactsEnum;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SlowImpactsEnum;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BitUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+
+/**
+ * Concrete class that reads the docId (and possibly freq, pos, offset, payload) lists written in the postings format.
+ *
+ * @lucene.experimental
+ */
+public final class Lucene90PostingsReader extends PostingsReaderBase {
+
+ private final IndexInput docIn;
+ private final IndexInput posIn;
+ private final IndexInput payIn;
+
+ private final int version;
+
+ /** Sole constructor. */
+ public Lucene90PostingsReader(SegmentReadState state) throws IOException {
+ boolean success = false;
+ IndexInput docIn = null;
+ IndexInput posIn = null;
+ IndexInput payIn = null;
+
+ // NOTE: these data files are too costly to verify checksum against all the bytes on open,
+ // but for now we at least verify proper structure of the checksum footer: which looks
+ // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+ // such as file truncation.
+
+ String docName =
+ IndexFileNames.segmentFileName(
+ state.segmentInfo.name, state.segmentSuffix, Lucene90PostingsFormat.DOC_EXTENSION);
+ try {
+ docIn = state.directory.openInput(docName, state.context);
+ version =
+ CodecUtil.checkIndexHeader(
+ docIn,
+ DOC_CODEC,
+ VERSION_START,
+ VERSION_CURRENT,
+ state.segmentInfo.getId(),
+ state.segmentSuffix);
+ CodecUtil.retrieveChecksum(docIn);
+
+ if (state.fieldInfos.hasProx()) {
+ String proxName =
+ IndexFileNames.segmentFileName(
+ state.segmentInfo.name, state.segmentSuffix, Lucene90PostingsFormat.POS_EXTENSION);
+ posIn = state.directory.openInput(proxName, state.context);
+ CodecUtil.checkIndexHeader(
+ posIn, POS_CODEC, version, version, state.segmentInfo.getId(), state.segmentSuffix);
+ CodecUtil.retrieveChecksum(posIn);
+
+ if (state.fieldInfos.hasPayloads() || state.fieldInfos.hasOffsets()) {
+ String payName =
+ IndexFileNames.segmentFileName(
+ state.segmentInfo.name,
+ state.segmentSuffix,
+ Lucene90PostingsFormat.PAY_EXTENSION);
+ payIn = state.directory.openInput(payName, state.context);
+ CodecUtil.checkIndexHeader(
+ payIn, PAY_CODEC, version, version, state.segmentInfo.getId(), state.segmentSuffix);
+ CodecUtil.retrieveChecksum(payIn);
+ }
+ }
+
+ this.docIn = docIn;
+ this.posIn = posIn;
+ this.payIn = payIn;
+ success = true;
+ } finally {
+ if (!success) {
+ IOUtils.closeWhileHandlingException(docIn, posIn, payIn);
+ }
+ }
+ }
+
+ @Override
+ public void init(IndexInput termsIn, SegmentReadState state) throws IOException {
+ // Make sure we are talking to the matching postings writer
+ CodecUtil.checkIndexHeader(
+ termsIn,
+ TERMS_CODEC,
+ VERSION_START,
+ VERSION_CURRENT,
+ state.segmentInfo.getId(),
+ state.segmentSuffix);
+ final int indexBlockSize = termsIn.readVInt();
+ if (indexBlockSize != BLOCK_SIZE) {
+ throw new IllegalStateException(
+ "index-time BLOCK_SIZE ("
+ + indexBlockSize
+ + ") != read-time BLOCK_SIZE ("
+ + BLOCK_SIZE
+ + ")");
+ }
+ }
+
+ /** Read values that have been written using variable-length encoding instead of bit-packing. */
+ static void readVIntBlock(
+ IndexInput docIn, long[] docBuffer, long[] freqBuffer, int num, boolean indexHasFreq)
+ throws IOException {
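+ // Each doc is delta-encoded. When freqs are indexed, doc delta and freq are multiplexed into
+ // one vInt: the upper bits hold the doc delta and a set low bit means freq == 1, otherwise the
+ // freq follows as a separate vInt. Without freqs, each entry is just the doc delta as a vInt.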
+ if (indexHasFreq) {
+ for (int i = 0; i < num; i++) {
+ final int code = docIn.readVInt();
+ docBuffer[i] = code >>> 1;
+ if ((code & 1) != 0) {
+ freqBuffer[i] = 1;
+ } else {
+ freqBuffer[i] = docIn.readVInt();
+ }
+ }
+ } else {
+ for (int i = 0; i < num; i++) {
+ docBuffer[i] = docIn.readVInt();
+ }
+ }
+ }
+
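+ /**
+ * Replaces the first {@code count} deltas in {@code buffer} with a running sum, starting from
+ * {@code base}, turning doc deltas into absolute doc IDs.
+ */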
+ static void prefixSum(long[] buffer, int count, long base) {
+ buffer[0] += base;
+ for (int i = 1; i < count; ++i) {
+ buffer[i] += buffer[i - 1];
+ }
+ }
+
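+ /**
+ * Returns the index of the first entry in {@code buffer[from..BLOCK_SIZE)} that is greater than
+ * or equal to {@code target}, or {@code BLOCK_SIZE} if there is none.
+ */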
+ static int findFirstGreater(long[] buffer, int target, int from) {
+ for (int i = from; i < BLOCK_SIZE; ++i) {
+ if (buffer[i] >= target) {
+ return i;
+ }
+ }
+ return BLOCK_SIZE;
+ }
+
+ @Override
+ public BlockTermState newTermState() {
+ return new IntBlockTermState();
+ }
+
+ @Override
+ public void close() throws IOException {
+ IOUtils.close(docIn, posIn, payIn);
+ }
+
+ @Override
+ public void decodeTerm(
+ DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute)
+ throws IOException {
+ final IntBlockTermState termState = (IntBlockTermState) _termState;
+ final boolean fieldHasPositions =
+ fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+ final boolean fieldHasOffsets =
+ fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
+ >= 0;
+ final boolean fieldHasPayloads = fieldInfo.hasPayloads();
+
+ if (absolute) {
+ termState.docStartFP = 0;
+ termState.posStartFP = 0;
+ termState.payStartFP = 0;
+ }
+
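+ // The first vLong multiplexes two cases on its low bit: when clear, the upper bits hold the
+ // delta of docStartFP (followed by a vInt singleton docID if docFreq == 1); when set, the
+ // upper bits hold a zig-zag encoded delta of the previous singleton docID.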
+ final long l = in.readVLong();
+ if ((l & 0x01) == 0) {
+ termState.docStartFP += l >>> 1;
+ if (termState.docFreq == 1) {
+ termState.singletonDocID = in.readVInt();
+ } else {
+ termState.singletonDocID = -1;
+ }
+ } else {
+ assert absolute == false;
+ assert termState.singletonDocID != -1;
+ termState.singletonDocID += BitUtil.zigZagDecode(l >>> 1);
+ }
+
+ if (fieldHasPositions) {
+ termState.posStartFP += in.readVLong();
+ if (fieldHasOffsets || fieldHasPayloads) {
+ termState.payStartFP += in.readVLong();
+ }
+ if (termState.totalTermFreq > BLOCK_SIZE) {
+ termState.lastPosBlockOffset = in.readVLong();
+ } else {
+ termState.lastPosBlockOffset = -1;
+ }
+ }
+
+ if (termState.docFreq > BLOCK_SIZE) {
+ termState.skipOffset = in.readVLong();
+ } else {
+ termState.skipOffset = -1;
+ }
+ }
+
+ @Override
+ public PostingsEnum postings(
+ FieldInfo fieldInfo, BlockTermState termState, PostingsEnum reuse, int flags)
+ throws IOException {
+
+ boolean indexHasPositions =
+ fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+
+ if (indexHasPositions == false
+ || PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS) == false) {
+ BlockDocsEnum docsEnum;
+ if (reuse instanceof BlockDocsEnum) {
+ docsEnum = (BlockDocsEnum) reuse;
+ if (!docsEnum.canReuse(docIn, fieldInfo)) {
+ docsEnum = new BlockDocsEnum(fieldInfo);
+ }
+ } else {
+ docsEnum = new BlockDocsEnum(fieldInfo);
+ }
+ return docsEnum.reset((IntBlockTermState) termState, flags);
+ } else {
+ EverythingEnum everythingEnum;
+ if (reuse instanceof EverythingEnum) {
+ everythingEnum = (EverythingEnum) reuse;
+ if (!everythingEnum.canReuse(docIn, fieldInfo)) {
+ everythingEnum = new EverythingEnum(fieldInfo);
+ }
+ } else {
+ everythingEnum = new EverythingEnum(fieldInfo);
+ }
+ return everythingEnum.reset((IntBlockTermState) termState, flags);
+ }
+ }
+
+ @Override
+ public ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, int flags)
+ throws IOException {
+ if (state.docFreq <= BLOCK_SIZE) {
+ // no skip data
+ return new SlowImpactsEnum(postings(fieldInfo, state, null, flags));
+ }
+
+ final boolean indexHasPositions =
+ fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+ final boolean indexHasOffsets =
+ fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
+ >= 0;
+ final boolean indexHasPayloads = fieldInfo.hasPayloads();
+
+ if (indexHasPositions == false
+ || PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS) == false) {
+ return new BlockImpactsDocsEnum(fieldInfo, (IntBlockTermState) state);
+ }
+
+ if (indexHasPositions
+ && PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS)
+ && (indexHasOffsets == false
+ || PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS) == false)
+ && (indexHasPayloads == false
+ || PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS) == false)) {
+ return new BlockImpactsPostingsEnum(fieldInfo, (IntBlockTermState) state);
+ }
+
+ return new BlockImpactsEverythingEnum(fieldInfo, (IntBlockTermState) state, flags);
+ }
+
+ final class BlockDocsEnum extends PostingsEnum {
+
+ final PForUtil pforUtil = new PForUtil(new ForUtil());
+
+ private final long[] docBuffer = new long[BLOCK_SIZE + 1];
+ private final long[] freqBuffer = new long[BLOCK_SIZE];
+
+ private int docBufferUpto;
+
+ private Lucene90SkipReader skipper;
+ private boolean skipped;
+
+ final IndexInput startDocIn;
+
+ IndexInput docIn;
+ final boolean indexHasFreq;
+ final boolean indexHasPos;
+ final boolean indexHasOffsets;
+ final boolean indexHasPayloads;
+
+ private int docFreq; // number of docs in this posting list
+ private long totalTermFreq; // sum of freqBuffer in this posting list (or docFreq when omitted)
+ private int blockUpto; // number of docs in or before the current block
+ private int doc; // doc we last read
+ private long accum; // accumulator for doc deltas
+
+ // Where this term's postings start in the .doc file:
+ private long docTermStartFP;
+
+ // Where this term's skip data starts (after
+ // docTermStartFP) in the .doc file (or -1 if there is
+ // no skip data for this term):
+ private long skipOffset;
+
+ // docID for next skip point, we won't use skipper if
+ // target docID is not larger than this
+ private int nextSkipDoc;
+
+ private boolean needsFreq; // true if the caller actually needs frequencies
+ // since we read freqBuffer lazily, isFreqsRead records whether it has been read for the
+ // current block; always true when we don't have freqs (indexHasFreq=false) or don't need them
+ // (needsFreq=false)
+ private boolean isFreqsRead;
+ private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1
+
+ public BlockDocsEnum(FieldInfo fieldInfo) throws IOException {
+ this.startDocIn = Lucene90PostingsReader.this.docIn;
+ this.docIn = null;
+ indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+ indexHasPos =
+ fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+ indexHasOffsets =
+ fieldInfo
+ .getIndexOptions()
+ .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
+ >= 0;
+ indexHasPayloads = fieldInfo.hasPayloads();
+ // We set the last element of docBuffer to NO_MORE_DOCS, which helps save conditionals in
+ // advance()
+ docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
+ }
+
+ public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
+ return docIn == startDocIn
+ && indexHasFreq
+ == (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0)
+ && indexHasPos
+ == (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
+ >= 0)
+ && indexHasPayloads == fieldInfo.hasPayloads();
+ }
+
+ public PostingsEnum reset(IntBlockTermState termState, int flags) throws IOException {
+ docFreq = termState.docFreq;
+ totalTermFreq = indexHasFreq ? termState.totalTermFreq : docFreq;
+ docTermStartFP = termState.docStartFP;
+ skipOffset = termState.skipOffset;
+ singletonDocID = termState.singletonDocID;
+ if (docFreq > 1) {
+ if (docIn == null) {
+ // lazy init
+ docIn = startDocIn.clone();
+ }
+ docIn.seek(docTermStartFP);
+ }
+
+ doc = -1;
+ this.needsFreq = PostingsEnum.featureRequested(flags, PostingsEnum.FREQS);
+ this.isFreqsRead = true;
+ if (indexHasFreq == false || needsFreq == false) {
+ for (int i = 0; i < ForUtil.BLOCK_SIZE; ++i) {
+ freqBuffer[i] = 1;
+ }
+ }
+ accum = 0;
+ blockUpto = 0;
+ nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block
+ docBufferUpto = BLOCK_SIZE;
+ skipped = false;
+ return this;
+ }
+
+ @Override
+ public int freq() throws IOException {
+ if (isFreqsRead == false) {
+ pforUtil.decode(docIn, freqBuffer); // read freqBuffer for this block
+ isFreqsRead = true;
+ }
+ return (int) freqBuffer[docBufferUpto - 1];
+ }
+
+ @Override
+ public int nextPosition() throws IOException {
+ return -1;
+ }
+
+ @Override
+ public int startOffset() throws IOException {
+ return -1;
+ }
+
+ @Override
+ public int endOffset() throws IOException {
+ return -1;
+ }
+
+ @Override
+ public BytesRef getPayload() throws IOException {
+ return null;
+ }
+
+ @Override
+ public int docID() {
+ return doc;
+ }
+
+ private void refillDocs() throws IOException {
+ // Check if we skipped reading the previous block of freqBuffer, and if yes, position docIn
+ // after it
+ if (isFreqsRead == false) {
+ pforUtil.skip(docIn);
+ isFreqsRead = true;
+ }
+
+ final int left = docFreq - blockUpto;
+ assert left >= 0;
+
+ if (left >= BLOCK_SIZE) {
+ pforUtil.decodeAndPrefixSum(docIn, accum, docBuffer);
+
+ if (indexHasFreq) {
+ if (needsFreq) {
+ isFreqsRead = false;
+ } else {
+ pforUtil.skip(docIn); // skip over freqBuffer if we don't need them at all
+ }
+ }
+ blockUpto += BLOCK_SIZE;
+ } else if (docFreq == 1) {
+ docBuffer[0] = singletonDocID;
+ freqBuffer[0] = totalTermFreq;
+ docBuffer[1] = NO_MORE_DOCS;
+ blockUpto++;
+ } else {
+ // Read vInts:
+ readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq);
+ prefixSum(docBuffer, left, accum);
+ docBuffer[left] = NO_MORE_DOCS;
+ blockUpto += left;
+ }
+ accum = docBuffer[BLOCK_SIZE - 1];
+ docBufferUpto = 0;
+ assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ if (docBufferUpto == BLOCK_SIZE) {
+ refillDocs(); // we don't need to load freqBuffer for now (will be loaded later if
+ // necessary)
+ }
+
+ doc = (int) docBuffer[docBufferUpto];
+ docBufferUpto++;
+ return doc;
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ // current skip docID < docIDs generated from current buffer <= next skip docID
+ // we don't need to skip if target is buffered already
+ if (docFreq > BLOCK_SIZE && target > nextSkipDoc) {
+
+ if (skipper == null) {
+ // Lazy init: first time this enum has ever been used for skipping
+ skipper =
+ new Lucene90SkipReader(
+ docIn.clone(), MAX_SKIP_LEVELS, indexHasPos, indexHasOffsets, indexHasPayloads);
+ }
+
+ if (!skipped) {
+ assert skipOffset != -1;
+ // This is the first time this enum has skipped
+ // since reset() was called; load the skip data:
+ skipper.init(docTermStartFP + skipOffset, docTermStartFP, 0, 0, docFreq);
+ skipped = true;
+ }
+
+ // always plus one to fix the result, since skip position in Lucene90SkipReader
+ // is a little different from MultiLevelSkipListReader
+ final int newDocUpto = skipper.skipTo(target) + 1;
+
+ if (newDocUpto >= blockUpto) {
+ // Skipper moved
+ assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
+ blockUpto = newDocUpto;
+
+ // Force to read next block
+ docBufferUpto = BLOCK_SIZE;
+ accum = skipper.getDoc(); // actually, this is just lastSkipEntry
+ docIn.seek(skipper.getDocPointer()); // now point to the block we want to search
+ // even if freqBuffer were not read from the previous block, we will mark them as read,
+ // as we don't need to skip the previous block freqBuffer in refillDocs,
+ // as we have already positioned docIn where it needs to be.
+ isFreqsRead = true;
+ }
+ // next time we call advance, this is used to
+ // foresee whether skipper is necessary.
+ nextSkipDoc = skipper.getNextSkipDoc();
+ }
+ if (docBufferUpto == BLOCK_SIZE) {
+ refillDocs();
+ }
+
+ // Now scan... this is an inlined/pared down version
+ // of nextDoc():
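+ // A NO_MORE_DOCS sentinel always terminates the buffered docs (see refillDocs), so this loop
+ // needs no explicit bounds check.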
+ long doc;
+ while (true) {
+ doc = docBuffer[docBufferUpto];
+
+ if (doc >= target) {
+ break;
+ }
+ ++docBufferUpto;
+ }
+
+ docBufferUpto++;
+ return this.doc = (int) doc;
+ }
+
+ @Override
+ public long cost() {
+ return docFreq;
+ }
+ }
+
+ // Also handles payloads + offsets
+ final class EverythingEnum extends PostingsEnum {
+
+ final PForUtil pforUtil = new PForUtil(new ForUtil());
+
+ private final long[] docBuffer = new long[BLOCK_SIZE + 1];
+ private final long[] freqBuffer = new long[BLOCK_SIZE + 1];
+ private final long[] posDeltaBuffer = new long[BLOCK_SIZE];
+
+ private final long[] payloadLengthBuffer;
+ private final long[] offsetStartDeltaBuffer;
+ private final long[] offsetLengthBuffer;
+
+ private byte[] payloadBytes;
+ private int payloadByteUpto;
+ private int payloadLength;
+
+ private int lastStartOffset;
+ private int startOffset;
+ private int endOffset;
+
+ private int docBufferUpto;
+ private int posBufferUpto;
+
+ private Lucene90SkipReader skipper;
+ private boolean skipped;
+
+ final IndexInput startDocIn;
+
+ IndexInput docIn;
+ final IndexInput posIn;
+ final IndexInput payIn;
+ final BytesRef payload;
+
+ final boolean indexHasOffsets;
+ final boolean indexHasPayloads;
+
+ private int docFreq; // number of docs in this posting list
+ private long totalTermFreq; // number of positions in this posting list
+ private int blockUpto; // number of docs in or before the current block
+ private int doc; // doc we last read
+ private long accum; // accumulator for doc deltas
+ private int freq; // freq we last read
+ private int position; // current position
+
+ // how many positions "behind" we are; nextPosition must
+ // skip these to "catch up":
+ private int posPendingCount;
+
+ // Lazy pos seek: if != -1 then we must seek to this FP
+ // before reading positions:
+ private long posPendingFP;
+
+ // Lazy pay seek: if != -1 then we must seek to this FP
+ // before reading payloads/offsets:
+ private long payPendingFP;
+
+ // Where this term's postings start in the .doc file:
+ private long docTermStartFP;
+
+ // Where this term's postings start in the .pos file:
+ private long posTermStartFP;
+
+ // Where this term's payloads/offsets start in the .pay
+ // file:
+ private long payTermStartFP;
+
+ // File pointer where the last (vInt encoded) pos delta
+ // block is. We need this to know whether to bulk
+ // decode vs vInt decode the block:
+ private long lastPosBlockFP;
+
+ // Where this term's skip data starts (after
+ // docTermStartFP) in the .doc file (or -1 if there is
+ // no skip data for this term):
+ private long skipOffset;
+
+ private int nextSkipDoc;
+
+ private boolean needsOffsets; // true if we actually need offsets
+ private boolean needsPayloads; // true if we actually need payloads
+ private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1
+
+ public EverythingEnum(FieldInfo fieldInfo) throws IOException {
+ indexHasOffsets =
+ fieldInfo
+ .getIndexOptions()
+ .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
+ >= 0;
+ indexHasPayloads = fieldInfo.hasPayloads();
+
+ this.startDocIn = Lucene90PostingsReader.this.docIn;
+ this.docIn = null;
+ this.posIn = Lucene90PostingsReader.this.posIn.clone();
+ if (indexHasOffsets || indexHasPayloads) {
+ this.payIn = Lucene90PostingsReader.this.payIn.clone();
+ } else {
+ this.payIn = null;
+ }
+ if (indexHasOffsets) {
+ offsetStartDeltaBuffer = new long[BLOCK_SIZE];
+ offsetLengthBuffer = new long[BLOCK_SIZE];
+ } else {
+ offsetStartDeltaBuffer = null;
+ offsetLengthBuffer = null;
+ startOffset = -1;
+ endOffset = -1;
+ }
+
+ if (indexHasPayloads) {
+ payloadLengthBuffer = new long[BLOCK_SIZE];
+ payloadBytes = new byte[128];
+ payload = new BytesRef();
+ } else {
+ payloadLengthBuffer = null;
+ payloadBytes = null;
+ payload = null;
+ }
+
+ // We set the last element of docBuffer to NO_MORE_DOCS, which helps save conditionals in
+ // advance()
+ docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
+ }
+
+ public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
+ return docIn == startDocIn
+ && indexHasOffsets
+ == (fieldInfo
+ .getIndexOptions()
+ .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
+ >= 0)
+ && indexHasPayloads == fieldInfo.hasPayloads();
+ }
+
+ public EverythingEnum reset(IntBlockTermState termState, int flags) throws IOException {
+ docFreq = termState.docFreq;
+ docTermStartFP = termState.docStartFP;
+ posTermStartFP = termState.posStartFP;
+ payTermStartFP = termState.payStartFP;
+ skipOffset = termState.skipOffset;
+ totalTermFreq = termState.totalTermFreq;
+ singletonDocID = termState.singletonDocID;
+ if (docFreq > 1) {
+ if (docIn == null) {
+ // lazy init
+ docIn = startDocIn.clone();
+ }
+ docIn.seek(docTermStartFP);
+ }
+ posPendingFP = posTermStartFP;
+ payPendingFP = payTermStartFP;
+ posPendingCount = 0;
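+ // lastPosBlockFP marks where the final, vInt-encoded position block starts: the very first
+ // block when there are fewer than BLOCK_SIZE positions, -1 when the positions fill exactly
+ // one packed block (no vInt tail), otherwise the tail follows the packed blocks at
+ // posTermStartFP + lastPosBlockOffset.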
+ if (termState.totalTermFreq < BLOCK_SIZE) {
+ lastPosBlockFP = posTermStartFP;
+ } else if (termState.totalTermFreq == BLOCK_SIZE) {
+ lastPosBlockFP = -1;
+ } else {
+ lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset;
+ }
+
+ this.needsOffsets = PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS);
+ this.needsPayloads = PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS);
+
+ doc = -1;
+ accum = 0;
+ blockUpto = 0;
+ if (docFreq > BLOCK_SIZE) {
+ nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block
+ } else {
+ nextSkipDoc = NO_MORE_DOCS; // not enough docs for skipping
+ }
+ docBufferUpto = BLOCK_SIZE;
+ skipped = false;
+ return this;
+ }
+
+ @Override
+ public int freq() throws IOException {
+ return freq;
+ }
+
+ @Override
+ public int docID() {
+ return doc;
+ }
+
+ private void refillDocs() throws IOException {
+ final int left = docFreq - blockUpto;
+ assert left >= 0;
+
+ if (left >= BLOCK_SIZE) {
+ pforUtil.decodeAndPrefixSum(docIn, accum, docBuffer);
+ pforUtil.decode(docIn, freqBuffer);
+ blockUpto += BLOCK_SIZE;
+ } else if (docFreq == 1) {
+ docBuffer[0] = singletonDocID;
+ freqBuffer[0] = totalTermFreq;
+ docBuffer[1] = NO_MORE_DOCS;
+ blockUpto++;
+ } else {
+ readVIntBlock(docIn, docBuffer, freqBuffer, left, true);
+ prefixSum(docBuffer, left, accum);
+ docBuffer[left] = NO_MORE_DOCS;
+ blockUpto += left;
+ }
+ accum = docBuffer[BLOCK_SIZE - 1];
+ docBufferUpto = 0;
+ assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
+ }
+
+ private void refillPositions() throws IOException {
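+ // Once posIn reaches lastPosBlockFP we are in the vInt-encoded tail, which holds the last
+ // totalTermFreq % BLOCK_SIZE positions; before that, positions come as full packed blocks.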
+ if (posIn.getFilePointer() == lastPosBlockFP) {
+ final int count = (int) (totalTermFreq % BLOCK_SIZE);
+ int payloadLength = 0;
+ int offsetLength = 0;
+ payloadByteUpto = 0;
+ for (int i = 0; i < count; i++) {
+ int code = posIn.readVInt();
+ if (indexHasPayloads) {
+ if ((code & 1) != 0) {
+ payloadLength = posIn.readVInt();
+ }
+ payloadLengthBuffer[i] = payloadLength;
+ posDeltaBuffer[i] = code >>> 1;
+ if (payloadLength != 0) {
+ if (payloadByteUpto + payloadLength > payloadBytes.length) {
+ payloadBytes = ArrayUtil.grow(payloadBytes, payloadByteUpto + payloadLength);
+ }
+ posIn.readBytes(payloadBytes, payloadByteUpto, payloadLength);
+ payloadByteUpto += payloadLength;
+ }
+ } else {
+ posDeltaBuffer[i] = code;
+ }
+
+ if (indexHasOffsets) {
+ int deltaCode = posIn.readVInt();
+ if ((deltaCode & 1) != 0) {
+ offsetLength = posIn.readVInt();
+ }
+ offsetStartDeltaBuffer[i] = deltaCode >>> 1;
+ offsetLengthBuffer[i] = offsetLength;
+ }
+ }
+ payloadByteUpto = 0;
+ } else {
+ pforUtil.decode(posIn, posDeltaBuffer);
+
+ if (indexHasPayloads) {
+ if (needsPayloads) {
+ pforUtil.decode(payIn, payloadLengthBuffer);
+ int numBytes = payIn.readVInt();
+
+ if (numBytes > payloadBytes.length) {
+ payloadBytes = ArrayUtil.growNoCopy(payloadBytes, numBytes);
+ }
+ payIn.readBytes(payloadBytes, 0, numBytes);
+ } else {
+ // this works, because when writing a vint block we always force the first length to be
+ // written
+ pforUtil.skip(payIn); // skip over lengths
+ int numBytes = payIn.readVInt(); // read length of payloadBytes
+ payIn.seek(payIn.getFilePointer() + numBytes); // skip over payloadBytes
+ }
+ payloadByteUpto = 0;
+ }
+
+ if (indexHasOffsets) {
+ if (needsOffsets) {
+ pforUtil.decode(payIn, offsetStartDeltaBuffer);
+ pforUtil.decode(payIn, offsetLengthBuffer);
+ } else {
+ // this works, because when writing a vint block we always force the first length to be
+ // written
+ pforUtil.skip(payIn); // skip over starts
+ pforUtil.skip(payIn); // skip over lengths
+ }
+ }
+ }
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ if (docBufferUpto == BLOCK_SIZE) {
+ refillDocs();
+ }
+
+ doc = (int) docBuffer[docBufferUpto];
+ freq = (int) freqBuffer[docBufferUpto];
+ posPendingCount += freq;
+ docBufferUpto++;
+
+ position = 0;
+ lastStartOffset = 0;
+ return doc;
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ if (target > nextSkipDoc) {
+ if (skipper == null) {
+ // Lazy init: first time this enum has ever been used for skipping
+ skipper =
+ new Lucene90SkipReader(
+ docIn.clone(), MAX_SKIP_LEVELS, true, indexHasOffsets, indexHasPayloads);
+ }
+
+ if (!skipped) {
+ assert skipOffset != -1;
+ // This is the first time this enum has skipped
+ // since reset() was called; load the skip data:
+ skipper.init(
+ docTermStartFP + skipOffset, docTermStartFP, posTermStartFP, payTermStartFP, docFreq);
+ skipped = true;
+ }
+
+ final int newDocUpto = skipper.skipTo(target) + 1;
+
+ if (newDocUpto > blockUpto - BLOCK_SIZE + docBufferUpto) {
+ // Skipper moved
+ assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
+ blockUpto = newDocUpto;
+
+ // Force to read next block
+ docBufferUpto = BLOCK_SIZE;
+ accum = skipper.getDoc();
+ docIn.seek(skipper.getDocPointer());
+ posPendingFP = skipper.getPosPointer();
+ payPendingFP = skipper.getPayPointer();
+ posPendingCount = skipper.getPosBufferUpto();
+ lastStartOffset = 0; // new document
+ payloadByteUpto = skipper.getPayloadByteUpto();
+ }
+ nextSkipDoc = skipper.getNextSkipDoc();
+ }
+ if (docBufferUpto == BLOCK_SIZE) {
+ refillDocs();
+ }
+
+ // Now scan:
+ long doc;
+ while (true) {
+ doc = docBuffer[docBufferUpto];
+ freq = (int) freqBuffer[docBufferUpto];
+ posPendingCount += freq;
+ docBufferUpto++;
+
+ if (doc >= target) {
+ break;
+ }
+ }
+
+ position = 0;
+ lastStartOffset = 0;
+ return this.doc = (int) doc;
+ }
+
+ // TODO: in theory we could avoid loading frq block
+ // when not needed, ie, use skip data to load how far to
+ // seek the pos pointer ... instead of having to load frq
+ // blocks only to sum up how many positions to skip
+ private void skipPositions() throws IOException {
+ // Skip positions now:
+ int toSkip = posPendingCount - freq;
+ // if (DEBUG) {
+ // System.out.println(" FPR.skipPositions: toSkip=" + toSkip);
+ // }
+
+ final int leftInBlock = BLOCK_SIZE - posBufferUpto;
+ if (toSkip < leftInBlock) {
+ int end = posBufferUpto + toSkip;
+ while (posBufferUpto < end) {
+ if (indexHasPayloads) {
+ payloadByteUpto += payloadLengthBuffer[posBufferUpto];
+ }
+ posBufferUpto++;
+ }
+ } else {
+ toSkip -= leftInBlock;
+ while (toSkip >= BLOCK_SIZE) {
+ assert posIn.getFilePointer() != lastPosBlockFP;
+ pforUtil.skip(posIn);
+
+ if (indexHasPayloads) {
+ // Skip payloadLength block:
+ pforUtil.skip(payIn);
+
+ // Skip payloadBytes block:
+ int numBytes = payIn.readVInt();
+ payIn.seek(payIn.getFilePointer() + numBytes);
+ }
+
+ if (indexHasOffsets) {
+ pforUtil.skip(payIn);
+ pforUtil.skip(payIn);
+ }
+ toSkip -= BLOCK_SIZE;
+ }
+ refillPositions();
+ payloadByteUpto = 0;
+ posBufferUpto = 0;
+ while (posBufferUpto < toSkip) {
+ if (indexHasPayloads) {
+ payloadByteUpto += payloadLengthBuffer[posBufferUpto];
+ }
+ posBufferUpto++;
+ }
+ }
+
+ position = 0;
+ lastStartOffset = 0;
+ }
+
+ @Override
+ public int nextPosition() throws IOException {
+ assert posPendingCount > 0;
+
+ if (posPendingFP != -1) {
+ posIn.seek(posPendingFP);
+ posPendingFP = -1;
+
+ if (payPendingFP != -1 && payIn != null) {
+ payIn.seek(payPendingFP);
+ payPendingFP = -1;
+ }
+
+ // Force buffer refill:
+ posBufferUpto = BLOCK_SIZE;
+ }
+
+ if (posPendingCount > freq) {
+ skipPositions();
+ posPendingCount = freq;
+ }
+
+ if (posBufferUpto == BLOCK_SIZE) {
+ refillPositions();
+ posBufferUpto = 0;
+ }
+ position += posDeltaBuffer[posBufferUpto];
+
+ if (indexHasPayloads) {
+ payloadLength = (int) payloadLengthBuffer[posBufferUpto];
+ payload.bytes = payloadBytes;
+ payload.offset = payloadByteUpto;
+ payload.length = payloadLength;
+ payloadByteUpto += payloadLength;
+ }
+
+ if (indexHasOffsets) {
+ startOffset = lastStartOffset + (int) offsetStartDeltaBuffer[posBufferUpto];
+ endOffset = startOffset + (int) offsetLengthBuffer[posBufferUpto];
+ lastStartOffset = startOffset;
+ }
+
+ posBufferUpto++;
+ posPendingCount--;
+ return position;
+ }
+
+ @Override
+ public int startOffset() {
+ return startOffset;
+ }
+
+ @Override
+ public int endOffset() {
+ return endOffset;
+ }
+
+ @Override
+ public BytesRef getPayload() {
+ if (payloadLength == 0) {
+ return null;
+ } else {
+ return payload;
+ }
+ }
+
+ @Override
+ public long cost() {
+ return docFreq;
+ }
+ }
+
+ final class BlockImpactsDocsEnum extends ImpactsEnum {
+
+ final PForUtil pforUtil = new PForUtil(new ForUtil());
+
+ private final long[] docBuffer = new long[BLOCK_SIZE + 1];
+ private final long[] freqBuffer = new long[BLOCK_SIZE];
+
+ private int docBufferUpto;
+
+ private final Lucene90ScoreSkipReader skipper;
+
+ final IndexInput docIn;
+
+ final boolean indexHasFreqs;
+
+ private int docFreq; // number of docs in this posting list
+ private int blockUpto; // number of documents in or before the current block
+ private int doc; // doc we last read
+ private long accum; // accumulator for doc deltas
+
+ private int nextSkipDoc = -1;
+
+ private long seekTo = -1;
+
+ // since we read freqBuffer lazily, isFreqsRead records whether it has been read for the
+ // current block; always true when the index has no freqs (indexHasFreqs=false)
+ private boolean isFreqsRead;
+
+ public BlockImpactsDocsEnum(FieldInfo fieldInfo, IntBlockTermState termState)
+ throws IOException {
+ indexHasFreqs = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+ final boolean indexHasPositions =
+ fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+ final boolean indexHasOffsets =
+ fieldInfo
+ .getIndexOptions()
+ .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
+ >= 0;
+ final boolean indexHasPayloads = fieldInfo.hasPayloads();
+
+ this.docIn = Lucene90PostingsReader.this.docIn.clone();
+
+ docFreq = termState.docFreq;
+ docIn.seek(termState.docStartFP);
+
+ doc = -1;
+ accum = 0;
+ blockUpto = 0;
+ docBufferUpto = BLOCK_SIZE;
+
+ skipper =
+ new Lucene90ScoreSkipReader(
+ docIn.clone(), MAX_SKIP_LEVELS, indexHasPositions, indexHasOffsets, indexHasPayloads);
+ skipper.init(
+ termState.docStartFP + termState.skipOffset,
+ termState.docStartFP,
+ termState.posStartFP,
+ termState.payStartFP,
+ docFreq);
+
+ // We set the last element of docBuffer to NO_MORE_DOCS, which helps save conditionals in
+ // advance()
+ docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
+ this.isFreqsRead = true;
+ if (indexHasFreqs == false) {
+ Arrays.fill(freqBuffer, 1L);
+ }
+ }
+
+ @Override
+ public int freq() throws IOException {
+ if (isFreqsRead == false) {
+ pforUtil.decode(docIn, freqBuffer); // read freqBuffer for this block
+ isFreqsRead = true;
+ }
+ return (int) freqBuffer[docBufferUpto - 1];
+ }
+
+ @Override
+ public int docID() {
+ return doc;
+ }
+
+ private void refillDocs() throws IOException {
+ // Check if we skipped reading the previous block of freqBuffer, and if yes, position docIn
+ // after it
+ if (isFreqsRead == false) {
+ pforUtil.skip(docIn);
+ isFreqsRead = true;
+ }
+
+ final int left = docFreq - blockUpto;
+ assert left >= 0;
+
+ if (left >= BLOCK_SIZE) {
+ pforUtil.decodeAndPrefixSum(docIn, accum, docBuffer);
+ if (indexHasFreqs) {
+ pforUtil.decode(docIn, freqBuffer);
+ }
+ blockUpto += BLOCK_SIZE;
+ } else {
+ readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreqs);
+ prefixSum(docBuffer, left, accum);
+ docBuffer[left] = NO_MORE_DOCS;
+ blockUpto += left;
+ }
+ accum = docBuffer[BLOCK_SIZE - 1];
+ docBufferUpto = 0;
+ assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
+ }
+
+ @Override
+ public void advanceShallow(int target) throws IOException {
+ if (target > nextSkipDoc) {
+ // always plus one to fix the result, since skip position in Lucene90SkipReader
+ // is a little different from MultiLevelSkipListReader
+ final int newDocUpto = skipper.skipTo(target) + 1;
+
+ if (newDocUpto >= blockUpto) {
+ // Skipper moved
+ assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
+ blockUpto = newDocUpto;
+
+ // Force to read next block
+ docBufferUpto = BLOCK_SIZE;
+ accum = skipper.getDoc();
+ seekTo = skipper.getDocPointer(); // delay the seek
+ }
+ // next time we call advance, this is used to
+ // foresee whether skipper is necessary.
+ nextSkipDoc = skipper.getNextSkipDoc();
+ }
+ assert nextSkipDoc >= target;
+ }
+
+ @Override
+ public Impacts getImpacts() throws IOException {
+ advanceShallow(doc);
+ return skipper.getImpacts();
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ return advance(doc + 1);
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ if (target > nextSkipDoc) {
+ advanceShallow(target);
+ }
+ if (docBufferUpto == BLOCK_SIZE) {
+ if (seekTo >= 0) {
+ docIn.seek(seekTo);
+ isFreqsRead = true; // reset isFreqsRead
+ seekTo = -1;
+ }
+ refillDocs();
+ }
+
+ int next = findFirstGreater(docBuffer, target, docBufferUpto);
+ this.doc = (int) docBuffer[next];
+ docBufferUpto = next + 1;
+ return doc;
+ }
+
+ @Override
+ public int nextPosition() throws IOException {
+ return -1;
+ }
+
+ @Override
+ public int startOffset() {
+ return -1;
+ }
+
+ @Override
+ public int endOffset() {
+ return -1;
+ }
+
+ @Override
+ public BytesRef getPayload() {
+ return null;
+ }
+
+ @Override
+ public long cost() {
+ return docFreq;
+ }
+ }
+
+ final class BlockImpactsPostingsEnum extends ImpactsEnum {
+
+ final PForUtil pforUtil = new PForUtil(new ForUtil());
+
+ private final long[] docBuffer = new long[BLOCK_SIZE];
+ private final long[] freqBuffer = new long[BLOCK_SIZE];
+ private final long[] posDeltaBuffer = new long[BLOCK_SIZE];
+
+ private int docBufferUpto;
+ private int posBufferUpto;
+
+ private final Lucene90ScoreSkipReader skipper;
+
+ final IndexInput docIn;
+ final IndexInput posIn;
+
+ final boolean indexHasOffsets;
+ final boolean indexHasPayloads;
+
+ private int docFreq; // number of docs in this posting list
+ private long totalTermFreq; // number of positions in this posting list
+ private int docUpto; // how many docs we've read
+ private int doc; // doc we last read
+ private long accum; // accumulator for doc deltas
+ private int freq; // freq we last read
+ private int position; // current position
+
+ // how many positions "behind" we are; nextPosition must
+ // skip these to "catch up":
+ private int posPendingCount;
+
+ // Lazy pos seek: if != -1 then we must seek to this FP
+ // before reading positions:
+ private long posPendingFP;
+
+ // Where this term's postings start in the .doc file:
+ private long docTermStartFP;
+
+ // Where this term's postings start in the .pos file:
+ private long posTermStartFP;
+
+ // Where this term's payloads/offsets start in the .pay
+ // file:
+ private long payTermStartFP;
+
+ // File pointer where the last (vInt encoded) pos delta
+ // block is. We need this to know whether to bulk
+ // decode vs vInt decode the block:
+ private long lastPosBlockFP;
+
+ private int nextSkipDoc = -1;
+
+ private long seekTo = -1;
+
+ public BlockImpactsPostingsEnum(FieldInfo fieldInfo, IntBlockTermState termState)
+ throws IOException {
+ indexHasOffsets =
+ fieldInfo
+ .getIndexOptions()
+ .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
+ >= 0;
+ indexHasPayloads = fieldInfo.hasPayloads();
+
+ this.docIn = Lucene90PostingsReader.this.docIn.clone();
+
+ this.posIn = Lucene90PostingsReader.this.posIn.clone();
+
+ docFreq = termState.docFreq;
+ docTermStartFP = termState.docStartFP;
+ posTermStartFP = termState.posStartFP;
+ payTermStartFP = termState.payStartFP;
+ totalTermFreq = termState.totalTermFreq;
+ docIn.seek(docTermStartFP);
+ posPendingFP = posTermStartFP;
+ posPendingCount = 0;
+ if (termState.totalTermFreq < BLOCK_SIZE) {
+ lastPosBlockFP = posTermStartFP;
+ } else if (termState.totalTermFreq == BLOCK_SIZE) {
+ lastPosBlockFP = -1;
+ } else {
+ lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset;
+ }
+
+ doc = -1;
+ accum = 0;
+ docUpto = 0;
+ docBufferUpto = BLOCK_SIZE;
+
+ skipper =
+ new Lucene90ScoreSkipReader(
+ docIn.clone(), MAX_SKIP_LEVELS, true, indexHasOffsets, indexHasPayloads);
+ skipper.init(
+ docTermStartFP + termState.skipOffset,
+ docTermStartFP,
+ posTermStartFP,
+ payTermStartFP,
+ docFreq);
+ }
+
+ @Override
+ public int freq() throws IOException {
+ return freq;
+ }
+
+ @Override
+ public int docID() {
+ return doc;
+ }
+
+ private void refillDocs() throws IOException {
+ final int left = docFreq - docUpto;
+ assert left >= 0;
+
+ if (left >= BLOCK_SIZE) {
+ pforUtil.decodeAndPrefixSum(docIn, accum, docBuffer);
+ pforUtil.decode(docIn, freqBuffer);
+ } else {
+ readVIntBlock(docIn, docBuffer, freqBuffer, left, true);
+ prefixSum(docBuffer, left, accum);
+ docBuffer[left] = NO_MORE_DOCS;
+ }
+ accum = docBuffer[BLOCK_SIZE - 1];
+ docBufferUpto = 0;
+ }
+
+ private void refillPositions() throws IOException {
+ if (posIn.getFilePointer() == lastPosBlockFP) {
+ final int count = (int) (totalTermFreq % BLOCK_SIZE);
+ int payloadLength = 0;
+ for (int i = 0; i < count; i++) {
+ int code = posIn.readVInt();
+ if (indexHasPayloads) {
+ if ((code & 1) != 0) {
+ payloadLength = posIn.readVInt();
+ }
+ posDeltaBuffer[i] = code >>> 1;
+ if (payloadLength != 0) {
+ posIn.seek(posIn.getFilePointer() + payloadLength);
+ }
+ } else {
+ posDeltaBuffer[i] = code;
+ }
+ if (indexHasOffsets) {
+ if ((posIn.readVInt() & 1) != 0) {
+ // offset length changed
+ posIn.readVInt();
+ }
+ }
+ }
+ } else {
+ pforUtil.decode(posIn, posDeltaBuffer);
+ }
+ }
+
+ @Override
+ public void advanceShallow(int target) throws IOException {
+ if (target > nextSkipDoc) {
+ // always plus one to fix the result, since skip position in Lucene90SkipReader
+ // is a little different from MultiLevelSkipListReader
+ final int newDocUpto = skipper.skipTo(target) + 1;
+
+ if (newDocUpto > docUpto) {
+ // Skipper moved
+ assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
+ docUpto = newDocUpto;
+
+ // Force to read next block
+ docBufferUpto = BLOCK_SIZE;
+ accum = skipper.getDoc();
+ posPendingFP = skipper.getPosPointer();
+ posPendingCount = skipper.getPosBufferUpto();
+ seekTo = skipper.getDocPointer(); // delay the seek
+ }
+ // next time we call advance, this is used to
+ // foresee whether skipper is necessary.
+ nextSkipDoc = skipper.getNextSkipDoc();
+ }
+ assert nextSkipDoc >= target;
+ }
+
+ @Override
+ public Impacts getImpacts() throws IOException {
+ advanceShallow(doc);
+ return skipper.getImpacts();
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ return advance(doc + 1);
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ if (target > nextSkipDoc) {
+ advanceShallow(target);
+ }
+ if (docBufferUpto == BLOCK_SIZE) {
+ if (seekTo >= 0) {
+ docIn.seek(seekTo);
+ seekTo = -1;
+ }
+ refillDocs();
+ }
+
+ int next = findFirstGreater(docBuffer, target, docBufferUpto);
+ if (next == BLOCK_SIZE) {
+ return doc = NO_MORE_DOCS;
+ }
+ this.doc = (int) docBuffer[next];
+ this.freq = (int) freqBuffer[next];
+ for (int i = docBufferUpto; i <= next; ++i) {
+ posPendingCount += freqBuffer[i];
+ }
+ docUpto += next - docBufferUpto + 1;
+ docBufferUpto = next + 1;
+ position = 0;
+ return doc;
+ }
+
+ // TODO: in theory we could avoid loading frq block
+ // when not needed, ie, use skip data to load how far to
+ // seek the pos pointer ... instead of having to load frq
+ // blocks only to sum up how many positions to skip
+ private void skipPositions() throws IOException {
+ // Skip positions now:
+ int toSkip = posPendingCount - freq;
+
+ final int leftInBlock = BLOCK_SIZE - posBufferUpto;
+ if (toSkip < leftInBlock) {
+ posBufferUpto += toSkip;
+ } else {
+ toSkip -= leftInBlock;
+ while (toSkip >= BLOCK_SIZE) {
+ assert posIn.getFilePointer() != lastPosBlockFP;
+ pforUtil.skip(posIn);
+ toSkip -= BLOCK_SIZE;
+ }
+ refillPositions();
+ posBufferUpto = toSkip;
+ }
+
+ position = 0;
+ }
+
+ @Override
+ public int nextPosition() throws IOException {
+ assert posPendingCount > 0;
+
+ if (posPendingFP != -1) {
+ posIn.seek(posPendingFP);
+ posPendingFP = -1;
+
+ // Force buffer refill:
+ posBufferUpto = BLOCK_SIZE;
+ }
+
+ if (posPendingCount > freq) {
+ skipPositions();
+ posPendingCount = freq;
+ }
+
+ if (posBufferUpto == BLOCK_SIZE) {
+ refillPositions();
+ posBufferUpto = 0;
+ }
+ position += posDeltaBuffer[posBufferUpto++];
+
+ posPendingCount--;
+ return position;
+ }
+
+ @Override
+ public int startOffset() {
+ return -1;
+ }
+
+ @Override
+ public int endOffset() {
+ return -1;
+ }
+
+ @Override
+ public BytesRef getPayload() {
+ return null;
+ }
+
+ @Override
+ public long cost() {
+ return docFreq;
+ }
+ }
+
+ final class BlockImpactsEverythingEnum extends ImpactsEnum {
+
+ final PForUtil pforUtil = new PForUtil(new ForUtil());
+
+ private final long[] docBuffer = new long[BLOCK_SIZE];
+ private final long[] freqBuffer = new long[BLOCK_SIZE];
+ private final long[] posDeltaBuffer = new long[BLOCK_SIZE];
+
+ private final long[] payloadLengthBuffer;
+ private final long[] offsetStartDeltaBuffer;
+ private final long[] offsetLengthBuffer;
+
+ private byte[] payloadBytes;
+ private int payloadByteUpto;
+ private int payloadLength;
+
+ private int lastStartOffset;
+ private int startOffset = -1;
+ private int endOffset = -1;
+
+ private int docBufferUpto;
+ private int posBufferUpto;
+
+ private final Lucene90ScoreSkipReader skipper;
+
+ final IndexInput docIn;
+ final IndexInput posIn;
+ final IndexInput payIn;
+ final BytesRef payload;
+
+ final boolean indexHasFreq;
+ final boolean indexHasPos;
+ final boolean indexHasOffsets;
+ final boolean indexHasPayloads;
+
+ private int docFreq; // number of docs in this posting list
+ private long totalTermFreq; // number of positions in this posting list
+ private int docUpto; // how many docs we've read
+ private int posDocUpTo; // for how many docs we've read positions, offsets, and payloads
+ private int doc; // doc we last read
+ private long accum; // accumulator for doc deltas
+ private int position; // current position
+
+ // how many positions "behind" we are; nextPosition must
+ // skip these to "catch up":
+ private int posPendingCount;
+
+ // Lazy pos seek: if != -1 then we must seek to this FP
+ // before reading positions:
+ private long posPendingFP;
+
+ // Lazy pay seek: if != -1 then we must seek to this FP
+ // before reading payloads/offsets:
+ private long payPendingFP;
+
+ // Where this term's postings start in the .doc file:
+ private long docTermStartFP;
+
+ // Where this term's postings start in the .pos file:
+ private long posTermStartFP;
+
+ // Where this term's payloads/offsets start in the .pay
+ // file:
+ private long payTermStartFP;
+
+ // File pointer where the last (vInt encoded) pos delta
+ // block is. We need this to know whether to bulk
+ // decode vs vInt decode the block:
+ private long lastPosBlockFP;
+
+ private int nextSkipDoc = -1;
+
+ private final boolean needsPositions;
+ private final boolean needsOffsets; // true if we actually need offsets
+ private final boolean needsPayloads; // true if we actually need payloads
+
+ private boolean
+ isFreqsRead; // whether the freqs for the current doc block have been read into freqBuffer
+
+ private long seekTo = -1;
+
+ public BlockImpactsEverythingEnum(FieldInfo fieldInfo, IntBlockTermState termState, int flags)
+ throws IOException {
+ indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+ indexHasPos =
+ fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+ indexHasOffsets =
+ fieldInfo
+ .getIndexOptions()
+ .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
+ >= 0;
+ indexHasPayloads = fieldInfo.hasPayloads();
+
+ needsPositions = PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS);
+ needsOffsets = PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS);
+ needsPayloads = PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS);
+
+ this.docIn = Lucene90PostingsReader.this.docIn.clone();
+
+ if (indexHasPos && needsPositions) {
+ this.posIn = Lucene90PostingsReader.this.posIn.clone();
+ } else {
+ this.posIn = null;
+ }
+
+ if ((indexHasOffsets && needsOffsets) || (indexHasPayloads && needsPayloads)) {
+ this.payIn = Lucene90PostingsReader.this.payIn.clone();
+ } else {
+ this.payIn = null;
+ }
+
+ if (indexHasOffsets) {
+ offsetStartDeltaBuffer = new long[BLOCK_SIZE];
+ offsetLengthBuffer = new long[BLOCK_SIZE];
+ } else {
+ offsetStartDeltaBuffer = null;
+ offsetLengthBuffer = null;
+ startOffset = -1;
+ endOffset = -1;
+ }
+
+ if (indexHasPayloads) {
+ payloadLengthBuffer = new long[BLOCK_SIZE];
+ payloadBytes = new byte[128];
+ payload = new BytesRef();
+ } else {
+ payloadLengthBuffer = null;
+ payloadBytes = null;
+ payload = null;
+ }
+
+ docFreq = termState.docFreq;
+ docTermStartFP = termState.docStartFP;
+ posTermStartFP = termState.posStartFP;
+ payTermStartFP = termState.payStartFP;
+ totalTermFreq = termState.totalTermFreq;
+ docIn.seek(docTermStartFP);
+ posPendingFP = posTermStartFP;
+ payPendingFP = payTermStartFP;
+ posPendingCount = 0;
+ if (termState.totalTermFreq < BLOCK_SIZE) {
+ lastPosBlockFP = posTermStartFP;
+ } else if (termState.totalTermFreq == BLOCK_SIZE) {
+ lastPosBlockFP = -1;
+ } else {
+ lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset;
+ }
+
+ doc = -1;
+ accum = 0;
+ docUpto = 0;
+ posDocUpTo = 0;
+ isFreqsRead = true;
+ docBufferUpto = BLOCK_SIZE;
+
+ skipper =
+ new Lucene90ScoreSkipReader(
+ docIn.clone(), MAX_SKIP_LEVELS, indexHasPos, indexHasOffsets, indexHasPayloads);
+ skipper.init(
+ docTermStartFP + termState.skipOffset,
+ docTermStartFP,
+ posTermStartFP,
+ payTermStartFP,
+ docFreq);
+
+ if (indexHasFreq == false) {
+ for (int i = 0; i < ForUtil.BLOCK_SIZE; ++i) {
+ freqBuffer[i] = 1;
+ }
+ }
+ }
+
+ @Override
+ public int freq() throws IOException {
+ if (indexHasFreq && (isFreqsRead == false)) {
+ pforUtil.decode(docIn, freqBuffer); // read freqBuffer for this block
+ isFreqsRead = true;
+ }
+ return (int) freqBuffer[docBufferUpto - 1];
+ }
+
+ @Override
+ public int docID() {
+ return doc;
+ }
+
+ private void refillDocs() throws IOException {
+ if (indexHasFreq) {
+ if (isFreqsRead == false) { // previous freq block was not read
+ // check if we need to load the previous freq block to catch up on positions or we can
+ // skip it
+ if (indexHasPos && needsPositions && (posDocUpTo < docUpto)) {
+ pforUtil.decode(docIn, freqBuffer); // load the previous freq block
+ } else {
+ pforUtil.skip(docIn); // skip it
+ }
+ isFreqsRead = true;
+ }
+ if (indexHasPos && needsPositions) {
+ while (posDocUpTo
+ < docUpto) { // catch up on positions, bring posPendingCount up to the current doc
+ posPendingCount += freqBuffer[docBufferUpto - (docUpto - posDocUpTo)];
+ posDocUpTo++;
+ }
+ }
+ }
+
+ final int left = docFreq - docUpto;
+ assert left >= 0;
+
+ if (left >= BLOCK_SIZE) {
+ pforUtil.decodeAndPrefixSum(docIn, accum, docBuffer);
+ if (indexHasFreq) {
+ isFreqsRead =
+ false; // freq block will be loaded lazily when necessary; we don't load it here
+ }
+ } else {
+ readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq);
+ prefixSum(docBuffer, left, accum);
+ docBuffer[left] = NO_MORE_DOCS;
+ }
+ accum = docBuffer[BLOCK_SIZE - 1];
+ docBufferUpto = 0;
+ }
+
+ private void refillPositions() throws IOException {
+ if (posIn.getFilePointer() == lastPosBlockFP) {
+ final int count = (int) (totalTermFreq % BLOCK_SIZE);
+ int payloadLength = 0;
+ int offsetLength = 0;
+ payloadByteUpto = 0;
+ for (int i = 0; i < count; i++) {
+ int code = posIn.readVInt();
+ if (indexHasPayloads) {
+ if ((code & 1) != 0) {
+ payloadLength = posIn.readVInt();
+ }
+ payloadLengthBuffer[i] = payloadLength;
+ posDeltaBuffer[i] = code >>> 1;
+ if (payloadLength != 0) {
+ if (payloadByteUpto + payloadLength > payloadBytes.length) {
+ payloadBytes = ArrayUtil.grow(payloadBytes, payloadByteUpto + payloadLength);
+ }
+ posIn.readBytes(payloadBytes, payloadByteUpto, payloadLength);
+ payloadByteUpto += payloadLength;
+ }
+ } else {
+ posDeltaBuffer[i] = code;
+ }
+
+ if (indexHasOffsets) {
+ int deltaCode = posIn.readVInt();
+ if ((deltaCode & 1) != 0) {
+ offsetLength = posIn.readVInt();
+ }
+ offsetStartDeltaBuffer[i] = deltaCode >>> 1;
+ offsetLengthBuffer[i] = offsetLength;
+ }
+ }
+ payloadByteUpto = 0;
+ } else {
+ pforUtil.decode(posIn, posDeltaBuffer);
+
+ if (indexHasPayloads && payIn != null) {
+ if (needsPayloads) {
+ pforUtil.decode(payIn, payloadLengthBuffer);
+ int numBytes = payIn.readVInt();
+
+ if (numBytes > payloadBytes.length) {
+ payloadBytes = ArrayUtil.growNoCopy(payloadBytes, numBytes);
+ }
+ payIn.readBytes(payloadBytes, 0, numBytes);
+ } else {
+ // this works, because when writing a vint block we always force the first length to be
+ // written
+ pforUtil.skip(payIn); // skip over lengths
+ int numBytes = payIn.readVInt(); // read length of payloadBytes
+ payIn.seek(payIn.getFilePointer() + numBytes); // skip over payloadBytes
+ }
+ payloadByteUpto = 0;
+ }
+
+ if (indexHasOffsets && payIn != null) {
+ if (needsOffsets) {
+ pforUtil.decode(payIn, offsetStartDeltaBuffer);
+ pforUtil.decode(payIn, offsetLengthBuffer);
+ } else {
+ // this works, because when writing a vint block we always force the first length to be
+ // written
+ pforUtil.skip(payIn); // skip over starts
+ pforUtil.skip(payIn); // skip over lengths
+ }
+ }
+ }
+ }
+
+ @Override
+ public void advanceShallow(int target) throws IOException {
+ if (target > nextSkipDoc) {
+ // always plus one to fix the result, since skip position in Lucene90SkipReader
+ // is a little different from MultiLevelSkipListReader
+ final int newDocUpto = skipper.skipTo(target) + 1;
+
+ if (newDocUpto > docUpto) {
+ // Skipper moved
+ assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
+ docUpto = newDocUpto;
+ posDocUpTo = docUpto;
+
+ // Force to read next block
+ docBufferUpto = BLOCK_SIZE;
+ accum = skipper.getDoc();
+ posPendingFP = skipper.getPosPointer();
+ payPendingFP = skipper.getPayPointer();
+ posPendingCount = skipper.getPosBufferUpto();
+ lastStartOffset = 0; // new document
+ payloadByteUpto = skipper.getPayloadByteUpto(); // actually, this is just lastSkipEntry
+ seekTo = skipper.getDocPointer(); // delay the seek
+ }
+ // next time we call advance, this is used to
+ // foresee whether skipper is necessary.
+ nextSkipDoc = skipper.getNextSkipDoc();
+ }
+ assert nextSkipDoc >= target;
+ }
+
+ @Override
+ public Impacts getImpacts() throws IOException {
+ advanceShallow(doc);
+ return skipper.getImpacts();
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ return advance(doc + 1);
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ if (target > nextSkipDoc) {
+ advanceShallow(target);
+ }
+ if (docBufferUpto == BLOCK_SIZE) {
+ if (seekTo >= 0) {
+ docIn.seek(seekTo);
+ seekTo = -1;
+ isFreqsRead = true; // reset isFreqsRead
+ }
+ refillDocs();
+ }
+
+ // Now scan:
+ long doc;
+ while (true) {
+ doc = docBuffer[docBufferUpto];
+ docBufferUpto++;
+ docUpto++;
+
+ if (doc >= target) {
+ break;
+ }
+
+ if (docBufferUpto == BLOCK_SIZE) {
+ return this.doc = NO_MORE_DOCS;
+ }
+ }
+ position = 0;
+ lastStartOffset = 0;
+
+ return this.doc = (int) doc;
+ }
+
+ // TODO: in theory we could avoid loading frq block
+ // when not needed, ie, use skip data to load how far to
+ // seek the pos pointer ... instead of having to load frq
+ // blocks only to sum up how many positions to skip
+ private void skipPositions() throws IOException {
+ // Skip positions now:
+ int toSkip = posPendingCount - (int) freqBuffer[docBufferUpto - 1];
+ // if (DEBUG) {
+ // System.out.println(" FPR.skipPositions: toSkip=" + toSkip);
+ // }
+
+ final int leftInBlock = BLOCK_SIZE - posBufferUpto;
+ if (toSkip < leftInBlock) {
+ int end = posBufferUpto + toSkip;
+ while (posBufferUpto < end) {
+ if (indexHasPayloads) {
+ payloadByteUpto += payloadLengthBuffer[posBufferUpto];
+ }
+ posBufferUpto++;
+ }
+ } else {
+ toSkip -= leftInBlock;
+ while (toSkip >= BLOCK_SIZE) {
+ assert posIn.getFilePointer() != lastPosBlockFP;
+ pforUtil.skip(posIn);
+
+ if (indexHasPayloads && payIn != null) {
+ // Skip payloadLength block:
+ pforUtil.skip(payIn);
+
+ // Skip payloadBytes block:
+ int numBytes = payIn.readVInt();
+ payIn.seek(payIn.getFilePointer() + numBytes);
+ }
+
+ if (indexHasOffsets && payIn != null) {
+ pforUtil.skip(payIn);
+ pforUtil.skip(payIn);
+ }
+ toSkip -= BLOCK_SIZE;
+ }
+ refillPositions();
+ payloadByteUpto = 0;
+ posBufferUpto = 0;
+ while (posBufferUpto < toSkip) {
+ if (indexHasPayloads) {
+ payloadByteUpto += payloadLengthBuffer[posBufferUpto];
+ }
+ posBufferUpto++;
+ }
+ }
+
+ position = 0;
+ lastStartOffset = 0;
+ }
+
+ @Override
+ public int nextPosition() throws IOException {
+ if (indexHasPos == false || needsPositions == false) {
+ return -1;
+ }
+
+ if (isFreqsRead == false) {
+ pforUtil.decode(docIn, freqBuffer); // read freqBuffer for this docs block
+ isFreqsRead = true;
+ }
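+ // Catch-up illustration: if docUpto has moved, say, 3 docs past posDocUpTo, the loop below adds
+ // the freqs of those 3 docs (read from freqBuffer) to posPendingCount, so that skipPositions()
+ // can later skip over their positions before positions of the current doc are returned.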
+ while (posDocUpTo < docUpto) { // bring posPendingCount upto the current doc
+ posPendingCount += freqBuffer[docBufferUpto - (docUpto - posDocUpTo)];
+ posDocUpTo++;
+ }
+
+ assert posPendingCount > 0;
+
+ if (posPendingFP != -1) {
+ posIn.seek(posPendingFP);
+ posPendingFP = -1;
+
+ if (payPendingFP != -1 && payIn != null) {
+ payIn.seek(payPendingFP);
+ payPendingFP = -1;
+ }
+
+ // Force buffer refill:
+ posBufferUpto = BLOCK_SIZE;
+ }
+
+ if (posPendingCount > freqBuffer[docBufferUpto - 1]) {
+ skipPositions();
+ posPendingCount = (int) freqBuffer[docBufferUpto - 1];
+ }
+
+ if (posBufferUpto == BLOCK_SIZE) {
+ refillPositions();
+ posBufferUpto = 0;
+ }
+ position += posDeltaBuffer[posBufferUpto];
+
+ if (indexHasPayloads) {
+ payloadLength = (int) payloadLengthBuffer[posBufferUpto];
+ payload.bytes = payloadBytes;
+ payload.offset = payloadByteUpto;
+ payload.length = payloadLength;
+ payloadByteUpto += payloadLength;
+ }
+
+ if (indexHasOffsets && needsOffsets) {
+ startOffset = lastStartOffset + (int) offsetStartDeltaBuffer[posBufferUpto];
+ endOffset = startOffset + (int) offsetLengthBuffer[posBufferUpto];
+ lastStartOffset = startOffset;
+ }
+
+ posBufferUpto++;
+ posPendingCount--;
+ return position;
+ }
+
+ @Override
+ public int startOffset() {
+ return startOffset;
+ }
+
+ @Override
+ public int endOffset() {
+ return endOffset;
+ }
+
+ @Override
+ public BytesRef getPayload() {
+ if (payloadLength == 0) {
+ return null;
+ } else {
+ return payload;
+ }
+ }
+
+ @Override
+ public long cost() {
+ return docFreq;
+ }
+ }
+
+ @Override
+ public void checkIntegrity() throws IOException {
+ if (docIn != null) {
+ CodecUtil.checksumEntireFile(docIn);
+ }
+ if (posIn != null) {
+ CodecUtil.checksumEntireFile(posIn);
+ }
+ if (payIn != null) {
+ CodecUtil.checksumEntireFile(payIn);
+ }
+ }
+
+ @Override
+ public String toString() {
+ return getClass().getSimpleName()
+ + "(positions="
+ + (posIn != null)
+ + ",payloads="
+ + (payIn != null)
+ + ")";
+ }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PostingsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PostingsWriter.java
new file mode 100644
index 000000000000..f640d8d2c573
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PostingsWriter.java
@@ -0,0 +1,536 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene90;
+
+import static org.apache.lucene.codecs.lucene90.ForUtil.BLOCK_SIZE;
+import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.DOC_CODEC;
+import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.MAX_SKIP_LEVELS;
+import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.PAY_CODEC;
+import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.POS_CODEC;
+import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.TERMS_CODEC;
+import static org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.VERSION_CURRENT;
+
+import java.io.IOException;
+import org.apache.lucene.codecs.BlockTermState;
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.CompetitiveImpactAccumulator;
+import org.apache.lucene.codecs.PushPostingsWriterBase;
+import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat.IntBlockTermState;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BitUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+
+/**
+ * Concrete class that writes the docId (and optionally freq, pos, offset, payload) lists in the
+ * postings format.
+ *
+ * The postings list for each term is stored separately.
+ *
+ * @see Lucene90SkipWriter for details about skipping setting and postings layout.
+ * @lucene.experimental
+ */
+public final class Lucene90PostingsWriter extends PushPostingsWriterBase {
+
+ IndexOutput docOut;
+ IndexOutput posOut;
+ IndexOutput payOut;
+
+ static final IntBlockTermState emptyState = new IntBlockTermState();
+ IntBlockTermState lastState;
+
+ // Holds starting file pointers for current term:
+ private long docStartFP;
+ private long posStartFP;
+ private long payStartFP;
+
+ final long[] docDeltaBuffer;
+ final long[] freqBuffer;
+ private int docBufferUpto;
+
+ final long[] posDeltaBuffer;
+ final long[] payloadLengthBuffer;
+ final long[] offsetStartDeltaBuffer;
+ final long[] offsetLengthBuffer;
+ private int posBufferUpto;
+
+ private byte[] payloadBytes;
+ private int payloadByteUpto;
+
+ private int lastBlockDocID;
+ private long lastBlockPosFP;
+ private long lastBlockPayFP;
+ private int lastBlockPosBufferUpto;
+ private int lastBlockPayloadByteUpto;
+
+ private int lastDocID;
+ private int lastPosition;
+ private int lastStartOffset;
+ private int docCount;
+
+ private final PForUtil pforUtil;
+ private final Lucene90SkipWriter skipWriter;
+
+ private boolean fieldHasNorms;
+ private NumericDocValues norms;
+ private final CompetitiveImpactAccumulator competitiveFreqNormAccumulator =
+ new CompetitiveImpactAccumulator();
+
+ /** Creates a postings writer */
+ public Lucene90PostingsWriter(SegmentWriteState state) throws IOException {
+
+ String docFileName =
+ IndexFileNames.segmentFileName(
+ state.segmentInfo.name, state.segmentSuffix, Lucene90PostingsFormat.DOC_EXTENSION);
+ docOut = state.directory.createOutput(docFileName, state.context);
+ IndexOutput posOut = null;
+ IndexOutput payOut = null;
+ boolean success = false;
+ try {
+ CodecUtil.writeIndexHeader(
+ docOut, DOC_CODEC, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
+ pforUtil = new PForUtil(new ForUtil());
+ if (state.fieldInfos.hasProx()) {
+ posDeltaBuffer = new long[BLOCK_SIZE];
+ String posFileName =
+ IndexFileNames.segmentFileName(
+ state.segmentInfo.name, state.segmentSuffix, Lucene90PostingsFormat.POS_EXTENSION);
+ posOut = state.directory.createOutput(posFileName, state.context);
+ CodecUtil.writeIndexHeader(
+ posOut, POS_CODEC, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
+
+ if (state.fieldInfos.hasPayloads()) {
+ payloadBytes = new byte[128];
+ payloadLengthBuffer = new long[BLOCK_SIZE];
+ } else {
+ payloadBytes = null;
+ payloadLengthBuffer = null;
+ }
+
+ if (state.fieldInfos.hasOffsets()) {
+ offsetStartDeltaBuffer = new long[BLOCK_SIZE];
+ offsetLengthBuffer = new long[BLOCK_SIZE];
+ } else {
+ offsetStartDeltaBuffer = null;
+ offsetLengthBuffer = null;
+ }
+
+ if (state.fieldInfos.hasPayloads() || state.fieldInfos.hasOffsets()) {
+ String payFileName =
+ IndexFileNames.segmentFileName(
+ state.segmentInfo.name,
+ state.segmentSuffix,
+ Lucene90PostingsFormat.PAY_EXTENSION);
+ payOut = state.directory.createOutput(payFileName, state.context);
+ CodecUtil.writeIndexHeader(
+ payOut, PAY_CODEC, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
+ }
+ } else {
+ posDeltaBuffer = null;
+ payloadLengthBuffer = null;
+ offsetStartDeltaBuffer = null;
+ offsetLengthBuffer = null;
+ payloadBytes = null;
+ }
+ this.payOut = payOut;
+ this.posOut = posOut;
+ success = true;
+ } finally {
+ if (!success) {
+ IOUtils.closeWhileHandlingException(docOut, posOut, payOut);
+ }
+ }
+
+ docDeltaBuffer = new long[BLOCK_SIZE];
+ freqBuffer = new long[BLOCK_SIZE];
+
+ // TODO: should we try skipping every 2/4 blocks...?
+ skipWriter =
+ new Lucene90SkipWriter(
+ MAX_SKIP_LEVELS, BLOCK_SIZE, state.segmentInfo.maxDoc(), docOut, posOut, payOut);
+ }
+
+ @Override
+ public IntBlockTermState newTermState() {
+ return new IntBlockTermState();
+ }
+
+ @Override
+ public void init(IndexOutput termsOut, SegmentWriteState state) throws IOException {
+ CodecUtil.writeIndexHeader(
+ termsOut, TERMS_CODEC, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
+ termsOut.writeVInt(BLOCK_SIZE);
+ }
+
+ @Override
+ public void setField(FieldInfo fieldInfo) {
+ super.setField(fieldInfo);
+ skipWriter.setField(writePositions, writeOffsets, writePayloads);
+ lastState = emptyState;
+ fieldHasNorms = fieldInfo.hasNorms();
+ }
+
+ @Override
+ public void startTerm(NumericDocValues norms) {
+ docStartFP = docOut.getFilePointer();
+ if (writePositions) {
+ posStartFP = posOut.getFilePointer();
+ if (writePayloads || writeOffsets) {
+ payStartFP = payOut.getFilePointer();
+ }
+ }
+ lastDocID = 0;
+ lastBlockDocID = -1;
+ skipWriter.resetSkip();
+ this.norms = norms;
+ competitiveFreqNormAccumulator.clear();
+ }
+
+ @Override
+ public void startDoc(int docID, int termDocFreq) throws IOException {
+ // Have collected a block of docs, and get a new doc.
+ // Should write skip data as well as postings list for
+ // current block.
+ if (lastBlockDocID != -1 && docBufferUpto == 0) {
+ skipWriter.bufferSkip(
+ lastBlockDocID,
+ competitiveFreqNormAccumulator,
+ docCount,
+ lastBlockPosFP,
+ lastBlockPayFP,
+ lastBlockPosBufferUpto,
+ lastBlockPayloadByteUpto);
+ competitiveFreqNormAccumulator.clear();
+ }
+
+ final int docDelta = docID - lastDocID;
+
+ if (docID < 0 || (docCount > 0 && docDelta <= 0)) {
+ throw new CorruptIndexException(
+ "docs out of order (" + docID + " <= " + lastDocID + " )", docOut);
+ }
+
+ docDeltaBuffer[docBufferUpto] = docDelta;
+ if (writeFreqs) {
+ freqBuffer[docBufferUpto] = termDocFreq;
+ }
+
+ docBufferUpto++;
+ docCount++;
+
+ if (docBufferUpto == BLOCK_SIZE) {
+ pforUtil.encode(docDeltaBuffer, docOut);
+ if (writeFreqs) {
+ pforUtil.encode(freqBuffer, docOut);
+ }
+ // NOTE: don't set docBufferUpto back to 0 here;
+ // finishDoc will do so (because it needs to see that
+ // the block was filled so it can save skip data)
+ }
+
+ lastDocID = docID;
+ lastPosition = 0;
+ lastStartOffset = 0;
+
+ long norm;
+ if (fieldHasNorms) {
+ boolean found = norms.advanceExact(docID);
+ if (found == false) {
+ // This can happen if indexing hits a problem after adding a doc to the
+ // postings but before buffering the norm. Such documents are written
+ // deleted and will go away on the first merge.
+ norm = 1L;
+ } else {
+ norm = norms.longValue();
+ assert norm != 0 : docID;
+ }
+ } else {
+ norm = 1L;
+ }
+
+ competitiveFreqNormAccumulator.add(writeFreqs ? termDocFreq : 1, norm);
+ }
+
+ @Override
+ public void addPosition(int position, BytesRef payload, int startOffset, int endOffset)
+ throws IOException {
+ if (position > IndexWriter.MAX_POSITION) {
+ throw new CorruptIndexException(
+ "position="
+ + position
+ + " is too large (> IndexWriter.MAX_POSITION="
+ + IndexWriter.MAX_POSITION
+ + ")",
+ docOut);
+ }
+ if (position < 0) {
+ throw new CorruptIndexException("position=" + position + " is < 0", docOut);
+ }
+ posDeltaBuffer[posBufferUpto] = position - lastPosition;
+ if (writePayloads) {
+ if (payload == null || payload.length == 0) {
+ // no payload
+ payloadLengthBuffer[posBufferUpto] = 0;
+ } else {
+ payloadLengthBuffer[posBufferUpto] = payload.length;
+ if (payloadByteUpto + payload.length > payloadBytes.length) {
+ payloadBytes = ArrayUtil.grow(payloadBytes, payloadByteUpto + payload.length);
+ }
+ System.arraycopy(
+ payload.bytes, payload.offset, payloadBytes, payloadByteUpto, payload.length);
+ payloadByteUpto += payload.length;
+ }
+ }
+
+ if (writeOffsets) {
+ assert startOffset >= lastStartOffset;
+ assert endOffset >= startOffset;
+ offsetStartDeltaBuffer[posBufferUpto] = startOffset - lastStartOffset;
+ offsetLengthBuffer[posBufferUpto] = endOffset - startOffset;
+ lastStartOffset = startOffset;
+ }
+
+ posBufferUpto++;
+ lastPosition = position;
+ if (posBufferUpto == BLOCK_SIZE) {
+ pforUtil.encode(posDeltaBuffer, posOut);
+
+ if (writePayloads) {
+ pforUtil.encode(payloadLengthBuffer, payOut);
+ payOut.writeVInt(payloadByteUpto);
+ payOut.writeBytes(payloadBytes, 0, payloadByteUpto);
+ payloadByteUpto = 0;
+ }
+ if (writeOffsets) {
+ pforUtil.encode(offsetStartDeltaBuffer, payOut);
+ pforUtil.encode(offsetLengthBuffer, payOut);
+ }
+ posBufferUpto = 0;
+ }
+ }
+
+ @Override
+ public void finishDoc() throws IOException {
+ // Since we don't know df for current term, we had to buffer
+ // those skip data for each block, and when a new doc comes,
+ // write them to skip file.
+ if (docBufferUpto == BLOCK_SIZE) {
+ lastBlockDocID = lastDocID;
+ if (posOut != null) {
+ if (payOut != null) {
+ lastBlockPayFP = payOut.getFilePointer();
+ }
+ lastBlockPosFP = posOut.getFilePointer();
+ lastBlockPosBufferUpto = posBufferUpto;
+ lastBlockPayloadByteUpto = payloadByteUpto;
+ }
+ docBufferUpto = 0;
+ }
+ }
+
+ /** Called when we are done adding docs to this term */
+ @Override
+ public void finishTerm(BlockTermState _state) throws IOException {
+ IntBlockTermState state = (IntBlockTermState) _state;
+ assert state.docFreq > 0;
+
+ // TODO: wasteful we are counting this (counting # docs
+ // for this term) in two places?
+ assert state.docFreq == docCount : state.docFreq + " vs " + docCount;
+
+ // When docFreq == 1, don't write the single docid/freq to a separate file along with a pointer
+ // to it.
+ final int singletonDocID;
+ if (state.docFreq == 1) {
+ // pulse the singleton docid into the term dictionary, freq is implicitly totalTermFreq
+ singletonDocID = (int) docDeltaBuffer[0];
+ } else {
+ singletonDocID = -1;
+ // vInt encode the remaining doc deltas and freqs:
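+ // For example, with writeFreqs == true, a doc delta of 3 with freq == 1 is written as the
+ // single vInt 7 ((3 << 1) | 1), while the same delta with freq == 5 is written as vInt 6
+ // (3 << 1) followed by vInt 5.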
+ for (int i = 0; i < docBufferUpto; i++) {
+ final int docDelta = (int) docDeltaBuffer[i];
+ final int freq = (int) freqBuffer[i];
+ if (!writeFreqs) {
+ docOut.writeVInt(docDelta);
+ } else if (freq == 1) {
+ docOut.writeVInt((docDelta << 1) | 1);
+ } else {
+ docOut.writeVInt(docDelta << 1);
+ docOut.writeVInt(freq);
+ }
+ }
+ }
+
+ final long lastPosBlockOffset;
+
+ if (writePositions) {
+ // totalTermFreq is just total number of positions(or payloads, or offsets)
+ // associated with current term.
+ assert state.totalTermFreq != -1;
+ if (state.totalTermFreq > BLOCK_SIZE) {
+ // record file offset for last pos in last block
+ lastPosBlockOffset = posOut.getFilePointer() - posStartFP;
+ } else {
+ lastPosBlockOffset = -1;
+ }
+ if (posBufferUpto > 0) {
+ // TODO: should we send offsets/payloads to
+ // .pay...? seems wasteful (have to store extra
+ // vLong for low (< BLOCK_SIZE) DF terms = vast vast
+ // majority)
+
+ // vInt encode the remaining positions/payloads/offsets:
+ int lastPayloadLength = -1; // force first payload length to be written
+ int lastOffsetLength = -1; // force first offset length to be written
+ int payloadBytesReadUpto = 0;
+ for (int i = 0; i < posBufferUpto; i++) {
+ final int posDelta = (int) posDeltaBuffer[i];
+ if (writePayloads) {
+ final int payloadLength = (int) payloadLengthBuffer[i];
+ if (payloadLength != lastPayloadLength) {
+ lastPayloadLength = payloadLength;
+ posOut.writeVInt((posDelta << 1) | 1);
+ posOut.writeVInt(payloadLength);
+ } else {
+ posOut.writeVInt(posDelta << 1);
+ }
+
+ if (payloadLength != 0) {
+ posOut.writeBytes(payloadBytes, payloadBytesReadUpto, payloadLength);
+ payloadBytesReadUpto += payloadLength;
+ }
+ } else {
+ posOut.writeVInt(posDelta);
+ }
+
+ if (writeOffsets) {
+ int delta = (int) offsetStartDeltaBuffer[i];
+ int length = (int) offsetLengthBuffer[i];
+ if (length == lastOffsetLength) {
+ posOut.writeVInt(delta << 1);
+ } else {
+ posOut.writeVInt(delta << 1 | 1);
+ posOut.writeVInt(length);
+ lastOffsetLength = length;
+ }
+ }
+ }
+
+ if (writePayloads) {
+ assert payloadBytesReadUpto == payloadByteUpto;
+ payloadByteUpto = 0;
+ }
+ }
+ } else {
+ lastPosBlockOffset = -1;
+ }
+
+ long skipOffset;
+ if (docCount > BLOCK_SIZE) {
+ skipOffset = skipWriter.writeSkip(docOut) - docStartFP;
+ } else {
+ skipOffset = -1;
+ }
+
+ state.docStartFP = docStartFP;
+ state.posStartFP = posStartFP;
+ state.payStartFP = payStartFP;
+ state.singletonDocID = singletonDocID;
+ state.skipOffset = skipOffset;
+ state.lastPosBlockOffset = lastPosBlockOffset;
+ docBufferUpto = 0;
+ posBufferUpto = 0;
+ lastDocID = 0;
+ docCount = 0;
+ }
+
+ @Override
+ public void encodeTerm(
+ DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute)
+ throws IOException {
+ IntBlockTermState state = (IntBlockTermState) _state;
+ if (absolute) {
+ lastState = emptyState;
+ assert lastState.docStartFP == 0;
+ }
+
+ if (lastState.singletonDocID != -1
+ && state.singletonDocID != -1
+ && state.docStartFP == lastState.docStartFP) {
+ // With runs of rare values such as ID fields, the increment of pointers in the docs file is
+ // often 0. Furthermore, some ID schemes like auto-increment IDs or Flake IDs are monotonic,
+ // so we encode the delta between consecutive doc IDs to save space.
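+ // For example, if lastState.singletonDocID == 5 and state.singletonDocID == 9, the delta is 4,
+ // which zig-zag encodes to 8, so we write the vLong 17 ((8 << 1) | 0x01); the low bit marks the
+ // "same docStartFP, both singletons" case for the reader.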
+ final long delta = (long) state.singletonDocID - lastState.singletonDocID;
+ out.writeVLong((BitUtil.zigZagEncode(delta) << 1) | 0x01);
+ } else {
+ out.writeVLong((state.docStartFP - lastState.docStartFP) << 1);
+ if (state.singletonDocID != -1) {
+ out.writeVInt(state.singletonDocID);
+ }
+ }
+
+ if (writePositions) {
+ out.writeVLong(state.posStartFP - lastState.posStartFP);
+ if (writePayloads || writeOffsets) {
+ out.writeVLong(state.payStartFP - lastState.payStartFP);
+ }
+ }
+ if (writePositions) {
+ if (state.lastPosBlockOffset != -1) {
+ out.writeVLong(state.lastPosBlockOffset);
+ }
+ }
+ if (state.skipOffset != -1) {
+ out.writeVLong(state.skipOffset);
+ }
+ lastState = state;
+ }
+
+ @Override
+ public void close() throws IOException {
+ // TODO: add a finish() at least to PushBase? DV too...?
+ boolean success = false;
+ try {
+ if (docOut != null) {
+ CodecUtil.writeFooter(docOut);
+ }
+ if (posOut != null) {
+ CodecUtil.writeFooter(posOut);
+ }
+ if (payOut != null) {
+ CodecUtil.writeFooter(payOut);
+ }
+ success = true;
+ } finally {
+ if (success) {
+ IOUtils.close(docOut, posOut, payOut);
+ } else {
+ IOUtils.closeWhileHandlingException(docOut, posOut, payOut);
+ }
+ docOut = posOut = payOut = null;
+ }
+ }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90ScoreSkipReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90ScoreSkipReader.java
new file mode 100644
index 000000000000..44789a983344
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90ScoreSkipReader.java
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene90;
+
+import java.io.IOException;
+import java.util.AbstractList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.RandomAccess;
+import org.apache.lucene.index.Impact;
+import org.apache.lucene.index.Impacts;
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;
+
+final class Lucene90ScoreSkipReader extends Lucene90SkipReader {
+
+ private final byte[][] impactData;
+ private final int[] impactDataLength;
+ private final ByteArrayDataInput badi = new ByteArrayDataInput();
+ private final Impacts impacts;
+ private int numLevels = 1;
+ private final MutableImpactList[] perLevelImpacts;
+
+ public Lucene90ScoreSkipReader(
+ IndexInput skipStream,
+ int maxSkipLevels,
+ boolean hasPos,
+ boolean hasOffsets,
+ boolean hasPayloads) {
+ super(skipStream, maxSkipLevels, hasPos, hasOffsets, hasPayloads);
+ this.impactData = new byte[maxSkipLevels][];
+ Arrays.fill(impactData, new byte[0]);
+ this.impactDataLength = new int[maxSkipLevels];
+ this.perLevelImpacts = new MutableImpactList[maxSkipLevels];
+ for (int i = 0; i < perLevelImpacts.length; ++i) {
+ perLevelImpacts[i] = new MutableImpactList();
+ }
+ impacts =
+ new Impacts() {
+
+ @Override
+ public int numLevels() {
+ return numLevels;
+ }
+
+ @Override
+ public int getDocIdUpTo(int level) {
+ return skipDoc[level];
+ }
+
+ @Override
+ public List<Impact> getImpacts(int level) {
+ assert level < numLevels;
+ if (impactDataLength[level] > 0) {
+ badi.reset(impactData[level], 0, impactDataLength[level]);
+ perLevelImpacts[level] = readImpacts(badi, perLevelImpacts[level]);
+ impactDataLength[level] = 0;
+ }
+ return perLevelImpacts[level];
+ }
+ };
+ }
+
+ @Override
+ public int skipTo(int target) throws IOException {
+ int result = super.skipTo(target);
+ if (numberOfSkipLevels > 0) {
+ numLevels = numberOfSkipLevels;
+ } else {
+ // End of postings don't have skip data anymore, so we fill with dummy data
+ // like SlowImpactsEnum.
+ numLevels = 1;
+ perLevelImpacts[0].length = 1;
+ perLevelImpacts[0].impacts[0].freq = Integer.MAX_VALUE;
+ perLevelImpacts[0].impacts[0].norm = 1L;
+ impactDataLength[0] = 0;
+ }
+ return result;
+ }
+
+ Impacts getImpacts() {
+ return impacts;
+ }
+
+ @Override
+ protected void readImpacts(int level, IndexInput skipStream) throws IOException {
+ int length = skipStream.readVInt();
+ if (impactData[level].length < length) {
+ impactData[level] = new byte[ArrayUtil.oversize(length, Byte.BYTES)];
+ }
+ skipStream.readBytes(impactData[level], 0, length);
+ impactDataLength[level] = length;
+ }
+
+ static MutableImpactList readImpacts(ByteArrayDataInput in, MutableImpactList reuse) {
+ int maxNumImpacts = in.length(); // at most one impact per byte
+ if (reuse.impacts.length < maxNumImpacts) {
+ int oldLength = reuse.impacts.length;
+ reuse.impacts = ArrayUtil.grow(reuse.impacts, maxNumImpacts);
+ for (int i = oldLength; i < reuse.impacts.length; ++i) {
+ reuse.impacts[i] = new Impact(Integer.MAX_VALUE, 1L);
+ }
+ }
+
+ int freq = 0;
+ long norm = 0;
+ int length = 0;
+ while (in.getPosition() < in.length()) {
+ int freqDelta = in.readVInt();
+ if ((freqDelta & 0x01) != 0) {
+ freq += 1 + (freqDelta >>> 1);
+ try {
+ norm += 1 + in.readZLong();
+ } catch (IOException e) {
+ throw new RuntimeException(e); // cannot happen on a BADI
+ }
+ } else {
+ freq += 1 + (freqDelta >>> 1);
+ norm++;
+ }
+ Impact impact = reuse.impacts[length];
+ impact.freq = freq;
+ impact.norm = norm;
+ length++;
+ }
+ reuse.length = length;
+ return reuse;
+ }
+
+ static class MutableImpactList extends AbstractList<Impact> implements RandomAccess {
+ int length = 1;
+ Impact[] impacts = new Impact[] {new Impact(Integer.MAX_VALUE, 1L)};
+
+ @Override
+ public Impact get(int index) {
+ return impacts[index];
+ }
+
+ @Override
+ public int size() {
+ return length;
+ }
+ }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90SegmentInfoFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90SegmentInfoFormat.java
new file mode 100644
index 000000000000..fc102e0a608f
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90SegmentInfoFormat.java
@@ -0,0 +1,242 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.codecs.lucene90;
+
+import java.io.IOException;
+import java.util.Map;
+import java.util.Set;
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.SegmentInfoFormat;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.IndexSorter;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentInfos;
+import org.apache.lucene.index.SortFieldProvider;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.store.ChecksumIndexInput;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.Version;
+
+/**
+ * Lucene 9.0 Segment info format.
+ *
+ * Files:
+ *
+ * - .si: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files, Attributes,
+ *   IndexSort, Footer
+ *
+ * Data types:
+ *
+ * - Header --> {@link CodecUtil#writeIndexHeader IndexHeader}
+ * - SegSize --> {@link DataOutput#writeInt Int32}
+ * - SegVersion --> {@link DataOutput#writeString String}
+ * - SegMinVersion --> {@link DataOutput#writeString String}
+ * - Files --> {@link DataOutput#writeSetOfStrings Set<String>}
+ * - Diagnostics,Attributes --> {@link DataOutput#writeMapOfStrings Map<String,String>}
+ * - IsCompoundFile --> {@link DataOutput#writeByte Int8}
+ * - IndexSort --> {@link DataOutput#writeVInt Int32} count, followed by {@code count}
+ *   SortField
+ * - SortField --> {@link DataOutput#writeString String} sort class, followed by a per-sort
+ *   bytestream (see {@link SortFieldProvider#readSortField(DataInput)})
+ * - Footer --> {@link CodecUtil#writeFooter CodecFooter}
+ *
+ * Field Descriptions:
+ *
+ * - SegVersion is the code version that created the segment.
+ * - SegMinVersion is the minimum code version that contributed documents to the segment.
+ * - SegSize is the number of documents contained in the segment index.
+ * - IsCompoundFile records whether the segment is written as a compound file or not. If this is
+ *   -1, the segment is not a compound file. If it is 1, the segment is a compound file.
+ * - The Diagnostics Map is privately written by {@link IndexWriter}, as a debugging aid, for
+ *   each segment it creates. It includes metadata like the current Lucene version, OS, Java
+ *   version, why the segment was created (merge, flush, addIndexes), etc.
+ * - Files is a list of files referred to by this segment.
+ *
+ * @see SegmentInfos
+ * @lucene.experimental
+ */
+public class Lucene90SegmentInfoFormat extends SegmentInfoFormat {
+
+ /** File extension used to store {@link SegmentInfo}. */
+ public static final String SI_EXTENSION = "si";
+
+ static final String CODEC_NAME = "Lucene90SegmentInfo";
+ static final int VERSION_START = 0;
+ static final int VERSION_CURRENT = VERSION_START;
+
+ /** Sole constructor. */
+ public Lucene90SegmentInfoFormat() {}
+
+ @Override
+ public SegmentInfo read(Directory dir, String segment, byte[] segmentID, IOContext context)
+ throws IOException {
+ final String fileName = IndexFileNames.segmentFileName(segment, "", SI_EXTENSION);
+ try (ChecksumIndexInput input = dir.openChecksumInput(fileName, context)) {
+ Throwable priorE = null;
+ SegmentInfo si = null;
+ try {
+ CodecUtil.checkIndexHeader(
+ input, CODEC_NAME, VERSION_START, VERSION_CURRENT, segmentID, "");
+ si = parseSegmentInfo(dir, input, segment, segmentID);
+ } catch (Throwable exception) {
+ priorE = exception;
+ } finally {
+ CodecUtil.checkFooter(input, priorE);
+ }
+ return si;
+ }
+ }
+
+ private SegmentInfo parseSegmentInfo(
+ Directory dir, DataInput input, String segment, byte[] segmentID) throws IOException {
+ final Version version = Version.fromBits(input.readInt(), input.readInt(), input.readInt());
+ byte hasMinVersion = input.readByte();
+ final Version minVersion;
+ switch (hasMinVersion) {
+ case 0:
+ minVersion = null;
+ break;
+ case 1:
+ minVersion = Version.fromBits(input.readInt(), input.readInt(), input.readInt());
+ break;
+ default:
+ throw new CorruptIndexException("Illegal boolean value " + hasMinVersion, input);
+ }
+
+ final int docCount = input.readInt();
+ if (docCount < 0) {
+ throw new CorruptIndexException("invalid docCount: " + docCount, input);
+ }
+ final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
+
+ final Map<String, String> diagnostics = input.readMapOfStrings();
+ final Set<String> files = input.readSetOfStrings();
+ final Map<String, String> attributes = input.readMapOfStrings();
+
+ int numSortFields = input.readVInt();
+ Sort indexSort;
+ if (numSortFields > 0) {
+ SortField[] sortFields = new SortField[numSortFields];
+ for (int i = 0; i < numSortFields; i++) {
+ String name = input.readString();
+ sortFields[i] = SortFieldProvider.forName(name).readSortField(input);
+ }
+ indexSort = new Sort(sortFields);
+ } else if (numSortFields < 0) {
+ throw new CorruptIndexException("invalid index sort field count: " + numSortFields, input);
+ } else {
+ indexSort = null;
+ }
+
+ SegmentInfo si =
+ new SegmentInfo(
+ dir,
+ version,
+ minVersion,
+ segment,
+ docCount,
+ isCompoundFile,
+ false,
+ null,
+ diagnostics,
+ segmentID,
+ attributes,
+ indexSort);
+ si.setFiles(files);
+ return si;
+ }
+
+ @Override
+ public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException {
+ final String fileName = IndexFileNames.segmentFileName(si.name, "", SI_EXTENSION);
+
+ try (IndexOutput output = dir.createOutput(fileName, ioContext)) {
+ // Only add the file once we've successfully created it, else IFD assert can trip:
+ si.addFile(fileName);
+ CodecUtil.writeIndexHeader(output, CODEC_NAME, VERSION_CURRENT, si.getId(), "");
+
+ writeSegmentInfo(output, si);
+
+ CodecUtil.writeFooter(output);
+ }
+ }
+
+ private void writeSegmentInfo(DataOutput output, SegmentInfo si) throws IOException {
+ Version version = si.getVersion();
+ if (version.major < 7) {
+ throw new IllegalArgumentException(
+ "invalid major version: should be >= 7 but got: " + version.major + " segment=" + si);
+ }
+ // Write the Lucene version that created this segment, since 3.1
+ output.writeInt(version.major);
+ output.writeInt(version.minor);
+ output.writeInt(version.bugfix);
+
+ // Write the min Lucene version that contributed docs to the segment, since 7.0
+ if (si.getMinVersion() != null) {
+ output.writeByte((byte) 1);
+ Version minVersion = si.getMinVersion();
+ output.writeInt(minVersion.major);
+ output.writeInt(minVersion.minor);
+ output.writeInt(minVersion.bugfix);
+ } else {
+ output.writeByte((byte) 0);
+ }
+
+ assert version.prerelease == 0;
+ output.writeInt(si.maxDoc());
+
+ output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
+ output.writeMapOfStrings(si.getDiagnostics());
+ Set<String> files = si.files();
+ for (String file : files) {
+ if (!IndexFileNames.parseSegmentName(file).equals(si.name)) {
+ throw new IllegalArgumentException(
+ "invalid files: expected segment=" + si.name + ", got=" + files);
+ }
+ }
+ output.writeSetOfStrings(files);
+ output.writeMapOfStrings(si.getAttributes());
+
+ Sort indexSort = si.getIndexSort();
+ int numSortFields = indexSort == null ? 0 : indexSort.getSort().length;
+ output.writeVInt(numSortFields);
+ for (int i = 0; i < numSortFields; ++i) {
+ SortField sortField = indexSort.getSort()[i];
+ IndexSorter sorter = sortField.getIndexSorter();
+ if (sorter == null) {
+ throw new IllegalArgumentException("cannot serialize SortField " + sortField);
+ }
+ output.writeString(sorter.getProviderName());
+ SortFieldProvider.write(sortField, output);
+ }
+ }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90SkipReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90SkipReader.java
new file mode 100644
index 000000000000..da31bd75a80a
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90SkipReader.java
@@ -0,0 +1,206 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene90;
+
+import java.io.IOException;
+import java.util.Arrays;
+import org.apache.lucene.codecs.MultiLevelSkipListReader;
+import org.apache.lucene.store.IndexInput;
+
+/**
+ * Implements the skip list reader for block postings format that stores positions and payloads.
+ *
+ * Although this skipper uses MultiLevelSkipListReader as an interface, its definition of a skip
+ * position is a little different.
+ *
+ * For example, when skipInterval = blockSize = 3 and df = 2*skipInterval = 6:
+ *
+ *  0 1 2 3 4 5
+ *  d d d d d d    (posting list)
+ *      ^     ^    (skip point in MultiLevelSkipListWriter)
+ *      ^          (skip point in Lucene90SkipWriter)
+ *
+ * In this case, MultiLevelSkipListReader will use the last document as a skip point, while
+ * Lucene90SkipReader should assume that no further skip point will come.
+ *
+ * If we used the interface directly in Lucene90SkipReader, it might naively try to read more skip
+ * data after the only skip point is loaded.
+ *
+ * To illustrate this, consider a call to skipTo(d[5]): since skip point d[3] has a smaller docId
+ * and numSkipped + blockSize == df, the MultiLevelSkipListReader will assume the skip list isn't
+ * exhausted yet, and will try to load a non-existent skip point.
+ *
+ * Therefore, we trim df before passing it to the interface. See trim(int).
+ */
+class Lucene90SkipReader extends MultiLevelSkipListReader {
+ private long[] docPointer;
+ private long[] posPointer;
+ private long[] payPointer;
+ private int[] posBufferUpto;
+ private int[] payloadByteUpto;
+
+ private long lastPosPointer;
+ private long lastPayPointer;
+ private int lastPayloadByteUpto;
+ private long lastDocPointer;
+ private int lastPosBufferUpto;
+
+ public Lucene90SkipReader(
+ IndexInput skipStream,
+ int maxSkipLevels,
+ boolean hasPos,
+ boolean hasOffsets,
+ boolean hasPayloads) {
+ super(skipStream, maxSkipLevels, ForUtil.BLOCK_SIZE, 8);
+ docPointer = new long[maxSkipLevels];
+ if (hasPos) {
+ posPointer = new long[maxSkipLevels];
+ posBufferUpto = new int[maxSkipLevels];
+ if (hasPayloads) {
+ payloadByteUpto = new int[maxSkipLevels];
+ } else {
+ payloadByteUpto = null;
+ }
+ if (hasOffsets || hasPayloads) {
+ payPointer = new long[maxSkipLevels];
+ } else {
+ payPointer = null;
+ }
+ } else {
+ posPointer = null;
+ }
+ }
+
+ /**
+ * Trim the original docFreq to tell the skip reader to read the proper number of skip points.
+ *
+ * Since our definition in Lucene90Skip* is a little different from MultiLevelSkip*, this trimmed
+ * docFreq prevents the skip reader from: 1. naively reading a non-existent skip point after the
+ * last block boundary, and 2. moving into the vInt block.
+ */
+ protected int trim(int df) {
+ return df % ForUtil.BLOCK_SIZE == 0 ? df - 1 : df;
+ }
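+
+ // For example, with BLOCK_SIZE == 128: df == 384 (exactly three full blocks) is trimmed to 383
+ // so the reader won't expect a skip point at the end of the last block, while df == 200 is left
+ // unchanged.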
+
+ public void init(
+ long skipPointer, long docBasePointer, long posBasePointer, long payBasePointer, int df)
+ throws IOException {
+ super.init(skipPointer, trim(df));
+ lastDocPointer = docBasePointer;
+ lastPosPointer = posBasePointer;
+ lastPayPointer = payBasePointer;
+
+ Arrays.fill(docPointer, docBasePointer);
+ if (posPointer != null) {
+ Arrays.fill(posPointer, posBasePointer);
+ if (payPointer != null) {
+ Arrays.fill(payPointer, payBasePointer);
+ }
+ } else {
+ assert posBasePointer == 0;
+ }
+ }
+
+ /**
+ * Returns the doc pointer of the doc to which the last call of {@link
+ * MultiLevelSkipListReader#skipTo(int)} has skipped.
+ */
+ public long getDocPointer() {
+ return lastDocPointer;
+ }
+
+ public long getPosPointer() {
+ return lastPosPointer;
+ }
+
+ public int getPosBufferUpto() {
+ return lastPosBufferUpto;
+ }
+
+ public long getPayPointer() {
+ return lastPayPointer;
+ }
+
+ public int getPayloadByteUpto() {
+ return lastPayloadByteUpto;
+ }
+
+ public int getNextSkipDoc() {
+ return skipDoc[0];
+ }
+
+ @Override
+ protected void seekChild(int level) throws IOException {
+ super.seekChild(level);
+ docPointer[level] = lastDocPointer;
+ if (posPointer != null) {
+ posPointer[level] = lastPosPointer;
+ posBufferUpto[level] = lastPosBufferUpto;
+ if (payloadByteUpto != null) {
+ payloadByteUpto[level] = lastPayloadByteUpto;
+ }
+ if (payPointer != null) {
+ payPointer[level] = lastPayPointer;
+ }
+ }
+ }
+
+ @Override
+ protected void setLastSkipData(int level) {
+ super.setLastSkipData(level);
+ lastDocPointer = docPointer[level];
+
+ if (posPointer != null) {
+ lastPosPointer = posPointer[level];
+ lastPosBufferUpto = posBufferUpto[level];
+ if (payPointer != null) {
+ lastPayPointer = payPointer[level];
+ }
+ if (payloadByteUpto != null) {
+ lastPayloadByteUpto = payloadByteUpto[level];
+ }
+ }
+ }
+
+ @Override
+ protected int readSkipData(int level, IndexInput skipStream) throws IOException {
+ int delta = skipStream.readVInt();
+ docPointer[level] += skipStream.readVLong();
+
+ if (posPointer != null) {
+ posPointer[level] += skipStream.readVLong();
+ posBufferUpto[level] = skipStream.readVInt();
+
+ if (payloadByteUpto != null) {
+ payloadByteUpto[level] = skipStream.readVInt();
+ }
+
+ if (payPointer != null) {
+ payPointer[level] += skipStream.readVLong();
+ }
+ }
+ readImpacts(level, skipStream);
+ return delta;
+ }
+
+ // The default impl skips impacts
+ protected void readImpacts(int level, IndexInput skipStream) throws IOException {
+ skipStream.skipBytes(skipStream.readVInt());
+ }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90SkipWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90SkipWriter.java
new file mode 100644
index 000000000000..48deafa42251
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90SkipWriter.java
@@ -0,0 +1,237 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene90;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+import org.apache.lucene.codecs.CompetitiveImpactAccumulator;
+import org.apache.lucene.codecs.MultiLevelSkipListWriter;
+import org.apache.lucene.index.Impact;
+import org.apache.lucene.store.ByteBuffersDataOutput;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.store.IndexOutput;
+
+/**
+ * Write skip lists with multiple levels, and support skip within block ints.
+ *
+ * Assume that docFreq = 28 and skipInterval = blockSize = 12:
+ *
+ *  |        block#0        |        block#1        | vInts |
+ *  d d d d d d d d d d d d d d d d d d d d d d d d d d d d    (posting list)
+ *                        ^                       ^             (level 0 skip point)
+ *
+ * Note that skipWriter will ignore the first document in block#0, since it is useless as a skip
+ * point. Also, we'll never skip into the vInts block; we only record skip data at its start
+ * point (if it exists).
+ *
+ * For each skip point, we will record: 1. the docID in the former position, i.e. for position
+ * 12, record docID[11], etc.; 2. its related file pointers (position, payload); 3. related
+ * numbers or uptos (position, payload); 4. the start offset.
+ */
+final class Lucene90SkipWriter extends MultiLevelSkipListWriter {
+ private int[] lastSkipDoc;
+ private long[] lastSkipDocPointer;
+ private long[] lastSkipPosPointer;
+ private long[] lastSkipPayPointer;
+ private int[] lastPayloadByteUpto;
+
+ private final IndexOutput docOut;
+ private final IndexOutput posOut;
+ private final IndexOutput payOut;
+
+ private int curDoc;
+ private long curDocPointer;
+ private long curPosPointer;
+ private long curPayPointer;
+ private int curPosBufferUpto;
+ private int curPayloadByteUpto;
+ private CompetitiveImpactAccumulator[] curCompetitiveFreqNorms;
+ private boolean fieldHasPositions;
+ private boolean fieldHasOffsets;
+ private boolean fieldHasPayloads;
+
+ public Lucene90SkipWriter(
+ int maxSkipLevels,
+ int blockSize,
+ int docCount,
+ IndexOutput docOut,
+ IndexOutput posOut,
+ IndexOutput payOut) {
+ super(blockSize, 8, maxSkipLevels, docCount);
+ this.docOut = docOut;
+ this.posOut = posOut;
+ this.payOut = payOut;
+
+ lastSkipDoc = new int[maxSkipLevels];
+ lastSkipDocPointer = new long[maxSkipLevels];
+ if (posOut != null) {
+ lastSkipPosPointer = new long[maxSkipLevels];
+ if (payOut != null) {
+ lastSkipPayPointer = new long[maxSkipLevels];
+ }
+ lastPayloadByteUpto = new int[maxSkipLevels];
+ }
+ curCompetitiveFreqNorms = new CompetitiveImpactAccumulator[maxSkipLevels];
+ for (int i = 0; i < maxSkipLevels; ++i) {
+ curCompetitiveFreqNorms[i] = new CompetitiveImpactAccumulator();
+ }
+ }
+
+ public void setField(
+ boolean fieldHasPositions, boolean fieldHasOffsets, boolean fieldHasPayloads) {
+ this.fieldHasPositions = fieldHasPositions;
+ this.fieldHasOffsets = fieldHasOffsets;
+ this.fieldHasPayloads = fieldHasPayloads;
+ }
+
+ // tricky: we only skip data for blocks (terms with more than 128 docs), but re-init'ing the
+ // skipper is pretty slow for rare terms in large segments as we have to fill O(log #docs in
+ // segment) of junk. this is the vast majority of terms (worst case: ID field or similar). so
+ // in resetSkip() we save away the previous pointers, and lazy-init only if we need to buffer
+ // skip data for the term.
+ private boolean initialized;
+ long lastDocFP;
+ long lastPosFP;
+ long lastPayFP;
+
+ @Override
+ public void resetSkip() {
+ lastDocFP = docOut.getFilePointer();
+ if (fieldHasPositions) {
+ lastPosFP = posOut.getFilePointer();
+ if (fieldHasOffsets || fieldHasPayloads) {
+ lastPayFP = payOut.getFilePointer();
+ }
+ }
+ if (initialized) {
+ for (CompetitiveImpactAccumulator acc : curCompetitiveFreqNorms) {
+ acc.clear();
+ }
+ }
+ initialized = false;
+ }
+
+ private void initSkip() {
+ if (!initialized) {
+ super.resetSkip();
+ Arrays.fill(lastSkipDoc, 0);
+ Arrays.fill(lastSkipDocPointer, lastDocFP);
+ if (fieldHasPositions) {
+ Arrays.fill(lastSkipPosPointer, lastPosFP);
+ if (fieldHasPayloads) {
+ Arrays.fill(lastPayloadByteUpto, 0);
+ }
+ if (fieldHasOffsets || fieldHasPayloads) {
+ Arrays.fill(lastSkipPayPointer, lastPayFP);
+ }
+ }
+ // sets of competitive freq,norm pairs should be empty at this point
+ assert Arrays.stream(curCompetitiveFreqNorms)
+ .map(CompetitiveImpactAccumulator::getCompetitiveFreqNormPairs)
+ .mapToInt(Collection::size)
+ .sum()
+ == 0;
+ initialized = true;
+ }
+ }
+
+ /** Sets the values for the current skip data. */
+ public void bufferSkip(
+ int doc,
+ CompetitiveImpactAccumulator competitiveFreqNorms,
+ int numDocs,
+ long posFP,
+ long payFP,
+ int posBufferUpto,
+ int payloadByteUpto)
+ throws IOException {
+ initSkip();
+ this.curDoc = doc;
+ this.curDocPointer = docOut.getFilePointer();
+ this.curPosPointer = posFP;
+ this.curPayPointer = payFP;
+ this.curPosBufferUpto = posBufferUpto;
+ this.curPayloadByteUpto = payloadByteUpto;
+ this.curCompetitiveFreqNorms[0].addAll(competitiveFreqNorms);
+ bufferSkip(numDocs);
+ }
+
+ private final ByteBuffersDataOutput freqNormOut = ByteBuffersDataOutput.newResettableInstance();
+
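+ // With blockSize == 128 and skipMultiplier == 8 (see the constructor above), level 0 gets one
+ // entry per block of 128 docs, level 1 one entry per 1024 docs, and so on; a term therefore
+ // needs more than 128 docs before any skip data is written at all.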
+ @Override
+ protected void writeSkipData(int level, DataOutput skipBuffer) throws IOException {
+
+ int delta = curDoc - lastSkipDoc[level];
+
+ skipBuffer.writeVInt(delta);
+ lastSkipDoc[level] = curDoc;
+
+ skipBuffer.writeVLong(curDocPointer - lastSkipDocPointer[level]);
+ lastSkipDocPointer[level] = curDocPointer;
+
+ if (fieldHasPositions) {
+
+ skipBuffer.writeVLong(curPosPointer - lastSkipPosPointer[level]);
+ lastSkipPosPointer[level] = curPosPointer;
+ skipBuffer.writeVInt(curPosBufferUpto);
+
+ if (fieldHasPayloads) {
+ skipBuffer.writeVInt(curPayloadByteUpto);
+ }
+
+ if (fieldHasOffsets || fieldHasPayloads) {
+ skipBuffer.writeVLong(curPayPointer - lastSkipPayPointer[level]);
+ lastSkipPayPointer[level] = curPayPointer;
+ }
+ }
+
+ CompetitiveImpactAccumulator competitiveFreqNorms = curCompetitiveFreqNorms[level];
+ assert competitiveFreqNorms.getCompetitiveFreqNormPairs().size() > 0;
+ if (level + 1 < numberOfSkipLevels) {
+ curCompetitiveFreqNorms[level + 1].addAll(competitiveFreqNorms);
+ }
+ writeImpacts(competitiveFreqNorms, freqNormOut);
+ skipBuffer.writeVInt(Math.toIntExact(freqNormOut.size()));
+ freqNormOut.copyTo(skipBuffer);
+ freqNormOut.reset();
+ competitiveFreqNorms.clear();
+ }
+
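+ // Worked example of the encoding below: for competitive pairs (freq=3, norm=4) and
+ // (freq=7, norm=5), the first pair gives freqDelta=2 and normDelta=3, so we write vInt 5
+ // ((2 << 1) | 1) followed by zLong 3; the second gives freqDelta=3 and normDelta=0, so we write
+ // the single vInt 6 (3 << 1). Lucene90ScoreSkipReader.readImpacts reverses exactly this.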
+ static void writeImpacts(CompetitiveImpactAccumulator acc, DataOutput out) throws IOException {
+ Collection<Impact> impacts = acc.getCompetitiveFreqNormPairs();
+ Impact previous = new Impact(0, 0);
+ for (Impact impact : impacts) {
+ assert impact.freq > previous.freq;
+ assert Long.compareUnsigned(impact.norm, previous.norm) > 0;
+ int freqDelta = impact.freq - previous.freq - 1;
+ long normDelta = impact.norm - previous.norm - 1;
+ if (normDelta == 0) {
+ // most of time, norm only increases by 1, so we can fold everything in a single byte
+ out.writeVInt(freqDelta << 1);
+ } else {
+ out.writeVInt((freqDelta << 1) | 1);
+ out.writeZLong(normDelta);
+ }
+ previous = impact;
+ }
+ }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/PForUtil.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/PForUtil.java
new file mode 100644
index 000000000000..eb735c84b83f
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/PForUtil.java
@@ -0,0 +1,323 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene90;
+
+import java.io.IOException;
+import java.util.Arrays;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.util.LongHeap;
+import org.apache.lucene.util.packed.PackedInts;
+
+/** Utility class to encode sequences of 128 small positive integers. */
+final class PForUtil {
+
+ private static final int MAX_EXCEPTIONS = 7;
+ private static final int HALF_BLOCK_SIZE = ForUtil.BLOCK_SIZE / 2;
+
+ // IDENTITY_PLUS_ONE[i] == i + 1
+ private static final long[] IDENTITY_PLUS_ONE = new long[ForUtil.BLOCK_SIZE];
+
+ static {
+ for (int i = 0; i < ForUtil.BLOCK_SIZE; ++i) {
+ IDENTITY_PLUS_ONE[i] = i + 1;
+ }
+ }
+
+ static boolean allEqual(long[] l) {
+ for (int i = 1; i < ForUtil.BLOCK_SIZE; ++i) {
+ if (l[i] != l[0]) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private final ForUtil forUtil;
+ // buffer for reading exception data; each exception uses two bytes (pos + high-order bits of the
+ // exception)
+ private final byte[] exceptionBuff = new byte[MAX_EXCEPTIONS * 2];
+
+ PForUtil(ForUtil forUtil) {
+ assert ForUtil.BLOCK_SIZE <= 256 : "blocksize must fit in one byte. got " + ForUtil.BLOCK_SIZE;
+ this.forUtil = forUtil;
+ }
+
+ /** Encode 128 integers from {@code longs} into {@code out}. */
+ void encode(long[] longs, DataOutput out) throws IOException {
+ // Determine the top MAX_EXCEPTIONS + 1 values
+ final LongHeap top = new LongHeap(MAX_EXCEPTIONS + 1);
+ for (int i = 0; i <= MAX_EXCEPTIONS; ++i) {
+ top.push(longs[i]);
+ }
+ long topValue = top.top();
+ for (int i = MAX_EXCEPTIONS + 1; i < ForUtil.BLOCK_SIZE; ++i) {
+ if (longs[i] > topValue) {
+ topValue = top.updateTop(longs[i]);
+ }
+ }
+
+ long max = 0L;
+ for (int i = 1; i <= top.size(); ++i) {
+ max = Math.max(max, top.get(i));
+ }
+
+ final int maxBitsRequired = PackedInts.bitsRequired(max);
+ // We store the patch on a byte, so we can't decrease the number of bits required by more than 8
+ final int patchedBitsRequired =
+ Math.max(PackedInts.bitsRequired(topValue), maxBitsRequired - 8);
+ int numExceptions = 0;
+ final long maxUnpatchedValue = (1L << patchedBitsRequired) - 1;
+ for (int i = 2; i <= top.size(); ++i) {
+ if (top.get(i) > maxUnpatchedValue) {
+ numExceptions++;
+ }
+ }
+ final byte[] exceptions = new byte[numExceptions * 2];
+ if (numExceptions > 0) {
+ int exceptionCount = 0;
+ for (int i = 0; i < ForUtil.BLOCK_SIZE; ++i) {
+ if (longs[i] > maxUnpatchedValue) {
+ exceptions[exceptionCount * 2] = (byte) i;
+ exceptions[exceptionCount * 2 + 1] = (byte) (longs[i] >>> patchedBitsRequired);
+ longs[i] &= maxUnpatchedValue;
+ exceptionCount++;
+ }
+ }
+ assert exceptionCount == numExceptions : exceptionCount + " " + numExceptions;
+ }
+
+ if (allEqual(longs) && maxBitsRequired <= 8) {
+ for (int i = 0; i < numExceptions; ++i) {
+ exceptions[2 * i + 1] =
+ (byte) (Byte.toUnsignedLong(exceptions[2 * i + 1]) << patchedBitsRequired);
+ }
+ out.writeByte((byte) (numExceptions << 5));
+ out.writeVLong(longs[0]);
+ } else {
+ final int token = (numExceptions << 5) | patchedBitsRequired;
+ out.writeByte((byte) token);
+ forUtil.encode(longs, patchedBitsRequired, out);
+ }
+ out.writeBytes(exceptions, exceptions.length);
+ }
+
+ /** Decode 128 integers into {@code longs}. */
+ void decode(DataInput in, long[] longs) throws IOException {
+ final int token = Byte.toUnsignedInt(in.readByte());
+ final int bitsPerValue = token & 0x1f;
+ final int numExceptions = token >>> 5;
+ if (bitsPerValue == 0) {
+ Arrays.fill(longs, 0, ForUtil.BLOCK_SIZE, in.readVLong());
+ } else {
+ forUtil.decode(bitsPerValue, in, longs);
+ }
+ for (int i = 0; i < numExceptions; ++i) {
+ longs[Byte.toUnsignedInt(in.readByte())] |=
+ Byte.toUnsignedLong(in.readByte()) << bitsPerValue;
+ }
+ }
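+
+ // Worked example of the token byte shared by encode/decode/skip: a token of 0x45 (0b010_00101)
+ // means numExceptions == 2 (high 3 bits) and bitsPerValue == 5 (low 5 bits); the payload is then
+ // 128 values packed at 5 bits each, followed by 2 exception byte pairs (position, high bits)
+ // that get OR'ed back in above bit 5.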
+
+ /** Decode deltas, compute the prefix sum and add {@code base} to all decoded longs. */
+ void decodeAndPrefixSum(DataInput in, long base, long[] longs) throws IOException {
+ final int token = Byte.toUnsignedInt(in.readByte());
+ final int bitsPerValue = token & 0x1f;
+ final int numExceptions = token >>> 5;
+ if (numExceptions == 0) {
+ // when there are no exceptions to apply, we can be a bit more efficient with our decoding
+ if (bitsPerValue == 0) {
+ // a bpv of zero indicates all delta values are the same
+ long val = in.readVLong();
+ if (val == 1) {
+ // this will often be the common case when working with doc IDs, so we special-case it to
+ // be slightly more efficient
+ prefixSumOfOnes(longs, base);
+ } else {
+ prefixSumOf(longs, base, val);
+ }
+ } else {
+ // decode the deltas then apply the prefix sum logic
+ forUtil.decodeTo32(bitsPerValue, in, longs);
+ prefixSum32(longs, base);
+ }
+ } else {
+ // pack two values per long so we can apply prefixes two-at-a-time
+ if (bitsPerValue == 0) {
+ fillSameValue32(longs, in.readVLong());
+ } else {
+ forUtil.decodeTo32(bitsPerValue, in, longs);
+ }
+ applyExceptions32(bitsPerValue, numExceptions, in, longs);
+ prefixSum32(longs, base);
+ }
+ }
+
+ /** Skip 128 integers. */
+ void skip(DataInput in) throws IOException {
+ final int token = Byte.toUnsignedInt(in.readByte());
+ final int bitsPerValue = token & 0x1f;
+ final int numExceptions = token >>> 5;
+ if (bitsPerValue == 0) {
+ in.readVLong();
+ in.skipBytes((numExceptions << 1));
+ } else {
+ in.skipBytes(forUtil.numBytes(bitsPerValue) + (numExceptions << 1));
+ }
+ }
+
+ /**
+ * Fill {@code longs} with the final values for the case of all deltas being 1. Note this assumes
+ * there are no exceptions to apply.
+ */
+ private static void prefixSumOfOnes(long[] longs, long base) {
+ System.arraycopy(IDENTITY_PLUS_ONE, 0, longs, 0, ForUtil.BLOCK_SIZE);
+ // This loop gets auto-vectorized
+ for (int i = 0; i < ForUtil.BLOCK_SIZE; ++i) {
+ longs[i] += base;
+ }
+ }
+
+ /**
+ * Fill {@code longs} with the final values for the case of all deltas being {@code val}. Note
+ * this assumes there are no exceptions to apply.
+ */
+ private static void prefixSumOf(long[] longs, long base, long val) {
+ for (int i = 0; i < ForUtil.BLOCK_SIZE; i++) {
+ longs[i] = (i + 1) * val + base;
+ }
+ }
+
+ /**
+ * Fills the {@code longs} with the provided {@code val}, packed two values per long (using 32
+ * bits per value).
+ */
+ private static void fillSameValue32(long[] longs, long val) {
+ final long token = val << 32 | val;
+ Arrays.fill(longs, 0, HALF_BLOCK_SIZE, token);
+ }
+
+ /** Apply the exceptions where the values are packed two-per-long in {@code longs}. */
+ private void applyExceptions32(int bitsPerValue, int numExceptions, DataInput in, long[] longs)
+ throws IOException {
+ in.readBytes(exceptionBuff, 0, numExceptions * 2);
+ for (int i = 0; i < numExceptions; ++i) {
+ final int exceptionPos = Byte.toUnsignedInt(exceptionBuff[i * 2]);
+ final long exception = Byte.toUnsignedLong(exceptionBuff[i * 2 + 1]);
+ // note that we pack two values per long, so the index is [0..63] for 128 values
+ final int idx = exceptionPos & 0x3f; // mod 64
+ // we need to shift by 1) the bpv, and 2) 32 for positions [0..63] (and no 32 shift for
+ // [64..127])
+ final int shift = bitsPerValue + ((1 ^ (exceptionPos >>> 6)) << 5);
+ longs[idx] |= exception << shift;
+ }
+ }
+
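The index/shift arithmetic above is easiest to see with a few concrete positions; this standalone sketch (illustration only) prints where an exception at position [0..127] lands in the two-values-per-long layout:

public class Exception32MappingSketch {
  public static void main(String[] args) {
    int bitsPerValue = 7;
    for (int exceptionPos : new int[] {0, 5, 63, 64, 70, 127}) {
      int idx = exceptionPos & 0x3f;                                // mod 64
      int shift = bitsPerValue + ((1 ^ (exceptionPos >>> 6)) << 5); // +32 only for positions [0..63]
      System.out.println("pos " + exceptionPos + " -> longs[" + idx + "] shifted left by " + shift);
    }
  }
}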
+ /** Apply prefix sum logic where the values are packed two-per-long in {@code longs}. */
+ private static void prefixSum32(long[] longs, long base) {
+ longs[0] += base << 32;
+ innerPrefixSum32(longs);
+ expand32(longs);
+ final long l = longs[HALF_BLOCK_SIZE - 1];
+ for (int i = HALF_BLOCK_SIZE; i < ForUtil.BLOCK_SIZE; ++i) {
+ longs[i] += l;
+ }
+ }
+
+ /**
+ * Expand the values packed two-per-long in {@code longs} into 128 individual long values stored
+ * back into {@code longs}.
+ */
+ private static void expand32(long[] longs) {
+ for (int i = 0; i < 64; ++i) {
+ final long l = longs[i];
+ longs[i] = l >>> 32;
+ longs[64 + i] = l & 0xFFFFFFFFL;
+ }
+ }
+
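The trick behind prefixSum32/expand32 is that one running sum over the packed longs advances two 32-bit lanes at once. A scaled-down, self-contained sketch (illustration only; eight deltas instead of 128, hypothetical class name) of the same three steps: seed the high lane with the base, run the inner prefix sum, then expand and add the last value of the first half to the second half:

public class PackedPrefixSumSketch {
  public static void main(String[] args) {
    long base = 100;
    long[] deltas = {1, 2, 3, 4, 5, 6, 7, 8};
    long[] packed = new long[4];
    for (int i = 0; i < 4; ++i) {
      packed[i] = (deltas[i] << 32) | deltas[4 + i]; // first half in high bits, second half in low bits
    }
    packed[0] += base << 32;          // seed the high lane with the base
    for (int i = 1; i < 4; ++i) {
      packed[i] += packed[i - 1];     // one pass advances both 32-bit lanes
    }
    long[] out = new long[8];
    for (int i = 0; i < 4; ++i) {     // expand back into individual values
      out[i] = packed[i] >>> 32;
      out[4 + i] = packed[i] & 0xFFFFFFFFL;
    }
    long carry = out[3];              // last value of the first half
    for (int i = 4; i < 8; ++i) {
      out[i] += carry;                // second half still needs the first half's total
    }
    // Prints 101 103 106 110 115 121 128 136
    for (long v : out) {
      System.out.print(v + " ");
    }
  }
}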
+ /**
+ * Unrolled "inner" prefix sum logic where the values are packed two-per-long in {@code longs}.
+ * After this method, the final values will be correct for all high-order bits (values [0..63])
+ * but a final prefix loop will still need to run to "correct" the values of [64..127] in the
+ * low-order bits, which need the 64th value added to all of them.
+ */
+ private static void innerPrefixSum32(long[] longs) {
+ longs[1] += longs[0];
+ longs[2] += longs[1];
+ longs[3] += longs[2];
+ longs[4] += longs[3];
+ longs[5] += longs[4];
+ longs[6] += longs[5];
+ longs[7] += longs[6];
+ longs[8] += longs[7];
+ longs[9] += longs[8];
+ longs[10] += longs[9];
+ longs[11] += longs[10];
+ longs[12] += longs[11];
+ longs[13] += longs[12];
+ longs[14] += longs[13];
+ longs[15] += longs[14];
+ longs[16] += longs[15];
+ longs[17] += longs[16];
+ longs[18] += longs[17];
+ longs[19] += longs[18];
+ longs[20] += longs[19];
+ longs[21] += longs[20];
+ longs[22] += longs[21];
+ longs[23] += longs[22];
+ longs[24] += longs[23];
+ longs[25] += longs[24];
+ longs[26] += longs[25];
+ longs[27] += longs[26];
+ longs[28] += longs[27];
+ longs[29] += longs[28];
+ longs[30] += longs[29];
+ longs[31] += longs[30];
+ longs[32] += longs[31];
+ longs[33] += longs[32];
+ longs[34] += longs[33];
+ longs[35] += longs[34];
+ longs[36] += longs[35];
+ longs[37] += longs[36];
+ longs[38] += longs[37];
+ longs[39] += longs[38];
+ longs[40] += longs[39];
+ longs[41] += longs[40];
+ longs[42] += longs[41];
+ longs[43] += longs[42];
+ longs[44] += longs[43];
+ longs[45] += longs[44];
+ longs[46] += longs[45];
+ longs[47] += longs[46];
+ longs[48] += longs[47];
+ longs[49] += longs[48];
+ longs[50] += longs[49];
+ longs[51] += longs[50];
+ longs[52] += longs[51];
+ longs[53] += longs[52];
+ longs[54] += longs[53];
+ longs[55] += longs[54];
+ longs[56] += longs[55];
+ longs[57] += longs[56];
+ longs[58] += longs[57];
+ longs[59] += longs[58];
+ longs[60] += longs[59];
+ longs[61] += longs[60];
+ longs[62] += longs[61];
+ longs[63] += longs[62];
+ }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java
index 90b34750463d..6ea8a0104eb6 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java
@@ -262,7 +262,7 @@ public Lucene90BlockTreeTermsWriter(
postingsWriter,
minItemsInBlock,
maxItemsInBlock,
- Lucene90BlockTreeTermsReader.VERSION_CURRENT);
+ Lucene90BlockTreeTermsReader.VERSION_START);
}
/** Expert constructor that allows configuring the version, used for bw tests. */
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/gen_ForUtil.py b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/gen_ForUtil.py
new file mode 100644
index 000000000000..9f4b03ab39b5
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/gen_ForUtil.py
@@ -0,0 +1,442 @@
+#! /usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from math import gcd
+
+"""Code generation for ForUtil.java"""
+
+MAX_SPECIALIZED_BITS_PER_VALUE = 24
+OUTPUT_FILE = "ForUtil.java"
+PRIMITIVE_SIZE = [8, 16, 32]
+HEADER = """// This file has been automatically generated, DO NOT EDIT
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene90;
+
+import java.io.IOException;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
+
+// Inspired from https://fulmicoton.com/posts/bitpacking/
+// Encodes multiple integers in a long to get SIMD-like speedups.
+// If bitsPerValue <= 8 then we pack 8 ints per long
+// else if bitsPerValue <= 16 we pack 4 ints per long
+// else we pack 2 ints per long
+final class ForUtil {
+
+ static final int BLOCK_SIZE = 128;
+ private static final int BLOCK_SIZE_LOG2 = 7;
+
+ private static long expandMask32(long mask32) {
+ return mask32 | (mask32 << 32);
+ }
+
+ private static long expandMask16(long mask16) {
+ return expandMask32(mask16 | (mask16 << 16));
+ }
+
+ private static long expandMask8(long mask8) {
+ return expandMask16(mask8 | (mask8 << 8));
+ }
+
+ private static long mask32(int bitsPerValue) {
+ return expandMask32((1L << bitsPerValue) - 1);
+ }
+
+ private static long mask16(int bitsPerValue) {
+ return expandMask16((1L << bitsPerValue) - 1);
+ }
+
+ private static long mask8(int bitsPerValue) {
+ return expandMask8((1L << bitsPerValue) - 1);
+ }
+
+ private static void expand8(long[] arr) {
+ for (int i = 0; i < 16; ++i) {
+ long l = arr[i];
+ arr[i] = (l >>> 56) & 0xFFL;
+ arr[16 + i] = (l >>> 48) & 0xFFL;
+ arr[32 + i] = (l >>> 40) & 0xFFL;
+ arr[48 + i] = (l >>> 32) & 0xFFL;
+ arr[64 + i] = (l >>> 24) & 0xFFL;
+ arr[80 + i] = (l >>> 16) & 0xFFL;
+ arr[96 + i] = (l >>> 8) & 0xFFL;
+ arr[112 + i] = l & 0xFFL;
+ }
+ }
+
+ private static void expand8To32(long[] arr) {
+ for (int i = 0; i < 16; ++i) {
+ long l = arr[i];
+ arr[i] = (l >>> 24) & 0x000000FF000000FFL;
+ arr[16 + i] = (l >>> 16) & 0x000000FF000000FFL;
+ arr[32 + i] = (l >>> 8) & 0x000000FF000000FFL;
+ arr[48 + i] = l & 0x000000FF000000FFL;
+ }
+ }
+
+ private static void collapse8(long[] arr) {
+ for (int i = 0; i < 16; ++i) {
+ arr[i] =
+ (arr[i] << 56)
+ | (arr[16 + i] << 48)
+ | (arr[32 + i] << 40)
+ | (arr[48 + i] << 32)
+ | (arr[64 + i] << 24)
+ | (arr[80 + i] << 16)
+ | (arr[96 + i] << 8)
+ | arr[112 + i];
+ }
+ }
+
+ private static void expand16(long[] arr) {
+ for (int i = 0; i < 32; ++i) {
+ long l = arr[i];
+ arr[i] = (l >>> 48) & 0xFFFFL;
+ arr[32 + i] = (l >>> 32) & 0xFFFFL;
+ arr[64 + i] = (l >>> 16) & 0xFFFFL;
+ arr[96 + i] = l & 0xFFFFL;
+ }
+ }
+
+ private static void expand16To32(long[] arr) {
+ for (int i = 0; i < 32; ++i) {
+ long l = arr[i];
+ arr[i] = (l >>> 16) & 0x0000FFFF0000FFFFL;
+ arr[32 + i] = l & 0x0000FFFF0000FFFFL;
+ }
+ }
+
+ private static void collapse16(long[] arr) {
+ for (int i = 0; i < 32; ++i) {
+ arr[i] = (arr[i] << 48) | (arr[32 + i] << 32) | (arr[64 + i] << 16) | arr[96 + i];
+ }
+ }
+
+ private static void expand32(long[] arr) {
+ for (int i = 0; i < 64; ++i) {
+ long l = arr[i];
+ arr[i] = l >>> 32;
+ arr[64 + i] = l & 0xFFFFFFFFL;
+ }
+ }
+
+ private static void collapse32(long[] arr) {
+ for (int i = 0; i < 64; ++i) {
+ arr[i] = (arr[i] << 32) | arr[64 + i];
+ }
+ }
+
+ private final long[] tmp = new long[BLOCK_SIZE / 2];
+
+ /** Encode 128 integers from {@code longs} into {@code out}. */
+ void encode(long[] longs, int bitsPerValue, DataOutput out) throws IOException {
+ final int nextPrimitive;
+ final int numLongs;
+ if (bitsPerValue <= 8) {
+ nextPrimitive = 8;
+ numLongs = BLOCK_SIZE / 8;
+ collapse8(longs);
+ } else if (bitsPerValue <= 16) {
+ nextPrimitive = 16;
+ numLongs = BLOCK_SIZE / 4;
+ collapse16(longs);
+ } else {
+ nextPrimitive = 32;
+ numLongs = BLOCK_SIZE / 2;
+ collapse32(longs);
+ }
+
+ final int numLongsPerShift = bitsPerValue * 2;
+ int idx = 0;
+ int shift = nextPrimitive - bitsPerValue;
+ for (int i = 0; i < numLongsPerShift; ++i) {
+ tmp[i] = longs[idx++] << shift;
+ }
+ for (shift = shift - bitsPerValue; shift >= 0; shift -= bitsPerValue) {
+ for (int i = 0; i < numLongsPerShift; ++i) {
+ tmp[i] |= longs[idx++] << shift;
+ }
+ }
+
+ final int remainingBitsPerLong = shift + bitsPerValue;
+ final long maskRemainingBitsPerLong;
+ if (nextPrimitive == 8) {
+ maskRemainingBitsPerLong = MASKS8[remainingBitsPerLong];
+ } else if (nextPrimitive == 16) {
+ maskRemainingBitsPerLong = MASKS16[remainingBitsPerLong];
+ } else {
+ maskRemainingBitsPerLong = MASKS32[remainingBitsPerLong];
+ }
+
+ int tmpIdx = 0;
+ int remainingBitsPerValue = bitsPerValue;
+ while (idx < numLongs) {
+ if (remainingBitsPerValue >= remainingBitsPerLong) {
+ remainingBitsPerValue -= remainingBitsPerLong;
+ tmp[tmpIdx++] |= (longs[idx] >>> remainingBitsPerValue) & maskRemainingBitsPerLong;
+ if (remainingBitsPerValue == 0) {
+ idx++;
+ remainingBitsPerValue = bitsPerValue;
+ }
+ } else {
+ final long mask1, mask2;
+ if (nextPrimitive == 8) {
+ mask1 = MASKS8[remainingBitsPerValue];
+ mask2 = MASKS8[remainingBitsPerLong - remainingBitsPerValue];
+ } else if (nextPrimitive == 16) {
+ mask1 = MASKS16[remainingBitsPerValue];
+ mask2 = MASKS16[remainingBitsPerLong - remainingBitsPerValue];
+ } else {
+ mask1 = MASKS32[remainingBitsPerValue];
+ mask2 = MASKS32[remainingBitsPerLong - remainingBitsPerValue];
+ }
+ tmp[tmpIdx] |= (longs[idx++] & mask1) << (remainingBitsPerLong - remainingBitsPerValue);
+ remainingBitsPerValue = bitsPerValue - remainingBitsPerLong + remainingBitsPerValue;
+ tmp[tmpIdx++] |= (longs[idx] >>> remainingBitsPerValue) & mask2;
+ }
+ }
+
+ for (int i = 0; i < numLongsPerShift; ++i) {
+ // Java longs are big endian and we want to read little endian longs, so we need to reverse
+ // bytes
+ long l = tmp[i];
+ out.writeLong(l);
+ }
+ }
+
+ /** Number of bytes required to encode 128 integers of {@code bitsPerValue} bits per value. */
+ int numBytes(int bitsPerValue) {
+ return bitsPerValue << (BLOCK_SIZE_LOG2 - 3);
+ }
+
+ private static void decodeSlow(int bitsPerValue, DataInput in, long[] tmp, long[] longs)
+ throws IOException {
+ final int numLongs = bitsPerValue << 1;
+ in.readLongs(tmp, 0, numLongs);
+ final long mask = MASKS32[bitsPerValue];
+ int longsIdx = 0;
+ int shift = 32 - bitsPerValue;
+ for (; shift >= 0; shift -= bitsPerValue) {
+ shiftLongs(tmp, numLongs, longs, longsIdx, shift, mask);
+ longsIdx += numLongs;
+ }
+ final int remainingBitsPerLong = shift + bitsPerValue;
+ final long mask32RemainingBitsPerLong = MASKS32[remainingBitsPerLong];
+ int tmpIdx = 0;
+ int remainingBits = remainingBitsPerLong;
+ for (; longsIdx < BLOCK_SIZE / 2; ++longsIdx) {
+ int b = bitsPerValue - remainingBits;
+ long l = (tmp[tmpIdx++] & MASKS32[remainingBits]) << b;
+ while (b >= remainingBitsPerLong) {
+ b -= remainingBitsPerLong;
+ l |= (tmp[tmpIdx++] & mask32RemainingBitsPerLong) << b;
+ }
+ if (b > 0) {
+ l |= (tmp[tmpIdx] >>> (remainingBitsPerLong - b)) & MASKS32[b];
+ remainingBits = remainingBitsPerLong - b;
+ } else {
+ remainingBits = remainingBitsPerLong;
+ }
+ longs[longsIdx] = l;
+ }
+ }
+
+ /**
+ * The pattern that this shiftLongs method applies is recognized by the C2 compiler, which
+ * generates SIMD instructions for it in order to shift multiple longs at once.
+ */
+ private static void shiftLongs(long[] a, int count, long[] b, int bi, int shift, long mask) {
+ for (int i = 0; i < count; ++i) {
+ b[bi + i] = (a[i] >>> shift) & mask;
+ }
+ }
+
+"""
+
+def writeRemainderWithSIMDOptimize(bpv, next_primitive, remaining_bits_per_long, o, num_values, f):
+ iteration = 1
+ num_longs = bpv * num_values // remaining_bits_per_long
+ while num_longs % 2 == 0 and num_values % 2 == 0:
+ num_longs //= 2
+ num_values //= 2
+ iteration *= 2
+
+ f.write(' shiftLongs(tmp, %d, tmp, 0, 0, MASK%d_%d);\n' % (iteration * num_longs, next_primitive, remaining_bits_per_long))
+ f.write(' for (int iter = 0, tmpIdx = 0, longsIdx = %d; iter < %d; ++iter, tmpIdx += %d, longsIdx += %d) {\n' %(o, iteration, num_longs, num_values))
+ tmp_idx = 0
+ b = bpv
+ b -= remaining_bits_per_long
+ f.write(' long l0 = tmp[tmpIdx + %d] << %d;\n' %(tmp_idx, b))
+ tmp_idx += 1
+ while b >= remaining_bits_per_long:
+ b -= remaining_bits_per_long
+ f.write(' l0 |= tmp[tmpIdx + %d] << %d;\n' %(tmp_idx, b))
+ tmp_idx += 1
+ f.write(' longs[longsIdx + 0] = l0;\n')
+ f.write(' }\n')
+
+
+def writeRemainder(bpv, next_primitive, remaining_bits_per_long, o, num_values, f):
+ iteration = 1
+ num_longs = bpv * num_values // remaining_bits_per_long
+ while num_longs % 2 == 0 and num_values % 2 == 0:
+ num_longs //= 2
+ num_values //= 2
+ iteration *= 2
+ f.write(' for (int iter = 0, tmpIdx = 0, longsIdx = %d; iter < %d; ++iter, tmpIdx += %d, longsIdx += %d) {\n' %(o, iteration, num_longs, num_values))
+ i = 0
+ remaining_bits = 0
+ tmp_idx = 0
+ for i in range(num_values):
+ b = bpv
+ if remaining_bits == 0:
+ b -= remaining_bits_per_long
+ f.write(' long l%d = (tmp[tmpIdx + %d] & MASK%d_%d) << %d;\n' %(i, tmp_idx, next_primitive, remaining_bits_per_long, b))
+ else:
+ b -= remaining_bits
+ f.write(' long l%d = (tmp[tmpIdx + %d] & MASK%d_%d) << %d;\n' %(i, tmp_idx, next_primitive, remaining_bits, b))
+ tmp_idx += 1
+ while b >= remaining_bits_per_long:
+ b -= remaining_bits_per_long
+ f.write(' l%d |= (tmp[tmpIdx + %d] & MASK%d_%d) << %d;\n' %(i, tmp_idx, next_primitive, remaining_bits_per_long, b))
+ tmp_idx += 1
+ if b > 0:
+ f.write(' l%d |= (tmp[tmpIdx + %d] >>> %d) & MASK%d_%d;\n' %(i, tmp_idx, remaining_bits_per_long-b, next_primitive, b))
+ remaining_bits = remaining_bits_per_long-b
+ f.write(' longs[longsIdx + %d] = l%d;\n' %(i, i))
+ f.write(' }\n')
+
+
+def writeDecode(bpv, f):
+ next_primitive = 32
+ if bpv <= 8:
+ next_primitive = 8
+ elif bpv <= 16:
+ next_primitive = 16
+ f.write(' private static void decode%d(DataInput in, long[] tmp, long[] longs) throws IOException {\n' %bpv)
+ num_values_per_long = 64 // next_primitive
+ if bpv == next_primitive:
+ f.write(' in.readLongs(longs, 0, %d);\n' %(bpv*2))
+ else:
+ f.write(' in.readLongs(tmp, 0, %d);\n' %(bpv*2))
+ shift = next_primitive - bpv
+ o = 0
+ while shift >= 0:
+ f.write(' shiftLongs(tmp, %d, longs, %d, %d, MASK%d_%d);\n' %(bpv*2, o, shift, next_primitive, bpv))
+ o += bpv*2
+ shift -= bpv
+ if shift + bpv > 0:
+ if bpv % (next_primitive % bpv) == 0:
+ writeRemainderWithSIMDOptimize(bpv, next_primitive, shift + bpv, o, 128//num_values_per_long - o, f)
+ else:
+ writeRemainder(bpv, next_primitive, shift + bpv, o, 128//num_values_per_long - o, f)
+ f.write(' }\n')
+
+
+if __name__ == '__main__':
+ f = open(OUTPUT_FILE, 'w')
+ f.write(HEADER)
+ for primitive_size in PRIMITIVE_SIZE:
+ f.write(' private static final long[] MASKS%d = new long[%d];\n' %(primitive_size, primitive_size))
+ f.write('\n')
+ f.write(' static {\n')
+ for primitive_size in PRIMITIVE_SIZE:
+ f.write(' for (int i = 0; i < %d; ++i) {\n' %primitive_size)
+ f.write(' MASKS%d[i] = mask%d(i);\n' %(primitive_size, primitive_size))
+ f.write(' }\n')
+ f.write(' }')
+ f.write("""
+ // mark values in array as final longs to avoid the cost of reading array, arrays should only be
+ // used when the idx is a variable
+""")
+ for primitive_size in PRIMITIVE_SIZE:
+ for bpv in range(1, min(MAX_SPECIALIZED_BITS_PER_VALUE + 1, primitive_size)):
+ if bpv * 2 != primitive_size or primitive_size == 8:
+ f.write(' private static final long MASK%d_%d = MASKS%d[%d];\n' %(primitive_size, bpv, primitive_size, bpv))
+
+ f.write("""
+ /** Decode 128 integers into {@code longs}. */
+ void decode(int bitsPerValue, DataInput in, long[] longs) throws IOException {
+ switch (bitsPerValue) {
+""")
+ for bpv in range(1, MAX_SPECIALIZED_BITS_PER_VALUE+1):
+ next_primitive = 32
+ if bpv <= 8:
+ next_primitive = 8
+ elif bpv <= 16:
+ next_primitive = 16
+ f.write(' case %d:\n' %bpv)
+ f.write(' decode%d(in, tmp, longs);\n' %bpv)
+ f.write(' expand%d(longs);\n' %next_primitive)
+ f.write(' break;\n')
+ f.write(' default:\n')
+ f.write(' decodeSlow(bitsPerValue, in, tmp, longs);\n')
+ f.write(' expand32(longs);\n')
+ f.write(' break;\n')
+ f.write(' }\n')
+ f.write(' }\n')
+
+ f.write("""
+ /**
+ * Decodes 128 integers into 64 {@code longs} such that each long contains two values, each
+ * represented with 32 bits. Values [0..63] are encoded in the high-order bits of {@code longs}
+ * [0..63], and values [64..127] are encoded in the low-order bits of {@code longs} [0..63]. This
+ * representation may allow subsequent operations to be performed on two values at a time.
+ */
+ void decodeTo32(int bitsPerValue, DataInput in, long[] longs) throws IOException {
+ switch (bitsPerValue) {
+""")
+ for bpv in range(1, MAX_SPECIALIZED_BITS_PER_VALUE+1):
+ next_primitive = 32
+ if bpv <= 8:
+ next_primitive = 8
+ elif bpv <= 16:
+ next_primitive = 16
+ f.write(' case %d:\n' %bpv)
+ f.write(' decode%d(in, tmp, longs);\n' %bpv)
+ if next_primitive <= 16:
+ f.write(' expand%dTo32(longs);\n' %next_primitive)
+ f.write(' break;\n')
+ f.write(' default:\n')
+ f.write(' decodeSlow(bitsPerValue, in, tmp, longs);\n')
+ f.write(' break;\n')
+ f.write(' }\n')
+ f.write(' }\n')
+
+ f.write('\n')
+ for i in range(1, MAX_SPECIALIZED_BITS_PER_VALUE+1):
+ writeDecode(i, f)
+ if i < MAX_SPECIALIZED_BITS_PER_VALUE:
+ f.write('\n')
+
+ f.write('}\n')
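For reference, the MASKS8/MASKS16/MASKS32 tables that this generator emits hold lane-replicated masks: the low bitsPerValue bits are duplicated into every 8-, 16- or 32-bit lane of a long, so a single AND strips the shifted-out bits of all packed values at once. A tiny standalone sketch (illustration only, not generated code):

public class LaneMaskSketch {
  // Same value as the generated MASKS8[bitsPerValue]: the low bits repeated in every byte lane.
  static long mask8(int bitsPerValue) {
    long m = (1L << bitsPerValue) - 1;
    m |= m << 8;
    m |= m << 16;
    m |= m << 32;
    return m;
  }

  public static void main(String[] args) {
    System.out.println(Long.toHexString(mask8(3))); // prints 707070707070707, i.e. 0x0707070707070707
  }
}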
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene94/Lucene94FieldInfosFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene94/Lucene94FieldInfosFormat.java
index 796abd35252d..9cf34dfd3df5 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene94/Lucene94FieldInfosFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene94/Lucene94FieldInfosFormat.java
@@ -378,7 +378,7 @@ public void write(
CodecUtil.writeIndexHeader(
output,
Lucene94FieldInfosFormat.CODEC_NAME,
- Lucene94FieldInfosFormat.FORMAT_CURRENT,
+ Lucene94FieldInfosFormat.FORMAT_START, // temporarily backdate this
segmentInfo.getId(),
segmentSuffix);
output.writeVInt(infos.size());
@@ -393,7 +393,10 @@ public void write(
if (fi.omitsNorms()) bits |= OMIT_NORMS;
if (fi.hasPayloads()) bits |= STORE_PAYLOADS;
if (fi.isSoftDeletesField()) bits |= SOFT_DELETES_FIELD;
- if (fi.isParentField()) bits |= PARENT_FIELD_FIELD;
+ if (fi.isParentField()) {
+ throw new IllegalArgumentException("temporarily disallow this");
+ // bits |= PARENT_FIELD_FIELD;
+ }
output.writeByte(bits);
output.writeByte(indexOptionsByte(fi.getIndexOptions()));
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene95/Lucene95Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene95/Lucene95Codec.java
new file mode 100644
index 000000000000..140d698a6f79
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene95/Lucene95Codec.java
@@ -0,0 +1,218 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene95;
+
+import java.util.Objects;
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.CompoundFormat;
+import org.apache.lucene.codecs.DocValuesFormat;
+import org.apache.lucene.codecs.FieldInfosFormat;
+import org.apache.lucene.codecs.FilterCodec;
+import org.apache.lucene.codecs.KnnVectorsFormat;
+import org.apache.lucene.codecs.LiveDocsFormat;
+import org.apache.lucene.codecs.NormsFormat;
+import org.apache.lucene.codecs.PointsFormat;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.SegmentInfoFormat;
+import org.apache.lucene.codecs.StoredFieldsFormat;
+import org.apache.lucene.codecs.TermVectorsFormat;
+import org.apache.lucene.codecs.lucene90.Lucene90CompoundFormat;
+import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
+import org.apache.lucene.codecs.lucene90.Lucene90LiveDocsFormat;
+import org.apache.lucene.codecs.lucene90.Lucene90NormsFormat;
+import org.apache.lucene.codecs.lucene90.Lucene90PointsFormat;
+import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat;
+import org.apache.lucene.codecs.lucene90.Lucene90SegmentInfoFormat;
+import org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat;
+import org.apache.lucene.codecs.lucene90.Lucene90TermVectorsFormat;
+import org.apache.lucene.codecs.lucene94.Lucene94FieldInfosFormat;
+import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
+import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
+import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
+import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
+
+/**
+ * Implements the Lucene 9.5 index format
+ *
+ * <p>If you want to reuse functionality of this codec in another codec, extend {@link FilterCodec}.
+ *
+ * @see org.apache.lucene.codecs.lucene95 package documentation for file format details.
+ * @lucene.experimental
+ */
+public class Lucene95Codec extends Codec {
+
+ /** Configuration option for the codec. */
+ public enum Mode {
+ /** Trade compression ratio for retrieval speed. */
+ BEST_SPEED(Lucene90StoredFieldsFormat.Mode.BEST_SPEED),
+ /** Trade retrieval speed for compression ratio. */
+ BEST_COMPRESSION(Lucene90StoredFieldsFormat.Mode.BEST_COMPRESSION);
+
+ private final Lucene90StoredFieldsFormat.Mode storedMode;
+
+ private Mode(Lucene90StoredFieldsFormat.Mode storedMode) {
+ this.storedMode = Objects.requireNonNull(storedMode);
+ }
+ }
+
+ private final TermVectorsFormat vectorsFormat = new Lucene90TermVectorsFormat();
+ private final FieldInfosFormat fieldInfosFormat = new Lucene94FieldInfosFormat();
+ private final SegmentInfoFormat segmentInfosFormat = new Lucene90SegmentInfoFormat();
+ private final LiveDocsFormat liveDocsFormat = new Lucene90LiveDocsFormat();
+ private final CompoundFormat compoundFormat = new Lucene90CompoundFormat();
+ private final NormsFormat normsFormat = new Lucene90NormsFormat();
+
+ private final PostingsFormat defaultPostingsFormat;
+ private final PostingsFormat postingsFormat =
+ new PerFieldPostingsFormat() {
+ @Override
+ public PostingsFormat getPostingsFormatForField(String field) {
+ return Lucene95Codec.this.getPostingsFormatForField(field);
+ }
+ };
+
+ private final DocValuesFormat defaultDVFormat;
+ private final DocValuesFormat docValuesFormat =
+ new PerFieldDocValuesFormat() {
+ @Override
+ public DocValuesFormat getDocValuesFormatForField(String field) {
+ return Lucene95Codec.this.getDocValuesFormatForField(field);
+ }
+ };
+
+ private final KnnVectorsFormat defaultKnnVectorsFormat;
+ private final KnnVectorsFormat knnVectorsFormat =
+ new PerFieldKnnVectorsFormat() {
+ @Override
+ public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
+ return Lucene95Codec.this.getKnnVectorsFormatForField(field);
+ }
+ };
+
+ private final StoredFieldsFormat storedFieldsFormat;
+
+ /** Instantiates a new codec. */
+ public Lucene95Codec() {
+ this(Mode.BEST_SPEED);
+ }
+
+ /**
+ * Instantiates a new codec, specifying the stored fields compression mode to use.
+ *
+ * @param mode stored fields compression mode to use for newly flushed/merged segments.
+ */
+ public Lucene95Codec(Mode mode) {
+ super("Lucene95");
+ this.storedFieldsFormat =
+ new Lucene90StoredFieldsFormat(Objects.requireNonNull(mode).storedMode);
+ this.defaultPostingsFormat = new Lucene90PostingsFormat();
+ this.defaultDVFormat = new Lucene90DocValuesFormat();
+ this.defaultKnnVectorsFormat = new Lucene99HnswVectorsFormat(); // FS: not used
+ }
+
+ @Override
+ public final StoredFieldsFormat storedFieldsFormat() {
+ return storedFieldsFormat;
+ }
+
+ @Override
+ public final TermVectorsFormat termVectorsFormat() {
+ return vectorsFormat;
+ }
+
+ @Override
+ public final PostingsFormat postingsFormat() {
+ return postingsFormat;
+ }
+
+ @Override
+ public final FieldInfosFormat fieldInfosFormat() {
+ return fieldInfosFormat;
+ }
+
+ @Override
+ public final SegmentInfoFormat segmentInfoFormat() {
+ return segmentInfosFormat;
+ }
+
+ @Override
+ public final LiveDocsFormat liveDocsFormat() {
+ return liveDocsFormat;
+ }
+
+ @Override
+ public final CompoundFormat compoundFormat() {
+ return compoundFormat;
+ }
+
+ @Override
+ public final PointsFormat pointsFormat() {
+ return new Lucene90PointsFormat();
+ }
+
+ @Override
+ public final KnnVectorsFormat knnVectorsFormat() {
+ return knnVectorsFormat;
+ }
+
+ /**
+ * Returns the postings format that should be used for writing new segments of
+ * <code>field</code>.
+ *
+ * <p>The default implementation always returns "Lucene90".
+ *
+ * <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
+ * future versions of Lucene are only guaranteed to be able to read the default implementation.
+ */
+ public PostingsFormat getPostingsFormatForField(String field) {
+ return defaultPostingsFormat;
+ }
+
+ /**
+ * Returns the docvalues format that should be used for writing new segments of
+ * <code>field</code>.
+ *
+ * <p>The default implementation always returns "Lucene90".
+ *
+ * <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
+ * future versions of Lucene are only guaranteed to be able to read the default implementation.
+ */
+ public DocValuesFormat getDocValuesFormatForField(String field) {
+ return defaultDVFormat;
+ }
+
+ /**
+ * Returns the vectors format that should be used for writing new segments of
+ * <code>field</code>.
+ *
+ * <p>The default implementation always returns "Lucene95".
+ *
+ * <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
+ * future versions of Lucene are only guaranteed to be able to read the default implementation.
+ */
+ public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
+ return defaultKnnVectorsFormat;
+ }
+
+ @Override
+ public final DocValuesFormat docValuesFormat() {
+ return docValuesFormat;
+ }
+
+ @Override
+ public final NormsFormat normsFormat() {
+ return normsFormat;
+ }
+}
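The three getXxxFormatForField methods above are the intended extension points. A hedged sketch (illustration only, not part of the patch; the subclass and field name are hypothetical) of routing a single field to a different postings format while everything else keeps the codec defaults:

import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene95.Lucene95Codec;

public class PerFieldPostingsCodecSketch extends Lucene95Codec {
  private final PostingsFormat idFormat = PostingsFormat.forName("Lucene90");

  @Override
  public PostingsFormat getPostingsFormatForField(String field) {
    if ("id".equals(field)) { // hypothetical field name
      return idFormat;
    }
    return super.getPostingsFormatForField(field);
  }
}

Such a codec would typically be installed with IndexWriterConfig.setCodec; the per-field wrapper records each field's format name in the segment, so the resulting index stays readable by the default codec.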
diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/FST.java b/lucene/core/src/java/org/apache/lucene/util/fst/FST.java
index 6bb5718d5c75..4c23466afe78 100644
--- a/lucene/core/src/java/org/apache/lucene/util/fst/FST.java
+++ b/lucene/core/src/java/org/apache/lucene/util/fst/FST.java
@@ -547,7 +547,7 @@ public void save(DataOutput metaOut, DataOutput out) throws IOException {
* @param metaOut the DataOutput to write the metadata to
*/
public void saveMetadata(DataOutput metaOut) throws IOException {
- CodecUtil.writeHeader(metaOut, FILE_FORMAT_NAME, VERSION_CURRENT);
+ CodecUtil.writeHeader(metaOut, FILE_FORMAT_NAME, VERSION_90);
// TODO: really we should encode this as an arc, arriving
// to the root node, instead of special casing here:
if (metadata.emptyOutput != null) {
diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java b/lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java
index 3f0b2b4deb21..dab86f6ee720 100644
--- a/lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java
+++ b/lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java
@@ -246,7 +246,7 @@ public static class Builder {
private boolean allowFixedLengthArcs = true;
private DataOutput dataOutput;
private float directAddressingMaxOversizingFactor = DIRECT_ADDRESSING_MAX_OVERSIZING_FACTOR;
- private int version = FST.VERSION_CURRENT;
+ private int version = FST.VERSION_90;
/**
* @param inputType The input type (transition labels). Can be anything from {@link INPUT_TYPE}
@@ -333,12 +333,12 @@ public Builder directAddressingMaxOversizingFactor(float factor) {
/** Expert: Set the codec version. * */
public Builder setVersion(int version) {
- if (version < FST.VERSION_90 || version > FST.VERSION_CURRENT) {
+ if (version < FST.VERSION_90 || version > FST.VERSION_90) {
throw new IllegalArgumentException(
"Expected version in range ["
+ FST.VERSION_90
+ ", "
- + FST.VERSION_CURRENT
+ + FST.VERSION_90
+ "], got "
+ version);
}