From b48850e85e216d825e064d8308bb549d795837f9 Mon Sep 17 00:00:00 2001
From: Jonas Kunz
Date: Fri, 14 Nov 2025 13:07:38 +0100
Subject: [PATCH 1/2] Implement formatted doc values for exponential histograms

---
 .../ExponentialHistogramFieldMapper.java      | 43 ++++++++++++
 .../ExponentialHistogramFieldMapperTests.java | 67 +++++++++++++++++++
 ...xponentialHistogramAggregatorTestCase.java | 34 ++++++----
 3 files changed, 130 insertions(+), 14 deletions(-)

diff --git a/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapper.java b/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapper.java
index bcf755bb18271..6df1447c5d3c3 100644
--- a/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapper.java
+++ b/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapper.java
@@ -29,6 +29,7 @@
 import org.elasticsearch.exponentialhistogram.ExponentialHistogramXContent;
 import org.elasticsearch.exponentialhistogram.ZeroBucket;
 import org.elasticsearch.index.fielddata.FieldDataContext;
+import org.elasticsearch.index.fielddata.FormattedDocValues;
 import org.elasticsearch.index.fielddata.IndexFieldData;
 import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
 import org.elasticsearch.index.mapper.BlockLoader;
@@ -258,6 +259,11 @@ public SortedBinaryDocValues getBytesValues() {
             );
         }
 
+        @Override
+        public FormattedDocValues getFormattedValues(DocValueFormat format){
+            return createFormattedDocValues(context.reader(), fieldName);
+        }
+
         @Override
         public long ramBytesUsed() {
             return 0; // No dynamic allocations
@@ -397,6 +403,43 @@ public void read(int docId, StoredFields storedFields, Builder builder) throws I
         }
     }
 
+    // Visible for testing
+    static FormattedDocValues createFormattedDocValues(LeafReader reader, String fieldName) {
+        return new FormattedDocValues() {
+
+            boolean hasNext = false;
+            ExponentialHistogramValuesReader delegate;
+
+            private ExponentialHistogramValuesReader lazyDelegate() throws IOException {
+                if (delegate == null) {
+                    delegate = new DocValuesReader(reader, fieldName);
+                }
+                return delegate;
+            }
+
+            @Override
+            public boolean advanceExact(int docId) throws IOException {
+                hasNext = lazyDelegate().advanceExact(docId);
+                return hasNext;
+            }
+
+            @Override
+            public int docValueCount() throws IOException {
+                return 1; // no multivalue support, so always 1
+            }
+
+            @Override
+            public Object nextValue() throws IOException {
+                if (hasNext == false) {
+                    throw new IllegalStateException("No value available, make sure to call advanceExact() first");
+                }
+                hasNext = false;
+                return lazyDelegate().histogramValue();
+            }
+
+        };
+    }
+
     @Override
     protected boolean supportsParsingObject() {
         return true;
diff --git a/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapperTests.java b/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapperTests.java
index f782e7f30b7e3..ad2d2d0103193 100644
--- a/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapperTests.java
+++ b/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapperTests.java
@@ -7,19 +7,39 @@
 
 package org.elasticsearch.xpack.exponentialhistogram;
 
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.LogDocMergePolicy;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.SortedNumericSortField;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.tests.analysis.MockAnalyzer;
+import org.apache.lucene.tests.index.RandomIndexWriter;
+import org.apache.lucene.tests.util.LuceneTestCase;
 import org.elasticsearch.core.Types;
+import org.elasticsearch.exponentialhistogram.ExponentialHistogram;
+import org.elasticsearch.exponentialhistogram.ExponentialHistogramCircuitBreaker;
+import org.elasticsearch.exponentialhistogram.ExponentialHistogramTestUtils;
 import org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils;
 import org.elasticsearch.exponentialhistogram.ZeroBucket;
+import org.elasticsearch.index.engine.Engine;
+import org.elasticsearch.index.fielddata.FormattedDocValues;
+import org.elasticsearch.index.mapper.DataStreamTimestampFieldMapper;
 import org.elasticsearch.index.mapper.DocumentMapper;
 import org.elasticsearch.index.mapper.DocumentParsingException;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.MapperParsingException;
 import org.elasticsearch.index.mapper.MapperTestCase;
 import org.elasticsearch.index.mapper.SourceToParse;
+import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper;
 import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.xcontent.XContentBuilder;
 import org.elasticsearch.xpack.analytics.mapper.ExponentialHistogramParser;
 import org.elasticsearch.xpack.analytics.mapper.IndexWithCount;
+import org.elasticsearch.xpack.exponentialhistogram.aggregations.ExponentialHistogramAggregatorTestCase;
 import org.junit.AssumptionViolatedException;
 import org.junit.Before;
 
@@ -35,12 +55,14 @@
 import java.util.Map;
 import java.util.OptionalDouble;
 import java.util.Set;
+import java.util.stream.IntStream;
 
 import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX;
 import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_SCALE;
 import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_INDEX;
 import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_SCALE;
 import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.equalTo;
 
 public class ExponentialHistogramFieldMapperTests extends MapperTestCase {
 
@@ -596,6 +618,51 @@ public List invalidExample() {
         };
     }
 
+    public void testFormattedDocValues() throws IOException {
+        try (Directory directory = newDirectory()) {
+            ExponentialHistogramCircuitBreaker noopBreaker = ExponentialHistogramCircuitBreaker.noop();
+
+            List<ExponentialHistogram> inputHistograms = IntStream.range(0, randomIntBetween(1, 100))
+                .mapToObj(i -> ExponentialHistogramTestUtils.randomHistogram(noopBreaker))
+                .map(histo -> ExponentialHistogram.builder(histo, noopBreaker)
+                    // make sure we have a double-based zero bucket, as we can only serialize those exactly
+                    .zeroBucket(ZeroBucket.create(histo.zeroBucket().zeroThreshold(), histo.zeroBucket().count()))
+                    .build())
+                .map(histogram -> randomBoolean() ? null : histogram)
+                .toList();
+
+            IndexWriterConfig config = LuceneTestCase.newIndexWriterConfig(random(), new MockAnalyzer(random()));
+            RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, config);
+            inputHistograms.forEach(histo -> ExponentialHistogramAggregatorTestCase.addHistogramDoc(indexWriter, "field", histo));
+            indexWriter.close();
+
+            try (
+                DirectoryReader reader = DirectoryReader.open(directory)
+            ) {
+                for (int i = 0; i < reader.leaves().size(); i++) {
+                    LeafReaderContext leaf = reader.leaves().get(i);
+                    int docBase = leaf.docBase;
+                    LeafReader leafReader = leaf.reader();
+                    int maxDoc = leafReader.maxDoc();
+                    FormattedDocValues docValues = ExponentialHistogramFieldMapper.createFormattedDocValues(leafReader, "field");
+                    for (int j = 0; j < maxDoc; j++) {
+                        var expectedHistogram = inputHistograms.get(docBase + j);
+                        if (expectedHistogram == null) {
+                            assertThat(docValues.advanceExact(j), equalTo(false));
+                            expectThrows(IllegalStateException.class, docValues::nextValue);
+                        } else {
+                            assertThat(docValues.advanceExact(j), equalTo(true));
+                            assertThat(docValues.docValueCount(), equalTo(1));
+                            Object actualHistogram = docValues.nextValue();
+                            assertThat(actualHistogram, equalTo(expectedHistogram));
+                            expectThrows(IllegalStateException.class, docValues::nextValue);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
     @Override
     public void testSyntheticSourceKeepArrays() {
         // exponential_histogram can't be used within an array
diff --git a/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/aggregations/ExponentialHistogramAggregatorTestCase.java b/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/aggregations/ExponentialHistogramAggregatorTestCase.java
index 72f0ae9dcfe94..778a66d93ca9a 100644
--- a/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/aggregations/ExponentialHistogramAggregatorTestCase.java
+++ b/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/aggregations/ExponentialHistogramAggregatorTestCase.java
@@ -9,6 +9,7 @@
 
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.tests.index.RandomIndexWriter;
+import org.elasticsearch.core.Nullable;
 import org.elasticsearch.exponentialhistogram.ExponentialHistogram;
 import org.elasticsearch.exponentialhistogram.ExponentialHistogramTestUtils;
 import org.elasticsearch.plugins.SearchPlugin;
@@ -20,6 +21,7 @@
 
 import java.io.IOException;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.List;
 import java.util.stream.IntStream;
 import java.util.stream.Stream;
@@ -42,25 +44,29 @@ protected static List createRandomHistograms(int count) {
         return IntStream.range(0, count).mapToObj(i -> ExponentialHistogramTestUtils.randomHistogram()).toList();
     }
 
-    protected static void addHistogramDoc(
+    public static void addHistogramDoc(
         RandomIndexWriter iw,
         String fieldName,
-        ExponentialHistogram histogram,
+        @Nullable ExponentialHistogram histogram,
         IndexableField... additionalFields
     ) {
         try {
-            ExponentialHistogramFieldMapper.HistogramDocValueFields docValues = ExponentialHistogramFieldMapper.buildDocValueFields(
-                fieldName,
-                histogram.scale(),
-                IndexWithCount.fromIterator(histogram.negativeBuckets().iterator()),
-                IndexWithCount.fromIterator(histogram.positiveBuckets().iterator()),
-                histogram.zeroBucket().zeroThreshold(),
-                histogram.valueCount(),
-                histogram.sum(),
-                histogram.min(),
-                histogram.max()
-            );
-            iw.addDocument(Stream.concat(docValues.fieldsAsList().stream(), Arrays.stream(additionalFields)).toList());
+            if (histogram == null) {
+                iw.addDocument(Collections.emptyList());
+            } else {
+                ExponentialHistogramFieldMapper.HistogramDocValueFields docValues = ExponentialHistogramFieldMapper.buildDocValueFields(
+                    fieldName,
+                    histogram.scale(),
+                    IndexWithCount.fromIterator(histogram.negativeBuckets().iterator()),
+                    IndexWithCount.fromIterator(histogram.positiveBuckets().iterator()),
+                    histogram.zeroBucket().zeroThreshold(),
+                    histogram.valueCount(),
+                    histogram.sum(),
+                    histogram.min(),
+                    histogram.max()
+                );
+                iw.addDocument(Stream.concat(docValues.fieldsAsList().stream(), Arrays.stream(additionalFields)).toList());
+            }
         } catch (IOException e) {
             throw new RuntimeException(e);
         }
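The sketch below is illustrative only and is not part of the patch series. It shows how a consumer is expected to drive the FormattedDocValues contract that createFormattedDocValues(...) implements: advanceExact(docId) positions the reader and reports whether the document has a value, docValueCount() says how many values follow (always 1 here, since exponential_histogram is single-valued), and nextValue() may only be called after a successful advanceExact(). The class and method names are invented for the example, and the FormattedDocValues instance is assumed to come from the new helper (or from LeafFieldData#getFormattedValues at search time).

import java.io.IOException;

import org.elasticsearch.index.fielddata.FormattedDocValues;

// Illustrative sketch only, not part of this change.
class FormattedHistogramValuesSketch {

    // Iterates one segment and prints every decoded histogram it finds.
    static void printHistograms(FormattedDocValues docValues, int maxDoc) throws IOException {
        for (int docId = 0; docId < maxDoc; docId++) {
            if (docValues.advanceExact(docId) == false) {
                continue; // this document has no value for the field
            }
            // exponential_histogram is single-valued, so docValueCount() is always 1 here
            for (int i = 0; i < docValues.docValueCount(); i++) {
                // nextValue() is only legal after a successful advanceExact(),
                // and at most docValueCount() times per document
                Object histogram = docValues.nextValue();
                System.out.println("doc " + docId + " -> " + histogram);
            }
        }
    }
}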
From 3fee7dc2e2fddfea7c4143f1e5eb1e8268b8ce0e Mon Sep 17 00:00:00 2001
From: elasticsearchmachine
Date: Fri, 14 Nov 2025 13:09:48 +0000
Subject: [PATCH 2/2] [CI] Auto commit changes from spotless

---
 .../ExponentialHistogramFieldMapper.java      |  2 +-
 .../ExponentialHistogramFieldMapperTests.java | 21 +++++++------------
 2 files changed, 8 insertions(+), 15 deletions(-)

diff --git a/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapper.java b/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapper.java
index 6df1447c5d3c3..df5af4278173d 100644
--- a/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapper.java
+++ b/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapper.java
@@ -260,7 +260,7 @@ public SortedBinaryDocValues getBytesValues() {
         }
 
         @Override
-        public FormattedDocValues getFormattedValues(DocValueFormat format){
+        public FormattedDocValues getFormattedValues(DocValueFormat format) {
             return createFormattedDocValues(context.reader(), fieldName);
         }
 
diff --git a/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapperTests.java b/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapperTests.java
index ad2d2d0103193..f36d034da7dc6 100644
--- a/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapperTests.java
+++ b/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapperTests.java
@@ -11,10 +11,6 @@
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.LogDocMergePolicy;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.SortField;
-import org.apache.lucene.search.SortedNumericSortField;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.tests.analysis.MockAnalyzer;
 import org.apache.lucene.tests.index.RandomIndexWriter;
@@ -25,16 +21,13 @@ import org.elasticsearch.exponentialhistogram.ExponentialHistogramTestUtils;
 import org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils;
 import org.elasticsearch.exponentialhistogram.ZeroBucket;
-import org.elasticsearch.index.engine.Engine;
 import org.elasticsearch.index.fielddata.FormattedDocValues;
-import org.elasticsearch.index.mapper.DataStreamTimestampFieldMapper;
 import org.elasticsearch.index.mapper.DocumentMapper;
 import org.elasticsearch.index.mapper.DocumentParsingException;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.MapperParsingException;
 import org.elasticsearch.index.mapper.MapperTestCase;
 import org.elasticsearch.index.mapper.SourceToParse;
-import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper;
 import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.xcontent.XContentBuilder;
 import org.elasticsearch.xpack.analytics.mapper.ExponentialHistogramParser;
 import org.elasticsearch.xpack.analytics.mapper.IndexWithCount;
@@ -624,10 +617,12 @@ public void testFormattedDocValues() throws IOException {
 
             List<ExponentialHistogram> inputHistograms = IntStream.range(0, randomIntBetween(1, 100))
                 .mapToObj(i -> ExponentialHistogramTestUtils.randomHistogram(noopBreaker))
-                .map(histo -> ExponentialHistogram.builder(histo, noopBreaker)
-                    // make sure we have a double-based zero bucket, as we can only serialize those exactly
-                    .zeroBucket(ZeroBucket.create(histo.zeroBucket().zeroThreshold(), histo.zeroBucket().count()))
-                    .build())
+                .map(
+                    histo -> ExponentialHistogram.builder(histo, noopBreaker)
+                        // make sure we have a double-based zero bucket, as we can only serialize those exactly
+                        .zeroBucket(ZeroBucket.create(histo.zeroBucket().zeroThreshold(), histo.zeroBucket().count()))
+                        .build()
+                )
                 .map(histogram -> randomBoolean() ? null : histogram)
                 .toList();
 
@@ -636,9 +631,7 @@ public void testFormattedDocValues() throws IOException {
             inputHistograms.forEach(histo -> ExponentialHistogramAggregatorTestCase.addHistogramDoc(indexWriter, "field", histo));
             indexWriter.close();
 
-            try (
-                DirectoryReader reader = DirectoryReader.open(directory)
-            ) {
+            try (DirectoryReader reader = DirectoryReader.open(directory)) {
                 for (int i = 0; i < reader.leaves().size(); i++) {
                     LeafReaderContext leaf = reader.leaves().get(i);
                     int docBase = leaf.docBase;