88
99import org .apache .lucene .document .BinaryDocValuesField ;
1010import org .apache .lucene .document .Field ;
11+ import org .apache .lucene .document .NumericDocValuesField ;
1112import org .apache .lucene .index .BinaryDocValues ;
1213import org .apache .lucene .index .DocValues ;
1314import org .apache .lucene .index .LeafReader ;
1415import org .apache .lucene .index .LeafReaderContext ;
1516import org .apache .lucene .search .Query ;
1617import org .apache .lucene .search .SortField ;
1718import org .apache .lucene .util .BytesRef ;
18- import org .elasticsearch . TransportVersions ;
19+ import org .apache . lucene . util . NumericUtils ;
1920import org .elasticsearch .common .Explicit ;
2021import org .elasticsearch .common .io .stream .ByteArrayStreamInput ;
2122import org .elasticsearch .common .io .stream .BytesStreamOutput ;
@@ -67,6 +68,10 @@ public class TDigestFieldMapper extends FieldMapper {
6768
6869 public static final String CENTROIDS_NAME = "centroids" ;
6970 public static final String COUNTS_NAME = "counts" ;
71+ public static final String SUM_FIELD_NAME = "sum" ;
72+ public static final String TOTAL_COUNT_FIELD_NAME = "count" ;
73+ public static final String MIN_FIELD_NAME = "min" ;
74+ public static final String MAX_FIELD_NAME = "max" ;
7075 public static final String CONTENT_TYPE = "tdigest" ;
7176
7277 private static TDigestFieldMapper toType (FieldMapper in ) {
@@ -202,7 +207,7 @@ public LeafHistogramFieldData load(LeafReaderContext context) {
202207 public HistogramValues getHistogramValues () throws IOException {
203208 try {
204209 final BinaryDocValues values = DocValues .getBinary (context .reader (), fieldName );
205- final InternalHistogramValue value = new InternalHistogramValue ();
210+ final InternalTDigestValue value = new InternalTDigestValue ();
206211 return new HistogramValues () {
207212
208213 @ Override
@@ -234,7 +239,7 @@ public DocValuesScriptFieldFactory getScriptFieldFactory(String name) {
234239 public FormattedDocValues getFormattedValues (DocValueFormat format ) {
235240 try {
236241 final BinaryDocValues values = DocValues .getBinary (context .reader (), fieldName );
237- final InternalHistogramValue value = new InternalHistogramValue ();
242+ final InternalTDigestValue value = new InternalTDigestValue ();
238243 return new FormattedDocValues () {
239244 @ Override
240245 public boolean advanceExact (int docId ) throws IOException {
@@ -337,20 +342,44 @@ public void parse(DocumentParserContext context) throws IOException {
337342 }
338343 subParser .nextToken ();
339344 // TODO: Here we should build a t-digest out of the input, based on the settings on the field
340- TDigestParser .ParsedHistogram parsedHistogram = TDigestParser .parse (fullPath (), subParser );
345+ TDigestParser .ParsedTDigest parsedTDigest = TDigestParser .parse (fullPath (), subParser );
341346
342347 BytesStreamOutput streamOutput = new BytesStreamOutput ();
343- for (int i = 0 ; i < parsedHistogram .centroids ().size (); i ++) {
344- long count = parsedHistogram .counts ().get (i );
348+
349+ for (int i = 0 ; i < parsedTDigest .centroids ().size (); i ++) {
350+ long count = parsedTDigest .counts ().get (i );
345351 assert count >= 0 ;
346352 // we do not add elements with count == 0
347353 if (count > 0 ) {
348354 streamOutput .writeVLong (count );
349- streamOutput .writeLong ( Double . doubleToRawLongBits ( parsedHistogram . centroids ().get (i ) ));
355+ streamOutput .writeDouble ( parsedTDigest . centroids ().get (i ));
350356 }
351357 }
358+
352359 BytesRef docValue = streamOutput .bytes ().toBytesRef ();
353- Field field = new BinaryDocValuesField (fullPath (), docValue );
360+ Field digestField = new BinaryDocValuesField (fullPath (), docValue );
361+
362+ // Add numeric doc values fields for the summary data
363+ NumericDocValuesField maxField = null ;
364+ if (Double .isNaN (parsedTDigest .max ()) == false ) {
365+ maxField = new NumericDocValuesField (
366+ valuesMaxSubFieldName (fullPath ()),
367+ NumericUtils .doubleToSortableLong (parsedTDigest .max ())
368+ );
369+ }
370+
371+ NumericDocValuesField minField = null ;
372+ if (Double .isNaN (parsedTDigest .min ()) == false ) {
373+ minField = new NumericDocValuesField (
374+ valuesMinSubFieldName (fullPath ()),
375+ NumericUtils .doubleToSortableLong (parsedTDigest .min ())
376+ );
377+ }
378+ NumericDocValuesField countField = new NumericDocValuesField (valuesCountSubFieldName (fullPath ()), parsedTDigest .count ());
379+ NumericDocValuesField sumField = new NumericDocValuesField (
380+ valuesSumSubFieldName (fullPath ()),
381+ NumericUtils .doubleToSortableLong (parsedTDigest .sum ())
382+ );
354383 if (context .doc ().getByKey (fieldType ().name ()) != null ) {
355384 throw new IllegalArgumentException (
356385 "Field ["
@@ -360,7 +389,15 @@ public void parse(DocumentParserContext context) throws IOException {
360389 + "] doesn't support indexing multiple values for the same field in the same document"
361390 );
362391 }
363- context .doc ().addWithKey (fieldType ().name (), field );
392+ context .doc ().addWithKey (fieldType ().name (), digestField );
393+ context .doc ().add (countField );
394+ context .doc ().add (sumField );
395+ if (maxField != null ) {
396+ context .doc ().add (maxField );
397+ }
398+ if (minField != null ) {
399+ context .doc ().add (minField );
400+ }
364401
365402 } catch (Exception ex ) {
366403 if (ignoreMalformed .value () == false ) {
@@ -390,19 +427,36 @@ public void parse(DocumentParserContext context) throws IOException {
390427 context .path ().remove ();
391428 }
392429
430+ private static String valuesCountSubFieldName (String fullPath ) {
431+ return fullPath + "._values_count" ;
432+ }
433+
434+ private static String valuesSumSubFieldName (String fullPath ) {
435+ return fullPath + "._values_sum" ;
436+ }
437+
438+ private static String valuesMinSubFieldName (String fullPath ) {
439+ return fullPath + "._values_min" ;
440+ }
441+
442+ private static String valuesMaxSubFieldName (String fullPath ) {
443+ return fullPath + "._values_max" ;
444+ }
445+
393446 /** re-usable {@link HistogramValue} implementation */
394- private static class InternalHistogramValue extends HistogramValue {
447+ private static class InternalTDigestValue extends HistogramValue {
395448 double value ;
396449 long count ;
397450 boolean isExhausted ;
451+
398452 final ByteArrayStreamInput streamInput ;
399453
400- InternalHistogramValue () {
454+ InternalTDigestValue () {
401455 streamInput = new ByteArrayStreamInput ();
402456 }
403457
404458 /** reset the value for the histogram */
405- void reset (BytesRef bytesRef ) {
459+ void reset (BytesRef bytesRef ) throws IOException {
406460 streamInput .reset (bytesRef .bytes , bytesRef .offset , bytesRef .length );
407461 isExhausted = false ;
408462 value = 0 ;
@@ -412,12 +466,8 @@ void reset(BytesRef bytesRef) {
412466 @ Override
413467 public boolean next () throws IOException {
414468 if (streamInput .available () > 0 ) {
415- if (streamInput .getTransportVersion ().onOrAfter (TransportVersions .V_8_11_X )) {
416- count = streamInput .readVLong ();
417- } else {
418- count = streamInput .readVInt ();
419- }
420- value = Double .longBitsToDouble (streamInput .readLong ());
469+ count = streamInput .readVLong ();
470+ value = streamInput .readDouble ();
421471 return true ;
422472 }
423473 isExhausted = true ;
@@ -447,14 +497,14 @@ protected SyntheticSourceSupport syntheticSourceSupport() {
447497 () -> new CompositeSyntheticFieldLoader (
448498 leafName (),
449499 fullPath (),
450- new HistogramSyntheticFieldLoader (),
500+ new TDigestSyntheticFieldLoader (),
451501 new CompositeSyntheticFieldLoader .MalformedValuesLayer (fullPath ())
452502 )
453503 );
454504 }
455505
456- private class HistogramSyntheticFieldLoader implements CompositeSyntheticFieldLoader .DocValuesLayer {
457- private final InternalHistogramValue value = new InternalHistogramValue ();
506+ private class TDigestSyntheticFieldLoader implements CompositeSyntheticFieldLoader .DocValuesLayer {
507+ private final InternalTDigestValue value = new InternalTDigestValue ();
458508 private BytesRef binaryValue ;
459509
460510 @ Override
@@ -485,9 +535,10 @@ public void write(XContentBuilder b) throws IOException {
485535 if (binaryValue == null ) {
486536 return ;
487537 }
488- b .startObject ();
489-
490538 value .reset (binaryValue );
539+
540+ b .startObject ();
541+ // TODO: Load the summary values out of the sub-fields, if they exist
491542 b .startArray (CENTROIDS_NAME );
492543 while (value .next ()) {
493544 b .value (value .value ());
0 commit comments