diff --git a/docs/changelog/138123.yaml b/docs/changelog/138123.yaml new file mode 100644 index 0000000000000..c68e47500ffe0 --- /dev/null +++ b/docs/changelog/138123.yaml @@ -0,0 +1,5 @@ +pr: 138123 +summary: ES|QL Update CHUNK to support `chunking_settings` as optional argument +area: ES|QL +type: enhancement +issues: [] diff --git a/docs/reference/query-languages/esql/_snippets/functions/examples/chunk.md b/docs/reference/query-languages/esql/_snippets/functions/examples/chunk.md index 4f875b1214fab..ec291cb115e3f 100644 --- a/docs/reference/query-languages/esql/_snippets/functions/examples/chunk.md +++ b/docs/reference/query-languages/esql/_snippets/functions/examples/chunk.md @@ -1,6 +1,6 @@ % This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. -**Example** +**Examples** ```{applies_to} stack: preview 9.3.0 @@ -8,15 +8,32 @@ stack: preview 9.3.0 ```esql FROM books -| EVAL chunks = CHUNK(description, {"num_chunks":1, "chunk_size":20}) +| EVAL chunks = CHUNK(description) ``` | book_no:keyword | title:text | chunks:keyword | | --- | --- | --- | -| 1211 | The brothers Karamazov | In 1880 Dostoevsky completed The Brothers Karamazov, the literary effort for which he had been preparing all his life. | -| 1463 | Realms of Tolkien: Images of Middle-earth | Twenty new and familiar Tolkien artists are represented in this fabulous volume, breathing an extraordinary variety of life into 58 | -| 1502 | Selected Passages from Correspondence with Friends | Nikolai Gogol wrote some letters to his friends, none of which were a nose of high rank. | -| 1937 | The Best Short Stories of Dostoevsky (Modern Library) | This collection, unique to the Modern Library, gathers seven of Dostoevsky's key works and shows him to be equally adept | -| 1985 | Brothers Karamazov | Four brothers reunite in their hometown in Russia. 
| +| 1211 | The brothers Karamazov | In 1880 Dostoevsky completed The Brothers Karamazov, the literary effort for which he had been preparing all his life. Compelling, profound, complex, it is the story of a patricide and of the four sons who each had a motive for murder: Dmitry, the sensualist, Ivan, the intellectual, Alyosha, the mystic, and twisted, cunning Smerdyakov, the bastard child. Frequently lurid, nightmarish, always brilliant, the novel plunges the reader into a sordid love triangle, a pathological obsession, and a gripping courtroom drama. But throughout the whole, Dostoevsky searhes for the truth--about man, about life, about the existence of God. A terrifying answer to man's eternal questions, this monumental work remains the crowning achievement of perhaps the finest novelist of all time. From the Paperback edition. | +| 1463 | Realms of Tolkien: Images of Middle-earth | Twenty new and familiar Tolkien artists are represented in this fabulous volume, breathing an extraordinary variety of life into 58 different scenes, each of which is accompanied by appropriate passage from The Hobbit and The Lord of the Rings and The Silmarillion | +| 1502 | Selected Passages from Correspondence with Friends | Nikolai Gogol wrote some letters to his friends, none of which were a nose of high rank. Many are reproduced here (the letters, not noses). | +| 1937 | The Best Short Stories of Dostoevsky (Modern Library) | This collection, unique to the Modern Library, gathers seven of Dostoevsky's key works and shows him to be equally adept at the short story as with the novel. Exploring many of the same themes as in his longer works, these small masterpieces move from the tender and romantic White Nights, an archetypal nineteenth-century morality tale of pathos and loss, to the famous Notes from the Underground, a story of guilt, ineffectiveness, and uncompromising cynicism, and the first major work of existential literature. 
Among Dostoevsky's prototypical characters is Yemelyan in The Honest Thief, whose tragedy turns on an inability to resist crime. Presented in chronological order, in David Magarshack's celebrated translation, this is the definitive edition of Dostoevsky's best stories. | +| 1985 | Brothers Karamazov | Four brothers reunite in their hometown in Russia. The murder of their father forces the brothers to question their beliefs about each other, religion, and morality. | + +```{applies_to} +stack: preview 9.3.0 +``` + +```esql +FROM books +| EVAL chunks = CHUNK(description, {"strategy": "sentence", "max_chunk_size": 20, "sentence_overlap": 0}) +``` + +| book_no:keyword | title:text | chunks:keyword | +| --- | --- | --- | +| 1211 | The brothers Karamazov | [In 1880 Dostoevsky completed The Brothers Karamazov, the literary effort for which he had been preparing all his life., Compelling, profound, complex, it is the story of a patricide and of the four sons who each had a motive, of the four sons who each had a motive for murder: Dmitry, the sensualist, Ivan, the intellectual, Alyosha, the mystic, : Dmitry, the sensualist, Ivan, the intellectual, Alyosha, the mystic, and twisted, cunning Smerdyakov, the bastard child., Frequently lurid, nightmarish, always brilliant, the novel plunges the reader into a sordid love triangle, a pathological obsession, and a, a sordid love triangle, a pathological obsession, and a gripping courtroom drama., But throughout the whole, Dostoevsky searhes for the truth--about man, about life, about the existence of God., A terrifying answer to man's eternal questions, this monumental work remains the crowning achievement of perhaps the finest novelist of, the crowning achievement of perhaps the finest novelist of all time. From the Paperback edition.] 
| +| 1463 | Realms of Tolkien: Images of Middle-earth | [Twenty new and familiar Tolkien artists are represented in this fabulous volume, breathing an extraordinary variety of life into 58, volume, breathing an extraordinary variety of life into 58 different scenes, each of which is accompanied by appropriate passage from, , each of which is accompanied by appropriate passage from The Hobbit and The Lord of the Rings and The Silmarillion] | +| 1502 | Selected Passages from Correspondence with Friends | [Nikolai Gogol wrote some letters to his friends, none of which were a nose of high rank., Many are reproduced here (the letters, not noses).] | +| 1937 | The Best Short Stories of Dostoevsky (Modern Library) | [This collection, unique to the Modern Library, gathers seven of Dostoevsky's key works and shows him to be equally adept, key works and shows him to be equally adept at the short story as with the novel., Exploring many of the same themes as in his longer works, these small masterpieces move from the tender and romantic, , these small masterpieces move from the tender and romantic White Nights, an archetypal nineteenth-century morality tale of pathos and, , an archetypal nineteenth-century morality tale of pathos and loss, to the famous Notes from the Underground, a story of, the famous Notes from the Underground, a story of guilt, ineffectiveness, and uncompromising cynicism, and the first major work of, , and uncompromising cynicism, and the first major work of existential literature., Among Dostoevsky's prototypical characters is Yemelyan in The Honest Thief, whose tragedy turns on an inability to resist crime., Presented in chronological order, in David Magarshack's celebrated translation, this is the definitive edition of Dostoevsky's best stories.] | +| 1985 | Brothers Karamazov | [Four brothers reunite in their hometown in Russia., The murder of their father forces the brothers to question their beliefs about each other, religion, and morality.] 
| diff --git a/docs/reference/query-languages/esql/_snippets/functions/functionNamedParams/chunk.md b/docs/reference/query-languages/esql/_snippets/functions/functionNamedParams/chunk.md index 265551c8bee8a..6a7405397851d 100644 --- a/docs/reference/query-languages/esql/_snippets/functions/functionNamedParams/chunk.md +++ b/docs/reference/query-languages/esql/_snippets/functions/functionNamedParams/chunk.md @@ -2,9 +2,3 @@ **Supported function named parameters** -`num_chunks` -: (integer) The number of chunks to return. Defaults to return all chunks. - -`chunk_size` -: (integer) The size of sentence-based chunks to use. Defaults to 300 - diff --git a/docs/reference/query-languages/esql/_snippets/functions/layout/chunk.md b/docs/reference/query-languages/esql/_snippets/functions/layout/chunk.md index a3e67be49499a..174db24b5949b 100644 --- a/docs/reference/query-languages/esql/_snippets/functions/layout/chunk.md +++ b/docs/reference/query-languages/esql/_snippets/functions/layout/chunk.md @@ -1,6 +1,10 @@ % This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. ## `CHUNK` [esql-chunk] +```{applies_to} +stack: preview 9.3.0 +serverless: preview +``` **Syntax** diff --git a/docs/reference/query-languages/esql/_snippets/functions/parameters/chunk.md b/docs/reference/query-languages/esql/_snippets/functions/parameters/chunk.md index f287627d571ee..3c4d856262f1a 100644 --- a/docs/reference/query-languages/esql/_snippets/functions/parameters/chunk.md +++ b/docs/reference/query-languages/esql/_snippets/functions/parameters/chunk.md @@ -5,6 +5,6 @@ `field` : The input to chunk. -`options` -: Options to customize chunking behavior. +`chunking_settings` +: Options to customize chunking behavior. 
Refer to the [Inference API documentation](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put#operation-inference-put-body-application-json-chunking_settings) for valid values for `chunking_settings`. diff --git a/docs/reference/query-languages/esql/_snippets/functions/types/chunk.md b/docs/reference/query-languages/esql/_snippets/functions/types/chunk.md index 8ebe22b61286c..19f035575cf25 100644 --- a/docs/reference/query-languages/esql/_snippets/functions/types/chunk.md +++ b/docs/reference/query-languages/esql/_snippets/functions/types/chunk.md @@ -2,7 +2,7 @@ **Supported types** -| field | options | result | +| field | chunking_settings | result | | --- | --- | --- | | keyword | | keyword | | text | | keyword | diff --git a/docs/reference/query-languages/esql/images/functions/chunk.svg b/docs/reference/query-languages/esql/images/functions/chunk.svg index 56003f305a080..0031ba125d06c 100644 --- a/docs/reference/query-languages/esql/images/functions/chunk.svg +++ b/docs/reference/query-languages/esql/images/functions/chunk.svg @@ -1 +1 @@ -CHUNK(field,options) \ No newline at end of file +CHUNK(field,chunking_settings) \ No newline at end of file diff --git a/docs/reference/query-languages/esql/kibana/definition/functions/chunk.json b/docs/reference/query-languages/esql/kibana/definition/functions/chunk.json index 2be5b9665c320..9347b38f28575 100644 --- a/docs/reference/query-languages/esql/kibana/definition/functions/chunk.json +++ b/docs/reference/query-languages/esql/kibana/definition/functions/chunk.json @@ -30,7 +30,8 @@ } ], "examples" : [ - "FROM books\n| EVAL chunks = CHUNK(description, {\"num_chunks\":1, \"chunk_size\":20})" + "FROM books\n| EVAL chunks = CHUNK(description)", + "FROM books\n| EVAL chunks = CHUNK(description, {\"strategy\": \"sentence\", \"max_chunk_size\": 20, \"sentence_overlap\": 0})" ], "preview" : true, "snapshot_only" : true diff --git 
a/docs/reference/query-languages/esql/kibana/docs/functions/chunk.md b/docs/reference/query-languages/esql/kibana/docs/functions/chunk.md index 2af9e41799859..c5f426e32cdda 100644 --- a/docs/reference/query-languages/esql/kibana/docs/functions/chunk.md +++ b/docs/reference/query-languages/esql/kibana/docs/functions/chunk.md @@ -5,5 +5,5 @@ Use `CHUNK` to split a text field into smaller chunks. ```esql FROM books -| EVAL chunks = CHUNK(description, {"num_chunks":1, "chunk_size":20}) +| EVAL chunks = CHUNK(description) ``` diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/InvalidArgumentException.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/InvalidArgumentException.java index c051a9fa724fc..eb0fbbcbfb4a7 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/InvalidArgumentException.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/InvalidArgumentException.java @@ -24,4 +24,8 @@ public InvalidArgumentException(Throwable cause, String message, Object... 
args) super(cause, message, args); } + public InvalidArgumentException(String message, Throwable cause) { + super(message, cause); + } + } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/chunk.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/chunk.csv-spec index 1bf73acb2999b..2ae6ce51555c0 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/chunk.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/chunk.csv-spec @@ -2,12 +2,12 @@ # Tests for Chunk function # -chunkExample -required_capability: chunk_function +chunkDefaults +required_capability: chunk_function_v2 // tag::chunk-with-field[] FROM books -| EVAL chunks = CHUNK(description, {"num_chunks":1, "chunk_size":20}) +| EVAL chunks = CHUNK(description) // end::chunk-with-field[] | KEEP book_no, title, chunks | SORT book_no @@ -16,100 +16,141 @@ FROM books // tag::chunk-with-field-result[] book_no:keyword | title:text | chunks:keyword -1211 | The brothers Karamazov | In 1880 Dostoevsky completed The Brothers Karamazov, the literary effort for which he had been preparing all his life. -1463 | Realms of Tolkien: Images of Middle-earth | Twenty new and familiar Tolkien artists are represented in this fabulous volume, breathing an extraordinary variety of life into 58 -1502 | Selected Passages from Correspondence with Friends | Nikolai Gogol wrote some letters to his friends, none of which were a nose of high rank. -1937 | The Best Short Stories of Dostoevsky (Modern Library) | This collection, unique to the Modern Library, gathers seven of Dostoevsky's key works and shows him to be equally adept -1985 | Brothers Karamazov | Four brothers reunite in their hometown in Russia. +1211 | The brothers Karamazov | In 1880 Dostoevsky completed The Brothers Karamazov, the literary effort for which he had been preparing all his life. 
Compelling, profound, complex, it is the story of a patricide and of the four sons who each had a motive for murder: Dmitry, the sensualist, Ivan, the intellectual, Alyosha, the mystic, and twisted, cunning Smerdyakov, the bastard child. Frequently lurid, nightmarish, always brilliant, the novel plunges the reader into a sordid love triangle, a pathological obsession, and a gripping courtroom drama. But throughout the whole, Dostoevsky searhes for the truth--about man, about life, about the existence of God. A terrifying answer to man's eternal questions, this monumental work remains the crowning achievement of perhaps the finest novelist of all time. From the Paperback edition. +1463 | Realms of Tolkien: Images of Middle-earth | Twenty new and familiar Tolkien artists are represented in this fabulous volume, breathing an extraordinary variety of life into 58 different scenes, each of which is accompanied by appropriate passage from The Hobbit and The Lord of the Rings and The Silmarillion +1502 | Selected Passages from Correspondence with Friends | Nikolai Gogol wrote some letters to his friends, none of which were a nose of high rank. Many are reproduced here (the letters, not noses). +1937 | The Best Short Stories of Dostoevsky (Modern Library) | This collection, unique to the Modern Library, gathers seven of Dostoevsky's key works and shows him to be equally adept at the short story as with the novel. Exploring many of the same themes as in his longer works, these small masterpieces move from the tender and romantic White Nights, an archetypal nineteenth-century morality tale of pathos and loss, to the famous Notes from the Underground, a story of guilt, ineffectiveness, and uncompromising cynicism, and the first major work of existential literature. Among Dostoevsky's prototypical characters is Yemelyan in The Honest Thief, whose tragedy turns on an inability to resist crime. 
Presented in chronological order, in David Magarshack's celebrated translation, this is the definitive edition of Dostoevsky's best stories. +1985 | Brothers Karamazov | Four brothers reunite in their hometown in Russia. The murder of their father forces the brothers to question their beliefs about each other, religion, and morality. // end::chunk-with-field-result[] ; -chunkDefaults -required_capability: chunk_function +chunkWithChunkingSettings +required_capability: chunk_function_v2 +// tag::chunk-with-chunking-settings[] FROM books -| EVAL chunks = CHUNK(description) +| EVAL chunks = CHUNK(description, {"strategy": "sentence", "max_chunk_size": 20, "sentence_overlap": 0}) +// end::chunk-with-chunking-settings[] | KEEP book_no, title, chunks | SORT book_no | LIMIT 5 ; +// tag::chunk-with-chunking-settings-result[] book_no:keyword | title:text | chunks:keyword -1211 | The brothers Karamazov | In 1880 Dostoevsky completed The Brothers Karamazov, the literary effort for which he had been preparing all his life. Compelling, profound, complex, it is the story of a patricide and of the four sons who each had a motive for murder: Dmitry, the sensualist, Ivan, the intellectual, Alyosha, the mystic, and twisted, cunning Smerdyakov, the bastard child. Frequently lurid, nightmarish, always brilliant, the novel plunges the reader into a sordid love triangle, a pathological obsession, and a gripping courtroom drama. But throughout the whole, Dostoevsky searhes for the truth--about man, about life, about the existence of God. A terrifying answer to man's eternal questions, this monumental work remains the crowning achievement of perhaps the finest novelist of all time. From the Paperback edition. 
-1463 | Realms of Tolkien: Images of Middle-earth | Twenty new and familiar Tolkien artists are represented in this fabulous volume, breathing an extraordinary variety of life into 58 different scenes, each of which is accompanied by appropriate passage from The Hobbit and The Lord of the Rings and The Silmarillion -1502 | Selected Passages from Correspondence with Friends | Nikolai Gogol wrote some letters to his friends, none of which were a nose of high rank. Many are reproduced here (the letters, not noses). -1937 | The Best Short Stories of Dostoevsky (Modern Library) | This collection, unique to the Modern Library, gathers seven of Dostoevsky's key works and shows him to be equally adept at the short story as with the novel. Exploring many of the same themes as in his longer works, these small masterpieces move from the tender and romantic White Nights, an archetypal nineteenth-century morality tale of pathos and loss, to the famous Notes from the Underground, a story of guilt, ineffectiveness, and uncompromising cynicism, and the first major work of existential literature. Among Dostoevsky's prototypical characters is Yemelyan in The Honest Thief, whose tragedy turns on an inability to resist crime. Presented in chronological order, in David Magarshack's celebrated translation, this is the definitive edition of Dostoevsky's best stories. -1985 | Brothers Karamazov | Four brothers reunite in their hometown in Russia. The murder of their father forces the brothers to question their beliefs about each other, religion, and morality. 
+1211 | The brothers Karamazov | [In 1880 Dostoevsky completed The Brothers Karamazov, the literary effort for which he had been preparing all his life., Compelling, profound, complex, it is the story of a patricide and of the four sons who each had a motive, of the four sons who each had a motive for murder: Dmitry, the sensualist, Ivan, the intellectual, Alyosha, the mystic, : Dmitry, the sensualist, Ivan, the intellectual, Alyosha, the mystic, and twisted, cunning Smerdyakov, the bastard child., Frequently lurid, nightmarish, always brilliant, the novel plunges the reader into a sordid love triangle, a pathological obsession, and a, a sordid love triangle, a pathological obsession, and a gripping courtroom drama., But throughout the whole, Dostoevsky searhes for the truth--about man, about life, about the existence of God., A terrifying answer to man's eternal questions, this monumental work remains the crowning achievement of perhaps the finest novelist of, the crowning achievement of perhaps the finest novelist of all time. From the Paperback edition.] +1463 | Realms of Tolkien: Images of Middle-earth | [Twenty new and familiar Tolkien artists are represented in this fabulous volume, breathing an extraordinary variety of life into 58, volume, breathing an extraordinary variety of life into 58 different scenes, each of which is accompanied by appropriate passage from, , each of which is accompanied by appropriate passage from The Hobbit and The Lord of the Rings and The Silmarillion] +1502 | Selected Passages from Correspondence with Friends | [Nikolai Gogol wrote some letters to his friends, none of which were a nose of high rank., Many are reproduced here (the letters, not noses).] 
+1937 | The Best Short Stories of Dostoevsky (Modern Library) | [This collection, unique to the Modern Library, gathers seven of Dostoevsky's key works and shows him to be equally adept, key works and shows him to be equally adept at the short story as with the novel., Exploring many of the same themes as in his longer works, these small masterpieces move from the tender and romantic, , these small masterpieces move from the tender and romantic White Nights, an archetypal nineteenth-century morality tale of pathos and, , an archetypal nineteenth-century morality tale of pathos and loss, to the famous Notes from the Underground, a story of, the famous Notes from the Underground, a story of guilt, ineffectiveness, and uncompromising cynicism, and the first major work of, , and uncompromising cynicism, and the first major work of existential literature., Among Dostoevsky's prototypical characters is Yemelyan in The Honest Thief, whose tragedy turns on an inability to resist crime., Presented in chronological order, in David Magarshack's celebrated translation, this is the definitive edition of Dostoevsky's best stories.] +1985 | Brothers Karamazov | [Four brothers reunite in their hometown in Russia., The murder of their father forces the brothers to question their beliefs about each other, religion, and morality.] 
+// end::chunk-with-chunking-settings-result[] ; chunkTextWithMatch -required_capability: chunk_function +required_capability: chunk_function_v2 FROM books | WHERE MATCH(title, "Return") -| EVAL chunks = CHUNK(description, {"num_chunks":1, "chunk_size":20}) +| EVAL chunks = CHUNK(description, {"strategy": "sentence", "max_chunk_size": 20, "sentence_overlap": 0}) | KEEP book_no, title, chunks; ignoreOrder:true book_no:keyword | title:text | chunks:keyword -2714 | Return of the King Being the Third Part of The Lord of the Rings | Concluding the story begun in The Hobbit, this is the final part of Tolkien s epic masterpiece, The Lord of -7350 | Return of the Shadow | In this sixth volume of The History of Middle-earth the story reaches The Lord of the Rings. +2714 | Return of the King Being the Third Part of The Lord of the Rings | [Concluding the story begun in The Hobbit, this is the final part of Tolkien s epic masterpiece, The Lord of, part of Tolkien s epic masterpiece, The Lord of the Rings, featuring an exclusive cover image from the film, the, , featuring an exclusive cover image from the film, the definitive text, and a detailed map of Middle-earth., The armies of the Dark Lord Sauron are massing as his evil shadow spreads ever wider., Men, Dwarves, Elves and Ents unite forces to do battle agains the Dark., Meanwhile, Frodo and Sam struggle further into Mordor in their heroic quest to destroy the One Ring., The devastating conclusion of J.R.R., Tolkien s classic tale of magic and adventure, begun in The Fellowship of the Ring and The Two Towers, features, Fellowship of the Ring and The Two Towers, features the definitive edition of the text and includes the Appendices and, edition of the text and includes the Appendices and a revised Index in full., To celebrate the release of the first of Peter Jackson s two-part film adaptation of The Hobbit, THE HOBBIT, two-part film adaptation of The Hobbit, THE HOBBIT: AN UNEXPECTED JOURNEY, this third part of The Lord 
of the, JOURNEY, this third part of The Lord of the Rings is available for a limited time with an exclusive cover, available for a limited time with an exclusive cover image from Peter Jackson s award-winning trilogy.] +7350 | Return of the Shadow | [In this sixth volume of The History of Middle-earth the story reaches The Lord of the Rings., In The Return of the Shadow (an abandoned title for the first volume) Christopher Tolkien describes, with full citation of, first volume) Christopher Tolkien describes, with full citation of the earliest notes, outline plans, and narrative drafts, the intricate evolution, notes, outline plans, and narrative drafts, the intricate evolution of The Fellowship of the Ring and the gradual emergence of, Fellowship of the Ring and the gradual emergence of the conceptions that transformed what J.R.R., Tolkien for long believed would be a far shorter book, 'a sequel to The Hobbit'., The enlargement of Bilbo's 'magic ring' into the supremely potent and dangerous Ruling Ring of the Dark Lord is traced, dangerous Ruling Ring of the Dark Lord is traced and the precise moment is seen when, in an astonishing and, precise moment is seen when, in an astonishing and unforeseen leap in the earliest narrative, a Black Rider first rode, in the earliest narrative, a Black Rider first rode into the Shire, his significance still unknown., The character of the hobbit called Trotter (afterwards Strider or Aragorn) is developed while his indentity remains an absolute puzzle, ) is developed while his indentity remains an absolute puzzle, and the suspicion only very slowly becomes certainty that he must, suspicion only very slowly becomes certainty that he must after all be a Man., The hobbits, Frodo's companions, undergo intricate permutations of name and personality, and other major figures appear in strange modes: a, , and other major figures appear in strange modes: a sinister Treebeard, in league with the Enemy, a ferocious and malevolent, , in 
league with the Enemy, a ferocious and malevolent Farmer Maggot., The story in this book ends at the point where J.R.R., Tolkien halted in the story for a long time, as the Company of the Ring, still lacking Legolas and Gimli, Company of the Ring, still lacking Legolas and Gimli, stood before the tomb of Balin in the Mines of Moria., The Return of the Shadow is illustrated with reproductions of the first maps and notable pages from the earliest manuscripts.] ; -chunkTextWithMatchMultipleChunks -required_capability: chunk_function +chunkTextWithMatchMultipleChunksMvExpand +required_capability: chunk_function_v2 FROM books | WHERE MATCH(title, "Return") -| EVAL chunks = CHUNK(description, {"num_chunks":3, "chunk_size":20}) +| EVAL chunks = CHUNK(description, {"strategy": "sentence", "max_chunk_size": 20, "sentence_overlap": 0}) +| MV_EXPAND chunks | KEEP book_no, title, chunks; ignoreOrder:true book_no:keyword | title:text | chunks:keyword -2714 | Return of the King Being the Third Part of The Lord of the Rings | [Concluding the story begun in The Hobbit, this is the final part of Tolkien s epic masterpiece, The Lord of, part of Tolkien s epic masterpiece, The Lord of the Rings, featuring an exclusive cover image from the film, the, , featuring an exclusive cover image from the film, the definitive text, and a detailed map of Middle-earth.] -7350 | Return of the Shadow | [In this sixth volume of The History of Middle-earth the story reaches The Lord of the Rings., In The Return of the Shadow (an abandoned title for the first volume) Christopher Tolkien describes, with full citation of, first volume) Christopher Tolkien describes, with full citation of the earliest notes, outline plans, and narrative drafts, the intricate evolution] +2714 | Return of the King Being the Third Part of The Lord of the Rings | , featuring an exclusive cover image from the film, the definitive text, and a detailed map of Middle-earth. 
+2714 | Return of the King Being the Third Part of The Lord of the Rings | Concluding the story begun in The Hobbit, this is the final part of Tolkien s epic masterpiece, The Lord of +2714 | Return of the King Being the Third Part of The Lord of the Rings | Fellowship of the Ring and The Two Towers, features the definitive edition of the text and includes the Appendices and +2714 | Return of the King Being the Third Part of The Lord of the Rings | JOURNEY, this third part of The Lord of the Rings is available for a limited time with an exclusive cover +2714 | Return of the King Being the Third Part of The Lord of the Rings | Meanwhile, Frodo and Sam struggle further into Mordor in their heroic quest to destroy the One Ring. +2714 | Return of the King Being the Third Part of The Lord of the Rings | Men, Dwarves, Elves and Ents unite forces to do battle agains the Dark. +2714 | Return of the King Being the Third Part of The Lord of the Rings | The armies of the Dark Lord Sauron are massing as his evil shadow spreads ever wider. +2714 | Return of the King Being the Third Part of The Lord of the Rings | The devastating conclusion of J.R.R. +2714 | Return of the King Being the Third Part of The Lord of the Rings | To celebrate the release of the first of Peter Jackson s two-part film adaptation of The Hobbit, THE HOBBIT +2714 | Return of the King Being the Third Part of The Lord of the Rings | Tolkien s classic tale of magic and adventure, begun in The Fellowship of the Ring and The Two Towers, features +2714 | Return of the King Being the Third Part of The Lord of the Rings | available for a limited time with an exclusive cover image from Peter Jackson s award-winning trilogy. +2714 | Return of the King Being the Third Part of The Lord of the Rings | edition of the text and includes the Appendices and a revised Index in full. 
+2714 | Return of the King Being the Third Part of The Lord of the Rings | part of Tolkien s epic masterpiece, The Lord of the Rings, featuring an exclusive cover image from the film, the +2714 | Return of the King Being the Third Part of The Lord of the Rings | two-part film adaptation of The Hobbit, THE HOBBIT: AN UNEXPECTED JOURNEY, this third part of The Lord of the +7350 | Return of the Shadow | ) is developed while his indentity remains an absolute puzzle, and the suspicion only very slowly becomes certainty that he must +7350 | Return of the Shadow | , and other major figures appear in strange modes: a sinister Treebeard, in league with the Enemy, a ferocious and malevolent +7350 | Return of the Shadow | , in league with the Enemy, a ferocious and malevolent Farmer Maggot. +7350 | Return of the Shadow | Company of the Ring, still lacking Legolas and Gimli, stood before the tomb of Balin in the Mines of Moria. +7350 | Return of the Shadow | Fellowship of the Ring and the gradual emergence of the conceptions that transformed what J.R.R. +7350 | Return of the Shadow | In The Return of the Shadow (an abandoned title for the first volume) Christopher Tolkien describes, with full citation of +7350 | Return of the Shadow | In this sixth volume of The History of Middle-earth the story reaches The Lord of the Rings. +7350 | Return of the Shadow | The Return of the Shadow is illustrated with reproductions of the first maps and notable pages from the earliest manuscripts. 
+7350 | Return of the Shadow | The character of the hobbit called Trotter (afterwards Strider or Aragorn) is developed while his indentity remains an absolute puzzle +7350 | Return of the Shadow | The enlargement of Bilbo's 'magic ring' into the supremely potent and dangerous Ruling Ring of the Dark Lord is traced +7350 | Return of the Shadow | The hobbits, Frodo's companions, undergo intricate permutations of name and personality, and other major figures appear in strange modes: a +7350 | Return of the Shadow | The story in this book ends at the point where J.R.R. +7350 | Return of the Shadow | Tolkien for long believed would be a far shorter book, 'a sequel to The Hobbit'. +7350 | Return of the Shadow | Tolkien halted in the story for a long time, as the Company of the Ring, still lacking Legolas and Gimli +7350 | Return of the Shadow | dangerous Ruling Ring of the Dark Lord is traced and the precise moment is seen when, in an astonishing and +7350 | Return of the Shadow | first volume) Christopher Tolkien describes, with full citation of the earliest notes, outline plans, and narrative drafts, the intricate evolution +7350 | Return of the Shadow | in the earliest narrative, a Black Rider first rode into the Shire, his significance still unknown. +7350 | Return of the Shadow | notes, outline plans, and narrative drafts, the intricate evolution of The Fellowship of the Ring and the gradual emergence of +7350 | Return of the Shadow | precise moment is seen when, in an astonishing and unforeseen leap in the earliest narrative, a Black Rider first rode +7350 | Return of the Shadow | suspicion only very slowly becomes certainty that he must after all be a Man. 
; -chunkTextWithMatchMultipleChunksMvExpand -required_capability: chunk_function +chunkTextWithMatchMultipleChunksMvSliceMvExpand +required_capability: chunk_function_v2 FROM books | WHERE MATCH(title, "Return") -| EVAL chunks = CHUNK(description, {"num_chunks":3, "chunk_size":20}) -| MV_EXPAND chunks -| KEEP book_no, title, chunks; +| EVAL chunks = CHUNK(description, {"strategy": "sentence", "max_chunk_size": 20, "sentence_overlap": 0}) +| EVAL truncated = MV_SLICE(chunks, 0, 3) +| MV_EXPAND truncated +| KEEP book_no, title, truncated; ignoreOrder:true -book_no:keyword | title:text | chunks:keyword +book_no:keyword | title:text | truncated:keyword 2714 | Return of the King Being the Third Part of The Lord of the Rings | , featuring an exclusive cover image from the film, the definitive text, and a detailed map of Middle-earth. 2714 | Return of the King Being the Third Part of The Lord of the Rings | Concluding the story begun in The Hobbit, this is the final part of Tolkien s epic masterpiece, The Lord of +2714 | Return of the King Being the Third Part of The Lord of the Rings | The armies of the Dark Lord Sauron are massing as his evil shadow spreads ever wider. 2714 | Return of the King Being the Third Part of The Lord of the Rings | part of Tolkien s epic masterpiece, The Lord of the Rings, featuring an exclusive cover image from the film, the 7350 | Return of the Shadow | In The Return of the Shadow (an abandoned title for the first volume) Christopher Tolkien describes, with full citation of 7350 | Return of the Shadow | In this sixth volume of The History of Middle-earth the story reaches The Lord of the Rings. 
7350 | Return of the Shadow | first volume) Christopher Tolkien describes, with full citation of the earliest notes, outline plans, and narrative drafts, the intricate evolution +7350 | Return of the Shadow | notes, outline plans, and narrative drafts, the intricate evolution of The Fellowship of the Ring and the gradual emergence of ; + chunkTextWithConcatenatedField -required_capability: chunk_function +required_capability: chunk_function_v2 FROM books -| EVAL title_description = CONCAT(title, description) -| EVAL chunks = CHUNK(title_description, {"num_chunks":1, "chunk_size":20}) +| EVAL title_description = CONCAT(title, " ", description) +| EVAL chunks = CHUNK(title_description, {"strategy": "sentence", "max_chunk_size": 20, "sentence_overlap": 0}) | KEEP book_no, title, chunks | SORT book_no | LIMIT 5 ; book_no:keyword | title:text | chunks:keyword -1211 | The brothers Karamazov | The brothers KaramazovIn 1880 Dostoevsky completed The Brothers Karamazov, the literary effort for which he had been preparing all his -1463 | Realms of Tolkien: Images of Middle-earth | Realms of Tolkien: Images of Middle-earthTwenty new and familiar Tolkien artists are represented in this fabulous volume, breathing an -1502 | Selected Passages from Correspondence with Friends | Selected Passages from Correspondence with FriendsNikolai Gogol wrote some letters to his friends, none of which were a nose of -1937 | The Best Short Stories of Dostoevsky (Modern Library) | The Best Short Stories of Dostoevsky (Modern Library)This collection, unique to the Modern Library, gathers seven of Dostoevsky's key -1985 | Brothers Karamazov | Brothers KaramazovFour brothers reunite in their hometown in Russia. 
+1211 | The brothers Karamazov | [The brothers Karamazov In 1880 Dostoevsky completed The Brothers Karamazov, the literary effort for which he had been preparing all, literary effort for which he had been preparing all his life., Compelling, profound, complex, it is the story of a patricide and of the four sons who each had a motive, of the four sons who each had a motive for murder: Dmitry, the sensualist, Ivan, the intellectual, Alyosha, the mystic, : Dmitry, the sensualist, Ivan, the intellectual, Alyosha, the mystic, and twisted, cunning Smerdyakov, the bastard child., Frequently lurid, nightmarish, always brilliant, the novel plunges the reader into a sordid love triangle, a pathological obsession, and a, a sordid love triangle, a pathological obsession, and a gripping courtroom drama., But throughout the whole, Dostoevsky searhes for the truth--about man, about life, about the existence of God., A terrifying answer to man's eternal questions, this monumental work remains the crowning achievement of perhaps the finest novelist of, the crowning achievement of perhaps the finest novelist of all time. From the Paperback edition.] +1463 | Realms of Tolkien: Images of Middle-earth | [Realms of Tolkien: Images of Middle-earth Twenty new and familiar Tolkien artists are represented in this fabulous volume, breathing, Tolkien artists are represented in this fabulous volume, breathing an extraordinary variety of life into 58 different scenes, each of, variety of life into 58 different scenes, each of which is accompanied by appropriate passage from The Hobbit and The, accompanied by appropriate passage from The Hobbit and The Lord of the Rings and The Silmarillion] +1502 | Selected Passages from Correspondence with Friends | [Selected Passages from Correspondence with Friends Nikolai Gogol wrote some letters to his friends, none of which were a nose, to his friends, none of which were a nose of high rank. Many are reproduced here (the letters, not noses).] 
+1937 | The Best Short Stories of Dostoevsky (Modern Library) | [The Best Short Stories of Dostoevsky (Modern Library) This collection, unique to the Modern Library, gathers seven of Dostoevsky's key, to the Modern Library, gathers seven of Dostoevsky's key works and shows him to be equally adept at the short, shows him to be equally adept at the short story as with the novel., Exploring many of the same themes as in his longer works, these small masterpieces move from the tender and romantic, , these small masterpieces move from the tender and romantic White Nights, an archetypal nineteenth-century morality tale of pathos and, , an archetypal nineteenth-century morality tale of pathos and loss, to the famous Notes from the Underground, a story of, the famous Notes from the Underground, a story of guilt, ineffectiveness, and uncompromising cynicism, and the first major work of, , and uncompromising cynicism, and the first major work of existential literature., Among Dostoevsky's prototypical characters is Yemelyan in The Honest Thief, whose tragedy turns on an inability to resist crime., Presented in chronological order, in David Magarshack's celebrated translation, this is the definitive edition of Dostoevsky's best stories.] +1985 | Brothers Karamazov | [Brothers Karamazov Four brothers reunite in their hometown in Russia., The murder of their father forces the brothers to question their beliefs about each other, religion, and morality.] 
; chunkTextWithMultivaluedField -required_capability: chunk_function +required_capability: chunk_function_v2 FROM employees | EVAL chunks = CHUNK(job_positions) @@ -132,4 +173,3 @@ emp_no:integer | first_name:keyword | last_name:keyword | chunks:keyword - diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ChunkBytesRefEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ChunkBytesRefEvaluator.java index ed3e581175987..2d393f4008b2f 100644 --- a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ChunkBytesRefEvaluator.java +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ChunkBytesRefEvaluator.java @@ -12,13 +12,12 @@ import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.data.BytesRefBlock; import org.elasticsearch.compute.data.BytesRefVector; -import org.elasticsearch.compute.data.IntBlock; -import org.elasticsearch.compute.data.IntVector; import org.elasticsearch.compute.data.Page; import org.elasticsearch.compute.operator.DriverContext; import org.elasticsearch.compute.operator.EvalOperator; import org.elasticsearch.compute.operator.Warnings; import org.elasticsearch.core.Releasables; +import org.elasticsearch.inference.ChunkingSettings; import org.elasticsearch.xpack.esql.core.tree.Source; /** @@ -32,44 +31,28 @@ public final class ChunkBytesRefEvaluator implements EvalOperator.ExpressionEval private final EvalOperator.ExpressionEvaluator str; - private final EvalOperator.ExpressionEvaluator numChunks; - - private final EvalOperator.ExpressionEvaluator chunkSize; + private final ChunkingSettings chunkingSettings; private final DriverContext driverContext; private Warnings warnings; public ChunkBytesRefEvaluator(Source source, EvalOperator.ExpressionEvaluator str, - EvalOperator.ExpressionEvaluator 
numChunks, EvalOperator.ExpressionEvaluator chunkSize, - DriverContext driverContext) { + ChunkingSettings chunkingSettings, DriverContext driverContext) { this.source = source; this.str = str; - this.numChunks = numChunks; - this.chunkSize = chunkSize; + this.chunkingSettings = chunkingSettings; this.driverContext = driverContext; } @Override public Block eval(Page page) { try (BytesRefBlock strBlock = (BytesRefBlock) str.eval(page)) { - try (IntBlock numChunksBlock = (IntBlock) numChunks.eval(page)) { - try (IntBlock chunkSizeBlock = (IntBlock) chunkSize.eval(page)) { - BytesRefVector strVector = strBlock.asVector(); - if (strVector == null) { - return eval(page.getPositionCount(), strBlock, numChunksBlock, chunkSizeBlock); - } - IntVector numChunksVector = numChunksBlock.asVector(); - if (numChunksVector == null) { - return eval(page.getPositionCount(), strBlock, numChunksBlock, chunkSizeBlock); - } - IntVector chunkSizeVector = chunkSizeBlock.asVector(); - if (chunkSizeVector == null) { - return eval(page.getPositionCount(), strBlock, numChunksBlock, chunkSizeBlock); - } - return eval(page.getPositionCount(), strVector, numChunksVector, chunkSizeVector); - } + BytesRefVector strVector = strBlock.asVector(); + if (strVector == null) { + return eval(page.getPositionCount(), strBlock); } + return eval(page.getPositionCount(), strVector); } } @@ -77,13 +60,10 @@ public Block eval(Page page) { public long baseRamBytesUsed() { long baseRamBytesUsed = BASE_RAM_BYTES_USED; baseRamBytesUsed += str.baseRamBytesUsed(); - baseRamBytesUsed += numChunks.baseRamBytesUsed(); - baseRamBytesUsed += chunkSize.baseRamBytesUsed(); return baseRamBytesUsed; } - public BytesRefBlock eval(int positionCount, BytesRefBlock strBlock, IntBlock numChunksBlock, - IntBlock chunkSizeBlock) { + public BytesRefBlock eval(int positionCount, BytesRefBlock strBlock) { try(BytesRefBlock.Builder result = driverContext.blockFactory().newBytesRefBlockBuilder(positionCount)) { BytesRef strScratch = new 
BytesRef(); position: for (int p = 0; p < positionCount; p++) { @@ -98,46 +78,19 @@ public BytesRefBlock eval(int positionCount, BytesRefBlock strBlock, IntBlock nu result.appendNull(); continue position; } - switch (numChunksBlock.getValueCount(p)) { - case 0: - result.appendNull(); - continue position; - case 1: - break; - default: - warnings().registerException(new IllegalArgumentException("single-value function encountered multi-value")); - result.appendNull(); - continue position; - } - switch (chunkSizeBlock.getValueCount(p)) { - case 0: - result.appendNull(); - continue position; - case 1: - break; - default: - warnings().registerException(new IllegalArgumentException("single-value function encountered multi-value")); - result.appendNull(); - continue position; - } BytesRef str = strBlock.getBytesRef(strBlock.getFirstValueIndex(p), strScratch); - int numChunks = numChunksBlock.getInt(numChunksBlock.getFirstValueIndex(p)); - int chunkSize = chunkSizeBlock.getInt(chunkSizeBlock.getFirstValueIndex(p)); - Chunk.process(result, str, numChunks, chunkSize); + Chunk.process(result, str, this.chunkingSettings); } return result.build(); } } - public BytesRefBlock eval(int positionCount, BytesRefVector strVector, IntVector numChunksVector, - IntVector chunkSizeVector) { + public BytesRefBlock eval(int positionCount, BytesRefVector strVector) { try(BytesRefBlock.Builder result = driverContext.blockFactory().newBytesRefBlockBuilder(positionCount)) { BytesRef strScratch = new BytesRef(); position: for (int p = 0; p < positionCount; p++) { BytesRef str = strVector.getBytesRef(p, strScratch); - int numChunks = numChunksVector.getInt(p); - int chunkSize = chunkSizeVector.getInt(p); - Chunk.process(result, str, numChunks, chunkSize); + Chunk.process(result, str, this.chunkingSettings); } return result.build(); } @@ -145,12 +98,12 @@ public BytesRefBlock eval(int positionCount, BytesRefVector strVector, IntVector @Override public String toString() { - return 
"ChunkBytesRefEvaluator[" + "str=" + str + ", numChunks=" + numChunks + ", chunkSize=" + chunkSize + "]"; + return "ChunkBytesRefEvaluator[" + "str=" + str + ", chunkingSettings=" + chunkingSettings + "]"; } @Override public void close() { - Releasables.closeExpectNoException(str, numChunks, chunkSize); + Releasables.closeExpectNoException(str); } private Warnings warnings() { @@ -170,27 +123,23 @@ static class Factory implements EvalOperator.ExpressionEvaluator.Factory { private final EvalOperator.ExpressionEvaluator.Factory str; - private final EvalOperator.ExpressionEvaluator.Factory numChunks; - - private final EvalOperator.ExpressionEvaluator.Factory chunkSize; + private final ChunkingSettings chunkingSettings; public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory str, - EvalOperator.ExpressionEvaluator.Factory numChunks, - EvalOperator.ExpressionEvaluator.Factory chunkSize) { + ChunkingSettings chunkingSettings) { this.source = source; this.str = str; - this.numChunks = numChunks; - this.chunkSize = chunkSize; + this.chunkingSettings = chunkingSettings; } @Override public ChunkBytesRefEvaluator get(DriverContext context) { - return new ChunkBytesRefEvaluator(source, str.get(context), numChunks.get(context), chunkSize.get(context), context); + return new ChunkBytesRefEvaluator(source, str.get(context), chunkingSettings, context); } @Override public String toString() { - return "ChunkBytesRefEvaluator[" + "str=" + str + ", numChunks=" + numChunks + ", chunkSize=" + chunkSize + "]"; + return "ChunkBytesRefEvaluator[" + "str=" + str + ", chunkingSettings=" + chunkingSettings + "]"; } } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 6497ddfc6afbf..7f051e8c7d9df 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -1658,7 +1658,7 @@ public enum Cap { /** * Chunk function. */ - CHUNK_FUNCTION(Build.current().isSnapshot()), + CHUNK_FUNCTION_V2(Build.current().isSnapshot()), /** * Support for vector similarity functtions pushdown diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Chunk.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Chunk.java index c11063616b88d..67f1ecec992b4 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Chunk.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Chunk.java @@ -12,20 +12,25 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.compute.ann.Evaluator; +import org.elasticsearch.compute.ann.Fixed; import org.elasticsearch.compute.data.BytesRefBlock; import org.elasticsearch.compute.operator.EvalOperator; import org.elasticsearch.inference.ChunkingSettings; import org.elasticsearch.xpack.core.inference.chunking.Chunker; import org.elasticsearch.xpack.core.inference.chunking.ChunkerBuilder; +import org.elasticsearch.xpack.core.inference.chunking.ChunkingSettingsBuilder; +import org.elasticsearch.xpack.core.inference.chunking.ChunkingSettingsOptions; import org.elasticsearch.xpack.core.inference.chunking.SentenceBoundaryChunkingSettings; import org.elasticsearch.xpack.esql.core.InvalidArgumentException; import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.expression.Literal; +import org.elasticsearch.xpack.esql.core.expression.FoldContext; import org.elasticsearch.xpack.esql.core.expression.MapExpression; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import 
org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.expression.function.Example; +import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesTo; +import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesToLifecycle; import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.MapParam; import org.elasticsearch.xpack.esql.expression.function.OptionalArgument; @@ -35,11 +40,12 @@ import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; import java.io.IOException; -import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.stream.Collectors; +import static java.util.Map.entry; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString; @@ -48,56 +54,49 @@ public class Chunk extends EsqlScalarFunction implements OptionalArgument { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Chunk", Chunk::new); - public static final int DEFAULT_NUM_CHUNKS = Integer.MAX_VALUE; - public static final int DEFAULT_CHUNK_SIZE = 300; - - private final Expression field, options; - - static final String NUM_CHUNKS = "num_chunks"; - static final String CHUNK_SIZE = "chunk_size"; - - public static final Map ALLOWED_OPTIONS = Map.of(NUM_CHUNKS, DataType.INTEGER, CHUNK_SIZE, DataType.INTEGER); - - @FunctionInfo(returnType = "keyword", preview = true, description = """ - Use `CHUNK` to split a text field into smaller chunks.""", detailedDescription = """ - Chunk can be used on fields from the text famiy like <> and <>. 
- Chunk will split a text field into smaller chunks, using a sentence-based chunking strategy. - The number of chunks returned, and the length of the sentences used to create the chunks can be specified. - """, examples = { @Example(file = "chunk", tag = "chunk-with-field", applies_to = "stack: preview 9.3.0") }) + static final int DEFAULT_CHUNK_SIZE = 300; + public static final ChunkingSettings DEFAULT_CHUNKING_SETTINGS = new SentenceBoundaryChunkingSettings(DEFAULT_CHUNK_SIZE, 0); + + private final Expression field, chunkingSettings; + + public static final Map ALLOWED_CHUNKING_SETTING_OPTIONS = Map.ofEntries( + entry(ChunkingSettingsOptions.STRATEGY.toString(), DataType.KEYWORD), + entry(ChunkingSettingsOptions.MAX_CHUNK_SIZE.toString(), DataType.INTEGER), + entry(ChunkingSettingsOptions.OVERLAP.toString(), DataType.INTEGER), + entry(ChunkingSettingsOptions.SENTENCE_OVERLAP.toString(), DataType.INTEGER), + entry(ChunkingSettingsOptions.SEPARATOR_GROUP.toString(), DataType.KEYWORD), + entry(ChunkingSettingsOptions.SEPARATORS.toString(), DataType.KEYWORD) + ); + + @FunctionInfo( + appliesTo = { @FunctionAppliesTo(lifeCycle = FunctionAppliesToLifecycle.PREVIEW, version = "9.3.0") }, + returnType = "keyword", + preview = true, + description = """ + Use `CHUNK` to split a text field into smaller chunks.""", + detailedDescription = """ + Chunk can be used on fields from the text famiy like <> and <>. + Chunk will split a text field into smaller chunks, using a sentence-based chunking strategy. + The number of chunks returned, and the length of the sentences used to create the chunks can be specified. 
+ """, + examples = { + @Example(file = "chunk", tag = "chunk-with-field", applies_to = "stack: preview 9.3.0"), + @Example(file = "chunk", tag = "chunk-with-chunking-settings", applies_to = "stack: preview 9.3.0") } + ) public Chunk( Source source, @Param(name = "field", type = { "keyword", "text" }, description = "The input to chunk.") Expression field, @MapParam( - name = "options", - params = { - @MapParam.MapParamEntry( - name = "num_chunks", - type = "integer", - description = "The number of chunks to return. Defaults to return all chunks." - ), - @MapParam.MapParamEntry( - name = "chunk_size", - type = "integer", - description = "The size of sentence-based chunks to use. Defaults to " + DEFAULT_CHUNK_SIZE - ), }, - description = "Options to customize chunking behavior.", + name = "chunking_settings", + description = "Options to customize chunking behavior. Refer to the " + + "[Inference API documentation](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put" + + "#operation-inference-put-body-application-json-chunking_settings) for valid values for `chunking_settings`.", optional = true - ) Expression options - ) { - super(source, options == null ? List.of(field) : List.of(field, options)); - this.field = field; - this.options = options; - } - - private Chunk( - Source source, - Expression field, - Expression options, - boolean unused // dummy parameter to differentiate constructors + ) Expression chunkingSettings ) { - super(source, options == null ? List.of(field) : List.of(field, options)); + super(source, chunkingSettings == null ? 
List.of(field) : List.of(field, chunkingSettings)); this.field = field; - this.options = options; + this.chunkingSettings = chunkingSettings; } public Chunk(StreamInput in) throws IOException { @@ -112,7 +111,7 @@ public Chunk(StreamInput in) throws IOException { public void writeTo(StreamOutput out) throws IOException { source().writeTo(out); out.writeNamedWriteable(field); - out.writeOptionalNamedWriteable(options); + out.writeOptionalNamedWriteable(chunkingSettings); } @Override @@ -130,28 +129,27 @@ protected TypeResolution resolveType() { if (childrenResolved() == false) { return new TypeResolution("Unresolved children"); } - return isString(field(), sourceText(), FIRST).and(Options.resolve(options, source(), SECOND, ALLOWED_OPTIONS, this::verifyOptions)); + + return isString(field(), sourceText(), FIRST).and( + Options.resolve(chunkingSettings, source(), SECOND, ALLOWED_CHUNKING_SETTING_OPTIONS, this::validateChunkingSettings) + ); } - private void verifyOptions(Map optionsMap) { - if (options == null) { + private void validateChunkingSettings(Map chunkingSettingsMap) { + if (chunkingSettings == null) { return; } - Integer numChunks = (Integer) optionsMap.get(NUM_CHUNKS); - if (numChunks != null && numChunks < 0) { - throw new InvalidArgumentException("[{}] cannot be negative, found [{}]", NUM_CHUNKS, numChunks); - } - Integer chunkSize = (Integer) optionsMap.get(CHUNK_SIZE); - if (chunkSize != null && chunkSize < 0) { - throw new InvalidArgumentException("[{}] cannot be negative, found [{}]", CHUNK_SIZE, chunkSize); + try { + toChunkingSettings(chunkingSettingsMap); + } catch (IllegalArgumentException e) { + throw new InvalidArgumentException(e.getMessage(), e); } - } @Override public boolean foldable() { - return field().foldable() && (options() == null || options().foldable()); + return field().foldable() && (chunkingSettings() == null || chunkingSettings().foldable()); } @Override @@ -165,23 +163,22 @@ public Expression replaceChildren(List newChildren) { 
@Override protected NodeInfo info() { - return NodeInfo.create(this, Chunk::new, field, options); + return NodeInfo.create(this, Chunk::new, field, chunkingSettings); } Expression field() { return field; } - Expression options() { - return options; + Expression chunkingSettings() { + return chunkingSettings; } @Evaluator(extraName = "BytesRef") - static void process(BytesRefBlock.Builder builder, BytesRef str, int numChunks, int chunkSize) { + static void process(BytesRefBlock.Builder builder, BytesRef str, @Fixed ChunkingSettings chunkingSettings) { String content = str.utf8ToString(); - ChunkingSettings settings = new SentenceBoundaryChunkingSettings(chunkSize, 0); - List chunks = chunkText(content, settings, numChunks); + List chunks = chunkText(content, chunkingSettings); boolean multivalued = chunks.size() > 1; if (multivalued) { @@ -196,43 +193,48 @@ static void process(BytesRefBlock.Builder builder, BytesRef str, int numChunks, } } - public static List chunkText(String content, ChunkingSettings chunkingSettings, int numChunks) { + public static List chunkText(String content, ChunkingSettings chunkingSettings) { Chunker chunker = ChunkerBuilder.fromChunkingStrategy(chunkingSettings.getChunkingStrategy()); - return chunker.chunk(content, chunkingSettings) - .stream() - .map(offset -> content.substring(offset.start(), offset.end())) - .limit(numChunks > 0 ? 
numChunks : DEFAULT_NUM_CHUNKS) - .toList(); + return chunker.chunk(content, chunkingSettings).stream().map(offset -> content.substring(offset.start(), offset.end())).toList(); } @Override public boolean equals(Object o) { if (o == null || getClass() != o.getClass()) return false; Chunk chunk = (Chunk) o; - return Objects.equals(field(), chunk.field()) && Objects.equals(options(), chunk.options()); + return Objects.equals(field(), chunk.field()) && Objects.equals(chunkingSettings(), chunk.chunkingSettings()); } @Override public int hashCode() { - return Objects.hash(field(), options()); + return Objects.hash(field(), chunkingSettings()); } @Override public EvalOperator.ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { + ChunkingSettings chunkingSettings = DEFAULT_CHUNKING_SETTINGS; - Map optionsMap = new HashMap<>(); - if (options() != null) { - Options.populateMap(((MapExpression) options), optionsMap, source(), SECOND, ALLOWED_OPTIONS); + if (chunkingSettings() != null) { + chunkingSettings = toChunkingSettings((MapExpression) chunkingSettings()); } - int numChunks = (Integer) optionsMap.getOrDefault(NUM_CHUNKS, DEFAULT_NUM_CHUNKS); - int chunkSize = (Integer) optionsMap.getOrDefault(CHUNK_SIZE, DEFAULT_CHUNK_SIZE); + return new ChunkBytesRefEvaluator.Factory(source(), toEvaluator.apply(field), chunkingSettings); + } - return new ChunkBytesRefEvaluator.Factory( - source(), - toEvaluator.apply(field), - toEvaluator.apply(new Literal(source(), numChunks, DataType.INTEGER)), - toEvaluator.apply(new Literal(source(), chunkSize, DataType.INTEGER)) - ); + // TODO remove? + private static ChunkingSettings toChunkingSettings(MapExpression map) { + Map chunkingSettingsMap = map.keyFoldedMap().entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, e -> { + Object value = e.getValue().fold(FoldContext.small()); + return value instanceof BytesRef ? 
((BytesRef) value).utf8ToString() : value; + })); + return ChunkingSettingsBuilder.fromMap(chunkingSettingsMap); + } + + private static ChunkingSettings toChunkingSettings(Map expressionMap) { + Map chunkingSettingsMap = expressionMap.entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, e -> { + Object value = e.getValue(); + return value instanceof BytesRef ? ((BytesRef) value).utf8ToString() : value; + })); + return ChunkingSettingsBuilder.fromMap(chunkingSettingsMap); } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java index fd7cbfb6fa723..0c0d05fc13119 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java @@ -357,7 +357,7 @@ public final void test() throws Throwable { ); assumeFalse( "CSV tests cannot currently handle CHUNK function", - testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.CHUNK_FUNCTION.capabilityName()) + testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.CHUNK_FUNCTION_V2.capabilityName()) ); if (Build.current().isSnapshot()) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index 1de00620b898b..1ba31934cb60e 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -3319,38 +3319,48 @@ public void testSubqueryInFromWithLookupJoinOnFullTextFunction() { } public void testChunkFunctionInvalidInputs() { - if (EsqlCapabilities.Cap.CHUNK_FUNCTION.isEnabled()) { + if (EsqlCapabilities.Cap.CHUNK_FUNCTION_V2.isEnabled()) { + assertThat( + error("from test | EVAL chunks = CHUNK(body, null)", 
fullTextAnalyzer, VerificationException.class), + equalTo("1:27: second argument of [CHUNK(body, null)] cannot be null, received [null]") + ); + assertThat( + error("from test | EVAL chunks = CHUNK(body, {\"strategy\": \"invalid\"})", fullTextAnalyzer, VerificationException.class), + equalTo("1:27: Invalid chunkingStrategy invalid") + ); assertThat( error( - "from test | EVAL chunks = CHUNK(body, {\"num_chunks\": null, \"chunk_size\": 20})", + "from test | EVAL chunks = CHUNK(body, {\"strategy\": \"sentence\", \"max_chunk_size\": 5, \"sentence_overlap\": 1})", fullTextAnalyzer, - ParsingException.class + VerificationException.class ), - equalTo("1:39: Invalid named parameter [\"num_chunks\":null], NULL is not supported") + equalTo( + "1:27: Validation Failed: 1: [chunking_settings] Invalid value [5.0]. " + + "[max_chunk_size] must be a greater than or equal to [20.0];" + ) ); assertThat( error( - "from test | EVAL chunks = CHUNK(body, {\"num_chunks\": 3, \"chunk_size\": null})", + "from test | EVAL chunks = CHUNK(body, {\"strategy\": \"sentence\", \"max_chunk_size\": 5, \"sentence_overlap\": 5})", fullTextAnalyzer, - ParsingException.class + VerificationException.class ), - equalTo("1:39: Invalid named parameter [\"chunk_size\":null], NULL is not supported") - ); - assertThat( - error("from test | EVAL chunks = CHUNK(body, {\"num_chunks\":\"foo\"})", fullTextAnalyzer), - equalTo("1:27: Invalid option [num_chunks] in [CHUNK(body, {\"num_chunks\":\"foo\"})], cannot cast [foo] to [integer]") - ); - assertThat( - error("from test | EVAL chunks = CHUNK(body, {\"chunk_size\":\"foo\"})", fullTextAnalyzer), - equalTo("1:27: Invalid option [chunk_size] in [CHUNK(body, {\"chunk_size\":\"foo\"})], cannot cast [foo] to [integer]") - ); - assertThat( - error("from test | EVAL chunks = CHUNK(body, {\"num_chunks\":-1})", fullTextAnalyzer), - equalTo("1:27: [num_chunks] cannot be negative, found [-1]") + equalTo( + "1:27: Validation Failed: 1: [chunking_settings] Invalid value 
[5.0]. " + + "[max_chunk_size] must be a greater than or equal to [20.0];2: sentence_overlap[5] must be either 0 or 1;" + ) ); assertThat( - error("from test | EVAL chunks = CHUNK(body, {\"chunk_size\":-1})", fullTextAnalyzer), - equalTo("1:27: [chunk_size] cannot be negative, found [-1]") + error( + "from test | EVAL chunks = CHUNK(body, {\"strategy\": \"sentence\", \"max_chunk_size\": 20, " + + "\"sentence_overlap\": 1, \"extra_value\": \"foo\"})", + fullTextAnalyzer, + VerificationException.class + ), + containsString( + "1:27: Invalid option [extra_value] in [CHUNK(body, {\"strategy\": \"sentence\", " + + "\"max_chunk_size\": 20, \"sentence_overlap\": 1, \"extra_value\": \"foo\"})], expected one of [" + ) ); } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ChunkTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ChunkTests.java index 21592b5b95424..5f9d8dda4a746 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ChunkTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ChunkTests.java @@ -14,6 +14,7 @@ import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.operator.EvalOperator; import org.elasticsearch.inference.ChunkingSettings; +import org.elasticsearch.xpack.core.inference.chunking.ChunkingSettingsOptions; import org.elasticsearch.xpack.core.inference.chunking.SentenceBoundaryChunkingSettings; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.Literal; @@ -24,15 +25,18 @@ import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.Objects; +import java.util.Set; import java.util.function.Supplier; import 
java.util.stream.Collectors; import java.util.stream.IntStream; import static org.elasticsearch.compute.data.BlockUtils.toJavaObject; -import static org.elasticsearch.xpack.esql.expression.function.scalar.string.Chunk.CHUNK_SIZE; -import static org.elasticsearch.xpack.esql.expression.function.scalar.string.Chunk.NUM_CHUNKS; +import static org.elasticsearch.xpack.core.inference.chunking.ChunkingSettingsTests.createRandomChunkingSettings; +import static org.elasticsearch.xpack.esql.expression.function.scalar.string.Chunk.ALLOWED_CHUNKING_SETTING_OPTIONS; +import static org.elasticsearch.xpack.esql.expression.function.scalar.string.Chunk.DEFAULT_CHUNKING_SETTINGS; import static org.hamcrest.Matchers.equalTo; public class ChunkTests extends AbstractScalarFunctionTestCase { @@ -64,18 +68,15 @@ public static Iterable parameters() { String text = randomWordsBetween(25, 50); ChunkingSettings chunkingSettings = new SentenceBoundaryChunkingSettings(Chunk.DEFAULT_CHUNK_SIZE, 0); - List chunks = Chunk.chunkText(text, chunkingSettings, Chunk.DEFAULT_NUM_CHUNKS); + List chunks = Chunk.chunkText(text, chunkingSettings); Object expectedResult = chunks.size() == 1 ? 
new BytesRef(chunks.get(0).trim()) : chunks.stream().map(s -> new BytesRef(s.trim())).toList(); return new TestCaseSupplier.TestCase( List.of(new TestCaseSupplier.TypedData(new BytesRef(text), DataType.KEYWORD, "str")), - "ChunkBytesRefEvaluator[str=Attribute[channel=0], numChunks=LiteralsEvaluator[lit=" - + Chunk.DEFAULT_NUM_CHUNKS - + "], chunkSize=LiteralsEvaluator[lit=" - + Chunk.DEFAULT_CHUNK_SIZE - + "]]", + "ChunkBytesRefEvaluator[str=Attribute[channel=0], " + + "chunkingSettings={\"strategy\":\"sentence\",\"max_chunk_size\":300,\"sentence_overlap\":0}]", DataType.KEYWORD, equalTo(expectedResult) ); @@ -83,18 +84,15 @@ public static Iterable parameters() { String text = randomWordsBetween(25, 50); ChunkingSettings chunkingSettings = new SentenceBoundaryChunkingSettings(Chunk.DEFAULT_CHUNK_SIZE, 0); - List chunks = Chunk.chunkText(text, chunkingSettings, Chunk.DEFAULT_NUM_CHUNKS); + List chunks = Chunk.chunkText(text, chunkingSettings); Object expectedResult = chunks.size() == 1 ? 
new BytesRef(chunks.get(0).trim()) : chunks.stream().map(s -> new BytesRef(s.trim())).toList(); return new TestCaseSupplier.TestCase( List.of(new TestCaseSupplier.TypedData(new BytesRef(text), DataType.TEXT, "str")), - "ChunkBytesRefEvaluator[str=Attribute[channel=0], numChunks=LiteralsEvaluator[lit=" - + Chunk.DEFAULT_NUM_CHUNKS - + "], chunkSize=LiteralsEvaluator[lit=" - + Chunk.DEFAULT_CHUNK_SIZE - + "]]", + "ChunkBytesRefEvaluator[str=Attribute[channel=0], " + + "chunkingSettings={\"strategy\":\"sentence\",\"max_chunk_size\":300,\"sentence_overlap\":0}]", DataType.KEYWORD, equalTo(expectedResult) ); @@ -102,67 +100,72 @@ public static Iterable parameters() { ); } - private static MapExpression createOptionsMap(Integer numChunks, Integer chunkSize) { - List keyValuePairs = new ArrayList<>(); + private static MapExpression createChunkingSettings(ChunkingSettings chunkingSettings) { + List chunkingSettingsMap = new ArrayList<>(); - if (Objects.nonNull(numChunks)) { - keyValuePairs.add(Literal.keyword(Source.EMPTY, NUM_CHUNKS)); - keyValuePairs.add(new Literal(Source.EMPTY, numChunks, DataType.INTEGER)); + if (Objects.nonNull(chunkingSettings)) { + chunkingSettings.asMap().forEach((key, value) -> { + chunkingSettingsMap.add(Literal.keyword(Source.EMPTY, key)); + chunkingSettingsMap.add(new Literal(Source.EMPTY, value, ALLOWED_CHUNKING_SETTING_OPTIONS.get(key))); + }); } - if (Objects.nonNull(chunkSize)) { - keyValuePairs.add(Literal.keyword(Source.EMPTY, CHUNK_SIZE)); - keyValuePairs.add(new Literal(Source.EMPTY, chunkSize, DataType.INTEGER)); - } - - return new MapExpression(Source.EMPTY, keyValuePairs); + return new MapExpression(Source.EMPTY, chunkingSettingsMap); } @Override protected Expression build(Source source, List args) { // With MapParam, args contains: field, options_map Expression options = args.size() < 2 ? null : args.get(1); - // TODO needed? 
- if (options instanceof Literal lit && lit.value() == null) { - options = null; - } return new Chunk(source, args.get(0), options); } public void testDefaults() { // Default of 300 is huge, only one chunk returned in this case - verifyChunks(null, null, 1); + verifyChunks(null, 1); } - public void testDefaultNumChunks() { - int chunkSize = 20; - verifyChunks(null, chunkSize, 8); + public void testDefaultChunkingSettings() { + verifyChunks(null, 1); } - public void testDefaultChunkSize() { - int numChunks = 1; // Default of 300 is huge, only one chunk returned in this case - verifyChunks(numChunks, null, numChunks); + public void testSpecifiedChunkingSettings() { + // We can't randomize here: the expected number of chunks below is tied to this specific chunk size. + int chunkSize = 25; + int expectedNumChunks = 6; + ChunkingSettings chunkingSettings = new SentenceBoundaryChunkingSettings(chunkSize, 0); + verifyChunks(chunkingSettings, expectedNumChunks); } - public void testSpecifiedOptions() { - int numChunks = randomIntBetween(2, 4); - int chunkSize = randomIntBetween(20, 30); - verifyChunks(numChunks, chunkSize, numChunks); + public void testRandomChunkingSettings() { + ChunkingSettings chunkingSettings = createRandomChunkingSettings(); + List result = process(PARAGRAPH_INPUT, chunkingSettings); + assertNotNull(result); + assertFalse(result.isEmpty()); + // Actual results depend on chunking settings passed in + } + + // Paranoia check, this test will fail if we add new chunking settings options without updating the Chunk function + public void testChunkDefinesAllAllowedChunkingSettingsOptions() { + Set allowedOptions = ALLOWED_CHUNKING_SETTING_OPTIONS.keySet(); + Set allOptions = Arrays.stream(ChunkingSettingsOptions.values()) + .map(ChunkingSettingsOptions::toString) + .collect(Collectors.toSet()); + + assertEquals(allOptions, allowedOptions); } - private void verifyChunks(Integer numChunks, Integer chunkSize, int expectedNumChunksReturned) { - int 
numChunksOrDefault = numChunks != null ? numChunks : Chunk.DEFAULT_NUM_CHUNKS; - int chunkSizeOrDefault = chunkSize != null ? chunkSize : Chunk.DEFAULT_CHUNK_SIZE; - ChunkingSettings settings = new SentenceBoundaryChunkingSettings(chunkSizeOrDefault, 0); - List expected = Chunk.chunkText(PARAGRAPH_INPUT, settings, numChunksOrDefault).stream().map(String::trim).toList(); + private void verifyChunks(ChunkingSettings chunkingSettings, int expectedNumChunksReturned) { + ChunkingSettings chunkingSettingsOrDefault = chunkingSettings != null ? chunkingSettings : DEFAULT_CHUNKING_SETTINGS; + List expected = Chunk.chunkText(PARAGRAPH_INPUT, chunkingSettingsOrDefault).stream().map(String::trim).toList(); - List result = process(PARAGRAPH_INPUT, numChunksOrDefault, chunkSizeOrDefault); + List result = process(PARAGRAPH_INPUT, chunkingSettingsOrDefault); assertThat(result.size(), equalTo(expectedNumChunksReturned)); assertThat(result, equalTo(expected)); } - private List process(String str, Integer numChunks, Integer chunkSize) { - MapExpression optionsMap = (numChunks == null && chunkSize == null) ? null : createOptionsMap(numChunks, chunkSize); + private List process(String str, ChunkingSettings chunkingSettings) { + MapExpression optionsMap = chunkingSettings == null ? null : createChunkingSettings(chunkingSettings); try ( EvalOperator.ExpressionEvaluator eval = evaluator(new Chunk(Source.EMPTY, field("str", DataType.KEYWORD), optionsMap)).get(