diff --git a/docs/changelog/138123.yaml b/docs/changelog/138123.yaml
new file mode 100644
index 0000000000000..c68e47500ffe0
--- /dev/null
+++ b/docs/changelog/138123.yaml
@@ -0,0 +1,5 @@
+pr: 138123
+summary: ES|QL Update CHUNK to support `chunking_settings` as an optional argument
+area: ES|QL
+type: enhancement
+issues: []
diff --git a/docs/reference/query-languages/esql/_snippets/functions/examples/chunk.md b/docs/reference/query-languages/esql/_snippets/functions/examples/chunk.md
index 4f875b1214fab..ec291cb115e3f 100644
--- a/docs/reference/query-languages/esql/_snippets/functions/examples/chunk.md
+++ b/docs/reference/query-languages/esql/_snippets/functions/examples/chunk.md
@@ -1,6 +1,6 @@
% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
-**Example**
+**Examples**
```{applies_to}
stack: preview 9.3.0
@@ -8,15 +8,32 @@ stack: preview 9.3.0
```esql
FROM books
-| EVAL chunks = CHUNK(description, {"num_chunks":1, "chunk_size":20})
+| EVAL chunks = CHUNK(description)
```
| book_no:keyword | title:text | chunks:keyword |
| --- | --- | --- |
-| 1211 | The brothers Karamazov | In 1880 Dostoevsky completed The Brothers Karamazov, the literary effort for which he had been preparing all his life. |
-| 1463 | Realms of Tolkien: Images of Middle-earth | Twenty new and familiar Tolkien artists are represented in this fabulous volume, breathing an extraordinary variety of life into 58 |
-| 1502 | Selected Passages from Correspondence with Friends | Nikolai Gogol wrote some letters to his friends, none of which were a nose of high rank. |
-| 1937 | The Best Short Stories of Dostoevsky (Modern Library) | This collection, unique to the Modern Library, gathers seven of Dostoevsky's key works and shows him to be equally adept |
-| 1985 | Brothers Karamazov | Four brothers reunite in their hometown in Russia. |
+| 1211 | The brothers Karamazov | In 1880 Dostoevsky completed The Brothers Karamazov, the literary effort for which he had been preparing all his life. Compelling, profound, complex, it is the story of a patricide and of the four sons who each had a motive for murder: Dmitry, the sensualist, Ivan, the intellectual, Alyosha, the mystic, and twisted, cunning Smerdyakov, the bastard child. Frequently lurid, nightmarish, always brilliant, the novel plunges the reader into a sordid love triangle, a pathological obsession, and a gripping courtroom drama. But throughout the whole, Dostoevsky searhes for the truth--about man, about life, about the existence of God. A terrifying answer to man's eternal questions, this monumental work remains the crowning achievement of perhaps the finest novelist of all time. From the Paperback edition. |
+| 1463 | Realms of Tolkien: Images of Middle-earth | Twenty new and familiar Tolkien artists are represented in this fabulous volume, breathing an extraordinary variety of life into 58 different scenes, each of which is accompanied by appropriate passage from The Hobbit and The Lord of the Rings and The Silmarillion |
+| 1502 | Selected Passages from Correspondence with Friends | Nikolai Gogol wrote some letters to his friends, none of which were a nose of high rank. Many are reproduced here (the letters, not noses). |
+| 1937 | The Best Short Stories of Dostoevsky (Modern Library) | This collection, unique to the Modern Library, gathers seven of Dostoevsky's key works and shows him to be equally adept at the short story as with the novel. Exploring many of the same themes as in his longer works, these small masterpieces move from the tender and romantic White Nights, an archetypal nineteenth-century morality tale of pathos and loss, to the famous Notes from the Underground, a story of guilt, ineffectiveness, and uncompromising cynicism, and the first major work of existential literature. Among Dostoevsky's prototypical characters is Yemelyan in The Honest Thief, whose tragedy turns on an inability to resist crime. Presented in chronological order, in David Magarshack's celebrated translation, this is the definitive edition of Dostoevsky's best stories. |
+| 1985 | Brothers Karamazov | Four brothers reunite in their hometown in Russia. The murder of their father forces the brothers to question their beliefs about each other, religion, and morality. |
+
+```{applies_to}
+stack: preview 9.3.0
+```
+
+```esql
+FROM books
+| EVAL chunks = CHUNK(description, {"strategy": "sentence", "max_chunk_size": 20, "sentence_overlap": 0})
+```
+
+| book_no:keyword | title:text | chunks:keyword |
+| --- | --- | --- |
+| 1211 | The brothers Karamazov | [In 1880 Dostoevsky completed The Brothers Karamazov, the literary effort for which he had been preparing all his life., Compelling, profound, complex, it is the story of a patricide and of the four sons who each had a motive, of the four sons who each had a motive for murder: Dmitry, the sensualist, Ivan, the intellectual, Alyosha, the mystic, : Dmitry, the sensualist, Ivan, the intellectual, Alyosha, the mystic, and twisted, cunning Smerdyakov, the bastard child., Frequently lurid, nightmarish, always brilliant, the novel plunges the reader into a sordid love triangle, a pathological obsession, and a, a sordid love triangle, a pathological obsession, and a gripping courtroom drama., But throughout the whole, Dostoevsky searhes for the truth--about man, about life, about the existence of God., A terrifying answer to man's eternal questions, this monumental work remains the crowning achievement of perhaps the finest novelist of, the crowning achievement of perhaps the finest novelist of all time. From the Paperback edition.] |
+| 1463 | Realms of Tolkien: Images of Middle-earth | [Twenty new and familiar Tolkien artists are represented in this fabulous volume, breathing an extraordinary variety of life into 58, volume, breathing an extraordinary variety of life into 58 different scenes, each of which is accompanied by appropriate passage from, , each of which is accompanied by appropriate passage from The Hobbit and The Lord of the Rings and The Silmarillion] |
+| 1502 | Selected Passages from Correspondence with Friends | [Nikolai Gogol wrote some letters to his friends, none of which were a nose of high rank., Many are reproduced here (the letters, not noses).] |
+| 1937 | The Best Short Stories of Dostoevsky (Modern Library) | [This collection, unique to the Modern Library, gathers seven of Dostoevsky's key works and shows him to be equally adept, key works and shows him to be equally adept at the short story as with the novel., Exploring many of the same themes as in his longer works, these small masterpieces move from the tender and romantic, , these small masterpieces move from the tender and romantic White Nights, an archetypal nineteenth-century morality tale of pathos and, , an archetypal nineteenth-century morality tale of pathos and loss, to the famous Notes from the Underground, a story of, the famous Notes from the Underground, a story of guilt, ineffectiveness, and uncompromising cynicism, and the first major work of, , and uncompromising cynicism, and the first major work of existential literature., Among Dostoevsky's prototypical characters is Yemelyan in The Honest Thief, whose tragedy turns on an inability to resist crime., Presented in chronological order, in David Magarshack's celebrated translation, this is the definitive edition of Dostoevsky's best stories.] |
+| 1985 | Brothers Karamazov | [Four brothers reunite in their hometown in Russia., The murder of their father forces the brothers to question their beliefs about each other, religion, and morality.] |
diff --git a/docs/reference/query-languages/esql/_snippets/functions/functionNamedParams/chunk.md b/docs/reference/query-languages/esql/_snippets/functions/functionNamedParams/chunk.md
index 265551c8bee8a..6a7405397851d 100644
--- a/docs/reference/query-languages/esql/_snippets/functions/functionNamedParams/chunk.md
+++ b/docs/reference/query-languages/esql/_snippets/functions/functionNamedParams/chunk.md
@@ -2,9 +2,3 @@
**Supported function named parameters**
-`num_chunks`
-: (integer) The number of chunks to return. Defaults to return all chunks.
-
-`chunk_size`
-: (integer) The size of sentence-based chunks to use. Defaults to 300
-
diff --git a/docs/reference/query-languages/esql/_snippets/functions/layout/chunk.md b/docs/reference/query-languages/esql/_snippets/functions/layout/chunk.md
index a3e67be49499a..174db24b5949b 100644
--- a/docs/reference/query-languages/esql/_snippets/functions/layout/chunk.md
+++ b/docs/reference/query-languages/esql/_snippets/functions/layout/chunk.md
@@ -1,6 +1,10 @@
% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
## `CHUNK` [esql-chunk]
+```{applies_to}
+stack: preview 9.3.0
+serverless: preview
+```
**Syntax**
diff --git a/docs/reference/query-languages/esql/_snippets/functions/parameters/chunk.md b/docs/reference/query-languages/esql/_snippets/functions/parameters/chunk.md
index f287627d571ee..3c4d856262f1a 100644
--- a/docs/reference/query-languages/esql/_snippets/functions/parameters/chunk.md
+++ b/docs/reference/query-languages/esql/_snippets/functions/parameters/chunk.md
@@ -5,6 +5,6 @@
`field`
: The input to chunk.
-`options`
-: Options to customize chunking behavior.
+`chunking_settings`
+: Options to customize chunking behavior. Refer to the [Inference API documentation](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put#operation-inference-put-body-application-json-chunking_settings) for valid values for `chunking_settings`.
diff --git a/docs/reference/query-languages/esql/_snippets/functions/types/chunk.md b/docs/reference/query-languages/esql/_snippets/functions/types/chunk.md
index 8ebe22b61286c..19f035575cf25 100644
--- a/docs/reference/query-languages/esql/_snippets/functions/types/chunk.md
+++ b/docs/reference/query-languages/esql/_snippets/functions/types/chunk.md
@@ -2,7 +2,7 @@
**Supported types**
-| field | options | result |
+| field | chunking_settings | result |
| --- | --- | --- |
| keyword | | keyword |
| text | | keyword |
diff --git a/docs/reference/query-languages/esql/images/functions/chunk.svg b/docs/reference/query-languages/esql/images/functions/chunk.svg
index 56003f305a080..0031ba125d06c 100644
--- a/docs/reference/query-languages/esql/images/functions/chunk.svg
+++ b/docs/reference/query-languages/esql/images/functions/chunk.svg
@@ -1 +1 @@
-
\ No newline at end of file
+
\ No newline at end of file
diff --git a/docs/reference/query-languages/esql/kibana/definition/functions/chunk.json b/docs/reference/query-languages/esql/kibana/definition/functions/chunk.json
index 2be5b9665c320..9347b38f28575 100644
--- a/docs/reference/query-languages/esql/kibana/definition/functions/chunk.json
+++ b/docs/reference/query-languages/esql/kibana/definition/functions/chunk.json
@@ -30,7 +30,8 @@
}
],
"examples" : [
- "FROM books\n| EVAL chunks = CHUNK(description, {\"num_chunks\":1, \"chunk_size\":20})"
+ "FROM books\n| EVAL chunks = CHUNK(description)",
+ "FROM books\n| EVAL chunks = CHUNK(description, {\"strategy\": \"sentence\", \"max_chunk_size\": 20, \"sentence_overlap\": 0})"
],
"preview" : true,
"snapshot_only" : true
diff --git a/docs/reference/query-languages/esql/kibana/docs/functions/chunk.md b/docs/reference/query-languages/esql/kibana/docs/functions/chunk.md
index 2af9e41799859..c5f426e32cdda 100644
--- a/docs/reference/query-languages/esql/kibana/docs/functions/chunk.md
+++ b/docs/reference/query-languages/esql/kibana/docs/functions/chunk.md
@@ -5,5 +5,5 @@ Use `CHUNK` to split a text field into smaller chunks.
```esql
FROM books
-| EVAL chunks = CHUNK(description, {"num_chunks":1, "chunk_size":20})
+| EVAL chunks = CHUNK(description)
```
diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/InvalidArgumentException.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/InvalidArgumentException.java
index c051a9fa724fc..eb0fbbcbfb4a7 100644
--- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/InvalidArgumentException.java
+++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/InvalidArgumentException.java
@@ -24,4 +24,8 @@ public InvalidArgumentException(Throwable cause, String message, Object... args)
super(cause, message, args);
}
+ public InvalidArgumentException(String message, Throwable cause) { // (message, cause) overload: both arguments are passed straight to the superclass constructor
+ super(message, cause);
+ }
+
}
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/chunk.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/chunk.csv-spec
index 1bf73acb2999b..2ae6ce51555c0 100644
--- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/chunk.csv-spec
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/chunk.csv-spec
@@ -2,12 +2,12 @@
# Tests for Chunk function
#
-chunkExample
-required_capability: chunk_function
+chunkDefaults
+required_capability: chunk_function_v2
// tag::chunk-with-field[]
FROM books
-| EVAL chunks = CHUNK(description, {"num_chunks":1, "chunk_size":20})
+| EVAL chunks = CHUNK(description)
// end::chunk-with-field[]
| KEEP book_no, title, chunks
| SORT book_no
@@ -16,100 +16,141 @@ FROM books
// tag::chunk-with-field-result[]
book_no:keyword | title:text | chunks:keyword
-1211 | The brothers Karamazov | In 1880 Dostoevsky completed The Brothers Karamazov, the literary effort for which he had been preparing all his life.
-1463 | Realms of Tolkien: Images of Middle-earth | Twenty new and familiar Tolkien artists are represented in this fabulous volume, breathing an extraordinary variety of life into 58
-1502 | Selected Passages from Correspondence with Friends | Nikolai Gogol wrote some letters to his friends, none of which were a nose of high rank.
-1937 | The Best Short Stories of Dostoevsky (Modern Library) | This collection, unique to the Modern Library, gathers seven of Dostoevsky's key works and shows him to be equally adept
-1985 | Brothers Karamazov | Four brothers reunite in their hometown in Russia.
+1211 | The brothers Karamazov | In 1880 Dostoevsky completed The Brothers Karamazov, the literary effort for which he had been preparing all his life. Compelling, profound, complex, it is the story of a patricide and of the four sons who each had a motive for murder: Dmitry, the sensualist, Ivan, the intellectual, Alyosha, the mystic, and twisted, cunning Smerdyakov, the bastard child. Frequently lurid, nightmarish, always brilliant, the novel plunges the reader into a sordid love triangle, a pathological obsession, and a gripping courtroom drama. But throughout the whole, Dostoevsky searhes for the truth--about man, about life, about the existence of God. A terrifying answer to man's eternal questions, this monumental work remains the crowning achievement of perhaps the finest novelist of all time. From the Paperback edition.
+1463 | Realms of Tolkien: Images of Middle-earth | Twenty new and familiar Tolkien artists are represented in this fabulous volume, breathing an extraordinary variety of life into 58 different scenes, each of which is accompanied by appropriate passage from The Hobbit and The Lord of the Rings and The Silmarillion
+1502 | Selected Passages from Correspondence with Friends | Nikolai Gogol wrote some letters to his friends, none of which were a nose of high rank. Many are reproduced here (the letters, not noses).
+1937 | The Best Short Stories of Dostoevsky (Modern Library) | This collection, unique to the Modern Library, gathers seven of Dostoevsky's key works and shows him to be equally adept at the short story as with the novel. Exploring many of the same themes as in his longer works, these small masterpieces move from the tender and romantic White Nights, an archetypal nineteenth-century morality tale of pathos and loss, to the famous Notes from the Underground, a story of guilt, ineffectiveness, and uncompromising cynicism, and the first major work of existential literature. Among Dostoevsky's prototypical characters is Yemelyan in The Honest Thief, whose tragedy turns on an inability to resist crime. Presented in chronological order, in David Magarshack's celebrated translation, this is the definitive edition of Dostoevsky's best stories.
+1985 | Brothers Karamazov | Four brothers reunite in their hometown in Russia. The murder of their father forces the brothers to question their beliefs about each other, religion, and morality.
// end::chunk-with-field-result[]
;
-chunkDefaults
-required_capability: chunk_function
+chunkWithChunkingSettings
+required_capability: chunk_function_v2
+// tag::chunk-with-chunking-settings[]
FROM books
-| EVAL chunks = CHUNK(description)
+| EVAL chunks = CHUNK(description, {"strategy": "sentence", "max_chunk_size": 20, "sentence_overlap": 0})
+// end::chunk-with-chunking-settings[]
| KEEP book_no, title, chunks
| SORT book_no
| LIMIT 5
;
+// tag::chunk-with-chunking-settings-result[]
book_no:keyword | title:text | chunks:keyword
-1211 | The brothers Karamazov | In 1880 Dostoevsky completed The Brothers Karamazov, the literary effort for which he had been preparing all his life. Compelling, profound, complex, it is the story of a patricide and of the four sons who each had a motive for murder: Dmitry, the sensualist, Ivan, the intellectual, Alyosha, the mystic, and twisted, cunning Smerdyakov, the bastard child. Frequently lurid, nightmarish, always brilliant, the novel plunges the reader into a sordid love triangle, a pathological obsession, and a gripping courtroom drama. But throughout the whole, Dostoevsky searhes for the truth--about man, about life, about the existence of God. A terrifying answer to man's eternal questions, this monumental work remains the crowning achievement of perhaps the finest novelist of all time. From the Paperback edition.
-1463 | Realms of Tolkien: Images of Middle-earth | Twenty new and familiar Tolkien artists are represented in this fabulous volume, breathing an extraordinary variety of life into 58 different scenes, each of which is accompanied by appropriate passage from The Hobbit and The Lord of the Rings and The Silmarillion
-1502 | Selected Passages from Correspondence with Friends | Nikolai Gogol wrote some letters to his friends, none of which were a nose of high rank. Many are reproduced here (the letters, not noses).
-1937 | The Best Short Stories of Dostoevsky (Modern Library) | This collection, unique to the Modern Library, gathers seven of Dostoevsky's key works and shows him to be equally adept at the short story as with the novel. Exploring many of the same themes as in his longer works, these small masterpieces move from the tender and romantic White Nights, an archetypal nineteenth-century morality tale of pathos and loss, to the famous Notes from the Underground, a story of guilt, ineffectiveness, and uncompromising cynicism, and the first major work of existential literature. Among Dostoevsky's prototypical characters is Yemelyan in The Honest Thief, whose tragedy turns on an inability to resist crime. Presented in chronological order, in David Magarshack's celebrated translation, this is the definitive edition of Dostoevsky's best stories.
-1985 | Brothers Karamazov | Four brothers reunite in their hometown in Russia. The murder of their father forces the brothers to question their beliefs about each other, religion, and morality.
+1211 | The brothers Karamazov | [In 1880 Dostoevsky completed The Brothers Karamazov, the literary effort for which he had been preparing all his life., Compelling, profound, complex, it is the story of a patricide and of the four sons who each had a motive, of the four sons who each had a motive for murder: Dmitry, the sensualist, Ivan, the intellectual, Alyosha, the mystic, : Dmitry, the sensualist, Ivan, the intellectual, Alyosha, the mystic, and twisted, cunning Smerdyakov, the bastard child., Frequently lurid, nightmarish, always brilliant, the novel plunges the reader into a sordid love triangle, a pathological obsession, and a, a sordid love triangle, a pathological obsession, and a gripping courtroom drama., But throughout the whole, Dostoevsky searhes for the truth--about man, about life, about the existence of God., A terrifying answer to man's eternal questions, this monumental work remains the crowning achievement of perhaps the finest novelist of, the crowning achievement of perhaps the finest novelist of all time. From the Paperback edition.]
+1463 | Realms of Tolkien: Images of Middle-earth | [Twenty new and familiar Tolkien artists are represented in this fabulous volume, breathing an extraordinary variety of life into 58, volume, breathing an extraordinary variety of life into 58 different scenes, each of which is accompanied by appropriate passage from, , each of which is accompanied by appropriate passage from The Hobbit and The Lord of the Rings and The Silmarillion]
+1502 | Selected Passages from Correspondence with Friends | [Nikolai Gogol wrote some letters to his friends, none of which were a nose of high rank., Many are reproduced here (the letters, not noses).]
+1937 | The Best Short Stories of Dostoevsky (Modern Library) | [This collection, unique to the Modern Library, gathers seven of Dostoevsky's key works and shows him to be equally adept, key works and shows him to be equally adept at the short story as with the novel., Exploring many of the same themes as in his longer works, these small masterpieces move from the tender and romantic, , these small masterpieces move from the tender and romantic White Nights, an archetypal nineteenth-century morality tale of pathos and, , an archetypal nineteenth-century morality tale of pathos and loss, to the famous Notes from the Underground, a story of, the famous Notes from the Underground, a story of guilt, ineffectiveness, and uncompromising cynicism, and the first major work of, , and uncompromising cynicism, and the first major work of existential literature., Among Dostoevsky's prototypical characters is Yemelyan in The Honest Thief, whose tragedy turns on an inability to resist crime., Presented in chronological order, in David Magarshack's celebrated translation, this is the definitive edition of Dostoevsky's best stories.]
+1985 | Brothers Karamazov | [Four brothers reunite in their hometown in Russia., The murder of their father forces the brothers to question their beliefs about each other, religion, and morality.]
+// end::chunk-with-chunking-settings-result[]
;
chunkTextWithMatch
-required_capability: chunk_function
+required_capability: chunk_function_v2
FROM books
| WHERE MATCH(title, "Return")
-| EVAL chunks = CHUNK(description, {"num_chunks":1, "chunk_size":20})
+| EVAL chunks = CHUNK(description, {"strategy": "sentence", "max_chunk_size": 20, "sentence_overlap": 0})
| KEEP book_no, title, chunks;
ignoreOrder:true
book_no:keyword | title:text | chunks:keyword
-2714 | Return of the King Being the Third Part of The Lord of the Rings | Concluding the story begun in The Hobbit, this is the final part of Tolkien s epic masterpiece, The Lord of
-7350 | Return of the Shadow | In this sixth volume of The History of Middle-earth the story reaches The Lord of the Rings.
+2714 | Return of the King Being the Third Part of The Lord of the Rings | [Concluding the story begun in The Hobbit, this is the final part of Tolkien s epic masterpiece, The Lord of, part of Tolkien s epic masterpiece, The Lord of the Rings, featuring an exclusive cover image from the film, the, , featuring an exclusive cover image from the film, the definitive text, and a detailed map of Middle-earth., The armies of the Dark Lord Sauron are massing as his evil shadow spreads ever wider., Men, Dwarves, Elves and Ents unite forces to do battle agains the Dark., Meanwhile, Frodo and Sam struggle further into Mordor in their heroic quest to destroy the One Ring., The devastating conclusion of J.R.R., Tolkien s classic tale of magic and adventure, begun in The Fellowship of the Ring and The Two Towers, features, Fellowship of the Ring and The Two Towers, features the definitive edition of the text and includes the Appendices and, edition of the text and includes the Appendices and a revised Index in full., To celebrate the release of the first of Peter Jackson s two-part film adaptation of The Hobbit, THE HOBBIT, two-part film adaptation of The Hobbit, THE HOBBIT: AN UNEXPECTED JOURNEY, this third part of The Lord of the, JOURNEY, this third part of The Lord of the Rings is available for a limited time with an exclusive cover, available for a limited time with an exclusive cover image from Peter Jackson s award-winning trilogy.]
+7350 | Return of the Shadow | [In this sixth volume of The History of Middle-earth the story reaches The Lord of the Rings., In The Return of the Shadow (an abandoned title for the first volume) Christopher Tolkien describes, with full citation of, first volume) Christopher Tolkien describes, with full citation of the earliest notes, outline plans, and narrative drafts, the intricate evolution, notes, outline plans, and narrative drafts, the intricate evolution of The Fellowship of the Ring and the gradual emergence of, Fellowship of the Ring and the gradual emergence of the conceptions that transformed what J.R.R., Tolkien for long believed would be a far shorter book, 'a sequel to The Hobbit'., The enlargement of Bilbo's 'magic ring' into the supremely potent and dangerous Ruling Ring of the Dark Lord is traced, dangerous Ruling Ring of the Dark Lord is traced and the precise moment is seen when, in an astonishing and, precise moment is seen when, in an astonishing and unforeseen leap in the earliest narrative, a Black Rider first rode, in the earliest narrative, a Black Rider first rode into the Shire, his significance still unknown., The character of the hobbit called Trotter (afterwards Strider or Aragorn) is developed while his indentity remains an absolute puzzle, ) is developed while his indentity remains an absolute puzzle, and the suspicion only very slowly becomes certainty that he must, suspicion only very slowly becomes certainty that he must after all be a Man., The hobbits, Frodo's companions, undergo intricate permutations of name and personality, and other major figures appear in strange modes: a, , and other major figures appear in strange modes: a sinister Treebeard, in league with the Enemy, a ferocious and malevolent, , in league with the Enemy, a ferocious and malevolent Farmer Maggot., The story in this book ends at the point where J.R.R., Tolkien halted in the story for a long time, as the Company of the Ring, still lacking Legolas and Gimli, Company of the Ring, still lacking Legolas and Gimli, stood before the tomb of Balin in the Mines of Moria., The Return of the Shadow is illustrated with reproductions of the first maps and notable pages from the earliest manuscripts.]
;
-chunkTextWithMatchMultipleChunks
-required_capability: chunk_function
+chunkTextWithMatchMultipleChunksMvExpand
+required_capability: chunk_function_v2
FROM books
| WHERE MATCH(title, "Return")
-| EVAL chunks = CHUNK(description, {"num_chunks":3, "chunk_size":20})
+| EVAL chunks = CHUNK(description, {"strategy": "sentence", "max_chunk_size": 20, "sentence_overlap": 0})
+| MV_EXPAND chunks
| KEEP book_no, title, chunks;
ignoreOrder:true
book_no:keyword | title:text | chunks:keyword
-2714 | Return of the King Being the Third Part of The Lord of the Rings | [Concluding the story begun in The Hobbit, this is the final part of Tolkien s epic masterpiece, The Lord of, part of Tolkien s epic masterpiece, The Lord of the Rings, featuring an exclusive cover image from the film, the, , featuring an exclusive cover image from the film, the definitive text, and a detailed map of Middle-earth.]
-7350 | Return of the Shadow | [In this sixth volume of The History of Middle-earth the story reaches The Lord of the Rings., In The Return of the Shadow (an abandoned title for the first volume) Christopher Tolkien describes, with full citation of, first volume) Christopher Tolkien describes, with full citation of the earliest notes, outline plans, and narrative drafts, the intricate evolution]
+2714 | Return of the King Being the Third Part of The Lord of the Rings | , featuring an exclusive cover image from the film, the definitive text, and a detailed map of Middle-earth.
+2714 | Return of the King Being the Third Part of The Lord of the Rings | Concluding the story begun in The Hobbit, this is the final part of Tolkien s epic masterpiece, The Lord of
+2714 | Return of the King Being the Third Part of The Lord of the Rings | Fellowship of the Ring and The Two Towers, features the definitive edition of the text and includes the Appendices and
+2714 | Return of the King Being the Third Part of The Lord of the Rings | JOURNEY, this third part of The Lord of the Rings is available for a limited time with an exclusive cover
+2714 | Return of the King Being the Third Part of The Lord of the Rings | Meanwhile, Frodo and Sam struggle further into Mordor in their heroic quest to destroy the One Ring.
+2714 | Return of the King Being the Third Part of The Lord of the Rings | Men, Dwarves, Elves and Ents unite forces to do battle agains the Dark.
+2714 | Return of the King Being the Third Part of The Lord of the Rings | The armies of the Dark Lord Sauron are massing as his evil shadow spreads ever wider.
+2714 | Return of the King Being the Third Part of The Lord of the Rings | The devastating conclusion of J.R.R.
+2714 | Return of the King Being the Third Part of The Lord of the Rings | To celebrate the release of the first of Peter Jackson s two-part film adaptation of The Hobbit, THE HOBBIT
+2714 | Return of the King Being the Third Part of The Lord of the Rings | Tolkien s classic tale of magic and adventure, begun in The Fellowship of the Ring and The Two Towers, features
+2714 | Return of the King Being the Third Part of The Lord of the Rings | available for a limited time with an exclusive cover image from Peter Jackson s award-winning trilogy.
+2714 | Return of the King Being the Third Part of The Lord of the Rings | edition of the text and includes the Appendices and a revised Index in full.
+2714 | Return of the King Being the Third Part of The Lord of the Rings | part of Tolkien s epic masterpiece, The Lord of the Rings, featuring an exclusive cover image from the film, the
+2714 | Return of the King Being the Third Part of The Lord of the Rings | two-part film adaptation of The Hobbit, THE HOBBIT: AN UNEXPECTED JOURNEY, this third part of The Lord of the
+7350 | Return of the Shadow | ) is developed while his indentity remains an absolute puzzle, and the suspicion only very slowly becomes certainty that he must
+7350 | Return of the Shadow | , and other major figures appear in strange modes: a sinister Treebeard, in league with the Enemy, a ferocious and malevolent
+7350 | Return of the Shadow | , in league with the Enemy, a ferocious and malevolent Farmer Maggot.
+7350 | Return of the Shadow | Company of the Ring, still lacking Legolas and Gimli, stood before the tomb of Balin in the Mines of Moria.
+7350 | Return of the Shadow | Fellowship of the Ring and the gradual emergence of the conceptions that transformed what J.R.R.
+7350 | Return of the Shadow | In The Return of the Shadow (an abandoned title for the first volume) Christopher Tolkien describes, with full citation of
+7350 | Return of the Shadow | In this sixth volume of The History of Middle-earth the story reaches The Lord of the Rings.
+7350 | Return of the Shadow | The Return of the Shadow is illustrated with reproductions of the first maps and notable pages from the earliest manuscripts.
+7350 | Return of the Shadow | The character of the hobbit called Trotter (afterwards Strider or Aragorn) is developed while his indentity remains an absolute puzzle
+7350 | Return of the Shadow | The enlargement of Bilbo's 'magic ring' into the supremely potent and dangerous Ruling Ring of the Dark Lord is traced
+7350 | Return of the Shadow | The hobbits, Frodo's companions, undergo intricate permutations of name and personality, and other major figures appear in strange modes: a
+7350 | Return of the Shadow | The story in this book ends at the point where J.R.R.
+7350 | Return of the Shadow | Tolkien for long believed would be a far shorter book, 'a sequel to The Hobbit'.
+7350 | Return of the Shadow | Tolkien halted in the story for a long time, as the Company of the Ring, still lacking Legolas and Gimli
+7350 | Return of the Shadow | dangerous Ruling Ring of the Dark Lord is traced and the precise moment is seen when, in an astonishing and
+7350 | Return of the Shadow | first volume) Christopher Tolkien describes, with full citation of the earliest notes, outline plans, and narrative drafts, the intricate evolution
+7350 | Return of the Shadow | in the earliest narrative, a Black Rider first rode into the Shire, his significance still unknown.
+7350 | Return of the Shadow | notes, outline plans, and narrative drafts, the intricate evolution of The Fellowship of the Ring and the gradual emergence of
+7350 | Return of the Shadow | precise moment is seen when, in an astonishing and unforeseen leap in the earliest narrative, a Black Rider first rode
+7350 | Return of the Shadow | suspicion only very slowly becomes certainty that he must after all be a Man.
;
-chunkTextWithMatchMultipleChunksMvExpand
-required_capability: chunk_function
+chunkTextWithMatchMultipleChunksMvSliceMvExpand
+required_capability: chunk_function_v2
FROM books
| WHERE MATCH(title, "Return")
-| EVAL chunks = CHUNK(description, {"num_chunks":3, "chunk_size":20})
-| MV_EXPAND chunks
-| KEEP book_no, title, chunks;
+| EVAL chunks = CHUNK(description, {"strategy": "sentence", "max_chunk_size": 20, "sentence_overlap": 0})
+| EVAL truncated = MV_SLICE(chunks, 0, 3)
+| MV_EXPAND truncated
+| KEEP book_no, title, truncated;
ignoreOrder:true
-book_no:keyword | title:text | chunks:keyword
+book_no:keyword | title:text | truncated:keyword
2714 | Return of the King Being the Third Part of The Lord of the Rings | , featuring an exclusive cover image from the film, the definitive text, and a detailed map of Middle-earth.
2714 | Return of the King Being the Third Part of The Lord of the Rings | Concluding the story begun in The Hobbit, this is the final part of Tolkien s epic masterpiece, The Lord of
+2714 | Return of the King Being the Third Part of The Lord of the Rings | The armies of the Dark Lord Sauron are massing as his evil shadow spreads ever wider.
2714 | Return of the King Being the Third Part of The Lord of the Rings | part of Tolkien s epic masterpiece, The Lord of the Rings, featuring an exclusive cover image from the film, the
7350 | Return of the Shadow | In The Return of the Shadow (an abandoned title for the first volume) Christopher Tolkien describes, with full citation of
7350 | Return of the Shadow | In this sixth volume of The History of Middle-earth the story reaches The Lord of the Rings.
7350 | Return of the Shadow | first volume) Christopher Tolkien describes, with full citation of the earliest notes, outline plans, and narrative drafts, the intricate evolution
+7350 | Return of the Shadow | notes, outline plans, and narrative drafts, the intricate evolution of The Fellowship of the Ring and the gradual emergence of
;
+
chunkTextWithConcatenatedField
-required_capability: chunk_function
+required_capability: chunk_function_v2
FROM books
-| EVAL title_description = CONCAT(title, description)
-| EVAL chunks = CHUNK(title_description, {"num_chunks":1, "chunk_size":20})
+| EVAL title_description = CONCAT(title, " ", description)
+| EVAL chunks = CHUNK(title_description, {"strategy": "sentence", "max_chunk_size": 20, "sentence_overlap": 0})
| KEEP book_no, title, chunks
| SORT book_no
| LIMIT 5
;
book_no:keyword | title:text | chunks:keyword
-1211 | The brothers Karamazov | The brothers KaramazovIn 1880 Dostoevsky completed The Brothers Karamazov, the literary effort for which he had been preparing all his
-1463 | Realms of Tolkien: Images of Middle-earth | Realms of Tolkien: Images of Middle-earthTwenty new and familiar Tolkien artists are represented in this fabulous volume, breathing an
-1502 | Selected Passages from Correspondence with Friends | Selected Passages from Correspondence with FriendsNikolai Gogol wrote some letters to his friends, none of which were a nose of
-1937 | The Best Short Stories of Dostoevsky (Modern Library) | The Best Short Stories of Dostoevsky (Modern Library)This collection, unique to the Modern Library, gathers seven of Dostoevsky's key
-1985 | Brothers Karamazov | Brothers KaramazovFour brothers reunite in their hometown in Russia.
+1211 | The brothers Karamazov | [The brothers Karamazov In 1880 Dostoevsky completed The Brothers Karamazov, the literary effort for which he had been preparing all, literary effort for which he had been preparing all his life., Compelling, profound, complex, it is the story of a patricide and of the four sons who each had a motive, of the four sons who each had a motive for murder: Dmitry, the sensualist, Ivan, the intellectual, Alyosha, the mystic, : Dmitry, the sensualist, Ivan, the intellectual, Alyosha, the mystic, and twisted, cunning Smerdyakov, the bastard child., Frequently lurid, nightmarish, always brilliant, the novel plunges the reader into a sordid love triangle, a pathological obsession, and a, a sordid love triangle, a pathological obsession, and a gripping courtroom drama., But throughout the whole, Dostoevsky searhes for the truth--about man, about life, about the existence of God., A terrifying answer to man's eternal questions, this monumental work remains the crowning achievement of perhaps the finest novelist of, the crowning achievement of perhaps the finest novelist of all time. From the Paperback edition.]
+1463 | Realms of Tolkien: Images of Middle-earth | [Realms of Tolkien: Images of Middle-earth Twenty new and familiar Tolkien artists are represented in this fabulous volume, breathing, Tolkien artists are represented in this fabulous volume, breathing an extraordinary variety of life into 58 different scenes, each of, variety of life into 58 different scenes, each of which is accompanied by appropriate passage from The Hobbit and The, accompanied by appropriate passage from The Hobbit and The Lord of the Rings and The Silmarillion]
+1502 | Selected Passages from Correspondence with Friends | [Selected Passages from Correspondence with Friends Nikolai Gogol wrote some letters to his friends, none of which were a nose, to his friends, none of which were a nose of high rank. Many are reproduced here (the letters, not noses).]
+1937 | The Best Short Stories of Dostoevsky (Modern Library) | [The Best Short Stories of Dostoevsky (Modern Library) This collection, unique to the Modern Library, gathers seven of Dostoevsky's key, to the Modern Library, gathers seven of Dostoevsky's key works and shows him to be equally adept at the short, shows him to be equally adept at the short story as with the novel., Exploring many of the same themes as in his longer works, these small masterpieces move from the tender and romantic, , these small masterpieces move from the tender and romantic White Nights, an archetypal nineteenth-century morality tale of pathos and, , an archetypal nineteenth-century morality tale of pathos and loss, to the famous Notes from the Underground, a story of, the famous Notes from the Underground, a story of guilt, ineffectiveness, and uncompromising cynicism, and the first major work of, , and uncompromising cynicism, and the first major work of existential literature., Among Dostoevsky's prototypical characters is Yemelyan in The Honest Thief, whose tragedy turns on an inability to resist crime., Presented in chronological order, in David Magarshack's celebrated translation, this is the definitive edition of Dostoevsky's best stories.]
+1985 | Brothers Karamazov | [Brothers Karamazov Four brothers reunite in their hometown in Russia., The murder of their father forces the brothers to question their beliefs about each other, religion, and morality.]
;
chunkTextWithMultivaluedField
-required_capability: chunk_function
+required_capability: chunk_function_v2
FROM employees
| EVAL chunks = CHUNK(job_positions)
@@ -132,4 +173,3 @@ emp_no:integer | first_name:keyword | last_name:keyword | chunks:keyword
-
diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ChunkBytesRefEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ChunkBytesRefEvaluator.java
index ed3e581175987..2d393f4008b2f 100644
--- a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ChunkBytesRefEvaluator.java
+++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ChunkBytesRefEvaluator.java
@@ -12,13 +12,12 @@
import org.elasticsearch.compute.data.Block;
import org.elasticsearch.compute.data.BytesRefBlock;
import org.elasticsearch.compute.data.BytesRefVector;
-import org.elasticsearch.compute.data.IntBlock;
-import org.elasticsearch.compute.data.IntVector;
import org.elasticsearch.compute.data.Page;
import org.elasticsearch.compute.operator.DriverContext;
import org.elasticsearch.compute.operator.EvalOperator;
import org.elasticsearch.compute.operator.Warnings;
import org.elasticsearch.core.Releasables;
+import org.elasticsearch.inference.ChunkingSettings;
import org.elasticsearch.xpack.esql.core.tree.Source;
/**
@@ -32,44 +31,28 @@ public final class ChunkBytesRefEvaluator implements EvalOperator.ExpressionEval
private final EvalOperator.ExpressionEvaluator str;
- private final EvalOperator.ExpressionEvaluator numChunks;
-
- private final EvalOperator.ExpressionEvaluator chunkSize;
+ private final ChunkingSettings chunkingSettings;
private final DriverContext driverContext;
private Warnings warnings;
public ChunkBytesRefEvaluator(Source source, EvalOperator.ExpressionEvaluator str,
- EvalOperator.ExpressionEvaluator numChunks, EvalOperator.ExpressionEvaluator chunkSize,
- DriverContext driverContext) {
+ ChunkingSettings chunkingSettings, DriverContext driverContext) {
this.source = source;
this.str = str;
- this.numChunks = numChunks;
- this.chunkSize = chunkSize;
+ this.chunkingSettings = chunkingSettings;
this.driverContext = driverContext;
}
@Override
public Block eval(Page page) {
try (BytesRefBlock strBlock = (BytesRefBlock) str.eval(page)) {
- try (IntBlock numChunksBlock = (IntBlock) numChunks.eval(page)) {
- try (IntBlock chunkSizeBlock = (IntBlock) chunkSize.eval(page)) {
- BytesRefVector strVector = strBlock.asVector();
- if (strVector == null) {
- return eval(page.getPositionCount(), strBlock, numChunksBlock, chunkSizeBlock);
- }
- IntVector numChunksVector = numChunksBlock.asVector();
- if (numChunksVector == null) {
- return eval(page.getPositionCount(), strBlock, numChunksBlock, chunkSizeBlock);
- }
- IntVector chunkSizeVector = chunkSizeBlock.asVector();
- if (chunkSizeVector == null) {
- return eval(page.getPositionCount(), strBlock, numChunksBlock, chunkSizeBlock);
- }
- return eval(page.getPositionCount(), strVector, numChunksVector, chunkSizeVector);
- }
+ BytesRefVector strVector = strBlock.asVector();
+ if (strVector == null) {
+ return eval(page.getPositionCount(), strBlock);
}
+ return eval(page.getPositionCount(), strVector);
}
}
@@ -77,13 +60,10 @@ public Block eval(Page page) {
public long baseRamBytesUsed() {
long baseRamBytesUsed = BASE_RAM_BYTES_USED;
baseRamBytesUsed += str.baseRamBytesUsed();
- baseRamBytesUsed += numChunks.baseRamBytesUsed();
- baseRamBytesUsed += chunkSize.baseRamBytesUsed();
return baseRamBytesUsed;
}
- public BytesRefBlock eval(int positionCount, BytesRefBlock strBlock, IntBlock numChunksBlock,
- IntBlock chunkSizeBlock) {
+ public BytesRefBlock eval(int positionCount, BytesRefBlock strBlock) {
try(BytesRefBlock.Builder result = driverContext.blockFactory().newBytesRefBlockBuilder(positionCount)) {
BytesRef strScratch = new BytesRef();
position: for (int p = 0; p < positionCount; p++) {
@@ -98,46 +78,19 @@ public BytesRefBlock eval(int positionCount, BytesRefBlock strBlock, IntBlock nu
result.appendNull();
continue position;
}
- switch (numChunksBlock.getValueCount(p)) {
- case 0:
- result.appendNull();
- continue position;
- case 1:
- break;
- default:
- warnings().registerException(new IllegalArgumentException("single-value function encountered multi-value"));
- result.appendNull();
- continue position;
- }
- switch (chunkSizeBlock.getValueCount(p)) {
- case 0:
- result.appendNull();
- continue position;
- case 1:
- break;
- default:
- warnings().registerException(new IllegalArgumentException("single-value function encountered multi-value"));
- result.appendNull();
- continue position;
- }
BytesRef str = strBlock.getBytesRef(strBlock.getFirstValueIndex(p), strScratch);
- int numChunks = numChunksBlock.getInt(numChunksBlock.getFirstValueIndex(p));
- int chunkSize = chunkSizeBlock.getInt(chunkSizeBlock.getFirstValueIndex(p));
- Chunk.process(result, str, numChunks, chunkSize);
+ Chunk.process(result, str, this.chunkingSettings);
}
return result.build();
}
}
- public BytesRefBlock eval(int positionCount, BytesRefVector strVector, IntVector numChunksVector,
- IntVector chunkSizeVector) {
+ public BytesRefBlock eval(int positionCount, BytesRefVector strVector) {
try(BytesRefBlock.Builder result = driverContext.blockFactory().newBytesRefBlockBuilder(positionCount)) {
BytesRef strScratch = new BytesRef();
position: for (int p = 0; p < positionCount; p++) {
BytesRef str = strVector.getBytesRef(p, strScratch);
- int numChunks = numChunksVector.getInt(p);
- int chunkSize = chunkSizeVector.getInt(p);
- Chunk.process(result, str, numChunks, chunkSize);
+ Chunk.process(result, str, this.chunkingSettings);
}
return result.build();
}
@@ -145,12 +98,12 @@ public BytesRefBlock eval(int positionCount, BytesRefVector strVector, IntVector
@Override
public String toString() {
- return "ChunkBytesRefEvaluator[" + "str=" + str + ", numChunks=" + numChunks + ", chunkSize=" + chunkSize + "]";
+ return "ChunkBytesRefEvaluator[" + "str=" + str + ", chunkingSettings=" + chunkingSettings + "]";
}
@Override
public void close() {
- Releasables.closeExpectNoException(str, numChunks, chunkSize);
+ Releasables.closeExpectNoException(str);
}
private Warnings warnings() {
@@ -170,27 +123,23 @@ static class Factory implements EvalOperator.ExpressionEvaluator.Factory {
private final EvalOperator.ExpressionEvaluator.Factory str;
- private final EvalOperator.ExpressionEvaluator.Factory numChunks;
-
- private final EvalOperator.ExpressionEvaluator.Factory chunkSize;
+ private final ChunkingSettings chunkingSettings;
public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory str,
- EvalOperator.ExpressionEvaluator.Factory numChunks,
- EvalOperator.ExpressionEvaluator.Factory chunkSize) {
+ ChunkingSettings chunkingSettings) {
this.source = source;
this.str = str;
- this.numChunks = numChunks;
- this.chunkSize = chunkSize;
+ this.chunkingSettings = chunkingSettings;
}
@Override
public ChunkBytesRefEvaluator get(DriverContext context) {
- return new ChunkBytesRefEvaluator(source, str.get(context), numChunks.get(context), chunkSize.get(context), context);
+ return new ChunkBytesRefEvaluator(source, str.get(context), chunkingSettings, context);
}
@Override
public String toString() {
- return "ChunkBytesRefEvaluator[" + "str=" + str + ", numChunks=" + numChunks + ", chunkSize=" + chunkSize + "]";
+ return "ChunkBytesRefEvaluator[" + "str=" + str + ", chunkingSettings=" + chunkingSettings + "]";
}
}
}
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java
index 6497ddfc6afbf..7f051e8c7d9df 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java
@@ -1658,7 +1658,7 @@ public enum Cap {
/**
* Chunk function.
*/
- CHUNK_FUNCTION(Build.current().isSnapshot()),
+ CHUNK_FUNCTION_V2(Build.current().isSnapshot()),
/**
* Support for vector similarity functtions pushdown
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Chunk.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Chunk.java
index c11063616b88d..67f1ecec992b4 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Chunk.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Chunk.java
@@ -12,20 +12,25 @@
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.compute.ann.Evaluator;
+import org.elasticsearch.compute.ann.Fixed;
import org.elasticsearch.compute.data.BytesRefBlock;
import org.elasticsearch.compute.operator.EvalOperator;
import org.elasticsearch.inference.ChunkingSettings;
import org.elasticsearch.xpack.core.inference.chunking.Chunker;
import org.elasticsearch.xpack.core.inference.chunking.ChunkerBuilder;
+import org.elasticsearch.xpack.core.inference.chunking.ChunkingSettingsBuilder;
+import org.elasticsearch.xpack.core.inference.chunking.ChunkingSettingsOptions;
import org.elasticsearch.xpack.core.inference.chunking.SentenceBoundaryChunkingSettings;
import org.elasticsearch.xpack.esql.core.InvalidArgumentException;
import org.elasticsearch.xpack.esql.core.expression.Expression;
-import org.elasticsearch.xpack.esql.core.expression.Literal;
+import org.elasticsearch.xpack.esql.core.expression.FoldContext;
import org.elasticsearch.xpack.esql.core.expression.MapExpression;
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.expression.function.Example;
+import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesTo;
+import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesToLifecycle;
import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
import org.elasticsearch.xpack.esql.expression.function.MapParam;
import org.elasticsearch.xpack.esql.expression.function.OptionalArgument;
@@ -35,11 +40,12 @@
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
import java.io.IOException;
-import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
+import java.util.stream.Collectors;
+import static java.util.Map.entry;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString;
@@ -48,56 +54,49 @@ public class Chunk extends EsqlScalarFunction implements OptionalArgument {
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Chunk", Chunk::new);
- public static final int DEFAULT_NUM_CHUNKS = Integer.MAX_VALUE;
- public static final int DEFAULT_CHUNK_SIZE = 300;
-
- private final Expression field, options;
-
- static final String NUM_CHUNKS = "num_chunks";
- static final String CHUNK_SIZE = "chunk_size";
-
- public static final Map ALLOWED_OPTIONS = Map.of(NUM_CHUNKS, DataType.INTEGER, CHUNK_SIZE, DataType.INTEGER);
-
- @FunctionInfo(returnType = "keyword", preview = true, description = """
- Use `CHUNK` to split a text field into smaller chunks.""", detailedDescription = """
- Chunk can be used on fields from the text famiy like <> and <>.
- Chunk will split a text field into smaller chunks, using a sentence-based chunking strategy.
- The number of chunks returned, and the length of the sentences used to create the chunks can be specified.
- """, examples = { @Example(file = "chunk", tag = "chunk-with-field", applies_to = "stack: preview 9.3.0") })
+ static final int DEFAULT_CHUNK_SIZE = 300;
+ public static final ChunkingSettings DEFAULT_CHUNKING_SETTINGS = new SentenceBoundaryChunkingSettings(DEFAULT_CHUNK_SIZE, 0);
+
+ private final Expression field, chunkingSettings;
+
+ public static final Map ALLOWED_CHUNKING_SETTING_OPTIONS = Map.ofEntries(
+ entry(ChunkingSettingsOptions.STRATEGY.toString(), DataType.KEYWORD),
+ entry(ChunkingSettingsOptions.MAX_CHUNK_SIZE.toString(), DataType.INTEGER),
+ entry(ChunkingSettingsOptions.OVERLAP.toString(), DataType.INTEGER),
+ entry(ChunkingSettingsOptions.SENTENCE_OVERLAP.toString(), DataType.INTEGER),
+ entry(ChunkingSettingsOptions.SEPARATOR_GROUP.toString(), DataType.KEYWORD),
+ entry(ChunkingSettingsOptions.SEPARATORS.toString(), DataType.KEYWORD)
+ );
+
+ @FunctionInfo(
+ appliesTo = { @FunctionAppliesTo(lifeCycle = FunctionAppliesToLifecycle.PREVIEW, version = "9.3.0") },
+ returnType = "keyword",
+ preview = true,
+ description = """
+ Use `CHUNK` to split a text field into smaller chunks.""",
+ detailedDescription = """
+ Chunk can be used on fields from the text family like <> and <>.
+ Chunk will split a text field into smaller chunks, using a sentence-based chunking strategy by default.
+ The chunking strategy and its settings can be customized with the optional `chunking_settings` argument.
+ """,
+ examples = {
+ @Example(file = "chunk", tag = "chunk-with-field", applies_to = "stack: preview 9.3.0"),
+ @Example(file = "chunk", tag = "chunk-with-chunking-settings", applies_to = "stack: preview 9.3.0") }
+ )
public Chunk(
Source source,
@Param(name = "field", type = { "keyword", "text" }, description = "The input to chunk.") Expression field,
@MapParam(
- name = "options",
- params = {
- @MapParam.MapParamEntry(
- name = "num_chunks",
- type = "integer",
- description = "The number of chunks to return. Defaults to return all chunks."
- ),
- @MapParam.MapParamEntry(
- name = "chunk_size",
- type = "integer",
- description = "The size of sentence-based chunks to use. Defaults to " + DEFAULT_CHUNK_SIZE
- ), },
- description = "Options to customize chunking behavior.",
+ name = "chunking_settings",
+ description = "Options to customize chunking behavior. Refer to the "
+ + "[Inference API documentation](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put"
+ + "#operation-inference-put-body-application-json-chunking_settings) for valid values for `chunking_settings`.",
optional = true
- ) Expression options
- ) {
- super(source, options == null ? List.of(field) : List.of(field, options));
- this.field = field;
- this.options = options;
- }
-
- private Chunk(
- Source source,
- Expression field,
- Expression options,
- boolean unused // dummy parameter to differentiate constructors
+ ) Expression chunkingSettings
) {
- super(source, options == null ? List.of(field) : List.of(field, options));
+ super(source, chunkingSettings == null ? List.of(field) : List.of(field, chunkingSettings));
this.field = field;
- this.options = options;
+ this.chunkingSettings = chunkingSettings;
}
public Chunk(StreamInput in) throws IOException {
@@ -112,7 +111,7 @@ public Chunk(StreamInput in) throws IOException {
public void writeTo(StreamOutput out) throws IOException {
source().writeTo(out);
out.writeNamedWriteable(field);
- out.writeOptionalNamedWriteable(options);
+ out.writeOptionalNamedWriteable(chunkingSettings);
}
@Override
@@ -130,28 +129,27 @@ protected TypeResolution resolveType() {
if (childrenResolved() == false) {
return new TypeResolution("Unresolved children");
}
- return isString(field(), sourceText(), FIRST).and(Options.resolve(options, source(), SECOND, ALLOWED_OPTIONS, this::verifyOptions));
+
+ return isString(field(), sourceText(), FIRST).and(
+ Options.resolve(chunkingSettings, source(), SECOND, ALLOWED_CHUNKING_SETTING_OPTIONS, this::validateChunkingSettings)
+ );
}
- private void verifyOptions(Map optionsMap) {
- if (options == null) {
+ private void validateChunkingSettings(Map chunkingSettingsMap) {
+ if (chunkingSettings == null) {
return;
}
- Integer numChunks = (Integer) optionsMap.get(NUM_CHUNKS);
- if (numChunks != null && numChunks < 0) {
- throw new InvalidArgumentException("[{}] cannot be negative, found [{}]", NUM_CHUNKS, numChunks);
- }
- Integer chunkSize = (Integer) optionsMap.get(CHUNK_SIZE);
- if (chunkSize != null && chunkSize < 0) {
- throw new InvalidArgumentException("[{}] cannot be negative, found [{}]", CHUNK_SIZE, chunkSize);
+ try {
+ toChunkingSettings(chunkingSettingsMap);
+ } catch (IllegalArgumentException e) {
+ throw new InvalidArgumentException(e.getMessage(), e);
}
-
}
@Override
public boolean foldable() {
- return field().foldable() && (options() == null || options().foldable());
+ return field().foldable() && (chunkingSettings() == null || chunkingSettings().foldable());
}
@Override
@@ -165,23 +163,22 @@ public Expression replaceChildren(List newChildren) {
@Override
protected NodeInfo extends Expression> info() {
- return NodeInfo.create(this, Chunk::new, field, options);
+ return NodeInfo.create(this, Chunk::new, field, chunkingSettings);
}
Expression field() {
return field;
}
- Expression options() {
- return options;
+ Expression chunkingSettings() {
+ return chunkingSettings;
}
@Evaluator(extraName = "BytesRef")
- static void process(BytesRefBlock.Builder builder, BytesRef str, int numChunks, int chunkSize) {
+ static void process(BytesRefBlock.Builder builder, BytesRef str, @Fixed ChunkingSettings chunkingSettings) {
String content = str.utf8ToString();
- ChunkingSettings settings = new SentenceBoundaryChunkingSettings(chunkSize, 0);
- List chunks = chunkText(content, settings, numChunks);
+ List chunks = chunkText(content, chunkingSettings);
boolean multivalued = chunks.size() > 1;
if (multivalued) {
@@ -196,43 +193,48 @@ static void process(BytesRefBlock.Builder builder, BytesRef str, int numChunks,
}
}
- public static List chunkText(String content, ChunkingSettings chunkingSettings, int numChunks) {
+ public static List chunkText(String content, ChunkingSettings chunkingSettings) {
Chunker chunker = ChunkerBuilder.fromChunkingStrategy(chunkingSettings.getChunkingStrategy());
- return chunker.chunk(content, chunkingSettings)
- .stream()
- .map(offset -> content.substring(offset.start(), offset.end()))
- .limit(numChunks > 0 ? numChunks : DEFAULT_NUM_CHUNKS)
- .toList();
+ return chunker.chunk(content, chunkingSettings).stream().map(offset -> content.substring(offset.start(), offset.end())).toList();
}
@Override
public boolean equals(Object o) {
if (o == null || getClass() != o.getClass()) return false;
Chunk chunk = (Chunk) o;
- return Objects.equals(field(), chunk.field()) && Objects.equals(options(), chunk.options());
+ return Objects.equals(field(), chunk.field()) && Objects.equals(chunkingSettings(), chunk.chunkingSettings());
}
@Override
public int hashCode() {
- return Objects.hash(field(), options());
+ return Objects.hash(field(), chunkingSettings());
}
@Override
public EvalOperator.ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
+ ChunkingSettings chunkingSettings = DEFAULT_CHUNKING_SETTINGS;
- Map optionsMap = new HashMap<>();
- if (options() != null) {
- Options.populateMap(((MapExpression) options), optionsMap, source(), SECOND, ALLOWED_OPTIONS);
+ if (chunkingSettings() != null) {
+ chunkingSettings = toChunkingSettings((MapExpression) chunkingSettings());
}
- int numChunks = (Integer) optionsMap.getOrDefault(NUM_CHUNKS, DEFAULT_NUM_CHUNKS);
- int chunkSize = (Integer) optionsMap.getOrDefault(CHUNK_SIZE, DEFAULT_CHUNK_SIZE);
+ return new ChunkBytesRefEvaluator.Factory(source(), toEvaluator.apply(field), chunkingSettings);
+ }
- return new ChunkBytesRefEvaluator.Factory(
- source(),
- toEvaluator.apply(field),
- toEvaluator.apply(new Literal(source(), numChunks, DataType.INTEGER)),
- toEvaluator.apply(new Literal(source(), chunkSize, DataType.INTEGER))
- );
+ // TODO remove?
+ private static ChunkingSettings toChunkingSettings(MapExpression map) {
+ Map chunkingSettingsMap = map.keyFoldedMap().entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, e -> {
+ Object value = e.getValue().fold(FoldContext.small());
+ return value instanceof BytesRef ? ((BytesRef) value).utf8ToString() : value;
+ }));
+ return ChunkingSettingsBuilder.fromMap(chunkingSettingsMap);
+ }
+
+ private static ChunkingSettings toChunkingSettings(Map expressionMap) {
+ Map chunkingSettingsMap = expressionMap.entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, e -> {
+ Object value = e.getValue();
+ return value instanceof BytesRef ? ((BytesRef) value).utf8ToString() : value;
+ }));
+ return ChunkingSettingsBuilder.fromMap(chunkingSettingsMap);
}
}
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java
index fd7cbfb6fa723..0c0d05fc13119 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java
@@ -357,7 +357,7 @@ public final void test() throws Throwable {
);
assumeFalse(
"CSV tests cannot currently handle CHUNK function",
- testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.CHUNK_FUNCTION.capabilityName())
+ testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.CHUNK_FUNCTION_V2.capabilityName())
);
if (Build.current().isSnapshot()) {
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java
index 1de00620b898b..1ba31934cb60e 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java
@@ -3319,38 +3319,48 @@ public void testSubqueryInFromWithLookupJoinOnFullTextFunction() {
}
public void testChunkFunctionInvalidInputs() {
- if (EsqlCapabilities.Cap.CHUNK_FUNCTION.isEnabled()) {
+ if (EsqlCapabilities.Cap.CHUNK_FUNCTION_V2.isEnabled()) {
+ assertThat(
+ error("from test | EVAL chunks = CHUNK(body, null)", fullTextAnalyzer, VerificationException.class),
+ equalTo("1:27: second argument of [CHUNK(body, null)] cannot be null, received [null]")
+ );
+ assertThat(
+ error("from test | EVAL chunks = CHUNK(body, {\"strategy\": \"invalid\"})", fullTextAnalyzer, VerificationException.class),
+ equalTo("1:27: Invalid chunkingStrategy invalid")
+ );
assertThat(
error(
- "from test | EVAL chunks = CHUNK(body, {\"num_chunks\": null, \"chunk_size\": 20})",
+ "from test | EVAL chunks = CHUNK(body, {\"strategy\": \"sentence\", \"max_chunk_size\": 5, \"sentence_overlap\": 1})",
fullTextAnalyzer,
- ParsingException.class
+ VerificationException.class
),
- equalTo("1:39: Invalid named parameter [\"num_chunks\":null], NULL is not supported")
+ equalTo(
+ "1:27: Validation Failed: 1: [chunking_settings] Invalid value [5.0]. "
+ + "[max_chunk_size] must be a greater than or equal to [20.0];"
+ )
);
assertThat(
error(
- "from test | EVAL chunks = CHUNK(body, {\"num_chunks\": 3, \"chunk_size\": null})",
+ "from test | EVAL chunks = CHUNK(body, {\"strategy\": \"sentence\", \"max_chunk_size\": 5, \"sentence_overlap\": 5})",
fullTextAnalyzer,
- ParsingException.class
+ VerificationException.class
),
- equalTo("1:39: Invalid named parameter [\"chunk_size\":null], NULL is not supported")
- );
- assertThat(
- error("from test | EVAL chunks = CHUNK(body, {\"num_chunks\":\"foo\"})", fullTextAnalyzer),
- equalTo("1:27: Invalid option [num_chunks] in [CHUNK(body, {\"num_chunks\":\"foo\"})], cannot cast [foo] to [integer]")
- );
- assertThat(
- error("from test | EVAL chunks = CHUNK(body, {\"chunk_size\":\"foo\"})", fullTextAnalyzer),
- equalTo("1:27: Invalid option [chunk_size] in [CHUNK(body, {\"chunk_size\":\"foo\"})], cannot cast [foo] to [integer]")
- );
- assertThat(
- error("from test | EVAL chunks = CHUNK(body, {\"num_chunks\":-1})", fullTextAnalyzer),
- equalTo("1:27: [num_chunks] cannot be negative, found [-1]")
+ equalTo(
+ "1:27: Validation Failed: 1: [chunking_settings] Invalid value [5.0]. "
+ + "[max_chunk_size] must be a greater than or equal to [20.0];2: sentence_overlap[5] must be either 0 or 1;"
+ )
);
assertThat(
- error("from test | EVAL chunks = CHUNK(body, {\"chunk_size\":-1})", fullTextAnalyzer),
- equalTo("1:27: [chunk_size] cannot be negative, found [-1]")
+ error(
+ "from test | EVAL chunks = CHUNK(body, {\"strategy\": \"sentence\", \"max_chunk_size\": 20, "
+ + "\"sentence_overlap\": 1, \"extra_value\": \"foo\"})",
+ fullTextAnalyzer,
+ VerificationException.class
+ ),
+ containsString(
+ "1:27: Invalid option [extra_value] in [CHUNK(body, {\"strategy\": \"sentence\", "
+ + "\"max_chunk_size\": 20, \"sentence_overlap\": 1, \"extra_value\": \"foo\"})], expected one of ["
+ )
);
}
}
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ChunkTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ChunkTests.java
index 21592b5b95424..5f9d8dda4a746 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ChunkTests.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ChunkTests.java
@@ -14,6 +14,7 @@
import org.elasticsearch.compute.data.Block;
import org.elasticsearch.compute.operator.EvalOperator;
import org.elasticsearch.inference.ChunkingSettings;
+import org.elasticsearch.xpack.core.inference.chunking.ChunkingSettingsOptions;
import org.elasticsearch.xpack.core.inference.chunking.SentenceBoundaryChunkingSettings;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.Literal;
@@ -24,15 +25,18 @@
import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.List;
import java.util.Objects;
+import java.util.Set;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import static org.elasticsearch.compute.data.BlockUtils.toJavaObject;
-import static org.elasticsearch.xpack.esql.expression.function.scalar.string.Chunk.CHUNK_SIZE;
-import static org.elasticsearch.xpack.esql.expression.function.scalar.string.Chunk.NUM_CHUNKS;
+import static org.elasticsearch.xpack.core.inference.chunking.ChunkingSettingsTests.createRandomChunkingSettings;
+import static org.elasticsearch.xpack.esql.expression.function.scalar.string.Chunk.ALLOWED_CHUNKING_SETTING_OPTIONS;
+import static org.elasticsearch.xpack.esql.expression.function.scalar.string.Chunk.DEFAULT_CHUNKING_SETTINGS;
import static org.hamcrest.Matchers.equalTo;
public class ChunkTests extends AbstractScalarFunctionTestCase {
@@ -64,18 +68,15 @@ public static Iterable