@@ -32,17 +32,27 @@ class CompletionContext
32
32
protected $ charIndex = 0 ;
33
33
34
34
/**
35
- * An array containing the individual words in the current command line.
35
+ * An array of the individual words in the current command line.
36
36
*
37
37
* This is not set until $this->splitCommand() is called, when it is populated by
38
38
* $commandLine exploded by $wordBreaks
39
39
*
40
40
* Bash equivalent: COMP_WORDS
41
41
*
42
- * @var array |null
42
+ * @var string[] |null
43
43
*/
44
44
protected $ words = null ;
45
45
46
+ /**
47
+ * Words from the current command line before quotes and escaping are processed
48
+ *
49
+ * This is indexed the same as $this->words, but the words are kept in their raw input form, including
50
+ * quotes and escaping.
51
+ *
52
+ * @var string[]|null
53
+ */
54
+ protected $ rawWords = null ;
55
+
46
56
/**
47
57
* The index in $this->words containing the word at the current cursor position.
48
58
*
@@ -61,7 +71,7 @@ class CompletionContext
61
71
*
62
72
* @var string
63
73
*/
64
- protected $ wordBreaks = "' \" () = \t\n" ;
74
+ protected $ wordBreaks = "= \t\n" ;
65
75
66
76
/**
67
77
* Set the whole contents of the command line as a string
@@ -101,6 +111,22 @@ public function getCurrentWord()
101
111
return '' ;
102
112
}
103
113
114
/**
 * Get the raw, unprocessed text of the word at the cursor
 *
 * The value is taken from the raw word list, so any quote characters and
 * backslash escapes typed on the command line are still present.
 *
 * @return string The raw word at the current word index, or an empty string if none is set
 */
public function getRawCurrentWord()
{
    $hasRawWord = isset($this->rawWords[$this->wordIndex]);

    return $hasRawWord ? $this->rawWords[$this->wordIndex] : '';
}
129
+
104
130
/**
105
131
* Return a word by index from the command line
106
132
*
@@ -132,6 +158,22 @@ public function getWords()
132
158
return $ this ->words ;
133
159
}
134
160
161
/**
 * Return the words of the command line in their literal, as-typed form
 *
 * Entries share their indexes with getWords(), but quoting and escaping are left intact.
 * The command line is split on demand if the raw words have not been computed yet.
 *
 * @return string[]
 */
public function getRawWords()
{
    // Lazily split the command line the first time the raw words are needed
    if (null === $this->rawWords) {
        $this->splitCommand();
    }

    return $this->rawWords;
}
176
+
135
177
/**
136
178
* Get the index of the word the cursor is currently in
137
179
*
@@ -178,12 +220,15 @@ public function setCharIndex($index)
178
220
* This defaults to a sane value based on BASH's word break characters and shouldn't
179
221
* need to be changed unless your completions contain the default word break characters.
180
222
*
223
+ * @deprecated This is becoming an internal setting that doesn't make sense to expose publicly.
224
+ *
181
225
* @see wordBreaks
182
226
* @param string $charList - a single string containing all of the characters to break words on
183
227
*/
184
228
public function setWordBreaks($charList)
{
    // Quote characters are never treated as word breaks: quoted strings are handled
    // separately from word breaks, so both kinds of quote are stripped from the list
    $withoutQuotes = str_replace(array('"', '\''), '', $charList);

    $this->wordBreaks = $withoutQuotes;

    // Discard any previously computed words so they are split again with the new breaks
    $this->reset();
}
189
234
@@ -194,57 +239,146 @@ public function setWordBreaks($charList)
194
239
*/
195
240
protected function splitCommand()
{
    // Always start from a clean slate. The arrays must exist before count() is called
    // below (count(null) warns on PHP 7.2+ and throws a TypeError on PHP 8), which
    // previously happened for an empty command line or one starting with break characters.
    // Re-initialising also stops repeated calls from appending to stale results.
    $this->words = array();
    $this->rawWords = array();
    $this->wordIndex = null;

    $tokens = $this->tokenizeString($this->commandLine);

    foreach ($tokens as $token) {
        $isBreak = ($token['type'] === 'break');

        // Every non-break token is a word: keep both its processed and its raw form
        if (!$isBreak) {
            $this->words[] = $this->getTokenValue($token);
            $this->rawWords[] = $token['value'];
        }

        // Determine which word index the cursor is inside once we reach its offset
        if ($this->wordIndex === null && $this->charIndex <= $token['offsetEnd']) {
            $this->wordIndex = count($this->words) - 1;

            if ($isBreak) {
                // Cursor is in the break-space after a word
                // Push an empty word at the cursor to allow completion of new terms at the cursor,
                // ignoring words ahead
                $this->wordIndex++;
                $this->words[] = '';
                $this->rawWords[] = '';
                continue;
            }

            if ($this->charIndex < $token['offsetEnd']) {
                // Cursor is inside the current word - truncate the word at the cursor to complete on
                // This emulates BASH completion's behaviour with COMP_CWORD

                // Create a copy of the token with its value truncated
                $truncatedToken = $token;
                $relativeOffset = $this->charIndex - $token['offset'];
                $truncatedToken['value'] = substr($token['value'], 0, $relativeOffset);

                // Replace the current word with the truncated value
                $this->words[$this->wordIndex] = $this->getTokenValue($truncatedToken);
                $this->rawWords[$this->wordIndex] = $truncatedToken['value'];
            }
        }
    }

    // Cursor position is past the end of the command line string - consider it a new word
    if ($this->wordIndex === null) {
        $this->wordIndex = count($this->words);
        $this->words[] = '';
        $this->rawWords[] = '';
    }
}
247
286
287
/**
 * Return a token's value with escaping and quotes removed
 *
 * For quoted tokens the opening quote is removed, and the closing quote is removed only
 * when it is the same character as the opening quote and is not itself backslash-escaped.
 * This matters for unclosed strings (for example a token truncated at the cursor): a value
 * such as `"don'` keeps its apostrophe, and `"a\"` yields `a"` instead of `a\`.
 *
 * @see self::tokenizeString()
 * @param array $token A token with at least "type" and "value" keys
 * @return string
 */
protected function getTokenValue($token)
{
    $value = $token['value'];

    if ($token['type'] === 'quoted' && $value !== '') {
        $quote = $value[0];

        if ($quote === '"' || $quote === '\'') {
            // Remove the opening quote (cast guards against substr() returning false on old PHP)
            $value = (string) substr($value, 1);

            // Remove the closing quote if the string was actually closed
            if ($value !== '' && substr($value, -1) === $quote) {
                $inner = (string) substr($value, 0, -1);

                // An odd number of backslashes before the final quote means the quote is
                // escaped and therefore part of the content, not the terminator
                $trailingBackslashes = strlen($inner) - strlen(rtrim($inner, '\\'));

                if ($trailingBackslashes % 2 === 0) {
                    $value = $inner;
                }
            }
        }
    }

    // Remove escape characters
    $value = preg_replace('/\\\\(.)/', '$1', $value);

    return $value;
}
308
+
309
/**
 * Break a string into words, quoted strings and non-words (breaks)
 *
 * Returns an array of unmodified segments of $string with offset and type information.
 * Quoted strings may be unterminated, in which case they run to the end of the input.
 *
 * @param string $string
 * @return array as [ [type => string, value => string, offset => int, offsetEnd => int], ... ]
 *               where type is one of "quoted", "word" or "break", and offsetEnd is
 *               offset plus the byte length of value
 */
protected function tokenizeString($string)
{
    // Map capture groups to returned token type
    $typeMap = array(
        'double_quote_string' => 'quoted',
        'single_quote_string' => 'quoted',
        'word' => 'word',
        'break' => 'break',
    );

    // Backslash-escape every non-word break character, including whitespace.
    // preg_quote won't work here as it doesn't understand the ignore whitespace flag ("x").
    // Letters, digits and underscores are left alone: they are literal inside a character
    // class, whereas a backslash in front of them would form an escape such as \t or \d.
    $breaks = preg_replace('/(\W)/s', '\\\\$1', $this->wordBreaks);

    $pattern = <<<"REGEX"
        /(?:
            (?P<double_quote_string>
                "(\\\\.|[^\"\\\\])*(?:"|$)
            ) |
            (?P<single_quote_string>
                '(\\\\.|[^'\\\\])*(?:'|$)
            ) |
            (?P<word>
                (?:\\\\.|[^{$breaks}])+
            ) |
            (?P<break>
                [{$breaks}]+
            )
        )/x
REGEX;

    $tokens = array();

    if (!preg_match_all($pattern, $string, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER)) {
        return $tokens;
    }

    foreach ($matches as $set) {
        foreach ($set as $groupName => $match) {
            // Ignore integer indices preg_match outputs (duplicates of named groups)
            if (is_int($groupName)) {
                continue;
            }

            // Skip if the offset indicates this group didn't match
            if ($match[1] === -1) {
                continue;
            }

            $tokens[] = array(
                'type' => $typeMap[$groupName],
                'value' => $match[0],
                'offset' => $match[1],
                'offsetEnd' => $match[1] + strlen($match[0]),
            );

            // Move to the next set (only one named group can match per set)
            break;
        }
    }

    return $tokens;
}
381
+
248
382
/**
249
383
* Reset the computed words so that $this->splitWords is forced to run again
250
384
*/
0 commit comments