Skip to content

Commit bc095fe

Browse files
authored
Merge pull request #82 from stecman/string-arguments
Rewrite command-line splitting to tokenize quoted strings
2 parents bd07a24 + cbec6e8 commit bc095fe

File tree

6 files changed

+374
-51
lines changed

6 files changed

+374
-51
lines changed

src/CompletionCommand.php

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,54 @@ protected function execute(InputInterface $input, OutputInterface $output)
121121
$output->write($hook, true);
122122
} else {
123123
$handler->setContext(new EnvironmentCompletionContext());
124-
$output->write($this->runCompletion(), true);
124+
125+
// Get completion results
126+
$results = $this->runCompletion();
127+
128+
// Escape results for the current shell
129+
$shellType = $input->getOption('shell-type') ?: $this->getShellType();
130+
131+
foreach ($results as &$result) {
132+
$result = $this->escapeForShell($result, $shellType);
133+
}
134+
135+
$output->write($results, true);
136+
}
137+
}
138+
139+
/**
140+
* Escape each completion result for the specified shell
141+
*
142+
* @param string $result - Completion results that should appear in the shell
143+
* @param string $shellType - Valid shell type from HookFactory
144+
* @return string
145+
*/
146+
protected function escapeForShell($result, $shellType)
{
    // Only BASH results are transformed; any other shell type gets the result back untouched
    if ($shellType != 'bash') {
        return $result;
    }

    // BASH requires special escaping for multi-word and special character results.
    // This emulates registering completion with `-o filenames`, without side-effects like dir name slashes.
    $rawCurrentWord = $this->handler->getContext()->getRawCurrentWord();
    $openingChar = substr($rawCurrentWord, 0, 1);

    if ($openingChar == "'") {
        // The word being completed is single-quoted: escape single quotes in the result
        $escaped = str_replace("'", "\\'", $result);
    } elseif ($openingChar == '"') {
        // The word being completed is double-quoted: escape double quotes in the result
        $escaped = str_replace('"', '\\"', $result);
    } else {
        // Unquoted word: backslash-escape whitespace, quotes and backslashes
        $escaped = preg_replace('/([\s\'"\\\\])/', '\\\\$1', $result);
    }

    // Escape output to prevent special characters being lost when passing results to compgen
    return escapeshellarg($escaped);
}
127174

src/CompletionContext.php

Lines changed: 173 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -32,17 +32,27 @@ class CompletionContext
3232
protected $charIndex = 0;
3333

3434
/**
35-
* An array containing the individual words in the current command line.
35+
* An array of the individual words in the current command line.
3636
*
3737
* This is not set until $this->splitCommand() is called, when it is populated by
3838
* $commandLine exploded by $wordBreaks
3939
*
4040
* Bash equivalent: COMP_WORDS
4141
*
42-
* @var array|null
42+
* @var string[]|null
4343
*/
4444
protected $words = null;
4545

46+
/**
47+
* Words from the current command line before quotes and escaping are processed
48+
*
49+
* This is indexed the same as $this->words, but the words are kept in their raw input form, including
50+
* quotes and escaping.
51+
*
52+
* @var string[]|null
53+
*/
54+
protected $rawWords = null;
55+
4656
/**
4757
* The index in $this->words containing the word at the current cursor position.
4858
*
@@ -61,7 +71,7 @@ class CompletionContext
6171
*
6272
* @var string
6373
*/
64-
protected $wordBreaks = "'\"()= \t\n";
74+
protected $wordBreaks = "= \t\n";
6575

6676
/**
6777
* Set the whole contents of the command line as a string
@@ -101,6 +111,22 @@ public function getCurrentWord()
101111
return '';
102112
}
103113

114+
/**
115+
* Return the unprocessed string for the word under the cursor
116+
*
117+
* This preserves any quotes and escaping that are present in the input command line.
118+
*
119+
* @return string
120+
*/
121+
public function getRawCurrentWord()
{
    // Tokenize the command line on first use, using the same lazy check as getRawWords().
    // Without this, asking for the raw current word before anything else has triggered
    // splitCommand() would always yield an empty string.
    if ($this->rawWords === null) {
        $this->splitCommand();
    }

    if (isset($this->rawWords[$this->wordIndex])) {
        return $this->rawWords[$this->wordIndex];
    }

    // No raw word is recorded at the cursor's word index
    return '';
}
129+
104130
/**
105131
* Return a word by index from the command line
106132
*
@@ -132,6 +158,22 @@ public function getWords()
132158
return $this->words;
133159
}
134160

161+
/**
162+
* Get the unprocessed/literal words from the command line
163+
*
164+
* This is indexed the same as getWords(), but preserves any quoting and escaping from the command line
165+
*
166+
* @return string[]
167+
*/
168+
public function getRawWords()
{
    // The raw words are only computed on demand: split the command line the first time they are needed
    $notYetSplit = ($this->rawWords === null);

    if ($notYetSplit) {
        $this->splitCommand();
    }

    return $this->rawWords;
}
176+
135177
/**
136178
* Get the index of the word the cursor is currently in
137179
*
@@ -178,12 +220,15 @@ public function setCharIndex($index)
178220
* This defaults to a sane value based on BASH's word break characters and shouldn't
179221
* need to be changed unless your completions contain the default word break characters.
180222
*
223+
* @deprecated This is becoming an internal setting that doesn't make sense to expose publicly.
224+
*
181225
* @see wordBreaks
182226
* @param string $charList - a single string containing all of the characters to break words on
183227
*/
184228
public function setWordBreaks($charList)
{
    // Quote characters are stripped from the break list - quoted strings are tokenized
    // separately from word breaks, so they must not act as break characters
    $quoteChars = array('"', '\'');
    $breaksWithoutQuotes = str_replace($quoteChars, '', $charList);

    $this->wordBreaks = $breaksWithoutQuotes;

    // Discard any previously computed words so they are recalculated with the new breaks
    $this->reset();
}
189234

@@ -194,57 +239,146 @@ public function setWordBreaks($charList)
194239
*/
195240
protected function splitCommand()
196241
{
197-
$this->words = array();
198-
$this->wordIndex = null;
199-
$cursor = 0;
242+
$tokens = $this->tokenizeString($this->commandLine);
200243

201-
$breaks = preg_quote($this->wordBreaks);
202-
203-
if (!preg_match_all("/([^$breaks]*)([$breaks]*)/", $this->commandLine, $matches)) {
204-
return;
205-
}
206-
207-
// Groups:
208-
// 1: Word
209-
// 2: Break characters
210-
foreach ($matches[0] as $index => $wholeMatch) {
211-
// Determine which word the cursor is in
212-
$cursor += strlen($wholeMatch);
213-
$word = $matches[1][$index];
214-
$breaks = $matches[2][$index];
215-
216-
if ($this->wordIndex === null && $cursor >= $this->charIndex) {
217-
$this->wordIndex = $index;
218-
219-
// Find the user's cursor position relative to the end of this word
220-
// The end of the word is the internal cursor minus any break characters that were captured
221-
$cursorWordOffset = $this->charIndex - ($cursor - strlen($breaks));
244+
foreach ($tokens as $token) {
245+
if ($token['type'] != 'break') {
246+
$this->words[] = $this->getTokenValue($token);
247+
$this->rawWords[] = $token['value'];
248+
}
222249

223-
if ($cursorWordOffset < 0) {
224-
// Cursor is inside the word - truncate the word at the cursor
225-
// (This emulates normal BASH completion behaviour I've observed, though I'm not entirely sure if it's useful)
226-
$word = substr($word, 0, strlen($word) + $cursorWordOffset);
250+
// Determine which word index the cursor is inside once we reach it's offset
251+
if ($this->wordIndex === null && $this->charIndex <= $token['offsetEnd']) {
252+
$this->wordIndex = count($this->words) - 1;
227253

228-
} elseif ($cursorWordOffset > 0) {
254+
if ($token['type'] == 'break') {
229255
// Cursor is in the break-space after a word
230256
// Push an empty word at the cursor to allow completion of new terms at the cursor, ignoring words ahead
231257
$this->wordIndex++;
232-
$this->words[] = $word;
233258
$this->words[] = '';
259+
$this->rawWords[] = '';
234260
continue;
235261
}
236-
}
237262

238-
if ($word !== '') {
239-
$this->words[] = $word;
263+
if ($this->charIndex < $token['offsetEnd']) {
264+
// Cursor is inside the current word - truncate the word at the cursor to complete on
265+
// This emulates BASH completion's behaviour with COMP_CWORD
266+
267+
// Create a copy of the token with its value truncated
268+
$truncatedToken = $token;
269+
$relativeOffset = $this->charIndex - $token['offset'];
270+
$truncatedToken['value'] = substr($token['value'], 0, $relativeOffset);
271+
272+
// Replace the current word with the truncated value
273+
$this->words[$this->wordIndex] = $this->getTokenValue($truncatedToken);
274+
$this->rawWords[$this->wordIndex] = $truncatedToken['value'];
275+
}
240276
}
241277
}
242278

243-
if ($this->wordIndex > count($this->words) - 1) {
244-
$this->wordIndex = count($this->words) - 1;
279+
// Cursor position is past the end of the command line string - consider it a new word
280+
if ($this->wordIndex === null) {
281+
$this->wordIndex = count($this->words);
282+
$this->words[] = '';
283+
$this->rawWords[] = '';
245284
}
246285
}
247286

287+
/**
288+
* Return a token's value with escaping and quotes removed
289+
*
290+
* @see self::tokenizeString()
291+
* @param array $token
292+
* @return string
293+
*/
294+
protected function getTokenValue($token)
{
    $value = $token['value'];

    // Remove outer quote characters (or only the first quote if the string is unclosed)
    if ($token['type'] == 'quoted' && $value !== '') {
        $quote = $value[0];

        if ($quote === '"' || $quote === "'") {
            // Drop the opening quote
            $value = (string) substr($value, 1);

            // Drop the closing quote only when it is the same character as the opening quote
            // and is not itself backslash-escaped. The body is matched as a run of escape pairs
            // and non-backslash characters, so a trailing \" or \' can never count as the closer.
            // The "s" flag lets an escape pair span a newline inside the quoted string.
            $closingPattern = '/^((?:\\\\.|[^\\\\])*)' . preg_quote($quote, '/') . '$/s';

            if (preg_match($closingPattern, $value, $match)) {
                $value = $match[1];
            }
        }
    }

    // Remove escape characters (each backslash is dropped, the character after it is kept)
    $value = preg_replace('/\\\\(.)/', '$1', $value);

    return $value;
}
308+
309+
/**
310+
* Break a string into words, quoted strings and non-words (breaks)
311+
*
312+
* Returns an array of unmodified segments of $string with offset and type information.
313+
*
314+
* @param string $string
315+
* @return array as [ [type => string, value => string, offset => int, offsetEnd => int], ... ]
316+
*/
317+
protected function tokenizeString($string)
{
    // Map capture groups to returned token type
    $typeMap = array(
        'double_quote_string' => 'quoted',
        'single_quote_string' => 'quoted',
        'word' => 'word',
        'break' => 'break',
    );

    // Escape every word break character including whitespace
    // preg_quote won't work here as it doesn't understand the ignore whitespace flag ("x")
    $breaks = preg_replace('/(.)/', '\\\$1', $this->wordBreaks);

    // The closing REGEX marker is kept at column 0 so the heredoc also parses on PHP < 7.3.
    // Indentation inside the pattern is ignored because of the "x" flag.
    $pattern = <<<"REGEX"
        /(?:
            (?P<double_quote_string>
                "(\\\\.|[^\"\\\\])*(?:"|$)
            ) |
            (?P<single_quote_string>
                '(\\\\.|[^'\\\\])*(?:'|$)
            ) |
            (?P<word>
                (?:\\\\.|[^$breaks])+
            ) |
            (?P<break>
                [$breaks]+
            )
        )/x
REGEX;

    $tokens = array();

    if (!preg_match_all($pattern, $string, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER)) {
        return $tokens;
    }

    foreach ($matches as $set) {
        foreach ($set as $groupName => $match) {
            // Ignore integer indices preg_match outputs (duplicates of named groups)
            if (is_int($groupName)) {
                continue;
            }

            // Skip if the offset indicates this group didn't match
            if ($match[1] === -1) {
                continue;
            }

            $tokens[] = array(
                'type' => $typeMap[$groupName],
                'value' => $match[0],
                'offset' => $match[1],
                'offsetEnd' => $match[1] + strlen($match[0])
            );

            // Only one named group can match per set, so stop scanning this set
            // and move on to the next one
            break;
        }
    }

    return $tokens;
}
381+
248382
/**
249383
* Reset the computed words so that $this->splitCommand() is forced to run again
250384
*/

0 commit comments

Comments
 (0)