Skip to content

Commit e0d6b4a

Browse files
authored
[CLI] add --max-tokens to vllm complete (#28109)
Signed-off-by: Iceber Gu <caiwei95@hotmail.com>
1 parent 72b1c2a commit e0d6b4a

File tree

1 file changed

+14
-6
lines changed

1 file changed

+14
-6
lines changed

vllm/entrypoints/cli/openai.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -195,10 +195,15 @@ class CompleteCommand(CLISubcommand):
195195
def cmd(args: argparse.Namespace) -> None:
196196
model_name, client = _interactive_cli(args)
197197

198+
kwargs = {
199+
"model": model_name,
200+
"stream": True,
201+
}
202+
if args.max_tokens:
203+
kwargs["max_tokens"] = args.max_tokens
204+
198205
if args.quick:
199-
stream = client.completions.create(
200-
model=model_name, prompt=args.quick, stream=True
201-
)
206+
stream = client.completions.create(prompt=args.quick, **kwargs)
202207
_print_completion_stream(stream)
203208
return
204209

@@ -208,15 +213,18 @@ def cmd(args: argparse.Namespace) -> None:
208213
input_prompt = input("> ")
209214
except EOFError:
210215
break
211-
stream = client.completions.create(
212-
model=model_name, prompt=input_prompt, stream=True
213-
)
216+
stream = client.completions.create(prompt=input_prompt, **kwargs)
214217
_print_completion_stream(stream)
215218

216219
@staticmethod
217220
def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
218221
"""Add CLI arguments for the complete command."""
219222
_add_query_options(parser)
223+
parser.add_argument(
224+
"--max-tokens",
225+
type=int,
226+
help="Maximum number of tokens to generate per output sequence.",
227+
)
220228
parser.add_argument(
221229
"-q",
222230
"--quick",

0 commit comments

Comments
 (0)