|
"""
NexaAI ASR Example - Speech to Text (non-streaming)

This example demonstrates how to use the NexaAI SDK to transcribe an audio file.
"""
| 6 | + |
| 7 | +import argparse |
| 8 | +import os |
| 9 | + |
| 10 | +from nexaai.asr import ASR, ASRConfig |
| 11 | + |
def main(argv=None):
    """Transcribe one audio file with a NexaAI ASR model and print the transcript.

    Args:
        argv: Optional list of command-line arguments. When ``None`` (the
            default) argparse reads ``sys.argv[1:]``, so a bare ``main()``
            call behaves as it always did.

    Raises:
        FileNotFoundError: If ``--audio`` does not name an existing regular
            file (the path is missing, or it is a directory).
    """
    parser = argparse.ArgumentParser(description="NexaAI ASR Example")
    parser.add_argument("--model",
                        default="NexaAI/parakeet-npu",
                        help="Model id or path")
    parser.add_argument("--audio",
                        required=True,
                        help="Path to the input audio file")
    parser.add_argument("--language", default="en",
                        help="Language code (e.g., en, zh). Empty for auto-detect if supported")
    parser.add_argument("--beam-size", type=int, default=5,
                        help="Beam size for decoding")
    parser.add_argument("--timestamps", default="segment",
                        help="Timestamps granularity: none|segment|word (if supported)")
    parser.add_argument("--plugin-id", default="npu", help="Plugin ID to use")
    parser.add_argument("--device", default="npu", help="Device to run on (e.g., cpu, gpu, 0)")
    args = parser.parse_args(argv)

    # Expand "~" so user-relative paths work; a plain model id passes through unchanged.
    model_path = os.path.expanduser(args.model)
    audio_path = os.path.expanduser(args.audio)

    # Validate the input before creating the ASR object. isfile() (rather than
    # exists()) also rejects directories, which would otherwise slip past this
    # guard and only fail later inside the SDK.
    if not os.path.isfile(audio_path):
        raise FileNotFoundError(f"Audio file not found: {audio_path}")

    # Build the ASR instance from the model id/path with the requested plugin and device.
    asr = ASR.from_(name_or_path=model_path, plugin_id=args.plugin_id, device_id=args.device)

    # stream=False: this example requests a single, non-streaming transcription.
    cfg = ASRConfig(timestamps=args.timestamps, beam_size=args.beam_size, stream=False)
    result = asr.transcribe(audio_path=audio_path, language=args.language, config=cfg)
    print(result.transcript)
| 41 | + |
| 42 | + |
# Script entry point: run the example only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
| 45 | + |
| 46 | + |
0 commit comments