Skip to content

Commit 05c2235

Browse files
authored
Merge pull request #708 from NexaAI/feat/mengsheng/pythonbind-npu
feat: refactor python and add npu doc
2 parents 559948f + da36f83 commit 05c2235

File tree

7 files changed

+210
-74
lines changed

7 files changed

+210
-74
lines changed

examples/python/README.md

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,3 +66,66 @@ nexa pull NexaAI/paddle-ocr-mlx
6666
6767
python cv_ocr.py
6868
```
69+
## Running Examples (Windows ARM64, Snapdragon X Elite)
70+
71+
### LLM
72+
```bash
73+
nexa pull NexaAI/Llama3.2-3B-NPU-Turbo
74+
75+
python llm.py --model NexaAI/Llama3.2-3B-NPU-Turbo --plugin-id npu --device npu --max-tokens 100 --system "You are a helpful assistant."
76+
```
77+
78+
### Multi-Modal
79+
80+
```bash
81+
nexa pull NexaAI/OmniNeural-4B
82+
83+
python vlm.py --model NexaAI/OmniNeural-4B --plugin-id npu --device npu --max-tokens 100 --system "You are a helpful assistant."
84+
```
85+
86+
### Reranker
87+
```bash
88+
nexa pull NexaAI/jina-v2-rerank-npu
89+
90+
python rerank.py --model NexaAI/jina-v2-rerank-npu --plugin-id npu --query "Where is on-device AI?" --documents "On-device AI is a type of AI that is processed on the device itself, rather than in the cloud." "edge computing" "A ragdoll is a breed of cat that is known for its long, flowing hair and gentle personality." "The capital of France is Paris."
91+
```
92+
93+
### Embedder
94+
```bash
95+
nexa pull NexaAI/embeddinggemma-300m-npu
96+
97+
python embedder.py --model NexaAI/embeddinggemma-300m-npu --plugin-id npu --texts "On-device AI is a type of AI that is processed on the device itself, rather than in the cloud." "edge computing" "A ragdoll is a breed of cat that is known for its long, flowing hair and gentle personality." "The capital of France is Paris." --query "what is on device AI" --batch-size 2
98+
```
99+
100+
### CV
101+
102+
#### OCR
103+
```bash
104+
nexa pull NexaAI/paddleocr-npu
105+
106+
python cv_ocr.py --det-model NexaAI/paddleocr-npu --rec-model NexaAI/paddleocr-npu --image path/to/image.png --plugin-id npu
107+
```
108+
109+
### ASR
110+
```bash
111+
nexa pull NexaAI/parakeet-npu
112+
113+
python asr.py --model NexaAI/parakeet-npu --audio path/to/audio.wav
114+
```
115+
116+
## Common Arguments
117+
118+
- `--model`: Model ID (e.g. `NexaAI/parakeet-npu`) or path to the model file
119+
- `--device`: Device to run on (cpu, gpu, npu, etc.)
120+
- `--max-tokens`: Maximum tokens to generate (for LLM/VLM)
121+
- `--batch-size`: Batch size for processing
122+
- `--system`: System message for chat models
123+
- `--plugin-id`: Plugin ID to use (default: cpu_gpu)
124+
125+
## Plugin ID Options
126+
127+
The `--plugin-id` parameter supports different backends:
128+
- `cpu_gpu`: Default, supports both CPU and GPU
129+
- `mlx`: Apple Silicon optimized (for supported models)
130+
- `llama_cpp`: For GGUF format models
131+
- `onnx`: ONNX runtime backend
- `npu`: NPU backend, used by the Windows ARM64 (Snapdragon X Elite) examples above

examples/python/asr.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
"""
2+
NexaAI ASR Example - Speech to Text (non-streaming)
3+
4+
This example demonstrates how to use the NexaAI SDK to transcribe an audio file.
5+
"""
6+
7+
import argparse
8+
import os
9+
10+
from nexaai.asr import ASR, ASRConfig
11+
12+
def main():
    """Transcribe one audio file with a NexaAI ASR model and print the text.

    Command-line arguments:
        --model       Model id or path (default: NexaAI/parakeet-npu).
        --audio       Path to the input audio file (required).
        --language    Language code passed to the transcriber (default: en).
        --beam-size   Beam size for decoding (default: 5).
        --timestamps  Timestamps granularity (default: segment).
        --plugin-id   Plugin ID used to load the model (default: npu).
        --device      Device id used to load the model (default: npu).

    Raises:
        FileNotFoundError: If ``--audio`` does not point to an existing
            regular file.
    """
    parser = argparse.ArgumentParser(description="NexaAI ASR Example")
    parser.add_argument("--model",
                        default="NexaAI/parakeet-npu",
                        help="Model id or path")
    parser.add_argument("--audio",
                        required=True,
                        help="Path to the input audio file")
    parser.add_argument("--language", default="en",
                        help="Language code (e.g., en, zh). Empty for auto-detect if supported")
    parser.add_argument("--beam-size", type=int, default=5,
                        help="Beam size for decoding")
    parser.add_argument("--timestamps", default="segment",
                        help="Timestamps granularity: none|segment|word (if supported)")
    parser.add_argument("--plugin-id", default="npu", help="Plugin ID to use")
    parser.add_argument("--device", default="npu", help="Device to run on (e.g., cpu, gpu, 0)")
    args = parser.parse_args()

    # expanduser leaves a plain model id (no leading "~") untouched, so the
    # same call serves both ids and local paths.
    model_path = os.path.expanduser(args.model)
    audio_path = os.path.expanduser(args.audio)

    # Validate before loading the model. isfile (not exists) also rejects a
    # directory, which would otherwise only fail later inside the SDK.
    if not os.path.isfile(audio_path):
        raise FileNotFoundError(f"Audio file not found: {audio_path}")

    asr = ASR.from_(name_or_path=model_path, plugin_id=args.plugin_id, device_id=args.device)

    # Non-streaming: the full transcript is returned in a single result.
    cfg = ASRConfig(timestamps=args.timestamps, beam_size=args.beam_size, stream=False)
    result = asr.transcribe(audio_path=audio_path, language=args.language, config=cfg)
    print(result.transcript)
41+
42+
43+
if __name__ == "__main__":
44+
main()
45+
46+

examples/python/cv_ocr.py

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,25 +4,34 @@
44
This example demonstrates how to use the NexaAI SDK to perform OCR on an image.
55
"""
66

7+
import argparse
78
import os
89
from nexaai.cv import CVCapabilities, CVModel, CVModelConfig, CVResults
910

1011

1112
def main():
12-
det_model_path = os.path.expanduser(
13-
"~/.cache/nexa.ai/nexa_sdk/models/NexaAI/paddle-ocr-mlx/ch_ptocr_v4_det_infer.safetensors")
14-
rec_model_path = os.path.expanduser(
15-
"~/.cache/nexa.ai/nexa_sdk/models/NexaAI/paddle-ocr-mlx/ch_ptocr_v4_rec_infer.safetensors")
13+
parser = argparse.ArgumentParser(description="NexaAI CV OCR Example")
14+
parser.add_argument("--det-model",
15+
default="~/.cache/nexa.ai/nexa_sdk/models/NexaAI/paddle-ocr-mlx/ch_ptocr_v4_det_infer.safetensors",
16+
help="Path to detection model")
17+
parser.add_argument("--rec-model",
18+
default="~/.cache/nexa.ai/nexa_sdk/models/NexaAI/paddle-ocr-mlx/ch_ptocr_v4_rec_infer.safetensors",
19+
help="Path to recognition model")
20+
parser.add_argument("--image",
21+
default="~/.cache/nexa.ai/nexa_sdk/models/NexaAI/paddle-ocr-mlx/test_input.jpg",
22+
help="Path to input image")
23+
parser.add_argument("--plugin-id", default="cpu_gpu", help="Plugin ID to use")
24+
args = parser.parse_args()
25+
26+
det_model_path = os.path.expanduser(args.det_model)
27+
rec_model_path = os.path.expanduser(args.rec_model)
28+
image_path = os.path.expanduser(args.image)
1629

1730
config = CVModelConfig(capabilities=CVCapabilities.OCR,
1831
det_model_path=det_model_path, rec_model_path=rec_model_path)
1932

20-
# For now, this modality is only supported in MLX.
21-
cv: CVModel = CVModel.from_(
22-
name_or_path=det_model_path, config=config, plugin_id="mlx")
23-
24-
results: CVResults = cv.infer(os.path.expanduser(
25-
"~/.cache/nexa.ai/nexa_sdk/models/NexaAI/paddle-ocr-mlx/test_input.jpg"))
33+
cv = CVModel.from_(name_or_path=det_model_path, config=config, plugin_id=args.plugin_id)
34+
results = cv.infer(image_path)
2635

2736
print(f"Number of results: {results.result_count}")
2837
for result in results.results:

examples/python/embedder.py

Lines changed: 23 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -7,65 +7,66 @@
77
It includes basic model initialization, single and batch embedding generation, and embedding analysis.
88
"""
99

10+
import argparse
1011
import os
1112
import numpy as np
1213

1314
from nexaai.embedder import Embedder, EmbeddingConfig
1415

1516
def main():
16-
model_path = os.path.expanduser(
17-
"~/.cache/nexa.ai/nexa_sdk/models/NexaAI/jina-v2-fp16-mlx/model.safetensors")
17+
parser = argparse.ArgumentParser(description="NexaAI Embedding Example")
18+
parser.add_argument("--model", default="~/.cache/nexa.ai/nexa_sdk/models/NexaAI/jina-v2-fp16-mlx/model.safetensors",
19+
help="Path to the embedding model")
20+
parser.add_argument("--texts", nargs="+",
21+
default=["On-device AI is a type of AI that is processed on the device itself, rather than in the cloud.",
22+
"Nexa AI allows you to run state-of-the-art AI models locally on CPU, GPU, or NPU — from instant use cases to production deployments.",
23+
"A ragdoll is a breed of cat that is known for its long, flowing hair and gentle personality.",
24+
"The capital of France is Paris."],
25+
help="Texts to embed")
26+
parser.add_argument("--query", default="what is on device AI",
27+
help="Query text for similarity analysis")
28+
parser.add_argument("--batch-size", type=int, help="Batch size for processing")
29+
parser.add_argument("--plugin-id", default="cpu_gpu", help="Plugin ID to use")
30+
args = parser.parse_args()
1831

19-
# For now, this modality is only supported in MLX.
20-
embedder: Embedder = Embedder.from_(
21-
name_or_path=model_path, plugin_id="mlx")
32+
model_path = os.path.expanduser(args.model)
33+
embedder = Embedder.from_(name_or_path=model_path, plugin_id=args.plugin_id)
2234
print('Embedder loaded successfully!')
2335

2436
dim = embedder.get_embedding_dim()
2537
print(f"Dimension: {dim}")
2638

27-
texts = [
28-
"On-device AI is a type of AI that is processed on the device itself, rather than in the cloud.",
29-
"Nexa AI allows you to run state-of-the-art AI models locally on CPU, GPU, or NPU — from instant use cases to production deployments.",
30-
"A ragdoll is a breed of cat that is known for its long, flowing hair and gentle personality.",
31-
"The capital of France is Paris."
32-
]
39+
batch_size = args.batch_size or len(args.texts)
3340
embeddings = embedder.generate(
34-
texts=texts, config=EmbeddingConfig(batch_size=len(texts)))
41+
texts=args.texts, config=EmbeddingConfig(batch_size=batch_size))
3542

3643
print("\n" + "="*80)
3744
print("GENERATED EMBEDDINGS")
3845
print("="*80)
3946

40-
for i, (text, embedding) in enumerate(zip(texts, embeddings)):
47+
for i, (text, embedding) in enumerate(zip(args.texts, embeddings)):
4148
print(f"\nText {i+1}:")
4249
print(f" Content: {text}")
4350
print(f" Embedding shape: {len(embedding)} dimensions")
4451
print(f" First 10 elements: {embedding[:10]}")
4552
print("-" * 70)
4653

47-
# Generate embedding for query
48-
query = "what is on device AI"
4954
print(f"\n" + "="*80)
5055
print("QUERY PROCESSING")
5156
print("="*80)
52-
print(f"Query: '{query}'")
57+
print(f"Query: '{args.query}'")
5358

5459
query_embedding = embedder.generate(
55-
texts=[query], config=EmbeddingConfig(batch_size=1))[0]
60+
texts=[args.query], config=EmbeddingConfig(batch_size=1))[0]
5661
print(f"Query embedding shape: {len(query_embedding)} dimensions")
5762

58-
# Compute inner product between query and all texts
5963
print(f"\n" + "="*80)
6064
print("SIMILARITY ANALYSIS (Inner Product)")
6165
print("="*80)
6266

63-
for i, (text, embedding) in enumerate(zip(texts, embeddings)):
64-
# Convert to numpy arrays for easier computation
67+
for i, (text, embedding) in enumerate(zip(args.texts, embeddings)):
6568
query_vec = np.array(query_embedding)
6669
text_vec = np.array(embedding)
67-
68-
# Compute inner product (dot product)
6970
inner_product = np.dot(query_vec, text_vec)
7071

7172
print(f"\nText {i+1}:")

examples/python/llm.py

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
This example demonstrates how to use the NexaAI SDK to work with LLM models.
55
"""
66

7+
import argparse
78
import io
89
import os
910
from typing import List
@@ -13,19 +14,23 @@
1314

1415

1516
def main():
16-
# Your model path
17-
model = os.path.expanduser(
18-
"~/.cache/nexa.ai/nexa_sdk/models/Qwen/Qwen3-0.6B-GGUF/Qwen3-0.6B-Q8_0.gguf")
19-
20-
# Model configuration
17+
parser = argparse.ArgumentParser(description="NexaAI LLM Example")
18+
parser.add_argument("--model",
19+
default="~/.cache/nexa.ai/nexa_sdk/models/Qwen/Qwen3-0.6B-GGUF/Qwen3-0.6B-Q8_0.gguf",
20+
help="Path to the LLM model")
21+
parser.add_argument("--device", default="cpu", help="Device to run on")
22+
parser.add_argument("--max-tokens", type=int, default=100, help="Maximum tokens to generate")
23+
parser.add_argument("--system", default="You are a helpful assistant.",
24+
help="System message")
25+
parser.add_argument("--plugin-id", default="cpu_gpu", help="Plugin ID to use")
26+
args = parser.parse_args()
27+
28+
model_path = os.path.expanduser(args.model)
2129
m_cfg = ModelConfig()
2230

23-
# Load model
24-
instance: LLM = LLM.from_(
25-
model, plugin_id="llama_cpp", device_id="cpu", m_cfg=m_cfg)
31+
instance = LLM.from_(model_path, plugin_id=args.plugin_id, device_id=args.device, m_cfg=m_cfg)
2632

27-
conversation: List[ChatMessage] = [ChatMessage(
28-
role="system", content="You are a helpful assistant.")]
33+
conversation: List[ChatMessage] = [ChatMessage(role="system", content=args.system)]
2934
strbuff = io.StringIO()
3035

3136
print("Multi-round conversation started. Type '/quit' or '/exit' to end.")
@@ -60,26 +65,21 @@ def main():
6065
continue
6166

6267
conversation.append(ChatMessage(role="user", content=user_input))
63-
64-
# Apply the chat template
6568
prompt = instance.apply_chat_template(conversation)
6669

6770
strbuff.truncate(0)
6871
strbuff.seek(0)
6972

7073
print("Assistant: ", end="", flush=True)
71-
# Generate the model response
72-
for token in instance.generate_stream(prompt, g_cfg=GenerationConfig(max_tokens=100)):
74+
for token in instance.generate_stream(prompt, g_cfg=GenerationConfig(max_tokens=args.max_tokens)):
7375
print(token, end="", flush=True)
7476
strbuff.write(token)
7577

76-
# Get profiling data
7778
profiling_data = instance.get_profiling_data()
7879
if profiling_data is not None:
7980
print(profiling_data)
8081

81-
conversation.append(ChatMessage(
82-
role="assistant", content=strbuff.getvalue()))
82+
conversation.append(ChatMessage(role="assistant", content=strbuff.getvalue()))
8383

8484

8585
if __name__ == "__main__":

examples/python/rerank.py

Lines changed: 25 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,31 +7,40 @@
77
It includes basic model initialization, document reranking, and score analysis.
88
"""
99

10+
import argparse
1011
import os
1112
from nexaai.rerank import Reranker, RerankConfig
1213

1314

1415
def main():
15-
model_path = os.path.expanduser("~/.cache/nexa.ai/nexa_sdk/models/NexaAI/jina-v2-rerank-mlx/jina-reranker-v2-base-multilingual-f16.safetensors")
16+
parser = argparse.ArgumentParser(description="NexaAI Rerank Example")
17+
parser.add_argument("--model",
18+
default="~/.cache/nexa.ai/nexa_sdk/models/NexaAI/jina-v2-rerank-mlx/jina-reranker-v2-base-multilingual-f16.safetensors",
19+
help="Path to the rerank model")
20+
parser.add_argument("--query", default="Where is on-device AI?",
21+
help="Query text for reranking")
22+
parser.add_argument("--documents", nargs="+",
23+
default=["On-device AI is a type of AI that is processed on the device itself, rather than in the cloud.",
24+
"edge computing",
25+
"A ragdoll is a breed of cat that is known for its long, flowing hair and gentle personality.",
26+
"The capital of France is Paris."],
27+
help="Documents to rerank")
28+
parser.add_argument("--batch-size", type=int, help="Batch size for processing")
29+
parser.add_argument("--plugin-id", default="cpu_gpu", help="Plugin ID to use")
30+
args = parser.parse_args()
31+
32+
model_path = os.path.expanduser(args.model)
33+
reranker = Reranker.from_(name_or_path=model_path, plugin_id=args.plugin_id)
1634

17-
# For now, this modality is only supported in MLX.
18-
reranker: Reranker = Reranker.from_(name_or_path=model_path, plugin_id="mlx")
19-
documents = [
20-
"On-device AI is a type of AI that is processed on the device itself, rather than in the cloud.",
21-
"edge computing",
22-
"A ragdoll is a breed of cat that is known for its long, flowing hair and gentle personality.",
23-
"The capital of France is Paris."
24-
]
35+
batch_size = args.batch_size or len(args.documents)
36+
scores = reranker.rerank(query=args.query, documents=args.documents,
37+
config=RerankConfig(batch_size=batch_size))
2538

26-
query = "Where is on-device AI?"
27-
28-
scores = reranker.rerank(query=query, documents=documents, config=RerankConfig(batch_size=len(documents)))
29-
30-
print(f"Query: {query}")
31-
print(f"Documents: {len(documents)} documents")
39+
print(f"Query: {args.query}")
40+
print(f"Documents: {len(args.documents)} documents")
3241
print("-" * 50)
3342
for i, score in enumerate(scores):
34-
print(f"[{score:.4f}] : {documents[i]}")
43+
print(f"[{score:.4f}] : {args.documents[i]}")
3544

3645

3746
if __name__ == "__main__":

0 commit comments

Comments
 (0)