63 changes: 63 additions & 0 deletions examples/python/README.md
@@ -66,3 +66,66 @@ nexa pull NexaAI/paddle-ocr-mlx

python cv_ocr.py
```
## Running Examples (Windows ARM64, Snapdragon X Elite)

### LLM
```bash
nexa pull NexaAI/Llama3.2-3B-NPU-Turbo

python llm.py --model NexaAI/Llama3.2-3B-NPU-Turbo --plugin-id npu --device npu --max-tokens 100 --system "You are a helpful assistant."
```

### Multi-Modal

```bash
nexa pull NexaAI/OmniNeural-4B

python vlm.py --model NexaAI/OmniNeural-4B --plugin-id npu --device npu --max-tokens 100 --system "You are a helpful assistant."
```

### Reranker
```bash
nexa pull NexaAI/jina-v2-rerank-npu

python rerank.py --model NexaAI/jina-v2-rerank-npu --plugin-id npu --query "Where is on-device AI?" --documents "On-device AI is a type of AI that is processed on the device itself, rather than in the cloud." "edge computing" "A ragdoll is a breed of cat that is known for its long, flowing hair and gentle personality." "The capital of France is Paris."
```

### Embedder
```bash
nexa pull NexaAI/embeddinggemma-300m-npu

python embedder.py --model NexaAI/embeddinggemma-300m-npu --plugin-id npu --texts "On-device AI is a type of AI that is processed on the device itself, rather than in the cloud." "edge computing" "A ragdoll is a breed of cat that is known for its long, flowing hair and gentle personality." "The capital of France is Paris." --query "what is on device AI" --batch-size 2
```

### CV

#### OCR
```bash
nexa pull NexaAI/paddleocr-npu

python cv_ocr.py --det-model NexaAI/paddleocr-npu --rec-model NexaAI/paddleocr-npu --image path/to/image.png
```

### ASR
```bash
nexa pull NexaAI/parakeet-npu

python asr.py --model NexaAI/parakeet-npu --audio path/to/audio.wav
```

## Common Arguments

- `--model`: Model ID or path to the model file
- `--device`: Device to run on (e.g., cpu, gpu, npu)
- `--max-tokens`: Maximum tokens to generate (for LLM/VLM)
- `--batch-size`: Batch size for processing
- `--system`: System message for chat models
- `--plugin-id`: Plugin ID to use (default: cpu_gpu)
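
Every flag above has a default baked into the script, so a minimal run can omit most of them. A sketch, assuming the default model has already been pulled into the local cache; `llm.py` defaults to a GGUF model, so `--plugin-id llama_cpp` is pinned explicitly, per the backend list below:

```bash
# Rely on the script defaults (Qwen3-0.6B GGUF on CPU); only the backend is pinned.
python llm.py --plugin-id llama_cpp

# Override just the generation budget, keeping the other defaults.
python llm.py --plugin-id llama_cpp --max-tokens 256
```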

## Plugin ID Options

The `--plugin-id` parameter selects the inference backend:
- `cpu_gpu`: Default; supports both CPU and GPU
- `mlx`: Apple Silicon optimized (for supported models)
- `llama_cpp`: For GGUF-format models
- `onnx`: ONNX Runtime backend
- `npu`: Qualcomm NPU backend, used in the Windows ARM64 (Snapdragon X Elite) examples above
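
In the Python examples, these values are passed straight through to the SDK's `from_` constructors. A minimal sketch of that mapping (the `nexaai.llm` import path is an assumption; the actual imports are collapsed in the `llm.py` diff below):

```python
import os

# Assumed import path; see llm.py for the real imports.
from nexaai.llm import LLM, ModelConfig

# GGUF build on CPU through the llama.cpp backend:
model_path = os.path.expanduser(
    "~/.cache/nexa.ai/nexa_sdk/models/Qwen/Qwen3-0.6B-GGUF/Qwen3-0.6B-Q8_0.gguf")
llm = LLM.from_(model_path, plugin_id="llama_cpp", device_id="cpu", m_cfg=ModelConfig())

# The same call shape targets the Snapdragon NPU, given an NPU model build:
# llm = LLM.from_("NexaAI/Llama3.2-3B-NPU-Turbo", plugin_id="npu",
#                 device_id="npu", m_cfg=ModelConfig())
```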
46 changes: 46 additions & 0 deletions examples/python/asr.py
@@ -0,0 +1,46 @@
"""
NexaAI ASR Example - Speech to Text (non-streaming)

This example demonstrates how to use the NexaAI SDK to transcribe an audio file.
"""

import argparse
import os

from nexaai.asr import ASR, ASRConfig

def main():
    parser = argparse.ArgumentParser(description="NexaAI ASR Example")
    parser.add_argument("--model",
                        default="NexaAI/parakeet-npu",
                        help="Model id or path")
    parser.add_argument("--audio",
                        required=True,
                        help="Path to the input audio file")
    parser.add_argument("--language", default="en",
                        help="Language code (e.g., en, zh). Empty for auto-detect if supported")
    parser.add_argument("--beam-size", type=int, default=5,
                        help="Beam size for decoding")
    parser.add_argument("--timestamps", default="segment",
                        help="Timestamps granularity: none|segment|word (if supported)")
    parser.add_argument("--plugin-id", default="npu", help="Plugin ID to use")
    parser.add_argument("--device", default="npu",
                        help="Device to run on (e.g., cpu, gpu, npu, or a device index like 0)")
    args = parser.parse_args()

    model_path = os.path.expanduser(args.model)
    audio_path = os.path.expanduser(args.audio)

    if not os.path.exists(audio_path):
        raise FileNotFoundError(f"Audio file not found: {audio_path}")

    asr = ASR.from_(name_or_path=model_path, plugin_id=args.plugin_id, device_id=args.device)

    cfg = ASRConfig(timestamps=args.timestamps, beam_size=args.beam_size, stream=False)
    result = asr.transcribe(audio_path=audio_path, language=args.language, config=cfg)
    print(result.transcript)


if __name__ == "__main__":
    main()
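
A couple of invocation sketches for the script above, assuming its argument defaults (NPU model and plugin); the audio path is a placeholder:

```bash
# Basic transcription with the defaults (segment timestamps, beam size 5):
python asr.py --audio path/to/audio.wav

# Word-level timestamps and a wider beam, if the model supports them:
python asr.py --audio path/to/audio.wav --timestamps word --beam-size 8
```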


29 changes: 19 additions & 10 deletions examples/python/cv_ocr.py
@@ -4,25 +4,34 @@
This example demonstrates how to use the NexaAI SDK to perform OCR on an image.
"""

import argparse
import os
from nexaai.cv import CVCapabilities, CVModel, CVModelConfig, CVResults


def main():
    det_model_path = os.path.expanduser(
        "~/.cache/nexa.ai/nexa_sdk/models/NexaAI/paddle-ocr-mlx/ch_ptocr_v4_det_infer.safetensors")
    rec_model_path = os.path.expanduser(
        "~/.cache/nexa.ai/nexa_sdk/models/NexaAI/paddle-ocr-mlx/ch_ptocr_v4_rec_infer.safetensors")
    parser = argparse.ArgumentParser(description="NexaAI CV OCR Example")
    parser.add_argument("--det-model",
                        default="~/.cache/nexa.ai/nexa_sdk/models/NexaAI/paddle-ocr-mlx/ch_ptocr_v4_det_infer.safetensors",
                        help="Path to detection model")
    parser.add_argument("--rec-model",
                        default="~/.cache/nexa.ai/nexa_sdk/models/NexaAI/paddle-ocr-mlx/ch_ptocr_v4_rec_infer.safetensors",
                        help="Path to recognition model")
    parser.add_argument("--image",
                        default="~/.cache/nexa.ai/nexa_sdk/models/NexaAI/paddle-ocr-mlx/test_input.jpg",
                        help="Path to input image")
    parser.add_argument("--plugin-id", default="cpu_gpu", help="Plugin ID to use")
    args = parser.parse_args()

    det_model_path = os.path.expanduser(args.det_model)
    rec_model_path = os.path.expanduser(args.rec_model)
    image_path = os.path.expanduser(args.image)

    config = CVModelConfig(capabilities=CVCapabilities.OCR,
                           det_model_path=det_model_path, rec_model_path=rec_model_path)

    # For now, this modality is only supported in MLX.
    cv: CVModel = CVModel.from_(
        name_or_path=det_model_path, config=config, plugin_id="mlx")

    results: CVResults = cv.infer(os.path.expanduser(
        "~/.cache/nexa.ai/nexa_sdk/models/NexaAI/paddle-ocr-mlx/test_input.jpg"))
    cv = CVModel.from_(name_or_path=det_model_path, config=config, plugin_id=args.plugin_id)
    results = cv.infer(image_path)

    print(f"Number of results: {results.result_count}")
    for result in results.results:
45 changes: 23 additions & 22 deletions examples/python/embedder.py
@@ -7,65 +7,66 @@
It includes basic model initialization, single and batch embedding generation, and embedding analysis.
"""

import argparse
import os
import numpy as np

from nexaai.embedder import Embedder, EmbeddingConfig

def main():
    model_path = os.path.expanduser(
        "~/.cache/nexa.ai/nexa_sdk/models/NexaAI/jina-v2-fp16-mlx/model.safetensors")
    parser = argparse.ArgumentParser(description="NexaAI Embedding Example")
    parser.add_argument("--model", default="~/.cache/nexa.ai/nexa_sdk/models/NexaAI/jina-v2-fp16-mlx/model.safetensors",
                        help="Path to the embedding model")
    parser.add_argument("--texts", nargs="+",
                        default=["On-device AI is a type of AI that is processed on the device itself, rather than in the cloud.",
                                 "Nexa AI allows you to run state-of-the-art AI models locally on CPU, GPU, or NPU — from instant use cases to production deployments.",
                                 "A ragdoll is a breed of cat that is known for its long, flowing hair and gentle personality.",
                                 "The capital of France is Paris."],
                        help="Texts to embed")
    parser.add_argument("--query", default="what is on device AI",
                        help="Query text for similarity analysis")
    parser.add_argument("--batch-size", type=int, help="Batch size for processing")
    parser.add_argument("--plugin-id", default="cpu_gpu", help="Plugin ID to use")
    args = parser.parse_args()

    # For now, this modality is only supported in MLX.
    embedder: Embedder = Embedder.from_(
        name_or_path=model_path, plugin_id="mlx")
    model_path = os.path.expanduser(args.model)
    embedder = Embedder.from_(name_or_path=model_path, plugin_id=args.plugin_id)
    print('Embedder loaded successfully!')

    dim = embedder.get_embedding_dim()
    print(f"Dimension: {dim}")

    texts = [
        "On-device AI is a type of AI that is processed on the device itself, rather than in the cloud.",
        "Nexa AI allows you to run state-of-the-art AI models locally on CPU, GPU, or NPU — from instant use cases to production deployments.",
        "A ragdoll is a breed of cat that is known for its long, flowing hair and gentle personality.",
        "The capital of France is Paris."
    ]
    batch_size = args.batch_size or len(args.texts)
    embeddings = embedder.generate(
        texts=texts, config=EmbeddingConfig(batch_size=len(texts)))
        texts=args.texts, config=EmbeddingConfig(batch_size=batch_size))

    print("\n" + "="*80)
    print("GENERATED EMBEDDINGS")
    print("="*80)

    for i, (text, embedding) in enumerate(zip(texts, embeddings)):
    for i, (text, embedding) in enumerate(zip(args.texts, embeddings)):
        print(f"\nText {i+1}:")
        print(f" Content: {text}")
        print(f" Embedding shape: {len(embedding)} dimensions")
        print(f" First 10 elements: {embedding[:10]}")
        print("-" * 70)

    # Generate embedding for query
    query = "what is on device AI"
    print(f"\n" + "="*80)
    print("QUERY PROCESSING")
    print("="*80)
    print(f"Query: '{query}'")
    print(f"Query: '{args.query}'")

    query_embedding = embedder.generate(
        texts=[query], config=EmbeddingConfig(batch_size=1))[0]
        texts=[args.query], config=EmbeddingConfig(batch_size=1))[0]
    print(f"Query embedding shape: {len(query_embedding)} dimensions")

    # Compute inner product between query and all texts
    print(f"\n" + "="*80)
    print("SIMILARITY ANALYSIS (Inner Product)")
    print("="*80)

    for i, (text, embedding) in enumerate(zip(texts, embeddings)):
        # Convert to numpy arrays for easier computation
    for i, (text, embedding) in enumerate(zip(args.texts, embeddings)):
        query_vec = np.array(query_embedding)
        text_vec = np.array(embedding)

        # Compute inner product (dot product)
        inner_product = np.dot(query_vec, text_vec)

        print(f"\nText {i+1}:")
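
A hedged aside on the scores above: the inner product equals cosine similarity only if the embedder returns unit-length vectors, which the example does not state. A normalization-safe variant, reusing the arrays from the loop:

```python
import numpy as np

def cosine_similarity(a, b):
    # Normalize explicitly so scores land in [-1, 1] regardless of
    # whether the embedder returns unit-length vectors.
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

# e.g. inside the loop above:
# score = cosine_similarity(query_embedding, embedding)
```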
34 changes: 17 additions & 17 deletions examples/python/llm.py
@@ -4,6 +4,7 @@
This example demonstrates how to use the NexaAI SDK to work with LLM models.
"""

import argparse
import io
import os
from typing import List
@@ -13,19 +14,23 @@


def main():
    # Your model path
    model = os.path.expanduser(
        "~/.cache/nexa.ai/nexa_sdk/models/Qwen/Qwen3-0.6B-GGUF/Qwen3-0.6B-Q8_0.gguf")

    # Model configuration
    parser = argparse.ArgumentParser(description="NexaAI LLM Example")
    parser.add_argument("--model",
                        default="~/.cache/nexa.ai/nexa_sdk/models/Qwen/Qwen3-0.6B-GGUF/Qwen3-0.6B-Q8_0.gguf",
                        help="Path to the LLM model")
    parser.add_argument("--device", default="cpu", help="Device to run on")
    parser.add_argument("--max-tokens", type=int, default=100, help="Maximum tokens to generate")
    parser.add_argument("--system", default="You are a helpful assistant.",
                        help="System message")
    parser.add_argument("--plugin-id", default="cpu_gpu", help="Plugin ID to use")
    args = parser.parse_args()

    model_path = os.path.expanduser(args.model)
    m_cfg = ModelConfig()

    # Load model
    instance: LLM = LLM.from_(
        model, plugin_id="llama_cpp", device_id="cpu", m_cfg=m_cfg)
    instance = LLM.from_(model_path, plugin_id=args.plugin_id, device_id=args.device, m_cfg=m_cfg)

    conversation: List[ChatMessage] = [ChatMessage(
        role="system", content="You are a helpful assistant.")]
    conversation: List[ChatMessage] = [ChatMessage(role="system", content=args.system)]
    strbuff = io.StringIO()

    print("Multi-round conversation started. Type '/quit' or '/exit' to end.")
@@ -60,26 +65,21 @@ def main():
            continue

        conversation.append(ChatMessage(role="user", content=user_input))

        # Apply the chat template
        prompt = instance.apply_chat_template(conversation)

        strbuff.truncate(0)
        strbuff.seek(0)

        print("Assistant: ", end="", flush=True)
        # Generate the model response
        for token in instance.generate_stream(prompt, g_cfg=GenerationConfig(max_tokens=100)):
        for token in instance.generate_stream(prompt, g_cfg=GenerationConfig(max_tokens=args.max_tokens)):
            print(token, end="", flush=True)
            strbuff.write(token)

        # Get profiling data
        profiling_data = instance.get_profiling_data()
        if profiling_data is not None:
            print(profiling_data)

        conversation.append(ChatMessage(
            role="assistant", content=strbuff.getvalue()))
        conversation.append(ChatMessage(role="assistant", content=strbuff.getvalue()))


if __name__ == "__main__":
    main()
41 changes: 25 additions & 16 deletions examples/python/rerank.py
@@ -7,31 +7,40 @@
It includes basic model initialization, document reranking, and score analysis.
"""

import argparse
import os
from nexaai.rerank import Reranker, RerankConfig


def main():
model_path = os.path.expanduser("~/.cache/nexa.ai/nexa_sdk/models/NexaAI/jina-v2-rerank-mlx/jina-reranker-v2-base-multilingual-f16.safetensors")
parser = argparse.ArgumentParser(description="NexaAI Rerank Example")
parser.add_argument("--model",
default="~/.cache/nexa.ai/nexa_sdk/models/NexaAI/jina-v2-rerank-mlx/jina-reranker-v2-base-multilingual-f16.safetensors",
help="Path to the rerank model")
parser.add_argument("--query", default="Where is on-device AI?",
help="Query text for reranking")
parser.add_argument("--documents", nargs="+",
default=["On-device AI is a type of AI that is processed on the device itself, rather than in the cloud.",
"edge computing",
"A ragdoll is a breed of cat that is known for its long, flowing hair and gentle personality.",
"The capital of France is Paris."],
help="Documents to rerank")
parser.add_argument("--batch-size", type=int, help="Batch size for processing")
parser.add_argument("--plugin-id", default="cpu_gpu", help="Plugin ID to use")
args = parser.parse_args()

model_path = os.path.expanduser(args.model)
reranker = Reranker.from_(name_or_path=model_path, plugin_id=args.plugin_id)

# For now, this modality is only supported in MLX.
reranker: Reranker = Reranker.from_(name_or_path=model_path, plugin_id="mlx")
documents = [
"On-device AI is a type of AI that is processed on the device itself, rather than in the cloud.",
"edge computing",
"A ragdoll is a breed of cat that is known for its long, flowing hair and gentle personality.",
"The capital of France is Paris."
]
batch_size = args.batch_size or len(args.documents)
scores = reranker.rerank(query=args.query, documents=args.documents,
config=RerankConfig(batch_size=batch_size))

query = "Where is on-device AI?"

scores = reranker.rerank(query=query, documents=documents, config=RerankConfig(batch_size=len(documents)))

print(f"Query: {query}")
print(f"Documents: {len(documents)} documents")
print(f"Query: {args.query}")
print(f"Documents: {len(args.documents)} documents")
print("-" * 50)
for i, score in enumerate(scores):
print(f"[{score:.4f}] : {documents[i]}")
print(f"[{score:.4f}] : {args.documents[i]}")


if __name__ == "__main__":
    main()
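
The loop above prints scores in the input order; to surface the actual ranking, sort by score. A small extension sketch, reusing `scores` and `args.documents` from `main()` (not part of the diff):

```python
# Rank documents by descending relevance score.
ranked = sorted(zip(scores, args.documents), key=lambda pair: pair[0], reverse=True)
for rank, (score, doc) in enumerate(ranked, start=1):
    print(f"{rank}. [{score:.4f}] {doc}")
```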