Commit b1e9a00
feat: swift-binding-release (#545)
* add nexasdk swift binding
* add readme
* add generationStream
* Update README.md
* add Example
* Add embedding by input IDs to Embedder
* Make `generationStream` public in LLMLlama and VLMLlama
* Refactor test
* Refactor DeviceList to Device
  Replaces the DeviceList struct with a Device struct containing id and name. Updates getLlamaDeviceList to return an array of Device instead of DeviceList, and adjusts the logic to build the array accordingly. This simplifies the API and improves clarity.
* Refactor embedding and generation APIs, and add embedding search
  - Refactored embedding output to return 2D arrays and updated related logic.
  - Unified and renamed LLM and VLM generation methods to 'generate' and 'generateAsyncStream' for consistency.
  - Updated the Model protocol and extensions accordingly.
  - Added an EmbeddingSearch utility and a test for embedding-based document search.
  - Updated tests to use the new APIs and improved test coverage.
* update: `ml.h` align
* Update framework revision
* Update README.md
* Fix typo: templeteOptions to templateOptions
  - Renamed all instances of 'templeteOptions' to 'templateOptions' in GenerationOptions.
  - Updated test and usage in README for clarity.
* remove example
* update package revision
1 parent 2fb5f2b commit b1e9a00

26 files changed: +2285 −1 lines changed

.gitignore

Lines changed: 7 additions & 1 deletion
@@ -71,4 +71,10 @@ tmp/
 __debug_bin

 # TLS certificates
-*.pem
+*.pem
+/.swiftpm
+/bindings/ios/Example/Example.xcodeproj/project.xcworkspace/xcuserdata
+/bindings/ios/NexaAI/.swiftpm
+/.build
+/bindings/ios/Example/Example.xcodeproj/project.xcworkspace/xcshareddata
+/bindings/ios/Example/Example.xcodeproj/xcuserdata

Package.resolved

Lines changed: 14 additions & 0 deletions
Some generated files are not rendered by default.

Package.swift

Lines changed: 42 additions & 0 deletions
// swift-tools-version: 5.9
// The swift-tools-version declares the minimum version of Swift required to build this package.
import PackageDescription

let package = Package(
    name: "NexaAI",
    platforms: [
        .macOS(.v14), .iOS(.v17),
    ],
    products: [
        .library(name: "NexaAI", targets: ["NexaAI"])
    ],
    dependencies: [
        .package(url: "git@github.com:NexaAI/nexasdk-mobile-iOS-framework.git", branch: "main")
    ],
    targets: [
        // Targets are the basic building blocks of a package, defining a module or a test suite.
        // Targets can depend on other targets in this package and products from dependencies.
        .target(
            name: "NexaAI",
            dependencies: [
                .product(name: "NexaBridge", package: "nexasdk-mobile-iOS-framework"),
                .product(name: "LlamaPlugin", package: "nexasdk-mobile-iOS-framework"),
                .product(name: "llama", package: "nexasdk-mobile-iOS-framework"),
                .product(name: "common", package: "nexasdk-mobile-iOS-framework"),
                .product(name: "mtmd", package: "nexasdk-mobile-iOS-framework"),
                .product(name: "ggml", package: "nexasdk-mobile-iOS-framework"),
                .product(name: "ggml-base", package: "nexasdk-mobile-iOS-framework"),
                .product(name: "ggml-cpu", package: "nexasdk-mobile-iOS-framework"),
                .product(name: "ggml-metal", package: "nexasdk-mobile-iOS-framework")
            ],
            path: "bindings/ios/NexaAI/Sources",
            swiftSettings: [.interoperabilityMode(.Cxx)]
        ),
        .testTarget(
            name: "NexaAITests",
            dependencies: ["NexaAI"],
            path: "bindings/ios/NexaAI/Tests",
            swiftSettings: [.interoperabilityMode(.Cxx)],
        ),
    ]
)
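With a manifest at the repository root, the Swift binding can be pulled in via Swift Package Manager. The following consumer manifest is a minimal sketch, not part of this commit: the repository URL, branch, and app target name are assumptions to adapt.

// Hypothetical consumer Package.swift — URL and branch are placeholders, not taken from this diff.
// swift-tools-version: 5.9
import PackageDescription

let package = Package(
    name: "MyApp",
    platforms: [.iOS(.v17), .macOS(.v14)],
    dependencies: [
        // Point this at the repository that hosts the root Package.swift above.
        .package(url: "https://github.com/NexaAI/nexa-sdk.git", branch: "main")
    ],
    targets: [
        .executableTarget(
            name: "MyApp",
            dependencies: [.product(name: "NexaAI", package: "nexa-sdk")],
            // The NexaAI target builds with C++ interoperability, so consumers need the same setting.
            swiftSettings: [.interoperabilityMode(.Cxx)]
        )
    ]
)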

bindings/ios/NexaAI/Package.resolved

Lines changed: 14 additions & 0 deletions
Some generated files are not rendered by default.

bindings/ios/NexaAI/Package.swift

Lines changed: 42 additions & 0 deletions
// swift-tools-version: 5.9
// The swift-tools-version declares the minimum version of Swift required to build this package.

import PackageDescription

let package = Package(
    name: "NexaAI",
    platforms: [.iOS(.v17), .macOS(.v14)],
    products: [
        // Products define the executables and libraries a package produces, making them visible to other packages.
        .library(name: "NexaAI", targets: ["NexaAI"]),
    ],

    dependencies: [
        .package(url: "git@github.com:NexaAI/nexasdk-mobile-iOS-framework.git", branch: "main")
    ],

    targets: [
        // Targets are the basic building blocks of a package, defining a module or a test suite.
        // Targets can depend on other targets in this package and products from dependencies.
        .target(
            name: "NexaAI",
            dependencies: [
                .product(name: "NexaBridge", package: "nexasdk-mobile-iOS-framework"),
                .product(name: "LlamaPlugin", package: "nexasdk-mobile-iOS-framework"),
                .product(name: "llama", package: "nexasdk-mobile-iOS-framework"),
                .product(name: "common", package: "nexasdk-mobile-iOS-framework"),
                .product(name: "mtmd", package: "nexasdk-mobile-iOS-framework"),
                .product(name: "ggml", package: "nexasdk-mobile-iOS-framework"),
                .product(name: "ggml-base", package: "nexasdk-mobile-iOS-framework"),
                .product(name: "ggml-cpu", package: "nexasdk-mobile-iOS-framework"),
                .product(name: "ggml-metal", package: "nexasdk-mobile-iOS-framework")
            ],
            swiftSettings: [.interoperabilityMode(.Cxx)],
        ),
        .testTarget(
            name: "NexaAITests",
            dependencies: ["NexaAI"],
            swiftSettings: [.interoperabilityMode(.Cxx)],
        ),
    ]
)

Lines changed: 26 additions & 0 deletions
import Foundation

public struct ChatMessage {
    public var role: Role
    public var content: String
    public var images: [String]
    public var audios: [String]

    public init(
        role: Role,
        content: String,
        images: [String] = [],
        audios: [String] = []
    ) {
        self.role = role
        self.content = content
        self.images = images
        self.audios = audios
    }
}

public enum Role: String {
    case user
    case assistant
    case system
}
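Since ChatMessage is a plain value struct with defaulted image and audio lists, building a short multimodal conversation is just literal construction. A minimal sketch; the image path is a made-up placeholder.

// Sketch only — the file path below is hypothetical.
let conversation: [ChatMessage] = [
    ChatMessage(role: .system, content: "You are a helpful assistant."),
    ChatMessage(role: .user, content: "What is in this photo?", images: ["/tmp/photo.jpg"]),
]
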
Lines changed: 24 additions & 0 deletions
import Foundation

public struct EmbeddingConfig {

    public var batchSize: Int32
    public var normalize: Bool
    public var normalizeMethod: NormalizeMethod

    public init(batchSize: Int32, normalize: Bool, normalizeMethod: NormalizeMethod) {
        self.batchSize = batchSize
        self.normalize = normalize
        self.normalizeMethod = normalizeMethod
    }
}

public extension EmbeddingConfig {

    enum NormalizeMethod: String {
        case l2
        case mean
        case none
    }
}
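A quick construction sketch; the values are arbitrary, and how the Embedder consumes the config is not shown in this diff.

// Arbitrary example values; tune batch size and normalization to your workload.
let embeddingConfig = EmbeddingConfig(
    batchSize: 32,
    normalize: true,
    normalizeMethod: .l2
)
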
Lines changed: 45 additions & 0 deletions
import Foundation

/*
LLM / VLM generation configuration (IMPROVED: support multiple images and audios)
typedef struct {
    int32_t max_tokens;               /* Maximum tokens to generate */
    const char** stop;                /* Array of stop sequences */
    int32_t stop_count;               /* Number of stop sequences */
    int32_t n_past;                   /* Number of past tokens to consider */
    ml_SamplerConfig* sampler_config; /* Advanced sampling config */
    // --- Improved multimodal support ---
    ml_Path* image_paths;             /* Array of image paths for VLM (NULL if none) */
    int32_t image_count;              /* Number of images */
    ml_Path* audio_paths;             /* Array of audio paths for VLM (NULL if none) */
    int32_t audio_count;              /* Number of audios */
} ml_GenerationConfig;
*/

/// LLM / VLM generation configuration
public struct GenerationConfig: Codable {
    public var maxTokens: Int32              // Maximum tokens to generate
    public var stop: [String]                // Array of stop sequences
    public var nPast: Int32                  // Number of past tokens to consider
    public var samplerConfig: SamplerConfig  // Advanced sampling config
    public var imagePaths: [String]          // Array of image paths for VLM
    public var audioPaths: [String]          // Array of audio paths for VLM

    public init(
        maxTokens: Int32 = 1024,
        stop: [String] = [],
        nPast: Int32 = 0,
        samplerConfig: SamplerConfig = .default,
        imagePaths: [String] = [],
        audioPaths: [String] = []
    ) {
        self.maxTokens = maxTokens
        self.stop = stop
        self.nPast = nPast
        self.samplerConfig = samplerConfig
        self.imagePaths = imagePaths
        self.audioPaths = audioPaths
    }

    public static let `default` = GenerationConfig()
}
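A sketch of filling out GenerationConfig for a vision prompt. The stop sequence and image path are made up, and SamplerConfig (referenced via .default) is defined in a file not shown in this excerpt.

// Illustrative values only.
let generationConfig = GenerationConfig(
    maxTokens: 512,
    stop: ["</s>"],
    samplerConfig: .default,
    imagePaths: ["/tmp/photo.jpg"]  // hypothetical path handed through to the VLM
)
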
Lines changed: 69 additions & 0 deletions
import NexaBridge

/** LLM / VLM model configuration */
/*
typedef struct {
    int32_t n_ctx;                     // text context, 0 = from model
    int32_t n_threads;                 // number of threads to use for generation
    int32_t n_threads_batch;           // number of threads to use for batch processing
    int32_t n_batch;                   // logical maximum batch size that can be submitted to llama_decode
    int32_t n_ubatch;                  // physical maximum batch size
    int32_t n_seq_max;                 // max number of sequences (i.e. distinct states for recurrent models)
    ml_Path chat_template_path;        // path to chat template file, optional
    const char* chat_template_content; // content of chat template file, optional
    // For QNN
    ml_Path system_library_path;       /* System library path */
    ml_Path backend_library_path;      /* Backend library path */
    ml_Path extension_library_path;    /* Extension library path */
    ml_Path config_file_path;          /* Config file path */
    ml_Path embedded_tokens_path;      /* Embedded tokens path */
    int32_t max_tokens;                /* Maximum tokens */
    bool enable_thinking;              /* Enable thinking */
    bool verbose;                      /* Verbose */
} ml_ModelConfig;
*/

public struct ModelConfig: Codable {
    public var nCtx: Int32
    public var nThreads: Int32
    public var nThreadsBatch: Int32
    public var nBatch: Int32
    public var nUbatch: Int32
    public var nSeqMax: Int32

    public var chatTemplatePath: String?
    public var chatTemplateContent: String?

    public static let `default`: ModelConfig = {
        return .init(
            nCtx: 2048,
            nThreads: 0,
            nThreadsBatch: 0,
            nBatch: 0,
            nUbatch: 0,
            nSeqMax: 0,
            chatTemplatePath: nil,
            chatTemplateContent: nil
        )
    }()

    public init(
        nCtx: Int32 = 2048,
        nThreads: Int32 = 0,
        nThreadsBatch: Int32 = 0,
        nBatch: Int32 = 0,
        nUbatch: Int32 = 0,
        nSeqMax: Int32 = 0,
        chatTemplatePath: String? = nil,
        chatTemplateContent: String? = nil
    ) {
        self.nCtx = nCtx
        self.nThreads = nThreads
        self.nThreadsBatch = nThreadsBatch
        self.nBatch = nBatch
        self.nUbatch = nUbatch
        self.nSeqMax = nSeqMax
        self.chatTemplatePath = chatTemplatePath
        self.chatTemplateContent = chatTemplateContent
    }
}
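Every field of ModelConfig has a default (nCtx 2048, zeros elsewhere, which per the mirrored C header means "use the backend's value"), so callers only override what they need. A small sketch:

// Use the initializer's defaults and only raise the context window.
let modelConfig = ModelConfig(nCtx: 4096)

// Or take the canned configuration as-is.
let defaultConfig = ModelConfig.default
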
Lines changed: 88 additions & 0 deletions
/*
/** Profiling data structure for LLM/VLM performance metrics */
typedef struct {
    int64_t ttft;             /* Time to first token (us) */
    int64_t prompt_time;      /* Prompt processing time (us) */
    int64_t decode_time;      /* Token generation time (us) */

    int64_t prompt_tokens;    /* Number of prompt tokens */
    int64_t generated_tokens; /* Number of generated tokens */
    int64_t audio_duration;   /* Audio duration (us) */

    double prefill_speed;     /* Prefill speed (tokens/sec) */
    double decoding_speed;    /* Decoding speed (tokens/sec) */
    double real_time_factor;  /* Real-Time Factor (RTF) (1.0 = real-time, >1.0 = faster, <1.0 = slower) */

    const char* stop_reason;  /* Stop reason: "eos", "length", "user", "stop_sequence" */
} ml_ProfileData;

*/
import NexaBridge

public struct ProfileData: CustomStringConvertible {
    public let ttft: Int64
    public let promptTime: Int64
    public let decodeTime: Int64

    public let promptTokens: Int64
    public let generatedTokens: Int64
    public let audioDuration: Int64

    public let prefillSpeed: Double
    public let decodingSpeed: Double
    public let realTimeFactor: Double

    public let stopReason: String

    public init(
        ttft: Int64 = 0,
        promptTime: Int64 = 0,
        decodeTime: Int64 = 0,
        promptTokens: Int64 = 0,
        generatedTokens: Int64 = 0,
        audioDuration: Int64 = 0,
        prefillSpeed: Double = 0.0,
        decodingSpeed: Double = 0.0,
        realTimeFactor: Double = 0.0,
        stopReason: String = ""
    ) {
        self.ttft = ttft
        self.promptTime = promptTime
        self.decodeTime = decodeTime
        self.promptTokens = promptTokens
        self.generatedTokens = generatedTokens
        self.audioDuration = audioDuration
        self.prefillSpeed = prefillSpeed
        self.decodingSpeed = decodingSpeed
        self.realTimeFactor = realTimeFactor
        self.stopReason = stopReason
    }

    init(from cProfileData: ml_ProfileData) {
        self.ttft = cProfileData.ttft
        self.promptTime = cProfileData.prompt_time
        self.decodeTime = cProfileData.decode_time
        self.promptTokens = cProfileData.prompt_tokens
        self.generatedTokens = cProfileData.generated_tokens
        self.audioDuration = cProfileData.audio_duration
        self.prefillSpeed = cProfileData.prefill_speed
        self.decodingSpeed = cProfileData.decoding_speed
        self.realTimeFactor = cProfileData.real_time_factor
        self.stopReason = cProfileData.stop_reason == nil ? "" : String(cString: cProfileData.stop_reason!)
    }

    public var description: String {
        """
        TTFT: \(ttft) us
        Prompt Time: \(promptTime) us
        Decode Time: \(decodeTime) us
        Prompt Tokens: \(promptTokens)
        Generated Tokens: \(generatedTokens)
        Audio Duration: \(audioDuration) us
        Prefill Speed: \(prefillSpeed) t/s
        Decoding Speed: \(decodingSpeed) t/s
        Real Time Factor: \(realTimeFactor)
        Stop reason: \(stopReason)
        """
    }
}
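Because ProfileData conforms to CustomStringConvertible, printing a value yields the multi-line report built in description above. A tiny sketch with fabricated numbers:

// Fabricated metrics, purely to show the printed report format.
let profile = ProfileData(ttft: 120_000, promptTokens: 42, generatedTokens: 256, decodingSpeed: 31.5, stopReason: "eos")
print(profile)  // "TTFT: 120000 us", "Prompt Time: 0 us", ... down to "Stop reason: eos"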
