From 614a9afd2a9738dfb82b8a6f72513106009466b7 Mon Sep 17 00:00:00 2001 From: cculbreath Date: Fri, 10 Oct 2025 10:57:08 -0500 Subject: [PATCH 1/4] Add streaming TTS support and enrich API errors --- .../ChatDemo/ChatProvider.swift | 16 +- .../ChatStructuredOutputProvider.swift | 16 +- README.md | 16 +- Sources/OpenAI/AIProxy/AIProxyService.swift | 16 + Sources/OpenAI/AIProxy/Endpoint+AIProxy.swift | 10 +- .../Azure/DefaultOpenAIAzureService.swift | 7 + .../LocalModelService/LocalModelService.swift | 7 + .../Networking/AsyncHTTPClientAdapter.swift | 44 ++- .../URLSessionHTTPClientAdapter.swift | 36 ++- .../Audio/AudioSpeechParameters.swift | 7 +- .../Audio/AudioSpeechChunkObject.swift | 28 ++ .../Public/Service/DefaultOpenAIService.swift | 16 + .../OpenAI/Public/Service/OpenAIService.swift | 288 +++++++++++++++++- Tests/OpenAITests/AudioStreamingTests.swift | 100 ++++++ 14 files changed, 550 insertions(+), 57 deletions(-) create mode 100644 Sources/OpenAI/Public/ResponseModels/Audio/AudioSpeechChunkObject.swift create mode 100644 Tests/OpenAITests/AudioStreamingTests.swift diff --git a/Examples/SwiftOpenAIExample/SwiftOpenAIExample/ChatDemo/ChatProvider.swift b/Examples/SwiftOpenAIExample/SwiftOpenAIExample/ChatDemo/ChatProvider.swift index 11c9b9d..8bee668 100644 --- a/Examples/SwiftOpenAIExample/SwiftOpenAIExample/ChatDemo/ChatProvider.swift +++ b/Examples/SwiftOpenAIExample/SwiftOpenAIExample/ChatDemo/ChatProvider.swift @@ -32,8 +32,12 @@ class ChatProvider { messages = choices?.compactMap(\.message?.content) ?? [] dump(chatUsage) usage = chatUsage - } catch APIError.responseUnsuccessful(let description, let statusCode) { - self.errorMessage = "Network error with status code: \(statusCode) and description: \(description)" + } catch APIError.responseUnsuccessful(let description, let statusCode, let responseBody) { + var message = "Network error with status code: \(statusCode) and description: \(description)" + if let responseBody { + message += " — Response body: \(responseBody)" + } + self.errorMessage = message } catch { errorMessage = error.localizedDescription } @@ -50,8 +54,12 @@ class ChatProvider { let content = result.choices?.first?.delta?.content ?? "" self.message += content } - } catch APIError.responseUnsuccessful(let description, let statusCode) { - self.errorMessage = "Network error with status code: \(statusCode) and description: \(description)" + } catch APIError.responseUnsuccessful(let description, let statusCode, let responseBody) { + var message = "Network error with status code: \(statusCode) and description: \(description)" + if let responseBody { + message += " — Response body: \(responseBody)" + } + self.errorMessage = message } catch { self.errorMessage = error.localizedDescription } diff --git a/Examples/SwiftOpenAIExample/SwiftOpenAIExample/ChatStructuredOutputs/ChatStructuredOutputProvider.swift b/Examples/SwiftOpenAIExample/SwiftOpenAIExample/ChatStructuredOutputs/ChatStructuredOutputProvider.swift index cb70205..9156f10 100644 --- a/Examples/SwiftOpenAIExample/SwiftOpenAIExample/ChatStructuredOutputs/ChatStructuredOutputProvider.swift +++ b/Examples/SwiftOpenAIExample/SwiftOpenAIExample/ChatStructuredOutputs/ChatStructuredOutputProvider.swift @@ -36,8 +36,12 @@ final class ChatStructuredOutputProvider { messages = choices.compactMap(\.message?.content).map { $0.asJsonFormatted() } assert(messages.count == 1) errorMessage = choices.first?.message?.refusal ?? 
"" - } catch APIError.responseUnsuccessful(let description, let statusCode) { - self.errorMessage = "Network error with status code: \(statusCode) and description: \(description)" + } catch APIError.responseUnsuccessful(let description, let statusCode, let responseBody) { + var message = "Network error with status code: \(statusCode) and description: \(description)" + if let responseBody { + message += " — Response body: \(responseBody)" + } + self.errorMessage = message } catch { errorMessage = error.localizedDescription } @@ -58,8 +62,12 @@ final class ChatStructuredOutputProvider { self.message = self.message.asJsonFormatted() } } - } catch APIError.responseUnsuccessful(let description, let statusCode) { - self.errorMessage = "Network error with status code: \(statusCode) and description: \(description)" + } catch APIError.responseUnsuccessful(let description, let statusCode, let responseBody) { + var message = "Network error with status code: \(statusCode) and description: \(description)" + if let responseBody { + message += " — Response body: \(responseBody)" + } + self.errorMessage = message } catch { self.errorMessage = error.localizedDescription } diff --git a/README.md b/README.md index fb741a7..9d74de6 100644 --- a/README.md +++ b/README.md @@ -171,7 +171,7 @@ That's all you need to begin accessing the full range of OpenAI endpoints. You may want to build UI around the type of error that the API returns. For example, a `429` means that your requests are being rate limited. -The `APIError` type has a case `responseUnsuccessful` with two associated values: a `description` and `statusCode`. +The `APIError` type has a case `responseUnsuccessful` with three associated values: a `description`, `statusCode`, and an optional `responseBody`. Here is a usage example using the chat completion API: ```swift @@ -181,8 +181,11 @@ let parameters = ChatCompletionParameters(messages: [.init(role: .user, content: do { let choices = try await service.startChat(parameters: parameters).choices // Work with choices -} catch APIError.responseUnsuccessful(let description, let statusCode) { +} catch APIError.responseUnsuccessful(let description, let statusCode, let responseBody) { print("Network error with status code: \(statusCode) and description: \(description)") + if let responseBody { + print("Response body: \(responseBody)") + } } catch { print(error.localizedDescription) } @@ -4274,8 +4277,12 @@ do { self.reasoningMessage += reasoning } } -} catch APIError.responseUnsuccessful(let description, let statusCode) { - self.errorMessage = "Network error with status code: \(statusCode) and description: \(description)" +} catch APIError.responseUnsuccessful(let description, let statusCode, let responseBody) { + var message = "Network error with status code: \(statusCode) and description: \(description)" + if let responseBody { + message += " — Response body: \(responseBody)" + } + self.errorMessage = message } catch { self.errorMessage = error.localizedDescription } @@ -4328,4 +4335,3 @@ let stream = try await service.startStreamedChat(parameters: parameters) ## Collaboration Open a PR for any proposed change pointing it to `main` branch. 
Unit tests are highly appreciated ❤️ - diff --git a/Sources/OpenAI/AIProxy/AIProxyService.swift b/Sources/OpenAI/AIProxy/AIProxyService.swift index 4a586b8..560eea5 100644 --- a/Sources/OpenAI/AIProxy/AIProxyService.swift +++ b/Sources/OpenAI/AIProxy/AIProxyService.swift @@ -95,6 +95,22 @@ struct AIProxyService: OpenAIService { return AudioSpeechObject(output: data) } + func createStreamingSpeech( + parameters: AudioSpeechParameters) + async throws -> AsyncThrowingStream + { + var streamingParameters = parameters + streamingParameters.stream = true + let request = try await OpenAIAPI.audio(.speech).request( + aiproxyPartialKey: partialKey, + clientID: clientID, + organizationID: organizationID, + openAIEnvironment: openAIEnvironment, + method: .post, + params: streamingParameters) + return try await fetchAudioStream(debugEnabled: debugEnabled, with: request) + } + // MARK: Chat func startChat( diff --git a/Sources/OpenAI/AIProxy/Endpoint+AIProxy.swift b/Sources/OpenAI/AIProxy/Endpoint+AIProxy.swift index 9b15f66..686c225 100644 --- a/Sources/OpenAI/AIProxy/Endpoint+AIProxy.swift +++ b/Sources/OpenAI/AIProxy/Endpoint+AIProxy.swift @@ -45,8 +45,9 @@ extension Endpoint { async throws -> URLRequest { let finalPath = path(in: openAIEnvironment) - var request = URLRequest(url: urlComponents(serviceURL: openAIEnvironment.baseURL, path: finalPath, queryItems: queryItems) - .url!) + var request = URLRequest( + url: urlComponents(serviceURL: openAIEnvironment.baseURL, path: finalPath, queryItems: queryItems) + .url!) request.addValue("application/json", forHTTPHeaderField: "Content-Type") request.addValue(aiproxyPartialKey, forHTTPHeaderField: "aiproxy-partial-key") if let organizationID { @@ -84,8 +85,9 @@ extension Endpoint { async throws -> URLRequest { let finalPath = path(in: openAIEnvironment) - var request = URLRequest(url: urlComponents(serviceURL: openAIEnvironment.baseURL, path: finalPath, queryItems: queryItems) - .url!) + var request = URLRequest( + url: urlComponents(serviceURL: openAIEnvironment.baseURL, path: finalPath, queryItems: queryItems) + .url!) request.httpMethod = method.rawValue request.addValue(aiproxyPartialKey, forHTTPHeaderField: "aiproxy-partial-key") if let organizationID { diff --git a/Sources/OpenAI/Azure/DefaultOpenAIAzureService.swift b/Sources/OpenAI/Azure/DefaultOpenAIAzureService.swift index a58fdb2..f552391 100644 --- a/Sources/OpenAI/Azure/DefaultOpenAIAzureService.swift +++ b/Sources/OpenAI/Azure/DefaultOpenAIAzureService.swift @@ -50,6 +50,13 @@ public final class DefaultOpenAIAzureService: OpenAIService { "Currently, this API is not supported. We welcome and encourage contributions to our open-source project. Please consider opening an issue or submitting a pull request to add support for this feature.") } + public func createStreamingSpeech(parameters _: AudioSpeechParameters) async throws + -> AsyncThrowingStream + { + fatalError( + "Currently, this API is not supported. We welcome and encourage contributions to our open-source project. 
Please consider opening an issue or submitting a pull request to add support for this feature.") + } + public func startChat(parameters: ChatCompletionParameters) async throws -> ChatCompletionObject { var chatParameters = parameters chatParameters.stream = false diff --git a/Sources/OpenAI/LocalModelService/LocalModelService.swift b/Sources/OpenAI/LocalModelService/LocalModelService.swift index 6ad9f05..6cb6a18 100644 --- a/Sources/OpenAI/LocalModelService/LocalModelService.swift +++ b/Sources/OpenAI/LocalModelService/LocalModelService.swift @@ -67,6 +67,13 @@ struct LocalModelService: OpenAIService { "Currently, this API is not supported. We welcome and encourage contributions to our open-source project. Please consider opening an issue or submitting a pull request to add support for this feature.") } + func createStreamingSpeech(parameters _: AudioSpeechParameters) async throws + -> AsyncThrowingStream + { + fatalError( + "Currently, this API is not supported. We welcome and encourage contributions to our open-source project. Please consider opening an issue or submitting a pull request to add support for this feature.") + } + func startChat( parameters: ChatCompletionParameters) async throws -> ChatCompletionObject diff --git a/Sources/OpenAI/Private/Networking/AsyncHTTPClientAdapter.swift b/Sources/OpenAI/Private/Networking/AsyncHTTPClientAdapter.swift index 4f39129..cfa4fa4 100644 --- a/Sources/OpenAI/Private/Networking/AsyncHTTPClientAdapter.swift +++ b/Sources/OpenAI/Private/Networking/AsyncHTTPClientAdapter.swift @@ -62,29 +62,47 @@ public class AsyncHTTPClientAdapter: HTTPClient { let asyncHTTPClientRequest = try createAsyncHTTPClientRequest(from: request) let response = try await client.execute(asyncHTTPClientRequest, deadline: .now() + .seconds(60)) + let contentType = response.headers.first(name: "content-type") ?? 
"" let httpResponse = HTTPResponse( statusCode: Int(response.status.code), headers: convertHeaders(response.headers)) - let stream = AsyncThrowingStream { continuation in - Task { - do { - for try await byteBuffer in response.body { - if let string = byteBuffer.getString(at: 0, length: byteBuffer.readableBytes) { - let lines = string.split(separator: "\n", omittingEmptySubsequences: false) - for line in lines { - continuation.yield(String(line)) + if contentType.lowercased().contains("text/event-stream") { + let stream = AsyncThrowingStream { continuation in + Task { + do { + for try await byteBuffer in response.body { + if let string = byteBuffer.getString(at: 0, length: byteBuffer.readableBytes) { + let lines = string.split(separator: "\n", omittingEmptySubsequences: false) + for line in lines { + continuation.yield(String(line)) + } } } + continuation.finish() + } catch { + continuation.finish(throwing: error) } - continuation.finish() - } catch { - continuation.finish(throwing: error) } } + return (.lines(stream), httpResponse) + } else { + let byteStream = AsyncThrowingStream { continuation in + Task { + do { + for try await byteBuffer in response.body { + for byte in byteBuffer.readableBytesView { + continuation.yield(byte) + } + } + continuation.finish() + } catch { + continuation.finish(throwing: error) + } + } + } + return (.bytes(byteStream), httpResponse) } - - return (.lines(stream), httpResponse) } /// Properly shutdown the HTTP client diff --git a/Sources/OpenAI/Private/Networking/URLSessionHTTPClientAdapter.swift b/Sources/OpenAI/Private/Networking/URLSessionHTTPClientAdapter.swift index b806a06..63ce57d 100644 --- a/Sources/OpenAI/Private/Networking/URLSessionHTTPClientAdapter.swift +++ b/Sources/OpenAI/Private/Networking/URLSessionHTTPClientAdapter.swift @@ -48,20 +48,36 @@ public class URLSessionHTTPClientAdapter: HTTPClient { statusCode: httpURLResponse.statusCode, headers: convertHeaders(httpURLResponse.allHeaderFields)) - let stream = AsyncThrowingStream { continuation in - Task { - do { - for try await line in asyncBytes.lines { - continuation.yield(line) + let contentType = httpURLResponse.value(forHTTPHeaderField: "Content-Type") ?? httpURLResponse.mimeType ?? "" + if contentType.lowercased().contains("text/event-stream") { + let stream = AsyncThrowingStream { continuation in + Task { + do { + for try await line in asyncBytes.lines { + continuation.yield(line) + } + continuation.finish() + } catch { + continuation.finish(throwing: error) } - continuation.finish() - } catch { - continuation.finish(throwing: error) } } + return (.lines(stream), response) + } else { + let byteStream = AsyncThrowingStream { continuation in + Task { + do { + for try await byte in asyncBytes { + continuation.yield(byte) + } + continuation.finish() + } catch { + continuation.finish(throwing: error) + } + } + } + return (.bytes(byteStream), response) } - - return (.lines(stream), response) } private let urlSession: URLSession diff --git a/Sources/OpenAI/Public/Parameters/Audio/AudioSpeechParameters.swift b/Sources/OpenAI/Public/Parameters/Audio/AudioSpeechParameters.swift index 8150c38..89e0351 100644 --- a/Sources/OpenAI/Public/Parameters/Audio/AudioSpeechParameters.swift +++ b/Sources/OpenAI/Public/Parameters/Audio/AudioSpeechParameters.swift @@ -14,13 +14,15 @@ public struct AudioSpeechParameters: Encodable { input: String, voice: Voice, responseFormat: ResponseFormat? = nil, - speed: Double? = nil) + speed: Double? = nil, + stream: Bool? 
= nil) { self.model = model.rawValue self.input = input self.voice = voice.rawValue self.responseFormat = responseFormat?.rawValue self.speed = speed + self.stream = stream } public enum TTSModel { @@ -65,6 +67,7 @@ public struct AudioSpeechParameters: Encodable { case voice case responseFormat = "response_format" case speed + case stream } /// One of the available [TTS models](https://platform.openai.com/docs/models/tts): tts-1 or tts-1-hd @@ -77,4 +80,6 @@ public struct AudioSpeechParameters: Encodable { let responseFormat: String? /// Defaults to 1, The speed of the generated audio. Select a value from 0.25 to 4.0. 1.0 is the default. let speed: Double? + /// When true, the API will return streaming audio chunks instead of a single response payload. + var stream: Bool? } diff --git a/Sources/OpenAI/Public/ResponseModels/Audio/AudioSpeechChunkObject.swift b/Sources/OpenAI/Public/ResponseModels/Audio/AudioSpeechChunkObject.swift new file mode 100644 index 0000000..05005a1 --- /dev/null +++ b/Sources/OpenAI/Public/ResponseModels/Audio/AudioSpeechChunkObject.swift @@ -0,0 +1,28 @@ +// +// AudioSpeechChunkObject.swift +// +// +// Created by SwiftOpenAI Community on 5/18/25. +// + +import Foundation + +/// Represents a single chunk of streaming audio data returned by the TTS API. +public struct AudioSpeechChunkObject { + /// Raw audio data for this chunk. + public let chunk: Data + /// Indicates whether this is the final chunk in the stream. + public let isLastChunk: Bool + /// Optional sequential index for the chunk, useful for external bookkeeping. + public let chunkIndex: Int? + + public init( + chunk: Data, + isLastChunk: Bool = false, + chunkIndex: Int? = nil) + { + self.chunk = chunk + self.isLastChunk = isLastChunk + self.chunkIndex = chunkIndex + } +} diff --git a/Sources/OpenAI/Public/Service/DefaultOpenAIService.swift b/Sources/OpenAI/Public/Service/DefaultOpenAIService.swift index b10b1a9..a34fd30 100644 --- a/Sources/OpenAI/Public/Service/DefaultOpenAIService.swift +++ b/Sources/OpenAI/Public/Service/DefaultOpenAIService.swift @@ -78,6 +78,22 @@ struct DefaultOpenAIService: OpenAIService { return AudioSpeechObject(output: data) } + func createStreamingSpeech( + parameters: AudioSpeechParameters) + async throws -> AsyncThrowingStream + { + var streamingParameters = parameters + streamingParameters.stream = true + let request = try OpenAIAPI.audio(.speech).request( + apiKey: apiKey, + openAIEnvironment: openAIEnvironment, + organizationID: organizationID, + method: .post, + params: streamingParameters, + extraHeaders: extraHeaders) + return try await fetchAudioStream(debugEnabled: debugEnabled, with: request) + } + // MARK: Chat func startChat( diff --git a/Sources/OpenAI/Public/Service/OpenAIService.swift b/Sources/OpenAI/Public/Service/OpenAIService.swift index 86ff55d..b8e09bf 100644 --- a/Sources/OpenAI/Public/Service/OpenAIService.swift +++ b/Sources/OpenAI/Public/Service/OpenAIService.swift @@ -14,7 +14,7 @@ import FoundationNetworking public enum APIError: Error { case requestFailed(description: String) - case responseUnsuccessful(description: String, statusCode: Int) + case responseUnsuccessful(description: String, statusCode: Int, responseBody: String? 
= nil) case invalidData case jsonDecodingFailure(description: String) case dataCouldNotBeReadMissingData(description: String) @@ -23,13 +23,30 @@ public enum APIError: Error { public var displayDescription: String { switch self { - case .requestFailed(let description): description - case .responseUnsuccessful(let description, _): description - case .invalidData: "Invalid data" - case .jsonDecodingFailure(let description): description - case .dataCouldNotBeReadMissingData(let description): description - case .bothDecodingStrategiesFailed: "Decoding strategies failed." - case .timeOutError: "Time Out Error." + case .requestFailed(let description): + description + + case .responseUnsuccessful(let description, let statusCode, let responseBody): + if let responseBody, !responseBody.isEmpty { + "Status \(statusCode): \(description) - Response: \(responseBody)" + } else { + "Status \(statusCode): \(description)" + } + + case .invalidData: + "Invalid data" + + case .jsonDecodingFailure(let description): + description + + case .dataCouldNotBeReadMissingData(let description): + description + + case .bothDecodingStrategiesFailed: + "Decoding strategies failed." + + case .timeOutError: + "Time Out Error." } } } @@ -125,6 +142,14 @@ public protocol OpenAIService { func createSpeech( parameters: AudioSpeechParameters) async throws -> AudioSpeechObject + /// - Parameter parameters: The audio speech parameters. + /// - Returns: A streamed sequence of audio chunks. + /// - Throws: An error if the process fails. + /// + /// For more information, refer to [OpenAI's Audio Speech API documentation](https://platform.openai.com/docs/api-reference/audio/createSpeech). + func createStreamingSpeech( + parameters: AudioSpeechParameters) + async throws -> AsyncThrowingStream // MARK: Chat @@ -1106,6 +1131,7 @@ extension OpenAIService { guard response.statusCode == 200 else { var errorMessage = "status code \(response.statusCode)" + let responseBody = String(data: data, encoding: .utf8) do { let error = try decoder.decode(OpenAIErrorResponse.self, from: data) errorMessage = error.error.message ?? "NO ERROR MESSAGE PROVIDED" @@ -1114,7 +1140,8 @@ extension OpenAIService { } throw APIError.responseUnsuccessful( description: errorMessage, - statusCode: response.statusCode) + statusCode: response.statusCode, + responseBody: responseBody) } var content = [[String: Any]]() if let jsonString = String(data: data, encoding: String.Encoding.utf8) { @@ -1154,11 +1181,15 @@ extension OpenAIService { guard response.statusCode == 200 else { var errorMessage = "Status code \(response.statusCode)" + var responseBody = String(data: data, encoding: .utf8) do { let errorResponse = try decoder.decode(OpenAIErrorResponse.self, from: data) errorMessage = errorResponse.error.message ?? "NO ERROR MESSAGE PROVIDED" + if responseBody == nil, let message = errorResponse.error.message { + responseBody = message + } } catch { - if let errorString = String(data: data, encoding: .utf8), !errorString.isEmpty { + if let errorString = responseBody, !errorString.isEmpty { errorMessage += " - \(errorString)" } else { errorMessage += " - No error message provided" @@ -1166,11 +1197,154 @@ extension OpenAIService { } throw APIError.responseUnsuccessful( description: errorMessage, - statusCode: response.statusCode) + statusCode: response.statusCode, + responseBody: responseBody) } return data } + /// Streams audio data from the TTS endpoint and yields incremental chunks. 
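+  ///
+  /// - Parameters:
+  ///   - debugEnabled: If `true`, prints the request's cURL command and the HTTP response to the console.
+  ///   - request: A `URLRequest` for the speech endpoint whose parameters already have `stream` set to `true`.
+  /// - Returns: An `AsyncThrowingStream` of `AudioSpeechChunkObject` values. The final element is an empty chunk whose `isLastChunk` flag is `true`.
+  /// - Throws: `APIError.responseUnsuccessful`, carrying the status code and response body, when the endpoint returns a non-200 response.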
+ public func fetchAudioStream( + debugEnabled: Bool, + with request: URLRequest) + async throws -> AsyncThrowingStream + { + if debugEnabled { + printCurlCommand(request) + } + + let httpRequest = try HTTPRequest(from: request) + + let (byteStream, response) = try await httpClient.bytes(for: httpRequest) + + if debugEnabled { + printHTTPResponse(response) + } + + guard response.statusCode == 200 else { + let (errorData, _) = try await httpClient.data(for: httpRequest) + var errorMessage = "Status code \(response.statusCode)" + var responseBody: String? + do { + let errorResponse = try decoder.decode(OpenAIErrorResponse.self, from: errorData) + errorMessage = errorResponse.error.message ?? errorMessage + responseBody = String(data: errorData, encoding: .utf8) + } catch { + responseBody = String(data: errorData, encoding: .utf8) + if let responseBody, !responseBody.isEmpty { + errorMessage += " - \(responseBody)" + } else { + errorMessage += " - No error message provided" + } + } + throw APIError.responseUnsuccessful( + description: errorMessage, + statusCode: response.statusCode, + responseBody: responseBody) + } + + return AsyncThrowingStream { continuation in + let streamTask = Task { + var chunkIndex = 0 + var hasEmittedFinalChunk = false + do { + switch byteStream { + case .bytes(let bytesStream): + var currentChunk = Data() + let chunkSize = 4096 + for try await byte in bytesStream { + currentChunk.append(byte) + if currentChunk.count >= chunkSize { + continuation.yield( + AudioSpeechChunkObject( + chunk: currentChunk, + isLastChunk: false, + chunkIndex: chunkIndex)) + chunkIndex += 1 + currentChunk = Data() + } + } + if !currentChunk.isEmpty { + continuation.yield( + AudioSpeechChunkObject( + chunk: currentChunk, + isLastChunk: false, + chunkIndex: chunkIndex)) + chunkIndex += 1 + } + + case .lines(let lineStream): + let whitespace = CharacterSet.whitespacesAndNewlines + for try await line in lineStream { + guard line.hasPrefix("data:") else { + continue + } + let payload = line.dropFirst(5).trimmingCharacters(in: whitespace) + if payload.isEmpty { + continue + } + if payload == "[DONE]" { + hasEmittedFinalChunk = true + continuation.yield( + AudioSpeechChunkObject( + chunk: Data(), + isLastChunk: true, + chunkIndex: chunkIndex)) + continuation.finish() + return + } + let payloadData = Data(payload.utf8) + if let audioData = Data(base64Encoded: payload) { + continuation.yield( + AudioSpeechChunkObject( + chunk: audioData, + isLastChunk: false, + chunkIndex: chunkIndex)) + chunkIndex += 1 + continue + } + if + let jsonObject = try? JSONSerialization.jsonObject(with: payloadData) as? [String: Any], + let audioData = extractAudioBase64(from: jsonObject) + { + continuation.yield( + AudioSpeechChunkObject( + chunk: audioData, + isLastChunk: false, + chunkIndex: chunkIndex)) + chunkIndex += 1 + if isTerminalAudioEvent(jsonObject) { + hasEmittedFinalChunk = true + continuation.yield( + AudioSpeechChunkObject( + chunk: Data(), + isLastChunk: true, + chunkIndex: chunkIndex)) + continuation.finish() + return + } + continue + } + } + } + if !hasEmittedFinalChunk { + continuation.yield( + AudioSpeechChunkObject( + chunk: Data(), + isLastChunk: true, + chunkIndex: chunkIndex)) + } + continuation.finish() + } catch { + continuation.finish(throwing: error) + } + } + continuation.onTermination = { @Sendable _ in + streamTask.cancel() + } + } + } + /// Asynchronously fetches a decodable data type from OpenAI's API. 
/// /// - Parameters: @@ -1200,6 +1374,7 @@ extension OpenAIService { guard response.statusCode == 200 else { var errorMessage = "status code \(response.statusCode)" + let responseBody = String(data: data, encoding: .utf8) do { let error = try decoder.decode(OpenAIErrorResponse.self, from: data) errorMessage = error.error.message ?? "NO ERROR MESSAGE PROVIDED" @@ -1208,7 +1383,8 @@ extension OpenAIService { } throw APIError.responseUnsuccessful( description: errorMessage, - statusCode: response.statusCode) + statusCode: response.statusCode, + responseBody: responseBody) } #if DEBUG if debugEnabled { @@ -1268,18 +1444,23 @@ extension OpenAIService { guard response.statusCode == 200 else { var errorMessage = "status code \(response.statusCode)" + var responseBody: String? do { // For error responses, we need to get the raw data instead of using the stream // as error responses are regular JSON, not streaming data let (errorData, _) = try await httpClient.data(for: httpRequest) + responseBody = String(data: errorData, encoding: .utf8) let error = try decoder.decode(OpenAIErrorResponse.self, from: errorData) errorMessage = error.error.message ?? "NO ERROR MESSAGE PROVIDED" } catch { - // If decoding fails, keep the original error message with status code + if let responseBody, !responseBody.isEmpty { + errorMessage += " - \(responseBody)" + } } throw APIError.responseUnsuccessful( description: errorMessage, - statusCode: response.statusCode) + statusCode: response.statusCode, + responseBody: responseBody) } // Create a stream from the lines @@ -1366,18 +1547,23 @@ extension OpenAIService { guard response.statusCode == 200 else { var errorMessage = "status code \(response.statusCode)" + var responseBody: String? do { // For error responses, we need to get the raw data instead of using the stream // as error responses are regular JSON, not streaming data let (errorData, _) = try await httpClient.data(for: httpRequest) + responseBody = String(data: errorData, encoding: .utf8) let error = try decoder.decode(OpenAIErrorResponse.self, from: errorData) errorMessage = error.error.message ?? "NO ERROR MESSAGE PROVIDED" } catch { - // If decoding fails, keep the original error message with status code + if let responseBody, !responseBody.isEmpty { + errorMessage += " - \(responseBody)" + } } throw APIError.responseUnsuccessful( description: errorMessage, - statusCode: response.statusCode) + statusCode: response.statusCode, + responseBody: responseBody) } // Create a stream from the lines @@ -1508,6 +1694,76 @@ extension OpenAIService { } } + private func extractAudioBase64(from jsonObject: [String: Any]) -> Data? { + if let deltaString = jsonObject["delta"] as? String, let data = Data(base64Encoded: deltaString) { + return data + } + + if + let deltaDictionary = jsonObject["delta"] as? [String: Any], + let nested = extractAudioBase64(from: deltaDictionary) + { + return nested + } + + if + let audioDictionary = jsonObject["audio"] as? [String: Any], + let base64String = audioDictionary["data"] as? String ?? audioDictionary["chunk"] as? String, + let data = Data(base64Encoded: base64String) + { + return data + } + + if let dataString = jsonObject["data"] as? String, let data = Data(base64Encoded: dataString) { + return data + } + + if let chunkString = jsonObject["chunk"] as? String, let data = Data(base64Encoded: chunkString) { + return data + } + + if + let resultDictionary = jsonObject["result"] as? 
[String: Any], + let nested = extractAudioBase64(from: resultDictionary) + { + return nested + } + + if let outputs = jsonObject["output"] as? [[String: Any]] { + for item in outputs { + if let data = extractAudioBase64(from: item) { + return data + } + } + } + + if let choices = jsonObject["choices"] as? [[String: Any]] { + for choice in choices { + if + let deltaDictionary = choice["delta"] as? [String: Any], + let data = extractAudioBase64(from: deltaDictionary) + { + return data + } + } + } + + return nil + } + + private func isTerminalAudioEvent(_ jsonObject: [String: Any]) -> Bool { + if let done = jsonObject["done"] as? Bool, done { + return true + } + if let type = jsonObject["type"] as? String { + return type == "response.output_audio.done" || type == "response.done" || type == "response.completed" + } + if let status = jsonObject["status"] as? String { + return status == "completed" + } + return false + } + // MARK: Debug Helpers private func prettyPrintJSON( diff --git a/Tests/OpenAITests/AudioStreamingTests.swift b/Tests/OpenAITests/AudioStreamingTests.swift new file mode 100644 index 0000000..dacd39a --- /dev/null +++ b/Tests/OpenAITests/AudioStreamingTests.swift @@ -0,0 +1,100 @@ +import XCTest +@testable import SwiftOpenAI + +// MARK: - AudioStreamingTests + +final class AudioStreamingTests: XCTestCase { + func testAudioSpeechParametersSupportStreamingFlag() { + let parameters = AudioSpeechParameters( + model: .tts1, + input: "Hello", + voice: .alloy, + stream: true) + + XCTAssertEqual(parameters.input, "Hello") + XCTAssertEqual(parameters.voice, "alloy") + XCTAssertEqual(parameters.stream, true) + } + + func testAudioSpeechChunkObjectStoresMetadata() { + let data = Data([0x01, 0x02]) + let chunk = AudioSpeechChunkObject(chunk: data, isLastChunk: false, chunkIndex: 3) + + XCTAssertEqual(chunk.chunk, data) + XCTAssertFalse(chunk.isLastChunk) + XCTAssertEqual(chunk.chunkIndex, 3) + } + + func testCreateStreamingSpeechDecodesSSEAudio() async throws { + let expectedAudio = Data([0x01, 0x02, 0x03]) + let base64Audio = expectedAudio.base64EncodedString() + let events = [ + "data: {\"type\":\"response.output_audio.delta\",\"delta\":\"\(base64Audio)\"}", + "data: {\"type\":\"response.output_audio.done\"}", + "data: [DONE]", + ] + + let mockHTTPClient = MockHTTPClient() + mockHTTPClient.bytesResponse = ( + .lines(AsyncThrowingStream { continuation in + for event in events { + continuation.yield(event) + } + continuation.finish() + }), + HTTPResponse(statusCode: 200, headers: ["Content-Type": "text/event-stream"])) + + let service = DefaultOpenAIService( + apiKey: "test", + httpClient: mockHTTPClient, + decoder: JSONDecoder(), + debugEnabled: false) + + let stream = try await service.createStreamingSpeech( + parameters: AudioSpeechParameters( + model: .tts1, + input: "Stream me", + voice: .alloy)) + + var collectedChunks: [AudioSpeechChunkObject] = [] + for try await chunk in stream { + collectedChunks.append(chunk) + } + + XCTAssertEqual(collectedChunks.count, 2) + XCTAssertEqual(collectedChunks[0].chunk, expectedAudio) + XCTAssertFalse(collectedChunks[0].isLastChunk) + XCTAssertTrue(collectedChunks[1].isLastChunk) + + guard let body = mockHTTPClient.lastRequest?.body else { + return XCTFail("Expected request body") + } + + let json = try XCTUnwrap( + JSONSerialization.jsonObject(with: body, options: []) as? [String: Any]) + XCTAssertEqual(json["stream"] as? 
Bool, true) + } +} + +// MARK: - MockHTTPClient + +private final class MockHTTPClient: HTTPClient { + var bytesResponse: (HTTPByteStream, HTTPResponse)? + var lastRequest: HTTPRequest? + + func data(for _: HTTPRequest) async throws -> (Data, HTTPResponse) { + XCTFail("Unexpected data(for:) call in mock") + return (Data(), HTTPResponse(statusCode: 500, headers: [:])) + } + + func bytes(for request: HTTPRequest) async throws -> (HTTPByteStream, HTTPResponse) { + lastRequest = request + if let bytesResponse { + return bytesResponse + } + XCTFail("bytes(for:) response not stubbed") + return (.bytes(AsyncThrowingStream { continuation in + continuation.finish() + }), HTTPResponse(statusCode: 500, headers: [:])) + } +} From 5213eb20610046e30ea5608203ab825daf4babc4 Mon Sep 17 00:00:00 2001 From: cculbreath Date: Fri, 10 Oct 2025 13:28:25 -0500 Subject: [PATCH 2/4] Expose streaming flags and reasoning overrides --- .../Audio/AudioSpeechParameters.swift | 2 +- .../Chat/ChatCompletionParameters.swift | 28 +++++++++++++++++-- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/Sources/OpenAI/Public/Parameters/Audio/AudioSpeechParameters.swift b/Sources/OpenAI/Public/Parameters/Audio/AudioSpeechParameters.swift index 89e0351..2db033e 100644 --- a/Sources/OpenAI/Public/Parameters/Audio/AudioSpeechParameters.swift +++ b/Sources/OpenAI/Public/Parameters/Audio/AudioSpeechParameters.swift @@ -81,5 +81,5 @@ public struct AudioSpeechParameters: Encodable { /// Defaults to 1, The speed of the generated audio. Select a value from 0.25 to 4.0. 1.0 is the default. let speed: Double? /// When true, the API will return streaming audio chunks instead of a single response payload. - var stream: Bool? + public var stream: Bool? } diff --git a/Sources/OpenAI/Public/Parameters/Chat/ChatCompletionParameters.swift b/Sources/OpenAI/Public/Parameters/Chat/ChatCompletionParameters.swift index 95ad61b..61305a2 100644 --- a/Sources/OpenAI/Public/Parameters/Chat/ChatCompletionParameters.swift +++ b/Sources/OpenAI/Public/Parameters/Chat/ChatCompletionParameters.swift @@ -39,6 +39,7 @@ public struct ChatCompletionParameters: Encodable { temperature: Double? = nil, topProbability: Double? = nil, user: String? = nil, + reasoning: ReasoningOverrides? = nil, streamOptions: StreamOptions? = nil) { self.messages = messages @@ -69,6 +70,7 @@ public struct ChatCompletionParameters: Encodable { self.temperature = temperature topP = topProbability self.user = user + self.reasoning = reasoning self.streamOptions = streamOptions } @@ -418,6 +420,25 @@ public struct ChatCompletionParameters: Encodable { case low } + /// Provider-specific reasoning overrides (e.g. OpenRouter). + public struct ReasoningOverrides: Encodable { + public init(effort: String? = nil, exclude: Bool? = nil, maxTokens: Int? = nil) { + self.effort = effort + self.exclude = exclude + self.maxTokens = maxTokens + } + + enum CodingKeys: String, CodingKey { + case effort + case exclude + case maxTokens = "max_tokens" + } + + public var effort: String? + public var exclude: Bool? + public var maxTokens: Int? + } + /// A list of messages comprising the conversation so far. [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models) public var messages: [Message] /// ID of the model to use. See the [model endpoint compatibility](https://platform.openai.com/docs/models/how-we-use-your-data) table for details on which models work with the Chat API. 
@@ -489,6 +510,8 @@ public struct ChatCompletionParameters: Encodable { public var seed: Int? /// Up to 4 sequences where the API will stop generating further tokens. Defaults to null. public var stop: [String]? + /// Provider-specific reasoning overrides. + public var reasoning: ReasoningOverrides? /// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. /// We generally recommend altering this or `top_p` but not both. Defaults to 1. public var temperature: Double? @@ -526,6 +549,7 @@ public struct ChatCompletionParameters: Encodable { case seed case serviceTier = "service_tier" case stop + case reasoning case stream case streamOptions = "stream_options" case temperature @@ -535,7 +559,7 @@ public struct ChatCompletionParameters: Encodable { /// If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) as they become available, with the stream terminated by a data: [DONE] message. [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions ). /// Defaults to false. - var stream: Bool? + public var stream: Bool? /// Options for streaming response. Only set this when you set stream: true - var streamOptions: StreamOptions? + public var streamOptions: StreamOptions? } From 5b107c72f29d9ccfe2b44ae81ca86e74ca86cb0b Mon Sep 17 00:00:00 2001 From: cculbreath Date: Fri, 10 Oct 2025 14:33:25 -0500 Subject: [PATCH 3/4] Document streaming APIs and test reasoning overrides --- README.md | 42 ++++++++++++++++++- .../Audio/AudioSpeechParameters.swift | 5 ++- .../Chat/ChatCompletionParameters.swift | 11 ++--- .../ChatCompletionParametersTests.swift | 28 +++++++++++++ 4 files changed, 78 insertions(+), 8 deletions(-) create mode 100644 Tests/OpenAITests/ChatCompletionParametersTests.swift diff --git a/README.md b/README.md index 9d74de6..d91df49 100644 --- a/README.md +++ b/README.md @@ -394,10 +394,30 @@ playAudio(from: audioObjectData) } catch { // Handle errors print("Error playing audio: \(error.localizedDescription)") - } } +} ``` +#### Streaming audio responses + +```swift +var parameters = AudioSpeechParameters( + model: .tts1, + input: "Streaming sample", + voice: .nova, + stream: true +) + +let audioStream = try await service.createStreamingSpeech(parameters: parameters) + +for try await chunk in audioStream { + // Each chunk contains audio data you can append or play immediately. + handleAudioChunk(chunk.chunk, isLast: chunk.isLastChunk) +} +``` + +The `AudioSpeechChunkObject` exposed in each iteration includes the raw `Data` for playback plus lightweight metadata (`isLastChunk`, `chunkIndex`) so callers can manage buffers or gracefully end playback when the stream finishes. 
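+
+As a minimal sketch, a caller can accumulate the streamed chunks into a single buffer and stop once the final chunk arrives (the `player` below stands in for whatever playback or file-writing API you use):
+
+```swift
+var audioBuffer = Data()
+
+for try await chunk in audioStream {
+    // Raw audio bytes for this chunk.
+    audioBuffer.append(chunk.chunk)
+    if chunk.isLastChunk {
+        // The stream terminates with an empty final chunk.
+        break
+    }
+}
+
+player.play(audioBuffer)
+```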
+ ### Chat Parameters ```swift @@ -1020,6 +1040,26 @@ let parameters = ChatCompletionParameters(messages: [.init(role: .user, content: let chatCompletionObject = try await service.startStreamedChat(parameters: parameters) ``` +#### Streaming chat with reasoning overrides + +```swift +var parameters = ChatCompletionParameters( + messages: [.init(role: .user, content: .text("Give me a concise summary of the Manhattan project"))], + model: .gpt4o, + reasoning: .init(effort: "medium", maxTokens: 256) +) + +parameters.stream = true +parameters.streamOptions = .init(includeUsage: true) + +let stream = try await service.startStreamedChat(parameters: parameters) +for try await chunk in stream { + handleChunk(chunk) +} +``` + +The new `reasoning` override lets you pass provider-specific reasoning hints (e.g., OpenRouter’s `effort`, `exclude`, or `max_tokens`). When you toggle `stream` and `streamOptions.includeUsage`, the service returns streamed chat deltas plus a final usage summary chunk. + ### Function Calling Chat Completion also supports [Function Calling](https://platform.openai.com/docs/guides/function-calling) and [Parallel Function Calling](https://platform.openai.com/docs/guides/function-calling/parallel-function-calling). `functions` has been deprecated in favor of `tools` check [OpenAI Documentation](https://platform.openai.com/docs/api-reference/chat/create) for more. diff --git a/Sources/OpenAI/Public/Parameters/Audio/AudioSpeechParameters.swift b/Sources/OpenAI/Public/Parameters/Audio/AudioSpeechParameters.swift index 2db033e..866a2f1 100644 --- a/Sources/OpenAI/Public/Parameters/Audio/AudioSpeechParameters.swift +++ b/Sources/OpenAI/Public/Parameters/Audio/AudioSpeechParameters.swift @@ -61,6 +61,9 @@ public struct AudioSpeechParameters: Encodable { case flac } + /// When true, the API will return streaming audio chunks instead of a single response payload. + public var stream: Bool? + enum CodingKeys: String, CodingKey { case model case input @@ -80,6 +83,4 @@ public struct AudioSpeechParameters: Encodable { let responseFormat: String? /// Defaults to 1, The speed of the generated audio. Select a value from 0.25 to 4.0. 1.0 is the default. let speed: Double? - /// When true, the API will return streaming audio chunks instead of a single response payload. - public var stream: Bool? } diff --git a/Sources/OpenAI/Public/Parameters/Chat/ChatCompletionParameters.swift b/Sources/OpenAI/Public/Parameters/Chat/ChatCompletionParameters.swift index 61305a2..7516ace 100644 --- a/Sources/OpenAI/Public/Parameters/Chat/ChatCompletionParameters.swift +++ b/Sources/OpenAI/Public/Parameters/Chat/ChatCompletionParameters.swift @@ -522,6 +522,12 @@ public struct ChatCompletionParameters: Encodable { /// [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). public var user: String? + /// If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) as they become available, with the stream terminated by a data: [DONE] message. [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions ). + /// Defaults to false. + public var stream: Bool? + /// Options for streaming response. Only set this when you set stream: true + public var streamOptions: StreamOptions? 
+ enum CodingKeys: String, CodingKey { case messages case model @@ -557,9 +563,4 @@ public struct ChatCompletionParameters: Encodable { case user } - /// If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format) as they become available, with the stream terminated by a data: [DONE] message. [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions ). - /// Defaults to false. - public var stream: Bool? - /// Options for streaming response. Only set this when you set stream: true - public var streamOptions: StreamOptions? } diff --git a/Tests/OpenAITests/ChatCompletionParametersTests.swift b/Tests/OpenAITests/ChatCompletionParametersTests.swift new file mode 100644 index 0000000..a3ce875 --- /dev/null +++ b/Tests/OpenAITests/ChatCompletionParametersTests.swift @@ -0,0 +1,28 @@ +import XCTest +@testable import SwiftOpenAI + +final class ChatCompletionParametersTests: XCTestCase { + func testReasoningOverridesEncoding() throws { + var parameters = ChatCompletionParameters( + messages: [.init(role: .user, content: .text("hello"))], + model: .gpt4o, + reasoning: .init(effort: "medium", exclude: true, maxTokens: 256), + streamOptions: .init(includeUsage: true)) + parameters.stream = true + + let encoder = JSONEncoder() + let data = try encoder.encode(parameters) + + let root = try XCTUnwrap(JSONSerialization.jsonObject(with: data) as? [String: Any]) + + XCTAssertEqual(root["stream"] as? Bool, true) + + let streamOptions = try XCTUnwrap(root["stream_options"] as? [String: Any]) + XCTAssertEqual(streamOptions["include_usage"] as? Bool, true) + + let reasoning = try XCTUnwrap(root["reasoning"] as? [String: Any]) + XCTAssertEqual(reasoning["effort"] as? String, "medium") + XCTAssertEqual(reasoning["exclude"] as? Bool, true) + XCTAssertEqual(reasoning["max_tokens"] as? Int, 256) + } +} From b232048d7dce065dfbaec64e139742d92c6914b2 Mon Sep 17 00:00:00 2001 From: Christopher Culbreath Date: Wed, 22 Oct 2025 15:50:00 -0500 Subject: [PATCH 4/4] Make name parameter optional when streaming. --- .../Public/ResponseModels/Response/ResponseStreamEvent.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/OpenAI/Public/ResponseModels/Response/ResponseStreamEvent.swift b/Sources/OpenAI/Public/ResponseModels/Response/ResponseStreamEvent.swift index b71b05d..d4072cc 100644 --- a/Sources/OpenAI/Public/ResponseModels/Response/ResponseStreamEvent.swift +++ b/Sources/OpenAI/Public/ResponseModels/Response/ResponseStreamEvent.swift @@ -607,7 +607,7 @@ public struct FunctionCallArgumentsDoneEvent: Decodable { public let type: String public let itemId: String public let outputIndex: Int - public let name: String + public let name: String? public let arguments: String public let sequenceNumber: Int?