diff --git a/Demo/TranscriberDemo.xcodeproj/project.pbxproj b/Demo/TranscriberDemo.xcodeproj/project.pbxproj
index a3f424a..0ad84dd 100644
--- a/Demo/TranscriberDemo.xcodeproj/project.pbxproj
+++ b/Demo/TranscriberDemo.xcodeproj/project.pbxproj
@@ -276,7 +276,7 @@
 				"INFOPLIST_KEY_UIStatusBarStyle[sdk=iphonesimulator*]" = UIStatusBarStyleDefault;
 				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
 				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
-				IPHONEOS_DEPLOYMENT_TARGET = 18.2;
+				IPHONEOS_DEPLOYMENT_TARGET = 15.6;
 				LD_RUNPATH_SEARCH_PATHS = "@executable_path/Frameworks";
 				"LD_RUNPATH_SEARCH_PATHS[sdk=macosx*]" = "@executable_path/../Frameworks";
 				MACOSX_DEPLOYMENT_TARGET = 15.2;
@@ -317,7 +317,7 @@
 				"INFOPLIST_KEY_UIStatusBarStyle[sdk=iphonesimulator*]" = UIStatusBarStyleDefault;
 				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
 				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
-				IPHONEOS_DEPLOYMENT_TARGET = 18.2;
+				IPHONEOS_DEPLOYMENT_TARGET = 15.6;
 				LD_RUNPATH_SEARCH_PATHS = "@executable_path/Frameworks";
 				"LD_RUNPATH_SEARCH_PATHS[sdk=macosx*]" = "@executable_path/../Frameworks";
 				MACOSX_DEPLOYMENT_TARGET = 15.2;
diff --git a/Demo/TranscriberDemo/ContentView.swift b/Demo/TranscriberDemo/ContentView.swift
index 25f87e2..36ee615 100644
--- a/Demo/TranscriberDemo/ContentView.swift
+++ b/Demo/TranscriberDemo/ContentView.swift
@@ -5,6 +5,7 @@ import Speech
 import Transcriber
 
 // Example usage in view
+@available(iOS 17, *)
 struct ContentView: View {
     @State private var presenter = DefaultTranscriberPresenter()
 
@@ -69,6 +70,75 @@ struct ContentView: View {
     }
 }
 
+@available(*, deprecated, renamed: "ContentView", message: "Use ContentView instead.")
+struct LegacyContentView: View {
+    @StateObject private var presenter = LegacyTranscriberPresenter()
+
+    var body: some View {
+        VStack {
+            #if os(iOS)
+            // Add input selection picker with proper selection handling
+            Picker("Audio Input", selection: Binding(
+                get: { presenter.selectedInput },
+                set: { if let input = $0 { presenter.selectInput(input) }}
+            )) {
+                ForEach(presenter.availableInputs, id: \.uid) { input in
+                    HStack {
+                        Text(input.portName)
+                        if input.uid == presenter.selectedInput?.uid {
+                            Image(systemName: "checkmark")
+                        }
+                    }
+                    .tag(Optional(input))
+                }
+            }
+            .pickerStyle(.menu)
+            .padding()
+            #endif
+
+            Text(presenter.transcribedText.isEmpty ? "No transcription yet" : presenter.transcribedText)
+                .padding()
+
+            SpeechButton(
+                isRecording: presenter.isRecording,
+                rmsValue: presenter.rmsLevel,
+                isProcessing: false,
+                supportsThinkingState: false,
+                onTap: {
+                    presenter.toggleRecording { finalText in
+                        print("Recording completed with text: \(finalText)")
+                    }
+                }
+            )
+            .disabled(presenter.authStatus != .authorized)
+
+            if let error = presenter.error {
+                Text(error.localizedDescription)
+                    .foregroundColor(.red)
+                    .padding()
+            }
+        }
+        .padding()
+        .task {
+            do {
+                try await presenter.requestAuthorization()
+            } catch {
+                print(error.localizedDescription)
+            }
+        }
+        // Refresh inputs when view appears
+        .onAppear {
+            #if os(iOS)
+            presenter.fetchAvailableInputs()
+            #endif
+        }
+    }
+}
+
 #Preview {
-    ContentView()
+    if #available(iOS 17, *) {
+        ContentView()
+    } else {
+        LegacyContentView()
+    }
 }
diff --git a/Demo/TranscriberDemo/TranscriberDemoApp.swift b/Demo/TranscriberDemo/TranscriberDemoApp.swift
index 9851796..1c6ca63 100644
--- a/Demo/TranscriberDemo/TranscriberDemoApp.swift
+++ b/Demo/TranscriberDemo/TranscriberDemoApp.swift
@@ -6,7 +6,11 @@ import SwiftUI
 struct TranscriberDemoApp: App {
     var body: some Scene {
         WindowGroup {
-            ContentView()
+            if #available(iOS 17.0, *) {
+                ContentView()
+            } else {
+                LegacyContentView()
+            }
         }
     }
 }
diff --git a/Package.swift b/Package.swift
index b8b5627..cc32888 100644
--- a/Package.swift
+++ b/Package.swift
@@ -4,7 +4,7 @@ import PackageDescription
 
 let package = Package(
     name: "Transcriber",
-    platforms: [.macOS(.v14), .iOS(.v17), .tvOS(.v17), .visionOS(.v1)],
+    platforms: [.macOS(.v14), .iOS(.v15), .tvOS(.v15), .visionOS(.v1)],
     products: [.library(name: "Transcriber", targets: ["Transcriber"])],
     targets: [.target(name: "Transcriber")]
 )
diff --git a/Sources/Transcriber/Core/Transcriber.swift b/Sources/Transcriber/Core/Transcriber.swift
index ac64e5a..b437aeb 100644
--- a/Sources/Transcriber/Core/Transcriber.swift
+++ b/Sources/Transcriber/Core/Transcriber.swift
@@ -12,11 +12,6 @@ public actor Transcriber {
     private let audioEngine: AVAudioEngine
     private let logger: DebugLogger
 
-    private lazy var languageModelManager: LanguageModelManager? = {
-        guard let modelInfo = config.languageModelInfo else { return nil }
-        return LanguageModelManager(modelInfo: modelInfo)
-    }()
-
     private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
     private var recognitionTask: SFSpeechRecognitionTask?
@@ -41,7 +36,6 @@ public actor Transcriber {
         }
     }
 
-    // MARK: - Recognition Setup
     private func setupRecognition() throws -> SFSpeechAudioBufferRecognitionRequest {
         let request = SFSpeechAudioBufferRecognitionRequest()
 
@@ -49,7 +43,11 @@
         // Apply all configuration settings
         request.shouldReportPartialResults = config.shouldReportPartialResults
         request.requiresOnDeviceRecognition = config.requiresOnDeviceRecognition
-        request.addsPunctuation = config.addsPunctuation
+
+        if #available(iOS 16, *) {
+            request.addsPunctuation = config.addsPunctuation
+        }
+
         request.taskHint = config.taskHint
 
         // Only set contextual strings if provided
@@ -91,21 +89,15 @@ private func startCombinedStream() async throws -> AsyncThrowingStream {
         logger.debug("Starting transcription stream...")
 
-        if let languageModelManager = languageModelManager {
-            try await languageModelManager.waitForModel()
-        }
-
-        // Reset state
-        resetRecognitionState()
-
         let localRequest = try setupRecognition()
 
-        // Configure language model if available
-        if let languageModel = languageModelManager {
-            try await languageModel.waitForModel()
-            if let lmConfig = await languageModel.getConfiguration() {
-                localRequest.requiresOnDeviceRecognition = true
-                localRequest.customizedLanguageModel = lmConfig
+        if #available(iOS 17, *) {
+            if let languageModel = configureLanguageModel() {
+                try await languageModel.waitForModel()
+                if let lmConfig = await languageModel.getConfiguration() {
+                    localRequest.requiresOnDeviceRecognition = true
+                    localRequest.customizedLanguageModel = lmConfig
+                }
             }
         }
 
@@ -143,7 +135,7 @@
                     self.logger.debug("RMS: \(rms)")
 
                     // Send RMS value to stream using local continuation
-                    localRMSContinuation?.yield(rms)
+                    localRMSContinuation?.yield(rms) // Send RMS value to stream using local continuation
 
                     if silenceState.update(
                         rms: rms,
@@ -295,6 +287,12 @@
             }
         }
     }
+
+    @available(iOS 17, *)
+    private func configureLanguageModel() -> LanguageModelManager? {
+        guard let modelInfo = config.languageModelInfo else { return nil }
+        return LanguageModelManager(modelInfo: modelInfo)
+    }
 }
 
 // MARK: - SilenceState
diff --git a/Sources/Transcriber/LanguageModel/LanguageModelManager.swift b/Sources/Transcriber/LanguageModel/LanguageModelManager.swift
index 28db8f3..d938c21 100644
--- a/Sources/Transcriber/LanguageModel/LanguageModelManager.swift
+++ b/Sources/Transcriber/LanguageModel/LanguageModelManager.swift
@@ -3,6 +3,7 @@
 import Speech
 
 /// Manages custom language model preparation and configuration
+@available(iOS 17.0, *)
 public actor LanguageModelManager {
     private var hasBuiltLm = false
     private var customLmTask: Task?
diff --git a/Sources/Transcriber/SwiftUI/DefaultTranscriberPresenter.swift b/Sources/Transcriber/SwiftUI/DefaultTranscriberPresenter.swift
index 12d9b24..09c0d89 100644
--- a/Sources/Transcriber/SwiftUI/DefaultTranscriberPresenter.swift
+++ b/Sources/Transcriber/SwiftUI/DefaultTranscriberPresenter.swift
@@ -5,6 +5,7 @@ import AVFoundation
 
 /// Default implementation of SpeechRecognitionPresenter
 /// Provides ready-to-use speech recognition functionality for SwiftUI views
+@available(iOS 17.0, *)
 @Observable
 @MainActor
 public class DefaultTranscriberPresenter: TranscriberPresenter {
@@ -124,3 +125,123 @@ public class DefaultTranscriberPresenter: TranscriberPresenter {
     }
     #endif
 }
+
+@available(iOS, introduced: 15.0, deprecated, renamed: "DefaultTranscriberPresenter", message: "Use DefaultTranscriberPresenter instead.")
+@MainActor
+public class LegacyTranscriberPresenter: TranscriberPresenter, ObservableObject {
+    @Published public var isRecording = false
+    @Published public var transcribedText = ""
+    @Published public var authStatus: SFSpeechRecognizerAuthorizationStatus = .notDetermined
+    @Published public var error: Error?
+    @Published public var rmsLevel: Float = 0
+
+    private let transcriber: Transcriber?
+    private var recordingTask: Task?
+    private var onCompleteHandler: ((String) -> Void)?
+
+    #if os(iOS)
+    @Published public var availableInputs: [AVAudioSessionPortDescription] = []
+    @Published public var selectedInput: AVAudioSessionPortDescription?
+    #endif
+
+    public init(config: TranscriberConfiguration = TranscriberConfiguration()) {
+        self.transcriber = Transcriber(config: config, debugLogging: true)
+
+        #if os(iOS)
+        setupAudioSession()
+        self.fetchAvailableInputs()
+        #endif
+    }
+
+    private func setupAudioSession() {
+        do {
+            let session = AVAudioSession.sharedInstance()
+            // Configure for both playback and recording with all possible options
+            try session.setCategory(.playAndRecord, mode: .spokenAudio, options: [
+                .allowAirPlay,
+                .allowBluetooth,
+                .allowBluetoothA2DP,
+                .defaultToSpeaker
+            ])
+            // Set preferred I/O buffer duration
+            try session.setPreferredIOBufferDuration(0.005)
+            try session.setActive(true)
+        } catch {
+            fatalError("Error: \(error.localizedDescription)")
+        }
+    }
+
+    public func toggleRecording(onComplete: ((String) -> Void)? = nil) {
+        self.onCompleteHandler = onComplete
+
+        guard let transcriber else {
+            error = TranscriberError.noRecognizer
+            return
+        }
+
+        if isRecording {
+            recordingTask?.cancel()
+            recordingTask = nil
+            Task {
+                await transcriber.stopStream()
+                isRecording = false
+                onCompleteHandler?(transcribedText)
+            }
+        } else {
+            transcribedText = "" // Reset text when starting new recording
+            recordingTask = Task {
+                do {
+                    isRecording = true
+                    let stream = try await transcriber.startStream()
+
+                    for try await signal in stream {
+                        switch signal {
+                        case .rms(let float):
+                            print("PRESENTER RMS: \(float)")
+                            rmsLevel = float
+                        case .transcription(let string):
+                            transcribedText = string
+                        }
+                    }
+
+                    // Stream ended naturally (silence detected)
+                    isRecording = false
+                    onCompleteHandler?(transcribedText)
+                } catch {
+                    self.error = error
+                    isRecording = false
+                }
+            }
+        }
+    }
+
+    public func requestAuthorization() async throws {
+        guard let transcriber else {
+            throw TranscriberError.noRecognizer
+        }
+        authStatus = await transcriber.requestAuthorization()
+        guard authStatus == .authorized else {
+            throw TranscriberError.notAuthorized
+        }
+    }
+
+    #if os(iOS)
+    public func fetchAvailableInputs() {
+        availableInputs = AudioInputs.getAvailableInputs()
+        // Set initial selection to current input
+        if let currentInput = AVAudioSession.sharedInstance().currentRoute.inputs.first,
+           let matchingInput = availableInputs.first(where: { $0.uid == currentInput.uid }) {
+            selectedInput = matchingInput
+        }
+    }
+
+    public func selectInput(_ input: AVAudioSessionPortDescription) {
+        do {
+            try AudioInputs.selectInput(input)
+            selectedInput = input
+        } catch {
+            self.error = TranscriberError.audioSessionFailure(error)
+        }
+    }
+    #endif
+}
diff --git a/Sources/Transcriber/UI/SpeechButton.swift b/Sources/Transcriber/UI/SpeechButton.swift
index 1efd602..995a0b9 100644
--- a/Sources/Transcriber/UI/SpeechButton.swift
+++ b/Sources/Transcriber/UI/SpeechButton.swift
@@ -129,13 +129,13 @@ public struct SpeechButton: View {
         .onTapGesture {
             onTap()
         }
-        .onChange(of: isRecording) { _, newIsRecording in
+        .onChange(of: isRecording) { newIsRecording in
             updateState(isRecording: newIsRecording, isProcessing: isProcessing)
         }
-        .onChange(of: isProcessing) { _, newIsProcessing in
+        .onChange(of: isProcessing) { newIsProcessing in
             updateState(isRecording: isRecording, isProcessing: newIsProcessing)
         }
-        .onChange(of: rmsValue) { _, newRMS in
+        .onChange(of: rmsValue) { newRMS in
             updateAmplitudes(with: newRMS)
         }
         .onAppear {