4 changes: 2 additions & 2 deletions Demo/TranscriberDemo.xcodeproj/project.pbxproj
@@ -276,7 +276,7 @@
 "INFOPLIST_KEY_UIStatusBarStyle[sdk=iphonesimulator*]" = UIStatusBarStyleDefault;
 INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
 INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
-IPHONEOS_DEPLOYMENT_TARGET = 18.2;
+IPHONEOS_DEPLOYMENT_TARGET = 15.6;
 LD_RUNPATH_SEARCH_PATHS = "@executable_path/Frameworks";
 "LD_RUNPATH_SEARCH_PATHS[sdk=macosx*]" = "@executable_path/../Frameworks";
 MACOSX_DEPLOYMENT_TARGET = 15.2;
@@ -317,7 +317,7 @@
 "INFOPLIST_KEY_UIStatusBarStyle[sdk=iphonesimulator*]" = UIStatusBarStyleDefault;
 INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
 INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
-IPHONEOS_DEPLOYMENT_TARGET = 18.2;
+IPHONEOS_DEPLOYMENT_TARGET = 15.6;
 LD_RUNPATH_SEARCH_PATHS = "@executable_path/Frameworks";
 "LD_RUNPATH_SEARCH_PATHS[sdk=macosx*]" = "@executable_path/../Frameworks";
 MACOSX_DEPLOYMENT_TARGET = 15.2;
72 changes: 71 additions & 1 deletion Demo/TranscriberDemo/ContentView.swift
@@ -5,6 +5,7 @@ import Speech
 import Transcriber
 
 // Example usage in view
+@available(iOS 17, *)
 struct ContentView: View {
     @State private var presenter = DefaultTranscriberPresenter()
 
@@ -69,6 +70,75 @@ struct ContentView: View {
     }
 }
 
+@available(*, deprecated, renamed: "ContentView", message: "Use ContentView instead.")
+struct LegacyContentView: View {
+    @StateObject private var presenter = LegacyTranscriberPresenter()
+
+    var body: some View {
+        VStack {
+            #if os(iOS)
+            // Add input selection picker with proper selection handling
+            Picker("Audio Input", selection: Binding(
+                get: { presenter.selectedInput },
+                set: { if let input = $0 { presenter.selectInput(input) } }
+            )) {
+                ForEach(presenter.availableInputs, id: \.uid) { input in
+                    HStack {
+                        Text(input.portName)
+                        if input.uid == presenter.selectedInput?.uid {
+                            Image(systemName: "checkmark")
+                        }
+                    }
+                    .tag(Optional(input))
+                }
+            }
+            .pickerStyle(.menu)
+            .padding()
+            #endif
+
+            Text(presenter.transcribedText.isEmpty ? "No transcription yet" : presenter.transcribedText)
+                .padding()
+
+            SpeechButton(
+                isRecording: presenter.isRecording,
+                rmsValue: presenter.rmsLevel,
+                isProcessing: false,
+                supportsThinkingState: false,
+                onTap: {
+                    presenter.toggleRecording { finalText in
+                        print("Recording completed with text: \(finalText)")
+                    }
+                }
+            )
+            .disabled(presenter.authStatus != .authorized)
+
+            if let error = presenter.error {
+                Text(error.localizedDescription)
+                    .foregroundColor(.red)
+                    .padding()
+            }
+        }
+        .padding()
+        .task {
+            do {
+                try await presenter.requestAuthorization()
+            } catch {
+                print(error.localizedDescription)
+            }
+        }
+        // Refresh inputs when view appears
+        .onAppear {
+            #if os(iOS)
+            presenter.fetchAvailableInputs()
+            #endif
+        }
+    }
+}
+
 #Preview {
-    ContentView()
+    if #available(iOS 17, *) {
+        ContentView()
+    } else {
+        LegacyContentView()
+    }
 }
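A note on the `Picker` wiring above: the selection is optional (`selectedInput` may be nil), so each row is tagged with `Optional(input)` to make the tag type match the selection type, and a custom `Binding` routes writes through `presenter.selectInput` instead of mutating state directly. A minimal sketch of the same pattern with a simpler element type (names here are illustrative, not from this PR):

```swift
import SwiftUI

struct DevicePickerSketch: View {
    @State private var selected: String?           // optional selection
    private let devices = ["Built-in Mic", "Headset", "Bluetooth"]

    var body: some View {
        Picker("Input", selection: Binding(
            get: { selected },
            set: { newValue in
                // Route writes through a side-effecting setter,
                // mirroring presenter.selectInput(_:).
                if let newValue { selected = newValue }
            }
        )) {
            ForEach(devices, id: \.self) { device in
                Text(device)
                    .tag(Optional(device))         // tag must be String? to match String? selection
            }
        }
        .pickerStyle(.menu)
    }
}
```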
6 changes: 5 additions & 1 deletion Demo/TranscriberDemo/TranscriberDemoApp.swift
@@ -6,7 +6,11 @@ import SwiftUI
 struct TranscriberDemoApp: App {
     var body: some Scene {
         WindowGroup {
-            ContentView()
+            if #available(iOS 17.0, *) {
+                ContentView()
+            } else {
+                LegacyContentView()
+            }
         }
     }
 }
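Worth spelling out for reviewers: `@available` is the compile-time annotation on `ContentView`, while `if #available` is the runtime check that makes using the annotated type legal on a lower deployment target. A minimal sketch of the pairing the demo app now relies on (`ModernView` and `FallbackView` are hypothetical stand-ins, not part of this PR):

```swift
import SwiftUI

@available(iOS 17.0, *)
struct ModernView: View {           // stand-in for ContentView
    var body: some View { Text("iOS 17+ path") }
}

struct FallbackView: View {         // stand-in for LegacyContentView
    var body: some View { Text("iOS 15/16 path") }
}

struct RootView: View {
    var body: some View {
        // The compiler only permits ModernView inside the #available branch.
        if #available(iOS 17.0, *) {
            ModernView()
        } else {
            FallbackView()
        }
    }
}
```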
2 changes: 1 addition & 1 deletion Package.swift
@@ -4,7 +4,7 @@ import PackageDescription
 
 let package = Package(
     name: "Transcriber",
-    platforms: [.macOS(.v14), .iOS(.v17), .tvOS(.v17), .visionOS(.v1)],
+    platforms: [.macOS(.v14), .iOS(.v15), .tvOS(.v15), .visionOS(.v1)],
     products: [.library(name: "Transcriber", targets: ["Transcriber"])],
     targets: [.target(name: "Transcriber")]
)
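Lowering `platforms` to `.iOS(.v15)` is what forces the rest of this diff: every declaration in the package that touches iOS 17-only API must now carry its own availability annotation, and callers must gate access at the call site. A short sketch of that consequence (the types here are assumed examples, not from the diff):

```swift
import Foundation

// Once the package floor is iOS 15, an iOS 17-only wrapper must be annotated...
@available(iOS 17.0, *)
public struct NewAPIWrapper {       // hypothetical example type
    public init() {}
}

// ...and any un-annotated code path has to check availability before using it.
public func makeWrapperIfAvailable() -> Any? {
    guard #available(iOS 17.0, *) else { return nil }
    return NewAPIWrapper()
}
```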
40 changes: 19 additions & 21 deletions Sources/Transcriber/Core/Transcriber.swift
@@ -12,11 +12,6 @@ public actor Transcriber {
     private let audioEngine: AVAudioEngine
     private let logger: DebugLogger
 
-    private lazy var languageModelManager: LanguageModelManager? = {
-        guard let modelInfo = config.languageModelInfo else { return nil }
-        return LanguageModelManager(modelInfo: modelInfo)
-    }()
-
     private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
     private var recognitionTask: SFSpeechRecognitionTask?
 
@@ -41,15 +36,18 @@
         }
     }
 
-
     // MARK: - Recognition Setup
     private func setupRecognition() throws -> SFSpeechAudioBufferRecognitionRequest {
         let request = SFSpeechAudioBufferRecognitionRequest()
 
         // Apply all configuration settings
         request.shouldReportPartialResults = config.shouldReportPartialResults
         request.requiresOnDeviceRecognition = config.requiresOnDeviceRecognition
-        request.addsPunctuation = config.addsPunctuation
+
+        if #available(iOS 16, *) {
+            request.addsPunctuation = config.addsPunctuation
+        }
+
         request.taskHint = config.taskHint
 
         // Only set contextual strings if provided
@@ -91,21 +89,15 @@
     private func startCombinedStream() async throws -> AsyncThrowingStream<String, Error> {
         logger.debug("Starting transcription stream...")
 
-        if let languageModelManager = languageModelManager {
-            try await languageModelManager.waitForModel()
-        }
-
         // Reset state
        resetRecognitionState()
 
         let localRequest = try setupRecognition()
 
-        // Configure language model if available
-        if let languageModel = languageModelManager {
-            try await languageModel.waitForModel()
-            if let lmConfig = await languageModel.getConfiguration() {
-                localRequest.requiresOnDeviceRecognition = true
-                localRequest.customizedLanguageModel = lmConfig
-            }
+        if #available(iOS 17, *) {
+            if let languageModel = configureLanguageModel() {
+                try await languageModel.waitForModel()
+                if let lmConfig = await languageModel.getConfiguration() {
+                    localRequest.requiresOnDeviceRecognition = true
+                    localRequest.customizedLanguageModel = lmConfig
+                }
+            }
         }

Expand Down Expand Up @@ -143,7 +135,7 @@ public actor Transcriber {
self.logger.debug("RMS: \(rms)")

// Send RMS value to stream using local continuation
localRMSContinuation?.yield(rms)
localRMSContinuation?.yield(rms)
// Send RMS value to stream using local continuation
if silenceState.update(
rms: rms,
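For context on the `rms` values flowing through this handler: a root-mean-square level is typically computed per tap buffer and then fed both to the UI stream and to the silence detector. A hedged sketch of such a computation (this helper is illustrative; the library's actual implementation is not shown in this diff):

```swift
import AVFoundation

// Hypothetical helper: RMS level of a mono float buffer, the kind of
// value Transcriber yields to drive the SpeechButton animation.
func rmsLevel(of buffer: AVAudioPCMBuffer) -> Float {
    guard let samples = buffer.floatChannelData?[0] else { return 0 }
    let n = Int(buffer.frameLength)
    guard n > 0 else { return 0 }
    var sum: Float = 0
    for i in 0..<n {
        sum += samples[i] * samples[i]   // accumulate squared amplitude
    }
    return sqrt(sum / Float(n))          // RMS in 0...1 for normalized float audio
}
```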
@@ -295,6 +287,12 @@
             }
         }
     }
+
+    @available(iOS 17, *)
+    private func configureLanguageModel() -> LanguageModelManager? {
+        guard let modelInfo = config.languageModelInfo else { return nil }
+        return LanguageModelManager(modelInfo: modelInfo)
+    }
 }
 
 // MARK: - SilenceState
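One detail worth calling out in this file: Swift does not allow stored properties to be marked potentially unavailable, so the old `lazy var languageModelManager` could not simply gain an `@available(iOS 17, *)` attribute; the PR replaces it with an availability-annotated factory method instead. A minimal sketch of the general pattern (the types here are hypothetical stand-ins):

```swift
import Foundation

@available(iOS 17.0, *)
final class NewOnlyDependency {}    // hypothetical iOS 17-only type

final class Owner {
    // Not allowed: stored properties cannot be potentially unavailable.
    // @available(iOS 17.0, *)
    // private lazy var dependency = NewOnlyDependency()

    // Allowed: gate the type behind an annotated method and
    // check #available at the call site.
    @available(iOS 17.0, *)
    private func makeDependency() -> NewOnlyDependency {
        NewOnlyDependency()
    }

    func run() {
        if #available(iOS 17.0, *) {
            let dependency = makeDependency()
            _ = dependency
        }
    }
}
```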
Sources/Transcriber/Core/LanguageModelManager.swift
@@ -3,6 +3,7 @@
 import Speech
 
 /// Manages custom language model preparation and configuration
+@available(iOS 17.0, *)
 public actor LanguageModelManager {
     private var hasBuiltLm = false
     private var customLmTask: Task<Void, Error>?
121 changes: 121 additions & 0 deletions Sources/Transcriber/SwiftUI/DefaultTranscriberPresenter.swift
@@ -5,6 +5,7 @@ import AVFoundation
 
 /// Default implementation of SpeechRecognitionPresenter
 /// Provides ready-to-use speech recognition functionality for SwiftUI views
+@available(iOS 17.0, *)
 @Observable
 @MainActor
 public class DefaultTranscriberPresenter: TranscriberPresenter {
@@ -124,3 +125,123 @@ public class DefaultTranscriberPresenter: TranscriberPresenter {
     }
     #endif
 }
+
+@available(iOS, introduced: 15.0, deprecated, renamed: "DefaultTranscriberPresenter", message: "Use DefaultTranscriberPresenter instead.")
+@MainActor
+public class LegacyTranscriberPresenter: TranscriberPresenter, ObservableObject {
+    @Published public var isRecording = false
+    @Published public var transcribedText = ""
+    @Published public var authStatus: SFSpeechRecognizerAuthorizationStatus = .notDetermined
+    @Published public var error: Error?
+    @Published public var rmsLevel: Float = 0
+
+    private let transcriber: Transcriber?
+    private var recordingTask: Task<Void, Never>?
+    private var onCompleteHandler: ((String) -> Void)?
+
+    #if os(iOS)
+    @Published public var availableInputs: [AVAudioSessionPortDescription] = []
+    @Published public var selectedInput: AVAudioSessionPortDescription?
+    #endif
+
+    public init(config: TranscriberConfiguration = TranscriberConfiguration()) {
+        self.transcriber = Transcriber(config: config, debugLogging: true)
+
+        #if os(iOS)
+        setupAudioSession()
+        self.fetchAvailableInputs()
+        #endif
+    }
+
+    private func setupAudioSession() {
+        do {
+            let session = AVAudioSession.sharedInstance()
+            // Configure for both playback and recording with all possible options
+            try session.setCategory(.playAndRecord, mode: .spokenAudio, options: [
+                .allowAirPlay,
+                .allowBluetooth,
+                .allowBluetoothA2DP,
+                .defaultToSpeaker
+            ])
+            // Set preferred I/O buffer duration
+            try session.setPreferredIOBufferDuration(0.005)
+            try session.setActive(true)
+        } catch {
+            fatalError("Error: \(error.localizedDescription)")
+        }
+    }
+
+    public func toggleRecording(onComplete: ((String) -> Void)? = nil) {
+        self.onCompleteHandler = onComplete
+
+        guard let transcriber else {
+            error = TranscriberError.noRecognizer
+            return
+        }
+
+        if isRecording {
+            recordingTask?.cancel()
+            recordingTask = nil
+            Task {
+                await transcriber.stopStream()
+                isRecording = false
+                onCompleteHandler?(transcribedText)
+            }
+        } else {
+            transcribedText = "" // Reset text when starting new recording
+            recordingTask = Task {
+                do {
+                    isRecording = true
+                    let stream = try await transcriber.startStream()
+
+                    for try await signal in stream {
+                        switch signal {
+                        case .rms(let float):
+                            print("PRESENTER RMS: \(float)")
+                            rmsLevel = float
+                        case .transcription(let string):
+                            transcribedText = string
+                        }
+                    }
+
+                    // Stream ended naturally (silence detected)
+                    isRecording = false
+                    onCompleteHandler?(transcribedText)
+                } catch {
+                    self.error = error
+                    isRecording = false
+                }
+            }
+        }
+    }
+
+    public func requestAuthorization() async throws {
+        guard let transcriber else {
+            throw TranscriberError.noRecognizer
+        }
+        authStatus = await transcriber.requestAuthorization()
+        guard authStatus == .authorized else {
+            throw TranscriberError.notAuthorized
+        }
+    }
+
+    #if os(iOS)
+    public func fetchAvailableInputs() {
+        availableInputs = AudioInputs.getAvailableInputs()
+        // Set initial selection to current input
+        if let currentInput = AVAudioSession.sharedInstance().currentRoute.inputs.first,
+           let matchingInput = availableInputs.first(where: { $0.uid == currentInput.uid }) {
+            selectedInput = matchingInput
+        }
+    }
+
+    public func selectInput(_ input: AVAudioSessionPortDescription) {
+        do {
+            try AudioInputs.selectInput(input)
+            selectedInput = input
+        } catch {
+            self.error = TranscriberError.audioSessionFailure(error)
+        }
+    }
+    #endif
+}
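The legacy presenter mirrors `DefaultTranscriberPresenter` but swaps the iOS 17-only `@Observable` macro for the older `ObservableObject`/`@Published` pair, which also changes how views hold it (`@StateObject` instead of `@State`, as seen in `LegacyContentView`). A minimal side-by-side sketch of the two observation styles (the `Counter*` types are hypothetical, not from this PR):

```swift
import SwiftUI

// iOS 17+: the Observation macro tracks property access automatically.
@available(iOS 17.0, *)
@Observable final class CounterModern {
    var count = 0
}

// iOS 15/16: conform to ObservableObject and publish each property.
final class CounterLegacy: ObservableObject {
    @Published var count = 0
}

@available(iOS 17.0, *)
struct ModernCounterView: View {
    @State private var model = CounterModern()       // plain @State works with @Observable
    var body: some View {
        Button("Count: \(model.count)") { model.count += 1 }
    }
}

struct LegacyCounterView: View {
    @StateObject private var model = CounterLegacy() // @StateObject owns an ObservableObject
    var body: some View {
        Button("Count: \(model.count)") { model.count += 1 }
    }
}
```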
6 changes: 3 additions & 3 deletions Sources/Transcriber/UI/SpeechButton.swift
@@ -129,13 +129,13 @@ public struct SpeechButton: View {
         .onTapGesture {
             onTap()
         }
-        .onChange(of: isRecording) { _, newIsRecording in
+        .onChange(of: isRecording) { newIsRecording in
             updateState(isRecording: newIsRecording, isProcessing: isProcessing)
         }
-        .onChange(of: isProcessing) { _, newIsProcessing in
+        .onChange(of: isProcessing) { newIsProcessing in
             updateState(isRecording: isRecording, isProcessing: newIsProcessing)
         }
-        .onChange(of: rmsValue) { _, newRMS in
+        .onChange(of: rmsValue) { newRMS in
             updateAmplitudes(with: newRMS)
         }
         .onAppear {
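One caveat reviewers may want to weigh: the single-value `onChange(of:perform:)` form used here is deprecated as of iOS 17 in favor of the two-parameter closure, so this change trades a deprecation warning on newer SDKs for iOS 15 support. A hedged sketch of a compatibility wrapper that keeps both paths (the `onChangeCompat` name is hypothetical, not part of this PR):

```swift
import SwiftUI

extension View {
    // Dispatches to the non-deprecated onChange overload on iOS 17+,
    // and to the legacy single-value overload on iOS 15/16.
    @ViewBuilder
    func onChangeCompat<V: Equatable>(of value: V, perform action: @escaping (V) -> Void) -> some View {
        if #available(iOS 17.0, *) {
            self.onChange(of: value) { _, newValue in action(newValue) }
        } else {
            self.onChange(of: value, perform: action)
        }
    }
}

// Usage in SpeechButton would then read:
//     .onChangeCompat(of: isRecording) { newIsRecording in
//         updateState(isRecording: newIsRecording, isProcessing: isProcessing)
//     }
```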