- 
                Notifications
    You must be signed in to change notification settings 
- Fork 113
Demo Branch #111
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Demo Branch #111
Changes from 10 commits
f70c968
              04fd51e
              d0a2e8b
              c7aca64
              41b1958
              ac43175
              800343c
              021d848
              5eb50ba
              dbee7ab
              d001f59
              File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| @@ -0,0 +1,104 @@ | ||
| // | ||
| // RealTimeAPIDemoView.swift | ||
| // SwiftOpenAIExample | ||
| // | ||
| // Created by James Rochabrun on 1/18/25. | ||
| // | ||
|  | ||
| import SwiftUI | ||
| import AVFoundation | ||
| import SwiftOpenAI | ||
|  | ||
| /// Demo screen for the OpenAI Realtime API. | ||
| /// | ||
| /// Depending on the current microphone record permission it shows a permission prompt, | ||
| /// a "denied" explanation with a shortcut to Settings, or the start/stop session buttons. | ||
| struct RealTimeAPIDemoView: View { | ||
|  | ||
|     @State private var realTimeAPIViewModel: RealTimeAPIViewModel | ||
|     @State private var microphonePermission: AVAudioSession.RecordPermission | ||
|  | ||
|     /// - Parameter service: Service handed to the view model that drives the realtime session. | ||
|     init(service: OpenAIService) { | ||
|         // Both wrappers are initialised through their backing storage for consistency. | ||
|         _realTimeAPIViewModel = State(initialValue: RealTimeAPIViewModel(service: service)) | ||
|         // TODO: Update this with latest API. | ||
|         _microphonePermission = State(initialValue: AVAudioSession.sharedInstance().recordPermission) | ||
|     } | ||
|  | ||
|     var body: some View { | ||
|         Group { | ||
|             switch microphonePermission { | ||
|             case .undetermined: | ||
|                 requestPermissionButton | ||
|             case .denied: | ||
|                 deniedPermissionView | ||
|             case .granted: | ||
|                 actionButtons | ||
|             // `RecordPermission` is a framework enum: `@unknown default` keeps the fallback | ||
|             // while letting the compiler flag any case added in a future SDK. | ||
|             @unknown default: | ||
|                 Text("Unknown permission state") | ||
|             } | ||
|         } | ||
|         .onAppear { | ||
|             // Re-read the permission each time the view appears. | ||
|             updateMicrophonePermission() | ||
|         } | ||
|     } | ||
|  | ||
|     /// Start/stop controls shown once microphone access has been granted. | ||
|     private var actionButtons: some View { | ||
|         VStack(spacing: 40) { | ||
|             startSessionButton | ||
|             endSessionButton | ||
|         } | ||
|     } | ||
|  | ||
|     /// Starts the realtime session on the view model. | ||
|     private var startSessionButton: some View { | ||
|         Button { | ||
|             Task { | ||
|                 await realTimeAPIViewModel.testOpenAIRealtime() | ||
|             } | ||
|         } label: { | ||
|             // Uses the same "mic" symbol family as the other icons in this view. | ||
|             // NOTE(review): "microphone" appears to exist only in newer SF Symbols releases - confirm. | ||
|             Label("Start session", systemImage: "mic") | ||
|         } | ||
|     } | ||
|  | ||
|     /// Asks the view model to disconnect the realtime session. | ||
|     private var endSessionButton: some View { | ||
|         Button { | ||
|             Task { | ||
|                 await realTimeAPIViewModel.disconnect() | ||
|             } | ||
|         } label: { | ||
|             Label("Stop session", systemImage: "stop") | ||
|         } | ||
|     } | ||
|  | ||
|     /// Shown while the permission is still undetermined; triggers the system prompt. | ||
|     private var requestPermissionButton: some View { | ||
|         Button { | ||
|             requestMicrophonePermission() | ||
|         } label: { | ||
|             Label("Allow microphone access", systemImage: "mic.slash") | ||
|         } | ||
|     } | ||
|  | ||
|     /// Shown when access was denied; offers a shortcut to the app's Settings page. | ||
|     private var deniedPermissionView: some View { | ||
|         VStack(spacing: 12) { | ||
|             Image(systemName: "mic.slash.circle") | ||
|                 .font(.largeTitle) | ||
|                 .foregroundColor(.red) | ||
|  | ||
|             Text("Microphone access is required") | ||
|                 .font(.headline) | ||
|  | ||
|             Button("Open Settings") { | ||
|                 if let settingsUrl = URL(string: UIApplication.openSettingsURLString) { | ||
|                     UIApplication.shared.open(settingsUrl) | ||
|                 } | ||
|             } | ||
|         } | ||
|     } | ||
|  | ||
|     /// Copies the audio session's current record permission into view state. | ||
|     private func updateMicrophonePermission() { | ||
|         microphonePermission = AVAudioSession.sharedInstance().recordPermission | ||
|     } | ||
|  | ||
|     /// Requests record permission and stores the user's answer. | ||
|     private func requestMicrophonePermission() { | ||
|         AVAudioSession.sharedInstance().requestRecordPermission { granted in | ||
|             // Hop to the main queue before mutating view state from the completion handler. | ||
|             DispatchQueue.main.async { | ||
|                 microphonePermission = granted ? .granted : .denied | ||
|             } | ||
|         } | ||
|     } | ||
| } | 
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| @@ -0,0 +1,101 @@ | ||
| // | ||
| // RealTimeAPIViewModel.swift | ||
| // SwiftOpenAI | ||
| // | ||
| // Created by James Rochabrun on 1/18/25. | ||
| // | ||
|  | ||
| import AVFoundation | ||
| import Foundation | ||
| import SwiftOpenAI | ||
|  | ||
| @Observable | ||
| final class RealTimeAPIViewModel { | ||
|  | ||
| /// Service used by `testOpenAIRealtime()` to create the realtime session. | ||
| let service: OpenAIService | ||
|  | ||
| /// Stores the injected service; no session or audio capture is started here. | ||
| init(service: OpenAIService) { | ||
| self.service = service | ||
| } | ||
|  | ||
| /// Microphone vendor started by `testOpenAIRealtime()`; assigned at the end of that call so it stays referenced afterwards. | ||
| var kMicrophoneSampleVendor: MicrophonePCMSampleVendor? | ||
| /// Session created by `testOpenAIRealtime()`; `disconnect()` calls `disconnect()` on it. | ||
| var kRealtimeSession: OpenAIRealtimeSession? | ||
|  | ||
| /// Ends the current realtime session, if there is one. | ||
| /// | ||
| /// Stops the stored microphone vendor as well as the session, then drops both | ||
| /// references so that a later start begins from a clean state. Safe to call when | ||
| /// nothing is running: every step is optional-chained. | ||
| @RealtimeActor | ||
| func disconnect() { | ||
|     // Stop capturing first so no further audio is produced for a session that is going away. | ||
|     kMicrophoneSampleVendor?.stop() | ||
|     kMicrophoneSampleVendor = nil | ||
|  | ||
|     kRealtimeSession?.disconnect() | ||
|     kRealtimeSession = nil | ||
| } | ||
|  | ||
| @RealtimeActor | ||
| There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @lzell do you mind taking a look, in case you know off the top of your head why my web socket gets disconnected? I am a bit lost on this one :/ There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Isn't that fix amazing :) | ||
| /// Starts a realtime voice session with OpenAI using a fixed demo configuration. | ||
| /// | ||
| /// Starts microphone capture, opens the realtime session, and then spawns three | ||
| /// unstructured tasks: one forwards microphone buffers to the session once it reports | ||
| /// `sessionUpdated`, one sends the initial response-create message, and one plays back | ||
| /// audio deltas received from the session. Failures are logged and abort the start | ||
| /// instead of crashing the app. | ||
| func testOpenAIRealtime() async { | ||
|     // Tear down anything left over from a previous start so a second tap does not | ||
|     // leave an extra microphone tap and session running. | ||
|     kMicrophoneSampleVendor?.stop() | ||
|     kMicrophoneSampleVendor = nil | ||
|     kRealtimeSession?.disconnect() | ||
|     kRealtimeSession = nil | ||
|  | ||
|     let sessionConfiguration = OpenAIRealtimeSessionUpdate.SessionConfiguration( | ||
|         inputAudioFormat: "pcm16", | ||
|         inputAudioTranscription: .init(model: "whisper-1"), | ||
|         instructions: "You are tour guide for Monument Valley, Utah", | ||
|         maxResponseOutputTokens: .int(4096), | ||
|         modalities: ["audio", "text"], | ||
|         outputAudioFormat: "pcm16", | ||
|         temperature: 0.7, | ||
|         turnDetection: .init(prefixPaddingMs: 200, silenceDurationMs: 500, threshold: 0.5), | ||
|         voice: "shimmer" | ||
|     ) | ||
|  | ||
|     let microphoneSampleVendor = MicrophonePCMSampleVendor() | ||
|     let audioStream: AsyncStream<AVAudioPCMBuffer> | ||
|     do { | ||
|         audioStream = try microphoneSampleVendor.start(useVoiceProcessing: true) | ||
|     } catch { | ||
|         // Recoverable (e.g. the audio hardware is unavailable): report and bail out. | ||
|         print("Could not start audio stream: \(error.localizedDescription)") | ||
|         return | ||
|     } | ||
|  | ||
|     let realtimeSession: OpenAIRealtimeSession | ||
|     do { | ||
|         realtimeSession = try await service.realTimeSession( | ||
|             sessionConfiguration: sessionConfiguration | ||
|         ) | ||
|     } catch { | ||
|         print("Could not create an OpenAI realtime session: \(error.localizedDescription)") | ||
|         // Capture has already started; stop it so the microphone is not left running. | ||
|         microphoneSampleVendor.stop() | ||
|         return | ||
|     } | ||
|  | ||
|     // Flipped by the receiver task once the session reports `sessionUpdated`; | ||
|     // audio buffers that arrive before then are dropped. | ||
|     var isOpenAIReadyForAudio = false | ||
|     Task { | ||
|         do { | ||
|             for await buffer in audioStream { | ||
|                 if isOpenAIReadyForAudio, let base64Audio = AudioUtils.base64EncodeAudioPCMBuffer(from: buffer) { | ||
|                     try await realtimeSession.sendMessage( | ||
|                         OpenAIRealtimeInputAudioBufferAppend(audio: base64Audio) | ||
|                     ) | ||
|                 } | ||
|             } | ||
|         } catch { | ||
|             // Without this catch a failed send would end the task with no trace. | ||
|             print("Could not send microphone audio: \(error.localizedDescription)") | ||
|         } | ||
|         print("Done streaming microphone audio") | ||
|     } | ||
|  | ||
|     Task { | ||
|         do { | ||
|             print("Sending response create") | ||
|             try await realtimeSession.sendMessage(OpenAIRealtimeResponseCreate()) | ||
|         } catch { | ||
|             print("Could not send the session configuration instructions") | ||
|         } | ||
|     } | ||
|  | ||
|     Task { | ||
|         for await message in realtimeSession.receiver { | ||
|             switch message { | ||
|             case .sessionUpdated: | ||
|                 isOpenAIReadyForAudio = true | ||
|             case .responseAudioDelta(let base64Audio): | ||
|                 InternalAudioPlayer.playPCM16Audio(from: base64Audio) | ||
|             default: | ||
|                 break | ||
|             } | ||
|         } | ||
|         print("Done listening for messages from OpenAI") | ||
|     } | ||
|  | ||
|     // Keep both objects referenced so they are still reachable once this function returns. | ||
|     kMicrophoneSampleVendor = microphoneSampleVendor | ||
|     kRealtimeSession = realtimeSession | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
RealTimeAPIViewModel and RealTimeAPIDemoView are how I try to test this. All the code has been copied from the demo branch.