7979</ head >
8080
8181< body >
82- < h1 > Chat with GPT-4 Realtime and WebRTC</ h1 >
82+ < h1 > Chat with GPT-4 Realtime with WebRTC</ h1 >
8383< p > Click below to start/end a session. You must configure your OpenAI API key by clicking on settings first. </ p >
8484
8585<!-- Buttons for session control and settings -->
@@ -142,13 +142,14 @@ <h1>Chat with GPT-4 Realtime and WebRTC</h1>
142142 ...
143143-->
144144< script id ="rtc-logic ">
145- // Global references
145+ // Globals
146146 let pc ; // RTCPeerConnection
147- let localTrack ; // Local audio track
147+ let track ; // Local audio track
148148 let dc ; // Data channel
149149 const assistantResults = { } ; // Track interim/final transcripts
150150 const userMessages = { } ; // Track user messages per item ID
151151
152+
152153 // Model & function definitions
153154 const model = "gpt-4o-mini-realtime-preview" ;
154155 const gptFunctions = [
@@ -198,7 +199,7 @@ <h1>Chat with GPT-4 Realtime and WebRTC</h1>
198199 return key ;
199200 } catch ( error ) {
200201 console . error ( "Error fetching ephemeral key:" , error ) ;
201- return null ;
202+ return error ;
202203 }
203204 }
204205
@@ -207,7 +208,6 @@ <h1>Chat with GPT-4 Realtime and WebRTC</h1>
207208 */
208209 function handleMessage ( event ) {
209210 const message = JSON . parse ( event . data ) ;
210- console . info ( "Message from GPT server:" , message ) ;
211211 const itemId = message . item_id ;
212212
213213 switch ( message . type ) {
@@ -252,7 +252,7 @@ <h1>Chat with GPT-4 Realtime and WebRTC</h1>
252252
253253 default :
254254 // For debugging
255- // console.log(" Unhandled message type:", message.type);
255+ console . info ( ` Unhandled message from server: ${ message . type } ` , message ) ;
256256 break ;
257257 }
258258 }
@@ -271,13 +271,15 @@ <h1>Chat with GPT-4 Realtime and WebRTC</h1>
271271 return ;
272272 }
273273
274+ // An ephemeral key is required to start a session
275+ // Usually this should be requested from your server to avoid exposing the OPENAI_API_KEY
274276 const ephemeralKey = await fetchEphemeralKey ( apiKey ) ;
275- if ( ! ephemeralKey ) {
276- startButtonEl . hidden = false ;
277- endButtonEl . hidden = true ;
278- return ;
277+ if ( ! ephemeralKey || ephemeralKey === "error" ) {
278+ toggleSessionButtons ( false ) ;
279+ return ;
279280 }
280281
282+ // Start the WebRTC session
281283 try {
282284 // Create PeerConnection
283285 pc = new RTCPeerConnection ( ) ;
@@ -290,35 +292,27 @@ <h1>Chat with GPT-4 Realtime and WebRTC</h1>
290292 // On receiving remote track
291293 pc . ontrack = ( e ) => audioEl . srcObject = e . streams [ 0 ] ;
292294
293-
294- // Get local mic
295- const stream = await navigator . mediaDevices . getUserMedia ( { audio : true } )
296- . catch ( err => {
297- console . error ( "Error accessing mic:" , err ) ;
298- appendOrUpdateLog ( "Mic access error. Check permissions." , "system-message" ) ;
299- startButtonEl . disabled = false ;
300- startButtonEl . hidden = false ;
301- endButtonEl . hidden = true ;
302- } ) ;
303-
304- if ( ! stream ) {
305- // If no stream, something went wrong (permissions?)
306- console . error ( "Failed to get local stream" ) ;
307- return ;
295+ // Get local mic and add to PeerConnection
296+ try {
297+ const stream = await navigator . mediaDevices . getUserMedia ( { audio : true } ) ;
298+ if ( ! stream ) {
299+ console . error ( "Failed to get local stream" ) ;
300+ return ;
301+ }
302+ track = stream . getTracks ( ) [ 0 ] ;
303+ pc . addTrack ( track , stream ) ;
304+ } catch ( err ) {
305+ console . error ( "Error accessing mic:" , err ) ;
306+ appendOrUpdateLog ( "Mic access error. Check permissions." , "system-message" ) ;
307+ toggleSessionButtons ( false ) ;
308308 }
309- localTrack = stream . getTracks ( ) [ 0 ] ;
310- pc . addTrack ( localTrack ) ;
311309
312310 // Create data channel once
313- if ( ! dc ) {
314- dc = pc . createDataChannel ( "oai-events" ) ;
315- dc . addEventListener ( "message" , handleMessage ) ;
316- } else {
317- console . log ( "Data channel already exists" ) ;
318- }
311+ dc = pc . createDataChannel ( "oai-events" ) ;
312+ dc . addEventListener ( "message" , handleMessage ) ;
319313
320314 // Send session instructions upon opening the data channel
321- dc . onopen = ( ) => {
315+ dc . addEventListener ( "open" , ( ) => {
322316 const sessionInstruct = localStorage . getItem ( "sessionInstructions" ) || "You are a friendly assistant" ;
323317 const startInstruct = localStorage . getItem ( "startInstructions" ) || "Greet the user and ask how you can help" ;
324318 const temperature = parseFloat ( localStorage . getItem ( "temperature" ) ) || 0.7 ;
@@ -346,7 +340,7 @@ <h1>Chat with GPT-4 Realtime and WebRTC</h1>
346340 } ;
347341 dc . send ( JSON . stringify ( startMessage ) ) ;
348342 appendOrUpdateLog ( "Session started." , "system-message" ) ;
349- } ;
343+ } ) ;
350344
351345 // implicit setLocalDescription style
352346 await pc . setLocalDescription ( ) ;
@@ -366,22 +360,16 @@ <h1>Chat with GPT-4 Realtime and WebRTC</h1>
366360 const answer = { type : "answer" , sdp : await sdpResp . text ( ) } ;
367361 await pc . setRemoteDescription ( answer ) ;
368362
369- startButtonEl . disabled = false ;
370- startButtonEl . hidden = true ;
371- endButtonEl . hidden = false ;
363+ toggleSessionButtons ( true ) ;
372364
373365 console . log ( "Realtime session started!" ) ;
374366 } catch ( err ) {
375367 console . error ( "Error starting session:" , err ) ;
376368 appendOrUpdateLog ( "Error starting session. Please try again." , "system-message" ) ;
377- if ( pc ) {
369+ if ( pc ?. connectionState !== "closed" ) {
378370 pc . close ( ) ;
379- pc = null ;
380- dc = null ;
381371 }
382- startButtonEl . disabled = false ;
383- startButtonEl . hidden = false ;
384- endButtonEl . hidden = true ;
372+ toggleSessionButtons ( false ) ;
385373 }
386374 }
387375
@@ -405,9 +393,8 @@ <h1>Chat with GPT-4 Realtime and WebRTC</h1>
405393 }
406394
407395 // Turn off mic
408- if ( localTrack ) {
409- localTrack . stop ( ) ;
410- localTrack = null ;
396+ if ( track . status !== "ended" ) {
397+ track . stop ( ) ;
411398 }
412399
413400 endButtonEl . disabled = true ;
@@ -416,17 +403,11 @@ <h1>Chat with GPT-4 Realtime and WebRTC</h1>
416403 dc . addEventListener ( "message" , ( event ) => {
417404 const message = JSON . parse ( event . data ) ;
418405 if ( message . type === "output_audio_buffer.stopped" ) {
419- dc . close ( ) ;
420- dc = null ;
421406 pc . close ( ) ;
422- pc = null ;
423407
424408 console . log ( "Session ended." ) ;
425409 appendOrUpdateLog ( "Session ended." , "system-message" ) ;
426- startButtonEl . disabled = false ;
427- startButtonEl . hidden = false ;
428- endButtonEl . hidden = true ;
429-
410+ toggleSessionButtons ( false ) ;
430411 }
431412 } ) ;
432413 }
@@ -468,6 +449,18 @@ <h1>Chat with GPT-4 Realtime and WebRTC</h1>
468449 const defaultTemperature = 0.7 ;
469450 const defaultVoice = "alloy" ;
470451
452+
/**
 * Put the start/end session buttons into the state matching the session.
 *
 * While a session is active the start button is hidden and disabled and the
 * end button is shown. Otherwise the start button is shown and enabled and the
 * end button is hidden. The end button is always left enabled.
 *
 * @param {boolean} isSessionActive - Whether a session is currently active.
 */
function toggleSessionButtons(isSessionActive) {
  // Start button: only usable when no session is running.
  startButtonEl.hidden = isSessionActive;
  startButtonEl.disabled = isSessionActive;

  // End button: visible only during a session, and never left disabled.
  endButtonEl.hidden = !isSessionActive;
  endButtonEl.disabled = false;
}
463+
471464 /**
472465 * Insert a new log entry or update an existing one (by messageId).
473466 */
@@ -565,7 +558,7 @@ <h1>Chat with GPT-4 Realtime and WebRTC</h1>
565558 clearTimeout ( window . autoSaveTimeout ) ;
566559 window . autoSaveTimeout = setTimeout ( autoSaveSettings , 2000 ) ;
567560 } ) ;
568- } ) ;
561+ } ) ;
569562
570563 /**
571564 * Send the user's typed message to the GPT server over the data channel.
@@ -593,24 +586,22 @@ <h1>Chat with GPT-4 Realtime and WebRTC</h1>
593586
594587 // Button event handlers for starting / ending sessions
// Start button: begin a new session unless one is already connected.
startButtonEl.addEventListener("click", async () => {
  const alreadyConnected = pc?.connectionState === "connected";

  if (!alreadyConnected) {
    // Flip the buttons first so the UI reflects the pending session.
    toggleSessionButtons(true);
    await startSession();
    return;
  }

  console.log("Session already started");
  appendOrUpdateLog("Session already started.", "system-message");
});
605597
// End button: tear down the current session, if there is one.
endButtonEl.addEventListener("click", async () => {
  // `pc` is unassigned until a session has been started, and a finished
  // session leaves a closed connection behind. In both cases there is nothing
  // to end, so report it instead of calling endSession() without a connection.
  if (!pc || pc.connectionState === "closed") {
    const state = pc?.connectionState ?? "none";
    console.log(` No session to end. Connection state: ${state}`);
    appendOrUpdateLog("No session to end.", "system-message");
    return;
  }

  toggleSessionButtons(false);
  await endSession();
});
616607
0 commit comments