Skip to content

Commit 521113e

Browse files
adjustments to align with post draft
1 parent 5891481 commit 521113e

File tree

1 file changed

+112
-78
lines changed

1 file changed

+112
-78
lines changed

index.html

Lines changed: 112 additions & 78 deletions
Original file line number | Diff line number | Diff line change
@@ -76,6 +76,7 @@
7676
border: 1px solid #666;
7777
}
7878

79+
/* Remote audio control styling */
7980
.audio-container {
8081
margin: 10px 0;
8182
padding: 8px;
@@ -122,7 +123,7 @@ <h1>Chat with GPT-4 Realtime with WebRTC</h1>
122123
value="Your OPENAI_API_KEY from platform.openai.com">
123124
<br>
124125
<label for="temperature">Temperature: </label>
125-
<input id="temperature" type="number" step="0.1" min="0" max="1" placeholder="Temperature" value="0.7"
126+
<input id="temperature" type="number" step="0.1" min="0" max="2" placeholder="Temperature" value="1.0"
126127
style="width: 60px">
127128
<br>
128129
<label for="voice">Select Voice: </label>
@@ -215,9 +216,11 @@ <h1>Chat with GPT-4 Realtime with WebRTC</h1>
215216
};
216217
const payload = {
217218
model: model,
218-
voice: voiceEl.value,
219-
input_audio_transcription: {model: "whisper-1"},
220-
instructions: sessionInstructionsEl.value
219+
// expires_at: Math.floor(Date.now() / 1000) + 60, // #Fail - unknown parameter!
220+
// load these later
221+
// voice: voiceEl.value,
222+
// input_audio_transcription: {model: "whisper-1"},
223+
// instructions: sessionInstructionsEl.value
221224
};
222225

223226
const response = await fetch("https://api.openai.com/v1/realtime/sessions", {
@@ -231,19 +234,59 @@ <h1>Chat with GPT-4 Realtime with WebRTC</h1>
231234
return null;
232235
}
233236
const data = await response.json();
234-
const key = data?.client_secret?.value;
235-
if (!key) {
237+
const token = data?.client_secret?.value;
238+
if (!token) {
236239
console.error("Invalid response format:", data);
237240
return null;
238241
}
239-
console.log("Received ephemeral key:", key);
240-
return key;
242+
// For debugging - don't make this easy to abuse
243+
console.log(`Received ephemeral key: ${token} will expire at ${new Date(data?.client_secret?.expires_at * 1000)
244+
.toLocaleString()}`, data);
245+
return token;
241246
} catch (error) {
242247
console.error("Error fetching ephemeral key:", error);
243248
return error;
244249
}
245250
}
246251

252+
253+
/**
 * Configure the newly opened session and trigger the assistant's opening response.
 * Meant to run from the data channel's "open" event.
 *
 * Sends two client events over the data channel, in this order:
 *   1. "session.update"  - instructions, voice, tools, transcription model, temperature
 *   2. "response.create" - the start instructions, capped at 100 output tokens
 * and then writes "Session started." to the log.
 * @returns {void}
 */
function sessionStartMessages() {
    // Serialize a client event and push it over the data channel.
    const sendEvent = (event) => dc.send(JSON.stringify(event));

    // Saved settings win; a missing/empty value (or a temperature that parses to NaN or 0)
    // falls back to the corresponding default.
    const instructions = localStorage.getItem("sessionInstructions") || defaultSessionInstructions;
    const openingInstructions = localStorage.getItem("startInstructions") || defaultStartInstructions;
    const samplingTemperature = parseFloat(localStorage.getItem("temperature")) || defaultTemperature;

    // 1) Update the session
    sendEvent({
        type: "session.update",
        session: {
            instructions,
            voice: voiceEl.value,
            tools: gptFunctions,
            tool_choice: "auto",
            input_audio_transcription: {model: "whisper-1"},
            temperature: samplingTemperature,
        },
    });

    // 2) Start instructions
    sendEvent({
        type: "response.create",
        response: {
            modalities: ["text", "audio"],
            instructions: openingInstructions,
            max_output_tokens: 100,
        },
    });

    appendOrUpdateLog("Session started.", "system-message");
}
289+
247290
/**
248291
* Handle incoming DataChannel messages from the GPT server.
249292
*/
@@ -312,8 +355,8 @@ <h1>Chat with GPT-4 Realtime with WebRTC</h1>
312355

313356
// An ephemeral key is required to start a session
314357
// Usually this should be requested from your server to avoid exposing the OPENAI_API_KEY
315-
const ephemeralKey = await fetchEphemeralKey(apiKey);
316-
if (!ephemeralKey || ephemeralKey === "error") {
358+
const ephemeralToken = await fetchEphemeralKey(apiKey);
359+
if (!ephemeralToken || ephemeralToken === "error") {
317360
toggleSessionButtons(false);
318361
return;
319362
}
@@ -327,7 +370,7 @@ <h1>Chat with GPT-4 Realtime with WebRTC</h1>
327370
console.error("Failed to get local stream");
328371
return;
329372
}
330-
track = stream.getTracks()[0];
373+
[track] = stream.getAudioTracks();
331374
} catch (err) {
332375
console.error("Error accessing mic:", err);
333376
appendOrUpdateLog("Mic access error. Check permissions.", "system-message");
@@ -340,47 +383,16 @@ <h1>Chat with GPT-4 Realtime with WebRTC</h1>
340383
// Create PeerConnection
341384
pc = new RTCPeerConnection();
342385

343-
// Create an audio element for the remote track
344-
const audioEl = document.querySelector("audio");
345-
346386
// On receiving remote track
347-
pc.ontrack = (e) => audioEl.srcObject = e.streams[0];
387+
pc.ontrack = (e) => remoteAudioEl.srcObject = e.streams[0];
348388
// Add the local audio track and reference to its source stream
349389
pc.addTrack(track, stream);
350390

351391
// Create data channel
352-
dc = pc.createDataChannel("oai-events");
392+
dc = pc.createDataChannel("oai");
353393

354394
// Send session instructions upon opening the data channel
355-
dc.addEventListener("open", () => {
356-
const sessionInstruct = localStorage.getItem("sessionInstructions") || "You are a friendly assistant";
357-
const startInstruct = localStorage.getItem("startInstructions") || "Greet the user and ask how you can help";
358-
const temperature = parseFloat(localStorage.getItem("temperature")) || 0.7;
359-
360-
// Update the session
361-
const systemMessage = {
362-
type: "session.update",
363-
session: {
364-
instructions: sessionInstruct,
365-
tools: gptFunctions,
366-
tool_choice: "auto"
367-
}
368-
};
369-
dc.send(JSON.stringify(systemMessage));
370-
371-
// Start instructions
372-
const startMessage = {
373-
type: "response.create",
374-
response: {
375-
modalities: ["text", "audio"],
376-
instructions: startInstruct,
377-
temperature: temperature,
378-
max_output_tokens: 100
379-
}
380-
};
381-
dc.send(JSON.stringify(startMessage));
382-
appendOrUpdateLog("Session started.", "system-message");
383-
});
395+
dc.addEventListener("open", () => sessionStartMessages());
384396

385397
// Handle incoming messages from the server
386398
dc.addEventListener("message", handleMessage);
@@ -390,22 +402,32 @@ <h1>Chat with GPT-4 Realtime with WebRTC</h1>
390402

391403
// Create answer
392404
const baseUrl = "https://api.openai.com/v1/realtime";
393-
const sdpResp = await fetch(`${baseUrl}?model=${model}`, {
405+
const response = await fetch(`${baseUrl}?model=${model}`, {
394406
method: "POST",
395407
body: pc.localDescription.sdp,
396408
headers: {
397-
Authorization: `Bearer ${ephemeralKey}`,
409+
Authorization: `Bearer ${ephemeralToken}`,
398410
"Content-Type": "application/sdp"
399411
},
400412
});
401-
if (!sdpResp.ok) {
402-
console.error("Failed to fetch SDP answer:", await sdpResp.text());
413+
if (!response.ok) {
414+
console.error("Failed to fetch SDP answer:", await response.text());
403415
}
404-
const answer = {type: "answer", sdp: await sdpResp.text()};
416+
const answer = {type: "answer", sdp: await response.text()};
405417
await pc.setRemoteDescription(answer);
406418

407-
toggleSessionButtons(true);
419+
// Wait for connection to be established before proceeding
420+
await new Promise((resolve, reject) => {
421+
const timeout = setTimeout(() => reject(`Connection timeout. Current state: ${pc.connectionState}`), 10_000);
422+
pc.addEventListener("connectionstatechange", () => {
423+
if (pc.connectionState === "connected") {
424+
clearTimeout(timeout);
425+
resolve();
426+
}
427+
});
428+
});
408429

430+
toggleSessionButtons(true);
409431
console.log("Realtime session started!");
410432
} catch (err) {
411433
console.error("Error starting session:", err);
@@ -423,16 +445,28 @@ <h1>Chat with GPT-4 Realtime with WebRTC</h1>
423445
*/
424446
async function endSession(instructions = endInstructionsEl.value || defaultEndInstructions) {
425447
console.log("Ending session...");
426-
const message = {
427-
type: "response.create",
428-
response: {
429-
modalities: ["text", "audio"],
430-
instructions: instructions,
431-
temperature: parseFloat(temperatureEl.value),
432-
max_output_tokens: 200
433-
}
434-
};
435-
if (dc && dc.readyState === "open") {
448+
449+
if (dc?.readyState === "open") {
450+
// Close after the final message
451+
dc.addEventListener("message", (event) => {
452+
const message = JSON.parse(event.data);
453+
if (message.type === "output_audio_buffer.stopped") {
454+
pc.close();
455+
456+
console.log("Session ended.");
457+
appendOrUpdateLog("Session ended.", "system-message");
458+
toggleSessionButtons(false);
459+
}
460+
});
461+
462+
const message = {
463+
type: "response.create",
464+
response: {
465+
modalities: ["text", "audio"],
466+
instructions: instructions,
467+
max_output_tokens: 200
468+
}
469+
};
436470
dc.send(JSON.stringify(message));
437471
}
438472

@@ -443,17 +477,6 @@ <h1>Chat with GPT-4 Realtime with WebRTC</h1>
443477

444478
endButtonEl.disabled = true;
445479

446-
// Close after the final message
447-
dc.addEventListener("message", (event) => {
448-
const message = JSON.parse(event.data);
449-
if (message.type === "output_audio_buffer.stopped") {
450-
pc.close();
451-
452-
console.log("Session ended.");
453-
appendOrUpdateLog("Session ended.", "system-message");
454-
toggleSessionButtons(false);
455-
}
456-
});
457480
}
458481
</script>
459482

@@ -480,7 +503,7 @@ <h1>Chat with GPT-4 Realtime with WebRTC</h1>
480503
const autoSaveStatusEl = $("autoSaveStatus");
481504
const settingsButtonEl = $("settingsButton");
482505
const settingsFormEl = $("settingsForm");
483-
const remoteAudio = $('remoteAudio');
506+
const remoteAudioEl = $('remoteAudio');
484507
const muteButton = $('muteButton');
485508
const volumeSlider = $('volumeSlider');
486509
const logEl = $("log");
@@ -493,19 +516,18 @@ <h1>Chat with GPT-4 Realtime with WebRTC</h1>
493516
const defaultSessionInstructions = "You are a friendly assistant";
494517
const defaultStartInstructions = "Greet the user and ask how you can help";
495518
const defaultEndInstructions = "Give a quick good-bye. Sometimes remind the user to press the button to start a new session.";
496-
const defaultTemperature = 0.7;
519+
const defaultTemperature = 1.0;
497520
const defaultVoice = "alloy";
498521

499522

500523
// Volume controls
501-
volumeSlider.addEventListener('input', (e) => {
502-
remoteAudio.volume = e.target.value;
503-
});
524+
volumeSlider.addEventListener('input', (e) => remoteAudioEl.volume = e.target.value);
504525

526+
// Mute button
505527
let isMuted = false;
506528
muteButton.addEventListener('click', () => {
507529
isMuted = !isMuted;
508-
remoteAudio.muted = isMuted;
530+
remoteAudioEl.muted = isMuted;
509531
muteButton.textContent = isMuted ? '🔇' : '🔊';
510532
});
511533

@@ -523,6 +545,9 @@ <h1>Chat with GPT-4 Realtime with WebRTC</h1>
523545

524546
/**
525547
* Insert a new log entry or update an existing one (by messageId).
548+
* @param {string} message - The message to log.
549+
* @param {string} className - Optional CSS class for styling
550+
* @param {string} messageId - Optional ID to update an existing log entry.
526551
*/
527552
function appendOrUpdateLog(message, className = "", messageId = null) {
528553
let logEntry;
@@ -542,6 +567,8 @@ <h1>Chat with GPT-4 Realtime with WebRTC</h1>
542567

543568
/**
544569
* Load settings from localStorage into UI fields.
570+
* If not set, use default values.
571+
* @returns {void}
545572
*/
546573
function loadSettings() {
547574
keyEl.value = localStorage.getItem("openaiApiKey") || defaultKey;
@@ -554,6 +581,7 @@ <h1>Chat with GPT-4 Realtime with WebRTC</h1>
554581

555582
/**
556583
* Autosave changed settings to localStorage (debounced by 2s).
584+
* @returns {void}
557585
*/
558586
function autoSaveSettings() {
559587
const settings = {
@@ -573,6 +601,7 @@ <h1>Chat with GPT-4 Realtime with WebRTC</h1>
573601

574602
/**
575603
* Reset settings and re-save them to localStorage.
604+
* @returns {void}
576605
*/
577606
function resetSettings() {
578607
const settings = {
@@ -591,6 +620,7 @@ <h1>Chat with GPT-4 Realtime with WebRTC</h1>
591620

592621
/**
593622
* Send updated session instructions to the GPT server (if data channel is open).
623+
* @returns {void}
594624
*/
595625
function sendNewSettings() {
596626
if (dc && dc.readyState === "open") {
@@ -622,6 +652,7 @@ <h1>Chat with GPT-4 Realtime with WebRTC</h1>
622652

623653
/**
624654
* Send the user's typed message to the GPT server over the data channel.
655+
* @returns {void}
625656
*/
626657
function sendText() {
627658
const text = textInputEl.value.trim();
@@ -655,13 +686,16 @@ <h1>Chat with GPT-4 Realtime with WebRTC</h1>
655686
await startSession();
656687
});
657688

689+
// End session button - cancel any in-progress response and close the session
endButtonEl.addEventListener("click", async () => {
    // Nothing to end if the peer connection has already been closed.
    if (pc?.connectionState === "closed") {
        console.log(`No session to end. Connection state: ${pc.connectionState}`);
        appendOrUpdateLog("No session to end.", "system-message");
        return;
    }
    toggleSessionButtons(false);

    // Cancel any in-progress response - not needed for speech input due to turn detection.
    // RTCDataChannel.send() throws when the channel is not open (and `dc` may not have been
    // assigned yet), which would abort this handler before endSession() runs, so only send
    // the cancel when the channel can actually carry it.
    if (dc?.readyState === "open") {
        dc.send(JSON.stringify({type: "response.cancel"}));
    }
    await endSession();
});
667701

0 commit comments

Comments
 (0)