diff --git a/Sources/Models/ConversationUpdate.swift b/Sources/Models/ConversationUpdate.swift
index 917a3e8..c592ef1 100644
--- a/Sources/Models/ConversationUpdate.swift
+++ b/Sources/Models/ConversationUpdate.swift
@@ -5,12 +5,129 @@ public struct Message: Codable {
         case user = "user"
         case assistant = "assistant"
         case system = "system"
+        case tool = "tool"
+        case toolCalls = "tool_calls"
+        case bot = "bot"
     }
 
     public let role: Role
-    public let content: String
+    public let content: String?
+    public let tool_calls: [ToolCall]?
+    public let tool_call_id: String?
+
+    enum CodingKeys: String, CodingKey {
+        case role
+        case content
+        case tool_calls
+        case tool_call_id
+    }
+}
+
+/// A conversation message annotated with timing information.
+///
+/// `time`, `endTime`, `secondsFromStart` and `duration` are decoded from either
+/// a JSON number or a string containing a number.
+public struct TimestampedMessage: Codable {
+    public enum Role: String, Codable {
+        case user = "user"
+        case bot = "bot"
+        case system = "system"
+        case tool = "tool"
+        case toolCalls = "tool_calls"
+    }
+
+    public let role: Role
+    public let message: String?
+    public let time: Double
+    public let endTime: Double?
+    public let secondsFromStart: Double?
+    public let duration: Double?
+    public let toolCalls: [ToolCall]?
+
+    enum CodingKeys: String, CodingKey {
+        case role
+        case message
+        case time
+        case endTime
+        case secondsFromStart
+        case duration
+        case toolCalls
+    }
+
+    /// Decodes a message, accepting numeric fields encoded as numbers or numeric strings.
+    ///
+    /// - Throws: `DecodingError` when `role` or `time` is missing, or when a field has an unsupported type.
+    public init(from decoder: Decoder) throws {
+        let container = try decoder.container(keyedBy: CodingKeys.self)
+        role = try container.decode(Role.self, forKey: .role)
+        message = try container.decodeIfPresent(String.self, forKey: .message)
+        toolCalls = try container.decodeIfPresent([ToolCall].self, forKey: .toolCalls)
+
+        // `time` is required: when no value is present the plain `decode`
+        // call raises the standard missing-key / null-value error.
+        time = try TimestampedMessage.flexibleDouble(in: container, forKey: .time)
+            ?? container.decode(Double.self, forKey: .time)
+        endTime = try TimestampedMessage.flexibleDouble(in: container, forKey: .endTime)
+        secondsFromStart = try TimestampedMessage.flexibleDouble(in: container, forKey: .secondsFromStart)
+        duration = try TimestampedMessage.flexibleDouble(in: container, forKey: .duration)
+    }
+
+    /// Returns the value for `key` as a `Double`, accepting a JSON number or a numeric string.
+    ///
+    /// Returns `nil` when the key is absent or its value is `null`.
+    private static func flexibleDouble(
+        in container: KeyedDecodingContainer<CodingKeys>,
+        forKey key: CodingKeys
+    ) throws -> Double? {
+        if let text = try? container.decodeIfPresent(String.self, forKey: key),
+           let value = Double(text) {
+            return value
+        }
+        return try container.decodeIfPresent(Double.self, forKey: key)
+    }
+}
+
+/// A tool invocation attached to a message.
+public struct ToolCall: Codable {
+    public let type: String?
+    public let id: String?
+    public let function: ToolFunction?
+}
+
+/// The function named by a `ToolCall`.
+public struct ToolFunction: Codable {
+    public let name: String
+    /// The call arguments as a JSON-encoded string (for example `"{}"`).
+    public let arguments: String
 }
 
+/// The payload of a `conversation-update` app message.
 public struct ConversationUpdate: Codable {
     public let conversation: [Message]
+    public let messages: [TimestampedMessage]?
+    public let messagesOpenAIFormatted: [String]?
+
+    enum CodingKeys: String, CodingKey {
+        case conversation
+        case messages
+        case messagesOpenAIFormatted
+    }
+
+    public init(from decoder: Decoder) throws {
+        let container = try decoder.container(keyedBy: CodingKeys.self)
+        conversation = try container.decode([Message].self, forKey: .conversation)
+        messages = try container.decodeIfPresent([TimestampedMessage].self, forKey: .messages)
+        // Best effort: an unexpected element type in this auxiliary field must not
+        // make the whole update undecodable.
+        // NOTE(review): the field name suggests OpenAI-style message objects rather
+        // than strings — confirm the payload shape and consider `[Message]?`.
+        messagesOpenAIFormatted = try? container.decodeIfPresent([String].self, forKey: .messagesOpenAIFormatted)
+    }
+
+    public func encode(to encoder: Encoder) throws {
+        var container = encoder.container(keyedBy: CodingKeys.self)
+        try container.encode(conversation, forKey: .conversation)
+        try container.encodeIfPresent(messages, forKey: .messages)
+        try container.encodeIfPresent(messagesOpenAIFormatted, forKey: .messagesOpenAIFormatted)
+    }
 }
diff --git a/Sources/Vapi.swift b/Sources/Vapi.swift
index 69bdaa3..b0e74f8 100644
--- a/Sources/Vapi.swift
+++ b/Sources/Vapi.swift
@@ -61,7 +61,10 @@ public final class Vapi: CallClientDelegate {
 
     private let networkManager = NetworkManager()
     private var call: CallClient?
-    
+
+    /// Largest `time` of the timestamped messages already scanned for tool calls.
+    private var lastMessageTimestamp: Double = 0
+
     // MARK: - Computed Properties
 
     private var publicKey: String {
@@ -479,6 +482,26 @@ public final class Vapi: CallClientDelegate {
             case .conversationUpdate:
                 let conv = try decoder.decode(ConversationUpdate.self, from: unescapedData)
                 event = Event.conversationUpdate(conv)
+
+                // Emit a function-call event for every tool call found in messages that
+                // are newer than the last one already processed.
+                let newMessages = (conv.messages ?? []).filter { $0.time > self.lastMessageTimestamp }
+                if let latestTime = newMessages.map({ $0.time }).max() {
+                    self.lastMessageTimestamp = latestTime
+                }
+
+                for message in newMessages where message.role == .toolCalls {
+                    for toolCall in message.toolCalls ?? [] {
+                        guard let function = toolCall.function else { continue }
+                        guard let argsData = function.arguments.data(using: .utf8),
+                              let parameters = try? JSONSerialization.jsonObject(with: argsData, options: []) as? [String: Any] else {
+                            // Report instead of silently dropping a call whose arguments are not a JSON object.
+                            print("Ignoring tool call \"\(function.name)\": arguments are not a JSON object")
+                            continue
+                        }
+                        eventSubject.send(Event.functionCall(FunctionCall(name: function.name, parameters: parameters)))
+                    }
+                }
             case .statusUpdate:
                 let statusUpdate = try decoder.decode(StatusUpdate.self, from: unescapedData)
                 event = Event.statusUpdate(statusUpdate)
@@ -495,7 +518,7 @@ public final class Vapi: CallClientDelegate {
             eventSubject.send(event)
         } catch {
             let messageText = String(data: jsonData, encoding: .utf8)
-            print("Error parsing app message \"\(messageText ?? "")\": \(error.localizedDescription)")
+            print("Error parsing app message \"\(messageText ?? "")\": \(String(describing: error))")
         }
     }
 }
diff --git a/Tests/MessageParsingTests.swift b/Tests/MessageParsingTests.swift
new file mode 100644
index 0000000..da6d039
--- /dev/null
+++ b/Tests/MessageParsingTests.swift
@@ -0,0 +1,230 @@
+import XCTest
+import Combine
+@testable import Vapi
+
+final class MessageParsingTests: XCTestCase {
+
+    func testParseMessagesWithToolCallRole() throws {
+        let jsonString = """
+        {
+            "conversation": [
+                {
+                    "role": "system",
+                    "content": "System message"
+                },
+                {
+                    "role": "assistant",
+                    "content": "Assistant message",
+                    "tool_calls": [
+                        {
+                            "type": "function",
+                            "id": "tool123",
+                            "function": {
+                                "name": "start_exercise",
+                                "arguments": "{}"
+                            }
+                        }
+                    ]
+                },
+                {
+                    "role": "tool",
+                    "tool_call_id": "tool123",
+                    "content": "Tool Result"
+                },
+                {
+                    "role": "tool_calls",
+                    "content": null,
+                    "tool_calls": [
+                        {
+                            "type": "function",
+                            "id": "tool456",
+                            "function": {
+                                "name": "another_function",
+                                "arguments": "{}"
+                            }
+                        }
+                    ]
+                }
+            ]
+        }
+        """
+
+        let jsonData = Data(jsonString.utf8)
+        let decoder = JSONDecoder()
+
+        // This should not throw with our updated Role enum
+        let conversationUpdate = try decoder.decode(ConversationUpdate.self, from: jsonData)
+
+        XCTAssertEqual(conversationUpdate.conversation.count, 4)
+        XCTAssertEqual(conversationUpdate.conversation[0].role, Message.Role.system)
+        XCTAssertEqual(conversationUpdate.conversation[1].role, Message.Role.assistant)
+        XCTAssertEqual(conversationUpdate.conversation[2].role, Message.Role.tool)
+        XCTAssertEqual(conversationUpdate.conversation[3].role, Message.Role.toolCalls)
+
+        // Verify tool calls are properly parsed
+        XCTAssertNotNil(conversationUpdate.conversation[1].tool_calls)
+        XCTAssertEqual(conversationUpdate.conversation[1].tool_calls?.count, 1)
+        XCTAssertEqual(conversationUpdate.conversation[1].tool_calls?[0].function?.name, "start_exercise")
+
+        // Verify tool_call_id is properly parsed
+        XCTAssertEqual(conversationUpdate.conversation[2].tool_call_id, "tool123")
+    }
+
+    func testToolCallsInMessageAreExtractedAsFunctionCalls() throws {
+        let appMessageString = """
+        {
+            "type": "conversation-update",
+            "conversation": [
+                {
+                    "role": "system",
+                    "content": "System message"
+                },
+                {
+                    "role": "tool_calls",
+                    "tool_calls": [
+                        {
+                            "type": "function",
+                            "id": "tool456",
+                            "function": {
+                                "name": "another_function",
+                                "arguments": "{}"
+                            }
+                        }
+                    ]
+                }
+            ]
+        }
+        """
+
+        let appMessageData = Data(appMessageString.utf8)
+
+        let decoder = JSONDecoder()
+        let appMessage = try decoder.decode(AppMessage.self, from: appMessageData)
+
+        XCTAssertEqual(appMessage.type, AppMessage.MessageType.conversationUpdate)
+
+        let conversationUpdate = try decoder.decode(ConversationUpdate.self, from: appMessageData)
+        XCTAssertEqual(conversationUpdate.conversation.count, 2)
+
+        let lastMessage = try XCTUnwrap(conversationUpdate.conversation.last)
+        XCTAssertEqual(lastMessage.role, Message.Role.toolCalls)
+        let toolCalls = try XCTUnwrap(lastMessage.tool_calls)
+        XCTAssertEqual(toolCalls.count, 1)
+
+        let toolCall = try XCTUnwrap(toolCalls.first)
+        XCTAssertEqual(toolCall.function?.name, "another_function")
+        XCTAssertEqual(toolCall.function?.arguments, "{}")
+    }
+
+    func testParseTimestampedMessagesArray() throws {
+        let appMessageString = """
+        {
+            "type": "conversation-update",
+            "conversation": [
+                {
+                    "role": "system",
+                    "content": "System message"
+                }
+            ],
+            "messages": [
+                {
+                    "role": "system",
+                    "message": "System message",
+                    "time": 1741093883580,
+                    "secondsFromStart": 0
+                },
+                {
+                    "role": "bot",
+                    "message": "Bot message",
+                    "time": 1741093885838,
+                    "endTime": 1741093886618,
+                    "secondsFromStart": 1.8399999,
+                    "duration": 780,
+                    "source": ""
+                },
+                {
+                    "role": "user",
+                    "message": "User message",
+                    "time": 1741093897088,
+                    "endTime": 1741093898238,
+                    "secondsFromStart": 13.09,
+                    "duration": 1150
+                },
+                {
+                    "toolCalls": [
+                        {
+                            "type": "function",
+                            "id": "tool123",
+                            "function": {
+                                "name": "test_function",
+                                "arguments": "{}"
+                            }
+                        }
+                    ],
+                    "role": "tool_calls",
+                    "message": "",
+                    "time": 1741093903823,
+                    "secondsFromStart": 15.179
+                }
+            ],
+            "messagesOpenAIFormatted": []
+        }
+        """
+
+        let appMessageData = Data(appMessageString.utf8)
+
+        let decoder = JSONDecoder()
+        let conversationUpdate = try decoder.decode(ConversationUpdate.self, from: appMessageData)
+
+        // Check that messages array was parsed correctly
+        XCTAssertNotNil(conversationUpdate.messages)
+        XCTAssertEqual(conversationUpdate.messages?.count, 4)
+
+        // Verify the first message (system)
+        XCTAssertEqual(conversationUpdate.messages?[0].role, TimestampedMessage.Role.system)
+        XCTAssertEqual(conversationUpdate.messages?[0].message, "System message")
+        XCTAssertEqual(conversationUpdate.messages?[0].time, 1741093883580.0)
+
+        // Verify the bot message
+        XCTAssertEqual(conversationUpdate.messages?[1].role, TimestampedMessage.Role.bot)
+        XCTAssertEqual(conversationUpdate.messages?[1].message, "Bot message")
+        XCTAssertEqual(conversationUpdate.messages?[1].time, 1741093885838.0)
+        XCTAssertEqual(conversationUpdate.messages?[1].endTime, 1741093886618.0)
+        XCTAssertEqual(conversationUpdate.messages?[1].duration, 780.0)
+
+        // Verify the user message
+        XCTAssertEqual(conversationUpdate.messages?[2].role, TimestampedMessage.Role.user)
+        XCTAssertEqual(conversationUpdate.messages?[2].message, "User message")
+
+        // Verify the tool_calls message
+        XCTAssertEqual(conversationUpdate.messages?[3].role, TimestampedMessage.Role.toolCalls)
+        XCTAssertNotNil(conversationUpdate.messages?[3].toolCalls)
+        XCTAssertEqual(conversationUpdate.messages?[3].toolCalls?.count, 1)
+
+        let toolCall = conversationUpdate.messages?[3].toolCalls?[0]
+        XCTAssertEqual(toolCall?.function?.name, "test_function")
+        XCTAssertEqual(toolCall?.function?.arguments, "{}")
+        XCTAssertEqual(toolCall?.id, "tool123")
+    }
+
+    func testParseTimestampedMessageWithStringEncodedNumbers() throws {
+        let jsonString = """
+        {
+            "role": "user",
+            "message": "User message",
+            "time": "1741093897088",
+            "endTime": "1741093898238",
+            "secondsFromStart": "13.09",
+            "duration": "1150"
+        }
+        """
+
+        let message = try JSONDecoder().decode(TimestampedMessage.self, from: Data(jsonString.utf8))
+
+        XCTAssertEqual(message.time, 1741093897088.0)
+        XCTAssertEqual(message.endTime, 1741093898238.0)
+        XCTAssertEqual(message.secondsFromStart, 13.09)
+        XCTAssertEqual(message.duration, 1150.0)
+    }
+}
+