From 95b7a554e627acee24ce38d5038a7caa0f95f18a Mon Sep 17 00:00:00 2001 From: Elijah Williams Date: Mon, 18 Aug 2025 15:05:37 -0600 Subject: [PATCH 01/12] wip on a regenerate response functionality --- .../app/routers/index/sessions/__init__.py | 50 +++++++++++ llm-service/app/services/chat/chat.py | 63 +------------- .../app/services/chat/streaming_chat.py | 83 +++++++++++++++++-- .../chat_history/chat_history_manager.py | 16 ++++ .../chat_history/s3_chat_history_manager.py | 45 ++++++++++ .../simple_chat_history_manager.py | 60 ++++++++++++++ ui/src/api/chatApi.ts | 54 ++++++------ .../ChatOutput/ChatMessages/ChatMessage.tsx | 14 +++- .../ChatMessages/ChatMessageBody.tsx | 7 ++ .../ChatMessages/RegenerateButton.tsx | 67 +++++++++++++++ 10 files changed, 365 insertions(+), 94 deletions(-) create mode 100644 ui/src/pages/RagChatTab/ChatOutput/ChatMessages/RegenerateButton.tsx diff --git a/llm-service/app/routers/index/sessions/__init__.py b/llm-service/app/routers/index/sessions/__init__.py index 7a1c274af..efc4e62cb 100644 --- a/llm-service/app/routers/index/sessions/__init__.py +++ b/llm-service/app/routers/index/sessions/__init__.py @@ -66,6 +66,9 @@ from ....services.mlflow import rating_mlflow_log_metric, feedback_mlflow_log_table from ....services.query.chat_events import ChatEvent from ....services.session import rename_session +from ....services.metadata_apis import session_metadata_api +from ....services import llm_completion +from ....services.chat_history.chat_history_manager import RagMessage logger = logging.getLogger(__name__) router = APIRouter(prefix="/sessions/{session_id}", tags=["Sessions"]) @@ -152,6 +155,44 @@ def chat_history( ) +class RegenerateRequest(BaseModel): + message_id: str + + +@router.post( + "/chat-history/{message_id}/regenerate", + summary="Regenerate an assistant message by message ID and update chat history", +) +@exceptions.propagates +def regenerate_message(session_id: int, message_id: str, remote_user: Optional[str] = Header(None)) -> RagStudioChatMessage: + # Load session + session = session_metadata_api.get_session(session_id, user_name=remote_user) + + # Find existing message + messages: list[RagStudioChatMessage] = chat_history_manager.retrieve_chat_history(session_id=session_id) + target: Optional[RagStudioChatMessage] = next((m for m in messages if m.id == message_id), None) + if target is None: + raise HTTPException(status_code=404, detail="Message not found") + + # Regenerate assistant response for the same user message + completion = llm_completion.completion(session_id=session_id, question=target.rag_message.user, model_name=session.inference_model) + + updated = RagStudioChatMessage( + id=target.id, + session_id=session_id, + source_nodes=target.source_nodes, + inference_model=session.inference_model, + evaluations=[], + rag_message=RagMessage(user=target.rag_message.user, assistant=str(completion.message.content)), + timestamp=time.time(), + condensed_question=None, + ) + + # Persist update in-place + chat_history_manager.update_message(session_id=session_id, message_id=message_id, message=updated) + return updated + + @router.get( "/chat-history/{message_id}", summary="Returns a specific chat messages for the provided session.", @@ -286,12 +327,21 @@ def generate_stream() -> Generator[str, None, None]: try: executor = ThreadPoolExecutor(max_workers=1) + # If a response_id is provided in the request (e.g., regenerate), reuse it; else None + requested_response_id = None + try: + body_dict = request.model_dump() # Pydantic BaseModel + 
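        # A tidier alternative (sketch only, not part of this patch): declare the
        # field on the request model so it appears in the OpenAPI schema instead of
        # probing model_dump(). Model and field names below are illustrative
        # assumptions, not names confirmed by this diff:
        #
        #     class RagStudioChatRequest(BaseModel):  # hypothetical model name
        #         query: str
        #         configuration: RagPredictConfiguration
        #         response_id: Optional[str] = None  # set by the UI when regenerating
        #
        # With that in place, this whole try/except reduces to:
        #     requested_response_id = request.response_id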
requested_response_id = body_dict.get("response_id") + except Exception: + requested_response_id = None + future = executor.submit( stream_chat, session=session, query=request.query, configuration=configuration, user_name=remote_user, + response_id=requested_response_id, ) # If we get here and the cancel_event is set, the client has disconnected diff --git a/llm-service/app/services/chat/chat.py b/llm-service/app/services/chat/chat.py index 91102b00d..ec51e7849 100644 --- a/llm-service/app/services/chat/chat.py +++ b/llm-service/app/services/chat/chat.py @@ -40,22 +40,19 @@ import uuid from typing import Optional -from llama_index.core.chat_engine.types import AgentChatResponse - from app.ai.vector_stores.vector_store_factory import VectorStoreFactory from app.rag_types import RagPredictConfiguration -from app.services import evaluators, llm_completion -from app.services.chat.utils import retrieve_chat_history, format_source_nodes +from app.services import llm_completion +from app.services.chat.streaming_chat import finalize_response +from app.services.chat.utils import retrieve_chat_history from app.services.chat_history.chat_history_manager import ( - Evaluation, RagMessage, RagStudioChatMessage, chat_history_manager, ) from app.services.metadata_apis.session_metadata_api import Session -from app.services.mlflow import record_rag_mlflow_run, record_direct_llm_mlflow_run +from app.services.mlflow import record_direct_llm_mlflow_run from app.services.query import querier -from app.services.query.querier import get_nodes_from_output from app.services.query.query_configuration import QueryConfiguration logger = logging.getLogger(__name__) @@ -125,58 +122,6 @@ def _run_chat( ) -def finalize_response( - chat_response: AgentChatResponse, - condensed_question: str | None, - query: str, - query_configuration: QueryConfiguration, - response_id: str, - session: Session, - user_name: Optional[str], -) -> RagStudioChatMessage: - if condensed_question and (condensed_question.strip() == query.strip()): - condensed_question = None - - orig_source_nodes = chat_response.source_nodes - source_nodes = get_nodes_from_output(chat_response.response, session) - - # if node with id present in orig_source_nodes, then don't add it again - node_ids_present = set([node.node_id for node in orig_source_nodes]) - for node in source_nodes: - if node.node_id not in node_ids_present: - orig_source_nodes.append(node) - - chat_response.source_nodes = orig_source_nodes - - evaluations = [] - if len(chat_response.source_nodes) != 0: - relevance, faithfulness = evaluators.evaluate_response( - query, chat_response, session.inference_model - ) - evaluations.append(Evaluation(name="relevance", value=relevance)) - evaluations.append(Evaluation(name="faithfulness", value=faithfulness)) - response_source_nodes = format_source_nodes(chat_response) - new_chat_message = RagStudioChatMessage( - id=response_id, - session_id=session.id, - source_nodes=response_source_nodes, - inference_model=session.inference_model, - rag_message=RagMessage( - user=query, - assistant=chat_response.response, - ), - evaluations=evaluations, - timestamp=time.time(), - condensed_question=condensed_question, - ) - record_rag_mlflow_run( - new_chat_message, query_configuration, response_id, session, user_name - ) - chat_history_manager.append_to_history(session.id, [new_chat_message]) - - return new_chat_message - - def direct_llm_chat( session: Session, response_id: str, query: str, user_name: Optional[str] ) -> RagStudioChatMessage: diff --git 
a/llm-service/app/services/chat/streaming_chat.py b/llm-service/app/services/chat/streaming_chat.py index 9b4aab434..0b6f85734 100644 --- a/llm-service/app/services/chat/streaming_chat.py +++ b/llm-service/app/services/chat/streaming_chat.py @@ -47,23 +47,22 @@ from app.ai.vector_stores.vector_store_factory import VectorStoreFactory from app.rag_types import RagPredictConfiguration -from app.services import llm_completion, models -from app.services.chat.chat import finalize_response -from app.services.chat.utils import retrieve_chat_history +from app.services import llm_completion, models, evaluators +from app.services.chat.utils import retrieve_chat_history, format_source_nodes from app.services.chat_history.chat_history_manager import ( RagStudioChatMessage, RagMessage, - chat_history_manager, + chat_history_manager, Evaluation, ) from app.services.metadata_apis.session_metadata_api import Session -from app.services.mlflow import record_direct_llm_mlflow_run +from app.services.mlflow import record_direct_llm_mlflow_run, record_rag_mlflow_run from app.services.query import querier from app.services.query.chat_engine import ( FlexibleContextChatEngine, build_flexible_chat_engine, ) from app.services.query.querier import ( - build_retriever, + build_retriever, get_nodes_from_output, ) from app.services.query.query_configuration import QueryConfiguration @@ -73,6 +72,7 @@ def stream_chat( query: str, configuration: RagPredictConfiguration, user_name: Optional[str], + response_id: Optional[str] = None, ) -> Generator[ChatResponse, None, None]: query_configuration = QueryConfiguration( top_k=session.response_chunks, @@ -86,7 +86,22 @@ def stream_chat( use_streaming=not session.query_configuration.disable_streaming, ) - response_id = str(uuid.uuid4()) + response_id = response_id or str(uuid.uuid4()) + new_chat_message = RagStudioChatMessage( + id=response_id, + session_id=session.id, + source_nodes=[], + inference_model=session.inference_model, + evaluations=[], + rag_message=RagMessage( + user=query, + assistant="", + ), + timestamp=time.time(), + condensed_question=None, + ) + chat_history_manager.append_to_history(session.id, [new_chat_message]) + total_data_sources_size: int = sum( map( lambda ds_id: VectorStoreFactory.for_chunks(ds_id).size() or 0, @@ -217,4 +232,56 @@ def _stream_direct_llm_chat( timestamp=time.time(), condensed_question=None, ) - chat_history_manager.append_to_history(session.id, [new_chat_message]) + chat_history_manager.update_message(session.id, response_id, new_chat_message) + + +def finalize_response( + chat_response: AgentChatResponse, + condensed_question: str | None, + query: str, + query_configuration: QueryConfiguration, + response_id: str, + session: Session, + user_name: Optional[str], +) -> RagStudioChatMessage: + if condensed_question and (condensed_question.strip() == query.strip()): + condensed_question = None + + orig_source_nodes = chat_response.source_nodes + source_nodes = get_nodes_from_output(chat_response.response, session) + + # if node with id present in orig_source_nodes, then don't add it again + node_ids_present = set([node.node_id for node in orig_source_nodes]) + for node in source_nodes: + if node.node_id not in node_ids_present: + orig_source_nodes.append(node) + + chat_response.source_nodes = orig_source_nodes + + evaluations = [] + if len(chat_response.source_nodes) != 0: + relevance, faithfulness = evaluators.evaluate_response( + query, chat_response, session.inference_model + ) + evaluations.append(Evaluation(name="relevance", 
value=relevance)) + evaluations.append(Evaluation(name="faithfulness", value=faithfulness)) + response_source_nodes = format_source_nodes(chat_response) + new_chat_message = RagStudioChatMessage( + id=response_id, + session_id=session.id, + source_nodes=response_source_nodes, + inference_model=session.inference_model, + rag_message=RagMessage( + user=query, + assistant=chat_response.response, + ), + evaluations=evaluations, + timestamp=time.time(), + condensed_question=condensed_question, + ) + record_rag_mlflow_run( + new_chat_message, query_configuration, response_id, session, user_name + ) + chat_history_manager.update_message(session.id, response_id, new_chat_message) + + return new_chat_message diff --git a/llm-service/app/services/chat_history/chat_history_manager.py b/llm-service/app/services/chat_history/chat_history_manager.py index 24313a3b2..06ca2bd0a 100644 --- a/llm-service/app/services/chat_history/chat_history_manager.py +++ b/llm-service/app/services/chat_history/chat_history_manager.py @@ -84,6 +84,22 @@ def append_to_history( ) -> None: pass + @abstractmethod + def update_message( + self, session_id: int, message_id: str, message: RagStudioChatMessage + ) -> None: + """Update an existing message by ID for the given session. + + Implementations should overwrite both the user and assistant entries + corresponding to this message ID. + """ + pass + + @abstractmethod + def delete_message(self, session_id: int, message_id: str) -> None: + """Delete an existing message by ID for the given session.""" + pass + def _create_chat_history_manager() -> ChatHistoryManager: from app.services.chat_history.simple_chat_history_manager import ( diff --git a/llm-service/app/services/chat_history/s3_chat_history_manager.py b/llm-service/app/services/chat_history/s3_chat_history_manager.py index 974cf6063..349ef15b6 100644 --- a/llm-service/app/services/chat_history/s3_chat_history_manager.py +++ b/llm-service/app/services/chat_history/s3_chat_history_manager.py @@ -154,3 +154,48 @@ def append_to_history( f"Error appending to chat history for session {session_id}: {e}" ) raise + + def update_message( + self, session_id: int, message_id: str, message: RagStudioChatMessage + ) -> None: + """Update an existing message's content and metadata by ID in S3.""" + s3_key = self._get_s3_key(session_id) + try: + chat_history_data = self.retrieve_chat_history(session_id=session_id) + updated = False + for idx, existing in enumerate(chat_history_data): + if existing.id == message_id: + chat_history_data[idx] = message + updated = True + break + if not updated: + return + chat_history_json = json.dumps( + [m.model_dump() for m in chat_history_data] + ) + self.s3_client.put_object( + Bucket=self.bucket_name, Key=s3_key, Body=chat_history_json + ) + except Exception as e: + logger.error( + f"Error updating chat message {message.id} for session {session_id}: {e}" + ) + raise + + def delete_message(self, session_id: int, message_id: str) -> None: + """Delete a specific message by ID in S3-backed store.""" + s3_key = self._get_s3_key(session_id) + try: + chat_history_data = self.retrieve_chat_history(session_id=session_id) + chat_history_data = [m for m in chat_history_data if m.id != message_id] + chat_history_json = json.dumps( + [m.model_dump() for m in chat_history_data] + ) + self.s3_client.put_object( + Bucket=self.bucket_name, Key=s3_key, Body=chat_history_json + ) + except Exception as e: + logger.error( + f"Error deleting chat message {message_id} for session {session_id}: {e}" + ) + raise diff --git 
a/llm-service/app/services/chat_history/simple_chat_history_manager.py b/llm-service/app/services/chat_history/simple_chat_history_manager.py index 1176134fb..90b813e66 100644 --- a/llm-service/app/services/chat_history/simple_chat_history_manager.py +++ b/llm-service/app/services/chat_history/simple_chat_history_manager.py @@ -159,6 +159,66 @@ def append_to_history( ) store.persist(self._store_file(session_id)) + def update_message( + self, session_id: int, message_id: str, message: RagStudioChatMessage + ) -> None: + """Update an existing message's user/assistant content and metadata by ID.""" + store = self._store_for_session(session_id) + key = self._build_chat_key(session_id) + messages: list[ChatMessage] = store.get_messages(key) + + # Each logical message is stored as a pair: USER, ASSISTANT with same id + for i in range(0, len(messages), 2): + user_msg = messages[i] + if user_msg.additional_kwargs.get("id") == message_id: + # Update user content + user_msg.content = message.rag_message.user + # Update assistant content and metadata (next message) + if i + 1 < len(messages): + assistant_msg = messages[i + 1] + else: + assistant_msg = ChatMessage(role=MessageRole.ASSISTANT, content="") + messages.append(assistant_msg) + assistant_msg.content = message.rag_message.assistant + assistant_msg.additional_kwargs.update( + { + "id": message_id, + "source_nodes": message.source_nodes, + "inference_model": message.inference_model, + "evaluations": message.evaluations, + "timestamp": message.timestamp, + } + ) + # Persist updated list + store.delete_messages(key) + for m in messages: + store.add_message(key, m) + store.persist(self._store_file(session_id)) + return + + def delete_message(self, session_id: int, message_id: str) -> None: + """Delete both USER and ASSISTANT entries for a given message id.""" + store = self._store_for_session(session_id) + key = self._build_chat_key(session_id) + messages: list[ChatMessage] = store.get_messages(key) + + new_messages: list[ChatMessage] = [] + i = 0 + while i < len(messages): + user_msg = messages[i] + assistant_msg = messages[i + 1] if i + 1 < len(messages) else None + current_id = user_msg.additional_kwargs.get("id") + if current_id != message_id: + new_messages.append(user_msg) + if assistant_msg is not None: + new_messages.append(assistant_msg) + i += 2 + + store.delete_messages(key) + for m in new_messages: + store.add_message(key, m) + store.persist(self._store_file(session_id)) + @staticmethod def _build_chat_key(session_id: int) -> str: return "session_" + str(session_id) diff --git a/ui/src/api/chatApi.ts b/ui/src/api/chatApi.ts index 5ce249ee4..816df5246 100644 --- a/ui/src/api/chatApi.ts +++ b/ui/src/api/chatApi.ts @@ -87,6 +87,7 @@ export interface ChatMutationRequest { query: string; session_id: number; configuration: QueryConfiguration; + response_id?: string; } interface ChatHistoryRequestType { @@ -166,7 +167,7 @@ export interface ChatHistoryResponse { export const chatHistoryQuery = async ( request: ChatHistoryRequestType, - pageParam: number | undefined, + pageParam: number | undefined ): Promise => { const params = new URLSearchParams(); if (request.limit !== undefined) { @@ -178,13 +179,13 @@ export const chatHistoryQuery = async ( return await getRequest( `${llmServicePath}/sessions/${request.session_id.toString()}/chat-history?` + - params.toString(), + params.toString() ); }; export const appendPlaceholderToChatHistory = ( query: string, - cachedData?: InfiniteData, + cachedData?: InfiniteData ): InfiniteData => { if 
(!cachedData || cachedData.pages.length === 0) { const firstPage: ChatHistoryResponse = { @@ -199,7 +200,7 @@ export const appendPlaceholderToChatHistory = ( } const pageParams = cachedData.pageParams.map((pageParam, index) => - index > 0 && typeof pageParam === "number" ? ++pageParam : pageParam, + index > 0 && typeof pageParam === "number" ? ++pageParam : pageParam ); const pages = cachedData.pages.map((page) => { @@ -215,7 +216,7 @@ export const appendPlaceholderToChatHistory = ( const lastPage = pages[pages.length - 1]; const filteredLastPageData = lastPage.data.filter( - (chatMessage) => !isPlaceholder(chatMessage), + (chatMessage) => !isPlaceholder(chatMessage) ); return { pageParams, @@ -231,7 +232,7 @@ export const appendPlaceholderToChatHistory = ( export const replacePlaceholderInChatHistory = ( data: ChatMessageType, - cachedData?: InfiniteData, + cachedData?: InfiniteData ): InfiniteData => { if (!cachedData || cachedData.pages.length == 0) { return ( @@ -265,7 +266,7 @@ export const replacePlaceholderInChatHistory = ( }; export const createQueryConfiguration = ( - excludeKnowledgeBase: boolean, + excludeKnowledgeBase: boolean ): QueryConfiguration => { return { exclude_knowledge_base: excludeKnowledgeBase, @@ -296,7 +297,7 @@ const ratingMutation = async ({ }): Promise => { return await postRequest( `${llmServicePath}/sessions/${sessionId}/responses/${responseId}/rating`, - { rating }, + { rating } ); }; @@ -323,7 +324,7 @@ const feedbackMutation = async ({ }): Promise => { return await postRequest( `${llmServicePath}/sessions/${sessionId}/responses/${responseId}/feedback`, - { feedback }, + { feedback } ); }; @@ -344,7 +345,7 @@ export interface ChatEvent { const customChatMessage = ( variables: ChatMutationRequest, message: string, - prefix: string, + prefix: string ) => { const uuid = crypto.randomUUID(); const customMessage: ChatMessageType = { @@ -372,7 +373,7 @@ const canceledChatMessage = (variables: ChatMutationRequest) => { return customChatMessage( variables, "Request canceled by user", - CANCELED_PREFIX_ID, + CANCELED_PREFIX_ID ); }; @@ -385,7 +386,7 @@ interface StreamingChatCallbacks { const modifyPlaceholderInChatHistory = ( queryClient: QueryClient, variables: ChatMutationRequest, - replacementMessage: ChatMessageType, + replacementMessage: ChatMessageType ) => { queryClient.setQueryData>( chatHistoryQueryKey({ @@ -393,14 +394,14 @@ const modifyPlaceholderInChatHistory = ( offset: 0, }), (cachedData) => - replacePlaceholderInChatHistory(replacementMessage, cachedData), + replacePlaceholderInChatHistory(replacementMessage, cachedData) ); }; const handlePrepareController = ( getController: ((ctrl: AbortController) => void) | undefined, queryClient: QueryClient, - request: ChatMutationRequest, + request: ChatMutationRequest ) => { return (ctrl: AbortController) => { if (getController) { @@ -410,7 +411,7 @@ const handlePrepareController = ( modifyPlaceholderInChatHistory( queryClient, request, - canceledChatMessage(request), + canceledChatMessage(request) ); ctrl.signal.removeEventListener("abort", onAbort); }; @@ -428,10 +429,10 @@ const handleStreamingSuccess = ( | ((data: ChatMessageType, request?: unknown, context?: unknown) => unknown) | undefined, handleError: (request: ChatMutationRequest, error: Error) => void, - onError: ((error: Error) => void) | undefined, + onError: ((error: Error) => void) | undefined ) => { fetch( - `${llmServicePath}/sessions/${request.session_id.toString()}/chat-history/${messageId}`, + 
`${llmServicePath}/sessions/${request.session_id.toString()}/chat-history/${messageId}` ) .then(async (res) => { const message = (await res.json()) as ChatMessageType; @@ -439,7 +440,7 @@ const handleStreamingSuccess = ( chatHistoryQueryKey({ session_id: request.session_id, }), - (cachedData) => replacePlaceholderInChatHistory(message, cachedData), + (cachedData) => replacePlaceholderInChatHistory(message, cachedData) ); queryClient .invalidateQueries({ @@ -479,7 +480,7 @@ export const useStreamingChatMutation = ({ const handleGetController = handlePrepareController( getController, queryClient, - request, + request ); return streamChatMutation( @@ -487,7 +488,7 @@ export const useStreamingChatMutation = ({ onChunk, onEvent, convertError, - handleGetController, + handleGetController ); }, onMutate: (variables) => { @@ -496,7 +497,7 @@ export const useStreamingChatMutation = ({ session_id: variables.session_id, }), (cachedData) => - appendPlaceholderToChatHistory(variables.query, cachedData), + appendPlaceholderToChatHistory(variables.query, cachedData) ); }, onSuccess: (messageId, variables) => { @@ -509,7 +510,7 @@ export const useStreamingChatMutation = ({ queryClient, onSuccess, handleError, - onError, + onError ); }, onError: (error: Error, variables) => { @@ -524,7 +525,7 @@ const streamChatMutation = async ( onChunk: (chunk: string) => void, onEvent: (event: ChatEvent) => void, onError: (error: string) => void, - getController?: (ctrl: AbortController) => void, + getController?: (ctrl: AbortController) => void ): Promise => { const ctrl = new AbortController(); if (getController) { @@ -542,6 +543,7 @@ const streamChatMutation = async ( body: JSON.stringify({ query: request.query, configuration: request.configuration, + response_id: request.response_id, }), signal: ctrl.signal, onmessage(msg: EventSourceMessage) { @@ -567,7 +569,7 @@ const streamChatMutation = async ( } catch (error) { console.error("Error parsing message data:", error); onError( - `An error occurred while processing the response. Error message: ${JSON.stringify(msg)}. Error details: ${JSON.stringify(error)}.`, + `An error occurred while processing the response. Error message: ${JSON.stringify(msg)}. Error details: ${JSON.stringify(error)}.` ); ctrl.abort(); } @@ -592,13 +594,13 @@ const streamChatMutation = async ( onError("An error occurred: " + response.statusText); } }, - }, + } ); return responseId; }; export const getOnEvent = ( - setStreamedEvent: Dispatch>, + setStreamedEvent: Dispatch> ) => { return (event: ChatEvent) => { if (event.type === "done") { diff --git a/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessage.tsx b/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessage.tsx index db5d27eb8..0cd92b370 100644 --- a/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessage.tsx +++ b/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessage.tsx @@ -49,6 +49,8 @@ import UserQuestion from "pages/RagChatTab/ChatOutput/ChatMessages/UserQuestion. 
tsx";
import "../tableMarkdown.css"; import { ExclamationCircleTwoTone } from "@ant-design/icons"; import { ChatMessageBody } from "pages/RagChatTab/ChatOutput/ChatMessages/ChatMessageBody.tsx"; +import { useContext } from "react"; +import { RagChatContext } from "pages/RagChatTab/State/RagChatContext.tsx"; import { cdlAmber500 } from "src/cuix/variables.ts"; const isError = (data: ChatMessageType) => { @@ -100,6 +102,10 @@ const WarningMessage = ({ ); }; const ChatMessage = ({ data }: { data: ChatMessageType }) => { + const { activeSession } = useContext(RagChatContext); + const excludeKnowledgeBases = + !activeSession?.dataSourceIds || activeSession.dataSourceIds.length === 0; + if (isError(data)) { return ; } @@ -113,7 +119,13 @@ const ChatMessage = ({ data }: { data: ChatMessageType }) => { return ; } - return ; + return ( + + ); }; export default ChatMessage; diff --git a/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessageBody.tsx b/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessageBody.tsx index 0a4b8a32c..f08b87005 100644 --- a/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessageBody.tsx +++ b/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/ChatMessageBody.tsx @@ -39,6 +39,7 @@ import { ChatMessageType, ChatEvent } from "src/api/chatApi.ts"; import UserQuestion from "pages/RagChatTab/ChatOutput/ChatMessages/UserQuestion.tsx"; import { Divider, Flex, Typography } from "antd"; +import RegenerateButton from "pages/RagChatTab/ChatOutput/ChatMessages/RegenerateButton.tsx"; import Images from "src/components/images/Images.ts"; import { cdlBlue500, cdlGray200 } from "src/cuix/variables.ts"; import { Evaluations } from "pages/RagChatTab/ChatOutput/ChatMessages/Evaluations.tsx"; @@ -51,9 +52,11 @@ import { MarkdownResponse } from "pages/RagChatTab/ChatOutput/ChatMessages/Markd export const ChatMessageBody = ({ data, streamedEvents, + excludeKnowledgeBase, }: { data: ChatMessageType; streamedEvents?: ChatEvent[]; + excludeKnowledgeBase: boolean; }) => { return (
@@ -99,6 +102,10 @@ export const ChatMessageBody = ({ + diff --git a/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/RegenerateButton.tsx b/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/RegenerateButton.tsx new file mode 100644 index 000000000..36bbf8ef5 --- /dev/null +++ b/ui/src/pages/RagChatTab/ChatOutput/ChatMessages/RegenerateButton.tsx @@ -0,0 +1,67 @@ +import { Button, Tooltip } from "antd"; +import { ReloadOutlined } from "@ant-design/icons"; +import { + ChatMessageType, + createQueryConfiguration, + getOnEvent, + useStreamingChatMutation, +} from "src/api/chatApi.ts"; +import { useStreamingChunkBuffer } from "src/hooks/useStreamingChunkBuffer.ts"; +import { useContext } from "react"; +import { RagChatContext } from "pages/RagChatTab/State/RagChatContext.tsx"; + +const RegenerateButton = ({ + message, + excludeKnowledgeBase, + onStarted, +}: { + message: ChatMessageType; + excludeKnowledgeBase: boolean; + onStarted?: () => void; +}) => { + const { + streamedChatState: [, setStreamedChat], + streamedEventState: [, setStreamedEvent], + streamedAbortControllerState: [, setStreamedAbortController], + } = useContext(RagChatContext); + // Use custom hook to handle batched streaming updates + const { onChunk, flush } = useStreamingChunkBuffer((chunks) => { + setStreamedChat((prev) => prev + chunks); + }); + + const streamChatMutation = useStreamingChatMutation({ + onChunk, + onEvent: getOnEvent(setStreamedEvent), + onSuccess: () => { + // Flush any remaining chunks before cleanup + flush(); + setStreamedChat(""); + }, + getController: (ctrl) => { + setStreamedAbortController(ctrl); + }, + }); + + const handleClick = () => { + onStarted?.(); + streamChatMutation.mutate({ + query: message.rag_message.user, + session_id: message.session_id, + configuration: createQueryConfiguration(excludeKnowledgeBase), + response_id: message.id, + }); + }; + + return ( + +
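      title="Regenerate response"
    >
      {/* The patch is truncated above; a minimal sketch of how the render
          plausibly continues, assuming antd's Button with the imported
          ReloadOutlined icon and TanStack Query's isPending flag. Everything
          beyond the imports shown earlier is an assumption, not the author's
          confirmed code. */}
      <Button
        type="text"
        size="small"
        icon={<ReloadOutlined />}
        onClick={handleClick}
        loading={streamChatMutation.isPending}
      />
    </Tooltip>
  );
};

export default RegenerateButton;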