2 changes: 1 addition & 1 deletion openhands-sdk/openhands/sdk/agent/agent.py
@@ -153,7 +153,7 @@ def step(

# Prepare LLM messages using the utility function
_messages_or_condensation = prepare_llm_messages(
state.events, condenser=self.condenser
state.events, condenser=self.condenser, llm=self.llm
)

# Process condensation event before agent samples another action
13 changes: 9 additions & 4 deletions openhands-sdk/openhands/sdk/agent/utils.py
@@ -117,6 +117,7 @@ def prepare_llm_messages(
events: Sequence[Event],
condenser: None = None,
additional_messages: list[Message] | None = None,
llm: LLM | None = None,
) -> list[Message]: ...


@@ -125,13 +126,15 @@ def prepare_llm_messages(
events: Sequence[Event],
condenser: CondenserBase,
additional_messages: list[Message] | None = None,
llm: LLM | None = None,
) -> list[Message] | Condensation: ...


def prepare_llm_messages(
events: Sequence[Event],
condenser: CondenserBase | None = None,
additional_messages: list[Message] | None = None,
llm: LLM | None = None,
) -> list[Message] | Condensation:
"""Prepare LLM messages from conversation context.

@@ -140,13 +143,15 @@
It handles condensation internally and calls the callback when needed.

Args:
state: The conversation state containing events
events: Sequence of events to prepare messages from
condenser: Optional condenser for handling context window limits
additional_messages: Optional additional messages to append
on_event: Optional callback for handling condensation events
llm: Optional LLM instance from the agent, passed to the condenser for
token counting or other LLM features

Returns:
List of messages ready for LLM completion
List of messages ready for LLM completion, or a Condensation event
if condensation is needed

Raises:
RuntimeError: If condensation is needed but no callback is provided
@@ -160,7 +165,7 @@
# produce a list of events, exactly as expected, or a
# new condensation that needs to be processed
if condenser is not None:
condensation_result = condenser.condense(view)
condensation_result = condenser.condense(view, llm=llm)

match condensation_result:
case View():
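For orientation, here is a minimal caller-side sketch (not part of the diff) of how the updated prepare_llm_messages return type can be handled; the on_event handler is illustrative, named after the callback mentioned in the docstring, and not a fixed API.

# Illustrative only -- sketches how the two possible return types are handled.
_messages_or_condensation = prepare_llm_messages(
    state.events, condenser=self.condenser, llm=self.llm
)
if isinstance(_messages_or_condensation, Condensation):
    # A condensation event was produced; record it before the agent samples
    # another action (on_event is a hypothetical handler for this sketch).
    on_event(_messages_or_condensation)
    return
messages = _messages_or_condensation  # list[Message], ready for completion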
15 changes: 9 additions & 6 deletions openhands-sdk/openhands/sdk/context/condenser/base.py
@@ -3,6 +3,7 @@

from openhands.sdk.context.view import View
from openhands.sdk.event.condenser import Condensation
from openhands.sdk.llm import LLM
from openhands.sdk.utils.models import (
DiscriminatedUnionMixin,
)
@@ -28,7 +29,7 @@ class CondenserBase(DiscriminatedUnionMixin, ABC):
"""

@abstractmethod
def condense(self, view: View) -> View | Condensation:
def condense(self, view: View, llm: LLM | None = None) -> View | Condensation:
"""Condense a sequence of events into a potentially smaller list.

New condenser strategies should override this method to implement their own
@@ -37,6 +38,8 @@ def condense(self, view: View) -> View | Condensation:

Args:
view: A view of the history containing all events that should be condensed.
llm: LLM instance used by the agent. Condensers use this for token counting
purposes. Defaults to None.

Returns:
View | Condensation: A condensed view of the events or an event indicating
@@ -77,18 +80,18 @@ class RollingCondenser(PipelinableCondenserBase, ABC):
"""

@abstractmethod
def should_condense(self, view: View) -> bool:
def should_condense(self, view: View, llm: LLM | None = None) -> bool:
"""Determine if a view should be condensed."""

@abstractmethod
def get_condensation(self, view: View) -> Condensation:
def get_condensation(self, view: View, llm: LLM | None = None) -> Condensation:
"""Get the condensation from a view."""

def condense(self, view: View) -> View | Condensation:
def condense(self, view: View, llm: LLM | None = None) -> View | Condensation:
# If we trigger the condenser-specific condensation threshold, compute and
# return the condensation.
if self.should_condense(view):
return self.get_condensation(view)
if self.should_condense(view, llm=llm):
return self.get_condensation(view, llm=llm)

# Otherwise we're safe to just return the view.
else:
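To show what the updated abstract interface asks of subclasses, a hedged sketch of a custom RollingCondenser that accepts the new llm keyword; the subclass name, the Condensation field name, and the event.id attribute are illustrative assumptions, not taken from this diff.

# Illustrative sketch only. EventCountCondenser is a hypothetical subclass;
# the Condensation field name and event.id attribute are assumed here.
from openhands.sdk.context.condenser.base import RollingCondenser
from openhands.sdk.context.view import View
from openhands.sdk.event.condenser import Condensation
from openhands.sdk.llm import LLM


class EventCountCondenser(RollingCondenser):
    max_size: int = 100

    def should_condense(self, view: View, llm: LLM | None = None) -> bool:
        # This strategy ignores the llm argument and only counts events.
        return len(view) > self.max_size

    def get_condensation(self, view: View, llm: LLM | None = None) -> Condensation:
        # Drop the older half of the view.
        forgotten = view.events[: len(view) // 2]
        return Condensation(forgotten_event_ids=[event.id for event in forgotten])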
@@ -1,19 +1,42 @@
import os
from collections.abc import Sequence
from enum import Enum

from pydantic import Field, model_validator

from openhands.sdk.context.condenser.base import RollingCondenser
from openhands.sdk.context.condenser.utils import (
get_suffix_length_for_token_reduction,
get_total_token_count,
)
from openhands.sdk.context.prompts import render_template
from openhands.sdk.context.view import View
from openhands.sdk.event.base import LLMConvertibleEvent
from openhands.sdk.event.condenser import Condensation
from openhands.sdk.event.llm_convertible import MessageEvent
from openhands.sdk.llm import LLM, Message, TextContent
from openhands.sdk.observability.laminar import observe


class Reason(Enum):
"""Reasons for condensation."""

REQUEST = "request"
TOKENS = "tokens"
EVENTS = "events"


class LLMSummarizingCondenser(RollingCondenser):
"""LLM-based condenser that summarizes forgotten events.

Uses an independent LLM for generating summaries of forgotten events. The optional
LLM parameter passed to condense() is the LLM used by the agent, and you should not
assume it is the same as the one defined in this condenser.
"""

llm: LLM
Review comment (Collaborator):

Suggested change:
    llm: LLM
    summarizer_llm: LLM

Just a thought: what if we name this one condenser_llm or summarizing_llm or simply summarizer? Just for our brains and potential contributors, to keep them easily understood 😅

max_size: int = Field(default=120, gt=0)
max_tokens: int | None = None
keep_first: int = Field(default=4, ge=0)

@model_validator(mode="after")
@@ -29,23 +52,47 @@ def validate_keep_first_vs_max_size(self):
def handles_condensation_requests(self) -> bool:
return True

def should_condense(self, view: View) -> bool:
if view.unhandled_condensation_request:
return True
return len(view) > self.max_size
def get_condensation_reasons(
self, view: View, llm: LLM | None = None
) -> set[Reason]:
"""Determine the reasons why the view should be condensed.

Args:
view: The current view to evaluate.
llm: The LLM used by the agent. Required if token counting is needed.

Returns:
A set of Reason enums indicating why condensation is needed.
"""
reasons = set()

@observe(ignore_inputs=["view"])
def get_condensation(self, view: View) -> Condensation:
head = view[: self.keep_first]
target_size = self.max_size // 2
# Reason 1: Unhandled condensation request. The view handles the detection of
# these requests while processing the event stream.
if view.unhandled_condensation_request:
# Condensation triggered by a condensation request
# should be calculated based on the view size.
target_size = len(view) // 2
# Number of events to keep from the tail -- target size, minus however many
# prefix events from the head, minus one for the summarization event
events_from_tail = target_size - len(head) - 1
reasons.add(Reason.REQUEST)

# Reason 2: Token limit is provided and exceeded.
if self.max_tokens and llm:
total_tokens = get_total_token_count(view.events, llm)
if total_tokens > self.max_tokens:
reasons.add(Reason.TOKENS)

# Reason 3: View exceeds maximum size in number of events.
if len(view) > self.max_size:
reasons.add(Reason.EVENTS)

return reasons

def should_condense(self, view: View, llm: LLM | None = None) -> bool:
reasons = self.get_condensation_reasons(view, llm)
return reasons != set()

def _get_summary_event_content(self, view: View) -> str:
"""Extract the text content from the summary event in the view, if any.

If there is no summary event or it does not contain text content, returns an
empty string.
"""
summary_event_content: str = ""

summary_event = view.summary_event
@@ -54,9 +101,23 @@ def get_condensation(self, view: View) -> Condensation:
if isinstance(message_content, TextContent):
summary_event_content = message_content.text

# Identify events to be forgotten (those not in head or tail)
forgotten_events = view[self.keep_first : -events_from_tail]
return summary_event_content

def _generate_condensation(
self,
summary_event_content: str,
forgotten_events: Sequence[LLMConvertibleEvent],
) -> Condensation:
"""Generate a condensation by using the condenser's LLM to summarize forgotten
events.

Args:
summary_event_content: The content of the previous summary event.
forgotten_events: The list of events to be summarized.

Returns:
Condensation: The generated condensation object.
"""
# Convert events to strings for the template
event_strings = [str(forgotten_event) for forgotten_event in forgotten_events]
Review comment (Collaborator):

(Unrelated to the PR) By the way, we've always done this, but is this best? Now we have to_llm_message() to choose and transform the parts of an Event into a form suitable for the LLM... 🤔

I actually wonder if the reverse might be the case: I know I've seen a summarizer get confused by the messages and continue them, instead of summarizing them. 😅 Maybe stringifying them this way prevents that?


@@ -87,3 +148,68 @@ def get_condensation(self, view: View) -> Condensation:
summary_offset=self.keep_first,
llm_response_id=llm_response.id,
)

def _get_forgotten_events(
self, view: View, llm: LLM | None = None
) -> Sequence[LLMConvertibleEvent]:
"""Identify events to be forgotten.

Relies on the condensation reasons to determine how many events we need to drop
in order to maintain our resource constraints.

Args:
view: The current view from which to identify forgotten events.
llm: The LLM used by the agent, required for token-based calculations.

Returns:
A sequence of events to be forgotten.
"""
reasons = self.get_condensation_reasons(view, llm=llm)
assert reasons != set(), "No condensation reasons found."

suffix_events_to_keep: set[int] = set()

if Reason.REQUEST in reasons:
target_size = len(view) // 2
suffix_events_to_keep.add(target_size - self.keep_first - 1)

if Reason.EVENTS in reasons:
target_size = self.max_size // 2
suffix_events_to_keep.add(target_size - self.keep_first - 1)

if Reason.TOKENS in reasons:
# Compute the number of tokens we need to eliminate to be under half the
# max_tokens value. We know max_tokens and the agent LLM are not None here
# because we can't have Reason.TOKENS without them.
assert self.max_tokens is not None
assert llm is not None

total_tokens = get_total_token_count(view.events, llm)
tokens_to_reduce = total_tokens - (self.max_tokens // 2)

suffix_events_to_keep.add(
get_suffix_length_for_token_reduction(
events=view.events[self.keep_first :],
llm=llm,
token_reduction=tokens_to_reduce,
)
)

# We might have multiple reasons to condense, so pick the strictest condensation
# to ensure all resource constraints are met.
events_from_tail = min(suffix_events_to_keep)

# Identify events to be forgotten (those not in head or tail)
return view[self.keep_first : -events_from_tail]

@observe(ignore_inputs=["view", "llm"])
def get_condensation(self, view: View, llm: LLM | None = None) -> Condensation:
# The condensation is dependent on the events we want to drop and the previous
# summary.
summary_event_content = self._get_summary_event_content(view)
forgotten_events = self._get_forgotten_events(view, llm=llm)

return self._generate_condensation(
summary_event_content=summary_event_content,
forgotten_events=forgotten_events,
)
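A hedged usage sketch (not part of the diff): the condenser configured with both the event cap and the new token cap, queried with the agent's LLM so token counting reflects the agent model; the LLM constructor arguments and the agent_llm/view variables are assumed context.

# Illustrative only. Field names mirror the class definition above; the LLM
# constructor kwargs and the agent_llm/view variables are assumed context.
condenser = LLMSummarizingCondenser(
    llm=LLM(model="some-summarizer-model"),  # summarizer LLM (kwargs assumed)
    max_size=120,
    max_tokens=80_000,
    keep_first=4,
)

if condenser.should_condense(view, llm=agent_llm):
    # Token counting uses the agent's LLM, not the summarizer LLM.
    condensation = condenser.get_condensation(view, llm=agent_llm)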
@@ -1,6 +1,7 @@
from openhands.sdk.context.condenser.base import CondenserBase
from openhands.sdk.context.view import View
from openhands.sdk.event.condenser import Condensation
from openhands.sdk.llm import LLM


class NoOpCondenser(CondenserBase):
@@ -9,5 +10,5 @@ class NoOpCondenser(CondenserBase):
Primarily intended for testing purposes.
"""

def condense(self, view: View) -> View | Condensation:
def condense(self, view: View, llm: LLM | None = None) -> View | Condensation: # noqa: ARG002
return view
@@ -1,6 +1,7 @@
from openhands.sdk.context.condenser.base import CondenserBase
from openhands.sdk.context.view import View
from openhands.sdk.event.condenser import Condensation
from openhands.sdk.llm import LLM


class PipelineCondenser(CondenserBase):
@@ -41,12 +42,12 @@ class PipelineCondenser(CondenserBase):
condensers: list[CondenserBase]
"""The list of condensers to apply in order."""

def condense(self, view: View) -> View | Condensation:
def condense(self, view: View, llm: LLM | None = None) -> View | Condensation:
result: View | Condensation = view
for condenser in self.condensers:
if isinstance(result, Condensation):
break
result = condenser.condense(result)
result = condenser.condense(result, llm=llm)
return result

def handles_condensation_requests(self) -> bool:
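Finally, a brief hedged sketch of the pipeline behaviour with the threaded llm argument: every stage receives the same agent LLM, and iteration stops at the first Condensation; summarizing_condenser and agent_llm are assumed context.

# Illustrative only -- summarizing_condenser and agent_llm are assumed context.
pipeline = PipelineCondenser(condensers=[NoOpCondenser(), summarizing_condenser])
result = pipeline.condense(view, llm=agent_llm)
# result is a View if no stage condensed, otherwise the first Condensation.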