feat(condenser): Token-aware condensation in LLMSummarizingCondenser #1380
base: main
Changes from all commits
The first hunk (`@@ -1,19 +1,42 @@`), shown as the file reads after the change:

```python
import os
from collections.abc import Sequence
from enum import Enum

from pydantic import Field, model_validator

from openhands.sdk.context.condenser.base import RollingCondenser
from openhands.sdk.context.condenser.utils import (
    get_suffix_length_for_token_reduction,
    get_total_token_count,
)
from openhands.sdk.context.prompts import render_template
from openhands.sdk.context.view import View
from openhands.sdk.event.base import LLMConvertibleEvent
from openhands.sdk.event.condenser import Condensation
from openhands.sdk.event.llm_convertible import MessageEvent
from openhands.sdk.llm import LLM, Message, TextContent
from openhands.sdk.observability.laminar import observe


class Reason(Enum):
    """Reasons for condensation."""

    REQUEST = "request"
    TOKENS = "tokens"
    EVENTS = "events"


class LLMSummarizingCondenser(RollingCondenser):
    """LLM-based condenser that summarizes forgotten events.

    Uses an independent LLM for generating summaries of forgotten events. The optional
    LLM parameter passed to condense() is the LLM used by the agent, and you should not
    assume it is the same as the one defined in this condenser.
    """

    llm: LLM
    max_size: int = Field(default=120, gt=0)
    max_tokens: int | None = None
    keep_first: int = Field(default=4, ge=0)

    @model_validator(mode="after")
```
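The new `max_tokens` field (default `None`, i.e. token-triggering disabled) is the heart of the PR: alongside the existing event-count cap, the condenser can now also trigger on token usage. A minimal configuration sketch; the `LLM` constructor arguments and the environment variable are illustrative assumptions, not taken from this diff:

```python
# Sketch: enabling both event-count and token-based condensation.
# The model name and API-key handling are hypothetical.
import os

from openhands.sdk.llm import LLM

summarizer_llm = LLM(model="claude-sonnet-4", api_key=os.environ["LLM_API_KEY"])

condenser = LLMSummarizingCondenser(
    llm=summarizer_llm,   # independent LLM used only for summarization
    max_size=120,         # condense once the view holds more than 120 events
    max_tokens=100_000,   # new in this PR: also condense past 100k tokens
    keep_first=4,         # always keep the first 4 events verbatim
)
```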
|
|
The remaining hunks, shown as unified diffs (`-` removed, `+` added):

```diff
@@ -29,23 +52,47 @@ def validate_keep_first_vs_max_size(self):
     def handles_condensation_requests(self) -> bool:
         return True
 
-    def should_condense(self, view: View) -> bool:
-        if view.unhandled_condensation_request:
-            return True
-        return len(view) > self.max_size
-
-    @observe(ignore_inputs=["view"])
-    def get_condensation(self, view: View) -> Condensation:
-        head = view[: self.keep_first]
-        target_size = self.max_size // 2
-        if view.unhandled_condensation_request:
-            # Condensation triggered by a condensation request
-            # should be calculated based on the view size.
-            target_size = len(view) // 2
-        # Number of events to keep from the tail -- target size, minus however many
-        # prefix events from the head, minus one for the summarization event
-        events_from_tail = target_size - len(head) - 1
+    def get_condensation_reasons(
+        self, view: View, llm: LLM | None = None
+    ) -> set[Reason]:
+        """Determine the reasons why the view should be condensed.
+
+        Args:
+            view: The current view to evaluate.
+            llm: The LLM used by the agent. Required if token counting is needed.
+
+        Returns:
+            A set of Reason enums indicating why condensation is needed.
+        """
+        reasons = set()
+
+        # Reason 1: Unhandled condensation request. The view handles the detection of
+        # these requests while processing the event stream.
+        if view.unhandled_condensation_request:
+            reasons.add(Reason.REQUEST)
+
+        # Reason 2: Token limit is provided and exceeded.
+        if self.max_tokens and llm:
+            total_tokens = get_total_token_count(view.events, llm)
+            if total_tokens > self.max_tokens:
+                reasons.add(Reason.TOKENS)
+
+        # Reason 3: View exceeds maximum size in number of events.
+        if len(view) > self.max_size:
+            reasons.add(Reason.EVENTS)
+
+        return reasons
+
+    def should_condense(self, view: View, llm: LLM | None = None) -> bool:
+        reasons = self.get_condensation_reasons(view, llm)
+        return reasons != set()
+
+    def _get_summary_event_content(self, view: View) -> str:
+        """Extract the text content from the summary event in the view, if any.
+
+        If there is no summary event or it does not contain text content, returns an
+        empty string.
+        """
+        summary_event_content: str = ""
+
+        summary_event = view.summary_event
```
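Both token-based branches lean on two helpers from `openhands.sdk.context.condenser.utils` whose implementations are not part of this diff. Below is a rough sketch of the contracts they would need to satisfy; the `to_llm_message()` call and the `get_token_count` method are assumed APIs, not the PR's actual code:

```python
# Plausible contracts for the utils helpers -- a sketch, not the PR's code.
from collections.abc import Sequence

from openhands.sdk.event.base import LLMConvertibleEvent
from openhands.sdk.llm import LLM


def get_total_token_count(events: Sequence[LLMConvertibleEvent], llm: LLM) -> int:
    """Tokens needed to send the events' messages, as measured by `llm`."""
    # Both the message conversion and the counting method are assumptions.
    messages = [event.to_llm_message() for event in events]
    return llm.get_token_count(messages)


def get_suffix_length_for_token_reduction(
    events: Sequence[LLMConvertibleEvent], llm: LLM, token_reduction: int
) -> int:
    """Longest suffix of `events` we can keep while still dropping at least
    `token_reduction` tokens from the front."""
    dropped = 0
    for index, event in enumerate(events):
        if dropped >= token_reduction:
            return len(events) - index
        dropped += get_total_token_count([event], llm)
    return 1  # keep at least one tail event even if the reduction falls short
```

Note that because the token check reads `if self.max_tokens and llm:`, callers that never pass the agent's `llm` (or leave `max_tokens` unset) keep the old, purely event-count behavior.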
|
|
```diff
@@ -54,9 +101,23 @@ def get_condensation(self, view: View) -> Condensation:
                 if isinstance(message_content, TextContent):
                     summary_event_content = message_content.text
 
-        # Identify events to be forgotten (those not in head or tail)
-        forgotten_events = view[self.keep_first : -events_from_tail]
+        return summary_event_content
+
+    def _generate_condensation(
+        self,
+        summary_event_content: str,
+        forgotten_events: Sequence[LLMConvertibleEvent],
+    ) -> Condensation:
+        """Generate a condensation by using the condenser's LLM to summarize forgotten
+        events.
+
+        Args:
+            summary_event_content: The content of the previous summary event.
+            forgotten_events: The list of events to be summarized.
+
+        Returns:
+            Condensation: The generated condensation object.
+        """
         # Convert events to strings for the template
         event_strings = [str(forgotten_event) for forgotten_event in forgotten_events]
```
|
Collaborator, commenting on the `event_strings` line:

> (Unrelated to the PR) By the way, we've always done this, but is this best? Now we have […] I actually wonder if the reverse might be the case: I know I've seen a summarizer get confused by the messages and continue them, instead of summarizing them. 😅 Maybe stringifying them this way prevents such a thing?
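Taken together, the PR splits the old monolithic `get_condensation` into three pieces. A sketch of how they compose; the driver code is illustrative, and `_get_forgotten_events` plus the new `get_condensation` appear in the final hunk below:

```python
# Illustrative composition of the refactored methods -- not code from the PR.
if condenser.should_condense(view, llm=agent_llm):
    condensation = condenser.get_condensation(view, llm=agent_llm)
    # get_condensation internally does:
    #   summary_event_content = self._get_summary_event_content(view)
    #   forgotten_events = self._get_forgotten_events(view, llm=llm)
    #   return self._generate_condensation(
    #       summary_event_content=summary_event_content,
    #       forgotten_events=forgotten_events,
    #   )
```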
```diff
@@ -87,3 +148,68 @@
             summary_offset=self.keep_first,
             llm_response_id=llm_response.id,
         )
+
+    def _get_forgotten_events(
+        self, view: View, llm: LLM | None = None
+    ) -> Sequence[LLMConvertibleEvent]:
+        """Identify events to be forgotten.
+
+        Relies on the condensation reasons to determine how many events we need to drop
+        in order to maintain our resource constraints.
+
+        Args:
+            view: The current view from which to identify forgotten events.
+            llm: The LLM used by the agent, required for token-based calculations.
+
+        Returns:
+            A sequence of events to be forgotten.
+        """
+        reasons = self.get_condensation_reasons(view, llm=llm)
+        assert reasons != set(), "No condensation reasons found."
+
+        suffix_events_to_keep: set[int] = set()
+
+        if Reason.REQUEST in reasons:
+            target_size = len(view) // 2
+            suffix_events_to_keep.add(target_size - self.keep_first - 1)
+
+        if Reason.EVENTS in reasons:
+            target_size = self.max_size // 2
+            suffix_events_to_keep.add(target_size - self.keep_first - 1)
+
+        if Reason.TOKENS in reasons:
+            # Compute the number of tokens we need to eliminate to be under half the
+            # max_tokens value. We know max_tokens and the agent LLM are not None here
+            # because we can't have Reason.TOKENS without them.
+            assert self.max_tokens is not None
+            assert llm is not None
+
+            total_tokens = get_total_token_count(view.events, llm)
+            tokens_to_reduce = total_tokens - (self.max_tokens // 2)
+
+            suffix_events_to_keep.add(
+                get_suffix_length_for_token_reduction(
+                    events=view.events[self.keep_first :],
+                    llm=llm,
+                    token_reduction=tokens_to_reduce,
+                )
+            )
+
+        # We might have multiple reasons to condense, so pick the strictest
+        # condensation to ensure all resource constraints are met.
+        events_from_tail = min(suffix_events_to_keep)
+
+        # Identify events to be forgotten (those not in head or tail)
+        return view[self.keep_first : -events_from_tail]
+
+    @observe(ignore_inputs=["view", "llm"])
+    def get_condensation(self, view: View, llm: LLM | None = None) -> Condensation:
+        # The condensation is dependent on the events we want to drop and the previous
+        # summary.
+        summary_event_content = self._get_summary_event_content(view)
+        forgotten_events = self._get_forgotten_events(view, llm=llm)
+
+        return self._generate_condensation(
+            summary_event_content=summary_event_content,
+            forgotten_events=forgotten_events,
+        )
```
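The `min()` at the end of `_get_forgotten_events` is what makes simultaneous triggers safe: keeping the fewest tail events is the strictest choice, so every active constraint is satisfied at once. A worked example with hypothetical numbers:

```python
# Hypothetical: a 130-event view, keep_first=4, max_size=120, and a token
# calculation that says keeping 40 tail events frees enough tokens.
#
# Reason.EVENTS:  target_size = 120 // 2 = 60  ->  60 - 4 - 1 = 55 tail events
# Reason.REQUEST: target_size = 130 // 2 = 65  ->  65 - 4 - 1 = 60 tail events
# Reason.TOKENS:  get_suffix_length_for_token_reduction(...) -> 40 tail events
suffix_events_to_keep = {55, 60, 40}

events_from_tail = min(suffix_events_to_keep)  # 40: keep least, forget most
forgotten = list(range(130))[4:-events_from_tail]  # stand-in for view slicing
assert len(forgotten) == 86  # 130 total - 4 head - 40 tail
```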
Review comment:

> Just a thought: what if we name this one `condenser_llm` or `summarizing_llm`, or simply `summarizer`? Just for our brains and potential contributors, to keep them easily understood 😅
condenser_llmorsummarizing_llmor simplysummarizer? Just for our brains and potential contributors, to keep them easily understood 😅