6 changes: 2 additions & 4 deletions src/chat.py
@@ -140,11 +140,9 @@ async def check_message_length(message_content: str) -> tuple[bool, str]:
         return False, "We've encountered an issue. Please try again later ..."
 
     if num_required_tokens > config.embeddings_llm_max_context:
-        # On average, a single token corresponds to approximately 4 characters.
-        # Because logs often require more tokens to process, we estimate 3
-        # characters per token.
+        # Calculate the maximum character limit estimation for the embedding model.
         approx_max_chars = round(
-            config.embeddings_llm_max_context * 3, -2)
+            config.embeddings_llm_max_context * config.chars_per_token_estimation, -2)
 
         error_message = (
             "⚠️ **Your input is too lengthy!**\n We can process inputs of up "
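For reference, a minimal sketch of what the new embedding-side limit works out to, assuming a hypothetical `embeddings_llm_max_context` of 8192 tokens and the default `CHARS_PER_TOKEN_ESTIMATION` of 3 (the context size is invented for illustration, not taken from this PR):

```python
# Illustrative values only: 8192 is an assumed embedding context size.
embeddings_llm_max_context = 8192    # max tokens the embedding model accepts
chars_per_token_estimation = 3       # conservative characters-per-token estimate

# Same computation as in check_message_length: a rough character budget,
# rounded to the nearest hundred for a friendlier error message.
approx_max_chars = round(embeddings_llm_max_context * chars_per_token_estimation, -2)
print(approx_max_chars)  # 24600 (8192 * 3 = 24576, rounded to the nearest hundred)
```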
21 changes: 20 additions & 1 deletion src/config.py
@@ -51,6 +51,8 @@ class Config:
     prompt_header: str
     welcome_message: str
     jira_formatting_syntax_prompt: str
+    chars_per_token_estimation: int
+    generative_model_max_context_percentage: float
 
     @classmethod
     def from_env(cls) -> 'Config':
@@ -131,8 +133,25 @@ def from_env(cls) -> 'Config':
             # The maximum number of points we pass to the generative model after
             # reranking.
             rerank_top_n=int(os.environ.get("RERANK_TOP_N", 5)),
-        )
 
+            # The maximum percentage of the full context of the generative model
+            # that we can use. The idea is that we do not want to use the full
+            # available context to prevent decreased quality of the responses.
+            generative_model_max_context_percentage=float(os.environ.get(
+                "GENERATIVE_MODEL_MAX_CONTEXT_PERCENTAGE",
+                0.75,
+            )),
+
+            # The estimated number of characters per token we should use in our
+            # internal computations. For example, we use this value to estimate
+            # the maximum number of characters the generative model can process.
+            #
+            # On average, a single token corresponds to approximately 4 characters.
+            # Because logs often require more tokens to process, we estimate 3
+            # characters per token.
+            chars_per_token_estimation=int(os.environ.get(
+                "CHARS_PER_TOKEN_ESTIMATION", 3)),
+        )
 
 # Initialize the configuration
 config = Config.from_env()
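As a rough sketch of how the two new settings behave at load time, the snippet below mirrors the `os.environ.get` pattern from `from_env`; the override values are purely illustrative:

```python
import os

# Defaults apply when the variables are unset, as in Config.from_env above.
pct = float(os.environ.get("GENERATIVE_MODEL_MAX_CONTEXT_PERCENTAGE", 0.75))
cpt = int(os.environ.get("CHARS_PER_TOKEN_ESTIMATION", 3))
print(pct, cpt)  # 0.75 3

# Both knobs can be tuned via the environment without touching code,
# e.g. to leave more headroom in the generative model's context.
os.environ["GENERATIVE_MODEL_MAX_CONTEXT_PERCENTAGE"] = "0.6"
os.environ["CHARS_PER_TOKEN_ESTIMATION"] = "4"
pct = float(os.environ.get("GENERATIVE_MODEL_MAX_CONTEXT_PERCENTAGE", 0.75))
cpt = int(os.environ.get("CHARS_PER_TOKEN_ESTIMATION", 3))
print(pct, cpt)  # 0.6 4
```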
10 changes: 4 additions & 6 deletions src/prompt.py
@@ -80,12 +80,10 @@ async def build_prompt(

     full_prompt_len += len(str(full_prompt))
 
-    # NOTE: On average, a single token corresponds to approximately 4 characters.
-    # Because logs often require more tokens to process, we estimate 3
-    # characters per token. Also, we do not want to use the full context
-    # of the model as trying to use the full context of the model might lead
-    # to decreased performance (0.75 constant).
-    approx_max_chars = config.generative_model_max_context * 3 * 0.75
+    # Calculate the maximum character limit estimation for the generative model.
+    approx_max_chars = (config.generative_model_max_context *
+                        config.generative_model_max_context_percentage *
+                        config.chars_per_token_estimation)
 
     # If no information was retrieved from the vector database, end the generation
     # of the prompt.
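To see what the refactored expression yields, here is a small worked example with an assumed `generative_model_max_context` of 32768 tokens and the defaults added in `config.py` (the context window size is hypothetical):

```python
# Hypothetical context window; the real value comes from the deployment config.
generative_model_max_context = 32768
generative_model_max_context_percentage = 0.75  # keep 25% of the context as headroom
chars_per_token_estimation = 3                  # conservative characters per token

# Same formula as in build_prompt: the character budget for the prompt.
approx_max_chars = (generative_model_max_context *
                    generative_model_max_context_percentage *
                    chars_per_token_estimation)
print(approx_max_chars)  # 73728.0 (32768 * 0.75 = 24576 tokens, ~3 chars each)
```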