feat: add token limit chunks to embedding models #670

Open · wants to merge 2 commits into main

4 changes: 4 additions & 0 deletions .github/workflows/build.yaml
@@ -17,6 +17,10 @@ jobs:
npm ci
cd -
npm run lint
- name: Update CDK CLI
run: |
npm install -g aws-cdk@latest
cdk --version
- name: Backend
run: |
npm ci
1 change: 0 additions & 1 deletion .python-version

This file was deleted.

10 changes: 10 additions & 0 deletions README.md
@@ -27,6 +27,16 @@ This blueprint deploys the complete AWS GenAI LLM Chatbot solution in your AWS account
- AWS CLI configured with credentials
- Node.js 18+ and npm
- Python 3.8+
- AWS CDK CLI version compatible with aws-cdk-lib 2.206.0 or later
```bash
# Install or update the CDK CLI globally
npm install -g aws-cdk@latest

# Verify the installed version
cdk --version
```

> **Important**: The CDK CLI version must be compatible with the aws-cdk-lib version used in this project (currently 2.206.0). If you encounter a "Cloud assembly schema version mismatch" error during deployment, update your CDK CLI to the latest version using the command above.
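
If you are unsure which aws-cdk-lib version the project currently uses, you can check it from the repository root before deploying. This is a quick sanity check and assumes the dependency is declared in the project's package.json:

```bash
# Show the aws-cdk-lib version the project depends on
npm ls aws-cdk-lib
```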

### Deployment

7 changes: 3 additions & 4 deletions lib/chatbot-api/functions/api-handler/routes/models.py
@@ -14,9 +14,8 @@

@router.resolver(field_name="listModels")
@tracer.capture_method
@permissions.approved_roles([
permissions.ADMIN_ROLE,
permissions.WORKSPACES_MANAGER_ROLE
])
@permissions.approved_roles(
[permissions.ADMIN_ROLE, permissions.WORKSPACES_MANAGER_ROLE]
)
def models() -> list[dict[str, Any]]:
return genai_core.models.list_models()
@@ -37,7 +37,7 @@ def _forward(self, model_inputs, **kwargs):
input_ids=input_ids.to(self.model.device),
attention_mask=attention_mask.to(self.model.device),
return_dict_in_generate=True,
**kwargs
**kwargs,
)

return {"input_ids": input_ids, "outputs": outputs}
90 changes: 75 additions & 15 deletions lib/shared/layers/python-sdk/python/genai_core/embeddings.py
@@ -18,25 +18,85 @@
logger = Logger()


def get_model_token_limit(model_name):
# Extract provider from model name
model_provider = model_name.split(".")[0]

# https://docs.aws.amazon.com/bedrock/latest/userguide/titan-embedding-models.html
# https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/embeddings
# https://docs.cohere.com/v2/docs/cohere-embed
PROVIDER_TOKEN_LIMITS = {
Provider.AMAZON.value: 8000, # Amazon Titan models
Provider.COHERE.value: 512, # Cohere models
Provider.OPENAI.value: 8191, # OpenAI models
"default": 2500, # Default fallback (2500 * 4 = 10000)
}

return PROVIDER_TOKEN_LIMITS.get(model_provider, PROVIDER_TOKEN_LIMITS["default"])


def generate_embeddings(
model: EmbeddingsModel, input: list[str], task: str = "store", batch_size: int = 50
) -> list[list[float]]:
input = [x[:10000] for x in input]

ret_value = []
batch_split = [input[i : i + batch_size] for i in range(0, len(input), batch_size)]

for batch in batch_split:
if model.provider == Provider.OPENAI.value:
ret_value.extend(_generate_embeddings_openai(model, batch))
elif model.provider == Provider.BEDROCK.value:
ret_value.extend(_generate_embeddings_bedrock(model, batch, task))
elif model.provider == Provider.SAGEMAKER.value:
ret_value.extend(_generate_embeddings_sagemaker(model, batch))
else:
raise CommonError(f"Unknown provider: {model.provider}")
try:
# Get model-specific token limit
token_limit = get_model_token_limit(model.name)
char_limit = min(token_limit * 4, 10000) # Use existing 10000 char limit as max

# Chunk inputs and track mapping
chunked_input = []
chunk_mapping = []
current_idx = 0

for text in input:
# Split text into chunks if it exceeds the limit
if len(text) <= char_limit:
chunks = [text]
else:
chunks = [
text[i : i + char_limit] for i in range(0, len(text), char_limit)
]

# Track which chunks belong to which original input using a chunk map
chunk_indices = list(range(current_idx, current_idx + len(chunks)))
chunk_mapping.append(chunk_indices)
current_idx += len(chunks)

chunked_input.extend(chunks)

ret_value = []
batch_split = [
chunked_input[i : i + batch_size]
for i in range(0, len(chunked_input), batch_size)
]

for batch in batch_split:
if model.provider == Provider.OPENAI.value:
ret_value.extend(_generate_embeddings_openai(model, batch))
elif model.provider == Provider.BEDROCK.value:
ret_value.extend(_generate_embeddings_bedrock(model, batch, task))
elif model.provider == Provider.SAGEMAKER.value:
ret_value.extend(_generate_embeddings_sagemaker(model, batch))
else:
raise CommonError(f"Unknown provider: {model.provider}")

return ret_value
# Combine embeddings from the same original input
final_embeddings = []
for chunks_idx in chunk_mapping:
if len(chunks_idx) == 1:
final_embeddings.append(ret_value[chunks_idx[0]])
else:
# Average the embeddings
chunk_embeddings = [ret_value[idx] for idx in chunks_idx]
avg_embedding = [
sum(values) / len(values) for values in zip(*chunk_embeddings)
]
final_embeddings.append(avg_embedding)

return final_embeddings
except Exception as e:
logger.error(f"Error in generate_embeddings: {str(e)}")
raise CommonError(f"Failed to generate embeddings: {str(e)}")


def get_embeddings_models():
@@ -16,25 +16,85 @@
logger = Logger()


def get_model_token_limit(model_name):
# Extract provider from model name
model_provider = model_name.split(".")[0]

# https://docs.aws.amazon.com/bedrock/latest/userguide/titan-embedding-models.html
# https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/embeddings
# https://docs.cohere.com/v2/docs/cohere-embed
PROVIDER_TOKEN_LIMITS = {
Provider.AMAZON.value: 8000, # Amazon Titan models
Provider.COHERE.value: 512, # Cohere models
Provider.OPENAI.value: 8191, # OpenAI models
"default": 2500, # Default fallback (2500 * 4 = 10000)
}

return PROVIDER_TOKEN_LIMITS.get(model_provider, PROVIDER_TOKEN_LIMITS["default"])


def generate_embeddings(
model: EmbeddingsModel, input: list[str], task: str = "store", batch_size: int = 50
) -> list[list[float]]:
input = [x[:10000] for x in input]

ret_value = []
batch_split = [input[i : i + batch_size] for i in range(0, len(input), batch_size)]

for batch in batch_split:
if model.provider == Provider.OPENAI.value:
ret_value.extend(_generate_embeddings_openai(model, batch))
elif model.provider == Provider.BEDROCK.value:
ret_value.extend(_generate_embeddings_bedrock(model, batch, task))
elif model.provider == Provider.SAGEMAKER.value:
ret_value.extend(_generate_embeddings_sagemaker(model, batch))
else:
raise CommonError(f"Unknown provider: {model.provider}")
try:
# Get model-specific token limit
token_limit = get_model_token_limit(model.name)
char_limit = min(token_limit * 4, 10000) # Use existing 10000 char limit as max

# Chunk inputs and track mapping
chunked_input = []
chunk_mapping = []
current_idx = 0

for text in input:
# Split text into chunks if it exceeds the limit
if len(text) <= char_limit:
chunks = [text]
else:
chunks = [
text[i : i + char_limit] for i in range(0, len(text), char_limit)
]

# Track which chunks belong to which original input using a chunk map
chunk_indices = list(range(current_idx, current_idx + len(chunks)))
chunk_mapping.append(chunk_indices)
current_idx += len(chunks)

chunked_input.extend(chunks)

ret_value = []
batch_split = [
chunked_input[i : i + batch_size]
for i in range(0, len(chunked_input), batch_size)
]

for batch in batch_split:
if model.provider == Provider.OPENAI.value:
ret_value.extend(_generate_embeddings_openai(model, batch))
elif model.provider == Provider.BEDROCK.value:
ret_value.extend(_generate_embeddings_bedrock(model, batch, task))
elif model.provider == Provider.SAGEMAKER.value:
ret_value.extend(_generate_embeddings_sagemaker(model, batch))
else:
raise CommonError(f"Unknown provider: {model.provider}")

return ret_value
# Combine embeddings from the same original input
final_embeddings = []
for chunks_idx in chunk_mapping:
if len(chunks_idx) == 1:
final_embeddings.append(ret_value[chunks_idx[0]])
else:
# Average the embeddings
chunk_embeddings = [ret_value[idx] for idx in chunks_idx]
avg_embedding = [
sum(values) / len(values) for values in zip(*chunk_embeddings)
]
final_embeddings.append(avg_embedding)

return final_embeddings
except Exception as e:
logger.error(f"Error in generate_embeddings: {str(e)}")
raise CommonError(f"Failed to generate embeddings: {str(e)}")


def get_embeddings_models():
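
To illustrate the chunk-and-average strategy these changes introduce, here is a minimal standalone sketch. The `dummy_embed` helper and the tiny `char_limit` are stand-ins for the real provider calls and model limits; only the chunking and element-wise averaging mirror the logic in the diff above.

```python
# Minimal sketch of the chunk-and-average approach (illustrative only).
# dummy_embed stands in for the real _generate_embeddings_* provider calls.


def dummy_embed(texts: list[str]) -> list[list[float]]:
    # Fake embedding: one 3-dimensional vector per input text.
    return [[float(len(t)), 1.0, 0.0] for t in texts]


def embed_with_chunking(texts: list[str], char_limit: int = 8) -> list[list[float]]:
    chunked_input = []
    chunk_mapping = []
    current_idx = 0

    # Split each text into char_limit-sized chunks and remember which chunks belong to it.
    for text in texts:
        chunks = [
            text[i : i + char_limit] for i in range(0, len(text), char_limit)
        ] or [text]
        chunk_mapping.append(list(range(current_idx, current_idx + len(chunks))))
        current_idx += len(chunks)
        chunked_input.extend(chunks)

    vectors = dummy_embed(chunked_input)

    # Average the chunk vectors element-wise so each original text keeps one embedding.
    final_embeddings = []
    for chunk_indices in chunk_mapping:
        group = [vectors[i] for i in chunk_indices]
        final_embeddings.append([sum(vals) / len(vals) for vals in zip(*group)])
    return final_embeddings


if __name__ == "__main__":
    print(embed_with_chunking(["short", "a much longer input that gets split into chunks"]))
```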